QUDA v1.1.0
A library for QCD on GPUs
transfer.cpp
#include <transfer.h>

#include <blas_quda.h>
#include <multigrid.h>
#include <malloc_quda.h>

#include <iostream>
#include <algorithm>
#include <vector>
#include <limits>

namespace quda {

  /*
   * For the staggered case there is no spin blocking; however, we block over
   * even and odd sites so that chirality is preserved (which is straightforward).
   */
  Transfer::Transfer(const std::vector<ColorSpinorField *> &B, int Nvec, int n_block_ortho, int *geo_bs, int spin_bs,
                     QudaPrecision null_precision, const QudaTransferType transfer_type, TimeProfile &profile) :
    B(B),
    Nvec(Nvec),
    NblockOrtho(n_block_ortho),
    null_precision(null_precision),
    V_h(nullptr),
    V_d(nullptr),
    fine_tmp_h(nullptr),
    fine_tmp_d(nullptr),
    coarse_tmp_h(nullptr),
    coarse_tmp_d(nullptr),
    geo_bs(nullptr),
    fine_to_coarse_h(nullptr),
    coarse_to_fine_h(nullptr),
    fine_to_coarse_d(nullptr),
    coarse_to_fine_d(nullptr),
    spin_bs(spin_bs),
    spin_map(0),
    nspin_fine(B[0]->Nspin()),
    site_subset(QUDA_FULL_SITE_SUBSET),
    parity(QUDA_INVALID_PARITY),
    enable_gpu(false),
    enable_cpu(false),
    use_gpu(true),
    transfer_type(transfer_type),
    flops_(0),
    profile(profile)
  {
    postTrace();
    int ndim = B[0]->Ndim();

    // Only loop over four dimensions for now; we don't have to worry about
    // the fifth dimension until we hit chiral fermions.
    for (int d = 0; d < 4; d++) {
      while (geo_bs[d] > 0) {
        if (d == 0 && B[0]->X(0) == geo_bs[0])
          warningQuda("X-dimension length %d cannot block length %d", B[0]->X(0), geo_bs[0]);
        else if ((B[0]->X(d) / geo_bs[d] + 1) % 2 == 0)
          warningQuda("Indexing does not (yet) support odd coarse dimensions: X(%d) = %d", d, B[0]->X(d) / geo_bs[d]);
        else if ((B[0]->X(d) / geo_bs[d]) * geo_bs[d] != B[0]->X(d))
          warningQuda("cannot block dim[%d]=%d with block size = %d", d, B[0]->X(d), geo_bs[d]);
        else
          break; // this is a valid block size so let's use it
        geo_bs[d] /= 2;
      }
      if (geo_bs[d] == 0) errorQuda("Unable to block dimension %d", d);
    }
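    // Illustrative example of the legalization above (editor's note, not from
    // the original source): for a local extent X(d) = 12 and a requested block
    // of 8, the loop rejects 8 (integer division gives a coarse extent of 1,
    // which is odd), halves to 4 and rejects that too (12/4 = 3 is odd), then
    // halves to 2 and accepts it (12/2 = 6 is even and exact), so this
    // dimension ends up blocked by 2.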

    if (ndim > 4) {
      geo_bs[4] = 1;
      warningQuda("5th dimension block size is being set to 1. This is a benign side effect of staggered fermions");
    }

    this->geo_bs = new int[ndim];
    int total_block_size = 1;
    for (int d = 0; d < ndim; d++) {
      this->geo_bs[d] = geo_bs[d];
      total_block_size *= geo_bs[d];
    }

    // Various consistency checks for optimized KD "transfers"
    if (transfer_type == QUDA_TRANSFER_OPTIMIZED_KD) {

      // Aggregation size is "technically" 1 for optimized KD
      if (total_block_size != 1)
        errorQuda("Invalid total geometric block size %d for transfer type optimized-kd, must be 1", total_block_size);

      // The number of coarse dof is technically fineColor for optimized KD
      if (Nvec != B[0]->Ncolor())
        errorQuda("Invalid Nvec %d for optimized-kd aggregation, must be fine color %d", Nvec, B[0]->Ncolor());
    }

    std::string block_str = std::to_string(geo_bs[0]);
    for (int d = 1; d < ndim; d++) block_str += " x " + std::to_string(geo_bs[d]);
    if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Transfer: using block size %s\n", block_str.c_str());

    if (transfer_type == QUDA_TRANSFER_COARSE_KD) {
      for (int d = 0; d < 4; d++) {
        if (geo_bs[d] != 2) errorQuda("Invalid staggered KD block size %d for dimension %d, must be 2", geo_bs[d], d);
      }
      if (Nvec != 24) errorQuda("Invalid number of coarse vectors %d for staggered KD multigrid, must be 24", Nvec);
    }

    createV(B[0]->Location());          // allocate V field
    createTmp(QUDA_CPU_FIELD_LOCATION); // allocate temporaries

    // allocate and compute the fine-to-coarse and coarse-to-fine site maps
    fine_to_coarse_h = static_cast<int *>(pool_pinned_malloc(B[0]->Volume() * sizeof(int)));
    coarse_to_fine_h = static_cast<int *>(pool_pinned_malloc(B[0]->Volume() * sizeof(int)));

    if (enable_gpu) {
      fine_to_coarse_d = static_cast<int *>(pool_device_malloc(B[0]->Volume() * sizeof(int)));
      coarse_to_fine_d = static_cast<int *>(pool_device_malloc(B[0]->Volume() * sizeof(int)));
    }

    createGeoMap(geo_bs);

    // allocate the fine-to-coarse spin map
    spin_map = static_cast<int **>(safe_malloc(nspin_fine * sizeof(int *)));
    for (int s = 0; s < B[0]->Nspin(); s++) spin_map[s] = static_cast<int *>(safe_malloc(2 * sizeof(int)));
    createSpinMap(spin_bs);

    reset();
    postTrace();
  }

  void Transfer::createV(QudaFieldLocation location) const
  {
    postTrace();

    // create the storage for the final block orthogonal elements
    ColorSpinorParam param(*B[0]); // takes the geometry from the null-space vectors

    // the ordering of the V vector is defined by these parameters and
    // the Packed functions in ColorSpinorFieldOrder

    param.nSpin = B[0]->Nspin();          // spin has direct mapping
    param.nColor = B[0]->Ncolor() * Nvec; // nColor = number of colors * number of vectors
    param.nVec = Nvec;
    param.create = QUDA_NULL_FIELD_CREATE;
    // the V field is defined on all sites regardless of B field (maybe the B fields are always full?)
    if (param.siteSubset == QUDA_PARITY_SITE_SUBSET) {
      // keep it the same for staggered:
      param.siteSubset = QUDA_FULL_SITE_SUBSET;
      param.x[0] *= 2;
    }
    param.location = location;
    param.fieldOrder = location == QUDA_CUDA_FIELD_LOCATION ? QUDA_FLOAT2_FIELD_ORDER : QUDA_SPACE_SPIN_COLOR_FIELD_ORDER;
    param.setPrecision(location == QUDA_CUDA_FIELD_LOCATION ? null_precision : B[0]->Precision());

    if (transfer_type == QUDA_TRANSFER_COARSE_KD || transfer_type == QUDA_TRANSFER_OPTIMIZED_KD) {
      // Need to create V_d and V_h as metadata containers, but we don't
      // actually need to allocate the memory.
      param.create = QUDA_REFERENCE_FIELD_CREATE;

      // These never get accessed; `nullptr` on its own leads to an error in texture binding
      param.v = (void *)std::numeric_limits<uint64_t>::max();
      param.norm = (void *)std::numeric_limits<uint64_t>::max();
    }

    if (location == QUDA_CUDA_FIELD_LOCATION) {
      V_d = ColorSpinorField::Create(param);
      enable_gpu = true;
    } else {
      V_h = ColorSpinorField::Create(param);
      enable_cpu = true;
    }
    postTrace();
  }

  void Transfer::createTmp(QudaFieldLocation location) const
  {
    // The CPU temporaries are needed for creating geometry mappings.
    if ((transfer_type == QUDA_TRANSFER_COARSE_KD || transfer_type == QUDA_TRANSFER_OPTIMIZED_KD)
        && location != QUDA_CPU_FIELD_LOCATION) {
      return;
    }

    postTrace();
    ColorSpinorParam param(*B[0]);
    param.create = QUDA_NULL_FIELD_CREATE;
    param.location = location;
    param.fieldOrder = location == QUDA_CUDA_FIELD_LOCATION ? QUDA_FLOAT2_FIELD_ORDER : QUDA_SPACE_SPIN_COLOR_FIELD_ORDER;
    if (param.Precision() < QUDA_SINGLE_PRECISION) param.setPrecision(QUDA_SINGLE_PRECISION);

    if (location == QUDA_CUDA_FIELD_LOCATION) {
      if (fine_tmp_d && coarse_tmp_d) return;
      fine_tmp_d = ColorSpinorField::Create(param);
      coarse_tmp_d = fine_tmp_d->CreateCoarse(geo_bs, spin_bs, Nvec);
    } else {
      fine_tmp_h = ColorSpinorField::Create(param);
      coarse_tmp_h = fine_tmp_h->CreateCoarse(geo_bs, spin_bs, Nvec);
    }
    postTrace();
  }

  void Transfer::initializeLazy(QudaFieldLocation location) const
  {
    if (!enable_cpu && !enable_gpu) errorQuda("Neither CPU nor GPU coarse fields initialized");

    // delay allocating this temporary until we need it
    if (B[0]->Location() == QUDA_CUDA_FIELD_LOCATION) createTmp(QUDA_CUDA_FIELD_LOCATION);

    switch (location) {
    case QUDA_CUDA_FIELD_LOCATION:
      if (enable_gpu) return;
      createV(location);
      if (transfer_type == QUDA_TRANSFER_AGGREGATE) *V_d = *V_h;
      createTmp(location);
      fine_to_coarse_d = static_cast<int *>(pool_device_malloc(B[0]->Volume() * sizeof(int)));
      coarse_to_fine_d = static_cast<int *>(pool_device_malloc(B[0]->Volume() * sizeof(int)));
      qudaMemcpy(fine_to_coarse_d, fine_to_coarse_h, B[0]->Volume() * sizeof(int), cudaMemcpyHostToDevice);
      qudaMemcpy(coarse_to_fine_d, coarse_to_fine_h, B[0]->Volume() * sizeof(int), cudaMemcpyHostToDevice);
      break;
    case QUDA_CPU_FIELD_LOCATION:
      if (enable_cpu) return;
      createV(location);
      if (transfer_type == QUDA_TRANSFER_AGGREGATE) *V_h = *V_d;
      break;
    default:
      errorQuda("Unknown location %d", location);
    }
  }

  // for resetting the Transfer when the null vectors have changed
  void Transfer::reset()
  {
    postTrace();

    if (transfer_type == QUDA_TRANSFER_COARSE_KD || transfer_type == QUDA_TRANSFER_OPTIMIZED_KD) { return; }
    if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Transfer: block orthogonalizing\n");

    if (B[0]->Location() == QUDA_CUDA_FIELD_LOCATION) {
      if (!enable_gpu) errorQuda("enable_gpu = %d so cannot reset", enable_gpu);
      BlockOrthogonalize(*V_d, B, fine_to_coarse_d, coarse_to_fine_d, geo_bs, spin_bs, NblockOrtho);
      if (enable_cpu) {
        *V_h = *V_d;
        if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Transferred prolongator back to CPU\n");
      }
    } else {
      if (!enable_cpu) errorQuda("enable_cpu = %d so cannot reset", enable_cpu);
      BlockOrthogonalize(*V_h, B, fine_to_coarse_h, coarse_to_fine_h, geo_bs, spin_bs, NblockOrtho);
      if (enable_gpu) { // if the GPU fields have been initialized then we need to update
        *V_d = *V_h;
        if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Transferred prolongator to GPU\n");
      }
    }
    postTrace();
  }

  Transfer::~Transfer()
  {
    if (spin_map) {
      for (int s = 0; s < nspin_fine; s++) {
        if (spin_map[s]) host_free(spin_map[s]);
      }
      host_free(spin_map);
    }
    if (coarse_to_fine_d) pool_device_free(coarse_to_fine_d);
    if (fine_to_coarse_d) pool_device_free(fine_to_coarse_d);
    if (coarse_to_fine_h) pool_pinned_free(coarse_to_fine_h);
    if (fine_to_coarse_h) pool_pinned_free(fine_to_coarse_h);
    if (V_h) delete V_h;
    if (V_d) delete V_d;

    if (fine_tmp_h) delete fine_tmp_h;
    if (fine_tmp_d) delete fine_tmp_d;

    if (coarse_tmp_h) delete coarse_tmp_h;
    if (coarse_tmp_d) delete coarse_tmp_d;

    if (geo_bs) delete[] geo_bs;
  }

  // sets whether the transfer operator acts on full fields or single-parity fields
  void Transfer::setSiteSubset(QudaSiteSubset site_subset_, QudaParity parity_)
  {
    if (site_subset_ == QUDA_PARITY_SITE_SUBSET && parity_ != QUDA_EVEN_PARITY && parity_ != QUDA_ODD_PARITY)
      errorQuda("Undefined parity %d", parity_);
    parity = parity_;

    if (site_subset == site_subset_) return;
    site_subset = site_subset_;
  }

  struct Int2 {
    int x, y;
    Int2() : x(0), y(0) { }
    Int2(int x, int y) : x(x), y(y) { }

    bool operator<(const Int2 &a) const { return (x < a.x) || (x == a.x && y < a.y); }
  };

  // compute the fine-to-coarse site map
  void Transfer::createGeoMap(int *geo_bs)
  {
    int x[QUDA_MAX_DIM];

    ColorSpinorField &fine(*fine_tmp_h);
    ColorSpinorField &coarse(*coarse_tmp_h);

    // compute the coarse grid point for every site (assuming parity ordering currently)
    for (size_t i = 0; i < fine.Volume(); i++) {
      // compute the lattice-site index for this offset index
      fine.LatticeIndex(x, i);

      //printfQuda("fine idx %d = fine (%d,%d,%d,%d), ", i, x[0], x[1], x[2], x[3]);

      // compute the corresponding coarse-grid index given the block size
      for (int d = 0; d < fine.Ndim(); d++) x[d] /= geo_bs[d];

      // compute the coarse-offset index and store in fine_to_coarse
      int k;
      coarse.OffsetIndex(k, x); // this index is parity ordered
      fine_to_coarse_h[i] = k;

      //printfQuda("coarse after (%d,%d,%d,%d), coarse idx %d\n", x[0], x[1], x[2], x[3], k);
    }

    // now create an inverse-like variant of this map by sorting fine sites by their coarse index
    std::vector<Int2> geo_sort(B[0]->Volume());
    for (unsigned int i = 0; i < geo_sort.size(); i++) geo_sort[i] = Int2(fine_to_coarse_h[i], i);
    std::sort(geo_sort.begin(), geo_sort.end());
    for (unsigned int i = 0; i < geo_sort.size(); i++) coarse_to_fine_h[i] = geo_sort[i].y;

    if (enable_gpu) {
      qudaMemcpy(fine_to_coarse_d, fine_to_coarse_h, B[0]->Volume() * sizeof(int), cudaMemcpyHostToDevice);
      qudaMemcpy(coarse_to_fine_d, coarse_to_fine_h, B[0]->Volume() * sizeof(int), cudaMemcpyHostToDevice);
    }
  }

  // compute the fine-spin and checkerboard to coarse-spin map
  void Transfer::createSpinMap(int spin_bs)
  {
    if (spin_bs == 0) { // staggered
      spin_map[0][0] = 0; // fine even
      spin_map[0][1] = 1; // fine odd
    } else {
      // e.g., for Wilson-type fermions with Nspin = 4 and spin_bs = 2, fine spins
      // {0,1} map to coarse spin 0 and {2,3} to coarse spin 1 (chirality blocking)
      for (int s = 0; s < B[0]->Nspin(); s++) {
        spin_map[s][0] = s / spin_bs; // not staggered, doesn't care about parity
        spin_map[s][1] = s / spin_bs;
      }
    }
  }

  // apply the prolongator
  void Transfer::P(ColorSpinorField &out, const ColorSpinorField &in) const
  {
    profile.TPSTART(QUDA_PROFILE_COMPUTE);

    ColorSpinorField *input = const_cast<ColorSpinorField *>(&in);
    ColorSpinorField *output = &out;
    initializeLazy(use_gpu ? QUDA_CUDA_FIELD_LOCATION : QUDA_CPU_FIELD_LOCATION);
    const int *fine_to_coarse = use_gpu ? fine_to_coarse_d : fine_to_coarse_h;

    if (transfer_type == QUDA_TRANSFER_COARSE_KD) {
      StaggeredProlongate(*output, *input, fine_to_coarse, spin_map, parity);
      flops_ += 0; // it's only a permutation
    } else if (transfer_type == QUDA_TRANSFER_OPTIMIZED_KD) {

      if (in.SiteSubset() != QUDA_FULL_SITE_SUBSET) errorQuda("Optimized KD op only supports full-parity spinors");

      if (output->VolumeCB() != input->VolumeCB()) errorQuda("Optimized KD transfer is only between equal volumes");

      // the optimized KD op acts on fine spinors
      if (out.SiteSubset() == QUDA_PARITY_SITE_SUBSET) {
        *output = input->Even();
      } else {
        *output = *input;
      }
      flops_ += 0;

    } else if (transfer_type == QUDA_TRANSFER_AGGREGATE) {

      const ColorSpinorField *V = use_gpu ? V_d : V_h;

      if (use_gpu) {
        if (in.Location() == QUDA_CPU_FIELD_LOCATION) input = coarse_tmp_d;
        if (out.Location() == QUDA_CPU_FIELD_LOCATION || out.GammaBasis() != V->GammaBasis())
          output = (out.SiteSubset() == QUDA_FULL_SITE_SUBSET) ? fine_tmp_d : &fine_tmp_d->Even();
        if (!enable_gpu) errorQuda("not created with enable_gpu set, so cannot run on GPU");
      } else {
        if (out.Location() == QUDA_CUDA_FIELD_LOCATION)
          output = (out.SiteSubset() == QUDA_FULL_SITE_SUBSET) ? fine_tmp_h : &fine_tmp_h->Even();
      }

      *input = in; // copy the source into the input field (aliasing is handled automatically)

      if (V->SiteSubset() == QUDA_PARITY_SITE_SUBSET && out.SiteSubset() == QUDA_FULL_SITE_SUBSET)
        errorQuda("Cannot prolongate to a full field since we only have single-parity null-space components");

      if ((V->Nspin() != 1) && ((output->GammaBasis() != V->GammaBasis()) || (input->GammaBasis() != V->GammaBasis()))) {
        errorQuda("Cannot apply prolongator using fields in a different basis from the null space (%d,%d) != %d",
                  output->GammaBasis(), in.GammaBasis(), V->GammaBasis());
      }

      Prolongate(*output, *input, *V, Nvec, fine_to_coarse, spin_map, parity);

      flops_ += 8 * in.Ncolor() * out.Ncolor() * out.VolumeCB() * out.SiteSubset();
    } else {
      errorQuda("Invalid transfer type in prolongate");
    }

    out = *output; // copy result to out field (aliasing is handled automatically)

    profile.TPSTOP(QUDA_PROFILE_COMPUTE);
  }

  // apply the restrictor
  void Transfer::R(ColorSpinorField &out, const ColorSpinorField &in) const
  {
    profile.TPSTART(QUDA_PROFILE_COMPUTE);

    ColorSpinorField *input = &const_cast<ColorSpinorField &>(in);
    ColorSpinorField *output = &out;
    initializeLazy(use_gpu ? QUDA_CUDA_FIELD_LOCATION : QUDA_CPU_FIELD_LOCATION);
    const int *fine_to_coarse = use_gpu ? fine_to_coarse_d : fine_to_coarse_h;
    const int *coarse_to_fine = use_gpu ? coarse_to_fine_d : coarse_to_fine_h;

    if (transfer_type == QUDA_TRANSFER_COARSE_KD) {
      StaggeredRestrict(*output, *input, fine_to_coarse, spin_map, parity);
      flops_ += 0; // it's only a permutation
    } else if (transfer_type == QUDA_TRANSFER_OPTIMIZED_KD) {

      if (out.SiteSubset() != QUDA_FULL_SITE_SUBSET) errorQuda("Optimized KD op only supports full-parity spinors");

      if (output->VolumeCB() != input->VolumeCB()) errorQuda("Optimized KD transfer is only between equal volumes");

      // the optimized KD op acts on fine spinors
      if (in.SiteSubset() == QUDA_PARITY_SITE_SUBSET) {
        output->Even() = *input;
        blas::zero(output->Odd());
      } else {
        *output = *input;
      }
      flops_ += 0;
    } else if (transfer_type == QUDA_TRANSFER_AGGREGATE) {

      const ColorSpinorField *V = use_gpu ? V_d : V_h;

      if (use_gpu) {
        if (out.Location() == QUDA_CPU_FIELD_LOCATION) output = coarse_tmp_d;
        if (in.Location() == QUDA_CPU_FIELD_LOCATION || in.GammaBasis() != V->GammaBasis())
          input = (in.SiteSubset() == QUDA_FULL_SITE_SUBSET) ? fine_tmp_d : &fine_tmp_d->Even();
        if (!enable_gpu) errorQuda("not created with enable_gpu set, so cannot run on GPU");
      } else {
        if (in.Location() == QUDA_CUDA_FIELD_LOCATION)
          input = (in.SiteSubset() == QUDA_FULL_SITE_SUBSET) ? fine_tmp_h : &fine_tmp_h->Even();
      }

      *input = in; // copy the source into the input field (aliasing is handled automatically)

      if (V->SiteSubset() == QUDA_PARITY_SITE_SUBSET && in.SiteSubset() == QUDA_FULL_SITE_SUBSET)
        errorQuda("Cannot restrict a full field since we only have single-parity null-space components");

      if (V->Nspin() != 1 && (output->GammaBasis() != V->GammaBasis() || input->GammaBasis() != V->GammaBasis()))
        errorQuda("Cannot apply restrictor using fields in a different basis from the null space (%d,%d) != %d",
                  out.GammaBasis(), input->GammaBasis(), V->GammaBasis());

      Restrict(*output, *input, *V, Nvec, fine_to_coarse, coarse_to_fine, spin_map, parity);

      flops_ += 8 * out.Ncolor() * in.Ncolor() * in.VolumeCB() * in.SiteSubset();
    } else {
      errorQuda("Invalid transfer type in restrict");
    }

    out = *output; // copy result to out field (aliasing is handled automatically)

    // only need to synchronize if we are transferring from GPU to CPU
    if (out.Location() == QUDA_CPU_FIELD_LOCATION && in.Location() == QUDA_CUDA_FIELD_LOCATION)
      qudaDeviceSynchronize();

    profile.TPSTOP(QUDA_PROFILE_COMPUTE);
  }

  double Transfer::flops() const
  {
    double rtn = flops_;
    flops_ = 0;
    return rtn;
  }

} // namespace quda
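
// ---------------------------------------------------------------------------
// Illustrative usage sketch (editor's addition, not part of transfer.cpp):
// how a multigrid level might drive the Transfer operator defined above.
// The null-vector setup, the fields `fine` and `coarse`, and the chosen block
// sizes are hypothetical placeholders; only the Transfer calls themselves
// follow the interface implemented in this file.
//
//   using namespace quda;
//
//   std::vector<ColorSpinorField *> null_vectors = /* near-null vectors generated elsewhere */;
//   int geo_bs[QUDA_MAX_DIM] = {4, 4, 4, 4};   // aggregate 4^4 fine sites per coarse site
//   TimeProfile profile("transfer-sketch");
//
//   Transfer T(null_vectors, 24 /* Nvec */, 1 /* n_block_ortho */, geo_bs, 2 /* spin_bs */,
//              QUDA_HALF_PRECISION, QUDA_TRANSFER_AGGREGATE, profile);
//
//   T.R(coarse, fine);   // restrict a fine-grid residual to the coarse grid
//   T.P(fine, coarse);   // prolongate a coarse-grid correction back to the fine grid
// ---------------------------------------------------------------------------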