QUDA v1.1.0
A library for QCD on GPUs
dirac_coarse.cpp
#include <string.h>
#include <multigrid.h>
#include <algorithm>

namespace quda {

  DiracCoarse::DiracCoarse(const DiracParam &param, bool gpu_setup, bool mapped) :
    Dirac(param),
    mass(param.mass),
    mu(param.mu),
    mu_factor(param.mu_factor),
    transfer(param.transfer),
    dirac(param.dirac),
    need_bidirectional(param.need_bidirectional),
    use_mma(param.use_mma),
    Y_h(nullptr),
    X_h(nullptr),
    Xinv_h(nullptr),
    Yhat_h(nullptr),
    Y_d(nullptr),
    X_d(nullptr),
    Xinv_d(nullptr),
    Yhat_d(nullptr),
    enable_gpu(false),
    enable_cpu(false),
    gpu_setup(gpu_setup),
    init_gpu(gpu_setup),
    init_cpu(!gpu_setup),
    mapped(mapped)
  {
    if (gpu_setup == false) errorQuda("CPU setup of the coarse Dirac operator is disabled");
    initializeCoarse();
  }

  DiracCoarse::DiracCoarse(const DiracParam &param,
                           cpuGaugeField *Y_h, cpuGaugeField *X_h, cpuGaugeField *Xinv_h,
                           cpuGaugeField *Yhat_h, // cpu link fields
                           cudaGaugeField *Y_d, cudaGaugeField *X_d, cudaGaugeField *Xinv_d,
                           cudaGaugeField *Yhat_d) // gpu link field
    :
    Dirac(param),
    mass(param.mass),
    mu(param.mu),
    mu_factor(param.mu_factor),
    transfer(nullptr),
    dirac(nullptr),
    need_bidirectional(false),
    use_mma(param.use_mma),
    Y_h(Y_h),
    X_h(X_h),
    Xinv_h(Xinv_h),
    Yhat_h(Yhat_h),
    Y_d(Y_d),
    X_d(X_d),
    Xinv_d(Xinv_d),
    Yhat_d(Yhat_d),
    enable_gpu(Y_d ? true : false),
    enable_cpu(Y_h ? true : false),
    gpu_setup(true),
    init_gpu(enable_gpu ? false : true),
    init_cpu(enable_cpu ? false : true),
    mapped(Y_d->MemType() == QUDA_MEMORY_MAPPED)
  {
  }

  DiracCoarse::DiracCoarse(const DiracCoarse &dirac, const DiracParam &param) :
    Dirac(param),
    mass(param.mass),
    mu(param.mu),
    mu_factor(param.mu_factor),
    transfer(param.transfer),
    dirac(param.dirac),
    need_bidirectional(param.need_bidirectional),
    use_mma(param.use_mma),
    Y_h(dirac.Y_h),
    X_h(dirac.X_h),
    Xinv_h(dirac.Xinv_h),
    Yhat_h(dirac.Yhat_h),
    Y_d(dirac.Y_d),
    X_d(dirac.X_d),
    Xinv_d(dirac.Xinv_d),
    Yhat_d(dirac.Yhat_d),
    enable_gpu(dirac.enable_gpu),
    enable_cpu(dirac.enable_cpu),
    gpu_setup(dirac.gpu_setup),
    init_gpu(enable_gpu ? false : true),
    init_cpu(enable_cpu ? false : true),
    mapped(dirac.mapped)
  {
  }

  DiracCoarse::~DiracCoarse()
  {
    if (init_cpu) {
      if (Y_h) delete Y_h;
      if (X_h) delete X_h;
      if (Xinv_h) delete Xinv_h;
      if (Yhat_h) delete Yhat_h;
    }
    if (init_gpu) {
      if (Y_d) delete Y_d;
      if (X_d) delete X_d;
      if (Xinv_d) delete Xinv_d;
      if (Yhat_d) delete Yhat_d;
    }
  }

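  // Note on ownership: the first constructor allocates the coarse fields
  // itself (via initializeCoarse) and so sets the init_gpu/init_cpu flags,
  // while the second wraps caller-provided link fields and the third
  // aliases the fields of an existing DiracCoarse; in those cases the init
  // flag is cleared for whichever side was passed in, so the destructor
  // above only frees fields that were actually allocated by this object.
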
  void DiracCoarse::createY(bool gpu, bool mapped) const
  {
    int ndim = transfer->Vectors().Ndim();
    // FIXME MRHS NDIM hack
    if (ndim == 5 && transfer->Vectors().Nspin() != 4) ndim = 4; // forced case for staggered, coarsened staggered
    int x[QUDA_MAX_DIM];
    const int *geo_bs = transfer->Geo_bs(); // Number of coarse sites.
    for (int i = 0; i < ndim; i++) x[i] = transfer->Vectors().X(i)/geo_bs[i];
    int Nc_c = transfer->nvec(); // Coarse Color
    // Coarse Spin
    int Ns_c = (transfer->Spin_bs() == 0) ? 2 : transfer->Vectors().Nspin() / transfer->Spin_bs();
    GaugeFieldParam gParam;
    memcpy(gParam.x, x, QUDA_MAX_DIM*sizeof(int));
    gParam.nColor = Nc_c*Ns_c;
    gParam.reconstruct = QUDA_RECONSTRUCT_NO;
    gParam.order = gpu ? QUDA_FLOAT2_GAUGE_ORDER : QUDA_QDP_GAUGE_ORDER;
    gParam.link_type = QUDA_COARSE_LINKS;
    gParam.t_boundary = QUDA_PERIODIC_T;
    gParam.create = QUDA_ZERO_FIELD_CREATE;
    // use null-space precision for coarse links on gpu
    gParam.setPrecision(transfer->NullPrecision(gpu ? QUDA_CUDA_FIELD_LOCATION : QUDA_CPU_FIELD_LOCATION));
    gParam.nDim = ndim;
    gParam.siteSubset = QUDA_FULL_SITE_SUBSET;
    gParam.ghostExchange = QUDA_GHOST_EXCHANGE_PAD;
    gParam.nFace = 1;
    gParam.geometry = QUDA_COARSE_GEOMETRY;
    if (mapped) gParam.mem_type = QUDA_MEMORY_MAPPED;

    int pad = std::max( { (x[0]*x[1]*x[2])/2, (x[1]*x[2]*x[3])/2, (x[0]*x[2]*x[3])/2, (x[0]*x[1]*x[3])/2 } );
    gParam.pad = gpu ? gParam.nFace * pad * 2 : 0; // factor of 2 since we have to store bi-directional ghost zone

    if (gpu) Y_d = new cudaGaugeField(gParam);
    else Y_h = new cpuGaugeField(gParam);

    gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO;
    gParam.nFace = 0;
    gParam.geometry = QUDA_SCALAR_GEOMETRY;
    gParam.pad = 0;

    if (gpu) X_d = new cudaGaugeField(gParam);
    else X_h = new cpuGaugeField(gParam);
  }

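  // Note: in createY above, the coarse lattice dimensions are the fine
  // dimensions divided by the geometric block size, and spin and color are
  // fused into a single color index of size Ns_c*Nc_c, so each coarse link
  // is an (Ns_c*Nc_c)^2 complex matrix. Y carries the hopping terms
  // (QUDA_COARSE_GEOMETRY, with a pad sized to the largest half-face and
  // doubled for the bi-directional ghost zone), while X carries the on-site
  // "coarse clover" term (QUDA_SCALAR_GEOMETRY, no ghost exchange).
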
  void DiracCoarse::createYhat(bool gpu) const
  {
    int ndim = transfer->Vectors().Ndim();
    if (ndim == 5 && transfer->Vectors().Nspin() != 4) ndim = 4; // forced case for staggered, coarsened staggered
    int x[QUDA_MAX_DIM];
    const int *geo_bs = transfer->Geo_bs(); // Number of coarse sites.
    for (int i = 0; i < ndim; i++) x[i] = transfer->Vectors().X(i)/geo_bs[i];
    int Nc_c = transfer->nvec(); // Coarse Color
    int Ns_c = (transfer->Spin_bs() == 0) ? 2 : transfer->Vectors().Nspin() / transfer->Spin_bs();

    GaugeFieldParam gParam;
    memcpy(gParam.x, x, QUDA_MAX_DIM*sizeof(int));
    gParam.nColor = Nc_c*Ns_c;
    gParam.reconstruct = QUDA_RECONSTRUCT_NO;
    gParam.order = gpu ? QUDA_FLOAT2_GAUGE_ORDER : QUDA_QDP_GAUGE_ORDER;
    gParam.link_type = QUDA_COARSE_LINKS;
    gParam.t_boundary = QUDA_PERIODIC_T;
    gParam.create = QUDA_ZERO_FIELD_CREATE;
    // use null-space precision for preconditioned links on gpu
    gParam.setPrecision(transfer->NullPrecision(gpu ? QUDA_CUDA_FIELD_LOCATION : QUDA_CPU_FIELD_LOCATION));
    gParam.nDim = ndim;
    gParam.siteSubset = QUDA_FULL_SITE_SUBSET;
    gParam.ghostExchange = QUDA_GHOST_EXCHANGE_PAD;
    gParam.nFace = 1;
    gParam.geometry = QUDA_COARSE_GEOMETRY;

    int pad = std::max( { (x[0]*x[1]*x[2])/2, (x[1]*x[2]*x[3])/2, (x[0]*x[2]*x[3])/2, (x[0]*x[1]*x[3])/2 } );
    gParam.pad = gpu ? gParam.nFace * pad * 2 : 0; // factor of 2 since we have to store bi-directional ghost zone

    if (gpu) Yhat_d = new cudaGaugeField(gParam);
    else Yhat_h = new cpuGaugeField(gParam);

    gParam.setPrecision(gpu ? X_d->Precision() : X_h->Precision());
    gParam.ghostExchange = QUDA_GHOST_EXCHANGE_NO;
    gParam.nFace = 0;
    gParam.geometry = QUDA_SCALAR_GEOMETRY;
    gParam.pad = 0;

    if (gpu) Xinv_d = new cudaGaugeField(gParam);
    else Xinv_h = new cpuGaugeField(gParam);
  }

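  // Note: createYhat only allocates Yhat (the preconditioned coarse links)
  // and Xinv (the inverse of the coarse clover term X); both are filled
  // later by calculateYhat. Xinv is allocated at the same precision as X,
  // whereas Yhat uses the null-space precision on the gpu, matching the
  // comment above.
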
  void DiracCoarse::initializeCoarse()
  {
    createY(gpu_setup, mapped);

    if (!gpu_setup) {

      dirac->createCoarseOp(*Y_h, *X_h, *transfer, kappa, mass, Mu(), MuFactor());
      // save the intermediate tunecache after the UV and VUV tune
      saveTuneCache();
      if (getVerbosity() >= QUDA_VERBOSE) printfQuda("About to build the preconditioned coarse clover\n");

      createYhat(gpu_setup);

      if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Finished building the preconditioned coarse clover\n");
      if (getVerbosity() >= QUDA_VERBOSE) printfQuda("About to create the preconditioned coarse op\n");

      createPreconditionedCoarseOp(*Yhat_h, *Xinv_h, *Y_h, *X_h);

    } else {

      // The following fancy copies reduce the number of gauge field
      // copies (from and to QUDA_MILC_GAUGE_ORDER) by 2: one for X
      // and one for Y, both to QUDA_MILC_GAUGE_ORDER.
      if (use_mma && dirac->isCoarse()) {

        constexpr QudaGaugeFieldOrder gOrder = QUDA_MILC_GAUGE_ORDER;

        GaugeFieldParam Y_param(*Y_d);
        GaugeFieldParam X_param(*X_d);

        Y_param.order = gOrder;
        X_param.order = gOrder;

        GaugeField *Y_order = cudaGaugeField::Create(Y_param);
        GaugeField *X_order = cudaGaugeField::Create(X_param);

        dirac->createCoarseOp(*Y_order, *X_order, *transfer, kappa, mass, Mu(), MuFactor());

        // save the intermediate tunecache after the UV and VUV tune
        saveTuneCache();

        X_d->copy(*X_order);

        if (getVerbosity() >= QUDA_VERBOSE) printfQuda("About to build the preconditioned coarse clover\n");

        createYhat(gpu_setup);

        if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Finished building the preconditioned coarse clover\n");
        if (getVerbosity() >= QUDA_VERBOSE) printfQuda("About to create the preconditioned coarse op\n");

        calculateYhat(*Yhat_d, *Xinv_d, *Y_order, *X_order, use_mma);

        Y_d->copy(*Y_order);

        // this extra exchange shouldn't be needed, but at present the
        // copy from Y_order to Y_d doesn't preserve the
        // bi-directional halo (in_offset isn't set in the copy
        // routine)
        Y_d->exchangeGhost(QUDA_LINK_BIDIRECTIONAL);

        delete Y_order;
        delete X_order;

      } else {

        dirac->createCoarseOp(*Y_d, *X_d, *transfer, kappa, mass, Mu(), MuFactor());
        // save the intermediate tunecache after the UV and VUV tune
        saveTuneCache();

        if (getVerbosity() >= QUDA_VERBOSE) printfQuda("About to build the preconditioned coarse clover\n");

        createYhat(gpu_setup);

        if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Finished building the preconditioned coarse clover\n");
        if (getVerbosity() >= QUDA_VERBOSE) printfQuda("About to create the preconditioned coarse op\n");

        createPreconditionedCoarseOp(*Yhat_d, *Xinv_d, *Y_d, *X_d);
      }
    }

    if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Finished creating the preconditioned coarse op\n");

    // save the intermediate tunecache after the Yhat tune
    saveTuneCache();

    if (gpu_setup) {
      enable_gpu = true;
      init_gpu = true;
    } else {
      enable_cpu = true;
      init_cpu = true;
    }
  }

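  // Note: initializeCoarse has three code paths: (i) a host-side setup
  // (disabled by the first constructor above), (ii) a gpu setup that, when
  // tensor-core (MMA) coarsening of an already-coarse operator is enabled,
  // stages Y and X through temporaries in QUDA_MILC_GAUGE_ORDER to save two
  // field reorderings, and (iii) a plain gpu setup. In all cases the coarse
  // links Y/X are built first, then the preconditioned fields Yhat/Xinv.
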
  // we only copy to host or device lazily on demand
  void DiracCoarse::initializeLazy(QudaFieldLocation location) const
  {
    if (!enable_cpu && !enable_gpu) errorQuda("Neither CPU or GPU coarse fields initialized");
    switch(location) {
    case QUDA_CUDA_FIELD_LOCATION:
      if (enable_gpu) return;
      createY(true, mapped);
      createYhat(true);
      Y_d->copy(*Y_h);
      Yhat_d->copy(*Yhat_h);
      X_d->copy(*X_h);
      Xinv_d->copy(*Xinv_h);
      enable_gpu = true;
      init_gpu = true;
      break;
    case QUDA_CPU_FIELD_LOCATION:
      if (enable_cpu) return;
      createY(false);
      createYhat(false);
      Y_h->copy(*Y_d);
      Yhat_h->copy(*Yhat_d);
      X_h->copy(*X_d);
      Xinv_h->copy(*Xinv_d);
      enable_cpu = true;
      init_cpu = true;
      break;
    default:
      errorQuda("Unknown location");
    }
  }

  void DiracCoarse::createPreconditionedCoarseOp(GaugeField &Yhat, GaugeField &Xinv, const GaugeField &Y, const GaugeField &X)
  {
    calculateYhat(Yhat, Xinv, Y, X, use_mma);
  }

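  // Note: initializeLazy mirrors the coarse fields between host and device
  // only when an operator application actually requests the other location:
  // the fields are allocated on demand and copied from whichever side was
  // set up first. createPreconditionedCoarseOp is a thin wrapper that fills
  // the preconditioned links Yhat (Y with the inverse clover term folded
  // in) and the clover inverse Xinv via calculateYhat.
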
  void DiracCoarse::Clover(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
  {
    if (&in == &out) errorQuda("Fields cannot alias");
    QudaFieldLocation location = checkLocation(out,in);
    initializeLazy(location);
    if (location == QUDA_CUDA_FIELD_LOCATION) {
      ApplyCoarse(out, in, in, *Y_d, *X_d, kappa, parity, false, true, dagger, commDim);
    } else if (location == QUDA_CPU_FIELD_LOCATION) {
      ApplyCoarse(out, in, in, *Y_h, *X_h, kappa, parity, false, true, dagger, commDim);
    }
    int n = in.Nspin()*in.Ncolor();
    flops += (8*n*n-2*n)*(long long)in.VolumeCB();
  }

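  // Note: the flop count in Clover follows from a dense complex
  // matrix-vector product: with n = Ns_c*Nc_c, each site costs n^2 complex
  // multiplies (6 flops each) plus n*(n-1) complex adds (2 flops each),
  // i.e. 8*n^2 - 2*n real operations per site.
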
  void DiracCoarse::CloverInv(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
  {
    if (&in == &out) errorQuda("Fields cannot alias");
    QudaFieldLocation location = checkLocation(out,in);
    initializeLazy(location);
    if ( location == QUDA_CUDA_FIELD_LOCATION ) {
      ApplyCoarse(out, in, in, *Y_d, *Xinv_d, kappa, parity, false, true, dagger, commDim);
    } else if ( location == QUDA_CPU_FIELD_LOCATION ) {
      ApplyCoarse(out, in, in, *Y_h, *Xinv_h, kappa, parity, false, true, dagger, commDim);
    }
    int n = in.Nspin()*in.Ncolor();
    flops += (8*n*n-2*n)*(long long)in.VolumeCB();
  }

  void DiracCoarse::Dslash(ColorSpinorField &out, const ColorSpinorField &in,
                           const QudaParity parity) const
  {
    QudaFieldLocation location = checkLocation(out,in);
    initializeLazy(location);
    if ( location == QUDA_CUDA_FIELD_LOCATION ) {
      ApplyCoarse(out, in, in, *Y_d, *X_d, kappa, parity, true, false, dagger, commDim, halo_precision);
    } else if ( location == QUDA_CPU_FIELD_LOCATION ) {
      ApplyCoarse(out, in, in, *Y_h, *X_h, kappa, parity, true, false, dagger, commDim, halo_precision);
    }
    int n = in.Nspin()*in.Ncolor();
    flops += (8*(8*n*n)-2*n)*(long long)in.VolumeCB()*in.SiteSubset();
  }

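  // Note: the Dslash flop count is the 8*n^2 matrix-vector cost applied
  // once per hopping direction: in four dimensions there are 2*4 = 8
  // forward/backward neighbours, giving the 8*(8*n*n) term above.
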
  void DiracCoarse::DslashXpay(ColorSpinorField &out, const ColorSpinorField &in,
                               const QudaParity parity, const ColorSpinorField &x,
                               const double &k) const
  {
    if (k!=1.0) errorQuda("%s not supported for k!=1.0", __func__);

    QudaFieldLocation location = checkLocation(out,in);
    initializeLazy(location);
    if ( location == QUDA_CUDA_FIELD_LOCATION ) {
      ApplyCoarse(out, in, x, *Y_d, *X_d, kappa, parity, true, true, dagger, commDim, halo_precision);
    } else if ( location == QUDA_CPU_FIELD_LOCATION ) {
      ApplyCoarse(out, in, x, *Y_h, *X_h, kappa, parity, true, true, dagger, commDim, halo_precision);
    }
    int n = in.Nspin()*in.Ncolor();
    flops += (9*(8*n*n)-2*n)*(long long)in.VolumeCB()*in.SiteSubset();
  }

  void DiracCoarse::M(ColorSpinorField &out, const ColorSpinorField &in) const
  {
    QudaFieldLocation location = checkLocation(out,in);
    initializeLazy(location);
    if ( location == QUDA_CUDA_FIELD_LOCATION ) {
      ApplyCoarse(out, in, in, *Y_d, *X_d, kappa, QUDA_INVALID_PARITY, true, true, dagger, commDim, halo_precision);
    } else if ( location == QUDA_CPU_FIELD_LOCATION ) {
      ApplyCoarse(out, in, in, *Y_h, *X_h, kappa, QUDA_INVALID_PARITY, true, true, dagger, commDim, halo_precision);
    }
    int n = in.Nspin()*in.Ncolor();
    flops += (9*(8*n*n)-2*n)*(long long)in.VolumeCB()*in.SiteSubset();
  }

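  // Note: the full operator M applies both the hopping term Y and the
  // on-site term X in a single ApplyCoarse call (dslash and clover both
  // enabled); its flop count therefore has 9 matrix-vector products per
  // site, the 8 hopping directions plus one clover application, as does
  // DslashXpay above.
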
  void DiracCoarse::MdagM(ColorSpinorField &out, const ColorSpinorField &in) const
  {
    bool reset1 = newTmp(&tmp1, in);
    if (tmp1->SiteSubset() != QUDA_FULL_SITE_SUBSET) errorQuda("Temporary vector is not full-site vector");

    M(*tmp1, in);
    Mdag(out, *tmp1);

    deleteTmp(&tmp1, reset1);
  }

  void DiracCoarse::prepare(ColorSpinorField* &src, ColorSpinorField* &sol,
                            ColorSpinorField &x, ColorSpinorField &b,
                            const QudaSolutionType solType) const
  {
    if (solType == QUDA_MATPC_SOLUTION || solType == QUDA_MATPCDAG_MATPC_SOLUTION) {
      errorQuda("Preconditioned solution requires a preconditioned solve_type");
    }

    src = &b;
    sol = &x;
  }

  void DiracCoarse::reconstruct(ColorSpinorField &x, const ColorSpinorField &b,
                                const QudaSolutionType solType) const
  {
    /* do nothing */
  }

  //Make the coarse operator one level down. Pass both the coarse gauge field and coarse clover field.
  void DiracCoarse::createCoarseOp(GaugeField &Y, GaugeField &X, const Transfer &T, double kappa, double mass, double mu, double mu_factor) const
  {
    if (T.getTransferType() != QUDA_TRANSFER_AGGREGATE)
      errorQuda("Coarse operators only support aggregation coarsening");

    double a = 2.0 * kappa * mu * T.Vectors().TwistFlavor();
    if (checkLocation(Y, X) == QUDA_CPU_FIELD_LOCATION) {
      initializeLazy(QUDA_CPU_FIELD_LOCATION);
      CoarseCoarseOp(Y, X, T, *(this->Y_h), *(this->X_h), *(this->Xinv_h), kappa, mass, a, mu_factor, QUDA_COARSE_DIRAC,
                     QUDA_MATPC_INVALID, need_bidirectional);
    } else {
      initializeLazy(QUDA_CUDA_FIELD_LOCATION);
      CoarseCoarseOp(Y, X, T, *(this->Y_d), *(this->X_d), *(this->Xinv_d), kappa, mass, a, mu_factor, QUDA_COARSE_DIRAC,
                     QUDA_MATPC_INVALID, need_bidirectional, use_mma);
    }
  }

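  // Note: createCoarseOp lets this coarse operator itself be coarsened,
  // enabling recursive multigrid: CoarseCoarseOp treats the current Y/X
  // (and Xinv) as the "fine" gauge and clover fields of the next level.
  // The factor a = 2*kappa*mu*TwistFlavor passes any twisted-mass term
  // down to the coarser level.
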
  void DiracCoarse::prefetch(QudaFieldLocation mem_space, qudaStream_t stream) const
  {
    Dirac::prefetch(mem_space, stream);
    if (Y_d) Y_d->prefetch(mem_space, stream);
    if (X_d) X_d->prefetch(mem_space, stream);
  }

  DiracCoarsePC::DiracCoarsePC(const DiracParam &param, bool gpu_setup) : DiracCoarse(param, gpu_setup)
  {
    /* do nothing */
  }

  DiracCoarsePC::~DiracCoarsePC()
  {
    /* do nothing */
  }

  void DiracCoarsePC::Dslash(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
  {
    QudaFieldLocation location = checkLocation(out,in);
    initializeLazy(location);
    if ( location == QUDA_CUDA_FIELD_LOCATION) {
      ApplyCoarse(out, in, in, *Yhat_d, *X_d, kappa, parity, true, false, dagger, commDim, halo_precision);
    } else if ( location == QUDA_CPU_FIELD_LOCATION ) {
      ApplyCoarse(out, in, in, *Yhat_h, *X_h, kappa, parity, true, false, dagger, commDim, halo_precision);
    }

    int n = in.Nspin()*in.Ncolor();
    flops += (8*(8*n*n)-2*n)*in.VolumeCB()*in.SiteSubset();
  }

  void DiracCoarsePC::DslashXpay(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity,
                                 const ColorSpinorField &x, const double &k) const
  {
    // emulated for now
    Dslash(out, in, parity);
    blas::xpay(const_cast<ColorSpinorField&>(x), k, out);

    int n = in.Nspin()*in.Ncolor();
    flops += (8*(8*n*n)-2*n)*in.VolumeCB(); // blas flops counted separately so only need to count dslash flops
  }

  void DiracCoarsePC::M(ColorSpinorField &out, const ColorSpinorField &in) const
  {
    bool reset1 = newTmp(&tmp1, in);

    if (in.SiteSubset() == QUDA_FULL_SITE_SUBSET || out.SiteSubset() == QUDA_FULL_SITE_SUBSET ||
        tmp1->SiteSubset() == QUDA_FULL_SITE_SUBSET)
      errorQuda("Cannot apply preconditioned operator to full field (subsets = %d %d %d)",
                in.SiteSubset(), out.SiteSubset(), tmp1->SiteSubset());

    if (matpcType == QUDA_MATPC_EVEN_EVEN_ASYMMETRIC) {
      // DiracCoarsePC::Dslash applies A^{-1}Dslash
      Dslash(*tmp1, in, QUDA_ODD_PARITY);
      // DiracCoarse::DslashXpay applies (A - D) // FIXME this ignores the -1
      DiracCoarse::Dslash(out, *tmp1, QUDA_EVEN_PARITY);
      Clover(*tmp1, in, QUDA_EVEN_PARITY);
      blas::xpay(*tmp1, -1.0, out);
    } else if (matpcType == QUDA_MATPC_ODD_ODD_ASYMMETRIC) {
      // DiracCoarsePC::Dslash applies A^{-1}Dslash
      Dslash(*tmp1, in, QUDA_EVEN_PARITY);
      // DiracCoarse::DslashXpay applies (A - D) // FIXME this ignores the -1
      DiracCoarse::Dslash(out, *tmp1, QUDA_ODD_PARITY);
      Clover(*tmp1, in, QUDA_ODD_PARITY);
      blas::xpay(*tmp1, -1.0, out);
    } else if (matpcType == QUDA_MATPC_EVEN_EVEN) {
      Dslash(*tmp1, in, QUDA_ODD_PARITY);
      DslashXpay(out, *tmp1, QUDA_EVEN_PARITY, in, -1.0);
    } else if (matpcType == QUDA_MATPC_ODD_ODD) {
      Dslash(*tmp1, in, QUDA_EVEN_PARITY);
      DslashXpay(out, *tmp1, QUDA_ODD_PARITY, in, -1.0);
    } else {
      errorQuda("MatPCType %d not valid for DiracCoarsePC", matpcType);
    }

    deleteTmp(&tmp1, reset1);
  }

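  // Note: DiracCoarsePC::M above implements the even-odd (Schur
  // complement) reduction of the coarse operator. In the symmetric cases
  // it applies
  //   M = 1 - A_ee^{-1} D_eo A_oo^{-1} D_oe   (even-even; odd-odd mirrored)
  // using the preconditioned Dslash (which folds in the A^{-1}), while the
  // asymmetric cases apply
  //   M = A_ee - D_eo A_oo^{-1} D_oe
  // via the unpreconditioned DiracCoarse::Dslash plus an explicit Clover
  // and xpay.
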
  void DiracCoarsePC::MdagM(ColorSpinorField &out, const ColorSpinorField &in) const
  {
    bool reset1 = newTmp(&tmp2, in);
    M(*tmp2, in);
    Mdag(out, *tmp2);
    deleteTmp(&tmp2, reset1);
  }

  void DiracCoarsePC::prepare(ColorSpinorField* &src, ColorSpinorField* &sol, ColorSpinorField &x, ColorSpinorField &b,
                              const QudaSolutionType solType) const
  {
    // we desire solution to preconditioned system
    if (solType == QUDA_MATPC_SOLUTION || solType == QUDA_MATPCDAG_MATPC_SOLUTION) {
      src = &b;
      sol = &x;
      return;
    }

    bool reset = newTmp(&tmp1, b.Even());

    // we desire solution to full system
    if (matpcType == QUDA_MATPC_EVEN_EVEN) {
      // src = A_ee^-1 (b_e - D_eo A_oo^-1 b_o)
      src = &(x.Odd());
#if 0
      CloverInv(*src, b.Odd(), QUDA_ODD_PARITY);
      DiracCoarse::Dslash(*tmp1, *src, QUDA_EVEN_PARITY);
      blas::xpay(const_cast<ColorSpinorField&>(b.Even()), -1.0, *tmp1);
      CloverInv(*src, *tmp1, QUDA_EVEN_PARITY);
#endif
      // src = A_ee^{-1} b_e - (A_ee^{-1} D_eo) A_oo^{-1} b_o
      CloverInv(*src, b.Odd(), QUDA_ODD_PARITY);
      Dslash(*tmp1, *src, QUDA_EVEN_PARITY);
      CloverInv(*src, b.Even(), QUDA_EVEN_PARITY);
      blas::axpy(-1.0, *tmp1, *src);

      sol = &(x.Even());
    } else if (matpcType == QUDA_MATPC_ODD_ODD) {
      // src = A_oo^-1 (b_o - D_oe A_ee^-1 b_e)
      src = &(x.Even());
#if 0
      CloverInv(*src, b.Even(), QUDA_EVEN_PARITY);
      DiracCoarse::Dslash(*tmp1, *src, QUDA_ODD_PARITY);
      blas::xpay(const_cast<ColorSpinorField&>(b.Odd()), -1.0, *tmp1);
      CloverInv(*src, *tmp1, QUDA_ODD_PARITY);
#endif
      // src = A_oo^{-1} b_o - (A_oo^{-1} D_oe) A_ee^{-1} b_e
      CloverInv(*src, b.Even(), QUDA_EVEN_PARITY);
      Dslash(*tmp1, *src, QUDA_ODD_PARITY);
      CloverInv(*src, b.Odd(), QUDA_ODD_PARITY);
      blas::axpy(-1.0, *tmp1, *src);

      sol = &(x.Odd());
    } else if (matpcType == QUDA_MATPC_EVEN_EVEN_ASYMMETRIC) {
      // src = b_e - D_eo A_oo^-1 b_o
      src = &(x.Odd());
      CloverInv(*tmp1, b.Odd(), QUDA_ODD_PARITY);
      DiracCoarse::Dslash(*src, *tmp1, QUDA_EVEN_PARITY);
      blas::xpay(const_cast<ColorSpinorField&>(b.Even()), -1.0, *src);
      sol = &(x.Even());
    } else if (matpcType == QUDA_MATPC_ODD_ODD_ASYMMETRIC) {
      // src = b_o - D_oe A_ee^-1 b_e
      src = &(x.Even());
      CloverInv(*tmp1, b.Even(), QUDA_EVEN_PARITY);
      DiracCoarse::Dslash(*src, *tmp1, QUDA_ODD_PARITY);
      blas::xpay(const_cast<ColorSpinorField&>(b.Odd()), -1.0, *src);
      sol = &(x.Odd());
    } else {
      errorQuda("MatPCType %d not valid for DiracCoarsePC", matpcType);
    }

    // here we use final solution to store parity solution and parity source
    // b is now up for grabs if we want

    deleteTmp(&tmp1, reset);
  }

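  // Note: prepare assembles the single-parity source for the even-odd
  // solve, e.g. src = A_ee^{-1} (b_e - D_eo A_oo^{-1} b_o) in the
  // symmetric even-even case (the asymmetric variants omit the leading
  // A^{-1}), temporarily using the opposite-parity half of x as scratch;
  // reconstruct below then rebuilds the full-lattice solution from the
  // parity solution and b.
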
  void DiracCoarsePC::reconstruct(ColorSpinorField &x, const ColorSpinorField &b, const QudaSolutionType solType) const
  {
    if (solType == QUDA_MATPC_SOLUTION || solType == QUDA_MATPCDAG_MATPC_SOLUTION) {
      return;
    }

    checkFullSpinor(x, b);

    bool reset = newTmp(&tmp1, b.Even());

    // create full solution

    if (matpcType == QUDA_MATPC_EVEN_EVEN ||
        matpcType == QUDA_MATPC_EVEN_EVEN_ASYMMETRIC) {
#if 0
      // x_o = A_oo^-1 (b_o - D_oe x_e)
      DiracCoarse::Dslash(*tmp1, x.Even(), QUDA_ODD_PARITY);
      blas::xpay(const_cast<ColorSpinorField&>(b.Odd()), -1.0, *tmp1);
      CloverInv(x.Odd(), *tmp1, QUDA_ODD_PARITY);
#endif
      // x_o = A_oo^{-1} b_o - (A_oo^{-1} D_oe) x_e
      Dslash(*tmp1, x.Even(), QUDA_ODD_PARITY);
      CloverInv(x.Odd(), b.Odd(), QUDA_ODD_PARITY);
      blas::axpy(-1.0, const_cast<ColorSpinorField &>(*tmp1), x.Odd());

    } else if (matpcType == QUDA_MATPC_ODD_ODD ||
               matpcType == QUDA_MATPC_ODD_ODD_ASYMMETRIC) {
#if 0
      // x_e = A_ee^-1 (b_e - D_eo x_o)
      DiracCoarse::Dslash(*tmp1, x.Odd(), QUDA_EVEN_PARITY);
      blas::xpay(const_cast<ColorSpinorField&>(b.Even()), -1.0, *tmp1);
      CloverInv(x.Even(), *tmp1, QUDA_EVEN_PARITY);
#endif
      // x_e = A_ee^{-1} b_e - (A_ee^{-1} D_eo) x_o
      Dslash(*tmp1, x.Odd(), QUDA_EVEN_PARITY);
      CloverInv(x.Even(), b.Even(), QUDA_EVEN_PARITY);
      blas::axpy(-1.0, const_cast<ColorSpinorField &>(*tmp1), x.Even());

    } else {
      errorQuda("MatPCType %d not valid for DiracCoarsePC", matpcType);
    }

    deleteTmp(&tmp1, reset);
  }

  //Make the coarse operator one level down. For the preconditioned
  //operator we are coarsening the Yhat links, not the Y links. We
  //pass the fine clover fields, though they are actually ignored.
  void DiracCoarsePC::createCoarseOp(GaugeField &Y, GaugeField &X, const Transfer &T, double kappa, double mass, double mu, double mu_factor) const
  {
    if (T.getTransferType() != QUDA_TRANSFER_AGGREGATE)
      errorQuda("Coarse operators only support aggregation coarsening");

    double a = -2.0 * kappa * mu * T.Vectors().TwistFlavor();
    if (checkLocation(Y, X) == QUDA_CPU_FIELD_LOCATION) {
      initializeLazy(QUDA_CPU_FIELD_LOCATION);
      CoarseCoarseOp(Y, X, T, *(this->Yhat_h), *(this->X_h), *(this->Xinv_h), kappa, mass, a, -mu_factor,
                     QUDA_COARSEPC_DIRAC, matpcType, true);
    } else {
      initializeLazy(QUDA_CUDA_FIELD_LOCATION);
      CoarseCoarseOp(Y, X, T, *(this->Yhat_d), *(this->X_d), *(this->Xinv_d), kappa, mass, a, -mu_factor,
                     QUDA_COARSEPC_DIRAC, matpcType, true, use_mma);
    }
  }

  void DiracCoarsePC::prefetch(QudaFieldLocation mem_space, qudaStream_t stream) const
  {
    Dirac::prefetch(mem_space, stream);
    if (Xinv_d) Xinv_d->prefetch(mem_space, stream);
    if (Yhat_d) Yhat_d->prefetch(mem_space, stream);
  }
}
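
// Usage sketch (illustrative only, not part of this file): assuming an
// existing fine-grid operator `fineDirac` and a Transfer object `T` that
// defines the coarse grid, a coarse operator could be set up roughly as
// follows; only the DiracParam fields used by the constructors above are
// shown.
//
//   DiracParam param;
//   param.transfer = &T;           // transfer (prolongator/restrictor) pair
//   param.dirac = &fineDirac;      // operator to be coarsened
//   DiracCoarse coarse(param);     // builds Y/X and Yhat/Xinv on the gpu
//   coarse.M(out, in);             // apply the full coarse operator
//
//   DiracCoarsePC coarsePC(param); // even-odd preconditioned variant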