QUDA  1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
multigrid.cpp
Go to the documentation of this file.
1 #include <multigrid.h>
2 #include <qio_field.h>
3 #include <string.h>
4 
5 #include <eigensolve_quda.h>
6 
7 namespace quda
8 {
9 
10  using namespace blas;
11 
12  static bool debug = false;
13 
14  MG::MG(MGParam &param, TimeProfile &profile_global) :
15  Solver(param, profile),
16  param(param),
17  transfer(0),
18  resetTransfer(false),
19  presmoother(nullptr),
20  postsmoother(nullptr),
21  profile_global(profile_global),
22  profile("MG level " + std::to_string(param.level), false),
23  coarse(nullptr),
24  coarse_solver(nullptr),
25  param_coarse(nullptr),
26  param_presmooth(nullptr),
27  param_postsmooth(nullptr),
28  param_coarse_solver(nullptr),
29  r(nullptr),
30  r_coarse(nullptr),
31  x_coarse(nullptr),
32  tmp_coarse(nullptr),
33  tmp2_coarse(nullptr),
34  diracResidual(param.matResidual->Expose()),
35  diracSmoother(param.matSmooth->Expose()),
36  diracSmootherSloppy(param.matSmoothSloppy->Expose()),
37  diracCoarseResidual(nullptr),
38  diracCoarseSmoother(nullptr),
39  diracCoarseSmootherSloppy(nullptr),
40  matCoarseResidual(nullptr),
41  matCoarseSmoother(nullptr),
42  matCoarseSmootherSloppy(nullptr),
43  rng(nullptr)
44  {
45  sprintf(prefix, "MG level %d (%s): ", param.level, param.location == QUDA_CUDA_FIELD_LOCATION ? "GPU" : "CPU");
46  pushLevel(param.level);
47 
48  if (param.level >= QUDA_MAX_MG_LEVEL)
49  errorQuda("Level=%d is greater than limit of multigrid recursion depth", param.level);
50 
52  errorQuda("Cannot use preconditioned coarse grid solution without preconditioned smoother solve");
53 
54  // allocating vectors
55  {
56  // create residual vectors
57  ColorSpinorParam csParam(*(param.B[0]));
59  csParam.location = param.location;
61  csParam.location == QUDA_CUDA_FIELD_LOCATION ? true : false);
62  if (csParam.location==QUDA_CUDA_FIELD_LOCATION) {
64  }
65  if (param.B[0]->Nspin() == 1) csParam.gammaBasis = param.B[0]->GammaBasis(); // hack for staggered to avoid unnecessary basis checks
66  r = ColorSpinorField::Create(csParam);
67 
68  // if we're using preconditioning then allocate storage for the preconditioned source vector
70  csParam.x[0] /= 2;
73  }
74  }
75 
76  rng = new RNG(*param.B[0], 1234);
77  rng->Init();
78 
79  if (param.level < param.Nlevel-1) {
81  if (param.mg_global.generate_all_levels == QUDA_BOOLEAN_TRUE || param.level == 0) {
82 
83  // Initializing to random vectors
84  for(int i=0; i<(int)param.B.size(); i++) {
86  *param.B[i] = *r;
87  param.evals.push_back(0.0);
88  }
89  }
90  if (param.mg_global.num_setup_iter[param.level] > 0) {
91  if (strcmp(param.mg_global.vec_infile[param.level], "")
92  != 0) { // only load if infile is defined and not computing
93  loadVectors(param.B);
94  } else if (param.mg_global.use_eig_solver[param.level]) {
95  generateEigenVectors(); // Run the eigensolver
96  } else {
97  generateNullVectors(param.B);
98  }
99  }
100  } else if (strcmp(param.mg_global.vec_infile[param.level], "")
101  != 0) { // only load if infile is defined and not computing
102  if ( param.mg_global.num_setup_iter[param.level] > 0 ) generateNullVectors(param.B);
103  } else if (param.mg_global.vec_load[param.level] == QUDA_BOOLEAN_TRUE) { // only conditional load of null vectors
104 
105  loadVectors(param.B);
106  } else { // generate free field vectors
107  buildFreeVectors(param.B);
108  }
109  }
110 
111  // in case of iterative setup with MG the coarse level may be already built
112  if (!transfer) reset();
113 
114  popLevel(param.level);
115  }
116 
117  void MG::reset(bool refresh) {
119 
120  if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("%s level %d\n", transfer ? "Resetting" : "Creating", param.level);
121 
122  destroySmoother();
124 
125  // reset the Dirac operator pointers since these may have changed
129 
130  // Refresh the null-space vectors if we need to
131  if (refresh && param.level < param.Nlevel-1) {
133  }
134 
135  // if not on the coarsest level, update next
136  if (param.level < param.Nlevel-1) {
137 
138  if (transfer) {
139  // restoring FULL parity in Transfer changed at the end of this procedure
141  if (resetTransfer || refresh) {
142  transfer->reset();
143  resetTransfer = false;
144  }
145  } else {
146  // create transfer operator
147  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Creating transfer operator\n");
151 
152  // create coarse temporary vector
154 
155  // create coarse temporary vector
158 
159  // create coarse residual vector
161 
162  // create coarse solution vector
164 
165  B_coarse = new std::vector<ColorSpinorField*>();
166  int nVec_coarse = std::max(param.Nvec, param.mg_global.n_vec[param.level + 1]);
167  B_coarse->resize(nVec_coarse);
168 
169  // only have single precision B vectors on the coarse grid
170  QudaPrecision B_coarse_precision = std::max(param.mg_global.precision_null[param.level+1], QUDA_SINGLE_PRECISION);
171  for (int i=0; i<nVec_coarse; i++)
172  (*B_coarse)[i] = param.B[0]->CreateCoarse(param.geoBlockSize, param.spinBlockSize, param.Nvec, B_coarse_precision, param.mg_global.setup_location[param.level+1]);
173 
174  // if we're not generating on all levels then we need to propagate the vectors down
176  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Restricting null space vectors\n");
177  for (int i=0; i<param.Nvec; i++) {
178  zero(*(*B_coarse)[i]);
179  transfer->R(*(*B_coarse)[i], *(param.B[i]));
180  }
181  }
182  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Transfer operator done\n");
183  }
184 
186  }
187 
188  // delay allocating smoother until after coarse-links have been created
189  createSmoother();
190 
191  if (param.level < param.Nlevel-1) {
192  // creating or resetting the coarse level
193  if (coarse) {
195  coarse->param.delta = 1e-20;
200  coarse->reset(refresh);
201  } else {
202  // create the next multigrid level
205  param_coarse->fine = this;
206  param_coarse->delta = 1e-20;
208 
210  }
211  setOutputPrefix(prefix); // restore since we just popped back from coarse grid
212 
214 
216  // if we are deflating the coarsest grid, then run a dummy solve
217  // so that the deflation is done during the setup
219  param_coarse_solver->maxiter = 1; // do a single iteration on the dummy solve
220  (*coarse_solver)(*x_coarse, *r_coarse);
222  }
223  }
224 
225  if (param.level == 0) {
226  // now we can run the verification if requested
228  }
229 
230  if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Setup of level %d done\n", param.level);
231 
233  }
234 
235  void MG::pushLevel(int level) const
236  {
237  postTrace();
240  }
241 
242  void MG::popLevel(int level) const
243  {
244  popVerbosity();
245  popOutputPrefix();
246  postTrace();
247  }
248 
250  {
252 
253  if (presmoother) {
254  delete presmoother;
255  presmoother = nullptr;
256  }
257 
258  if (param_presmooth) {
259  delete param_presmooth;
260  param_presmooth = nullptr;
261  }
262 
263  if (postsmoother) {
264  delete postsmoother;
265  postsmoother = nullptr;
266  }
267 
268  if (param_postsmooth) {
269  delete param_postsmooth;
270  param_postsmooth = nullptr;
271  }
272 
274  }
275 
278 
279  // create the smoother for this level
280  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Creating smoother\n");
281  destroySmoother();
283 
286  param_presmooth->return_residual = true; // pre-smoother returns the residual vector for subsequent coarsening
288 
292 
298 
303 
304  param_presmooth->sloppy_converge = true; // this means we don't check the true residual before declaring convergence
305 
307  // inner solver should recompute the true residual after each cycle if using Schwarz preconditioning
309 
313 
314  if (param.level < param.Nlevel-1) { //Create the post smoother
316  param_postsmooth->return_residual = false; // post smoother does not need to return the residual vector
318 
322 
323  // we never need to compute the true residual for a post smoother
325 
328  }
329  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Smoother done\n");
330 
332  }
333 
336 
337  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Creating coarse Dirac operator\n");
338  // check if we are coarsening the preconditioned system then
341 
342  // create coarse grid operator
343  DiracParam diracParam;
344  diracParam.transfer = transfer;
345 
346  diracParam.dirac = preconditioned_coarsen ? const_cast<Dirac*>(diracSmoother) : const_cast<Dirac*>(diracResidual);
347  diracParam.kappa = diracParam.dirac->Kappa();
348  diracParam.mu = diracParam.dirac->Mu();
350 
351  // Need to figure out if we need to force bi-directional build. If any previous level (incl this one) was
352  // preconditioned, we have to force bi-directional builds.
354  for (int i = 0; i <= param.level; i++) {
358  }
359  }
360 
361  diracParam.dagger = QUDA_DAG_NO;
362  diracParam.matpcType = matpc_type;
363  diracParam.type = QUDA_COARSE_DIRAC;
364  diracParam.tmp1 = tmp_coarse;
365  diracParam.tmp2 = tmp2_coarse;
367  constexpr int MAX_BLOCK_FLOAT_NC=32; // FIXME this is the maximum number of colors for which we support block-float format
368  if (param.Nvec > MAX_BLOCK_FLOAT_NC) diracParam.halo_precision = QUDA_SINGLE_PRECISION;
369 
370  // use even-odd preconditioning for the coarse grid solver
374 
375  // create smoothing operators
376  diracParam.dirac = const_cast<Dirac*>(param.matSmooth->Expose());
378 
382  diracParam.type = QUDA_COARSEPC_DIRAC;
383  diracParam.tmp1 = &(tmp_coarse->Even());
384  diracParam.tmp2 = &(tmp2_coarse->Even());
385  diracCoarseSmoother = new DiracCoarsePC(static_cast<DiracCoarse&>(*diracCoarseResidual), diracParam);
386  {
388  for (int i=0; i<4; i++) diracParam.commDim[i] = schwarz ? 0 : 1;
389  }
390  diracCoarseSmootherSloppy = new DiracCoarsePC(static_cast<DiracCoarse&>(*diracCoarseSmoother),diracParam);
391  } else {
392  diracParam.type = QUDA_COARSE_DIRAC;
393  diracParam.tmp1 = tmp_coarse;
394  diracParam.tmp2 = tmp2_coarse;
395  diracCoarseSmoother = new DiracCoarse(static_cast<DiracCoarse&>(*diracCoarseResidual), diracParam);
396  {
398  for (int i=0; i<4; i++) diracParam.commDim[i] = schwarz ? 0 : 1;
399  }
400  diracCoarseSmootherSloppy = new DiracCoarse(static_cast<DiracCoarse&>(*diracCoarseSmoother),diracParam);
401  }
402 
409 
410  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Coarse Dirac operator done\n");
411 
413  }
414 
417 
419  // nothing to do
421  if (coarse_solver) {
422  delete coarse_solver;
423  coarse_solver = nullptr;
424  }
425  if (param_coarse_solver) {
426  delete param_coarse_solver;
427  param_coarse_solver = nullptr;
428  }
429  } else {
430  errorQuda("Multigrid cycle type %d not supported", param.cycle_type);
431  }
432 
434  }
435 
438 
439  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Creating coarse solver wrapper\n");
442  // if coarse solver is not a bottom solver and on the second to bottom level then we can just use the coarse solver as is
444  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Assigned coarse solver to coarse MG operator\n");
446 
450  param_coarse_solver->sloppy_converge = true; // this means we don't check the true residual before declaring convergence
451 
453  param_coarse_solver->return_residual = false; // coarse solver does need to return residual vector
454 
456  // Coarse level deflation is triggered if the eig param structure exists
457  // on the coarsest level, and we are on the next to coarsest level.
461  // Due to coherence between these levels, an initial guess
462  // might be beneficial.
465  }
466 
467  // Deflation on the coarse is supported for 6 solvers only
472  errorQuda("Coarse grid deflation not supported with coarse solver %d", param_coarse_solver->inv_type);
473  }
474 
475  if (strcmp(param_coarse_solver->eig_param.vec_infile, "") == 0 && // check that input file not already set
477  && (strcmp(param.mg_global.vec_infile[param.level + 1], "") != 0)) {
478  std::string vec_infile(param.mg_global.vec_infile[param.level + 1]);
479  vec_infile += "_level_";
480  vec_infile += std::to_string(param.level + 1);
481  vec_infile += "_defl_";
482  vec_infile += std::to_string(param.mg_global.n_vec[param.level + 1]);
483  strcpy(param_coarse_solver->eig_param.vec_infile, vec_infile.c_str());
484  }
485 
486  if (strcmp(param_coarse_solver->eig_param.vec_outfile, "") == 0 && // check that output file not already set
488  && (strcmp(param.mg_global.vec_outfile[param.level + 1], "") != 0)) {
489  std::string vec_outfile(param.mg_global.vec_outfile[param.level + 1]);
490  vec_outfile += "_level_";
491  vec_outfile += std::to_string(param.level + 1);
492  vec_outfile += "_defl_";
493  vec_outfile += std::to_string(param.mg_global.n_vec[param.level + 1]);
494  strcpy(param_coarse_solver->eig_param.vec_outfile, vec_outfile.c_str());
495  }
496  }
497 
501  param_coarse_solver->delta = 1e-8;
503 
514  }
519 
520  // preconditioned solver wrapper is uniform precision
524 
527  sprintf(coarse_prefix, "MG level %d (%s): ", param.level + 1,
528  param.mg_global.location[param.level + 1] == QUDA_CUDA_FIELD_LOCATION ? "GPU" : "CPU");
530  } else {
532  sprintf(coarse_prefix, "MG level %d (%s): ", param.level + 1,
533  param.mg_global.location[param.level + 1] == QUDA_CUDA_FIELD_LOCATION ? "GPU" : "CPU");
535  }
536 
537  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Assigned coarse solver to preconditioned GCR solver\n");
538  } else {
539  errorQuda("Multigrid cycle type %d not supported", param.cycle_type);
540  }
541  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Coarse solver wrapper done\n");
542 
544  }
545 
547  {
549 
550  if (param.level < param.Nlevel - 1) {
551  if (coarse) delete coarse;
553  if (coarse_solver) delete coarse_solver;
555  }
556 
557  if (B_coarse) {
558  int nVec_coarse = std::max(param.Nvec, param.mg_global.n_vec[param.level + 1]);
559  for (int i = 0; i < nVec_coarse; i++)
560  if ((*B_coarse)[i]) delete (*B_coarse)[i];
561  delete B_coarse;
562  }
563  if (transfer) delete transfer;
570  if (postsmoother) delete postsmoother;
572  }
573 
574  if (rng) {
575  rng->Release();
576  delete rng;
577  }
578 
579  if (presmoother) delete presmoother;
580  if (param_presmooth) delete param_presmooth;
581 
583  if (r) delete r;
584  if (r_coarse) delete r_coarse;
585  if (x_coarse) delete x_coarse;
586  if (tmp_coarse) delete tmp_coarse;
587  if (tmp2_coarse) delete tmp2_coarse;
588 
589  if (param_coarse) delete param_coarse;
590 
592 
594  }
595 
596  // FIXME need to make this more robust (implement Solver::flops() for all solvers)
597  double MG::flops() const {
598  double flops = 0;
599 
600  if (param_coarse_solver) {
601  flops += param_coarse_solver->gflops * 1e9;
603  } else if (param.level < param.Nlevel-1) {
604  flops += coarse->flops();
605  }
606 
607  if (param_presmooth) {
608  flops += param_presmooth->gflops * 1e9;
609  param_presmooth->gflops = 0;
610  }
611 
612  if (param_postsmooth) {
613  flops += param_postsmooth->gflops * 1e9;
615  }
616 
617  if (transfer) {
618  flops += transfer->flops();
619  }
620 
621  return flops;
622  }
623 
627  void MG::verify() {
629 
630  // temporary fields used for verification
632  csParam.create = QUDA_NULL_FIELD_CREATE;
635  double deviation;
636 
639  csParam.Precision();
640 
641  // may want to revisit this---these were relaxed for cases where ghost_precision < precision
642  // these were set while hacking in tests of quarter precision ghosts
643  double tol = (prec == QUDA_QUARTER_PRECISION || prec == QUDA_HALF_PRECISION) ? 5e-2 : prec == QUDA_SINGLE_PRECISION ? 1e-3 : 1e-8;
644 
645  if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Checking 0 = (1 - P P^\\dagger) v_k for %d vectors\n", param.Nvec);
646 
647  for (int i=0; i<param.Nvec; i++) {
648  // as well as copying to the correct location this also changes basis if necessary
649  *tmp1 = *param.B[i];
650 
651  transfer->R(*r_coarse, *tmp1);
652  transfer->P(*tmp2, *r_coarse);
653  deviation = sqrt(xmyNorm(*tmp1, *tmp2) / norm2(*tmp1));
654 
655  if (getVerbosity() >= QUDA_VERBOSE)
656  printfQuda("Vector %d: norms v_k = %e P^\\dagger v_k = %e P P^\\dagger v_k = %e, L2 relative deviation = %e\n",
657  i, norm2(*tmp1), norm2(*r_coarse), norm2(*tmp2), deviation);
658  if (deviation > tol) errorQuda("L2 relative deviation for k=%d failed, %e > %e", i, deviation, tol);
659  }
660 
662 
663  sprintf(prefix, "MG level %d (%s): Null vector Oblique Projections : ", param.level + 1,
664  param.location == QUDA_CUDA_FIELD_LOCATION ? "GPU" : "CPU");
666 
667  // Oblique projections
668  if (getVerbosity() >= QUDA_SUMMARIZE)
669  printfQuda("Checking 1 > || (1 - DP(P^dagDP)P^dag) v_k || / || v_k || for %d vectors\n", param.Nvec);
670 
671  for (int i = 0; i < param.Nvec; i++) {
672  transfer->R(*r_coarse, *(param.B[i]));
673  (*coarse_solver)(*x_coarse, *r_coarse); // this needs to be an exact solve to pass
674  setOutputPrefix(prefix); // restore prefix after return from coarse grid
675  transfer->P(*tmp2, *x_coarse);
676  (*param.matResidual)(*tmp1, *tmp2);
677  *tmp2 = *(param.B[i]);
678  if (getVerbosity() >= QUDA_SUMMARIZE) {
679  printfQuda("Vector %d: norms %e %e\n", i, norm2(*param.B[i]), norm2(*tmp1));
680  printfQuda("relative residual = %e\n", sqrt(xmyNorm(*tmp2, *tmp1) / norm2(*param.B[i])));
681  }
682  }
683  sprintf(prefix, "MG level %d (%s): ", param.level + 1, param.location == QUDA_CUDA_FIELD_LOCATION ? "GPU" : "CPU");
685  }
686 #if 0
687 
688  if (getVerbosity() >= QUDA_SUMMARIZE)
689  printfQuda("Checking 1 > || (1 - D P (P^\\dagger D P) P^\\dagger v_k || / || v_k || for %d vectors\n",
690  param.Nvec);
691 
692  for (int i=0; i<param.Nvec; i++) {
693  transfer->R(*r_coarse, *(param.B[i]));
694  (*coarse)(*x_coarse, *r_coarse); // this needs to be an exact solve to pass
695  setOutputPrefix(prefix); // restore output prefix
696  transfer->P(*tmp2, *x_coarse);
697  param.matResidual(*tmp1,*tmp2);
698  *tmp2 = *(param.B[i]);
699  if (getVerbosity() >= QUDA_VERBOSE) {
700  printfQuda("Vector %d: norms %e %e ", i, norm2(*param.B[i]), norm2(*tmp1));
701  printfQuda("relative residual = %e\n", sqrt(xmyNorm(*tmp2, *tmp1) / norm2(*param.B[i])) );
702  }
703  }
704 #endif
705 
706  if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Checking 0 = (1 - P^\\dagger P) eta_c\n");
707 
709 
710  transfer->P(*tmp2, *x_coarse);
711  transfer->R(*r_coarse, *tmp2);
712  if (getVerbosity() >= QUDA_VERBOSE)
713  printfQuda("L2 norms %e %e (fine tmp %e) ", norm2(*x_coarse), norm2(*r_coarse), norm2(*tmp2));
714 
715  deviation = sqrt( xmyNorm(*x_coarse, *r_coarse) / norm2(*x_coarse) );
716  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("relative deviation = %e\n", deviation);
717  if (deviation > tol ) errorQuda("L2 relative deviation = %e > %e failed", deviation, tol);
718  if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Checking 0 = (D_c - P^\\dagger D P) (native coarse operator to emulated operator)\n");
719 
721  zero(*tmp_coarse);
722  zero(*r_coarse);
723 
724  spinorNoise(*tmp_coarse, *coarse->rng, QUDA_NOISE_UNIFORM);
725  transfer->P(*tmp1, *tmp_coarse);
726 
728  double kappa = diracResidual->Kappa();
729  if (param.level==0) {
730  diracSmoother->DslashXpay(tmp2->Even(), tmp1->Odd(), QUDA_EVEN_PARITY, tmp1->Even(), -kappa);
731  diracSmoother->DslashXpay(tmp2->Odd(), tmp1->Even(), QUDA_ODD_PARITY, tmp1->Odd(), -kappa);
732  } else { // this is a hack since the coarse Dslash doesn't properly use the same xpay conventions yet
733  diracSmoother->DslashXpay(tmp2->Even(), tmp1->Odd(), QUDA_EVEN_PARITY, tmp1->Even(), 1.0);
734  diracSmoother->DslashXpay(tmp2->Odd(), tmp1->Even(), QUDA_ODD_PARITY, tmp1->Odd(), 1.0);
735  }
736  } else {
737  (*param.matResidual)(*tmp2,*tmp1);
738  }
739 
740  transfer->R(*x_coarse, *tmp2);
741  (*param_coarse->matResidual)(*r_coarse, *tmp_coarse);
742 
743 #if 0 // enable to print out emulated and actual coarse-grid operator vectors for debugging
744  setOutputPrefix("");
745 
746  for (int i=0; i<comm_rank(); i++) { // this ensures that we print each rank in order
747  if (i==comm_rank()) {
748  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("emulated\n");
749  for (int x=0; x<x_coarse->Volume(); x++) tmp1->PrintVector(x);
750 
751  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("actual\n");
752  for (int x=0; x<r_coarse->Volume(); x++) tmp2->PrintVector(x);
753  }
754  comm_barrier();
755  }
757 #endif
758 
759  double r_nrm = norm2(*r_coarse);
760  deviation = sqrt( xmyNorm(*x_coarse, *r_coarse) / norm2(*x_coarse) );
761 
762  if (diracResidual->Mu() != 0.0) {
763  // When the mu is shifted on the coarse level; we can compute exactly the error we introduce in the check:
764  // it is given by 2*kappa*delta_mu || tmp_coarse ||; where tmp_coarse is the random vector generated for the test
765  double delta_factor = param.mg_global.mu_factor[param.level+1] - param.mg_global.mu_factor[param.level];
766  if(fabs(delta_factor) > tol ) {
767  double delta_a = delta_factor * 2.0 * diracResidual->Kappa() *
769  deviation -= fabs(delta_a) * sqrt( norm2(*tmp_coarse) / norm2(*x_coarse) );
770  deviation = fabs(deviation);
771  }
772  }
773  if (getVerbosity() >= QUDA_VERBOSE)
774  printfQuda("L2 norms: Emulated = %e, Native = %e, relative deviation = %e\n", norm2(*x_coarse), r_nrm, deviation);
775  if (deviation > tol) errorQuda("failed, deviation = %e (tol=%e)", deviation, tol);
776 
777  // check the preconditioned operator construction on the lower level if applicable
778  bool coarse_was_preconditioned = (param.mg_global.coarse_grid_solution_type[param.level + 1] == QUDA_MATPC_SOLUTION
780  if (coarse_was_preconditioned) {
781  // check eo
782  if (getVerbosity() >= QUDA_SUMMARIZE)
783  printfQuda("Checking Deo of preconditioned operator 0 = \\hat{D}_c - A^{-1} D_c\n");
784  static_cast<DiracCoarse *>(diracCoarseResidual)->Dslash(r_coarse->Even(), tmp_coarse->Odd(), QUDA_EVEN_PARITY);
785  static_cast<DiracCoarse *>(diracCoarseResidual)->CloverInv(x_coarse->Even(), r_coarse->Even(), QUDA_EVEN_PARITY);
786  static_cast<DiracCoarsePC *>(diracCoarseSmoother)->Dslash(r_coarse->Even(), tmp_coarse->Odd(), QUDA_EVEN_PARITY);
787  r_nrm = norm2(r_coarse->Even());
788  deviation = sqrt(xmyNorm(x_coarse->Even(), r_coarse->Even()) / norm2(x_coarse->Even()));
789  if (getVerbosity() >= QUDA_VERBOSE)
790  printfQuda("L2 norms: Emulated = %e, Native = %e, relative deviation = %e\n", norm2(x_coarse->Even()), r_nrm,
791  deviation);
792  if (deviation > tol) errorQuda("failed, deviation = %e (tol=%e)", deviation, tol);
793 
794  // check Doe
795  if (getVerbosity() >= QUDA_SUMMARIZE)
796  printfQuda("Checking Doe of preconditioned operator 0 = \\hat{D}_c - A^{-1} D_c\n");
797  static_cast<DiracCoarse *>(diracCoarseResidual)->Dslash(r_coarse->Odd(), tmp_coarse->Even(), QUDA_ODD_PARITY);
798  static_cast<DiracCoarse *>(diracCoarseResidual)->CloverInv(x_coarse->Odd(), r_coarse->Odd(), QUDA_ODD_PARITY);
799  static_cast<DiracCoarsePC *>(diracCoarseSmoother)->Dslash(r_coarse->Odd(), tmp_coarse->Even(), QUDA_ODD_PARITY);
800  r_nrm = norm2(r_coarse->Odd());
801  deviation = sqrt(xmyNorm(x_coarse->Odd(), r_coarse->Odd()) / norm2(x_coarse->Odd()));
802  if (getVerbosity() >= QUDA_VERBOSE)
803  printfQuda("L2 norms: Emulated = %e, Native = %e, relative deviation = %e\n", norm2(x_coarse->Odd()), r_nrm,
804  deviation);
805  if (deviation > tol) errorQuda("failed, deviation = %e (tol=%e)", deviation, tol);
806  }
807 
808  // here we check that the Hermitian conjugate operator is working
809  // as expected for both the smoother and residual Dirac operators
811  if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Checking normality of preconditioned operator\n");
812  diracSmoother->MdagM(tmp2->Even(), tmp1->Odd());
813  Complex dot = cDotProduct(tmp2->Even(),tmp1->Odd());
814  double deviation = std::fabs(dot.imag()) / std::fabs(dot.real());
815  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Smoother normal operator test (eta^dag M^dag M eta): real=%e imag=%e, relative imaginary deviation=%e\n",
816  real(dot), imag(dot), deviation);
817  if (deviation > tol) errorQuda("failed, deviation = %e (tol=%e)", deviation, tol);
818  }
819 
820  { // normal operator check for residual operator
821  if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Checking normality of residual operator\n");
822  diracResidual->MdagM(*tmp2, *tmp1);
823  Complex dot = cDotProduct(*tmp1,*tmp2);
824  double deviation = std::fabs(dot.imag()) / std::fabs(dot.real());
825  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Normal operator test (eta^dag M^dag M eta): real=%e imag=%e, relative imaginary deviation=%e\n",
826  real(dot), imag(dot), deviation);
827  if (deviation > tol) errorQuda("failed, deviation = %e (tol=%e)", deviation, tol);
828  }
829 
831 
832  sprintf(prefix, "MG level %d (%s): eigenvector overlap : ", param.level + 1,
833  param.location == QUDA_CUDA_FIELD_LOCATION ? "GPU" : "CPU");
835 
836  // Reuse the space for the Null vectors. By this point,
837  // the coarse grid has already been constructed.
839 
840  for (int i = 0; i < param.Nvec; i++) {
841 
842  // Restrict Evec, place result in r_coarse
843  transfer->R(*r_coarse, *param.B[i]);
844  // Prolong r_coarse, place result in tmp2
845  transfer->P(*tmp2, *r_coarse);
846 
847  printfQuda("Vector %d: norms v_k = %e P^dag v_k = %e PP^dag v_k = %e\n", i, norm2(*param.B[i]),
848  norm2(*r_coarse), norm2(*tmp2));
849 
850  // Compare v_k and PP^dag v_k.
851  deviation = sqrt(xmyNorm(*param.B[i], *tmp2) / norm2(*param.B[i]));
852  printfQuda("L2 relative deviation = %e\n", deviation);
853 
855 
856  sprintf(prefix, "MG level %d (%s): eigenvector Oblique Projections : ", param.level + 1,
857  param.location == QUDA_CUDA_FIELD_LOCATION ? "GPU" : "CPU");
859 
860  // Oblique projections
861  if (getVerbosity() >= QUDA_SUMMARIZE)
862  printfQuda("Checking 1 > || (1 - DP(P^dagDP)P^dag) v_k || / || v_k || for vector %d\n", i);
863 
864  transfer->R(*r_coarse, *param.B[i]);
865  (*coarse_solver)(*x_coarse, *r_coarse); // this needs to be an exact solve to pass
866  setOutputPrefix(prefix); // restore prefix after return from coarse grid
867  transfer->P(*tmp2, *x_coarse);
868  (*param.matResidual)(*tmp1, *tmp2);
869 
870  if (getVerbosity() >= QUDA_SUMMARIZE) {
871  printfQuda("Vector %d: norms v_k %e DP(P^dagDP)P^dag v_k %e\n", i, norm2(*param.B[i]), norm2(*tmp1));
872  printfQuda("L2 relative deviation = %e\n", sqrt(xmyNorm(*param.B[i], *tmp1) / norm2(*param.B[i])));
873  }
874  }
875 
876  sprintf(prefix, "MG level %d (%s): ", param.level + 1,
877  param.location == QUDA_CUDA_FIELD_LOCATION ? "GPU" : "CPU");
879  }
880  }
881 
882  delete tmp1;
883  delete tmp2;
884  delete tmp_coarse;
885 
886  if (param.level < param.Nlevel - 2) coarse->verify();
887 
889  }
890 
893 
894  if (param.level < param.Nlevel - 1) { // set parity for the solver in the transfer operator
895  QudaSiteSubset site_subset
898  QudaParity parity = (matpc_type == QUDA_MATPC_EVEN_EVEN || matpc_type == QUDA_MATPC_EVEN_EVEN_ASYMMETRIC) ?
901  transfer->setSiteSubset(site_subset, parity); // use this to force location of transfer
902  }
903 
904  // if input vector is single parity then we must be solving the
905  // preconditioned system in general this can only happen on the
906  // top level
908  QudaSolutionType inner_solution_type = param.coarse_grid_solution_type;
909 
910  if (debug) printfQuda("outer_solution_type = %d, inner_solution_type = %d\n", outer_solution_type, inner_solution_type);
911 
912  if ( outer_solution_type == QUDA_MATPC_SOLUTION && inner_solution_type == QUDA_MAT_SOLUTION)
913  errorQuda("Unsupported solution type combination");
914 
915  if ( inner_solution_type == QUDA_MATPC_SOLUTION && param.smoother_solve_type != QUDA_DIRECT_PC_SOLVE)
916  errorQuda("For this coarse grid solution type, a preconditioned smoother is required");
917 
918  if ( debug ) printfQuda("entering V-cycle with x2=%e, r2=%e\n", norm2(x), norm2(b));
919 
920  if (param.level < param.Nlevel-1) {
921  //transfer->setTransferGPU(false); // use this to force location of transfer (need to check if still works for multi-level)
922 
923  // do the pre smoothing
924  if ( debug ) printfQuda("pre-smoothing b2=%e\n", norm2(b));
925 
926  ColorSpinorField *out=nullptr, *in=nullptr;
927 
928  ColorSpinorField &residual = b.SiteSubset() == QUDA_FULL_SITE_SUBSET ? *r : r->Even();
929 
930  // FIXME only need to make a copy if not preconditioning
931  residual = b; // copy source vector since we will overwrite source with iterated residual
932 
933  diracSmoother->prepare(in, out, x, residual, outer_solution_type);
934 
935  // b_tilde holds either a copy of preconditioned source or a pointer to original source
937  else b_tilde = &b;
938  if (presmoother) (*presmoother)(*out, *in); else zero(*out);
939  ColorSpinorField &solution = inner_solution_type == outer_solution_type ? x : x.Even();
940  diracSmoother->reconstruct(solution, b, inner_solution_type);
941 
942  // if using preconditioned smoother then need to reconstruct full residual
943  // FIXME extend this check for precision, Schwarz, etc.
944  bool use_solver_residual =
945  ( (param.smoother_solve_type == QUDA_DIRECT_PC_SOLVE && inner_solution_type == QUDA_MATPC_SOLUTION) ||
946  (param.smoother_solve_type == QUDA_DIRECT_SOLVE && inner_solution_type == QUDA_MAT_SOLUTION) )
947  ? true : false;
948  // FIXME this is currently borked if inner solver is preconditioned
949  double r2 = 0.0;
950  if (use_solver_residual) {
951  if (debug) r2 = norm2(*r);
952  } else {
953  (*param.matResidual)(*r, x);
954  if (debug) r2 = xmyNorm(b, *r);
955  else axpby(1.0, b, -1.0, *r);
956  }
957 
958  // We need this to ensure that the coarse level has been created.
959  // e.g. in case of iterative setup with MG we use just pre- and post-smoothing at the first iteration.
960  if (transfer) {
961  // restrict to the coarse grid
962  transfer->R(*r_coarse, residual);
963  if ( debug ) printfQuda("after pre-smoothing x2 = %e, r2 = %e, r_coarse2 = %e\n", norm2(x), r2, norm2(*r_coarse));
964 
965  // recurse to the next lower level
966  (*coarse_solver)(*x_coarse, *r_coarse);
967  if (debug) printfQuda("after coarse solve x_coarse2 = %e r_coarse2 = %e\n", norm2(*x_coarse), norm2(*r_coarse));
968 
969  // prolongate back to this grid
970  ColorSpinorField &x_coarse_2_fine = inner_solution_type == QUDA_MAT_SOLUTION ? *r : r->Even(); // define according to inner solution type
971  transfer->P(x_coarse_2_fine, *x_coarse); // repurpose residual storage
972  xpy(x_coarse_2_fine, solution); // sum to solution FIXME - sum should be done inside the transfer operator
973  if ( debug ) {
974  printfQuda("Prolongated coarse solution y2 = %e\n", norm2(*r));
975  printfQuda("after coarse-grid correction x2 = %e, r2 = %e\n", norm2(x), norm2(*r));
976  }
977  }
978 
979  // do the post smoothing
980  //residual = outer_solution_type == QUDA_MAT_SOLUTION ? *r : r->Even(); // refine for outer solution type
982  in = b_tilde;
983  } else { // this incurs unecessary copying
984  *r = b;
985  in = r;
986  }
987 
988  // we should keep a copy of the prepared right hand side as we've already destroyed it
989  //dirac.prepare(in, out, solution, residual, inner_solution_type);
990 
991  if (postsmoother) (*postsmoother)(*out, *in); // for inner solve preconditioned, in the should be the original prepared rhs
992 
993  diracSmoother->reconstruct(x, b, outer_solution_type);
994 
995  } else { // do the coarse grid solve
996 
997  ColorSpinorField *out=nullptr, *in=nullptr;
998  diracSmoother->prepare(in, out, x, b, outer_solution_type);
999  if (presmoother) (*presmoother)(*out, *in);
1000  diracSmoother->reconstruct(x, b, outer_solution_type);
1001  }
1002 
1003  if ( debug ) {
1004  (*param.matResidual)(*r, x);
1005  double r2 = xmyNorm(b, *r);
1006  printfQuda("leaving V-cycle with x2=%e, r2=%e\n", norm2(x), r2);
1007  }
1008 
1009  popOutputPrefix();
1010  }
1011 
1012  // supports separate reading or single file read
1013  void MG::loadVectors(std::vector<ColorSpinorField *> &B)
1014  {
1018  std::string vec_infile(param.mg_global.vec_infile[param.level]);
1019  vec_infile += "_level_";
1020  vec_infile += std::to_string(param.level);
1021  vec_infile += "_nvec_";
1022  vec_infile += std::to_string(param.mg_global.n_vec[param.level]);
1023  EigenSolver::loadVectors(B, vec_infile);
1024  popLevel(param.level);
1027  }
1028 
1029  void MG::saveVectors(const std::vector<ColorSpinorField *> &B) const
1030  {
1034  std::string vec_outfile(param.mg_global.vec_outfile[param.level]);
1035  vec_outfile += "_level_";
1036  vec_outfile += std::to_string(param.level);
1037  vec_outfile += "_nvec_";
1038  vec_outfile += std::to_string(param.mg_global.n_vec[param.level]);
1039  EigenSolver::saveVectors(B, vec_outfile);
1040  popLevel(param.level);
1043  }
1044 
1045  void MG::dumpNullVectors() const
1046  {
1047  saveVectors(param.B);
1048  if (param.level < param.Nlevel - 2) coarse->dumpNullVectors();
1049  }
1050 
1051  void MG::generateNullVectors(std::vector<ColorSpinorField *> &B, bool refresh)
1052  {
1054 
1055  SolverParam solverParam(param); // Set solver field parameters:
1056  // set null-space generation options - need to expose these
1057  solverParam.maxiter
1059  solverParam.tol = param.mg_global.setup_tol[param.level];
1061  solverParam.delta = 1e-1;
1063  // Hard coded for now...
1064  if (solverParam.inv_type == QUDA_CA_CG_INVERTER || solverParam.inv_type == QUDA_CA_CGNE_INVERTER
1065  || solverParam.inv_type == QUDA_CA_CGNR_INVERTER || solverParam.inv_type == QUDA_CA_GCR_INVERTER) {
1070  } else {
1071  solverParam.Nkrylov = 4;
1072  }
1073  solverParam.pipeline
1074  = (solverParam.inv_type == QUDA_BICGSTAB_INVERTER ? 0 : 4); // FIXME: pipeline != 0 breaks BICGSTAB
1075  solverParam.precision = r->Precision();
1076 
1077  if (param.level == 0) { // this enables half precision on the fine grid only if set
1080  } else {
1081  solverParam.precision_precondition = solverParam.precision;
1082  }
1083  solverParam.residual_type = static_cast<QudaResidualType>(QUDA_L2_RELATIVE_RESIDUAL);
1085  ColorSpinorParam csParam(*B[0]); // Create spinor field parameters:
1086  csParam.setPrecision(r->Precision(), r->Precision(), true); // ensure native ordering
1087  csParam.location = QUDA_CUDA_FIELD_LOCATION; // hard code to GPU location for null-space generation for now
1089  csParam.create = QUDA_ZERO_FIELD_CREATE;
1090  ColorSpinorField *b = static_cast<ColorSpinorField *>(new cudaColorSpinorField(csParam));
1091  ColorSpinorField *x = static_cast<ColorSpinorField *>(new cudaColorSpinorField(csParam));
1092 
1093  csParam.create = QUDA_NULL_FIELD_CREATE;
1094 
1095  // if we not using GCR/MG smoother then we need to switch off Schwarz since regular Krylov solvers do not support it
1096  bool schwarz_reset = solverParam.inv_type != QUDA_MG_INVERTER
1098  if (schwarz_reset) {
1099  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Disabling Schwarz for null-space finding");
1100  int commDim[QUDA_MAX_DIM];
1101  for (int i = 0; i < QUDA_MAX_DIM; i++) commDim[i] = 1;
1102  diracSmootherSloppy->setCommDim(commDim);
1103  }
1104 
1105  // if quarter precision halo, promote for null-space finding to half precision
1106  QudaPrecision halo_precision = diracSmootherSloppy->HaloPrecision();
1108 
1109  Solver *solve;
1110  DiracMdagM *mdagm = (solverParam.inv_type == QUDA_CG_INVERTER || solverParam.inv_type == QUDA_CA_CG_INVERTER) ? new DiracMdagM(*diracSmoother) : nullptr;
1111  DiracMdagM *mdagmSloppy = (solverParam.inv_type == QUDA_CG_INVERTER || solverParam.inv_type == QUDA_CA_CG_INVERTER) ? new DiracMdagM(*diracSmootherSloppy) : nullptr;
1112  if (solverParam.inv_type == QUDA_CG_INVERTER || solverParam.inv_type == QUDA_CA_CG_INVERTER) {
1113  solve = Solver::create(solverParam, *mdagm, *mdagmSloppy, *mdagmSloppy, profile);
1114  } else if(solverParam.inv_type == QUDA_MG_INVERTER) {
1115  // in case MG has not been created, we create the Smoother
1116  if (!transfer) createSmoother();
1117 
1118  // run GCR with the MG as a preconditioner
1120  solverParam.schwarz_type = QUDA_ADDITIVE_SCHWARZ;
1121  solverParam.precondition_cycle = 1;
1122  solverParam.tol_precondition = 1e-1;
1123  solverParam.maxiter_precondition = 1;
1124  solverParam.omega = 1.0;
1126  solverParam.precision_sloppy = solverParam.precision;
1127  solverParam.compute_true_res = 0;
1128  solverParam.preconditioner = this;
1129 
1130  solverParam.inv_type = QUDA_GCR_INVERTER;
1132  solverParam.inv_type = QUDA_MG_INVERTER;
1133  } else {
1135  }
1136 
1137  for (int si = 0; si < param.mg_global.num_setup_iter[param.level]; si++) {
1138  if (getVerbosity() >= QUDA_VERBOSE)
1139  printfQuda("Running vectors setup on level %d iter %d of %d\n", param.level, si + 1,
1141 
1142  // global orthonormalization of the initial null-space vectors
1144  for(int i=0; i<(int)B.size(); i++) {
1145  for (int j=0; j<i; j++) {
1146  Complex alpha = cDotProduct(*B[j], *B[i]);// <j,i>
1147  caxpy(-alpha, *B[j], *B[i]); // i-<j,i>j
1148  }
1149  double nrm2 = norm2(*B[i]);
1150  if (nrm2 > 1e-16) ax(1.0 /sqrt(nrm2), *B[i]);// i/<i,i>
1151  else errorQuda("\nCannot normalize %u vector\n", i);
1152  }
1153  }
1154 
1155  // launch solver for each source
1156  for (int i=0; i<(int)B.size(); i++) {
1157  if (param.mg_global.setup_type == QUDA_TEST_VECTOR_SETUP) { // DDalphaAMG test vector idea
1158  *b = *B[i]; // inverting against the vector
1159  zero(*x); // with zero initial guess
1160  } else {
1161  *x = *B[i];
1162  zero(*b);
1163  }
1164 
1165  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Initial guess = %g\n", norm2(*x));
1166  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Initial rhs = %g\n", norm2(*b));
1167 
1168  ColorSpinorField *out=nullptr, *in=nullptr;
1169  diracSmoother->prepare(in, out, *x, *b, QUDA_MAT_SOLUTION);
1170  (*solve)(*out, *in);
1172 
1173  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Solution = %g\n", norm2(*x));
1174  *B[i] = *x;
1175  }
1176 
1177  // global orthonormalization of the generated null-space vectors
1179  for(int i=0; i<(int)B.size(); i++) {
1180  for (int j=0; j<i; j++) {
1181  Complex alpha = cDotProduct(*B[j], *B[i]);// <j,i>
1182  caxpy(-alpha, *B[j], *B[i]); // i-<j,i>j
1183  }
1184  double nrm2 = norm2(*B[i]);
1185  if (sqrt(nrm2) > 1e-16) ax(1.0/sqrt(nrm2), *B[i]);// i/<i,i>
1186  else errorQuda("\nCannot normalize %u vector (nrm=%e)\n", i, sqrt(nrm2));
1187  }
1188  }
1189 
1190  if (solverParam.inv_type == QUDA_MG_INVERTER) {
1191 
1192  if (transfer) {
1193  resetTransfer = true;
1194  reset();
1195  if ( param.level < param.Nlevel-2 ) {
1197  coarse->generateNullVectors(*B_coarse, refresh);
1198  } else {
1199  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Restricting null space vectors\n");
1200  for (int i=0; i<param.Nvec; i++) {
1201  zero(*(*B_coarse)[i]);
1202  transfer->R(*(*B_coarse)[i], *(param.B[i]));
1203  }
1204  // rebuild the transfer operator in the coarse level
1205  coarse->resetTransfer = true;
1206  coarse->reset();
1207  }
1208  }
1209  } else {
1210  reset();
1211  }
1212  }
1213  }
1214 
1215  delete solve;
1216  if (mdagm) delete mdagm;
1217  if (mdagmSloppy) delete mdagmSloppy;
1218 
1219  diracSmootherSloppy->setHaloPrecision(halo_precision); // restore halo precision
1220 
1221  delete x;
1222  delete b;
1223 
1224  // reenable Schwarz
1225  if (schwarz_reset) {
1226  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Reenabling Schwarz for null-space finding");
1227  int commDim[QUDA_MAX_DIM];
1228  for (int i=0; i<QUDA_MAX_DIM; i++) commDim[i] = 0;
1229  diracSmootherSloppy->setCommDim(commDim);
1230  }
1231 
1232  if (param.mg_global.vec_store[param.level] == QUDA_BOOLEAN_TRUE) { // conditional store of null vectors
1233  saveVectors(B);
1234  }
1235 
1236  popLevel(param.level);
1237  }
1238 
1239  // generate a full span of free vectors.
1240  // FIXME: Assumes fine level is SU(3).
1241  void MG::buildFreeVectors(std::vector<ColorSpinorField *> &B)
1242  {
1244  const int Nvec = B.size();
1245 
1246  // Given the number of colors and spins, figure out if the number
1247  // of vectors in 'B' makes sense.
1248  const int Ncolor = B[0]->Ncolor();
1249  const int Nspin = B[0]->Nspin();
1250 
1251  if (Ncolor == 3) // fine level
1252  {
1253  if (Nspin == 4) // Wilson or Twisted Mass (singlet)
1254  {
1255  // There needs to be 6 null vectors -> 12 after chirality.
1256  if (Nvec != 6) errorQuda("\nError in MG::buildFreeVectors: Wilson-type fermions require Nvec = 6");
1257 
1258  if (getVerbosity() >= QUDA_VERBOSE)
1259  printfQuda("Building %d free field vectors for Wilson-type fermions\n", Nvec);
1260 
1261  // Zero the null vectors.
1262  for (int i = 0; i < Nvec; i++) zero(*B[i]);
1263 
1264  // Create a temporary vector.
1265  ColorSpinorParam csParam(*B[0]);
1266  csParam.create = QUDA_ZERO_FIELD_CREATE;
1268 
1269  int counter = 0;
1270  for (int c = 0; c < Ncolor; c++) {
1271  for (int s = 0; s < 2; s++) {
1272  tmp->Source(QUDA_CONSTANT_SOURCE, 1, s, c);
1273  xpy(*tmp, *B[counter]);
1274  tmp->Source(QUDA_CONSTANT_SOURCE, 1, s + 2, c);
1275  xpy(*tmp, *B[counter]);
1276  counter++;
1277  }
1278  }
1279 
1280  delete tmp;
1281  } else if (Nspin == 1) // Staggered
1282  {
1283  // There needs to be 24 null vectors -> 48 after chirality.
1284  if (Nvec != 24) errorQuda("\nError in MG::buildFreeVectors: Staggered-type fermions require Nvec = 24\n");
1285 
1286  if (getVerbosity() >= QUDA_VERBOSE)
1287  printfQuda("Building %d free field vectors for Staggered-type fermions\n", Nvec);
1288 
1289  // Zero the null vectors.
1290  for (int i = 0; i < Nvec; i++) zero(*B[i]);
1291 
1292  // Create a temporary vector.
1293  ColorSpinorParam csParam(*B[0]);
1294  csParam.create = QUDA_ZERO_FIELD_CREATE;
1296 
1297  // Build free null vectors.
1298  for (int c = 0; c < B[0]->Ncolor(); c++) {
1299  // Need to pair an even+odd corner together
1300  // since they'll get split up.
1301 
1302  // 0000, 0001
1303  tmp->Source(QUDA_CORNER_SOURCE, 1, 0x0, c);
1304  xpy(*tmp, *B[8 * c + 0]);
1305  tmp->Source(QUDA_CORNER_SOURCE, 1, 0x1, c);
1306  xpy(*tmp, *B[8 * c + 0]);
1307 
1308  // 0010, 0011
1309  tmp->Source(QUDA_CORNER_SOURCE, 1, 0x2, c);
1310  xpy(*tmp, *B[8 * c + 1]);
1311  tmp->Source(QUDA_CORNER_SOURCE, 1, 0x3, c);
1312  xpy(*tmp, *B[8 * c + 1]);
1313 
1314  // 0100, 0101
1315  tmp->Source(QUDA_CORNER_SOURCE, 1, 0x4, c);
1316  xpy(*tmp, *B[8 * c + 2]);
1317  tmp->Source(QUDA_CORNER_SOURCE, 1, 0x5, c);
1318  xpy(*tmp, *B[8 * c + 2]);
1319 
1320  // 0110, 0111
1321  tmp->Source(QUDA_CORNER_SOURCE, 1, 0x6, c);
1322  xpy(*tmp, *B[8 * c + 3]);
1323  tmp->Source(QUDA_CORNER_SOURCE, 1, 0x7, c);
1324  xpy(*tmp, *B[8 * c + 3]);
1325 
1326  // 1000, 1001
1327  tmp->Source(QUDA_CORNER_SOURCE, 1, 0x8, c);
1328  xpy(*tmp, *B[8 * c + 4]);
1329  tmp->Source(QUDA_CORNER_SOURCE, 1, 0x9, c);
1330  xpy(*tmp, *B[8 * c + 4]);
1331 
1332  // 1010, 1011
1333  tmp->Source(QUDA_CORNER_SOURCE, 1, 0xA, c);
1334  xpy(*tmp, *B[8 * c + 5]);
1335  tmp->Source(QUDA_CORNER_SOURCE, 1, 0xB, c);
1336  xpy(*tmp, *B[8 * c + 5]);
1337 
1338  // 1100, 1101
1339  tmp->Source(QUDA_CORNER_SOURCE, 1, 0xC, c);
1340  xpy(*tmp, *B[8 * c + 6]);
1341  tmp->Source(QUDA_CORNER_SOURCE, 1, 0xD, c);
1342  xpy(*tmp, *B[8 * c + 6]);
1343 
1344  // 1110, 1111
1345  tmp->Source(QUDA_CORNER_SOURCE, 1, 0xE, c);
1346  xpy(*tmp, *B[8 * c + 7]);
1347  tmp->Source(QUDA_CORNER_SOURCE, 1, 0xF, c);
1348  xpy(*tmp, *B[8 * c + 7]);
1349  }
1350 
1351  delete tmp;
1352  } else {
1353  errorQuda("\nError in MG::buildFreeVectors: Unsupported combo of Nc %d, Nspin %d", Ncolor, Nspin);
1354  }
1355  } else // coarse level
1356  {
1357  if (Nspin == 2) {
1358  // There needs to be Ncolor null vectors.
1359  if (Nvec != Ncolor) errorQuda("\nError in MG::buildFreeVectors: Coarse fermions require Nvec = Ncolor");
1360 
1361  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Building %d free field vectors for Coarse fermions\n", Ncolor);
1362 
1363  // Zero the null vectors.
1364  for (int i = 0; i < Nvec; i++) zero(*B[i]);
1365 
1366  // Create a temporary vector.
1367  ColorSpinorParam csParam(*B[0]);
1368  csParam.create = QUDA_ZERO_FIELD_CREATE;
1370 
1371  for (int c = 0; c < Ncolor; c++) {
1372  tmp->Source(QUDA_CONSTANT_SOURCE, 1, 0, c);
1373  xpy(*tmp, *B[c]);
1374  tmp->Source(QUDA_CONSTANT_SOURCE, 1, 1, c);
1375  xpy(*tmp, *B[c]);
1376  }
1377 
1378  delete tmp;
1379  } else if (Nspin == 1) {
1380  // There needs to be Ncolor null vectors.
1381  if (Nvec != Ncolor) errorQuda("\nError in MG::buildFreeVectors: Coarse fermions require Nvec = Ncolor");
1382 
1383  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Building %d free field vectors for Coarse fermions\n", Ncolor);
1384 
1385  // Zero the null vectors.
1386  for (int i = 0; i < Nvec; i++) zero(*B[i]);
1387 
1388  // Create a temporary vector.
1389  ColorSpinorParam csParam(*B[0]);
1390  csParam.create = QUDA_ZERO_FIELD_CREATE;
1392 
1393  for (int c = 0; c < Ncolor; c++) {
1394  tmp->Source(QUDA_CONSTANT_SOURCE, 1, 0, c);
1395  xpy(*tmp, *B[c]);
1396  }
1397 
1398  delete tmp;
1399  } else {
1400  errorQuda("\nError in MG::buildFreeVectors: Unexpected Nspin = %d for coarse fermions", Nspin);
1401  }
1402  }
1403 
1404  // global orthonormalization of the generated null-space vectors
1406  for(int i=0; i<(int)B.size(); i++) {
1407  double nrm2 = norm2(*B[i]);
1408  if (nrm2 > 1e-16) ax(1.0 /sqrt(nrm2), *B[i]);// i/<i,i>
1409  else errorQuda("\nCannot normalize %u vector\n", i);
1410  }
1411  }
1412 
1413  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Done building free vectors\n");
1414 
1415  popLevel(param.level);
1416  }
1417 
1419  {
1421 
1422  // Extract eigensolver params
1423  int nConv = param.mg_global.eig_param[param.level]->nConv;
1425  bool normop = param.mg_global.eig_param[param.level]->use_norm_op;
1426 
1427  // Dummy array to keep the eigensolver happy.
1428  std::vector<Complex> evals(nConv, 0.0);
1429 
1430  std::vector<ColorSpinorField *> B_evecs;
1433  csParam.create = QUDA_ZERO_FIELD_CREATE;
1434  // This is the vector precision used by matResidual
1436 
1437  for (int i = 0; i < nConv; i++) B_evecs.push_back(ColorSpinorField::Create(csParam));
1438 
1439  // before entering the eigen solver, lets free the B vectors to save some memory
1440  ColorSpinorParam bParam(*param.B[0]);
1441  for (int i = 0; i < (int)param.B.size(); i++) delete param.B[i];
1442 
1444  if (!normop && !dagger) {
1445  DiracM *mat = new DiracM(*diracResidual);
1447  (*eig_solve)(B_evecs, evals);
1448  delete eig_solve;
1449  delete mat;
1450  } else if (!normop && dagger) {
1453  (*eig_solve)(B_evecs, evals);
1454  delete eig_solve;
1455  delete mat;
1456  } else if (normop && !dagger) {
1459  (*eig_solve)(B_evecs, evals);
1460  delete eig_solve;
1461  delete mat;
1462  } else if (normop && dagger) {
1465  (*eig_solve)(B_evecs, evals);
1466  delete eig_solve;
1467  delete mat;
1468  }
1469 
1470  // now reallocate the B vectors copy in e-vectors
1471  for (int i = 0; i < (int)param.B.size(); i++) {
1472  param.B[i] = ColorSpinorField::Create(bParam);
1473  *param.B[i] = *B_evecs[i];
1474  }
1475 
1476  // Local clean-up
1477  for (auto b : B_evecs) { delete b; }
1478 
1479  // only save if outfile is defined
1480  if (strcmp(param.mg_global.vec_outfile[param.level], "") != 0) { saveVectors(param.B); }
1481 
1482  popLevel(param.level);
1483  }
1484 
1485 } // namespace quda
cudaColorSpinorField * tmp2
QudaCABasis ca_basis
Definition: invert_quda.h:208
bool mg_instance
whether to use a global or local (node) reduction for this solver
Definition: invert_quda.h:248
bool global_reduction
whether the solver acting as a preconditioner for another solver
Definition: invert_quda.h:243
int comm_rank(void)
Definition: comm_mpi.cpp:82
QudaSchwarzType schwarz_type
Definition: invert_quda.h:217
void Init()
Initialize CURAND RNG states.
Definition: random.cu:122
DiracMatrix * matSmoothSloppy
Definition: multigrid.h:84
QudaBoolean global_reduction
Definition: multigrid.h:75
#define postTrace()
Definition: tune_quda.h:591
void dumpNullVectors() const
Dump the null-space vectors to disk. Will recurse dumping all levels.
Definition: multigrid.cpp:1045
QudaMultigridParam & mg_global
Definition: multigrid.h:30
void setPrecision(QudaPrecision precision, QudaPrecision ghost_precision=QUDA_INVALID_PRECISION, bool force_native=false)
QudaVerbosity verbosity_precondition
Definition: invert_quda.h:239
SolverParam * param_coarse_solver
Definition: multigrid.h:220
cudaColorSpinorField * tmp1
QudaBoolean use_dagger
Definition: quda.h:401
double Kappa() const
Definition: dirac_quda.h:173
std::vector< ColorSpinorField * > & B
Definition: multigrid.h:57
QudaEigParam * eig_param[QUDA_MAX_MG_LEVEL]
Definition: quda.h:480
enum QudaPrecision_s QudaPrecision
bool need_bidirectional
Definition: dirac_quda.h:51
double setup_ca_lambda_max[QUDA_MAX_MG_LEVEL]
Definition: quda.h:528
MGParam * param_coarse
Definition: multigrid.h:211
QudaFieldLocation location
Definition: multigrid.h:97
void generateNullVectors(std::vector< ColorSpinorField *> &B, bool refresh=false)
Generate the null-space vectors.
Definition: multigrid.cpp:1051
virtual void Source(const QudaSourceType sourceType, const int st=0, const int s=0, const int c=0)=0
void verify()
Definition: multigrid.cpp:627
TimeProfile & profile_global
Definition: multigrid.h:193
enum QudaResidualType_s QudaResidualType
QudaInverterType inv_type
Definition: invert_quda.h:22
const Dirac * diracResidual
Definition: multigrid.h:244
QudaVerbosity getVerbosity()
Definition: util_quda.cpp:21
double kappa
Definition: test_util.cpp:1647
QudaFieldLocation setup_location
Definition: multigrid.h:100
QudaBoolean use_eig_solver[QUDA_MAX_MG_LEVEL]
Definition: quda.h:604
void pushLevel(int level) const
Helper function called on entry to each MG function.
Definition: multigrid.cpp:235
void createSmoother()
Create the smoothers.
Definition: multigrid.cpp:276
ColorSpinorField * b_tilde
Definition: multigrid.h:229
#define errorQuda(...)
Definition: util_quda.h:121
void destroyCoarseSolver()
Destroy the solver wrapper.
Definition: multigrid.cpp:415
void generateEigenVectors()
Generate lowest eigenvectors.
Definition: multigrid.cpp:1418
void R(ColorSpinorField &out, const ColorSpinorField &in) const
Definition: transfer.cpp:344
QudaSolveType smoother_solve_type[QUDA_MAX_MG_LEVEL]
Definition: quda.h:589
void destroySmoother()
Destroy the smoothers.
Definition: multigrid.cpp:249
QudaSetupType setup_type
Definition: quda.h:531
double setup_tol[QUDA_MAX_MG_LEVEL]
Definition: quda.h:510
__host__ __device__ ValueType sqrt(ValueType x)
Definition: complex_quda.h:120
Complex cDotProduct(ColorSpinorField &, ColorSpinorField &)
Definition: reduce_quda.cu:764
int num_setup_iter[QUDA_MAX_MG_LEVEL]
Definition: quda.h:507
void setOutputPrefix(const char *prefix)
Definition: util_quda.cpp:69
Dirac * diracCoarseSmoother
Definition: multigrid.h:256
Transfer * transfer
Definition: dirac_quda.h:49
DiracMatrix * matCoarseSmootherSloppy
Definition: multigrid.h:268
SolverParam * param_presmooth
Definition: multigrid.h:214
cudaColorSpinorField * tmp
Definition: covdev_test.cpp:44
const ColorSpinorField & Even() const
double coarse_solver_tol[QUDA_MAX_MG_LEVEL]
Definition: quda.h:543
const ColorSpinorField & Odd() const
static ColorSpinorField * Create(const ColorSpinorParam &param)
static void loadVectors(std::vector< ColorSpinorField *> &eig_vecs, std::string file)
Load vectors from file.
QudaBoolean pre_orthonormalize
Definition: quda.h:534
QudaSchwarzType smoother_schwarz_type[QUDA_MAX_MG_LEVEL]
Definition: quda.h:579
ColorSpinorField * r
Definition: multigrid.h:226
QudaBoolean vec_load[QUDA_MAX_MG_LEVEL]
Definition: quda.h:627
MGParam & param
Definition: multigrid.h:181
void P(ColorSpinorField &out, const ColorSpinorField &in) const
Definition: transfer.cpp:305
STL namespace.
double setup_ca_lambda_min[QUDA_MAX_MG_LEVEL]
Definition: quda.h:525
QudaPrecision HaloPrecision() const
Definition: dirac_quda.h:200
double xmyNorm(ColorSpinorField &x, ColorSpinorField &y)
Definition: blas_quda.h:75
QudaPrecision smoother_halo_precision[QUDA_MAX_MG_LEVEL]
Definition: quda.h:576
void loadVectors(std::vector< ColorSpinorField *> &B)
Load the null space vectors in from file.
Definition: multigrid.cpp:1013
QudaInverterType inv_type_precondition
Definition: invert_quda.h:28
void setHaloPrecision(QudaPrecision halo_precision_) const
Definition: dirac_quda.h:201
QudaPreserveSource preserve_source
Definition: invert_quda.h:154
void reset(bool refresh=false)
This method resets the solver, e.g., when a parameter has changed such as the mass.
Definition: multigrid.cpp:117
double mu_factor
Definition: dirac_quda.h:38
int spinBlockSize
Definition: multigrid.h:42
virtual double Mu() const
Definition: dirac_quda.h:174
void setSiteSubset(QudaSiteSubset site_subset, QudaParity parity)
Sets whether the transfer operator is to act on full fields or single parity fields, and if single-parity which parity.
Definition: transfer.cpp:227
QudaBoolean setup_minimize_memory
Definition: quda.h:609
virtual void MdagM(ColorSpinorField &out, const ColorSpinorField &in) const =0
double mu_factor[QUDA_MAX_MG_LEVEL]
Definition: quda.h:648
int Nspin
Definition: blas_test.cu:45
QudaSolveType smoother_solve_type
Definition: multigrid.h:94
QudaBoolean coarse_guess
Definition: quda.h:639
QudaSiteSubset siteSubset
Definition: lattice_field.h:71
QudaCABasis setup_ca_basis[QUDA_MAX_MG_LEVEL]
Definition: quda.h:519
ColorSpinorField * xD
Definition: blas_test.cu:41
static void axpby(Float a, Float *x, Float b, Float *y, int len)
Definition: dslash_util.h:33
double norm2(const CloverField &a, bool inverse=false)
Solver * presmoother
Definition: multigrid.h:190
const Dirac * diracSmootherSloppy
Definition: multigrid.h:250
void operator()(ColorSpinorField &out, ColorSpinorField &in)
Definition: multigrid.cpp:891
int n_vec[QUDA_MAX_MG_LEVEL]
Definition: quda.h:492
void popLevel(int level) const
Helper function called on exit to each MG member function.
Definition: multigrid.cpp:242
QudaGaugeParam param
Definition: pack_test.cpp:17
int nConv
Definition: quda.h:420
QudaFieldLocation setup_location[QUDA_MAX_MG_LEVEL]
Definition: quda.h:601
static void saveVectors(const std::vector< ColorSpinorField *> &eig_vecs, std::string file)
Save vectors to file.
DiracMatrix * matCoarseSmoother
Definition: multigrid.h:265
static bool debug
Definition: multigrid.cpp:12
int x[QUDA_MAX_DIM]
Definition: lattice_field.h:67
void saveVectors(const std::vector< ColorSpinorField *> &B) const
Save the null space vectors in from file.
Definition: multigrid.cpp:1029
QudaComputeNullVector compute_null_vector
Definition: invert_quda.h:67
int coarse_solver_maxiter[QUDA_MAX_MG_LEVEL]
Definition: quda.h:546
ColorSpinorField * tmp_coarse
Definition: multigrid.h:238
void ax(const double &a, GaugeField &u)
Scale the gauge field by the scalar a.
double tol
Definition: test_util.cpp:1656
__device__ __host__ void caxpy(const complex< Float > &a, const complex< Float > &x, complex< Float > &y)
QudaFieldLocation location
void buildFreeVectors(std::vector< ColorSpinorField *> &B)
Build free-field null-space vectors.
Definition: multigrid.cpp:1241
QudaBoolean post_orthonormalize
Definition: quda.h:537
static EigenSolver * create(QudaEigParam *eig_param, const DiracMatrix &mat, TimeProfile &profile)
Creates the eigensolver using the parameters given and the matrix.
QudaResidualType residual_type
Definition: invert_quda.h:49
SolverParam * param_postsmooth
Definition: multigrid.h:217
void pushOutputPrefix(const char *prefix)
Push a new output prefix onto the stack.
Definition: util_quda.cpp:105
void updateInvertParam(QudaInvertParam &param, int offset=-1)
Definition: invert_quda.h:428
char prefix[128]
Definition: multigrid.h:199
QudaPrecision cuda_prec_sloppy
Definition: quda.h:215
void Release()
Release Device memory for CURAND RNG states.
Definition: random.cu:145
int setup_maxiter[QUDA_MAX_MG_LEVEL]
Definition: quda.h:513
const Dirac * diracSmoother
Definition: multigrid.h:247
ColorSpinorParam csParam
Definition: pack_test.cpp:24
virtual void prepare(ColorSpinorField *&src, ColorSpinorField *&sol, ColorSpinorField &x, ColorSpinorField &b, const QudaSolutionType) const =0
int commDim[QUDA_MAX_DIM]
Definition: dirac_quda.h:44
std::vector< Complex > evals
Definition: multigrid.h:60
QudaDagType dagger
Definition: dirac_quda.h:30
QudaInverterType smoother
Definition: multigrid.h:87
cpuColorSpinorField * in
virtual void DslashXpay(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity, const ColorSpinorField &x, const double &k) const =0
static Solver * create(SolverParam &param, DiracMatrix &mat, DiracMatrix &matSloppy, DiracMatrix &matPrecon, TimeProfile &profile)
Definition: solver.cpp:33
void Print()
Definition: timer.cpp:7
QudaSiteSubset SiteSubset() const
Class declaration to initialize and hold CURAND RNG states.
Definition: random_quda.h:23
enum QudaMatPCType_s QudaMatPCType
DiracMatrix * matSmooth
Definition: multigrid.h:81
int geoBlockSize[QUDA_MAX_DIM]
Definition: multigrid.h:39
Dirac * diracCoarseResidual
Definition: multigrid.h:253
enum QudaSolutionType_s QudaSolutionType
bool is_preconditioner
verbosity to use for preconditioner
Definition: invert_quda.h:241
QudaBoolean use_norm_op
Definition: quda.h:402
QudaDiracType type
Definition: dirac_quda.h:22
QudaPrecision precision_null[QUDA_MAX_MG_LEVEL]
Definition: quda.h:495
void popOutputPrefix()
Pop the output prefix restoring the prior one on the stack.
Definition: util_quda.cpp:121
QudaVerbosity verbosity[QUDA_MAX_MG_LEVEL]
Definition: quda.h:501
QudaInverterType coarse_solver[QUDA_MAX_MG_LEVEL]
Definition: quda.h:540
QudaMatPCType matpcType
Definition: dirac_quda.h:29
std::complex< double > Complex
Definition: quda_internal.h:46
EigenSolver * eig_solve
Definition: invert_quda.h:545
QudaBoolean run_verify
Definition: quda.h:618
std::vector< ColorSpinorField * > * B_coarse
Definition: multigrid.h:223
enum QudaParity_s QudaParity
QudaMatPCType matpc_type
Definition: test_util.cpp:1662
char coarse_prefix[128]
Definition: multigrid.h:202
QudaEigParam eig_param
Definition: invert_quda.h:55
ColorSpinorField * tmp2_coarse
Definition: multigrid.h:241
ColorSpinorField * r_coarse
Definition: multigrid.h:232
QudaInverterType coarse_solver[QUDA_MAX_MG_LEVEL]
Definition: test_util.cpp:1691
#define MAX_BLOCK_FLOAT_NC
bool resetTransfer
Definition: multigrid.h:187
double tol_precondition
Definition: invert_quda.h:199
DiracMatrix * matCoarseResidual
Definition: multigrid.h:262
QudaBoolean vec_store[QUDA_MAX_MG_LEVEL]
Definition: quda.h:633
QudaPrecision halo_precision
Definition: dirac_quda.h:46
void pushVerbosity(QudaVerbosity verbosity)
Push a new verbosity onto the stack.
Definition: util_quda.cpp:83
QudaPrecision precision_precondition
Definition: invert_quda.h:151
QudaBoolean run_oblique_proj_check
Definition: quda.h:624
Dirac * diracCoarseSmootherSloppy
Definition: multigrid.h:259
void setCommDim(const int commDim_[QUDA_MAX_DIM]) const
Enable / disable communications for the Dirac operator.
Definition: dirac_quda.h:145
QudaFieldLocation location[QUDA_MAX_MG_LEVEL]
Definition: quda.h:598
enum QudaSiteSubset_s QudaSiteSubset
QudaPrecision precision
Definition: invert_quda.h:142
int smoother_schwarz_cycle[QUDA_MAX_MG_LEVEL]
Definition: quda.h:582
char vec_outfile[QUDA_MAX_MG_LEVEL][256]
Definition: quda.h:636
QudaBoolean run_low_mode_check
Definition: quda.h:621
char vec_outfile[256]
Definition: quda.h:462
double smoother_tol
Definition: multigrid.h:69
double coarse_solver_ca_lambda_max[QUDA_MAX_MG_LEVEL]
Definition: quda.h:558
static int commDim[QUDA_MAX_DIM]
Definition: dslash_pack.cuh:9
cpuColorSpinorField * out
QudaPrecision cuda_prec_precondition
Definition: quda.h:217
QudaSolutionType coarse_grid_solution_type[QUDA_MAX_MG_LEVEL]
Definition: quda.h:586
TimeProfile profile
Definition: multigrid.h:196
int Ncolor
Definition: blas_test.cu:46
virtual ~MG()
Definition: multigrid.cpp:546
double flops() const
Return the total flops done on this and all coarser levels.
Definition: multigrid.cpp:597
int geo_block_size[QUDA_MAX_MG_LEVEL][QUDA_MAX_DIM]
Definition: quda.h:486
QudaBoolean generate_all_levels
Definition: quda.h:615
RNG * rng
Definition: multigrid.h:271
void * preconditioner
Definition: invert_quda.h:33
#define QUDA_MAX_MG_LEVEL
Maximum number of multi-grid levels. This number may be increased if needed.
__shared__ float s[]
QudaPrecision Precision() const
Definition: lattice_field.h:58
int NblockOrtho
Definition: multigrid.h:48
int coarse_solver_ca_basis_size[QUDA_MAX_MG_LEVEL]
Definition: quda.h:552
QudaCABasis coarse_solver_ca_basis[QUDA_MAX_MG_LEVEL]
Definition: quda.h:549
#define printfQuda(...)
Definition: util_quda.h:115
void createCoarseDirac()
Create the coarse dirac operator.
Definition: multigrid.cpp:334
char vec_infile[QUDA_MAX_MG_LEVEL][256]
Definition: quda.h:630
void xpy(ColorSpinorField &x, ColorSpinorField &y)
Definition: blas_quda.h:33
QudaTwistFlavorType TwistFlavor() const
Transfer * transfer
Definition: multigrid.h:184
int transfer
Definition: covdev_test.cpp:55
int setup_maxiter_refresh[QUDA_MAX_MG_LEVEL]
Definition: quda.h:516
ColorSpinorField * tmp2
Definition: dirac_quda.h:42
virtual void PrintVector(unsigned int x) const =0
Solver * postsmoother
Definition: multigrid.h:190
QudaUseInitGuess use_init_guess
Definition: invert_quda.h:64
void popVerbosity()
Pop the verbosity restoring the prior one on the stack.
Definition: util_quda.cpp:94
QudaPrecision precision_sloppy
Definition: invert_quda.h:145
#define QUDA_MAX_DIM
Maximum number of dimensions supported by QUDA. In practice, no routines make use of more than 5...
MG(MGParam &param, TimeProfile &profile)
Definition: multigrid.cpp:14
QudaComputeNullVector compute_null_vector
Definition: quda.h:612
void createCoarseSolver()
Create the solver wrapper.
Definition: multigrid.cpp:436
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
virtual void reconstruct(ColorSpinorField &x, const ColorSpinorField &b, const QudaSolutionType) const =0
QudaPrecision Precision() const
QudaSolutionType coarse_grid_solution_type
Definition: multigrid.h:91
QudaDagType dagger
Definition: test_util.cpp:1620
MG * coarse
Definition: multigrid.h:205
int setup_ca_basis_size[QUDA_MAX_MG_LEVEL]
Definition: quda.h:522
double coarse_solver_ca_lambda_min[QUDA_MAX_MG_LEVEL]
Definition: quda.h:555
QudaParity parity
Definition: covdev_test.cpp:54
double flops() const
Definition: transfer.cpp:387
QudaPrecision prec
Definition: test_util.cpp:1608
const ColorSpinorField & Vectors(QudaFieldLocation location=QUDA_INVALID_FIELD_LOCATION) const
Definition: transfer.h:205
Solver * coarse_solver
Definition: multigrid.h:208
QudaMultigridCycleType cycle_type
Definition: multigrid.h:72
__device__ __host__ void zero(vector_type< scalar, n > &v)
Definition: cub_helper.cuh:54
QudaInverterType setup_inv_type[QUDA_MAX_MG_LEVEL]
Definition: quda.h:504
QudaMatPCType matpc_type
Definition: quda.h:206
char vec_infile[256]
Definition: quda.h:459
const Dirac * Expose() const
Definition: dirac_quda.h:1135
ColorSpinorField * tmp1
Definition: dirac_quda.h:41
DiracMatrix * matResidual
Definition: multigrid.h:78
void spinorNoise(ColorSpinorField &src, RNG &randstates, QudaNoiseType type)
Generate a random noise spinor. This variant allows the user to manage the RNG state.
ColorSpinorField * x_coarse
Definition: multigrid.h:235
QudaInvertParam * invert_param
Definition: quda.h:478
static void dot(sFloat *res, gFloat *a, sFloat *b)
Definition: dslash_util.h:56
void comm_barrier(void)
Definition: comm_mpi.cpp:326
void reset()
for resetting the Transfer when the null vectors have changed
Definition: transfer.cpp:182