QUDA v1.1.0
A library for QCD on GPUs
multigrid.cpp
1 #include <cstring>
2 
3 #include <multigrid.h>
4 #include <vector_io.h>
5 
6 // for building the KD inverse op
7 #include <staggered_kd_build_xinv.h>
8 
9 namespace quda
10 {
11 
12  using namespace blas;
13 
14  static bool debug = false;
15 
16  MG::MG(MGParam &param, TimeProfile &profile_global) :
17  Solver(*param.matResidual, *param.matSmooth, *param.matSmoothSloppy, *param.matSmoothSloppy, param, profile),
18  param(param),
19  transfer(0),
20  resetTransfer(false),
21  presmoother(nullptr),
22  postsmoother(nullptr),
23  profile_global(profile_global),
24  profile("MG level " + std::to_string(param.level), false),
25  coarse(nullptr),
26  coarse_solver(nullptr),
27  param_coarse(nullptr),
28  param_presmooth(nullptr),
29  param_postsmooth(nullptr),
30  param_coarse_solver(nullptr),
31  r(nullptr),
32  b_tilde(nullptr),
33  r_coarse(nullptr),
34  x_coarse(nullptr),
35  tmp_coarse(nullptr),
36  tmp2_coarse(nullptr),
37  xInvKD(nullptr),
38  diracResidual(param.matResidual->Expose()),
39  diracSmoother(param.matSmooth->Expose()),
40  diracSmootherSloppy(param.matSmoothSloppy->Expose()),
41  diracCoarseResidual(nullptr),
42  diracCoarseSmoother(nullptr),
43  diracCoarseSmootherSloppy(nullptr),
44  matCoarseResidual(nullptr),
45  matCoarseSmoother(nullptr),
46  matCoarseSmootherSloppy(nullptr),
47  rng(nullptr)
48  {
49  sprintf(prefix, "MG level %d (%s): ", param.level, param.location == QUDA_CUDA_FIELD_LOCATION ? "GPU" : "CPU");
50  pushLevel(param.level);
51 
52  if (param.level >= QUDA_MAX_MG_LEVEL)
53  errorQuda("Level=%d is greater than limit of multigrid recursion depth", param.level);
54 
55  if (param.coarse_grid_solution_type == QUDA_MATPC_SOLUTION && param.smoother_solve_type != QUDA_DIRECT_PC_SOLVE)
56  errorQuda("Cannot use preconditioned coarse grid solution without preconditioned smoother solve");
57 
58  if (param.mg_global.location[param.level] == QUDA_CPU_FIELD_LOCATION
59  || param.mg_global.location[param.level + 1] == QUDA_CPU_FIELD_LOCATION)
60  errorQuda("CPU solver location for MG is disabled");
61 
62  // allocating vectors
63  {
64  // create residual vectors
65  ColorSpinorParam csParam(*(param.B[0]));
66  csParam.create = QUDA_NULL_FIELD_CREATE;
67  csParam.location = param.location;
68  csParam.setPrecision(param.mg_global.invert_param->cuda_prec_sloppy, QUDA_INVALID_PRECISION,
69  csParam.location == QUDA_CUDA_FIELD_LOCATION ? true : false);
70  if (csParam.location==QUDA_CUDA_FIELD_LOCATION) {
71  csParam.gammaBasis = param.level > 0 ? QUDA_DEGRAND_ROSSI_GAMMA_BASIS : QUDA_UKQCD_GAMMA_BASIS;
72  }
73  if (param.B[0]->Nspin() == 1) csParam.gammaBasis = param.B[0]->GammaBasis(); // hack for staggered to avoid unnecessary basis checks
74  r = ColorSpinorField::Create(csParam);
75 
76  // if we're using preconditioning then allocate storage for the preconditioned source vector
77  if (param.smoother_solve_type == QUDA_DIRECT_PC_SOLVE) {
78  csParam.x[0] /= 2;
79  csParam.siteSubset = QUDA_PARITY_SITE_SUBSET;
80  b_tilde = ColorSpinorField::Create(csParam);
81  }
82  }
83 
84  rng = new RNG(*param.B[0], 1234);
85  rng->Init();
86 
87  if (param.transfer_type == QUDA_TRANSFER_AGGREGATE) {
88  if (param.level < param.Nlevel - 1) {
89  if (param.mg_global.compute_null_vector == QUDA_COMPUTE_NULL_VECTOR_YES) {
90  if (param.mg_global.generate_all_levels == QUDA_BOOLEAN_TRUE || param.level == 0) {
91 
92  // Initializing to random vectors
93  for (int i = 0; i < (int)param.B.size(); i++) {
94  spinorNoise(*r, *rng, QUDA_NOISE_UNIFORM);
95  *param.B[i] = *r;
96  }
97  }
98  if (param.mg_global.num_setup_iter[param.level] > 0) {
99  if (param.mg_global.vec_load[param.level] == QUDA_BOOLEAN_TRUE
100  && strcmp(param.mg_global.vec_infile[param.level], "")
101  != 0) { // only load if infile is defined and not computing
102  loadVectors(param.B);
103  } else if (param.mg_global.use_eig_solver[param.level]) {
104  generateEigenVectors(); // Run the eigensolver
105  } else {
106  generateNullVectors(param.B);
107  }
108  }
109  } else if (strcmp(param.mg_global.vec_infile[param.level], "")
110  != 0) { // only load if infile is defined and not computing
111  if (param.mg_global.num_setup_iter[param.level] > 0) generateNullVectors(param.B);
112  } else if (param.mg_global.vec_load[param.level] == QUDA_BOOLEAN_TRUE) { // only conditional load of null vectors
113  loadVectors(param.B);
114  } else { // generate free field vectors
115  buildFreeVectors(param.B);
116  }
117  }
118  }
119 
120  // in case of iterative setup with MG the coarse level may be already built
121  if (!transfer) reset();
122 
123  popLevel(param.level);
124  }
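
 // Illustrative use (a sketch, not part of this file): the QUDA interface
 // fills an MGParam for level 0 and constructs the top-level MG object, which
 // recursively sets up every coarser level through reset().
 //
 //   MGParam mg_param(...);      // level-0 parameters; near-null vectors in mg_param.B
 //   TimeProfile profile("MG");
 //   MG mg(mg_param, profile);   // builds transfer, smoothers and all coarse levels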
125 
126  void MG::reset(bool refresh) {
127  pushLevel(param.level);
128 
129  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("%s level %d\n", transfer ? "Resetting" : "Creating", param.level);
130 
131  if (param.mg_global.setup_location[param.level + 1] == QUDA_CPU_FIELD_LOCATION)
132  errorQuda("MG setup location %d disabled", param.mg_global.setup_location[param.level + 1]);
133  if (param.mg_global.location[param.level + 1] == QUDA_CPU_FIELD_LOCATION)
134  errorQuda("MG location %d disabled", param.mg_global.location[param.level + 1]);
135 
136  destroySmoother();
137  destroyCoarseSolver();
138 
139  // reset the Dirac operator pointers since these may have changed
140  diracResidual = param.matResidual->Expose();
141  diracSmoother = param.matSmooth->Expose();
142  diracSmootherSloppy = param.matSmoothSloppy->Expose();
143 
144  // Only refresh if we needed to generate near-nulls, that is,
145  // if we aren't doing a staggered KD solve
146  if (param.level != 0 || param.transfer_type == QUDA_TRANSFER_AGGREGATE) {
147  // Refresh the null-space vectors if we need to
148  if (refresh && param.level < param.Nlevel - 1) {
149  if (param.mg_global.setup_maxiter_refresh[param.level]) generateNullVectors(param.B, refresh);
150  }
151  }
152 
153  // if not on the coarsest level, update next
154  if (param.level < param.Nlevel-1) {
155 
156  if (transfer) {
157  // restoring FULL parity in Transfer changed at the end of this procedure
158  transfer->setSiteSubset(QUDA_FULL_SITE_SUBSET, QUDA_INVALID_PARITY);
159  if (resetTransfer || refresh) {
160  transfer->reset();
161  resetTransfer = false;
162  }
163  } else {
164  // create transfer operator
165  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Creating transfer operator\n");
166  transfer = new Transfer(param.B, param.Nvec, param.NblockOrtho, param.geoBlockSize, param.spinBlockSize,
167  param.mg_global.precision_null[param.level], param.mg_global.transfer_type[param.level],
168  profile);
169  for (int i=0; i<QUDA_MAX_MG_LEVEL; i++) param.mg_global.geo_block_size[param.level][i] = param.geoBlockSize[i];
170 
171  // create coarse temporary vector if not already created in verify()
172  if (!tmp_coarse)
173  tmp_coarse = param.B[0]->CreateCoarse(param.geoBlockSize, param.spinBlockSize, param.Nvec, r->Precision(),
174  param.mg_global.location[param.level + 1]);
175 
176  // create coarse temporary vector if not already created in verify()
177  if (!tmp2_coarse)
178  tmp2_coarse = param.B[0]->CreateCoarse(param.geoBlockSize, param.spinBlockSize, param.Nvec, r->Precision(),
179  param.mg_global.location[param.level + 1]);
180 
181  // create coarse residual vector if not already created in verify()
182  if (!r_coarse)
183  r_coarse = param.B[0]->CreateCoarse(param.geoBlockSize, param.spinBlockSize, param.Nvec, r->Precision(),
184  param.mg_global.location[param.level + 1]);
185 
186  // create coarse solution vector if not already created in verify()
187  if (!x_coarse)
188  x_coarse = param.B[0]->CreateCoarse(param.geoBlockSize, param.spinBlockSize, param.Nvec, r->Precision(),
189  param.mg_global.location[param.level + 1]);
190 
191  B_coarse = new std::vector<ColorSpinorField*>();
192  int nVec_coarse = std::max(param.Nvec, param.mg_global.n_vec[param.level + 1]);
193  B_coarse->resize(nVec_coarse);
194 
195  // only have single precision B vectors on the coarse grid
196  QudaPrecision B_coarse_precision = std::max(param.mg_global.precision_null[param.level+1], QUDA_SINGLE_PRECISION);
197  for (int i=0; i<nVec_coarse; i++)
198  (*B_coarse)[i] = param.B[0]->CreateCoarse(param.geoBlockSize, param.spinBlockSize, param.Nvec, B_coarse_precision, param.mg_global.setup_location[param.level+1]);
199 
200  // if we're not generating on all levels then we need to propagate the vectors down
201  if ((param.level != 0 || param.Nlevel - 1) && param.mg_global.generate_all_levels == QUDA_BOOLEAN_FALSE) {
202  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Restricting null space vectors\n");
203  for (int i=0; i<param.Nvec; i++) {
204  zero(*(*B_coarse)[i]);
205  transfer->R(*(*B_coarse)[i], *(param.B[i]));
206  }
207  }
208  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Transfer operator done\n");
209  }
210 
211  // we no longer need the B fields for this level, can evict them to host memory
212  // (only if using managed memory and prefetching is enabled, otherwise no-op)
213  for (int i = 0; i < param.Nvec; i++) { param.B[i]->prefetch(QUDA_CPU_FIELD_LOCATION); }
214 
216  }
217 
218  // delay allocating smoother until after coarse-links have been created
219  createSmoother();
220 
221  if (param.level < param.Nlevel-1) {
222  // If enabled, verify the coarse links and fine solvers were correctly built
223  if (param.mg_global.run_verify) verify();
224 
225  // creating or resetting the coarse level temporaries and solvers
226  if (coarse) {
227  coarse->param.updateInvertParam(*param.mg_global.invert_param);
228  coarse->param.delta = 1e-20;
229  coarse->param.precision = param.mg_global.invert_param->cuda_prec_precondition;
230  coarse->param.matResidual = matCoarseResidual;
231  coarse->param.matSmooth = matCoarseSmoother;
232  coarse->param.matSmoothSloppy = matCoarseSmootherSloppy;
233  coarse->reset(refresh);
234  } else {
235  // create the next multigrid level
236  param_coarse = new MGParam(param, *B_coarse, matCoarseResidual, matCoarseSmoother, matCoarseSmootherSloppy,
237  param.level + 1);
238  param_coarse->fine = this;
239  param_coarse->delta = 1e-20;
240  param_coarse->precision = param.mg_global.invert_param->cuda_prec_precondition;
241 
242  coarse = new MG(*param_coarse, profile_global);
243  }
244  setOutputPrefix(prefix); // restore since we just popped back from coarse grid
245 
246  createCoarseSolver();
247  }
248 
249  // We're going back up the coarse construct stack now, prefetch the gauge fields on
250  // this level back to device memory.
251  diracResidual->prefetch(QUDA_CUDA_FIELD_LOCATION);
252  diracSmoother->prefetch(QUDA_CUDA_FIELD_LOCATION);
253  diracSmootherSloppy->prefetch(QUDA_CUDA_FIELD_LOCATION);
254 
255  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Setup of level %d done\n", param.level);
256 
257  popLevel(param.level);
258  }
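
 // In summary, reset() (re)builds each level in a fixed order: the Transfer
 // operator from the near-null vectors, the coarse temporaries, the coarse
 // Dirac operators (createCoarseDirac, called once the B fields have been
 // evicted), the smoothers (createSmoother) and the coarse-grid solver
 // (createCoarseSolver), finally recursing through coarse->reset().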
259 
260  void MG::pushLevel(int level) const
261  {
262  postTrace();
263  pushVerbosity(param.mg_global.verbosity[level]);
264  pushOutputPrefix(prefix);
265  }
266 
267  void MG::popLevel(int level) const
268  {
269  popVerbosity();
270  popOutputPrefix();
271  postTrace();
272  }
273 
274  void MG::destroySmoother()
275  {
276  pushLevel(param.level);
277 
278  if (presmoother) {
279  delete presmoother;
280  presmoother = nullptr;
281  }
282 
283  if (param_presmooth) {
284  delete param_presmooth;
285  param_presmooth = nullptr;
286  }
287 
288  if (postsmoother) {
289  delete postsmoother;
290  postsmoother = nullptr;
291  }
292 
293  if (param_postsmooth) {
294  delete param_postsmooth;
295  param_postsmooth = nullptr;
296  }
297 
298  popLevel(param.level);
299  }
300 
301  void MG::createSmoother() {
302  pushLevel(param.level);
303 
304  // create the smoother for this level
305  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Creating smoother\n");
306  destroySmoother();
307  param_presmooth = new SolverParam(param);
308 
309  param_presmooth->is_preconditioner = false;
310  param_presmooth->preserve_source = QUDA_PRESERVE_SOURCE_NO;
311  param_presmooth->return_residual = true; // pre-smoother returns the residual vector for subsequent coarsening
312  param_presmooth->use_init_guess = QUDA_USE_INIT_GUESS_NO;
313 
314  param_presmooth->precision = param.mg_global.invert_param->cuda_prec_sloppy;
315  param_presmooth->precision_sloppy = (param.level == 0) ? param.mg_global.invert_param->cuda_prec_precondition : param.mg_global.invert_param->cuda_prec_sloppy;
316  param_presmooth->precision_precondition = (param.level == 0) ? param.mg_global.invert_param->cuda_prec_precondition : param.mg_global.invert_param->cuda_prec_sloppy;
317 
318  param_presmooth->inv_type = param.smoother;
319  param_presmooth->inv_type_precondition = QUDA_INVALID_INVERTER;
320  param_presmooth->residual_type = (param_presmooth->inv_type == QUDA_MR_INVERTER) ? QUDA_INVALID_RESIDUAL : QUDA_L2_RELATIVE_RESIDUAL;
321  param_presmooth->Nsteps = param.mg_global.smoother_schwarz_cycle[param.level];
322  param_presmooth->maxiter = (param.level < param.Nlevel-1) ? param.nu_pre : param.nu_pre + param.nu_post;
323 
324  param_presmooth->Nkrylov = param_presmooth->maxiter;
325  param_presmooth->pipeline = param_presmooth->maxiter;
326  param_presmooth->tol = param.smoother_tol;
327  param_presmooth->global_reduction = param.global_reduction;
328 
329  param_presmooth->sloppy_converge = true; // this means we don't check the true residual before declaring convergence
330 
331  param_presmooth->schwarz_type = param.mg_global.smoother_schwarz_type[param.level];
332  // inner solver should recompute the true residual after each cycle if using Schwarz preconditioning
333  param_presmooth->compute_true_res = (param_presmooth->schwarz_type != QUDA_INVALID_SCHWARZ) ? true : false;
334 
335  presmoother = ((param.level < param.Nlevel - 1 || param_presmooth->schwarz_type != QUDA_INVALID_SCHWARZ)
336  && param_presmooth->inv_type != QUDA_INVALID_INVERTER && param_presmooth->maxiter > 0) ?
337  Solver::create(*param_presmooth, *param.matSmooth, *param.matSmoothSloppy, *param.matSmoothSloppy,
338  *param.matSmoothSloppy, profile) :
339  nullptr;
340  if (param.level < param.Nlevel - 1) { // Create the post smoother
341  param_postsmooth = new SolverParam(*param_presmooth);
342  param_postsmooth->return_residual = false; // post smoother does not need to return the residual vector
343  param_postsmooth->use_init_guess = QUDA_USE_INIT_GUESS_YES;
344 
345  param_postsmooth->maxiter = param.nu_post;
346  param_postsmooth->Nkrylov = param_postsmooth->maxiter;
347  param_postsmooth->pipeline = param_postsmooth->maxiter;
348 
349  // we never need to compute the true residual for a post smoother
350  param_postsmooth->compute_true_res = false;
351 
352  postsmoother = (param_postsmooth->inv_type != QUDA_INVALID_INVERTER && param_postsmooth->maxiter > 0) ?
353  Solver::create(*param_postsmooth, *param.matSmooth, *param.matSmoothSloppy, *param.matSmoothSloppy,
354  *param.matSmoothSloppy, profile) :
355  nullptr;
356  }
357  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Smoother done\n");
358 
359  popLevel(param.level);
360  }
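
 // Note that the smoothers are fixed-iteration solvers rather than solvers
 // run to a tolerance: maxiter is nu_pre (plus nu_post on the bottom level)
 // and Nkrylov/pipeline are pinned to maxiter, so each smoother application
 // costs a fixed, predictable number of operator applications.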
361 
362  void MG::createCoarseDirac() {
363  pushLevel(param.level);
364 
365  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Creating coarse Dirac operator\n");
366 
367  // check if we are coarsening the preconditioned system
368  bool preconditioned_coarsen = (param.coarse_grid_solution_type == QUDA_MATPC_SOLUTION && param.smoother_solve_type == QUDA_DIRECT_PC_SOLVE);
369  QudaMatPCType matpc_type = param.mg_global.invert_param->matpc_type;
370 
371  // use even-odd preconditioning for the coarse grid solver
372  if (diracCoarseResidual) delete diracCoarseResidual;
373  if (diracCoarseSmoother) delete diracCoarseSmoother;
374  if (diracCoarseSmootherSloppy) delete diracCoarseSmootherSloppy;
375 
376  // custom setup for the staggered KD ops
377  if (param.level == 0 && param.mg_global.transfer_type[param.level] == QUDA_TRANSFER_OPTIMIZED_KD) {
378  auto dirac_type = diracSmoother->getDiracType();
379 
380  auto smoother_solve_type = param.mg_global.smoother_solve_type[param.level + 1];
381  if (smoother_solve_type != QUDA_DIRECT_SOLVE) {
382  errorQuda("Invalid solve type %d for optimized KD operator", smoother_solve_type);
383  }
384 
385  // Allocate and build the KD inverse block (inverse coarse clover)
386  auto fine_dirac_type = diracSmoother->getDiracType();
387  if (fine_dirac_type != dirac_type)
388  errorQuda("Input dirac type %d does not match smoother type %d\n", dirac_type, fine_dirac_type);
389 
390  cudaGaugeField *fine_gauge = nullptr;
391  if (dirac_type == QUDA_STAGGERED_DIRAC || dirac_type == QUDA_STAGGEREDPC_DIRAC)
392  fine_gauge
393  = const_cast<cudaGaugeField *>(reinterpret_cast<const DiracStaggered *>(diracSmoother)->getGaugeField());
394  if (dirac_type == QUDA_ASQTAD_DIRAC || dirac_type == QUDA_ASQTADPC_DIRAC)
395  fine_gauge = const_cast<cudaGaugeField *>(
396  reinterpret_cast<const DiracImprovedStaggered *>(diracSmoother)->getFatLinkField());
397 
398  xInvKD = AllocateAndBuildStaggeredKahlerDiracInverse(*fine_gauge, diracSmoother->Mass(),
399  param.mg_global.staggered_kd_dagger_approximation == QUDA_BOOLEAN_TRUE); // assumed final argument; see the declaration in staggered_kd_build_xinv.h
400 
401  DiracParam diracParamKD;
402  diracParamKD.kappa
403  = -1.0; // Cancels automatic kappa in Y field application, which may be relevant if it propagates down
404  diracParamKD.mass = diracSmoother->Mass();
405  diracParamKD.mu = diracSmoother->Mu(); // doesn't matter
406  diracParamKD.mu_factor = 1.0; // doesn't matter
407  diracParamKD.dagger = QUDA_DAG_NO;
408  diracParamKD.matpcType = QUDA_MATPC_EVEN_EVEN; // I guess we could hack this for left vs right block Jacobi?
409  diracParamKD.gauge = const_cast<cudaGaugeField *>(fine_gauge);
410  diracParamKD.xInvKD = xInvKD;
411 
412  diracParamKD.tmp1 = tmp_coarse;
413  diracParamKD.tmp2 = tmp2_coarse;
414 
415  if (dirac_type == QUDA_STAGGERED_DIRAC || dirac_type == QUDA_STAGGEREDPC_DIRAC) {
416  diracParamKD.type = QUDA_STAGGEREDKD_DIRAC;
417 
418  diracCoarseResidual = new DiracStaggeredKD(diracParamKD);
419  diracCoarseSmoother = new DiracStaggeredKD(diracParamKD);
420  diracCoarseSmootherSloppy = new DiracStaggeredKD(diracParamKD);
421  } else if (dirac_type == QUDA_ASQTAD_DIRAC || dirac_type == QUDA_ASQTADPC_DIRAC) {
422  diracParamKD.type = QUDA_ASQTADKD_DIRAC;
423 
424  diracParamKD.fatGauge = fine_gauge;
425  diracParamKD.longGauge = const_cast<cudaGaugeField *>(
426  reinterpret_cast<const DiracImprovedStaggered *>(diracSmoother)->getLongLinkField());
427 
428  diracCoarseResidual = new DiracImprovedStaggeredKD(diracParamKD);
429  diracCoarseSmoother = new DiracImprovedStaggeredKD(diracParamKD);
430  diracCoarseSmootherSloppy = new DiracImprovedStaggeredKD(diracParamKD);
431  } else {
432  errorQuda("Invalid dirac_type %d", dirac_type);
433  }
434 
435  } else {
436 
437  // create coarse grid operator
438  DiracParam diracParam;
439  diracParam.transfer = transfer;
440 
441  // Parameters that matter for coarse construction and application
442  diracParam.dirac = preconditioned_coarsen ? const_cast<Dirac *>(diracSmoother) : const_cast<Dirac *>(diracResidual);
443  diracParam.kappa = (param.B[0]->Nspin() == 1) ?
444  -1.0 :
445  diracParam.dirac->Kappa(); // -1 cancels automatic kappa in application of Y fields
446  diracParam.mass = diracParam.dirac->Mass();
447  diracParam.mu = diracParam.dirac->Mu();
448  diracParam.mu_factor = param.mg_global.mu_factor[param.level + 1] - param.mg_global.mu_factor[param.level];
449 
450  // Need to figure out if we need to force bi-directional build. If any previous level (incl this one) was
451  // preconditioned, we have to force bi-directional builds.
452  diracParam.need_bidirectional = QUDA_BOOLEAN_FALSE;
453  for (int i = 0; i <= param.level; i++) {
454  if (param.mg_global.coarse_grid_solution_type[i] == QUDA_MATPC_SOLUTION
455  && param.mg_global.smoother_solve_type[i] == QUDA_DIRECT_PC_SOLVE) {
456  diracParam.need_bidirectional = QUDA_BOOLEAN_TRUE;
457  }
458  }
459 
460  diracParam.dagger = QUDA_DAG_NO;
461  diracParam.matpcType = matpc_type;
462  diracParam.type = QUDA_COARSE_DIRAC;
463  diracParam.tmp1 = tmp_coarse;
464  diracParam.tmp2 = tmp2_coarse;
465  diracParam.halo_precision = param.mg_global.precision_null[param.level];
466  diracParam.use_mma = param.use_mma;
467 
468  diracCoarseResidual = new DiracCoarse(diracParam, param.setup_location == QUDA_CUDA_FIELD_LOCATION ? true : false,
469  param.mg_global.setup_minimize_memory == QUDA_BOOLEAN_TRUE ? true : false);
470 
471  // create smoothing operators
472  diracParam.dirac = const_cast<Dirac *>(param.matSmooth->Expose());
473  diracParam.halo_precision = param.mg_global.smoother_halo_precision[param.level + 1];
474 
475  if (param.mg_global.smoother_solve_type[param.level + 1] == QUDA_DIRECT_PC_SOLVE) {
476  diracParam.type = QUDA_COARSEPC_DIRAC;
477  diracParam.tmp1 = &(tmp_coarse->Even());
478  diracParam.tmp2 = &(tmp2_coarse->Even());
479  diracCoarseSmoother = new DiracCoarsePC(static_cast<DiracCoarse &>(*diracCoarseResidual), diracParam);
480  {
481  bool schwarz = param.mg_global.smoother_schwarz_type[param.level + 1] != QUDA_INVALID_SCHWARZ;
482  for (int i = 0; i < 4; i++) diracParam.commDim[i] = schwarz ? 0 : 1;
483  }
484  diracCoarseSmootherSloppy = new DiracCoarsePC(static_cast<DiracCoarse &>(*diracCoarseSmoother), diracParam);
485  } else {
486  diracParam.type = QUDA_COARSE_DIRAC;
487  diracParam.tmp1 = tmp_coarse;
488  diracParam.tmp2 = tmp2_coarse;
489  diracCoarseSmoother = new DiracCoarse(static_cast<DiracCoarse &>(*diracCoarseResidual), diracParam);
490  {
491  bool schwarz = param.mg_global.smoother_schwarz_type[param.level + 1] != QUDA_INVALID_SCHWARZ;
492  for (int i = 0; i < 4; i++) diracParam.commDim[i] = schwarz ? 0 : 1;
493  }
494  diracCoarseSmootherSloppy = new DiracCoarse(static_cast<DiracCoarse &>(*diracCoarseSmoother), diracParam);
495  }
496  }
497 
498  if (matCoarseResidual) delete matCoarseResidual;
499  if (matCoarseSmoother) delete matCoarseSmoother;
500  if (matCoarseSmootherSloppy) delete matCoarseSmootherSloppy;
501  matCoarseResidual = new DiracM(*diracCoarseResidual);
502  matCoarseSmoother = new DiracM(*diracCoarseSmoother);
503  matCoarseSmootherSloppy = new DiracM(*diracCoarseSmootherSloppy);
504 
505  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Coarse Dirac operator done\n");
506 
507  popLevel(param.level);
508  }
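
 // For the aggregation path the operator built above is the Galerkin coarse
 // operator: with prolongator P and restrictor R = P^\dagger, the coarse
 // Dirac operator is D_c = P^\dagger D P, which verify() later checks against
 // the explicitly emulated product R(D(P(v))). In the optimized-KD path the
 // "coarse" operator is instead the Kahler-Dirac preconditioned fine operator
 // built from xInvKD.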
509 
510  void MG::destroyCoarseSolver() {
511  pushLevel(param.level);
512 
513  if (param.cycle_type == QUDA_MG_CYCLE_VCYCLE && param.level < param.Nlevel-2) {
514  // nothing to do
515  } else if (param.cycle_type == QUDA_MG_CYCLE_RECURSIVE || param.level == param.Nlevel-2) {
516  if (coarse_solver) {
517  auto &coarse_solver_inner = reinterpret_cast<PreconditionedSolver *>(coarse_solver)->ExposeSolver();
518  // int defl_size = coarse_solver_inner.evecs.size();
519  int defl_size = coarse_solver_inner.deflationSpaceSize();
520  if (defl_size > 0 && transfer && param.mg_global.preserve_deflation) {
521  // Deflation space exists and we are going to create a new solver. Extract deflation space.
522  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Extracting deflation space size %d to MG\n", defl_size);
523  coarse_solver_inner.extractDeflationSpace(evecs);
524  }
525  delete coarse_solver;
526  coarse_solver = nullptr;
527  }
528  if (param_coarse_solver) {
529  delete param_coarse_solver;
530  param_coarse_solver = nullptr;
531  }
532  } else {
533  errorQuda("Multigrid cycle type %d not supported", param.cycle_type);
534  }
535 
536  popLevel(param.level);
537  }
538 
539  void MG::createCoarseSolver() {
540  pushLevel(param.level);
541 
542  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Creating coarse solver wrapper\n");
543  destroyCoarseSolver();
544  if (param.cycle_type == QUDA_MG_CYCLE_VCYCLE && param.level < param.Nlevel-2) {
545  // if coarse solver is not a bottom solver and on the second to bottom level then we can just use the coarse solver as is
546  coarse_solver = coarse;
547  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Assigned coarse solver to coarse MG operator\n");
548  } else if (param.cycle_type == QUDA_MG_CYCLE_RECURSIVE || param.level == param.Nlevel-2) {
549 
550  param_coarse_solver = new SolverParam(param);
551  param_coarse_solver->inv_type = param.mg_global.coarse_solver[param.level + 1];
552  param_coarse_solver->is_preconditioner = false;
553  param_coarse_solver->sloppy_converge = true; // this means we don't check the true residual before declaring convergence
554 
555  param_coarse_solver->preserve_source = QUDA_PRESERVE_SOURCE_NO;
556  param_coarse_solver->return_residual = false; // coarse solver does not need to return the residual vector
557 
558  param_coarse_solver->use_init_guess = QUDA_USE_INIT_GUESS_NO;
559  // Coarse level deflation is triggered if the eig param structure exists
560  // on the coarsest level, and we are on the next to coarsest level.
561  if (param.mg_global.use_eig_solver[param.Nlevel - 1] && (param.level == param.Nlevel - 2)) {
562  param_coarse_solver->eig_param = *param.mg_global.eig_param[param.Nlevel - 1];
563  param_coarse_solver->deflate = QUDA_BOOLEAN_TRUE;
564  // Due to coherence between these levels, an initial guess
565  // might be beneficial.
566  if (param.mg_global.coarse_guess == QUDA_BOOLEAN_TRUE) {
567  param_coarse_solver->use_init_guess = QUDA_USE_INIT_GUESS_YES;
568  }
569 
570  // Deflation on the coarse is supported for 6 solvers only
571  if (param_coarse_solver->inv_type != QUDA_CA_CGNR_INVERTER && param_coarse_solver->inv_type != QUDA_CGNR_INVERTER
572  && param_coarse_solver->inv_type != QUDA_CA_CGNE_INVERTER
573  && param_coarse_solver->inv_type != QUDA_CGNE_INVERTER && param_coarse_solver->inv_type != QUDA_CA_GCR_INVERTER
574  && param_coarse_solver->inv_type != QUDA_GCR_INVERTER) {
575  errorQuda("Coarse grid deflation not supported with coarse solver %d", param_coarse_solver->inv_type);
576  }
577 
578  if (strcmp(param_coarse_solver->eig_param.vec_infile, "") == 0 && // check that input file not already set
579  param.mg_global.vec_load[param.level + 1] == QUDA_BOOLEAN_TRUE
580  && (strcmp(param.mg_global.vec_infile[param.level + 1], "") != 0)) {
581  std::string vec_infile(param.mg_global.vec_infile[param.level + 1]);
582  vec_infile += "_level_";
583  vec_infile += std::to_string(param.level + 1);
584  vec_infile += "_defl_";
585  vec_infile += std::to_string(param.mg_global.n_vec[param.level + 1]);
586  strcpy(param_coarse_solver->eig_param.vec_infile, vec_infile.c_str());
587  }
588 
589  if (strcmp(param_coarse_solver->eig_param.vec_outfile, "") == 0 && // check that output file not already set
590  param.mg_global.vec_store[param.level + 1] == QUDA_BOOLEAN_TRUE
591  && (strcmp(param.mg_global.vec_outfile[param.level + 1], "") != 0)) {
592  std::string vec_outfile(param.mg_global.vec_outfile[param.level + 1]);
593  vec_outfile += "_level_";
594  vec_outfile += std::to_string(param.level + 1);
595  vec_outfile += "_defl_";
596  vec_outfile += std::to_string(param.mg_global.n_vec[param.level + 1]);
597  strcpy(param_coarse_solver->eig_param.vec_outfile, vec_outfile.c_str());
598  }
599  }
600 
601  param_coarse_solver->tol = param.mg_global.coarse_solver_tol[param.level+1];
602  param_coarse_solver->global_reduction = true;
603  param_coarse_solver->compute_true_res = false;
604  param_coarse_solver->delta = 1e-8;
605  param_coarse_solver->pipeline = 8;
606 
607  param_coarse_solver->maxiter = param.mg_global.coarse_solver_maxiter[param.level+1];
608  param_coarse_solver->Nkrylov = param_coarse_solver->maxiter < param_coarse_solver->Nkrylov ?
609  param_coarse_solver->maxiter :
610  param_coarse_solver->Nkrylov;
611  if (param_coarse_solver->inv_type == QUDA_CA_CG_INVERTER ||
612  param_coarse_solver->inv_type == QUDA_CA_CGNE_INVERTER ||
613  param_coarse_solver->inv_type == QUDA_CA_CGNR_INVERTER ||
614  param_coarse_solver->inv_type == QUDA_CA_GCR_INVERTER) {
615  param_coarse_solver->ca_basis = param.mg_global.coarse_solver_ca_basis[param.level+1];
616  param_coarse_solver->ca_lambda_min = param.mg_global.coarse_solver_ca_lambda_min[param.level+1];
617  param_coarse_solver->ca_lambda_max = param.mg_global.coarse_solver_ca_lambda_max[param.level+1];
618  param_coarse_solver->Nkrylov = param.mg_global.coarse_solver_ca_basis_size[param.level+1];
619  }
620  param_coarse_solver->inv_type_precondition = (param.level<param.Nlevel-2 || coarse->presmoother) ? QUDA_MG_INVERTER : QUDA_INVALID_INVERTER;
621  param_coarse_solver->preconditioner = (param.level<param.Nlevel-2 || coarse->presmoother) ? coarse : nullptr;
622  param_coarse_solver->mg_instance = true;
623  param_coarse_solver->verbosity_precondition = param.mg_global.verbosity[param.level+1];
624 
625  // preconditioned solver wrapper is uniform precision
626  param_coarse_solver->precision = r_coarse->Precision();
627  param_coarse_solver->precision_sloppy = param_coarse_solver->precision;
628  param_coarse_solver->precision_precondition = param_coarse_solver->precision_sloppy;
629 
631  Solver *solver = Solver::create(*param_coarse_solver, *matCoarseSmoother, *matCoarseSmoother,
632  *matCoarseSmoother, *matCoarseSmoother, profile);
633  sprintf(coarse_prefix, "MG level %d (%s): ", param.level + 1,
634  param.mg_global.location[param.level + 1] == QUDA_CUDA_FIELD_LOCATION ? "GPU" : "CPU");
635  coarse_solver = new PreconditionedSolver(*solver, *matCoarseSmoother->Expose(), *param_coarse_solver, profile,
636  coarse_prefix);
637  } else {
638  Solver *solver = Solver::create(*param_coarse_solver, *matCoarseResidual, *matCoarseResidual,
639  *matCoarseResidual, *matCoarseResidual, profile);
640  sprintf(coarse_prefix, "MG level %d (%s): ", param.level + 1,
641  param.mg_global.location[param.level + 1] == QUDA_CUDA_FIELD_LOCATION ? "GPU" : "CPU");
642  coarse_solver = new PreconditionedSolver(*solver, *matCoarseResidual->Expose(), *param_coarse_solver, profile, coarse_prefix);
643  }
644 
645  setOutputPrefix(prefix); // restore since we just popped back from coarse grid
646 
647  if (param.level == param.Nlevel - 2 && param.mg_global.use_eig_solver[param.level + 1]) {
648 
649  // Test if a coarse grid deflation space needs to be transferred to the coarse solver to prevent recomputation
650  int defl_size = evecs.size();
651  auto &coarse_solver_inner = reinterpret_cast<PreconditionedSolver *>(coarse_solver)->ExposeSolver();
652  if (defl_size > 0 && transfer && param.mg_global.preserve_deflation) {
653  // We shall not recompute the deflation space, we shall transfer
654  // vectors stored in the parent MG instead
655  coarse_solver_inner.setDeflateCompute(false);
656  coarse_solver_inner.setRecomputeEvals(true);
657  if (getVerbosity() >= QUDA_VERBOSE)
658  printfQuda("Transferring deflation space size %d to coarse solver\n", defl_size);
659  // Create space in coarse solver to hold deflation space, destroy space in MG.
660  coarse_solver_inner.injectDeflationSpace(evecs);
661  }
662 
663  // Run a dummy solve so that the deflation space is constructed and computed if needed during the MG setup,
664  // or the eigenvalues are recomputed during transfer.
665  spinorNoise(*r_coarse, *coarse->rng, QUDA_NOISE_UNIFORM);
666  param_coarse_solver->maxiter = 1; // do a single iteration on the dummy solve
667  (*coarse_solver)(*x_coarse, *r_coarse);
668  setOutputPrefix(prefix); // restore since we just popped back from coarse grid
669  param_coarse_solver->maxiter = param.mg_global.coarse_solver_maxiter[param.level + 1];
670  }
671 
672  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Assigned coarse solver to preconditioned GCR solver\n");
673  } else {
674  errorQuda("Multigrid cycle type %d not supported", param.cycle_type);
675  }
676  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Coarse solver wrapper done\n");
677 
678  popLevel(param.level);
679  }
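
 // On intermediate levels the coarse "solver" is simply the next MG level
 // applied as a preconditioner; on the final pair of levels it is a
 // standalone (optionally deflated) Krylov solver, wrapped in
 // PreconditionedSolver so that it carries its own output prefix.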
680 
681  MG::~MG()
682  {
683  pushLevel(param.level);
684 
685  if (param.level < param.Nlevel - 1) {
686  if (coarse) delete coarse;
687  if (param.level == param.Nlevel-1 || param.cycle_type == QUDA_MG_CYCLE_RECURSIVE) {
688  if (coarse_solver) delete coarse_solver;
689  if (param_coarse_solver) delete param_coarse_solver;
690  }
691 
692  if (B_coarse) {
693  int nVec_coarse = std::max(param.Nvec, param.mg_global.n_vec[param.level + 1]);
694  for (int i = 0; i < nVec_coarse; i++)
695  if ((*B_coarse)[i]) delete (*B_coarse)[i];
696  delete B_coarse;
697  }
698  if (transfer) delete transfer;
699  if (matCoarseSmootherSloppy) delete matCoarseSmootherSloppy;
700  if (diracCoarseSmootherSloppy) delete diracCoarseSmootherSloppy;
701  if (matCoarseSmoother) delete matCoarseSmoother;
702  if (diracCoarseSmoother) delete diracCoarseSmoother;
703  if (matCoarseResidual) delete matCoarseResidual;
704  if (diracCoarseResidual) delete diracCoarseResidual;
705  if (postsmoother) delete postsmoother;
706  if (param_postsmooth) delete param_postsmooth;
707  }
708 
709  if (rng) {
710  rng->Release();
711  delete rng;
712  }
713 
714  if (presmoother) delete presmoother;
715  if (param_presmooth) delete param_presmooth;
716 
717  if (b_tilde && param.smoother_solve_type == QUDA_DIRECT_PC_SOLVE) delete b_tilde;
718  if (r) delete r;
719  if (r_coarse) delete r_coarse;
720  if (x_coarse) delete x_coarse;
721  if (tmp_coarse) delete tmp_coarse;
722  if (tmp2_coarse) delete tmp2_coarse;
723 
724  if (xInvKD) delete xInvKD;
725 
726  if (param_coarse) delete param_coarse;
727 
728  if (getVerbosity() >= QUDA_VERBOSE) profile.Print();
729 
730  popLevel(param.level);
731  }
732 
733  // FIXME need to make this more robust (implement Solver::flops() for all solvers)
734  double MG::flops() const {
735  double flops = 0;
736 
737  if (param_coarse_solver) {
738  flops += param_coarse_solver->gflops * 1e9;
739  param_coarse_solver->gflops = 0;
740  } else if (param.level < param.Nlevel-1) {
741  flops += coarse->flops();
742  }
743 
744  if (param_presmooth) {
745  flops += param_presmooth->gflops * 1e9;
746  param_presmooth->gflops = 0;
747  }
748 
749  if (param_postsmooth) {
750  flops += param_postsmooth->gflops * 1e9;
751  param_postsmooth->gflops = 0;
752  }
753 
754  if (transfer) {
755  flops += transfer->flops();
756  }
757 
758  return flops;
759  }
760 
764  void MG::verify(bool recursively)
765  {
766  pushLevel(param.level);
767 
768  // temporary fields used for verification
769  ColorSpinorParam csParam(*r);
770  csParam.create = QUDA_NULL_FIELD_CREATE;
771  ColorSpinorField *tmp1 = ColorSpinorField::Create(csParam);
772  ColorSpinorField *tmp2 = ColorSpinorField::Create(csParam);
773  double deviation;
774 
775  QudaPrecision prec = (param.mg_global.precision_null[param.level] < csParam.Precision()) ?
776  param.mg_global.precision_null[param.level] :
777  csParam.Precision();
778 
779  // may want to revisit this---these were relaxed for cases where ghost_precision < precision
780  // these were set while hacking in tests of quarter precision ghosts
781  double tol = (prec == QUDA_QUARTER_PRECISION || prec == QUDA_HALF_PRECISION) ? 5e-2 : prec == QUDA_SINGLE_PRECISION ? 1e-3 : 1e-8;
782 
783  // No need to check (projector) v_k for staggered case
784  if (param.transfer_type == QUDA_TRANSFER_AGGREGATE) {
785 
786  if (getVerbosity() >= QUDA_SUMMARIZE)
787  printfQuda("Checking 0 = (1 - P P^\\dagger) v_k for %d vectors\n", param.Nvec);
788 
789  for (int i = 0; i < param.Nvec; i++) {
790  // as well as copying to the correct location this also changes basis if necessary
791  *tmp1 = *param.B[i];
792 
793  transfer->R(*r_coarse, *tmp1);
794  transfer->P(*tmp2, *r_coarse);
795  deviation = sqrt(xmyNorm(*tmp1, *tmp2) / norm2(*tmp1));
796 
797  if (getVerbosity() >= QUDA_VERBOSE)
798  printfQuda(
799  "Vector %d: norms v_k = %e P^\\dagger v_k = %e (1 - P P^\\dagger) v_k = %e, L2 relative deviation = %e\n",
800  i, norm2(*tmp1), norm2(*r_coarse), norm2(*tmp2), deviation);
801  if (deviation > tol) errorQuda("L2 relative deviation for k=%d failed, %e > %e", i, deviation, tol);
802  }
803 
804  // the oblique check
805  if (param.mg_global.run_oblique_proj_check) {
806  sprintf(prefix, "MG level %d (%s): Null vector Oblique Projections : ", param.level + 1,
807  param.location == QUDA_CUDA_FIELD_LOCATION ? "GPU" : "CPU");
808  setOutputPrefix(prefix);
809 
810  // Oblique projections
811  if (getVerbosity() >= QUDA_SUMMARIZE)
812  printfQuda("Checking 1 > || (1 - DP(P^dagDP)P^dag) v_k || / || v_k || for %d vectors\n", param.Nvec);
813 
814  for (int i = 0; i < param.Nvec; i++) {
815  transfer->R(*r_coarse, *(param.B[i]));
816  (*coarse_solver)(*x_coarse, *r_coarse); // this needs to be an exact solve to pass
817  setOutputPrefix(prefix); // restore prefix after return from coarse grid
818  transfer->P(*tmp2, *x_coarse);
819  (*param.matResidual)(*tmp1, *tmp2);
820  *tmp2 = *(param.B[i]);
821  if (getVerbosity() >= QUDA_SUMMARIZE) {
822  printfQuda("Vector %d: norms %e %e\n", i, norm2(*param.B[i]), norm2(*tmp1));
823  printfQuda("relative residual = %e\n", sqrt(xmyNorm(*tmp2, *tmp1) / norm2(*param.B[i])));
824  }
825  }
826  sprintf(prefix, "MG level %d (%s): ", param.level + 1,
827  param.location == QUDA_CUDA_FIELD_LOCATION ? "GPU" : "CPU");
828  setOutputPrefix(prefix);
829  }
830  }
831 
832 #if 0
833 
834  if (getVerbosity() >= QUDA_SUMMARIZE)
835  printfQuda("Checking 1 > || (1 - D P (P^\\dagger D P) P^\\dagger v_k || / || v_k || for %d vectors\n",
836  param.Nvec);
837 
838  for (int i=0; i<param.Nvec; i++) {
839  transfer->R(*r_coarse, *(param.B[i]));
840  (*coarse)(*x_coarse, *r_coarse); // this needs to be an exact solve to pass
841  setOutputPrefix(prefix); // restore output prefix
842  transfer->P(*tmp2, *x_coarse);
843  param.matResidual(*tmp1,*tmp2);
844  *tmp2 = *(param.B[i]);
845  if (getVerbosity() >= QUDA_VERBOSE) {
846  printfQuda("Vector %d: norms %e %e ", i, norm2(*param.B[i]), norm2(*tmp1));
847  printfQuda("relative residual = %e\n", sqrt(xmyNorm(*tmp2, *tmp1) / norm2(*param.B[i])) );
848  }
849  }
850 #endif
851 
852  // We need to create temporary coarse vectors here for various verifications
853  // Otherwise these get created in the coarse `reset()` routine later
854 
855  if (!tmp_coarse)
856  tmp_coarse = param.B[0]->CreateCoarse(param.geoBlockSize, param.spinBlockSize, param.Nvec, r->Precision(),
857  param.mg_global.location[param.level + 1]);
858 
859  // create coarse temporary vector if not already created in verify()
860  if (!tmp2_coarse)
861  tmp2_coarse = param.B[0]->CreateCoarse(param.geoBlockSize, param.spinBlockSize, param.Nvec, r->Precision(),
862  param.mg_global.location[param.level + 1]);
863 
864  // create coarse residual vector if not already created in verify()
865  if (!r_coarse)
866  r_coarse = param.B[0]->CreateCoarse(param.geoBlockSize, param.spinBlockSize, param.Nvec, r->Precision(),
867  param.mg_global.location[param.level + 1]);
868 
869  // create coarse solution vector if not already created in verify()
870  if (!x_coarse)
871  x_coarse = param.B[0]->CreateCoarse(param.geoBlockSize, param.spinBlockSize, param.Nvec, r->Precision(),
872  param.mg_global.location[param.level + 1]);
873 
874  if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Checking 0 = (1 - P^\\dagger P) eta_c\n");
875 
876  spinorNoise(*x_coarse, *rng, QUDA_NOISE_UNIFORM);
877 
878  transfer->P(*tmp2, *x_coarse);
879  transfer->R(*r_coarse, *tmp2);
880  if (getVerbosity() >= QUDA_VERBOSE)
881  printfQuda("L2 norms %e %e (fine tmp %e) ", norm2(*x_coarse), norm2(*r_coarse), norm2(*tmp2));
882 
883  deviation = sqrt( xmyNorm(*x_coarse, *r_coarse) / norm2(*x_coarse) );
884  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("relative deviation = %e\n", deviation);
885  if (deviation > tol ) errorQuda("L2 relative deviation = %e > %e failed", deviation, tol);
886  if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Checking 0 = (D_c - P^\\dagger D P) (native coarse operator to emulated operator)\n");
887 
888  zero(*tmp_coarse);
889  zero(*r_coarse);
890 
891 #if 0 // debugging trick: point source matrix elements
892  tmp_coarse->Source(QUDA_POINT_SOURCE, 0, 0, 0);
893 #else
894  spinorNoise(*tmp_coarse, *rng, QUDA_NOISE_UNIFORM);
895 #endif
896 
897  // the three-hop terms break the verification b/c the coarse ops don't have the long links baked in
898  // need a more robust fix to this
899  if ((param.level != 0 || param.mg_global.transfer_type[0] == QUDA_TRANSFER_AGGREGATE)
900  && diracSmoother->getDiracType() != QUDA_ASQTAD_DIRAC && diracSmoother->getDiracType() != QUDA_ASQTADPC_DIRAC
901  && diracSmoother->getDiracType() != QUDA_ASQTADKD_DIRAC) {
902 
903  transfer->P(*tmp1, *tmp_coarse);
904 
905  if (param.coarse_grid_solution_type == QUDA_MATPC_SOLUTION && param.smoother_solve_type == QUDA_DIRECT_PC_SOLVE) {
906  double kappa = diracResidual->Kappa();
907  double mass = diracResidual->Mass();
908  if (param.level == 0) {
909  if (tmp1->Nspin() == 4) {
910  diracSmoother->DslashXpay(tmp2->Even(), tmp1->Odd(), QUDA_EVEN_PARITY, tmp1->Even(), -kappa);
911  diracSmoother->DslashXpay(tmp2->Odd(), tmp1->Even(), QUDA_ODD_PARITY, tmp1->Odd(), -kappa);
912  } else if (tmp1->Nspin() == 2) { // if the coarse op is on top
913  diracSmoother->DslashXpay(tmp2->Even(), tmp1->Odd(), QUDA_EVEN_PARITY, tmp1->Even(), 1.0);
914  diracSmoother->DslashXpay(tmp2->Odd(), tmp1->Even(), QUDA_ODD_PARITY, tmp1->Odd(), 1.0);
915  } else { // staggered
916  diracSmoother->DslashXpay(tmp2->Even(), tmp1->Odd(), QUDA_EVEN_PARITY, tmp1->Even(),
917  2.0 * mass); // stag convention
918  diracSmoother->DslashXpay(tmp2->Odd(), tmp1->Even(), QUDA_ODD_PARITY, tmp1->Odd(),
919  2.0 * mass); // stag convention
920  }
921  } else { // this is a hack since the coarse Dslash doesn't properly use the same xpay conventions yet
922  diracSmoother->DslashXpay(tmp2->Even(), tmp1->Odd(), QUDA_EVEN_PARITY, tmp1->Even(), 1.0);
923  diracSmoother->DslashXpay(tmp2->Odd(), tmp1->Even(), QUDA_ODD_PARITY, tmp1->Odd(), 1.0);
924  }
925  } else {
926  (*param.matResidual)(*tmp2, *tmp1);
927  }
928 
929  transfer->R(*x_coarse, *tmp2);
930  static_cast<DiracCoarse *>(diracCoarseResidual)->M(*r_coarse, *tmp_coarse);
931 
932 #if 0 // enable to print out emulated and actual coarse-grid operator vectors for debugging
933  setOutputPrefix("");
934 
935  for (unsigned int i=0; i<comm_size(); i++) { // this ensures that we print each rank in order
936  if (i==comm_rank()) {
937  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("emulated\n");
938  for (int x=0; x<x_coarse->Volume(); x++) x_coarse->PrintVector(x);
939 
940  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("actual\n");
941  for (int x=0; x<r_coarse->Volume(); x++) r_coarse->PrintVector(x);
942  }
943  comm_barrier();
944  }
945  setOutputPrefix(prefix);
946 #endif
947 
948  double r_nrm = norm2(*r_coarse);
949  deviation = sqrt(xmyNorm(*x_coarse, *r_coarse) / norm2(*x_coarse));
950 
951  if (diracResidual->Mu() != 0.0) {
952  // When the mu is shifted on the coarse level; we can compute exactly the error we introduce in the check:
953  // it is given by 2*kappa*delta_mu || tmp_coarse ||; where tmp_coarse is the random vector generated for the test
954  double delta_factor = param.mg_global.mu_factor[param.level + 1] - param.mg_global.mu_factor[param.level];
955  if (fabs(delta_factor) > tol) {
956  double delta_a
957  = delta_factor * 2.0 * diracResidual->Kappa() * diracResidual->Mu() * transfer->Vectors().TwistFlavor();
958  deviation -= fabs(delta_a) * sqrt(norm2(*tmp_coarse) / norm2(*x_coarse));
959  deviation = fabs(deviation);
960  }
961  }
962  if (getVerbosity() >= QUDA_VERBOSE)
963  printfQuda("L2 norms: Emulated = %e, Native = %e, relative deviation = %e\n", norm2(*x_coarse), r_nrm, deviation);
964 
965  if (deviation > tol) errorQuda("failed, deviation = %e (tol=%e)", deviation, tol);
966  } else {
967  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("...skipping check due to long links\n"); // asqtad operator only
968  }
969 
970  // check the preconditioned operator construction on the lower level if applicable
971  bool coarse_was_preconditioned = (param.mg_global.coarse_grid_solution_type[param.level + 1] == QUDA_MATPC_SOLUTION
972  && param.mg_global.smoother_solve_type[param.level + 1] == QUDA_DIRECT_PC_SOLVE);
973  if (coarse_was_preconditioned) {
974  // check eo
975  if (getVerbosity() >= QUDA_SUMMARIZE)
976  printfQuda("Checking Deo of preconditioned operator 0 = \\hat{D}_c - A^{-1} D_c\n");
977  static_cast<DiracCoarse *>(diracCoarseResidual)->Dslash(r_coarse->Even(), tmp_coarse->Odd(), QUDA_EVEN_PARITY);
978  static_cast<DiracCoarse *>(diracCoarseResidual)->CloverInv(x_coarse->Even(), r_coarse->Even(), QUDA_EVEN_PARITY);
979  static_cast<DiracCoarsePC *>(diracCoarseSmoother)->Dslash(r_coarse->Even(), tmp_coarse->Odd(), QUDA_EVEN_PARITY);
980  double r_nrm = norm2(r_coarse->Even());
981  deviation = sqrt(xmyNorm(x_coarse->Even(), r_coarse->Even()) / norm2(x_coarse->Even()));
982  if (getVerbosity() >= QUDA_VERBOSE)
983  printfQuda("L2 norms: Emulated = %e, Native = %e, relative deviation = %e\n", norm2(x_coarse->Even()), r_nrm,
984  deviation);
985  if (deviation > tol) errorQuda("failed, deviation = %e (tol=%e)", deviation, tol);
986 
987  // check Doe
988  if (getVerbosity() >= QUDA_SUMMARIZE)
989  printfQuda("Checking Doe of preconditioned operator 0 = \\hat{D}_c - A^{-1} D_c\n");
990  static_cast<DiracCoarse *>(diracCoarseResidual)->Dslash(r_coarse->Odd(), tmp_coarse->Even(), QUDA_ODD_PARITY);
991  static_cast<DiracCoarse *>(diracCoarseResidual)->CloverInv(x_coarse->Odd(), r_coarse->Odd(), QUDA_ODD_PARITY);
992  static_cast<DiracCoarsePC *>(diracCoarseSmoother)->Dslash(r_coarse->Odd(), tmp_coarse->Even(), QUDA_ODD_PARITY);
993  r_nrm = norm2(r_coarse->Odd());
994  deviation = sqrt(xmyNorm(x_coarse->Odd(), r_coarse->Odd()) / norm2(x_coarse->Odd()));
995  if (getVerbosity() >= QUDA_VERBOSE)
996  printfQuda("L2 norms: Emulated = %e, Native = %e, relative deviation = %e\n", norm2(x_coarse->Odd()), r_nrm,
997  deviation);
998  if (deviation > tol) errorQuda("failed, deviation = %e (tol=%e)", deviation, tol);
999  }
1000 
1001  // here we check that the Hermitian conjugate operator is working
1002  // as expected for both the smoother and residual Dirac operators
1003  if (param.smoother_solve_type == QUDA_DIRECT_PC_SOLVE) { // normal operator check for the preconditioned smoother operator
1004  if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Checking normality of preconditioned operator\n");
1005  if (tmp2->Nspin() == 1) { // if the outer op is the staggered op, just use M.
1006  diracSmoother->M(tmp2->Even(), tmp1->Odd());
1007  } else {
1008  diracSmoother->MdagM(tmp2->Even(), tmp1->Odd());
1009  }
1010  Complex dot = cDotProduct(tmp2->Even(),tmp1->Odd());
1011  double deviation = std::fabs(dot.imag()) / std::fabs(dot.real());
1012  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Smoother normal operator test (eta^dag M^dag M eta): real=%e imag=%e, relative imaginary deviation=%e\n",
1013  real(dot), imag(dot), deviation);
1014  if (deviation > tol) errorQuda("failed, deviation = %e (tol=%e)", deviation, tol);
1015  }
1016 
1017  { // normal operator check for residual operator
1018  if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Checking normality of residual operator\n");
1019  if (tmp2->Nspin() != 1 || tmp2->SiteSubset() == QUDA_FULL_SITE_SUBSET) {
1020  diracResidual->MdagM(*tmp2, *tmp1);
1021  } else {
1022  // staggered preconditioned op.
1023  diracResidual->M(*tmp2, *tmp1);
1024  }
1025  Complex dot = cDotProduct(*tmp1,*tmp2);
1026  double deviation = std::fabs(dot.imag()) / std::fabs(dot.real());
1027  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Normal operator test (eta^dag M^dag M eta): real=%e imag=%e, relative imaginary deviation=%e\n",
1028  real(dot), imag(dot), deviation);
1029  if (deviation > tol) errorQuda("failed, deviation = %e (tol=%e)", deviation, tol);
1030  }
1031 
1032  // Not useful for staggered op since it's a unitary transform
1033  if (param.transfer_type == QUDA_TRANSFER_AGGREGATE) {
1034  if (param.mg_global.run_low_mode_check) {
1035 
1036  sprintf(prefix, "MG level %d (%s): eigenvector overlap : ", param.level + 1,
1037  param.location == QUDA_CUDA_FIELD_LOCATION ? "GPU" : "CPU");
1038  setOutputPrefix(prefix);
1039 
1040  // Reuse the space for the Null vectors. By this point,
1041  // the coarse grid has already been constructed.
1042  generateEigenVectors();
1043 
1044  for (int i = 0; i < param.Nvec; i++) {
1045 
1046  // Restrict Evec, place result in r_coarse
1047  transfer->R(*r_coarse, *param.B[i]);
1048  // Prolong r_coarse, place result in tmp2
1049  transfer->P(*tmp2, *r_coarse);
1050 
1051  printfQuda("Vector %d: norms v_k = %e P^dag v_k = %e PP^dag v_k = %e\n", i, norm2(*param.B[i]),
1052  norm2(*r_coarse), norm2(*tmp2));
1053 
1054  // Compare v_k and PP^dag v_k.
1055  deviation = sqrt(xmyNorm(*param.B[i], *tmp2) / norm2(*param.B[i]));
1056  printfQuda("L2 relative deviation = %e\n", deviation);
1057 
1058  if (param.mg_global.run_oblique_proj_check) {
1059 
1060  sprintf(prefix, "MG level %d (%s): eigenvector Oblique Projections : ", param.level + 1,
1061  param.location == QUDA_CUDA_FIELD_LOCATION ? "GPU" : "CPU");
1062  setOutputPrefix(prefix);
1063 
1064  // Oblique projections
1065  if (getVerbosity() >= QUDA_SUMMARIZE)
1066  printfQuda("Checking 1 > || (1 - DP(P^dagDP)P^dag) v_k || / || v_k || for vector %d\n", i);
1067 
1068  transfer->R(*r_coarse, *param.B[i]);
1069  (*coarse_solver)(*x_coarse, *r_coarse); // this needs to be an exact solve to pass
1070  setOutputPrefix(prefix); // restore prefix after return from coarse grid
1071  transfer->P(*tmp2, *x_coarse);
1072  (*param.matResidual)(*tmp1, *tmp2);
1073 
1074  if (getVerbosity() >= QUDA_SUMMARIZE) {
1075  printfQuda("Vector %d: norms v_k %e DP(P^dagDP)P^dag v_k %e\n", i, norm2(*param.B[i]), norm2(*tmp1));
1076  printfQuda("L2 relative deviation = %e\n", sqrt(xmyNorm(*param.B[i], *tmp1) / norm2(*param.B[i])));
1077  }
1078  }
1079 
1080  sprintf(prefix, "MG level %d (%s): ", param.level + 1,
1081  param.location == QUDA_CUDA_FIELD_LOCATION ? "GPU" : "CPU");
1082  setOutputPrefix(prefix);
1083  }
1084  }
1085  }
1086 
1087  delete tmp1;
1088  delete tmp2;
1089 
1090  if (recursively && param.level < param.Nlevel - 2) coarse->verify(true);
1091 
1092  popLevel(param.level);
1093  }
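
 // In summary, with near-null vectors v_k, a random coarse vector eta_c, fine
 // operator D and coarse operator D_c, verify() above has checked:
 //   (1 - P P^\dagger) v_k       = 0   (prolongator reproduces its input vectors)
 //   (1 - P^\dagger P) eta_c     = 0   (restrictor is a left inverse of P)
 //   (D_c - P^\dagger D P) eta_c = 0   (native coarse op matches the emulated op)
 //   Im(eta^\dagger M^\dagger M eta) = 0   (normal operators are Hermitian)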
1094 
1095  void MG::operator()(ColorSpinorField &x, ColorSpinorField &b) {
1096  pushOutputPrefix(prefix);
1097 
1098  if (param.level < param.Nlevel - 1) { // set parity for the solver in the transfer operator
1099  QudaSiteSubset site_subset
1100  = param.coarse_grid_solution_type == QUDA_MATPC_SOLUTION ? QUDA_PARITY_SITE_SUBSET : QUDA_FULL_SITE_SUBSET;
1101  QudaMatPCType matpc_type = param.mg_global.invert_param->matpc_type;
1102  QudaParity parity = (matpc_type == QUDA_MATPC_EVEN_EVEN || matpc_type == QUDA_MATPC_EVEN_EVEN_ASYMMETRIC) ?
1103  QUDA_EVEN_PARITY :
1104  QUDA_ODD_PARITY;
1105  transfer->setSiteSubset(site_subset, parity); // use this to force location of transfer
1106  }
1107 
1108  // if input vector is single parity then we must be solving the
1109  // preconditioned system; in general this can only happen on the
1110  // top level
1111  QudaSolutionType outer_solution_type = b.SiteSubset() == QUDA_PARITY_SITE_SUBSET ? QUDA_MATPC_SOLUTION : QUDA_MAT_SOLUTION;
1112  QudaSolutionType inner_solution_type = param.coarse_grid_solution_type;
1113 
1114  if (debug) printfQuda("outer_solution_type = %d, inner_solution_type = %d\n", outer_solution_type, inner_solution_type);
1115 
1116  if ( outer_solution_type == QUDA_MATPC_SOLUTION && inner_solution_type == QUDA_MAT_SOLUTION)
1117  errorQuda("Unsupported solution type combination");
1118 
1119  if ( inner_solution_type == QUDA_MATPC_SOLUTION && param.smoother_solve_type != QUDA_DIRECT_PC_SOLVE)
1120  errorQuda("For this coarse grid solution type, a preconditioned smoother is required");
1121 
1122  if ( debug ) printfQuda("entering V-cycle with x2=%e, r2=%e\n", norm2(x), norm2(b));
1123 
1124  if (param.level < param.Nlevel-1) {
1125  //transfer->setTransferGPU(false); // use this to force location of transfer (need to check if still works for multi-level)
1126 
1127  // do the pre smoothing
1128  if (debug) printfQuda("pre-smoothing b2=%e site subset %d\n", norm2(b), b.SiteSubset());
1129 
1130  ColorSpinorField *out=nullptr, *in=nullptr;
1131 
1132  ColorSpinorField &residual = b.SiteSubset() == QUDA_FULL_SITE_SUBSET ? *r : r->Even();
1133 
1134  // FIXME only need to make a copy if not preconditioning
1135  residual = b; // copy source vector since we will overwrite source with iterated residual
1136 
1137  diracSmoother->prepare(in, out, x, residual, outer_solution_type);
1138 
1139  // b_tilde holds either a copy of preconditioned source or a pointer to original source
1140  if (param.smoother_solve_type == QUDA_DIRECT_PC_SOLVE) *b_tilde = *in;
1141  else b_tilde = &b;
1142 
1143  if (presmoother) (*presmoother)(*out, *in); else zero(*out);
1144 
1145  ColorSpinorField &solution = inner_solution_type == outer_solution_type ? x : x.Even();
1146  diracSmoother->reconstruct(solution, b, inner_solution_type);
1147 
1148  // if using preconditioned smoother then need to reconstruct full residual
1149  // FIXME extend this check for precision, Schwarz, etc.
1150  bool use_solver_residual
1151  = ((param.smoother_solve_type == QUDA_DIRECT_PC_SOLVE && inner_solution_type == QUDA_MATPC_SOLUTION)
1152  || (param.smoother_solve_type == QUDA_DIRECT_SOLVE && inner_solution_type == QUDA_MAT_SOLUTION)) ?
1153  true :
1154  false;
1155 
1156  // FIXME this is currently borked if inner solver is preconditioned
1157  double r2 = 0.0;
1158  if (use_solver_residual) {
1159  if (debug) r2 = norm2(*r);
1160  } else {
1161  (*param.matResidual)(*r, x);
1162  if (debug)
1163  r2 = xmyNorm(b, *r);
1164  else
1165  axpby(1.0, b, -1.0, *r);
1166  }
1167 
1168  // We need this to ensure that the coarse level has been created.
1169  // e.g. in case of iterative setup with MG we use just pre- and post-smoothing at the first iteration.
1170  if (transfer) {
1171 
1172  // restrict to the coarse grid
1173  transfer->R(*r_coarse, residual);
1174  if ( debug ) printfQuda("after pre-smoothing x2 = %e, r2 = %e, r_coarse2 = %e\n", norm2(x), r2, norm2(*r_coarse));
1175 
1176  // recurse to the next lower level
1177  (*coarse_solver)(*x_coarse, *r_coarse);
1178  if (debug) printfQuda("after coarse solve x_coarse2 = %e r_coarse2 = %e\n", norm2(*x_coarse), norm2(*r_coarse));
1179 
1180  // prolongate back to this grid
1181  ColorSpinorField &x_coarse_2_fine = inner_solution_type == QUDA_MAT_SOLUTION ? *r : r->Even(); // define according to inner solution type
1182  transfer->P(x_coarse_2_fine, *x_coarse); // repurpose residual storage
1183  xpy(x_coarse_2_fine, solution); // sum to solution FIXME - sum should be done inside the transfer operator
1184  if ( debug ) {
1185  printfQuda("Prolongated coarse solution y2 = %e\n", norm2(*r));
1186  printfQuda("after coarse-grid correction x2 = %e, r2 = %e\n", norm2(x), norm2(*r));
1187  }
1188  }
1189 
1190  if (debug) printfQuda("preparing to post smooth\n");
1191 
1192  // do the post smoothing
1193  //residual = outer_solution_type == QUDA_MAT_SOLUTION ? *r : r->Even(); // refine for outer solution type
1194  if (param.smoother_solve_type == QUDA_DIRECT_PC_SOLVE) {
1195  in = b_tilde;
1196  } else { // this incurs unnecessary copying
1197  *r = b;
1198  in = r;
1199  }
1200 
1201  // we should keep a copy of the prepared right hand side as we've already destroyed it
1202  //dirac.prepare(in, out, solution, residual, inner_solution_type);
1203 
1204  if (postsmoother) (*postsmoother)(*out, *in); // for inner solve preconditioned, `in` should be the original prepared rhs
1205 
1206  if (debug) printfQuda("exited postsmooth, about to reconstruct\n");
1207 
1208  diracSmoother->reconstruct(x, b, outer_solution_type);
1209 
1210  if (debug) printfQuda("finished reconstruct\n");
1211 
1212  } else { // do the coarse grid solve
1213 
1214  ColorSpinorField *out=nullptr, *in=nullptr;
1215  diracSmoother->prepare(in, out, x, b, outer_solution_type);
1216  if (presmoother) (*presmoother)(*out, *in);
1217  diracSmoother->reconstruct(x, b, outer_solution_type);
1218  }
1219 
1220  // FIXME on subset check
1221  if (debug && b.SiteSubset() == r->SiteSubset()) {
1222  (*param.matResidual)(*r, x);
1223  double r2 = xmyNorm(b, *r);
1224  printfQuda("leaving V-cycle with x2=%e, r2=%e\n", norm2(x), r2);
1225  }
1226 
1227  popOutputPrefix();
1228  }
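
 // One cycle of the recursion above, for the QUDA_MAT_SOLUTION case
 // (illustrative pseudocode only):
 //
 //   x   <- S_pre(b)              // pre-smooth
 //   r   <- b - D x               // fine-grid residual
 //   x_c <- solve(D_c, R r)       // coarse-grid correction (recursive)
 //   x   <- x + P x_c             // prolongate and add
 //   x   <- S_post(x; b)          // post-smooth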
1229 
1230  // supports separate reading or single file read
1231  void MG::loadVectors(std::vector<ColorSpinorField *> &B)
1232  {
1233  if (param.transfer_type != QUDA_TRANSFER_AGGREGATE) {
1234  warningQuda("Cannot load near-null vectors for top level of staggered MG solve.");
1235  } else {
1236  bool is_running = profile_global.isRunning(QUDA_PROFILE_INIT);
1237  if (is_running) profile_global.TPSTOP(QUDA_PROFILE_INIT);
1238  profile_global.TPSTART(QUDA_PROFILE_IO);
1239  pushLevel(param.level);
1240  std::string vec_infile(param.mg_global.vec_infile[param.level]);
1241  vec_infile += "_level_";
1242  vec_infile += std::to_string(param.level);
1243  vec_infile += "_nvec_";
1244  vec_infile += std::to_string(param.mg_global.n_vec[param.level]);
1245  VectorIO io(vec_infile);
1246  io.load(B);
1247  popLevel(param.level);
1248  profile_global.TPSTOP(QUDA_PROFILE_IO);
1249  if (is_running) profile_global.TPSTART(QUDA_PROFILE_INIT);
1250  }
1251  }
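
 // For example (assuming vec_infile is "nullvecs" with level 1 and n_vec 24),
 // the vectors are read from a file whose name resolves to
 // "nullvecs_level_1_nvec_24"; saveVectors() below mirrors the same naming.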
1252 
1253  void MG::saveVectors(const std::vector<ColorSpinorField *> &B) const
1254  {
1255  if (param.transfer_type != QUDA_TRANSFER_AGGREGATE) {
1256  warningQuda("Cannot save near-null vectors for top level of staggered MG solve.");
1257  } else {
1258  bool is_running = profile_global.isRunning(QUDA_PROFILE_INIT);
1259  if (is_running) profile_global.TPSTOP(QUDA_PROFILE_INIT);
1260  profile_global.TPSTART(QUDA_PROFILE_IO);
1261  pushLevel(param.level);
1262  std::string vec_outfile(param.mg_global.vec_outfile[param.level]);
1263  vec_outfile += "_level_";
1264  vec_outfile += std::to_string(param.level);
1265  vec_outfile += "_nvec_";
1266  vec_outfile += std::to_string(param.mg_global.n_vec[param.level]);
1267  VectorIO io(vec_outfile);
1268  io.save(B);
1269  popLevel(param.level);
1270  profile_global.TPSTOP(QUDA_PROFILE_IO);
1271  if (is_running) profile_global.TPSTART(QUDA_PROFILE_INIT);
1272  }
1273  }
1274 
1275  void MG::dumpNullVectors() const
1276  {
1277  if (param.transfer_type != QUDA_TRANSFER_AGGREGATE) {
1278  warningQuda("Cannot dump near-null vectors for top level of staggered MG solve.");
1279  } else {
1280  saveVectors(param.B);
1281  }
1282  if (param.level < param.Nlevel - 2) coarse->dumpNullVectors();
1283  }
1284 
1285  void MG::generateNullVectors(std::vector<ColorSpinorField *> &B, bool refresh)
1286  {
1287  pushLevel(param.level);
1288 
1289  SolverParam solverParam(param); // Set solver field parameters:
1290  // set null-space generation options - need to expose these
1291  solverParam.maxiter
1292  = refresh ? param.mg_global.setup_maxiter_refresh[param.level] : param.mg_global.setup_maxiter[param.level];
1293  solverParam.tol = param.mg_global.setup_tol[param.level];
1294  solverParam.use_init_guess = QUDA_USE_INIT_GUESS_YES;
1295  solverParam.delta = 1e-1;
1296  solverParam.inv_type = param.mg_global.setup_inv_type[param.level];
1297  // Hard coded for now...
1298  if (solverParam.inv_type == QUDA_CA_CG_INVERTER || solverParam.inv_type == QUDA_CA_CGNE_INVERTER
1299  || solverParam.inv_type == QUDA_CA_CGNR_INVERTER || solverParam.inv_type == QUDA_CA_GCR_INVERTER) {
1300  solverParam.ca_basis = param.mg_global.setup_ca_basis[param.level];
1301  solverParam.ca_lambda_min = param.mg_global.setup_ca_lambda_min[param.level];
1302  solverParam.ca_lambda_max = param.mg_global.setup_ca_lambda_max[param.level];
1303  solverParam.Nkrylov = param.mg_global.setup_ca_basis_size[param.level];
1304  } else {
1305  solverParam.Nkrylov = 4;
1306  }
1307  solverParam.pipeline
1308  = (solverParam.inv_type == QUDA_BICGSTAB_INVERTER ? 0 : 4); // FIXME: pipeline != 0 breaks BICGSTAB
1309  solverParam.precision = r->Precision();
1310 
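 // precision selection: the fine level may take sloppy/preconditioner precisions
 // from the outer invert_param (e.g., enabling half precision), while coarser
 // levels reuse the setup-solver precision for the preconditioner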
1311  if (param.level == 0) { // this enables half precision on the fine grid only if set
1312  solverParam.precision_sloppy = param.mg_global.invert_param->cuda_prec_sloppy;
1313  solverParam.precision_precondition = param.mg_global.invert_param->cuda_prec_precondition;
1314  } else {
1315  solverParam.precision_precondition = solverParam.precision;
1316  }
1317  solverParam.residual_type = static_cast<QudaResidualType>(QUDA_L2_RELATIVE_RESIDUAL);
1318  solverParam.compute_null_vector = QUDA_COMPUTE_NULL_VECTOR_YES;
1319  ColorSpinorParam csParam(*B[0]); // Create spinor field parameters:
1320  csParam.setPrecision(r->Precision(), r->Precision(), true); // ensure native ordering
1321  csParam.location = QUDA_CUDA_FIELD_LOCATION; // hard code to GPU location for null-space generation for now
1322  csParam.gammaBasis = B[0]->Nspin() == 1 ? QUDA_DEGRAND_ROSSI_GAMMA_BASIS :
1323  QUDA_UKQCD_GAMMA_BASIS; // degrand-rossi required for staggered
1324  csParam.create = QUDA_ZERO_FIELD_CREATE;
1325  ColorSpinorField *b = ColorSpinorField::Create(csParam);
1326  ColorSpinorField *x = ColorSpinorField::Create(csParam);
1327 
1328  csParam.create = QUDA_NULL_FIELD_CREATE;
1329 
1330  // if we are not using a GCR/MG smoother then we need to switch off Schwarz since regular Krylov solvers do not support it
1331  bool schwarz_reset = solverParam.inv_type != QUDA_MG_INVERTER
1332  && param.mg_global.smoother_schwarz_type[param.level] != QUDA_INVALID_SCHWARZ;
1333  if (schwarz_reset) {
1334  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Disabling Schwarz for null-space finding\n");
1335  int commDim[QUDA_MAX_DIM];
1336  for (int i = 0; i < QUDA_MAX_DIM; i++) commDim[i] = 1;
1337  diracSmootherSloppy->setCommDim(commDim);
1338  }
1339 
1340  // if quarter precision halo, promote for null-space finding to half precision
1341  QudaPrecision halo_precision = diracSmootherSloppy->HaloPrecision();
1342  if (halo_precision == QUDA_QUARTER_PRECISION) diracSmootherSloppy->setHaloPrecision(QUDA_HALF_PRECISION);
1343 
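 // three setup-solver paths: CG-type solvers invert the normal operator MdagM;
 // QUDA_MG_INVERTER wraps GCR around this MG hierarchy as a preconditioner;
 // any other inverter applies the smoother operator directly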
1344  Solver *solve;
1345  DiracMdagM *mdagm = (solverParam.inv_type == QUDA_CG_INVERTER || solverParam.inv_type == QUDA_CA_CG_INVERTER) ? new DiracMdagM(*diracSmoother) : nullptr;
1346  DiracMdagM *mdagmSloppy = (solverParam.inv_type == QUDA_CG_INVERTER || solverParam.inv_type == QUDA_CA_CG_INVERTER) ? new DiracMdagM(*diracSmootherSloppy) : nullptr;
1347  if (solverParam.inv_type == QUDA_CG_INVERTER || solverParam.inv_type == QUDA_CA_CG_INVERTER) {
1348  solve = Solver::create(solverParam, *mdagm, *mdagmSloppy, *mdagmSloppy, *mdagmSloppy, profile);
1349  } else if (solverParam.inv_type == QUDA_MG_INVERTER) {
1350  // in case the MG hierarchy has not been created yet, create the smoother
1351  if (!transfer) createSmoother();
1352 
1353  // run GCR with the MG as a preconditioner
1354  solverParam.inv_type_precondition = QUDA_MG_INVERTER;
1355  solverParam.schwarz_type = QUDA_ADDITIVE_SCHWARZ;
1356  solverParam.precondition_cycle = 1;
1357  solverParam.tol_precondition = 1e-1;
1358  solverParam.maxiter_precondition = 1;
1359  solverParam.omega = 1.0;
1360  solverParam.verbosity_precondition = param.mg_global.verbosity[param.level+1];
1361  solverParam.precision_sloppy = solverParam.precision;
1362  solverParam.compute_true_res = 0;
1363  solverParam.preconditioner = this;
1364 
1365  solverParam.inv_type = QUDA_GCR_INVERTER;
1366  solve = Solver::create(solverParam, *param.matSmooth, *param.matSmooth, *param.matSmoothSloppy,
1367  *param.matSmoothSloppy, profile);
1368  solverParam.inv_type = QUDA_MG_INVERTER;
1369  } else {
1370  solve = Solver::create(solverParam, *param.matSmooth, *param.matSmoothSloppy, *param.matSmoothSloppy,
1371  *param.matSmoothSloppy, profile);
1372  }
1373 
1374  for (int si = 0; si < param.mg_global.num_setup_iter[param.level]; si++) {
1375  if (getVerbosity() >= QUDA_VERBOSE)
1376  printfQuda("Running vectors setup on level %d iter %d of %d\n", param.level, si + 1,
1377  param.mg_global.num_setup_iter[param.level]);
1378 
1379  // global orthonormalization of the initial null-space vectors
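 // (classical Gram-Schmidt: subtract the projection onto each earlier vector, then normalize)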
1380  if(param.mg_global.pre_orthonormalize) {
1381  for(int i=0; i<(int)B.size(); i++) {
1382  for (int j=0; j<i; j++) {
1383  Complex alpha = cDotProduct(*B[j], *B[i]);// <j,i>
1384  caxpy(-alpha, *B[j], *B[i]); // i-<j,i>j
1385  }
1386  double nrm2 = norm2(*B[i]);
1387  if (nrm2 > 1e-16) ax(1.0 / sqrt(nrm2), *B[i]); // normalize: i -> i/||i||
1388  else errorQuda("\nCannot normalize vector %d\n", i);
1389  }
1390  }
1391 
1392  // launch solver for each source
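 // test-vector setup solves A x = B[i] from a zero guess (DDalphaAMG style),
 // while the default smooths B[i] itself by solving A x = 0 with initial
 // guess x = B[i], so the surviving error is rich in low modes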
1393  for (int i=0; i<(int)B.size(); i++) {
1394  if (param.mg_global.setup_type == QUDA_TEST_VECTOR_SETUP) { // DDalphaAMG test vector idea
1395  *b = *B[i]; // inverting against the vector
1396  zero(*x); // with zero initial guess
1397  } else {
1398  *x = *B[i];
1399  zero(*b);
1400  }
1401 
1402  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Initial guess = %g\n", norm2(*x));
1403  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Initial rhs = %g\n", norm2(*b));
1404 
1405  ColorSpinorField *out=nullptr, *in=nullptr;
1406  diracSmoother->prepare(in, out, *x, *b, QUDA_MAT_SOLUTION);
1407  (*solve)(*out, *in);
1408  diracSmoother->reconstruct(*x, *b, QUDA_MAT_SOLUTION);
1409 
1410  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Solution = %g\n", norm2(*x));
1411  *B[i] = *x;
1412  }
1413 
1414  // global orthonormalization of the generated null-space vectors
1415  if (param.mg_global.post_orthonormalize) {
1416  for(int i=0; i<(int)B.size(); i++) {
1417  for (int j=0; j<i; j++) {
1418  Complex alpha = cDotProduct(*B[j], *B[i]);// <j,i>
1419  caxpy(-alpha, *B[j], *B[i]); // i-<j,i>j
1420  }
1421  double nrm2 = norm2(*B[i]);
1422  if (sqrt(nrm2) > 1e-16) ax(1.0 / sqrt(nrm2), *B[i]); // normalize: i -> i/||i||
1423  else errorQuda("\nCannot normalize vector %d (nrm=%e)\n", i, sqrt(nrm2));
1424  }
1425  }
1426 
1427  if (solverParam.inv_type == QUDA_MG_INVERTER) {
1428 
1429  if (transfer) {
1430  resetTransfer = true;
1431  reset();
1432  if ( param.level < param.Nlevel-2 ) {
1433  if (param.mg_global.generate_all_levels == QUDA_BOOLEAN_TRUE) {
1434  coarse->generateNullVectors(*B_coarse, refresh);
1435  } else {
1436  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Restricting null space vectors\n");
1437  for (int i=0; i<param.Nvec; i++) {
1438  zero(*(*B_coarse)[i]);
1439  transfer->R(*(*B_coarse)[i], *(param.B[i]));
1440  }
1441  // rebuild the transfer operator in the coarse level
1442  coarse->resetTransfer = true;
1443  coarse->reset();
1444  }
1445  }
1446  } else {
1447  reset();
1448  }
1449  }
1450  }
1451 
1452  delete solve;
1453  if (mdagm) delete mdagm;
1454  if (mdagmSloppy) delete mdagmSloppy;
1455 
1456  diracSmootherSloppy->setHaloPrecision(halo_precision); // restore halo precision
1457 
1458  delete x;
1459  delete b;
1460 
1461  // reenable Schwarz
1462  if (schwarz_reset) {
1463  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Reenabling Schwarz after null-space finding\n");
1464  int commDim[QUDA_MAX_DIM];
1465  for (int i=0; i<QUDA_MAX_DIM; i++) commDim[i] = 0;
1466  diracSmootherSloppy->setCommDim(commDim);
1467  }
1468 
1469  if (param.mg_global.vec_store[param.level] == QUDA_BOOLEAN_TRUE) { // conditional store of null vectors
1470  saveVectors(B);
1471  }
1472 
1473  popLevel(param.level);
1474  }
1475 
1476  // generate a full span of free vectors.
1477  // FIXME: Assumes fine level is SU(3).
1478  void MG::buildFreeVectors(std::vector<ColorSpinorField *> &B)
1479  {
1480  pushLevel(param.level);
1481  const int Nvec = B.size();
1482 
1483  // Given the number of colors and spins, figure out if the number
1484  // of vectors in 'B' makes sense.
1485  const int Ncolor = B[0]->Ncolor();
1486  const int Nspin = B[0]->Nspin();
1487 
1488  if (Ncolor == 3) // fine level
1489  {
1490  if (Nspin == 4) // Wilson or Twisted Mass (singlet)
1491  {
1492  // There needs to be 6 null vectors -> 12 after chirality.
1493  if (Nvec != 6) errorQuda("\nError in MG::buildFreeVectors: Wilson-type fermions require Nvec = 6");
1494 
1495  if (getVerbosity() >= QUDA_VERBOSE)
1496  printfQuda("Building %d free field vectors for Wilson-type fermions\n", Nvec);
1497 
1498  // Zero the null vectors.
1499  for (int i = 0; i < Nvec; i++) zero(*B[i]);
1500 
1501  // Create a temporary vector.
1502  ColorSpinorParam csParam(*B[0]);
1503  csParam.create = QUDA_ZERO_FIELD_CREATE;
1504  ColorSpinorField *tmp = ColorSpinorField::Create(csParam);
1505 
1506  int counter = 0;
1507  for (int c = 0; c < Ncolor; c++) {
1508  for (int s = 0; s < 2; s++) {
1509  tmp->Source(QUDA_CONSTANT_SOURCE, 1, s, c);
1510  xpy(*tmp, *B[counter]);
1511  tmp->Source(QUDA_CONSTANT_SOURCE, 1, s + 2, c);
1512  xpy(*tmp, *B[counter]);
1513  counter++;
1514  }
1515  }
1516 
1517  delete tmp;
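 // 2 spins x 3 colors = 6 vectors, each populating spin s and its partner
 // s + 2, which the chirality split doubles to the 12 noted above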
1518  } else if (Nspin == 1) { // Staggered
1519 
1520  // There needs to be 24 null vectors -> 48 after chirality.
1521  if (Nvec != 24) errorQuda("\nError in MG::buildFreeVectors: Staggered-type fermions require Nvec = 24\n");
1522 
1523  if (getVerbosity() >= QUDA_VERBOSE)
1524  printfQuda("Building %d free field vectors for Staggered-type fermions\n", Nvec);
1525 
1526  // Zero the null vectors.
1527  for (int i = 0; i < Nvec; i++) zero(*B[i]);
1528 
1529  // Create a temporary vector.
1530  ColorSpinorParam csParam(*B[0]);
1531  csParam.create = QUDA_ZERO_FIELD_CREATE;
1532  ColorSpinorField *tmp = ColorSpinorField::Create(csParam);
1533 
1534  // Build free null vectors.
1535  for (int c = 0; c < B[0]->Ncolor(); c++) {
1536  // Need to pair an even+odd corner together
1537  // since they'll get split up.
1538  // 0000, 0001
1539  tmp->Source(QUDA_CORNER_SOURCE, 1, 0x0, c);
1540  xpy(*tmp, *B[8 * c + 0]);
1541  tmp->Source(QUDA_CORNER_SOURCE, 1, 0x1, c);
1542  xpy(*tmp, *B[8 * c + 0]);
1543 
1544  // 0010, 0011
1545  tmp->Source(QUDA_CORNER_SOURCE, 1, 0x2, c);
1546  xpy(*tmp, *B[8 * c + 1]);
1547  tmp->Source(QUDA_CORNER_SOURCE, 1, 0x3, c);
1548  xpy(*tmp, *B[8 * c + 1]);
1549 
1550  // 0100, 0101
1551  tmp->Source(QUDA_CORNER_SOURCE, 1, 0x4, c);
1552  xpy(*tmp, *B[8 * c + 2]);
1553  tmp->Source(QUDA_CORNER_SOURCE, 1, 0x5, c);
1554  xpy(*tmp, *B[8 * c + 2]);
1555 
1556  // 0110, 0111
1557  tmp->Source(QUDA_CORNER_SOURCE, 1, 0x6, c);
1558  xpy(*tmp, *B[8 * c + 3]);
1559  tmp->Source(QUDA_CORNER_SOURCE, 1, 0x7, c);
1560  xpy(*tmp, *B[8 * c + 3]);
1561 
1562  // 1000, 1001
1563  tmp->Source(QUDA_CORNER_SOURCE, 1, 0x8, c);
1564  xpy(*tmp, *B[8 * c + 4]);
1565  tmp->Source(QUDA_CORNER_SOURCE, 1, 0x9, c);
1566  xpy(*tmp, *B[8 * c + 4]);
1567 
1568  // 1010, 1011
1569  tmp->Source(QUDA_CORNER_SOURCE, 1, 0xA, c);
1570  xpy(*tmp, *B[8 * c + 5]);
1571  tmp->Source(QUDA_CORNER_SOURCE, 1, 0xB, c);
1572  xpy(*tmp, *B[8 * c + 5]);
1573 
1574  // 1100, 1101
1575  tmp->Source(QUDA_CORNER_SOURCE, 1, 0xC, c);
1576  xpy(*tmp, *B[8 * c + 6]);
1577  tmp->Source(QUDA_CORNER_SOURCE, 1, 0xD, c);
1578  xpy(*tmp, *B[8 * c + 6]);
1579 
1580  // 1110, 1111
1581  tmp->Source(QUDA_CORNER_SOURCE, 1, 0xE, c);
1582  xpy(*tmp, *B[8 * c + 7]);
1583  tmp->Source(QUDA_CORNER_SOURCE, 1, 0xF, c);
1584  xpy(*tmp, *B[8 * c + 7]);
1585  }
1586 
1587  delete tmp;
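 // 8 corner pairs x 3 colors = 24 vectors: pairing hypercube corner 2k with
 // 2k+1 keeps each vector populated on both parities after the even/odd split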
1588  } else {
1589  errorQuda("\nError in MG::buildFreeVectors: Unsupported combo of Nc %d, Nspin %d", Ncolor, Nspin);
1590  }
1591  } else { // coarse level
1592  if (Nspin == 2) {
1593  // There needs to be Ncolor null vectors.
1594  if (Nvec != Ncolor) errorQuda("\nError in MG::buildFreeVectors: Coarse fermions require Nvec = Ncolor");
1595 
1596  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Building %d free field vectors for Coarse fermions\n", Ncolor);
1597 
1598  // Zero the null vectors.
1599  for (int i = 0; i < Nvec; i++) zero(*B[i]);
1600 
1601  // Create a temporary vector.
1602  ColorSpinorParam csParam(*B[0]);
1603  csParam.create = QUDA_ZERO_FIELD_CREATE;
1604  ColorSpinorField *tmp = ColorSpinorField::Create(csParam);
1605 
1606  for (int c = 0; c < Ncolor; c++) {
1607  tmp->Source(QUDA_CONSTANT_SOURCE, 1, 0, c);
1608  xpy(*tmp, *B[c]);
1609  tmp->Source(QUDA_CONSTANT_SOURCE, 1, 1, c);
1610  xpy(*tmp, *B[c]);
1611  }
1612 
1613  delete tmp;
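 // each coarse color receives a constant source on both coarse spins (the
 // two chiral components), yielding the required Ncolor vectors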
1614  } else if (Nspin == 1) {
1615  // There needs to be Ncolor null vectors.
1616  if (Nvec != Ncolor) errorQuda("\nError in MG::buildFreeVectors: Coarse fermions require Nvec = Ncolor");
1617 
1618  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Building %d free field vectors for Coarse fermions\n", Ncolor);
1619 
1620  // Zero the null vectors.
1621  for (int i = 0; i < Nvec; i++) zero(*B[i]);
1622 
1623  // Create a temporary vector.
1624  ColorSpinorParam csParam(*B[0]);
1625  csParam.create = QUDA_ZERO_FIELD_CREATE;
1626  ColorSpinorField *tmp = ColorSpinorField::Create(csParam);
1627 
1628  for (int c = 0; c < Ncolor; c++) {
1629  tmp->Source(QUDA_CONSTANT_SOURCE, 1, 0, c);
1630  xpy(*tmp, *B[c]);
1631  }
1632 
1633  delete tmp;
1634  } else {
1635  errorQuda("\nError in MG::buildFreeVectors: Unexpected Nspin = %d for coarse fermions", Nspin);
1636  }
1637  }
1638 
1639  // global normalization of the generated free-field vectors (they are orthogonal by construction)
1640  if(param.mg_global.post_orthonormalize) {
1641  for(int i=0; i<(int)B.size(); i++) {
1642  double nrm2 = norm2(*B[i]);
1643  if (nrm2 > 1e-16) ax(1.0 / sqrt(nrm2), *B[i]); // normalize: i -> i/||i||
1644  else errorQuda("\nCannot normalize vector %d\n", i);
1645  }
1646  }
1647 
1648  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Done building free vectors\n");
1649 
1650  popLevel(param.level);
1651  }
1652 
1653  void MG::generateEigenVectors()
1654  {
1655  pushLevel(param.level);
1656 
1657  // Extract eigensolver params
1658  int n_conv = param.mg_global.eig_param[param.level]->n_conv;
1659  bool dagger = param.mg_global.eig_param[param.level]->use_dagger;
1660  bool normop = param.mg_global.eig_param[param.level]->use_norm_op;
1661 
1662  // Dummy array to keep the eigensolver happy.
1663  std::vector<Complex> evals(n_conv, 0.0);
1664 
1665  std::vector<ColorSpinorField *> B_evecs;
1666  ColorSpinorParam csParam(*param.B[0]);
1667  csParam.gammaBasis = QUDA_UKQCD_GAMMA_BASIS;
1668  csParam.create = QUDA_ZERO_FIELD_CREATE;
1669  // This is the vector precision used by matResidual
1670  csParam.setPrecision(param.mg_global.invert_param->cuda_prec_sloppy, QUDA_INVALID_PRECISION, true);
1671 
1672  for (int i = 0; i < n_conv; i++) B_evecs.push_back(ColorSpinorField::Create(csParam));
1673 
1674  // before entering the eigen solver, let's free the B vectors to save some memory
1675  ColorSpinorParam bParam(*param.B[0]);
1676  for (int i = 0; i < (int)param.B.size(); i++) delete param.B[i];
1677 
1678  EigenSolver *eig_solve;
1679  if (!normop && !dagger) {
1680  DiracM *mat = new DiracM(*diracResidual);
1681  eig_solve = EigenSolver::create(param.mg_global.eig_param[param.level], *mat, profile);
1682  (*eig_solve)(B_evecs, evals);
1683  delete eig_solve;
1684  delete mat;
1685  } else if (!normop && dagger) {
1686  DiracMdag *mat = new DiracMdag(*diracResidual);
1687  eig_solve = EigenSolver::create(param.mg_global.eig_param[param.level], *mat, profile);
1688  (*eig_solve)(B_evecs, evals);
1689  delete eig_solve;
1690  delete mat;
1691  } else if (normop && !dagger) {
1692  DiracMdagM *mat = new DiracMdagM(*diracResidual);
1693  eig_solve = EigenSolver::create(param.mg_global.eig_param[param.level], *mat, profile);
1694  (*eig_solve)(B_evecs, evals);
1695  delete eig_solve;
1696  delete mat;
1697  } else if (normop && dagger) {
1698  DiracMMdag *mat = new DiracMMdag(*diracResidual);
1699  eig_solve = EigenSolver::create(param.mg_global.eig_param[param.level], *mat, profile);
1700  (*eig_solve)(B_evecs, evals);
1701  delete eig_solve;
1702  delete mat;
1703  }
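 // the four branches pick the operator whose low modes are computed:
 // M, Mdag, MdagM or MMdag, as selected by use_dagger / use_norm_op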
1704 
1705  // now reallocate the B vectors and copy in the eigenvectors
1706  for (int i = 0; i < (int)param.B.size(); i++) {
1707  param.B[i] = ColorSpinorField::Create(bParam);
1708  *param.B[i] = *B_evecs[i];
1709  }
1710 
1711  // Local clean-up
1712  for (auto b : B_evecs) { delete b; }
1713 
1714  // only save if outfile is defined
1715  if (strcmp(param.mg_global.vec_outfile[param.level], "") != 0) { saveVectors(param.B); }
1716 
1717  popLevel(param.level);
1718  }
1719 
1720 } // namespace quda