QUDA  0.9.0
multigrid.cpp
Go to the documentation of this file.
1 #include <multigrid.h>
2 #include <qio_field.h>
3 #include <string.h>
4 
6 
7 namespace quda {
8 
9  using namespace blas;
10 
11  static bool debug = false;
12 
// Constructor for one level of the multigrid hierarchy. Builds (top to
// bottom): output prefix, residual vectors, the transfer operator, the
// coarse-grid Dirac operators, coarse null-space vectors, the next MG level,
// and finally the coarse-grid solver. Recurses via the nested MG created for
// param_coarse until param.Nlevel-1 is reached.
// NOTE(review): this listing is a documentation extract with elided lines
// (the embedded numbering is non-contiguous), so several statements are
// missing; do not treat this text as compilable source.
13  MG::MG(MGParam &param, TimeProfile &profile_global)
14  : Solver(param, profile), param(param), transfer(0), presmoother(0), postsmoother(0),
15  profile_global(profile_global),
16  profile( "MG level " + std::to_string(param.level+1), false ),
17  coarse(nullptr), fine(param.fine), coarse_solver(nullptr),
18  param_coarse(nullptr), param_presmooth(nullptr), param_postsmooth(nullptr),
19  r(nullptr), r_coarse(nullptr), x_coarse(nullptr), tmp_coarse(nullptr),
20  diracCoarseResidual(nullptr), diracCoarseSmoother(nullptr), matCoarseResidual(nullptr), matCoarseSmoother(nullptr) {
21 
22  // for reporting level 1 is the fine level but internally use level 0 for indexing
23  sprintf(prefix,"MG level %d (%s): ", param.level+1, param.location == QUDA_CUDA_FIELD_LOCATION ? "GPU" : "CPU" );
25 
26  printfQuda("Creating level %d of %d levels\n", param.level+1, param.Nlevel);
27 
28  if (param.level < param.Nlevel-1) {
31  } else if (strcmp(param.mg_global.vec_infile,"")!=0) { // only load if infile is defined and not computing
33  }
34  }
35 
37  errorQuda("Level=%d is greater than limit of multigrid recursion depth", param.level+1);
38 
40 
42  errorQuda("Cannot use preconditioned coarse grid solution without preconditioned smoother solve");
43 
44  // create residual vectors
45  {
48  csParam.location = param.location;
49  if (csParam.location==QUDA_CUDA_FIELD_LOCATION) {
50  // all coarse GPU vectors use FLOAT2 ordering
51  csParam.fieldOrder = (csParam.precision == QUDA_DOUBLE_PRECISION || param.level > 0) ?
53  csParam.setPrecision(csParam.precision);
55  }
57 
58  // if we're using preconditioning then allocate storage for the preconditioned source vector
60  csParam.x[0] /= 2;
61  csParam.siteSubset = QUDA_PARITY_SITE_SUBSET;
63  }
64  }
65 
66  // if not on the coarsest level, construct it
67  if (param.level < param.Nlevel-1) {
69 
70  // create transfer operator
71  printfQuda("start creating transfer operator\n");
73  param.location == QUDA_CUDA_FIELD_LOCATION ? true : false, profile);
75 
76  //transfer->setTransferGPU(false); // use this to force location of transfer
77  printfQuda("end creating transfer operator\n");
78 
79  // create coarse residual vector
81 
82  // create coarse solution vector
84 
85  // create coarse temporary vector
87 
88  // check if we are coarsening the preconditioned system then
90 
91  // create coarse grid operator
92  DiracParam diracParam;
93  diracParam.transfer = transfer;
94 
95  diracParam.dirac = preconditioned_coarsen ? const_cast<Dirac*>(param.matSmooth->Expose()) : const_cast<Dirac*>(param.matResidual->Expose());
96  diracParam.kappa = param.matResidual->Expose()->Kappa();
97  diracParam.mu = param.matResidual->Expose()->Mu();
99 
100  diracParam.dagger = QUDA_DAG_NO;
101  diracParam.matpcType = matpc_type;
102  diracParam.tmp1 = tmp_coarse;
103  // use even-odd preconditioning for the coarse grid solver
104  diracCoarseResidual = new DiracCoarse(diracParam);
106 
107  // create smoothing operators
108  diracParam.dirac = const_cast<Dirac*>(param.matSmooth->Expose());
112  new DiracCoarsePC(static_cast<DiracCoarse&>(*diracCoarseResidual), diracParam) :
113  new DiracCoarse(static_cast<DiracCoarse&>(*diracCoarseResidual), diracParam);
114  diracCoarseSmootherSloppy = diracCoarseSmoother; // for coarse grids these always alias for now (FIXME half precision support for coarse op)
115 
118 
119  printfQuda("Creating coarse null-space vectors\n");
120  B_coarse = new std::vector<ColorSpinorField*>();
// allocate max(Nvec on this level, Nvec requested on the next level) coarse vectors
121  int nVec_coarse = std::max(param.Nvec, param.mg_global.n_vec[param.level+1]);
122  B_coarse->resize(nVec_coarse);
123 
124  for (int i=0; i<nVec_coarse; i++)
125  (*B_coarse)[i] = param.B[0]->CreateCoarse(param.geoBlockSize, param.spinBlockSize, param.Nvec);
126 
127  // if we're not generating on all levels then we need to propagate the vectors down
129  for (int i=0; i<param.Nvec; i++) {
130  zero(*(*B_coarse)[i]);
131  transfer->R(*(*B_coarse)[i], *(param.B[i]));
132  }
133  }
134 
135  // create the next multigrid level
136  printfQuda("Creating next multigrid level\n");
137  // NOTE(review): allocation of param_coarse is on an elided line; only the
// post-allocation member assignments are visible here.
138  param_coarse->fine = this;
139  param_coarse->delta = 1e-20;
140 
142 
143  setOutputPrefix(prefix); // restore since we just popped back from coarse grid
144 
145  // if on the second to bottom level then we can just use the coarse solver as is
148  printfQuda("Assigned coarse solver to coarse MG operator\n");
149  } else if (param.cycle_type == QUDA_MG_CYCLE_RECURSIVE) {
151 
155 
159  param_coarse_solver->maxiter = 11; // FIXME - dirty hack
167 
168  // need this to ensure we don't use half precision on the preconditioner in GCR
170 
173  sprintf(coarse_prefix,"MG level %d (%s): ", param.level+2, param.mg_global.location[param.level+1] == QUDA_CUDA_FIELD_LOCATION ? "GPU" : "CPU" );
175  } else {
177  sprintf(coarse_prefix,"MG level %d (%s): ", param.level+2, param.mg_global.location[param.level+1] == QUDA_CUDA_FIELD_LOCATION ? "GPU" : "CPU" );
179  }
180 
181  printfQuda("Assigned coarse solver to preconditioned GCR solver\n");
182  } else {
183  errorQuda("Multigrid cycle type %d not supported", param.cycle_type);
184  }
185 
186  }
187 
188  printfQuda("setup completed\n");
189 
190  // now we can run through the verification if requested
191  if (param.level == 0 && param.mg_global.run_verify) verify();
192 
// reset() is only invoked from the top level; it recurses down the hierarchy
193  if (param.level == 0) reset();
194 
195  // print out profiling information for the adaptive setup
197  // Reset the profile for accurate solver timing
198  profile.TPRESET();
199 
200  setOutputPrefix("");
201  }
202 
// Reset the transfer operator's site subset (full vs single parity) and
// recurse down the hierarchy so every level below is reset consistently.
// NOTE(review): site_subset and parity are computed on elided lines
// (non-contiguous numbering) — their derivation is not visible here.
203  void MG::reset() {
207  transfer->setSiteSubset(site_subset, parity); // use this to force location of transfer
208 
// only recurse while a coarser MG level exists below this one
209  if (param.level < param.Nlevel-2) coarse->reset();
210  }
211 
// MG::createSmoother body (the signature line, original line 213, is elided
// from this listing). Configures and instantiates the pre/post smoothers:
// the coarsest level gets an exact-ish solve (larger Krylov space, many
// iterations, tight delta), intermediate levels also get a post smoother.
212 
214  // create the smoother for this level
215  printfQuda("smoother has operator %s\n", typeid(param.matSmooth).name());
216 
218 
228  if (param.level == 0) {
231  }
232 
// bottom level: the "presmoother" doubles as the coarsest-grid solver,
// hence the much larger iteration budget and reliable-update delta
233  if (param.level==param.Nlevel-1) {
234  param_presmooth->Nkrylov = 20;
235  param_presmooth->maxiter = 1000;
237  param_presmooth->delta = 1e-8;
240  }
241 
244 
245  if (param.level < param.Nlevel-1) { //Create the post smoother
251  }
252  }
// MG::destroySmoother body (the signature line, original line 254, is elided).
// Frees smoothers and their parameter structs, nulling each pointer so a
// subsequent createSmoother()/destroy is safe (idempotent teardown).
253 
// a post smoother only exists on non-coarsest levels; on the bottom level
// presmoother and postsmoother would alias, so guard against double delete
255  if (param.level < param.Nlevel-1) {
256  if (postsmoother) delete postsmoother;
257  postsmoother = nullptr;
258  }
259  if (presmoother) delete presmoother;
260  presmoother = nullptr;
261 
262  if (param_presmooth) delete param_presmooth;
263  param_presmooth = nullptr;
265  param_postsmooth = nullptr;
266  }
// Teardown region (function signatures on elided lines — by the ordering and
// the surviving close braces this appears to span the coarse-level cleanup
// and the MG destructor). Frees, in order: coarse solver + params, coarse
// null-space vectors, the coarse MG level, the transfer operator, then the
// smoothers and this level's residual/coarse work vectors.
267 
269  if (param.level < param.Nlevel-1) {
271  delete coarse_solver;
272  delete param_coarse_solver;
273  }
274 
275  if (B_coarse) {
// must match the max() used at allocation time in the constructor
276  int nVec_coarse = std::max(param.Nvec, param.mg_global.n_vec[param.level+1]);
277  for (int i=0; i<nVec_coarse; i++) if ((*B_coarse)[i]) delete (*B_coarse)[i];
278  delete B_coarse;
279  }
280  if (coarse) delete coarse;
281  if (transfer) delete transfer;
288  }
289 
290  destroySmoother();
291 
293  if (r) delete r;
294  if (r_coarse) delete r_coarse;
295  if (x_coarse) delete x_coarse;
296  if (tmp_coarse) delete tmp_coarse;
297 
298  if (param_coarse) delete param_coarse;
299 
301  }
302 
// Return the accumulated flop count for this level and everything below it,
// draining the smoother gflop counters as it goes.
// NOTE(review): the line resetting param_postsmooth->gflops (original line
// 314) is elided from this listing; the presmooth counter reset is visible.
303  double MG::flops() const {
304  double flops = 0;
// recurse first: sum the coarser levels' contribution
305  if (param.level < param.Nlevel-1) flops += coarse->flops();
306 
307  if (param_presmooth) {
308  flops += param_presmooth->gflops * 1e9;
// zero the counter so flops are not double-counted on the next call
309  param_presmooth->gflops = 0;
310  }
311 
312  if (param_postsmooth) {
313  flops += param_postsmooth->gflops * 1e9;
315  }
316 
317  if (transfer) {
318  flops += transfer->flops();
319  }
320 
321  return flops;
322  }
323 
// Sanity checks for this MG level: (1) the null vectors survive a
// restrict/prolongate round trip, (2) prolongate/restrict is the identity on
// coarse vectors, (3) the native coarse operator matches the emulated
// R * D * P operator, (4) MdagM is Hermitian (imaginary part of
// <eta, M^dag M eta> vanishes), and optionally (5) ARPACK eigenvector
// overlap. Each check errors out if the deviation exceeds tol.
// NOTE(review): documentation extract with elided lines — tmp1/tmp2/csParam
// setup and several branch headers are missing from this listing.
327  void MG::verify() {
329 
330  // temporary fields used for verification
335  double deviation;
// tolerance scales with the field precision (precision enum encodes bytes)
336  double tol = std::pow(10.0, 4-2*csParam.precision);
337 
338  printfQuda("\n");
339  printfQuda("Checking 0 = (1 - P P^\\dagger) v_k for %d vectors\n", param.Nvec);
340 
341  for (int i=0; i<param.Nvec; i++) {
342  // as well as copying to the correct location this also changes basis if necessary
343  *tmp1 = *param.B[i];
344 
345  transfer->R(*r_coarse, *tmp1);
346  transfer->P(*tmp2, *r_coarse);
347 
348  printfQuda("Vector %d: norms v_k = %e P^\\dagger v_k = %e P P^\\dagger v_k = %e\n",
349  i, norm2(*tmp1), norm2(*r_coarse), norm2(*tmp2));
350 
351  deviation = sqrt( xmyNorm(*tmp1, *tmp2) / norm2(*tmp1) );
352  printfQuda("L2 relative deviation = %e\n", deviation);
353  if (deviation > tol) errorQuda("failed");
354  }
355 
356 #if 0
357  printfQuda("Checking 1 > || (1 - D P (P^\\dagger D P) P^\\dagger v_k || / || v_k || for %d vectors\n",
358  param.Nvec);
359 
360  for (int i=0; i<param.Nvec; i++) {
361  transfer->R(*r_coarse, *(param.B[i]));
362  (*coarse)(*x_coarse, *r_coarse); // this needs to be an exact solve to pass
363  setOutputPrefix(prefix); // restore output prefix
364  transfer->P(*tmp2, *x_coarse);
366  *tmp2 = *(param.B[i]);
367  printfQuda("Vector %d: norms %e %e ", i, norm2(*param.B[i]), norm2(*tmp1));
368  printfQuda("relative residual = %e\n", sqrt(xmyNorm(*tmp2, *tmp1) / norm2(*param.B[i])) );
369  }
370 #endif
371 
372  printfQuda("\n");
373  printfQuda("Checking 0 = (1 - P^\\dagger P) eta_c\n");
// x_coarse is presumably filled with a random source on an elided line — TODO confirm
375  transfer->P(*tmp2, *x_coarse);
376  transfer->R(*r_coarse, *tmp2);
377  printfQuda("Vector norms %e %e (fine tmp %e) ", norm2(*x_coarse), norm2(*r_coarse), norm2(*tmp2));
378 
379  deviation = sqrt( xmyNorm(*x_coarse, *r_coarse) / norm2(*x_coarse) );
380  printfQuda("L2 relative deviation = %e\n", deviation);
381  if (deviation > tol ) errorQuda("failed");
382 
383  printfQuda("\n");
384  printfQuda("Comparing native coarse operator to emulated operator\n");
386  zero(*tmp_coarse);
387  zero(*r_coarse);
388 
390  transfer->P(*tmp1, *tmp_coarse);
391 
393  const Dirac &dirac = *(param.matSmooth->Expose());
394  double kappa = param.matResidual->Expose()->Kappa();
395  if (param.level==0) {
396  dirac.DslashXpay(tmp2->Even(), tmp1->Odd(), QUDA_EVEN_PARITY, tmp1->Even(), -kappa);
397  dirac.DslashXpay(tmp2->Odd(), tmp1->Even(), QUDA_ODD_PARITY, tmp1->Odd(), -kappa);
398  } else { // this is a hack since the coarse Dslash doesn't properly use the same xpay conventions yet
399  dirac.DslashXpay(tmp2->Even(), tmp1->Odd(), QUDA_EVEN_PARITY, tmp1->Even(), 1.0);
400  dirac.DslashXpay(tmp2->Odd(), tmp1->Even(), QUDA_ODD_PARITY, tmp1->Odd(), 1.0);
401  }
402  } else {
403  (*param.matResidual)(*tmp2,*tmp1);
404  }
405 
406  transfer->R(*x_coarse, *tmp2);
408 
409 #if 0 // enable to print out emulated and actual coarse-grid operator vectors for debugging
410  printfQuda("emulated\n");
411  for (int x=0; x<x_coarse->Volume(); x++) tmp1->PrintVector(x);
412 
413  printfQuda("actual\n");
414  for (int x=0; x<r_coarse->Volume(); x++) tmp2->PrintVector(x);
415 #endif
416 
417  printfQuda("Vector norms Emulated=%e Native=%e ", norm2(*x_coarse), norm2(*r_coarse));
418 
419  deviation = sqrt( xmyNorm(*x_coarse, *r_coarse) / norm2(*x_coarse) );
420 
421  // When the mu is shifted on the coarse level; we can compute exactly the error we introduce in the check:
422  // it is given by 2*kappa*delta_mu || tmp_coarse ||; where tmp_coarse is the random vector generated for the test
423  if(param.matResidual->Expose()->Mu() != 0) {
424  double delta_factor = param.mg_global.mu_factor[param.level+1] - param.mg_global.mu_factor[param.level];
425  if(fabs(delta_factor) > tol ) {
426  double delta_a = delta_factor * 2.0 * param.matResidual->Expose()->Kappa() *
// subtract the known mu-shift contribution so only genuine error remains
428  deviation -= fabs(delta_a) * sqrt( norm2(*tmp_coarse) / norm2(*x_coarse) );
429  deviation = fabs(deviation);
430  }
431  }
432  printfQuda("L2 relative deviation = %e\n\n", deviation);
433  if (deviation > tol) errorQuda("failed");
434 
435  // here we check that the Hermitian conjugate operator is working
436  // as expected for both the smoother and residual Dirac operators
438  const Dirac &diracS = *(param.matSmooth->Expose());
439  diracS.MdagM(tmp2->Even(), tmp1->Odd());
440  Complex dot = cDotProduct(tmp2->Even(),tmp1->Odd());
// for a Hermitian M^dag M the inner product must be real up to rounding
441  double deviation = std::fabs(dot.imag()) / std::fabs(dot.real());
442  printfQuda("Smoother normal operator test (eta^dag M^dag M eta): real=%e imag=%e, relative imaginary deviation=%e\n",
443  real(dot), imag(dot), deviation);
444  if (deviation > tol) errorQuda("failed");
445 
446  const Dirac &diracR = *(param.matResidual->Expose());
447  diracR.MdagM(*tmp2, *tmp1);
448  dot = cDotProduct(*tmp2,*tmp1);
449 
450  deviation = std::fabs(dot.imag()) / std::fabs(dot.real());
451  printfQuda("Residual normal operator test (eta^dag M^dag M eta): real=%e imag=%e, relative imaginary deviation=%e\n",
452  real(dot), imag(dot), deviation);
453  if (deviation > tol) errorQuda("failed");
454  } else {
455  const Dirac &dirac = *(param.matResidual->Expose());
456 
457  dirac.MdagM(*tmp2, *tmp1);
459 
460  double deviation = std::fabs(dot.imag()) / std::fabs(dot.real());
461  printfQuda("Normal operator test (eta^dag M^dag M eta): real=%e imag=%e, relative imaginary deviation=%e\n",
462  real(dot), imag(dot), deviation);
463  if (deviation > tol) errorQuda("failed");
464  }
465 
466 #ifdef ARPACK_LIB
467  printfQuda("\n");
468  printfQuda("Check eigenvector overlap for level %d\n", param.level );
469 
470  int nmodes = 128;
471  int ncv = 256;
472  double arpack_tol = 1e-7;
473  char *which = (char*)malloc(256*sizeof(char));
474  sprintf(which, "SM");/* ARPACK which="{S,L}{R,I,M}" */
475 
476  ColorSpinorParam cpuParam(*param.B[0]);
477  cpuParam.create = QUDA_ZERO_FIELD_CREATE;
478 
481 
483  cpuParam.x[0] /= 2;
485  }
486 
487  std::vector<ColorSpinorField*> evecsBuffer;
488  evecsBuffer.reserve(nmodes);
489 
490  for (int i = 0; i < nmodes; i++) evecsBuffer.push_back( new cpuColorSpinorField(cpuParam) );
491 
492  QudaPrecision matPrecision = QUDA_SINGLE_PRECISION;//manually adjusted?
493  QudaPrecision arpPrecision = QUDA_DOUBLE_PRECISION;//precision used in ARPACK routines, may not coincide with matvec precision
494 
495  void *evalsBuffer = arpPrecision == QUDA_DOUBLE_PRECISION ? static_cast<void*>(new std::complex<double>[nmodes+1]) : static_cast<void*>( new std::complex<float>[nmodes+1]);
496  //
497  arpackSolve( evecsBuffer, evalsBuffer, *param.matSmooth, matPrecision, arpPrecision, arpack_tol, nmodes, ncv, which);
498 
499  for (int i=0; i<nmodes; i++) {
500  // as well as copying to the correct location this also changes basis if necessary
501  *tmp1 = *evecsBuffer[i];
502 
503  transfer->R(*r_coarse, *tmp1);
504  transfer->P(*tmp2, *r_coarse);
505 
506  printfQuda("Vector %d: norms v_k = %e P^\\dagger v_k = %e P P^\\dagger v_k = %e\n",
507  i, norm2(*tmp1), norm2(*r_coarse), norm2(*tmp2));
508 
// unlike the null-vector check above, this one only reports — no errorQuda
509  deviation = sqrt( xmyNorm(*tmp1, *tmp2) / norm2(*tmp1) );
510  printfQuda("L2 relative deviation = %e\n", deviation);
511  }
512 
513  for (unsigned int i = 0; i < evecsBuffer.size(); i++) delete evecsBuffer[i];
514 
// BUG(review): evalsBuffer was allocated with new[], so these should be
// delete[] — scalar delete on a new[] array is undefined behavior
515  if( arpPrecision == QUDA_DOUBLE_PRECISION ) delete static_cast<std::complex<double>* >(evalsBuffer);
516  else delete static_cast<std::complex<float>* > (evalsBuffer);
517 
518  free(which);
519 #else
520  warningQuda("\nThis test requires ARPACK.\n");
521 #endif
522 
523  delete tmp1;
524  delete tmp2;
525  delete tmp_coarse;
526 
527  if (param.level < param.Nlevel-2) coarse->verify();
528  }
529 
// One V-cycle at this level: pre-smooth, form the residual, restrict, solve
// on the coarse grid (recursively), prolongate the correction, post-smooth.
// On the coarsest level only the direct (pre)smoother solve is applied.
// NOTE(review): documentation extract — the operator() signature line and a
// few statements (e.g. the b_tilde assignment guarded by line 567's else)
// are elided from this listing.
531  char prefix_bkup[100]; strncpy(prefix_bkup, prefix, 100); setOutputPrefix(prefix);
532 
533  // if input vector is single parity then we must be solving the
534  // preconditioned system in general this can only happen on the
535  // top level
536  QudaSolutionType outer_solution_type = b.SiteSubset() == QUDA_FULL_SITE_SUBSET ? QUDA_MAT_SOLUTION : QUDA_MATPC_SOLUTION;
537  QudaSolutionType inner_solution_type = param.coarse_grid_solution_type;
538 
539  if (debug) printfQuda("outer_solution_type = %d, inner_solution_type = %d\n", outer_solution_type, inner_solution_type);
540 
541  if ( outer_solution_type == QUDA_MATPC_SOLUTION && inner_solution_type == QUDA_MAT_SOLUTION)
542  errorQuda("Unsupported solution type combination");
543 
544  if ( inner_solution_type == QUDA_MATPC_SOLUTION && param.smoother_solve_type != QUDA_DIRECT_PC_SOLVE)
545  errorQuda("For this coarse grid solution type, a preconditioned smoother is required");
546 
547  if ( debug ) printfQuda("entering V-cycle with x2=%e, r2=%e\n", norm2(x), norm2(b));
548 
549  if (param.level < param.Nlevel-1) {
550  //transfer->setTransferGPU(false); // use this to force location of transfer (need to check if still works for multi-level)
551 
552  // do the pre smoothing
553  if ( debug ) printfQuda("pre-smoothing b2=%e\n", norm2(b));
554 
555  ColorSpinorField *out=nullptr, *in=nullptr;
556 
557  ColorSpinorField &residual = b.SiteSubset() == QUDA_FULL_SITE_SUBSET ? *r : r->Even();
558 
559  // FIXME only need to make a copy if not preconditioning
560  residual = b; // copy source vector since we will overwrite source with iterated residual
561 
562  const Dirac &dirac = *(param.matSmooth->Expose());
563  dirac.prepare(in, out, x, residual, outer_solution_type);
564 
565  // b_tilde holds either a copy of preconditioned source or a pointer to original source
567  else b_tilde = &b;
568 
569  (*presmoother)(*out, *in);
570 
571  ColorSpinorField &solution = inner_solution_type == outer_solution_type ? x : x.Even();
572  dirac.reconstruct(solution, b, inner_solution_type);
573 
574  // if using preconditioned smoother then need to reconstruct full residual
575  // FIXME extend this check for precision, Schwarz, etc.
576  bool use_solver_residual =
577  ( (param.smoother_solve_type == QUDA_DIRECT_PC_SOLVE && inner_solution_type == QUDA_MATPC_SOLUTION) ||
578  (param.smoother_solve_type == QUDA_DIRECT_SOLVE && inner_solution_type == QUDA_MAT_SOLUTION) )
579  ? true : false;
580 
581  // FIXME this is currently borked if inner solver is preconditioned
582  double r2 = 0.0;
583  if (use_solver_residual) {
584  if (debug) r2 = norm2(*r);
585  } else {
// recompute r = b - A x explicitly since the smoother residual is unusable
586  (*param.matResidual)(*r, x);
587  if (debug) r2 = xmyNorm(b, *r);
588  else axpby(1.0, b, -1.0, *r);
589  }
590 
591  // restrict to the coarse grid
592  transfer->R(*r_coarse, residual);
593  if ( debug ) printfQuda("after pre-smoothing x2 = %e, r2 = %e, r_coarse2 = %e\n", norm2(x), r2, norm2(*r_coarse));
594 
595  // recurse to the next lower level
596  (*coarse_solver)(*x_coarse, *r_coarse);
597 
598  setOutputPrefix(prefix); // restore prefix after return from coarse grid
599 
600  if ( debug ) printfQuda("after coarse solve x_coarse2 = %e r_coarse2 = %e\n", norm2(*x_coarse), norm2(*r_coarse));
601 
602  // prolongate back to this grid
603  ColorSpinorField &x_coarse_2_fine = inner_solution_type == QUDA_MAT_SOLUTION ? *r : r->Even(); // define according to inner solution type
604  transfer->P(x_coarse_2_fine, *x_coarse); // repurpose residual storage
605 
606  xpy(x_coarse_2_fine, solution); // sum to solution FIXME - sum should be done inside the transfer operator
607  if ( debug ) {
608  printfQuda("Prolongated coarse solution y2 = %e\n", norm2(*r));
609  printfQuda("after coarse-grid correction x2 = %e, r2 = %e\n",
610  norm2(x), norm2(*r));
611  }
612 
613  // do the post smoothing
614  //residual = outer_solution_type == QUDA_MAT_SOLUTION ? *r : r->Even(); // refine for outer solution type
616  in = b_tilde;
617  } else { // this incurs unnecessary copying
618  *r = b;
619  in = r;
620  }
621 
622  //dirac.prepare(in, out, solution, residual, inner_solution_type);
623  // we should keep a copy of the prepared right hand side as we've already destroyed it
624  (*postsmoother)(*out, *in); // for inner solve preconditioned, `in` should be the original prepared rhs
625 
626  dirac.reconstruct(x, b, outer_solution_type);
627 
628  } else { // do the coarse grid solve
629 
630  const Dirac &dirac = *(param.matSmooth->Expose());
631  ColorSpinorField *out=nullptr, *in=nullptr;
632 
633  dirac.prepare(in, out, x, b, outer_solution_type);
634  (*presmoother)(*out, *in);
635  dirac.reconstruct(x, b, outer_solution_type);
636  }
637 
// debug-only true residual check of the full cycle
638  if ( debug ) {
639  (*param.matResidual)(*r, x);
640  double r2 = xmyNorm(b, *r);
641  printfQuda("leaving V-cycle with x2=%e, r2=%e\n", norm2(x), r2);
642  }
643 
// top level restores the empty prefix; inner levels restore their own
644  setOutputPrefix(param.level == 0 ? "" : prefix_bkup);
645  }
646 
647  //supports separate reading or single file read
// Load the null-space vectors for this level from disk (QIO), or, when no
// input file is configured, fall back to constant sources for the first two
// vectors and random sources for the rest.
648  void MG::loadVectors(std::vector<ColorSpinorField*> &B) {
651 
// per-level filename: "<vec_infile>_level_<level>"
652  std::string vec_infile(param.mg_global.vec_infile);
653  vec_infile += "_level_";
654  vec_infile += std::to_string(param.level);
655 
656  const int Nvec = B.size();
657  printfQuda("Start loading %d vectors from %s\n", Nvec, vec_infile.c_str());
658 
// NOTE(review): V is never freed in the visible code — delete[] V is
// either on an elided line or this leaks; confirm against full source
659  void **V = new void*[Nvec];
660  for (int i=0; i<Nvec; i++) {
661  V[i] = B[i]->V();
662  if (V[i] == NULL) {
663  printfQuda("Could not allocate V[%d]\n", i);
664  }
665  }
666 
667  if (strcmp(vec_infile.c_str(),"")!=0) {
668 #ifdef HAVE_QIO
669  read_spinor_field(vec_infile.c_str(), &V[0], B[0]->Precision(), B[0]->X(),
670  B[0]->Ncolor(), B[0]->Nspin(), Nvec, 0, (char**)0);
671 #else
672  errorQuda("\nQIO library was not built.\n");
673 #endif
674  } else {
675  printfQuda("Using %d constant nullvectors\n", Nvec);
676  //errorQuda("No nullspace file defined");
677 
// first two vectors: sum of constant sources over alternating spins
678  for (int i = 0; i < (Nvec < 2 ? Nvec : 2); i++) {
679  zero(*B[i]);
680 #if 1
684  for (int s=i; s<4; s+=2) {
685  for (int c=0; c<B[i]->Ncolor(); c++) {
686  tmp->Source(QUDA_CONSTANT_SOURCE, 1, s, c);
687  //tmp->Source(QUDA_SINUSOIDAL_SOURCE, 3, s, 2); // sin in dim 3, mode s, offset = 2
688  xpy(*tmp,*B[i]);
689  }
690  }
691  delete tmp;
692 #else
693  printfQuda("Using random source for nullvector = %d\n",i);
694  B[i]->Source(QUDA_RANDOM_SOURCE);
695 #endif
696  //printfQuda("B[%d]\n",i);
697  //for (int x=0; x<B[i]->Volume(); x++) static_cast<cpuColorSpinorField*>(B[i])->PrintVector(x);
698  }
699 
// remaining vectors get random sources
700  for (int i=2; i<Nvec; i++) B[i] -> Source(QUDA_RANDOM_SOURCE);
701 
702 
703  printfQuda("Done loading vectors\n");
706  }
707 
// Save this level's null-space vectors to disk via QIO, using the per-level
// filename "<vec_outfile>_level_<level>". A no-op when vec_outfile is empty;
// errors out if saving is requested but QIO was not compiled in.
708  void MG::saveVectors(std::vector<ColorSpinorField*> &B) {
709 #ifdef HAVE_QIO
712  std::string vec_outfile(param.mg_global.vec_outfile);
713  vec_outfile += "_level_";
714  vec_outfile += std::to_string(param.level);
715 
716  if (strcmp(param.mg_global.vec_outfile,"")!=0) {
717  const int Nvec = B.size();
718  printfQuda("Start saving %d vectors to %s\n", Nvec, vec_outfile.c_str());
719 
// gather the raw data pointers QIO expects
720  void **V = static_cast<void**>(safe_malloc(Nvec*sizeof(void*)));
721  for (int i=0; i<Nvec; i++) {
722  V[i] = B[i]->V();
723  if (V[i] == NULL) {
724  printfQuda("Could not allocate V[%d]\n", i);
725  }
726  }
727 
728  write_spinor_field(vec_outfile.c_str(), &V[0], B[0]->Precision(), B[0]->X(),
729  B[0]->Ncolor(), B[0]->Nspin(), Nvec, 0, (char**)0);
730 
731  host_free(V);
732  printfQuda("Done saving vectors\n");
733  }
734 
737 #else
738  if (strcmp(param.mg_global.vec_outfile,"")!=0) {
739  errorQuda("\nQIO library was not built.\n");
740  }
741 #endif
742  }
743 
// Generate near-null-space vectors for this level by inverse iteration:
// for each vector, start from a random guess, solve M x = 0 approximately
// with the smoother operator, Gram-Schmidt the result against the previously
// generated vectors, normalize, then copy back to the (CPU) B vectors and
// optionally save them to disk.
// NOTE(review): documentation extract — allocation of the rhs field `b`,
// the GCR/else solver creation branches, and some precision settings sit on
// elided lines.
744  void MG::generateNullVectors(std::vector<ColorSpinorField*> B) {
745  printfQuda("\nGenerate null vectors\n");
746 
747  SolverParam solverParam(param); // Set solver field parameters:
748 
749  // set null-space generation options - need to expose these
750  solverParam.maxiter = 500;
751  solverParam.tol = param.mg_global.setup_tol[param.level];
753  solverParam.delta = 1e-7;
755  solverParam.Nkrylov = 4;
756  solverParam.pipeline = (solverParam.inv_type == QUDA_BICGSTABL_INVERTER ? 4 : 0); // pipeline != 0 breaks BICGSTAB
757 
758  if (param.level == 0) { // this enables half precision on the fine grid only if set
// loosen the reliable-update delta when running sloppy half precision
761  if (solverParam.precision_sloppy == QUDA_HALF_PRECISION) solverParam.delta = 1e-1;
762  }
763  solverParam.residual_type = static_cast<QudaResidualType>(QUDA_L2_RELATIVE_RESIDUAL);
765 
766  ColorSpinorParam csParam(*B[0]); // Create spinor field parameters:
767  // to force setting the field to be native first set to double-precision native order
768  // then use the setPrecision method to set to native order
769  csParam.fieldOrder = QUDA_FLOAT2_FIELD_ORDER;
770  csParam.precision = QUDA_DOUBLE_PRECISION;
771  csParam.setPrecision(B[0]->Precision());
772 
773  csParam.location = QUDA_CUDA_FIELD_LOCATION; // hard code to GPU location for null-space generation for now
774  csParam.gammaBasis = QUDA_UKQCD_GAMMA_BASIS;
778 
779  std::vector<ColorSpinorField*> B_gpu;
780 
781  const Dirac &dirac = *(param.matSmooth->Expose());
782  Solver *solve;
783  DiracMdagM mdagm(dirac);
784  const Dirac &diracSloppy = *(param.matSmoothSloppy->Expose());
785  DiracMdagM mdagmSloppy(diracSloppy);
786  if(solverParam.inv_type == QUDA_CG_INVERTER) {
// CG on the normal equations converges slowly — give it more iterations
787  solverParam.maxiter = 2000;
788  solve = Solver::create(solverParam, mdagm, mdagmSloppy, mdagmSloppy, profile);
789  } else if(solverParam.inv_type == QUDA_GCR_INVERTER) {
793  solverParam.precondition_cycle = 1;
795  } else {
797  }
798 
799  // Generate sources and launch solver for each source:
800  for(unsigned int i=0; i<B.size(); i++) {
801  B[i]->Source(QUDA_RANDOM_SOURCE); //random initial guess
802 
803  B_gpu.push_back(ColorSpinorField::Create(csParam));
804  ColorSpinorField *x = B_gpu[i];
805  *x = *B[i]; // copy initial guess to GPU:
806 
807  zero(*b); // need zero rhs
808 
809  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Initial guess = %g\n", norm2(*x));
810 
811  ColorSpinorField *out=nullptr, *in=nullptr;
812  dirac.prepare(in, out, *x, *b, QUDA_MAT_SOLUTION);
813  (*solve)(*out, *in);
814  dirac.reconstruct(*x, *b, QUDA_MAT_SOLUTION);
815 
816  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Solution = %g\n", norm2(*x));
817 
818  // global orthonormalization of the generated null-space vectors
// NOTE(review): the inner `int i` shadows the outer loop index; it works
// because the loop terminates when B_gpu[i] aliases x, but is fragile
819  for (int i=0; B_gpu[i] != x; ++i) {
820  Complex alpha = cDotProduct(*B_gpu[i], *x);//<j,i>
821  caxpy(-alpha, *B_gpu[i], *x); //i-<j,i>j
822  }
823 
824  double nrm2 = norm2(*x);
825  if (nrm2 > 1e-16) ax(1.0 /sqrt(nrm2), *x);
826  else errorQuda("\nCannot orthogonalize %u vector\n", i);
827 
828  }
829 
830  delete solve;
831  delete b;
832 
// copy the generated vectors back to the caller's (CPU) fields
833  for (int i=0; i<(int)B.size(); i++) {
834  *B[i] = *B_gpu[i];
835  delete B_gpu[i];
836  }
837 
838  if (strcmp(param.mg_global.vec_outfile,"")!=0) { // only save if outfile is defined
839  saveVectors(B);
840  }
841 
842  return;
843  }
844 
845 }
bool global_reduction
whether the solver is acting as a preconditioner for another solver
Definition: invert_quda.h:201
DiracMatrix * matSmoothSloppy
Definition: multigrid.h:78
QudaBoolean global_reduction
Definition: multigrid.h:69
QudaMultigridParam & mg_global
Definition: multigrid.h:30
QudaVerbosity verbosity_precondition
Definition: invert_quda.h:197
SolverParam * param_coarse_solver
Definition: multigrid.h:215
void free(void *)
double Kappa() const
Definition: dirac_quda.h:144
std::vector< ColorSpinorField * > & B
Definition: multigrid.h:54
enum QudaPrecision_s QudaPrecision
const Dirac * Expose()
Definition: dirac_quda.h:1011
MGParam * param_coarse
Definition: multigrid.h:206
QudaFieldLocation location
Definition: multigrid.h:91
void saveVectors(std::vector< ColorSpinorField *> &B)
Save the null space vectors in from file.
Definition: multigrid.cpp:708
virtual void Source(const QudaSourceType sourceType, const int st=0, const int s=0, const int c=0)=0
void verify()
Definition: multigrid.cpp:327
TimeProfile & profile_global
Definition: multigrid.h:185
enum QudaResidualType_s QudaResidualType
QudaInverterType inv_type
Definition: invert_quda.h:19
QudaVerbosity getVerbosity()
Definition: util_quda.cpp:20
void createSmoother()
Create the smoothers.
Definition: multigrid.cpp:213
ColorSpinorField * b_tilde
Definition: multigrid.h:227
#define errorQuda(...)
Definition: util_quda.h:90
void R(ColorSpinorField &out, const ColorSpinorField &in) const
Definition: transfer.cpp:300
#define host_free(ptr)
Definition: malloc_quda.h:59
QudaSolveType smoother_solve_type[QUDA_MAX_MG_LEVEL]
Definition: quda.h:426
void destroySmoother()
Free the smoothers.
Definition: multigrid.cpp:254
double setup_tol[QUDA_MAX_MG_LEVEL]
Definition: quda.h:416
__host__ __device__ ValueType sqrt(ValueType x)
Definition: complex_quda.h:105
Complex cDotProduct(ColorSpinorField &, ColorSpinorField &)
Definition: reduce_quda.cu:500
std::complex< double > Complex
Definition: eig_variables.h:13
void setOutputPrefix(const char *prefix)
Definition: util_quda.cpp:68
Dirac * diracCoarseSmoother
Definition: multigrid.h:242
Transfer * transfer
Definition: dirac_quda.h:46
DiracMatrix * matCoarseSmootherSloppy
Definition: multigrid.h:254
SolverParam * param_presmooth
Definition: multigrid.h:209
cudaColorSpinorField * tmp
Definition: covdev_test.cpp:44
const ColorSpinorField & Even() const
static ColorSpinorField * Create(const ColorSpinorParam &param)
cudaMipmappedArray_const_t unsigned int level
ColorSpinorField * r
Definition: multigrid.h:224
void reset()
Definition: multigrid.cpp:203
MGParam & param
Definition: multigrid.h:176
void arpackSolve(std::vector< ColorSpinorField *> &B, void *evals, DiracMatrix &matEigen, QudaPrecision matPrec, QudaPrecision arpackPrec, double tol, int nev, int ncv, char *target)
void P(ColorSpinorField &out, const ColorSpinorField &in) const
Definition: transfer.cpp:262
STL namespace.
double pow(double, double)
double xmyNorm(ColorSpinorField &x, ColorSpinorField &y)
Definition: reduce_quda.cu:364
void loadVectors(std::vector< ColorSpinorField *> &B)
Load the null space vectors in from file.
Definition: multigrid.cpp:648
QudaInverterType inv_type_precondition
Definition: invert_quda.h:25
QudaPreserveSource preserve_source
Definition: invert_quda.h:121
double mu_factor
Definition: dirac_quda.h:37
int spinBlockSize
Definition: multigrid.h:42
virtual double Mu() const
Definition: dirac_quda.h:145
void setSiteSubset(QudaSiteSubset site_subset, QudaParity parity)
Sets whether the transfer operator is to act on full fields or single parity fields, and if single-parity which parity. If site_subset is QUDA_FULL_SITE_SUBSET, the transfer operator can still be applied to single-parity fields, however, if site_subset is QUDA_PARITY_SITE_SUBSET, then the transfer operator cannot be applied to full fields, and setSiteSubset will need to be called first to reset to QUDA_FULL_SITE_SUBSET. This method exists to reduce GPU memory overhead - if only transfering single-parity fine fields then we only store a single-parity copy of the null space components on the device.
Definition: transfer.cpp:155
virtual void MdagM(ColorSpinorField &out, const ColorSpinorField &in) const =0
double mu_factor[QUDA_MAX_MG_LEVEL]
Definition: quda.h:471
QudaSolveType smoother_solve_type
Definition: multigrid.h:88
QudaSiteSubset siteSubset
Definition: lattice_field.h:55
static void axpby(Float a, Float *x, Float b, Float *y, int len)
Definition: dslash_util.h:33
double norm2(const CloverField &a, bool inverse=false)
Solver * presmoother
Definition: multigrid.h:182
void operator()(ColorSpinorField &out, ColorSpinorField &in)
Definition: multigrid.cpp:530
int n_vec[QUDA_MAX_MG_LEVEL]
Definition: quda.h:407
QudaGaugeParam param
Definition: pack_test.cpp:17
#define b
DiracMatrix * matCoarseSmoother
Definition: multigrid.h:251
static bool debug
Definition: multigrid.cpp:11
int strcmp(const char *__s1, const char *__s2)
int x[QUDA_MAX_DIM]
Definition: lattice_field.h:50
QudaComputeNullVector compute_null_vector
Definition: invert_quda.h:53
ColorSpinorField * tmp_coarse
Definition: multigrid.h:236
void ax(const double &a, GaugeField &u)
Scale the gauge field by the scalar a.
double tol
Definition: test_util.cpp:1647
void * malloc(size_t __size) __attribute__((__warn_unused_result__)) __attribute__((alloc_size(1)))
QudaFieldLocation location
char vec_infile[]
Definition: test_util.cpp:1636
const ColorSpinorField & Vectors(QudaFieldLocation location=QUDA_CPU_FIELD_LOCATION) const
Definition: transfer.h:195
QudaResidualType residual_type
Definition: invert_quda.h:47
VOLATILE spinorFloat kappa
SolverParam * param_postsmooth
Definition: multigrid.h:212
char prefix[128]
Definition: multigrid.h:191
#define tmp2
Definition: tmc_core.h:16
ColorSpinorParam csParam
Definition: pack_test.cpp:24
QudaDagType dagger
Definition: dirac_quda.h:30
QudaInverterType smoother
Definition: multigrid.h:81
cpuColorSpinorField * in
static Solver * create(SolverParam &param, DiracMatrix &mat, DiracMatrix &matSloppy, DiracMatrix &matPrecon, TimeProfile &profile)
Definition: solver.cpp:13
void Print()
Definition: timer.cpp:6
int V
Definition: test_util.cpp:28
enum QudaMatPCType_s QudaMatPCType
DiracMatrix * matSmooth
Definition: multigrid.h:75
#define warningQuda(...)
Definition: util_quda.h:101
int geoBlockSize[QUDA_MAX_DIM]
Definition: multigrid.h:39
double smoother_tol[QUDA_MAX_MG_LEVEL]
Definition: quda.h:438
Dirac * diracCoarseResidual
Definition: multigrid.h:239
enum QudaSolutionType_s QudaSolutionType
bool is_preconditioner
verbosity to use for preconditioner
Definition: invert_quda.h:199
QudaDiracType type
Definition: dirac_quda.h:22
char vec_outfile[]
Definition: test_util.cpp:1637
QudaVerbosity verbosity[QUDA_MAX_MG_LEVEL]
Definition: quda.h:410
#define tmp1
Definition: tmc_core.h:15
QudaMatPCType matpcType
NEW: used by mobius domain wall only.
Definition: dirac_quda.h:29
QudaBoolean run_verify
Definition: quda.h:456
std::vector< ColorSpinorField * > * B_coarse
Definition: multigrid.h:221
enum QudaParity_s QudaParity
QudaMatPCType matpc_type
Definition: test_util.cpp:1652
char coarse_prefix[128]
Definition: multigrid.h:194
ColorSpinorField * r_coarse
Definition: multigrid.h:230
char vec_outfile[256]
Definition: quda.h:462
void caxpy(const Complex &a, ColorSpinorField &x, ColorSpinorField &y)
Definition: blas_quda.cu:246
double tol_precondition
Definition: invert_quda.h:164
DiracMatrix * matCoarseResidual
Definition: multigrid.h:248
#define safe_malloc(size)
Definition: malloc_quda.h:54
std::vector< ColorSpinorField * > * B
Definition: multigrid.h:218
QudaPrecision precision_precondition
Definition: invert_quda.h:118
Dirac * diracCoarseSmootherSloppy
Definition: multigrid.h:245
QudaFieldLocation location[QUDA_MAX_MG_LEVEL]
Definition: quda.h:447
enum QudaSiteSubset_s QudaSiteSubset
virtual void solve(ColorSpinorField &out, ColorSpinorField &in)
Definition: solver.cpp:114
char * strncpy(char *__dst, const char *__src, size_t __n)
double smoother_tol
Definition: multigrid.h:63
GaugeCovDev * dirac
Definition: covdev_test.cpp:75
cpuColorSpinorField * out
QudaPrecision cuda_prec_precondition
Definition: quda.h:193
QudaSolutionType coarse_grid_solution_type[QUDA_MAX_MG_LEVEL]
Definition: quda.h:423
TimeProfile profile
Definition: multigrid.h:188
virtual ~MG()
Definition: multigrid.cpp:268
double flops() const
Return the total flops done on this and all coarser levels.
Definition: multigrid.cpp:303
int geo_block_size[QUDA_MAX_MG_LEVEL][QUDA_MAX_DIM]
Definition: quda.h:401
QudaBoolean generate_all_levels
Definition: quda.h:453
int sprintf(char *, const char *,...) __attribute__((__format__(__printf__
int nu_pre[QUDA_MAX_MG_LEVEL]
Definition: quda.h:432
void * preconditioner
Definition: invert_quda.h:31
#define QUDA_MAX_MG_LEVEL
Maximum number of multi-grid levels. This number may be increased if needed.
#define printfQuda(...)
Definition: util_quda.h:84
double fabs(double)
void xpy(ColorSpinorField &x, ColorSpinorField &y)
Definition: blas_quda.cu:128
QudaTwistFlavorType TwistFlavor() const
Transfer * transfer
Definition: multigrid.h:179
int transfer
Definition: covdev_test.cpp:55
void write_spinor_field(const char *filename, void *V[], QudaPrecision precision, const int *X, int nColor, int nSpin, int Nvec, int argc, char *argv[])
Definition: qio_field.h:22
const void * c
Solver * postsmoother
Definition: multigrid.h:182
QudaUseInitGuess use_init_guess
Definition: invert_quda.h:50
QudaPrecision precision_sloppy
Definition: invert_quda.h:115
int nu_post[QUDA_MAX_MG_LEVEL]
Definition: quda.h:435
MG(MGParam &param, TimeProfile &profile)
Definition: multigrid.cpp:13
QudaComputeNullVector compute_null_vector
Definition: quda.h:450
void generateNullVectors(std::vector< ColorSpinorField *> B)
Generate the null-space vectors.
Definition: multigrid.cpp:744
QudaSolutionType coarse_grid_solution_type
Definition: multigrid.h:85
MG * coarse
Definition: multigrid.h:197
QudaParity parity
Definition: covdev_test.cpp:53
double flops() const
Definition: transfer.cpp:342
char vec_infile[256]
Definition: quda.h:459
Solver * coarse_solver
Definition: multigrid.h:203
QudaMultigridCycleType cycle_type
Definition: multigrid.h:66
__device__ __host__ void zero(vector_type< scalar, n > &v)
Definition: cub_helper.cuh:82
QudaInverterType setup_inv_type[QUDA_MAX_MG_LEVEL]
Definition: quda.h:413
void read_spinor_field(const char *filename, void *V[], QudaPrecision precision, const int *X, int nColor, int nSpin, int Nvec, int argc, char *argv[])
Definition: qio_field.h:17
QudaInverterType smoother[QUDA_MAX_MG_LEVEL]
Definition: quda.h:419
QudaMatPCType matpc_type
Definition: quda.h:183
ColorSpinorField * tmp1
Definition: dirac_quda.h:40
DiracMatrix * matResidual
Definition: multigrid.h:72
ColorSpinorField * x_coarse
Definition: multigrid.h:233
QudaInvertParam * invert_param
Definition: quda.h:395
static void dot(sFloat *res, gFloat *a, sFloat *b)
Definition: dslash_util.h:56