QUDA  1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
inv_eigcg_quda.cpp
Go to the documentation of this file.
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <math.h>
4 #include <memory>
5 #include <iostream>
6 
7 #include <quda_internal.h>
8 #include <color_spinor_field.h>
9 #include <blas_quda.h>
10 #include <dslash_quda.h>
11 #include <invert_quda.h>
12 #include <util_quda.h>
13 #include <string.h>
14 
15 #ifdef MAGMA_LIB
16 #include <blas_magma.h>
17 #endif
18 
19 
20 #include <Eigen/Dense>
21 
22 #include <deflation.h>
23 
24 
25 /*
26 Based on eigCG(nev, m) algorithm:
27 A. Stathopolous and K. Orginos, arXiv:0707.0131
28 */
29 
30 namespace quda {
31 
32  using namespace blas;
33  using namespace Eigen;
34 
35  using DynamicStride = Stride<Dynamic, Dynamic>;
36  using DenseMatrix = MatrixXcd;
37  using VectorSet = MatrixXcd;
38  using Vector = VectorXcd;
39  using RealVector = VectorXd;
40 
41 //special types needed for compatibility with QUDA blas:
43 
44  static int max_eigcg_cycles = 4;//how many eigcg cycles do we allow?
45 
46 
48 
49  class EigCGArgs{
50 
51  public:
52  //host Lanczos matrice, and its eigenvalue/vector arrays:
53  DenseMatrix Tm;//VH A V,
54  //eigenvectors:
55  VectorSet ritzVecs;//array of (m) ritz and of m length
56  //eigenvalues of both T[m, m ] and T[m-1, m-1] (re-used)
57  RealVector Tmvals;//eigenvalues of T[m, m ] and T[m-1, m-1] (re-used)
58  //Aux matrix for computing 2k Ritz vectors:
60 
61  int m;
62  int k;
63  int id;//cuurent search spase index
64 
65  int restarts;
66  double global_stop;
67 
68  bool run_residual_correction;//used in mixed precision cycles
69 
70  ColorSpinorFieldSet *V2k; //eigCG accumulation vectors needed to update Tm (spinor matrix of size eigen_vector_length x (2*k))
71 
72  EigCGArgs(int m, int k) : Tm(DenseMatrix::Zero(m,m)), ritzVecs(VectorSet::Zero(m,m)), Tmvals(m), H2k(2*k, 2*k),
73  m(m), k(k), id(0), restarts(0), global_stop(0.0), run_residual_correction(false), V2k(nullptr) { }
74 
76  if(V2k) delete V2k;
77  }
78 
79  //method for constructing Lanczos matrix :
80  inline void SetLanczos(Complex diag_val, Complex offdiag_val) {
81  if(run_residual_correction) return;
82 
83  Tm.diagonal<0>()[id] = diag_val;
84 
85  if (id < (m-1)){ //Load Lanczos off-diagonals:
86  Tm.diagonal<+1>()[id] = offdiag_val;
87  Tm.diagonal<-1>()[id] = offdiag_val;
88  }
89 
90  id += 1;
91 
92  return;
93  }
94 
95  inline void ResetArgs() {
96  id = 0;
97  Tm.setZero();
98  Tmvals.setZero();
99  ritzVecs.setZero();
100 
101  if(V2k) delete V2k;
102  V2k = nullptr;
103  }
104 
105  inline void ResetSearchIdx() { id = 2*k; restarts += 1; }
106 
107  void RestartLanczos(ColorSpinorField *w, ColorSpinorFieldSet *v, const double inv_sqrt_r2)
108  {
109  Tm.setZero();
110 
111  std::unique_ptr<Complex[] > s(new Complex[2*k]);
112 
113  for(int i = 0; i < 2*k; i++) Tm(i,i) = Tmvals(i);//??
114 
115  std::vector<ColorSpinorField*> w_;
116  w_.push_back(w);
117 
118  std::vector<ColorSpinorField*> v_(v->Components().begin(), v->Components().begin()+2*k);
119 
120  blas::cDotProduct(s.get(), w_, v_);
121 
122  Map<VectorXcd, Unaligned > s_(s.get(), 2*k);
123  s_ *= inv_sqrt_r2;
124 
125  Tm.col(2*k).segment(0, 2*k) = s_;
126  Tm.row(2*k).segment(0, 2*k) = s_.adjoint();
127 
128  return;
129  }
130  };
131 
132  //Rayleigh Ritz procedure:
133  template <libtype which_lib> void ComputeRitz(EigCGArgs &args) { errorQuda("\nUnknown library type."); }
134 
135  //pure eigen version:
136  template <> void ComputeRitz<libtype::eigen_lib>(EigCGArgs &args)
137  {
138  const int m = args.m;
139  const int k = args.k;
140  //Solve m dim eigenproblem:
141  SelfAdjointEigenSolver<MatrixXcd> es_tm(args.Tm);
142  args.ritzVecs.leftCols(k) = es_tm.eigenvectors().leftCols(k);
143  //Solve m-1 dim eigenproblem:
144  SelfAdjointEigenSolver<MatrixXcd> es_tm1(Map<MatrixXcd, Unaligned, DynamicStride >(args.Tm.data(), (m-1), (m-1), DynamicStride(m, 1)));
145  Block<MatrixXcd>(args.ritzVecs.derived(), 0, k, m-1, k) = es_tm1.eigenvectors().leftCols(k);
146  args.ritzVecs.block(m-1, k, 1, k).setZero();
147 
148  MatrixXcd Q2k(MatrixXcd::Identity(m, 2*k));
149  HouseholderQR<MatrixXcd> ritzVecs2k_qr( Map<MatrixXcd, Unaligned >(args.ritzVecs.data(), m, 2*k) );
150  Q2k.applyOnTheLeft( ritzVecs2k_qr.householderQ() );
151 
152  //2. Construct H = QH*Tm*Q :
153  args.H2k = Q2k.adjoint()*args.Tm*Q2k;
154 
155  /* solve the small evecm1 2nev x 2nev eigenproblem */
156  SelfAdjointEigenSolver<MatrixXcd> es_h2k(args.H2k);
157  Block<MatrixXcd>(args.ritzVecs.derived(), 0, 0, m, 2*k) = Q2k * es_h2k.eigenvectors();
158  args.Tmvals.segment(0,2*k) = es_h2k.eigenvalues();//this is ok
159 
160  return;
161  }
162 
163  //(supposed to be a pure) magma version:
164  template <> void ComputeRitz<libtype::magma_lib>(EigCGArgs &args)
165  {
166 #ifdef MAGMA_LIB
167  const int m = args.m;
168  const int k = args.k;
169  //Solve m dim eigenproblem:
170  args.ritzVecs = args.Tm;
171  Complex *evecm = static_cast<Complex*>( args.ritzVecs.data());
172  double *evalm = static_cast<double *>( args.Tmvals.data());
173 
174  cudaHostRegister(static_cast<void *>(evecm), m*m*sizeof(Complex), cudaHostRegisterDefault);
175  magma_Xheev(evecm, m, m, evalm, sizeof(Complex));
176  //Solve m-1 dim eigenproblem:
177  DenseMatrix ritzVecsm1(args.Tm);
178  Complex *evecm1 = static_cast<Complex*>( ritzVecsm1.data());
179 
180  cudaHostRegister(static_cast<void *>(evecm1), m*m*sizeof(Complex), cudaHostRegisterDefault);
181  magma_Xheev(evecm1, (m-1), m, evalm, sizeof(Complex));
182  // fill 0s in mth element of old evecs:
183  for(int l = 1; l <= m ; l++) evecm1[l*m-1] = 0.0 ;
184  // Attach the first nev old evecs at the end of the nev latest ones:
185  memcpy(&evecm[k*m], evecm1, k*m*sizeof(Complex));
186 //?
187  // Orthogonalize the 2*nev (new+old) vectors evecm=QR:
188 
189  MatrixXcd Q2k(MatrixXcd::Identity(m, 2*k));
190  HouseholderQR<MatrixXcd> ritzVecs2k_qr( Map<MatrixXcd, Unaligned >(args.ritzVecs.data(), m, 2*k) );
191  Q2k.applyOnTheLeft( ritzVecs2k_qr.householderQ() );
192 
193  //2. Construct H = QH*Tm*Q :
194  args.H2k = Q2k.adjoint()*args.Tm*Q2k;
195 
196  /* solve the small evecm1 2nev x 2nev eigenproblem */
197  SelfAdjointEigenSolver<MatrixXcd> es_h2k(args.H2k);
198  Block<MatrixXcd>(args.ritzVecs.derived(), 0, 0, m, 2*k) = Q2k * es_h2k.eigenvectors();
199  args.Tmvals.segment(0,2*k) = es_h2k.eigenvalues();//this is ok
200 //?
201  cudaHostUnregister(evecm);
202  cudaHostUnregister(evecm1);
203 #else
204  errorQuda("Magma library was not built.");
205 #endif
206  return;
207  }
208 
209  // set the required parameters for the inner solver
210  static void fillEigCGInnerSolverParam(SolverParam &inner, const SolverParam &outer, bool use_sloppy_partial_accumulator = true)
211  {
212  inner.tol = outer.tol_precondition;
213  inner.maxiter = outer.maxiter_precondition;
214  inner.delta = 1e-20; // no reliable updates within the inner solver
215  inner.precision = outer.precision_precondition; // preconditioners are uni-precision solvers
217 
218  inner.iter = 0;
219  inner.gflops = 0;
220  inner.secs = 0;
221 
223  inner.is_preconditioner = true; // used to tell the inner solver it is an inner solver
224 
225  inner.use_sloppy_partial_accumulator= use_sloppy_partial_accumulator;
226 
230  }
231 
232  // set the required parameters for the initCG solver
233  static void fillInitCGSolverParam(SolverParam &inner, const SolverParam &outer) {
234  inner.iter = 0;
235  inner.gflops = 0;
236  inner.secs = 0;
237 
238  inner.tol = outer.tol;
239  inner.tol_restart = outer.tol_restart;
240  inner.maxiter = outer.maxiter;
241  inner.delta = outer.delta;
242  inner.precision = outer.precision; // preconditioners are uni-precision solvers
244 
245  inner.inv_type = QUDA_CG_INVERTER; // use CG solver
246  inner.use_init_guess = QUDA_USE_INIT_GUESS_YES;// use deflated initial guess...
247 
248  inner.use_sloppy_partial_accumulator= false;//outer.use_sloppy_partial_accumulator;
249  }
250 
251 
253  Solver(param, profile), mat(mat), matSloppy(matSloppy), matPrecon(matPrecon), K(nullptr), Kparam(param), Vm(nullptr), r_pre(nullptr), p_pre(nullptr), eigcg_args(nullptr), profile(profile), init(false)
254  {
255 
256  if (2 * param.nev >= param.m)
257  errorQuda(
258  "Incorrect number of the requested low modes: m= %d while nev=%d (note that 2*nev must be less then m).",
259  param.m, param.nev);
260 
261  if (param.rhs_idx < param.deflation_grid)
262  printfQuda("\nInitialize eigCG(m=%d, nev=%d) solver.", param.m, param.nev);
263  else {
264  printfQuda("\nDeflation space is complete, running initCG solver.");
266  //K = new CG(mat, matPrecon, Kparam, profile);//Preconditioned Mat has comms flag on
267  return;
268  }
269 
270  if ( param.inv_type == QUDA_EIGCG_INVERTER ) {
272  } else if ( param.inv_type == QUDA_INC_EIGCG_INVERTER ) {
274  errorQuda("preconditioning is not supported for the incremental solver.");
276  }
277 
279  K = new CG(matPrecon, matPrecon, Kparam, profile);
280  }else if(param.inv_type_precondition == QUDA_MR_INVERTER){
281  K = new MR(matPrecon, matPrecon, Kparam, profile);
282  }else if(param.inv_type_precondition == QUDA_SD_INVERTER){
283  K = new SD(matPrecon, Kparam, profile);
284  }else if(param.inv_type_precondition != QUDA_INVALID_INVERTER){ // unknown preconditioner
285  errorQuda("Unknown inner solver %d", param.inv_type_precondition);
286  }
287  return;
288  }
289 
291 
292  if(init)
293  {
294  if(Vm) delete Vm;
295 
296  delete tmpp;
297  delete rp;
298  delete yp;
299  delete Ap;
300  delete p;
301 
302  if(Az) delete Az;
303 
304  if(K) {
305  delete r_pre;
306  delete p_pre;
307 
308  delete K;
309  }
310  delete eigcg_args;
311  } else if (K) {
312  //delete K; //hack for the init CG solver
313  }
314  }
315 
316  void IncEigCG::RestartVT(const double beta, const double rho)
317  {
318  EigCGArgs &args = *eigcg_args;
319 
321  ComputeRitz<libtype::magma_lib>(args);
322  } else if( param.extlib_type == QUDA_EIGEN_EXTLIB ) {
323  ComputeRitz<libtype::eigen_lib>(args);//if args.m > 128, one may better use libtype::magma_lib
324  } else {
325  errorQuda("Library type %d is currently not supported.", param.extlib_type);
326  }
327 
328  //Restart V:
329 
330  blas::zero(*args.V2k);
331 
332  std::vector<ColorSpinorField*> vm (Vm->Components());
333  std::vector<ColorSpinorField*> v2k(args.V2k->Components());
334 
335  RowMajorDenseMatrix Alpha(args.ritzVecs.topLeftCorner(args.m, 2*args.k));
336  blas::caxpy( static_cast<Complex*>(Alpha.data()), vm , v2k);
337 
338  for(int i = 0; i < 2*args.k; i++) blas::copy(Vm->Component(i), args.V2k->Component(i));
339 
340  //Restart T:
341  ColorSpinorField *omega = nullptr;
342 
343  //Compute Az = Ap - beta*Ap_old(=Az):
344  blas::xpay(*Ap, -beta, *Az);
345 
346  if(Vm->Precision() != Az->Precision())//we may not need this if multiprec blas is used
347  {
348  Vm->Component(args.m-1) = *Az;//use the last vector as a temporary
349  omega = &Vm->Component(args.m-1);
350  }
351  else omega = Az;
352 
353  args.RestartLanczos(omega, Vm, 1.0 / rho);
354  return;
355  }
356 
357  void IncEigCG::UpdateVm(ColorSpinorField &res, double beta, double sqrtr2)
358  {
359  EigCGArgs &args = *eigcg_args;
360 
361  if(args.run_residual_correction) return;
362 
363  if (args.id == param.m){//Begin Rayleigh-Ritz block:
364  //
365  RestartVT(beta, sqrtr2);
366  args.ResetSearchIdx();
367  } else if (args.id == (param.m-1)) {
368  blas::copy(*Az, *Ap);//save current mat-vec result if ready for the restart in the next cycle
369  }
370 
371  //load Lanczos basis vector:
372  blas::copy(Vm->Component(args.id), res);//convert arrays
373  //rescale the vector
374  blas::ax(1.0 / sqrtr2, Vm->Component(args.id));
375  return;
376  }
377 
378 /*
379  * This is a solo precision solver.
380 */
382 
383  int k=0;
384 
385  if (checkLocation(x, b) != QUDA_CUDA_FIELD_LOCATION) errorQuda("Not supported");
386 
387  profile.TPSTART(QUDA_PROFILE_INIT);
388 
389  // Check to see that we're not trying to invert on a zero-field source
390  const double b2 = blas::norm2(b);
391  if (b2 == 0) {
392  profile.TPSTOP(QUDA_PROFILE_INIT);
393  printfQuda("Warning: inverting on zero-field source\n");
394  x = b;
395  param.true_res = 0.0;
396  param.true_res_hq = 0.0;
397  return 0;
398  }
399 
401 
402  if (!init) {
403  eigcg_args = new EigCGArgs(param.m, param.nev);//need only deflation meta structure
404 
405  csParam.create = QUDA_COPY_FIELD_CREATE;
406  rp = ColorSpinorField::Create(b, csParam);
407  csParam.create = QUDA_ZERO_FIELD_CREATE;
408  yp = ColorSpinorField::Create(b, csParam);
409 
410  Ap = ColorSpinorField::Create(csParam);
411  p = ColorSpinorField::Create(csParam);
412 
413  tmpp = ColorSpinorField::Create(csParam);
414 
415  Az = ColorSpinorField::Create(csParam);
416 
419  p_pre = ColorSpinorField::Create(csParam);
420  r_pre = ColorSpinorField::Create(csParam);
421  }
422 
423  //Create a search vector set:
424  csParam.setPrecision(param.precision_ritz);//eigCG internal search space precision may not coincide with the solver precision!
425  csParam.is_composite = true;
426  csParam.composite_dim = param.m;
427 
428  Vm = ColorSpinorFieldSet::Create(csParam); //search space for Ritz vectors
429 
430  eigcg_args->global_stop = stopping(param.tol, b2, param.residual_type); // stopping condition of solver
431 
432  init = true;
433  }
434 
435  double local_stop = x.Precision() == QUDA_DOUBLE_PRECISION ? b2*param.tol*param.tol : b2*1e-11;
436 
437  EigCGArgs &args = *eigcg_args;
438 
440  profile.TPSTOP(QUDA_PROFILE_INIT);
441  (*K)(x, b);
442  return Kparam.iter;
443  }
444 
447 
448  csParam.create = QUDA_ZERO_FIELD_CREATE;
449  csParam.is_composite = true;
450  csParam.composite_dim = (2*args.k);
451 
452  args.V2k = ColorSpinorFieldSet::Create(csParam); //search space for Ritz vectors
454  ColorSpinorField &r = *rp;
455  ColorSpinorField &y = *yp;
457 
459  csParam.is_composite = false;
460 
461  // compute initial residual
462  matSloppy(r, x, y);
463  double r2 = blas::xmyNorm(b, r);
464 
465  ColorSpinorField *z = (K != nullptr) ? ColorSpinorField::Create(csParam) : rp;//
466 
467  if( K ) {//apply preconditioner
469 
470  ColorSpinorField &rPre = *r_pre;
471  ColorSpinorField &pPre = *p_pre;
472 
473  blas::copy(rPre, r);
474  commGlobalReductionSet(false);
475  (*K)(pPre, rPre);
477  blas::copy(*z, pPre);
478  }
479 
480  *p = *z;
481  blas::zero(y);
482 
483  const bool use_heavy_quark_res =
484  (param.residual_type & QUDA_HEAVY_QUARK_RESIDUAL) ? true : false;
485 
486  profile.TPSTOP(QUDA_PROFILE_INIT);
488 
489  double heavy_quark_res = 0.0; // heavy quark res idual
490 
491  if (use_heavy_quark_res) heavy_quark_res = sqrt(blas::HeavyQuarkResidualNorm(x, r).z);
492 
493  double pAp;
494  double alpha=1.0, alpha_inv=1.0, beta=0.0, alpha_old_inv = 1.0;
495 
496  double lanczos_diag, lanczos_offdiag;
497 
499  profile.TPSTART(QUDA_PROFILE_COMPUTE);
500  blas::flops = 0;
501 
502  double rMinvr = blas::reDotProduct(r,*z);
503  //Begin EigCG iterations:
504  args.restarts = 0;
505 
506  PrintStats("eigCG", k, r2, b2, heavy_quark_res);
507 
508  bool converged = convergence(r2, heavy_quark_res, args.global_stop, param.tol_hq);
509 
510  while ( !converged && k < param.maxiter ) {
511  matSloppy(*Ap, *p, tmp); // tmp as tmp
512 
513  pAp = blas::reDotProduct(*p, *Ap);
514  alpha_old_inv = alpha_inv;
515  alpha = rMinvr / pAp;
516  alpha_inv = 1.0 / alpha;
517 
518  lanczos_diag = (alpha_inv + beta*alpha_old_inv);
519 
520  UpdateVm(*z, beta, sqrt(r2));
521 
522  r2 = blas::axpyNorm(-alpha, *Ap, r);
523  if( K ) {//apply preconditioner
524  ColorSpinorField &rPre = *r_pre;
525  ColorSpinorField &pPre = *p_pre;
526 
527  blas::copy(rPre, r);
528  commGlobalReductionSet(false);
529  (*K)(pPre, rPre);
531  blas::copy(*z, pPre);
532  }
533  //
534  double rMinvr_old = rMinvr;
535  rMinvr = K ? blas::reDotProduct(r,*z) : r2;
536  beta = rMinvr / rMinvr_old;
537  blas::axpyZpbx(alpha, *p, y, *z, beta);
538 
539  //
540  lanczos_offdiag = (-sqrt(beta)*alpha_inv);
541  args.SetLanczos(lanczos_diag, lanczos_offdiag);
542 
543  k++;
544 
545  PrintStats("eigCG", k, r2, b2, heavy_quark_res);
546  // check convergence, if convergence is satisfied we only need to check that we had a reliable update for the heavy quarks recently
547  converged = convergence(r2, heavy_quark_res, args.global_stop, param.tol_hq) or convergence(r2, heavy_quark_res, local_stop, param.tol_hq);
548  }
549 
550  args.ResetArgs();//eigCG cycle finished, this cleans V2k as well
551 
552  blas::xpy(y, x);
553 
556 
558  double gflops = (blas::flops + matSloppy.flops())*1e-9;
559  param.gflops = gflops;
560  param.iter += k;
561 
562  if (k == param.maxiter)
563  warningQuda("Exceeded maximum iterations %d", param.maxiter);
564 
565  // compute the true residuals
566  matSloppy(r, x, y);
567  param.true_res = sqrt(blas::xmyNorm(b, r) / b2);
569 
570  PrintSummary("eigCG", k, r2, b2, args.global_stop, param.tol_hq);
571 
572  // reset the flops counters
573  blas::flops = 0;
574  matSloppy.flops();
575 
577  profile.TPSTART(QUDA_PROFILE_FREE);
578 
579  profile.TPSTOP(QUDA_PROFILE_FREE);
580  return k;
581  }
582 
584  int k = 0;
585  //Start init CG iterations:
586  deflated_solver *defl_p = static_cast<deflated_solver*>(param.deflation_op);
587  Deflation &defl = *(defl_p->defl);
588 
589  const double full_tol = Kparam.tol;
591 
593 
594  csParam.create = QUDA_ZERO_FIELD_CREATE;
595 
596  ColorSpinorField *tmpp2 = ColorSpinorField::Create(csParam);//full precision accumulator
597  ColorSpinorField &tmp2 = *tmpp2;
598  ColorSpinorField *rp = ColorSpinorField::Create(csParam);//full precision residual
599  ColorSpinorField &r = *rp;
600 
602 
604  ColorSpinorField &xProj = *xp_proj;
605 
607  ColorSpinorField &rProj = *rp_proj;
608 
609  int restart_idx = 0;
610 
611  xProj = x;
612  rProj = b;
613  //launch initCG:
614  while((Kparam.tol >= full_tol) && (restart_idx < param.max_restart_num)) {
615  restart_idx += 1;
616 
617  defl(xProj, rProj);
618  x = xProj;
619 
620  K = new CG(mat, matPrecon, Kparam, profile);
621  (*K)(x, b);
622  delete K;
623 
624  mat(r, x, tmp2);
625  blas::xpay(b, -1.0, r);
626 
627  xProj = x;
628  rProj = r;
629 
630  if(getVerbosity() >= QUDA_VERBOSE) printfQuda("\ninitCG stat: %i iter / %g secs = %g Gflops. \n", Kparam.iter, Kparam.secs, Kparam.gflops);
631 
632  Kparam.tol *= param.inc_tol;
633 
634  if(restart_idx == (param.max_restart_num-1)) Kparam.tol = full_tol;//do the last solve in the next cycle to full tolerance
635 
636  param.secs += Kparam.secs;
637  }
638 
639  if(getVerbosity() >= QUDA_VERBOSE) printfQuda("\ninitCG stat: %i iter / %g secs = %g Gflops. \n", Kparam.iter, Kparam.secs, Kparam.gflops);
640  //
641  param.secs += Kparam.secs;
643 
644  k += Kparam.iter;
645 
646  delete rp;
647  delete tmpp2;
648 
650  delete xp_proj;
651  delete rp_proj;
652  }
653  return k;
654  }
655 
657  {
658  if(param.rhs_idx == 0) max_eigcg_cycles = param.eigcg_max_restarts;
659 
660  const bool mixed_prec = (param.precision != param.precision_sloppy);
661  const double b2 = norm2(in);
662 
663  deflated_solver *defl_p = static_cast<deflated_solver*>(param.deflation_op);
664  Deflation &defl = *(defl_p->defl);
665 
666  //If deflation space is complete: use initCG solver
667  if( defl.is_complete() ) {
668 
669  if (K) errorQuda("\nInitCG does not (yet) support preconditioning.");
670 
671  int iters = initCGsolve(out, in);
672  param.iter += iters;
673 
674  return;
675  }
676 
677  //Start (incremental) eigCG solver:
679  csParam.create = QUDA_ZERO_FIELD_CREATE;
680 
681  ColorSpinorField *ep = ColorSpinorField::Create(csParam);//full precision accumulator
682  ColorSpinorField &e = *ep;
683  ColorSpinorField *rp = ColorSpinorField::Create(csParam);//full precision residual
684  ColorSpinorField &r = *rp;
685 
686  //deflate initial guess ('out'-field):
687  mat(r, out, e);
688  //
689  double r2 = xmyNorm(in, r);
690 
692 
693  ColorSpinorField *ep_sloppy = ( mixed_prec ) ? ColorSpinorField::Create(csParam) : ep;
694  ColorSpinorField &eSloppy = *ep_sloppy;
695  ColorSpinorField *rp_sloppy = ( mixed_prec ) ? ColorSpinorField::Create(csParam) : rp;
696  ColorSpinorField &rSloppy = *rp_sloppy;
697 
698  const double stop = b2*param.tol*param.tol;
699  //start iterative refinement cycles (or just one eigcg call for full (solo) precision solver):
700  int logical_rhs_id = 0;
701  bool dcg_cycle = false;
702  do {
703  blas::zero(e);
704  defl(e, r);
705  //
706  eSloppy = e, rSloppy = r;
707 
708  if( dcg_cycle ) { //run DCG instead
709  if(!K) {
711  Kparam.tol = 5*param.inc_tol;//former cg_iterref_tol param
712  K = new CG(matSloppy, matPrecon, Kparam, profile);
713  }
714 
716  printfQuda("Running DCG correction cycle.\n");
717  }
718 
719  int iters = eigCGsolve(eSloppy, rSloppy);
720 
721  bool update_ritz = !dcg_cycle && (eigcg_args->restarts > 1) && !defl.is_complete(); //too uglyyy
722 
723  if( update_ritz ) {
724 
725  defl.increment(*Vm, param.nev);
726  logical_rhs_id += 1;
727 
728  dcg_cycle = (logical_rhs_id >= max_eigcg_cycles);
729 
730  } else { //run DCG instead
731  dcg_cycle = true;
732  }
733 
734  // use mixed blas ??
735  e = eSloppy;
736  blas::xpy(e, out);
737  // compute the true residuals
738  blas::zero(e);
739  mat(r, out, e);
740  //
741  r2 = blas::xmyNorm(in, r);
742 
743  param.true_res = sqrt(r2 / b2);
745  PrintSummary( !dcg_cycle ? "EigCG:" : "DCG (correction cycle):", iters, r2, b2, stop, param.tol_hq);
746 
747  if( getVerbosity() >= QUDA_VERBOSE ) {
748  if( !dcg_cycle && (eigcg_args->restarts > 1) && !defl.is_complete() ) defl.verify();
749  }
750  } while ((r2 > stop) && mixed_prec);
751 
752  delete ep;
753  delete rp;
754 
755  if(mixed_prec){
756  delete ep_sloppy;
757  delete rp_sloppy;
758  }
759 
760  if (mixed_prec && max_eigcg_cycles > logical_rhs_id) {
761  printfQuda("Reset maximum eigcg cycles to %d (was %d)\n", logical_rhs_id, max_eigcg_cycles);
762  max_eigcg_cycles = logical_rhs_id;//adjust maximum allowed cycles based on the actual information
763  }
764 
765  param.rhs_idx += logical_rhs_id;
766 
767  if(defl.is_complete()) {
768  if(param.rhs_idx != param.deflation_grid) warningQuda("\nTotal rhs number (%d) does not match the deflation grid size (%d).\n", param.rhs_idx, param.deflation_grid);
769  if(Vm) delete Vm;//safe some space
770  Vm = nullptr;
771 
772  const int max_nev = defl.size();//param.m;
773  printfQuda("\nRequested to reserve %d eigenvectors with max tol %le.\n", max_nev, param.eigenval_tol);
774  defl.reduce(param.eigenval_tol, max_nev);
775  }
776  return;
777  }
778 
779 } // namespace quda
cudaColorSpinorField * tmp2
void ax(double a, ColorSpinorField &x)
Definition: blas_quda.cu:508
VectorXd RealVector
void setPrecision(QudaPrecision precision, QudaPrecision ghost_precision=QUDA_INVALID_PRECISION, bool force_native=false)
ColorSpinorField * Az
temporary for mat-vec
Definition: invert_quda.h:1242
void axpyZpbx(double a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, double b)
Definition: blas_quda.cu:552
ColorSpinorField * p_pre
residual passed to preconditioner
Definition: invert_quda.h:1244
DiracMatrix & matSloppy
Definition: invert_quda.h:1229
QudaInverterType inv_type
Definition: invert_quda.h:22
ColorSpinorField * tmpp
Definition: invert_quda.h:1241
QudaVerbosity getVerbosity()
Definition: util_quda.cpp:21
DiracMatrix & mat
Definition: invert_quda.h:1228
#define errorQuda(...)
Definition: util_quda.h:121
double norm2(const ColorSpinorField &a)
Definition: reduce_quda.cu:721
void init()
Definition: blas_quda.cu:483
__host__ __device__ ValueType sqrt(ValueType x)
Definition: complex_quda.h:120
Complex cDotProduct(ColorSpinorField &, ColorSpinorField &)
Definition: reduce_quda.cu:764
void PrintStats(const char *name, int k, double r2, double b2, double hq2)
Prints out the running statistics of the solver (requires a verbosity of QUDA_VERBOSE) ...
Definition: solver.cpp:256
ColorSpinorField * Ap
Definition: invert_quda.h:1240
cudaColorSpinorField * tmp
Definition: covdev_test.cpp:44
IncEigCG(DiracMatrix &mat, DiracMatrix &matSloppy, DiracMatrix &matPrecon, SolverParam &param, TimeProfile &profile)
CompositeColorSpinorField & Components()
static ColorSpinorField * Create(const ColorSpinorParam &param)
int size()
return deflation space size
Definition: deflation.h:164
int initCGsolve(ColorSpinorField &out, ColorSpinorField &in)
bool convergence(double r2, double hq2, double r2_tol, double hq_tol)
Definition: solver.cpp:223
double reDotProduct(ColorSpinorField &x, ColorSpinorField &y)
Definition: reduce_quda.cu:728
void copy(ColorSpinorField &dst, const ColorSpinorField &src)
Definition: copy_quda.cu:355
ColorSpinorField & Component(const int idx) const
double xmyNorm(ColorSpinorField &x, ColorSpinorField &y)
Definition: blas_quda.h:75
int eigCGsolve(ColorSpinorField &out, ColorSpinorField &in)
QudaPrecision precision_ritz
Definition: invert_quda.h:227
QudaInverterType inv_type_precondition
Definition: invert_quda.h:28
QudaPreserveSource preserve_source
Definition: invert_quda.h:154
void reduce(double tol, int max_nev)
Definition: deflation.cpp:266
void xpay(ColorSpinorField &x, double a, ColorSpinorField &y)
Definition: blas_quda.h:37
double norm2(const CloverField &a, bool inverse=false)
QudaGaugeParam param
Definition: pack_test.cpp:17
bool is_complete()
Test whether the deflation space is complete and therefore cannot be further extended.
Definition: deflation.h:159
bool is_composite
for deflation solvers:
void ComputeRitz(EigCGArgs &args)
double Last(QudaProfileType idx)
Definition: timer.h:251
void magma_Xheev(void *Mat, const int n, const int ldm, void *evalues, const int prec)
Definition: blas_magma.cu:296
QudaResidualType residual_type
Definition: invert_quda.h:49
Stride< Dynamic, Dynamic > DynamicStride
Definition: deflation.cpp:18
static double stopping(double tol, double b2, QudaResidualType residual_type)
Set the solver L2 stopping condition.
Definition: solver.cpp:206
ColorSpinorParam csParam
Definition: pack_test.cpp:24
cpuColorSpinorField * in
TimeProfile & profile
Definition: invert_quda.h:1248
void UpdateVm(ColorSpinorField &res, double beta, double sqrtr2)
double omega
Definition: test_util.cpp:1690
static void fillInitCGSolverParam(SolverParam &inner, const SolverParam &outer)
#define warningQuda(...)
Definition: util_quda.h:133
#define checkLocation(...)
static void fillEigCGInnerSolverParam(SolverParam &inner, const SolverParam &outer, bool use_sloppy_partial_accumulator=true)
bool is_preconditioner
verbosity to use for preconditioner
Definition: invert_quda.h:241
static int max_eigcg_cycles
double3 HeavyQuarkResidualNorm(ColorSpinorField &x, ColorSpinorField &r)
Definition: reduce_quda.cu:809
std::complex< double > Complex
Definition: quda_internal.h:46
Deflation * defl
Definition: deflation.h:189
ColorSpinorField * rp
Definition: invert_quda.h:1237
void caxpy(const Complex &a, ColorSpinorField &x, ColorSpinorField &y)
Definition: blas_quda.cu:512
double tol_precondition
Definition: invert_quda.h:199
QudaExtLibType extlib_type
Definition: invert_quda.h:251
void zero(ColorSpinorField &a)
Definition: blas_quda.cu:472
void SetLanczos(Complex diag_val, Complex offdiag_val)
QudaPrecision precision_precondition
Definition: invert_quda.h:151
QudaPrecision precision
Definition: invert_quda.h:142
SolverParam & param
Definition: invert_quda.h:463
cpuColorSpinorField * out
ColorSpinorField * r_pre
Definition: invert_quda.h:1243
Conjugate-Gradient Solver.
Definition: invert_quda.h:570
ColorSpinorField * yp
residual vector
Definition: invert_quda.h:1238
__shared__ float s[]
unsigned long long flops() const
Definition: dirac_quda.h:1119
MatrixXcd DenseMatrix
EigCGArgs * eigcg_args
preconditioner result
Definition: invert_quda.h:1246
#define printfQuda(...)
Definition: util_quda.h:115
unsigned long long flops
Definition: blas_quda.cu:22
void xpy(ColorSpinorField &x, ColorSpinorField &y)
Definition: blas_quda.h:33
DiracMatrix & matPrecon
Definition: invert_quda.h:1230
VectorXcd Vector
void increment(ColorSpinorField &V, int nev)
Definition: deflation.cpp:188
double axpyNorm(double a, ColorSpinorField &x, ColorSpinorField &y)
Definition: blas_quda.h:74
ColorSpinorField * p
high precision accumulator
Definition: invert_quda.h:1239
void RestartLanczos(ColorSpinorField *w, ColorSpinorFieldSet *v, const double inv_sqrt_r2)
QudaUseInitGuess use_init_guess
Definition: invert_quda.h:64
ColorSpinorFieldSet * V2k
QudaPrecision precision_sloppy
Definition: invert_quda.h:145
bool use_sloppy_partial_accumulator
Definition: invert_quda.h:76
void PrintSummary(const char *name, int k, double r2, double b2, double r2_tol, double hq_tol)
Prints out the summary of the solver convergence (requires a verbosity of QUDA_SUMMARIZE). Assumes SolverParam.true_res and SolverParam.true_res_hq has been set.
Definition: solver.cpp:270
SolverParam Kparam
Definition: invert_quda.h:1233
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
QudaPrecision Precision() const
void RestartVT(const double beta, const double rho)
MatrixXcd VectorSet
EigCGArgs(int m, int k)
ColorSpinorFieldSet * Vm
Definition: invert_quda.h:1235
void commGlobalReductionSet(bool global_reduce)
void operator()(ColorSpinorField &out, ColorSpinorField &in)