QUDA  1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
invert_quda.h
Go to the documentation of this file.
1 #pragma once
2 
3 #include <quda.h>
4 #include <quda_internal.h>
5 #include <dirac_quda.h>
6 #include <color_spinor_field.h>
7 #include <qio_field.h>
8 #include <eigensolve_quda.h>
9 #include <vector>
10 #include <memory>
11 
12 namespace quda {
13 
17  struct SolverParam {
18 
23 
29 
34 
38  void *deflation_op;
39 
50 
52  bool deflate;
53 
56 
58  std::vector<ColorSpinorField *> evecs;
59 
61  std::vector<Complex> evals;
62 
65 
68 
70  double delta;
71 
74 
77 
87 
92 
97 
102 
107 
110 
112  int pipeline;
113 
115  double tol;
116 
118  double tol_restart;
119 
121  double tol_hq;
122 
125 
128 
130  double true_res;
131 
133  double true_res_hq;
134 
136  int maxiter;
137 
139  int iter;
140 
143 
146 
149 
152 
155 
159 
162 
164  int num_src;
165 
166  // Multi-shift solver parameters
167 
170 
173 
176 
179 
182 
185 
188 
190  int Nsteps;
191 
193  int Nkrylov;
194 
197 
200 
203 
205  double omega;
206 
209 
212 
214  double ca_lambda_max; // -1 -> power iter generate
215 
218 
220  double secs;
221 
223  double gflops;
224 
225  // Incremental EigCG solver parameters
227  QudaPrecision precision_ritz;//also search space precision
228 
229  int nev;//number of eigenvectors produced by EigCG
230  int m;//Dimension of the search space
232  int rhs_idx;
233 
236  double inc_tol;
237  double eigenval_tol;
238 
240 
242 
244 
249 
252 
257  compute_null_vector(QUDA_COMPUTE_NULL_VECTOR_NO),
258  compute_true_res(true),
259  sloppy_converge(false),
260  verbosity_precondition(QUDA_SILENT),
261  mg_instance(false)
262  {
263  ;
264  }
265 
272  inv_type(param.inv_type),
273  inv_type_precondition(param.inv_type_precondition),
274  preconditioner(param.preconditioner),
275  deflation_op(param.deflation_op),
276  residual_type(param.residual_type),
277  deflate(false),
278  use_init_guess(param.use_init_guess),
279  compute_null_vector(QUDA_COMPUTE_NULL_VECTOR_NO),
280  delta(param.reliable_delta),
281  use_alternative_reliable(param.use_alternative_reliable),
282  use_sloppy_partial_accumulator(param.use_sloppy_partial_accumulator),
283  solution_accumulator_pipeline(param.solution_accumulator_pipeline),
284  max_res_increase(param.max_res_increase),
285  max_res_increase_total(param.max_res_increase_total),
286  max_hq_res_increase(param.max_hq_res_increase),
287  max_hq_res_restart_total(param.max_hq_res_restart_total),
288  heavy_quark_check(param.heavy_quark_check),
289  pipeline(param.pipeline),
290  tol(param.tol),
291  tol_restart(param.tol_restart),
292  tol_hq(param.tol_hq),
293  compute_true_res(param.compute_true_res),
294  sloppy_converge(false),
295  true_res(param.true_res),
296  true_res_hq(param.true_res_hq),
297  maxiter(param.maxiter),
298  iter(param.iter),
299  precision(param.cuda_prec),
300  precision_sloppy(param.cuda_prec_sloppy),
301  precision_refinement_sloppy(param.cuda_prec_refinement_sloppy),
302  precision_precondition(param.cuda_prec_precondition),
303  preserve_source(param.preserve_source),
304  return_residual(preserve_source == QUDA_PRESERVE_SOURCE_NO ? true : false),
305  num_src(param.num_src),
306  num_offset(param.num_offset),
307  Nsteps(param.Nsteps),
308  Nkrylov(param.gcrNkrylov),
309  precondition_cycle(param.precondition_cycle),
310  tol_precondition(param.tol_precondition),
311  maxiter_precondition(param.maxiter_precondition),
312  omega(param.omega),
313  ca_basis(param.ca_basis),
314  ca_lambda_min(param.ca_lambda_min),
315  ca_lambda_max(param.ca_lambda_max),
316  schwarz_type(param.schwarz_type),
317  secs(param.secs),
318  gflops(param.gflops),
319  precision_ritz(param.cuda_prec_ritz),
320  nev(param.nev),
321  m(param.max_search_dim),
322  deflation_grid(param.deflation_grid),
323  rhs_idx(0),
324  eigcg_max_restarts(param.eigcg_max_restarts),
325  max_restart_num(param.max_restart_num),
326  inc_tol(param.inc_tol),
327  eigenval_tol(param.eigenval_tol),
328  verbosity_precondition(param.verbosity_precondition),
329  is_preconditioner(false),
330  global_reduction(true),
331  mg_instance(false),
332  extlib_type(param.extlib_type)
333  {
334  for (int i=0; i<num_offset; i++) {
335  offset[i] = param.offset[i];
336  tol_offset[i] = param.tol_offset[i];
337  tol_hq_offset[i] = param.tol_hq_offset[i];
338  }
339 
340  if (param.rhs_idx != 0
342  rhs_idx = param.rhs_idx;
343  }
344  }
345 
347  inv_type(param.inv_type),
348  inv_type_precondition(param.inv_type_precondition),
349  preconditioner(param.preconditioner),
350  deflation_op(param.deflation_op),
351  residual_type(param.residual_type),
352  deflate(false),
353  eig_param(param.eig_param),
354  use_init_guess(param.use_init_guess),
355  compute_null_vector(param.compute_null_vector),
356  delta(param.delta),
357  use_alternative_reliable(param.use_alternative_reliable),
358  use_sloppy_partial_accumulator(param.use_sloppy_partial_accumulator),
359  solution_accumulator_pipeline(param.solution_accumulator_pipeline),
360  max_res_increase(param.max_res_increase),
361  max_res_increase_total(param.max_res_increase_total),
362  heavy_quark_check(param.heavy_quark_check),
363  pipeline(param.pipeline),
364  tol(param.tol),
365  tol_restart(param.tol_restart),
366  tol_hq(param.tol_hq),
367  compute_true_res(param.compute_true_res),
368  sloppy_converge(param.sloppy_converge),
369  true_res(param.true_res),
370  true_res_hq(param.true_res_hq),
371  maxiter(param.maxiter),
372  iter(param.iter),
373  precision(param.precision),
374  precision_sloppy(param.precision_sloppy),
375  precision_refinement_sloppy(param.precision_refinement_sloppy),
376  precision_precondition(param.precision_precondition),
377  preserve_source(param.preserve_source),
378  return_residual(param.return_residual),
379  num_offset(param.num_offset),
380  Nsteps(param.Nsteps),
381  Nkrylov(param.Nkrylov),
382  precondition_cycle(param.precondition_cycle),
383  tol_precondition(param.tol_precondition),
384  maxiter_precondition(param.maxiter_precondition),
385  omega(param.omega),
386  ca_basis(param.ca_basis),
387  ca_lambda_min(param.ca_lambda_min),
388  ca_lambda_max(param.ca_lambda_max),
389  schwarz_type(param.schwarz_type),
390  secs(param.secs),
391  gflops(param.gflops),
392  precision_ritz(param.precision_ritz),
393  nev(param.nev),
394  m(param.m),
395  deflation_grid(param.deflation_grid),
396  rhs_idx(0),
397  eigcg_max_restarts(param.eigcg_max_restarts),
398  max_restart_num(param.max_restart_num),
399  inc_tol(param.inc_tol),
400  eigenval_tol(param.eigenval_tol),
401  verbosity_precondition(param.verbosity_precondition),
402  is_preconditioner(param.is_preconditioner),
403  global_reduction(param.global_reduction),
404  mg_instance(param.mg_instance),
405  extlib_type(param.extlib_type)
406  {
407  for (int i=0; i<num_offset; i++) {
408  offset[i] = param.offset[i];
409  tol_offset[i] = param.tol_offset[i];
410  tol_hq_offset[i] = param.tol_hq_offset[i];
411  }
412 
413  if((param.inv_type == QUDA_INC_EIGCG_INVERTER || param.inv_type == QUDA_EIGCG_INVERTER) && m % 16){//current hack for the magma library
414  m = (m / 16) * 16 + 16;
415  warningQuda("\nSwitched eigenvector search dimension to %d\n", m);
416  }
417  if(param.rhs_idx != 0 && (param.inv_type==QUDA_INC_EIGCG_INVERTER || param.inv_type==QUDA_GMRESDR_PROJ_INVERTER)){
418  rhs_idx = param.rhs_idx;
419  }
420  }
421 
423 
428  void updateInvertParam(QudaInvertParam &param, int offset=-1) {
429  param.true_res = true_res;
430  param.true_res_hq = true_res_hq;
431  param.iter += iter;
432  reduceDouble(gflops);
433  param.gflops += gflops;
434  param.secs += secs;
435  if (offset >= 0) {
436  param.true_res_offset[offset] = true_res_offset[offset];
437  param.iter_res_offset[offset] = iter_res_offset[offset];
438  param.true_res_hq_offset[offset] = true_res_hq_offset[offset];
439  } else {
440  for (int i=0; i<num_offset; i++) {
441  param.true_res_offset[i] = true_res_offset[i];
442  param.iter_res_offset[i] = iter_res_offset[i];
443  param.true_res_hq_offset[i] = true_res_hq_offset[i];
444  }
445  }
446  //for incremental eigCG:
447  param.rhs_idx = rhs_idx;
448 
451  }
452 
454  //for incremental eigCG:
455  rhs_idx = param.rhs_idx;
456  }
457 
458  };
459 
460  class Solver {
461 
462  protected:
466 
467  public:
468  Solver(SolverParam &param, TimeProfile &profile);
469  virtual ~Solver();
470 
471  virtual void operator()(ColorSpinorField &out, ColorSpinorField &in) = 0;
472 
473  virtual void blocksolve(ColorSpinorField &out, ColorSpinorField &in);
474 
478  static Solver* create(SolverParam &param, DiracMatrix &mat, DiracMatrix &matSloppy,
479  DiracMatrix &matPrecon, TimeProfile &profile);
480 
488  static double stopping(double tol, double b2, QudaResidualType residual_type);
489 
498  bool convergence(double r2, double hq2, double r2_tol, double hq_tol);
499 
508  bool convergenceHQ(double r2, double hq2, double r2_tol, double hq_tol);
509 
517  bool convergenceL2(double r2, double hq2, double r2_tol, double hq_tol);
518 
527  void PrintStats(const char *name, int k, double r2, double b2, double hq2);
528 
540  void PrintSummary(const char *name, int k, double r2, double b2, double r2_tol, double hq_tol);
541 
546  bool deflate_init = false;
547  std::vector<ColorSpinorField *> defl_tmp1;
548  std::vector<ColorSpinorField *> defl_tmp2;
549 
557  void constructDeflationSpace(const ColorSpinorField &meta, const DiracMatrix &mat, bool svd);
558 
563  virtual double flops() const { return 0; }
564  };
565 
570  class CG : public Solver {
571 
572  private:
573  const DiracMatrix &mat;
575  // pointers to fields to avoid multiple creation overhead
576  ColorSpinorField *yp, *rp, *rnewp, *pp, *App, *tmpp, *tmp2p, *tmp3p, *rSloppyp, *xSloppyp;
577  std::vector<ColorSpinorField*> p;
578  bool init;
579 
580  public:
581  CG(DiracMatrix &mat, DiracMatrix &matSloppy, SolverParam &param, TimeProfile &profile);
582  virtual ~CG();
589  (*this)(out, in, nullptr, 0.0);
590  };
591 
600  void operator()(ColorSpinorField &out, ColorSpinorField &in, ColorSpinorField *p_init, double r2_old_init);
601 
602  void blocksolve(ColorSpinorField& out, ColorSpinorField& in);
603  };
604 
605 
606 
607  class CG3 : public Solver {
608 
609  private:
610  const DiracMatrix &mat;
612  // pointers to fields to avoid multiple creation overhead
613  ColorSpinorField *yp, *rp, *tmpp, *ArSp, *rSp, *xSp, *xS_oldp, *tmpSp, *rS_oldp, *tmp2Sp;
614  bool init;
615 
616  public:
617  CG3(DiracMatrix &mat, DiracMatrix &matSloppy, SolverParam &param, TimeProfile &profile);
618  virtual ~CG3();
619 
620  void operator()(ColorSpinorField &out, ColorSpinorField &in);
621  };
622 
623 
624 
625  class CG3NE : public Solver {
626 
627  private:
628  const DiracMatrix &mat;
631  // pointers to fields to avoid multiple creation overhead
632  ColorSpinorField *yp, *rp, *AdagrSp, *AAdagrSp, *rSp, *xSp, *xS_oldp, *tmpSp, *rS_oldp;
633  bool init;
634 
635  public:
636  CG3NE(DiracMatrix &mat, DiracMatrix &matSloppy, SolverParam &param, TimeProfile &profile);
637  virtual ~CG3NE();
638 
639  void operator()(ColorSpinorField &out, ColorSpinorField &in);
640  };
641 
642  class CGNE : public CG {
643 
644  private:
649  bool init;
650 
651  public:
652  CGNE(DiracMatrix &mat, DiracMatrix &matSloppy, SolverParam &param, TimeProfile &profile);
653  virtual ~CGNE();
654 
655  void operator()(ColorSpinorField &out, ColorSpinorField &in);
656  };
657 
658  class CGNR : public CG {
659 
660  private:
664  bool init;
665 
666  public:
667  CGNR(DiracMatrix &mat, DiracMatrix &matSloppy, SolverParam &param, TimeProfile &profile);
668  virtual ~CGNR();
669 
670  void operator()(ColorSpinorField &out, ColorSpinorField &in);
671  };
672 
673  class MPCG : public Solver {
674  private:
675  const DiracMatrix &mat;
676  void computeMatrixPowers(cudaColorSpinorField out[], cudaColorSpinorField &in, int nvec);
677  void computeMatrixPowers(std::vector<cudaColorSpinorField>& out, std::vector<cudaColorSpinorField>& in, int nsteps);
678 
679 
680  public:
681  MPCG(DiracMatrix &mat, SolverParam &param, TimeProfile &profile);
682  virtual ~MPCG();
683 
684  void operator()(ColorSpinorField &out, ColorSpinorField &in);
685  };
686 
687 
688 
689  class PreconCG : public Solver {
690  private:
691  const DiracMatrix &mat;
694 
696  SolverParam Kparam; // parameters for preconditioner solve
697 
698  public:
699  PreconCG(DiracMatrix &mat, DiracMatrix &matSloppy, DiracMatrix &matPrecon, SolverParam &param, TimeProfile &profile);
700 
701  virtual ~PreconCG();
702 
703  void operator()(ColorSpinorField &out, ColorSpinorField &in);
704  };
705 
706 
707  class BiCGstab : public Solver {
708 
709  private:
713 
714  // pointers to fields to avoid multiple creation overhead
715  ColorSpinorField *yp, *rp, *pp, *vp, *tmpp, *tp;
716  bool init;
717 
718  public:
719  BiCGstab(DiracMatrix &mat, DiracMatrix &matSloppy, DiracMatrix &matPrecon,
720  SolverParam &param, TimeProfile &profile);
721  virtual ~BiCGstab();
722 
723  void operator()(ColorSpinorField &out, ColorSpinorField &in);
724  };
725 
726  class SimpleBiCGstab : public Solver {
727 
728  private:
730 
731  // pointers to fields to avoid multiple creation overhead
732  cudaColorSpinorField *yp, *rp, *pp, *vp, *tmpp, *tp;
733  bool init;
734 
735  public:
737  virtual ~SimpleBiCGstab();
738 
739  void operator()(ColorSpinorField &out, ColorSpinorField &in);
740  };
741 
742  class MPBiCGstab : public Solver {
743 
744  private:
746  void computeMatrixPowers(std::vector<cudaColorSpinorField>& pr, cudaColorSpinorField& p, cudaColorSpinorField& r, int nsteps);
747 
748  public:
750  virtual ~MPBiCGstab();
751 
752  void operator()(ColorSpinorField &out, ColorSpinorField &in);
753  };
754 
755  class BiCGstabL : public Solver {
756 
757  private:
760 
764  int nKrylov; // in the language of BiCGstabL, this is L.
765 
766  // Various coefficients and params needed on each iteration.
767  Complex rho0, rho1, alpha, omega, beta; // Various coefficients for the BiCG part of BiCGstab-L.
768  Complex *gamma, *gamma_prime, *gamma_prime_prime; // Parameters for MR part of BiCGstab-L. (L+1) length.
769  Complex **tau; // Parameters for MR part of BiCGstab-L. Tech. modified Gram-Schmidt coeffs. (L+1)x(L+1) length.
770  double *sigma; // Parameters for MR part of BiCGstab-L. Tech. the normalization part of Gram-Scmidt. (L+1) length.
771 
772  // pointers to fields to avoid multiple creation overhead
773  // full precision fields
776  // sloppy precision fields
778  std::vector<ColorSpinorField*> r; // Current residual + intermediate residual values, along the MR.
779  std::vector<ColorSpinorField*> u; // Search directions.
780 
781  // Saved, preallocated vectors. (may or may not get used depending on precision.)
785 
789  int reliable(double &rNorm, double &maxrx, double &maxrr, const double &r2, const double &delta);
790 
794  void computeTau(Complex **tau, double *sigma, std::vector<ColorSpinorField*> r, int begin, int size, int j);
795  void updateR(Complex **tau, std::vector<ColorSpinorField*> r, int begin, int size, int j);
796  void orthoDir(Complex **tau, double* sigma, std::vector<ColorSpinorField*> r, int j, int pipeline);
797 
798  void updateUend(Complex* gamma, std::vector<ColorSpinorField*> u, int nKrylov);
799  void updateXRend(Complex* gamma, Complex* gamma_prime, Complex* gamma_prime_prime,
800  std::vector<ColorSpinorField*> r, ColorSpinorField& x, int nKrylov);
801 
805  bool init;
806 
807  std::string solver_name; // holds BiCGstab-l, where 'l' literally equals nKrylov.
808 
809  public:
810  BiCGstabL(DiracMatrix &mat, DiracMatrix &matSloppy, SolverParam &param, TimeProfile &profile);
811  virtual ~BiCGstabL();
812 
813  void operator()(ColorSpinorField &out, ColorSpinorField &in);
814  };
815 
816  class GCR : public Solver {
817 
818  private:
819  const DiracMatrix &mat;
822 
824  SolverParam Kparam; // parameters for preconditioner solve
825 
829  int nKrylov;
830 
833  double *gamma;
834 
838  bool init;
839 
845 
846  std::vector<ColorSpinorField*> p; // GCR direction vectors
847  std::vector<ColorSpinorField*> Ap; // mat * direction vectors
848 
849  public:
850  GCR(DiracMatrix &mat, DiracMatrix &matSloppy, DiracMatrix &matPrecon,
851  SolverParam &param, TimeProfile &profile);
852 
856  GCR(DiracMatrix &mat, Solver &K, DiracMatrix &matSloppy, DiracMatrix &matPrecon, SolverParam &param,
857  TimeProfile &profile);
858  virtual ~GCR();
859 
860  void operator()(ColorSpinorField &out, ColorSpinorField &in);
861  };
862 
863  class MR : public Solver {
864 
865  private:
866  const DiracMatrix &mat;
874  bool init;
875 
876  public:
877  MR(DiracMatrix &mat, DiracMatrix &matSloppy, SolverParam &param, TimeProfile &profile);
878  virtual ~MR();
879 
880  void operator()(ColorSpinorField &out, ColorSpinorField &in);
881  };
882 
891  class CACG : public Solver {
892 
893  private:
894  const DiracMatrix &mat;
896  bool init;
897 
900 
901  Complex *Q_AQandg; // Fused inner product matrix
902  Complex *Q_AS; // inner product matrix
903  Complex *alpha; // QAQ^{-1} g
904  Complex *beta; // QAQ^{-1} QpolyS
905 
911 
912  std::vector<ColorSpinorField*> S; // residual vectors
913  std::vector<ColorSpinorField*> AS; // mat * residual vectors. Can be replaced by a single temporary.
914  std::vector<ColorSpinorField*> Q; // CG direction vectors
915  std::vector<ColorSpinorField*> Qtmp; // CG direction vectors for pointer swap
916  std::vector<ColorSpinorField*> AQ; // mat * CG direction vectors.
917  // it's possible to avoid carrying these
918  // around, but there's a stability penalty,
919  // and computing QAQ becomes a pain (though
920  // it does let you fuse the reductions...)
921 
928  void create(ColorSpinorField &b);
929 
933  void compute_alpha();
934 
938  void compute_beta();
939 
943  int reliable(double &rNorm, double &maxrr, int &rUpdate, const double &r2, const double &delta);
944 
945  public:
946  CACG(DiracMatrix &mat, DiracMatrix &matSloppy, SolverParam &param, TimeProfile &profile);
947  virtual ~CACG();
948 
949  void operator()(ColorSpinorField &out, ColorSpinorField &in);
950  };
951 
952  class CACGNE : public CACG {
953 
954  private:
959  bool init;
960 
961  public:
962  CACGNE(DiracMatrix &mat, DiracMatrix &matSloppy, SolverParam &param, TimeProfile &profile);
963  virtual ~CACGNE();
964 
965  void operator()(ColorSpinorField &out, ColorSpinorField &in);
966  };
967 
968  class CACGNR : public CACG {
969 
970  private:
974  bool init;
975 
976  public:
977  CACGNR(DiracMatrix &mat, DiracMatrix &matSloppy, SolverParam &param, TimeProfile &profile);
978  virtual ~CACGNR();
979 
980  void operator()(ColorSpinorField &out, ColorSpinorField &in);
981  };
982 
990  class CAGCR : public Solver {
991 
992  private:
993  const DiracMatrix &mat;
995  bool init;
996 
997  // Basis. Currently anything except POWER_BASIS causes a warning
998  // then swap to POWER_BASIS.
1000 
1001  Complex *alpha; // Solution coefficient vectors
1002 
1006 
1007  std::vector<ColorSpinorField*> p; // GCR direction vectors
1008  std::vector<ColorSpinorField*> q; // mat * direction vectors
1009 
1016  void create(ColorSpinorField &b);
1017 
1025  void solve(Complex *psi_, std::vector<ColorSpinorField*> &q, ColorSpinorField &b);
1026 
1027 public:
1028  CAGCR(DiracMatrix &mat, DiracMatrix &matSloppy, SolverParam &param, TimeProfile &profile);
1029  virtual ~CAGCR();
1030 
1031  void operator()(ColorSpinorField &out, ColorSpinorField &in);
1032  };
1033 
1034  // Steepest descent solver used as a preconditioner
1035  class SD : public Solver {
1036  private:
1041  bool init;
1042 
1043  public:
1044  SD(DiracMatrix &mat, SolverParam &param, TimeProfile &profile);
1045  virtual ~SD();
1046 
1047  void operator()(ColorSpinorField &out, ColorSpinorField &in);
1048  };
1049 
1050  // Extended Steepest Descent solver used for overlapping DD preconditioning
1051  class XSD : public Solver {
1052  private:
1056  SD *sd; // extended sd is implemented using standard sd
1057  bool init;
1058  int R[4];
1059 
1060  public:
1061  XSD(DiracMatrix &mat, SolverParam &param, TimeProfile &profile);
1062  virtual ~XSD();
1063 
1064  void operator()(ColorSpinorField &out, ColorSpinorField &in);
1065  };
1066 
1068  {
1069 
1070 private:
1072  const Dirac &dirac;
1073  const char *prefix;
1074 
1075 public:
1076  PreconditionedSolver(Solver &solver, const Dirac &dirac, SolverParam &param, TimeProfile &profile,
1077  const char *prefix) :
1078  Solver(param, profile),
1079  solver(&solver),
1080  dirac(dirac),
1081  prefix(prefix)
1082  {
1083  }
1084 
1085  virtual ~PreconditionedSolver() { delete solver; }
1086 
1088  pushOutputPrefix(prefix);
1089 
1091 
1092  ColorSpinorField *out=nullptr;
1093  ColorSpinorField *in=nullptr;
1094 
1095  dirac.prepare(in, out, x, b, solution_type);
1096  (*solver)(*out, *in);
1097  dirac.reconstruct(x, b, solution_type);
1098 
1099  popOutputPrefix();
1100  }
1101  };
1102 
1104 
1105  protected:
1108 
1109  public:
1111  param(param), profile(profile) { ; }
1112  virtual ~MultiShiftSolver() { ; }
1113 
1114  virtual void operator()(std::vector<ColorSpinorField*> out, ColorSpinorField &in) = 0;
1115  bool convergence(const double *r2, const double *r2_tol, int n) const;
1116  };
1117 
1122 
1123  protected:
1126 
1127  public:
1128  MultiShiftCG(DiracMatrix &mat, DiracMatrix &matSloppy, SolverParam &param, TimeProfile &profile);
1129  virtual ~MultiShiftCG();
1138  void operator()(std::vector<ColorSpinorField*>x, ColorSpinorField &b, std::vector<ColorSpinorField*> &p, double* r2_old_array );
1139 
1146  void operator()(std::vector<ColorSpinorField*> out, ColorSpinorField &in){
1147  std::unique_ptr<double[]> r2_old(new double[QUDA_MAX_MULTI_SHIFT]);
1148  std::vector<ColorSpinorField*> p;
1149 
1150  (*this)(out, in, p, r2_old.get());
1151 
1152  for (auto& pp : p) delete pp;
1153  }
1154 
1155  };
1156 
1157 
1158 
1171  class MinResExt {
1172 
1173  protected:
1175  bool orthogonal;
1176  bool apply_mat;
1177  bool hermitian;
1179 
1188  void solve(Complex *psi_, std::vector<ColorSpinorField*> &p,
1189  std::vector<ColorSpinorField*> &q, ColorSpinorField &b, bool hermitian);
1190 
1191  public:
1198  MinResExt(DiracMatrix &mat, bool orthogonal, bool apply_mat, bool hermitian, TimeProfile &profile);
1199  virtual ~MinResExt();
1200 
1206  void operator()(ColorSpinorField &x, ColorSpinorField &b,
1207  std::vector<std::pair<ColorSpinorField*,ColorSpinorField*> > basis);
1208 
1215  void operator()(ColorSpinorField &x, ColorSpinorField &b,
1216  std::vector<ColorSpinorField*> p,
1217  std::vector<ColorSpinorField*> q);
1218  };
1219 
1221 
1222  //forward declaration
1223  class EigCGArgs;
1224 
1225  class IncEigCG : public Solver {
1226 
1227  private:
1231 
1233  SolverParam Kparam; // parameters for preconditioner solve
1234 
1235  ColorSpinorFieldSet *Vm; //eigCG search vectors (spinor matrix of size eigen_vector_length x m)
1236 
1239  ColorSpinorField* p; // conjugate vector
1240  ColorSpinorField* Ap; // mat * conjugate vector
1242  ColorSpinorField* Az; // mat * conjugate vector from the previous iteration
1245 
1247 
1248  TimeProfile &profile; // time profile for initCG solver
1249 
1250  bool init;
1251 
1252 public:
1253  IncEigCG(DiracMatrix &mat, DiracMatrix &matSloppy, DiracMatrix &matPrecon, SolverParam &param, TimeProfile &profile);
1254 
1255  virtual ~IncEigCG();
1256 
1262  void increment(ColorSpinorField &V, int nev);
1263 
1264  void RestartVT(const double beta, const double rho);
1265  void UpdateVm(ColorSpinorField &res, double beta, double sqrtr2);
1266  //EigCG solver:
1267  int eigCGsolve(ColorSpinorField &out, ColorSpinorField &in);
1268  //InitCG solver:
1269  int initCGsolve(ColorSpinorField &out, ColorSpinorField &in);
1270  //Incremental eigCG solver (for eigcg and initcg calls)
1271  void operator()(ColorSpinorField &out, ColorSpinorField &in);
1272  };
1273 
1274 //forward declaration
1275  class GMResDRArgs;
1276 
1277  class GMResDR : public Solver {
1278 
1279  private:
1280 
1284 
1286  SolverParam Kparam; // parameters for preconditioner solve
1287 
1288  ColorSpinorFieldSet *Vm;//arnoldi basis vectors, size (m+1)
1289  ColorSpinorFieldSet *Zm;//arnoldi basis vectors, size (m+1)
1290 
1297 
1298  TimeProfile &profile; //time profile for initCG solver
1299 
1301 
1302  bool init;
1303 
1304  public:
1305 
1306  GMResDR(DiracMatrix &mat, DiracMatrix &matSloppy, DiracMatrix &matPrecon, SolverParam &param, TimeProfile &profile);
1307  GMResDR(DiracMatrix &mat, Solver &K, DiracMatrix &matSloppy, DiracMatrix &matPrecon, SolverParam &param, TimeProfile &profile);
1308 
1309  virtual ~GMResDR();
1310 
1311  //GMRES-DR solver
1312  void operator()(ColorSpinorField &out, ColorSpinorField &in);
1313  //
1314  //GMRESDR method
1315  void RunDeflatedCycles (ColorSpinorField *out, ColorSpinorField *in, const double tol_threshold);
1316  //
1317  int FlexArnoldiProcedure (const int start_idx, const bool do_givens);
1318 
1319  void RestartVZH();
1320 
1321  void UpdateSolution(ColorSpinorField *x, ColorSpinorField *r, bool do_gels);
1322 
1323  };
1324 
1325 } // namespace quda
1326 
QudaCABasis ca_basis
Definition: invert_quda.h:208
double secs
Definition: quda.h:251
double iter_res_offset[QUDA_MAX_MULTI_SHIFT]
Definition: invert_quda.h:184
Complex * alpha
Definition: invert_quda.h:1001
bool mg_instance
whether to use a global or local (node) reduction for this solver
Definition: invert_quda.h:248
double iter_res_offset[QUDA_MAX_MULTI_SHIFT]
Definition: quda.h:188
enum QudaPreserveSource_s QudaPreserveSource
ColorSpinorField * x_sloppy_saved_p
Definition: invert_quda.h:782
bool global_reduction
whether the solver acting as a preconditioner for another solver
Definition: invert_quda.h:243
double tol_hq_offset[QUDA_MAX_MULTI_SHIFT]
Definition: quda.h:182
ColorSpinorField * yp
Full precision residual.
Definition: invert_quda.h:775
QudaSchwarzType schwarz_type
Definition: invert_quda.h:217
cudaColorSpinorField * r
Definition: invert_quda.h:1039
Communication-avoiding CG solver. This solver does un-preconditioned CG, running in steps of nKrylov...
Definition: invert_quda.h:891
const DiracMatrix & mat
Definition: invert_quda.h:610
QudaVerbosity verbosity_precondition
Definition: invert_quda.h:239
void operator()(ColorSpinorField &x, ColorSpinorField &b)
Definition: invert_quda.h:1087
virtual double flops() const
Definition: invert_quda.h:563
enum QudaPrecision_s QudaPrecision
ColorSpinorField * Az
temporary for mat-vec
Definition: invert_quda.h:1242
void updateRhsIndex(QudaInvertParam &param)
Definition: invert_quda.h:453
DiracMMdag mmdag
Definition: invert_quda.h:645
ColorSpinorField * p_pre
residual passed to preconditioner
Definition: invert_quda.h:1244
DiracMatrix & matSloppy
Definition: invert_quda.h:1229
Multi-Shift Conjugate Gradient Solver.
Definition: invert_quda.h:1121
enum QudaResidualType_s QudaResidualType
ColorSpinorField * r0_saved_p
Sloppy solution vector.
Definition: invert_quda.h:783
DiracMdagM mdagm
Definition: invert_quda.h:661
#define QUDA_MAX_MULTI_SHIFT
Maximum number of shifts supported by the multi-shift solver. This number may be changed if need be...
QudaInverterType inv_type
Definition: invert_quda.h:22
ColorSpinorField * tmpp
Definition: invert_quda.h:1241
std::vector< ColorSpinorField * > p
sloppy residual vector
Definition: invert_quda.h:846
DiracMMdag mmdagSloppy
Definition: invert_quda.h:956
DiracMatrix & mat
Definition: invert_quda.h:1228
QudaInverterType inv_type
Definition: quda.h:103
Complex * Q_AS
Definition: invert_quda.h:902
SolverParam Kparam
Definition: invert_quda.h:824
ColorSpinorField * yp
Definition: invert_quda.h:632
const DiracMatrix & matSloppy
Definition: invert_quda.h:574
const DiracMatrix & mat
Definition: invert_quda.h:1124
ColorSpinorField * rp
Definition: invert_quda.h:1291
TimeProfile & profile
preconditioner result
Definition: invert_quda.h:1298
QudaPrecision & cuda_prec
cudaColorSpinorField * yp
Definition: invert_quda.h:732
ColorSpinorField * Ap
Definition: invert_quda.h:1240
std::vector< ColorSpinorField * > AQ
Definition: invert_quda.h:916
const DiracMatrix & matSloppy
Definition: invert_quda.h:895
Communication-avoiding GCR solver. This solver does un-preconditioned GCR, first building up a polyno...
Definition: invert_quda.h:990
ColorSpinorField * yp
Definition: invert_quda.h:648
cudaColorSpinorField * xx
Definition: invert_quda.h:1054
QudaPrecision precision_refinement_sloppy
Definition: invert_quda.h:148
Complex * alpha
Definition: invert_quda.h:903
const DiracMatrix & mat
Definition: invert_quda.h:993
Complex * beta
Definition: invert_quda.h:904
double tol_offset[QUDA_MAX_MULTI_SHIFT]
Definition: invert_quda.h:175
double * gamma
Definition: invert_quda.h:833
TimeProfile & profile
Definition: invert_quda.h:464
SolverParam(const QudaInvertParam &param)
Definition: invert_quda.h:271
static int R[4]
double offset[QUDA_MAX_MULTI_SHIFT]
Definition: invert_quda.h:172
const DiracMatrix & mat
Definition: invert_quda.h:573
ColorSpinorFieldSet * Vm
Definition: invert_quda.h:1288
QudaPrecision & cuda_prec_refinement_sloppy
double reliable_delta
Definition: test_util.cpp:1658
QudaCABasis basis
Definition: invert_quda.h:899
TimeProfile & profile
Definition: invert_quda.h:1107
std::vector< ColorSpinorField * > u
Definition: invert_quda.h:779
enum QudaComputeNullVector_s QudaComputeNullVector
std::vector< ColorSpinorField * > S
Definition: invert_quda.h:912
double true_res
Definition: quda.h:126
QudaPrecision precision_ritz
Definition: invert_quda.h:227
QudaInverterType inv_type_precondition
Definition: invert_quda.h:28
QudaPreserveSource preserve_source
Definition: invert_quda.h:154
int max_res_increase_total
Definition: invert_quda.h:96
std::vector< ColorSpinorField * > defl_tmp1
Definition: invert_quda.h:547
Complex ** beta
Definition: invert_quda.h:832
const DiracMatrix & matSloppy
Definition: invert_quda.h:629
ColorSpinorField * r_fullp
Definition: invert_quda.h:774
ColorSpinorField * r_sloppy
sloppy solution vector
Definition: invert_quda.h:844
double ca_lambda_max
Definition: quda.h:304
DiracMatrix & mat
Definition: invert_quda.h:729
ColorSpinorField * xp
Definition: invert_quda.h:647
ColorSpinorField * bp
Definition: invert_quda.h:663
std::string solver_name
Definition: invert_quda.h:807
QudaPrecision & cuda_prec_precondition
DiracMatrix & mat
Definition: invert_quda.h:758
bool hermitian
Whether to compute q = Ap or assume it is provided.
Definition: invert_quda.h:1177
Complex * alpha
Definition: invert_quda.h:831
ColorSpinorField * yp
Definition: invert_quda.h:958
const DiracMatrix & mat
Definition: invert_quda.h:675
double true_res_hq_offset[QUDA_MAX_MULTI_SHIFT]
Definition: quda.h:191
std::vector< ColorSpinorField * > Qtmp
Definition: invert_quda.h:915
double ca_lambda_min
Definition: quda.h:301
ColorSpinorField * yp
residual vector
Definition: invert_quda.h:1292
QudaGaugeParam param
Definition: pack_test.cpp:17
ColorSpinorField * rp
Definition: invert_quda.h:1003
DiracMdagM mdagmSloppy
Definition: invert_quda.h:662
bool init
Definition: invert_quda.h:578
GMResDRArgs * gmresdr_args
Definition: invert_quda.h:1300
ColorSpinorField * tmp_sloppy
Definition: invert_quda.h:909
QudaPrecision & cuda_prec_ritz
DiracMatrix & mat
Definition: invert_quda.h:745
std::vector< ColorSpinorField * > defl_tmp2
Definition: invert_quda.h:548
TimeProfile & profile
whether A is hermitian ot not
Definition: invert_quda.h:1178
QudaPrecision & cuda_prec_sloppy
QudaComputeNullVector compute_null_vector
Definition: invert_quda.h:67
Complex * gamma_prime_prime
Definition: invert_quda.h:768
QudaSolutionType solution_type
Definition: test_util.cpp:1664
Solver * K
Definition: invert_quda.h:823
const DiracMatrix & matPrecon
Definition: invert_quda.h:693
ColorSpinorField * bp
Definition: invert_quda.h:973
const DiracMatrix & matSloppy
Definition: invert_quda.h:867
ColorSpinorField * tmp_sloppy2
Definition: invert_quda.h:910
QudaResidualType residual_type
Definition: invert_quda.h:49
const DiracMatrix & matSloppy
Definition: invert_quda.h:1125
double true_res_hq_offset[QUDA_MAX_MULTI_SHIFT]
Definition: invert_quda.h:187
void pushOutputPrefix(const char *prefix)
Push a new output prefix onto the stack.
Definition: util_quda.cpp:105
int max_hq_res_restart_total
Definition: invert_quda.h:106
void updateInvertParam(QudaInvertParam &param, int offset=-1)
Definition: invert_quda.h:428
SolverParam Kparam
Definition: invert_quda.h:1286
const DiracMatrix & matSloppy
Definition: invert_quda.h:711
virtual void prepare(ColorSpinorField *&src, ColorSpinorField *&sol, ColorSpinorField &x, ColorSpinorField &b, const QudaSolutionType) const =0
double tol_offset[QUDA_MAX_MULTI_SHIFT]
Definition: quda.h:179
double true_res_offset[QUDA_MAX_MULTI_SHIFT]
Definition: quda.h:185
cpuColorSpinorField * in
int nvec[QUDA_MAX_MG_LEVEL]
Definition: test_util.cpp:1637
double gflops
Definition: quda.h:250
QudaSiteSubset SiteSubset() const
int max_search_dim
Definition: test_util.cpp:1708
TimeProfile & profile
Definition: invert_quda.h:1248
constexpr int size
const DiracMatrix & mat
Definition: invert_quda.h:1053
ColorSpinorField * rp
Definition: invert_quda.h:906
const DiracMatrix & mat
Definition: invert_quda.h:691
DiracMatrix & matPrecon
Definition: invert_quda.h:1283
#define warningQuda(...)
Definition: util_quda.h:133
DiracMdagM mdagmSloppy
Definition: invert_quda.h:972
double true_res_hq
Definition: quda.h:127
ColorSpinorField * Arp
Definition: invert_quda.h:870
std::vector< ColorSpinorField * > Ap
Definition: invert_quda.h:847
enum QudaSolutionType_s QudaSolutionType
bool is_preconditioner
verbosity to use for preconditioner
Definition: invert_quda.h:241
DiracDagger matDagSloppy
Definition: invert_quda.h:630
ColorSpinorField * tmpp
Definition: invert_quda.h:907
enum QudaSchwarzType_s QudaSchwarzType
void popOutputPrefix()
Pop the output prefix restoring the prior one on the stack.
Definition: util_quda.cpp:121
std::vector< ColorSpinorField * > q
Definition: invert_quda.h:1008
ColorSpinorField * yp
residual vector
Definition: invert_quda.h:841
int gcrNkrylov
Definition: test_util.cpp:1630
double true_res_offset[QUDA_MAX_MULTI_SHIFT]
Definition: invert_quda.h:181
const DiracMatrix & mat
Definition: invert_quda.h:894
std::vector< ColorSpinorField * > AS
Definition: invert_quda.h:913
std::complex< double > Complex
Definition: quda_internal.h:46
double offset[QUDA_MAX_MULTI_SHIFT]
Definition: quda.h:176
EigenSolver * eig_solve
Definition: invert_quda.h:545
cudaColorSpinorField * Ar
Definition: invert_quda.h:1038
std::vector< ColorSpinorField * > r
Sloppy temporary vector.
Definition: invert_quda.h:778
std::vector< ColorSpinorField * > Q
Definition: invert_quda.h:914
ColorSpinorField * rp
Definition: invert_quda.h:868
DiracMMdag mmdagSloppy
Definition: invert_quda.h:646
bool init
Definition: invert_quda.h:874
std::vector< Complex > evals
Definition: invert_quda.h:61
const DiracMatrix & matSloppy
Definition: invert_quda.h:759
QudaEigParam eig_param
Definition: invert_quda.h:55
ColorSpinorField * rp
Definition: invert_quda.h:1237
QudaCABasis basis
Definition: invert_quda.h:999
ColorSpinorField * tmp_sloppy
Definition: invert_quda.h:1005
double tol_precondition
Definition: invert_quda.h:199
cudaColorSpinorField * y
Definition: invert_quda.h:1040
QudaExtLibType extlib_type
Definition: invert_quda.h:251
int V
Definition: test_util.cpp:27
ColorSpinorField * tempp
Full precision temporary.
Definition: invert_quda.h:777
std::vector< ColorSpinorField * > p
Definition: invert_quda.h:577
QudaPrecision precision_precondition
Definition: invert_quda.h:151
bool lambda_init
Definition: invert_quda.h:898
std::vector< ColorSpinorField * > evecs
Definition: invert_quda.h:58
QudaPrecision precision
Definition: invert_quda.h:142
ColorSpinorField * y_sloppy
temporary for mat-vec
Definition: invert_quda.h:843
void orthoDir(Complex **beta, std::vector< ColorSpinorField *> Ap, int k, int pipeline)
ColorSpinorField * tmp_sloppy
Definition: invert_quda.h:872
std::vector< ColorSpinorField * > p
Definition: invert_quda.h:1007
SolverParam & param
Definition: invert_quda.h:463
void operator()(std::vector< ColorSpinorField *> out, ColorSpinorField &in)
Run multi-shift and return Krylov-space at the end of the solve in p and r2_old_arry.
Definition: invert_quda.h:1146
ColorSpinorFieldSet * Zm
Definition: invert_quda.h:1289
cpuColorSpinorField * out
ColorSpinorField * yp
Definition: invert_quda.h:576
const DiracMatrix & matSloppy
Definition: invert_quda.h:994
ColorSpinorField * r_pre
Definition: invert_quda.h:1243
Conjugate-Gradient Solver.
Definition: invert_quda.h:570
bool apply_mat
Whether to construct an orthogonal basis or not.
Definition: invert_quda.h:1176
This computes the optimum guess for the system Ax=b in the L2 residual norm. For use in the HMD force...
Definition: invert_quda.h:1171
DiracMatrix & matSloppy
Definition: invert_quda.h:1282
Main header file for the QUDA library.
DiracMatrix & mat
Definition: invert_quda.h:1281
enum QudaCABasis_s QudaCABasis
void * preconditioner
Definition: invert_quda.h:33
ColorSpinorField * yp
residual vector
Definition: invert_quda.h:1238
ColorSpinorField * r_sloppy
Definition: invert_quda.h:869
EigCGArgs * eigcg_args
preconditioner result
Definition: invert_quda.h:1246
MultiShiftSolver(SolverParam &param, TimeProfile &profile)
Definition: invert_quda.h:1110
ColorSpinorField * tmpp2
Definition: invert_quda.h:908
SolverParam Kparam
Definition: invert_quda.h:696
void operator()(ColorSpinorField &out, ColorSpinorField &in)
Run CG.
Definition: invert_quda.h:588
ColorSpinorField * r_sloppy_saved_p
Shadow residual, in BiCG language.
Definition: invert_quda.h:784
const DiracMatrix & mat
Definition: invert_quda.h:628
int reliable(double &rNorm, double &maxrx, double &maxrr, const double &r2, const double &delta)
ColorSpinorField * tmpp
Definition: invert_quda.h:871
ColorSpinorField * tmpp
high precision accumulator
Definition: invert_quda.h:842
DiracMatrix & matPrecon
Definition: invert_quda.h:1230
double * sigma
Definition: invert_quda.h:770
PreconditionedSolver(Solver &solver, const Dirac &dirac, SolverParam &param, TimeProfile &profile, const char *prefix)
Definition: invert_quda.h:1076
DiracMdagM mdagm
Definition: invert_quda.h:971
const DiracMatrix & mat
Definition: invert_quda.h:819
ColorSpinorField * rp
Definition: invert_quda.h:840
const DiracMatrix & mat
Definition: invert_quda.h:866
bool use_alternative_reliable
Definition: invert_quda.h:73
const DiracMatrix & matPrecon
Definition: invert_quda.h:712
ColorSpinorField * p
high precision accumulator
Definition: invert_quda.h:1239
ColorSpinorField * x_sloppy
Definition: invert_quda.h:873
const DiracMatrix & matPrecon
Definition: invert_quda.h:821
ColorSpinorField * p_pre
residual passed to preconditioner
Definition: invert_quda.h:1296
ColorSpinorField * tmpp
Definition: invert_quda.h:1004
QudaUseInitGuess use_init_guess
Definition: invert_quda.h:64
cudaColorSpinorField * bx
Definition: invert_quda.h:1055
enum QudaVerbosity_s QudaVerbosity
const DiracMatrix & matSloppy
Definition: invert_quda.h:820
SolverParam(const SolverParam &param)
Definition: invert_quda.h:346
QudaPrecision precision_sloppy
Definition: invert_quda.h:145
bool use_sloppy_partial_accumulator
Definition: invert_quda.h:76
ColorSpinorField * tmpp
high precision accumulator
Definition: invert_quda.h:1293
ColorSpinorField * yp
Definition: invert_quda.h:613
SolverParam Kparam
Definition: invert_quda.h:1233
void reduceDouble(double &)
int solution_accumulator_pipeline
Definition: invert_quda.h:86
const DiracMatrix & matSloppy
Definition: invert_quda.h:611
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
virtual void reconstruct(ColorSpinorField &x, const ColorSpinorField &b, const QudaSolutionType) const =0
ColorSpinorField * r_pre
sloppy residual vector
Definition: invert_quda.h:1295
const DiracMatrix & mat
Definition: invert_quda.h:1037
DiracMMdag mmdag
Definition: invert_quda.h:955
enum QudaUseInitGuess_s QudaUseInitGuess
const DiracMatrix & mat
Definition: invert_quda.h:1174
ColorSpinorField * xp
Definition: invert_quda.h:957
enum QudaInverterType_s QudaInverterType
const DiracMatrix & matSloppy
Definition: invert_quda.h:692
DiracMatrix & mat
Definition: invert_quda.h:710
ColorSpinorFieldSet * Vm
Definition: invert_quda.h:1235
double tol_hq_offset[QUDA_MAX_MULTI_SHIFT]
Definition: invert_quda.h:178
enum QudaExtLibType_s QudaExtLibType
Complex ** tau
Definition: invert_quda.h:769
void updateR()
update the radius for halos.
Complex * Q_AQandg
Definition: invert_quda.h:901
ColorSpinorField * yp
Definition: invert_quda.h:715
ColorSpinorField * r_sloppy
temporary for mat-vec
Definition: invert_quda.h:1294