QUDA  1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
inv_gmresdr_quda.cpp
Go to the documentation of this file.
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <math.h>
4 #include <string.h>
5 #include <iostream>
6 
7 #include <quda_internal.h>
8 #include <color_spinor_field.h>
9 #include <blas_quda.h>
10 #include <dslash_quda.h>
11 #include <invert_quda.h>
12 #include <util_quda.h>
13 
14 #ifdef MAGMA_LIB
15 #include <blas_magma.h>
16 #endif
17 
18 #include <algorithm>
19 #include <memory>
20 
21 #include <Eigen/Dense>
22 #include <Eigen/Eigenvalues>
23 
24 
25 /*
26 GMRES-DR algorithm:
27 R. B. Morgan, "GMRES with deflated restarting", SIAM J. Sci. Comput. 24 (2002) p. 20-37
28 See also: A.Frommer et al, "Deflation and Flexible SAP-Preconditioning of GMRES in Lattice QCD simulations" ArXiv hep-lat/1204.5463
29 */
30 
31 namespace quda {
32 
33  using namespace blas;
34  using namespace std;
35 
36  using namespace Eigen;
37 
38  using DynamicStride = Stride<Dynamic, Dynamic>;
39 
40  using DenseMatrix = MatrixXcd;
41  using VectorSet = MatrixXcd;
42  using Vector = VectorXcd;
43 
44 //special types needed for compatibility with QUDA blas:
46 
47  struct SortedEvals{
48 
49  double _val;
50  int _idx;
51 
52  SortedEvals(double val, int idx) : _val(val), _idx(idx) {};
53  static bool SelectSmall (SortedEvals v1, SortedEvals v2) { return (v1._val < v2._val);}
54  };
55 
56 
58 
59  class GMResDRArgs{
60 
61  public:
65 
66  int m;
67  int k;
68  int restarts;
69 
71 
72  ColorSpinorFieldSet *Vkp1;//high-precision accumulation array
73 
74  GMResDRArgs(int m, int nev) : ritzVecs(VectorSet::Zero(m+1,nev+1)), H(DenseMatrix::Zero(m+1,m)),
75  eta(Vector::Zero(m)), m(m), k(nev), restarts(0), Vkp1(nullptr) { c = static_cast<Complex*> (ritzVecs.col(k).data()); }
76 
77  inline void ResetArgs() {
78  ritzVecs.setZero();
79  H.setZero();
80  eta.setZero();
81  }
82 
84  if(Vkp1) delete Vkp1;
85  }
86  };
87 
88  template<libtype which_lib> void ComputeHarmonicRitz(GMResDRArgs &args) {errorQuda("\nUnknown library type.\n");}
89 
90  template <> void ComputeHarmonicRitz<libtype::magma_lib>(GMResDRArgs &args)
91  {
92 #ifdef MAGMA_LIB
93  DenseMatrix cH = args.H.block(0, 0, args.m, args.m).adjoint();
94  DenseMatrix Gk = args.H.block(0, 0, args.m, args.m);
95 
96  VectorSet harVecs = MatrixXcd::Zero(args.m, args.m);
97  Vector harVals = VectorXcd::Zero(args.m);
98 
99  Vector em = VectorXcd::Zero(args.m);
100 
101  em(args.m-1) = norm( args.H(args.m, args.m-1) );
102 
103  cudaHostRegister(static_cast<void *>(cH.data()), args.m*args.m*sizeof(Complex), cudaHostRegisterDefault);
104  magma_Xgesv(static_cast<void*>(em.data()), args.m, args.m, static_cast<void*>(cH.data()), args.m, sizeof(Complex));
105  cudaHostUnregister(cH.data());
106 
107  Gk.col(args.m-1) += em;
108 
109  cudaHostRegister(static_cast<void *>(Gk.data()), args.m*args.m*sizeof(Complex), cudaHostRegisterDefault);
110  magma_Xgeev(static_cast<void*>(Gk.data()), args.m, args.m, static_cast<void*>(harVecs.data()), static_cast<void*>(harVals.data()), args.m, sizeof(Complex));
111  cudaHostUnregister(Gk.data());
112 
113  std::vector<SortedEvals> sorted_evals;
114  sorted_evals.reserve(args.m);
115 
116  for(int e = 0; e < args.m; e++) sorted_evals.push_back( SortedEvals( abs(harVals.data()[e]), e ));
117  std::stable_sort(sorted_evals.begin(), sorted_evals.end(), SortedEvals::SelectSmall);
118 
119  for(int e = 0; e < args.k; e++) memcpy(args.ritzVecs.col(e).data(), harVecs.col(sorted_evals[e]._idx).data(), (args.m)*sizeof(Complex));
120 #else
121  errorQuda("Magma library was not built.\n");
122 #endif
123  return;
124  }
125 
126 
127  template <> void ComputeHarmonicRitz<libtype::eigen_lib>(GMResDRArgs &args)
128  {
129 
130  DenseMatrix cH = args.H.block(0, 0, args.m, args.m).adjoint();
131  DenseMatrix Gk = args.H.block(0, 0, args.m, args.m);
132 
133  VectorSet harVecs = MatrixXcd::Zero(args.m, args.m);
134  Vector harVals = VectorXcd::Zero(args.m);
135 
136  Vector em = VectorXcd::Zero(args.m);
137 
138  em(args.m-1) = norm( args.H(args.m, args.m-1) );
139  Gk.col(args.m-1) += cH.colPivHouseholderQr().solve(em);
140 
141  ComplexEigenSolver<DenseMatrix> es( Gk );
142  harVecs = es.eigenvectors();
143  harVals = es.eigenvalues ();
144 
145  std::vector<SortedEvals> sorted_evals;
146  sorted_evals.reserve(args.m);
147 
148  for(int e = 0; e < args.m; e++) sorted_evals.push_back( SortedEvals( abs(harVals.data()[e]), e ));
149  std::stable_sort(sorted_evals.begin(), sorted_evals.end(), SortedEvals::SelectSmall);
150 
151  for(int e = 0; e < args.k; e++) memcpy(args.ritzVecs.col(e).data(), harVecs.col(sorted_evals[e]._idx).data(), (args.m)*sizeof(Complex));
152 
153  return;
154  }
155 
156 
157  template<libtype which_lib> void ComputeEta(GMResDRArgs &args) {errorQuda("\nUnknown library type.\n");}
158 
159  template <> void ComputeEta<libtype::magma_lib>(GMResDRArgs &args) {
160 #ifdef MAGMA_LIB
161  DenseMatrix Htemp(DenseMatrix::Zero(args.m+1,args.m));
162  Htemp = args.H;
163 
164  Complex *ctemp = static_cast<Complex*> (args.ritzVecs.col(0).data());
165  memcpy(ctemp, args.c, (args.m+1)*sizeof(Complex));
166 
167  cudaHostRegister(static_cast<void*>(Htemp.data()), (args.m+1)*args.m*sizeof(Complex), cudaHostRegisterDefault);
168  magma_Xgels(static_cast<void*>(Htemp.data()), ctemp, args.m+1, args.m, args.m+1, sizeof(Complex));
169  cudaHostUnregister(Htemp.data());
170 
171  memcpy(args.eta.data(), ctemp, args.m*sizeof(Complex));
172  memset(ctemp, 0, (args.m+1)*sizeof(Complex));
173 #else
174  errorQuda("MAGMA library was not built.\n");
175 #endif
176  return;
177  }
178 
179  template <> void ComputeEta<libtype::eigen_lib>(GMResDRArgs &args) {
180 
181  Map<VectorXcd, Unaligned> c_(args.c, args.m+1);
182  args.eta = args.H.jacobiSvd(ComputeThinU | ComputeThinV).solve(c_);
183 
184  return;
185  }
186 
188  inner.tol = outer.tol_precondition;
189  inner.maxiter = outer.maxiter_precondition;
190  inner.delta = 1e-20;
191  inner.inv_type = outer.inv_type_precondition;
192 
193  inner.precision = outer.precision_precondition;
195 
196  inner.iter = 0;
197  inner.gflops = 0;
198  inner.secs = 0;
199 
201  inner.is_preconditioner = true;
202  inner.global_reduction = false;
203  if(inner.global_reduction) warningQuda("Set global reduction flag for preconditioner to true.\n");
204 
205  if (outer.precision_sloppy != outer.precision_precondition)
208  }
209 
210 
212  Solver(param, profile), mat(mat), matSloppy(matSloppy), matPrecon(matPrecon), K(nullptr), Kparam(param),
213  Vm(nullptr), Zm(nullptr), profile(profile), gmresdr_args(nullptr), init(false)
214  {
216 
218  K = new CG(matPrecon, matPrecon, Kparam, profile);
220  K = new BiCGstab(matPrecon, matPrecon, matPrecon, Kparam, profile);
221  else if (param.inv_type_precondition == QUDA_MR_INVERTER)
222  K = new MR(matPrecon, matPrecon, Kparam, profile);
223  else if (param.inv_type_precondition == QUDA_SD_INVERTER)
224  K = new SD(matPrecon, Kparam, profile);
225  else if (param.inv_type_precondition == QUDA_INVALID_INVERTER)
226  K = nullptr;
227  else
228  errorQuda("Unsupported preconditioner %d\n", param.inv_type_precondition);
229 
230 
231  return;
232  }
233 
235  Solver(param, profile), mat(mat), matSloppy(matSloppy), matPrecon(matPrecon), K(&K), Kparam(param),
236  Vm(nullptr), Zm(nullptr), profile(profile), gmresdr_args(nullptr), init(false) { }
237 
238 
240  profile.TPSTART(QUDA_PROFILE_FREE);
241 
242  if(init)
243  {
244  delete Vm;
245  Vm = nullptr;
246 
247  delete r_sloppy;
248 
250  {
251  delete r_pre;
252  delete p_pre;
253  }
254 
255  if(K) {
256  delete Zm;
257  delete K;
258  }
259 
260  delete tmpp;
261  delete yp;
262  delete rp;
263 
264  delete gmresdr_args;
265  }
266 
267  profile.TPSTOP(QUDA_PROFILE_FREE);
268  }
269 
270 
272  {
273  GMResDRArgs &args = *gmresdr_args;
274 
275  if(do_gels) {
276  if ( param.extlib_type == QUDA_MAGMA_EXTLIB ) {
277  ComputeEta<libtype::magma_lib>(args);
278  } else if ( param.extlib_type == QUDA_EIGEN_EXTLIB ) {
279  ComputeEta<libtype::eigen_lib>(args);
280  } else {
281  errorQuda("Library type %d is currently not supported.\n", param.extlib_type);
282  }
283  }
284 
285  std::vector<ColorSpinorField*> Z_(Zm->Components().begin(),Zm->Components().begin()+args.m);
286  std::vector<ColorSpinorField*> V_(Vm->Components());
287 
288  std::vector<ColorSpinorField*> x_, r_;
289  x_.push_back(x), r_.push_back(r);
290 
291  blas::caxpy( static_cast<Complex*> ( args.eta.data()), Z_, x_);
292 
293  VectorXcd minusHeta = - (args.H * args.eta);
294  Map<VectorXcd, Unaligned> c_(args.c, args.m+1);
295  c_ += minusHeta;
296 
297  blas::caxpy(static_cast<Complex*>(minusHeta.data()), V_, r_);
298  return;
299  }
300 
301 
303  {
304  GMResDRArgs &args = *gmresdr_args;
305 
306  if ( param.extlib_type == QUDA_MAGMA_EXTLIB ) {
307  ComputeHarmonicRitz<libtype::magma_lib>(args);
308  } else if(param.extlib_type == QUDA_EIGEN_EXTLIB) {
309  ComputeHarmonicRitz<libtype::eigen_lib>(args);
310  } else {
311  errorQuda("Library type %d is currently not supported.\n", param.extlib_type);
312  }
313 
314  DenseMatrix Qkp1(MatrixXcd::Identity((args.m+1), (args.k+1)));
315 
316  HouseholderQR<MatrixXcd> qr(args.ritzVecs);
317  Qkp1.applyOnTheLeft( qr.householderQ());
318 
319  DenseMatrix Res = Qkp1.adjoint()*args.H*Qkp1.topLeftCorner(args.m, args.k);
320  args.H.setZero();
321  args.H.topLeftCorner(args.k+1, args.k) = Res;
322 
323  blas::zero( *args.Vkp1 );
324 
325  std::vector<ColorSpinorField*> vkp1(args.Vkp1->Components());
326  std::vector<ColorSpinorField*> vm (Vm->Components());
327 
328  RowMajorDenseMatrix Alpha(Qkp1);//convert Qkp1 to Row-major format first
329  blas::caxpy(static_cast<Complex*>(Alpha.data()), vm , vkp1);
330 
331  for(int i = 0; i < (args.m+1); i++)
332  {
333  if(i < (args.k+1) )
334  {
335  blas::copy(Vm->Component(i), args.Vkp1->Component(i));
336  blas::zero(args.Vkp1->Component(i));
337  }
338  else blas::zero(Vm->Component(i));
339  }
340 
341  if( Zm->V() != Vm->V() )
342  {
343  std::vector<ColorSpinorField*> z (Zm->Components());
344  std::vector<ColorSpinorField*> vk(args.Vkp1->Components().begin(),args.Vkp1->Components().begin()+args.k);
345 
346  RowMajorDenseMatrix Beta(Qkp1.topLeftCorner(args.m,args.k));
347  blas::caxpy(static_cast<Complex*>(Beta.data()), z , vk);
348 
349  for(int i = 0; i < (args.m); i++)
350  {
351  if( i < (args.k) ) blas::copy(Zm->Component(i), args.Vkp1->Component(i));
352  else blas::zero(Zm->Component(i));
353  }
354  }
355 
356  checkCudaError();
357 
358  for(int j = 0; j < args.k; j++)
359  {
360  Complex alpha = cDotProduct(Vm->Component(j), Vm->Component(args.k));
361  caxpy(-alpha, Vm->Component(j), Vm->Component(args.k));
362  }
363 
364  blas::ax(1.0/ sqrt(blas::norm2(Vm->Component(args.k))), Vm->Component(args.k));
365 
366  args.ritzVecs.setZero();
367  return;
368  }
369 
370 
371 int GMResDR::FlexArnoldiProcedure(const int start_idx, const bool do_givens = false)
372  {
373  int j = start_idx;
374  GMResDRArgs &args = *gmresdr_args;
376 
377  std::unique_ptr<Complex[] > givensH((do_givens) ? new Complex[(args.m+1)*args.m] : nullptr);
378  std::unique_ptr<Complex[] > cn((do_givens) ? new Complex[args.m] : nullptr);
379  std::unique_ptr<double[] > sn((do_givens) ? new double[args.m] : nullptr);
380 
381  Complex c0 = args.c[0];
382 
383  while( j < args.m )
384  {
385  if(K) {
388 
390  zero(outPre);
392  (*K)( outPre ,inPre );
393  popVerbosity();
394 
396  }
397  matSloppy(Vm->Component(j+1), Zm->Component(j), tmp);
398 
399  args.H(0, j) = cDotProduct(Vm->Component(0), Vm->Component(j+1));
400  caxpy(-args.H(0, j), Vm->Component(0), Vm->Component(j+1));
401 
402  Complex h0 = do_givens ? args.H(0, j) : 0.0;
403 
404  for(int i = 1; i <= j; i++)
405  {
406  args.H(i, j) = cDotProduct(Vm->Component(i), Vm->Component(j+1));
407  caxpy(-args.H(i, j), Vm->Component(i), Vm->Component(j+1));
408 
409  if(do_givens) {
410  givensH[(args.m+1)*j+(i-1)] = conj(cn[i-1])*h0 + sn[i-1]*args.H(i,j);
411  h0 = -sn[i-1]*h0 + cn[i-1]*args.H(i,j);
412  }
413  }
414 
415  args.H(j+1, j) = Complex(sqrt(norm2(Vm->Component(j+1))), 0.0);
416  blas::ax( 1.0 / args.H(j+1, j).real(), Vm->Component(j+1));
417  if(do_givens)
418  {
419  double inv_denom = 1.0 / sqrt(norm(h0)+norm(args.H(j+1,j)));
420  cn[j] = h0 * inv_denom;
421  sn[j] = args.H(j+1,j).real() * inv_denom;
422  givensH[j*(args.m+1)+j] = conj(cn[j])*h0 + sn[j]*args.H(j+1,j);
423 
424  args.c[j+1] = -sn[j]*args.c[j];
425  args.c[j] *= conj(cn[j]);
426  }
427 
428  j += 1;
429  }
430 
431  if(do_givens)
432  {
433  Map<MatrixXcd, Unaligned, DynamicStride> givensH_(givensH.get(), args.m, args.m, DynamicStride(args.m+1,1) );
434  memcpy(args.eta.data(), args.c, args.m*sizeof(Complex));
435  memset((void*)args.c, 0, (args.m+1)*sizeof(Complex));
436  args.c[0] = c0;
437 
438  givensH_.triangularView<Upper>().solveInPlace<OnTheLeft>(args.eta);
439 
440  } else {
441  memset((void*)args.c, 0, (args.m+1)*sizeof(Complex));
442 
443  std::vector<ColorSpinorField*> v_(Vm->Components().begin(), Vm->Components().begin()+args.k+1);
444  std::vector<ColorSpinorField*> r_;
445  r_.push_back(static_cast<ColorSpinorField*>(r_sloppy));
446 
447  blas::cDotProduct(args.c, v_, r_);
448 
449  }
450  return (j-start_idx);
451  }
452 
454  {
455  profile.TPSTART(QUDA_PROFILE_INIT);
456 
457  const double tol_threshold = 1.2;
458  const double det_max_deviation = 0.4;
459 
460  ColorSpinorField *ep = nullptr;
461 
462  if (!init) {
463 
465 
467  csParam.create = QUDA_ZERO_FIELD_CREATE;
468  rp = ColorSpinorField::Create(csParam);
469  yp = ColorSpinorField::Create(csParam);
470  ep = ColorSpinorField::Create(csParam);
471 
473 
474  tmpp = ColorSpinorField::Create(csParam);
476 
478 
480  p_pre = ColorSpinorField::Create(csParam);
481  r_pre = ColorSpinorField::Create(csParam);
482 
483  }
484 
486  csParam.is_composite = true;
487  csParam.composite_dim = param.m+1;
488 
489  Vm = ColorSpinorFieldSet::Create(csParam);
490 
491  csParam.composite_dim = param.m;
492 
493  Zm = K ? ColorSpinorFieldSet::Create(csParam) : Vm;
494 
495 
496  csParam.composite_dim = (param.nev+1);
498 
500 
501  init = true;
502  }
503 
504  GMResDRArgs &args = *gmresdr_args;
505 
506  ColorSpinorField &r = *rp;
507  ColorSpinorField &y = *yp;
508  ColorSpinorField &e = *ep;
509 
510  ColorSpinorField &rSloppy = *r_sloppy;
511 
512  profile.TPSTOP(QUDA_PROFILE_INIT);
514 
515  int tot_iters = 0;
516 
517  double normb = norm2( b );
518  double stop = param.tol*param.tol* normb;
519 
520  mat(r, x);
521 
522  double r2 = xmyNorm(b, r);
523  double b2 = r2;
524  args.c[0] = Complex(sqrt(r2), 0.0);
525 
526  printfQuda("\nInitial residual squared: %1.16e, source %1.16e, tolerance %1.16e\n", r2, sqrt(normb), param.tol);
527 
528  rSloppy = r;
529 
531  blas::axpy(1.0 / args.c[0].real(), r, y);
532  Vm->Component(0) = y;
533  blas::zero(y);
534  } else {
535  blas::axpy(1.0 / args.c[0].real(), r, Vm->Component(0));
536  }
537 
539  profile.TPSTART(QUDA_PROFILE_COMPUTE);
540  blas::flops = 0;
541 
542  const bool use_heavy_quark_res = (param.residual_type & QUDA_HEAVY_QUARK_RESIDUAL) ? true : false;
543 
544  double heavy_quark_res = 0.0;
545  if (use_heavy_quark_res) heavy_quark_res = sqrt(blas::HeavyQuarkResidualNorm(x, r).z);
546 
547 
548  int restart_idx = 0, j = 0, check_interval = 4;
549 
550  DenseMatrix Gm = DenseMatrix::Zero(args.k+1, args.k+1);
551 
552  while(restart_idx < param.deflation_grid && !(convergence(r2, heavy_quark_res, stop, param.tol_hq) || !(r2 > stop)))
553  {
554  tot_iters += FlexArnoldiProcedure(j, (j == 0));
555  UpdateSolution(&e, r_sloppy, !(j == 0));
556 
557  r2 = norm2(rSloppy);
558 
559  bool do_clean_restart = false;
560  double ext_r2 = 1.0;
561 
562  if((restart_idx+1) % check_interval) {
563  mat(y, e);
564  ext_r2 = xmyNorm(r, y);
565 
566  // can this be done as a single 2-d reduction?
567  for(int l = 0; l < args.k+1; l++) {
568 
569  Complex *col = Gm.col(l).data();
570 
571  std::vector<ColorSpinorField*> v1_(Vm->Components().begin(), Vm->Components().begin()+args.k+1);
572  std::vector<ColorSpinorField*> v2_;
573  v2_.push_back(static_cast<ColorSpinorField*>(&Vm->Component(l)));
574 
575  blas::cDotProduct(col, v1_, v2_);
576 
577  }//end l-loop
578 
579  Complex detGm = Gm.determinant();
580 
581  PrintStats("FGMResDR:", tot_iters, r2, b2, heavy_quark_res);
582  printfQuda("\nCheck cycle %d, true residual squared %1.15e, Gramm det : (%le, %le)\n", restart_idx, ext_r2, detGm.real(), detGm.imag());
583 
584  Gm.setZero();
585 
586  do_clean_restart = ((sqrt(ext_r2) / sqrt(r2)) > tol_threshold) || fabs(1.0 - (norm(detGm)) > det_max_deviation);
587  }
588 
589  if( ((restart_idx != param.deflation_grid-1) && !do_clean_restart) ) {
590 
591  RestartVZH();
592  j = args.k;
593 
594  } else {
595 
596  printfQuda("\nClean restart for cycle %d, true residual squared %1.15e\n", restart_idx, ext_r2);
597  args.ResetArgs();
598 
599  //update solution:
600  xpy(e, x);
601  r = y;
602  zero(e);
603 
604  args.c[0] = Complex(sqrt(ext_r2), 0.0);
605  blas::zero(Vm->Component(0));
606  blas::axpy(1.0 / args.c[0].real(), rSloppy, Vm->Component(0));
607 
608  j = 0;
609  }
610 
611  restart_idx += 1;
612 
613  }
614 
615  //final solution:
616  xpy(e, x);
617 
620 
622  double gflops = (blas::flops + mat.flops())*1e-9;
623  param.gflops = gflops;
624  param.iter += tot_iters;
625 
626  mat(r, x);
627 
628  param.true_res = sqrt(xmyNorm(b, r) / b2);
629 
630  PrintSummary("FGMResDR:", tot_iters, r2, b2, stop, param.tol_hq);
631 
632  blas::flops = 0;
633  mat.flops();
634 
636 
637  param.rhs_idx += 1;
638 
639  if(ep) delete ep;
640  return;
641  }
642 
643 } // namespace quda
void ax(double a, ColorSpinorField &x)
Definition: blas_quda.cu:508
bool global_reduction
whether the solver acting as a preconditioner for another solver
Definition: invert_quda.h:243
void setPrecision(QudaPrecision precision, QudaPrecision ghost_precision=QUDA_INVALID_PRECISION, bool force_native=false)
QudaVerbosity verbosity_precondition
Definition: invert_quda.h:239
void magma_Xgeev(void *Mat, const int n, const int ldm, void *vr, void *evalues, const int ldv, const int prec)
Definition: blas_magma.cu:275
__host__ __device__ ValueType norm(const complex< ValueType > &z)
Returns the magnitude of z squared.
QudaInverterType inv_type
Definition: invert_quda.h:22
#define errorQuda(...)
Definition: util_quda.h:121
double norm2(const ColorSpinorField &a)
Definition: reduce_quda.cu:721
void init()
Definition: blas_quda.cu:483
int FlexArnoldiProcedure(const int start_idx, const bool do_givens)
__host__ __device__ ValueType sqrt(ValueType x)
Definition: complex_quda.h:120
Complex cDotProduct(ColorSpinorField &, ColorSpinorField &)
Definition: reduce_quda.cu:764
static bool SelectSmall(SortedEvals v1, SortedEvals v2)
ColorSpinorField * rp
Definition: invert_quda.h:1291
TimeProfile & profile
preconditioner result
Definition: invert_quda.h:1298
void PrintStats(const char *name, int k, double r2, double b2, double hq2)
Prints out the running statistics of the solver (requires a verbosity of QUDA_VERBOSE) ...
Definition: solver.cpp:256
cudaColorSpinorField * tmp
Definition: covdev_test.cpp:44
CompositeColorSpinorField & Components()
static ColorSpinorField * Create(const ColorSpinorParam &param)
bool convergence(double r2, double hq2, double r2_tol, double hq_tol)
Definition: solver.cpp:223
SortedEvals(double val, int idx)
ColorSpinorFieldSet * Vkp1
GMResDRArgs(int m, int nev)
void copy(ColorSpinorField &dst, const ColorSpinorField &src)
Definition: copy_quda.cu:355
ColorSpinorFieldSet * Vm
Definition: invert_quda.h:1288
STL namespace.
ColorSpinorField & Component(const int idx) const
double xmyNorm(ColorSpinorField &x, ColorSpinorField &y)
Definition: blas_quda.h:75
GMResDR(DiracMatrix &mat, DiracMatrix &matSloppy, DiracMatrix &matPrecon, SolverParam &param, TimeProfile &profile)
QudaInverterType inv_type_precondition
Definition: invert_quda.h:28
QudaPreserveSource preserve_source
Definition: invert_quda.h:154
Matrix< Complex, Dynamic, Dynamic, RowMajor > RowMajorDenseMatrix
double norm2(const CloverField &a, bool inverse=false)
ColorSpinorField * yp
residual vector
Definition: invert_quda.h:1292
QudaGaugeParam param
Definition: pack_test.cpp:17
GMResDRArgs * gmresdr_args
Definition: invert_quda.h:1300
bool is_composite
for deflation solvers:
double Last(QudaProfileType idx)
Definition: timer.h:251
__device__ __host__ void caxpy(const complex< Float > &a, const complex< Float > &x, complex< Float > &y)
QudaResidualType residual_type
Definition: invert_quda.h:49
void fillFGMResDRInnerSolveParam(SolverParam &inner, const SolverParam &outer)
SolverParam Kparam
Definition: invert_quda.h:1286
Stride< Dynamic, Dynamic > DynamicStride
Definition: deflation.cpp:18
ColorSpinorParam csParam
Definition: pack_test.cpp:24
void axpy(double a, ColorSpinorField &x, ColorSpinorField &y)
Definition: blas_quda.h:35
DiracMatrix & matPrecon
Definition: invert_quda.h:1283
#define warningQuda(...)
Definition: util_quda.h:133
bool is_preconditioner
verbosity to use for preconditioner
Definition: invert_quda.h:241
double3 HeavyQuarkResidualNorm(ColorSpinorField &x, ColorSpinorField &r)
Definition: reduce_quda.cu:809
void operator()(ColorSpinorField &out, ColorSpinorField &in)
std::complex< double > Complex
Definition: quda_internal.h:46
void caxpy(const Complex &a, ColorSpinorField &x, ColorSpinorField &y)
Definition: blas_quda.cu:512
double tol_precondition
Definition: invert_quda.h:199
QudaExtLibType extlib_type
Definition: invert_quda.h:251
void zero(ColorSpinorField &a)
Definition: blas_quda.cu:472
void pushVerbosity(QudaVerbosity verbosity)
Push a new verbosity onto the stack.
Definition: util_quda.cpp:83
QudaPrecision precision_precondition
Definition: invert_quda.h:151
void * memset(void *s, int c, size_t n)
void magma_Xgels(void *Mat, void *c, int rows, int cols, int ldm, const int prec)
Definition: blas_magma.cu:286
QudaPrecision precision
Definition: invert_quda.h:142
SolverParam & param
Definition: invert_quda.h:463
ColorSpinorFieldSet * Zm
Definition: invert_quda.h:1289
Conjugate-Gradient Solver.
Definition: invert_quda.h:570
DiracMatrix & matSloppy
Definition: invert_quda.h:1282
DiracMatrix & mat
Definition: invert_quda.h:1281
int nev
Definition: test_util.cpp:1707
unsigned long long flops() const
Definition: dirac_quda.h:1119
MatrixXcd DenseMatrix
#define printfQuda(...)
Definition: util_quda.h:115
unsigned long long flops
Definition: blas_quda.cu:22
void xpy(ColorSpinorField &x, ColorSpinorField &y)
Definition: blas_quda.h:33
VectorXcd Vector
ColorSpinorField * p_pre
residual passed to preconditioner
Definition: invert_quda.h:1296
__host__ __device__ ValueType abs(ValueType x)
Definition: complex_quda.h:125
void popVerbosity()
Pop the verbosity restoring the prior one on the stack.
Definition: util_quda.cpp:94
QudaPrecision precision_sloppy
Definition: invert_quda.h:145
void UpdateSolution(ColorSpinorField *x, ColorSpinorField *r, bool do_gels)
#define checkCudaError()
Definition: util_quda.h:161
ColorSpinorField * tmpp
high precision accumulator
Definition: invert_quda.h:1293
void PrintSummary(const char *name, int k, double r2, double b2, double r2_tol, double hq_tol)
Prints out the summary of the solver convergence (requires a verbosity of QUDA_SUMMARIZE). Assumes SolverParam.true_res and SolverParam.true_res_hq has been set.
Definition: solver.cpp:270
void ComputeHarmonicRitz(GMResDRArgs &args)
__host__ __device__ ValueType conj(ValueType x)
Definition: complex_quda.h:130
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
ColorSpinorField * r_pre
sloppy residual vector
Definition: invert_quda.h:1295
MatrixXcd VectorSet
__device__ __host__ void zero(vector_type< scalar, n > &v)
Definition: cub_helper.cuh:54
void magma_Xgesv(void *sol, const int ldn, const int n, void *Mat, const int ldm, const int prec)
Definition: blas_magma.cu:265
void ComputeEta(GMResDRArgs &args)
ColorSpinorField * r_sloppy
temporary for mat-vec
Definition: invert_quda.h:1294