QUDA  1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
inv_gcr_quda.cpp
Go to the documentation of this file.
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <math.h>
4 
5 #include <complex>
6 
7 #include <quda_internal.h>
8 #include <blas_quda.h>
9 #include <dslash_quda.h>
10 #include <invert_quda.h>
11 #include <util_quda.h>
12 #include <color_spinor_field.h>
13 
14 #include <sys/time.h>
15 
16 namespace quda {
17 
/**
 * @brief Elapsed time between two timeval samples, in seconds.
 *
 * The seconds and microseconds fields are differenced separately, so a
 * microsecond field that is smaller in @p end than in @p start simply
 * contributes a negative fraction.
 *
 * @param start First time sample
 * @param end   Second time sample
 * @return (end - start) in seconds; negative if end precedes start
 */
double timeInterval(struct timeval start, struct timeval end) {
  // Difference in the fields' own integer types and convert straight to
  // double: storing the differences in `long` would narrow a 64-bit time_t
  // on platforms where long is only 32 bits wide.
  const double whole_seconds = static_cast<double>(end.tv_sec - start.tv_sec);
  const double micro_seconds = static_cast<double>(end.tv_usec - start.tv_usec);
  return whole_seconds + 0.000001 * micro_seconds;
}
23 
24  // set the required parameters for the inner solver
// Derives the parameter set `inner` for a preconditioning (inner) solver
// from the parameters `outer` of the enclosing solver.
//   inner : parameter struct that is overwritten field by field
//   outer : read-only parameters of the outer solver
//
// NOTE(review): listing lines 30, 33, 39, 45, 48, 54 and 56 are absent from
// this extract. In particular the `if (...) {` that the `} else {` at listing
// line 50 belongs to is not visible, so the block as shown is incomplete.
25  void fillInnerSolveParam(SolverParam &inner, const SolverParam &outer) {
 // the inner solve only needs to reach the preconditioner tolerance
26  inner.tol = outer.tol_precondition;
27  inner.delta = 1e-20; // no reliable updates within the inner solver
28 
 // the inner solver's working precision is the outer solver's sloppy precision
29  inner.precision = outer.precision_sloppy;
31 
32  // this sets a fixed iteration count if we're using the MR solver
 // NOTE(review): the statement this comment describes (listing line 33) is missing here
34 
 // reset the inner solver's statistics counters
35  inner.iter = 0;
36  inner.gflops = 0;
37  inner.secs = 0;
38 
40  inner.is_preconditioner = true; // tell inner solver it is a preconditioner
41 
 // global_reduction is enabled only when no Schwarz type is set
42  inner.schwarz_type = outer.schwarz_type;
43  inner.global_reduction = inner.schwarz_type == QUDA_INVALID_SCHWARZ ? true : false;
44 
46 
47  inner.maxiter = outer.maxiter_precondition;
 // NOTE(review): the opening `if` (listing line 48) is missing; one branch
 // derives Nkrylov from maxiter / precondition_cycle, the other sets Nsteps
49  inner.Nkrylov = inner.maxiter / outer.precondition_cycle;
50  } else {
51  inner.Nsteps = outer.precondition_cycle;
52  }
53 
55 
57 
 // the inner solver skips the true-residual computation and may stop on the sloppy residual
58  inner.compute_true_res = false;
59  inner.sloppy_converge = true;
60  }
61 
62  void computeBeta(Complex **beta, std::vector<ColorSpinorField*> Ap, int i, int N, int k) {
63  Complex *Beta = new Complex[N];
64  std::vector<ColorSpinorField*> a(N), b(1);
65  for (int j=0; j<N; j++) {
66  a[j] = Ap[i+j];
67  Beta[j] = 0;
68  }
69  b[0] = Ap[k];
70  blas::cDotProduct(Beta, a, b); // vectorized dot product
71 #if 0
72  for (int j=0; j<N; j++) {
73  printfQuda("%d/%d vectorized %e %e, regular %e %e\n", j+1, N, Beta[j].real(), Beta[j].imag(),
74  blas::cDotProduct(*a[j], *b[j]).real(), blas::cDotProduct(*a[j], *b[j]).imag());
75  }
76 #endif
77 
78  for (int j=0; j<N; j++) beta[i+j][k] = Beta[j];
79  delete [] Beta;
80  }
81 
82  void updateAp(Complex **beta, std::vector<ColorSpinorField*> Ap, int begin, int size, int k) {
83 
84  Complex *beta_ = new Complex[size];
85  for (int i=0; i<size; i++) beta_[i] = -beta[i+begin][k];
86 
87  std::vector<ColorSpinorField*> Ap_(Ap.begin() + begin, Ap.begin() + begin + size);
88  std::vector<ColorSpinorField*> Apk(Ap.begin() + k, Ap.begin() + k + 1);
89 
90  blas::caxpy(beta_, Ap_, Apk);
91 
92  delete []beta_;
93  }
94 
95  void orthoDir(Complex **beta, std::vector<ColorSpinorField*> Ap, int k, int pipeline) {
96 
97  switch (pipeline) {
98  case 0: // no kernel fusion
99  for (int i=0; i<k; i++) { // 5 (k-1) memory transactions here
100  beta[i][k] = blas::cDotProduct(*(Ap[i]), *(Ap[k]));
101  blas::caxpy(-beta[i][k], *Ap[i], *Ap[k]);
102  }
103  break;
104  case 1: // basic kernel fusion
105  if (k==0) break;
106  beta[0][k] = blas::cDotProduct(*Ap[0], *Ap[k]);
107  for (int i=0; i<k-1; i++) { // 4 (k-1) memory transactions here
108  beta[i+1][k] = blas::caxpyDotzy(-beta[i][k], *Ap[i], *Ap[k], *Ap[i+1]);
109  }
110  blas::caxpy(-beta[k-1][k], *Ap[k-1], *Ap[k]);
111  break;
112  default:
113  {
114  const int N = pipeline;
115  for (int i=0; i<k-(N-1); i+=N) {
116  computeBeta(beta, Ap, i, N, k);
117  updateAp(beta, Ap, i, N, k);
118  }
119 
120  if (k%N != 0) { // need to update the remainder
121  for (int r = N-1; r>0; r--) {
122  if ((k%N) % r == 0) { // if true this is the remainder
123  computeBeta(beta, Ap, k-r, r, k);
124  updateAp(beta, Ap, k-r, r, k);
125  break;
126  }
127  }
128  }
129  }
130  break;
131  }
132 
133  }
134 
/**
 * @brief Back substitution for an n x n upper-triangular system.
 *
 * Rows are processed from last to first.  For row k:
 *   delta[k] = (alpha[k] - sum_{j>k} beta[k][j] * delta[j]) / gamma[k]
 * i.e. gamma supplies the diagonal and beta the strictly upper-triangular part.
 *
 * @param alpha Right-hand side (n entries)
 * @param beta  Row pointers of the coefficient matrix; only beta[k][j] with j > k is read
 * @param gamma Diagonal entries (n entries)
 * @param delta Output solution (n entries)
 * @param n     System size; nothing is done when n <= 0
 */
void backSubs(const Complex *alpha, Complex** const beta, const double *gamma, Complex *delta, int n) {
  int row = n;
  while (row-- > 0) {
    const Complex *coeff = beta[row];
    Complex acc = alpha[row];
    // subtract the contributions of the already-solved unknowns, in ascending column order
    for (int col = row + 1; col < n; ++col) {
      acc -= coeff[col] * delta[col];
    }
    acc /= gamma[row];
    delta[row] = acc;
  }
}
144 
145  void updateSolution(ColorSpinorField &x, const Complex *alpha, Complex** const beta,
146  double *gamma, int k, std::vector<ColorSpinorField*> p) {
147 
148  Complex *delta = new Complex[k];
149 
150  // Update the solution vector
151  backSubs(alpha, beta, gamma, delta, k);
152 
153  std::vector<ColorSpinorField*> X;
154  X.push_back(&x);
155 
156  std::vector<ColorSpinorField*> P;
157  for (int i=0; i<k; i++) P.push_back(p[i]);
158  blas::caxpy(delta, P, X);
159 
160  delete []delta;
161  }
162 
// GCR constructor that creates its own inner (preconditioning) solver.
// NOTE(review): the first line of the signature (listing line 163) is absent
// from this extract; only the trailing `TimeProfile &profile` parameter is visible.
164  TimeProfile &profile) :
165  Solver(param, profile), mat(mat), matSloppy(matSloppy), matPrecon(matPrecon), K(0), Kparam(param),
166  nKrylov(param.Nkrylov), init(false), rp(nullptr), yp(nullptr), tmpp(nullptr), y_sloppy(nullptr),
167  r_sloppy(nullptr)
168  {
169 
 // Kparam starts as a copy of param and is then specialised for the inner solver
170  fillInnerSolveParam(Kparam, param);
171 
 // instantiate the inner solver selected by param.inv_type_precondition;
 // QUDA_INVALID_INVERTER leaves K null, any other unlisted value is an error
172  if (param.inv_type_precondition == QUDA_CG_INVERTER) // inner CG solver
173  K = new CG(matSloppy, matPrecon, Kparam, profile);
174  else if (param.inv_type_precondition == QUDA_BICGSTAB_INVERTER) // inner BiCGstab solver
175  K = new BiCGstab(matSloppy, matPrecon, matPrecon, Kparam, profile);
176  else if (param.inv_type_precondition == QUDA_MR_INVERTER) // inner MR solver
177  K = new MR(matSloppy, matPrecon, Kparam, profile);
178  else if (param.inv_type_precondition == QUDA_SD_INVERTER) // inner SD solver
179  K = new SD(matSloppy, Kparam, profile);
180  else if (param.inv_type_precondition == QUDA_CA_GCR_INVERTER) // inner CA-GCR solver
181  K = new CAGCR(matSloppy, matPrecon, Kparam, profile);
182  else if (param.inv_type_precondition == QUDA_INVALID_INVERTER) // unsupported
183  K = NULL;
184  else
185  errorQuda("Unsupported preconditioner %d\n", param.inv_type_precondition);
186 
 // p holds nKrylov+1 field pointers, Ap holds nKrylov; the fields themselves
 // are not created here
187  p.resize(nKrylov+1);
188  Ap.resize(nKrylov);
189 
 // coefficient storage: alpha and gamma have nKrylov entries, beta is nKrylov x nKrylov
190  alpha = new Complex[nKrylov];
191  beta = new Complex*[nKrylov];
192  for (int i=0; i<nKrylov; i++) beta[i] = new Complex[nKrylov];
193  gamma = new double[nKrylov];
194  }
195 
// GCR constructor variant that stores the address of an argument named K
// instead of creating an inner solver (see `K(&K)` in the initialiser list).
// NOTE(review): the signature (listing lines 196-197) is absent from this
// extract, so the type of that argument is not visible here.
198  Solver(param, profile), mat(mat), matSloppy(matSloppy), matPrecon(matPrecon), K(&K), Kparam(param),
199  nKrylov(param.Nkrylov), init(false), rp(nullptr), yp(nullptr), tmpp(nullptr), y_sloppy(nullptr),
200  r_sloppy(nullptr)
201  {
 // p holds nKrylov+1 field pointers, Ap holds nKrylov
202  p.resize(nKrylov+1);
203  Ap.resize(nKrylov);
204 
 // coefficient storage: alpha and gamma have nKrylov entries, beta is nKrylov x nKrylov
205  alpha = new Complex[nKrylov];
206  beta = new Complex*[nKrylov];
207  for (int i=0; i<nKrylov; i++) beta[i] = new Complex[nKrylov];
208  gamma = new double[nKrylov];
209  }
210 
// Destructor body: releases the coefficient arrays, the inner solver and the
// work fields, timed under QUDA_PROFILE_FREE.
// NOTE(review): the signature line (listing line 211) and listing line 221
// are absent from this extract.
212  profile.TPSTART(QUDA_PROFILE_FREE);
 // coefficient storage allocated in the constructors
213  delete []alpha;
214  for (int i=0; i<nKrylov; i++) delete []beta[i];
215  delete []beta;
216  delete []gamma;
217 
 // K is not deleted when the preconditioner type is QUDA_MG_INVERTER
 // (presumably because that solver is owned elsewhere -- TODO confirm)
218  if (K && param.inv_type_precondition != QUDA_MG_INVERTER) delete K;
219 
 // sloppy fields are separate allocations only when the sloppy precision
 // differs from that of yp; r_sloppy may alias rp and is then freed via rp below
220  if (init && param.precision_sloppy != yp->Precision()) {
222  if (r_sloppy && r_sloppy != rp) delete r_sloppy;
223  }
224 
 // search directions and their images
225  for (int i=0; i<nKrylov+1; i++) if (p[i]) delete p[i];
226  for (int i=0; i<nKrylov; i++) if (Ap[i]) delete Ap[i];
227 
228  if (tmpp) delete tmpp;
229  if (rp) delete rp;
230  if (yp) delete yp;
231 
 // deflation resources exist only if the deflation space was initialised
232  if (deflate_init) {
233  for (auto veci : param.evecs)
234  if (veci) delete veci;
235  delete defl_tmp1[0];
236  delete defl_tmp2[0];
237  }
238  profile.TPSTOP(QUDA_PROFILE_FREE);
239  }
240 
// Body of the GCR solve routine. The code below reads a source `b` and
// updates a solution `x`.
// NOTE(review): the signature (listing line 241) and numerous body lines
// (e.g. 245, 252, 261, 265, 268-269, 285, 291, 297, 316, 331, 361, 377, 387,
// 399, 407, 479-480, 482, 497-498, 502, 516) are absent from this extract, so
// several braces and identifiers (csParam, tmp) have no visible counterpart.
242  {
243  if (nKrylov == 0) {
244  // Krylov space is zero-dimensional so return doing no work
246  return;
247  }
248 
249  profile.TPSTART(QUDA_PROFILE_INIT);
250 
 // one-time allocation of the work fields on the first call
251  if (!init) {
253  csParam.create = QUDA_NULL_FIELD_CREATE;
254 
 // a separate residual field is needed when preconditioning or when the
 // sloppy precision differs from that of x; otherwise p[0] is used (see below)
255  rp = (K || x.Precision() != param.precision_sloppy) ? ColorSpinorField::Create(csParam) : nullptr;
256 
257  // high precision accumulator
258  yp = ColorSpinorField::Create(csParam);
259 
260  // create sloppy fields used for orthogonalization
262  for (int i = 0; i < nKrylov + 1; i++) p[i] = ColorSpinorField::Create(csParam);
263  for (int i=0; i<nKrylov; i++) Ap[i] = ColorSpinorField::Create(csParam);
264 
266  tmpp = ColorSpinorField::Create(csParam); //temporary for sloppy mat-vec
267 
 // NOTE(review): the `if` matching this `else` is not visible; in this
 // branch the sloppy accumulator aliases the high-precision one
270  } else {
271  y_sloppy = yp;
272  }
273 
 // r_sloppy exists only when an inner solver K is present; it is a separate
 // field when the sloppy precision differs from x, otherwise it aliases rp
274  if (param.precision_sloppy != x.Precision()) {
275  r_sloppy = K ? ColorSpinorField::Create(csParam) : nullptr;
276  } else {
277  r_sloppy = K ? rp : nullptr;
278  }
279 
280  init = true;
281  }
282 
283  // Once the GCR operator is called, we are able to construct an appropriate
284  // Krylov space for deflation
286 
 // fall back to p[0] as the residual when no dedicated field was allocated
287  ColorSpinorField &r = rp ? *rp : *p[0];
288  ColorSpinorField &rSloppy = r_sloppy ? *r_sloppy : *p[0];
289  ColorSpinorField &y = *yp;
290  ColorSpinorField &ySloppy = *y_sloppy;
292 
293  double b2 = blas::norm2(b); // norm sq of source
294  double r2; // norm sq of residual
295 
296  // compute initial residual depending on whether we have an initial guess or not
 // NOTE(review): the condition opening this if/else (listing line 297) is missing
298  // Compute r = b - A * x
299  mat(r, x, y);
300  r2 = blas::xmyNorm(b, r);
301  // x contains the original guess.
302  } else {
303  blas::copy(r, b);
304  r2 = b2;
305  blas::zero(x);
306  }
307 
308  if (param.deflate == true) {
309  std::vector<ColorSpinorField *> rhs;
310  // Use residual from supplied guess r, or original
311  // rhs b. use `defl_tmp2` as a temp.
312  blas::copy(*defl_tmp2[0], r);
313  rhs.push_back(defl_tmp2[0]);
314 
315  // Deflate: Hardcoded to SVD
 // NOTE(review): the deflation call itself (listing line 316) is missing here
317 
318  // Compute r_defl = RHS - A * LHS
319  mat(r, *defl_tmp1[0]);
320  r2 = blas::xmyNorm(*rhs[0], r);
321 
322  // defl_tmp must be added to the solution at the end
323  blas::axpy(1.0, *defl_tmp1[0], x);
324  }
325 
326  blas::zero(y); // FIXME optimize first updates of y and ySloppy
327  if (&y != &ySloppy) blas::zero(ySloppy);
328 
329  // Check to see that we're not trying to invert on a zero-field source
330  if (b2 == 0) {
 // NOTE(review): an inner condition (listing line 331) is missing; the
 // visible branch returns x = b with zero residuals, the other sets b2 = r2
332  profile.TPSTOP(QUDA_PROFILE_INIT);
333  warningQuda("inverting on zero-field source\n");
334  x = b;
335  param.true_res = 0.0;
336  param.true_res_hq = 0.0;
337  return;
338  } else {
339  b2 = r2;
340  }
341  }
342 
343  double stop = stopping(param.tol, b2, param.residual_type); // stopping condition of solver
344 
345  const bool use_heavy_quark_res =
346  (param.residual_type & QUDA_HEAVY_QUARK_RESIDUAL) ? true : false;
347 
348  // this parameter determines how many consecutive reliable update
349  // residual increases we tolerate before terminating the solver,
350  // i.e., how long do we want to keep trying to converge
351  const int maxResIncrease = param.max_res_increase; // check if we reached the limit of our tolerance
352  const int maxResIncreaseTotal = param.max_res_increase_total;
353 
354  double heavy_quark_res = 0.0; // heavy quark residual
355  if(use_heavy_quark_res) heavy_quark_res = sqrt(blas::HeavyQuarkResidualNorm(x,r).z);
356 
357  int resIncrease = 0;
358  int resIncreaseTotal = 0;
359 
360  profile.TPSTOP(QUDA_PROFILE_INIT);
362 
363  blas::flops = 0;
364 
365  blas::copy(rSloppy, r);
366 
 // total_iter counts all iterations; restart counts restarts of the Krylov space
367  int total_iter = 0;
368  int restart = 0;
369  double r2_old = r2;
370  bool l2_converge = false;
371 
372  int pipeline = param.pipeline;
373  // Vectorized dot product only has limited support so work around
374  if (Ap[0]->Location() == QUDA_CPU_FIELD_LOCATION || pipeline == 0) pipeline = 1;
375  if (pipeline > nKrylov) pipeline = nKrylov;
376 
378  profile.TPSTART(QUDA_PROFILE_COMPUTE);
379 
 // k is the index within the current Krylov cycle (reset to 0 on every update);
 // k_break remembers the value of k at the most recent update
380  int k = 0;
381  int k_break = 0;
382 
383  PrintStats("GCR", total_iter+k, r2, b2, heavy_quark_res);
384  while ( !convergence(r2, heavy_quark_res, stop, param.tol_hq) && total_iter < param.maxiter) {
385 
 // with an inner solver, the new direction p[k] is the result of applying K to the sloppy residual
386  if (K) {
388  (*K)(*p[k], rSloppy);
389  popVerbosity();
390 
391  // relaxation p = omega*p + (1-omega)*r
392  //if (param.omega!=1.0) blas::axpby((1.0-param.omega), rPre, param.omega, pPre);
393  } else {
394  // no preconditioner
395  }
396 
 // NOTE(review): `tmp` is not declared in the visible lines (listing line 291 is missing)
397  matSloppy(*Ap[k], *p[k], tmp);
398 
 // NOTE(review): the verbosity guard for this debug print (listing line 399) is missing
400  printfQuda("GCR debug iter=%d: Ap2=%e, p2=%e, r2=%e\n",
401  total_iter, blas::norm2(*Ap[k]), blas::norm2(*p[k]), blas::norm2(rSloppy));
402 
 // orthogonalise Ap[k] against Ap[0..k-1], filling column k of beta
403  orthoDir(beta, Ap, k, pipeline);
404 
 // without an inner solver, p[k] doubles as the residual vector
405  double3 Apr = blas::cDotProductNormA(*Ap[k], K ? rSloppy : *p[k]);
406 
 // NOTE(review): the `if (...) {` closed at listing line 413 (listing line 407) is missing
408  printfQuda("GCR debug iter=%d: Apr=(%e,%e,%e)\n", total_iter, Apr.x, Apr.y, Apr.z);
409  for (int i=0; i<k; i++)
410  for (int j=0; j<=k; j++)
411  printfQuda("GCR debug iter=%d: beta[%d][%d] = (%e,%e)\n",
412  total_iter, i, j, real(beta[i][j]), imag(beta[i][j]));
413  }
414 
415  gamma[k] = sqrt(Apr.z); // gamma[k] = |Ap[k]| (Apr.z presumably holds the squared norm of Ap[k])
416  if (gamma[k] == 0.0) errorQuda("GCR breakdown\n");
417  alpha[k] = Complex(Apr.x, Apr.y) / gamma[k]; // alpha = (1/|Ap|) * (Ap, r)
418 
419  // r -= (1/|Ap|^2) * (Ap, r) r, Ap *= 1/|Ap|
 // without an inner solver the updated residual is written to p[k+1], i.e. it becomes the next direction
420  r2 = blas::cabxpyzAxNorm(1.0 / gamma[k], -alpha[k], *Ap[k], K ? rSloppy : *p[k], K ? rSloppy : *p[k + 1]);
421 
422  k++;
423  total_iter++;
424 
425  PrintStats("GCR", total_iter, r2, b2, heavy_quark_res);
426 
427  // update since nKrylov or maxiter reached, converged or reliable update required
428  // note that the heavy quark residual will by definition only be checked every nKrylov steps
429  if (k==nKrylov || total_iter==param.maxiter || (r2 < stop && !l2_converge) || sqrt(r2/r2_old) < param.delta) {
430 
431  // update the solution vector
432  updateSolution(ySloppy, alpha, beta, gamma, k, p);
433 
434  // recalculate residual in high precision
435  blas::xpy(ySloppy, x);
436 
 // with sloppy_converge set, stop here without recomputing the true residual
437  if ( (r2 < stop || total_iter==param.maxiter) && param.sloppy_converge) break;
438  mat(r, x, y);
439  r2 = blas::xmyNorm(b, r);
440 
441  if (use_heavy_quark_res) heavy_quark_res = sqrt(blas::HeavyQuarkResidualNorm(x, r).z);
442 
443  // break-out check if we have reached the limit of the precision
444  if (r2 > r2_old) {
445  resIncrease++;
446  resIncreaseTotal++;
447  warningQuda("GCR: new reliable residual norm %e is greater than previous reliable residual norm %e (total #inc %i)",
448  sqrt(r2), sqrt(r2_old), resIncreaseTotal);
449  if (resIncrease > maxResIncrease or resIncreaseTotal > maxResIncreaseTotal) {
450  warningQuda("GCR: solver exiting due to too many true residual norm increases");
451  break;
452  }
453  } else {
454  resIncrease = 0;
455  }
456 
 // start a new Krylov cycle, remembering where this one ended
457  k_break = k;
458  k = 0;
459 
460  if ( !convergence(r2, heavy_quark_res, stop, param.tol_hq) ) {
461  restart++; // restarting if residual is still too great
462 
463  PrintStats("GCR (restart)", restart, r2, b2, heavy_quark_res);
464  blas::copy(rSloppy, r);
465  blas::zero(ySloppy);
466 
467  r2_old = r2;
468 
469  // prevent ending the Krylov space prematurely if other convergence criteria not met
470  if (r2 < stop) l2_converge = true;
471  }
472 
 // NOTE(review): redundant with the assignment at listing line 467 when the restart branch ran
473  r2_old = r2;
474 
475  }
476 
477  }
478 
481 
483 
 // accumulate the flop counts of BLAS, the three operators and the inner solver
484  double gflops = (blas::flops + mat.flops() + matSloppy.flops() + matPrecon.flops())*1e-9;
485  if (K) gflops += K->flops()*1e-9;
486 
 // NOTE(review): this compares the in-cycle index k (reset to 0 at every
 // update) against maxiter; total_iter looks like the intended counter -- confirm
487  if (k>=param.maxiter && getVerbosity() >= QUDA_SUMMARIZE)
488  warningQuda("Exceeded maximum iterations %d", param.maxiter);
489 
490  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("GCR: number of restarts = %d\n", restart);
491 
492  if (param.compute_true_res) {
493  // Calculate the true residual
494  mat(r, x, y);
495  double true_res = blas::xmyNorm(b, r);
496  param.true_res = sqrt(true_res / b2);
 // NOTE(review): the `if` matching this `else` (listing lines 497-498) is missing
499  else
500  param.true_res_hq = 0.0;
501 
503  } else {
 // when the source need not be preserved, b is overwritten with the last residual vector
504  if (param.preserve_source == QUDA_PRESERVE_SOURCE_NO) blas::copy(b, K ? rSloppy : *p[k_break]);
505  }
506 
507  param.gflops += gflops;
508  param.iter += total_iter;
509 
510  // reset the flops counters
511  blas::flops = 0;
512  mat.flops();
513  matSloppy.flops();
514  matPrecon.flops();
515 
517  profile.TPSTART(QUDA_PROFILE_FREE);
518 
519  PrintSummary("GCR", total_iter, r2, b2, stop, param.tol_hq);
520 
521  profile.TPSTOP(QUDA_PROFILE_FREE);
522 
523  return;
524  }
525 
526 } // namespace quda
bool global_reduction
whether the solver acting as a preconditioner for another solver
Definition: invert_quda.h:243
double timeInterval(struct timeval start, struct timeval end)
QudaSchwarzType schwarz_type
Definition: invert_quda.h:217
void computeBeta(Complex **beta, std::vector< ColorSpinorField *> Ap, int i, int N, int k)
void setPrecision(QudaPrecision precision, QudaPrecision ghost_precision=QUDA_INVALID_PRECISION, bool force_native=false)
QudaVerbosity verbosity_precondition
Definition: invert_quda.h:239
virtual double flops() const
Definition: invert_quda.h:563
int pipeline
Definition: test_util.cpp:1634
double3 cDotProductNormA(ColorSpinorField &a, ColorSpinorField &b)
Definition: reduce_quda.cu:778
QudaVerbosity getVerbosity()
Definition: util_quda.cpp:21
std::vector< ColorSpinorField * > p
sloppy residual vector
Definition: invert_quda.h:846
void end(void)
Definition: blas_quda.cu:489
#define errorQuda(...)
Definition: util_quda.h:121
double norm2(const ColorSpinorField &a)
Definition: reduce_quda.cu:721
SolverParam Kparam
Definition: invert_quda.h:824
__host__ __device__ ValueType sqrt(ValueType x)
Definition: complex_quda.h:120
Complex cDotProduct(ColorSpinorField &, ColorSpinorField &)
Definition: reduce_quda.cu:764
void PrintStats(const char *name, int k, double r2, double b2, double hq2)
Prints out the running statistics of the solver (requires a verbosity of QUDA_VERBOSE) ...
Definition: solver.cpp:256
cudaColorSpinorField * tmp
Definition: covdev_test.cpp:44
Communication-avoiding GCR solver. This solver does un-preconditioned GCR, first building up a polyno...
Definition: invert_quda.h:990
static ColorSpinorField * Create(const ColorSpinorParam &param)
bool convergence(double r2, double hq2, double r2_tol, double hq_tol)
Definition: solver.cpp:223
double * gamma
Definition: invert_quda.h:833
TimeProfile & profile
Definition: invert_quda.h:464
void operator()(ColorSpinorField &out, ColorSpinorField &in)
void copy(ColorSpinorField &dst, const ColorSpinorField &src)
Definition: copy_quda.cu:355
double xmyNorm(ColorSpinorField &x, ColorSpinorField &y)
Definition: blas_quda.h:75
QudaInverterType inv_type_precondition
Definition: invert_quda.h:28
QudaPreserveSource preserve_source
Definition: invert_quda.h:154
int max_res_increase_total
Definition: invert_quda.h:96
std::vector< ColorSpinorField * > defl_tmp1
Definition: invert_quda.h:547
Complex ** beta
Definition: invert_quda.h:832
ColorSpinorField * r_sloppy
sloppy solution vector
Definition: invert_quda.h:844
void fillInnerSolveParam(SolverParam &inner, const SolverParam &outer)
Complex * alpha
Definition: invert_quda.h:831
QudaGaugeParam param
Definition: pack_test.cpp:17
std::vector< ColorSpinorField * > defl_tmp2
Definition: invert_quda.h:548
void backSubs(const Complex *alpha, Complex **const beta, const double *gamma, Complex *delta, int n)
GCR(DiracMatrix &mat, DiracMatrix &matSloppy, DiracMatrix &matPrecon, SolverParam &param, TimeProfile &profile)
QudaComputeNullVector compute_null_vector
Definition: invert_quda.h:67
void deflateSVD(std::vector< ColorSpinorField *> vec_defl, std::vector< ColorSpinorField *> vec, std::vector< ColorSpinorField *> evecs, std::vector< Complex > evals)
Deflate vector with both left and Right singular vectors.
double Last(QudaProfileType idx)
Definition: timer.h:251
Solver * K
Definition: invert_quda.h:823
QudaResidualType residual_type
Definition: invert_quda.h:49
void updateSolution(ColorSpinorField &x, const Complex *alpha, Complex **const beta, double *gamma, int k, std::vector< ColorSpinorField *> p)
virtual ~GCR()
static double stopping(double tol, double b2, QudaResidualType residual_type)
Set the solver L2 stopping condition.
Definition: solver.cpp:206
ColorSpinorParam csParam
Definition: pack_test.cpp:24
void axpy(double a, ColorSpinorField &x, ColorSpinorField &y)
Definition: blas_quda.h:35
constexpr int size
#define warningQuda(...)
Definition: util_quda.h:133
std::vector< ColorSpinorField * > Ap
Definition: invert_quda.h:847
bool is_preconditioner
verbosity to use for preconditioner
Definition: invert_quda.h:241
ColorSpinorField * yp
residual vector
Definition: invert_quda.h:841
void constructDeflationSpace(const ColorSpinorField &meta, const DiracMatrix &mat, bool svd)
Constructs the deflation space.
Definition: solver.cpp:159
double cabxpyzAxNorm(double a, const Complex &b, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
Definition: reduce_quda.cu:758
double3 HeavyQuarkResidualNorm(ColorSpinorField &x, ColorSpinorField &r)
Definition: reduce_quda.cu:809
int X[4]
Definition: covdev_test.cpp:70
std::complex< double > Complex
Definition: quda_internal.h:46
EigenSolver * eig_solve
Definition: invert_quda.h:545
std::vector< Complex > evals
Definition: invert_quda.h:61
void init()
Create the CUBLAS context.
Definition: blas_cublas.cu:31
Complex caxpyDotzy(const Complex &a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
Definition: reduce_quda.cu:771
void caxpy(const Complex &a, ColorSpinorField &x, ColorSpinorField &y)
Definition: blas_quda.cu:512
double tol_precondition
Definition: invert_quda.h:199
void zero(ColorSpinorField &a)
Definition: blas_quda.cu:472
void pushVerbosity(QudaVerbosity verbosity)
Push a new verbosity onto the stack.
Definition: util_quda.cpp:83
QudaPrecision precision_precondition
Definition: invert_quda.h:151
std::vector< ColorSpinorField * > evecs
Definition: invert_quda.h:58
QudaPrecision precision
Definition: invert_quda.h:142
ColorSpinorField * y_sloppy
temporary for mat-vec
Definition: invert_quda.h:843
void orthoDir(Complex **beta, std::vector< ColorSpinorField *> Ap, int k, int pipeline)
SolverParam & param
Definition: invert_quda.h:463
Conjugate-Gradient Solver.
Definition: invert_quda.h:570
unsigned long long flops() const
Definition: dirac_quda.h:1119
#define printfQuda(...)
Definition: util_quda.h:115
unsigned long long flops
Definition: blas_quda.cu:22
ColorSpinorField * tmpp
high precision accumulator
Definition: invert_quda.h:842
void xpy(ColorSpinorField &x, ColorSpinorField &y)
Definition: blas_quda.h:33
const DiracMatrix & mat
Definition: invert_quda.h:819
ColorSpinorField * rp
Definition: invert_quda.h:840
const DiracMatrix & matPrecon
Definition: invert_quda.h:821
QudaUseInitGuess use_init_guess
Definition: invert_quda.h:64
void popVerbosity()
Pop the verbosity restoring the prior one on the stack.
Definition: util_quda.cpp:94
const DiracMatrix & matSloppy
Definition: invert_quda.h:820
QudaPrecision precision_sloppy
Definition: invert_quda.h:145
bool use_sloppy_partial_accumulator
Definition: invert_quda.h:76
void PrintSummary(const char *name, int k, double r2, double b2, double r2_tol, double hq_tol)
Prints out the summary of the solver convergence (requires a verbosity of QUDA_SUMMARIZE). Assumes SolverParam.true_res and SolverParam.true_res_hq has been set.
Definition: solver.cpp:270
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
QudaPrecision Precision() const
bool deflate_init
Definition: invert_quda.h:546
void updateAp(Complex **beta, std::vector< ColorSpinorField *> Ap, int begin, int size, int k)
const Dirac * Expose() const
Definition: dirac_quda.h:1135