QUDA  v1.1.0
A library for QCD on GPUs
inv_gcr_quda.cpp
Go to the documentation of this file.
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <math.h>
4 
5 #include <complex>
6 
7 #include <quda_internal.h>
8 #include <blas_quda.h>
9 #include <dslash_quda.h>
10 #include <invert_quda.h>
11 #include <util_quda.h>
12 #include <color_spinor_field.h>
13 
14 #include <sys/time.h>
15 
16 namespace quda {
17 
18  double timeInterval(struct timeval start, struct timeval end) {
19  long ds = end.tv_sec - start.tv_sec;
20  long dus = end.tv_usec - start.tv_usec;
21  return ds + 0.000001*dus;
22  }
23 
24  // set the required parameters for the inner solver
25  void fillInnerSolveParam(SolverParam &inner, const SolverParam &outer) {
26  inner.tol = outer.tol_precondition;
27  inner.delta = 1e-20; // no reliable updates within the inner solver
28 
29  inner.precision = outer.precision_sloppy;
31 
32  // this sets a fixed iteration count if we're using the MR solver
34 
35  inner.iter = 0;
36  inner.gflops = 0;
37  inner.secs = 0;
38 
40  inner.is_preconditioner = true; // tell inner solver it is a preconditioner
41  inner.pipeline = true;
42 
43  inner.schwarz_type = outer.schwarz_type;
44  inner.global_reduction = inner.schwarz_type == QUDA_INVALID_SCHWARZ ? true : false;
45 
47 
48  inner.maxiter = outer.maxiter_precondition;
50  inner.Nkrylov = inner.maxiter / outer.precondition_cycle;
51  } else {
52  inner.Nsteps = outer.precondition_cycle;
53  }
54 
56 
58 
59  inner.compute_true_res = false;
60  inner.sloppy_converge = true;
61  }
62 
63  void computeBeta(Complex **beta, std::vector<ColorSpinorField*> Ap, int i, int N, int k) {
64  Complex *Beta = new Complex[N];
65  std::vector<ColorSpinorField*> a(N), b(1);
66  for (int j=0; j<N; j++) {
67  a[j] = Ap[i+j];
68  Beta[j] = 0;
69  }
70  b[0] = Ap[k];
71  blas::cDotProduct(Beta, a, b); // vectorized dot product
72 #if 0
73  for (int j=0; j<N; j++) {
74  printfQuda("%d/%d vectorized %e %e, regular %e %e\n", j+1, N, Beta[j].real(), Beta[j].imag(),
75  blas::cDotProduct(*a[j], *b[j]).real(), blas::cDotProduct(*a[j], *b[j]).imag());
76  }
77 #endif
78 
79  for (int j=0; j<N; j++) beta[i+j][k] = Beta[j];
80  delete [] Beta;
81  }
82 
83  void updateAp(Complex **beta, std::vector<ColorSpinorField*> Ap, int begin, int size, int k) {
84 
85  Complex *beta_ = new Complex[size];
86  for (int i=0; i<size; i++) beta_[i] = -beta[i+begin][k];
87 
88  std::vector<ColorSpinorField*> Ap_(Ap.begin() + begin, Ap.begin() + begin + size);
89  std::vector<ColorSpinorField*> Apk(Ap.begin() + k, Ap.begin() + k + 1);
90 
91  blas::caxpy(beta_, Ap_, Apk);
92 
93  delete []beta_;
94  }
95 
96  void orthoDir(Complex **beta, std::vector<ColorSpinorField*> Ap, int k, int pipeline) {
97 
98  switch (pipeline) {
99  case 0: // no kernel fusion
100  for (int i=0; i<k; i++) { // 5 (k-1) memory transactions here
101  beta[i][k] = blas::cDotProduct(*(Ap[i]), *(Ap[k]));
102  blas::caxpy(-beta[i][k], *Ap[i], *Ap[k]);
103  }
104  break;
105  case 1: // basic kernel fusion
106  if (k==0) break;
107  beta[0][k] = blas::cDotProduct(*Ap[0], *Ap[k]);
108  for (int i=0; i<k-1; i++) { // 4 (k-1) memory transactions here
109  beta[i+1][k] = blas::caxpyDotzy(-beta[i][k], *Ap[i], *Ap[k], *Ap[i+1]);
110  }
111  blas::caxpy(-beta[k-1][k], *Ap[k-1], *Ap[k]);
112  break;
113  default:
114  {
115  const int N = pipeline;
116  for (int i=0; i<k-(N-1); i+=N) {
117  computeBeta(beta, Ap, i, N, k);
118  updateAp(beta, Ap, i, N, k);
119  }
120 
121  if (k%N != 0) { // need to update the remainder
122  for (int r = N-1; r>0; r--) {
123  if ((k%N) % r == 0) { // if true this is the remainder
124  computeBeta(beta, Ap, k-r, r, k);
125  updateAp(beta, Ap, k-r, r, k);
126  break;
127  }
128  }
129  }
130  }
131  break;
132  }
133 
134  }
135 
136  void backSubs(const Complex *alpha, Complex** const beta, const double *gamma, Complex *delta, int n) {
137  for (int k=n-1; k>=0;k--) {
138  delta[k] = alpha[k];
139  for (int j=k+1;j<n; j++) {
140  delta[k] -= beta[k][j]*delta[j];
141  }
142  delta[k] /= gamma[k];
143  }
144  }
145 
146  void updateSolution(ColorSpinorField &x, const Complex *alpha, Complex **const beta, double *gamma, int k,
147  std::vector<ColorSpinorField *> p)
148  {
149  Complex *delta = new Complex[k];
150 
151  // Update the solution vector
152  backSubs(alpha, beta, gamma, delta, k);
153 
154  std::vector<ColorSpinorField*> X;
155  X.push_back(&x);
156 
157  std::vector<ColorSpinorField*> P;
158  for (int i=0; i<k; i++) P.push_back(p[i]);
159  blas::caxpy(delta, P, X);
160 
161  delete []delta;
162  }
163 
164  GCR::GCR(const DiracMatrix &mat, const DiracMatrix &matSloppy, const DiracMatrix &matPrecon,
165  const DiracMatrix &matEig, SolverParam &param, TimeProfile &profile) :
166  Solver(mat, matSloppy, matPrecon, matEig, param, profile),
167  matMdagM(DiracMdagM(matEig.Expose())),
168  K(0),
169  Kparam(param),
170  n_krylov(param.Nkrylov),
171  init(false),
172  rp(nullptr),
173  tmpp(nullptr),
174  tmp_sloppy(nullptr),
175  r_sloppy(nullptr)
176  {
177  fillInnerSolveParam(Kparam, param);
178 
179  if (param.inv_type_precondition == QUDA_CG_INVERTER) // inner CG solver
180  K = new CG(matSloppy, matPrecon, matPrecon, matEig, Kparam, profile);
181  else if (param.inv_type_precondition == QUDA_BICGSTAB_INVERTER) // inner BiCGstab solver
182  K = new BiCGstab(matSloppy, matPrecon, matPrecon, matEig, Kparam, profile);
183  else if (param.inv_type_precondition == QUDA_MR_INVERTER) // inner MR solver
184  K = new MR(matSloppy, matPrecon, Kparam, profile);
185  else if (param.inv_type_precondition == QUDA_SD_INVERTER) // inner SD solver
186  K = new SD(matSloppy, Kparam, profile);
187  else if (param.inv_type_precondition == QUDA_CA_GCR_INVERTER) // inner CA-GCR solver
188  K = new CAGCR(matSloppy, matPrecon, matPrecon, matEig, Kparam, profile);
189  else if (param.inv_type_precondition == QUDA_INVALID_INVERTER) // unsupported
190  K = NULL;
191  else
192  errorQuda("Unsupported preconditioner %d\n", param.inv_type_precondition);
193 
194  p.resize(n_krylov + 1);
195  Ap.resize(n_krylov);
196 
197  alpha = new Complex[n_krylov];
198  beta = new Complex *[n_krylov];
199  for (int i = 0; i < n_krylov; i++) beta[i] = new Complex[n_krylov];
200  gamma = new double[n_krylov];
201  }
202 
203  GCR::GCR(const DiracMatrix &mat, Solver &K, const DiracMatrix &matSloppy, const DiracMatrix &matPrecon,
204  const DiracMatrix &matEig, SolverParam &param, TimeProfile &profile) :
205  Solver(mat, matSloppy, matPrecon, matEig, param, profile),
206  matMdagM(matEig.Expose()),
207  K(&K),
208  Kparam(param),
209  n_krylov(param.Nkrylov),
210  init(false),
211  rp(nullptr),
212  tmpp(nullptr),
213  tmp_sloppy(nullptr),
214  r_sloppy(nullptr)
215  {
216  p.resize(n_krylov + 1);
217  Ap.resize(n_krylov);
218 
219  alpha = new Complex[n_krylov];
220  beta = new Complex *[n_krylov];
221  for (int i = 0; i < n_krylov; i++) beta[i] = new Complex[n_krylov];
222  gamma = new double[n_krylov];
223  }
224 
226  profile.TPSTART(QUDA_PROFILE_FREE);
227 
228  delete []alpha;
229  for (int i = 0; i < n_krylov; i++) delete[] beta[i];
230  delete []beta;
231  delete []gamma;
232 
233  if (K && param.inv_type_precondition != QUDA_MG_INVERTER) delete K;
234 
235  if (init && param.precision_sloppy != tmpp->Precision()) {
236  if (r_sloppy && r_sloppy != rp) delete r_sloppy;
237  }
238 
239  for (int i = 0; i < n_krylov + 1; i++)
240  if (p[i]) delete p[i];
241  for (int i = 0; i < n_krylov; i++)
242  if (Ap[i]) delete Ap[i];
243 
244  if (tmp_sloppy != tmpp) delete tmp_sloppy;
245  if (tmpp) delete tmpp;
246  if (rp) delete rp;
247 
249 
250  profile.TPSTOP(QUDA_PROFILE_FREE);
251  }
252 
254  {
255  if (n_krylov == 0) {
256  // Krylov space is zero-dimensional so return doing no work
258  return;
259  }
260 
261  profile.TPSTART(QUDA_PROFILE_INIT);
262 
263  if (!init) {
266 
267  rp = (K || x.Precision() != param.precision_sloppy) ? ColorSpinorField::Create(csParam) : nullptr;
268 
269  // high precision temporary
271 
272  // create sloppy fields used for orthogonalization
273  csParam.setPrecision(param.precision_sloppy);
274  for (int i = 0; i < n_krylov + 1; i++) p[i] = ColorSpinorField::Create(csParam);
275  for (int i = 0; i < n_krylov; i++) Ap[i] = ColorSpinorField::Create(csParam);
276 
277  csParam.setPrecision(param.precision_sloppy);
278  if (param.precision_sloppy != x.Precision()) {
279  tmp_sloppy = tmpp->CreateAlias(csParam);
280  } else {
281  tmp_sloppy = tmpp;
282  }
283 
284  if (param.precision_sloppy != x.Precision()) {
285  r_sloppy = K ? ColorSpinorField::Create(csParam) : nullptr;
286  } else {
287  r_sloppy = K ? rp : nullptr;
288  }
289 
290  init = true;
291  }
292 
293  if (param.deflate) {
294  // Construct the eigensolver and deflation space if requested.
296  constructDeflationSpace(b, matMdagM);
297  } else {
298  // Use Arnoldi to inspect the space only and turn off deflation
300  param.deflate = false;
301  }
302  if (deflate_compute) {
303  // compute the deflation space.
304  profile.TPSTOP(QUDA_PROFILE_INIT);
305  (*eig_solve)(evecs, evals);
306  if (param.deflate) {
307  // double the size of the Krylov space
309  // populate extra memory with L/R singular vectors
310  eig_solve->computeSVD(matMdagM, evecs, evals);
311  }
312  profile.TPSTART(QUDA_PROFILE_INIT);
313  deflate_compute = false;
314  }
315  if (recompute_evals) {
316  eig_solve->computeEvals(matMdagM, evecs, evals);
317  eig_solve->computeSVD(matMdagM, evecs, evals);
318  recompute_evals = false;
319  }
320  }
321 
322  ColorSpinorField &r = rp ? *rp : *p[0];
323  ColorSpinorField &rSloppy = r_sloppy ? *r_sloppy : *p[0];
324  ColorSpinorField &tmp = *tmpp;
325  ColorSpinorField &tmpSloppy = *tmp_sloppy;
326 
327  double b2 = blas::norm2(b); // norm sq of source
328  double r2; // norm sq of residual
329 
330  // compute initial residual depending on whether we have an initial guess or not
332  // Compute r = b - A * x
333  mat(r, x, tmp);
334  r2 = blas::xmyNorm(b, r);
335  // x contains the original guess.
336  } else {
337  blas::copy(r, b);
338  r2 = b2;
339  blas::zero(x);
340  }
341 
342  if (param.deflate && param.maxiter > 1) {
343  // Deflate: Hardcoded to SVD. If maxiter == 1, this is a dummy solve
344  eig_solve->deflateSVD(x, r, evecs, evals, true);
345 
346  // Compute r_defl = RHS - A * LHS
347  mat(r, x, tmp);
348  r2 = blas::xmyNorm(b, r);
349  }
350 
351  // Check to see that we're not trying to invert on a zero-field source
352  if (b2 == 0) {
354  profile.TPSTOP(QUDA_PROFILE_INIT);
355  warningQuda("inverting on zero-field source\n");
356  x = b;
357  param.true_res = 0.0;
358  param.true_res_hq = 0.0;
359  return;
360  } else {
361  b2 = r2;
362  }
363  }
364 
365  double stop = stopping(param.tol, b2, param.residual_type); // stopping condition of solver
366 
367  const bool use_heavy_quark_res =
368  (param.residual_type & QUDA_HEAVY_QUARK_RESIDUAL) ? true : false;
369 
370  // this parameter determines how many consective reliable update
371  // reisudal increases we tolerate before terminating the solver,
372  // i.e., how long do we want to keep trying to converge
373  const int maxResIncrease = param.max_res_increase; // check if we reached the limit of our tolerance
374  const int maxResIncreaseTotal = param.max_res_increase_total;
375 
376  double heavy_quark_res = 0.0; // heavy quark residual
377  if(use_heavy_quark_res) heavy_quark_res = sqrt(blas::HeavyQuarkResidualNorm(x,r).z);
378 
379  int resIncrease = 0;
380  int resIncreaseTotal = 0;
381 
382  profile.TPSTOP(QUDA_PROFILE_INIT);
384 
385  blas::flops = 0;
386 
387  blas::copy(rSloppy, r);
388 
389  int total_iter = 0;
390  int restart = 0;
391  double r2_old = r2;
392  double maxr_deflate = sqrt(r2);
393  bool l2_converge = false;
394 
395  int pipeline = param.pipeline;
396  // Vectorized dot product only has limited support so work around
397  if (Ap[0]->Location() == QUDA_CPU_FIELD_LOCATION || pipeline == 0) pipeline = 1;
398  if (pipeline > n_krylov) pipeline = n_krylov;
399 
401  profile.TPSTART(QUDA_PROFILE_COMPUTE);
402 
403  int k = 0;
404  int k_break = 0;
405 
406  PrintStats("GCR", total_iter+k, r2, b2, heavy_quark_res);
407  while ( !convergence(r2, heavy_quark_res, stop, param.tol_hq) && total_iter < param.maxiter) {
408 
409  if (K) {
411  (*K)(*p[k], rSloppy);
412  popVerbosity();
413  // relaxation p = omega*p + (1-omega)*r
414  //if (param.omega!=1.0) blas::axpby((1.0-param.omega), rPre, param.omega, pPre);
415  }
416 
417  matSloppy(*Ap[k], *p[k], tmpSloppy);
418 
420  printfQuda("GCR debug iter=%d: Ap2=%e, p2=%e, r2=%e\n",
421  total_iter, blas::norm2(*Ap[k]), blas::norm2(*p[k]), blas::norm2(rSloppy));
422 
423  orthoDir(beta, Ap, k, pipeline);
424 
425  double3 Apr = blas::cDotProductNormA(*Ap[k], K ? rSloppy : *p[k]);
426 
428  printfQuda("GCR debug iter=%d: Apr=(%e,%e,%e)\n", total_iter, Apr.x, Apr.y, Apr.z);
429  for (int i=0; i<k; i++)
430  for (int j=0; j<=k; j++)
431  printfQuda("GCR debug iter=%d: beta[%d][%d] = (%e,%e)\n",
432  total_iter, i, j, real(beta[i][j]), imag(beta[i][j]));
433  }
434 
435  gamma[k] = sqrt(Apr.z); // gamma[k] = Ap[k]
436  if (gamma[k] == 0.0) errorQuda("GCR breakdown\n");
437  alpha[k] = Complex(Apr.x, Apr.y) / gamma[k]; // alpha = (1/|Ap|) * (Ap, r)
438 
439  // r -= (1/|Ap|^2) * (Ap, r) r, Ap *= 1/|Ap|
440  r2 = blas::cabxpyzAxNorm(1.0 / gamma[k], -alpha[k], *Ap[k], K ? rSloppy : *p[k], K ? rSloppy : *p[k + 1]);
441 
442  k++;
443  total_iter++;
444 
445  PrintStats("GCR", total_iter, r2, b2, heavy_quark_res);
446 
447  // update since n_krylov or maxiter reached, converged or reliable update required
448  // note that the heavy quark residual will by definition only be checked every n_krylov steps
449  if (k == n_krylov || total_iter == param.maxiter || (r2 < stop && !l2_converge) || sqrt(r2 / r2_old) < param.delta) {
450 
451  // update the solution vector
452  updateSolution(x, alpha, beta, gamma, k, p);
453 
454  if ( (r2 < stop || total_iter==param.maxiter) && param.sloppy_converge) break;
455  mat(r, x, tmp);
456  r2 = blas::xmyNorm(b, r);
457 
458  if (param.deflate && sqrt(r2) < maxr_deflate * param.tol_restart) {
459  // Deflate: Hardcoded to SVD.
460  eig_solve->deflateSVD(x, r, evecs, evals, true);
461 
462  // Compute r_defl = RHS - A * LHS
463  mat(r, x, tmp);
464  r2 = blas::xmyNorm(b, r);
465 
466  maxr_deflate = sqrt(r2);
467  }
468 
469  if (use_heavy_quark_res) heavy_quark_res = sqrt(blas::HeavyQuarkResidualNorm(x, r).z);
470 
471  // break-out check if we have reached the limit of the precision
472  if (r2 > r2_old) {
473  resIncrease++;
474  resIncreaseTotal++;
475  warningQuda("GCR: new reliable residual norm %e is greater than previous reliable residual norm %e (total #inc %i)",
476  sqrt(r2), sqrt(r2_old), resIncreaseTotal);
477  if (resIncrease > maxResIncrease or resIncreaseTotal > maxResIncreaseTotal) {
478  warningQuda("GCR: solver exiting due to too many true residual norm increases");
479  break;
480  }
481  } else {
482  resIncrease = 0;
483  }
484 
485  k_break = k;
486  k = 0;
487 
488  if ( !convergence(r2, heavy_quark_res, stop, param.tol_hq) ) {
489  restart++; // restarting if residual is still too great
490 
491  PrintStats("GCR (restart)", restart, r2, b2, heavy_quark_res);
492  blas::copy(rSloppy, r);
493 
494  r2_old = r2;
495 
496  // prevent ending the Krylov space prematurely if other convergence criteria not met
497  if (r2 < stop) l2_converge = true;
498  }
499 
500  r2_old = r2;
501  }
502  }
503 
506 
508 
509  double gflops = (blas::flops + mat.flops() + matSloppy.flops() + matPrecon.flops() + matMdagM.flops()) * 1e-9;
510  if (K) gflops += K->flops()*1e-9;
511 
512  if (k>=param.maxiter && getVerbosity() >= QUDA_SUMMARIZE)
513  warningQuda("Exceeded maximum iterations %d", param.maxiter);
514 
515  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("GCR: number of restarts = %d\n", restart);
516 
517  if (param.compute_true_res) {
518  // Calculate the true residual
519  mat(r, x, tmp);
520  double true_res = blas::xmyNorm(b, r);
521  param.true_res = sqrt(true_res / b2);
524  else
525  param.true_res_hq = 0.0;
526 
528  } else {
529  if (param.preserve_source == QUDA_PRESERVE_SOURCE_NO) blas::copy(b, K ? rSloppy : *p[k_break]);
530  }
531 
532  param.gflops += gflops;
533  param.iter += total_iter;
534 
535  // reset the flops counters
536  blas::flops = 0;
537  mat.flops();
538  matSloppy.flops();
539  matPrecon.flops();
540  matMdagM.flops();
541 
543  profile.TPSTART(QUDA_PROFILE_FREE);
544 
545  PrintSummary("GCR", total_iter, r2, b2, stop, param.tol_hq);
546 
547  profile.TPSTOP(QUDA_PROFILE_FREE);
548 
549  return;
550  }
551 
552 } // namespace quda
Communication-avoiding GCR solver. This solver does un-preconditioned GCR, first building up a polyno...
Definition: invert_quda.h:1099
Conjugate-Gradient Solver.
Definition: invert_quda.h:639
ColorSpinorField * CreateAlias(const ColorSpinorParam &param)
Create a field that aliases this field's storage. The alias field can use a different precision than ...
static ColorSpinorField * Create(const ColorSpinorParam &param)
unsigned long long flops() const
Definition: dirac_quda.h:1909
void deflateSVD(std::vector< ColorSpinorField * > &sol, const std::vector< ColorSpinorField * > &vec, const std::vector< ColorSpinorField * > &evecs, const std::vector< Complex > &evals, bool accumulate=false) const
Deflate a set of source vectors with a set of left and right singular vectors.
void computeEvals(const DiracMatrix &mat, std::vector< ColorSpinorField * > &evecs, std::vector< Complex > &evals, int size)
Compute eigenvalues and their residiua.
void computeSVD(const DiracMatrix &mat, std::vector< ColorSpinorField * > &evecs, std::vector< Complex > &evals)
Computes Left/Right SVD from pre computed Right/Left.
GCR(const DiracMatrix &mat, const DiracMatrix &matSloppy, const DiracMatrix &matPrecon, const DiracMatrix &matEig, SolverParam &param, TimeProfile &profile)
void operator()(ColorSpinorField &out, ColorSpinorField &in)
virtual ~GCR()
QudaPrecision Precision() const
bool deflate_compute
Definition: invert_quda.h:475
TimeProfile & profile
Definition: invert_quda.h:471
const DiracMatrix & mat
Definition: invert_quda.h:465
bool convergence(double r2, double hq2, double r2_tol, double hq_tol)
Definition: solver.cpp:328
bool recompute_evals
Definition: invert_quda.h:476
std::vector< ColorSpinorField * > evecs
Definition: invert_quda.h:477
virtual double flops() const
Return flops.
Definition: invert_quda.h:633
void destroyDeflationSpace()
Destroy the allocated deflation space.
Definition: solver.cpp:229
void PrintSummary(const char *name, int k, double r2, double b2, double r2_tol, double hq_tol)
Prints out the summary of the solver convergence (requires a verbosity of QUDA_SUMMARIZE)....
Definition: solver.cpp:386
const DiracMatrix & matEig
Definition: invert_quda.h:468
SolverParam & param
Definition: invert_quda.h:470
static double stopping(double tol, double b2, QudaResidualType residual_type)
Set the solver L2 stopping condition.
Definition: solver.cpp:311
void extendSVDDeflationSpace()
Extends the deflation space to twice its size for SVD deflation.
Definition: solver.cpp:287
std::vector< Complex > evals
Definition: invert_quda.h:478
EigenSolver * eig_solve
Definition: invert_quda.h:473
void PrintStats(const char *name, int k, double r2, double b2, double hq2)
Prints out the running statistics of the solver (requires a verbosity of QUDA_VERBOSE)
Definition: solver.cpp:373
void constructDeflationSpace(const ColorSpinorField &meta, const DiracMatrix &mat)
Constructs the deflation space and eigensolver.
Definition: solver.cpp:168
const DiracMatrix & matPrecon
Definition: invert_quda.h:467
const DiracMatrix & matSloppy
Definition: invert_quda.h:466
double Last(QudaProfileType idx)
Definition: timer.h:254
int pipeline
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
void end(void)
cudaColorSpinorField * tmp
Definition: covdev_test.cpp:34
@ QUDA_CPU_FIELD_LOCATION
Definition: enum_quda.h:325
@ QUDA_USE_INIT_GUESS_NO
Definition: enum_quda.h:429
@ QUDA_USE_INIT_GUESS_YES
Definition: enum_quda.h:430
@ QUDA_DEBUG_VERBOSE
Definition: enum_quda.h:268
@ QUDA_SUMMARIZE
Definition: enum_quda.h:266
@ QUDA_VERBOSE
Definition: enum_quda.h:267
@ QUDA_HEAVY_QUARK_RESIDUAL
Definition: enum_quda.h:195
@ QUDA_INVALID_RESIDUAL
Definition: enum_quda.h:196
@ QUDA_L2_RELATIVE_RESIDUAL
Definition: enum_quda.h:193
@ QUDA_EIG_BLK_TR_LANCZOS
Definition: enum_quda.h:138
@ QUDA_EIG_TR_LANCZOS
Definition: enum_quda.h:137
@ QUDA_MR_INVERTER
Definition: enum_quda.h:110
@ QUDA_CA_GCR_INVERTER
Definition: enum_quda.h:132
@ QUDA_SD_INVERTER
Definition: enum_quda.h:112
@ QUDA_CG_INVERTER
Definition: enum_quda.h:107
@ QUDA_INVALID_INVERTER
Definition: enum_quda.h:133
@ QUDA_MG_INVERTER
Definition: enum_quda.h:122
@ QUDA_BICGSTAB_INVERTER
Definition: enum_quda.h:108
@ QUDA_PRESERVE_SOURCE_NO
Definition: enum_quda.h:238
@ QUDA_PRESERVE_SOURCE_YES
Definition: enum_quda.h:239
@ QUDA_INVALID_SCHWARZ
Definition: enum_quda.h:189
@ QUDA_NULL_FIELD_CREATE
Definition: enum_quda.h:360
@ QUDA_COMPUTE_NULL_VECTOR_NO
Definition: enum_quda.h:441
void init()
Create the BLAS context.
Complex caxpyDotzy(const Complex &a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
double3 HeavyQuarkResidualNorm(ColorSpinorField &x, ColorSpinorField &r)
double xmyNorm(ColorSpinorField &x, ColorSpinorField &y)
Definition: blas_quda.h:79
unsigned long long flops
double cabxpyzAxNorm(double a, const Complex &b, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
void zero(ColorSpinorField &a)
double norm2(const ColorSpinorField &a)
double3 cDotProductNormA(ColorSpinorField &a, ColorSpinorField &b)
void caxpy(const Complex &a, ColorSpinorField &x, ColorSpinorField &y)
void copy(ColorSpinorField &dst, const ColorSpinorField &src)
Definition: blas_quda.h:24
Complex cDotProduct(ColorSpinorField &, ColorSpinorField &)
void stop()
Stop profiling.
Definition: device.cpp:228
void start()
Start profiling.
Definition: device.cpp:226
void updateAp(Complex **beta, std::vector< ColorSpinorField * > Ap, int begin, int size, int k)
void updateSolution(ColorSpinorField &x, const Complex *alpha, Complex **const beta, double *gamma, int k, std::vector< ColorSpinorField * > p)
void orthoDir(Complex **beta, std::vector< ColorSpinorField * > Ap, int k, int pipeline)
std::complex< double > Complex
Definition: quda_internal.h:86
__host__ __device__ ValueType sqrt(ValueType x)
Definition: complex_quda.h:120
void computeBeta(Complex **beta, std::vector< ColorSpinorField * > Ap, int i, int N, int k)
@ QUDA_PROFILE_INIT
Definition: timer.h:106
@ QUDA_PROFILE_EPILOGUE
Definition: timer.h:110
@ QUDA_PROFILE_COMPUTE
Definition: timer.h:108
@ QUDA_PROFILE_FREE
Definition: timer.h:111
@ QUDA_PROFILE_PREAMBLE
Definition: timer.h:107
double timeInterval(struct timeval start, struct timeval end)
void backSubs(const Complex *alpha, Complex **const beta, const double *gamma, Complex *delta, int n)
void fillInnerSolveParam(SolverParam &inner, const SolverParam &outer)
ColorSpinorParam csParam
Definition: pack_test.cpp:25
QudaGaugeParam param
Definition: pack_test.cpp:18
QudaEigType eig_type
Definition: quda.h:416
QudaPreserveSource preserve_source
Definition: invert_quda.h:151
QudaPrecision precision
Definition: invert_quda.h:136
QudaComputeNullVector compute_null_vector
Definition: invert_quda.h:61
bool is_preconditioner
verbosity to use for preconditioner
Definition: invert_quda.h:238
QudaSchwarzType schwarz_type
Definition: invert_quda.h:214
double tol_precondition
Definition: invert_quda.h:196
int max_res_increase_total
Definition: invert_quda.h:90
QudaResidualType residual_type
Definition: invert_quda.h:49
QudaPrecision precision_precondition
Definition: invert_quda.h:145
QudaPrecision precision_sloppy
Definition: invert_quda.h:139
QudaVerbosity verbosity_precondition
Definition: invert_quda.h:236
QudaEigParam eig_param
Definition: invert_quda.h:55
QudaUseInitGuess use_init_guess
Definition: invert_quda.h:58
QudaInverterType inv_type_precondition
Definition: invert_quda.h:28
bool global_reduction
whether the solver acting as a preconditioner for another solver
Definition: invert_quda.h:240
void pushVerbosity(QudaVerbosity verbosity)
Push a new verbosity onto the stack.
Definition: util_quda.cpp:83
#define printfQuda(...)
Definition: util_quda.h:114
void popVerbosity()
Pop the verbosity restoring the prior one on the stack.
Definition: util_quda.cpp:94
QudaVerbosity getVerbosity()
Definition: util_quda.cpp:21
#define warningQuda(...)
Definition: util_quda.h:132
#define errorQuda(...)
Definition: util_quda.h:120