QUDA  0.9.0
inv_gcr_quda.cpp
Go to the documentation of this file.
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <math.h>
4 
5 #include <complex>
6 
7 #include <quda_internal.h>
8 #include <blas_quda.h>
9 #include <dslash_quda.h>
10 #include <invert_quda.h>
11 #include <util_quda.h>
12 #include <color_spinor_field.h>
13 
14 #include <sys/time.h>
15 
16 namespace quda {
17 
18  double timeInterval(struct timeval start, struct timeval end) {
19  long ds = end.tv_sec - start.tv_sec;
20  long dus = end.tv_usec - start.tv_usec;
21  return ds + 0.000001*dus;
22  }
23 
24  // set the required parameters for the inner solver
25  void fillInnerSolveParam(SolverParam &inner, const SolverParam &outer) {
26  inner.tol = outer.tol_precondition;
27  inner.maxiter = outer.maxiter_precondition;
28  inner.delta = 1e-20; // no reliable updates within the inner solver
29 
30  inner.precision = outer.precision_precondition; // preconditioners are uni-precision solvers
32 
33  inner.iter = 0;
34  inner.gflops = 0;
35  inner.secs = 0;
36 
38  inner.is_preconditioner = true; // tell inner solver it is a preconditioner
39 
40  inner.global_reduction = false;
41 
43 
44  if (outer.inv_type == QUDA_GCR_INVERTER && outer.precision_sloppy != outer.precision_precondition)
47 
48  }
49 
50  void computeBeta(Complex **beta, std::vector<ColorSpinorField*> Ap, int i, int N, int k) {
51  Complex *Beta = new Complex[N];
52  std::vector<ColorSpinorField*> a(N), b(1);
53  for (int j=0; j<N; j++) {
54  a[j] = Ap[i+j];
55  Beta[j] = 0;
56  }
57  b[0] = Ap[k];
58  blas::cDotProduct(Beta, a, b); // vectorized dot product
59 #if 0
60  for (int j=0; j<N; j++) {
61  printfQuda("%d/%d vectorized %e %e, regular %e %e\n", j+1, N, Beta[j].real(), Beta[j].imag(),
62  blas::cDotProduct(*a[j], *b[j]).real(), blas::cDotProduct(*a[j], *b[j]).imag());
63  }
64 #endif
65 
66  for (int j=0; j<N; j++) beta[i+j][k] = Beta[j];
67  delete [] Beta;
68  }
69 
70  void updateAp(Complex **beta, std::vector<ColorSpinorField*> Ap, int begin, int size, int k) {
71 
72  Complex *beta_ = new Complex[size];
73  for (int i=0; i<size; i++) beta_[i] = -beta[i+begin][k];
74 
75  std::vector<ColorSpinorField*> Ap_(Ap.begin() + begin, Ap.begin() + begin + size);
76  std::vector<ColorSpinorField*> Apk(Ap.begin() + k, Ap.begin() + k + 1);
77 
78  blas::caxpy(beta_, Ap_, Apk);
79 
80  delete []beta_;
81  }
82 
83  void orthoDir(Complex **beta, std::vector<ColorSpinorField*> Ap, int k, int pipeline) {
84 
85  switch (pipeline) {
86  case 0: // no kernel fusion
87  for (int i=0; i<k; i++) { // 5 (k-1) memory transactions here
88  beta[i][k] = blas::cDotProduct(*(Ap[i]), *(Ap[k]));
89  blas::caxpy(-beta[i][k], *Ap[i], *Ap[k]);
90  }
91  break;
92  case 1: // basic kernel fusion
93  if (k==0) break;
94  beta[0][k] = blas::cDotProduct(*Ap[0], *Ap[k]);
95  for (int i=0; i<k-1; i++) { // 4 (k-1) memory transactions here
96  beta[i+1][k] = blas::caxpyDotzy(-beta[i][k], *Ap[i], *Ap[k], *Ap[i+1]);
97  }
98  blas::caxpy(-beta[k-1][k], *Ap[k-1], *Ap[k]);
99  break;
100  case 2: // two-way pipelining
101  case 3: // three-way pipelining
102  case 4: // four-way pipelining
103  case 5: // five-way pipelining
104  case 6: // six-way pipelining
105  case 7: // seven-way pipelining
106  case 8: // eight-way pipelining
107  {
108  const int N = pipeline;
109  for (int i=0; i<k-(N-1); i+=N) {
110  computeBeta(beta, Ap, i, N, k);
111  updateAp(beta, Ap, i, N, k);
112  }
113 
114  if (k%N != 0) { // need to update the remainder
115  for (int r = N-1; r>0; r--) {
116  if ((k%N) % r == 0) { // if true this is the remainder
117  computeBeta(beta, Ap, k-r, r, k);
118  updateAp(beta, Ap, k-r, r, k);
119  break;
120  }
121  }
122  }
123  }
124  break;
125  default:
126  errorQuda("Pipeline length %d type not defined", pipeline);
127  }
128 
129  }
130 
131  void backSubs(const Complex *alpha, Complex** const beta, const double *gamma, Complex *delta, int n) {
132  for (int k=n-1; k>=0;k--) {
133  delta[k] = alpha[k];
134  for (int j=k+1;j<n; j++) {
135  delta[k] -= beta[k][j]*delta[j];
136  }
137  delta[k] /= gamma[k];
138  }
139  }
140 
141  void updateSolution(ColorSpinorField &x, const Complex *alpha, Complex** const beta,
142  double *gamma, int k, std::vector<ColorSpinorField*> p) {
143 
144  Complex *delta = new Complex[k];
145 
146  // Update the solution vector
147  backSubs(alpha, beta, gamma, delta, k);
148 
149  std::vector<ColorSpinorField*> X;
150  X.push_back(&x);
151 
152  std::vector<ColorSpinorField*> P;
153  for (int i=0; i<k; i++) P.push_back(p[i]);
154  blas::caxpy(delta, P, X);
155 
156  delete []delta;
157  }
158 
160  TimeProfile &profile) :
161  Solver(param, profile), mat(mat), matSloppy(matSloppy), matPrecon(matPrecon), K(0), Kparam(param),
162  nKrylov(param.Nkrylov), init(false), rp(nullptr), yp(nullptr), tmpp(nullptr), x_sloppy(nullptr),
163  r_sloppy(nullptr), r_pre(nullptr), p_pre(nullptr), rM(nullptr)
164  {
165 
167 
168  if (param.inv_type_precondition == QUDA_CG_INVERTER) // inner CG preconditioner
169  K = new CG(matPrecon, matPrecon, Kparam, profile);
170  else if (param.inv_type_precondition == QUDA_BICGSTAB_INVERTER) // inner BiCGstab preconditioner
172  else if (param.inv_type_precondition == QUDA_MR_INVERTER) // inner MR preconditioner
173  K = new MR(matPrecon, matPrecon, Kparam, profile);
174  else if (param.inv_type_precondition == QUDA_SD_INVERTER) // inner MR preconditioner
175  K = new SD(matPrecon, Kparam, profile);
176  else if (param.inv_type_precondition == QUDA_INVALID_INVERTER) // unknown preconditioner
177  K = NULL;
178  else
179  errorQuda("Unsupported preconditioner %d\n", param.inv_type_precondition);
180 
181  p.resize(nKrylov);
182  Ap.resize(nKrylov);
183 
184  alpha = new Complex[nKrylov];
185  beta = new Complex*[nKrylov];
186  for (int i=0; i<nKrylov; i++) beta[i] = new Complex[nKrylov];
187  gamma = new double[nKrylov];
188  }
189 
190  GCR::GCR(DiracMatrix &mat, Solver &K, DiracMatrix &matSloppy, DiracMatrix &matPrecon,
191  SolverParam &param, TimeProfile &profile) :
192  Solver(param, profile), mat(mat), matSloppy(matSloppy), matPrecon(matPrecon), K(&K), Kparam(param),
193  nKrylov(param.Nkrylov), init(false), rp(nullptr), yp(nullptr), tmpp(nullptr), x_sloppy(nullptr),
194  r_sloppy(nullptr), r_pre(nullptr), p_pre(nullptr), rM(nullptr)
195  {
196  p.resize(nKrylov);
197  Ap.resize(nKrylov);
198 
199  alpha = new Complex[nKrylov];
200  beta = new Complex*[nKrylov];
201  for (int i=0; i<nKrylov; i++) beta[i] = new Complex[nKrylov];
202  gamma = new double[nKrylov];
203  }
204 
206  profile.TPSTART(QUDA_PROFILE_FREE);
207  delete []alpha;
208  for (int i=0; i<nKrylov; i++) delete []beta[i];
209  delete []beta;
210  delete []gamma;
211 
212  if (K && param.inv_type_precondition != QUDA_MG_INVERTER) delete K;
213 
214  if (param.precondition_cycle > 1) delete rM;
215 
217  if (x_sloppy) delete x_sloppy;
218  if (r_sloppy) delete r_sloppy;
219  }
220 
222  if (p_pre) delete p_pre;
223  if (r_pre) delete r_pre;
224  }
225 
226  for (int i=0; i<nKrylov; i++) {
227  if (p[i]) delete p[i];
228  if (Ap[i]) delete Ap[i];
229  }
230 
231  if (tmpp) delete tmpp;
232  if (rp) delete rp;
233  if (yp) delete yp;
234  profile.TPSTOP(QUDA_PROFILE_FREE);
235  }
236 
238  {
239  profile.TPSTART(QUDA_PROFILE_INIT);
240 
241  if (!init) {
245 
246  // high precision accumulator
248 
249  // create sloppy fields used for orthogonalization
250  csParam.setPrecision(param.precision_sloppy);
251  for (int i=0; i<nKrylov; i++) {
254  }
255 
256  tmpp = ColorSpinorField::Create(csParam); //temporary for sloppy mat-vec
257 
259  csParam.setPrecision(param.precision_sloppy);
262  } else {
263  x_sloppy = &x;
264  r_sloppy = rp;
265  }
266 
267  // these low precision fields are used by the inner solver
269  csParam.setPrecision(param.precision_precondition);
272  } else {
273  p_pre = NULL;
274  r_pre = r_sloppy;
275  }
276 
277  if (param.precondition_cycle > 1) {
278  ColorSpinorParam rParam(*r_sloppy);
279  rM = ColorSpinorField::Create(rParam);
280  }
281  init = true;
282  }
283 
284  ColorSpinorField &r = *rp;
285  ColorSpinorField &y = *yp;
286  ColorSpinorField &xSloppy = *x_sloppy;
287  ColorSpinorField &rSloppy = *r_sloppy;
288  ColorSpinorField &rPre = *r_pre;
290  blas::zero(y);
291 
292  bool precMatch = (param.precision_precondition != param.precision_sloppy || param.precondition_cycle > 1) ? false : true;
293 
294  // compute parity of the node
295  int parity = 0;
296  for (int i=0; i<4; i++) parity += commCoords(i);
297  parity = parity % 2;
298 
299  double b2 = blas::norm2(b); // norm sq of source
300  double r2; // norm sq of residual
301 
302  // compute initial residual depending on whether we have an initial guess or not
304  mat(r, x, y);
305  r2 = blas::xmyNorm(b, r);
306  blas::copy(y, x);
307  if (&x == &xSloppy) blas::zero(x); // need to zero x when doing uni-precision solver
308  } else {
309  blas::copy(r, b);
310  r2 = b2;
311  blas::zero(x); // defensive measure in case solution isn't already zero
312  if (&x != &xSloppy) blas::zero(xSloppy);
313  }
314 
315  // Check to see that we're not trying to invert on a zero-field source
316  if (b2 == 0) {
318  profile.TPSTOP(QUDA_PROFILE_INIT);
319  warningQuda("inverting on zero-field source\n");
320  x = b;
321  param.true_res = 0.0;
322  param.true_res_hq = 0.0;
323  return;
324  } else {
325  b2 = r2;
326  }
327  }
328 
329  double stop = stopping(param.tol, b2, param.residual_type); // stopping condition of solver
330 
331  const bool use_heavy_quark_res =
332  (param.residual_type & QUDA_HEAVY_QUARK_RESIDUAL) ? true : false;
333 
334  // this parameter determines how many consective reliable update
335  // reisudal increases we tolerate before terminating the solver,
336  // i.e., how long do we want to keep trying to converge
337  const int maxResIncrease = param.max_res_increase; // check if we reached the limit of our tolerance
338  const int maxResIncreaseTotal = param.max_res_increase_total;
339 
340  double heavy_quark_res = 0.0; // heavy quark residual
341  if(use_heavy_quark_res) heavy_quark_res = sqrt(blas::HeavyQuarkResidualNorm(x,r).z);
342 
343  int resIncrease = 0;
344  int resIncreaseTotal = 0;
345 
346  profile.TPSTOP(QUDA_PROFILE_INIT);
348 
349  blas::flops = 0;
350 
351  blas::copy(rSloppy, r);
352 
353  int total_iter = 0;
354  int restart = 0;
355  double r2_old = r2;
356  bool l2_converge = false;
357 
358  int pipeline = param.pipeline;
359  // Vectorized dot product only has limited support so work around
360  if (Ap[0]->Location() == QUDA_CPU_FIELD_LOCATION || pipeline == 0) pipeline = 1;
361 
362  if (pipeline > 1)
363  warningQuda("GCR with pipeline length %d is experimental", pipeline);
364 
366  profile.TPSTART(QUDA_PROFILE_COMPUTE);
367 
368  int k = 0;
369  PrintStats("GCR", total_iter+k, r2, b2, heavy_quark_res);
370  while ( !convergence(r2, heavy_quark_res, stop, param.tol_hq) &&
371  total_iter < param.maxiter) {
372 
373  for (int m=0; m<param.precondition_cycle; m++) {
375  ColorSpinorField &pPre = (precMatch ? *p[k] : *p_pre);
376 
377  if (m==0) { // residual is just source
378  blas::copy(rPre, rSloppy);
379  } else { // compute residual
380  blas::copy(*rM, rSloppy);
381  blas::axpy(-1.0, *Ap[k], *rM);
382  blas::copy(rPre, *rM);
383  }
384 
386  if ((parity+m)%2 == 0 || param.schwarz_type == QUDA_ADDITIVE_SCHWARZ) (*K)(pPre, rPre);
387  else blas::copy(pPre, rPre);
388  popVerbosity();
389 
390  // relaxation p = omega*p + (1-omega)*r
391  //if (param.omega!=1.0) blas::axpby((1.0-param.omega), rPre, param.omega, pPre);
392 
393  if (m==0) { blas::copy(*p[k], pPre); }
394  else { blas::copy(tmp, pPre); blas::xpy(tmp, *p[k]); }
395 
396  } else { // no preconditioner
397  *p[k] = rSloppy;
398  }
399  matSloppy(*Ap[k], *p[k], tmp);
401  printfQuda("GCR debug iter=%d: Ap2=%e, p2=%e, rPre2=%e\n",
402  total_iter, blas::norm2(*Ap[k]), blas::norm2(*p[k]), blas::norm2(rPre));
403  }
404 
405  orthoDir(beta, Ap, k, pipeline);
406 
407  double3 Apr = blas::cDotProductNormA(*Ap[k], rSloppy);
408 
410  printfQuda("GCR debug iter=%d: Apr=(%e,%e,%e)\n", total_iter, Apr.x, Apr.y, Apr.z);
411  for (int i=0; i<k; i++)
412  for (int j=0; j<=k; j++)
413  printfQuda("GCR debug iter=%d: beta[%d][%d] = (%e,%e)\n",
414  total_iter, i, j, real(beta[i][j]), imag(beta[i][j]));
415  }
416 
417  gamma[k] = sqrt(Apr.z); // gamma[k] = Ap[k]
418  if (gamma[k] == 0.0) errorQuda("GCR breakdown\n");
419  alpha[k] = Complex(Apr.x, Apr.y) / gamma[k]; // alpha = (1/|Ap|) * (Ap, r)
420 
421  // r -= (1/|Ap|^2) * (Ap, r) r, Ap *= 1/|Ap|
422  r2 = blas::cabxpyAxNorm(1.0/gamma[k], -alpha[k], *Ap[k], rSloppy);
423 
424  k++;
425  total_iter++;
426 
427  PrintStats("GCR", total_iter, r2, b2, heavy_quark_res);
428 
429  // update since nKrylov or maxiter reached, converged or reliable update required
430  // note that the heavy quark residual will by definition only be checked every nKrylov steps
431  if (k==nKrylov || total_iter==param.maxiter || (r2 < stop && !l2_converge) || sqrt(r2/r2_old) < param.delta) {
432 
433  // update the solution vector
434  updateSolution(xSloppy, alpha, beta, gamma, k, p);
435 
436  // recalculate residual in high precision
437  blas::copy(x, xSloppy);
438  blas::xpy(x, y);
439  mat(r, y, x);
440  r2 = blas::xmyNorm(b, r);
441 
442  if (use_heavy_quark_res) heavy_quark_res = sqrt(blas::HeavyQuarkResidualNorm(y, r).z);
443 
444  // break-out check if we have reached the limit of the precision
445  if (r2 > r2_old) {
446  resIncrease++;
447  resIncreaseTotal++;
448  warningQuda("GCR: new reliable residual norm %e is greater than previous reliable residual norm %e (total #inc %i)",
449  sqrt(r2), sqrt(r2_old), resIncreaseTotal);
450  if (resIncrease > maxResIncrease or resIncreaseTotal > maxResIncreaseTotal) {
451  warningQuda("GCR: solver exiting due to too many true residual norm increases");
452  break;
453  }
454  } else {
455  resIncrease = 0;
456  }
457 
458  k = 0;
459 
460  if ( !convergence(r2, heavy_quark_res, stop, param.tol_hq) ) {
461  restart++; // restarting if residual is still too great
462 
463  PrintStats("GCR (restart)", restart, r2, b2, heavy_quark_res);
464  blas::copy(rSloppy, r);
465  blas::zero(xSloppy);
466 
467  r2_old = r2;
468 
469  // prevent ending the Krylov space prematurely if other convergence criteria not met
470  if (r2 < stop) l2_converge = true;
471  }
472 
473  r2_old = r2;
474 
475  }
476 
477  }
478 
479  if (total_iter > 0) blas::copy(x, y);
480 
483 
485 
486  double gflops = (blas::flops + mat.flops() + matSloppy.flops() + matPrecon.flops())*1e-9;
487  if (K) gflops += K->flops()*1e-9;
488 
489  if (k>=param.maxiter && getVerbosity() >= QUDA_SUMMARIZE)
490  warningQuda("Exceeded maximum iterations %d", param.maxiter);
491 
492  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("GCR: number of restarts = %d\n", restart);
493 
494  if (param.compute_true_res) {
495  // Calculate the true residual
496  mat(r, x, y);
497  double true_res = blas::xmyNorm(b, r);
498  param.true_res = sqrt(true_res / b2);
501  else
502  param.true_res_hq = 0.0;
503  }
504 
505  param.gflops += gflops;
506  param.iter += total_iter;
507 
508  // reset the flops counters
509  blas::flops = 0;
510  mat.flops();
511  matSloppy.flops();
512  matPrecon.flops();
513 
515  profile.TPSTART(QUDA_PROFILE_FREE);
516 
517  PrintSummary("GCR", total_iter, r2, b2);
518 
519  profile.TPSTOP(QUDA_PROFILE_FREE);
520 
521  return;
522  }
523 
524 } // namespace quda
bool convergence(const double &r2, const double &hq2, const double &r2_tol, const double &hq_tol)
Definition: solver.cpp:139
bool global_reduction
whether the solver acting as a preconditioner for another solver
Definition: invert_quda.h:201
double timeInterval(struct timeval start, struct timeval end)
QudaSchwarzType schwarz_type
Definition: invert_quda.h:175
void computeBeta(Complex **beta, std::vector< ColorSpinorField *> Ap, int i, int N, int k)
QudaVerbosity verbosity_precondition
Definition: invert_quda.h:197
virtual double flops() const
Definition: invert_quda.h:399
static double stopping(const double &tol, const double &b2, QudaResidualType residual_type)
Definition: solver.cpp:122
double3 cDotProductNormA(ColorSpinorField &a, ColorSpinorField &b)
Definition: reduce_quda.cu:572
ColorSpinorField * r_pre
sloppy residual vector
Definition: invert_quda.h:624
QudaInverterType inv_type
Definition: invert_quda.h:19
QudaVerbosity getVerbosity()
Definition: util_quda.cpp:20
std::vector< ColorSpinorField * > p
residual vector for doing multi-cycle preconditioning
Definition: invert_quda.h:628
#define errorQuda(...)
Definition: util_quda.h:90
double norm2(const ColorSpinorField &a)
Definition: reduce_quda.cu:241
void init()
Definition: blas_quda.cu:64
cudaEvent_t start
SolverParam Kparam
Definition: invert_quda.h:603
__host__ __device__ ValueType sqrt(ValueType x)
Definition: complex_quda.h:105
Complex cDotProduct(ColorSpinorField &, ColorSpinorField &)
Definition: reduce_quda.cu:500
std::complex< double > Complex
Definition: eig_variables.h:13
int commCoords(int)
cudaColorSpinorField * tmp
Definition: covdev_test.cpp:44
static ColorSpinorField * Create(const ColorSpinorParam &param)
double * gamma
Definition: invert_quda.h:612
TimeProfile & profile
Definition: invert_quda.h:329
void operator()(ColorSpinorField &out, ColorSpinorField &in)
void copy(ColorSpinorField &dst, const ColorSpinorField &src)
Definition: copy_quda.cu:263
double xmyNorm(ColorSpinorField &x, ColorSpinorField &y)
Definition: reduce_quda.cu:364
QudaInverterType inv_type_precondition
Definition: invert_quda.h:25
QudaPreserveSource preserve_source
Definition: invert_quda.h:121
int max_res_increase_total
Definition: invert_quda.h:79
ColorSpinorField * rM
preconditioner result
Definition: invert_quda.h:626
Complex ** beta
Definition: invert_quda.h:611
ColorSpinorField * r_sloppy
sloppy solution vector
Definition: invert_quda.h:623
void fillInnerSolveParam(SolverParam &inner, const SolverParam &outer)
Complex * alpha
Definition: invert_quda.h:610
int pipeline
Definition: test_util.cpp:1632
QudaGaugeParam param
Definition: pack_test.cpp:17
#define b
void backSubs(const Complex *alpha, Complex **const beta, const double *gamma, Complex *delta, int n)
GCR(DiracMatrix &mat, DiracMatrix &matSloppy, DiracMatrix &matPrecon, SolverParam &param, TimeProfile &profile)
QudaComputeNullVector compute_null_vector
Definition: invert_quda.h:53
double Last(QudaProfileType idx)
Solver * K
Definition: invert_quda.h:602
void PrintSummary(const char *name, int k, const double &r2, const double &b2)
Definition: solver.cpp:194
double cabxpyAxNorm(const double &a, const Complex &b, ColorSpinorField &x, ColorSpinorField &y)
Definition: reduce_quda.cu:449
static unsigned int delta
QudaResidualType residual_type
Definition: invert_quda.h:47
void updateSolution(ColorSpinorField &x, const Complex *alpha, Complex **const beta, double *gamma, int k, std::vector< ColorSpinorField *> p)
virtual ~GCR()
ColorSpinorParam csParam
Definition: pack_test.cpp:24
static __inline__ size_t p
#define warningQuda(...)
Definition: util_quda.h:101
std::vector< ColorSpinorField * > Ap
Definition: invert_quda.h:629
bool is_preconditioner
verbosity to use for preconditioner
Definition: invert_quda.h:199
ColorSpinorField * yp
residual vector
Definition: invert_quda.h:620
double3 HeavyQuarkResidualNorm(ColorSpinorField &x, ColorSpinorField &r)
Definition: reduce_quda.cu:703
Complex caxpyDotzy(const Complex &a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
Definition: reduce_quda.cu:544
double gamma(double) __attribute__((availability(macosx
ColorSpinorField * p_pre
residual passed to preconditioner
Definition: invert_quda.h:625
void caxpy(const Complex &a, ColorSpinorField &x, ColorSpinorField &y)
Definition: blas_quda.cu:246
double tol_precondition
Definition: invert_quda.h:164
void zero(ColorSpinorField &a)
Definition: blas_quda.cu:45
void pushVerbosity(QudaVerbosity verbosity)
Definition: util_quda.cpp:82
QudaPrecision precision_precondition
Definition: invert_quda.h:118
void axpy(const double &a, ColorSpinorField &x, ColorSpinorField &y)
Definition: blas_quda.cu:150
QudaPrecision precision
Definition: invert_quda.h:112
void orthoDir(Complex **beta, std::vector< ColorSpinorField *> Ap, int k, int pipeline)
SolverParam & param
Definition: invert_quda.h:328
unsigned long long flops() const
Definition: dirac_quda.h:995
void PrintStats(const char *, int k, const double &r2, const double &b2, const double &hq2)
Definition: solver.cpp:179
#define printfQuda(...)
Definition: util_quda.h:84
unsigned long long flops
Definition: blas_quda.cu:42
ColorSpinorField * tmpp
high precision accumulator
Definition: invert_quda.h:621
void xpy(ColorSpinorField &x, ColorSpinorField &y)
Definition: blas_quda.cu:128
const DiracMatrix & mat
Definition: invert_quda.h:598
ColorSpinorField * rp
Definition: invert_quda.h:619
const DiracMatrix & matPrecon
Definition: invert_quda.h:600
QudaUseInitGuess use_init_guess
Definition: invert_quda.h:50
void popVerbosity()
Definition: util_quda.cpp:93
const DiracMatrix & matSloppy
Definition: invert_quda.h:599
QudaPrecision precision_sloppy
Definition: invert_quda.h:115
ColorSpinorField * x_sloppy
temporary for mat-vec
Definition: invert_quda.h:622
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
QudaParity parity
Definition: covdev_test.cpp:53
void updateAp(Complex **beta, std::vector< ColorSpinorField *> Ap, int begin, int size, int k)
#define a
cudaEvent_t cudaEvent_t end