QUDA  v1.1.0
A library for QCD on GPUs
inv_gmresdr_quda.cpp
Go to the documentation of this file.
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <math.h>
4 #include <string.h>
5 #include <iostream>
6 
7 #include <quda_internal.h>
8 #include <color_spinor_field.h>
9 #include <blas_quda.h>
10 #include <dslash_quda.h>
11 #include <invert_quda.h>
12 #include <util_quda.h>
13 
14 #ifdef MAGMA_LIB
15 #include <blas_magma.h>
16 #endif
17 
18 #include <algorithm>
19 #include <memory>
20 
21 #include <eigen_helper.h>
22 
23 /*
24  GMRES-DR algorithm:
25  R. B. Morgan, "GMRES with deflated restarting", SIAM J. Sci. Comput. 24 (2002) p. 20-37
26  See also: A.Frommer et al, "Deflation and Flexible SAP-Preconditioning of GMRES in Lattice QCD simulations" ArXiv hep-lat/1204.5463
27 */
28 
29 namespace quda {
30 
31  using namespace blas;
32  using namespace std;
33 
34  using DynamicStride = Stride<Dynamic, Dynamic>;
35 
36  using DenseMatrix = MatrixXcd;
37  using VectorSet = MatrixXcd;
38  using Vector = VectorXcd;
39 
40  // special types needed for compatibility with QUDA blas:
42 
43  struct SortedEvals {
44 
45  double _val;
46  int _idx;
47 
48  SortedEvals(double val, int idx) : _val(val), _idx(idx) {};
49  static bool SelectSmall(SortedEvals v1, SortedEvals v2) { return (v1._val < v2._val); }
50  };
51 
53 
55  {
56 
57  public:
61 
62  int m;
63  int k;
64  int restarts;
65 
67 
68  ColorSpinorFieldSet *Vkp1; // high-precision accumulation array
69 
70  GMResDRArgs(int m, int nev) :
71  ritzVecs(VectorSet::Zero(m + 1, nev + 1)),
72  H(DenseMatrix::Zero(m + 1, m)),
73  eta(Vector::Zero(m)),
74  m(m),
75  k(nev),
76  restarts(0),
77  Vkp1(nullptr)
78  {
79  c = static_cast<Complex *>(ritzVecs.col(k).data());
80  }
81 
82  inline void ResetArgs()
83  {
84  ritzVecs.setZero();
85  H.setZero();
86  eta.setZero();
87  }
88 
90  {
91  if (Vkp1) delete Vkp1;
92  }
93  };
94 
95  template <libtype which_lib> void ComputeHarmonicRitz(GMResDRArgs &args) { errorQuda("\nUnknown library type.\n"); }
96 
97  template <> void ComputeHarmonicRitz<libtype::magma_lib>(GMResDRArgs &args)
98  {
99 #ifdef MAGMA_LIB
100  DenseMatrix cH = args.H.block(0, 0, args.m, args.m).adjoint();
101  DenseMatrix Gk = args.H.block(0, 0, args.m, args.m);
102 
103  VectorSet harVecs = MatrixXcd::Zero(args.m, args.m);
104  Vector harVals = VectorXcd::Zero(args.m);
105 
106  Vector em = VectorXcd::Zero(args.m);
107 
108  em(args.m - 1) = norm(args.H(args.m, args.m - 1));
109 
110  cudaHostRegister(static_cast<void *>(cH.data()), args.m * args.m * sizeof(Complex), cudaHostRegisterDefault);
111  magma_Xgesv(static_cast<void *>(em.data()), args.m, args.m, static_cast<void *>(cH.data()), args.m, sizeof(Complex));
112  cudaHostUnregister(cH.data());
113 
114  Gk.col(args.m - 1) += em;
115 
116  cudaHostRegister(static_cast<void *>(Gk.data()), args.m * args.m * sizeof(Complex), cudaHostRegisterDefault);
117  magma_Xgeev(static_cast<void *>(Gk.data()), args.m, args.m, static_cast<void *>(harVecs.data()),
118  static_cast<void *>(harVals.data()), args.m, sizeof(Complex));
119  cudaHostUnregister(Gk.data());
120 
121  std::vector<SortedEvals> sorted_evals;
122  sorted_evals.reserve(args.m);
123 
124  for (int e = 0; e < args.m; e++) sorted_evals.push_back(SortedEvals(abs(harVals.data()[e]), e));
125  std::stable_sort(sorted_evals.begin(), sorted_evals.end(), SortedEvals::SelectSmall);
126 
127  for (int e = 0; e < args.k; e++)
128  memcpy(args.ritzVecs.col(e).data(), harVecs.col(sorted_evals[e]._idx).data(), (args.m) * sizeof(Complex));
129 #else
130  errorQuda("Magma library was not built.\n");
131 #endif
132  return;
133  }
134 
135  template <> void ComputeHarmonicRitz<libtype::eigen_lib>(GMResDRArgs &args)
136  {
137 
138  DenseMatrix cH = args.H.block(0, 0, args.m, args.m).adjoint();
139  DenseMatrix Gk = args.H.block(0, 0, args.m, args.m);
140 
141  VectorSet harVecs = MatrixXcd::Zero(args.m, args.m);
142  Vector harVals = VectorXcd::Zero(args.m);
143 
144  Vector em = VectorXcd::Zero(args.m);
145 
146  em(args.m - 1) = norm(args.H(args.m, args.m - 1));
147  Gk.col(args.m - 1) += cH.colPivHouseholderQr().solve(em);
148 
149  ComplexEigenSolver<DenseMatrix> es(Gk);
150  harVecs = es.eigenvectors();
151  harVals = es.eigenvalues();
152 
153  std::vector<SortedEvals> sorted_evals;
154  sorted_evals.reserve(args.m);
155 
156  for (int e = 0; e < args.m; e++) sorted_evals.push_back(SortedEvals(abs(harVals.data()[e]), e));
157  std::stable_sort(sorted_evals.begin(), sorted_evals.end(), SortedEvals::SelectSmall);
158 
159  for (int e = 0; e < args.k; e++)
160  memcpy(args.ritzVecs.col(e).data(), harVecs.col(sorted_evals[e]._idx).data(), (args.m) * sizeof(Complex));
161 
162  return;
163  }
164 
165  template <libtype which_lib> void ComputeEta(GMResDRArgs &args) { errorQuda("\nUnknown library type.\n"); }
166 
167  template <> void ComputeEta<libtype::magma_lib>(GMResDRArgs &args)
168  {
169 #ifdef MAGMA_LIB
170  DenseMatrix Htemp(DenseMatrix::Zero(args.m + 1, args.m));
171  Htemp = args.H;
172 
173  Complex *ctemp = static_cast<Complex *>(args.ritzVecs.col(0).data());
174  memcpy(ctemp, args.c, (args.m + 1) * sizeof(Complex));
175 
176  cudaHostRegister(static_cast<void *>(Htemp.data()), (args.m + 1) * args.m * sizeof(Complex), cudaHostRegisterDefault);
177  magma_Xgels(static_cast<void *>(Htemp.data()), ctemp, args.m + 1, args.m, args.m + 1, sizeof(Complex));
178  cudaHostUnregister(Htemp.data());
179 
180  memcpy(args.eta.data(), ctemp, args.m * sizeof(Complex));
181  memset(ctemp, 0, (args.m + 1) * sizeof(Complex));
182 #else
183  errorQuda("MAGMA library was not built.\n");
184 #endif
185  return;
186  }
187 
188  template <> void ComputeEta<libtype::eigen_lib>(GMResDRArgs &args)
189  {
190 
191  Map<VectorXcd, Unaligned> c_(args.c, args.m + 1);
192  args.eta = args.H.jacobiSvd(ComputeThinU | ComputeThinV).solve(c_);
193 
194  return;
195  }
196 
198  {
199  inner.tol = outer.tol_precondition;
200  inner.maxiter = outer.maxiter_precondition;
201  inner.delta = 1e-20;
202  inner.inv_type = outer.inv_type_precondition;
203 
204  inner.precision = outer.precision_precondition;
206 
207  inner.iter = 0;
208  inner.gflops = 0;
209  inner.secs = 0;
210 
212  inner.is_preconditioner = true;
213  inner.global_reduction = false;
214  if (inner.global_reduction) warningQuda("Set global reduction flag for preconditioner to true.\n");
215 
216  if (outer.precision_sloppy != outer.precision_precondition)
218  else
220  }
221 
222  GMResDR::GMResDR(const DiracMatrix &mat, const DiracMatrix &matSloppy, const DiracMatrix &matPrecon,
223  SolverParam &param, TimeProfile &profile) :
224  Solver(mat, matSloppy, matPrecon, matPrecon, param, profile),
225  K(nullptr),
226  Kparam(param),
227  Vm(nullptr),
228  Zm(nullptr),
229  profile(profile),
230  gmresdr_args(nullptr),
231  init(false)
232  {
234 
236  K = new CG(matPrecon, matPrecon, matPrecon, matPrecon, Kparam, profile);
238  K = new BiCGstab(matPrecon, matPrecon, matPrecon, matPrecon, Kparam, profile);
240  K = new MR(matPrecon, matPrecon, Kparam, profile);
242  K = new SD(matPrecon, Kparam, profile);
244  K = nullptr;
245  else
246  errorQuda("Unsupported preconditioner %d\n", param.inv_type_precondition);
247  }
248 
249  GMResDR::GMResDR(const DiracMatrix &mat, Solver &K, const DiracMatrix &matSloppy, const DiracMatrix &matPrecon,
250  SolverParam &param, TimeProfile &profile) :
251  Solver(mat, matSloppy, matPrecon, matPrecon, param, profile),
252  K(&K),
253  Kparam(param),
254  Vm(nullptr),
255  Zm(nullptr),
256  profile(profile),
257  gmresdr_args(nullptr),
258  init(false)
259  {
260  }
261 
263  {
264  profile.TPSTART(QUDA_PROFILE_FREE);
265 
266  if (init) {
267  delete Vm;
268  Vm = nullptr;
269 
270  delete r_sloppy;
271 
273  delete r_pre;
274  delete p_pre;
275  }
276 
277  if(K) {
278  delete Zm;
279  delete K;
280  }
281 
282  delete tmpp;
283  delete yp;
284  delete rp;
285 
286  delete gmresdr_args;
287  }
288 
289  profile.TPSTOP(QUDA_PROFILE_FREE);
290  }
291 
293  {
294  GMResDRArgs &args = *gmresdr_args;
295 
296  if (do_gels) {
298  ComputeEta<libtype::magma_lib>(args);
299  } else if (param.extlib_type == QUDA_EIGEN_EXTLIB) {
300  ComputeEta<libtype::eigen_lib>(args);
301  } else {
302  errorQuda("Library type %d is currently not supported.\n", param.extlib_type);
303  }
304  }
305 
306  std::vector<ColorSpinorField *> Z_(Zm->Components().begin(), Zm->Components().begin() + args.m);
307  std::vector<ColorSpinorField *> V_(Vm->Components());
308 
309  std::vector<ColorSpinorField *> x_, r_;
310  x_.push_back(x), r_.push_back(r);
311 
312  blas::caxpy(static_cast<Complex *>(args.eta.data()), Z_, x_);
313 
314  VectorXcd minusHeta = -(args.H * args.eta);
315  Map<VectorXcd, Unaligned> c_(args.c, args.m + 1);
316  c_ += minusHeta;
317 
318  blas::caxpy(static_cast<Complex *>(minusHeta.data()), V_, r_);
319  return;
320  }
321 
323  {
324  GMResDRArgs &args = *gmresdr_args;
325 
327  ComputeHarmonicRitz<libtype::magma_lib>(args);
328  } else if (param.extlib_type == QUDA_EIGEN_EXTLIB) {
329  ComputeHarmonicRitz<libtype::eigen_lib>(args);
330  } else {
331  errorQuda("Library type %d is currently not supported.\n", param.extlib_type);
332  }
333 
334  DenseMatrix Qkp1(MatrixXcd::Identity((args.m + 1), (args.k + 1)));
335 
336  HouseholderQR<MatrixXcd> qr(args.ritzVecs);
337  Qkp1.applyOnTheLeft(qr.householderQ());
338 
339  DenseMatrix Res = Qkp1.adjoint() * args.H * Qkp1.topLeftCorner(args.m, args.k);
340  args.H.setZero();
341  args.H.topLeftCorner(args.k + 1, args.k) = Res;
342 
343  blas::zero(*args.Vkp1);
344 
345  std::vector<ColorSpinorField *> vkp1(args.Vkp1->Components());
346  std::vector<ColorSpinorField *> vm(Vm->Components());
347 
348  RowMajorDenseMatrix Alpha(Qkp1); // convert Qkp1 to Row-major format first
349  blas::caxpy(static_cast<Complex *>(Alpha.data()), vm, vkp1);
350 
351  for (int i = 0; i < (args.m + 1); i++) {
352  if (i < (args.k + 1)) {
353  blas::copy(Vm->Component(i), args.Vkp1->Component(i));
354  blas::zero(args.Vkp1->Component(i));
355  } else
356  blas::zero(Vm->Component(i));
357  }
358 
359  if (Zm->V() != Vm->V()) {
360  std::vector<ColorSpinorField *> z(Zm->Components());
361  std::vector<ColorSpinorField *> vk(args.Vkp1->Components().begin(), args.Vkp1->Components().begin() + args.k);
362 
363  RowMajorDenseMatrix Beta(Qkp1.topLeftCorner(args.m, args.k));
364  blas::caxpy(static_cast<Complex *>(Beta.data()), z, vk);
365 
366  for (int i = 0; i < (args.m); i++) {
367  if (i < (args.k))
368  blas::copy(Zm->Component(i), args.Vkp1->Component(i));
369  else
370  blas::zero(Zm->Component(i));
371  }
372  }
373 
374  for (int j = 0; j < args.k; j++) {
375  Complex alpha = cDotProduct(Vm->Component(j), Vm->Component(args.k));
376  caxpy(-alpha, Vm->Component(j), Vm->Component(args.k));
377  }
378 
379  blas::ax(1.0 / sqrt(blas::norm2(Vm->Component(args.k))), Vm->Component(args.k));
380 
381  args.ritzVecs.setZero();
382  return;
383  }
384 
385  int GMResDR::FlexArnoldiProcedure(const int start_idx, const bool do_givens = false)
386  {
387  int j = start_idx;
388  GMResDRArgs &args = *gmresdr_args;
389  ColorSpinorField &tmp = *tmpp;
390 
391  std::unique_ptr<Complex[]> givensH((do_givens) ? new Complex[(args.m + 1) * args.m] : nullptr);
392  std::unique_ptr<Complex[]> cn((do_givens) ? new Complex[args.m] : nullptr);
393  std::unique_ptr<double[]> sn((do_givens) ? new double[args.m] : nullptr);
394 
395  Complex c0 = args.c[0];
396 
397  while (j < args.m) {
398  if (K) {
401 
403  zero(outPre);
405  (*K)(outPre, inPre);
406  popVerbosity();
407 
409  }
410  matSloppy(Vm->Component(j + 1), Zm->Component(j), tmp);
411 
412  args.H(0, j) = cDotProduct(Vm->Component(0), Vm->Component(j + 1));
413  caxpy(-args.H(0, j), Vm->Component(0), Vm->Component(j + 1));
414 
415  Complex h0 = do_givens ? args.H(0, j) : 0.0;
416 
417  for (int i = 1; i <= j; i++) {
418  args.H(i, j) = cDotProduct(Vm->Component(i), Vm->Component(j + 1));
419  caxpy(-args.H(i, j), Vm->Component(i), Vm->Component(j + 1));
420 
421  if (do_givens) {
422  givensH[(args.m + 1) * j + (i - 1)] = conj(cn[i - 1]) * h0 + sn[i - 1] * args.H(i, j);
423  h0 = -sn[i - 1] * h0 + cn[i - 1] * args.H(i, j);
424  }
425  }
426 
427  args.H(j + 1, j) = Complex(sqrt(norm2(Vm->Component(j + 1))), 0.0);
428  blas::ax(1.0 / args.H(j + 1, j).real(), Vm->Component(j + 1));
429  if (do_givens) {
430  double inv_denom = 1.0 / sqrt(norm(h0) + norm(args.H(j + 1, j)));
431  cn[j] = h0 * inv_denom;
432  sn[j] = args.H(j + 1, j).real() * inv_denom;
433  givensH[j * (args.m + 1) + j] = conj(cn[j]) * h0 + sn[j] * args.H(j + 1, j);
434 
435  args.c[j + 1] = -sn[j] * args.c[j];
436  args.c[j] *= conj(cn[j]);
437  }
438 
439  j += 1;
440  }
441 
442  if (do_givens) {
443  Map<MatrixXcd, Unaligned, DynamicStride> givensH_(givensH.get(), args.m, args.m, DynamicStride(args.m + 1, 1));
444  memcpy(args.eta.data(), args.c, args.m * sizeof(Complex));
445  memset((void *)args.c, 0, (args.m + 1) * sizeof(Complex));
446  args.c[0] = c0;
447 
448  givensH_.triangularView<Upper>().solveInPlace<OnTheLeft>(args.eta);
449 
450  } else {
451  memset((void *)args.c, 0, (args.m + 1) * sizeof(Complex));
452 
453  std::vector<ColorSpinorField *> v_(Vm->Components().begin(), Vm->Components().begin() + args.k + 1);
454  std::vector<ColorSpinorField *> r_;
455  r_.push_back(static_cast<ColorSpinorField *>(r_sloppy));
456 
457  blas::cDotProduct(args.c, v_, r_);
458  }
459  return (j - start_idx);
460  }
461 
463  {
464  profile.TPSTART(QUDA_PROFILE_INIT);
465 
466  const double tol_threshold = 1.2;
467  const double det_max_deviation = 0.4;
468 
469  ColorSpinorField *ep = nullptr;
470 
471  if (!init) {
472 
473  gmresdr_args = new GMResDRArgs(param.m, param.n_ev);
474 
480 
481  csParam.setPrecision(param.precision_sloppy);
482 
484  r_sloppy = ColorSpinorField::Create(csParam);
485 
487 
488  csParam.setPrecision(param.precision_precondition);
491 
492  }
493 
494  csParam.setPrecision(param.precision_sloppy);
495  csParam.is_composite = true;
496  csParam.composite_dim = param.m+1;
497 
499 
500  csParam.composite_dim = param.m;
501 
502  Zm = K ? ColorSpinorFieldSet::Create(csParam) : Vm;
503 
504  csParam.composite_dim = (param.n_ev + 1);
505  csParam.setPrecision(QUDA_DOUBLE_PRECISION);
506 
507  gmresdr_args->Vkp1 = ColorSpinorFieldSet::Create(csParam);
508 
509  init = true;
510  }
511 
512  GMResDRArgs &args = *gmresdr_args;
513 
514  ColorSpinorField &r = *rp;
515  ColorSpinorField &y = *yp;
516  ColorSpinorField &e = *ep;
517 
518  ColorSpinorField &rSloppy = *r_sloppy;
519 
520  profile.TPSTOP(QUDA_PROFILE_INIT);
521  profile.TPSTART(QUDA_PROFILE_PREAMBLE);
522 
523  int tot_iters = 0;
524 
525  double normb = norm2( b );
526  double stop = param.tol*param.tol* normb;
527 
528  mat(r, x);
529 
530  double r2 = xmyNorm(b, r);
531  double b2 = r2;
532  args.c[0] = Complex(sqrt(r2), 0.0);
533 
534  printfQuda("\nInitial residual squared: %1.16e, source %1.16e, tolerance %1.16e\n", r2, sqrt(normb), param.tol);
535 
536  rSloppy = r;
537 
539  blas::axpy(1.0 / args.c[0].real(), r, y);
540  Vm->Component(0) = y;
541  blas::zero(y);
542  } else {
543  blas::axpy(1.0 / args.c[0].real(), r, Vm->Component(0));
544  }
545 
546  profile.TPSTOP(QUDA_PROFILE_PREAMBLE);
547  profile.TPSTART(QUDA_PROFILE_COMPUTE);
548  blas::flops = 0;
549 
550  const bool use_heavy_quark_res = (param.residual_type & QUDA_HEAVY_QUARK_RESIDUAL) ? true : false;
551 
552  double heavy_quark_res = 0.0;
553  if (use_heavy_quark_res) heavy_quark_res = sqrt(blas::HeavyQuarkResidualNorm(x, r).z);
554 
555 
556  int restart_idx = 0, j = 0, check_interval = 4;
557 
558  DenseMatrix Gm = DenseMatrix::Zero(args.k+1, args.k+1);
559 
560  while (restart_idx < param.deflation_grid && !(convergence(r2, heavy_quark_res, stop, param.tol_hq) || !(r2 > stop))) {
561  tot_iters += FlexArnoldiProcedure(j, (j == 0));
562  UpdateSolution(&e, r_sloppy, !(j == 0));
563 
564  r2 = norm2(rSloppy);
565 
566  bool do_clean_restart = false;
567  double ext_r2 = 1.0;
568 
569  if ((restart_idx + 1) % check_interval) {
570  mat(y, e);
571  ext_r2 = xmyNorm(r, y);
572 
573  // can this be done as a single 2-d reduction?
574  for (int l = 0; l < args.k + 1; l++) {
575 
576  Complex *col = Gm.col(l).data();
577 
578  std::vector<ColorSpinorField *> v1_(Vm->Components().begin(), Vm->Components().begin() + args.k + 1);
579  std::vector<ColorSpinorField *> v2_;
580  v2_.push_back(static_cast<ColorSpinorField *>(&Vm->Component(l)));
581 
582  blas::cDotProduct(col, v1_, v2_);
583 
584  } // end l-loop
585 
586  Complex detGm = Gm.determinant();
587 
588  PrintStats("FGMResDR:", tot_iters, r2, b2, heavy_quark_res);
589  printfQuda("\nCheck cycle %d, true residual squared %1.15e, Gramm det : (%le, %le)\n", restart_idx, ext_r2,
590  detGm.real(), detGm.imag());
591 
592  Gm.setZero();
593 
594  do_clean_restart = ((sqrt(ext_r2) / sqrt(r2)) > tol_threshold) || fabs(1.0 - (norm(detGm)) > det_max_deviation);
595  }
596 
597  if (((restart_idx != param.deflation_grid - 1) && !do_clean_restart)) {
598 
599  RestartVZH();
600  j = args.k;
601 
602  } else {
603 
604  printfQuda("\nClean restart for cycle %d, true residual squared %1.15e\n", restart_idx, ext_r2);
605  args.ResetArgs();
606 
607  // update solution:
608  xpy(e, x);
609  r = y;
610  zero(e);
611 
612  args.c[0] = Complex(sqrt(ext_r2), 0.0);
613  blas::zero(Vm->Component(0));
614  blas::axpy(1.0 / args.c[0].real(), rSloppy, Vm->Component(0));
615 
616  j = 0;
617  }
618 
619  restart_idx += 1;
620  }
621 
622  // final solution:
623  xpy(e, x);
624 
625  profile.TPSTOP(QUDA_PROFILE_COMPUTE);
626  profile.TPSTART(QUDA_PROFILE_EPILOGUE);
627 
628  param.secs = profile.Last(QUDA_PROFILE_COMPUTE);
629  double gflops = (blas::flops + mat.flops()) * 1e-9;
630  param.gflops = gflops;
631  param.iter += tot_iters;
632 
633  mat(r, x);
634 
635  param.true_res = sqrt(xmyNorm(b, r) / b2);
636 
637  PrintSummary("FGMResDR:", tot_iters, r2, b2, stop, param.tol_hq);
638 
639  blas::flops = 0;
640  mat.flops();
641 
642  profile.TPSTOP(QUDA_PROFILE_EPILOGUE);
643 
644  param.rhs_idx += 1;
645 
646  if (ep) delete ep;
647  return;
648  }
649 
650 } // namespace quda
void magma_Xgels(void *Mat, void *c, int rows, int cols, int ldm, const int prec)
void magma_Xgesv(void *sol, const int ldn, const int n, void *Mat, const int ldm, const int prec)
void magma_Xgeev(void *Mat, const int n, const int ldm, void *vr, void *evalues, const int ldv, const int prec)
Conjugate-Gradient Solver.
Definition: invert_quda.h:639
CompositeColorSpinorField & Components()
static ColorSpinorField * Create(const ColorSpinorParam &param)
ColorSpinorField & Component(const int idx) const
unsigned long long flops() const
Definition: dirac_quda.h:1909
GMResDRArgs(int m, int nev)
ColorSpinorFieldSet * Vkp1
GMResDR(const DiracMatrix &mat, const DiracMatrix &matSloppy, const DiracMatrix &matPrecon, SolverParam &param, TimeProfile &profile)
void UpdateSolution(ColorSpinorField *x, ColorSpinorField *r, bool do_gels)
int FlexArnoldiProcedure(const int start_idx, const bool do_givens)
void operator()(ColorSpinorField &out, ColorSpinorField &in)
T data[N *N]
Definition: quda_matrix.h:70
const DiracMatrix & mat
Definition: invert_quda.h:465
bool convergence(double r2, double hq2, double r2_tol, double hq_tol)
Definition: solver.cpp:328
void PrintSummary(const char *name, int k, double r2, double b2, double r2_tol, double hq_tol)
Prints out the summary of the solver convergence (requires a verbosity of QUDA_SUMMARIZE)....
Definition: solver.cpp:386
SolverParam & param
Definition: invert_quda.h:470
void PrintStats(const char *name, int k, double r2, double b2, double hq2)
Prints out the running statistics of the solver (requires a verbosity of QUDA_VERBOSE)
Definition: solver.cpp:373
const DiracMatrix & matPrecon
Definition: invert_quda.h:467
const DiracMatrix & matSloppy
Definition: invert_quda.h:466
double Last(QudaProfileType idx)
Definition: timer.h:254
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
void * memset(void *s, int c, size_t n)
cudaColorSpinorField * tmp
Definition: covdev_test.cpp:34
@ QUDA_HEAVY_QUARK_RESIDUAL
Definition: enum_quda.h:195
@ QUDA_MR_INVERTER
Definition: enum_quda.h:110
@ QUDA_SD_INVERTER
Definition: enum_quda.h:112
@ QUDA_CG_INVERTER
Definition: enum_quda.h:107
@ QUDA_INVALID_INVERTER
Definition: enum_quda.h:133
@ QUDA_BICGSTAB_INVERTER
Definition: enum_quda.h:108
@ QUDA_PRESERVE_SOURCE_NO
Definition: enum_quda.h:238
@ QUDA_PRESERVE_SOURCE_YES
Definition: enum_quda.h:239
@ QUDA_DOUBLE_PRECISION
Definition: enum_quda.h:65
@ QUDA_EIGEN_EXTLIB
Definition: enum_quda.h:557
@ QUDA_MAGMA_EXTLIB
Definition: enum_quda.h:558
@ QUDA_ZERO_FIELD_CREATE
Definition: enum_quda.h:361
void init()
Create the BLAS context.
void init()
double3 HeavyQuarkResidualNorm(ColorSpinorField &x, ColorSpinorField &r)
double xmyNorm(ColorSpinorField &x, ColorSpinorField &y)
Definition: blas_quda.h:79
unsigned long long flops
void ax(double a, ColorSpinorField &x)
void zero(ColorSpinorField &a)
double norm2(const ColorSpinorField &a)
void axpy(double a, ColorSpinorField &x, ColorSpinorField &y)
Definition: blas_quda.h:43
void caxpy(const Complex &a, ColorSpinorField &x, ColorSpinorField &y)
void xpy(ColorSpinorField &x, ColorSpinorField &y)
Definition: blas_quda.h:41
void copy(ColorSpinorField &dst, const ColorSpinorField &src)
Definition: blas_quda.h:24
Complex cDotProduct(ColorSpinorField &, ColorSpinorField &)
void stop()
Stop profiling.
Definition: device.cpp:228
__host__ __device__ ValueType conj(ValueType x)
Definition: complex_quda.h:130
MatrixXcd DenseMatrix
void ComputeEta(GMResDRArgs &args)
MatrixXcd VectorSet
double norm2(const CloverField &a, bool inverse=false)
__device__ __host__ void zero(double &a)
Definition: float_vector.h:318
void ComputeHarmonicRitz(GMResDRArgs &args)
VectorXcd Vector
Matrix< Complex, Dynamic, Dynamic, RowMajor > RowMajorDenseMatrix
std::complex< double > Complex
Definition: quda_internal.h:86
__host__ __device__ ValueType sqrt(ValueType x)
Definition: complex_quda.h:120
@ QUDA_PROFILE_INIT
Definition: timer.h:106
@ QUDA_PROFILE_EPILOGUE
Definition: timer.h:110
@ QUDA_PROFILE_COMPUTE
Definition: timer.h:108
@ QUDA_PROFILE_FREE
Definition: timer.h:111
@ QUDA_PROFILE_PREAMBLE
Definition: timer.h:107
void fillFGMResDRInnerSolveParam(SolverParam &inner, const SolverParam &outer)
Stride< Dynamic, Dynamic > DynamicStride
Definition: deflation.cpp:17
__host__ __device__ ValueType norm(const complex< ValueType > &z)
Returns the magnitude of z squared.
__host__ __device__ ValueType abs(ValueType x)
Definition: complex_quda.h:125
ColorSpinorParam csParam
Definition: pack_test.cpp:25
QudaGaugeParam param
Definition: pack_test.cpp:18
QudaPreserveSource preserve_source
Definition: invert_quda.h:151
QudaPrecision precision
Definition: invert_quda.h:136
bool is_preconditioner
verbosity to use for preconditioner
Definition: invert_quda.h:238
double tol_precondition
Definition: invert_quda.h:196
QudaResidualType residual_type
Definition: invert_quda.h:49
QudaExtLibType extlib_type
Definition: invert_quda.h:248
QudaPrecision precision_precondition
Definition: invert_quda.h:145
QudaPrecision precision_sloppy
Definition: invert_quda.h:139
QudaInverterType inv_type
Definition: invert_quda.h:22
QudaVerbosity verbosity_precondition
Definition: invert_quda.h:236
QudaInverterType inv_type_precondition
Definition: invert_quda.h:28
bool global_reduction
whether the solver acting as a preconditioner for another solver
Definition: invert_quda.h:240
SortedEvals(double val, int idx)
static bool SelectSmall(SortedEvals v1, SortedEvals v2)
void pushVerbosity(QudaVerbosity verbosity)
Push a new verbosity onto the stack.
Definition: util_quda.cpp:83
#define printfQuda(...)
Definition: util_quda.h:114
void popVerbosity()
Pop the verbosity restoring the prior one on the stack.
Definition: util_quda.cpp:94
#define warningQuda(...)
Definition: util_quda.h:132
#define errorQuda(...)
Definition: util_quda.h:120