QUDA  v0.7.0
A library for QCD on GPUs
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
inv_mr_quda.cpp
Go to the documentation of this file.
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <math.h>
4 
5 #include <complex>
6 
7 #include <quda_internal.h>
8 #include <blas_quda.h>
9 #include <dslash_quda.h>
10 #include <invert_quda.h>
11 #include <util_quda.h>
12 
13 #include<face_quda.h>
14 
15 #include <color_spinor_field.h>
16 
17 namespace quda {
18 
20  Solver(param, profile), mat(mat), init(false), allocate_r(false)
21  {
22 
23  }
24 
25  MR::~MR() {
27  if (init) {
28  if (allocate_r) delete rp;
29  delete Arp;
30  delete tmpp;
31  }
33  }
34 
36  {
37 
38  globalReduce = false; // use local reductions for DD solver
39 
40  if (!init) {
44  rp = new cudaColorSpinorField(x, csParam);
45  allocate_r = true;
46  }
47  Arp = new cudaColorSpinorField(x);
48  tmpp = new cudaColorSpinorField(x, csParam); //temporary for mat-vec
49 
50  init = true;
51  }
54  cudaColorSpinorField &Ar = *Arp;
55  cudaColorSpinorField &tmp = *tmpp;
56 
57  // set initial guess to zero and thus the residual is just the source
58  zeroCuda(x); // can get rid of this for a special first update kernel
59  double b2 = normCuda(b);
60  if (&r != &b) copyCuda(r, b);
61 
62  // domain-wise normalization of the initial residual to prevent underflow
63  double r2=0.0; // if zero source then we will exit immediately doing no work
64  if (b2 > 0.0) {
65  axCuda(1/sqrt(b2), r); // can merge this with the prior copy
66  r2 = 1.0; // by definition by this is now true
67  }
68 
70  quda::blas_flops = 0;
72  }
73 
74  double omega = 1.0;
75 
76  int k = 0;
78  double x2 = norm2(x);
79  double3 Ar3 = cDotProductNormBCuda(Ar, r);
80  printfQuda("MR: %d iterations, r2 = %e, <r|A|r> = (%e, %e), x2 = %e\n",
81  k, Ar3.z, Ar3.x, Ar3.y, x2);
82  }
83 
84  while (k < param.maxiter && r2 > 0.0) {
85 
86  mat(Ar, r, tmp);
87 
88  double3 Ar3 = cDotProductNormACuda(Ar, r);
89  Complex alpha = Complex(Ar3.x, Ar3.y) / Ar3.z;
90 
91  // x += omega*alpha*r, r -= omega*alpha*Ar, r2 = norm2(r)
92  //r2 = caxpyXmazNormXCuda(omega*alpha, r, x, Ar);
93  caxpyXmazCuda(omega*alpha, r, x, Ar);
94 
96  double x2 = norm2(x);
97  double r2 = norm2(r);
98  printfQuda("MR: %d iterations, r2 = %e, <r|A|r> = (%e,%e) x2 = %e\n",
99  k+1, r2, Ar3.x, Ar3.y, x2);
100  } else if (getVerbosity() >= QUDA_VERBOSE) {
101  printfQuda("MR: %d iterations, <r|A|r> = (%e, %e)\n", k, Ar3.x, Ar3.y);
102  }
103 
104  k++;
105  }
106 
107  if (getVerbosity() >= QUDA_VERBOSE) {
108  mat(Ar, r, tmp);
109  Complex Ar2 = cDotProductCuda(Ar, r);
110  printfQuda("MR: %d iterations, <r|A|r> = (%e, %e)\n", k, real(Ar2), imag(Ar2));
111  }
112 
113  // Obtain global solution by rescaling
114  if (b2 > 0.0) axCuda(sqrt(b2), x);
115 
120 
121  double gflops = (quda::blas_flops + mat.flops())*1e-9;
122  reduceDouble(gflops);
123 
124  param.gflops += gflops;
125  param.iter += k;
126 
127  // Calculate the true residual
128  r2 = norm2(r);
129  mat(r, x);
130  double true_res = xmyNormCuda(b, r);
131  param.true_res = sqrt(true_res / b2);
132 
133  if (getVerbosity() >= QUDA_SUMMARIZE) {
134  printfQuda("MR: Converged after %d iterations, relative residua: iterated = %e, true = %e\n",
135  k, sqrt(r2/b2), param.true_res);
136  }
137 
138  // reset the flops counters
139  quda::blas_flops = 0;
140  mat.flops();
142  }
143 
144  globalReduce = true; // renable global reductions for outer solver
145 
146  return;
147  }
148 
149 } // namespace quda
QudaVerbosity getVerbosity()
Definition: util_quda.cpp:20
__host__ __device__ ValueType sqrt(ValueType x)
Definition: complex_quda.h:105
std::complex< double > Complex
Definition: eig_variables.h:13
MR(DiracMatrix &mat, SolverParam &param, TimeProfile &profile)
Definition: inv_mr_quda.cpp:19
void operator()(cudaColorSpinorField &out, cudaColorSpinorField &in)
Definition: inv_mr_quda.cpp:35
void mat(void *out, void **fatlink, void **longlink, void *in, double kappa, int dagger_bit, QudaPrecision sPrecision, QudaPrecision gPrecision)
TimeProfile & profile
Definition: invert_quda.h:224
QudaInverterType inv_type_precondition
Definition: invert_quda.h:24
QudaPreserveSource preserve_source
Definition: invert_quda.h:90
double3 cDotProductNormBCuda(cudaColorSpinorField &a, cudaColorSpinorField &b)
Definition: reduce_quda.cu:620
unsigned long long flops() const
Definition: dirac_quda.h:587
QudaGaugeParam param
Definition: pack_test.cpp:17
cudaColorSpinorField * tmp
void caxpyXmazCuda(const Complex &a, cudaColorSpinorField &x, cudaColorSpinorField &y, cudaColorSpinorField &z)
Definition: blas_quda.cu:452
Complex cDotProductCuda(cudaColorSpinorField &, cudaColorSpinorField &)
Definition: reduce_quda.cu:468
ColorSpinorParam csParam
Definition: pack_test.cpp:24
void copyCuda(cudaColorSpinorField &dst, const cudaColorSpinorField &src)
Definition: copy_quda.cu:235
double normCuda(const cudaColorSpinorField &b)
Definition: reduce_quda.cu:145
int x[4]
unsigned long long blas_flops
Definition: blas_quda.cu:37
SolverParam & param
Definition: invert_quda.h:223
void Stop(QudaProfileType idx)
double Last(QudaProfileType idx)
void reduceDouble(double &)
#define printfQuda(...)
Definition: util_quda.h:67
void zeroCuda(cudaColorSpinorField &a)
Definition: blas_quda.cu:40
double3 cDotProductNormACuda(cudaColorSpinorField &a, cudaColorSpinorField &b)
Definition: reduce_quda.cu:591
virtual ~MR()
Definition: inv_mr_quda.cpp:25
void Start(QudaProfileType idx)
void init(int argc, char **argv)
Definition: dslash_test.cpp:79
void axCuda(const double &a, cudaColorSpinorField &x)
Definition: blas_quda.cu:171
double norm2(const ColorSpinorField &)
double xmyNormCuda(cudaColorSpinorField &a, cudaColorSpinorField &b)
Definition: reduce_quda.cu:343
bool globalReduce
Definition: face_buffer.cpp:11