QUDA  v0.7.0
A library for QCD on GPUs
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
inv_pcg_quda.cpp
Go to the documentation of this file.
1 #include <cstdio>
2 #include <cstdlib>
3 #include <cmath>
4 
5 #include <quda_internal.h>
6 #include <color_spinor_field.h>
7 #include <blas_quda.h>
8 #include <dslash_quda.h>
9 #include <invert_quda.h>
10 #include <util_quda.h>
11 #include <sys/time.h>
12 
13 #include <face_quda.h>
14 #include <iostream>
15 
16 namespace quda {
17 
18 
19  // set the required parameters for the inner solver
20  static void fillInnerSolverParam(SolverParam &inner, const SolverParam &outer)
21  {
22  inner.tol = outer.tol_precondition;
23  inner.maxiter = outer.maxiter_precondition;
24  inner.delta = 1e-20; // no reliable updates within the inner solver
25  inner.precision = outer.precision_precondition; // preconditioners are uni-precision solvers
26  inner.precision_sloppy = outer.precision_precondition;
27 
28  inner.iter = 0;
29  inner.gflops = 0;
30  inner.secs = 0;
31 
32  inner.inv_type_precondition = QUDA_PCG_INVERTER; // used to tell the inner solver it is an inner solver
33 
34  if(outer.inv_type == QUDA_PCG_INVERTER && outer.precision_sloppy != outer.precision_precondition)
35  inner.preserve_source = QUDA_PRESERVE_SOURCE_NO;
36  else inner.preserve_source = QUDA_PRESERVE_SOURCE_YES;
37  }
38 
39 
41  Solver(param, profile), mat(mat), matSloppy(matSloppy), matPrecon(matPrecon), K(0), Kparam(param)
42  {
43 
44  fillInnerSolverParam(Kparam, param);
45 
47  K = new CG(matPrecon, matPrecon, Kparam, profile);
48  }else if(param.inv_type_precondition == QUDA_MR_INVERTER){
49  K = new MR(matPrecon, Kparam, profile);
50  }else if(param.inv_type_precondition == QUDA_SD_INVERTER){
51  K = new SD(matPrecon, Kparam, profile);
52  }else if(param.inv_type_precondition != QUDA_INVALID_INVERTER){ // unknown preconditioner
53  errorQuda("Unknown inner solver %d", param.inv_type_precondition);
54  }
55  }
56 
59 
60  if(K) delete K;
61 
63  }
64 
65 
67  {
68 
70  // Check to see that we're not trying to invert on a zero-field source
71  const double b2 = norm2(b);
72  if(b2 == 0){
74  printfQuda("Warning: inverting on zero-field source\n");
75  x=b;
76  param.true_res = 0.0;
77  param.true_res_hq = 0.0;
78  }
79 
80  int k=0;
81  int rUpdate=0;
82 
83  cudaColorSpinorField* minvrPre = NULL;
84  cudaColorSpinorField* rPre = NULL;
85  cudaColorSpinorField* minvr = NULL;
86  cudaColorSpinorField* minvrSloppy = NULL;
87  cudaColorSpinorField* p = NULL;
88 
89 
92  if(K) minvr = new cudaColorSpinorField(b);
94  cudaColorSpinorField y(b,csParam);
95 
96  mat(r, x, y); // => r = A*x;
97  double r2 = xmyNormCuda(b,r);
98 
100  cudaColorSpinorField tmpSloppy(x,csParam);
101  cudaColorSpinorField Ap(x,csParam);
102 
103  cudaColorSpinorField *r_sloppy;
104  if(param.precision_sloppy == x.Precision())
105  {
106  r_sloppy = &r;
107  minvrSloppy = minvr;
108  }else{
109  csParam.create = QUDA_COPY_FIELD_CREATE;
110  r_sloppy = new cudaColorSpinorField(r,csParam);
111  if(K) minvrSloppy = new cudaColorSpinorField(*minvr,csParam);
112  }
113 
114 
115  cudaColorSpinorField *x_sloppy;
116  if(param.precision_sloppy == x.Precision() ||
119  x_sloppy = &x;
120  }else{
121  csParam.create = QUDA_COPY_FIELD_CREATE;
122  x_sloppy = new cudaColorSpinorField(x,csParam);
123  }
124 
125 
126  cudaColorSpinorField &xSloppy = *x_sloppy;
127  cudaColorSpinorField &rSloppy = *r_sloppy;
128 
129  if(&x != &xSloppy){
130  copyCuda(y, x); // copy x to y
131  zeroCuda(xSloppy);
132  }else{
133  zeroCuda(y); // no reliable updates // NB: check this
134  }
135 
136  const bool use_heavy_quark_res = (param.residual_type & QUDA_HEAVY_QUARK_RESIDUAL) ? true : false;
137 
138  if(K){
139  csParam.create = QUDA_COPY_FIELD_CREATE;
141  rPre = new cudaColorSpinorField(rSloppy,csParam);
142  // Create minvrPre
143  minvrPre = new cudaColorSpinorField(*rPre);
144  globalReduce = false;
145  (*K)(*minvrPre, *rPre);
146  globalReduce = true;
147  *minvrSloppy = *minvrPre;
148  p = new cudaColorSpinorField(*minvrSloppy);
149  }else{
150  p = new cudaColorSpinorField(rSloppy);
151  }
152 
153 
155 
156 
158 
159 
160 
161  double stop = stopping(param.tol, b2, param.residual_type); // stopping condition of solver
162  double heavy_quark_res = 0.0; // heavy quark residual
163  if(use_heavy_quark_res) heavy_quark_res = sqrt(HeavyQuarkResidualNormCuda(x,r).z);
164 
165  double alpha = 0.0, beta=0.0;
166  double pAp;
167  double rMinvr = 0;
168  double rMinvr_old = 0.0;
169  double r_new_Minvr_old = 0.0;
170  double r2_old = 0;
171  r2 = norm2(r);
172 
173  double rNorm = sqrt(r2);
174  double r0Norm = rNorm;
175  double maxrx = rNorm;
176  double maxrr = rNorm;
177  double delta = param.delta;
178 
179 
180  if(K) rMinvr = reDotProductCuda(rSloppy,*minvrSloppy);
181 
184 
185 
186  quda::blas_flops = 0;
187 
188  const int maxResIncrease = param.max_res_increase; // check if we reached the limit of our tolerance
189  const int maxResIncreaseTotal = param.max_res_increase_total;
190 
191  int resIncrease = 0;
192  int resIncreaseTotal = 0;
193 
194  while(!convergence(r2, heavy_quark_res, stop, param.tol_hq) && k < param.maxiter){
195 
196  matSloppy(Ap, *p, tmpSloppy);
197 
198  double sigma;
199  pAp = reDotProductCuda(*p,Ap);
200 
201  alpha = (K) ? rMinvr/pAp : r2/pAp;
202  Complex cg_norm = axpyCGNormCuda(-alpha, Ap, rSloppy);
203  // r --> r - alpha*A*p
204  r2_old = r2;
205  r2 = real(cg_norm);
206 
207  sigma = imag(cg_norm) >= 0.0 ? imag(cg_norm) : r2; // use r2 if (r_k+1, r_k-1 - r_k) breaks
208 
209  if(K) rMinvr_old = rMinvr;
210 
211  rNorm = sqrt(r2);
212  if(rNorm > maxrx) maxrx = rNorm;
213  if(rNorm > maxrr) maxrr = rNorm;
214 
215 
216  int updateX = (rNorm < delta*r0Norm && r0Norm <= maxrx) ? 1 : 0;
217  int updateR = ((rNorm < delta*maxrr && r0Norm <= maxrr) || updateX) ? 1 : 0;
218 
219 
220  // force a reliable update if we are within target tolerance (only if doing reliable updates)
221  if( convergence(r2, heavy_quark_res, stop, param.tol_hq) && delta >= param.tol) updateX = 1;
222 
223 
224  if( !(updateR || updateX) ){
225 
226  if(K){
227  r_new_Minvr_old = reDotProductCuda(rSloppy,*minvrSloppy);
228  *rPre = rSloppy;
229  globalReduce = false;
230  (*K)(*minvrPre, *rPre);
231  globalReduce = true;
232 
233 
234  *minvrSloppy = *minvrPre;
235 
236  rMinvr = reDotProductCuda(rSloppy,*minvrSloppy);
237  beta = (rMinvr - r_new_Minvr_old)/rMinvr_old;
238  axpyZpbxCuda(alpha, *p, xSloppy, *minvrSloppy, beta);
239  }else{
240  beta = sigma/r2_old; // use the alternative beta computation
241  axpyZpbxCuda(alpha, *p, xSloppy, rSloppy, beta);
242  }
243  } else { // reliable update
244 
245  axpyCuda(alpha, *p, xSloppy); // xSloppy += alpha*p
246  copyCuda(x, xSloppy);
247  xpyCuda(x, y); // y += x
248  // Now compute r
249  mat(r, y, x); // x is just a temporary here
250  r2 = xmyNormCuda(b, r);
251  copyCuda(rSloppy, r); // copy r to rSloppy
252  zeroCuda(xSloppy);
253 
254 
255  // break-out check if we have reached the limit of the precision
256  if(sqrt(r2) > r0Norm && updateX) {
257  resIncrease++;
258  resIncreaseTotal++;
259  // reuse r0Norm for this
260  warningQuda("PCG: new reliable residual norm %e is greater than previous reliable residual norm %e (total #inc %i)", sqrt(r2), r0Norm, resIncreaseTotal);
261 
262 
263 
264  if (resIncrease > maxResIncrease or resIncreaseTotal > maxResIncreaseTotal)break;
265  }else{
266  resIncrease = 0;
267  }
268 
269  rNorm = sqrt(r2);
270  maxrr = rNorm;
271  maxrx = rNorm;
272  r0Norm = rNorm;
273  ++rUpdate;
274 
275  if(K){
276  *rPre = rSloppy;
277  globalReduce = false;
278  (*K)(*minvrPre, *rPre);
279  globalReduce = true;
280 
281  *minvrSloppy = *minvrPre;
282 
283  rMinvr = reDotProductCuda(rSloppy,*minvrSloppy);
284  beta = rMinvr/rMinvr_old;
285 
286  xpayCuda(*minvrSloppy, beta, *p); // p = minvrSloppy + beta*p
287  }else{ // standard CG - no preconditioning
288 
289  // explicitly restore the orthogonality of the gradient vector
290  double rp = reDotProductCuda(rSloppy, *p)/(r2);
291  axpyCuda(-rp, rSloppy, *p);
292 
293  beta = r2/r2_old;
294  xpayCuda(rSloppy, beta, *p);
295  }
296  }
297  ++k;
298  PrintStats("PCG", k, r2, b2, heavy_quark_res);
299  }
300 
301 
303 
305 
306  if(x.Precision() != param.precision_sloppy) copyCuda(x, xSloppy);
307  xpyCuda(y, x); // x += y
308 
309 
311  double gflops = (quda::blas_flops + mat.flops() + matSloppy.flops() + matPrecon.flops())*1e-9;
312  reduceDouble(gflops);
313  param.gflops = gflops;
314  param.iter += k;
315 
316  if (k==param.maxiter)
317  warningQuda("Exceeded maximum iterations %d", param.maxiter);
318 
319  if (getVerbosity() >= QUDA_VERBOSE)
320  printfQuda("CG: Reliable updates = %d\n", rUpdate);
321 
322 
323 
324 
325 
326  // compute the true residual
327  mat(r, x, y);
328  double true_res = xmyNormCuda(b, r);
329  param.true_res = sqrt(true_res / b2);
330 
331  // reset the flops counters
332  quda::blas_flops = 0;
333  mat.flops();
334  matSloppy.flops();
335  matPrecon.flops();
336 
339 
340  if(K){ // These are only needed if preconditioning is used
341  delete minvrPre;
342  delete rPre;
343  delete minvr;
344  if(x.Precision() != param.precision_sloppy) delete minvrSloppy;
345  }
346  delete p;
347 
348  if(x.Precision() != param.precision_sloppy){
349  delete x_sloppy;
350  delete r_sloppy;
351  }
352 
354  return;
355  }
356 
357 
358 } // namespace quda
bool convergence(const double &r2, const double &hq2, const double &r2_tol, const double &hq_tol)
Definition: solver.cpp:82
void setPrecision(QudaPrecision precision)
static double stopping(const double &tol, const double &b2, QudaResidualType residual_type)
Definition: solver.cpp:65
int y[4]
QudaVerbosity getVerbosity()
Definition: util_quda.cpp:20
#define errorQuda(...)
Definition: util_quda.h:73
__host__ __device__ ValueType sqrt(ValueType x)
Definition: complex_quda.h:105
std::complex< double > Complex
Definition: eig_variables.h:13
void axpyZpbxCuda(const double &a, cudaColorSpinorField &x, cudaColorSpinorField &y, cudaColorSpinorField &z, const double &b)
Definition: blas_quda.cu:338
void mat(void *out, void **fatlink, void **longlink, void *in, double kappa, int dagger_bit, QudaPrecision sPrecision, QudaPrecision gPrecision)
TimeProfile & profile
Definition: invert_quda.h:224
QudaInverterType inv_type_precondition
Definition: invert_quda.h:24
int max_res_increase_total
Definition: invert_quda.h:54
Complex axpyCGNormCuda(const double &a, cudaColorSpinorField &x, cudaColorSpinorField &y)
Definition: reduce_quda.cu:682
unsigned long long flops() const
Definition: dirac_quda.h:587
QudaGaugeParam param
Definition: pack_test.cpp:17
virtual ~PreconCG()
QudaResidualType residual_type
Definition: invert_quda.h:35
ColorSpinorParam csParam
Definition: pack_test.cpp:24
#define warningQuda(...)
Definition: util_quda.h:84
void copyCuda(cudaColorSpinorField &dst, const cudaColorSpinorField &src)
Definition: copy_quda.cu:235
void operator()(cudaColorSpinorField &out, cudaColorSpinorField &in)
void axpyCuda(const double &a, cudaColorSpinorField &x, cudaColorSpinorField &y)
Definition: blas_quda.cu:115
int x[4]
QudaPrecision precision_precondition
Definition: invert_quda.h:87
unsigned long long blas_flops
Definition: blas_quda.cu:37
SolverParam & param
Definition: invert_quda.h:223
void xpyCuda(cudaColorSpinorField &x, cudaColorSpinorField &y)
Definition: blas_quda.cu:98
double reDotProductCuda(cudaColorSpinorField &a, cudaColorSpinorField &b)
Definition: reduce_quda.cu:170
void Stop(QudaProfileType idx)
QudaPrecision Precision() const
void PrintStats(const char *, int k, const double &r2, const double &b2, const double &hq2)
Definition: solver.cpp:122
double Last(QudaProfileType idx)
void reduceDouble(double &)
#define printfQuda(...)
Definition: util_quda.h:67
void zeroCuda(cudaColorSpinorField &a)
Definition: blas_quda.cu:40
void Start(QudaProfileType idx)
QudaPrecision precision_sloppy
Definition: invert_quda.h:84
bool use_sloppy_partial_accumulator
Definition: invert_quda.h:44
PreconCG(DiracMatrix &mat, DiracMatrix &matSloppy, DiracMatrix &matPrecon, SolverParam &param, TimeProfile &profile)
void xpayCuda(cudaColorSpinorField &x, const double &a, cudaColorSpinorField &y)
Definition: blas_quda.cu:138
double3 HeavyQuarkResidualNormCuda(cudaColorSpinorField &x, cudaColorSpinorField &r)
Definition: reduce_quda.cu:777
double norm2(const ColorSpinorField &)
double xmyNormCuda(cudaColorSpinorField &a, cudaColorSpinorField &b)
Definition: reduce_quda.cu:343
bool globalReduce
Definition: face_buffer.cpp:11