20 static void fillInnerSolverParam(SolverParam &inner,
const SolverParam &outer)
22 inner.tol = outer.tol_precondition;
23 inner.maxiter = outer.maxiter_precondition;
25 inner.precision = outer.precision_precondition;
26 inner.precision_sloppy = outer.precision_precondition;
34 if(outer.inv_type ==
QUDA_PCG_INVERTER && outer.precision_sloppy != outer.precision_precondition)
41 Solver(param, profile), mat(mat), matSloppy(matSloppy), matPrecon(matPrecon), K(0), Kparam(param)
44 fillInnerSolverParam(Kparam, param);
47 K =
new CG(matPrecon, matPrecon, Kparam, profile);
49 K =
new MR(matPrecon, Kparam, profile);
51 K =
new SD(matPrecon, Kparam, profile);
71 const double b2 =
norm2(b);
74 printfQuda(
"Warning: inverting on zero-field source\n");
145 (*K)(*minvrPre, *rPre);
147 *minvrSloppy = *minvrPre;
162 double heavy_quark_res = 0.0;
165 double alpha = 0.0, beta=0.0;
168 double rMinvr_old = 0.0;
169 double r_new_Minvr_old = 0.0;
173 double rNorm =
sqrt(r2);
174 double r0Norm = rNorm;
175 double maxrx = rNorm;
176 double maxrr = rNorm;
192 int resIncreaseTotal = 0;
196 matSloppy(Ap, *p, tmpSloppy);
201 alpha = (K) ? rMinvr/pAp : r2/pAp;
207 sigma = imag(cg_norm) >= 0.0 ? imag(cg_norm) : r2;
209 if(K) rMinvr_old = rMinvr;
212 if(rNorm > maxrx) maxrx = rNorm;
213 if(rNorm > maxrr) maxrr = rNorm;
216 int updateX = (rNorm < delta*r0Norm && r0Norm <= maxrx) ? 1 : 0;
217 int updateR = ((rNorm < delta*maxrr && r0Norm <= maxrr) || updateX) ? 1 : 0;
224 if( !(updateR || updateX) ){
230 (*K)(*minvrPre, *rPre);
234 *minvrSloppy = *minvrPre;
237 beta = (rMinvr - r_new_Minvr_old)/rMinvr_old;
256 if(
sqrt(r2) > r0Norm && updateX) {
260 warningQuda(
"PCG: new reliable residual norm %e is greater than previous reliable residual norm %e (total #inc %i)",
sqrt(r2), r0Norm, resIncreaseTotal);
264 if (resIncrease > maxResIncrease or resIncreaseTotal > maxResIncreaseTotal)
break;
278 (*K)(*minvrPre, *rPre);
281 *minvrSloppy = *minvrPre;
284 beta = rMinvr/rMinvr_old;
298 PrintStats(
"PCG", k, r2, b2, heavy_quark_res);
320 printfQuda(
"CG: Reliable updates = %d\n", rUpdate);
bool convergence(const double &r2, const double &hq2, const double &r2_tol, const double &hq_tol)
void setPrecision(QudaPrecision precision)
static double stopping(const double &tol, const double &b2, QudaResidualType residual_type)
QudaVerbosity getVerbosity()
__host__ __device__ ValueType sqrt(ValueType x)
std::complex< double > Complex
void axpyZpbxCuda(const double &a, cudaColorSpinorField &x, cudaColorSpinorField &y, cudaColorSpinorField &z, const double &b)
void mat(void *out, void **fatlink, void **longlink, void *in, double kappa, int dagger_bit, QudaPrecision sPrecision, QudaPrecision gPrecision)
QudaInverterType inv_type_precondition
int max_res_increase_total
Complex axpyCGNormCuda(const double &a, cudaColorSpinorField &x, cudaColorSpinorField &y)
unsigned long long flops() const
QudaResidualType residual_type
void copyCuda(cudaColorSpinorField &dst, const cudaColorSpinorField &src)
void operator()(cudaColorSpinorField &out, cudaColorSpinorField &in)
void axpyCuda(const double &a, cudaColorSpinorField &x, cudaColorSpinorField &y)
QudaPrecision precision_precondition
unsigned long long blas_flops
void xpyCuda(cudaColorSpinorField &x, cudaColorSpinorField &y)
double reDotProductCuda(cudaColorSpinorField &a, cudaColorSpinorField &b)
void Stop(QudaProfileType idx)
QudaPrecision Precision() const
void PrintStats(const char *, int k, const double &r2, const double &b2, const double &hq2)
double Last(QudaProfileType idx)
void reduceDouble(double &)
void zeroCuda(cudaColorSpinorField &a)
void Start(QudaProfileType idx)
QudaPrecision precision_sloppy
bool use_sloppy_partial_accumulator
PreconCG(DiracMatrix &mat, DiracMatrix &matSloppy, DiracMatrix &matPrecon, SolverParam ¶m, TimeProfile &profile)
void xpayCuda(cudaColorSpinorField &x, const double &a, cudaColorSpinorField &y)
double3 HeavyQuarkResidualNormCuda(cudaColorSpinorField &x, cudaColorSpinorField &r)
double norm2(const ColorSpinorField &)
double xmyNormCuda(cudaColorSpinorField &a, cudaColorSpinorField &b)