19 static void fillInnerSolverParam(SolverParam &inner,
const SolverParam &outer)
21 inner.tol = outer.tol_precondition;
26 = outer.inv_type_precondition ==
QUDA_CG_INVERTER ? outer.precision_sloppy : outer.precision_precondition;
27 inner.precision_sloppy = outer.precision_precondition;
38 inner.is_preconditioner =
true;
39 inner.pipeline =
true;
41 inner.schwarz_type = outer.schwarz_type;
44 inner.maxiter = outer.maxiter_precondition;
46 inner.Nkrylov = inner.maxiter / outer.precondition_cycle;
48 inner.Nsteps = outer.precondition_cycle;
51 if (outer.inv_type ==
QUDA_PCG_INVERTER && outer.precision_sloppy != outer.precision_precondition)
56 inner.verbosity_precondition = outer.verbosity_precondition;
58 inner.compute_true_res =
false;
59 inner.sloppy_converge =
true;
68 fillInnerSolverParam(Kparam,
param);
103 printfQuda(
"Warning: inverting on zero-field source\n");
153 tmp3p = tmp2p = tmpp;
165 if (b2 == 0) b2 = r2;
216 (*K)(*minvrPre, *rPre);
217 *minvrSloppy = *minvrPre;
227 double heavy_quark_res = 0.0;
230 double alpha = 0.0, beta = 0.0;
233 double rMinvr_old = 0.0;
234 double r_new_Minvr_old = 0.0;
238 double rNorm =
sqrt(r2);
239 double r0Norm = rNorm;
240 double maxrx = rNorm;
241 double maxrr = rNorm;
242 double maxr_deflate = rNorm;
252 PrintStats(
"PCG", k, r2, b2, heavy_quark_res);
258 int resIncreaseTotal = 0;
267 alpha = (K) ? rMinvr / pAp : r2 / pAp;
273 sigma = imag(cg_norm) >= 0.0 ? imag(cg_norm) : r2;
275 if (K) rMinvr_old = rMinvr;
278 if (rNorm > maxrx) maxrx = rNorm;
279 if (rNorm > maxrr) maxrr = rNorm;
281 int updateX = (rNorm < delta * r0Norm && r0Norm <= maxrx) ? 1 : 0;
282 int updateR = ((rNorm < delta * maxrr && r0Norm <= maxrr) || updateX) ? 1 : 0;
294 (*K)(*minvrPre, *rPre);
297 *minvrSloppy = *minvrPre;
300 beta = (rMinvr - r_new_Minvr_old) / rMinvr_old;
301 axpyZpbx(alpha, *p, xSloppy, *minvrSloppy, beta);
303 beta = sigma / r2_old;
304 axpyZpbx(alpha, *p, xSloppy, rSloppy, beta);
308 axpy(alpha, *p, xSloppy);
322 maxr_deflate =
sqrt(r2);
329 if (
sqrt(r2) > r0Norm && updateX) {
334 "PCG: new reliable residual norm %e is greater than previous reliable residual norm %e (total #inc %i)",
335 sqrt(r2), r0Norm, resIncreaseTotal);
337 if (resIncrease > maxResIncrease or resIncreaseTotal > maxResIncreaseTotal)
break;
351 (*K)(*minvrPre, *rPre);
352 *minvrSloppy = *minvrPre;
355 beta = rMinvr / rMinvr_old;
357 xpay(*minvrSloppy, beta, *p);
362 axpy(-rp, rSloppy, *p);
365 xpay(rSloppy, beta, *p);
369 PrintStats(
"PCG", k, r2, b2, heavy_quark_res);
390 double true_res =
xmyNorm(b, r);
403 if (tmpp)
delete tmpp;
405 if (tmp2p && tmpp != tmp2p)
delete tmp2p;
Conjugate-Gradient Solver.
static ColorSpinorField * Create(const ColorSpinorParam ¶m)
bool isStaggered() const
return if the operator is a staggered operator
unsigned long long flops() const
void deflate(std::vector< ColorSpinorField * > &sol, const std::vector< ColorSpinorField * > &src, const std::vector< ColorSpinorField * > &evecs, const std::vector< Complex > &evals, bool accumulate=false) const
Deflate a set of source vectors with a given eigenspace.
void computeEvals(const DiracMatrix &mat, std::vector< ColorSpinorField * > &evecs, std::vector< Complex > &evals, int size)
Compute eigenvalues and their residiua.
QudaPrecision Precision() const
void operator()(ColorSpinorField &out, ColorSpinorField &in)
PreconCG(const DiracMatrix &mat, const DiracMatrix &matSloppy, const DiracMatrix &matPrecon, const DiracMatrix &matEig, SolverParam ¶m, TimeProfile &profile)
bool convergence(double r2, double hq2, double r2_tol, double hq_tol)
std::vector< ColorSpinorField * > evecs
void destroyDeflationSpace()
Destroy the allocated deflation space.
const DiracMatrix & matEig
static double stopping(double tol, double b2, QudaResidualType residual_type)
Set the solver L2 stopping condition.
std::vector< Complex > evals
void PrintStats(const char *name, int k, double r2, double b2, double hq2)
Prints out the running statistics of the solver (requires a verbosity of QUDA_VERBOSE)
void constructDeflationSpace(const ColorSpinorField &meta, const DiracMatrix &mat)
Constructs the deflation space and eigensolver.
const DiracMatrix & matPrecon
const DiracMatrix & matSloppy
double Last(QudaProfileType idx)
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
cudaColorSpinorField * tmp
@ QUDA_USE_INIT_GUESS_YES
@ QUDA_HEAVY_QUARK_RESIDUAL
@ QUDA_L2_RELATIVE_RESIDUAL
@ QUDA_PRESERVE_SOURCE_NO
@ QUDA_PRESERVE_SOURCE_YES
@ QUDA_REFERENCE_FIELD_CREATE
@ QUDA_COMPUTE_NULL_VECTOR_NO
Complex axpyCGNorm(double a, ColorSpinorField &x, ColorSpinorField &y)
void axpyZpbx(double a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, double b)
double3 HeavyQuarkResidualNorm(ColorSpinorField &x, ColorSpinorField &r)
double xmyNorm(ColorSpinorField &x, ColorSpinorField &y)
void xpay(ColorSpinorField &x, double a, ColorSpinorField &y)
void zero(ColorSpinorField &a)
double norm2(const ColorSpinorField &a)
double reDotProduct(ColorSpinorField &x, ColorSpinorField &y)
void axpy(double a, ColorSpinorField &x, ColorSpinorField &y)
void xpy(ColorSpinorField &x, ColorSpinorField &y)
void copy(ColorSpinorField &dst, const ColorSpinorField &src)
void stop()
Stop profiling.
double norm2(const CloverField &a, bool inverse=false)
__device__ __host__ void zero(double &a)
std::complex< double > Complex
__host__ __device__ ValueType sqrt(ValueType x)
__host__ __device__ std::enable_if<!isFixed< T1 >::value &&!isFixed< T2 >::value, void >::type copy(T1 &a, const T2 &b)
Copy function which is trival between floating point types. When converting to an integer type,...
void updateR()
update the radius for halos.
QudaComputeNullVector compute_null_vector
bool use_sloppy_partial_accumulator
int max_res_increase_total
QudaResidualType residual_type
QudaPrecision precision_sloppy
QudaUseInitGuess use_init_guess
QudaInverterType inv_type_precondition
QudaVerbosity getVerbosity()