23 Solver(param, profile), mat(mat), matSloppy(matSloppy), yp(nullptr), rp(nullptr),
24 rnewp(nullptr), pp(nullptr), App(nullptr), tmpp(nullptr), tmp2p(nullptr), tmp3p(nullptr),
25 rSloppyp(nullptr), xSloppyp(nullptr),
init(false)
33 for (
auto pi :
p)
if (pi)
delete pi;
52 if (veci)
delete veci;
61 CG(mmdag, mmdagSloppy, param, profile), mmdag(mat.Expose()), mmdagSloppy(matSloppy.Expose()),
62 xp(nullptr),
yp(nullptr),
init(false) {
98 if (b2 == 0.0) b2 = r2;
143 CG(mdagm, mdagmSloppy, param, profile), mdagm(mat.Expose()), mdagmSloppy(matSloppy.Expose()),
144 bp(nullptr),
init(false) {
219 if (Np < 0 || Np > 16)
errorQuda(
"Invalid value %d for solution_accumulator_pipeline\n", Np);
232 printfQuda(
"Warning: inverting on zero-field source\n");
290 if (Np != (
int)
p.size()) {
291 for (
auto &pi :
p)
delete pi;
302 const double deps=
sqrt(u);
303 constexpr
double dfac = 1.1;
315 if(alternative_reliable){
327 if (b2 == 0) b2 = r2;
337 std::vector<ColorSpinorField *> rhs;
363 if (Np != (
int)
p.size()) {
364 for (
auto &pi :
p)
delete pi;
371 for (
auto &p_i :
p) *p_i = p_init ? *p_init : rSloppy;
375 if (r2_old_init != 0.0 and p_init) {
376 r2_old = r2_old_init;
383 const bool use_heavy_quark_res =
385 bool heavy_quark_restart =
false;
392 double heavy_quark_res = 0.0;
393 double heavy_quark_res_old = 0.0;
395 if (use_heavy_quark_res) {
397 heavy_quark_res_old = heavy_quark_res;
405 double rNorm =
sqrt(r2);
406 double r0Norm = rNorm;
407 double maxrx = rNorm;
408 double maxrr = rNorm;
419 const int hqmaxresRestartTotal
423 int resIncreaseTotal = 0;
424 int hqresIncrease = 0;
425 int hqresRestartTotal = 0;
429 bool L2breakdown =
false;
430 const double L2breakdown_eps = 100. * uhigh;
441 int steps_since_reliable = 1;
445 if(alternative_reliable){
446 dinit = uhigh * (rNorm + Anorm * xNorm);
454 bool breakdown =
false;
458 if(alternative_reliable){
460 r2 = quadruple.x; Ap2 = quadruple.y; pAp = quadruple.z; ppnorm= quadruple.w;
464 r2 = triplet.x; Ap2 = triplet.y; pAp = triplet.z;
468 sigma = alpha[j]*(alpha[j] * Ap2 - pAp);
469 if (sigma < 0.0 || steps_since_reliable == 0) {
480 if (alternative_reliable) {
493 sigma = imag(cg_norm) >= 0.0 ? imag(cg_norm) : r2;
501 if (alternative_reliable) {
503 updateX = ( (d <= deps*sqrt(r2_old)) or (dfac * dinit > deps * r0Norm) ) and (d_new > deps*rNorm) and (d_new > dfac * dinit);
506 if (rNorm > maxrx) maxrx = rNorm;
507 if (rNorm > maxrr) maxrr = rNorm;
508 updateX = (rNorm < delta * r0Norm && r0Norm <= maxrx) ? 1 : 0;
509 updateR = ((rNorm < delta * maxrr && r0Norm <= maxrr) || updateX) ? 1 : 0;
520 if ( !(updateR || updateX )) {
521 beta = sigma / r2_old;
529 errorQuda(
"Not implemented pipelined CG with Np > 1");
537 if ( (j+1)%Np == 0 ) {
538 const auto alpha_ = std::unique_ptr<Complex[]>(
new Complex[Np]);
539 for (
int i=0; i<Np; i++) alpha_[i] = alpha[i];
540 std::vector<ColorSpinorField*> x_;
541 x_.push_back(&xSloppy);
551 if (use_heavy_quark_res && k%heavy_quark_check==0) {
552 if (&x != &xSloppy) {
562 if (alternative_reliable) {
564 pnorm = pnorm + alpha[j] * alpha[j]* (ppnorm);
566 d_new = d + u*rNorm + uhigh*Anorm * xnorm;
570 steps_since_reliable++;
575 const auto alpha_ = std::unique_ptr<Complex[]>(
new Complex[Np]);
576 for (
int i=0; i<=j; i++) alpha_[i] = alpha[i];
577 std::vector<ColorSpinorField*> x_;
578 x_.push_back(&xSloppy);
579 std::vector<ColorSpinorField*> p_;
580 for (
int i=0; i<=j; i++) p_.push_back(
p[i]);
595 if (alternative_reliable) {
612 if (
sqrt(r2) > r0Norm && updateX and not L2breakdown) {
616 "CG: new reliable residual norm %e is greater than previous reliable residual norm %e (total #inc %i)",
617 sqrt(r2), r0Norm, resIncreaseTotal);
619 if ((use_heavy_quark_res and
sqrt(r2) < L2breakdown_eps) or resIncrease > maxResIncrease
620 or resIncreaseTotal > maxResIncreaseTotal or r2 < stop) {
621 if (use_heavy_quark_res) {
625 if (resIncrease > maxResIncrease or resIncreaseTotal > maxResIncreaseTotal or r2 < stop) {
626 warningQuda(
"CG: solver exiting due to too many true residual norm increases");
636 if (use_heavy_quark_res and L2breakdown) {
639 warningQuda(
"CG: Restarting without reliable updates for heavy-quark residual (total #inc %i)",
641 heavy_quark_restart =
true;
643 if (heavy_quark_res > heavy_quark_res_old) {
645 warningQuda(
"CG: new reliable HQ residual norm %e is greater than previous reliable residual norm %e",
646 heavy_quark_res, heavy_quark_res_old);
648 if (hqresIncrease > hqmaxresIncrease) {
649 warningQuda(
"CG: solver exiting due to too many heavy quark residual norm increases (%i/%i)",
650 hqresIncrease, hqmaxresIncrease);
657 if (hqresRestartTotal > hqmaxresRestartTotal) {
658 warningQuda(
"CG: solver exiting due to too many heavy quark residual restarts (%i/%i)", hqresRestartTotal,
659 hqmaxresRestartTotal);
664 if (use_heavy_quark_res and heavy_quark_restart) {
667 heavy_quark_restart =
false;
677 steps_since_reliable = 0;
681 heavy_quark_res_old = heavy_quark_res;
692 if (use_heavy_quark_res) {
697 converged = L2done and HQdone;
701 if (converged && steps_since_reliable > 0 && (j+1)%Np != 0 ) {
702 const auto alpha_ = std::unique_ptr<Complex[]>(
new Complex[Np]);
703 for (
int i=0; i<=j; i++) alpha_[i] = alpha[i];
704 std::vector<ColorSpinorField*> x_;
705 x_.push_back(&xSloppy);
706 std::vector<ColorSpinorField*> p_;
707 for (
int i=0; i<=j; i++) p_.push_back(
p[i]);
712 j = steps_since_reliable == 0 ? 0 : (j+1)%Np;
730 printfQuda(
"CG: Reliable updates = %d\n", rUpdate);
756 errorQuda(
"QUDA_BLOCKSOLVER not built.");
764 using Eigen::MatrixXcd;
775 errorQuda(
"Warning: inverting on zero-field source - undefined for block solver\n");
844 r2avg += r2(i,i).real();
858 if (&x != &xSloppy) {
865 const bool use_heavy_quark_res =
867 if(use_heavy_quark_res)
errorQuda(
"ERROR: heavy quark residual not supported in block solver");
918 bool allconverged =
true;
922 allconverged = allconverged && converged[i];
926 MatrixXcd L = r2.llt().matrixL();
928 MatrixXcd Linv = C.inverse();
931 std::cout <<
"r2\n " << r2 << std::endl;
932 std::cout <<
"L\n " << L.adjoint() << std::endl;
956 std::cout <<
" pTp " << std::endl << pTp << std::endl;
957 std::cout <<
" L " << std::endl << L.adjoint() << std::endl;
958 std::cout <<
" C " << std::endl << C << std::endl;
971 if (i!=j) pAp(j,i) =
std::conj(pAp(i,j));
976 alpha = pAp.inverse() * C;
986 beta = pAp.inverse();
1007 L = r2.llt().matrixL();
1025 std::cout <<
" rTr " << std::endl << pTp << std::endl;
1026 std::cout <<
"QR" << S<< std::endl <<
"QP " << S.inverse()*S << std::endl;;
1055 std::cout <<
" pTp " << std::endl << pTp << std::endl;
1056 std::cout <<
"S " << S<< std::endl <<
"C " << C << std::endl;
1062 r2(j,j) = C(0,j)*
conj(C(0,j));
1064 r2(j,j) += C(i,j) *
conj(C(i,j));
1065 r2avg += r2(j,j).real();
1071 allconverged =
true;
1074 allconverged = allconverged && converged[i];
1106 PrintSummary(
"CG", k, r2(i,i).real(), b2[i], stop[i], 0.0);
1128 #define BLOCKCG_GS 1 1131 errorQuda(
"QUDA_BLOCKSOLVER not built.");
1138 const bool use_block =
true;
1144 using Eigen::MatrixXcd;
1159 errorQuda(
"Warning: inverting on zero-field source\n");
1175 std::cout <<
"b2m\n" << b2m << std::endl;
1240 printfQuda(
"r2[%i] %e\n", i, r2(i,i).real());
1256 if (&x != &xSloppy) {
1263 const bool use_heavy_quark_res =
1265 bool heavy_quark_restart =
false;
1277 if (use_heavy_quark_res) {
1279 heavy_quark_res_old[i] = heavy_quark_res[i];
1299 rNorm[i] =
sqrt(r2(i,i).real());
1300 r0Norm[i] = rNorm[i];
1301 maxrx[i] = rNorm[i];
1302 maxrr[i] = rNorm[i];
1314 const int hqmaxresIncrease = maxResIncrease + 1;
1316 int resIncrease = 0;
1317 int resIncreaseTotal = 0;
1318 int hqresIncrease = 0;
1322 bool L2breakdown =
false;
1331 r2avg+=r2(i,i).real();
1333 PrintStats(
"CG", k, r2avg, b2avg, heavy_quark_res[0]);
1334 int steps_since_reliable = 1;
1335 bool allconverged =
true;
1339 allconverged = allconverged && converged[i];
1371 std::cout <<
" pTp " << std::endl << pTp << std::endl;
1372 std::cout <<
"QR" << gamma<< std::endl <<
"QP " << gamma.inverse()*gamma << std::endl;;
1380 bool breakdown =
false;
1392 if(use_block or i==j)
1399 alpha = pAp.inverse() * gamma.adjoint().inverse() * r2;
1401 std::cout <<
"alpha\n" << alpha << std::endl;
1404 std::cout <<
"pAp " << std::endl <<pAp << std::endl;
1405 std::cout <<
"pAp^-1 " << std::endl <<pAp.inverse() << std::endl;
1406 std::cout <<
"r2 " << std::endl <<r2 << std::endl;
1407 std::cout <<
"alpha " << std::endl <<alpha << std::endl;
1408 std::cout <<
"pAp^-1r2" << std::endl << pAp.inverse()*r2 << std::endl;
1421 if(use_block or i==j)
1439 rNorm[i] =
sqrt(r2(i,i).real());
1440 if (rNorm[i] > maxrx[i]) maxrx[i] = rNorm[i];
1441 if (rNorm[i] > maxrr[i]) maxrr[i] = rNorm[i];
1442 updateX = (rNorm[i] < delta * r0Norm[i] && r0Norm[i] <= maxrx[i]) ?
true :
false;
1443 updateR = ((rNorm[i] < delta * maxrr[i] && r0Norm[i] <= maxrr[i]) || updateX) ? true :
false;
1445 if ( (updateR || updateX )) {
1452 if ( !(updateR || updateX )) {
1454 beta = gamma * r2_old.inverse() * sigma;
1456 std::cout <<
"beta\n" << beta << std::endl;
1483 double rcoeff= (j==0?1.0:0.0);
1522 std::cout <<
" pTp " << std::endl << pTp << std::endl;
1523 std::cout <<
"QR" << gamma<< std::endl <<
"QP " << gamma.inverse()*gamma << std::endl;;
1528 if (use_heavy_quark_res && (k % heavy_quark_check) == 0) {
1529 if (&x != &xSloppy) {
1542 steps_since_reliable++;
1566 if (use_heavy_quark_res){
1575 if (
sqrt(r2(i,i).real()) > r0Norm[i] && updateX) {
1578 warningQuda(
"CG: new reliable residual norm %e is greater than previous reliable residual norm %e (total #inc %i)",
1579 sqrt(r2(i,i).real()), r0Norm[i], resIncreaseTotal);
1580 if ( resIncrease > maxResIncrease or resIncreaseTotal > maxResIncreaseTotal) {
1581 if (use_heavy_quark_res) {
1584 warningQuda(
"CG: solver exiting due to too many true residual norm increases");
1594 if (use_heavy_quark_res and L2breakdown) {
1596 warningQuda(
"CG: Restarting without reliable updates for heavy-quark residual");
1597 heavy_quark_restart =
true;
1598 if (heavy_quark_res[i] > heavy_quark_res_old[i]) {
1600 warningQuda(
"CG: new reliable HQ residual norm %e is greater than previous reliable residual norm %e", heavy_quark_res[i], heavy_quark_res_old[i]);
1602 if (hqresIncrease > hqmaxresIncrease) {
1603 warningQuda(
"CG: solver exiting due to too many heavy quark residual norm increases");
1611 rNorm[i] =
sqrt(r2(i,i).real());
1612 maxrr[i] = rNorm[i];
1613 maxrx[i] = rNorm[i];
1614 r0Norm[i] = rNorm[i];
1615 heavy_quark_res_old[i] = heavy_quark_res[i];
1619 if (use_heavy_quark_res and heavy_quark_restart) {
1622 heavy_quark_restart =
false;
1629 beta(i,i) = r2(i,i) / r2_old(i,i);
1634 steps_since_reliable = 0;
1640 allconverged =
true;
1643 r2avg+= r2(i,i).real();
1646 allconverged = allconverged && converged[i];
1648 PrintStats(
"CG", k, r2avg, b2avg, heavy_quark_res[0]);
1651 if (use_heavy_quark_res) {
1657 converged[i] = L2done and HQdone;
1680 printfQuda(
"CG: Reliable updates = %d\n", rUpdate);
1690 PrintSummary(
"CG", k, r2(i,i).real(), b2[i], stop[i], 0.0);
cudaColorSpinorField * tmp2
void blocksolve(ColorSpinorField &out, ColorSpinorField &in)
void ax(double a, ColorSpinorField &x)
void setPrecision(QudaPrecision precision, QudaPrecision ghost_precision=QUDA_INVALID_PRECISION, bool force_native=false)
double3 cDotProductNormA(ColorSpinorField &a, ColorSpinorField &b)
bool convergenceHQ(double r2, double hq2, double r2_tol, double hq_tol)
Test for HQ solver convergence – ignore L2 residual.
void axpyZpbx(double a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, double b)
#define QUDA_MAX_MULTI_SHIFT
Maximum number of shifts supported by the multi-shift solver. This number may be changed if need be...
QudaVerbosity getVerbosity()
Matrix< N, std::complex< T > > conj(const Matrix< N, std::complex< T > > &mat)
#define checkPrecision(...)
double norm2(const ColorSpinorField &a)
__host__ __device__ ValueType sqrt(ValueType x)
Complex cDotProduct(ColorSpinorField &, ColorSpinorField &)
const DiracMatrix & matSloppy
void deflate(std::vector< ColorSpinorField *> vec_defl, std::vector< ColorSpinorField *> vec, std::vector< ColorSpinorField *> evecs, std::vector< Complex > evals)
Deflate vector with Eigenvectors.
void PrintStats(const char *name, int k, double r2, double b2, double hq2)
Prints out the running statistics of the solver (requires a verbosity of QUDA_VERBOSE) ...
cudaColorSpinorField * tmp
double3 xpyHeavyQuarkResidualNorm(ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &r)
static ColorSpinorField * Create(const ColorSpinorParam ¶m)
bool convergence(double r2, double hq2, double r2_tol, double hq_tol)
double reDotProduct(ColorSpinorField &x, ColorSpinorField &y)
Complex axpyCGNorm(double a, ColorSpinorField &x, ColorSpinorField &y)
void copy(ColorSpinorField &dst, const ColorSpinorField &src)
ColorSpinorField & Component(const int idx) const
double xmyNorm(ColorSpinorField &x, ColorSpinorField &y)
QudaPreserveSource preserve_source
int max_res_increase_total
std::vector< ColorSpinorField * > defl_tmp1
bool alternative_reliable
This is just a dummy structure we use for trove to define the required structure size.
void xpay(ColorSpinorField &x, double a, ColorSpinorField &y)
size_t RealLength() const
CGNE(DiracMatrix &mat, DiracMatrix &matSloppy, SolverParam ¶m, TimeProfile &profile)
bool convergenceL2(double r2, double hq2, double r2_tol, double hq_tol)
Test for L2 solver convergence – ignore HQ residual.
std::vector< ColorSpinorField * > defl_tmp2
QudaComputeNullVector compute_null_vector
double Last(QudaProfileType idx)
QudaResidualType residual_type
CG(DiracMatrix &mat, DiracMatrix &matSloppy, SolverParam ¶m, TimeProfile &profile)
double true_res_hq_offset[QUDA_MAX_MULTI_SHIFT]
int max_hq_res_restart_total
static double stopping(double tol, double b2, QudaResidualType residual_type)
Set the solver L2 stopping condition.
void axpy(double a, ColorSpinorField &x, ColorSpinorField &y)
double4 quadrupleCGReduction(ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
#define checkLocation(...)
__host__ __device__ ValueType pow(ValueType x, ExponentType e)
void constructDeflationSpace(const ColorSpinorField &meta, const DiracMatrix &mat, bool svd)
Constructs the deflation space.
double3 HeavyQuarkResidualNorm(ColorSpinorField &x, ColorSpinorField &r)
double true_res_offset[QUDA_MAX_MULTI_SHIFT]
CGNR(DiracMatrix &mat, DiracMatrix &matSloppy, SolverParam ¶m, TimeProfile &profile)
std::complex< double > Complex
void tripleCGUpdate(double alpha, double beta, ColorSpinorField &q, ColorSpinorField &r, ColorSpinorField &x, ColorSpinorField &p)
std::vector< Complex > evals
void init()
Create the CUBLAS context.
void caxpy(const Complex &a, ColorSpinorField &x, ColorSpinorField &y)
void zero(ColorSpinorField &a)
std::vector< ColorSpinorField * > p
void Mdag(ColorSpinorField &out, const ColorSpinorField &in) const
std::vector< ColorSpinorField * > evecs
virtual void M(ColorSpinorField &out, const ColorSpinorField &in) const =0
ColorSpinorField * rSloppyp
void operator()(ColorSpinorField &out, ColorSpinorField &in)
Run CG.
Conjugate-Gradient Solver.
unsigned long long flops() const
ColorSpinorField * xSloppyp
void operator()(ColorSpinorField &out, ColorSpinorField &in)
Run CG.
void xpy(ColorSpinorField &x, ColorSpinorField &y)
void axpby(double a, ColorSpinorField &x, double b, ColorSpinorField &y)
double axpyNorm(double a, ColorSpinorField &x, ColorSpinorField &y)
bool use_alternative_reliable
QudaUseInitGuess use_init_guess
void operator()(ColorSpinorField &out, ColorSpinorField &in)
Run CG.
QudaPrecision precision_sloppy
bool use_sloppy_partial_accumulator
void PrintSummary(const char *name, int k, double r2, double b2, double r2_tol, double hq_tol)
Prints out the summary of the solver convergence (requires a verbosity of QUDA_SUMMARIZE). Assumes SolverParam.true_res and SolverParam.true_res_hq has been set.
__host__ __device__ ValueType conj(ValueType x)
int solution_accumulator_pipeline
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
QudaPrecision Precision() const
void xpayz(ColorSpinorField &x, double a, ColorSpinorField &y, ColorSpinorField &z)
const Dirac * Expose() const
double3 tripleCGReduction(ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
void updateR()
update the radius for halos.