37 if (Q_AQandg)
delete []Q_AQandg;
38 if (Q_AS)
delete []Q_AS;
39 if (alpha)
delete []alpha;
40 if (beta)
delete []beta;
42 bool use_source =
false;
47 for (
int i=0; i<
param.
Nkrylov+1; i++)
if (i>0 || !use_source)
delete S[i];
49 for (
int i=0; i<
param.
Nkrylov; i++)
if (i>0 || !use_source)
delete S[i];
56 if (tmp_sloppy)
delete tmp_sloppy;
57 if (tmp_sloppy2)
delete tmp_sloppy2;
58 if (tmpp)
delete tmpp;
59 if (tmpp2)
delete tmpp2;
70 CACG(mmdag, mmdagSloppy, mmdagPrecon, mmdagEig,
param, profile),
72 mmdagSloppy(matSloppy.Expose()),
73 mmdagPrecon(matPrecon.Expose()),
74 mmdagEig(matEig.Expose()),
114 if (b2 == 0.0) b2 = r2;
160 CACG(mdagm, mdagmSloppy, mdagmPrecon, mdagmEig,
param, profile),
162 mdagmSloppy(matSloppy.Expose()),
163 mdagmPrecon(matPrecon.Expose()),
164 mdagmEig(matEig.Expose()),
244 bool use_source =
false;
268 if (i>0) AS[i-1] = S[i];
305 for (
int i=0; i<N; i++) {
306 vecg(i) = Q_AQandg[i*(N+1)+N];
307 for (
int j=0; j<N; j++) {
308 matQ_AQ(i,j) = Q_AQandg[i*(N+1)+j];
311 Map<vector> vecalpha(alpha,N);
313 vecalpha = matQ_AQ.fullPivLu().solve(vecg);
319 void CACG::compute_alpha()
327 const int N = Q.size();
330 case 1: compute_alpha_N<1>(Q_AQandg, alpha);
break;
331 case 2: compute_alpha_N<2>(Q_AQandg, alpha);
break;
332 case 3: compute_alpha_N<3>(Q_AQandg, alpha);
break;
333 case 4: compute_alpha_N<4>(Q_AQandg, alpha);
break;
334 case 5: compute_alpha_N<5>(Q_AQandg, alpha);
break;
335 case 6: compute_alpha_N<6>(Q_AQandg, alpha);
break;
336 case 7: compute_alpha_N<7>(Q_AQandg, alpha);
break;
337 case 8: compute_alpha_N<8>(Q_AQandg, alpha);
break;
338 case 9: compute_alpha_N<9>(Q_AQandg, alpha);
break;
339 case 10: compute_alpha_N<10>(Q_AQandg, alpha);
break;
340 case 11: compute_alpha_N<11>(Q_AQandg, alpha);
break;
341 case 12: compute_alpha_N<12>(Q_AQandg, alpha);
break;
347 const int N = Q.size();
348 matrix matQ_AQ(N, N);
350 for (
int i = 0; i < N; i++) {
351 vecg(i) = Q_AQandg[i * (N + 1) + N];
352 for (
int j = 0; j < N; j++) { matQ_AQ(i, j) = Q_AQandg[i * (N + 1) + j]; }
354 Map<vector> vecalpha(alpha, N);
356 vecalpha = matQ_AQ.fullPivLu().solve(vecg);
377 for (
int i=0; i<N; i++) {
378 for (
int j=0; j<N; j++) {
379 matQ_AQ(i,j) = Q_AQandg[i*(N+1)+j];
382 Map<matrix> matQ_AS(Q_AS,N,N), matbeta(beta,N,N);
385 matbeta = matQ_AQ.fullPivLu().solve(matQ_AS);
391 void CACG::compute_beta()
399 const int N = Q.size();
402 case 1: compute_beta_N<1>(Q_AQandg, Q_AS, beta);
break;
403 case 2: compute_beta_N<2>(Q_AQandg, Q_AS, beta);
break;
404 case 3: compute_beta_N<3>(Q_AQandg, Q_AS, beta);
break;
405 case 4: compute_beta_N<4>(Q_AQandg, Q_AS, beta);
break;
406 case 5: compute_beta_N<5>(Q_AQandg, Q_AS, beta);
break;
407 case 6: compute_beta_N<6>(Q_AQandg, Q_AS, beta);
break;
408 case 7: compute_beta_N<7>(Q_AQandg, Q_AS, beta);
break;
409 case 8: compute_beta_N<8>(Q_AQandg, Q_AS, beta);
break;
410 case 9: compute_beta_N<9>(Q_AQandg, Q_AS, beta);
break;
411 case 10: compute_beta_N<10>(Q_AQandg, Q_AS, beta);
break;
412 case 11: compute_beta_N<11>(Q_AQandg, Q_AS, beta);
break;
413 case 12: compute_beta_N<12>(Q_AQandg, Q_AS, beta);
break;
418 const int N = Q.size();
419 matrix matQ_AQ(N, N);
420 for (
int i = 0; i < N; i++) {
421 for (
int j = 0; j < N; j++) { matQ_AQ(i, j) = Q_AQandg[i * (N + 1) + j]; }
423 Map<matrix> matQ_AS(Q_AS, N, N), matbeta(beta, N, N);
426 matbeta = matQ_AQ.fullPivLu().solve(matQ_AS);
440 int CACG::reliable(
double &rNorm,
double &maxrr,
int &rUpdate,
const double &r2,
const double &delta) {
443 if (rNorm > maxrr) maxrr = rNorm;
445 int updateR = (rNorm < delta*maxrr) ? 1 : 0;
488 double b2 = !fixed_iteration ?
blas::norm2(b) : 1.0;
511 if (!fixed_iteration) {
528 if (!fixed_iteration) {
545 for (
int i = 0; i < 10; i++) {
548 for (
int j = 0; j < 10; j++) {
549 matSloppy(*AQ[0], *Q[0], tmpSloppy, tmpSloppy2);
559 matSloppy(*AQ[0], *Q[0], tmpSloppy, tmpSloppy2);
590 double heavy_quark_res = 0.0;
594 int resIncreaseTotal = 0;
603 bool l2_converge =
false;
608 double rNorm =
sqrt(r2);
609 double maxrr = rNorm;
610 double maxr_deflate = rNorm;
613 double m_map = 2./(lambda_max-lambda_min);
614 double b_map = -(lambda_max+lambda_min)/(lambda_max-lambda_min);
618 PrintStats(
"CA-CG", total_iter, r2, b2, heavy_quark_res);
623 for (
int k = 0; k < n_krylov; k++) {
matSloppy(*AS[k], *S[k], tmpSloppy, tmpSloppy2); }
626 matSloppy(*AS[0], *S[0], tmpSloppy, tmpSloppy2);
630 Complex facs1[] = { m_map, b_map };
631 std::vector<ColorSpinorField*> recur1{AS[0],S[0]};
632 std::vector<ColorSpinorField*> S1{S[1]};
635 matSloppy(*AS[1], *S[1], tmpSloppy, tmpSloppy2);
640 Complex factors[] = { 2.*m_map, 2.*b_map, -1 };
641 for (
int k = 2; k < n_krylov; k++) {
642 std::vector<ColorSpinorField*> recur2{AS[k-1],S[k-1],S[k-2]};
643 std::vector<ColorSpinorField*> Sk{S[k]};
646 matSloppy(*AS[k], *S[k], tmpSloppy, tmpSloppy2);
653 if (total_iter == 0) {
655 for (
int i = 0; i < n_krylov; i++) *Q[i] = *S[i];
656 for (
int i = 0; i < n_krylov; i++) *AQ[i] = *AS[i];
664 std::vector<ColorSpinorField*> R;
665 for (
int i = 0; i < n_krylov; i++) R.push_back(S[i]);
673 for (
int i = 0; i < n_krylov; i++)
std::swap(Q[i], Qtmp[i]);
676 for (
int i = 0; i < n_krylov; i++)
std::swap(AQ[i], Qtmp[i]);
683 std::vector<ColorSpinorField*> Q2;
684 for (
int i = 0; i < n_krylov; i++) Q2.push_back(AQ[i]);
694 std::vector<ColorSpinorField*> X;
701 if (!fixed_iteration) {
702 for (
int i = 0; i <
param.
Nkrylov; i++) { alpha[i] = -alpha[i]; }
703 std::vector<ColorSpinorField*> S0{S[0]};
715 if (total_iter > 0) {
716 PrintStats(
"CA-CG", total_iter, r2, b2, heavy_quark_res);
719 total_iter += n_krylov;
724 if (total_iter>=
param.
maxiter || (r2 <
stop && !l2_converge) || reliable(rNorm, maxrr, rUpdate, r2, delta)) {
737 maxr_deflate =
sqrt(r2);
748 warningQuda(
"CA-CG: new reliable residual norm %e is greater than previous reliable residual norm %e (total #inc %i)",
749 sqrt(r2),
sqrt(r2_old), resIncreaseTotal);
750 if (resIncrease > maxResIncrease or resIncreaseTotal > maxResIncreaseTotal) {
751 warningQuda(
"CA-CG: solver exiting due to too many true residual norm increases");
Communication-avoiding CG solver. This solver does un-preconditioned CG, running in steps of n_krylov...
CACG(const DiracMatrix &mat, const DiracMatrix &matSloppy, const DiracMatrix &matPrecon, const DiracMatrix &matEig, SolverParam ¶m, TimeProfile &profile)
void operator()(ColorSpinorField &out, ColorSpinorField &in)
void operator()(ColorSpinorField &out, ColorSpinorField &in)
CACGNE(const DiracMatrix &mat, const DiracMatrix &matSloppy, const DiracMatrix &matPrecon, const DiracMatrix &matEig, SolverParam ¶m, TimeProfile &profile)
void operator()(ColorSpinorField &out, ColorSpinorField &in)
CACGNR(const DiracMatrix &mat, const DiracMatrix &matSloppy, const DiracMatrix &matPrecon, const DiracMatrix &matEig, SolverParam ¶m, TimeProfile &profile)
static ColorSpinorField * Create(const ColorSpinorParam ¶m)
virtual void M(ColorSpinorField &out, const ColorSpinorField &in) const =0
Apply M for the dirac op. E.g. the Schur Complement operator.
void Mdag(ColorSpinorField &out, const ColorSpinorField &in) const
Apply Mdag (daggered operator of M.
const Dirac * Expose() const
unsigned long long flops() const
void deflate(std::vector< ColorSpinorField * > &sol, const std::vector< ColorSpinorField * > &src, const std::vector< ColorSpinorField * > &evecs, const std::vector< Complex > &evals, bool accumulate=false) const
Deflate a set of source vectors with a given eigenspace.
void computeEvals(const DiracMatrix &mat, std::vector< ColorSpinorField * > &evecs, std::vector< Complex > &evals, int size)
Compute eigenvalues and their residiua.
bool convergence(double r2, double hq2, double r2_tol, double hq_tol)
std::vector< ColorSpinorField * > evecs
void destroyDeflationSpace()
Destroy the allocated deflation space.
void PrintSummary(const char *name, int k, double r2, double b2, double r2_tol, double hq_tol)
Prints out the summary of the solver convergence (requires a verbosity of QUDA_SUMMARIZE)....
const DiracMatrix & matEig
static double stopping(double tol, double b2, QudaResidualType residual_type)
Set the solver L2 stopping condition.
std::vector< Complex > evals
void PrintStats(const char *name, int k, double r2, double b2, double hq2)
Prints out the running statistics of the solver (requires a verbosity of QUDA_VERBOSE)
void constructDeflationSpace(const ColorSpinorField &meta, const DiracMatrix &mat)
Constructs the deflation space and eigensolver.
const DiracMatrix & matPrecon
const DiracMatrix & matSloppy
double Last(QudaProfileType idx)
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
cudaColorSpinorField * tmp
@ QUDA_USE_INIT_GUESS_YES
@ QUDA_HEAVY_QUARK_RESIDUAL
@ QUDA_PRESERVE_SOURCE_NO
@ QUDA_PRESERVE_SOURCE_YES
@ QUDA_COMPUTE_NULL_VECTOR_NO
#define checkPrecision(...)
void init()
Create the BLAS context.
double3 HeavyQuarkResidualNorm(ColorSpinorField &x, ColorSpinorField &r)
void caxpyz(const Complex *a, std::vector< ColorSpinorField * > &x, std::vector< ColorSpinorField * > &y, std::vector< ColorSpinorField * > &z)
Compute the block "caxpyz" with over the set of ColorSpinorFields. E.g., it computes.
double xmyNorm(ColorSpinorField &x, ColorSpinorField &y)
void xpay(ColorSpinorField &x, double a, ColorSpinorField &y)
void ax(double a, ColorSpinorField &x)
void zero(ColorSpinorField &a)
double norm2(const ColorSpinorField &a)
void caxpy(const Complex &a, ColorSpinorField &x, ColorSpinorField &y)
void xpy(ColorSpinorField &x, ColorSpinorField &y)
void copy(ColorSpinorField &dst, const ColorSpinorField &src)
Complex cDotProduct(ColorSpinorField &, ColorSpinorField &)
void axpby(double a, ColorSpinorField &x, double b, ColorSpinorField &y)
void stop()
Stop profiling.
std::complex< double > Complex
__host__ __device__ ValueType sqrt(ValueType x)
void compute_alpha_N(Complex *Q_AQandg, Complex *alpha)
void compute_beta_N(Complex *Q_AQandg, Complex *Q_AS, Complex *beta)
void updateR()
update the radius for halos.
#define qudaDeviceSynchronize()
QudaPreserveSource preserve_source
QudaComputeNullVector compute_null_vector
bool is_preconditioner
verbosity to use for preconditioner
int max_res_increase_total
QudaResidualType residual_type
QudaPrecision precision_sloppy
QudaUseInitGuess use_init_guess
DEVICEHOST void swap(Real &a, Real &b)
QudaVerbosity getVerbosity()