16 :
Solver(param, profile), mat(mat), matSloppy(matSloppy),
init(false),
17 lambda_init(false), basis(param.
ca_basis),
18 Q_AQandg(nullptr), Q_AS(nullptr), alpha(nullptr), beta(nullptr), rp(nullptr),
19 tmpp(nullptr), tmpp2(nullptr), tmp_sloppy(nullptr), tmp_sloppy2(nullptr) { }
30 bool use_source =
false;
35 for (
int i=0; i<
param.
Nkrylov+1; i++)
if (i>0 || !use_source)
delete S[i];
37 for (
int i=0; i<
param.
Nkrylov; i++)
if (i>0 || !use_source)
delete S[i];
53 if (veci)
delete veci;
62 CACG(mmdag, mmdagSloppy, param, profile), mmdag(mat.Expose()), mmdagSloppy(matSloppy.Expose()),
63 xp(nullptr), yp(nullptr),
init(false) {
99 if (b2 == 0.0) b2 = r2;
144 CACG(mdagm, mdagmSloppy, param, profile), mdagm(mat.Expose()), mdagmSloppy(matSloppy.Expose()),
145 bp(nullptr),
init(false) {
222 bool use_source =
false;
246 if (i>0)
AS[i-1] =
S[i];
282 using namespace Eigen;
288 for (
int i=0; i<N; i++) {
289 vecg(i) = Q_AQandg[i*(N+1)+N];
290 for (
int j=0; j<N; j++) {
291 matQ_AQ(i,j) = Q_AQandg[i*(N+1)+j];
294 Map<vector> vecalpha(alpha,N);
296 vecalpha = matQ_AQ.fullPivLu().solve(vecg);
310 const int N =
Q.size();
325 using namespace Eigen;
329 const int N =
Q.size();
332 for (
int i=0; i<N; i++) {
333 vecg(i) = Q_AQandg[i*(N+1)+N];
334 for (
int j=0; j<N; j++) {
335 matQ_AQ(i,j) = Q_AQandg[i*(N+1)+j];
338 Map<vector> vecalpha(
alpha,N);
340 vecalpha = matQ_AQ.fullPivLu().solve(vecg);
358 using namespace Eigen;
362 for (
int i=0; i<N; i++) {
363 for (
int j=0; j<N; j++) {
364 matQ_AQ(i,j) = Q_AQandg[i*(N+1)+j];
367 Map<matrix> matQ_AS(Q_AS,N,N), matbeta(beta,N,N);
370 matbeta = matQ_AQ.fullPivLu().solve(matQ_AS);
384 const int N =
Q.size();
399 using namespace Eigen;
402 const int N =
Q.size();
404 for (
int i=0; i<N; i++) {
405 for (
int j=0; j<N; j++) {
406 matQ_AQ(i,j) = Q_AQandg[i*(N+1)+j];
409 Map<matrix> matQ_AS(Q_AS,N,N), matbeta(
beta,N,N);
412 matbeta = matQ_AQ.fullPivLu().solve(matQ_AS);
426 int CACG::reliable(
double &rNorm,
double &maxrr,
int &rUpdate,
const double &r2,
const double &delta) {
429 if (rNorm > maxrr) maxrr = rNorm;
431 int updateR = (rNorm < delta*maxrr) ? 1 : 0;
474 double b2 = !fixed_iteration ?
blas::norm2(b) : 1.0;
479 mat(r_, x, tmp, tmp2);
481 if (!fixed_iteration) {
494 std::vector<ColorSpinorField *> rhs;
520 for (
int i = 0; i < 10; i++) {
523 for (
int j = 0; j < 10; j++) {
526 printfQuda(
"Current Rayleigh Quotient step %d is %e\n", i*10+1,
sqrt(blas::norm2(*
AQ[0])));
535 lambda_max = 1.1*(
sqrt(blas::norm2(*
AQ[0])));
565 double heavy_quark_res = 0.0;
569 int resIncreaseTotal = 0;
578 bool l2_converge =
false;
583 double rNorm =
sqrt(r2);
584 double maxrr = rNorm;
587 double m_map = 2./(lambda_max-lambda_min);
588 double b_map = -(lambda_max+lambda_min)/(lambda_max-lambda_min);
592 PrintStats(
"CA-CG", total_iter, r2, b2, heavy_quark_res);
597 for (
int k=0; k<nKrylov; k++) {
606 Complex facs1[] = { m_map, b_map };
607 std::vector<ColorSpinorField*> recur1{
AS[0],
S[0]};
608 std::vector<ColorSpinorField*> S1{S[1]};
616 Complex factors[] = { 2.*m_map, 2.*b_map, -1 };
617 for (
int k = 2; k < nKrylov; k++) {
618 std::vector<ColorSpinorField*> recur2{
AS[k-1],S[k-1],S[k-2]};
619 std::vector<ColorSpinorField*> Sk{S[k]};
630 if (total_iter == 0) {
632 for (
int i=0; i<nKrylov; i++) *
Q[i] = *
S[i];
633 for (
int i=0; i<nKrylov; i++) *
AQ[i] = *
AS[i];
641 std::vector<ColorSpinorField*>
R;
642 for (
int i=0; i < nKrylov; i++) R.push_back(
S[i]);
660 std::vector<ColorSpinorField*> Q2;
661 for (
int i=0; i<nKrylov; i++) Q2.push_back(
AQ[i]);
671 std::vector<ColorSpinorField*>
X;
678 if (!fixed_iteration) {
680 std::vector<ColorSpinorField*> S0{
S[0]};
692 if (total_iter > 0) {
693 PrintStats(
"CA-CG", total_iter, r2, b2, heavy_quark_res);
702 if (total_iter>=
param.
maxiter || (r2 < stop && !l2_converge) ||
reliable(rNorm, maxrr, rUpdate, r2, delta)) {
704 mat(r_, x, tmp, tmp2);
714 warningQuda(
"CA-CG: new reliable residual norm %e is greater than previous reliable residual norm %e (total #inc %i)",
715 sqrt(r2),
sqrt(r2_old), resIncreaseTotal);
716 if (resIncrease > maxResIncrease or resIncreaseTotal > maxResIncreaseTotal) {
717 warningQuda(
"CA-CG: solver exiting due to too many true residual norm increases");
737 mat(r_, x, tmp, tmp2);
cudaColorSpinorField * tmp2
void ax(double a, ColorSpinorField &x)
void caxpyz(const Complex *a, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, std::vector< ColorSpinorField *> &z)
Compute the block "caxpyz" with over the set of ColorSpinorFields. E.g., it computes.
Communication-avoiding CG solver. This solver does un-preconditioned CG, running in steps of nKrylov...
void setPrecision(QudaPrecision precision, QudaPrecision ghost_precision=QUDA_INVALID_PRECISION, bool force_native=false)
void operator()(ColorSpinorField &out, ColorSpinorField &in)
QudaVerbosity getVerbosity()
#define checkPrecision(...)
double norm2(const ColorSpinorField &a)
__host__ __device__ ValueType sqrt(ValueType x)
void operator()(ColorSpinorField &out, ColorSpinorField &in)
Complex cDotProduct(ColorSpinorField &, ColorSpinorField &)
void deflate(std::vector< ColorSpinorField *> vec_defl, std::vector< ColorSpinorField *> vec, std::vector< ColorSpinorField *> evecs, std::vector< Complex > evals)
Deflate vector with Eigenvectors.
void PrintStats(const char *name, int k, double r2, double b2, double hq2)
Prints out the running statistics of the solver (requires a verbosity of QUDA_VERBOSE) ...
std::vector< ColorSpinorField * > AQ
const DiracMatrix & matSloppy
cudaColorSpinorField * tmp
static ColorSpinorField * Create(const ColorSpinorParam ¶m)
bool convergence(double r2, double hq2, double r2_tol, double hq_tol)
void copy(ColorSpinorField &dst, const ColorSpinorField &src)
double xmyNorm(ColorSpinorField &x, ColorSpinorField &y)
QudaPreserveSource preserve_source
int max_res_increase_total
std::vector< ColorSpinorField * > defl_tmp1
This is just a dummy structure we use for trove to define the required structure size.
void xpay(ColorSpinorField &x, double a, ColorSpinorField &y)
void operator()(ColorSpinorField &out, ColorSpinorField &in)
int reliable(double &rNorm, double &maxrr, int &rUpdate, const double &r2, const double &delta)
std::vector< ColorSpinorField * > Qtmp
ColorSpinorField * tmp_sloppy
std::vector< ColorSpinorField * > defl_tmp2
void compute_beta_N(Complex *Q_AQandg, Complex *Q_AS, Complex *beta)
void compute_beta()
Compute the beta coefficients.
QudaComputeNullVector compute_null_vector
double Last(QudaProfileType idx)
#define qudaDeviceSynchronize()
void compute_alpha_N(Complex *Q_AQandg, Complex *alpha)
CACGNR(DiracMatrix &mat, DiracMatrix &matSloppy, SolverParam ¶m, TimeProfile &profile)
ColorSpinorField * tmp_sloppy2
QudaResidualType residual_type
static double stopping(double tol, double b2, QudaResidualType residual_type)
Set the solver L2 stopping condition.
void axpy(double a, ColorSpinorField &x, ColorSpinorField &y)
bool is_preconditioner
verbosity to use for preconditioner
void compute_alpha()
Compute the alpha coefficients.
void constructDeflationSpace(const ColorSpinorField &meta, const DiracMatrix &mat, bool svd)
Constructs the deflation space.
double3 HeavyQuarkResidualNorm(ColorSpinorField &x, ColorSpinorField &r)
std::vector< ColorSpinorField * > AS
std::complex< double > Complex
std::vector< ColorSpinorField * > Q
std::vector< Complex > evals
void init()
Create the CUBLAS context.
void caxpy(const Complex &a, ColorSpinorField &x, ColorSpinorField &y)
void zero(ColorSpinorField &a)
void Mdag(ColorSpinorField &out, const ColorSpinorField &in) const
std::vector< ColorSpinorField * > evecs
CACG(DiracMatrix &mat, DiracMatrix &matSloppy, SolverParam ¶m, TimeProfile &profile)
virtual void M(ColorSpinorField &out, const ColorSpinorField &in) const =0
DEVICEHOST void swap(Real &a, Real &b)
unsigned long long flops() const
void xpy(ColorSpinorField &x, ColorSpinorField &y)
void axpby(double a, ColorSpinorField &x, double b, ColorSpinorField &y)
QudaUseInitGuess use_init_guess
CACGNE(DiracMatrix &mat, DiracMatrix &matSloppy, SolverParam ¶m, TimeProfile &profile)
QudaPrecision precision_sloppy
void create(ColorSpinorField &b)
Initiate the fields needed by the solver.
void PrintSummary(const char *name, int k, double r2, double b2, double r2_tol, double hq_tol)
Prints out the summary of the solver convergence (requires a verbosity of QUDA_SUMMARIZE). Assumes SolverParam.true_res and SolverParam.true_res_hq has been set.
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
const Dirac * Expose() const
void updateR()
update the radius for halos.