quda-ref/v1.1.0/inv__gcr__quda_8cpp_source.html

 #include <stdio.h>

 #include <stdlib.h>

 #include <math.h>


 #include <complex>


 #include <quda_internal.h>

 #include <blas_quda.h>

 #include <dslash_quda.h>

 #include <invert_quda.h>

 #include <util_quda.h>

 #include <color_spinor_field.h>


 #include <sys/time.h>


 namespace quda {


   /**
      @brief Elapsed time between two timestamps.
      @param[in] start Timestamp marking the beginning of the interval
      @param[in] end Timestamp marking the end of the interval
      @return end - start expressed in seconds
   */
   double timeInterval(struct timeval start, struct timeval end) {
     const long whole_seconds = end.tv_sec - start.tv_sec;
     // may be negative when end.tv_usec < start.tv_usec; the sum below still gives the right interval
     const long micro_seconds = end.tv_usec - start.tv_usec;
     return whole_seconds + 0.000001 * micro_seconds;
   }


   /**
      @brief Set the parameters required by the inner (preconditioner) solver, deriving them from the
      outer solver's parameters.
      @param[out] inner Parameter set of the inner solver; the fields assigned below are overwritten
      @param[in] outer Parameter set of the outer solver
   */
   void fillInnerSolveParam(SolverParam &inner, const SolverParam &outer) {
     // tolerance and reliable-update threshold
     inner.tol = outer.tol_precondition;
     inner.delta = 1e-20; // no reliable updates within the inner solver

     // the inner solver runs one precision level below the outer one
     inner.precision = outer.precision_sloppy;
     inner.precision_sloppy = outer.precision_precondition;

     // this sets a fixed iteration count if we're using the MR solver
     if (outer.inv_type_precondition == QUDA_MR_INVERTER) {
       inner.residual_type = QUDA_INVALID_RESIDUAL;
     } else {
       inner.residual_type = QUDA_L2_RELATIVE_RESIDUAL;
     }

     // reset the accumulated statistics
     inner.iter = 0;
     inner.gflops = 0;
     inner.secs = 0;

     inner.inv_type_precondition = QUDA_INVALID_INVERTER;
     inner.is_preconditioner = true; // tell inner solver it is a preconditioner
     inner.pipeline = true;

     // global_reduction is enabled only when no Schwarz type is set
     inner.schwarz_type = outer.schwarz_type;
     inner.global_reduction = (inner.schwarz_type == QUDA_INVALID_SCHWARZ);

     inner.use_init_guess = QUDA_USE_INIT_GUESS_NO;

     inner.maxiter = outer.maxiter_precondition;
     if (outer.inv_type_precondition == QUDA_CA_GCR_INVERTER) {
       // CA-GCR: the iteration budget is divided by the number of preconditioner cycles
       inner.Nkrylov = inner.maxiter / outer.precondition_cycle;
     } else {
       inner.Nsteps = outer.precondition_cycle;
     }

     inner.preserve_source = QUDA_PRESERVE_SOURCE_YES;

     inner.verbosity_precondition = outer.verbosity_precondition;

     inner.compute_true_res = false;
     inner.sloppy_converge = true;
   }


   /**
      @brief Compute, in one batched reduction, the dot products of the N vectors Ap[i], ..., Ap[i+N-1]
      with Ap[k], and store them in column k of beta.
      @param[out] beta Coefficient matrix; beta[i+j][k] is written for j in [0, N)
      @param[in] Ap Set of vectors
      @param[in] i Index of the first vector in the batch
      @param[in] N Number of vectors in the batch
      @param[in] k Index of the vector the batch is contracted against
   */
   void computeBeta(Complex **beta, std::vector<ColorSpinorField*> Ap, int i, int N, int k) {
     // scratch results; std::vector releases the storage on every exit path and zero-initializes the entries
     std::vector<Complex> Beta(N);
     std::vector<ColorSpinorField*> a(Ap.begin() + i, Ap.begin() + i + N), b(1, Ap[k]);
     blas::cDotProduct(Beta.data(), a, b); // vectorized dot product
 #if 0
     // debug cross-check against the single-vector reduction (b holds exactly one element)
     for (int j=0; j<N; j++) {
       printfQuda("%d/%d vectorized %e %e, regular %e %e\n", j+1, N, Beta[j].real(), Beta[j].imag(),
                  blas::cDotProduct(*a[j], *b[0]).real(), blas::cDotProduct(*a[j], *b[0]).imag());
       }
 #endif

     for (int j = 0; j < N; j++) beta[i + j][k] = Beta[j];
   }


   /**
      @brief Apply a batched caxpy to Ap[k] using the vectors Ap[begin], ..., Ap[begin+size-1] and the
      negated coefficients from column k of beta.
      @param[in] beta Coefficient matrix; beta[begin+i][k] is read for i in [0, size)
      @param[in,out] Ap Set of vectors; the field pointed to by Ap[k] is updated
      @param[in] begin Index of the first vector in the batch
      @param[in] size Number of vectors in the batch
      @param[in] k Index of the vector being updated
   */
   void updateAp(Complex **beta, std::vector<ColorSpinorField*> Ap, int begin, int size, int k) {
     // negated coefficients; std::vector releases the storage on every exit path
     std::vector<Complex> beta_(size);
     for (int i = 0; i < size; i++) beta_[i] = -beta[i + begin][k];

     std::vector<ColorSpinorField*> Ap_(Ap.begin() + begin, Ap.begin() + begin + size);
     std::vector<ColorSpinorField*> Apk(Ap.begin() + k, Ap.begin() + k + 1);

     blas::caxpy(beta_.data(), Ap_, Apk);
   }


   /**
      @brief Orthogonalize Ap[k] against Ap[0], ..., Ap[k-1], recording the projection coefficients in
      column k of beta.
      @param[out] beta Coefficient matrix; beta[i][k] is written for i in [0, k)
      @param[in,out] Ap Set of vectors; the field pointed to by Ap[k] is updated
      @param[in] k Index of the vector being orthogonalized
      @param[in] pipeline 0: separate dot product and caxpy per vector, 1: fused caxpy + dot product,
      otherwise: batch length used for the vectorized dot product / caxpy
   */
   void orthoDir(Complex **beta, std::vector<ColorSpinorField*> Ap, int k, int pipeline) {

     if (pipeline == 0) { // no kernel fusion
       for (int j = 0; j < k; j++) { // 5 (k-1) memory transactions here
         ColorSpinorField &prev = *(Ap[j]);
         beta[j][k] = blas::cDotProduct(prev, *(Ap[k]));
         blas::caxpy(-beta[j][k], prev, *Ap[k]);
       }
       return;
     }

     if (pipeline == 1) { // basic kernel fusion
       if (k == 0) return;
       beta[0][k] = blas::cDotProduct(*Ap[0], *Ap[k]);
       for (int j = 1; j < k; j++) { // 4 (k-1) memory transactions here
         // subtract the previous projection and compute the next coefficient in one call
         beta[j][k] = blas::caxpyDotzy(-beta[j - 1][k], *Ap[j - 1], *Ap[k], *Ap[j]);
       }
       blas::caxpy(-beta[k - 1][k], *Ap[k - 1], *Ap[k]);
       return;
     }

     // batched path: handle the previous vectors in groups of `width`
     const int width = pipeline;
     int done = 0;
     for (; done + width <= k; done += width) {
       computeBeta(beta, Ap, done, width, k);
       updateAp(beta, Ap, done, width, k);
     }

     // need to update the remainder, i.e. the final k % width vectors
     const int left = k - done;
     if (left > 0) {
       computeBeta(beta, Ap, done, left, k);
       updateAp(beta, Ap, done, left, k);
     }
   }


   /**
      @brief Back substitution for an n x n upper-triangular system: for k = n-1, ..., 0
      delta[k] = (alpha[k] - sum_{j>k} beta[k][j] * delta[j]) / gamma[k].
      @param[in] alpha Right-hand side, n entries
      @param[in] beta Strictly-upper-triangular coefficients; beta[k][j] is read for j > k
      @param[in] gamma Diagonal entries, n entries
      @param[out] delta Solution, n entries
      @param[in] n Dimension of the system
   */
   void backSubs(const Complex *alpha, Complex** const beta, const double *gamma, Complex *delta, int n) {
     for (int row = n; row-- > 0;) {
       Complex acc = alpha[row];
       // remove the contributions of the unknowns that have already been solved for
       for (int col = row + 1; col < n; col++) acc -= beta[row][col] * delta[col];
       delta[row] = acc / gamma[row];
     }
   }


   /**
      @brief Update the solution vector: solve the k x k triangular system for the coefficients delta via
      backSubs and apply them to x with a batched caxpy over p[0], ..., p[k-1].
      @param[in,out] x Solution vector to be updated
      @param[in] alpha Right-hand side of the triangular system, k entries
      @param[in] beta Off-diagonal coefficients of the triangular system
      @param[in] gamma Diagonal entries of the triangular system, k entries
      @param[in] k Number of directions to include
      @param[in] p Set of direction vectors; only the first k are used
   */
   void updateSolution(ColorSpinorField &x, const Complex *alpha, Complex **const beta, double *gamma, int k,
                       std::vector<ColorSpinorField *> p)
   {
     // coefficients of the update; std::vector releases the storage on every exit path
     std::vector<Complex> delta(k);

     // Update the solution vector
     backSubs(alpha, beta, gamma, delta.data(), k);

     std::vector<ColorSpinorField*> X(1, &x);
     std::vector<ColorSpinorField*> P(p.begin(), p.begin() + k);
     blas::caxpy(delta.data(), P, X);
   }


   /**
      @brief Construct a GCR solver that creates its own inner (preconditioner) solver according to
      param.inv_type_precondition, and allocate the coefficient storage for a Krylov space of
      dimension param.Nkrylov.
      @param[in] mat Operator passed to the Solver base class
      @param[in] matSloppy Sloppy operator; also handed to the inner solver
      @param[in] matPrecon Preconditioner operator; also handed to the inner solver
      @param[in] matEig Operator used to build matMdagM
      @param[in] param Solver parameters
      @param[in] profile Time profile
   */
   GCR::GCR(const DiracMatrix &mat, const DiracMatrix &matSloppy, const DiracMatrix &matPrecon,
            const DiracMatrix &matEig, SolverParam &param, TimeProfile &profile) :
     Solver(mat, matSloppy, matPrecon, matEig, param, profile),
     matMdagM(DiracMdagM(matEig.Expose())),
     K(nullptr),
     Kparam(param),
     n_krylov(param.Nkrylov),
     init(false),
     rp(nullptr),
     tmpp(nullptr),
     tmp_sloppy(nullptr),
     r_sloppy(nullptr)
   {
     // Kparam starts as a copy of param; specialize it for the inner solver
     fillInnerSolveParam(Kparam, param);

     switch (param.inv_type_precondition) {
     case QUDA_CG_INVERTER: // inner CG solver
       K = new CG(matSloppy, matPrecon, matPrecon, matEig, Kparam, profile);
       break;
     case QUDA_BICGSTAB_INVERTER: // inner BiCGstab solver
       K = new BiCGstab(matSloppy, matPrecon, matPrecon, matEig, Kparam, profile);
       break;
     case QUDA_MR_INVERTER: // inner MR solver
       K = new MR(matSloppy, matPrecon, Kparam, profile);
       break;
     case QUDA_SD_INVERTER: // inner SD solver
       K = new SD(matSloppy, Kparam, profile);
       break;
     case QUDA_CA_GCR_INVERTER: // inner CA-GCR solver
       K = new CAGCR(matSloppy, matPrecon, matPrecon, matEig, Kparam, profile);
       break;
     case QUDA_INVALID_INVERTER: // no inner solver: K stays null
       K = nullptr;
       break;
     default:
       errorQuda("Unsupported preconditioner %d\n", param.inv_type_precondition);
     }

     // the field pointers start out null; the fields are created on the first solve
     p.resize(n_krylov + 1);
     Ap.resize(n_krylov);

     // coefficient storage: alpha and gamma hold n_krylov entries, beta is n_krylov x n_krylov
     alpha = new Complex[n_krylov];
     gamma = new double[n_krylov];
     beta = new Complex *[n_krylov];
     for (int row = 0; row < n_krylov; row++) beta[row] = new Complex[n_krylov];
   }


   /**
      @brief Construct a GCR solver that uses an externally supplied solver as its preconditioner, and
      allocate the coefficient storage for a Krylov space of dimension param.Nkrylov.
      @param[in] mat Operator passed to the Solver base class
      @param[in] K Solver to use as the preconditioner; its address is stored in the member K
      @param[in] matSloppy Sloppy operator
      @param[in] matPrecon Preconditioner operator
      @param[in] matEig Operator used to build matMdagM
      @param[in] param Solver parameters
      @param[in] profile Time profile
   */
   GCR::GCR(const DiracMatrix &mat, Solver &K, const DiracMatrix &matSloppy, const DiracMatrix &matPrecon,
            const DiracMatrix &matEig, SolverParam &param, TimeProfile &profile) :
     Solver(mat, matSloppy, matPrecon, matEig, param, profile),
     matMdagM(matEig.Expose()),
     K(&K), // the parameter K shadows the member of the same name
     Kparam(param),
     n_krylov(param.Nkrylov),
     init(false),
     rp(nullptr),
     tmpp(nullptr),
     tmp_sloppy(nullptr),
     r_sloppy(nullptr)
   {
     // coefficient storage: alpha and gamma hold n_krylov entries, beta is n_krylov x n_krylov
     alpha = new Complex[n_krylov];
     gamma = new double[n_krylov];
     beta = new Complex *[n_krylov];
     for (int row = 0; row < n_krylov; row++) beta[row] = new Complex[n_krylov];

     // the field pointers start out null; the fields are created on the first solve
     p.resize(n_krylov + 1);
     Ap.resize(n_krylov);
   }


   /**
      @brief Release the coefficient arrays, the inner solver (unless the preconditioner type is MG), the
      work fields created on the first solve, and the deflation space.
   */
   GCR::~GCR() {
     profile.TPSTART(QUDA_PROFILE_FREE);

     // coefficient storage
     delete[] alpha;
     for (int row = 0; row < n_krylov; row++) delete[] beta[row];
     delete[] beta;
     delete[] gamma;

     // the inner solver is not deleted here when the preconditioner type is MG
     if (param.inv_type_precondition != QUDA_MG_INVERTER) delete K;

     // r_sloppy is a separate allocation only when the sloppy precision differs; it may also be
     // the same pointer as rp, which is deleted further down
     if (init && param.precision_sloppy != tmpp->Precision() && r_sloppy != rp) delete r_sloppy;

     // deleting a null pointer is a no-op, so fields that were never created need no check
     for (int i = 0; i <= n_krylov; i++) delete p[i];
     for (int i = 0; i < n_krylov; i++) delete Ap[i];

     // tmp_sloppy is either the same pointer as tmpp or a distinct alias object
     if (tmp_sloppy != tmpp) delete tmp_sloppy;
     delete tmpp;
     delete rp;

     destroyDeflationSpace();

     profile.TPSTOP(QUDA_PROFILE_FREE);
   }


   /**
      @brief Solve the linear system with right-hand side b using restarted GCR, optionally
      preconditioned by the inner solver K and optionally deflated, leaving the solution in x.

      If n_krylov is zero the call returns immediately (zeroing x unless an initial guess is in use).
      The work fields are created on the first call and reused afterwards. On exit param.secs,
      param.gflops and param.iter are accumulated, and param.true_res / param.true_res_hq are set when
      param.compute_true_res is enabled (or set to zero for a zero source).

      @param[in,out] x Solution; used as the initial guess when param.use_init_guess is
      QUDA_USE_INIT_GUESS_YES, otherwise zeroed first
      @param[in,out] b Source; overwritten with a residual vector when param.preserve_source is
      QUDA_PRESERVE_SOURCE_NO
   */
   void GCR::operator()(ColorSpinorField &x, ColorSpinorField &b)
   {
     if (n_krylov == 0) {
       // Krylov space is zero-dimensional so return doing no work
       if (param.use_init_guess == QUDA_USE_INIT_GUESS_NO) blas::zero(x);
       return;
     }

     profile.TPSTART(QUDA_PROFILE_INIT);

     if (!init) {
       // first call: create the work fields using x as the template
       ColorSpinorParam csParam(x);
       csParam.create = QUDA_NULL_FIELD_CREATE;

       // a separate residual field is needed with a preconditioner or when x is not in sloppy precision
       rp = (K || x.Precision() != param.precision_sloppy) ? ColorSpinorField::Create(csParam) : nullptr;

       // high precision temporary
       tmpp = ColorSpinorField::Create(csParam);

       // create sloppy fields used for orthogonalization
       csParam.setPrecision(param.precision_sloppy);
       for (int i = 0; i < n_krylov + 1; i++) p[i] = ColorSpinorField::Create(csParam);
       for (int i = 0; i < n_krylov; i++) Ap[i] = ColorSpinorField::Create(csParam);

       // sloppy temporary: an alias of tmpp when the precisions differ, otherwise tmpp itself
       csParam.setPrecision(param.precision_sloppy);
       if (param.precision_sloppy != x.Precision()) {
         tmp_sloppy = tmpp->CreateAlias(csParam);
       } else {
         tmp_sloppy = tmpp;
       }

       // sloppy residual: only needed with a preconditioner
       if (param.precision_sloppy != x.Precision()) {
         r_sloppy = K ? ColorSpinorField::Create(csParam) : nullptr;
       } else {
         r_sloppy = K ? rp : nullptr;
       }

       init = true;
     }

     if (param.deflate) {
       // Construct the eigensolver and deflation space if requested.
       if (param.eig_param.eig_type == QUDA_EIG_TR_LANCZOS || param.eig_param.eig_type == QUDA_EIG_BLK_TR_LANCZOS) {
         constructDeflationSpace(b, matMdagM);
       } else {
         // Use Arnoldi to inspect the space only and turn off deflation
         constructDeflationSpace(b, mat);
         param.deflate = false;
       }
       if (deflate_compute) {
         // compute the deflation space.
         profile.TPSTOP(QUDA_PROFILE_INIT);
         (*eig_solve)(evecs, evals);
         if (param.deflate) {
           // double the size of the Krylov space
           extendSVDDeflationSpace();
           // populate extra memory with L/R singular vectors
           eig_solve->computeSVD(matMdagM, evecs, evals);
         }
         profile.TPSTART(QUDA_PROFILE_INIT);
         deflate_compute = false;
       }
       if (recompute_evals) {
         eig_solve->computeEvals(matMdagM, evecs, evals);
         eig_solve->computeSVD(matMdagM, evecs, evals);
         recompute_evals = false;
       }
     }

     // when no dedicated residual fields exist, p[0] doubles as the residual
     ColorSpinorField &r = rp ? *rp : *p[0];
     ColorSpinorField &rSloppy = r_sloppy ? *r_sloppy : *p[0];
     ColorSpinorField &tmp = *tmpp;
     ColorSpinorField &tmpSloppy = *tmp_sloppy;

     double b2 = blas::norm2(b);  // norm sq of source
     double r2;                // norm sq of residual

     // compute initial residual depending on whether we have an initial guess or not
     if (param.use_init_guess == QUDA_USE_INIT_GUESS_YES) {
       // Compute r = b - A * x
       mat(r, x, tmp);
       r2 = blas::xmyNorm(b, r);
       // x contains the original guess.
     } else {
       blas::copy(r, b);
       r2 = b2;
       blas::zero(x);
     }

     if (param.deflate && param.maxiter > 1) {
       // Deflate: Hardcoded to SVD. If maxiter == 1, this is a dummy solve
       eig_solve->deflateSVD(x, r, evecs, evals, true);

       // Compute r_defl = RHS - A * LHS
       mat(r, x, tmp);
       r2 = blas::xmyNorm(b, r);
     }

     // Check to see that we're not trying to invert on a zero-field source
     if (b2 == 0) {
       if (param.compute_null_vector == QUDA_COMPUTE_NULL_VECTOR_NO) {
         profile.TPSTOP(QUDA_PROFILE_INIT);
         warningQuda("inverting on zero-field source\n");
         x = b;
         param.true_res = 0.0;
         param.true_res_hq = 0.0;
         return;
       } else {
         // null-vector computation: use the residual norm as the reference norm instead
         b2 = r2;
       }
     }

     double stop = stopping(param.tol, b2, param.residual_type); // stopping condition of solver

     const bool use_heavy_quark_res =
       (param.residual_type & QUDA_HEAVY_QUARK_RESIDUAL) ? true : false;

     // this parameter determines how many consecutive reliable update
     // residual increases we tolerate before terminating the solver,
     // i.e., how long do we want to keep trying to converge
     const int maxResIncrease = param.max_res_increase; // check if we reached the limit of our tolerance
     const int maxResIncreaseTotal = param.max_res_increase_total;

     double heavy_quark_res = 0.0; // heavy quark residual
     if(use_heavy_quark_res) heavy_quark_res = sqrt(blas::HeavyQuarkResidualNorm(x,r).z);

     int resIncrease = 0;
     int resIncreaseTotal = 0;

     profile.TPSTOP(QUDA_PROFILE_INIT);
     profile.TPSTART(QUDA_PROFILE_PREAMBLE);

     blas::flops = 0;

     blas::copy(rSloppy, r);

     int total_iter = 0; // iterations over all restart cycles
     int restart = 0;
     double r2_old = r2;
     double maxr_deflate = sqrt(r2);
     bool l2_converge = false;

     int pipeline = param.pipeline;
     // Vectorized dot product only has limited support so work around
     if (Ap[0]->Location() == QUDA_CPU_FIELD_LOCATION || pipeline == 0) pipeline = 1;
     if (pipeline > n_krylov) pipeline = n_krylov;

     profile.TPSTOP(QUDA_PROFILE_PREAMBLE);
     profile.TPSTART(QUDA_PROFILE_COMPUTE);

     int k = 0;       // iteration index within the current restart cycle
     int k_break = 0; // value of k when the most recent cycle was closed

     PrintStats("GCR", total_iter+k, r2, b2, heavy_quark_res);
     while ( !convergence(r2, heavy_quark_res, stop, param.tol_hq) && total_iter < param.maxiter) {

       if (K) {
         // apply the preconditioner to the sloppy residual to obtain the new direction
         pushVerbosity(param.verbosity_precondition);
         (*K)(*p[k], rSloppy);
         popVerbosity();
         // relaxation p = omega*p + (1-omega)*r
         //if (param.omega!=1.0) blas::axpby((1.0-param.omega), rPre, param.omega, pPre);
       }

       matSloppy(*Ap[k], *p[k], tmpSloppy);

       if (getVerbosity()>= QUDA_DEBUG_VERBOSE)
         printfQuda("GCR debug iter=%d: Ap2=%e, p2=%e, r2=%e\n",
                    total_iter, blas::norm2(*Ap[k]), blas::norm2(*p[k]), blas::norm2(rSloppy));

       // orthogonalize Ap[k] against the previous Ap vectors of this cycle
       orthoDir(beta, Ap, k, pipeline);

       // without a preconditioner the residual lives in p[k]
       double3 Apr = blas::cDotProductNormA(*Ap[k], K ? rSloppy : *p[k]);

       if (getVerbosity()>= QUDA_DEBUG_VERBOSE) {
         printfQuda("GCR debug iter=%d: Apr=(%e,%e,%e)\n", total_iter, Apr.x, Apr.y, Apr.z);
         for (int i=0; i<k; i++)
           for (int j=0; j<=k; j++)
             printfQuda("GCR debug iter=%d: beta[%d][%d] = (%e,%e)\n",
                        total_iter, i, j, real(beta[i][j]), imag(beta[i][j]));
       }

       gamma[k] = sqrt(Apr.z); // gamma[k] = |Ap[k]|
       if (gamma[k] == 0.0) errorQuda("GCR breakdown\n");
       alpha[k] = Complex(Apr.x, Apr.y) / gamma[k]; // alpha = (1/|Ap|) * (Ap, r)

       // r -= (1/|Ap|^2) * (Ap, r) Ap, Ap *= 1/|Ap|
       r2 = blas::cabxpyzAxNorm(1.0 / gamma[k], -alpha[k], *Ap[k], K ? rSloppy : *p[k], K ? rSloppy : *p[k + 1]);

       k++;
       total_iter++;

       PrintStats("GCR", total_iter, r2, b2, heavy_quark_res);

       // update since n_krylov or maxiter reached, converged or reliable update required
       // note that the heavy quark residual will by definition only be checked every n_krylov steps
       if (k == n_krylov || total_iter == param.maxiter || (r2 < stop && !l2_converge) || sqrt(r2 / r2_old) < param.delta) {

         // update the solution vector
         updateSolution(x, alpha, beta, gamma, k, p);

         // NOTE(review): on this break k_break keeps its value from the previous cycle while the latest
         // iterated residual is in p[k] when there is no preconditioner - confirm this is intended for
         // the preserve_source == NO path below
         if ( (r2 < stop || total_iter==param.maxiter) && param.sloppy_converge) break;
         mat(r, x, tmp);
         r2 = blas::xmyNorm(b, r);

         if (param.deflate && sqrt(r2) < maxr_deflate * param.tol_restart) {
           // Deflate: Hardcoded to SVD.
           eig_solve->deflateSVD(x, r, evecs, evals, true);

           // Compute r_defl = RHS - A * LHS
           mat(r, x, tmp);
           r2 = blas::xmyNorm(b, r);

           maxr_deflate = sqrt(r2);
         }

         if (use_heavy_quark_res) heavy_quark_res = sqrt(blas::HeavyQuarkResidualNorm(x, r).z);

         // break-out check if we have reached the limit of the precision
         if (r2 > r2_old) {
           resIncrease++;
           resIncreaseTotal++;
           warningQuda("GCR: new reliable residual norm %e is greater than previous reliable residual norm %e (total #inc %i)",
                       sqrt(r2), sqrt(r2_old), resIncreaseTotal);
           if (resIncrease > maxResIncrease or resIncreaseTotal > maxResIncreaseTotal) {
             warningQuda("GCR: solver exiting due to too many true residual norm increases");
             break;
           }
         } else {
           resIncrease = 0;
         }

         // close the current cycle
         k_break = k;
         k = 0;

         if ( !convergence(r2, heavy_quark_res, stop, param.tol_hq) ) {
           restart++; // restarting if residual is still too great

           PrintStats("GCR (restart)", restart, r2, b2, heavy_quark_res);
           blas::copy(rSloppy, r);

           // prevent ending the Krylov space prematurely if other convergence criteria not met
           if (r2 < stop) l2_converge = true;
         }

         r2_old = r2;
       }
     }

     profile.TPSTOP(QUDA_PROFILE_COMPUTE);
     profile.TPSTART(QUDA_PROFILE_EPILOGUE);

     param.secs += profile.Last(QUDA_PROFILE_COMPUTE);

     double gflops = (blas::flops + mat.flops() + matSloppy.flops() + matPrecon.flops() + matMdagM.flops()) * 1e-9;
     if (K) gflops += K->flops()*1e-9;

     // compare the total iteration count: k is reset to zero at every restart, so it does not
     // reflect how many iterations were actually performed
     if (total_iter >= param.maxiter && getVerbosity() >= QUDA_SUMMARIZE)
       warningQuda("Exceeded maximum iterations %d", param.maxiter);

     if (getVerbosity() >= QUDA_VERBOSE) printfQuda("GCR: number of restarts = %d\n", restart);

     if (param.compute_true_res) {
       // Calculate the true residual
       mat(r, x, tmp);
       double true_res = blas::xmyNorm(b, r);
       param.true_res = sqrt(true_res / b2);
       if (param.residual_type & QUDA_HEAVY_QUARK_RESIDUAL)
         param.true_res_hq = sqrt(blas::HeavyQuarkResidualNorm(x,r).z);
       else
         param.true_res_hq = 0.0;

       if (param.preserve_source == QUDA_PRESERVE_SOURCE_NO) blas::copy(b, r);
     } else {
       if (param.preserve_source == QUDA_PRESERVE_SOURCE_NO) blas::copy(b, K ? rSloppy : *p[k_break]);
     }

     param.gflops += gflops;
     param.iter += total_iter;

     // reset the flops counters
     blas::flops = 0;
     mat.flops();
     matSloppy.flops();
     matPrecon.flops();
     matMdagM.flops();

     profile.TPSTOP(QUDA_PROFILE_EPILOGUE);
     profile.TPSTART(QUDA_PROFILE_FREE);

     PrintSummary("GCR", total_iter, r2, b2, stop, param.tol_hq);

     profile.TPSTOP(QUDA_PROFILE_FREE);

     return;
   }


 } // namespace quda

blas_quda.h

quda::BiCGstab
Definition: invert_quda.h:807

quda::CAGCR
Communication-avoiding GCR solver. This solver does un-preconditioned GCR, first building up a polyno...
Definition: invert_quda.h:1099

quda::CG
Conjugate-Gradient Solver.
Definition: invert_quda.h:639

quda::ColorSpinorField
Definition: color_spinor_field.h:379

quda::ColorSpinorField::CreateAlias
ColorSpinorField * CreateAlias(const ColorSpinorParam &param)
Create a field that aliases this field's storage. The alias field can use a different precision than ...
Definition: color_spinor_field.cpp:742

quda::ColorSpinorField::Create
static ColorSpinorField * Create(const ColorSpinorParam &param)
Definition: color_spinor_field.cpp:714

quda::ColorSpinorParam
Definition: color_spinor_field.h:131

quda::DiracMatrix
Definition: dirac_quda.h:1892

quda::DiracMatrix::flops
unsigned long long flops() const
Definition: dirac_quda.h:1909

quda::DiracMdagM
Definition: dirac_quda.h:2025

quda::EigenSolver::deflateSVD
void deflateSVD(std::vector< ColorSpinorField * > &sol, const std::vector< ColorSpinorField * > &vec, const std::vector< ColorSpinorField * > &evecs, const std::vector< Complex > &evals, bool accumulate=false) const
Deflate a set of source vectors with a set of left and right singular vectors.
Definition: eigensolve_quda.cpp:672

quda::EigenSolver::computeEvals
void computeEvals(const DiracMatrix &mat, std::vector< ColorSpinorField * > &evecs, std::vector< Complex > &evals, int size)
Compute eigenvalues and their residua.
Definition: eigensolve_quda.cpp:718

quda::EigenSolver::computeSVD
void computeSVD(const DiracMatrix &mat, std::vector< ColorSpinorField * > &evecs, std::vector< Complex > &evals)
Computes Left/Right SVD from pre computed Right/Left.
Definition: eigensolve_quda.cpp:625

quda::GCR::GCR
GCR(const DiracMatrix &mat, const DiracMatrix &matSloppy, const DiracMatrix &matPrecon, const DiracMatrix &matEig, SolverParam &param, TimeProfile &profile)
Definition: inv_gcr_quda.cpp:164

quda::GCR::operator()
void operator()(ColorSpinorField &out, ColorSpinorField &in)
Definition: inv_gcr_quda.cpp:253

quda::GCR::~GCR
virtual ~GCR()
Definition: inv_gcr_quda.cpp:225

quda::LatticeField::Precision
QudaPrecision Precision() const
Definition: lattice_field.h:567

quda::MR
Definition: invert_quda.h:961

quda::SD
Definition: invert_quda.h:1147

quda::Solver
Definition: invert_quda.h:462

quda::Solver::deflate_compute
bool deflate_compute
Definition: invert_quda.h:475

quda::Solver::profile
TimeProfile & profile
Definition: invert_quda.h:471

quda::Solver::mat
const DiracMatrix & mat
Definition: invert_quda.h:465

quda::Solver::convergence
bool convergence(double r2, double hq2, double r2_tol, double hq_tol)
Definition: solver.cpp:328

quda::Solver::recompute_evals
bool recompute_evals
Definition: invert_quda.h:476

quda::Solver::evecs
std::vector< ColorSpinorField * > evecs
Definition: invert_quda.h:477

quda::Solver::flops
virtual double flops() const
Return flops.
Definition: invert_quda.h:633

quda::Solver::destroyDeflationSpace
void destroyDeflationSpace()
Destroy the allocated deflation space.
Definition: solver.cpp:229

quda::Solver::PrintSummary
void PrintSummary(const char *name, int k, double r2, double b2, double r2_tol, double hq_tol)
Prints out the summary of the solver convergence (requires a verbosity of QUDA_SUMMARIZE)....
Definition: solver.cpp:386

quda::Solver::matEig
const DiracMatrix & matEig
Definition: invert_quda.h:468

quda::Solver::param
SolverParam & param
Definition: invert_quda.h:470

quda::Solver::stopping
static double stopping(double tol, double b2, QudaResidualType residual_type)
Set the solver L2 stopping condition.
Definition: solver.cpp:311

quda::Solver::extendSVDDeflationSpace
void extendSVDDeflationSpace()
Extends the deflation space to twice its size for SVD deflation.
Definition: solver.cpp:287

quda::Solver::evals
std::vector< Complex > evals
Definition: invert_quda.h:478

quda::Solver::eig_solve
EigenSolver * eig_solve
Definition: invert_quda.h:473

quda::Solver::PrintStats
void PrintStats(const char *name, int k, double r2, double b2, double hq2)
Prints out the running statistics of the solver (requires a verbosity of QUDA_VERBOSE)
Definition: solver.cpp:373

quda::Solver::constructDeflationSpace
void constructDeflationSpace(const ColorSpinorField &meta, const DiracMatrix &mat)
Constructs the deflation space and eigensolver.
Definition: solver.cpp:168

quda::Solver::matPrecon
const DiracMatrix & matPrecon
Definition: invert_quda.h:467

quda::Solver::matSloppy
const DiracMatrix & matSloppy
Definition: invert_quda.h:466

quda::TimeProfile
Definition: timer.h:174

quda::TimeProfile::Last
double Last(QudaProfileType idx)
Definition: timer.h:254

color_spinor_field.h

pipeline
int pipeline
Definition: command_line_params.cpp:55

mat
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
Definition: covdev_reference.cpp:109

end
void end(void)
Definition: covdev_test.cpp:141

tmp
cudaColorSpinorField * tmp
Definition: covdev_test.cpp:34

dslash_quda.h

QUDA_CPU_FIELD_LOCATION
@ QUDA_CPU_FIELD_LOCATION
Definition: enum_quda.h:325

QUDA_USE_INIT_GUESS_NO
@ QUDA_USE_INIT_GUESS_NO
Definition: enum_quda.h:429

QUDA_USE_INIT_GUESS_YES
@ QUDA_USE_INIT_GUESS_YES
Definition: enum_quda.h:430

QUDA_DEBUG_VERBOSE
@ QUDA_DEBUG_VERBOSE
Definition: enum_quda.h:268

QUDA_SUMMARIZE
@ QUDA_SUMMARIZE
Definition: enum_quda.h:266

QUDA_VERBOSE
@ QUDA_VERBOSE
Definition: enum_quda.h:267

QUDA_HEAVY_QUARK_RESIDUAL
@ QUDA_HEAVY_QUARK_RESIDUAL
Definition: enum_quda.h:195

QUDA_INVALID_RESIDUAL
@ QUDA_INVALID_RESIDUAL
Definition: enum_quda.h:196

QUDA_L2_RELATIVE_RESIDUAL
@ QUDA_L2_RELATIVE_RESIDUAL
Definition: enum_quda.h:193

QUDA_EIG_BLK_TR_LANCZOS
@ QUDA_EIG_BLK_TR_LANCZOS
Definition: enum_quda.h:138

QUDA_EIG_TR_LANCZOS
@ QUDA_EIG_TR_LANCZOS
Definition: enum_quda.h:137

QUDA_MR_INVERTER
@ QUDA_MR_INVERTER
Definition: enum_quda.h:110

QUDA_CA_GCR_INVERTER
@ QUDA_CA_GCR_INVERTER
Definition: enum_quda.h:132

QUDA_SD_INVERTER
@ QUDA_SD_INVERTER
Definition: enum_quda.h:112

QUDA_CG_INVERTER
@ QUDA_CG_INVERTER
Definition: enum_quda.h:107

QUDA_INVALID_INVERTER
@ QUDA_INVALID_INVERTER
Definition: enum_quda.h:133

QUDA_MG_INVERTER
@ QUDA_MG_INVERTER
Definition: enum_quda.h:122

QUDA_BICGSTAB_INVERTER
@ QUDA_BICGSTAB_INVERTER
Definition: enum_quda.h:108

QUDA_PRESERVE_SOURCE_NO
@ QUDA_PRESERVE_SOURCE_NO
Definition: enum_quda.h:238

QUDA_PRESERVE_SOURCE_YES
@ QUDA_PRESERVE_SOURCE_YES
Definition: enum_quda.h:239

QUDA_INVALID_SCHWARZ
@ QUDA_INVALID_SCHWARZ
Definition: enum_quda.h:189

QUDA_NULL_FIELD_CREATE
@ QUDA_NULL_FIELD_CREATE
Definition: enum_quda.h:360

QUDA_COMPUTE_NULL_VECTOR_NO
@ QUDA_COMPUTE_NULL_VECTOR_NO
Definition: enum_quda.h:441

invert_quda.h

quda::blas_lapack::native::init
void init()
Create the BLAS context.
Definition: blas_lapack_cublas.cpp:28

quda::blas::caxpyDotzy
Complex caxpyDotzy(const Complex &a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)

quda::blas::HeavyQuarkResidualNorm
double3 HeavyQuarkResidualNorm(ColorSpinorField &x, ColorSpinorField &r)

quda::blas::xmyNorm
double xmyNorm(ColorSpinorField &x, ColorSpinorField &y)
Definition: blas_quda.h:79

quda::blas::flops
unsigned long long flops

quda::blas::cabxpyzAxNorm
double cabxpyzAxNorm(double a, const Complex &b, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)

quda::blas::zero
void zero(ColorSpinorField &a)

quda::blas::norm2
double norm2(const ColorSpinorField &a)

quda::blas::cDotProductNormA
double3 cDotProductNormA(ColorSpinorField &a, ColorSpinorField &b)

quda::blas::caxpy
void caxpy(const Complex &a, ColorSpinorField &x, ColorSpinorField &y)

quda::blas::copy
void copy(ColorSpinorField &dst, const ColorSpinorField &src)
Definition: blas_quda.h:24

quda::blas::cDotProduct
Complex cDotProduct(ColorSpinorField &, ColorSpinorField &)

quda::device::profile::stop
void stop()
Stop profiling.
Definition: device.cpp:228

quda::device::profile::start
void start()
Start profiling.
Definition: device.cpp:226

quda
Definition: blas_lapack.h:24

quda::updateAp
void updateAp(Complex **beta, std::vector< ColorSpinorField * > Ap, int begin, int size, int k)
Definition: inv_gcr_quda.cpp:83

quda::updateSolution
void updateSolution(ColorSpinorField &x, const Complex *alpha, Complex **const beta, double *gamma, int k, std::vector< ColorSpinorField * > p)
Definition: inv_gcr_quda.cpp:146

quda::orthoDir
void orthoDir(Complex **beta, std::vector< ColorSpinorField * > Ap, int k, int pipeline)
Definition: inv_gcr_quda.cpp:96

quda::Complex
std::complex< double > Complex
Definition: quda_internal.h:86

quda::sqrt
__host__ __device__ ValueType sqrt(ValueType x)
Definition: complex_quda.h:120

quda::computeBeta
void computeBeta(Complex **beta, std::vector< ColorSpinorField * > Ap, int i, int N, int k)
Definition: inv_gcr_quda.cpp:63

quda::QUDA_PROFILE_INIT
@ QUDA_PROFILE_INIT
Definition: timer.h:106

quda::QUDA_PROFILE_EPILOGUE
@ QUDA_PROFILE_EPILOGUE
Definition: timer.h:110

quda::QUDA_PROFILE_COMPUTE
@ QUDA_PROFILE_COMPUTE
Definition: timer.h:108

quda::QUDA_PROFILE_FREE
@ QUDA_PROFILE_FREE
Definition: timer.h:111

quda::QUDA_PROFILE_PREAMBLE
@ QUDA_PROFILE_PREAMBLE
Definition: timer.h:107

quda::timeInterval
double timeInterval(struct timeval start, struct timeval end)
Definition: inv_gcr_quda.cpp:18

quda::backSubs
void backSubs(const Complex *alpha, Complex **const beta, const double *gamma, Complex *delta, int n)
Definition: inv_gcr_quda.cpp:136

quda::fillInnerSolveParam
void fillInnerSolveParam(SolverParam &inner, const SolverParam &outer)
Definition: inv_gcr_quda.cpp:25

csParam
ColorSpinorParam csParam
Definition: pack_test.cpp:25

param
QudaGaugeParam param
Definition: pack_test.cpp:18

quda_internal.h

QudaEigParam_s::eig_type
QudaEigType eig_type
Definition: quda.h:416

quda::SolverParam
Definition: invert_quda.h:17

quda::SolverParam::preserve_source
QudaPreserveSource preserve_source
Definition: invert_quda.h:151

quda::SolverParam::iter
int iter
Definition: invert_quda.h:133

quda::SolverParam::Nsteps
int Nsteps
Definition: invert_quda.h:187

quda::SolverParam::precision
QudaPrecision precision
Definition: invert_quda.h:136

quda::SolverParam::compute_null_vector
QudaComputeNullVector compute_null_vector
Definition: invert_quda.h:61

quda::SolverParam::pipeline
int pipeline
Definition: invert_quda.h:106

quda::SolverParam::true_res
double true_res
Definition: invert_quda.h:124

quda::SolverParam::is_preconditioner
bool is_preconditioner
whether the solver is acting as a preconditioner for another solver
Definition: invert_quda.h:238

quda::SolverParam::schwarz_type
QudaSchwarzType schwarz_type
Definition: invert_quda.h:214

quda::SolverParam::tol_precondition
double tol_precondition
Definition: invert_quda.h:196

quda::SolverParam::max_res_increase_total
int max_res_increase_total
Definition: invert_quda.h:90

quda::SolverParam::residual_type
QudaResidualType residual_type
Definition: invert_quda.h:49

quda::SolverParam::precondition_cycle
int precondition_cycle
Definition: invert_quda.h:193

quda::SolverParam::maxiter_precondition
int maxiter_precondition
Definition: invert_quda.h:199

quda::SolverParam::precision_precondition
QudaPrecision precision_precondition
Definition: invert_quda.h:145

quda::SolverParam::precision_sloppy
QudaPrecision precision_sloppy
Definition: invert_quda.h:139

quda::SolverParam::deflate
bool deflate
Definition: invert_quda.h:52

quda::SolverParam::true_res_hq
double true_res_hq
Definition: invert_quda.h:127

quda::SolverParam::sloppy_converge
bool sloppy_converge
Definition: invert_quda.h:121

quda::SolverParam::max_res_increase
int max_res_increase
Definition: invert_quda.h:85

quda::SolverParam::verbosity_precondition
QudaVerbosity verbosity_precondition
Definition: invert_quda.h:236

quda::SolverParam::eig_param
QudaEigParam eig_param
Definition: invert_quda.h:55

quda::SolverParam::use_init_guess
QudaUseInitGuess use_init_guess
Definition: invert_quda.h:58

quda::SolverParam::secs
double secs
Definition: invert_quda.h:217

quda::SolverParam::maxiter
int maxiter
Definition: invert_quda.h:130

quda::SolverParam::tol_hq
double tol_hq
Definition: invert_quda.h:115

quda::SolverParam::gflops
double gflops
Definition: invert_quda.h:220

quda::SolverParam::tol_restart
double tol_restart
Definition: invert_quda.h:112

quda::SolverParam::compute_true_res
bool compute_true_res
Definition: invert_quda.h:118

quda::SolverParam::tol
double tol
Definition: invert_quda.h:109

quda::SolverParam::inv_type_precondition
QudaInverterType inv_type_precondition
Definition: invert_quda.h:28

quda::SolverParam::delta
double delta
Definition: invert_quda.h:64

quda::SolverParam::Nkrylov
int Nkrylov
Definition: invert_quda.h:190

quda::SolverParam::global_reduction
bool global_reduction
whether to use a global or local (node) reduction for this solver
Definition: invert_quda.h:240

util_quda.h

pushVerbosity
void pushVerbosity(QudaVerbosity verbosity)
Push a new verbosity onto the stack.
Definition: util_quda.cpp:83

printfQuda
#define printfQuda(...)
Definition: util_quda.h:114

popVerbosity
void popVerbosity()
Pop the verbosity restoring the prior one on the stack.
Definition: util_quda.cpp:94

getVerbosity
QudaVerbosity getVerbosity()
Definition: util_quda.cpp:21

warningQuda
#define warningQuda(...)
Definition: util_quda.h:132

errorQuda
#define errorQuda(...)
Definition: util_quda.h:120