19 long ds = end.tv_sec - start.tv_sec;
20 long dus = end.tv_usec - start.tv_usec;
21 return ds + 0.000001*dus;
64 std::vector<ColorSpinorField*> a(N), b(1);
65 for (
int j=0; j<N; j++) {
72 for (
int j=0; j<N; j++) {
73 printfQuda(
"%d/%d vectorized %e %e, regular %e %e\n", j+1, N, Beta[j].real(), Beta[j].imag(),
78 for (
int j=0; j<N; j++) beta[i+j][k] = Beta[j];
85 for (
int i=0; i<
size; i++) beta_[i] = -beta[i+begin][k];
87 std::vector<ColorSpinorField*> Ap_(Ap.begin() + begin, Ap.begin() + begin +
size);
88 std::vector<ColorSpinorField*> Apk(Ap.begin() + k, Ap.begin() + k + 1);
99 for (
int i=0; i<k; i++) {
107 for (
int i=0; i<k-1; i++) {
115 for (
int i=0; i<k-(N-1); i+=N) {
121 for (
int r = N-1; r>0; r--) {
122 if ((k%N) % r == 0) {
136 for (
int k=n-1; k>=0;k--) {
138 for (
int j=k+1;j<n; j++) {
139 delta[k] -= beta[k][j]*delta[j];
141 delta[k] /= gamma[k];
146 double *gamma,
int k, std::vector<ColorSpinorField*> p) {
151 backSubs(alpha, beta, gamma, delta, k);
153 std::vector<ColorSpinorField*>
X;
156 std::vector<ColorSpinorField*> P;
157 for (
int i=0; i<k; i++) P.push_back(p[i]);
165 Solver(param, profile), mat(mat), matSloppy(matSloppy), matPrecon(matPrecon), K(0), Kparam(param),
166 nKrylov(param.Nkrylov),
init(false), rp(nullptr), yp(nullptr), tmpp(nullptr), y_sloppy(nullptr),
173 K =
new CG(matSloppy, matPrecon,
Kparam, profile);
177 K =
new MR(matSloppy, matPrecon,
Kparam, profile);
198 Solver(param, profile), mat(mat), matSloppy(matSloppy), matPrecon(matPrecon), K(&K),
Kparam(param),
225 for (
int i=0; i<nKrylov+1; i++)
if (
p[i])
delete p[i];
226 for (
int i=0; i<
nKrylov; i++)
if (
Ap[i])
delete Ap[i];
234 if (veci)
delete veci;
309 std::vector<ColorSpinorField *> rhs;
345 const bool use_heavy_quark_res =
354 double heavy_quark_res = 0.0;
358 int resIncreaseTotal = 0;
370 bool l2_converge =
false;
383 PrintStats(
"GCR", total_iter+k, r2, b2, heavy_quark_res);
388 (*K)(*
p[k], rSloppy);
400 printfQuda(
"GCR debug iter=%d: Ap2=%e, p2=%e, r2=%e\n",
408 printfQuda(
"GCR debug iter=%d: Apr=(%e,%e,%e)\n", total_iter, Apr.x, Apr.y, Apr.z);
409 for (
int i=0; i<k; i++)
410 for (
int j=0; j<=k; j++)
411 printfQuda(
"GCR debug iter=%d: beta[%d][%d] = (%e,%e)\n",
412 total_iter, i, j, real(
beta[i][j]), imag(
beta[i][j]));
425 PrintStats(
"GCR", total_iter, r2, b2, heavy_quark_res);
447 warningQuda(
"GCR: new reliable residual norm %e is greater than previous reliable residual norm %e (total #inc %i)",
448 sqrt(r2),
sqrt(r2_old), resIncreaseTotal);
449 if (resIncrease > maxResIncrease or resIncreaseTotal > maxResIncreaseTotal) {
450 warningQuda(
"GCR: solver exiting due to too many true residual norm increases");
463 PrintStats(
"GCR (restart)", restart, r2, b2, heavy_quark_res);
470 if (r2 < stop) l2_converge =
true;
485 if (
K) gflops +=
K->
flops()*1e-9;
bool global_reduction
whether the solver is acting as a preconditioner for another solver
double timeInterval(struct timeval start, struct timeval end)
QudaSchwarzType schwarz_type
void computeBeta(Complex **beta, std::vector< ColorSpinorField *> Ap, int i, int N, int k)
void setPrecision(QudaPrecision precision, QudaPrecision ghost_precision=QUDA_INVALID_PRECISION, bool force_native=false)
QudaVerbosity verbosity_precondition
virtual double flops() const
double3 cDotProductNormA(ColorSpinorField &a, ColorSpinorField &b)
QudaVerbosity getVerbosity()
std::vector< ColorSpinorField * > p
sloppy residual vector
double norm2(const ColorSpinorField &a)
__host__ __device__ ValueType sqrt(ValueType x)
Complex cDotProduct(ColorSpinorField &, ColorSpinorField &)
void PrintStats(const char *name, int k, double r2, double b2, double hq2)
Prints out the running statistics of the solver (requires a verbosity of QUDA_VERBOSE) ...
cudaColorSpinorField * tmp
Communication-avoiding GCR solver. This solver does un-preconditioned GCR, first building up a polyno...
static ColorSpinorField * Create(const ColorSpinorParam &param)
bool convergence(double r2, double hq2, double r2_tol, double hq_tol)
void operator()(ColorSpinorField &out, ColorSpinorField &in)
void copy(ColorSpinorField &dst, const ColorSpinorField &src)
double xmyNorm(ColorSpinorField &x, ColorSpinorField &y)
QudaInverterType inv_type_precondition
QudaPreserveSource preserve_source
int max_res_increase_total
std::vector< ColorSpinorField * > defl_tmp1
ColorSpinorField * r_sloppy
sloppy solution vector
void fillInnerSolveParam(SolverParam &inner, const SolverParam &outer)
std::vector< ColorSpinorField * > defl_tmp2
void backSubs(const Complex *alpha, Complex **const beta, const double *gamma, Complex *delta, int n)
GCR(DiracMatrix &mat, DiracMatrix &matSloppy, DiracMatrix &matPrecon, SolverParam &param, TimeProfile &profile)
QudaComputeNullVector compute_null_vector
void deflateSVD(std::vector< ColorSpinorField *> vec_defl, std::vector< ColorSpinorField *> vec, std::vector< ColorSpinorField *> evecs, std::vector< Complex > evals)
Deflate vector with both left and right singular vectors.
double Last(QudaProfileType idx)
QudaResidualType residual_type
void updateSolution(ColorSpinorField &x, const Complex *alpha, Complex **const beta, double *gamma, int k, std::vector< ColorSpinorField *> p)
static double stopping(double tol, double b2, QudaResidualType residual_type)
Set the solver L2 stopping condition.
void axpy(double a, ColorSpinorField &x, ColorSpinorField &y)
std::vector< ColorSpinorField * > Ap
bool is_preconditioner
verbosity to use for preconditioner
ColorSpinorField * yp
residual vector
void constructDeflationSpace(const ColorSpinorField &meta, const DiracMatrix &mat, bool svd)
Constructs the deflation space.
double cabxpyzAxNorm(double a, const Complex &b, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
double3 HeavyQuarkResidualNorm(ColorSpinorField &x, ColorSpinorField &r)
std::complex< double > Complex
std::vector< Complex > evals
void init()
Create the CUBLAS context.
Complex caxpyDotzy(const Complex &a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
void caxpy(const Complex &a, ColorSpinorField &x, ColorSpinorField &y)
void zero(ColorSpinorField &a)
void pushVerbosity(QudaVerbosity verbosity)
Push a new verbosity onto the stack.
QudaPrecision precision_precondition
std::vector< ColorSpinorField * > evecs
ColorSpinorField * y_sloppy
temporary for mat-vec
void orthoDir(Complex **beta, std::vector< ColorSpinorField *> Ap, int k, int pipeline)
Conjugate-Gradient Solver.
unsigned long long flops() const
ColorSpinorField * tmpp
high precision accumulator
void xpy(ColorSpinorField &x, ColorSpinorField &y)
const DiracMatrix & matPrecon
QudaUseInitGuess use_init_guess
void popVerbosity()
Pop the verbosity restoring the prior one on the stack.
const DiracMatrix & matSloppy
QudaPrecision precision_sloppy
bool use_sloppy_partial_accumulator
void PrintSummary(const char *name, int k, double r2, double b2, double r2_tol, double hq_tol)
Prints out the summary of the solver convergence (requires a verbosity of QUDA_SUMMARIZE). Assumes SolverParam.true_res and SolverParam.true_res_hq has been set.
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
QudaPrecision Precision() const
void updateAp(Complex **beta, std::vector< ColorSpinorField *> Ap, int begin, int size, int k)
const Dirac * Expose() const