20 long dus =
end.tv_usec -
start.tv_usec;
21 return ds + 0.000001*dus;
65 std::vector<ColorSpinorField*> a(N), b(1);
66 for (
int j=0; j<N; j++) {
73 for (
int j=0; j<N; j++) {
74 printfQuda(
"%d/%d vectorized %e %e, regular %e %e\n", j+1, N, Beta[j].real(), Beta[j].imag(),
79 for (
int j=0; j<N; j++) beta[i+j][k] = Beta[j];
83 void updateAp(
Complex **beta, std::vector<ColorSpinorField*> Ap,
int begin,
int size,
int k) {
86 for (
int i=0; i<size; i++) beta_[i] = -beta[i+begin][k];
88 std::vector<ColorSpinorField*> Ap_(Ap.begin() + begin, Ap.begin() + begin + size);
89 std::vector<ColorSpinorField*> Apk(Ap.begin() + k, Ap.begin() + k + 1);
100 for (
int i=0; i<k; i++) {
108 for (
int i=0; i<k-1; i++) {
116 for (
int i=0; i<k-(N-1); i+=N) {
122 for (
int r = N-1; r>0; r--) {
123 if ((k%N) % r == 0) {
137 for (
int k=n-1; k>=0;k--) {
139 for (
int j=k+1;j<n; j++) {
140 delta[k] -= beta[k][j]*delta[j];
142 delta[k] /= gamma[k];
147 std::vector<ColorSpinorField *> p)
152 backSubs(alpha, beta, gamma, delta, k);
154 std::vector<ColorSpinorField*> X;
157 std::vector<ColorSpinorField*> P;
158 for (
int i=0; i<k; i++) P.push_back(p[i]);
170 n_krylov(
param.Nkrylov),
194 p.resize(n_krylov + 1);
198 beta =
new Complex *[n_krylov];
199 for (
int i = 0; i < n_krylov; i++) beta[i] =
new Complex[n_krylov];
200 gamma =
new double[n_krylov];
206 matMdagM(matEig.Expose()),
209 n_krylov(
param.Nkrylov),
216 p.resize(n_krylov + 1);
220 beta =
new Complex *[n_krylov];
221 for (
int i = 0; i < n_krylov; i++) beta[i] =
new Complex[n_krylov];
222 gamma =
new double[n_krylov];
229 for (
int i = 0; i < n_krylov; i++)
delete[] beta[i];
236 if (r_sloppy && r_sloppy != rp)
delete r_sloppy;
239 for (
int i = 0; i < n_krylov + 1; i++)
240 if (p[i])
delete p[i];
241 for (
int i = 0; i < n_krylov; i++)
242 if (Ap[i])
delete Ap[i];
244 if (tmp_sloppy != tmpp)
delete tmp_sloppy;
245 if (tmpp)
delete tmpp;
287 r_sloppy = K ? rp :
nullptr;
367 const bool use_heavy_quark_res =
376 double heavy_quark_res = 0.0;
380 int resIncreaseTotal = 0;
392 double maxr_deflate =
sqrt(r2);
393 bool l2_converge =
false;
406 PrintStats(
"GCR", total_iter+k, r2, b2, heavy_quark_res);
411 (*K)(*p[k], rSloppy);
420 printfQuda(
"GCR debug iter=%d: Ap2=%e, p2=%e, r2=%e\n",
428 printfQuda(
"GCR debug iter=%d: Apr=(%e,%e,%e)\n", total_iter, Apr.x, Apr.y, Apr.z);
429 for (
int i=0; i<k; i++)
430 for (
int j=0; j<=k; j++)
431 printfQuda(
"GCR debug iter=%d: beta[%d][%d] = (%e,%e)\n",
432 total_iter, i, j, real(beta[i][j]), imag(beta[i][j]));
435 gamma[k] =
sqrt(Apr.z);
436 if (gamma[k] == 0.0)
errorQuda(
"GCR breakdown\n");
437 alpha[k] =
Complex(Apr.x, Apr.y) / gamma[k];
440 r2 =
blas::cabxpyzAxNorm(1.0 / gamma[k], -alpha[k], *Ap[k], K ? rSloppy : *p[k], K ? rSloppy : *p[k + 1]);
445 PrintStats(
"GCR", total_iter, r2, b2, heavy_quark_res);
466 maxr_deflate =
sqrt(r2);
475 warningQuda(
"GCR: new reliable residual norm %e is greater than previous reliable residual norm %e (total #inc %i)",
476 sqrt(r2),
sqrt(r2_old), resIncreaseTotal);
477 if (resIncrease > maxResIncrease or resIncreaseTotal > maxResIncreaseTotal) {
478 warningQuda(
"GCR: solver exiting due to too many true residual norm increases");
491 PrintStats(
"GCR (restart)", restart, r2, b2, heavy_quark_res);
497 if (r2 <
stop) l2_converge =
true;
510 if (K) gflops += K->
flops()*1e-9;
Communication-avoiding GCR solver. This solver does un-preconditioned GCR, first building up a polyno...
Conjugate-Gradient Solver.
ColorSpinorField * CreateAlias(const ColorSpinorParam ¶m)
Create a field that aliases this field's storage. The alias field can use a different precision than ...
static ColorSpinorField * Create(const ColorSpinorParam ¶m)
unsigned long long flops() const
void deflateSVD(std::vector< ColorSpinorField * > &sol, const std::vector< ColorSpinorField * > &vec, const std::vector< ColorSpinorField * > &evecs, const std::vector< Complex > &evals, bool accumulate=false) const
Deflate a set of source vectors with a set of left and right singular vectors.
void computeEvals(const DiracMatrix &mat, std::vector< ColorSpinorField * > &evecs, std::vector< Complex > &evals, int size)
Compute eigenvalues and their residiua.
void computeSVD(const DiracMatrix &mat, std::vector< ColorSpinorField * > &evecs, std::vector< Complex > &evals)
Computes Left/Right SVD from pre computed Right/Left.
GCR(const DiracMatrix &mat, const DiracMatrix &matSloppy, const DiracMatrix &matPrecon, const DiracMatrix &matEig, SolverParam ¶m, TimeProfile &profile)
void operator()(ColorSpinorField &out, ColorSpinorField &in)
QudaPrecision Precision() const
bool convergence(double r2, double hq2, double r2_tol, double hq_tol)
std::vector< ColorSpinorField * > evecs
virtual double flops() const
Return flops.
void destroyDeflationSpace()
Destroy the allocated deflation space.
void PrintSummary(const char *name, int k, double r2, double b2, double r2_tol, double hq_tol)
Prints out the summary of the solver convergence (requires a verbosity of QUDA_SUMMARIZE)....
const DiracMatrix & matEig
static double stopping(double tol, double b2, QudaResidualType residual_type)
Set the solver L2 stopping condition.
void extendSVDDeflationSpace()
Extends the deflation space to twice its size for SVD deflation.
std::vector< Complex > evals
void PrintStats(const char *name, int k, double r2, double b2, double hq2)
Prints out the running statistics of the solver (requires a verbosity of QUDA_VERBOSE)
void constructDeflationSpace(const ColorSpinorField &meta, const DiracMatrix &mat)
Constructs the deflation space and eigensolver.
const DiracMatrix & matPrecon
const DiracMatrix & matSloppy
double Last(QudaProfileType idx)
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
cudaColorSpinorField * tmp
@ QUDA_CPU_FIELD_LOCATION
@ QUDA_USE_INIT_GUESS_YES
@ QUDA_HEAVY_QUARK_RESIDUAL
@ QUDA_L2_RELATIVE_RESIDUAL
@ QUDA_EIG_BLK_TR_LANCZOS
@ QUDA_PRESERVE_SOURCE_NO
@ QUDA_PRESERVE_SOURCE_YES
@ QUDA_COMPUTE_NULL_VECTOR_NO
void init()
Create the BLAS context.
Complex caxpyDotzy(const Complex &a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
double3 HeavyQuarkResidualNorm(ColorSpinorField &x, ColorSpinorField &r)
double xmyNorm(ColorSpinorField &x, ColorSpinorField &y)
double cabxpyzAxNorm(double a, const Complex &b, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
void zero(ColorSpinorField &a)
double norm2(const ColorSpinorField &a)
double3 cDotProductNormA(ColorSpinorField &a, ColorSpinorField &b)
void caxpy(const Complex &a, ColorSpinorField &x, ColorSpinorField &y)
void copy(ColorSpinorField &dst, const ColorSpinorField &src)
Complex cDotProduct(ColorSpinorField &, ColorSpinorField &)
void stop()
Stop profiling.
void start()
Start profiling.
void updateAp(Complex **beta, std::vector< ColorSpinorField * > Ap, int begin, int size, int k)
void updateSolution(ColorSpinorField &x, const Complex *alpha, Complex **const beta, double *gamma, int k, std::vector< ColorSpinorField * > p)
void orthoDir(Complex **beta, std::vector< ColorSpinorField * > Ap, int k, int pipeline)
std::complex< double > Complex
__host__ __device__ ValueType sqrt(ValueType x)
void computeBeta(Complex **beta, std::vector< ColorSpinorField * > Ap, int i, int N, int k)
double timeInterval(struct timeval start, struct timeval end)
void backSubs(const Complex *alpha, Complex **const beta, const double *gamma, Complex *delta, int n)
void fillInnerSolveParam(SolverParam &inner, const SolverParam &outer)
QudaPreserveSource preserve_source
QudaComputeNullVector compute_null_vector
bool is_preconditioner
verbosity to use for preconditioner
QudaSchwarzType schwarz_type
int max_res_increase_total
QudaResidualType residual_type
QudaPrecision precision_precondition
QudaPrecision precision_sloppy
QudaVerbosity verbosity_precondition
QudaUseInitGuess use_init_guess
QudaInverterType inv_type_precondition
bool global_reduction
whether the solver acting as a preconditioner for another solver
void pushVerbosity(QudaVerbosity verbosity)
Push a new verbosity onto the stack.
void popVerbosity()
Pop the verbosity restoring the prior one on the stack.
QudaVerbosity getVerbosity()