20 long dus =
end.tv_usec -
start.tv_usec;
21 return ds + 0.000001*dus;
52 std::vector<ColorSpinorField*>
a(N),
b(1);
53 for (
int j=0; j<N; j++) {
60 for (
int j=0; j<N; j++) {
61 printfQuda(
"%d/%d vectorized %e %e, regular %e %e\n", j+1, N, Beta[j].real(), Beta[j].imag(),
66 for (
int j=0; j<N; j++) beta[
i+j][k] = Beta[j];
73 for (
int i=0;
i<
size;
i++) beta_[
i] = -beta[
i+begin][k];
75 std::vector<ColorSpinorField*> Ap_(Ap.begin() + begin, Ap.begin() + begin +
size);
76 std::vector<ColorSpinorField*> Apk(Ap.begin() + k, Ap.begin() + k + 1);
87 for (
int i=0;
i<k;
i++) {
95 for (
int i=0;
i<k-1;
i++) {
109 for (
int i=0;
i<k-(N-1);
i+=N) {
115 for (
int r = N-1; r>0; r--) {
116 if ((k%N) % r == 0) {
132 for (
int k=
n-1; k>=0;k--) {
134 for (
int j=k+1;j<
n; j++) {
142 double *
gamma,
int k, std::vector<ColorSpinorField*>
p) {
149 std::vector<ColorSpinorField*>
X;
152 std::vector<ColorSpinorField*> P;
153 for (
int i=0;
i<k;
i++) P.push_back(
p[
i]);
162 nKrylov(
param.Nkrylov),
init(false), rp(nullptr), yp(nullptr), tmpp(nullptr), x_sloppy(nullptr),
163 r_sloppy(nullptr), r_pre(nullptr), p_pre(nullptr), rM(nullptr)
193 nKrylov(
param.Nkrylov),
init(false), rp(nullptr), yp(nullptr), tmpp(nullptr), x_sloppy(nullptr),
194 r_sloppy(nullptr), r_pre(nullptr), p_pre(nullptr), rM(nullptr)
227 if (
p[
i])
delete p[
i];
331 const bool use_heavy_quark_res =
340 double heavy_quark_res = 0.0;
344 int resIncreaseTotal = 0;
356 bool l2_converge =
false;
369 PrintStats(
"GCR", total_iter+k, r2, b2, heavy_quark_res);
401 printfQuda(
"GCR debug iter=%d: Ap2=%e, p2=%e, rPre2=%e\n",
410 printfQuda(
"GCR debug iter=%d: Apr=(%e,%e,%e)\n", total_iter, Apr.x, Apr.y, Apr.z);
411 for (
int i=0;
i<k;
i++)
412 for (
int j=0; j<=k; j++)
413 printfQuda(
"GCR debug iter=%d: beta[%d][%d] = (%e,%e)\n",
414 total_iter,
i, j, real(
beta[
i][j]), imag(
beta[
i][j]));
427 PrintStats(
"GCR", total_iter, r2, b2, heavy_quark_res);
448 warningQuda(
"GCR: new reliable residual norm %e is greater than previous reliable residual norm %e (total #inc %i)",
449 sqrt(r2),
sqrt(r2_old), resIncreaseTotal);
450 if (resIncrease > maxResIncrease or resIncreaseTotal > maxResIncreaseTotal) {
451 warningQuda(
"GCR: solver exiting due to too many true residual norm increases");
463 PrintStats(
"GCR (restart)", restart, r2, b2, heavy_quark_res);
470 if (r2 < stop) l2_converge =
true;
bool convergence(const double &r2, const double &hq2, const double &r2_tol, const double &hq_tol)
bool global_reduction
whether the solver is acting as a preconditioner for another solver
double timeInterval(struct timeval start, struct timeval end)
QudaSchwarzType schwarz_type
void computeBeta(Complex **beta, std::vector< ColorSpinorField *> Ap, int i, int N, int k)
QudaVerbosity verbosity_precondition
virtual double flops() const
static double stopping(const double &tol, const double &b2, QudaResidualType residual_type)
double3 cDotProductNormA(ColorSpinorField &a, ColorSpinorField &b)
ColorSpinorField * r_pre
sloppy residual vector
QudaInverterType inv_type
QudaVerbosity getVerbosity()
std::vector< ColorSpinorField * > p
residual vector for doing multi-cycle preconditioning
double norm2(const ColorSpinorField &a)
__host__ __device__ ValueType sqrt(ValueType x)
Complex cDotProduct(ColorSpinorField &, ColorSpinorField &)
std::complex< double > Complex
cudaColorSpinorField * tmp
static ColorSpinorField * Create(const ColorSpinorParam &param)
void operator()(ColorSpinorField &out, ColorSpinorField &in)
void copy(ColorSpinorField &dst, const ColorSpinorField &src)
double xmyNorm(ColorSpinorField &x, ColorSpinorField &y)
QudaInverterType inv_type_precondition
QudaPreserveSource preserve_source
int max_res_increase_total
ColorSpinorField * rM
preconditioner result
ColorSpinorField * r_sloppy
sloppy solution vector
void fillInnerSolveParam(SolverParam &inner, const SolverParam &outer)
void backSubs(const Complex *alpha, Complex **const beta, const double *gamma, Complex *delta, int n)
GCR(DiracMatrix &mat, DiracMatrix &matSloppy, DiracMatrix &matPrecon, SolverParam &param, TimeProfile &profile)
QudaComputeNullVector compute_null_vector
double Last(QudaProfileType idx)
void PrintSummary(const char *name, int k, const double &r2, const double &b2)
double cabxpyAxNorm(const double &a, const Complex &b, ColorSpinorField &x, ColorSpinorField &y)
static unsigned int delta
QudaResidualType residual_type
void updateSolution(ColorSpinorField &x, const Complex *alpha, Complex **const beta, double *gamma, int k, std::vector< ColorSpinorField *> p)
static __inline__ size_t p
std::vector< ColorSpinorField * > Ap
bool is_preconditioner
verbosity to use for preconditioner
ColorSpinorField * yp
residual vector
double3 HeavyQuarkResidualNorm(ColorSpinorField &x, ColorSpinorField &r)
Complex caxpyDotzy(const Complex &a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
double gamma(double) __attribute__((availability(macosx
ColorSpinorField * p_pre
residual passed to preconditioner
void caxpy(const Complex &a, ColorSpinorField &x, ColorSpinorField &y)
void zero(ColorSpinorField &a)
void pushVerbosity(QudaVerbosity verbosity)
QudaPrecision precision_precondition
void axpy(const double &a, ColorSpinorField &x, ColorSpinorField &y)
void orthoDir(Complex **beta, std::vector< ColorSpinorField *> Ap, int k, int pipeline)
unsigned long long flops() const
void PrintStats(const char *, int k, const double &r2, const double &b2, const double &hq2)
ColorSpinorField * tmpp
high precision accumulator
void xpy(ColorSpinorField &x, ColorSpinorField &y)
const DiracMatrix & matPrecon
QudaUseInitGuess use_init_guess
const DiracMatrix & matSloppy
QudaPrecision precision_sloppy
ColorSpinorField * x_sloppy
temporary for mat-vec
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
void updateAp(Complex **beta, std::vector< ColorSpinorField *> Ap, int begin, int size, int k)
cudaEvent_t cudaEvent_t end