20 void BiCGstabL::computeTau(
Complex **tau,
double* sigma, std::vector<ColorSpinorField*> r,
int begin,
int size,
int j)
23 std::vector<ColorSpinorField*> a(size), b(1);
24 for (
int k=0; k<size; k++)
32 for (
int k=0; k<size; k++)
34 tau[begin+k][j] = Tau[k]/sigma[begin+k];
39 void BiCGstabL::updateR(
Complex **tau, std::vector<ColorSpinorField*> r,
int begin,
int size,
int j)
43 for (
int i=0; i<size; i++)
45 tau_[i] = -tau[i+begin][j];
48 std::vector<ColorSpinorField*> r_(r.begin() + begin, r.begin() + begin + size);
49 std::vector<ColorSpinorField*> rj(r.begin() + j, r.begin() + j + 1);
56 void BiCGstabL::orthoDir(
Complex **tau,
double* sigma, std::vector<ColorSpinorField*> r,
int j,
int pipeline)
62 for (
int i=1; i<j; i++)
74 for (
int i=1; i<j-1; i++)
76 tau[i+1][j] =
blas::caxpyDotzy(-tau[i][j], *r[i], *r[j], *r[i+1])/sigma[i+1];
89 for (step = 0; step < (j-1)/N; step++)
91 computeTau(tau, sigma, r, 1+step*N, N, j);
92 updateR(tau, r, 1+step*N, N, j);
98 computeTau(tau, sigma, r, 1+step*N, (j-1)%N, j);
99 updateR(tau, r, 1+step*N, (j-1)%N, j);
107 void BiCGstabL::updateUend(
Complex *gamma, std::vector<ColorSpinorField *> u,
int n_krylov)
111 for (
int i = 0; i < n_krylov; i++) { gamma_[i] = -gamma[i + 1]; }
113 std::vector<ColorSpinorField*> u_(u.begin() + 1, u.end());
114 std::vector<ColorSpinorField*> u0(u.begin(), u.begin() + 1);
122 std::vector<ColorSpinorField *> r, ColorSpinorField &x,
int n_krylov)
126 blas::caxpy(-gamma_prime[n_krylov], *r[n_krylov], *r[0]);
127 for (
int j = 1; j < n_krylov; j++)
139 gamma_prime_prime_[0] = gamma[1];
140 gamma_prime_prime_[n_krylov] = 0.0;
141 gamma_prime_[0] = 0.0;
142 gamma_prime_[n_krylov] = -gamma_prime[n_krylov];
143 for (
int i = 1; i < n_krylov; i++) {
144 gamma_prime_prime_[i] = gamma_prime_prime[i];
145 gamma_prime_[i] = -gamma_prime[i];
149 delete[] gamma_prime_prime_;
150 delete[] gamma_prime_;
177 std::vector<ColorSpinorField*> &r;
178 std::vector<ColorSpinorField*> &u;
201 x(x), r(r), u(u), alpha(alpha), beta(beta), j_max(j_max),
215 static int count = 0;
220 for (
int i= (count*j_max)/n_update; i<((count+1)*j_max)/n_update && i<j_max; i++)
233 for (
int i= (count*j_max)/n_update; i<((count+1)*j_max)/n_update && i<j_max; i++)
240 if (++count == n_update) count = 0;
251 n_krylov(
param.Nkrylov),
254 r.resize(n_krylov + 1);
255 u.resize(n_krylov + 1);
257 gamma =
new Complex[n_krylov + 1];
258 gamma_prime =
new Complex[n_krylov + 1];
259 gamma_prime_prime =
new Complex[n_krylov + 1];
260 sigma =
new double[n_krylov + 1];
262 tau =
new Complex *[n_krylov + 1];
263 for (
int i = 0; i < n_krylov + 1; i++) { tau[i] =
new Complex[n_krylov + 1]; }
265 std::stringstream ss;
266 ss <<
"BiCGstab-" << n_krylov;
267 solver_name = ss.str();
273 delete[] gamma_prime;
274 delete[] gamma_prime_prime;
277 for (
int i = 0; i < n_krylov + 1; i++) {
delete[] tau[i]; }
281 delete r_sloppy_saved_p;
283 for (
int i = 1; i < n_krylov + 1; i++) {
288 delete x_sloppy_saved_p;
304 int BiCGstabL::reliable(
double &rNorm,
double &maxrx,
double &maxrr,
const double &r2,
const double &delta) {
307 if (rNorm > maxrx) maxrx = rNorm;
308 if (rNorm > maxrr) maxrr = rNorm;
311 int updateR = (rNorm < delta*maxrr) ? 1 : 0;
355 for (
int i = 0; i <= n_krylov; i++) {
359 r_sloppy_saved_p = r[0];
397 errorQuda(
"Null vector computing requires non-zero guess!");
420 r[0] = r_sloppy_saved_p;
432 x_sloppyp = x_sloppy_saved_p;
449 for (
int i = 1; i <= n_krylov; i++) {
blas::zero(*r[i]); }
457 const bool use_heavy_quark_res =
482 double rNorm =
sqrt(r2);
483 double maxrr = rNorm;
484 double maxrx = rNorm;
486 PrintStats(solver_name.c_str(), k, r2, b2, heavy_quark_res);
493 for (
int j = 0; j < n_krylov; j++) {
497 beta = alpha*rho1/rho0;
547 for (
int j = 1; j <= n_krylov; j++) {
558 orthoDir(tau, sigma, r, j,
pipeline);
569 gamma_prime[j] =
Complex(rjr.x, rjr.y)/sigma[j];
573 gamma[n_krylov] = gamma_prime[n_krylov];
574 omega = gamma[n_krylov];
577 for (
int j = n_krylov - 1; j > 0; j--) {
579 gamma[j] = gamma_prime[j];
580 for (
int i = j + 1; i <= n_krylov; i++) { gamma[j] = gamma[j] - tau[j][i] * gamma[i]; }
584 for (
int j = 1; j < n_krylov; j++) {
585 gamma_prime_prime[j] = gamma[j+1];
586 for (
int i = j + 1; i < n_krylov; i++) {
587 gamma_prime_prime[j] = gamma_prime_prime[j] + tau[j][i]*gamma[i+1];
594 updateUend(gamma, u, n_krylov);
602 updateXRend(gamma, gamma_prime, gamma_prime_prime, r, x_sloppy, n_krylov);
609 if (use_heavy_quark_res && k%heavy_quark_check==0) {
628 if (reliable(rNorm, maxrx, maxrr, r2, delta))
664 PrintStats(solver_name.c_str(), k, r2, b2, heavy_quark_res);
BiCGstabL(const DiracMatrix &mat, const DiracMatrix &matSloppy, SolverParam ¶m, TimeProfile &profile)
void operator()(ColorSpinorField &out, ColorSpinorField &in)
void apply(const qudaStream_t &stream)
void update_j_max(int new_j_max)
BiCGstabLUpdate(ColorSpinorField *x, std::vector< ColorSpinorField * > &r, std::vector< ColorSpinorField * > &u, Complex *alpha, Complex *beta, BiCGstabLUpdateType update_type, int j_max, int n_update)
virtual ~BiCGstabLUpdate()
void update_update_type(BiCGstabLUpdateType new_update_type)
static ColorSpinorField * Create(const ColorSpinorParam ¶m)
unsigned long long flops() const
virtual int getStencilSteps() const =0
QudaPrecision Precision() const
bool convergence(double r2, double hq2, double r2_tol, double hq_tol)
void PrintSummary(const char *name, int k, double r2, double b2, double r2_tol, double hq_tol)
Prints out the summary of the solver convergence (requires a verbosity of QUDA_SUMMARIZE)....
static double stopping(double tol, double b2, QudaResidualType residual_type)
Set the solver L2 stopping condition.
void PrintStats(const char *name, int k, double r2, double b2, double hq2)
Prints out the running statistics of the solver (requires a verbosity of QUDA_VERBOSE)
const DiracMatrix & matSloppy
double Last(QudaProfileType idx)
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
@ QUDA_USE_INIT_GUESS_YES
@ QUDA_HEAVY_QUARK_RESIDUAL
@ QUDA_PRESERVE_SOURCE_NO
@ QUDA_COMPUTE_NULL_VECTOR_NO
void init()
Create the BLAS context.
Complex caxpyDotzy(const Complex &a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
void caxpby(const Complex &a, ColorSpinorField &x, const Complex &b, ColorSpinorField &y)
double3 HeavyQuarkResidualNorm(ColorSpinorField &x, ColorSpinorField &r)
double xmyNorm(ColorSpinorField &x, ColorSpinorField &y)
void caxpyBxpz(const Complex &, ColorSpinorField &, ColorSpinorField &, const Complex &, ColorSpinorField &)
double3 xpyHeavyQuarkResidualNorm(ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &r)
void zero(ColorSpinorField &a)
double norm2(const ColorSpinorField &a)
double3 cDotProductNormA(ColorSpinorField &a, ColorSpinorField &b)
void caxpy(const Complex &a, ColorSpinorField &x, ColorSpinorField &y)
void xpy(ColorSpinorField &x, ColorSpinorField &y)
void copy(ColorSpinorField &dst, const ColorSpinorField &src)
Complex cDotProduct(ColorSpinorField &, ColorSpinorField &)
void stop()
Stop profiling.
std::complex< double > Complex
__host__ __device__ ValueType sqrt(ValueType x)
void updateR()
update the radius for halos.
cudaStream_t qudaStream_t
QudaPreserveSource preserve_source
QudaComputeNullVector compute_null_vector
bool is_preconditioner
verbosity to use for preconditioner
bool use_sloppy_partial_accumulator
QudaResidualType residual_type
QudaPrecision precision_sloppy
QudaUseInitGuess use_init_guess
QudaVerbosity getVerbosity()