24 std::vector<ColorSpinorField*> a(size), b(1);
25 for (
int k=0; k<
size; k++)
33 for (
int k=0; k<
size; k++)
35 tau[begin+k][j] = Tau[k]/sigma[begin+k];
44 for (
int i=0; i<
size; i++)
46 tau_[i] = -tau[i+begin][j];
49 std::vector<ColorSpinorField*> r_(r.begin() + begin, r.begin() + begin +
size);
50 std::vector<ColorSpinorField*> rj(r.begin() + j, r.begin() + j + 1);
63 for (
int i=1; i<j; i++)
75 for (
int i=1; i<j-1; i++)
77 tau[i+1][j] =
blas::caxpyDotzy(-tau[i][j], *r[i], *r[j], *r[i+1])/sigma[i+1];
90 for (step = 0; step < (j-1)/N; step++)
93 updateR(tau, r, 1+step*N, N, j);
99 computeTau(tau, sigma, r, 1+step*N, (j-1)%N, j);
100 updateR(tau, r, 1+step*N, (j-1)%N, j);
112 for (
int i = 0; i <
nKrylov; i++)
114 gamma_[i] = -gamma[i+1];
117 std::vector<ColorSpinorField*> u_(u.begin() + 1, u.end());
118 std::vector<ColorSpinorField*> u0(u.begin(), u.begin() + 1);
144 gamma_prime_prime_[0] = gamma[1];
145 gamma_prime_prime_[
nKrylov] = 0.0;
146 gamma_prime_[0] = 0.0;
148 for (
int i = 1; i <
nKrylov; i++)
150 gamma_prime_prime_[i] = gamma_prime_prime[i];
151 gamma_prime_[i] = -gamma_prime[i];
155 delete[] gamma_prime_prime_;
156 delete[] gamma_prime_;
182 std::vector<ColorSpinorField*> &
r;
183 std::vector<ColorSpinorField*> &
u;
206 x(x), r(r), u(u), alpha(alpha), beta(beta), j_max(j_max),
219 static int count = 0;
224 for (
int i= (count*j_max)/n_update; i<((count+1)*j_max)/n_update && i<j_max; i++)
237 for (
int i= (count*j_max)/n_update; i<((count+1)*j_max)/n_update && i<j_max; i++)
244 if (++count == n_update) count = 0;
256 Solver(param, profile), mat(mat), matSloppy(matSloppy),
nKrylov(param.Nkrylov),
init(false)
269 std::stringstream ss;
281 for (
int i = 0; i <
nKrylov+1; i++) {
delete[]
tau[i]; }
287 for (
int i = 1; i < nKrylov+1; i++) {
308 int BiCGstabL::reliable(
double &rNorm,
double &maxrx,
double &maxrr,
const double &r2,
const double &delta) {
311 if (rNorm > maxrx) maxrx = rNorm;
312 if (rNorm > maxrr) maxrr = rNorm;
315 int updateR = (rNorm < delta*maxrr) ? 1 : 0;
359 for (
int i = 0; i <=
nKrylov; i++) {
401 errorQuda(
"Null vector computing requires non-zero guess!");
453 for (
int i = 1; i <=
nKrylov; i++)
464 const bool use_heavy_quark_res =
489 double rNorm =
sqrt(r2);
490 double maxrr = rNorm;
491 double maxrx = rNorm;
500 for (
int j = 0; j <
nKrylov; j++) {
517 bicgstabl_update.update_j_max(j);
541 bicgstabl_update.update_j_max(j);
554 for (
int j = 1; j <=
nKrylov; j++)
586 for (
int j = nKrylov-1; j > 0; j--)
590 for (
int i = j+1; i <=
nKrylov; i++)
597 for (
int j = 1; j <
nKrylov; j++)
600 for (
int i = j+1; i <
nKrylov; i++)
624 if (use_heavy_quark_res && k%heavy_quark_check==0) {
643 if (
reliable(rNorm, maxrx, maxrr, r2, delta))
ColorSpinorField * x_sloppy_saved_p
void update_update_type(BiCGstabLUpdateType new_update_type)
ColorSpinorField * yp
Full precision residual.
void setPrecision(QudaPrecision precision, QudaPrecision ghost_precision=QUDA_INVALID_PRECISION, bool force_native=false)
double3 cDotProductNormA(ColorSpinorField &a, ColorSpinorField &b)
ColorSpinorField * r0_saved_p
Sloppy solution vector.
QudaVerbosity getVerbosity()
double norm2(const ColorSpinorField &a)
__host__ __device__ ValueType sqrt(ValueType x)
Complex cDotProduct(ColorSpinorField &, ColorSpinorField &)
void PrintStats(const char *name, int k, double r2, double b2, double hq2)
Prints out the running statistics of the solver (requires a verbosity of QUDA_VERBOSE) ...
BiCGstabL(DiracMatrix &mat, DiracMatrix &matSloppy, SolverParam ¶m, TimeProfile &profile)
double3 xpyHeavyQuarkResidualNorm(ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &r)
static ColorSpinorField * Create(const ColorSpinorParam ¶m)
void computeTau(Complex **tau, double *sigma, std::vector< ColorSpinorField *> r, int begin, int size, int j)
bool convergence(double r2, double hq2, double r2_tol, double hq_tol)
void copy(ColorSpinorField &dst, const ColorSpinorField &src)
double xmyNorm(ColorSpinorField &x, ColorSpinorField &y)
std::vector< ColorSpinorField * > u
QudaPreserveSource preserve_source
ColorSpinorField * r_fullp
BiCGstabLUpdate(ColorSpinorField *x, std::vector< ColorSpinorField *> &r, std::vector< ColorSpinorField *> &u, Complex *alpha, Complex *beta, BiCGstabLUpdateType update_type, int j_max, int n_update)
void caxpyBxpz(const Complex &, ColorSpinorField &, ColorSpinorField &, const Complex &, ColorSpinorField &)
QudaComputeNullVector compute_null_vector
std::vector< ColorSpinorField * > & u
double Last(QudaProfileType idx)
Complex * gamma_prime_prime
QudaResidualType residual_type
static double stopping(double tol, double b2, QudaResidualType residual_type)
Set the solver L2 stopping condition.
void apply(const cudaStream_t &stream)
bool is_preconditioner
verbosity to use for preconditioner
double3 HeavyQuarkResidualNorm(ColorSpinorField &x, ColorSpinorField &r)
std::complex< double > Complex
std::vector< ColorSpinorField * > r
Sloppy temporary vector.
void orthoDir(Complex **tau, double *sigma, std::vector< ColorSpinorField *> r, int j, int pipeline)
const DiracMatrix & matSloppy
void updateUend(Complex *gamma, std::vector< ColorSpinorField *> u, int nKrylov)
Complex caxpyDotzy(const Complex &a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
void caxpy(const Complex &a, ColorSpinorField &x, ColorSpinorField &y)
void zero(ColorSpinorField &a)
ColorSpinorField * tempp
Full precision temporary.
int reliable(double &rNorm, double &maxrx, double &maxrr, const double &r2, const double &delta)
Current residual, in BiCG language.
void updateR(Complex **tau, std::vector< ColorSpinorField *> r, int begin, int size, int j)
unsigned long long flops() const
void updateXRend(Complex *gamma, Complex *gamma_prime, Complex *gamma_prime_prime, std::vector< ColorSpinorField *> r, ColorSpinorField &x, int nKrylov)
ColorSpinorField * r_sloppy_saved_p
Shadow residual, in BiCG language.
void xpy(ColorSpinorField &x, ColorSpinorField &y)
virtual int getStencilSteps() const =0
void caxpby(const Complex &a, ColorSpinorField &x, const Complex &b, ColorSpinorField &y)
std::vector< ColorSpinorField * > & r
virtual ~BiCGstabLUpdate()
QudaUseInitGuess use_init_guess
QudaPrecision precision_sloppy
bool use_sloppy_partial_accumulator
void PrintSummary(const char *name, int k, double r2, double b2, double r2_tol, double hq_tol)
Prints out the summary of the solver convergence (requires a verbosity of QUDA_SUMMARIZE). Assumes SolverParam.true_res and SolverParam.true_res_hq has been set.
void update_j_max(int new_j_max)
QudaPrecision Precision() const
__device__ unsigned int count[QUDA_MAX_MULTI_REDUCE]
BiCGstabLUpdateType update_type
void operator()(ColorSpinorField &out, ColorSpinorField &in)