40 static int max_eigcg_cycles = 4;
79 run_residual_correction(false),
92 if (run_residual_correction)
return;
94 Tm.diagonal<0>()[
id] = diag_val;
97 Tm.diagonal<+1>()[
id] = offdiag_val;
98 Tm.diagonal<-1>()[
id] = offdiag_val;
127 auto s = std::make_unique<Complex[]>(2 * k);
129 for (
int i = 0; i < 2 * k; i++) Tm(i, i) = Tmvals(i);
131 std::vector<ColorSpinorField *> w_;
138 Map<VectorXcd, Unaligned> s_(s.get(), 2 * k);
141 Tm.col(2 * k).segment(0, 2 * k) = s_;
142 Tm.row(2 * k).segment(0, 2 * k) = s_.adjoint();
150 template <>
void ComputeRitz<libtype::eigen_lib>(
EigCGArgs &args)
152 const int m = args.
m;
153 const int k = args.
k;
155 SelfAdjointEigenSolver<MatrixXcd> es_tm(args.
Tm);
156 args.
ritzVecs.leftCols(k) = es_tm.eigenvectors().leftCols(k);
158 SelfAdjointEigenSolver<MatrixXcd> es_tm1(Map<MatrixXcd, Unaligned, DynamicStride >(args.
Tm.data(), (m-1), (m-1),
DynamicStride(m, 1)));
159 Block<MatrixXcd>(args.
ritzVecs.derived(), 0, k, m-1, k) = es_tm1.eigenvectors().leftCols(k);
160 args.
ritzVecs.block(m-1, k, 1, k).setZero();
162 MatrixXcd Q2k(MatrixXcd::Identity(m, 2*k));
163 HouseholderQR<MatrixXcd> ritzVecs2k_qr( Map<MatrixXcd, Unaligned >(args.
ritzVecs.data(), m, 2*k) );
164 Q2k.applyOnTheLeft( ritzVecs2k_qr.householderQ() );
167 args.
H2k = Q2k.adjoint()*args.
Tm*Q2k;
170 SelfAdjointEigenSolver<MatrixXcd> es_h2k(args.
H2k);
171 Block<MatrixXcd>(args.
ritzVecs.derived(), 0, 0, m, 2*k) = Q2k * es_h2k.eigenvectors();
172 args.
Tmvals.segment(0,2*k) = es_h2k.eigenvalues();
178 template <>
void ComputeRitz<libtype::magma_lib>(
EigCGArgs &args)
181 const int m = args.
m;
182 const int k = args.
k;
186 double *evalm =
static_cast<double *
>( args.
Tmvals.data());
188 cudaHostRegister(
static_cast<void *
>(evecm), m*m*
sizeof(
Complex), cudaHostRegisterDefault);
194 cudaHostRegister(
static_cast<void *
>(evecm1), m*m*
sizeof(
Complex), cudaHostRegisterDefault);
197 for(
int l = 1; l <= m ; l++) evecm1[l*m-1] = 0.0 ;
199 memcpy(&evecm[k*m], evecm1, k*m*
sizeof(
Complex));
203 MatrixXcd Q2k(MatrixXcd::Identity(m, 2*k));
204 HouseholderQR<MatrixXcd> ritzVecs2k_qr( Map<MatrixXcd, Unaligned >(args.
ritzVecs.data(), m, 2*k) );
205 Q2k.applyOnTheLeft( ritzVecs2k_qr.householderQ() );
208 args.
H2k = Q2k.adjoint()*args.
Tm*Q2k;
211 SelfAdjointEigenSolver<MatrixXcd> es_h2k(args.
H2k);
212 Block<MatrixXcd>(args.
ritzVecs.derived(), 0, 0, m, 2*k) = Q2k * es_h2k.eigenvectors();
213 args.
Tmvals.segment(0,2*k) = es_h2k.eigenvalues();
215 cudaHostUnregister(evecm);
216 cudaHostUnregister(evecm1);
218 errorQuda(
"Magma library was not built.");
224 static void fillEigCGInnerSolverParam(SolverParam &inner,
const SolverParam &outer,
bool use_sloppy_partial_accumulator =
true)
226 inner.tol = outer.tol_precondition;
227 inner.maxiter = outer.maxiter_precondition;
229 inner.precision = outer.precision_precondition;
230 inner.precision_sloppy = outer.precision_precondition;
237 inner.is_preconditioner =
true;
239 inner.use_sloppy_partial_accumulator= use_sloppy_partial_accumulator;
241 if(outer.inv_type ==
QUDA_EIGCG_INVERTER && outer.precision_sloppy != outer.precision_precondition)
247 static void fillInitCGSolverParam(SolverParam &inner,
const SolverParam &outer) {
252 inner.tol = outer.tol;
253 inner.tol_restart = outer.tol_restart;
254 inner.maxiter = outer.maxiter;
255 inner.delta = outer.delta;
256 inner.precision = outer.precision;
257 inner.precision_sloppy = outer.precision_precondition;
262 inner.use_sloppy_partial_accumulator=
false;
280 "Incorrect number of the requested low modes: m= %d while n_ev=%d (note that 2*n_ev must be less then m).",
286 printfQuda(
"\nDeflation space is complete, running initCG solver.");
287 fillInitCGSolverParam(Kparam,
param);
293 fillEigCGInnerSolverParam(Kparam,
param);
296 errorQuda(
"preconditioning is not supported for the incremental solver.");
297 fillInitCGSolverParam(Kparam,
param);
343 ComputeRitz<libtype::magma_lib>(args);
345 ComputeRitz<libtype::eigen_lib>(args);
354 std::vector<ColorSpinorField*> vm (Vm->
Components());
389 }
else if (args.
id == (
param.
m-1)) {
415 printfQuda(
"Warning: inverting on zero-field source\n");
505 const bool use_heavy_quark_res =
511 double heavy_quark_res = 0.0;
516 double alpha=1.0, alpha_inv=1.0, beta=0.0, alpha_old_inv = 1.0;
518 double lanczos_diag, lanczos_offdiag;
528 PrintStats(
"eigCG", k, r2, b2, heavy_quark_res);
536 alpha_old_inv = alpha_inv;
537 alpha = rMinvr / pAp;
538 alpha_inv = 1.0 / alpha;
540 lanczos_diag = (alpha_inv + beta*alpha_old_inv);
556 double rMinvr_old = rMinvr;
558 beta = rMinvr / rMinvr_old;
562 lanczos_offdiag = (-
sqrt(beta)*alpha_inv);
563 args.
SetLanczos(lanczos_diag, lanczos_offdiag);
567 PrintStats(
"eigCG", k, r2, b2, heavy_quark_res);
611 const double full_tol = Kparam.
tol;
683 const double b2 =
norm2(in);
691 if (K)
errorQuda(
"\nInitCG does not (yet) support preconditioning.");
722 int logical_rhs_id = 0;
723 bool dcg_cycle =
false;
728 eSloppy = e, rSloppy = r;
738 printfQuda(
"Running DCG correction cycle.\n");
750 dcg_cycle = (logical_rhs_id >= max_eigcg_cycles);
772 }
while ((r2 >
stop) && mixed_prec);
782 if (mixed_prec && max_eigcg_cycles > logical_rhs_id) {
783 printfQuda(
"Reset maximum eigcg cycles to %d (was %d)\n", logical_rhs_id, max_eigcg_cycles);
784 max_eigcg_cycles = logical_rhs_id;
794 const int max_n_ev = defl.
size();
void magma_Xheev(void *Mat, const int n, const int ldm, void *evalues, const int prec)
Conjugate-Gradient Solver.
CompositeColorSpinorField & Components()
static ColorSpinorField * Create(const ColorSpinorParam ¶m)
ColorSpinorField & Component(const int idx) const
bool is_complete()
Test whether the deflation space is complete and therefore cannot be further extended
int size()
return deflation space size
void increment(ColorSpinorField &V, int n_ev)
void reduce(double tol, int max_n_ev)
unsigned long long flops() const
void SetLanczos(Complex diag_val, Complex offdiag_val)
void RestartLanczos(ColorSpinorField *w, ColorSpinorFieldSet *v, const double inv_sqrt_r2)
ColorSpinorFieldSet * V2k
bool run_residual_correction
void operator()(ColorSpinorField &out, ColorSpinorField &in)
int eigCGsolve(ColorSpinorField &out, ColorSpinorField &in)
void UpdateVm(ColorSpinorField &res, double beta, double sqrtr2)
int initCGsolve(ColorSpinorField &out, ColorSpinorField &in)
IncEigCG(const DiracMatrix &mat, const DiracMatrix &matSloppy, const DiracMatrix &matPrecon, SolverParam ¶m, TimeProfile &profile)
void RestartVT(const double beta, const double rho)
QudaPrecision Precision() const
bool convergence(double r2, double hq2, double r2_tol, double hq_tol)
void PrintSummary(const char *name, int k, double r2, double b2, double r2_tol, double hq_tol)
Prints out the summary of the solver convergence (requires a verbosity of QUDA_SUMMARIZE)....
static double stopping(double tol, double b2, QudaResidualType residual_type)
Set the solver L2 stopping condition.
void PrintStats(const char *name, int k, double r2, double b2, double hq2)
Prints out the running statistics of the solver (requires a verbosity of QUDA_VERBOSE)
const DiracMatrix & matPrecon
const DiracMatrix & matSloppy
double Last(QudaProfileType idx)
void commGlobalReductionSet(bool global_reduce)
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
cudaColorSpinorField * tmp
@ QUDA_CUDA_FIELD_LOCATION
@ QUDA_USE_INIT_GUESS_YES
@ QUDA_HEAVY_QUARK_RESIDUAL
@ QUDA_INC_EIGCG_INVERTER
@ QUDA_PRESERVE_SOURCE_NO
@ QUDA_PRESERVE_SOURCE_YES
#define checkLocation(...)
void axpyZpbx(double a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, double b)
double3 HeavyQuarkResidualNorm(ColorSpinorField &x, ColorSpinorField &r)
double xmyNorm(ColorSpinorField &x, ColorSpinorField &y)
void xpay(ColorSpinorField &x, double a, ColorSpinorField &y)
void ax(double a, ColorSpinorField &x)
void zero(ColorSpinorField &a)
double norm2(const ColorSpinorField &a)
double reDotProduct(ColorSpinorField &x, ColorSpinorField &y)
double axpyNorm(double a, ColorSpinorField &x, ColorSpinorField &y)
void caxpy(const Complex &a, ColorSpinorField &x, ColorSpinorField &y)
void xpy(ColorSpinorField &x, ColorSpinorField &y)
void copy(ColorSpinorField &dst, const ColorSpinorField &src)
Complex cDotProduct(ColorSpinorField &, ColorSpinorField &)
void stop()
Stop profiling.
void ComputeRitz(EigCGArgs &args)
double norm2(const CloverField &a, bool inverse=false)
std::complex< double > Complex
__host__ __device__ ValueType sqrt(ValueType x)
Stride< Dynamic, Dynamic > DynamicStride
QudaResidualType residual_type
QudaExtLibType extlib_type
QudaPrecision precision_precondition
QudaPrecision precision_sloppy
QudaInverterType inv_type
QudaPrecision precision_ritz
QudaInverterType inv_type_precondition
QudaVerbosity getVerbosity()