20 #include <Eigen/Dense> 33 using namespace Eigen;
73 m(m), k(k), id(0), restarts(0), global_stop(0.0), run_residual_correction(false), V2k(nullptr) { }
81 if(run_residual_correction)
return;
83 Tm.diagonal<0>()[
id] = diag_val;
86 Tm.diagonal<+1>()[
id] = offdiag_val;
87 Tm.diagonal<-1>()[
id] = offdiag_val;
111 std::unique_ptr<Complex[] >
s(
new Complex[2*k]);
113 for(
int i = 0; i < 2*k; i++) Tm(i,i) = Tmvals(i);
115 std::vector<ColorSpinorField*> w_;
122 Map<VectorXcd, Unaligned > s_(s.get(), 2*k);
125 Tm.col(2*k).segment(0, 2*k) = s_;
126 Tm.row(2*k).segment(0, 2*k) = s_.adjoint();
136 template <>
void ComputeRitz<libtype::eigen_lib>(
EigCGArgs &args)
138 const int m = args.m;
139 const int k = args.k;
141 SelfAdjointEigenSolver<MatrixXcd> es_tm(args.Tm);
142 args.ritzVecs.leftCols(k) = es_tm.eigenvectors().leftCols(k);
144 SelfAdjointEigenSolver<MatrixXcd> es_tm1(Map<MatrixXcd, Unaligned, DynamicStride >(args.Tm.data(), (m-1), (m-1),
DynamicStride(m, 1)));
145 Block<MatrixXcd>(args.ritzVecs.derived(), 0, k, m-1, k) = es_tm1.eigenvectors().leftCols(k);
146 args.ritzVecs.block(m-1, k, 1, k).setZero();
148 MatrixXcd Q2k(MatrixXcd::Identity(m, 2*k));
149 HouseholderQR<MatrixXcd> ritzVecs2k_qr( Map<MatrixXcd, Unaligned >(args.ritzVecs.data(), m, 2*k) );
150 Q2k.applyOnTheLeft( ritzVecs2k_qr.householderQ() );
153 args.H2k = Q2k.adjoint()*args.Tm*Q2k;
156 SelfAdjointEigenSolver<MatrixXcd> es_h2k(args.H2k);
157 Block<MatrixXcd>(args.ritzVecs.derived(), 0, 0, m, 2*k) = Q2k * es_h2k.eigenvectors();
158 args.Tmvals.segment(0,2*k) = es_h2k.eigenvalues();
164 template <>
void ComputeRitz<libtype::magma_lib>(
EigCGArgs &args)
167 const int m = args.m;
168 const int k = args.k;
170 args.ritzVecs = args.Tm;
172 double *evalm =
static_cast<double *
>( args.Tmvals.data());
174 cudaHostRegister(static_cast<void *>(evecm), m*m*
sizeof(
Complex), cudaHostRegisterDefault);
180 cudaHostRegister(static_cast<void *>(evecm1), m*m*
sizeof(
Complex), cudaHostRegisterDefault);
183 for(
int l = 1; l <= m ; l++) evecm1[l*m-1] = 0.0 ;
185 memcpy(&evecm[k*m], evecm1, k*m*
sizeof(
Complex));
189 MatrixXcd Q2k(MatrixXcd::Identity(m, 2*k));
190 HouseholderQR<MatrixXcd> ritzVecs2k_qr( Map<MatrixXcd, Unaligned >(args.ritzVecs.data(), m, 2*k) );
191 Q2k.applyOnTheLeft( ritzVecs2k_qr.householderQ() );
194 args.H2k = Q2k.adjoint()*args.Tm*Q2k;
197 SelfAdjointEigenSolver<MatrixXcd> es_h2k(args.H2k);
198 Block<MatrixXcd>(args.ritzVecs.derived(), 0, 0, m, 2*k) = Q2k * es_h2k.eigenvectors();
199 args.Tmvals.segment(0,2*k) = es_h2k.eigenvalues();
201 cudaHostUnregister(evecm);
202 cudaHostUnregister(evecm1);
204 errorQuda(
"Magma library was not built.");
253 Solver(param, profile), mat(mat), matSloppy(matSloppy), matPrecon(matPrecon), K(nullptr), Kparam(param), Vm(nullptr), r_pre(nullptr), p_pre(nullptr), eigcg_args(nullptr), profile(profile),
init(false)
256 if (2 * param.
nev >= param.
m)
258 "Incorrect number of the requested low modes: m= %d while nev=%d (note that 2*nev must be less then m).",
262 printfQuda(
"\nInitialize eigCG(m=%d, nev=%d) solver.", param.
m, param.
nev);
264 printfQuda(
"\nDeflation space is complete, running initCG solver.");
274 errorQuda(
"preconditioning is not supported for the incremental solver.");
279 K =
new CG(matPrecon, matPrecon,
Kparam, profile);
281 K =
new MR(matPrecon, matPrecon,
Kparam, profile);
321 ComputeRitz<libtype::magma_lib>(args);
323 ComputeRitz<libtype::eigen_lib>(args);
336 blas::caxpy( static_cast<Complex*>(Alpha.data()), vm , v2k);
367 }
else if (args.
id == (
param.
m-1)) {
393 printfQuda(
"Warning: inverting on zero-field source\n");
483 const bool use_heavy_quark_res =
489 double heavy_quark_res = 0.0;
494 double alpha=1.0, alpha_inv=1.0, beta=0.0, alpha_old_inv = 1.0;
496 double lanczos_diag, lanczos_offdiag;
506 PrintStats(
"eigCG", k, r2, b2, heavy_quark_res);
514 alpha_old_inv = alpha_inv;
515 alpha = rMinvr / pAp;
516 alpha_inv = 1.0 / alpha;
518 lanczos_diag = (alpha_inv + beta*alpha_old_inv);
534 double rMinvr_old = rMinvr;
536 beta = rMinvr / rMinvr_old;
540 lanczos_offdiag = (-
sqrt(beta)*alpha_inv);
541 args.
SetLanczos(lanczos_diag, lanczos_offdiag);
545 PrintStats(
"eigCG", k, r2, b2, heavy_quark_res);
661 const double b2 =
norm2(in);
669 if (
K)
errorQuda(
"\nInitCG does not (yet) support preconditioning.");
700 int logical_rhs_id = 0;
701 bool dcg_cycle =
false;
706 eSloppy = e, rSloppy = r;
716 printfQuda(
"Running DCG correction cycle.\n");
750 }
while ((r2 > stop) && mixed_prec);
760 if (mixed_prec && max_eigcg_cycles > logical_rhs_id) {
761 printfQuda(
"Reset maximum eigcg cycles to %d (was %d)\n", logical_rhs_id, max_eigcg_cycles);
762 max_eigcg_cycles = logical_rhs_id;
772 const int max_nev = defl.
size();
cudaColorSpinorField * tmp2
void ax(double a, ColorSpinorField &x)
bool run_residual_correction
void setPrecision(QudaPrecision precision, QudaPrecision ghost_precision=QUDA_INVALID_PRECISION, bool force_native=false)
ColorSpinorField * Az
temporary for mat-vec
void axpyZpbx(double a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, double b)
ColorSpinorField * p_pre
residual passed to preconditioner
QudaInverterType inv_type
QudaVerbosity getVerbosity()
double norm2(const ColorSpinorField &a)
__host__ __device__ ValueType sqrt(ValueType x)
Complex cDotProduct(ColorSpinorField &, ColorSpinorField &)
void PrintStats(const char *name, int k, double r2, double b2, double hq2)
Prints out the running statistics of the solver (requires a verbosity of QUDA_VERBOSE) ...
cudaColorSpinorField * tmp
IncEigCG(DiracMatrix &mat, DiracMatrix &matSloppy, DiracMatrix &matPrecon, SolverParam ¶m, TimeProfile &profile)
CompositeColorSpinorField & Components()
static ColorSpinorField * Create(const ColorSpinorParam ¶m)
int size()
return deflation space size
int initCGsolve(ColorSpinorField &out, ColorSpinorField &in)
bool convergence(double r2, double hq2, double r2_tol, double hq_tol)
double reDotProduct(ColorSpinorField &x, ColorSpinorField &y)
void copy(ColorSpinorField &dst, const ColorSpinorField &src)
ColorSpinorField & Component(const int idx) const
double xmyNorm(ColorSpinorField &x, ColorSpinorField &y)
int eigCGsolve(ColorSpinorField &out, ColorSpinorField &in)
QudaPrecision precision_ritz
QudaInverterType inv_type_precondition
QudaPreserveSource preserve_source
void reduce(double tol, int max_nev)
void xpay(ColorSpinorField &x, double a, ColorSpinorField &y)
double norm2(const CloverField &a, bool inverse=false)
bool is_complete()
Test whether the deflation space is complete and therefore cannot be further extended.
bool is_composite
for deflation solvers:
void ComputeRitz(EigCGArgs &args)
double Last(QudaProfileType idx)
void magma_Xheev(void *Mat, const int n, const int ldm, void *evalues, const int prec)
QudaResidualType residual_type
Stride< Dynamic, Dynamic > DynamicStride
static double stopping(double tol, double b2, QudaResidualType residual_type)
Set the solver L2 stopping condition.
void UpdateVm(ColorSpinorField &res, double beta, double sqrtr2)
static void fillInitCGSolverParam(SolverParam &inner, const SolverParam &outer)
#define checkLocation(...)
static void fillEigCGInnerSolverParam(SolverParam &inner, const SolverParam &outer, bool use_sloppy_partial_accumulator=true)
bool is_preconditioner
verbosity to use for preconditioner
static int max_eigcg_cycles
double3 HeavyQuarkResidualNorm(ColorSpinorField &x, ColorSpinorField &r)
std::complex< double > Complex
void caxpy(const Complex &a, ColorSpinorField &x, ColorSpinorField &y)
QudaExtLibType extlib_type
void zero(ColorSpinorField &a)
void SetLanczos(Complex diag_val, Complex offdiag_val)
QudaPrecision precision_precondition
cpuColorSpinorField * out
Conjugate-Gradient Solver.
ColorSpinorField * yp
residual vector
unsigned long long flops() const
EigCGArgs * eigcg_args
preconditioner result
void xpy(ColorSpinorField &x, ColorSpinorField &y)
void increment(ColorSpinorField &V, int nev)
double axpyNorm(double a, ColorSpinorField &x, ColorSpinorField &y)
ColorSpinorField * p
high precision accumulator
void RestartLanczos(ColorSpinorField *w, ColorSpinorFieldSet *v, const double inv_sqrt_r2)
QudaUseInitGuess use_init_guess
ColorSpinorFieldSet * V2k
QudaPrecision precision_sloppy
bool use_sloppy_partial_accumulator
void PrintSummary(const char *name, int k, double r2, double b2, double r2_tol, double hq_tol)
Prints out the summary of the solver convergence (requires a verbosity of QUDA_SUMMARIZE). Assumes SolverParam.true_res and SolverParam.true_res_hq has been set.
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
QudaPrecision Precision() const
void RestartVT(const double beta, const double rho)
void commGlobalReductionSet(bool global_reduce)
void operator()(ColorSpinorField &out, ColorSpinorField &in)