20 #include <Eigen/Dense> 33 using namespace Eigen;
73 m(m), k(k),
id(0), restarts(0), global_stop(0.0), run_residual_correction(false), V2k(nullptr) { }
81 if(run_residual_correction)
return;
83 Tm.diagonal<0>()[
id] = diag_val;
86 Tm.diagonal<+1>()[
id] = offdiag_val;
87 Tm.diagonal<-1>()[
id] = offdiag_val;
111 std::unique_ptr<Complex[] >
s(
new Complex[2*k]);
113 for(
int i = 0;
i < 2*k;
i++) Tm(
i,
i) = Tmvals(
i);
115 std::vector<ColorSpinorField*> w_;
122 Map<VectorXcd, Unaligned > s_(
s.get(), 2*k);
125 Tm.col(2*k).segment(0, 2*k) = s_;
126 Tm.row(2*k).segment(0, 2*k) = s_.adjoint();
138 const int m =
args.m;
139 const int k =
args.k;
141 SelfAdjointEigenSolver<MatrixXcd> es_tm(
args.Tm);
142 args.ritzVecs.leftCols(k) = es_tm.eigenvectors().leftCols(k);
144 SelfAdjointEigenSolver<MatrixXcd> es_tm1(Map<MatrixXcd, Unaligned, DynamicStride >(
args.Tm.data(), (m-1), (m-1),
DynamicStride(m, 1)));
145 Block<MatrixXcd>(
args.ritzVecs.derived(), 0, k, m-1, k) = es_tm1.eigenvectors().leftCols(k);
146 args.ritzVecs.block(m-1, k, 1, k).setZero();
148 MatrixXcd Q2k(MatrixXcd::Identity(m, 2*k));
149 HouseholderQR<MatrixXcd> ritzVecs2k_qr( Map<MatrixXcd, Unaligned >(
args.ritzVecs.data(), m, 2*k) );
150 Q2k.applyOnTheLeft( ritzVecs2k_qr.householderQ() );
153 args.H2k = Q2k.adjoint()*
args.Tm*Q2k;
156 SelfAdjointEigenSolver<MatrixXcd> es_h2k(
args.H2k);
157 Block<MatrixXcd>(
args.ritzVecs.derived(), 0, 0, m, 2*k) = Q2k * es_h2k.eigenvectors();
158 args.Tmvals.segment(0,2*k) = es_h2k.eigenvalues();
167 const int m =
args.m;
168 const int k =
args.k;
172 double *evalm =
static_cast<double *
>(
args.Tmvals.data());
174 cudaHostRegister(static_cast<void *>(evecm), m*m*
sizeof(
Complex), cudaHostRegisterDefault);
180 cudaHostRegister(static_cast<void *>(evecm1), m*m*
sizeof(
Complex), cudaHostRegisterDefault);
183 for(
int l = 1; l <= m ; l++) evecm1[l*m-1] = 0.0 ;
189 MatrixXcd Q2k(MatrixXcd::Identity(m, 2*k));
190 HouseholderQR<MatrixXcd> ritzVecs2k_qr( Map<MatrixXcd, Unaligned >(
args.ritzVecs.data(), m, 2*k) );
191 Q2k.applyOnTheLeft( ritzVecs2k_qr.householderQ() );
194 args.H2k = Q2k.adjoint()*
args.Tm*Q2k;
197 SelfAdjointEigenSolver<MatrixXcd> es_h2k(
args.H2k);
198 Block<MatrixXcd>(
args.ritzVecs.derived(), 0, 0, m, 2*k) = Q2k * es_h2k.eigenvectors();
199 args.Tmvals.segment(0,2*k) = es_h2k.eigenvalues();
201 cudaHostUnregister(evecm);
202 cudaHostUnregister(evecm1);
204 errorQuda(
"Magma library was not built.\n");
253 Solver(
param, profile),
mat(
mat), matSloppy(matSloppy), matPrecon(matPrecon), K(nullptr), Kparam(
param), Vm(nullptr), r_pre(nullptr), p_pre(nullptr), eigcg_args(nullptr), profile(profile),
init(false)
257 printfQuda(
"\nDeflation space is complete, running initCG solver.\n");
313 ComputeRitz<libtype::magma_lib>(
args);
315 ComputeRitz<libtype::eigen_lib>(
args);
325 std::vector<ColorSpinorField*> v2k(
args.V2k->Components());
328 blas::caxpy( static_cast<Complex*>(Alpha.data()), vm , v2k);
353 if(
args.run_residual_correction)
return;
358 args.ResetSearchIdx();
385 printfQuda(
"Warning: inverting on zero-field source\n");
475 const bool use_heavy_quark_res =
481 double heavy_quark_res = 0.0;
486 double alpha=1.0, alpha_inv=1.0, beta=0.0, alpha_old_inv = 1.0;
488 double lanczos_diag, lanczos_offdiag;
498 PrintStats(
"eigCG", k, r2, b2, heavy_quark_res);
506 alpha_old_inv = alpha_inv;
507 alpha = rMinvr / pAp;
508 alpha_inv = 1.0 / alpha;
510 lanczos_diag = (alpha_inv + beta*alpha_old_inv);
526 double rMinvr_old = rMinvr;
528 beta = rMinvr / rMinvr_old;
532 lanczos_offdiag = (-
sqrt(beta)*alpha_inv);
533 args.SetLanczos(lanczos_diag, lanczos_offdiag);
537 PrintStats(
"eigCG", k, r2, b2, heavy_quark_res);
661 if(
K)
errorQuda(
"\nInitCG does not (yet) support preconditioning.\n");
692 int logical_rhs_id = 0;
693 bool dcg_cycle =
false;
698 eSloppy =
e, rSloppy = r;
708 printfQuda(
"Running DCG correction cycle.\n");
737 PrintSummary( !dcg_cycle ?
"EigCG:" :
"DCG (correction cycle):", iters, r2, b2);
742 }
while ((r2 > stop) && mixed_prec);
764 const int max_nev = defl.
size();
bool convergence(const double &r2, const double &hq2, const double &r2_tol, const double &hq_tol)
bool run_residual_correction
void xpay(ColorSpinorField &x, const double &a, ColorSpinorField &y)
ColorSpinorField * Az
temporary for mat-vec
static double stopping(const double &tol, const double &b2, QudaResidualType residual_type)
ColorSpinorField * p_pre
residual passed to preconditioner
QudaInverterType inv_type
QudaVerbosity getVerbosity()
double norm2(const ColorSpinorField &a)
__host__ __device__ ValueType sqrt(ValueType x)
Complex cDotProduct(ColorSpinorField &, ColorSpinorField &)
std::complex< double > Complex
cudaColorSpinorField * tmp
IncEigCG(DiracMatrix &mat, DiracMatrix &matSloppy, DiracMatrix &matPrecon, SolverParam ¶m, TimeProfile &profile)
double axpyNorm(const double &a, ColorSpinorField &x, ColorSpinorField &y)
CompositeColorSpinorField & Components()
static ColorSpinorField * Create(const ColorSpinorParam ¶m)
int size()
return deflation space size
int initCGsolve(ColorSpinorField &out, ColorSpinorField &in)
double reDotProduct(ColorSpinorField &x, ColorSpinorField &y)
void copy(ColorSpinorField &dst, const ColorSpinorField &src)
void ax(const double &a, ColorSpinorField &x)
ColorSpinorField & Component(const int idx) const
double xmyNorm(ColorSpinorField &x, ColorSpinorField &y)
int eigCGsolve(ColorSpinorField &out, ColorSpinorField &in)
QudaPrecision precision_ritz
QudaInverterType inv_type_precondition
QudaPreserveSource preserve_source
void reduce(double tol, int max_nev)
double norm2(const CloverField &a, bool inverse=false)
bool is_complete()
Test whether the deflation space is complete and therefore cannot be further extended ...
void ComputeRitz(EigCGArgs &args)
double Last(QudaProfileType idx)
void PrintSummary(const char *name, int k, const double &r2, const double &b2)
void magma_Xheev(void *Mat, const int n, const int ldm, void *evalues, const int prec)
QudaResidualType residual_type
void axpyZpbx(const double &a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, const double &b)
Stride< Dynamic, Dynamic > DynamicStride
def id
projector matrices ######################################################################## ...
void UpdateVm(ColorSpinorField &res, double beta, double sqrtr2)
static void fillInitCGSolverParam(SolverParam &inner, const SolverParam &outer)
#define checkLocation(...)
static void fillEigCGInnerSolverParam(SolverParam &inner, const SolverParam &outer, bool use_sloppy_partial_accumulator=true)
bool is_preconditioner
verbosity to use for preconditioner
static int max_eigcg_cycles
double3 HeavyQuarkResidualNorm(ColorSpinorField &x, ColorSpinorField &r)
void caxpy(const Complex &a, ColorSpinorField &x, ColorSpinorField &y)
void * memcpy(void *__dst, const void *__src, size_t __n)
QudaExtLibType extlib_type
whether to use a global or local (node) reduction for this solver
void zero(ColorSpinorField &a)
void SetLanczos(Complex diag_val, Complex offdiag_val)
QudaPrecision precision_precondition
cpuColorSpinorField * out
ColorSpinorField * yp
residual vector
unsigned long long flops() const
void PrintStats(const char *, int k, const double &r2, const double &b2, const double &hq2)
EigCGArgs * eigcg_args
preconditioner result
void xpy(ColorSpinorField &x, ColorSpinorField &y)
void increment(ColorSpinorField &V, int nev)
ColorSpinorField * p
high precision accumulator
void RestartLanczos(ColorSpinorField *w, ColorSpinorFieldSet *v, const double inv_sqrt_r2)
QudaUseInitGuess use_init_guess
ColorSpinorFieldSet * V2k
QudaPrecision precision_sloppy
bool use_sloppy_partial_accumulator
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
QudaPrecision Precision() const
void RestartVT(const double beta, const double rho)
void commGlobalReductionSet(bool global_reduce)
void operator()(ColorSpinorField &out, ColorSpinorField &in)