21 #include <Eigen/Dense> 22 #include <Eigen/Eigenvalues> 36 using namespace Eigen;
75 eta(
Vector::
Zero(m)), m(m), k(
nev), restarts(0), Vkp1(nullptr) {
c =
static_cast<Complex*
> (ritzVecs.col(k).data()); }
103 cudaHostRegister(static_cast<void *>(cH.data()),
args.m*
args.m*
sizeof(
Complex), cudaHostRegisterDefault);
105 cudaHostUnregister(cH.data());
107 Gk.col(
args.m-1) += em;
109 cudaHostRegister(static_cast<void *>(Gk.data()),
args.m*
args.m*
sizeof(
Complex), cudaHostRegisterDefault);
110 magma_Xgeev(static_cast<void*>(Gk.data()),
args.m,
args.m, static_cast<void*>(harVecs.data()), static_cast<void*>(harVals.data()),
args.m,
sizeof(
Complex));
111 cudaHostUnregister(Gk.data());
113 std::vector<SortedEvals> sorted_evals;
114 sorted_evals.reserve(
args.m);
121 errorQuda(
"Magma library was not built.\n");
139 Gk.col(
args.m-1) += cH.colPivHouseholderQr().solve(em);
141 ComplexEigenSolver<DenseMatrix> es( Gk );
142 harVecs = es.eigenvectors();
143 harVals = es.eigenvalues ();
145 std::vector<SortedEvals> sorted_evals;
146 sorted_evals.reserve(
args.m);
167 cudaHostRegister(static_cast<void*>(Htemp.data()), (
args.m+1)*
args.m*
sizeof(
Complex), cudaHostRegisterDefault);
169 cudaHostUnregister(Htemp.data());
174 errorQuda(
"MAGMA library was not built.\n");
181 Map<VectorXcd, Unaligned> c_(
args.c,
args.m+1);
182 args.eta =
args.H.jacobiSvd(ComputeThinU | ComputeThinV).solve(c_);
213 Vm(nullptr), Zm(nullptr), profile(profile), gmresdr_args(nullptr),
init(false)
236 Vm(nullptr), Zm(nullptr), profile(profile), gmresdr_args(nullptr),
init(false) { }
277 ComputeEta<libtype::magma_lib>(
args);
279 ComputeEta<libtype::eigen_lib>(
args);
288 std::vector<ColorSpinorField*> x_, r_;
289 x_.push_back(
x), r_.push_back(r);
293 VectorXcd minusHeta = - (
args.H *
args.eta);
294 Map<VectorXcd, Unaligned> c_(
args.c,
args.m+1);
297 blas::caxpy(static_cast<Complex*>(minusHeta.data()), V_, r_);
307 ComputeHarmonicRitz<libtype::magma_lib>(
args);
309 ComputeHarmonicRitz<libtype::eigen_lib>(
args);
316 HouseholderQR<MatrixXcd> qr(
args.ritzVecs);
317 Qkp1.applyOnTheLeft( qr.householderQ());
325 std::vector<ColorSpinorField*> vkp1(
args.Vkp1->Components());
329 blas::caxpy(static_cast<Complex*>(Alpha.data()), vm , vkp1);
331 for(
int i = 0;
i < (
args.m+1);
i++)
344 std::vector<ColorSpinorField*> vk(
args.Vkp1->Components().begin(),
args.Vkp1->Components().begin()+
args.k);
347 blas::caxpy(static_cast<Complex*>(Beta.data()),
z , vk);
349 for(
int i = 0;
i < (
args.m);
i++)
358 for(
int j = 0; j <
args.k; j++)
366 args.ritzVecs.setZero();
377 std::unique_ptr<Complex[] > givensH((do_givens) ?
new Complex[(
args.m+1)*
args.m] :
nullptr);
378 std::unique_ptr<Complex[] > cn((do_givens) ?
new Complex[
args.m] :
nullptr);
379 std::unique_ptr<double[] > sn((do_givens) ?
new double[
args.m] :
nullptr);
392 (*K)( outPre ,inPre );
404 for(
int i = 1;
i <= j;
i++)
411 h0 = -sn[
i-1]*h0 + cn[
i-1]*
args.H(
i,j);
420 cn[j] = h0 * inv_denom;
421 sn[j] =
args.H(j+1,j).real() * inv_denom;
422 givensH[j*(
args.m+1)+j] =
conj(cn[j])*h0 + sn[j]*
args.H(j+1,j);
438 givensH_.triangularView<Upper>().solveInPlace<OnTheLeft>(
args.eta);
444 std::vector<ColorSpinorField*> r_;
445 r_.push_back(static_cast<ColorSpinorField*>(
r_sloppy));
450 return (j-start_idx);
457 const double tol_threshold = 1.2;
458 const double det_max_deviation = 0.4;
517 double normb =
norm2(
b );
526 printfQuda(
"\nInitial residual squared: %1.16e, source %1.16e, tolerance %1.16e\n", r2,
sqrt(normb),
param.
tol);
544 double heavy_quark_res = 0.0;
548 int restart_idx = 0, j = 0, check_interval = 4;
559 bool do_clean_restart =
false;
562 if((restart_idx+1) % check_interval) {
567 for(
int l = 0; l <
args.k+1; l++) {
569 Complex *col = Gm.col(l).data();
572 std::vector<ColorSpinorField*> v2_;
573 v2_.push_back(static_cast<ColorSpinorField*>(&
Vm->
Component(l)));
579 Complex detGm = Gm.determinant();
581 PrintStats(
"FGMResDR:", tot_iters, r2, b2, heavy_quark_res);
582 printfQuda(
"\nCheck cycle %d, true residual squared %1.15e, Gramm det : (%le, %le)\n", restart_idx, ext_r2, detGm.real(), detGm.imag());
586 do_clean_restart = ((
sqrt(ext_r2) /
sqrt(r2)) > tol_threshold) ||
fabs(1.0 - (
norm(detGm)) > det_max_deviation);
596 printfQuda(
"\nClean restart for cycle %d, true residual squared %1.15e\n", restart_idx, ext_r2);
bool convergence(const double &r2, const double &hq2, const double &r2_tol, const double &hq_tol)
bool global_reduction
whether the solver acting as a preconditioner for another solver
QudaVerbosity verbosity_precondition
void magma_Xgeev(void *Mat, const int n, const int ldm, void *vr, void *evalues, const int ldv, const int prec)
__host__ __device__ ValueType norm(const complex< ValueType > &z)
Returns the magnitude of z squared.
QudaInverterType inv_type
double norm2(const ColorSpinorField &a)
int FlexArnoldiProcedure(const int start_idx, const bool do_givens)
__host__ __device__ ValueType sqrt(ValueType x)
Complex cDotProduct(ColorSpinorField &, ColorSpinorField &)
std::complex< double > Complex
static bool SelectSmall(SortedEvals v1, SortedEvals v2)
TimeProfile & profile
preconditioner result
cudaColorSpinorField * tmp
CompositeColorSpinorField & Components()
static ColorSpinorField * Create(const ColorSpinorParam ¶m)
SortedEvals(double val, int idx)
ColorSpinorFieldSet * Vkp1
GMResDRArgs(int m, int nev)
void copy(ColorSpinorField &dst, const ColorSpinorField &src)
void ax(const double &a, ColorSpinorField &x)
ColorSpinorField & Component(const int idx) const
double xmyNorm(ColorSpinorField &x, ColorSpinorField &y)
GMResDR(DiracMatrix &mat, DiracMatrix &matSloppy, DiracMatrix &matPrecon, SolverParam ¶m, TimeProfile &profile)
QudaInverterType inv_type_precondition
QudaPreserveSource preserve_source
Matrix< Complex, Dynamic, Dynamic, RowMajor > RowMajorDenseMatrix
double norm2(const CloverField &a, bool inverse=false)
ColorSpinorField * yp
residual vector
GMResDRArgs * gmresdr_args
double Last(QudaProfileType idx)
void PrintSummary(const char *name, int k, const double &r2, const double &b2)
QudaResidualType residual_type
void fillFGMResDRInnerSolveParam(SolverParam &inner, const SolverParam &outer)
Stride< Dynamic, Dynamic > DynamicStride
bool is_preconditioner
verbosity to use for preconditioner
double3 HeavyQuarkResidualNorm(ColorSpinorField &x, ColorSpinorField &r)
void operator()(ColorSpinorField &out, ColorSpinorField &in)
void caxpy(const Complex &a, ColorSpinorField &x, ColorSpinorField &y)
void * memcpy(void *__dst, const void *__src, size_t __n)
QudaExtLibType extlib_type
whether to use a global or local (node) reduction for this solver
void zero(ColorSpinorField &a)
void pushVerbosity(QudaVerbosity verbosity)
QudaPrecision precision_precondition
void axpy(const double &a, ColorSpinorField &x, ColorSpinorField &y)
void magma_Xgels(void *Mat, void *c, int rows, int cols, int ldm, const int prec)
double norm(const Float *a, const int N)
void * memset(void *__b, int __c, size_t __len)
unsigned long long flops() const
void PrintStats(const char *, int k, const double &r2, const double &b2, const double &hq2)
void xpy(ColorSpinorField &x, ColorSpinorField &y)
ColorSpinorField * p_pre
residual passed to preconditioner
__host__ __device__ ValueType abs(ValueType x)
QudaPrecision precision_sloppy
void UpdateSolution(ColorSpinorField *x, ColorSpinorField *r, bool do_gels)
ColorSpinorField * tmpp
high precision accumulator
void ComputeHarmonicRitz(GMResDRArgs &args)
__host__ __device__ ValueType conj(ValueType x)
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
static __inline__ enum cudaRoundMode mode enum cudaRoundMode mode enum cudaRoundMode mode enum cudaRoundMode mode int val
ColorSpinorField * r_pre
sloppy residual vector
__device__ __host__ void zero(vector_type< scalar, n > &v)
void magma_Xgesv(void *sol, const int ldn, const int n, void *Mat, const int ldm, const int prec)
void ComputeEta(GMResDRArgs &args)
ColorSpinorField * r_sloppy
temporary for mat-vec