21 #include <Eigen/Dense> 22 #include <Eigen/Eigenvalues> 36 using namespace Eigen;
75 eta(
Vector::
Zero(m)), m(m), k(nev), restarts(0), Vkp1(nullptr) { c =
static_cast<Complex*
> (ritzVecs.col(k).data()); }
90 template <>
void ComputeHarmonicRitz<libtype::magma_lib>(
GMResDRArgs &args)
93 DenseMatrix cH = args.H.block(0, 0, args.m, args.m).adjoint();
94 DenseMatrix Gk = args.H.block(0, 0, args.m, args.m);
96 VectorSet harVecs = MatrixXcd::Zero(args.m, args.m);
97 Vector harVals = VectorXcd::Zero(args.m);
99 Vector em = VectorXcd::Zero(args.m);
101 em(args.m-1) =
norm( args.H(args.m, args.m-1) );
103 cudaHostRegister(static_cast<void *>(cH.data()), args.m*args.m*
sizeof(
Complex), cudaHostRegisterDefault);
104 magma_Xgesv(static_cast<void*>(em.data()), args.m, args.m, static_cast<void*>(cH.data()), args.m,
sizeof(
Complex));
105 cudaHostUnregister(cH.data());
107 Gk.col(args.m-1) += em;
109 cudaHostRegister(static_cast<void *>(Gk.data()), args.m*args.m*
sizeof(
Complex), cudaHostRegisterDefault);
110 magma_Xgeev(static_cast<void*>(Gk.data()), args.m, args.m, static_cast<void*>(harVecs.data()), static_cast<void*>(harVals.data()), args.m,
sizeof(
Complex));
111 cudaHostUnregister(Gk.data());
113 std::vector<SortedEvals> sorted_evals;
114 sorted_evals.reserve(args.m);
116 for(
int e = 0; e < args.m; e++) sorted_evals.push_back(
SortedEvals(
abs(harVals.data()[e]), e ));
119 for(
int e = 0; e < args.k; e++) memcpy(args.ritzVecs.col(e).data(), harVecs.col(sorted_evals[e]._idx).data(), (args.m)*
sizeof(
Complex));
121 errorQuda(
"Magma library was not built.\n");
127 template <>
void ComputeHarmonicRitz<libtype::eigen_lib>(
GMResDRArgs &args)
130 DenseMatrix cH = args.H.block(0, 0, args.m, args.m).adjoint();
131 DenseMatrix Gk = args.H.block(0, 0, args.m, args.m);
133 VectorSet harVecs = MatrixXcd::Zero(args.m, args.m);
134 Vector harVals = VectorXcd::Zero(args.m);
136 Vector em = VectorXcd::Zero(args.m);
138 em(args.m-1) =
norm( args.H(args.m, args.m-1) );
139 Gk.col(args.m-1) += cH.colPivHouseholderQr().solve(em);
141 ComplexEigenSolver<DenseMatrix> es( Gk );
142 harVecs = es.eigenvectors();
143 harVals = es.eigenvalues ();
145 std::vector<SortedEvals> sorted_evals;
146 sorted_evals.reserve(args.m);
148 for(
int e = 0; e < args.m; e++) sorted_evals.push_back(
SortedEvals(
abs(harVals.data()[e]), e ));
151 for(
int e = 0; e < args.k; e++) memcpy(args.ritzVecs.col(e).data(), harVecs.col(sorted_evals[e]._idx).data(), (args.m)*
sizeof(
Complex));
159 template <>
void ComputeEta<libtype::magma_lib>(
GMResDRArgs &args) {
161 DenseMatrix Htemp(DenseMatrix::Zero(args.m+1,args.m));
164 Complex *ctemp =
static_cast<Complex*
> (args.ritzVecs.col(0).data());
165 memcpy(ctemp, args.c, (args.m+1)*
sizeof(
Complex));
167 cudaHostRegister(static_cast<void*>(Htemp.data()), (args.m+1)*args.m*
sizeof(
Complex), cudaHostRegisterDefault);
168 magma_Xgels(static_cast<void*>(Htemp.data()), ctemp, args.m+1, args.m, args.m+1,
sizeof(
Complex));
169 cudaHostUnregister(Htemp.data());
171 memcpy(args.eta.data(), ctemp, args.m*
sizeof(
Complex));
174 errorQuda(
"MAGMA library was not built.\n");
179 template <>
void ComputeEta<libtype::eigen_lib>(
GMResDRArgs &args) {
181 Map<VectorXcd, Unaligned> c_(args.c, args.m+1);
182 args.eta = args.H.jacobiSvd(ComputeThinU | ComputeThinV).solve(c_);
212 Solver(param, profile), mat(mat), matSloppy(matSloppy), matPrecon(matPrecon), K(nullptr), Kparam(param),
213 Vm(nullptr), Zm(nullptr), profile(profile), gmresdr_args(nullptr),
init(false)
218 K =
new CG(matPrecon, matPrecon,
Kparam, profile);
222 K =
new MR(matPrecon, matPrecon,
Kparam, profile);
235 Solver(param, profile), mat(mat), matSloppy(matSloppy), matPrecon(matPrecon), K(&K),
Kparam(param),
277 ComputeEta<libtype::magma_lib>(args);
279 ComputeEta<libtype::eigen_lib>(args);
288 std::vector<ColorSpinorField*> x_, r_;
289 x_.push_back(x), r_.push_back(r);
293 VectorXcd minusHeta = - (args.
H * args.
eta);
294 Map<VectorXcd, Unaligned> c_(args.
c, args.
m+1);
297 blas::caxpy(static_cast<Complex*>(minusHeta.data()), V_, r_);
307 ComputeHarmonicRitz<libtype::magma_lib>(args);
309 ComputeHarmonicRitz<libtype::eigen_lib>(args);
314 DenseMatrix Qkp1(MatrixXcd::Identity((args.
m+1), (args.
k+1)));
316 HouseholderQR<MatrixXcd> qr(args.
ritzVecs);
317 Qkp1.applyOnTheLeft( qr.householderQ());
319 DenseMatrix Res = Qkp1.adjoint()*args.
H*Qkp1.topLeftCorner(args.
m, args.
k);
321 args.
H.topLeftCorner(args.
k+1, args.
k) = Res;
329 blas::caxpy(static_cast<Complex*>(Alpha.data()), vm , vkp1);
331 for(
int i = 0; i < (args.
m+1); i++)
347 blas::caxpy(static_cast<Complex*>(Beta.data()), z , vk);
349 for(
int i = 0; i < (args.
m); i++)
358 for(
int j = 0; j < args.
k; j++)
377 std::unique_ptr<Complex[] > givensH((do_givens) ?
new Complex[(args.
m+1)*args.
m] :
nullptr);
378 std::unique_ptr<Complex[] > cn((do_givens) ?
new Complex[args.
m] :
nullptr);
379 std::unique_ptr<double[] > sn((do_givens) ?
new double[args.
m] :
nullptr);
381 Complex c0 = args.
c[0];
392 (*K)( outPre ,inPre );
402 Complex h0 = do_givens ? args.
H(0, j) : 0.0;
404 for(
int i = 1; i <= j; i++)
410 givensH[(args.
m+1)*j+(i-1)] =
conj(cn[i-1])*h0 + sn[i-1]*args.
H(i,j);
411 h0 = -sn[i-1]*h0 + cn[i-1]*args.
H(i,j);
420 cn[j] = h0 * inv_denom;
421 sn[j] = args.
H(j+1,j).real() * inv_denom;
422 givensH[j*(args.
m+1)+j] =
conj(cn[j])*h0 + sn[j]*args.
H(j+1,j);
424 args.
c[j+1] = -sn[j]*args.
c[j];
425 args.
c[j] *=
conj(cn[j]);
433 Map<MatrixXcd, Unaligned, DynamicStride> givensH_(givensH.get(), args.
m, args.
m,
DynamicStride(args.
m+1,1) );
434 memcpy(args.
eta.data(), args.
c, args.
m*
sizeof(
Complex));
435 memset((
void*)args.
c, 0, (args.
m+1)*
sizeof(Complex));
438 givensH_.triangularView<Upper>().solveInPlace<OnTheLeft>(args.
eta);
441 memset((
void*)args.
c, 0, (args.
m+1)*
sizeof(Complex));
444 std::vector<ColorSpinorField*> r_;
445 r_.push_back(static_cast<ColorSpinorField*>(
r_sloppy));
450 return (j-start_idx);
457 const double tol_threshold = 1.2;
458 const double det_max_deviation = 0.4;
517 double normb =
norm2( b );
526 printfQuda(
"\nInitial residual squared: %1.16e, source %1.16e, tolerance %1.16e\n", r2,
sqrt(normb),
param.
tol);
544 double heavy_quark_res = 0.0;
548 int restart_idx = 0, j = 0, check_interval = 4;
559 bool do_clean_restart =
false;
562 if((restart_idx+1) % check_interval) {
567 for(
int l = 0; l < args.
k+1; l++) {
569 Complex *col = Gm.col(l).data();
572 std::vector<ColorSpinorField*> v2_;
573 v2_.push_back(static_cast<ColorSpinorField*>(&
Vm->
Component(l)));
579 Complex detGm = Gm.determinant();
581 PrintStats(
"FGMResDR:", tot_iters, r2, b2, heavy_quark_res);
582 printfQuda(
"\nCheck cycle %d, true residual squared %1.15e, Gramm det : (%le, %le)\n", restart_idx, ext_r2, detGm.real(), detGm.imag());
586 do_clean_restart = ((
sqrt(ext_r2) /
sqrt(r2)) > tol_threshold) || fabs(1.0 - (
norm(detGm)) > det_max_deviation);
596 printfQuda(
"\nClean restart for cycle %d, true residual squared %1.15e\n", restart_idx, ext_r2);
void ax(double a, ColorSpinorField &x)
bool global_reduction
whether the solver acting as a preconditioner for another solver
void setPrecision(QudaPrecision precision, QudaPrecision ghost_precision=QUDA_INVALID_PRECISION, bool force_native=false)
QudaVerbosity verbosity_precondition
void magma_Xgeev(void *Mat, const int n, const int ldm, void *vr, void *evalues, const int ldv, const int prec)
__host__ __device__ ValueType norm(const complex< ValueType > &z)
Returns the magnitude of z squared.
QudaInverterType inv_type
double norm2(const ColorSpinorField &a)
int FlexArnoldiProcedure(const int start_idx, const bool do_givens)
__host__ __device__ ValueType sqrt(ValueType x)
Complex cDotProduct(ColorSpinorField &, ColorSpinorField &)
static bool SelectSmall(SortedEvals v1, SortedEvals v2)
TimeProfile & profile
preconditioner result
void PrintStats(const char *name, int k, double r2, double b2, double hq2)
Prints out the running statistics of the solver (requires a verbosity of QUDA_VERBOSE) ...
cudaColorSpinorField * tmp
CompositeColorSpinorField & Components()
static ColorSpinorField * Create(const ColorSpinorParam ¶m)
bool convergence(double r2, double hq2, double r2_tol, double hq_tol)
SortedEvals(double val, int idx)
ColorSpinorFieldSet * Vkp1
GMResDRArgs(int m, int nev)
void copy(ColorSpinorField &dst, const ColorSpinorField &src)
ColorSpinorField & Component(const int idx) const
double xmyNorm(ColorSpinorField &x, ColorSpinorField &y)
GMResDR(DiracMatrix &mat, DiracMatrix &matSloppy, DiracMatrix &matPrecon, SolverParam ¶m, TimeProfile &profile)
QudaInverterType inv_type_precondition
QudaPreserveSource preserve_source
Matrix< Complex, Dynamic, Dynamic, RowMajor > RowMajorDenseMatrix
double norm2(const CloverField &a, bool inverse=false)
ColorSpinorField * yp
residual vector
GMResDRArgs * gmresdr_args
bool is_composite
for deflation solvers:
double Last(QudaProfileType idx)
__device__ __host__ void caxpy(const complex< Float > &a, const complex< Float > &x, complex< Float > &y)
QudaResidualType residual_type
void fillFGMResDRInnerSolveParam(SolverParam &inner, const SolverParam &outer)
Stride< Dynamic, Dynamic > DynamicStride
void axpy(double a, ColorSpinorField &x, ColorSpinorField &y)
bool is_preconditioner
verbosity to use for preconditioner
double3 HeavyQuarkResidualNorm(ColorSpinorField &x, ColorSpinorField &r)
void operator()(ColorSpinorField &out, ColorSpinorField &in)
std::complex< double > Complex
void caxpy(const Complex &a, ColorSpinorField &x, ColorSpinorField &y)
QudaExtLibType extlib_type
void zero(ColorSpinorField &a)
void pushVerbosity(QudaVerbosity verbosity)
Push a new verbosity onto the stack.
QudaPrecision precision_precondition
void * memset(void *s, int c, size_t n)
void magma_Xgels(void *Mat, void *c, int rows, int cols, int ldm, const int prec)
Conjugate-Gradient Solver.
unsigned long long flops() const
void xpy(ColorSpinorField &x, ColorSpinorField &y)
ColorSpinorField * p_pre
residual passed to preconditioner
__host__ __device__ ValueType abs(ValueType x)
void popVerbosity()
Pop the verbosity restoring the prior one on the stack.
QudaPrecision precision_sloppy
void UpdateSolution(ColorSpinorField *x, ColorSpinorField *r, bool do_gels)
ColorSpinorField * tmpp
high precision accumulator
void PrintSummary(const char *name, int k, double r2, double b2, double r2_tol, double hq_tol)
Prints out the summary of the solver convergence (requires a verbosity of QUDA_SUMMARIZE). Assumes SolverParam.true_res and SolverParam.true_res_hq has been set.
void ComputeHarmonicRitz(GMResDRArgs &args)
__host__ __device__ ValueType conj(ValueType x)
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
ColorSpinorField * r_pre
sloppy residual vector
__device__ __host__ void zero(vector_type< scalar, n > &v)
void magma_Xgesv(void *sol, const int ldn, const int n, void *Mat, const int ldm, const int prec)
void ComputeEta(GMResDRArgs &args)
ColorSpinorField * r_sloppy
temporary for mat-vec