79 c =
static_cast<Complex *
>(ritzVecs.col(k).data());
91 if (Vkp1)
delete Vkp1;
97 template <>
void ComputeHarmonicRitz<libtype::magma_lib>(
GMResDRArgs &args)
103 VectorSet harVecs = MatrixXcd::Zero(args.
m, args.
m);
104 Vector harVals = VectorXcd::Zero(args.
m);
106 Vector em = VectorXcd::Zero(args.
m);
108 em(args.
m - 1) =
norm(args.
H(args.
m, args.
m - 1));
110 cudaHostRegister(
static_cast<void *
>(cH.data()), args.
m * args.
m *
sizeof(
Complex), cudaHostRegisterDefault);
111 magma_Xgesv(
static_cast<void *
>(em.data()), args.
m, args.
m,
static_cast<void *
>(cH.data()), args.
m,
sizeof(
Complex));
112 cudaHostUnregister(cH.data());
114 Gk.col(args.
m - 1) += em;
116 cudaHostRegister(
static_cast<void *
>(Gk.data()), args.
m * args.
m *
sizeof(
Complex), cudaHostRegisterDefault);
117 magma_Xgeev(
static_cast<void *
>(Gk.data()), args.
m, args.
m,
static_cast<void *
>(harVecs.data()),
118 static_cast<void *
>(harVals.data()), args.
m,
sizeof(
Complex));
119 cudaHostUnregister(Gk.data());
121 std::vector<SortedEvals> sorted_evals;
122 sorted_evals.reserve(args.
m);
124 for (
int e = 0; e < args.
m; e++) sorted_evals.push_back(
SortedEvals(
abs(harVals.data()[e]), e));
127 for (
int e = 0; e < args.
k; e++)
128 memcpy(args.
ritzVecs.col(e).data(), harVecs.col(sorted_evals[e]._idx).data(), (args.
m) *
sizeof(
Complex));
130 errorQuda(
"Magma library was not built.\n");
135 template <>
void ComputeHarmonicRitz<libtype::eigen_lib>(
GMResDRArgs &args)
141 VectorSet harVecs = MatrixXcd::Zero(args.
m, args.
m);
142 Vector harVals = VectorXcd::Zero(args.
m);
144 Vector em = VectorXcd::Zero(args.
m);
146 em(args.
m - 1) =
norm(args.
H(args.
m, args.
m - 1));
147 Gk.col(args.
m - 1) += cH.colPivHouseholderQr().solve(em);
149 ComplexEigenSolver<DenseMatrix> es(Gk);
150 harVecs = es.eigenvectors();
151 harVals = es.eigenvalues();
153 std::vector<SortedEvals> sorted_evals;
154 sorted_evals.reserve(args.
m);
156 for (
int e = 0; e < args.
m; e++) sorted_evals.push_back(
SortedEvals(
abs(harVals.data()[e]), e));
159 for (
int e = 0; e < args.
k; e++)
160 memcpy(args.
ritzVecs.col(e).data(), harVecs.col(sorted_evals[e]._idx).data(), (args.
m) *
sizeof(
Complex));
167 template <>
void ComputeEta<libtype::magma_lib>(
GMResDRArgs &args)
174 memcpy(ctemp, args.
c, (args.
m + 1) *
sizeof(
Complex));
176 cudaHostRegister(
static_cast<void *
>(Htemp.data()), (args.
m + 1) * args.
m *
sizeof(
Complex), cudaHostRegisterDefault);
177 magma_Xgels(
static_cast<void *
>(Htemp.data()), ctemp, args.
m + 1, args.
m, args.
m + 1,
sizeof(
Complex));
178 cudaHostUnregister(Htemp.data());
180 memcpy(args.
eta.data(), ctemp, args.
m *
sizeof(
Complex));
183 errorQuda(
"MAGMA library was not built.\n");
188 template <>
void ComputeEta<libtype::eigen_lib>(
GMResDRArgs &args)
191 Map<VectorXcd, Unaligned> c_(args.
c, args.
m + 1);
192 args.
eta = args.
H.jacobiSvd(ComputeThinU | ComputeThinV).solve(c_);
230 gmresdr_args(nullptr),
257 gmresdr_args(nullptr),
298 ComputeEta<libtype::magma_lib>(args);
300 ComputeEta<libtype::eigen_lib>(args);
307 std::vector<ColorSpinorField *> V_(Vm->
Components());
309 std::vector<ColorSpinorField *> x_, r_;
310 x_.push_back(x), r_.push_back(r);
314 VectorXcd minusHeta = -(args.
H * args.
eta);
315 Map<VectorXcd, Unaligned> c_(args.
c, args.
m + 1);
327 ComputeHarmonicRitz<libtype::magma_lib>(args);
329 ComputeHarmonicRitz<libtype::eigen_lib>(args);
334 DenseMatrix Qkp1(MatrixXcd::Identity((args.
m + 1), (args.
k + 1)));
336 HouseholderQR<MatrixXcd> qr(args.
ritzVecs);
337 Qkp1.applyOnTheLeft(qr.householderQ());
339 DenseMatrix Res = Qkp1.adjoint() * args.
H * Qkp1.topLeftCorner(args.
m, args.
k);
341 args.
H.topLeftCorner(args.
k + 1, args.
k) = Res;
346 std::vector<ColorSpinorField *> vm(Vm->
Components());
351 for (
int i = 0; i < (args.
m + 1); i++) {
352 if (i < (args.
k + 1)) {
359 if (Zm->
V() != Vm->
V()) {
360 std::vector<ColorSpinorField *> z(Zm->
Components());
366 for (
int i = 0; i < (args.
m); i++) {
374 for (
int j = 0; j < args.
k; j++) {
391 std::unique_ptr<Complex[]> givensH((do_givens) ?
new Complex[(args.
m + 1) * args.
m] :
nullptr);
392 std::unique_ptr<Complex[]> cn((do_givens) ?
new Complex[args.
m] :
nullptr);
393 std::unique_ptr<double[]> sn((do_givens) ?
new double[args.
m] :
nullptr);
415 Complex h0 = do_givens ? args.
H(0, j) : 0.0;
417 for (
int i = 1; i <= j; i++) {
422 givensH[(args.
m + 1) * j + (i - 1)] =
conj(cn[i - 1]) * h0 + sn[i - 1] * args.
H(i, j);
423 h0 = -sn[i - 1] * h0 + cn[i - 1] * args.
H(i, j);
430 double inv_denom = 1.0 /
sqrt(
norm(h0) +
norm(args.
H(j + 1, j)));
431 cn[j] = h0 * inv_denom;
432 sn[j] = args.
H(j + 1, j).real() * inv_denom;
433 givensH[j * (args.
m + 1) + j] =
conj(cn[j]) * h0 + sn[j] * args.
H(j + 1, j);
435 args.
c[j + 1] = -sn[j] * args.
c[j];
436 args.
c[j] *=
conj(cn[j]);
443 Map<MatrixXcd, Unaligned, DynamicStride> givensH_(givensH.get(), args.
m, args.
m,
DynamicStride(args.
m + 1, 1));
444 memcpy(args.
eta.data(), args.
c, args.
m *
sizeof(
Complex));
448 givensH_.triangularView<Upper>().solveInPlace<OnTheLeft>(args.
eta);
454 std::vector<ColorSpinorField *> r_;
459 return (j - start_idx);
466 const double tol_threshold = 1.2;
467 const double det_max_deviation = 0.4;
525 double normb =
norm2( b );
534 printfQuda(
"\nInitial residual squared: %1.16e, source %1.16e, tolerance %1.16e\n", r2,
sqrt(normb),
param.
tol);
552 double heavy_quark_res = 0.0;
556 int restart_idx = 0, j = 0, check_interval = 4;
566 bool do_clean_restart =
false;
569 if ((restart_idx + 1) % check_interval) {
574 for (
int l = 0; l < args.
k + 1; l++) {
576 Complex *col = Gm.col(l).data();
578 std::vector<ColorSpinorField *> v1_(Vm->
Components().begin(), Vm->
Components().begin() + args.
k + 1);
579 std::vector<ColorSpinorField *> v2_;
586 Complex detGm = Gm.determinant();
588 PrintStats(
"FGMResDR:", tot_iters, r2, b2, heavy_quark_res);
589 printfQuda(
"\nCheck cycle %d, true residual squared %1.15e, Gramm det : (%le, %le)\n", restart_idx, ext_r2,
590 detGm.real(), detGm.imag());
594 do_clean_restart = ((
sqrt(ext_r2) /
sqrt(r2)) > tol_threshold) || fabs(1.0 - (
norm(detGm)) > det_max_deviation);
604 printfQuda(
"\nClean restart for cycle %d, true residual squared %1.15e\n", restart_idx, ext_r2);
void magma_Xgels(void *Mat, void *c, int rows, int cols, int ldm, const int prec)
void magma_Xgesv(void *sol, const int ldn, const int n, void *Mat, const int ldm, const int prec)
void magma_Xgeev(void *Mat, const int n, const int ldm, void *vr, void *evalues, const int ldv, const int prec)
Conjugate-Gradient Solver.
CompositeColorSpinorField & Components()
static ColorSpinorField * Create(const ColorSpinorParam ¶m)
ColorSpinorField & Component(const int idx) const
unsigned long long flops() const
GMResDRArgs(int m, int nev)
ColorSpinorFieldSet * Vkp1
GMResDR(const DiracMatrix &mat, const DiracMatrix &matSloppy, const DiracMatrix &matPrecon, SolverParam ¶m, TimeProfile &profile)
void UpdateSolution(ColorSpinorField *x, ColorSpinorField *r, bool do_gels)
int FlexArnoldiProcedure(const int start_idx, const bool do_givens)
void operator()(ColorSpinorField &out, ColorSpinorField &in)
bool convergence(double r2, double hq2, double r2_tol, double hq_tol)
void PrintSummary(const char *name, int k, double r2, double b2, double r2_tol, double hq_tol)
Prints out the summary of the solver convergence (requires a verbosity of QUDA_SUMMARIZE)....
void PrintStats(const char *name, int k, double r2, double b2, double hq2)
Prints out the running statistics of the solver (requires a verbosity of QUDA_VERBOSE)
const DiracMatrix & matPrecon
const DiracMatrix & matSloppy
double Last(QudaProfileType idx)
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
void * memset(void *s, int c, size_t n)
cudaColorSpinorField * tmp
@ QUDA_HEAVY_QUARK_RESIDUAL
@ QUDA_PRESERVE_SOURCE_NO
@ QUDA_PRESERVE_SOURCE_YES
void init()
Create the BLAS context.
double3 HeavyQuarkResidualNorm(ColorSpinorField &x, ColorSpinorField &r)
double xmyNorm(ColorSpinorField &x, ColorSpinorField &y)
void ax(double a, ColorSpinorField &x)
void zero(ColorSpinorField &a)
double norm2(const ColorSpinorField &a)
void axpy(double a, ColorSpinorField &x, ColorSpinorField &y)
void caxpy(const Complex &a, ColorSpinorField &x, ColorSpinorField &y)
void xpy(ColorSpinorField &x, ColorSpinorField &y)
void copy(ColorSpinorField &dst, const ColorSpinorField &src)
Complex cDotProduct(ColorSpinorField &, ColorSpinorField &)
void stop()
Stop profiling.
__host__ __device__ ValueType conj(ValueType x)
void ComputeEta(GMResDRArgs &args)
double norm2(const CloverField &a, bool inverse=false)
__device__ __host__ void zero(double &a)
void ComputeHarmonicRitz(GMResDRArgs &args)
Matrix< Complex, Dynamic, Dynamic, RowMajor > RowMajorDenseMatrix
std::complex< double > Complex
__host__ __device__ ValueType sqrt(ValueType x)
void fillFGMResDRInnerSolveParam(SolverParam &inner, const SolverParam &outer)
Stride< Dynamic, Dynamic > DynamicStride
__host__ __device__ ValueType norm(const complex< ValueType > &z)
Returns the magnitude of z squared.
__host__ __device__ ValueType abs(ValueType x)
QudaPreserveSource preserve_source
bool is_preconditioner
verbosity to use for preconditioner
QudaResidualType residual_type
QudaExtLibType extlib_type
QudaPrecision precision_precondition
QudaPrecision precision_sloppy
QudaInverterType inv_type
QudaVerbosity verbosity_precondition
QudaInverterType inv_type_precondition
bool global_reduction
whether the solver acting as a preconditioner for another solver
SortedEvals(double val, int idx)
static bool SelectSmall(SortedEvals v1, SortedEvals v2)
void pushVerbosity(QudaVerbosity verbosity)
Push a new verbosity onto the stack.
void popVerbosity()
Pop the verbosity restoring the prior one on the stack.