29 for (
int i = 0; i <
n_kr; i++) {
33 for (
int j = 0; j <
n_kr; j++) {
47 void IRAM::arnoldiStep(std::vector<ColorSpinorField *> &v, std::vector<ColorSpinorField *> &r,
double &beta,
int j)
68 std::vector<Complex>
tmp(j + 1);
69 std::vector<ColorSpinorField *> v_;
71 for (
int i = 0; i < j + 1; i++) { v_.push_back(v[i]); }
76 for (
int i = 0; i < j + 1; i++)
tmp[i] *= -1.0;
78 for (
int i = 0; i < j + 1; i++)
upperHess[i][j] = -1.0 *
tmp[i];
98 int orth_iter_max = 100;
100 while (beta < 0.717 * beta_pre && orth_iter < orth_iter_max) {
103 printfQuda(
"beta = %e > 0.717*beta_pre = %e: Reorthogonalise at step %d, iter %d\n", beta, 0.717 * beta_pre, j,
114 for (
int i = 0; i < j + 1; i++)
tmp[i] *= -1.0;
116 for (
int i = 0; i < j + 1; i++)
upperHess[i][j] -=
tmp[i];
122 if (orth_iter == orth_iter_max) {
errorQuda(
"Unable to orthonormalise r"); }
128 std::vector<Complex> Qmat_keep(
n_kr * keep, 0.0);
129 for (
int j = 0; j <
n_kr; j++)
130 for (
int i = 0; i < keep; i++) { Qmat_keep[j * keep + i] =
Qmat[j][i]; }
135 void IRAM::reorder(std::vector<ColorSpinorField *> &kSpace, std::vector<Complex> &evals,
140 std::vector<std::tuple<Complex, double, ColorSpinorField *>> array(n);
141 for (
int i = 0; i < n; i++) array[i] = std::make_tuple(evals[i],
residua[i], kSpace[i]);
145 std::sort(array.begin(), array.begin() + n,
146 [](
const std::tuple<Complex, double, ColorSpinorField *> &a,
147 const std::tuple<Complex, double, ColorSpinorField *> &b) {
148 return (abs(std::get<0>(a)) > abs(std::get<0>(b)));
152 std::sort(array.begin(), array.begin() + n,
153 [](
const std::tuple<Complex, double, ColorSpinorField *> &a,
154 const std::tuple<Complex, double, ColorSpinorField *> &b) {
155 return (abs(std::get<0>(a)) < abs(std::get<0>(b)));
159 std::sort(array.begin(), array.begin() + n,
160 [](
const std::tuple<Complex, double, ColorSpinorField *> &a,
161 const std::tuple<Complex, double, ColorSpinorField *> &b) {
162 return (std::get<0>(a).real() > std::get<0>(b).real());
166 std::sort(array.begin(), array.begin() + n,
167 [](
const std::tuple<Complex, double, ColorSpinorField *> &a,
168 const std::tuple<Complex, double, ColorSpinorField *> &b) {
169 return (std::get<0>(a).real() < std::get<0>(b).real());
173 std::sort(array.begin(), array.begin() + n,
174 [](
const std::tuple<Complex, double, ColorSpinorField *> &a,
175 const std::tuple<Complex, double, ColorSpinorField *> &b) {
176 return (std::get<0>(a).imag() > std::get<0>(b).imag());
180 std::sort(array.begin(), array.begin() + n,
181 [](
const std::tuple<Complex, double, ColorSpinorField *> &a,
182 const std::tuple<Complex, double, ColorSpinorField *> &b) {
183 return (std::get<0>(a).imag() < std::get<0>(b).imag());
186 default:
errorQuda(
"Undefined spectrum type %d given", spec_type);
190 for (
int i = 0; i < n; i++) {
191 std::swap(evals[i], std::get<0>(array[i]));
193 std::swap(kSpace[i], std::get<2>(array[i]));
203 MatrixXcd UHcopy = MatrixXcd::Zero(
n_kr,
n_kr);
204 for (
int i = 0; i <
n_kr; i++) {
205 for (
int j = 0; j <
n_kr; j++) {
214 for (
int shift = 0; shift < num_shifts; shift++) {
217 for (
int i = 0; i <
n_kr; i++)
upperHess[i][i] -= evals[shift];
221 for (
int i = 0; i <
n_kr; i++)
upperHess[i][i] += evals[shift];
229 Complex T11, T12, T21, T22, U1, U2;
235 std::vector<Complex> R11(
n_kr - 1, 0.0);
236 std::vector<Complex> R12(
n_kr - 1, 0.0);
237 std::vector<Complex> R21(
n_kr - 1, 0.0);
238 std::vector<Complex> R22(
n_kr - 1, 0.0);
240 for (
int i = 0; i <
n_kr - 1; i++) {
244 if (
abs(R[i + 1][i]) <
tol) {
251 dV = (U1.real() > 0) ? dV : -dV;
268 R[i][i] -= (T11 * R[i][i] + T12 * R[i + 1][i]);
273 #pragma omp parallel for schedule(static, 32)
275 for (
int j = i + 1; j <
n_kr; j++) {
277 R[i][j] -= (T11 * temp + T12 * R[i + 1][j]);
278 R[i + 1][j] -= (T21 * temp + T22 * R[i + 1][j]);
284 for (
int j = 0; j <
n_kr - 1; j++) {
290 #pragma omp for schedule(static, 32) nowait
292 for (
int i = 0; i < j + 2; i++) {
294 R[i][j] -= (R11[j] * temp + R12[j] * R[i][j + 1]);
295 R[i][j + 1] -= (R21[j] * temp + R22[j] * R[i][j + 1]);
298 #pragma omp for schedule(static, 32) nowait
300 for (
int i = 0; i <
n_kr; i++) {
302 Q[i][j] -= (R11[j] * temp + R12[j] * Q[i][j + 1]);
303 Q[i][j + 1] -= (R21[j] * temp + R22[j] * Q[i][j + 1]);
317 MatrixXcd Q = MatrixXcd::Identity(
n_kr,
n_kr);
318 MatrixXcd R = MatrixXcd::Zero(
n_kr,
n_kr);
319 for (
int i = 0; i <
n_kr; i++) {
320 for (
int j = 0; j <
n_kr; j++) { R(i, j) =
upperHess[i][j]; }
324 Eigen::ComplexSchur<MatrixXcd> schurUH;
325 schurUH.computeFromHessenberg(R, Q);
331 MatrixXcd matUpper = MatrixXcd::Zero(
n_kr,
n_kr);
332 matUpper = schurUH.matrixT().triangularView<Eigen::Upper>();
333 matUpper.conservativeResize(
n_kr,
n_kr);
334 Eigen::ComplexEigenSolver<MatrixXcd> eigenSolver(matUpper);
335 Q = schurUH.matrixU() * eigenSolver.eigenvectors();
338 for (
int i = 0; i <
n_kr; i++) {
339 evals[i] = eigenSolver.eigenvalues()[i];
341 for (
int j = 0; j <
n_kr; j++)
Qmat[i][j] = Q(i, j);
347 for (
int i = 0; i <
n_kr; i++) {
348 for (
int j = 0; j <
n_kr; j++) {
360 int max_iter = 100000;
363 Complex temp, discriminant, sol1, sol2, eval;
364 for (
int i =
n_kr - 2; i >= 0; i--) {
365 while (
iter < max_iter) {
367 Rmat[i + 1][i] = 0.0;
374 temp = (
Rmat[i][i] -
Rmat[i + 1][i + 1]) * (
Rmat[i][i] -
Rmat[i + 1][i + 1]) / 4.0;
375 discriminant =
sqrt(
Rmat[i + 1][i] *
Rmat[i][i + 1] + temp);
378 temp = (
Rmat[i][i] +
Rmat[i + 1][i + 1]) / 2.0;
380 sol1 = temp -
Rmat[i + 1][i + 1] + discriminant;
381 sol2 = temp -
Rmat[i + 1][i + 1] - discriminant;
385 eval =
Rmat[i + 1][i + 1] + (
norm(sol1) <
norm(sol2) ? sol1 : sol2);
388 for (
int j = 0; j <
n_kr; j++)
Rmat[j][j] -= eval;
394 for (
int j = 0; j <
n_kr; j++)
Rmat[j][j] += eval;
405 MatrixXcd Q = MatrixXcd::Zero(
n_kr,
n_kr);
406 MatrixXcd R = MatrixXcd::Zero(
n_kr,
n_kr);
407 for (
int i = 0; i <
n_kr; i++) {
408 for (
int j = 0; j <
n_kr; j++) {
409 Q(i, j) =
Qmat[i][j];
410 R(i, j) =
Rmat[i][j];
414 MatrixXcd matUpper = MatrixXcd::Zero(
n_kr,
n_kr);
415 matUpper = R.triangularView<Eigen::Upper>();
416 matUpper.conservativeResize(
n_kr,
n_kr);
417 Eigen::ComplexEigenSolver<MatrixXcd> eigenSolver(matUpper);
418 Q *= eigenSolver.eigenvectors();
421 for (
int i = 0; i <
n_kr; i++) {
422 evals[i] = eigenSolver.eigenvalues()[i];
424 for (
int j = 0; j <
n_kr; j++)
Qmat[i][j] = Q(i, j);
432 void IRAM::operator()(std::vector<ColorSpinorField *> &kSpace, std::vector<Complex> &evals)
498 for (
int i = 0; i <
n_ev; i++) {
499 int idx =
n_kr - 1 - i;
500 double rtemp = std::max(epsilon23,
abs(evals[idx]));
504 printf(
"residuum[%d] = %e, condition = %e\n", i,
residua[idx],
tol *
abs(evals[idx]));
566 errorQuda(
"IRAM failed to compute the requested %d vectors with a %d search space and %d Krylov space in %d "
567 "restart steps. Exiting.",
570 warningQuda(
"IRAM failed to compute the requested %d vectors with a %d search space and %d Krylov space in %d "
571 "restart steps. Continuing with current arnoldi factorisation.",
576 printfQuda(
"IRAM computed the requested %d vectors with a %d search space and %d Krylov space in %d "
577 "restart steps and %d OP*x operations.\n",
589 for (
int i = 0; i <
n_kr; i++) {
void matVec(const DiracMatrix &mat, ColorSpinorField &out, const ColorSpinorField &in)
Applies the specified matVec operation: M, Mdag, MMdag, MdagM.
void prepareInitialGuess(std::vector< ColorSpinorField * > &kSpace)
Check for an initial guess. If none present, populate with rands, then orthonormalise.
void sortArrays(QudaEigSpectrumType spec_type, int n, std::vector< Complex > &x, std::vector< Complex > &y)
Sort array the first n elements of x according to spec_type, y comes along for the ride.
std::vector< ColorSpinorField * > r
double setEpsilon(const QudaPrecision prec)
Set the epsilon parameter.
void computeEvals(const DiracMatrix &mat, std::vector< ColorSpinorField * > &evecs, std::vector< Complex > &evals, int size)
Compute eigenvalues and their residiua.
void rotateVecsComplex(std::vector< ColorSpinorField * > &kSpace, const Complex *rot_array, const int offset, const int dim, const int keep, const int locked, TimeProfile &profile)
Rotate the Krylov space.
void queryPrec(const QudaPrecision prec)
Query the eigensolver precision to stdout.
void prepareKrylovSpace(std::vector< ColorSpinorField * > &kSpace, std::vector< Complex > &evals)
Extend the Krylov space.
void loadFromFile(const DiracMatrix &mat, std::vector< ColorSpinorField * > &eig_vecs, std::vector< Complex > &evals)
Load and check eigenpairs from file.
void cleanUpEigensolver(std::vector< ColorSpinorField * > &kSpace, std::vector< Complex > &evals)
Release memory, save eigenvectors, resize the Krylov space to its original dimension.
void printEigensolverSetup()
Dump the eigensolver parameters to stdout.
std::vector< double > residua
void rotateBasis(std::vector< ColorSpinorField * > &v, int keep)
Rotate the Krylov space.
IRAM(const DiracMatrix &mat, QudaEigParam *eig_param, TimeProfile &profile)
Constructor for Thick Restarted Eigensolver class.
virtual ~IRAM()
Destructor for Thick Restarted Eigensolver class.
void qrIteration(Complex **Q, Complex **R)
Apply One step of the the QR algorithm.
void eigensolveFromUpperHess(std::vector< Complex > &evals, const double beta)
Get the eigendecomposition from the upper Hessenberg matrix via QR.
void arnoldiStep(std::vector< ColorSpinorField * > &v, std::vector< ColorSpinorField * > &r, double &beta, int j)
Arnoldi step: extends the Krylov space by one vector.
void reorder(std::vector< ColorSpinorField * > &kSpace, std::vector< Complex > &evals, const QudaEigSpectrumType spec_type)
Reorder the Krylov space and eigenvalues.
void operator()(std::vector< ColorSpinorField * > &kSpace, std::vector< Complex > &evals)
Compute eigenpairs.
void qrShifts(const std::vector< Complex > evals, const int num_shifts)
Apply shifts to the upper Hessenberg matrix via QR decomposition.
bool isRunning(QudaProfileType idx)
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
cudaColorSpinorField * tmp
enum QudaEigSpectrumType_s QudaEigSpectrumType
#define safe_malloc(size)
void caxpby(const Complex &a, ColorSpinorField &x, const Complex &b, ColorSpinorField &y)
void ax(double a, ColorSpinorField &x)
double norm2(const ColorSpinorField &a)
void caxpy(const Complex &a, ColorSpinorField &x, ColorSpinorField &y)
Complex cDotProduct(ColorSpinorField &, ColorSpinorField &)
__host__ __device__ ValueType conj(ValueType x)
void saveTuneCache(bool error=false)
std::complex< double > Complex
__host__ __device__ ValueType sqrt(ValueType x)
@ QUDA_PROFILE_HOST_COMPUTE
__host__ __device__ ValueType pow(ValueType x, ExponentType e)
__host__ __device__ ValueType norm(const complex< ValueType > &z)
Returns the magnitude of z squared.
__host__ __device__ ValueType abs(ValueType x)
QudaEigSpectrumType spectrum
QudaBoolean require_convergence
DEVICEHOST void swap(Real &a, Real &b)
QudaVerbosity getVerbosity()