QUDA  v1.1.0
A library for QCD on GPUs
eig_iram.cpp
Go to the documentation of this file.
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <math.h>
4 #include <iostream>
5 #include <vector>
6 #include <algorithm>
7 
8 #include <quda_internal.h>
9 #include <eigensolve_quda.h>
10 #include <qio_field.h>
11 #include <color_spinor_field.h>
12 #include <blas_quda.h>
13 #include <util_quda.h>
14 #include <eigen_helper.h>
15 
16 namespace quda
17 {
18  // Implicitly Restarted Arnoldi Method constructor
19  IRAM::IRAM(const DiracMatrix &mat, QudaEigParam *eig_param, TimeProfile &profile) :
20  EigenSolver(mat, eig_param, profile)
21  {
22  bool profile_running = profile.isRunning(QUDA_PROFILE_INIT);
23  if (!profile_running) profile.TPSTART(QUDA_PROFILE_INIT);
24 
25  // Upper Hessenberg, Q and R matrices
26  upperHess = (Complex **)safe_malloc((n_kr) * sizeof(Complex *));
27  Qmat = (Complex **)safe_malloc((n_kr) * sizeof(Complex *));
28  Rmat = (Complex **)safe_malloc((n_kr) * sizeof(Complex *));
29  for (int i = 0; i < n_kr; i++) {
30  upperHess[i] = (Complex *)safe_malloc((n_kr) * sizeof(Complex));
31  Qmat[i] = (Complex *)safe_malloc((n_kr) * sizeof(Complex));
32  Rmat[i] = (Complex *)safe_malloc((n_kr) * sizeof(Complex));
33  for (int j = 0; j < n_kr; j++) {
34  upperHess[i][j] = 0.0;
35  Qmat[i][j] = 0.0;
36  Rmat[i][j] = 0.0;
37  }
38  }
39 
40  if (eig_param->qr_tol == 0) { eig_param->qr_tol = eig_param->tol * 1e-2; }
41 
42  if (!profile_running) profile.TPSTOP(QUDA_PROFILE_INIT);
43  }
44 
45  // Arnoldi Member functions
46  //---------------------------------------------------------------------------
47  void IRAM::arnoldiStep(std::vector<ColorSpinorField *> &v, std::vector<ColorSpinorField *> &r, double &beta, int j)
48  {
49  beta = sqrt(blas::norm2(*r[0]));
50  if (j > 0) upperHess[j][j - 1] = beta;
51 
52  // v_{j} = r_{j-1}/beta
53  blas::ax(1.0 / beta, *r[0]);
54  std::swap(v[j], r[0]);
55 
56  // r_{j} = M * v_{j};
57  matVec(mat, *r[0], *v[j]);
58 
59  double beta_pre = sqrt(blas::norm2(*r[0]));
60 
61  // Compute the j-th residual corresponding
62  // to the j step factorization.
63  // Use Classical Gram Schmidt and compute:
64  // w_{j} <- V_{j}^dag * M * v_{j}
65  // r_{j} <- M * v_{j} - V_{j} * w_{j}
66 
67  // H_{j,i}_j = v_i^dag * r
68  std::vector<Complex> tmp(j + 1);
69  std::vector<ColorSpinorField *> v_;
70  v_.reserve(j + 1);
71  for (int i = 0; i < j + 1; i++) { v_.push_back(v[i]); }
72  blas::cDotProduct(tmp.data(), v_, r);
73 
74  // Orthogonalise r_{j} against V_{j}.
75  // r = r - H_{j,i} * v_j
76  for (int i = 0; i < j + 1; i++) tmp[i] *= -1.0;
77  blas::caxpy(tmp.data(), v_, r);
78  for (int i = 0; i < j + 1; i++) upperHess[i][j] = -1.0 * tmp[i];
79 
80  // Re-orthogonalization / Iterative refinement phase
81  // Maximum 100 tries.
82 
83  // s = V_{j}^T * B * r_{j}
84  // r_{j} = r_{j} - V_{j}*s
85  // alphaj = alphaj + s_{j}
86 
87  // The stopping criteria used for iterative refinement is
88  // discussed in Parlett's book SEP, page 107 and in Gragg &
89  // Reichel ACM TOMS paper; Algorithm 686, Dec. 1990.
90  // Determine if we need to correct the residual. The goal is
91  // to enforce ||v(:,1:j)^T * r_{j}|| .le. eps * || r_{j} ||
92  // The following test determines whether the sine of the
93  // angle between OP*x and the computed residual is less
94  // than or equal to 0.717. In practice, more than one
95  // step of iterative refinement is rare.
96 
97  int orth_iter = 0;
98  int orth_iter_max = 100;
99  beta = sqrt(blas::norm2(*r[0]));
100  while (beta < 0.717 * beta_pre && orth_iter < orth_iter_max) {
101 
102  if (getVerbosity() >= QUDA_DEBUG_VERBOSE) {
103  printfQuda("beta = %e > 0.717*beta_pre = %e: Reorthogonalise at step %d, iter %d\n", beta, 0.717 * beta_pre, j,
104  orth_iter);
105  }
106 
107  beta_pre = beta;
108 
109  // Compute the correction to the residual:
110  // r_{j} = r_{j} - V_{j} * r_{j}
111  // and adjust for the correction in the
112  // upper Hessenberg matrix.
113  blas::cDotProduct(tmp.data(), v_, r);
114  for (int i = 0; i < j + 1; i++) tmp[i] *= -1.0;
115  blas::caxpy(tmp.data(), v_, r);
116  for (int i = 0; i < j + 1; i++) upperHess[i][j] -= tmp[i];
117 
118  beta = sqrt(blas::norm2(*r[0]));
119  orth_iter++;
120  }
121 
122  if (orth_iter == orth_iter_max) { errorQuda("Unable to orthonormalise r"); }
123  }
124 
125  void IRAM::rotateBasis(std::vector<ColorSpinorField *> &kSpace, int keep)
126  {
127  // Multi-BLAS friendly array to store the part of the rotation matrix
128  std::vector<Complex> Qmat_keep(n_kr * keep, 0.0);
129  for (int j = 0; j < n_kr; j++)
130  for (int i = 0; i < keep; i++) { Qmat_keep[j * keep + i] = Qmat[j][i]; }
131 
132  rotateVecsComplex(kSpace, Qmat_keep.data(), n_kr, n_kr, keep, 0, profile);
133  }
134 
135  void IRAM::reorder(std::vector<ColorSpinorField *> &kSpace, std::vector<Complex> &evals,
136  const QudaEigSpectrumType spec_type)
137  {
138 
139  int n = n_kr;
140  std::vector<std::tuple<Complex, double, ColorSpinorField *>> array(n);
141  for (int i = 0; i < n; i++) array[i] = std::make_tuple(evals[i], residua[i], kSpace[i]);
142 
143  switch (spec_type) {
145  std::sort(array.begin(), array.begin() + n,
146  [](const std::tuple<Complex, double, ColorSpinorField *> &a,
147  const std::tuple<Complex, double, ColorSpinorField *> &b) {
148  return (abs(std::get<0>(a)) > abs(std::get<0>(b)));
149  });
150  break;
152  std::sort(array.begin(), array.begin() + n,
153  [](const std::tuple<Complex, double, ColorSpinorField *> &a,
154  const std::tuple<Complex, double, ColorSpinorField *> &b) {
155  return (abs(std::get<0>(a)) < abs(std::get<0>(b)));
156  });
157  break;
159  std::sort(array.begin(), array.begin() + n,
160  [](const std::tuple<Complex, double, ColorSpinorField *> &a,
161  const std::tuple<Complex, double, ColorSpinorField *> &b) {
162  return (std::get<0>(a).real() > std::get<0>(b).real());
163  });
164  break;
166  std::sort(array.begin(), array.begin() + n,
167  [](const std::tuple<Complex, double, ColorSpinorField *> &a,
168  const std::tuple<Complex, double, ColorSpinorField *> &b) {
169  return (std::get<0>(a).real() < std::get<0>(b).real());
170  });
171  break;
173  std::sort(array.begin(), array.begin() + n,
174  [](const std::tuple<Complex, double, ColorSpinorField *> &a,
175  const std::tuple<Complex, double, ColorSpinorField *> &b) {
176  return (std::get<0>(a).imag() > std::get<0>(b).imag());
177  });
178  break;
180  std::sort(array.begin(), array.begin() + n,
181  [](const std::tuple<Complex, double, ColorSpinorField *> &a,
182  const std::tuple<Complex, double, ColorSpinorField *> &b) {
183  return (std::get<0>(a).imag() < std::get<0>(b).imag());
184  });
185  break;
186  default: errorQuda("Undefined spectrum type %d given", spec_type);
187  }
188 
189  // Repopulate arrays with sorted elements
190  for (int i = 0; i < n; i++) {
191  std::swap(evals[i], std::get<0>(array[i]));
192  std::swap(residua[i], std::get<1>(array[i]));
193  std::swap(kSpace[i], std::get<2>(array[i]));
194  }
195  }
196 
197  void IRAM::qrShifts(const std::vector<Complex> evals, const int num_shifts)
198  {
199  // This isn't really Eigen, but it's morally equivalent
201 
202  // Reset Q to the identity, copy upper Hessenberg
203  MatrixXcd UHcopy = MatrixXcd::Zero(n_kr, n_kr);
204  for (int i = 0; i < n_kr; i++) {
205  for (int j = 0; j < n_kr; j++) {
206  if (i == j)
207  Qmat[i][j] = 1.0;
208  else
209  Qmat[i][j] = 0.0;
210  UHcopy(i, j) = upperHess[i][j];
211  }
212  }
213 
214  for (int shift = 0; shift < num_shifts; shift++) {
215 
216  // Shift the eigenvalue
217  for (int i = 0; i < n_kr; i++) upperHess[i][i] -= evals[shift];
218 
220 
221  for (int i = 0; i < n_kr; i++) upperHess[i][i] += evals[shift];
222  }
223 
225  }
226 
228  {
229  Complex T11, T12, T21, T22, U1, U2;
230  double dV;
231 
232  double tol = eig_param->qr_tol;
233 
234  // Allocate the rotation matrices.
235  std::vector<Complex> R11(n_kr - 1, 0.0);
236  std::vector<Complex> R12(n_kr - 1, 0.0);
237  std::vector<Complex> R21(n_kr - 1, 0.0);
238  std::vector<Complex> R22(n_kr - 1, 0.0);
239 
240  for (int i = 0; i < n_kr - 1; i++) {
241 
242  // If the sub-diagonal element is numerically
243  // small enough, floor it to 0;
244  if (abs(R[i + 1][i]) < tol) {
245  R[i + 1][i] = 0.0;
246  continue;
247  }
248 
249  U1 = R[i][i];
250  dV = sqrt(norm(R[i][i]) + norm(R[i + 1][i]));
251  dV = (U1.real() > 0) ? dV : -dV;
252  U1 += dV;
253  U2 = R[i + 1][i];
254 
255  T11 = conj(U1) / dV;
256  R11[i] = conj(T11);
257 
258  T12 = conj(U2) / dV;
259  R12[i] = conj(T12);
260 
261  T21 = conj(T12) * conj(U1) / U1;
262  R21[i] = conj(T21);
263 
264  T22 = T12 * U2 / U1;
265  R22[i] = conj(T22);
266 
267  // Do the H_kk and set the H_k+1k to zero
268  R[i][i] -= (T11 * R[i][i] + T12 * R[i + 1][i]);
269  R[i + 1][i] = 0;
270 
271  // Continue for the other columns
272 #ifdef _OPENMP
273 #pragma omp parallel for schedule(static, 32)
274 #endif
275  for (int j = i + 1; j < n_kr; j++) {
276  Complex temp = R[i][j];
277  R[i][j] -= (T11 * temp + T12 * R[i + 1][j]);
278  R[i + 1][j] -= (T21 * temp + T22 * R[i + 1][j]);
279  }
280  }
281 
282  // Rotate R and V, i.e. H->RQ. V->VQ
283  // Loop over columns of upper Hessenberg
284  for (int j = 0; j < n_kr - 1; j++) {
285  if (abs(R11[j]) > tol) {
286  // Loop over the rows, up to the sub diagonal element i=j+1
287 #ifdef _OPENMP
288 #pragma omp parallel
289  {
290 #pragma omp for schedule(static, 32) nowait
291 #endif
292  for (int i = 0; i < j + 2; i++) {
293  Complex temp = R[i][j];
294  R[i][j] -= (R11[j] * temp + R12[j] * R[i][j + 1]);
295  R[i][j + 1] -= (R21[j] * temp + R22[j] * R[i][j + 1]);
296  }
297 #ifdef _OPENMP
298 #pragma omp for schedule(static, 32) nowait
299 #endif
300  for (int i = 0; i < n_kr; i++) {
301  Complex temp = Q[i][j];
302  Q[i][j] -= (R11[j] * temp + R12[j] * Q[i][j + 1]);
303  Q[i][j + 1] -= (R21[j] * temp + R22[j] * Q[i][j + 1]);
304  }
305 #ifdef _OPENMP
306  }
307 #endif
308  }
309  }
310  }
311 
312  void IRAM::eigensolveFromUpperHess(std::vector<Complex> &evals, const double beta)
313  {
314  if (eig_param->use_eigen_qr) {
315  profile.TPSTART(QUDA_PROFILE_EIGENQR);
316  // Construct the upper Hessenberg matrix
317  MatrixXcd Q = MatrixXcd::Identity(n_kr, n_kr);
318  MatrixXcd R = MatrixXcd::Zero(n_kr, n_kr);
319  for (int i = 0; i < n_kr; i++) {
320  for (int j = 0; j < n_kr; j++) { R(i, j) = upperHess[i][j]; }
321  }
322 
323  // QR the upper Hessenberg matrix
324  Eigen::ComplexSchur<MatrixXcd> schurUH;
325  schurUH.computeFromHessenberg(R, Q);
327 
328  profile.TPSTART(QUDA_PROFILE_EIGENEV);
329  // Extract the upper triangular matrix, eigensolve, then
330  // get the eigenvectors of the upper Hessenberg
331  MatrixXcd matUpper = MatrixXcd::Zero(n_kr, n_kr);
332  matUpper = schurUH.matrixT().triangularView<Eigen::Upper>();
333  matUpper.conservativeResize(n_kr, n_kr);
334  Eigen::ComplexEigenSolver<MatrixXcd> eigenSolver(matUpper);
335  Q = schurUH.matrixU() * eigenSolver.eigenvectors();
336 
337  // Update eigenvalues, residuia, and the Q matrix
338  for (int i = 0; i < n_kr; i++) {
339  evals[i] = eigenSolver.eigenvalues()[i];
340  residua[i] = abs(beta * Q.col(i)[n_kr - 1]);
341  for (int j = 0; j < n_kr; j++) Qmat[i][j] = Q(i, j);
342  }
344  } else {
346  // Copy the upper Hessenberg matrix into Rmat, and set Qmat to the identity
347  for (int i = 0; i < n_kr; i++) {
348  for (int j = 0; j < n_kr; j++) {
349  Rmat[i][j] = upperHess[i][j];
350  if (i == j)
351  Qmat[i][j] = 1.0;
352  else
353  Qmat[i][j] = 0.0;
354  }
355  }
356 
357  // This is about as high as one cat get in double without causing
358  // the Arnoldi to compute more restarts.
359  double tol = eig_param->qr_tol;
360  int max_iter = 100000;
361  int iter = 0;
362 
363  Complex temp, discriminant, sol1, sol2, eval;
364  for (int i = n_kr - 2; i >= 0; i--) {
365  while (iter < max_iter) {
366  if (abs(Rmat[i + 1][i]) < tol) {
367  Rmat[i + 1][i] = 0.0;
368  break;
369  } else {
370 
371  // Compute the 2 eigenvalues via the quadratic formula
372  //----------------------------------------------------
373  // The discriminant
374  temp = (Rmat[i][i] - Rmat[i + 1][i + 1]) * (Rmat[i][i] - Rmat[i + 1][i + 1]) / 4.0;
375  discriminant = sqrt(Rmat[i + 1][i] * Rmat[i][i + 1] + temp);
376 
377  // Reuse temp
378  temp = (Rmat[i][i] + Rmat[i + 1][i + 1]) / 2.0;
379 
380  sol1 = temp - Rmat[i + 1][i + 1] + discriminant;
381  sol2 = temp - Rmat[i + 1][i + 1] - discriminant;
382  //----------------------------------------------------
383 
384  // Deduce the better eval to shift
385  eval = Rmat[i + 1][i + 1] + (norm(sol1) < norm(sol2) ? sol1 : sol2);
386 
387  // Shift the eigenvalue
388  for (int j = 0; j < n_kr; j++) Rmat[j][j] -= eval;
389 
390  // Do the QR iteration
392 
393  // Shift back
394  for (int j = 0; j < n_kr; j++) Rmat[j][j] += eval;
395  }
396  iter++;
397  }
398  }
400 
401  profile.TPSTART(QUDA_PROFILE_EIGENEV);
402  // Compute the eigevectors of the origial upper Hessenberg
403  // This is now very cheap because the input matrix to Eigen
404  // is upper triangular.
405  MatrixXcd Q = MatrixXcd::Zero(n_kr, n_kr);
406  MatrixXcd R = MatrixXcd::Zero(n_kr, n_kr);
407  for (int i = 0; i < n_kr; i++) {
408  for (int j = 0; j < n_kr; j++) {
409  Q(i, j) = Qmat[i][j];
410  R(i, j) = Rmat[i][j];
411  }
412  }
413 
414  MatrixXcd matUpper = MatrixXcd::Zero(n_kr, n_kr);
415  matUpper = R.triangularView<Eigen::Upper>();
416  matUpper.conservativeResize(n_kr, n_kr);
417  Eigen::ComplexEigenSolver<MatrixXcd> eigenSolver(matUpper);
418  Q *= eigenSolver.eigenvectors();
419 
420  // Update eigenvalues, residuia, and the Q matrix
421  for (int i = 0; i < n_kr; i++) {
422  evals[i] = eigenSolver.eigenvalues()[i];
423  residua[i] = abs(beta * Q.col(i)[n_kr - 1]);
424  for (int j = 0; j < n_kr; j++) Qmat[i][j] = Q(i, j);
425  }
426 
427  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("QR iterations = %d\n", iter);
429  }
430  }
431 
432  void IRAM::operator()(std::vector<ColorSpinorField *> &kSpace, std::vector<Complex> &evals)
433  {
434  // In case we are deflating an operator, save the tunechache from the inverter
435  saveTuneCache();
436 
437  // Override any user input for block size.
438  block_size = 1;
439 
440  // Pre-launch checks and preparation
441  //---------------------------------------------------------------------------
442  if (getVerbosity() >= QUDA_SUMMARIZE) queryPrec(kSpace[0]->Precision());
443  // Check to see if we are loading eigenvectors
444  if (strcmp(eig_param->vec_infile, "") != 0) {
445  printfQuda("Loading evecs from file name %s\n", eig_param->vec_infile);
446  loadFromFile(mat, kSpace, evals);
447  return;
448  }
449 
450  // Check for an initial guess. If none present, populate with rands, then
451  // orthonormalise
452  prepareInitialGuess(kSpace);
453 
454  // Increase the size of kSpace passed to the function, will be trimmed to
455  // original size before exit.
456  prepareKrylovSpace(kSpace, evals);
457 
458  // Apply a matrix op to the residual to place it in the
459  // range of the operator
460  matVec(mat, *r[0], *kSpace[0]);
461 
462  // Convergence criteria
463  double epsilon = setEpsilon(kSpace[0]->Precision());
464  double epsilon23 = pow(epsilon, 2.0 / 3.0);
465  double beta = 0.0;
466 
467  // Print Eigensolver params
469  //---------------------------------------------------------------------------
470 
471  // Begin IRAM Eigensolver computation
472  //---------------------------------------------------------------------------
473  profile.TPSTART(QUDA_PROFILE_COMPUTE);
474 
475  // Loop over restart iterations.
476  num_keep = 0;
477  while (restart_iter < max_restarts && !converged) {
478  for (int step = num_keep; step < n_kr; step++) arnoldiStep(kSpace, r, beta, step);
479  iter += n_kr - num_keep;
480 
481  // Ritz values and their errors are updated.
483  eigensolveFromUpperHess(evals, beta);
484  profile.TPSTART(QUDA_PROFILE_COMPUTE);
485 
486  num_keep = n_ev;
487  int num_shifts = n_kr - num_keep;
488 
489  // Put unwanted Ritz values first. We will shift these out of the
490  // Krylov space using QR.
492 
493  // Put smallest errors on the unwanted Ritz first to aid in forward stability
494  sortArrays(QUDA_SPECTRUM_LM_EIG, num_shifts, residua, evals);
495 
496  // Convergence test
497  iter_converged = 0;
498  for (int i = 0; i < n_ev; i++) {
499  int idx = n_kr - 1 - i;
500  double rtemp = std::max(epsilon23, abs(evals[idx]));
501  if (residua[idx] < tol * rtemp) {
502  iter_converged++;
504  printf("residuum[%d] = %e, condition = %e\n", i, residua[idx], tol * abs(evals[idx]));
505  } else {
506  // Unlikely to find new converged eigenvalues
507  break;
508  }
509  }
510 
511  int num_keep0 = num_keep;
512  iter_keep = std::min(iter_converged + (n_kr - num_converged) / 2, n_kr - 12);
513 
516  num_shifts = n_kr - num_keep;
517 
518  if (getVerbosity() >= QUDA_VERBOSE)
519  printfQuda("%04d converged eigenvalues at iter %d\n", num_converged, restart_iter);
520 
521  if (num_converged >= n_conv) {
522 
524  eigensolveFromUpperHess(evals, beta);
525  // Rotate the Krylov space
526  rotateBasis(kSpace, n_kr);
527  // Reorder the Krylov space and Ritz values
528  reorder(kSpace, evals, eig_param->spectrum);
529 
530  // Compute the eigenvalues.
531  profile.TPSTART(QUDA_PROFILE_COMPUTE);
532  computeEvals(mat, kSpace, evals);
533 
534  converged = true;
535 
536  } else {
537 
538  // If num_keep changed, we resort the Ritz values and residua
539  if (num_keep0 < num_keep) {
541  sortArrays(QUDA_SPECTRUM_LM_EIG, num_shifts, residua, evals);
542  }
543 
545  // Apply the shifts of the unwated Ritz values via QR
546  qrShifts(evals, num_shifts);
547 
548  // Compress the Krylov space using the accumulated Givens rotations in Qmat
549  rotateBasis(kSpace, num_keep + 1);
550  profile.TPSTART(QUDA_PROFILE_COMPUTE);
551 
552  // Update the residual vector
553  blas::caxpby(upperHess[num_keep][num_keep - 1], *kSpace[num_keep], Qmat[n_kr - 1][num_keep - 1], *r[0]);
554 
555  if (sqrt(blas::norm2(*r[0])) < epsilon) { errorQuda("IRAM has encountered an invariant subspace..."); }
556  }
557  restart_iter++;
558  }
559 
561 
562  // Post computation report
563  //---------------------------------------------------------------------------
564  if (!converged) {
566  errorQuda("IRAM failed to compute the requested %d vectors with a %d search space and %d Krylov space in %d "
567  "restart steps. Exiting.",
569  } else {
570  warningQuda("IRAM failed to compute the requested %d vectors with a %d search space and %d Krylov space in %d "
571  "restart steps. Continuing with current arnoldi factorisation.",
573  }
574  } else {
575  if (getVerbosity() >= QUDA_SUMMARIZE) {
576  printfQuda("IRAM computed the requested %d vectors with a %d search space and %d Krylov space in %d "
577  "restart steps and %d OP*x operations.\n",
579  }
580  }
581 
582  // Local clean-up
583  cleanUpEigensolver(kSpace, evals);
584  }
585 
586  // Destructor
588  {
589  for (int i = 0; i < n_kr; i++) {
590  host_free(upperHess[i]);
591  host_free(Qmat[i]);
592  host_free(Rmat[i]);
593  }
595  host_free(Qmat);
596  host_free(Rmat);
597  }
598 } // namespace quda
void matVec(const DiracMatrix &mat, ColorSpinorField &out, const ColorSpinorField &in)
Applies the specified matVec operation: M, Mdag, MMdag, MdagM.
void prepareInitialGuess(std::vector< ColorSpinorField * > &kSpace)
Check for an initial guess. If none present, populate with rands, then orthonormalise.
TimeProfile & profile
void sortArrays(QudaEigSpectrumType spec_type, int n, std::vector< Complex > &x, std::vector< Complex > &y)
Sort array the first n elements of x according to spec_type, y comes along for the ride.
const DiracMatrix & mat
std::vector< ColorSpinorField * > r
double setEpsilon(const QudaPrecision prec)
Set the epsilon parameter.
void computeEvals(const DiracMatrix &mat, std::vector< ColorSpinorField * > &evecs, std::vector< Complex > &evals, int size)
Compute eigenvalues and their residiua.
void rotateVecsComplex(std::vector< ColorSpinorField * > &kSpace, const Complex *rot_array, const int offset, const int dim, const int keep, const int locked, TimeProfile &profile)
Rotate the Krylov space.
QudaEigParam * eig_param
void queryPrec(const QudaPrecision prec)
Query the eigensolver precision to stdout.
void prepareKrylovSpace(std::vector< ColorSpinorField * > &kSpace, std::vector< Complex > &evals)
Extend the Krylov space.
void loadFromFile(const DiracMatrix &mat, std::vector< ColorSpinorField * > &eig_vecs, std::vector< Complex > &evals)
Load and check eigenpairs from file.
void cleanUpEigensolver(std::vector< ColorSpinorField * > &kSpace, std::vector< Complex > &evals)
Release memory, save eigenvectors, resize the Krylov space to its original dimension.
void printEigensolverSetup()
Dump the eigensolver parameters to stdout.
std::vector< double > residua
Complex ** Qmat
void rotateBasis(std::vector< ColorSpinorField * > &v, int keep)
Rotate the Krylov space.
Definition: eig_iram.cpp:125
IRAM(const DiracMatrix &mat, QudaEigParam *eig_param, TimeProfile &profile)
Constructor for Thick Restarted Eigensolver class.
Definition: eig_iram.cpp:19
virtual ~IRAM()
Destructor for Thick Restarted Eigensolver class.
Definition: eig_iram.cpp:587
Complex ** upperHess
void qrIteration(Complex **Q, Complex **R)
Apply One step of the the QR algorithm.
Definition: eig_iram.cpp:227
void eigensolveFromUpperHess(std::vector< Complex > &evals, const double beta)
Get the eigendecomposition from the upper Hessenberg matrix via QR.
Definition: eig_iram.cpp:312
void arnoldiStep(std::vector< ColorSpinorField * > &v, std::vector< ColorSpinorField * > &r, double &beta, int j)
Arnoldi step: extends the Krylov space by one vector.
Definition: eig_iram.cpp:47
Complex ** Rmat
void reorder(std::vector< ColorSpinorField * > &kSpace, std::vector< Complex > &evals, const QudaEigSpectrumType spec_type)
Reorder the Krylov space and eigenvalues.
Definition: eig_iram.cpp:135
void operator()(std::vector< ColorSpinorField * > &kSpace, std::vector< Complex > &evals)
Compute eigenpairs.
Definition: eig_iram.cpp:432
void qrShifts(const std::vector< Complex > evals, const int num_shifts)
Apply shifts to the upper Hessenberg matrix via QR decomposition.
Definition: eig_iram.cpp:197
bool isRunning(QudaProfileType idx)
Definition: timer.h:260
double epsilon
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
cudaColorSpinorField * tmp
Definition: covdev_test.cpp:34
@ QUDA_DEBUG_VERBOSE
Definition: enum_quda.h:268
@ QUDA_SUMMARIZE
Definition: enum_quda.h:266
@ QUDA_VERBOSE
Definition: enum_quda.h:267
enum QudaEigSpectrumType_s QudaEigSpectrumType
@ QUDA_SPECTRUM_LM_EIG
Definition: enum_quda.h:147
@ QUDA_SPECTRUM_SM_EIG
Definition: enum_quda.h:148
@ QUDA_SPECTRUM_LR_EIG
Definition: enum_quda.h:149
@ QUDA_SPECTRUM_SR_EIG
Definition: enum_quda.h:150
@ QUDA_SPECTRUM_SI_EIG
Definition: enum_quda.h:152
@ QUDA_SPECTRUM_LI_EIG
Definition: enum_quda.h:151
#define safe_malloc(size)
Definition: malloc_quda.h:106
#define host_free(ptr)
Definition: malloc_quda.h:115
void caxpby(const Complex &a, ColorSpinorField &x, const Complex &b, ColorSpinorField &y)
void ax(double a, ColorSpinorField &x)
double norm2(const ColorSpinorField &a)
void caxpy(const Complex &a, ColorSpinorField &x, ColorSpinorField &y)
Complex cDotProduct(ColorSpinorField &, ColorSpinorField &)
__host__ __device__ ValueType conj(ValueType x)
Definition: complex_quda.h:130
void saveTuneCache(bool error=false)
Definition: tune.cpp:439
std::complex< double > Complex
Definition: quda_internal.h:86
__host__ __device__ ValueType sqrt(ValueType x)
Definition: complex_quda.h:120
@ QUDA_PROFILE_INIT
Definition: timer.h:106
@ QUDA_PROFILE_HOST_COMPUTE
Definition: timer.h:119
@ QUDA_PROFILE_COMPUTE
Definition: timer.h:108
@ QUDA_PROFILE_EIGENQR
Definition: timer.h:117
@ QUDA_PROFILE_EIGENEV
Definition: timer.h:116
__host__ __device__ ValueType pow(ValueType x, ExponentType e)
Definition: complex_quda.h:111
__host__ __device__ ValueType norm(const complex< ValueType > &z)
Returns the magnitude of z squared.
__host__ __device__ ValueType abs(ValueType x)
Definition: complex_quda.h:125
QudaEigSpectrumType spectrum
Definition: quda.h:466
QudaBoolean use_eigen_qr
Definition: quda.h:453
char vec_infile[256]
Definition: quda.h:522
double tol
Definition: quda.h:479
QudaBoolean require_convergence
Definition: quda.h:463
double qr_tol
Definition: quda.h:481
DEVICEHOST void swap(Real &a, Real &b)
Definition: svd_quda.h:134
#define printfQuda(...)
Definition: util_quda.h:114
QudaVerbosity getVerbosity()
Definition: util_quda.cpp:21
#define warningQuda(...)
Definition: util_quda.h:132
#define errorQuda(...)
Definition: util_quda.h:120