QUDA  v1.1.0
A library for QCD on GPUs
eig_trlm.cpp
Go to the documentation of this file.
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <math.h>
4 #include <iostream>
5 #include <vector>
6 #include <algorithm>
7 
8 #include <quda_internal.h>
9 #include <eigensolve_quda.h>
10 #include <qio_field.h>
11 #include <color_spinor_field.h>
12 #include <blas_quda.h>
13 #include <util_quda.h>
14 #include <eigen_helper.h>
15 
16 namespace quda
17 {
18  // Thick Restarted Lanczos Method constructor
19  TRLM::TRLM(const DiracMatrix &mat, QudaEigParam *eig_param, TimeProfile &profile) :
20  EigenSolver(mat, eig_param, profile)
21  {
22  bool profile_running = profile.isRunning(QUDA_PROFILE_INIT);
23  if (!profile_running) profile.TPSTART(QUDA_PROFILE_INIT);
24 
25  // Tridiagonal/Arrow matrix
26  alpha = (double *)safe_malloc(n_kr * sizeof(double));
27  beta = (double *)safe_malloc(n_kr * sizeof(double));
28  for (int i = 0; i < n_kr; i++) {
29  alpha[i] = 0.0;
30  beta[i] = 0.0;
31  }
32 
33  // Thick restart specific checks
34  if (n_kr < n_ev + 6) errorQuda("n_kr=%d must be greater than n_ev+6=%d\n", n_kr, n_ev + 6);
35 
37  errorQuda("Only real spectrum type (LR or SR) can be passed to the TR Lanczos solver");
38  }
39 
40  if (!profile_running) profile.TPSTOP(QUDA_PROFILE_INIT);
41  }
42 
43  void TRLM::operator()(std::vector<ColorSpinorField *> &kSpace, std::vector<Complex> &evals)
44  {
45  // In case we are deflating an operator, save the tunechache from the inverter
46  saveTuneCache();
47 
48  // Override any user input for block size.
49  block_size = 1;
50 
51  // Pre-launch checks and preparation
52  //---------------------------------------------------------------------------
53  if (getVerbosity() >= QUDA_VERBOSE) queryPrec(kSpace[0]->Precision());
54  // Check to see if we are loading eigenvectors
55  if (strcmp(eig_param->vec_infile, "") != 0) {
56  printfQuda("Loading evecs from file name %s\n", eig_param->vec_infile);
57  loadFromFile(mat, kSpace, evals);
58  return;
59  }
60 
61  // Check for an initial guess. If none present, populate with rands, then
62  // orthonormalise
63  prepareInitialGuess(kSpace);
64 
65  // Increase the size of kSpace passed to the function, will be trimmed to
66  // original size before exit.
67  prepareKrylovSpace(kSpace, evals);
68 
69  // Check for Chebyshev maximum estimation
70  checkChebyOpMax(mat, kSpace);
71 
72  // Convergence and locking criteria
73  double mat_norm = 0.0;
74  double epsilon = setEpsilon(kSpace[0]->Precision());
75 
76  // Print Eigensolver params
78  //---------------------------------------------------------------------------
79 
80  // Begin TRLM Eigensolver computation
81  //---------------------------------------------------------------------------
83 
84  // Loop over restart iterations.
85  while (restart_iter < max_restarts && !converged) {
86 
87  for (int step = num_keep; step < n_kr; step++) lanczosStep(kSpace, step);
88  iter += (n_kr - num_keep);
89 
90  // The eigenvalues are returned in the alpha array
94 
95  // mat_norm is updated.
96  for (int i = num_locked; i < n_kr; i++)
97  if (fabs(alpha[i]) > mat_norm) mat_norm = fabs(alpha[i]);
98 
99  // Locking check
100  iter_locked = 0;
101  for (int i = 1; i < (n_kr - num_locked); i++) {
102  if (residua[i + num_locked] < epsilon * mat_norm) {
104  printfQuda("**** Locking %d resid=%+.6e condition=%.6e ****\n", i, residua[i + num_locked],
105  epsilon * mat_norm);
106  iter_locked = i;
107  } else {
108  // Unlikely to find new locked pairs
109  break;
110  }
111  }
112 
113  // Convergence check
115  for (int i = iter_locked + 1; i < n_kr - num_locked; i++) {
116  if (residua[i + num_locked] < tol * mat_norm) {
118  printfQuda("**** Converged %d resid=%+.6e condition=%.6e ****\n", i, residua[i + num_locked], tol * mat_norm);
119  iter_converged = i;
120  } else {
121  // Unlikely to find new converged pairs
122  break;
123  }
124  }
125 
126  iter_keep = std::min(iter_converged + (n_kr - num_converged) / 2, n_kr - num_locked - 12);
127 
129  computeKeptRitz(kSpace);
130  profile.TPSTART(QUDA_PROFILE_COMPUTE);
131 
135 
136  if (getVerbosity() >= QUDA_VERBOSE) {
137  printfQuda("%04d converged eigenvalues at restart iter %04d\n", num_converged, restart_iter + 1);
138  }
139 
140  if (getVerbosity() >= QUDA_DEBUG_VERBOSE) {
141  printfQuda("iter Conv = %d\n", iter_converged);
142  printfQuda("iter Keep = %d\n", iter_keep);
143  printfQuda("iter Lock = %d\n", iter_locked);
144  printfQuda("num_converged = %d\n", num_converged);
145  printfQuda("num_keep = %d\n", num_keep);
146  printfQuda("num_locked = %d\n", num_locked);
147  for (int i = 0; i < n_kr; i++) {
148  printfQuda("Ritz[%d] = %.16e residual[%d] = %.16e\n", i, alpha[i], i, residua[i]);
149  }
150  }
151 
152  // Check for convergence
153  if (num_converged >= n_conv) {
154  reorder(kSpace);
155  converged = true;
156  }
157 
158  restart_iter++;
159  }
160 
162 
163  // Post computation report
164  //---------------------------------------------------------------------------
165  if (!converged) {
167  errorQuda("TRLM failed to compute the requested %d vectors with a %d search space and %d Krylov space in %d "
168  "restart steps. Exiting.",
170  } else {
171  warningQuda("TRLM failed to compute the requested %d vectors with a %d search space and %d Krylov space in %d "
172  "restart steps. Continuing with current lanczos factorisation.",
174  }
175  } else {
176  if (getVerbosity() >= QUDA_SUMMARIZE) {
177  printfQuda("TRLM computed the requested %d vectors in %d restart steps and %d OP*x operations.\n", n_conv,
178  restart_iter, iter);
179 
180  // Dump all Ritz values and residua if using Chebyshev
181  for (int i = 0; i < n_conv && eig_param->use_poly_acc; i++) {
182  printfQuda("RitzValue[%04d]: (%+.16e, %+.16e) residual %.16e\n", i, alpha[i], 0.0, residua[i]);
183  }
184  }
185 
186  // Compute eigenvalues
187  computeEvals(mat, kSpace, evals);
188  }
189 
190  // Local clean-up
191  cleanUpEigensolver(kSpace, evals);
192  }
193 
194  // Destructor
196  {
197  ritz_mat.clear();
198  ritz_mat.shrink_to_fit();
199  host_free(alpha);
200  host_free(beta);
201  }
202 
203  // Thick Restart Member functions
204  //---------------------------------------------------------------------------
205  void TRLM::lanczosStep(std::vector<ColorSpinorField *> v, int j)
206  {
207  // Compute r = A * v_j - b_{j-i} * v_{j-1}
208  // r = A * v_j
209 
210  chebyOp(mat, *r[0], *v[j]);
211 
212  // a_j = v_j^dag * r
213  alpha[j] = blas::reDotProduct(*v[j], *r[0]);
214 
215  // r = r - a_j * v_j
216  blas::axpy(-alpha[j], *v[j], *r[0]);
217 
218  int start = (j > num_keep) ? j - 1 : 0;
219 
220  if (j - start > 0) {
221  std::vector<ColorSpinorField *> r_ {r[0]};
222  std::vector<double> beta_;
223  beta_.reserve(j - start);
224  std::vector<ColorSpinorField *> v_;
225  v_.reserve(j - start);
226  for (int i = start; i < j; i++) {
227  beta_.push_back(-beta[i]);
228  v_.push_back(v[i]);
229  }
230  // r = r - b_{j-1} * v_{j-1}
231  blas::axpy(beta_.data(), v_, r_);
232  }
233 
234  // Orthogonalise r against the Krylov space
235  for (int k = 0; k < 1; k++) blockOrthogonalize(v, r, j + 1);
236 
237  // b_j = ||r||
238  beta[j] = sqrt(blas::norm2(*r[0]));
239 
240  // Prepare next step.
241  // v_{j+1} = r / b_j
242  blas::zero(*v[j + 1]);
243  blas::axpy(1.0 / beta[j], *r[0], *v[j + 1]);
244 
245  // Save Lanczos step tuning
246  saveTuneCache();
247  }
248 
249  void TRLM::reorder(std::vector<ColorSpinorField *> &kSpace)
250  {
251  int i = 0;
252 
253  if (reverse) {
254  while (i < n_kr) {
255  if ((i == 0) || (alpha[i - 1] >= alpha[i]))
256  i++;
257  else {
258  std::swap(alpha[i], alpha[i - 1]);
259  std::swap(kSpace[i], kSpace[i - 1]);
260  i--;
261  }
262  }
263  } else {
264  while (i < n_kr) {
265  if ((i == 0) || (alpha[i - 1] <= alpha[i]))
266  i++;
267  else {
268  std::swap(alpha[i], alpha[i - 1]);
269  std::swap(kSpace[i], kSpace[i - 1]);
270  i--;
271  }
272  }
273  }
274  }
275 
277  {
278  profile.TPSTART(QUDA_PROFILE_EIGEN);
279  int dim = n_kr - num_locked;
280  int arrow_pos = num_keep - num_locked;
281 
282  // Eigen objects
283  MatrixXd A = MatrixXd::Zero(dim, dim);
284  ritz_mat.resize(dim * dim);
285  for (int i = 0; i < dim * dim; i++) ritz_mat[i] = 0.0;
286 
287  // Invert the spectrum due to chebyshev
288  if (reverse) {
289  for (int i = num_locked; i < n_kr - 1; i++) {
290  alpha[i] *= -1.0;
291  beta[i] *= -1.0;
292  }
293  alpha[n_kr - 1] *= -1.0;
294  }
295 
296  // Construct arrow mat A_{dim,dim}
297  for (int i = 0; i < dim; i++) {
298 
299  // alpha populates the diagonal
300  A(i, i) = alpha[i + num_locked];
301  }
302 
303  for (int i = 0; i < arrow_pos; i++) {
304 
305  // beta populates the arrow
306  A(i, arrow_pos) = beta[i + num_locked];
307  A(arrow_pos, i) = beta[i + num_locked];
308  }
309 
310  for (int i = arrow_pos; i < dim - 1; i++) {
311 
312  // beta populates the sub-diagonal
313  A(i, i + 1) = beta[i + num_locked];
314  A(i + 1, i) = beta[i + num_locked];
315  }
316 
317  // Eigensolve the arrow matrix
318  SelfAdjointEigenSolver<MatrixXd> eigensolver;
319  eigensolver.compute(A);
320 
321  // repopulate ritz matrix
322  for (int i = 0; i < dim; i++)
323  for (int j = 0; j < dim; j++) ritz_mat[dim * i + j] = eigensolver.eigenvectors().col(i)[j];
324 
325  for (int i = 0; i < dim; i++) {
326  residua[i + num_locked] = fabs(beta[n_kr - 1] * eigensolver.eigenvectors().col(i)[dim - 1]);
327  // Update the alpha array
328  alpha[i + num_locked] = eigensolver.eigenvalues()[i];
329  }
330 
331  // Put spectrum back in order
332  if (reverse) {
333  for (int i = num_locked; i < n_kr; i++) { alpha[i] *= -1.0; }
334  }
335 
336  profile.TPSTOP(QUDA_PROFILE_EIGEN);
337  }
338 
339  void TRLM::computeKeptRitz(std::vector<ColorSpinorField *> &kSpace)
340  {
341  int offset = n_kr + 1;
342  int dim = n_kr - num_locked;
343 
344  // Multi-BLAS friendly array to store part of Ritz matrix we want
345  double *ritz_mat_keep = (double *)safe_malloc((dim * iter_keep) * sizeof(double));
346  for (int j = 0; j < dim; j++) {
347  for (int i = 0; i < iter_keep; i++) { ritz_mat_keep[j * iter_keep + i] = ritz_mat[i * dim + j]; }
348  }
349 
350  rotateVecs(kSpace, ritz_mat_keep, offset, dim, iter_keep, num_locked, profile);
351 
352  // Update residual vector
353  std::swap(kSpace[num_locked + iter_keep], kSpace[n_kr]);
354 
355  // Update sub arrow matrix
356  for (int i = 0; i < iter_keep; i++) beta[i + num_locked] = beta[n_kr - 1] * ritz_mat[dim * (i + 1) - 1];
357 
358  host_free(ritz_mat_keep);
359  }
360 } // namespace quda
void blockOrthogonalize(std::vector< ColorSpinorField * > v, std::vector< ColorSpinorField * > &r, int j)
Orthogonalise input vectors r against vector space v using block-BLAS.
void rotateVecs(std::vector< ColorSpinorField * > &kSpace, const double *rot_array, const int offset, const int dim, const int keep, const int locked, TimeProfile &profile)
Rotate the Krylov space.
void prepareInitialGuess(std::vector< ColorSpinorField * > &kSpace)
Check for an initial guess. If none present, populate with rands, then orthonormalise.
TimeProfile & profile
const DiracMatrix & mat
std::vector< ColorSpinorField * > r
void checkChebyOpMax(const DiracMatrix &mat, std::vector< ColorSpinorField * > &kSpace)
Check for a maximum of the Chebyshev operator.
double setEpsilon(const QudaPrecision prec)
Set the epsilon parameter.
void computeEvals(const DiracMatrix &mat, std::vector< ColorSpinorField * > &evecs, std::vector< Complex > &evals, int size)
Compute eigenvalues and their residiua.
QudaEigParam * eig_param
void queryPrec(const QudaPrecision prec)
Query the eigensolver precision to stdout.
void prepareKrylovSpace(std::vector< ColorSpinorField * > &kSpace, std::vector< Complex > &evals)
Extend the Krylov space.
void loadFromFile(const DiracMatrix &mat, std::vector< ColorSpinorField * > &eig_vecs, std::vector< Complex > &evals)
Load and check eigenpairs from file.
void cleanUpEigensolver(std::vector< ColorSpinorField * > &kSpace, std::vector< Complex > &evals)
Release memory, save eigenvectors, resize the Krylov space to its original dimension.
void printEigensolverSetup()
Dump the eigensolver parameters to stdout.
void chebyOp(const DiracMatrix &mat, ColorSpinorField &out, const ColorSpinorField &in)
Promoted the specified matVec operation: M, Mdag, MMdag, MdagM to a Chebyshev polynomial.
std::vector< double > residua
std::vector< double > ritz_mat
void eigensolveFromArrowMat()
Get the eigendecomposition from the arrow matrix.
Definition: eig_trlm.cpp:276
double * beta
double * alpha
void lanczosStep(std::vector< ColorSpinorField * > v, int j)
Lanczos step: extends the Krylov space.
Definition: eig_trlm.cpp:205
void computeKeptRitz(std::vector< ColorSpinorField * > &kSpace)
Rotate the Ritz vectors usinng the arrow matrix eigendecomposition.
Definition: eig_trlm.cpp:339
void operator()(std::vector< ColorSpinorField * > &kSpace, std::vector< Complex > &evals)
Compute eigenpairs.
Definition: eig_trlm.cpp:43
virtual ~TRLM()
Destructor for Thick Restarted Eigensolver class.
Definition: eig_trlm.cpp:195
TRLM(const DiracMatrix &mat, QudaEigParam *eig_param, TimeProfile &profile)
Constructor for Thick Restarted Eigensolver class.
Definition: eig_trlm.cpp:19
void reorder(std::vector< ColorSpinorField * > &kSpace)
Reorder the Krylov space by eigenvalue.
Definition: eig_trlm.cpp:249
bool isRunning(QudaProfileType idx)
Definition: timer.h:260
std::array< int, 4 > dim
double epsilon
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
@ QUDA_DEBUG_VERBOSE
Definition: enum_quda.h:268
@ QUDA_SUMMARIZE
Definition: enum_quda.h:266
@ QUDA_VERBOSE
Definition: enum_quda.h:267
@ QUDA_SPECTRUM_LR_EIG
Definition: enum_quda.h:149
@ QUDA_SPECTRUM_SR_EIG
Definition: enum_quda.h:150
#define safe_malloc(size)
Definition: malloc_quda.h:106
#define host_free(ptr)
Definition: malloc_quda.h:115
void zero(ColorSpinorField &a)
double norm2(const ColorSpinorField &a)
double reDotProduct(ColorSpinorField &x, ColorSpinorField &y)
void axpy(double a, ColorSpinorField &x, ColorSpinorField &y)
Definition: blas_quda.h:43
void start()
Start profiling.
Definition: device.cpp:226
void saveTuneCache(bool error=false)
Definition: tune.cpp:439
__host__ __device__ ValueType sqrt(ValueType x)
Definition: complex_quda.h:120
@ QUDA_PROFILE_INIT
Definition: timer.h:106
@ QUDA_PROFILE_COMPUTE
Definition: timer.h:108
@ QUDA_PROFILE_EIGEN
Definition: timer.h:114
QudaEigSpectrumType spectrum
Definition: quda.h:466
QudaBoolean use_poly_acc
Definition: quda.h:419
char vec_infile[256]
Definition: quda.h:522
QudaBoolean require_convergence
Definition: quda.h:463
DEVICEHOST void swap(Real &a, Real &b)
Definition: svd_quda.h:134
#define printfQuda(...)
Definition: util_quda.h:114
QudaVerbosity getVerbosity()
Definition: util_quda.cpp:21
#define warningQuda(...)
Definition: util_quda.h:132
#define errorQuda(...)
Definition: util_quda.h:120