QUDA  1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
eigensolve_quda.cpp
Go to the documentation of this file.
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <math.h>
4 #include <iostream>
5 #include <vector>
6 #include <algorithm>
7 
8 #include <quda_internal.h>
9 #include <eigensolve_quda.h>
10 #include <qio_field.h>
11 #include <color_spinor_field.h>
12 #include <blas_quda.h>
13 #include <util_quda.h>
14 
15 #include <Eigen/Eigenvalues>
16 #include <Eigen/Dense>
17 
18 bool flags = true;
19 
20 namespace quda
21 {
22 
23  using namespace Eigen;
24 
25  // Eigensolver class
26  //-----------------------------------------------------------------------------
28  eig_param(eig_param),
29  profile(profile),
30  tmp1(nullptr),
31  tmp2(nullptr)
32  {
33  profile.TPSTART(QUDA_PROFILE_INIT);
34 
35  // Problem parameters
36  nEv = eig_param->nEv;
37  nKr = eig_param->nKr;
38  nConv = eig_param->nConv;
39  tol = eig_param->tol;
40  reverse = false;
41 
42  // Algorithm variables
43  converged = false;
44  restart_iter = 0;
45  max_restarts = eig_param->max_restarts;
46  check_interval = eig_param->check_interval;
47  iter = 0;
48  iter_converged = 0;
49  iter_locked = 0;
50  iter_keep = 0;
51  num_converged = 0;
52  num_locked = 0;
53  num_keep = 0;
54 
55  // Sanity checks
56  if (nKr <= nEv) errorQuda("nKr=%d is less than or equal to nEv=%d\n", nKr, nEv);
57  if (nEv < nConv) errorQuda("nConv=%d is greater than nEv=%d\n", nConv, nEv);
58  if (nEv == 0) errorQuda("nEv=0 passed to Eigensolver\n");
59  if (nKr == 0) errorQuda("nKr=0 passed to Eigensolver\n");
60  if (nConv == 0) errorQuda("nConv=0 passed to Eigensolver\n");
61 
62  residua = (double *)safe_malloc(nKr * sizeof(double));
63  for (int i = 0; i < nKr; i++) { residua[i] = 0.0; }
64 
65  // Quda MultiBLAS friendly array
66  Qmat = (Complex *)safe_malloc(nEv * nKr * sizeof(Complex));
67 
68  // Part of the spectrum to be computed.
69  switch (eig_param->spectrum) {
70  case QUDA_SPECTRUM_SR_EIG: strcpy(spectrum, "SR"); break;
71  case QUDA_SPECTRUM_LR_EIG: strcpy(spectrum, "LR"); break;
72  case QUDA_SPECTRUM_SM_EIG: strcpy(spectrum, "SM"); break;
73  case QUDA_SPECTRUM_LM_EIG: strcpy(spectrum, "LM"); break;
74  case QUDA_SPECTRUM_SI_EIG: strcpy(spectrum, "SI"); break;
75  case QUDA_SPECTRUM_LI_EIG: strcpy(spectrum, "LI"); break;
76  default: errorQuda("Unexpected spectrum type %d", eig_param->spectrum);
77  }
78 
79  // Deduce whether to reverse the sorting
80  if (strncmp("L", spectrum, 1) == 0 && !eig_param->use_poly_acc) {
81  reverse = true;
82  } else if (strncmp("S", spectrum, 1) == 0 && eig_param->use_poly_acc) {
83  reverse = true;
84  spectrum[0] = 'L';
85  } else if (strncmp("L", spectrum, 1) == 0 && eig_param->use_poly_acc) {
86  reverse = true;
87  spectrum[0] = 'S';
88  }
89 
90  // Print Eigensolver params
91  if (getVerbosity() >= QUDA_VERBOSE) {
92  printfQuda("spectrum %s\n", spectrum);
93  printfQuda("tol %.4e\n", tol);
94  printfQuda("nConv %d\n", nConv);
95  printfQuda("nEv %d\n", nEv);
96  printfQuda("nKr %d\n", nKr);
97  if (eig_param->use_poly_acc) {
98  printfQuda("polyDeg %d\n", eig_param->poly_deg);
99  printfQuda("a-min %f\n", eig_param->a_min);
100  printfQuda("a-max %f\n", eig_param->a_max);
101  }
102  }
103 
104  profile.TPSTOP(QUDA_PROFILE_INIT);
105  }
106 
107  // We bake the matrix operator 'mat' and the eigensolver parameters into the
108  // eigensolver.
110  {
111  EigenSolver *eig_solver = nullptr;
112 
113  switch (eig_param->eig_type) {
114  case QUDA_EIG_IR_ARNOLDI: errorQuda("IR Arnoldi not implemented"); break;
115  case QUDA_EIG_IR_LANCZOS: errorQuda("IR Lanczos not implemented"); break;
116  case QUDA_EIG_TR_LANCZOS:
117  if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Creating TR Lanczos eigensolver\n");
118  eig_solver = new TRLM(eig_param, mat, profile);
119  break;
120  default: errorQuda("Invalid eig solver type");
121  }
122  return eig_solver;
123  }
124 
125  // Utilities and functions common to all Eigensolver instances
126  //------------------------------------------------------------------------------
127 
129  {
130  if (!tmp1 || !tmp2) {
132  if (!tmp1) tmp1 = ColorSpinorField::Create(param);
133  if (!tmp2) tmp2 = ColorSpinorField::Create(param);
134  }
135  mat(out, in, *tmp1, *tmp2);
136  }
137 
139  {
140  // Just do a simple matVec if no poly acc is requested
141  if (!eig_param->use_poly_acc) {
142  matVec(mat, out, in);
143  return;
144  }
145 
146  if (eig_param->poly_deg == 0) { errorQuda("Polynomial acceleration requested with zero polynomial degree"); }
147 
148  // Compute the polynomial accelerated operator.
149  double a = eig_param->a_min;
150  double b = eig_param->a_max;
151  double delta = (b - a) / 2.0;
152  double theta = (b + a) / 2.0;
153  double sigma1 = -delta / theta;
154  double sigma;
155  double d1 = sigma1 / delta;
156  double d2 = 1.0;
157  double d3;
158 
159  // out = d2 * in + d1 * out
160  // C_1(x) = x
161  matVec(mat, out, in);
162  blas::caxpby(d2, const_cast<ColorSpinorField &>(in), d1, out);
163  if (eig_param->poly_deg == 1) return;
164 
165  // C_0 is the current 'in' vector.
166  // C_1 is the current 'out' vector.
167 
168  // Clone 'in' to two temporary vectors.
171 
172  blas::copy(*tmp1, in);
173  blas::copy(*tmp2, out);
174 
175  // Using Chebyshev polynomial recursion relation,
176  // C_{m+1}(x) = 2*x*C_{m} - C_{m-1}
177 
178  double sigma_old = sigma1;
179 
180  // construct C_{m+1}(x)
181  for (int i = 2; i < eig_param->poly_deg; i++) {
182  sigma = 1.0 / (2.0 / sigma1 - sigma_old);
183 
184  d1 = 2.0 * sigma / delta;
185  d2 = -d1 * theta;
186  d3 = -sigma * sigma_old;
187 
188  // FIXME - we could introduce a fused matVec + blas kernel here, eliminating one temporary
189  // mat*C_{m}(x)
190  matVec(mat, out, *tmp2);
191 
192  Complex d1c(d1, 0.0);
193  Complex d2c(d2, 0.0);
194  Complex d3c(d3, 0.0);
195  blas::caxpbypczw(d3c, *tmp1, d2c, *tmp2, d1c, out, *tmp1);
196  std::swap(tmp1, tmp2);
197 
198  sigma_old = sigma;
199  }
200  blas::copy(out, *tmp2);
201 
202  delete tmp1;
203  delete tmp2;
204  }
205 
206  // Orthogonalise r against V_[j]
207  Complex EigenSolver::blockOrthogonalize(std::vector<ColorSpinorField *> vecs, std::vector<ColorSpinorField *> rvec,
208  int j)
209  {
210  Complex *s = (Complex *)safe_malloc((j + 1) * sizeof(Complex));
211  Complex sum(0.0, 0.0);
212  std::vector<ColorSpinorField *> vecs_ptr;
213  for (int i = 0; i < j + 1; i++) { vecs_ptr.push_back(vecs[i]); }
214  // Block dot products stored in s.
215  blas::cDotProduct(s, vecs_ptr, rvec);
216 
217  // Block orthogonalise
218  for (int i = 0; i < j + 1; i++) {
219  sum += s[i];
220  s[i] *= -1.0;
221  }
222  blas::caxpy(s, vecs_ptr, rvec);
223 
224  host_free(s);
225  return sum;
226  }
227 
228  // Deflate vec, place result in vec_defl
229  void EigenSolver::deflate(std::vector<ColorSpinorField *> vec_defl, std::vector<ColorSpinorField *> vec,
230  std::vector<ColorSpinorField *> eig_vecs, std::vector<Complex> evals)
231  {
232  // number of evecs
233  int n_defl = eig_param->nConv;
234 
235  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Deflating %d vectors\n", n_defl);
236 
237  // Perform Sum_i V_i * (L_i)^{-1} * (V_i)^dag * vec = vec_defl
238  // for all i computed eigenvectors and values.
239 
240  // Pointers to the required Krylov space vectors,
241  // no extra memory is allocated.
242  std::vector<ColorSpinorField *> eig_vecs_ptr;
243  for (int i = 0; i < n_defl; i++) eig_vecs_ptr.push_back(eig_vecs[i]);
244 
245  // 1. Take block inner product: (V_i)^dag * vec = A_i
246  Complex *s = (Complex *)safe_malloc(n_defl * sizeof(Complex));
247  blas::cDotProduct(s, eig_vecs_ptr, vec);
248 
249  // 2. Perform block caxpy: V_i * (L_i)^{-1} * A_i
250  for (int i = 0; i < n_defl; i++) { s[i] /= evals[i].real(); }
251 
252  // 3. Accumulate sum vec_defl = Sum_i V_i * (L_i)^{-1} * A_i
253  blas::zero(*vec_defl[0]);
254  blas::caxpy(s, eig_vecs_ptr, vec_defl);
255  // FIXME - we can optimize the zeroing out with a "multi-caxy"
256  // function that just writes over vec_defl and doesn't sum. When
257  // we exceed the multi-blas limit this would deompose into caxy
258  // for the kernel call and caxpy for the subsequent ones
259 
260  host_free(s);
261  }
262 
263  void EigenSolver::computeSVD(const DiracMatrix &mat, std::vector<ColorSpinorField *> &evecs, std::vector<Complex> &evals)
264  {
265 
266  if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Computing SVD of M\n");
267 
268  int nConv = eig_param->nConv;
269  if (evecs.size() != (unsigned int)(2 * nConv))
270  errorQuda("Incorrect deflation space sized %d passed to computeSVD, expected %d", (int)(evecs.size()), 2 * nConv);
271 
272  Complex sigma_tmp[nConv];
273 
274  for (int i = 0; i < nConv; i++) {
275 
276  // This function assumes that you have computed the eigenvectors
277  // of MdagM(MMdag), ie, the right(left) SVD of M. The ith eigen vector in the
278  // array corresponds to the ith right(left) singular vector. We place the
279  // computed left(right) singular vectors in the second half of the array. We
280  // assume that right vectors are given and we compute the left.
281  //
282  // As a cross check, we recompute the singular values from mat vecs rather
283  // than make the direct relation (sigma_i)^2 = |lambda_i|
284  //--------------------------------------------------------------------------
285 
286  // Lambda already contains the square root of the eigenvalue of the norm op.
287  Complex lambda = evals[i];
288 
289  // M*Rev_i = M*Rsv_i = sigma_i Lsv_i
290  mat.Expose()->M(*evecs[nConv + i], *evecs[i]);
291 
292  // sigma_i = sqrt(sigma_i (Lsv_i)^dag * sigma_i * Lsv_i )
293  Complex sigma_sq = blas::cDotProduct(*evecs[nConv + i], *evecs[nConv + i]);
294  sigma_tmp[i] = Complex(sqrt(sigma_sq.real()), sqrt(abs(sigma_sq.imag())));
295 
296  // Normalise the Lsv: sigma_i Lsv_i -> Lsv_i
297  double norm = sqrt(blas::norm2(*evecs[nConv + i]));
298  blas::ax(1.0 / norm, *evecs[nConv + i]);
299 
300  if (getVerbosity() >= QUDA_SUMMARIZE)
301  printfQuda("Sval[%04d] = %+.16e %+.16e sigma - sqrt(|lambda|) = %+.16e\n", i, sigma_tmp[i].real(),
302  sigma_tmp[i].imag(), sigma_tmp[i].real() - sqrt(abs(lambda.real())));
303  evals[i] = sigma_tmp[i];
304  //--------------------------------------------------------------------------
305  }
306  }
307 
308  // Deflate vec, place result in vec_defl
309  void EigenSolver::deflateSVD(std::vector<ColorSpinorField *> vec_defl, std::vector<ColorSpinorField *> vec,
310  std::vector<ColorSpinorField *> eig_vecs, std::vector<Complex> evals)
311  {
312  // number of evecs
313  int n_defl = eig_param->nConv;
314 
315  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Deflating %d left and %d right singular vectors\n", n_defl, n_defl);
316 
317  // Perform Sum_i R_i * (\sigma_i)^{-1} * L_i^dag * vec = vec_defl
318  // for all i computed eigenvectors and values.
319 
320  // 1. Take block inner product: L_i^dag * vec = A_i
321  std::vector<ColorSpinorField *> left_vecs_ptr;
322  for (int i = n_defl; i < 2 * n_defl; i++) left_vecs_ptr.push_back(eig_vecs[i]);
323  Complex *s = (Complex *)safe_malloc(n_defl * sizeof(Complex));
324  blas::cDotProduct(s, left_vecs_ptr, vec);
325 
326  // 2. Perform block caxpy
327  // A_i -> (\sigma_i)^{-1} * A_i
328  // vec_defl = Sum_i (R_i)^{-1} * A_i
329  blas::zero(*vec_defl[0]);
330  std::vector<ColorSpinorField *> right_vecs_ptr;
331  for (int i = 0; i < n_defl; i++) {
332  right_vecs_ptr.push_back(eig_vecs[i]);
333  s[i] /= evals[i].real();
334  }
335  blas::caxpy(s, right_vecs_ptr, vec_defl);
336 
337  // FIXME - we can optimize the zeroing out with a "multi-caxy"
338  // function that just writes over vec_defl and doesn't sum. When
339  // we exceed the multi-blas limit this would deompose into caxy
340  // for the kernel call and caxpy for the subsequent ones
341 
342  host_free(s);
343  }
344 
345  void EigenSolver::computeEvals(const DiracMatrix &mat, std::vector<ColorSpinorField *> &evecs,
346  std::vector<Complex> &evals, int size)
347  {
348  for (int i = 0; i < size; i++) {
349  // r = A * v_i
350  matVec(mat, *r[0], *evecs[i]);
351 
352  // lambda_i = v_i^dag A v_i / (v_i^dag * v_i)
353  evals[i] = blas::cDotProduct(*evecs[i], *r[0]) / sqrt(blas::norm2(*evecs[i]));
354 
355  // Measure ||lambda_i*v_i - A*v_i||
356  Complex n_unit(-1.0, 0.0);
357  blas::caxpby(evals[i], *evecs[i], n_unit, *r[0]);
358  residua[i] = sqrt(blas::norm2(*r[0]));
359  }
360  }
361 
362  void EigenSolver::loadVectors(std::vector<ColorSpinorField *> &eig_vecs, std::string vec_infile)
363  {
364  // profile.TPSTOP(QUDA_PROFILE_COMPUTE);
365  // profile.TPSTART(QUDA_PROFILE_IO);
366 
367 #ifdef HAVE_QIO
368  const int Nvec = eig_vecs.size();
369  if (strcmp(vec_infile.c_str(), "") != 0) {
370  if (getVerbosity() >= QUDA_SUMMARIZE)
371  printfQuda("Start loading %04d vectors from %s\n", Nvec, vec_infile.c_str());
372 
373  std::vector<ColorSpinorField *> tmp;
374  if (eig_vecs[0]->Location() == QUDA_CUDA_FIELD_LOCATION) {
375  ColorSpinorParam csParam(*eig_vecs[0]);
377  csParam.setPrecision(eig_vecs[0]->Precision() < QUDA_SINGLE_PRECISION ? QUDA_SINGLE_PRECISION :
378  eig_vecs[0]->Precision());
380  csParam.create = QUDA_NULL_FIELD_CREATE;
381  for (int i = 0; i < Nvec; i++) { tmp.push_back(ColorSpinorField::Create(csParam)); }
382  } else {
383  for (int i = 0; i < Nvec; i++) { tmp.push_back(eig_vecs[i]); }
384  }
385 
386  void **V = static_cast<void **>(safe_malloc(Nvec * sizeof(void *)));
387  for (int i = 0; i < Nvec; i++) {
388  V[i] = tmp[i]->V();
389  if (V[i] == NULL) {
390  if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Could not allocate space for eigenVector[%d]\n", i);
391  }
392  }
393 
394  read_spinor_field(vec_infile.c_str(), &V[0], tmp[0]->Precision(), tmp[0]->X(), tmp[0]->Ncolor(), tmp[0]->Nspin(),
395  Nvec, 0, (char **)0);
396 
397  host_free(V);
398  if (eig_vecs[0]->Location() == QUDA_CUDA_FIELD_LOCATION) {
399  for (int i = 0; i < Nvec; i++) {
400  *eig_vecs[i] = *tmp[i];
401  delete tmp[i];
402  }
403  }
404 
405  if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Done loading vectors\n");
406  } else {
407  errorQuda("No eigenspace input file defined.");
408  }
409 #else
410  errorQuda("\nQIO library was not built.\n");
411 #endif
412  // profile.TPSTOP(QUDA_PROFILE_IO);
413  // profile.TPSTART(QUDA_PROFILE_COMPUTE);
414  }
415 
416  void EigenSolver::saveVectors(const std::vector<ColorSpinorField *> &eig_vecs, std::string vec_outfile)
417  {
418  // profile.TPSTOP(QUDA_PROFILE_COMPUTE);
419  // profile.TPSTART(QUDA_PROFILE_IO);
420 
421 #ifdef HAVE_QIO
422  const int Nvec = eig_vecs.size();
423  std::vector<ColorSpinorField *> tmp;
424  if (eig_vecs[0]->Location() == QUDA_CUDA_FIELD_LOCATION) {
425  ColorSpinorParam csParam(*eig_vecs[0]);
427  csParam.setPrecision(eig_vecs[0]->Precision() < QUDA_SINGLE_PRECISION ? QUDA_SINGLE_PRECISION :
428  eig_vecs[0]->Precision());
430  csParam.create = QUDA_NULL_FIELD_CREATE;
431  for (int i = 0; i < Nvec; i++) {
432  tmp.push_back(ColorSpinorField::Create(csParam));
433  *tmp[i] = *eig_vecs[i];
434  }
435  } else {
436  for (int i = 0; i < Nvec; i++) { tmp.push_back(eig_vecs[i]); }
437  }
438 
439  if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Start saving %d vectors to %s\n", Nvec, vec_outfile.c_str());
440 
441  void **V = static_cast<void **>(safe_malloc(Nvec * sizeof(void *)));
442  for (int i = 0; i < Nvec; i++) {
443  V[i] = tmp[i]->V();
444  if (V[i] == NULL) {
445  if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Could not allocate space for eigenVector[%04d]\n", i);
446  }
447  }
448 
449  write_spinor_field(vec_outfile.c_str(), &V[0], tmp[0]->Precision(), tmp[0]->X(), tmp[0]->Ncolor(), tmp[0]->Nspin(),
450  Nvec, 0, (char **)0);
451 
452  host_free(V);
453  if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Done saving vectors\n");
454  if (eig_vecs[0]->Location() == QUDA_CUDA_FIELD_LOCATION) {
455  for (int i = 0; i < Nvec; i++) delete tmp[i];
456  }
457 
458 #else
459  errorQuda("\nQIO library was not built.\n");
460 #endif
461  // profile.TPSTOP(QUDA_PROFILE_IO);
462  // profile.TPSTART(QUDA_PROFILE_COMPUTE);
463  }
464 
465  void EigenSolver::loadFromFile(const DiracMatrix &mat, std::vector<ColorSpinorField *> &kSpace,
466  std::vector<Complex> &evals)
467  {
468  // Make an array of size nConv
469  std::vector<ColorSpinorField *> vecs_ptr;
470  for (int i = 0; i < nConv; i++) { vecs_ptr.push_back(kSpace[i]); }
471  loadVectors(vecs_ptr, eig_param->vec_infile);
472 
473  // Create the device side residual vector by cloning
474  // the kSpace passed to the function.
475  ColorSpinorParam csParam(*kSpace[0]);
476  csParam.create = QUDA_ZERO_FIELD_CREATE;
477  r.push_back(ColorSpinorField::Create(csParam));
478 
479  // Error estimates (residua) given by ||A*vec - lambda*vec||
480  computeEvals(mat, kSpace, evals, nConv);
481  for (int i = 0; i < nConv; i++) {
482  if (getVerbosity() >= QUDA_SUMMARIZE) {
483  printfQuda("EigValue[%04d]: (%+.16e, %+.16e) residual %.16e\n", i, evals[i].real(), evals[i].imag(), residua[i]);
484  }
485  }
486 
487  delete r[0];
488  }
489 
491  {
492  if (tmp1) delete tmp1;
493  if (tmp2) delete tmp2;
495  host_free(Qmat);
496  }
497  //-----------------------------------------------------------------------------
498  //-----------------------------------------------------------------------------
499 
500  // Thick Restarted Lanczos Method constructor
502  EigenSolver(eig_param, profile),
503  mat(mat)
504  {
505  profile.TPSTART(QUDA_PROFILE_INIT);
506 
507  // Tridiagonal/Arrow matrix
508  alpha = (double *)safe_malloc(nKr * sizeof(double));
509  beta = (double *)safe_malloc(nKr * sizeof(double));
510  for (int i = 0; i < nKr; i++) {
511  alpha[i] = 0.0;
512  beta[i] = 0.0;
513  }
514 
515  // Thick restart specific checks
516  if (nKr < nEv + 6) errorQuda("nKr=%d must be greater than nEv+6=%d\n", nKr, nEv + 6);
517 
518  if (!(eig_param->spectrum == QUDA_SPECTRUM_LR_EIG || eig_param->spectrum == QUDA_SPECTRUM_SR_EIG)) {
519  errorQuda("Only real spectrum type (LR or SR) can be passed to the TR Lanczos solver");
520  }
521 
522  profile.TPSTOP(QUDA_PROFILE_INIT);
523  }
524 
525  void TRLM::operator()(std::vector<ColorSpinorField *> &kSpace, std::vector<Complex> &evals)
526  {
527  // Check to see if we are loading eigenvectors
528  if (strcmp(eig_param->vec_infile, "") != 0) {
529  printfQuda("Loading evecs from file name %s\n", eig_param->vec_infile);
530  loadFromFile(mat, kSpace, evals);
531  return;
532  }
533 
534  // Test for an initial guess
535  double norm = sqrt(blas::norm2(*kSpace[0]));
536  if (norm == 0) {
537  if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Initial residual is zero. Populating with rands.\n");
538  if (kSpace[0]->Location() == QUDA_CPU_FIELD_LOCATION) {
539  kSpace[0]->Source(QUDA_RANDOM_SOURCE);
540  } else {
541  RNG *rng = new RNG(*kSpace[0], 1234);
542  rng->Init();
543  spinorNoise(*kSpace[0], *rng, QUDA_NOISE_UNIFORM);
544  rng->Release();
545  delete rng;
546  }
547  }
548 
549  // Normalise initial guess
550  norm = sqrt(blas::norm2(*kSpace[0]));
551  blas::ax(1.0 / norm, *kSpace[0]);
552 
553  // Create a device side residual vector by cloning
554  // the kSpace passed to the function.
555  ColorSpinorParam csParamClone(*kSpace[0]);
556  csParam = csParamClone;
557  // Increase Krylov space to nKr+1 one vector, create residual
558  for (int i = nConv; i < nKr + 1; i++) kSpace.push_back(ColorSpinorField::Create(csParam));
560  r.push_back(ColorSpinorField::Create(csParam));
561  // Increase evals space to nEv
562  for (int i = nConv; i < nEv; i++) evals.push_back(0.0);
563  //---------------------------------------------------------------------------
564 
565  // Convergence and locking criteria
566  double mat_norm = 0.0;
567  double epsilon = DBL_EPSILON;
568  QudaPrecision prec = kSpace[0]->Precision();
569  switch (prec) {
571  epsilon = DBL_EPSILON;
572  if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Running Eigensolver in double precision\n");
573  break;
575  epsilon = FLT_EPSILON;
576  if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Running Eigensolver in single precision\n");
577  break;
578  case QUDA_HALF_PRECISION:
579  epsilon = 2e-3;
580  if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Running Eigensolver in half precision\n");
581  break;
583  epsilon = 5e-2;
584  if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Running Eigensolver in quarter precision\n");
585  break;
586  default: errorQuda("Invalid precision %d", prec);
587  }
588 
589  // Begin TRLM Eigensolver computation
590  //---------------------------------------------------------------------------
591  if (getVerbosity() >= QUDA_SUMMARIZE) {
592  printfQuda("*****************************\n");
593  printfQuda("**** START TRLM SOLUTION ****\n");
594  printfQuda("*****************************\n");
595  }
596 
597  profile.TPSTART(QUDA_PROFILE_COMPUTE);
598 
599  // Loop over restart iterations.
600  while (restart_iter < max_restarts && !converged) {
601 
602  for (int step = num_keep; step < nKr; step++) lanczosStep(kSpace, step);
603  iter += (nKr - num_keep);
604  // if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Restart %d complete\n", restart_iter+1);
605 
606  int arrow_pos = std::max(num_keep - num_locked + 1, 2);
607  // The eigenvalues are returned in the alpha array
610  profile.TPSTART(QUDA_PROFILE_COMPUTE);
611 
612  // mat_norm is updated.
613  for (int i = num_locked; i < nKr; i++)
614  if (fabs(alpha[i]) > mat_norm) mat_norm = fabs(alpha[i]);
615 
616  // Locking check
617  iter_locked = 0;
618  for (int i = 1; i < (nKr - num_locked); i++) {
619  if (residua[i + num_locked] < epsilon * mat_norm) {
621  printfQuda("**** Locking %d resid=%+.6e condition=%.6e ****\n", i, residua[i + num_locked],
622  epsilon * mat_norm);
623  iter_locked = i;
624  } else {
625  // Unlikely to find new locked pairs
626  break;
627  }
628  }
629 
630  // Convergence check
632  for (int i = iter_locked + 1; i < nKr - num_locked; i++) {
633  if (residua[i + num_locked] < tol * mat_norm) {
635  printfQuda("**** Converged %d resid=%+.6e condition=%.6e ****\n", i, residua[i + num_locked], tol * mat_norm);
636  iter_converged = i;
637  } else {
638  // Unlikely to find new converged pairs
639  break;
640  }
641  }
642 
643  iter_keep = std::min(iter_converged + (nKr - num_converged) / 2, nKr - num_locked - 12);
644 
645  computeKeptRitz(kSpace);
646 
647  num_converged = num_locked + iter_converged;
648  num_keep = num_locked + iter_keep;
649  num_locked += iter_locked;
650 
651  if (getVerbosity() >= QUDA_VERBOSE) {
652  // printfQuda("iter Conv = %d\n", iter_converged);
653  // printfQuda("iter Keep = %d\n", iter_keep);
654  // printfQuda("iter Lock = %d\n", iter_locked);
655  printfQuda("%04d converged eigenvalues at restart iter %04d\n", num_converged, restart_iter + 1);
656  // printfQuda("num_converged = %d\n", num_converged);
657  // printfQuda("num_keep = %d\n", num_keep);
658  // printfQuda("num_locked = %d\n", num_locked);
659  }
660 
661  if (getVerbosity() >= QUDA_VERBOSE) {
662  for (int i = 0; i < nKr; i++) {
663  // printfQuda("Ritz[%d] = %.16e residual[%d] = %.16e\n", i, alpha[i], i, residua[i]);
664  }
665  }
666 
667  // Check for convergence
668  if (num_converged >= nConv) {
669  reorder(kSpace);
670  converged = true;
671  }
672 
673  restart_iter++;
674  }
675 
677 
679  printfQuda("kSpace size at convergence/max restarts = %d\n", (int)kSpace.size());
680  // Prune the Krylov space back to size when passed to eigensolver
681  for (unsigned int i = nConv; i < kSpace.size(); i++) { delete kSpace[i]; }
682  kSpace.resize(nConv);
683  evals.resize(nConv);
684 
685  // Post computation report
686  //---------------------------------------------------------------------------
687  if (!converged) {
689  errorQuda("TRLM failed to compute the requested %d vectors with a %d search space and %d Krylov space in %d "
690  "restart steps. Exiting.",
691  nConv, nEv, nKr, max_restarts);
692  } else {
693  warningQuda("TRLM failed to compute the requested %d vectors with a %d search space and %d Krylov space in %d "
694  "restart steps. Continuing with current lanczos factorisation.",
695  nConv, nEv, nKr, max_restarts);
696  }
697  } else {
698  if (getVerbosity() >= QUDA_SUMMARIZE) {
699  printfQuda("TRLM computed the requested %d vectors in %d restart steps and %d OP*x operations.\n", nConv,
700  restart_iter, iter);
701 
702  // Dump all Ritz values and residua
703  for (int i = 0; i < nConv; i++) {
704  printfQuda("RitzValue[%04d]: (%+.16e, %+.16e) residual %.16e\n", i, alpha[i], 0.0, residua[i]);
705  }
706  }
707 
708  // Compute eigenvalues
709  computeEvals(mat, kSpace, evals, nConv);
710  if (getVerbosity() >= QUDA_SUMMARIZE) {
711  for (int i = 0; i < nConv; i++) {
712  printfQuda("EigValue[%04d]: (%+.16e, %+.16e) residual %.16e\n", i, evals[i].real(), evals[i].imag(),
713  residua[i]);
714  }
715  }
716  }
717 
718  // Local clean-up
719  delete r[0];
720 
721  // Only save if outfile is defined
722  if (strcmp(eig_param->vec_outfile, "") != 0) {
723  if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("saving eigenvectors\n");
724  // Make an array of size nConv
725  std::vector<ColorSpinorField *> vecs_ptr;
726  for (int i = 0; i < nConv; i++) { vecs_ptr.push_back(kSpace[i]); }
727  saveVectors(vecs_ptr, eig_param->vec_outfile);
728  }
729 
730  if (getVerbosity() >= QUDA_SUMMARIZE) {
731  printfQuda("*****************************\n");
732  printfQuda("***** END TRLM SOLUTION *****\n");
733  printfQuda("*****************************\n");
734  }
735  }
736 
737  // Destructor
739  {
740  ritz_mat.clear();
741  ritz_mat.shrink_to_fit();
742  host_free(alpha);
743  host_free(beta);
744  }
745 
746  // Thick Restart Member functions
747  //---------------------------------------------------------------------------
748  void TRLM::lanczosStep(std::vector<ColorSpinorField *> v, int j)
749  {
750  // Compute r = A * v_j - b_{j-i} * v_{j-1}
751  // r = A * v_j
752 
753  chebyOp(mat, *r[0], *v[j]);
754 
755  // a_j = v_j^dag * r
756  alpha[j] = blas::reDotProduct(*v[j], *r[0]);
757 
758  // r = r - a_j * v_j
759  blas::axpy(-alpha[j], *v[j], *r[0]);
760 
761  int start = (j > num_keep) ? j - 1 : 0;
762  for (int i = start; i < j; i++) {
763 
764  // r = r - b_{j-1} * v_{j-1}
765  blas::axpy(-beta[i], *v[i], *r[0]);
766  }
767 
768  // Orthogonalise r against the Krylov space
769  if (j > 0)
770  for (int k = 0; k < 1; k++) blockOrthogonalize(v, r, j);
771 
772  // b_j = ||r||
773  beta[j] = sqrt(blas::norm2(*r[0]));
774 
775  // Prepare next step.
776  // v_{j+1} = r / b_j
777  blas::zero(*v[j + 1]);
778  blas::axpy(1.0 / beta[j], *r[0], *v[j + 1]);
779  }
780 
781  void TRLM::reorder(std::vector<ColorSpinorField *> &kSpace)
782  {
783  int i = 0;
784 
785  if (reverse) {
786  while (i < nKr) {
787  if ((i == 0) || (alpha[i - 1] >= alpha[i]))
788  i++;
789  else {
790  double tmp = alpha[i];
791  alpha[i] = alpha[i - 1];
792  alpha[--i] = tmp;
793  std::swap(kSpace[i], kSpace[i - 1]);
794  }
795  }
796  } else {
797  while (i < nKr) {
798  if ((i == 0) || (alpha[i - 1] <= alpha[i]))
799  i++;
800  else {
801  double tmp = alpha[i];
802  alpha[i] = alpha[i - 1];
803  alpha[--i] = tmp;
804  std::swap(kSpace[i], kSpace[i - 1]);
805  }
806  }
807  }
808  }
809 
810  void TRLM::eigensolveFromArrowMat(int num_locked, int arrow_pos)
811  {
812  profile.TPSTART(QUDA_PROFILE_EIGEN);
813  int dim = nKr - num_locked;
814 
815  // Eigen objects
816  MatrixXd A = MatrixXd::Zero(dim, dim);
817  ritz_mat.resize(dim * dim);
818  for (int i = 0; i < dim * dim; i++) ritz_mat[i] = 0.0;
819 
820  // Invert the spectrum due to chebyshev
821  if (reverse) {
822  for (int i = num_locked; i < nKr - 1; i++) {
823  alpha[i] *= -1.0;
824  beta[i] *= -1.0;
825  }
826  alpha[nKr - 1] *= -1.0;
827  }
828 
829  // Construct arrow mat A_{dim,dim}
830  for (int i = 0; i < dim; i++) {
831 
832  // alpha populates the diagonal
833  A(i, i) = alpha[i + num_locked];
834  }
835 
836  for (int i = 0; i < arrow_pos - 1; i++) {
837 
838  // beta populates the arrow
839  A(i, arrow_pos - 1) = beta[i + num_locked];
840  A(arrow_pos - 1, i) = beta[i + num_locked];
841  }
842 
843  for (int i = arrow_pos - 1; i < dim - 1; i++) {
844 
845  // beta populates the sub-diagonal
846  A(i, i + 1) = beta[i + num_locked];
847  A(i + 1, i) = beta[i + num_locked];
848  }
849 
850  // Eigensolve the arrow matrix
851  SelfAdjointEigenSolver<MatrixXd> eigensolver;
852  eigensolver.compute(A);
853 
854  // repopulate ritz matrix
855  for (int i = 0; i < dim; i++)
856  for (int j = 0; j < dim; j++) ritz_mat[dim * i + j] = eigensolver.eigenvectors().col(i)[j];
857 
858  for (int i = 0; i < dim; i++) {
859  residua[i + num_locked] = fabs(beta[nKr - 1] * eigensolver.eigenvectors().col(i)[dim - 1]);
860  // Update the alpha array
861  alpha[i + num_locked] = eigensolver.eigenvalues()[i];
862  }
863 
864  // Put spectrum back in order
865  if (reverse) {
866  for (int i = num_locked; i < nKr; i++) { alpha[i] *= -1.0; }
867  }
868 
869  profile.TPSTOP(QUDA_PROFILE_EIGEN);
870  }
871 
872  void TRLM::computeKeptRitz(std::vector<ColorSpinorField *> &kSpace)
873  {
874 
875  int offset = nKr + 1;
876  int dim = nKr - num_locked;
877 
878  if ((int)kSpace.size() < offset + iter_keep) {
879  for (int i = kSpace.size(); i < offset + iter_keep; i++) {
880  if (getVerbosity() >= QUDA_DEBUG_VERBOSE) printfQuda("Adding %d vector to kSpace\n", i);
881  kSpace.push_back(ColorSpinorField::Create(csParam));
882  }
883  }
884 
885  // Array for multi-BLAS caxpy
886  Complex *ritz_mat_col = (Complex *)safe_malloc((dim - 1) * sizeof(Complex));
887 
888  for (int i = 0; i < iter_keep; i++) {
889  int k = offset + i;
890  *r[0] = *kSpace[num_locked];
891  blas::ax(ritz_mat[dim * i], *r[0]);
892  *kSpace[k] = *r[0];
893 
894  // Pointers to the relevant vectors
895  std::vector<ColorSpinorField *> vecs_ptr;
896  std::vector<ColorSpinorField *> kSpace_ptr;
897  kSpace_ptr.push_back(kSpace[k]);
898  for (int j = 1; j < dim; j++) {
899  vecs_ptr.push_back(kSpace[num_locked + j]);
900  ritz_mat_col[j - 1].real(ritz_mat[i * dim + j]);
901  ritz_mat_col[j - 1].imag(0.0);
902  }
903 
904  // Multi-BLAS axpy
905  blas::caxpy(ritz_mat_col, vecs_ptr, kSpace_ptr);
906  }
907 
908  host_free(ritz_mat_col);
909 
910  for (int i = 0; i < iter_keep; i++) *kSpace[i + num_locked] = *kSpace[offset + i];
911  *kSpace[num_locked + iter_keep] = *kSpace[nKr];
912 
913  for (int i = 0; i < iter_keep; i++) beta[i + num_locked] = beta[nKr - 1] * ritz_mat[dim * (i + 1) - 1];
914  }
915 } // namespace quda
cudaColorSpinorField * tmp2
void ax(double a, ColorSpinorField &x)
Definition: blas_quda.cu:508
void Init()
Initialize CURAND RNG states.
Definition: random.cu:122
void setPrecision(QudaPrecision precision, QudaPrecision ghost_precision=QUDA_INVALID_PRECISION, bool force_native=false)
cudaColorSpinorField * tmp1
enum QudaPrecision_s QudaPrecision
__host__ __device__ ValueType norm(const complex< ValueType > &z)
Returns the magnitude of z squared.
QudaVerbosity getVerbosity()
Definition: util_quda.cpp:21
void eigensolveFromArrowMat(int nLocked, int arror_pos)
Get the eigendecomposition from the arrow matrix.
#define errorQuda(...)
Definition: util_quda.h:121
double norm2(const ColorSpinorField &a)
Definition: reduce_quda.cu:721
#define host_free(ptr)
Definition: malloc_quda.h:71
__host__ __device__ ValueType sqrt(ValueType x)
Definition: complex_quda.h:120
Complex cDotProduct(ColorSpinorField &, ColorSpinorField &)
Definition: reduce_quda.cu:764
double epsilon
Definition: test_util.cpp:1649
void deflate(std::vector< ColorSpinorField *> vec_defl, std::vector< ColorSpinorField *> vec, std::vector< ColorSpinorField *> evecs, std::vector< Complex > evals)
Deflate vector with Eigenvectors.
void chebyOp(const DiracMatrix &mat, ColorSpinorField &out, const ColorSpinorField &in)
Promoted the specified matVec operation: M, Mdag, MMdag, MdagM to a Chebyshev polynomial.
cudaColorSpinorField * tmp
Definition: covdev_test.cpp:44
static ColorSpinorField * Create(const ColorSpinorParam &param)
TRLM(QudaEigParam *eig_param, const DiracMatrix &mat, TimeProfile &profile)
Constructor for Thick Restarted Eigensolver class.
static void loadVectors(std::vector< ColorSpinorField *> &eig_vecs, std::string file)
Load vectors from file.
void caxpbypczw(const Complex &a, ColorSpinorField &x, const Complex &b, ColorSpinorField &y, const Complex &c, ColorSpinorField &z, ColorSpinorField &w)
Definition: blas_quda.cu:528
double reDotProduct(ColorSpinorField &x, ColorSpinorField &y)
Definition: reduce_quda.cu:728
void computeKeptRitz(std::vector< ColorSpinorField *> &kSpace)
Get the eigen-decomposition from the arrow matrix.
void copy(ColorSpinorField &dst, const ColorSpinorField &src)
Definition: copy_quda.cu:355
void operator()(std::vector< ColorSpinorField *> &kSpace, std::vector< Complex > &evals)
Compute eigenpairs.
__host__ __device__ void sum(double &a, double &b)
Definition: blas_helper.cuh:62
void lanczosStep(std::vector< ColorSpinorField *> v, int j)
Lanczos step: extends the Kylov space.
QudaBoolean use_poly_acc
Definition: quda.h:387
QudaGaugeParam param
Definition: pack_test.cpp:17
int nConv
Definition: quda.h:420
static void saveVectors(const std::vector< ColorSpinorField *> &eig_vecs, std::string file)
Save vectors to file.
QudaEigType eig_type
Definition: quda.h:384
void deflateSVD(std::vector< ColorSpinorField *> vec_defl, std::vector< ColorSpinorField *> vec, std::vector< ColorSpinorField *> evecs, std::vector< Complex > evals)
Deflate vector with both left and Right singular vectors.
const DiracMatrix & mat
QudaBoolean require_convergence
Definition: quda.h:408
QudaFieldLocation location
ColorSpinorField * tmp1
static EigenSolver * create(QudaEigParam *eig_param, const DiracMatrix &mat, TimeProfile &profile)
Creates the eigensolver using the parameters given and the matrix.
double * beta
void Release()
Release Device memory for CURAND RNG states.
Definition: random.cu:145
ColorSpinorParam csParam
Definition: pack_test.cpp:24
void loadFromFile(const DiracMatrix &mat, std::vector< ColorSpinorField *> &eig_vecs, std::vector< Complex > &evals)
Load and check eigenpairs from file.
void axpy(double a, ColorSpinorField &x, ColorSpinorField &y)
Definition: blas_quda.h:35
cpuColorSpinorField * in
Class declaration to initialize and hold CURAND RNG states.
Definition: random_quda.h:23
constexpr int size
ColorSpinorParam csParam
#define warningQuda(...)
Definition: util_quda.h:133
int poly_deg
Definition: quda.h:390
double a_min
Definition: quda.h:393
void matVec(const DiracMatrix &mat, ColorSpinorField &out, const ColorSpinorField &in)
Applies the specified matVec operation: M, Mdag, MMdag, MdagM.
std::vector< double > ritz_mat
void computeSVD(const DiracMatrix &mat, std::vector< ColorSpinorField *> &evecs, std::vector< Complex > &evals)
Computes Left/Right SVD from pre computed Right/Left.
std::complex< double > Complex
Definition: quda_internal.h:46
TimeProfile & profile
void caxpy(const Complex &a, ColorSpinorField &x, ColorSpinorField &y)
Definition: blas_quda.cu:512
#define safe_malloc(size)
Definition: malloc_quda.h:66
void zero(ColorSpinorField &a)
Definition: blas_quda.cu:472
int V
Definition: test_util.cpp:27
Complex blockOrthogonalize(std::vector< ColorSpinorField *> v, std::vector< ColorSpinorField *> r, int j)
Orthogonalise input vector r against vector space v using block-BLAS.
virtual void M(ColorSpinorField &out, const ColorSpinorField &in) const =0
ColorSpinorField * tmp2
char vec_outfile[256]
Definition: quda.h:462
double * alpha
virtual ~TRLM()
Destructor for Thick Restarted Eigensolver class.
cpuColorSpinorField * out
double tol
Definition: quda.h:422
DEVICEHOST void swap(Real &a, Real &b)
Definition: svd_quda.h:139
__shared__ float s[]
std::vector< ColorSpinorField * > r
#define printfQuda(...)
Definition: util_quda.h:115
void reorder(std::vector< ColorSpinorField *> &kSpace)
Reorder the Krylov space by eigenvalue.
void write_spinor_field(const char *filename, void *V[], QudaPrecision precision, const int *X, int nColor, int nSpin, int Nvec, int argc, char *argv[])
Definition: qio_field.h:29
int check_interval
Definition: quda.h:424
void caxpby(const Complex &a, ColorSpinorField &x, const Complex &b, ColorSpinorField &y)
Definition: blas_quda.cu:523
int max_restarts
Definition: quda.h:426
QudaEigSpectrumType spectrum
Definition: quda.h:411
__host__ __device__ ValueType abs(ValueType x)
Definition: complex_quda.h:125
void computeEvals(const DiracMatrix &mat, std::vector< ColorSpinorField *> &evecs, std::vector< Complex > &evals, int k)
Compute eigenvalues and their residiua.
bool flags
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
double a_max
Definition: quda.h:394
QudaPrecision prec
Definition: test_util.cpp:1608
void read_spinor_field(const char *filename, void *V[], QudaPrecision precision, const int *X, int nColor, int nSpin, int Nvec, int argc, char *argv[])
Definition: qio_field.h:24
char vec_infile[256]
Definition: quda.h:459
const Dirac * Expose() const
Definition: dirac_quda.h:1135
void spinorNoise(ColorSpinorField &src, RNG &randstates, QudaNoiseType type)
Generate a random noise spinor. This variant allows the user to manage the RNG state.
EigenSolver(QudaEigParam *eig_param, TimeProfile &profile)
Constructor for base Eigensolver class.
QudaEigParam * eig_param
Thick Restarted Lanczos Method.