QUDA  v1.1.0
A library for QCD on GPUs
eig_block_trlm.cpp
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <math.h>
4 #include <iostream>
5 #include <vector>
6 #include <algorithm>
7 
8 #include <quda_internal.h>
9 #include <eigensolve_quda.h>
10 #include <qio_field.h>
11 #include <color_spinor_field.h>
12 #include <blas_quda.h>
13 #include <util_quda.h>
14 #include <eigen_helper.h>
15 
16 namespace quda
17 {
18  // Thick Restarted Block Lanczos Method constructor
19  BLKTRLM::BLKTRLM(const DiracMatrix &mat, QudaEigParam *eig_param, TimeProfile &profile) :
20  TRLM(mat, eig_param, profile)
21  {
22  bool profile_running = profile.isRunning(QUDA_PROFILE_INIT);
23  if (!profile_running) profile.TPSTART(QUDA_PROFILE_INIT);
24 
25  // Block Thick restart specific checks
26  if (n_kr < n_ev + 6) errorQuda("n_kr=%d must be greater than n_ev+6=%d\n", n_kr, n_ev + 6);
27 
28  if (!(eig_param->spectrum == QUDA_SPECTRUM_LR_EIG || eig_param->spectrum == QUDA_SPECTRUM_SR_EIG)) {
29  errorQuda("Only real spectrum type (LR or SR) can be passed to the TR Lanczos solver");
30  }
31 
32  if (n_kr % block_size != 0) {
33  errorQuda("Block size %d is not a factor of the Krylov space size %d", block_size, n_kr);
34  }
35 
36  if (n_ev % block_size != 0) {
37  errorQuda("Block size %d is not a factor of the compressed space %d", block_size, n_ev);
38  }
39 
40  if (block_size == 0) { errorQuda("Block size %d passed to block eigensolver", block_size); }
41 
42  int n_blocks = n_kr / block_size;
43  block_data_length = block_size * block_size;
44  int arrow_mat_array_size = block_data_length * n_blocks;
45  // Tridiagonal/Arrow matrix
46  block_alpha = (Complex *)safe_malloc(arrow_mat_array_size * sizeof(Complex));
47  block_beta = (Complex *)safe_malloc(arrow_mat_array_size * sizeof(Complex));
48  for (int i = 0; i < arrow_mat_array_size; i++) {
49  block_alpha[i] = 0.0;
50  block_beta[i] = 0.0;
51  }
52 
53  // Temp storage used in blockLanczosStep
54  jth_block = (Complex *)safe_malloc(block_data_length * sizeof(Complex));
55 
56  if (!profile_running) profile.TPSTOP(QUDA_PROFILE_INIT);
57  }
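The constructor above requires the Krylov space (n_kr) and the compressed thick-restart space (n_ev) to be built from whole blocks, with n_kr at least n_ev + 6. A minimal standalone sketch of those constraints, independent of QUDA; the helper name checkBlockEigParams is hypothetical:

#include <stdexcept>

// Hypothetical helper illustrating the block-size constraints enforced by the
// BLKTRLM constructor: n_kr >= n_ev + 6, and both spaces are whole multiples
// of the block size.
void checkBlockEigParams(int n_ev, int n_kr, int block_size)
{
  if (block_size <= 0) throw std::invalid_argument("block_size must be positive");
  if (n_kr < n_ev + 6) throw std::invalid_argument("n_kr must be at least n_ev + 6");
  if (n_kr % block_size != 0) throw std::invalid_argument("n_kr must be a multiple of block_size");
  if (n_ev % block_size != 0) throw std::invalid_argument("n_ev must be a multiple of block_size");
}

For example, n_ev = 24 and n_kr = 96 with block_size = 4 satisfy all three conditions.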
58 
59  void BLKTRLM::operator()(std::vector<ColorSpinorField *> &kSpace, std::vector<Complex> &evals)
60  {
61  // In case we are deflating an operator, save the tunecache from the inverter
62  saveTuneCache();
63 
64  // Pre-launch checks and preparation
65  //---------------------------------------------------------------------------
66  if (getVerbosity() >= QUDA_VERBOSE) queryPrec(kSpace[0]->Precision());
67  // Check to see if we are loading eigenvectors
68  if (strcmp(eig_param->vec_infile, "") != 0) {
69  printfQuda("Loading evecs from file name %s\n", eig_param->vec_infile);
70  loadFromFile(mat, kSpace, evals);
71  return;
72  }
73 
74  // Check for an initial guess. If none present, populate with rands, then
75  // orthonormalise
76  // DMH: This is an important step. With block solvers, an initial guess
77  // of block size N can be a subspace rich in extremal eigenmodes,
78  // N times richer than in non-blocked solvers.
79  // Final paragraph, IV.B https://arxiv.org/pdf/1902.02064.pdf
80  prepareInitialGuess(kSpace);
81 
82  // Increase the size of kSpace passed to the function, will be trimmed to
83  // original size before exit.
84  prepareKrylovSpace(kSpace, evals);
85 
86  // Check for Chebyshev maximum estimation
87  checkChebyOpMax(mat, kSpace);
88 
89  // Convergence and locking criteria
90  double mat_norm = 0.0;
91  double epsilon = setEpsilon(kSpace[0]->Precision());
92 
93  // Print Eigensolver params
94  printEigensolverSetup();
95  //---------------------------------------------------------------------------
96 
97  // Begin BLOCK TRLM Eigensolver computation
98  //---------------------------------------------------------------------------
99  profile.TPSTART(QUDA_PROFILE_COMPUTE);
100 
101  // Loop over restart iterations.
102  while (restart_iter < max_restarts && !converged) {
103 
104  for (int step = num_keep; step < n_kr; step += block_size) blockLanczosStep(kSpace, step);
105  iter += (n_kr - num_keep);
106 
107  // Solve current block tridiag
108  profile.TPSTOP(QUDA_PROFILE_COMPUTE);
109  eigensolveFromBlockArrowMat();
110  profile.TPSTART(QUDA_PROFILE_COMPUTE);
111 
112  // mat_norm is updated.
113  for (int i = num_locked; i < n_kr; i++)
114  if (fabs(alpha[i]) > mat_norm) mat_norm = fabs(alpha[i]);
115 
116  // Locking check
117  iter_locked = 0;
118  for (int i = 1; i < (n_kr - num_locked); i++) {
119  if (residua[i + num_locked] < epsilon * mat_norm) {
120  if (getVerbosity() >= QUDA_DEBUG_VERBOSE)
121  printfQuda("**** Locking %d resid=%+.6e condition=%.6e ****\n", i, residua[i + num_locked],
122  epsilon * mat_norm);
123  iter_locked = i;
124  } else {
125  // Unlikely to find new locked pairs
126  break;
127  }
128  }
129 
130  // Convergence check
131  iter_converged = iter_locked;
132  for (int i = iter_locked + 1; i < n_kr - num_locked; i++) {
133  if (residua[i + num_locked] < tol * mat_norm) {
134  if (getVerbosity() >= QUDA_DEBUG_VERBOSE)
135  printfQuda("**** Converged %d resid=%+.6e condition=%.6e ****\n", i, residua[i + num_locked], tol * mat_norm);
136  iter_converged = i;
137  } else {
138  // Unlikely to find new converged pairs
139  break;
140  }
141  }
142 
143  // In order to maintain the block structure, we truncate the
144  // algorithmic variables to be multiples of the block size
145  iter_keep = std::min(iter_converged + (n_kr - num_converged) / 2, n_kr - num_locked - 12);
146  iter_keep = (iter_keep / block_size) * block_size;
147  profile.TPSTOP(QUDA_PROFILE_COMPUTE);
148  computeBlockKeptRitz(kSpace);
149  profile.TPSTART(QUDA_PROFILE_COMPUTE);
150 
150 
151  num_converged = num_locked + iter_converged;
152  num_keep = num_locked + iter_keep;
153  num_locked += iter_locked;
154 
155  // In order to maintain the block structure, we truncate the
156  // algorithmic variables to be multiples of the block size
157  num_converged = (num_converged / block_size) * block_size;
158  num_keep = (num_keep / block_size) * block_size;
159  num_locked = (num_locked / block_size) * block_size;
160 
161  if (getVerbosity() >= QUDA_VERBOSE) {
162  printfQuda("%04d converged eigenvalues at restart iter %04d\n", num_converged, restart_iter + 1);
163  }
164 
165  if (getVerbosity() >= QUDA_DEBUG_VERBOSE) {
166  printfQuda("iter Conv = %d\n", iter_converged);
167  printfQuda("iter Keep = %d\n", iter_keep);
168  printfQuda("iter Lock = %d\n", iter_locked);
169  printfQuda("num_converged = %d\n", num_converged);
170  printfQuda("num_keep = %d\n", num_keep);
171  printfQuda("num_locked = %d\n", num_locked);
172  for (int i = 0; i < n_kr; i++) {
173  printfQuda("Ritz[%d] = %.16e residual[%d] = %.16e\n", i, alpha[i], i, residua[i]);
174  }
175  }
176 
177  // Check for convergence
178  if (num_converged >= n_conv) converged = true;
179  restart_iter++;
180  }
181 
181 
182  profile.TPSTOP(QUDA_PROFILE_COMPUTE);
183 
184  // Post computation report
185  //---------------------------------------------------------------------------
186  if (!converged) {
187  if (eig_param->require_convergence) {
188  errorQuda("BLOCK TRLM failed to compute the requested %d vectors with a %d search space, %d block size, "
189  "and %d Krylov space in %d restart steps. Exiting.",
190  n_conv, n_ev, block_size, n_kr, max_restarts);
191  } else {
192  warningQuda("BLOCK TRLM failed to compute the requested %d vectors with a %d search space, %d block size, "
193  "and %d Krylov space in %d restart steps. Continuing with current lanczos factorisation.",
194  n_conv, n_ev, block_size, n_kr, max_restarts);
195  }
196  } else {
197  if (getVerbosity() >= QUDA_SUMMARIZE) {
198  printfQuda("BLOCK TRLM computed the requested %d vectors in %d restart steps with %d block size and "
199  "%d BLOCKED OP*x operations.\n",
200  n_conv, restart_iter, block_size, iter / block_size);
201 
202  // Dump all Ritz values and residua
203  for (int i = 0; i < n_conv; i++) {
204  printfQuda("RitzValue[%04d]: (%+.16e, %+.16e) residual %.16e\n", i, alpha[i], 0.0, residua[i]);
205  }
206  }
207 
208  // Compute eigenvalues
209  computeEvals(mat, kSpace, evals);
210  }
211 
212  // Local clean-up
213  cleanUpEigensolver(kSpace, evals);
214  }
215 
216  // Destructor
217  BLKTRLM::~BLKTRLM()
218  {
219  host_free(block_alpha);
220  host_free(block_beta);
221  host_free(jth_block);
222  }
223 
224  // Block Thick Restart Member functions
225  //---------------------------------------------------------------------------
226  void BLKTRLM::blockLanczosStep(std::vector<ColorSpinorField *> v, int j)
227  {
228  // Compute r = A * v_j - b_{j-1} * v_{j-1}
229 
230  // Offset for alpha, beta matrices
231  int arrow_offset = j * block_size;
232  int idx = 0, idx_conj = 0;
233 
234  // r = A * v_j
235  for (int b = 0; b < block_size; b++) chebyOp(mat, *r[b], *v[j + b]);
236 
237  // r = r - b_{j-1} * v_{j-1}
238  int start = (j > num_keep) ? j - block_size : 0;
239  if (j - start > 0) {
240 
241  std::vector<ColorSpinorField *> r_;
242  r_.reserve(block_size);
243  for (int i = 0; i < block_size; i++) r_.push_back(r[i]);
244 
245  int blocks = (j - start) / block_size;
246  std::vector<Complex> beta_;
247  beta_.reserve(blocks * block_data_length);
248 
249  // Switch beta block order from COLUMN to ROW major
250  // This switches the block from upper to lower triangular
251  for (int i = 0; i < blocks; i++) {
252  int block_offset = (i + start / block_size) * block_data_length;
253  for (int b = 0; b < block_size; b++) {
254  for (int c = 0; c < block_size; c++) {
255  idx = c * block_size + b;
256  beta_.push_back(-block_beta[block_offset + idx]);
257  }
258  }
259  }
260 
261  std::vector<ColorSpinorField *> v_;
262  v_.reserve(j - start);
263  for (int i = start; i < j; i++) { v_.push_back(v[i]); }
264  if (blocks == 1)
265  blas::caxpy_L(beta_.data(), v_, r_);
266  else
267  blas::caxpy(beta_.data(), v_, r_);
268  }
269 
270  // a_j = v_j^dag * r
271  std::vector<ColorSpinorField *> vecs_ptr;
272  vecs_ptr.reserve(block_size);
273  for (int b = 0; b < block_size; b++) { vecs_ptr.push_back(v[j + b]); }
274  // Block dot products stored in block_alpha.
275  blas::cDotProduct(block_alpha + arrow_offset, vecs_ptr, r);
276 
277  // Use jth_block to negate alpha data and apply block BLAS.
278  // Data is in square hermitian form, no need to switch to ROW major
279  for (int b = 0; b < block_size; b++) {
280  for (int c = 0; c < block_size; c++) {
281  idx = b * block_size + c;
282  jth_block[idx] = -1.0 * block_alpha[arrow_offset + idx];
283  }
284  }
285 
286  // r = r - a_j * v_j
287  blas::caxpy(jth_block, vecs_ptr, r);
288 
289  // Orthogonalise R[0:block_size] against the Krylov space V[0:j + block_size]
290  for (int k = 0; k < 1; k++) blockOrthogonalize(v, r, j + block_size);
291 
292  // QR decomposition via modified Gram-Schmidt
293  // NB, QR via modified Gram-Schmidt is numerically unstable.
294  // We perform the QR iteratively to recover numerical stability.
295  //
296  // Q_0 * R_0(V) -> Q_0 * R_0 = V
297  // Q_1 * R_1(Q_0) -> Q_1 * R_1 = V * R_0^-1 -> Q_1 * R_1 * R_0 = V
298  // ...
299  // Q_k * R_k(Q_{k-1}) -> Q_k * R_k * R_{k-1} * ... * R_0 = V
300  //
301  // where the Q_k are orthonormal to machine precision and (R_k * R_{k-1} * ... * R_0)^-1
302  // is the matrix that maps V -> Q_k.
303 
304  // Column major order
305  bool orthed = false;
306  int k = 0, kmax = 3;
307  while (!orthed && k < kmax) {
308  // Compute R_{k}
309  if (getVerbosity() >= QUDA_DEBUG_VERBOSE) printfQuda("Orthing k = %d\n", k);
310  for (int b = 0; b < block_size; b++) {
311  double norm = sqrt(blas::norm2(*r[b]));
312  blas::ax(1.0 / norm, *r[b]);
313  jth_block[b * (block_size + 1)] = norm;
314  for (int c = b + 1; c < block_size; c++) {
315 
316  Complex cnorm = blas::cDotProduct(*r[b], *r[c]);
317  blas::caxpy(-cnorm, *r[b], *r[c]);
318 
319  idx = c * block_size + b;
320  idx_conj = b * block_size + c;
321 
322  jth_block[idx] = cnorm;
323  jth_block[idx_conj] = 0.0;
324  }
325  }
326  // Accumulate R_{k} products
327  updateBlockBeta(k, arrow_offset);
328  orthed = orthoCheck(r, block_size);
329  k++;
330  }
331 
332  // Prepare next step.
333  // v_{j+1} = r
334  for (int b = 0; b < block_size; b++) *v[j + block_size + b] = *r[b];
335 
336  // Save Lanczos step tuning
337  saveTuneCache();
338  }
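As the comments in blockLanczosStep note, a single pass of modified Gram-Schmidt is not numerically stable, so the QR is iterated and the R factors are accumulated so that Q * (R_k * ... * R_0) = V. A self-contained sketch of that idea using Eigen, independent of the QUDA implementation; the matrix sizes, pass count, and tolerance are illustrative:

#include <Eigen/Dense>
#include <iostream>

int main()
{
  using Eigen::MatrixXcd;
  const int rows = 8, block = 2;
  MatrixXcd V = MatrixXcd::Random(rows, block); // block of vectors to orthonormalise
  MatrixXcd Q = V;
  MatrixXcd R_accum = MatrixXcd::Identity(block, block);

  for (int k = 0; k < 3; k++) {
    MatrixXcd R = MatrixXcd::Zero(block, block);
    // One pass of modified Gram-Schmidt on the columns of Q
    for (int b = 0; b < block; b++) {
      R(b, b) = Q.col(b).norm();
      Q.col(b) /= R(b, b);
      for (int c = b + 1; c < block; c++) {
        R(b, c) = Q.col(b).dot(Q.col(c)); // <q_b, q_c>, conjugating q_b
        Q.col(c) -= R(b, c) * Q.col(b);
      }
    }
    R_accum = R * R_accum; // accumulate: Q * (R_k * ... * R_0) = V
    double err = (Q.adjoint() * Q - MatrixXcd::Identity(block, block)).norm();
    if (err < 1e-14) break; // orthonormal to (near) machine precision
  }
  std::cout << "reconstruction error: " << (Q * R_accum - V).norm() << std::endl;
  return 0;
}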
339 
340  void BLKTRLM::updateBlockBeta(int k, int arrow_offset)
341  {
342  if (k == 0) {
343  // Copy over the jth_block matrix to block beta, Beta = R_0
344  int idx = 0;
345  for (int b = 0; b < block_size; b++) {
346  for (int c = 0; c < b + 1; c++) {
347  idx = b * block_size + c;
348  block_beta[arrow_offset + idx] = jth_block[idx];
349  }
350  }
351  } else {
352  // Compute BetaNew_ac = (R_k)_ab * Beta_bc
353  // Use Eigen, it's neater
354  MatrixXcd betaN = MatrixXcd::Zero(block_size, block_size);
355  MatrixXcd beta = MatrixXcd::Zero(block_size, block_size);
356  MatrixXcd Rk = MatrixXcd::Zero(block_size, block_size);
357  int idx = 0;
358 
359  // Populate matrices
360  for (int b = 0; b < block_size; b++) {
361  for (int c = 0; c < b + 1; c++) {
362  idx = b * block_size + c;
363  beta(c, b) = block_beta[arrow_offset + idx];
364  Rk(c, b) = jth_block[idx];
365  }
366  }
367 
368  // Multiply using Eigen
369  betaN = Rk * beta;
370 
371  // Copy back to beta array
372  for (int b = 0; b < block_size; b++) {
373  for (int c = 0; c < b + 1; c++) {
374  idx = b * block_size + c;
375  block_beta[arrow_offset + idx] = betaN(c, b);
376  }
377  }
378  }
379  }
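updateBlockBeta above accumulates BetaNew = R_k * Beta while storing only one triangle of each block. A brief standalone check, independent of QUDA, of the property this relies on: the product of two upper-triangular matrices is again upper triangular, so only the triangle of the accumulated beta block ever needs to be stored. Sizes here are arbitrary:

#include <Eigen/Dense>
#include <complex>
#include <iostream>

int main()
{
  using Eigen::MatrixXcd;
  const int n = 3;
  // Two random upper-triangular factors, as produced by successive QR passes
  MatrixXcd R0 = MatrixXcd::Random(n, n), Rk = MatrixXcd::Random(n, n);
  for (int r = 0; r < n; r++)
    for (int c = 0; c < r; c++) { R0(r, c) = 0.0; Rk(r, c) = 0.0; }

  MatrixXcd betaN = Rk * R0; // accumulate, as in updateBlockBeta

  // The product is again upper triangular: its strict lower triangle vanishes
  double strict_lower = 0.0;
  for (int r = 0; r < n; r++)
    for (int c = 0; c < r; c++) strict_lower += std::abs(betaN(r, c));
  std::cout << "sum of strict lower-triangle magnitudes: " << strict_lower << std::endl;
  return 0;
}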
380 
381  void BLKTRLM::eigensolveFromBlockArrowMat()
382  {
383  profile.TPSTART(QUDA_PROFILE_EIGEN);
384  int dim = n_kr - num_locked;
385  if (dim % block_size != 0) errorQuda("dim = %d modulo block_size = %d != 0", dim, block_size);
386  int blocks = dim / block_size;
387 
388  int arrow_pos = num_keep - num_locked;
389  if (arrow_pos % block_size != 0) errorQuda("arrow_pos = %d modulo block_size = %d != 0", arrow_pos, block_size);
390  int block_arrow_pos = arrow_pos / block_size;
391  int num_locked_offset = (num_locked / block_size) * block_data_length;
392 
393  // Eigen objects
394  MatrixXcd T = MatrixXcd::Zero(dim, dim);
395  block_ritz_mat.resize(dim * dim);
396  int idx = 0;
397 
398  // Populate the r and E blocks
399  for (int i = 0; i < block_arrow_pos; i++) {
400  for (int b = 0; b < block_size; b++) {
401  // E block
402  idx = i * block_size + b;
403  T(idx, idx) = alpha[idx + num_locked];
404 
405  for (int c = 0; c < block_size; c++) {
406  // r blocks
407  idx = num_locked_offset + b * block_size + c;
408  T(arrow_pos + c, i * block_size + b) = block_beta[i * block_data_length + idx];
409  T(i * block_size + b, arrow_pos + c) = conj(block_beta[i * block_data_length + idx]);
410  }
411  }
412  }
413 
414  // Add the alpha blocks
415  for (int i = block_arrow_pos; i < blocks; i++) {
416  for (int b = 0; b < block_size; b++) {
417  for (int c = 0; c < block_size; c++) {
418  idx = num_locked_offset + b * block_size + c;
419  T(i * block_size + b, i * block_size + c) = block_alpha[i * block_data_length + idx];
420  }
421  }
422  }
423 
424  // Add the beta blocks
425  for (int i = block_arrow_pos; i < blocks - 1; i++) {
426  for (int b = 0; b < block_size; b++) {
427  for (int c = 0; c < b + 1; c++) {
428  idx = num_locked_offset + b * block_size + c;
429  // Sub diag
430  T((i + 1) * block_size + c, i * block_size + b) = block_beta[i * block_data_length + idx];
431  // Super diag
432  T(i * block_size + b, (i + 1) * block_size + c) = conj(block_beta[i * block_data_length + idx]);
433  }
434  }
435  }
436 
437  // Invert the spectrum due to Chebyshev (except the arrow diagonal)
438  if (reverse) {
439  for (int b = 0; b < dim; b++) {
440  for (int c = 0; c < dim; c++) {
441  T(c, b) *= -1.0;
442  if (restart_iter > 0)
443  if (b == c && b < arrow_pos && c < arrow_pos) T(c, b) *= -1.0;
444  }
445  }
446  }
447 
448  // Eigensolve the arrow matrix
449  SelfAdjointEigenSolver<MatrixXcd> eigensolver;
450  eigensolver.compute(T);
451 
452  // Populate the alpha array with eigenvalues
453  for (int i = 0; i < dim; i++) alpha[i + num_locked] = eigensolver.eigenvalues()[i];
454 
455  // Repopulate ritz matrix: COLUMN major
456  for (int i = 0; i < dim; i++)
457  for (int j = 0; j < dim; j++) block_ritz_mat[dim * i + j] = eigensolver.eigenvectors().col(i)[j];
458 
459  for (int i = 0; i < blocks; i++) {
460  for (int b = 0; b < block_size; b++) {
461  idx = b * (block_size + 1);
462  residua[i * block_size + b + num_locked] = abs(block_beta[n_kr * block_size - block_data_length + idx]
463  * block_ritz_mat[dim * (i * block_size + b + 1) - 1]);
464  }
465  }
466 
467  profile.TPSTOP(QUDA_PROFILE_EIGEN);
468  }
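The routine above assembles a Hermitian "block arrow" matrix (a diagonal section of kept Ritz values, coupling blocks along the arrow, and a block-tridiagonal remainder) and diagonalises it with Eigen's SelfAdjointEigenSolver. A small standalone illustration of the same pattern, independent of QUDA; the dimensions and values are arbitrary, and the remainder is filled with a random Hermitian block rather than a true block-tridiagonal one:

#include <Eigen/Dense>
#include <iostream>

int main()
{
  using Eigen::MatrixXcd;
  const int block = 2, dim = 6, arrow_pos = 2; // first arrow_pos rows/cols hold kept Ritz values
  MatrixXcd T = MatrixXcd::Zero(dim, dim);

  // Diagonal (kept Ritz values) section
  T(0, 0) = 0.5;
  T(1, 1) = 1.5;

  // Arrow coupling block: first new block of rows against the kept columns
  MatrixXcd B = MatrixXcd::Random(block, arrow_pos);
  T.block(arrow_pos, 0, block, arrow_pos) = B;
  T.block(0, arrow_pos, arrow_pos, block) = B.adjoint();

  // Remaining section, made Hermitian (dense here purely for illustration)
  MatrixXcd A = MatrixXcd::Random(dim - arrow_pos, dim - arrow_pos);
  T.block(arrow_pos, arrow_pos, dim - arrow_pos, dim - arrow_pos) = 0.5 * (A + A.adjoint());

  // SelfAdjointEigenSolver reads only the lower triangle and sorts eigenvalues ascending
  Eigen::SelfAdjointEigenSolver<MatrixXcd> es(T);
  std::cout << "eigenvalues:\n" << es.eigenvalues().transpose() << std::endl;
  return 0;
}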
469 
470  void BLKTRLM::computeBlockKeptRitz(std::vector<ColorSpinorField *> &kSpace)
471  {
472  int offset = n_kr + block_size;
473  int dim = n_kr - num_locked;
474 
475  // Multi-BLAS friendly array to store part of Ritz matrix we want
476  Complex *ritz_mat_keep = (Complex *)safe_malloc((dim * iter_keep) * sizeof(Complex));
477  for (int j = 0; j < dim; j++) {
478  for (int i = 0; i < iter_keep; i++) { ritz_mat_keep[j * iter_keep + i] = block_ritz_mat[i * dim + j]; }
479  }
480 
481  rotateVecsComplex(kSpace, ritz_mat_keep, offset, dim, iter_keep, num_locked, profile);
482 
483  // Update residual vectors
484  for (int i = 0; i < block_size; i++) std::swap(kSpace[num_locked + iter_keep + i], kSpace[n_kr + i]);
485 
486  // Compute new r blocks
487  // Use Eigen, it's neater
488  MatrixXcd beta = MatrixXcd::Zero(block_size, block_size);
489  MatrixXcd ri = MatrixXcd::Zero(block_size, block_size);
490  MatrixXcd ritzi = MatrixXcd::Zero(block_size, block_size);
491  int blocks = iter_keep / block_size;
492  int idx = 0;
493  int beta_offset = n_kr * block_size - block_data_length;
494  int num_locked_offset = num_locked * block_size;
495 
496  for (int b = 0; b < block_size; b++) {
497  for (int c = 0; c < b + 1; c++) {
498  idx = b * block_size + c;
499  beta(c, b) = block_beta[beta_offset + idx];
500  }
501  }
502  for (int i = 0; i < blocks; i++) {
503  for (int b = 0; b < block_size; b++) {
504  for (int c = 0; c < block_size; c++) {
505  idx = i * block_size * dim + b * dim + (dim - block_size) + c;
506  ritzi(c, b) = block_ritz_mat[idx];
507  }
508  }
509 
510  ri = beta * ritzi;
511  for (int b = 0; b < block_size; b++) {
512  for (int c = 0; c < block_size; c++) {
513  idx = num_locked_offset + b * block_size + c;
514  block_beta[i * block_data_length + idx] = ri(c, b);
515  }
516  }
517  }
518 
519  host_free(ritz_mat_keep);
520 
521  // Save Krylov rotation tuning
522  saveTuneCache();
523  }
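computeBlockKeptRitz re-packs the column-major eigenvector matrix block_ritz_mat into a row-major array containing only the kept columns before calling the multi-BLAS rotation. A tiny standalone sketch of that re-indexing, independent of QUDA; packKeptRitz is a hypothetical name:

#include <complex>
#include <vector>

// Repack the first `keep` columns of a column-major (dim x dim) matrix into a
// row-major (dim x keep) array, mirroring the transpose loop in computeBlockKeptRitz.
std::vector<std::complex<double>> packKeptRitz(const std::vector<std::complex<double>> &ritz_col_major,
                                               int dim, int keep)
{
  std::vector<std::complex<double>> packed(dim * keep);
  for (int j = 0; j < dim; j++)    // row index
    for (int i = 0; i < keep; i++) // kept eigenvector (column) index
      packed[j * keep + i] = ritz_col_major[i * dim + j];
  return packed;
}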
524 
525 } // namespace quda
Complex * block_alpha
std::vector< Complex > block_ritz_mat
void operator()(std::vector< ColorSpinorField * > &kSpace, std::vector< Complex > &evals)
Compute eigenpairs.
Complex * block_beta
void eigensolveFromBlockArrowMat()
Get the eigendecomposition from the current block arrow matrix.
void computeBlockKeptRitz(std::vector< ColorSpinorField * > &kSpace)
Rotate the Ritz vectors using the arrow matrix eigendecomposition. Uses a complex Ritz matrix.
Complex * jth_block
BLKTRLM(const DiracMatrix &mat, QudaEigParam *eig_param, TimeProfile &profile)
Constructor for Thick Restarted Eigensolver class.
void updateBlockBeta(int k, int arrow_offset)
Accumulate the R products of QR into the block beta array.
virtual ~BLKTRLM()
Destructor for Thick Restarted Eigensolver class.
void blockLanczosStep(std::vector< ColorSpinorField * > v, int j)
Block Lanczos step: extends the Krylov space in block steps.
void blockOrthogonalize(std::vector< ColorSpinorField * > v, std::vector< ColorSpinorField * > &r, int j)
Orthogonalise input vectors r against vector space v using block-BLAS.
bool orthoCheck(std::vector< ColorSpinorField * > v, int j)
Check orthonormality of input vector space v.
void prepareInitialGuess(std::vector< ColorSpinorField * > &kSpace)
Check for an initial guess. If none present, populate with rands, then orthonormalise.
TimeProfile & profile
const DiracMatrix & mat
std::vector< ColorSpinorField * > r
void checkChebyOpMax(const DiracMatrix &mat, std::vector< ColorSpinorField * > &kSpace)
Check for a maximum of the Chebyshev operator.
double setEpsilon(const QudaPrecision prec)
Set the epsilon parameter.
void computeEvals(const DiracMatrix &mat, std::vector< ColorSpinorField * > &evecs, std::vector< Complex > &evals, int size)
Compute eigenvalues and their residua.
void rotateVecsComplex(std::vector< ColorSpinorField * > &kSpace, const Complex *rot_array, const int offset, const int dim, const int keep, const int locked, TimeProfile &profile)
Rotate the Krylov space.
QudaEigParam * eig_param
void queryPrec(const QudaPrecision prec)
Query the eigensolver precision to stdout.
void prepareKrylovSpace(std::vector< ColorSpinorField * > &kSpace, std::vector< Complex > &evals)
Extend the Krylov space.
void loadFromFile(const DiracMatrix &mat, std::vector< ColorSpinorField * > &eig_vecs, std::vector< Complex > &evals)
Load and check eigenpairs from file.
void cleanUpEigensolver(std::vector< ColorSpinorField * > &kSpace, std::vector< Complex > &evals)
Release memory, save eigenvectors, resize the Krylov space to its original dimension.
void printEigensolverSetup()
Dump the eigensolver parameters to stdout.
void chebyOp(const DiracMatrix &mat, ColorSpinorField &out, const ColorSpinorField &in)
Promote the specified matVec operation (M, Mdag, MMdag, MdagM) to a Chebyshev polynomial.
std::vector< double > residua
Thick Restarted Lanczos Method.
double * beta
double * alpha
bool isRunning(QudaProfileType idx)
Definition: timer.h:260
std::array< int, 4 > dim
double epsilon
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
@ QUDA_DEBUG_VERBOSE
Definition: enum_quda.h:268
@ QUDA_SUMMARIZE
Definition: enum_quda.h:266
@ QUDA_VERBOSE
Definition: enum_quda.h:267
@ QUDA_SPECTRUM_LR_EIG
Definition: enum_quda.h:149
@ QUDA_SPECTRUM_SR_EIG
Definition: enum_quda.h:150
#define safe_malloc(size)
Definition: malloc_quda.h:106
#define host_free(ptr)
Definition: malloc_quda.h:115
void ax(double a, ColorSpinorField &x)
double norm2(const ColorSpinorField &a)
void caxpy_L(const Complex *a, std::vector< ColorSpinorField * > &x, std::vector< ColorSpinorField * > &y)
Compute the block "caxpy_L" over the set of ColorSpinorFields, where the coefficient matrix a is lower triangular.
void caxpy(const Complex &a, ColorSpinorField &x, ColorSpinorField &y)
Complex cDotProduct(ColorSpinorField &, ColorSpinorField &)
void start()
Start profiling.
Definition: device.cpp:226
__host__ __device__ ValueType conj(ValueType x)
Definition: complex_quda.h:130
void saveTuneCache(bool error=false)
Definition: tune.cpp:439
std::complex< double > Complex
Definition: quda_internal.h:86
__host__ __device__ ValueType sqrt(ValueType x)
Definition: complex_quda.h:120
@ QUDA_PROFILE_INIT
Definition: timer.h:106
@ QUDA_PROFILE_COMPUTE
Definition: timer.h:108
@ QUDA_PROFILE_EIGEN
Definition: timer.h:114
__host__ __device__ ValueType norm(const complex< ValueType > &z)
Returns the magnitude of z squared.
QudaEigSpectrumType spectrum
Definition: quda.h:466
char vec_infile[256]
Definition: quda.h:522
QudaBoolean require_convergence
Definition: quda.h:463
DEVICEHOST void swap(Real &a, Real &b)
Definition: svd_quda.h:134
#define printfQuda(...)
Definition: util_quda.h:114
QudaVerbosity getVerbosity()
Definition: util_quda.cpp:21
#define warningQuda(...)
Definition: util_quda.h:132
#define errorQuda(...)
Definition: util_quda.h:120