18 #if (defined(QMP_COMMS) || defined(MPI_COMMS))
28 void arpackErrorHelpNAUPD();
29 void arpackErrorHelpNEUPD();
31 void arpack_solve(std::vector<ColorSpinorField *> &h_evecs, std::vector<Complex> &h_evals,
const DiracMatrix &
mat,
44 printfQuda(
"**** START ARPACK SOLUTION ****\n");
45 #ifndef ARPACK_LOGGING
48 printfQuda(
"Output directed to %s\n", arpack_logfile);
56 #if (defined(QMP_COMMS) || defined(MPI_COMMS))
57 int *fcomm_ =
nullptr;
58 MPI_Fint mpi_comm_fort = MPI_Comm_c2f(MPI_COMM_HANDLE);
59 fcomm_ =
static_cast<int *
>(&mpi_comm_fort);
66 int *iparam_ = (
int *)
safe_malloc(11 *
sizeof(
int));
67 int n_ = h_evecs[0]->Volume() * h_evecs[0]->Nspin() * h_evecs[0]->Ncolor();
68 int n_ev_ = eig_param->
n_ev;
69 int n_kr_ = eig_param->
n_kr;
70 int ldv_ = h_evecs[0]->Volume() * h_evecs[0]->Nspin() * h_evecs[0]->Ncolor();
71 int lworkl_ = (3 * n_kr_ * n_kr_ + 5 * n_kr_) * 2;
73 int max_iter = eig_param->
max_restarts * (n_kr_ - n_ev_) + n_ev_;
74 int *h_evals_sorted_idx = (
int *)
safe_malloc(n_kr_ *
sizeof(
int));
78 iparam_[2] = max_iter;
99 if (strncmp(
"S", spectrum, 1) == 0 && eig_param->
use_poly_acc) {
106 double tol_ = eig_param->
tol;
107 double *mod_h_evals_sorted = (
double *)
safe_malloc(n_kr_ *
sizeof(
double));
115 for (
int a = 0; a < ldv_; a++) resid_[a] = I;
122 double *w_rwork_ = (
double *)
safe_malloc(n_kr_ *
sizeof(
double));
123 int *select_ = (
int *)
safe_malloc(n_kr_ *
sizeof(
int));
127 std::vector<ColorSpinorField *> h_evecs_arpack;
129 for (
int i = 0; i < n_kr_; i++) {
131 ColorSpinorParam
param(*h_evecs[0]);
142 bool allocate =
true;
143 ColorSpinorField *h_v =
nullptr;
144 ColorSpinorField *d_v =
nullptr;
145 ColorSpinorField *h_v2 =
nullptr;
146 ColorSpinorField *d_v2 =
nullptr;
147 ColorSpinorField *resid =
nullptr;
149 #ifdef ARPACK_LOGGING
152 int arpack_log_u = 9999;
154 #if (defined(QMP_COMMS) || defined(MPI_COMMS))
155 if (arpack_logfile != NULL && (
comm_rank() == 0)) {
156 ARPACK(initlog)(&arpack_log_u, arpack_logfile, strlen(arpack_logfile));
157 int msglvl0 = 9, msglvl3 = 9;
170 printfQuda(
"ARPACK verbosity set to mcaup2=3 mcaupd=3 mceupd=3; \n");
171 printfQuda(
"output is directed to %s\n", arpack_logfile);
175 if (arpack_logfile != NULL) {
176 ARPACK(initlog)(&arpack_log_u, arpack_logfile, strlen(arpack_logfile));
177 int msglvl0 = 9, msglvl3 = 9;
190 printfQuda(
"ARPACK verbosity set to mcaup2=3 mcaupd=3 mceupd=3; \n");
191 printfQuda(
"output is directed to %s\n", arpack_logfile);
209 #if (defined(QMP_COMMS) || defined(MPI_COMMS))
211 (fcomm_, &ido_, &bmat, &n_, spectrum, &n_ev_, &tol_, resid_, &n_kr_, h_evecs_, &n_, iparam_, ipntr_, w_workd_,
212 w_workl_, &lworkl_, w_rwork_, &info_, 1, 2);
215 arpackErrorHelpNAUPD();
216 errorQuda(
"\nError in pznaupd info = %d. Exiting.", info_);
220 (&ido_, &bmat, &n_, spectrum, &n_ev_, &tol_, resid_, &n_kr_, h_evecs_, &n_, iparam_, ipntr_, w_workd_, w_workl_,
221 &lworkl_, w_rwork_, &info_, 1, 2);
223 arpackErrorHelpNAUPD();
224 errorQuda(
"\nError in znaupd info = %d. Exiting.", info_);
232 ColorSpinorParam
param(*h_evecs[0]);
240 param.v = w_workd_ + (ipntr_[0] - 1);
243 param.v = w_workd_ + (ipntr_[1] - 1);
257 if (ido_ == 99 || info_ == 1)
break;
259 if (ido_ == -1 || ido_ == 1) {
269 eig_solver->chebyOp(
mat, *d_v2, *d_v);
283 }
while (99 != ido_ && iter_count < max_iter);
288 printfQuda(
"Finish: iter=%04d info=%d ido=%d\n", iter_count, info_, ido_);
296 #if (defined(QMP_COMMS) || defined(MPI_COMMS))
298 (fcomm_, &rvec_, &howmny, select_, h_evals_, h_evecs_, &n_, &sigma_, w_workev_, &bmat, &n_, spectrum, &n_ev_, &tol_,
299 resid_, &n_kr_, h_evecs_, &n_, iparam_, ipntr_, w_workd_, w_workl_, &lworkl_, w_rwork_, &info_, 1, 1, 2);
301 arpackErrorHelpNEUPD();
302 errorQuda(
"\nError in pzneupd info = %d. You likely need to\n"
303 "increase the maximum ARPACK iterations. Exiting.",
305 }
else if (info_ != 0) {
306 arpackErrorHelpNEUPD();
307 errorQuda(
"\nError in pzneupd info = %d. Exiting.", info_);
311 (&rvec_, &howmny, select_, h_evals_, h_evecs_, &n_, &sigma_, w_workev_, &bmat, &n_, spectrum, &n_ev_, &tol_, resid_,
312 &n_kr_, h_evecs_, &n_, iparam_, ipntr_, w_workd_, w_workl_, &lworkl_, w_rwork_, &info_, 1, 1, 2);
314 arpackErrorHelpNEUPD();
315 errorQuda(
"\nError in zneupd info = %d. You likely need to\n"
316 "increase the maximum ARPACK iterations. Exiting.",
318 }
else if (info_ != 0) {
319 arpackErrorHelpNEUPD();
320 errorQuda(
"\nError in zneupd info = %d. Exiting.", info_);
331 errorQuda(
"ARPACK Error: No shifts could be applied during implicit\n");
335 #ifdef ARPACK_LOGGING
336 #if (defined(QMP_COMMS) || defined(MPI_COMMS))
338 if (arpack_logfile != NULL) { ARPACK(finilog)(&arpack_log_u); }
341 if (arpack_logfile != NULL) ARPACK(finilog)(&arpack_log_u);
347 int nconv = iparam_[4];
350 std::vector<std::pair<double, int>> evals_sorted;
351 for (
int j = 0; j < nconv; j++) { evals_sorted.push_back(std::make_pair(h_evals_[j].real(), j)); }
356 std::sort(evals_sorted.begin(), evals_sorted.end());
357 if (reverse) std::reverse(evals_sorted.begin(), evals_sorted.end());
360 for (
int j = 0; j < nconv; j++) {
362 printfQuda(
"RitzValue[%04d] = %+.16e %+.16e Residual: %+.16e\n", j, real(h_evals_[evals_sorted[j].second]),
363 imag(h_evals_[evals_sorted[j].second]),
364 std::abs(*(w_workl_ + ipntr_[10] - 1 + evals_sorted[j].second)));
368 for (
int i = 0; i < nconv; i++) {
369 int idx = evals_sorted[i].second;
372 *d_v = *h_evecs_arpack[idx];
377 eig_solver->matVec(
mat, *d_v2, *d_v);
383 Complex m_lambda(-h_evals_[idx]);
392 printfQuda(
"EigValue[%04d] = %+.16e %+.16e Residual: %.16e\n", i, real(h_evals_[idx]), imag(h_evals_[idx]),
397 for (
int i = 0; i < nconv; i++) h_evals[i] = h_evals_[evals_sorted[i].second];
400 for (
int i = 0; i < nconv; i++) *h_evecs[i] = *h_evecs_arpack[evals_sorted[i].second];
406 for (
int i = 0; i < n_kr_; i++)
delete h_evecs_arpack[i];
429 void arpackErrorHelpNAUPD()
433 printfQuda(
" If INFO .EQ. 0, a randomly initial residual vector is used.\n");
434 printfQuda(
" If INFO .NE. 0, RESID contains the initial residual vector,\n");
435 printfQuda(
" possibly from a previous run.\n");
438 printfQuda(
" = 1: Maximum number of iterations taken.\n");
439 printfQuda(
" All possible eigenvalues of OP has been found. IPARAM(5)\n");
440 printfQuda(
" returns the number of wanted converged Ritz values.\n");
441 printfQuda(
" = 2: No longer an informational error. Deprecated starting\n");
443 printfQuda(
" = 3: No shifts could be applied during a cycle of the\n");
444 printfQuda(
" Implicitly restarted Arnoldi iteration. One possibility\n");
445 printfQuda(
" is to increase the size of NCV relative to NEV.\n");
449 printfQuda(
" = -3: NCV-NEV >= 1 and less than or equal to N.\n");
450 printfQuda(
" = -4: The maximum number of Arnoldi update iteration\n");
452 printfQuda(
" = -5: WHICH must be 'LM', 'SM', 'LR', 'SR', 'LI', 'SI'\n");
453 printfQuda(
" = -6: BMAT must be one of 'I' or 'G'.\n");
454 printfQuda(
" = -7: Length of private work array is not sufficient.\n");
455 printfQuda(
" = -8: Error return from LAPACK eigenvalue calculation;\n");
456 printfQuda(
" = -9: Starting vector is zero.\n");
457 printfQuda(
" = -10: IPARAM(7) must be 1,2,3.\n");
458 printfQuda(
" = -11: IPARAM(7) = 1 and BMAT = 'G' are incompatible.\n");
459 printfQuda(
" = -12: IPARAM(1) must be equal to 0 or 1.\n");
460 printfQuda(
" = -9999: Could not build an Arnoldi factorization.\n");
461 printfQuda(
" User input error highly likely. Please\n");
462 printfQuda(
" check actual array dimensions and layout.\n");
463 printfQuda(
" IPARAM(5) returns the size of the current Arnoldi\n");
467 void arpackErrorHelpNEUPD()
473 printfQuda(
" = 1: The Schur form computed by LAPACK routine csheqr\n");
474 printfQuda(
" could not be reordered by LAPACK routine ztrsen.\n");
475 printfQuda(
" Re-enter subroutine zneupd with IPARAM(5)=NCV and\n");
476 printfQuda(
" increase the size of the array D to have\n");
477 printfQuda(
" dimension at least dimension NCV and allocate at\n");
479 printfQuda(
" columns for Z. NOTE: Not necessary if Z and V share\n");
480 printfQuda(
" the same space. Please notify the authors if this\n");
484 printfQuda(
" = -3: NCV-NEV >= 1 and less than or equal to N.\n");
485 printfQuda(
" = -5: WHICH must be 'LM', 'SM', 'LR', 'SR', 'LI', 'SI'\n");
486 printfQuda(
" = -6: BMAT must be one of 'I' or 'G'.\n");
487 printfQuda(
" = -7: Length of private work WORKL array is inufficient.\n");
488 printfQuda(
" = -8: Error return from LAPACK eigenvalue calculation.\n");
490 printfQuda(
" = -9: Error return from calculation of eigenvectors.\n");
491 printfQuda(
" Informational error from LAPACK routine ztrevc.\n");
492 printfQuda(
" = -10: IPARAM(7) must be 1,2,3\n");
493 printfQuda(
" = -11: IPARAM(7) = 1 and BMAT = 'G' are incompatible.\n");
494 printfQuda(
" = -12: HOWMNY = 'S' not yet implemented\n");
495 printfQuda(
" = -13: HOWMNY must be one of 'A' or 'P' if RVEC = .true.\n");
496 printfQuda(
" = -14: ZNAUPD did not find any eigenvalues to sufficient\n");
498 printfQuda(
" = -15: ZNEUPD got a different count of the number of\n");
499 printfQuda(
" converged Ritz values than ZNAUPD got. This\n");
500 printfQuda(
" indicates the user probably made an error in\n");
501 printfQuda(
" passing data from ZNAUPD to ZNEUPD or that the\n");
502 printfQuda(
" data was modified before entering ZNEUPD\n");
510 errorQuda(
"(P)ARPACK has not been enabled for this build");
static ColorSpinorField * Create(const ColorSpinorParam ¶m)
static EigenSolver * create(QudaEigParam *eig_param, const DiracMatrix &mat, TimeProfile &profile)
Creates the eigensolver using the parameters given and the matrix.
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
@ QUDA_CUDA_FIELD_LOCATION
@ QUDA_CPU_FIELD_LOCATION
@ QUDA_SPACE_SPIN_COLOR_FIELD_ORDER
@ QUDA_REFERENCE_FIELD_CREATE
#define safe_malloc(size)
void caxpby(const Complex &a, ColorSpinorField &x, const Complex &b, ColorSpinorField &y)
double norm2(const ColorSpinorField &a)
Complex cDotProduct(ColorSpinorField &, ColorSpinorField &)
void arpack_solve(std::vector< ColorSpinorField * > &h_evecs, std::vector< Complex > &h_evals, const DiracMatrix &mat, QudaEigParam *eig_param, TimeProfile &profile)
The QUDA interface function. One passes two allocated arrays to hold the the eigenmode data,...
std::complex< double > Complex
__host__ __device__ ValueType sqrt(ValueType x)
__host__ __device__ ValueType abs(ValueType x)
QudaEigSpectrumType spectrum
QudaFieldLocation location
QudaVerbosity getVerbosity()