QUDA  0.9.0
quda_arpack_interface.cpp
Go to the documentation of this file.
2 
3 #if (defined (QMP_COMMS) || defined (MPI_COMMS))
4 #include <mpi.h>
5 #endif
6 
7 
8 //using namespace quda ;
9 
10  struct SortEvals{
11  double _val;
12  int _idx;
13 
14  static bool small_values;
15 
16  SortEvals(double val, int idx) : _val(val), _idx(idx) {};
17 
18  static bool SelectSmall (SortEvals v1, SortEvals v2) { return (v1._val < v2._val);}
19  static bool SelectLarge (SortEvals v1, SortEvals v2) { return (v1._val > v2._val);}
20 
21  };
22 
23  bool SortEvals::small_values = true;
24 
25  template<typename Float> void arpack_naupd(int &ido, char &bmat, int &n, char *which, int &nev, Float &tol, std::complex<Float> *resid, int &ncv, std::complex<Float> *v, int &ldv,
26  int *iparam, int *ipntr, std::complex<Float> *workd, std::complex<Float> *workl, int &lworkl, Float *rwork, int &info, int *fcomm)
27  {
28 #ifdef ARPACK_LIB
29  if(sizeof(Float) == sizeof(float))
30  {
31  float _tol = static_cast<float>(tol);
32 #ifdef MULTI_GPU
33  ARPACK(pcnaupd)(fcomm, &ido, &bmat, &n, which, &nev, &_tol, reinterpret_cast<std::complex<float> *>(resid), &ncv, reinterpret_cast<std::complex<float> *>(v),
34  &ldv, iparam, ipntr, reinterpret_cast<std::complex<float> *>(workd), reinterpret_cast<std::complex<float> *>(workl), &lworkl, reinterpret_cast<float*>(rwork), &info);
35 #else
36  ARPACK(cnaupd)(&ido, &bmat, &n, which, &nev, &_tol, reinterpret_cast<std::complex<float> *>(resid), &ncv, reinterpret_cast<std::complex<float> *>(v),
37  &ldv, iparam, ipntr, reinterpret_cast<std::complex<float> *>(workd), reinterpret_cast<std::complex<float> *>(workl), &lworkl, reinterpret_cast<float*>(rwork), &info);
38 #endif //MULTI_GPU
39  }
40  else
41  {
42  double _tol = static_cast<double>(tol);
43 #ifdef MULTI_GPU
44  ARPACK(pznaupd)(fcomm, &ido, &bmat, &n, which, &nev, &_tol, reinterpret_cast<std::complex<double> *>(resid), &ncv, reinterpret_cast<std::complex<double> *>(v),
45  &ldv, iparam, ipntr, reinterpret_cast<std::complex<double> *>(workd), reinterpret_cast<std::complex<double> *>(workl), &lworkl, reinterpret_cast<double*>(rwork), &info);
46 #else
47  ARPACK(znaupd)(&ido, &bmat, &n, which, &nev, &_tol, reinterpret_cast<std::complex<double> *>(resid), &ncv, reinterpret_cast<std::complex<double> *>(v),
48  &ldv, iparam, ipntr, reinterpret_cast<std::complex<double> *>(workd), reinterpret_cast<std::complex<double> *>(workl), &lworkl, reinterpret_cast<double*>(rwork), &info);
49 #endif //MULTI_GPU
50  }
51 #endif //ARPACK_LIB
52  return;
53  }
54 
55  template<typename Float> void arpack_neupd (int &comp_evecs, char howmny, int *select, std::complex<Float>* evals, std::complex<Float>* v, int &ldv, std::complex<Float> sigma, std::complex<Float>* workev,
56  char bmat, int &n, char *which, int &nev, Float tol, std::complex<Float>* resid, int &ncv, std::complex<Float>* v1, int &ldv1, int *iparam, int *ipntr,
57  std::complex<Float>* workd, std::complex<Float>* workl, int &lworkl, Float* rwork, int &info, int *fcomm)
58  {
59 #ifdef ARPACK_LIB
60  if(sizeof(Float) == sizeof(float))
61  {
62  float _tol = static_cast<float>(tol);
63  std::complex<float> _sigma = static_cast<std::complex<float> >(sigma);
64 #ifdef MULTI_GPU
65  ARPACK(pcneupd)(fcomm, &comp_evecs, &howmny, select, reinterpret_cast<std::complex<float> *>(evals),
66  reinterpret_cast<std::complex<float> *>(v), &ldv, &_sigma, reinterpret_cast<std::complex<float> *>(workev), &bmat, &n, which,
67  &nev, &_tol, reinterpret_cast<std::complex<float> *>(resid), &ncv, reinterpret_cast<std::complex<float> *>(v1),
68  &ldv1, iparam, ipntr, reinterpret_cast<std::complex<float> *>(workd), reinterpret_cast<std::complex<float> *>(workl),
69  &lworkl, reinterpret_cast<float *>(rwork), &info);
70 #else
71 
72  ARPACK(cneupd)(&comp_evecs, &howmny, select, reinterpret_cast<std::complex<float> *>(evals),
73  reinterpret_cast<std::complex<float> *>(v), &ldv, &_sigma, reinterpret_cast<std::complex<float> *>(workev), &bmat, &n, which,
74  &nev, &_tol, reinterpret_cast<std::complex<float> *>(resid), &ncv, reinterpret_cast<std::complex<float> *>(v1),
75  &ldv1, iparam, ipntr, reinterpret_cast<std::complex<float> *>(workd), reinterpret_cast<std::complex<float> *>(workl),
76  &lworkl, reinterpret_cast<float *>(rwork), &info);
77 #endif //MULTI_GPU
78  }
79  else
80  {
81  double _tol = static_cast<double>(tol);
82  std::complex<double> _sigma = static_cast<std::complex<double> >(sigma);
83 #ifdef MULTI_GPU
84  ARPACK(pzneupd)(fcomm, &comp_evecs, &howmny, select, reinterpret_cast<std::complex<double> *>(evals),
85  reinterpret_cast<std::complex<double> *>(v), &ldv, &_sigma, reinterpret_cast<std::complex<double> *>(workev), &bmat, &n, which,
86  &nev, &_tol, reinterpret_cast<std::complex<double> *>(resid), &ncv, reinterpret_cast<std::complex<double> *>(v1),
87  &ldv1, iparam, ipntr, reinterpret_cast<std::complex<double> *>(workd), reinterpret_cast<std::complex<double> *>(workl),
88  &lworkl, reinterpret_cast<double *>(rwork), &info);
89 #else
90  ARPACK(zneupd)(&comp_evecs, &howmny, select, reinterpret_cast<std::complex<double> *>(evals),
91  reinterpret_cast<std::complex<double> *>(v), &ldv, &_sigma, reinterpret_cast<std::complex<double> *>(workev), &bmat, &n, which,
92  &nev, &_tol, reinterpret_cast<std::complex<double> *>(resid), &ncv, reinterpret_cast<std::complex<double> *>(v1),
93  &ldv1, iparam, ipntr, reinterpret_cast<std::complex<double> *>(workd), reinterpret_cast<std::complex<double> *>(workl),
94  &lworkl, reinterpret_cast<double *>(rwork), &info);
95 #endif //MULTI_GPU
96  }
97 #endif //ARPACK_LIB
98  return;
99  }
100 
101 namespace quda{
102 
103  template<typename Float> class QudaMatvec {
104 
105  protected:
106 
111  /*Vector quda gpu fields required for the operation*/
114 
115  public:
116 
119  //we need an explicit fieldOrder setup here since meta is a none-native field.
121  csParam.location = QUDA_CUDA_FIELD_LOCATION; // hard code to GPU location for null-space generation for now
123  csParam.gammaBasis = QUDA_UKQCD_GAMMA_BASIS;
124  csParam.setPrecision(matPrecision);
125 
128  }
129 
130  virtual ~QudaMatvec() {
131  delete cuda_in ;
132  delete cuda_out;
133  }
134 
135  void operator()(std::complex<Float> *out, std::complex<Float> *in);
136  };
137 
138  template<typename Float>
139  void QudaMatvec<Float>::operator() (std::complex<Float> *out, std::complex<Float> *in)
140  {
141  ColorSpinorParam csParam(*cuda_in);
142  csParam.setPrecision(sizeof(Float) == sizeof(float) ? QUDA_SINGLE_PRECISION : QUDA_DOUBLE_PRECISION);
146 
147  csParam.v = static_cast<void*>(in);
149 
150  csParam.v = static_cast<void*>(out);
152 
153  *cuda_in = *cpu_in;
154  //
155  matEigen(*cuda_out, *cuda_in);
156  //
157  *cpu_out = *cuda_out;
158 
159  delete cpu_in ;
160  delete cpu_out;
161 
162  return;
163  }
164 
165  template<typename Float> class ArpackArgs {
166 
167  private:
168  //main setup:
170  std::vector<ColorSpinorField*> &evecs ; //container of spinor fields
171 
172  //arpack objects:
173  size_t clen;
174  size_t cldn;
175  std::complex<Float> *w_d_; //just keep eigenvalues
176  std::complex<Float> *w_v_; //continuous buffer to keep eigenvectors
177 
179  int nev;//number of eigenvecs to be comupted
180  int ncv;//search subspace dimension (note that 1 <= NCV-NEV and NCV <= N)
181  char *lanczos_which;// ARPACK which="{S,L}{R,I,M}
183  Float tol;
184  int info;
185 
186  public:
187 
188  ArpackArgs(QudaMatvec<Float> &matvec, std::vector<ColorSpinorField*> &evecs, std::complex<Float> *evals, int nev, int ncv, char *which, Float tol) : matvec(matvec), evecs(evecs), w_d_(evals), nev(nev), ncv(ncv), lanczos_which(which), tol(tol), info(0)
189  {
190  clen = evecs[0]->Length() >> 1;//complex length
191  cldn = clen;
192 
193  w_v_ = new std::complex<Float>[cldn*ncv]; /* workspace for evectors (BLAS-matrix), [ld_evec, >=ncv] */
194  }
195 
196  virtual ~ArpackArgs() { delete w_v_; }
197 
198  //Main IRA algorithm driver:
199  void apply();
200  //save computed eigenmodes to the user defined arrays:
201  void save();
202  };
203 
204 
205 //copy fields:
206  template<typename Float>
208  {
209  printfQuda("\nLoad eigenvectors..\n");
210 
211  std::vector<SortEvals> sorted_evals;
212  sorted_evals.reserve(nev);
213 
214  ColorSpinorParam csParam(*evecs[0]);
215 
217  //cpuParam.extendDimensionality();5-dim field
219  csParam.setPrecision(sizeof(Float) == sizeof(float) ? QUDA_SINGLE_PRECISION : QUDA_DOUBLE_PRECISION);
220 
222 
223  std::string arpack_which(lanczos_which);
224 
225  if (arpack_which.compare(std::string("SM"))) {
226  for(int e = 0; e < nev; e++) sorted_evals.push_back( SortEvals(std::norm(w_d_[e]), e));
227  } else if (arpack_which.compare(std::string("SI"))) {
228  for(int e = 0; e < nev; e++) sorted_evals.push_back( SortEvals(w_d_[e].imag(), e));
229  } else if (arpack_which.compare(std::string("SR"))) {
230  for(int e = 0; e < nev; e++) sorted_evals.push_back( SortEvals(w_d_[e].real(), e));
231  } else if (arpack_which.compare(std::string("LM"))) {
232  for(int e = 0; e < nev; e++) sorted_evals.push_back( SortEvals(std::norm(w_d_[e]), e));
233  SortEvals::small_values = false;
234  } else if (arpack_which.compare(std::string("LI"))) {
235  for(int e = 0; e < nev; e++) sorted_evals.push_back( SortEvals(w_d_[e].imag(), e));
236  SortEvals::small_values = false;
237  } else if (arpack_which.compare(std::string("LR"))) {
238  for(int e = 0; e < nev; e++) sorted_evals.push_back( SortEvals(w_d_[e].real(), e));
239  SortEvals::small_values = false;
240  } else {
241  errorQuda("\nSorting option is not supported.\n");
242  }
243 
244  if(SortEvals::small_values) std::stable_sort(sorted_evals.begin(), sorted_evals.end(), SortEvals::SelectSmall );
245  else std::stable_sort(sorted_evals.begin(), sorted_evals.end(), SortEvals::SelectLarge );
246 
247  cpuColorSpinorField *cpu_tmp = nullptr;
248  int ev_id = 0;
249 
250  for(std::vector<ColorSpinorField*>::iterator vec = evecs.begin() ; vec != evecs.end(); ++vec) {
251  int sorted_id = sorted_evals[ev_id++]._idx;
252 
253  printfQuda("%d ,Re= %le, Im= %le\n", sorted_id, w_d_[sorted_id].real(), w_d_[sorted_id].imag());
254 
255  std::complex<Float>* tmp_buffer = &w_v_[sorted_id*cldn];
256  cpuColorSpinorField *curr_nullvec = static_cast<cpuColorSpinorField*> (*vec);
257 
258  csParam.v = static_cast<void*>(tmp_buffer);
259  cpu_tmp = static_cast<cpuColorSpinorField*>(ColorSpinorField::Create(csParam));
260 
261  *curr_nullvec = *cpu_tmp;//this does not work for different precision (usual memcpy)?
262 
263  delete cpu_tmp;
264  }
265 
266  return;
267  }
268 
269  template<typename Float>
271  {
272  int *fcomm = nullptr;
273 #ifdef MULTI_GPU
274  MPI_Fint mpi_comm_fort = MPI_Comm_c2f(MPI_COMM_WORLD);
275  fcomm = static_cast<int*>(&mpi_comm_fort);
276 #endif
277 
278  int max_iter = 4000;
279 
280  /* all FORTRAN communication uses underscored variables */
281  int ido_;
282  int info_;
283  int iparam_[11];
284  int ipntr_[14];
285  int n_ = clen,
286  nev_ = nev,
287  ncv_ = ncv,
288  ldv_ = cldn,
289  lworkl_ = (3 * ncv_ * ncv_ + 5 * ncv_),
290  rvec_ = 1;
291  std::complex<Float> sigma_ = 0.0;
292  Float tol_ = tol;
293 
294  std::complex<Float> *resid_ = new std::complex<Float>[cldn];
295  std::complex<Float> *w_workd_ = new std::complex<Float>[(cldn * 3)];
296  std::complex<Float> *w_workl_ = new std::complex<Float>[lworkl_];
297  Float *w_rwork_ = new Float[ncv_];
298 
299  /* __neupd-only workspace */
300  std::complex<Float> *w_workev_ = new std::complex<Float>[ 2 * ncv_];
301  int *select_ = new int[ ncv_];
302 
303  /* cnaupd cycle */
304  ido_ = 0;
305  info_ = 0;
306  iparam_[0] = 1;
307  iparam_[2] = max_iter;
308  iparam_[3] = 1;
309  iparam_[6] = 1;
310 
311  char howmny='A';//'P'
312  char bmat = 'I';
313 
314  int iter_cnt= 0;
315 
316  do {
317  //interface to arpack routines
318  arpack_naupd<Float>(ido_, bmat, n_, lanczos_which, nev_, tol, resid_, ncv_, w_v_, ldv_, iparam_, ipntr_, w_workd_, w_workl_, lworkl_, w_rwork_, info_, fcomm);
319 
320  if (info_ != 0) errorQuda("\nError in ARPACK CNAUPD (error code %d) , exit.\n", info_);
321 
322  iter_cnt++;
323 
324  if (ido_ == -1 || ido_ == 1) {
325  //apply matrix vector here:
326  matvec(&(w_workd_[(ipntr_[1]-1)]), &(w_workd_[(ipntr_[0]-1)]));
327 
328  if(iter_cnt % 50 == 0) printfQuda("\nIteration : %d\n", iter_cnt);
329  }
330 
331  } while (99 != ido_ && iter_cnt < max_iter);
332 
333  printfQuda("Finish: iter=%04d info=%d ido=%d\n", iter_cnt, info_, ido_);
334 
335  //int conv_cnt = iparam_[4];
336 
337  /* for howmny="P", no additional space is required */
338  arpack_neupd<Float>(rvec_, howmny, select_, w_d_, w_v_, ldv_, sigma_, w_workev_, bmat, n_, lanczos_which,
339  nev_, tol_, resid_, ncv_, w_v_, ldv_, iparam_, ipntr_, w_workd_, w_workl_, lworkl_, w_rwork_, info_, fcomm);
340 
341  if (info_ != 0) errorQuda("\nError in ARPACK CNEUPD (error code %d) , exit.\n", info_);
342 
343  /* cleanup */
344  if (w_workl_ != nullptr) delete [] w_workl_;
345  if (w_rwork_ != nullptr) delete [] w_rwork_;
346  if (w_workev_ != nullptr) delete [] w_workev_;
347  if (select_ != nullptr) delete [] select_;
348 
349  if (w_workd_ != nullptr) delete [] w_workd_;
350  if (resid_ != nullptr) delete [] resid_;
351 
352  return;
353  }
354 
356  template<typename Float>
357  void arpack_solve( std::vector<ColorSpinorField*> &B, void *evals, DiracMatrix &matEigen, QudaPrecision matPrec, QudaPrecision arpackPrec, double tol, int nev, int ncv, char *target)
358  {
359  QudaMatvec<Float> mv(matEigen, matPrec, *B[0]);
360  ArpackArgs<Float> arg(mv, B, static_cast<std::complex<Float> *> (evals), nev , ncv, target, (Float)tol);
361  arg.apply();
362  arg.save();
363 
364  return;
365  }
366 
367  void arpackSolve( std::vector<ColorSpinorField*> &B, void* evals, DiracMatrix &matEigen, QudaPrecision matPrec, QudaPrecision arpackPrec, double tol, int nev, int ncv, char *target)
368  {
369 #ifdef ARPACK_LIB
370  if((nev <= 0) or (nev > static_cast<int>(B[0]->Length()))) errorQuda("Wrong number of the requested eigenvectors.\n");
371  if(((ncv-nev) < 2) or (ncv > static_cast<int>(B[0]->Length()))) errorQuda("Wrong size of the IRAM work subspace.\n");
372  if(arpackPrec == QUDA_DOUBLE_PRECISION) arpack_solve<double>(B, evals, matEigen, matPrec, arpackPrec, tol, nev , ncv, target);
373  else arpack_solve<float> (B, evals, matEigen, matPrec, arpackPrec, tol, nev , ncv, target);
374 #else
375  errorQuda("Arpack library was not built.\n");
376  #endif
377  return;
378  }
379 
380 }
381 
382 
void setPrecision(QudaPrecision precision)
enum QudaPrecision_s QudaPrecision
void operator()(std::complex< Float > *out, std::complex< Float > *in)
__host__ __device__ ValueType norm(const complex< ValueType > &z)
Returns the magnitude of z squared.
Definition: complex_quda.h:896
std::vector< ColorSpinorField * > & evecs
#define errorQuda(...)
Definition: util_quda.h:90
void arpack_naupd(int &ido, char &bmat, int &n, char *which, int &nev, Float &tol, std::complex< Float > *resid, int &ncv, std::complex< Float > *v, int &ldv, int *iparam, int *ipntr, std::complex< Float > *workd, std::complex< Float > *workl, int &lworkl, Float *rwork, int &info, int *fcomm)
void arpack_solve(std::vector< ColorSpinorField *> &B, void *evals, DiracMatrix &matEigen, QudaPrecision matPrec, QudaPrecision arpackPrec, double tol, int nev, int ncv, char *target)
static ColorSpinorField * Create(const ColorSpinorParam &param)
void arpackSolve(std::vector< ColorSpinorField *> &B, void *evals, DiracMatrix &matEigen, QudaPrecision matPrec, QudaPrecision arpackPrec, double tol, int nev, int ncv, char *target)
ColorSpinorField * cuda_in
static bool small_values
static bool SelectLarge(SortEvals v1, SortEvals v2)
ColorSpinorField * cuda_out
void arpack_neupd(int &comp_evecs, char howmny, int *select, std::complex< Float > *evals, std::complex< Float > *v, int &ldv, std::complex< Float > sigma, std::complex< Float > *workev, char bmat, int &n, char *which, int &nev, Float tol, std::complex< Float > *resid, int &ncv, std::complex< Float > *v1, int &ldv1, int *iparam, int *ipntr, std::complex< Float > *workd, std::complex< Float > *workl, int &lworkl, Float *rwork, int &info, int *fcomm)
double tol
Definition: test_util.cpp:1647
QudaFieldLocation location
QudaMatvec< Float > & matvec
std::complex< Float > * w_v_
ColorSpinorParam csParam
Definition: pack_test.cpp:24
cpuColorSpinorField * in
SortEvals(double val, int idx)
ArpackArgs(QudaMatvec< Float > &matvec, std::vector< ColorSpinorField *> &evecs, std::complex< Float > *evals, int nev, int ncv, char *which, Float tol)
cpuColorSpinorField * out
int nev
Definition: test_util.cpp:1669
QudaMatvec(DiracMatrix &matEigen, QudaPrecision prec, ColorSpinorField &meta)
#define printfQuda(...)
Definition: util_quda.h:84
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
Definition: complex_quda.h:880
std::complex< Float > * w_d_
static __inline__ enum cudaRoundMode mode enum cudaRoundMode mode enum cudaRoundMode mode enum cudaRoundMode mode int val
static bool SelectSmall(SortEvals v1, SortEvals v2)
QudaPrecision prec
Definition: test_util.cpp:1615