QUDA  v1.1.0
A library for QCD on GPUs
deflation.cpp
Go to the documentation of this file.
1 #include <deflation.h>
2 #include <qio_field.h>
3 #include <string.h>
4 
5 #include <memory>
6 
7 #ifdef MAGMA_LIB
8 #include <blas_magma.h>
9 #endif
10 
11 #include <eigen_helper.h>
12 
13 namespace quda
14 {
15 
16  using namespace blas;
17  using DynamicStride = Stride<Dynamic, Dynamic>;
18 
19  static auto pinned_allocator = [] (size_t bytes ) { return static_cast<Complex*>(pool_pinned_malloc(bytes)); };
20  static auto pinned_deleter = [] (Complex *hptr) { pool_pinned_free(hptr); };
21 
23  param(param),
24  profile(profile),
25  r(nullptr),
26  Av(nullptr),
27  r_sloppy(nullptr),
28  Av_sloppy(nullptr)
29  {
30  // for reporting level 1 is the fine level but internally use level 0 for indexing
31  printfQuda("Creating deflation space of %d vectors.\n", param.tot_dim);
32 
33  if (param.eig_global.import_vectors) loadVectors(param.RV); // whether to load eigenvectors
34  // create aux fields
35  ColorSpinorParam csParam(param.RV->Component(0));
37  csParam.location = param.location;
38  csParam.mem_type = QUDA_MEMORY_DEVICE;
39  csParam.setPrecision(QUDA_DOUBLE_PRECISION, QUDA_DOUBLE_PRECISION, true); // accum fields always full precision
40  if (csParam.nSpin != 1) csParam.gammaBasis = QUDA_UKQCD_GAMMA_BASIS;
41 
44 
45  if (param.eig_global.cuda_prec_ritz != QUDA_DOUBLE_PRECISION) { // allocate sloppy fields
46  csParam.setPrecision(param.eig_global.cuda_prec_ritz);
48  Av_sloppy = ColorSpinorField::Create(csParam);
49  } else {
50  r_sloppy = r;
51  Av_sloppy = Av;
52  }
53 
54  printfQuda("Deflation space setup completed\n");
55  // now we can run through the verification if requested
56  if (param.eig_global.run_verify && param.eig_global.import_vectors) verify();
57  // print out profiling information for the adaptive setup
58  if (getVerbosity() >= QUDA_SUMMARIZE) profile.Print();
59  }
60 
62  {
64  if (r_sloppy) delete r_sloppy;
65  if (Av_sloppy) delete Av_sloppy;
66  }
67 
68  if (r) delete r;
69  if (Av) delete Av;
70 
71  if (getVerbosity() >= QUDA_SUMMARIZE) profile.Print();
72  }
73 
74  double Deflation::flops() const
75  {
76  double flops = 0;//Do we need to report this?
77  //compute total flops for deflation application. Not sure we really need this.
78  return flops;
79  }
80 
85  {
86  const int n_evs_to_print = param.cur_dim;
87  if (n_evs_to_print == 0) errorQuda("Incorrect size of current deflation space");
88 
89  std::unique_ptr<Complex, decltype(pinned_deleter) > projm( pinned_allocator(param.ld*param.cur_dim * sizeof(Complex)), pinned_deleter);
90 
92 #ifdef MAGMA_LIB
93  memcpy(projm.get(), param.matProj, param.ld*param.cur_dim*sizeof(Complex));
94  std::unique_ptr<double[] > evals(new double[param.ld]);
95  magma_Xheev(projm.get(), param.cur_dim, param.ld, evals.get(), sizeof(Complex));
96 #else
97  errorQuda("MAGMA library was not built");
98 #endif
99  } else if( param.eig_global.extlib_type == QUDA_EIGEN_EXTLIB ) {
100  Map<MatrixXcd, Unaligned, DynamicStride> projm_(param.matProj, param.cur_dim, param.cur_dim, DynamicStride(param.ld, 1));
101  Map<MatrixXcd, Unaligned, DynamicStride> evecs_(projm.get(), param.cur_dim, param.cur_dim, DynamicStride(param.ld, 1));
102 
103  SelfAdjointEigenSolver<MatrixXcd> es_projm( projm_ );
104  evecs_.block(0, 0, param.cur_dim, param.cur_dim) = es_projm.eigenvectors();
105  } else {
106  errorQuda("Library type %d is currently not supported", param.eig_global.extlib_type);
107  }
108 
109  std::vector<ColorSpinorField*> rv(param.RV->Components().begin(), param.RV->Components().begin() + param.cur_dim);
110  std::vector<ColorSpinorField*> res;
111  res.push_back(r);
112 
113  for (int i = 0; i < n_evs_to_print; i++) {
114  zero(*r);
115  blas::caxpy(&projm.get()[i * param.ld], rv, res); // multiblas
116  *r_sloppy = *r;
117  param.matDeflation(*Av_sloppy, *r_sloppy);
118  double3 dotnorm = cDotProductNormA(*r_sloppy, *Av_sloppy);
119  double eval = dotnorm.x / dotnorm.z;
120  blas::xpay(*Av_sloppy, -eval, *r_sloppy);
121  double relerr = sqrt(norm2(*r_sloppy) / dotnorm.z);
122  printfQuda("Eigenvalue %d: %1.12e Residual: %1.12e\n", i + 1, eval, relerr);
123  }
124  }
125 
127  {
130  errorQuda("Method is not implemented for %d inverter type", param.eig_global.invert_param->inv_type);
131 
132  if(param.cur_dim == 0) return;//nothing to do
133 
134  std::unique_ptr<Complex[] > vec(new Complex[param.ld]);
135 
136  double check_nrm2 = norm2(b);
137 
138  printfQuda("\nSource norm (gpu): %1.15e, curr deflation space dim = %d\n", sqrt(check_nrm2), param.cur_dim);
139 
140  ColorSpinorField *b_sloppy = param.RV->Precision() != b.Precision() ? r_sloppy : &b;
141  *b_sloppy = b;
142 
143  std::vector<ColorSpinorField*> rv_(param.RV->Components().begin(), param.RV->Components().begin()+param.cur_dim);
144  std::vector<ColorSpinorField*> in_;
145  in_.push_back(static_cast<ColorSpinorField*>(b_sloppy));
146 
147  blas::cDotProduct(vec.get(), rv_, in_);//<i, b>
148 
149  if (!param.use_inv_ritz) {
151 #ifdef MAGMA_LIB
152  magma_Xgesv(vec.get(), param.ld, param.cur_dim, param.matProj, param.ld, sizeof(Complex));
153 #else
154  errorQuda("MAGMA library was not built");
155 #endif
156  } else if (param.eig_global.extlib_type == QUDA_EIGEN_EXTLIB) {
157  Map<MatrixXcd, Unaligned, DynamicStride> projm_(param.matProj, param.cur_dim, param.cur_dim,
158  DynamicStride(param.ld, 1));
159  Map<VectorXcd, Unaligned> vec_(vec.get(), param.cur_dim);
160 
161  VectorXcd vec2_(param.cur_dim);
162  vec2_ = projm_.fullPivHouseholderQr().solve(vec_);
163  vec_ = vec2_;
164  } else {
165  errorQuda("Library type %d is currently not supported", param.eig_global.extlib_type);
166  }
167  } else {
168  for(int i = 0; i < param.cur_dim; i++) vec[i] *= param.invRitzVals[i];
169  }
170 
171  std::vector<ColorSpinorField*> out_;
172  out_.push_back(&x);
173 
174  blas::caxpy(vec.get(), rv_, out_); //multiblas
175 
176  check_nrm2 = norm2(x);
177  printfQuda("\nDeflated guess spinor norm (gpu): %1.15e\n", sqrt(check_nrm2));
178  }
179 
181  {
184  errorQuda("Method is not implemented for %d inverter type", param.eig_global.invert_param->inv_type);
185 
186  if (n_ev == 0) return; // nothing to do
187 
188  const int first_idx = param.cur_dim;
189 
190  if (param.RV->CompositeDim() < (first_idx + n_ev) || param.tot_dim < (first_idx + n_ev)) {
191  warningQuda("\nNot enough space to add %d vectors. Keep deflation space unchanged.\n", n_ev);
192  return;
193  }
194 
195  for (int i = 0; i < n_ev; i++) blas::copy(param.RV->Component(first_idx + i), Vm.Component(i));
196 
197  printfQuda("\nConstruct projection matrix..\n");
198 
199  // Block MGS orthogonalization
200  // The degree to which we interpolate between modified GramSchmidt and GramSchmidt (performance vs stability)
201  const int cdot_pipeline_length = 4;
202 
203  for (int i = first_idx; i < (first_idx + n_ev); i++) {
204  std::unique_ptr<Complex[]> alpha(new Complex[i]);
205 
206  ColorSpinorField *accum = param.eig_global.cuda_prec_ritz != QUDA_DOUBLE_PRECISION ? r : &param.RV->Component(i);
207  *accum = param.RV->Component(i);
208 
209  int offset = 0;
210  while (offset < i) {
211  const int local_length = (i - offset) > cdot_pipeline_length ? cdot_pipeline_length : (i - offset);
212 
213  std::vector<ColorSpinorField *> vj_(param.RV->Components().begin() + offset,
214  param.RV->Components().begin() + offset + local_length);
215  std::vector<ColorSpinorField *> vi_;
216  vi_.push_back(accum);
217 
218  blas::cDotProduct(alpha.get(), vj_, vi_);
219  for (int j = 0; j < local_length; j++) alpha[j] = -alpha[j];
220  blas::caxpy(alpha.get(), vj_, vi_); // i-<j,i>j
221 
222  offset += cdot_pipeline_length;
223  }
224 
225  alpha[0] = blas::norm2(*accum);
226 
227  if (alpha[0].real() > 1e-16)
228  blas::ax(1.0 / sqrt(alpha[0].real()), *accum);
229  else
230  errorQuda("Cannot orthogonalize %dth vector", i);
231 
232  param.RV->Component(i) = *accum;
233 
234  param.matDeflation(*Av_sloppy, param.RV->Component(i)); // precision must match!
235  // load diagonal:
236  *Av = *Av_sloppy;
237  param.matProj[i * param.ld + i] = cDotProduct(*accum, *Av);
238 
239  if (i > 0) {
240  std::vector<ColorSpinorField *> vj_(param.RV->Components().begin(), param.RV->Components().begin() + i);
241  std::vector<ColorSpinorField *> av_;
242  av_.push_back(Av_sloppy);
243 
244  blas::cDotProduct(alpha.get(), vj_, av_);
245 
246  for (int j = 0; j < i; j++) {
247  param.matProj[i * param.ld + j] = alpha[j];
248  param.matProj[j * param.ld + i] = conj(alpha[j]); // conj
249  }
250  }
251  }
252 
253  param.cur_dim += n_ev;
254 
255  printfQuda("\nNew curr deflation space dim = %d\n", param.cur_dim);
256  }
257 
258  void Deflation::reduce(double tol, int max_n_ev)
259  {
260  if (param.cur_dim < max_n_ev) {
261  printf("\nToo big number of eigenvectors was requested, switched to maximum available number %d\n", param.cur_dim);
262  max_n_ev = param.cur_dim;
263  }
264 
265  std::unique_ptr<double[]> evals(new double[param.cur_dim]);
266  std::unique_ptr<Complex, decltype(pinned_deleter)> projm(
267  pinned_allocator(param.ld * param.cur_dim * sizeof(Complex)), pinned_deleter);
268 
269  memcpy(projm.get(), param.matProj, param.ld * param.cur_dim * sizeof(Complex));
270 
272 #ifdef MAGMA_LIB
273  magma_Xheev(projm.get(), param.cur_dim, param.ld, evals.get(), sizeof(Complex));
274 #else
275  errorQuda("MAGMA library was not built");
276 #endif
277  } else if (param.eig_global.extlib_type == QUDA_EIGEN_EXTLIB) {
278  Map<MatrixXcd, Unaligned, DynamicStride> projm_(projm.get(), param.cur_dim, param.cur_dim,
279  DynamicStride(param.ld, 1));
280  Map<VectorXd, Unaligned> evals_(evals.get(), param.cur_dim);
281  SelfAdjointEigenSolver<MatrixXcd> es(projm_);
282  projm_ = es.eigenvectors();
283  evals_ = es.eigenvalues();
284  } else {
285  errorQuda("Library type %d is currently not supported", param.eig_global.extlib_type);
286  }
287 
288  // reset projection matrix, now we will use inverse ritz values when deflate an initial guess:
289  param.use_inv_ritz = true;
290  for (int i = 0; i < param.cur_dim; i++) {
291  if (fabs(evals[i]) > 1e-16) {
292  param.invRitzVals[i] = 1.0 / evals[i];
293  } else {
294  errorQuda("Cannot invert Ritz value");
295  }
296  }
297 
299  // Create an eigenvector set:
301  // csParam.setPrecision(search_space_prec);//eigCG internal search space precision: must be adjustable.
302  csParam.is_composite = true;
303  csParam.composite_dim = max_n_ev;
304 
305  csParam.mem_type = QUDA_MEMORY_MAPPED;
306  std::unique_ptr<ColorSpinorField> buff(ColorSpinorField::Create(csParam));
307 
308  int idx = 0;
309  double relerr = 0.0;
310  bool do_residual_check = (tol != 0.0);
311 
312  while ((relerr < tol) && (idx < max_n_ev)) {
313  std::vector<ColorSpinorField *> rv(param.RV->Components().begin(), param.RV->Components().begin() + param.cur_dim);
314  std::vector<ColorSpinorField *> res;
315  res.push_back(r);
316 
317  blas::zero(*r);
318  blas::caxpy(&projm.get()[idx * param.ld], rv, res); // multiblas
319  blas::copy(buff->Component(idx), *r);
320 
321  if (do_residual_check) { // if tol=0.0 then disable relative residual norm check
322  *r_sloppy = *r;
323  param.matDeflation(*Av_sloppy, *r_sloppy);
324  double3 dotnorm = cDotProductNormA(*r_sloppy, *Av_sloppy);
325  double eval = dotnorm.x / dotnorm.z;
326  blas::xpay(*Av_sloppy, -eval, *r_sloppy);
327  relerr = sqrt(norm2(*r_sloppy) / dotnorm.z);
328  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Eigenvalue: %1.12e Residual: %1.12e\n", eval, relerr);
329  }
330 
331  idx++;
332  }
333 
334  printfQuda("\nReserved eigenvectors: %d\n", idx);
335  // copy all the stuff to cudaRitzVectors set:
336  for (int i = 0; i < idx; i++) blas::copy(param.RV->Component(i), buff->Component(i));
337 
338  // reset current dimension:
339  param.cur_dim = idx; // idx never exceeds cur_dim.
340  param.tot_dim = idx;
341  }
342 
343  //supports seperate reading or single file read
345  {
346  if (RV->IsComposite()) errorQuda("Not a composite field");
347 
348  profile.TPSTOP(QUDA_PROFILE_INIT);
349  profile.TPSTART(QUDA_PROFILE_IO);
350 
351  std::string vec_infile(param.eig_global.vec_infile);
352  std::vector<ColorSpinorField *> &B = RV->Components();
353 
354  const int Nvec = B.size();
355  printfQuda("Start loading %d vectors from %s\n", Nvec, vec_infile.c_str());
356 
357  void **V = new void*[Nvec];
358  for (int i = 0; i < Nvec; i++) {
359  V[i] = B[i]->V();
360  if (V[i] == NULL) {
361  printfQuda("Could not allocate V[%d]\n", i);
362  }
363  }
364 
365  if (strcmp(vec_infile.c_str(),"")!=0) {
366  auto parity = (B[0]->SiteSubset() == QUDA_FULL_SITE_SUBSET ? QUDA_INVALID_PARITY : QUDA_EVEN_PARITY);
367  read_spinor_field(vec_infile.c_str(), &V[0], B[0]->Precision(), B[0]->X(), B[0]->SiteSubset(), parity,
368  B[0]->Ncolor(), B[0]->Nspin(), Nvec, 0, (char **)0);
369  } else {
370  errorQuda("No eigenspace file defined");
371  }
372 
373  printfQuda("Done loading vectors\n");
374  profile.TPSTOP(QUDA_PROFILE_IO);
375  profile.TPSTART(QUDA_PROFILE_INIT);
376  }
377 
379  {
380  if (RV->IsComposite()) errorQuda("Not a composite field");
381 
382  profile.TPSTOP(QUDA_PROFILE_INIT);
383  profile.TPSTART(QUDA_PROFILE_IO);
384 
385  std::string vec_outfile(param.eig_global.vec_outfile);
386  std::vector<ColorSpinorField*> &B = RV->Components();
387 
388  if (strcmp(param.eig_global.vec_outfile,"")!=0) {
389  const int Nvec = B.size();
390  printfQuda("Start saving %d vectors to %s\n", Nvec, vec_outfile.c_str());
391 
392  void **V = static_cast<void**>(safe_malloc(Nvec*sizeof(void*)));
393  for (int i=0; i<Nvec; i++) {
394  V[i] = B[i]->V();
395  if (V[i] == NULL) {
396  printfQuda("Could not allocate V[%d]\n", i);
397  }
398  }
399 
400  // assumes even parity if a single-parity field...
401  auto parity = (B[0]->SiteSubset() == QUDA_FULL_SITE_SUBSET ? QUDA_INVALID_PARITY : QUDA_EVEN_PARITY);
402  write_spinor_field(vec_outfile.c_str(), &V[0], B[0]->Precision(), B[0]->X(), B[0]->SiteSubset(), parity,
403  B[0]->Ncolor(), B[0]->Nspin(), Nvec, 0, (char **)0);
404 
405  host_free(V);
406  printfQuda("Done saving vectors\n");
407  }
408 
409  profile.TPSTOP(QUDA_PROFILE_IO);
410  profile.TPSTART(QUDA_PROFILE_INIT);
411  }
412 
413 } // namespace quda
void magma_Xgesv(void *sol, const int ldn, const int n, void *Mat, const int ldm, const int prec)
void magma_Xheev(void *Mat, const int n, const int ldm, void *evalues, const int prec)
CompositeColorSpinorField & Components()
static ColorSpinorField * Create(const ColorSpinorParam &param)
ColorSpinorField & Component(const int idx) const
void operator()(ColorSpinorField &out, ColorSpinorField &in)
Definition: deflation.cpp:126
Deflation(DeflationParam &param, TimeProfile &profile)
Definition: deflation.cpp:22
void increment(ColorSpinorField &V, int n_ev)
Definition: deflation.cpp:180
virtual ~Deflation()
Definition: deflation.cpp:61
void saveVectors(ColorSpinorField *RV)
Save the eigen space vectors in file.
Definition: deflation.cpp:378
double flops() const
Return the total flops done on this and all coarser levels.
Definition: deflation.cpp:74
void reduce(double tol, int max_n_ev)
Definition: deflation.cpp:258
void loadVectors(ColorSpinorField *RV)
Load the eigen space vectors from file.
Definition: deflation.cpp:344
QudaPrecision Precision() const
void Print()
Definition: timer.cpp:7
double tol
int V
Definition: host_utils.cpp:37
QudaParity parity
Definition: covdev_test.cpp:40
@ QUDA_SUMMARIZE
Definition: enum_quda.h:266
@ QUDA_VERBOSE
Definition: enum_quda.h:267
@ QUDA_FULL_SITE_SUBSET
Definition: enum_quda.h:333
@ QUDA_UKQCD_GAMMA_BASIS
Definition: enum_quda.h:369
@ QUDA_EVEN_PARITY
Definition: enum_quda.h:284
@ QUDA_INVALID_PARITY
Definition: enum_quda.h:284
@ QUDA_MEMORY_MAPPED
Definition: enum_quda.h:15
@ QUDA_MEMORY_DEVICE
Definition: enum_quda.h:13
@ QUDA_INC_EIGCG_INVERTER
Definition: enum_quda.h:117
@ QUDA_EIGCG_INVERTER
Definition: enum_quda.h:116
@ QUDA_DOUBLE_PRECISION
Definition: enum_quda.h:65
@ QUDA_EIGEN_EXTLIB
Definition: enum_quda.h:557
@ QUDA_MAGMA_EXTLIB
Definition: enum_quda.h:558
@ QUDA_ZERO_FIELD_CREATE
Definition: enum_quda.h:361
#define pool_pinned_malloc(size)
Definition: malloc_quda.h:172
#define safe_malloc(size)
Definition: malloc_quda.h:106
#define pool_pinned_free(ptr)
Definition: malloc_quda.h:173
#define host_free(ptr)
Definition: malloc_quda.h:115
unsigned long long bytes
void xpay(ColorSpinorField &x, double a, ColorSpinorField &y)
Definition: blas_quda.h:45
void ax(double a, ColorSpinorField &x)
void zero(ColorSpinorField &a)
double norm2(const ColorSpinorField &a)
double3 cDotProductNormA(ColorSpinorField &a, ColorSpinorField &b)
void caxpy(const Complex &a, ColorSpinorField &x, ColorSpinorField &y)
void copy(ColorSpinorField &dst, const ColorSpinorField &src)
Definition: blas_quda.h:24
Complex cDotProduct(ColorSpinorField &, ColorSpinorField &)
__host__ __device__ ValueType conj(ValueType x)
Definition: complex_quda.h:130
double norm2(const CloverField &a, bool inverse=false)
__device__ __host__ void zero(double &a)
Definition: float_vector.h:318
std::complex< double > Complex
Definition: quda_internal.h:86
__host__ __device__ ValueType sqrt(ValueType x)
Definition: complex_quda.h:120
@ QUDA_PROFILE_INIT
Definition: timer.h:106
@ QUDA_PROFILE_IO
Definition: timer.h:112
Stride< Dynamic, Dynamic > DynamicStride
Definition: deflation.cpp:17
::std::string string
Definition: gtest-port.h:891
ColorSpinorParam csParam
Definition: pack_test.cpp:25
QudaGaugeParam param
Definition: pack_test.cpp:18
void read_spinor_field(const char *filename, void *V[], QudaPrecision precision, const int *X, QudaSiteSubset subset, QudaParity parity, int nColor, int nSpin, int Nvec, int argc, char *argv[])
Definition: qio_field.h:24
void write_spinor_field(const char *filename, void *V[], QudaPrecision precision, const int *X, QudaSiteSubset subset, QudaParity parity, int nColor, int nSpin, int Nvec, int argc, char *argv[])
Definition: qio_field.h:31
char vec_outfile[256]
Definition: quda.h:525
QudaPrecision cuda_prec_ritz
Definition: quda.h:510
char vec_infile[256]
Definition: quda.h:522
QudaExtLibType extlib_type
Definition: quda.h:542
QudaInvertParam * invert_param
Definition: quda.h:413
QudaFieldLocation location
Definition: quda.h:33
QudaInverterType inv_type
Definition: quda.h:107
QudaEigParam & eig_global
Definition: deflation.h:17
Complex * matProj
Definition: deflation.h:31
DiracMatrix & matDeflation
Definition: deflation.h:28
ColorSpinorField * RV
Definition: deflation.h:22
double * invRitzVals
Definition: deflation.h:25
#define printfQuda(...)
Definition: util_quda.h:114
QudaVerbosity getVerbosity()
Definition: util_quda.cpp:21
#define warningQuda(...)
Definition: util_quda.h:132
#define errorQuda(...)
Definition: util_quda.h:120