QUDA  0.9.0
deflation.cpp
Go to the documentation of this file.
1 #include <deflation.h>
2 #include <qio_field.h>
3 #include <string.h>
4 
5 #include <memory>
6 
7 
8 
9 #ifdef MAGMA_LIB
10 #include <blas_magma.h>
11 #endif
12 
13 #include <Eigen/Dense>
14 
15 
16 namespace quda {
17 
18  using namespace blas;
19 
20  using namespace Eigen;
21 
22  using DynamicStride = Stride<Dynamic, Dynamic>;
23 
24  static auto pinned_allocator = [] (size_t bytes ) { return static_cast<Complex*>(pool_pinned_malloc(bytes)); };
25  static auto pinned_deleter = [] (Complex *hptr) { pool_pinned_free(hptr); };
26 
27 
28  //static bool debug = false;
29 
31  : param(param), profile(profile),
32  r(nullptr), Av(nullptr), r_sloppy(nullptr), Av_sloppy(nullptr) {
33 
34 
35  // for reporting level 1 is the fine level but internally use level 0 for indexing
36  printfQuda("Creating deflation space of %d vectors.\n", param.tot_dim);
37 
38  if( param.eig_global.import_vectors ) loadVectors(param.RV);//whether to load eigenvectors
39  // create aux fields
42  csParam.location = param.location;
43  csParam.mem_type = QUDA_MEMORY_DEVICE;
44  csParam.setPrecision(QUDA_DOUBLE_PRECISION);//accum fields always full precision
45 
46  if (csParam.location==QUDA_CUDA_FIELD_LOCATION) {
47  // all coarse GPU vectors use FLOAT2 ordering
48  csParam.fieldOrder = QUDA_FLOAT2_FIELD_ORDER;
49  if(csParam.nSpin != 1) csParam.gammaBasis = QUDA_UKQCD_GAMMA_BASIS;
50  }
51 
54 
55  if(param.eig_global.cuda_prec_ritz != QUDA_DOUBLE_PRECISION ) { //allocate sloppy fields
56 
57  csParam.setPrecision(param.eig_global.cuda_prec_ritz);//accum fields always full precision
59 
62 
63  } else {
64  r_sloppy = r;
65  Av_sloppy = Av;
66  }
67 
68 
69  printfQuda("Deflation space setup completed\n");
70  // now we can run through the verification if requested
72  // print out profiling information for the adaptive setup
74  }
75 
77 
79  if (r_sloppy) delete r_sloppy;
80  if (Av_sloppy) delete Av_sloppy;
81  }
82 
83  if (r) delete r;
84  if (Av) delete Av;
85 
87  }
88 
89  double Deflation::flops() const {
90  double flops = 0;//Do we need to report this?
91 
92  //compute total flops for deflation application. Not sure we really need this.
93  return flops;
94  }
95 
101  const int nevs_to_print = param.cur_dim;
102  if(nevs_to_print == 0) errorQuda("\nIncorrect size of current deflation space. \n");
103 
104  std::unique_ptr<Complex, decltype(pinned_deleter) > projm( pinned_allocator(param.ld*param.cur_dim * sizeof(Complex)), pinned_deleter);
105 
107 #ifdef MAGMA_LIB
108  memcpy(projm.get(), param.matProj, param.ld*param.cur_dim*sizeof(Complex));
109  std::unique_ptr<double[] > evals(new double[param.ld]);
110  magma_Xheev(projm.get(), param.cur_dim, param.ld, evals.get(), sizeof(Complex));
111 #else
112  errorQuda("MAGMA library was not built.\n");
113 #endif
115  Map<MatrixXcd, Unaligned, DynamicStride> projm_(param.matProj, param.cur_dim, param.cur_dim, DynamicStride(param.ld, 1));
116  Map<MatrixXcd, Unaligned, DynamicStride> evecs_(projm.get(), param.cur_dim, param.cur_dim, DynamicStride(param.ld, 1));
117 
118  SelfAdjointEigenSolver<MatrixXcd> es_projm( projm_ );
119  evecs_.block(0, 0, param.cur_dim, param.cur_dim) = es_projm.eigenvectors();
120  } else {
121  errorQuda("Library type %d is currently not supported.\n", param.eig_global.extlib_type);
122  }
123 
124  std::vector<ColorSpinorField*> rv(param.RV->Components().begin(), param.RV->Components().begin() + param.cur_dim);
125  std::vector<ColorSpinorField*> res;
126  res.push_back(r);
127 
128  for(int i = 0; i < nevs_to_print; i++)
129  {
130  zero(*r);
131 
132  blas::caxpy(&projm.get()[i*param.ld], rv, res);//multiblas
133 
134  *r_sloppy = *r;
135 
137 
138  double3 dotnorm = cDotProductNormA(*r_sloppy, *Av_sloppy);
139 
140  double eval = dotnorm.x / dotnorm.z;
141 
142  blas::xpay(*Av_sloppy, -eval, *r_sloppy );
143 
144  double relerr = sqrt( norm2(*r_sloppy) / dotnorm.z );
145 
146  printfQuda("Eigenvalue %d: %1.12e Residual: %1.12e\n", i+1, eval, relerr);
147  }
148 
149  return;
150  }
151 
154  errorQuda("\nMethod is not implemented for %d inverter type.\n", param.eig_global.invert_param->inv_type);
155 
156  if(param.cur_dim == 0) return;//nothing to do
157 
158  std::unique_ptr<Complex[] > vec(new Complex[param.ld]);
159 
160  double check_nrm2 = norm2(b);
161 
162  printfQuda("\nSource norm (gpu): %1.15e, curr deflation space dim = %d\n", sqrt(check_nrm2), param.cur_dim);
163 
164  ColorSpinorField *b_sloppy = param.RV->Precision() != b.Precision() ? r_sloppy : &b;
165  *b_sloppy = b;
166 
167  std::vector<ColorSpinorField*> rv_(param.RV->Components().begin(), param.RV->Components().begin()+param.cur_dim);
168  std::vector<ColorSpinorField*> in_;
169  in_.push_back(static_cast<ColorSpinorField*>(b_sloppy));
170 
171  blas::cDotProduct(vec.get(), rv_, in_);//<i, b>
172 
173  if(!param.use_inv_ritz)
174  {
176 #ifdef MAGMA_LIB
177  magma_Xgesv(vec.get(), param.ld, param.cur_dim, param.matProj, param.ld, sizeof(Complex));
178 #else
179  errorQuda("MAGMA library was not built.\n");
180 #endif
181  } else if( param.eig_global.extlib_type == QUDA_EIGEN_EXTLIB ) {
182  Map<MatrixXcd, Unaligned, DynamicStride> projm_(param.matProj, param.cur_dim, param.cur_dim, DynamicStride(param.ld, 1));
183  Map<VectorXcd, Unaligned> vec_ (vec.get(), param.cur_dim);
184 
185  VectorXcd vec2_(param.cur_dim);
186  vec2_ = projm_.fullPivHouseholderQr().solve(vec_);
187 
188  vec_ = vec2_;
189  } else {
190  errorQuda("Library type %d is currently not supported.\n", param.eig_global.extlib_type);
191  }
192  } else {
193  for(int i = 0; i < param.cur_dim; i++) vec[i] *= param.invRitzVals[i];
194  }
195 
196  std::vector<ColorSpinorField*> out_;
197  out_.push_back(&x);
198 
199  blas::caxpy(vec.get(), rv_, out_); //multiblas
200 
201  check_nrm2 = norm2(x);
202  printfQuda("\nDeflated guess spinor norm (gpu): %1.15e\n", sqrt(check_nrm2));
203 
204  return;
205  }
206 
209  errorQuda("\nMethod is not implemented for %d inverter type.\n", param.eig_global.invert_param->inv_type);
210 
211  if( nev == 0 ) return; //nothing to do
212 
213  const int first_idx = param.cur_dim;
214 
215  if(param.RV->CompositeDim() < (first_idx+nev) || param.tot_dim < (first_idx+nev)) {
216  warningQuda("\nNot enough space to add %d vectors. Keep deflation space unchanged.\n", nev);
217  return;
218  }
219 
220  for(int i = 0; i < nev; i++) blas::copy(param.RV->Component(first_idx+i), Vm.Component(i));
221 
222  printfQuda("\nConstruct projection matrix..\n");
223 
224  // Block MGS orthogonalization
225  // The degree to which we interpolate between modified GramSchmidt and GramSchmidt (performance vs stability)
226  const int cdot_pipeline_length = 4;
227 
228  for(int i = first_idx; i < (first_idx + nev); i++)
229  {
230  std::unique_ptr<Complex[] > alpha(new Complex[i]);
231 
233  *accum = param.RV->Component(i);
234 
235  int offset = 0;
236  while (offset < i) {
237 
238  const int local_length = (i - offset) > cdot_pipeline_length ? cdot_pipeline_length : (i - offset);
239 
240  std::vector<ColorSpinorField*> vj_(param.RV->Components().begin()+offset, param.RV->Components().begin()+offset+local_length);
241  std::vector<ColorSpinorField*> vi_;
242  vi_.push_back(accum);
243 
244  blas::cDotProduct(alpha.get(), vj_, vi_);
245  for (int j = 0; j < local_length; j++) alpha[j] = -alpha[j];
246 
247  blas::caxpy(alpha.get(), vj_, vi_); //i-<j,i>j
248 
249  offset += cdot_pipeline_length;
250  }
251 
252  alpha[0] = blas::norm2(*accum);
253 
254  if(alpha[0].real() > 1e-16) blas::ax(1.0 /sqrt(alpha[0].real()), *accum);
255  else errorQuda("\nCannot orthogonalize %dth vector\n", i);
256 
257  param.RV->Component(i) = *accum;
258 
259  param.matDeflation(*Av_sloppy, param.RV->Component(i));//precision must match!
260  //load diagonal:
261  *Av = *Av_sloppy;
262  param.matProj[i*param.ld+i] = cDotProduct(*accum, *Av);
263 
264  if (i>0) {
265  std::vector<ColorSpinorField*> vj_(param.RV->Components().begin(), param.RV->Components().begin()+i);
266  std::vector<ColorSpinorField*> av_;
267  av_.push_back(Av_sloppy);
268 
269  blas::cDotProduct(alpha.get(), vj_, av_);
270 
271  for (int j = 0; j < i; j++) {
272  param.matProj[i*param.ld+j] = alpha[j];
273  param.matProj[j*param.ld+i] = conj(alpha[j]);//conj
274  }
275  }
276  }
277 
278  param.cur_dim += nev;
279 
280  printfQuda("\nNew curr deflation space dim = %d\n", param.cur_dim);
281  return;
282  }
283 
284 
285  void Deflation::reduce(double tol, int max_nev) {
286  if(param.cur_dim < max_nev)
287  {
288  printf("\nToo big number of eigenvectors was requested, switched to maximum available number %d\n", param.cur_dim);
289  max_nev = param.cur_dim;
290  }
291 
292  std::unique_ptr<double[] > evals(new double[param.cur_dim]);
293  std::unique_ptr<Complex, decltype(pinned_deleter) > projm( pinned_allocator(param.ld*param.cur_dim * sizeof(Complex)), pinned_deleter);
294 
295  memcpy(projm.get(), param.matProj, param.ld*param.cur_dim*sizeof(Complex));
296 
298 #ifdef MAGMA_LIB
299  magma_Xheev(projm.get(), param.cur_dim, param.ld, evals.get(), sizeof(Complex));
300 #else
301  errorQuda("MAGMA library was not built.\n");
302 #endif
304  Map<MatrixXcd, Unaligned, DynamicStride> projm_(projm.get(), param.cur_dim, param.cur_dim, DynamicStride(param.ld, 1));
305  Map<VectorXd, Unaligned> evals_(evals.get(), param.cur_dim);
306 
307  SelfAdjointEigenSolver<MatrixXcd> es(projm_);
308 
309  projm_ = es.eigenvectors();
310  evals_ = es.eigenvalues();
311  } else {
312  errorQuda("Library type %d is currently not supported.\n", param.eig_global.extlib_type);
313  }
314 
315  //reset projection matrix, now we will use inverse ritz values when deflate an initial guess:
316  param.use_inv_ritz = true;
317  for(int i = 0; i < param.cur_dim; i++)
318  {
319  if(fabs(evals[i]) > 1e-16)
320  {
321  param.invRitzVals[i] = 1.0 / evals[i];
322  }
323  else
324  {
325  errorQuda("\nCannot invert Ritz value.\n");
326  }
327  }
328 
330  //Create an eigenvector set:
332  //csParam.setPrecision(search_space_prec);//eigCG internal search space precision: must be adjustable.
333  csParam.is_composite = true;
334  csParam.composite_dim = max_nev;
335 
336  csParam.mem_type = QUDA_MEMORY_MAPPED;
337  std::unique_ptr<ColorSpinorField> buff(ColorSpinorField::Create(csParam));
338 
339  int idx = 0;
340  double relerr = 0.0;
341  bool do_residual_check = (tol != 0.0);
342 
343  while ((relerr < tol) && (idx < max_nev))
344  {
345  std::vector<ColorSpinorField*> rv(param.RV->Components().begin(), param.RV->Components().begin() + param.cur_dim);
346  std::vector<ColorSpinorField*> res;
347  res.push_back(r);
348 
349  blas::zero(*r);
350  blas::caxpy(&projm.get()[idx*param.ld], rv, res);//multiblas
351  blas::copy(buff->Component(idx), *r);
352 
353  if( do_residual_check ) //if tol=0.0 then disable relative residual norm check
354  {
355  *r_sloppy = *r;
357 
358  double3 dotnorm = cDotProductNormA(*r_sloppy, *Av_sloppy);
359 
360  double eval = dotnorm.x / dotnorm.z;
361 
362  blas::xpay(*Av_sloppy, -eval, *r_sloppy );
363 
364  relerr = sqrt( norm2(*r_sloppy) / dotnorm.z );
365 
366  if(getVerbosity() >= QUDA_VERBOSE) printfQuda("Eigenvalue: %1.12e Residual: %1.12e\n", eval, relerr);
367  }
368 
369  idx += 1;
370  }
371 
372  printfQuda("\nReserved eigenvectors: %d\n", idx);
373  //copy all the stuff to cudaRitzVectors set:
374  for(int i = 0; i < idx; i++) blas::copy(param.RV->Component(i), buff->Component(i));
375 
376  //reset current dimension:
377  param.cur_dim = idx;//idx never exceeds cur_dim.
378  param.tot_dim = idx;
379 
380  return;
381  }
382 
383  //supports seperate reading or single file read
385 
386  if(RV->IsComposite()) errorQuda("\nNot a composite field.\n");
387 
388  profile.TPSTOP(QUDA_PROFILE_INIT);
389  profile.TPSTART(QUDA_PROFILE_IO);
390 
391  std::string vec_infile(param.eig_global.vec_infile);
392 
393  std::vector<ColorSpinorField*> &B = RV->Components();
394 
395  const int Nvec = B.size();
396  printfQuda("Start loading %d vectors from %s\n", Nvec, vec_infile.c_str());
397 
398  void **V = new void*[Nvec];
399  for (int i=0; i<Nvec; i++) {
400  V[i] = B[i]->V();
401  if (V[i] == NULL) {
402  printfQuda("Could not allocate V[%d]\n", i);
403  }
404  }
405 
406  if (strcmp(vec_infile.c_str(),"")!=0) {
407  read_spinor_field(vec_infile.c_str(), &V[0], B[0]->Precision(), B[0]->X(),
408  B[0]->Ncolor(), B[0]->Nspin(), Nvec, 0, (char**)0);
409  } else {
410  errorQuda("No eigenspace file defined.");
411  }
412 
413  printfQuda("Done loading vectors\n");
414  profile.TPSTOP(QUDA_PROFILE_IO);
415  profile.TPSTART(QUDA_PROFILE_INIT);
416 
417  return;
418  }
419 
421  if(RV->IsComposite()) errorQuda("\nNot a composite field.\n");
422 
423  profile.TPSTOP(QUDA_PROFILE_INIT);
424  profile.TPSTART(QUDA_PROFILE_IO);
425 
427 
428 
429  std::vector<ColorSpinorField*> &B = RV->Components();
430 
431  if (strcmp(param.eig_global.vec_outfile,"")!=0) {
432  const int Nvec = B.size();
433  printfQuda("Start saving %d vectors to %s\n", Nvec, vec_outfile.c_str());
434 
435  void **V = static_cast<void**>(safe_malloc(Nvec*sizeof(void*)));
436  for (int i=0; i<Nvec; i++) {
437  V[i] = B[i]->V();
438  if (V[i] == NULL) {
439  printfQuda("Could not allocate V[%d]\n", i);
440  }
441  }
442 
443  write_spinor_field(vec_outfile.c_str(), &V[0], B[0]->Precision(), B[0]->X(),
444  B[0]->Ncolor(), B[0]->Nspin(), Nvec, 0, (char**)0);
445 
446  host_free(V);
447  printfQuda("Done saving vectors\n");
448  }
449 
450  profile.TPSTOP(QUDA_PROFILE_IO);
451  profile.TPSTART(QUDA_PROFILE_INIT);
452 
453  return;
454  }
455 
456 }
void xpay(ColorSpinorField &x, const double &a, ColorSpinorField &y)
Definition: blas_quda.cu:173
#define pool_pinned_free(ptr)
Definition: malloc_quda.h:116
double3 cDotProductNormA(ColorSpinorField &a, ColorSpinorField &b)
Definition: reduce_quda.cu:572
QudaVerbosity getVerbosity()
Definition: util_quda.cpp:20
#define errorQuda(...)
Definition: util_quda.h:90
double norm2(const ColorSpinorField &a)
Definition: reduce_quda.cu:241
QudaInverterType inv_type
Definition: quda.h:94
#define host_free(ptr)
Definition: malloc_quda.h:59
__host__ __device__ ValueType sqrt(ValueType x)
Definition: complex_quda.h:105
Complex cDotProduct(ColorSpinorField &, ColorSpinorField &)
Definition: reduce_quda.cu:500
std::complex< double > Complex
Definition: eig_variables.h:13
CompositeColorSpinorField & Components()
static ColorSpinorField * Create(const ColorSpinorParam &param)
void copy(ColorSpinorField &dst, const ColorSpinorField &src)
Definition: copy_quda.cu:263
void ax(const double &a, ColorSpinorField &x)
Definition: blas_quda.cu:209
DiracMatrix & matDeflation
Definition: deflation.h:28
ColorSpinorField & Component(const int idx) const
Complex * matProj
Definition: deflation.h:31
void reduce(double tol, int max_nev)
Definition: deflation.cpp:285
double norm2(const CloverField &a, bool inverse=false)
size_t size_t offset
QudaGaugeParam param
Definition: pack_test.cpp:17
#define b
int strcmp(const char *__s1, const char *__s2)
Deflation(DeflationParam &param, TimeProfile &profile)
Definition: deflation.cpp:30
QudaInvertParam * invert_param
Definition: quda.h:346
double tol
Definition: test_util.cpp:1647
void magma_Xheev(void *Mat, const int n, const int ldm, void *evalues, const int prec)
Definition: blas_magma.cu:299
int printf(const char *,...) __attribute__((__format__(__printf__
char vec_infile[]
Definition: test_util.cpp:1636
QudaBoolean run_verify
Definition: quda.h:373
DeflationParam & param
Definition: deflation.h:82
Stride< Dynamic, Dynamic > DynamicStride
Definition: deflation.cpp:22
ColorSpinorParam csParam
Definition: pack_test.cpp:24
void Print()
Definition: timer.cpp:6
int V
Definition: test_util.cpp:28
#define warningQuda(...)
Definition: util_quda.h:101
virtual ~Deflation()
Definition: deflation.cpp:76
char vec_outfile[]
Definition: test_util.cpp:1637
void saveVectors(ColorSpinorField *RV)
Save the eigen space vectors in file.
Definition: deflation.cpp:420
TimeProfile profile
Definition: deflation.h:85
QudaFieldLocation location
Definition: deflation.h:46
QudaBoolean import_vectors
Definition: quda.h:361
void caxpy(const Complex &a, ColorSpinorField &x, ColorSpinorField &y)
Definition: blas_quda.cu:246
double flops() const
Return the total flops done on this and all coarser levels.
Definition: deflation.cpp:89
void * memcpy(void *__dst, const void *__src, size_t __n)
#define safe_malloc(size)
Definition: malloc_quda.h:54
void zero(ColorSpinorField &a)
Definition: blas_quda.cu:45
ColorSpinorField * Av_sloppy
Definition: deflation.h:97
#define pool_pinned_malloc(size)
Definition: malloc_quda.h:115
char vec_outfile[256]
Definition: quda.h:379
void operator()(ColorSpinorField &out, ColorSpinorField &in)
Definition: deflation.cpp:152
int nev
Definition: test_util.cpp:1669
ColorSpinorField * RV
Definition: deflation.h:22
void loadVectors(ColorSpinorField *RV)
Load the eigen space vectors from file.
Definition: deflation.cpp:384
ColorSpinorField * Av
Definition: deflation.h:91
QudaExtLibType extlib_type
Definition: quda.h:388
#define printfQuda(...)
Definition: util_quda.h:84
double fabs(double)
void write_spinor_field(const char *filename, void *V[], QudaPrecision precision, const int *X, int nColor, int nSpin, int Nvec, int argc, char *argv[])
Definition: qio_field.h:22
void increment(ColorSpinorField &V, int nev)
Definition: deflation.cpp:207
ColorSpinorField * r_sloppy
Definition: deflation.h:94
QudaEigParam & eig_global
Definition: deflation.h:17
ColorSpinorField * r
Definition: deflation.h:88
__host__ __device__ ValueType conj(ValueType x)
Definition: complex_quda.h:115
QudaPrecision Precision() const
double * invRitzVals
Definition: deflation.h:25
QudaPrecision cuda_prec_ritz
Definition: quda.h:364
__device__ __host__ void zero(vector_type< scalar, n > &v)
Definition: cub_helper.cuh:82
void read_spinor_field(const char *filename, void *V[], QudaPrecision precision, const int *X, int nColor, int nSpin, int Nvec, int argc, char *argv[])
Definition: qio_field.h:17
static auto pinned_allocator
Definition: deflation.cpp:24
static auto pinned_deleter
Definition: deflation.cpp:25
char vec_infile[256]
Definition: quda.h:376
void magma_Xgesv(void *sol, const int ldn, const int n, void *Mat, const int ldm, const int prec)
Definition: blas_magma.cu:268
unsigned long long bytes
Definition: blas_quda.cu:43