QUDA  1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
deflation.cpp
Go to the documentation of this file.
1 #include <deflation.h>
2 #include <qio_field.h>
3 #include <string.h>
4 
5 #include <memory>
6 
7 #ifdef MAGMA_LIB
8 #include <blas_magma.h>
9 #endif
10 
11 #include <Eigen/Dense>
12 
13 namespace quda
14 {
15 
16  using namespace blas;
17  using namespace Eigen;
18  using DynamicStride = Stride<Dynamic, Dynamic>;
19 
20  static auto pinned_allocator = [] (size_t bytes ) { return static_cast<Complex*>(pool_pinned_malloc(bytes)); };
21  static auto pinned_deleter = [] (Complex *hptr) { pool_pinned_free(hptr); };
22 
24  param(param),
25  profile(profile),
26  r(nullptr),
27  Av(nullptr),
28  r_sloppy(nullptr),
29  Av_sloppy(nullptr)
30  {
31  // for reporting level 1 is the fine level but internally use level 0 for indexing
32  printfQuda("Creating deflation space of %d vectors.\n", param.tot_dim);
33 
34  if (param.eig_global.import_vectors) loadVectors(param.RV); // whether to load eigenvectors
35  // create aux fields
38  csParam.location = param.location;
39  csParam.mem_type = QUDA_MEMORY_DEVICE;
40  csParam.setPrecision(QUDA_DOUBLE_PRECISION);//accum fields always full precision
41 
42  if (csParam.location==QUDA_CUDA_FIELD_LOCATION) {
43  // all coarse GPU vectors use FLOAT2 ordering
44  csParam.fieldOrder = QUDA_FLOAT2_FIELD_ORDER;
45  if(csParam.nSpin != 1) csParam.gammaBasis = QUDA_UKQCD_GAMMA_BASIS;
46  }
47 
50 
51  if (param.eig_global.cuda_prec_ritz != QUDA_DOUBLE_PRECISION) { // allocate sloppy fields
52  csParam.setPrecision(param.eig_global.cuda_prec_ritz);//accum fields always full precision
54 
57  } else {
58  r_sloppy = r;
59  Av_sloppy = Av;
60  }
61 
62  printfQuda("Deflation space setup completed\n");
63  // now we can run through the verification if requested
65  // print out profiling information for the adaptive setup
66  if (getVerbosity() >= QUDA_SUMMARIZE) profile.Print();
67  }
68 
70  {
72  if (r_sloppy) delete r_sloppy;
73  if (Av_sloppy) delete Av_sloppy;
74  }
75 
76  if (r) delete r;
77  if (Av) delete Av;
78 
80  }
81 
82  double Deflation::flops() const
83  {
84  double flops = 0;//Do we need to report this?
85  //compute total flops for deflation application. Not sure we really need this.
86  return flops;
87  }
88 
93  {
94  const int nevs_to_print = param.cur_dim;
95  if (nevs_to_print == 0) errorQuda("Incorrect size of current deflation space");
96 
97  std::unique_ptr<Complex, decltype(pinned_deleter) > projm( pinned_allocator(param.ld*param.cur_dim * sizeof(Complex)), pinned_deleter);
98 
100 #ifdef MAGMA_LIB
101  memcpy(projm.get(), param.matProj, param.ld*param.cur_dim*sizeof(Complex));
102  std::unique_ptr<double[] > evals(new double[param.ld]);
103  magma_Xheev(projm.get(), param.cur_dim, param.ld, evals.get(), sizeof(Complex));
104 #else
105  errorQuda("MAGMA library was not built");
106 #endif
108  Map<MatrixXcd, Unaligned, DynamicStride> projm_(param.matProj, param.cur_dim, param.cur_dim, DynamicStride(param.ld, 1));
109  Map<MatrixXcd, Unaligned, DynamicStride> evecs_(projm.get(), param.cur_dim, param.cur_dim, DynamicStride(param.ld, 1));
110 
111  SelfAdjointEigenSolver<MatrixXcd> es_projm( projm_ );
112  evecs_.block(0, 0, param.cur_dim, param.cur_dim) = es_projm.eigenvectors();
113  } else {
114  errorQuda("Library type %d is currently not supported", param.eig_global.extlib_type);
115  }
116 
117  std::vector<ColorSpinorField*> rv(param.RV->Components().begin(), param.RV->Components().begin() + param.cur_dim);
118  std::vector<ColorSpinorField*> res;
119  res.push_back(r);
120 
121  for (int i = 0; i < nevs_to_print; i++) {
122  zero(*r);
123  blas::caxpy(&projm.get()[i * param.ld], rv, res); // multiblas
124  *r_sloppy = *r;
125  param.matDeflation(*Av_sloppy, *r_sloppy);
126  double3 dotnorm = cDotProductNormA(*r_sloppy, *Av_sloppy);
127  double eval = dotnorm.x / dotnorm.z;
128  blas::xpay(*Av_sloppy, -eval, *r_sloppy);
129  double relerr = sqrt(norm2(*r_sloppy) / dotnorm.z);
130  printfQuda("Eigenvalue %d: %1.12e Residual: %1.12e\n", i + 1, eval, relerr);
131  }
132  }
133 
135  {
138  errorQuda("Method is not implemented for %d inverter type", param.eig_global.invert_param->inv_type);
139 
140  if(param.cur_dim == 0) return;//nothing to do
141 
142  std::unique_ptr<Complex[] > vec(new Complex[param.ld]);
143 
144  double check_nrm2 = norm2(b);
145 
146  printfQuda("\nSource norm (gpu): %1.15e, curr deflation space dim = %d\n", sqrt(check_nrm2), param.cur_dim);
147 
148  ColorSpinorField *b_sloppy = param.RV->Precision() != b.Precision() ? r_sloppy : &b;
149  *b_sloppy = b;
150 
151  std::vector<ColorSpinorField*> rv_(param.RV->Components().begin(), param.RV->Components().begin()+param.cur_dim);
152  std::vector<ColorSpinorField*> in_;
153  in_.push_back(static_cast<ColorSpinorField*>(b_sloppy));
154 
155  blas::cDotProduct(vec.get(), rv_, in_);//<i, b>
156 
157  if (!param.use_inv_ritz) {
159 #ifdef MAGMA_LIB
160  magma_Xgesv(vec.get(), param.ld, param.cur_dim, param.matProj, param.ld, sizeof(Complex));
161 #else
162  errorQuda("MAGMA library was not built");
163 #endif
165  Map<MatrixXcd, Unaligned, DynamicStride> projm_(param.matProj, param.cur_dim, param.cur_dim,
166  DynamicStride(param.ld, 1));
167  Map<VectorXcd, Unaligned> vec_(vec.get(), param.cur_dim);
168 
169  VectorXcd vec2_(param.cur_dim);
170  vec2_ = projm_.fullPivHouseholderQr().solve(vec_);
171  vec_ = vec2_;
172  } else {
173  errorQuda("Library type %d is currently not supported", param.eig_global.extlib_type);
174  }
175  } else {
176  for(int i = 0; i < param.cur_dim; i++) vec[i] *= param.invRitzVals[i];
177  }
178 
179  std::vector<ColorSpinorField*> out_;
180  out_.push_back(&x);
181 
182  blas::caxpy(vec.get(), rv_, out_); //multiblas
183 
184  check_nrm2 = norm2(x);
185  printfQuda("\nDeflated guess spinor norm (gpu): %1.15e\n", sqrt(check_nrm2));
186  }
187 
189  {
192  errorQuda("Method is not implemented for %d inverter type", param.eig_global.invert_param->inv_type);
193 
194  if( nev == 0 ) return; //nothing to do
195 
196  const int first_idx = param.cur_dim;
197 
198  if (param.RV->CompositeDim() < (first_idx + nev) || param.tot_dim < (first_idx + nev)) {
199  warningQuda("\nNot enough space to add %d vectors. Keep deflation space unchanged.\n", nev);
200  return;
201  }
202 
203  for(int i = 0; i < nev; i++) blas::copy(param.RV->Component(first_idx+i), Vm.Component(i));
204 
205  printfQuda("\nConstruct projection matrix..\n");
206 
207  // Block MGS orthogonalization
208  // The degree to which we interpolate between modified GramSchmidt and GramSchmidt (performance vs stability)
209  const int cdot_pipeline_length = 4;
210 
211  for (int i = first_idx; i < (first_idx + nev); i++) {
212  std::unique_ptr<Complex[]> alpha(new Complex[i]);
213 
215  *accum = param.RV->Component(i);
216 
217  int offset = 0;
218  while (offset < i) {
219  const int local_length = (i - offset) > cdot_pipeline_length ? cdot_pipeline_length : (i - offset);
220 
221  std::vector<ColorSpinorField *> vj_(param.RV->Components().begin() + offset,
222  param.RV->Components().begin() + offset + local_length);
223  std::vector<ColorSpinorField *> vi_;
224  vi_.push_back(accum);
225 
226  blas::cDotProduct(alpha.get(), vj_, vi_);
227  for (int j = 0; j < local_length; j++) alpha[j] = -alpha[j];
228  blas::caxpy(alpha.get(), vj_, vi_); // i-<j,i>j
229 
230  offset += cdot_pipeline_length;
231  }
232 
233  alpha[0] = blas::norm2(*accum);
234 
235  if (alpha[0].real() > 1e-16)
236  blas::ax(1.0 / sqrt(alpha[0].real()), *accum);
237  else
238  errorQuda("Cannot orthogonalize %dth vector", i);
239 
240  param.RV->Component(i) = *accum;
241 
242  param.matDeflation(*Av_sloppy, param.RV->Component(i)); // precision must match!
243  // load diagonal:
244  *Av = *Av_sloppy;
245  param.matProj[i * param.ld + i] = cDotProduct(*accum, *Av);
246 
247  if (i > 0) {
248  std::vector<ColorSpinorField *> vj_(param.RV->Components().begin(), param.RV->Components().begin() + i);
249  std::vector<ColorSpinorField *> av_;
250  av_.push_back(Av_sloppy);
251 
252  blas::cDotProduct(alpha.get(), vj_, av_);
253 
254  for (int j = 0; j < i; j++) {
255  param.matProj[i * param.ld + j] = alpha[j];
256  param.matProj[j * param.ld + i] = conj(alpha[j]); // conj
257  }
258  }
259  }
260 
261  param.cur_dim += nev;
262 
263  printfQuda("\nNew curr deflation space dim = %d\n", param.cur_dim);
264  }
265 
266  void Deflation::reduce(double tol, int max_nev)
267  {
268  if (param.cur_dim < max_nev) {
269  printf("\nToo big number of eigenvectors was requested, switched to maximum available number %d\n", param.cur_dim);
270  max_nev = param.cur_dim;
271  }
272 
273  std::unique_ptr<double[]> evals(new double[param.cur_dim]);
274  std::unique_ptr<Complex, decltype(pinned_deleter)> projm(
276 
277  memcpy(projm.get(), param.matProj, param.ld * param.cur_dim * sizeof(Complex));
278 
280 #ifdef MAGMA_LIB
281  magma_Xheev(projm.get(), param.cur_dim, param.ld, evals.get(), sizeof(Complex));
282 #else
283  errorQuda("MAGMA library was not built");
284 #endif
286  Map<MatrixXcd, Unaligned, DynamicStride> projm_(projm.get(), param.cur_dim, param.cur_dim,
287  DynamicStride(param.ld, 1));
288  Map<VectorXd, Unaligned> evals_(evals.get(), param.cur_dim);
289  SelfAdjointEigenSolver<MatrixXcd> es(projm_);
290  projm_ = es.eigenvectors();
291  evals_ = es.eigenvalues();
292  } else {
293  errorQuda("Library type %d is currently not supported", param.eig_global.extlib_type);
294  }
295 
296  // reset projection matrix, now we will use inverse ritz values when deflate an initial guess:
297  param.use_inv_ritz = true;
298  for (int i = 0; i < param.cur_dim; i++) {
299  if (fabs(evals[i]) > 1e-16) {
300  param.invRitzVals[i] = 1.0 / evals[i];
301  } else {
302  errorQuda("Cannot invert Ritz value");
303  }
304  }
305 
307  // Create an eigenvector set:
309  // csParam.setPrecision(search_space_prec);//eigCG internal search space precision: must be adjustable.
310  csParam.is_composite = true;
311  csParam.composite_dim = max_nev;
312 
313  csParam.mem_type = QUDA_MEMORY_MAPPED;
314  std::unique_ptr<ColorSpinorField> buff(ColorSpinorField::Create(csParam));
315 
316  int idx = 0;
317  double relerr = 0.0;
318  bool do_residual_check = (tol != 0.0);
319 
320  while ((relerr < tol) && (idx < max_nev)) {
321  std::vector<ColorSpinorField *> rv(param.RV->Components().begin(), param.RV->Components().begin() + param.cur_dim);
322  std::vector<ColorSpinorField *> res;
323  res.push_back(r);
324 
325  blas::zero(*r);
326  blas::caxpy(&projm.get()[idx * param.ld], rv, res); // multiblas
327  blas::copy(buff->Component(idx), *r);
328 
329  if (do_residual_check) { // if tol=0.0 then disable relative residual norm check
330  *r_sloppy = *r;
332  double3 dotnorm = cDotProductNormA(*r_sloppy, *Av_sloppy);
333  double eval = dotnorm.x / dotnorm.z;
334  blas::xpay(*Av_sloppy, -eval, *r_sloppy);
335  relerr = sqrt(norm2(*r_sloppy) / dotnorm.z);
336  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Eigenvalue: %1.12e Residual: %1.12e\n", eval, relerr);
337  }
338 
339  idx++;
340  }
341 
342  printfQuda("\nReserved eigenvectors: %d\n", idx);
343  // copy all the stuff to cudaRitzVectors set:
344  for (int i = 0; i < idx; i++) blas::copy(param.RV->Component(i), buff->Component(i));
345 
346  // reset current dimension:
347  param.cur_dim = idx; // idx never exceeds cur_dim.
348  param.tot_dim = idx;
349  }
350 
351  //supports seperate reading or single file read
353  {
354  if (RV->IsComposite()) errorQuda("Not a composite field");
355 
356  profile.TPSTOP(QUDA_PROFILE_INIT);
357  profile.TPSTART(QUDA_PROFILE_IO);
358 
359  std::string vec_infile(param.eig_global.vec_infile);
360  std::vector<ColorSpinorField *> &B = RV->Components();
361 
362  const int Nvec = B.size();
363  printfQuda("Start loading %d vectors from %s\n", Nvec, vec_infile.c_str());
364 
365  void **V = new void*[Nvec];
366  for (int i = 0; i < Nvec; i++) {
367  V[i] = B[i]->V();
368  if (V[i] == NULL) {
369  printfQuda("Could not allocate V[%d]\n", i);
370  }
371  }
372 
373  if (strcmp(vec_infile.c_str(),"")!=0) {
374  read_spinor_field(vec_infile.c_str(), &V[0], B[0]->Precision(), B[0]->X(),
375  B[0]->Ncolor(), B[0]->Nspin(), Nvec, 0, (char**)0);
376  } else {
377  errorQuda("No eigenspace file defined");
378  }
379 
380  printfQuda("Done loading vectors\n");
381  profile.TPSTOP(QUDA_PROFILE_IO);
382  profile.TPSTART(QUDA_PROFILE_INIT);
383  }
384 
386  {
387  if (RV->IsComposite()) errorQuda("Not a composite field");
388 
389  profile.TPSTOP(QUDA_PROFILE_INIT);
390  profile.TPSTART(QUDA_PROFILE_IO);
391 
392  std::string vec_outfile(param.eig_global.vec_outfile);
393  std::vector<ColorSpinorField*> &B = RV->Components();
394 
395  if (strcmp(param.eig_global.vec_outfile,"")!=0) {
396  const int Nvec = B.size();
397  printfQuda("Start saving %d vectors to %s\n", Nvec, vec_outfile.c_str());
398 
399  void **V = static_cast<void**>(safe_malloc(Nvec*sizeof(void*)));
400  for (int i=0; i<Nvec; i++) {
401  V[i] = B[i]->V();
402  if (V[i] == NULL) {
403  printfQuda("Could not allocate V[%d]\n", i);
404  }
405  }
406 
407  write_spinor_field(vec_outfile.c_str(), &V[0], B[0]->Precision(), B[0]->X(),
408  B[0]->Ncolor(), B[0]->Nspin(), Nvec, 0, (char**)0);
409 
410  host_free(V);
411  printfQuda("Done saving vectors\n");
412  }
413 
414  profile.TPSTOP(QUDA_PROFILE_IO);
415  profile.TPSTART(QUDA_PROFILE_INIT);
416  }
417 
418 } // namespace quda
void ax(double a, ColorSpinorField &x)
Definition: blas_quda.cu:508
#define pool_pinned_free(ptr)
Definition: malloc_quda.h:128
double3 cDotProductNormA(ColorSpinorField &a, ColorSpinorField &b)
Definition: reduce_quda.cu:778
QudaVerbosity getVerbosity()
Definition: util_quda.cpp:21
#define errorQuda(...)
Definition: util_quda.h:121
double norm2(const ColorSpinorField &a)
Definition: reduce_quda.cu:721
QudaInverterType inv_type
Definition: quda.h:103
#define host_free(ptr)
Definition: malloc_quda.h:71
__host__ __device__ ValueType sqrt(ValueType x)
Definition: complex_quda.h:120
Complex cDotProduct(ColorSpinorField &, ColorSpinorField &)
Definition: reduce_quda.cu:764
CompositeColorSpinorField & Components()
static ColorSpinorField * Create(const ColorSpinorParam &param)
void copy(ColorSpinorField &dst, const ColorSpinorField &src)
Definition: copy_quda.cu:355
DiracMatrix & matDeflation
Definition: deflation.h:28
ColorSpinorField & Component(const int idx) const
Complex * matProj
Definition: deflation.h:31
void reduce(double tol, int max_nev)
Definition: deflation.cpp:266
void xpay(ColorSpinorField &x, double a, ColorSpinorField &y)
Definition: blas_quda.h:37
double norm2(const CloverField &a, bool inverse=false)
QudaGaugeParam param
Definition: pack_test.cpp:17
Deflation(DeflationParam &param, TimeProfile &profile)
Definition: deflation.cpp:23
QudaInvertParam * invert_param
Definition: quda.h:381
double tol
Definition: test_util.cpp:1656
void magma_Xheev(void *Mat, const int n, const int ldm, void *evalues, const int prec)
Definition: blas_magma.cu:296
QudaBoolean run_verify
Definition: quda.h:456
DeflationParam & param
Definition: deflation.h:82
Stride< Dynamic, Dynamic > DynamicStride
Definition: deflation.cpp:18
ColorSpinorParam csParam
Definition: pack_test.cpp:24
void Print()
Definition: timer.cpp:7
#define warningQuda(...)
Definition: util_quda.h:133
virtual ~Deflation()
Definition: deflation.cpp:69
void saveVectors(ColorSpinorField *RV)
Save the eigen space vectors in file.
Definition: deflation.cpp:385
std::complex< double > Complex
Definition: quda_internal.h:46
TimeProfile profile
Definition: deflation.h:85
QudaFieldLocation location
Definition: deflation.h:46
QudaBoolean import_vectors
Definition: quda.h:444
void caxpy(const Complex &a, ColorSpinorField &x, ColorSpinorField &y)
Definition: blas_quda.cu:512
double flops() const
Return the total flops done on this and all coarser levels.
Definition: deflation.cpp:82
#define safe_malloc(size)
Definition: malloc_quda.h:66
void zero(ColorSpinorField &a)
Definition: blas_quda.cu:472
int V
Definition: test_util.cpp:27
ColorSpinorField * Av_sloppy
Definition: deflation.h:97
#define pool_pinned_malloc(size)
Definition: malloc_quda.h:127
char vec_outfile[256]
Definition: quda.h:462
void operator()(ColorSpinorField &out, ColorSpinorField &in)
Definition: deflation.cpp:134
int nev
Definition: test_util.cpp:1707
ColorSpinorField * RV
Definition: deflation.h:22
void loadVectors(ColorSpinorField *RV)
Load the eigen space vectors from file.
Definition: deflation.cpp:352
ColorSpinorField * Av
Definition: deflation.h:91
QudaExtLibType extlib_type
Definition: quda.h:471
#define printfQuda(...)
Definition: util_quda.h:115
void write_spinor_field(const char *filename, void *V[], QudaPrecision precision, const int *X, int nColor, int nSpin, int Nvec, int argc, char *argv[])
Definition: qio_field.h:29
void increment(ColorSpinorField &V, int nev)
Definition: deflation.cpp:188
ColorSpinorField * r_sloppy
Definition: deflation.h:94
QudaEigParam & eig_global
Definition: deflation.h:17
ColorSpinorField * r
Definition: deflation.h:88
__host__ __device__ ValueType conj(ValueType x)
Definition: complex_quda.h:130
QudaPrecision Precision() const
double * invRitzVals
Definition: deflation.h:25
QudaPrecision cuda_prec_ritz
Definition: quda.h:447
__device__ __host__ void zero(vector_type< scalar, n > &v)
Definition: cub_helper.cuh:54
void read_spinor_field(const char *filename, void *V[], QudaPrecision precision, const int *X, int nColor, int nSpin, int Nvec, int argc, char *argv[])
Definition: qio_field.h:24
static auto pinned_allocator
Definition: deflation.cpp:20
static auto pinned_deleter
Definition: deflation.cpp:21
char vec_infile[256]
Definition: quda.h:459
void magma_Xgesv(void *sol, const int ldn, const int n, void *Mat, const int ldm, const int prec)
Definition: blas_magma.cu:265
unsigned long long bytes
Definition: blas_quda.cu:23