QUDA  0.9.0
milc_interface.cpp
Go to the documentation of this file.
1 #include <cstdio>
2 #include <cstdlib>
3 #include <iostream>
4 #include <quda.h>
5 #include <quda_milc_interface.h>
6 #include <quda_internal.h>
7 #include <color_spinor_field.h>
8 #include <string.h>
9 #include <unitarization_links.h>
10 #include <ks_improved_force.h>
11 #include <dslash_quda.h>
12 
13 #define MAX(a,b) ((a)>(b)?(a):(b))
14 
15 #ifdef BUILD_MILC_INTERFACE
16 
17 // code for NVTX taken from Jiri Kraus' blog post:
18 // http://devblogs.nvidia.com/parallelforall/cuda-pro-tip-generate-custom-application-profile-timelines-nvtx/
19 
20 #ifdef INTERFACE_NVTX
21 #include "nvToolsExt.h"
22 
23 static const uint32_t colors[] = { 0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff, 0x0000ffff, 0x00ff0000, 0x00ffffff };
24 static const int num_colors = sizeof(colors)/sizeof(uint32_t);
25 
26 #define PUSH_RANGE(name,cid) { \
27  int color_id = cid; \
28  color_id = color_id%num_colors;\
29  nvtxEventAttributes_t eventAttrib = {0}; \
30  eventAttrib.version = NVTX_VERSION; \
31  eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; \
32  eventAttrib.colorType = NVTX_COLOR_ARGB; \
33  eventAttrib.color = colors[color_id]; \
34  eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII; \
35  eventAttrib.message.ascii = name; \
36  nvtxRangePushEx(&eventAttrib); \
37 }
38 #define POP_RANGE nvtxRangePop();
39 #else
40 #define PUSH_RANGE(name,cid)
41 #define POP_RANGE
42 #endif
43 
44 
45 static bool initialized = false;
46 static int gridDim[4];
47 static int localDim[4];
48 
49 static bool invalidate_quda_gauge = true;
50 static bool create_quda_gauge = false;
51 
52 static bool invalidate_quda_mom = true;
53 
54 static void *df_preconditioner = nullptr;
55 
56 // set to 1 for GPU resident pipeline (not yet supported in mainline MILC)
57 #define MOM_PIPE 0
58 
59 using namespace quda;
60 using namespace quda::fermion_force;
61 
62 
63 #define QUDAMILC_VERBOSE 1
64 template <bool start>
65 void inline qudamilc_called(const char* func, QudaVerbosity verb){
66 #ifdef QUDAMILC_VERBOSE
67 if (verb >= QUDA_VERBOSE) {
68  if(start){
69  printfQuda("QUDA_MILC_INTERFACE: %s (called) \n",func);
70  PUSH_RANGE(func,1)
71  }
72  else {
73  printfQuda("QUDA_MILC_INTERFACE: %s (return) \n",func);
74  POP_RANGE
75  }
76  }
77 #endif
78 
79 }
80 
81 template <bool start>
82 void inline qudamilc_called(const char * func){
83  qudamilc_called<start>(func, getVerbosity());
84 }
85 
86 
87 void qudaInit(QudaInitArgs_t input)
88 {
89  if(initialized) return;
90  setVerbosityQuda(input.verbosity, "", stdout);
91  qudamilc_called<true>(__func__);
92  qudaSetLayout(input.layout);
93  initialized = true;
94  qudamilc_called<false>(__func__);
95 
96 }
97 
98 void qudaFinalize()
99 {
100  qudamilc_called<true>(__func__);
101  endQuda();
102  qudamilc_called<false>(__func__);
103 }
104 #ifdef MULTI_GPU
105 
109 static int rankFromCoords(const int *coords, void *fdata)
110 {
111  int *dims = static_cast<int *>(fdata);
112 
113  int rank = coords[3];
114  for (int i = 2; i >= 0; i--) {
115  rank = dims[i] * rank + coords[i];
116  }
117  return rank;
118 }
119 #endif
120 
121 void qudaSetLayout(QudaLayout_t input)
122 {
123  int local_dim[4];
124  for(int dir=0; dir<4; ++dir){ local_dim[dir] = input.latsize[dir]; }
125 #ifdef MULTI_GPU
126  for(int dir=0; dir<4; ++dir){ local_dim[dir] /= input.machsize[dir]; }
127 #endif
128  for(int dir=0; dir<4; ++dir){
129  if(local_dim[dir]%2 != 0){
130  printf("Error: Odd lattice dimensions are not supported\n");
131  exit(1);
132  }
133  }
134 
135  for(int dir=0; dir<4; ++dir) localDim[dir] = local_dim[dir];
136 
137 #ifdef MULTI_GPU
138  for(int dir=0; dir<4; ++dir) gridDim[dir] = input.machsize[dir];
139  initCommsGridQuda(4, gridDim, rankFromCoords, (void *)(gridDim));
140  static int device = -1;
141 #else
142  for(int dir=0; dir<4; ++dir) gridDim[dir] = 1;
143  static int device = input.device;
144 #endif
145 
146  initQuda(device);
147 }
148 
149 void* qudaAllocatePinned(size_t bytes) {
150  return pool_pinned_malloc(bytes);
151 }
152 
153 void qudaFreePinned(void *ptr) {
155 }
156 
158 {
159 
160  static bool initialized = false;
161 
162  if(initialized) return;
163  qudamilc_called<true>(__func__);
164 
165 #if defined(GPU_HISQ_FORCE) || defined(GPU_UNITARIZE)
166  const bool reunit_allow_svd = (params.reunit_allow_svd) ? true : false;
167  const bool reunit_svd_only = (params.reunit_svd_only) ? true : false;
168  const double unitarize_eps = 1e-14;
169  const double max_error = 1e-10;
170 #endif
171 
172 #ifdef GPU_HISQ_FORCE
174  params.force_filter,
175  max_error,
178  params.reunit_svd_rel_error,
179  params.reunit_svd_abs_error);
180 #endif
181 
182 #ifdef GPU_UNITARIZE
184  max_error,
187  params.reunit_svd_rel_error,
188  params.reunit_svd_abs_error);
189 #endif // UNITARIZE_GPU
190 
191  initialized = true;
192  qudamilc_called<false>(__func__);
193  return;
194 }
195 
196 
197 
198 static QudaGaugeParam newMILCGaugeParam(const int* dim, QudaPrecision prec, QudaLinkType link_type)
199 {
201  for(int dir=0; dir<4; ++dir) gParam.X[dir] = dim[dir];
202  gParam.cuda_prec_sloppy = gParam.cpu_prec = gParam.cuda_prec = prec;
203  gParam.type = link_type;
204 
205  gParam.reconstruct_sloppy = gParam.reconstruct = ((link_type == QUDA_SU3_LINKS) ? QUDA_RECONSTRUCT_12 : QUDA_RECONSTRUCT_NO);
206  gParam.gauge_order = QUDA_MILC_GAUGE_ORDER;
208  gParam.gauge_fix = QUDA_GAUGE_FIXED_NO;
209  gParam.scale = 1.0;
210  gParam.anisotropy = 1.0;
211  gParam.tadpole_coeff = 1.0;
212  gParam.scale = 0;
213  gParam.ga_pad = 0;
214  gParam.site_ga_pad = 0;
215  gParam.mom_ga_pad = 0;
216  gParam.llfat_ga_pad = 0;
217  return gParam;
218 }
219 
220 static void invalidateGaugeQuda() {
221  freeGaugeQuda();
222  invalidate_quda_gauge = true;
223 }
224 
225 void qudaLoadKSLink(int prec, QudaFatLinkArgs_t fatlink_args,
226  const double act_path_coeff[6], void* inlink, void* fatlink, void* longlink)
227 {
228  qudamilc_called<true>(__func__);
229 
230  QudaGaugeParam param = newMILCGaugeParam(localDim,
233 
236 
237  computeKSLinkQuda(fatlink, longlink, nullptr, inlink, const_cast<double*>(act_path_coeff), &param);
238  qudamilc_called<false>(__func__);
239 
240  // requires loadGaugeQuda to be called in subequent solver
241  invalidateGaugeQuda();
242 
243  // this flags that we are using QUDA to create the HISQ links
244  create_quda_gauge = true;
245  qudamilc_called<false>(__func__);
246 }
247 
248 
249 
250 void qudaLoadUnitarizedLink(int prec, QudaFatLinkArgs_t fatlink_args,
251  const double act_path_coeff[6], void* inlink, void* fatlink, void* ulink)
252 {
253  qudamilc_called<true>(__func__);
254 
255  QudaGaugeParam param = newMILCGaugeParam(localDim,
258 
259  computeKSLinkQuda(fatlink, nullptr, ulink, inlink, const_cast<double*>(act_path_coeff), &param);
260  qudamilc_called<false>(__func__);
261 
262  // requires loadGaugeQuda to be called in subequent solver
263  invalidateGaugeQuda();
264 
265  // this flags that we are using QUDA to create the HISQ links
266  create_quda_gauge = true;
267  qudamilc_called<false>(__func__);
268 }
269 
270 
271 void qudaHisqForce(int prec, int num_terms, int num_naik_terms, double** coeff, void** quark_field,
272  const double level2_coeff[6], const double fat7_coeff[6],
273  const void* const w_link, const void* const v_link, const void* const u_link,
274  void* const milc_momentum)
275 {
276  qudamilc_called<true>(__func__);
277 
279 
280  if (!invalidate_quda_mom) {
281  gParam.use_resident_mom = true;
282  gParam.make_resident_mom = true;
283  gParam.return_result_mom = false;
284  } else {
285  gParam.use_resident_mom = false;
286  gParam.make_resident_mom = false;
287  gParam.return_result_mom = true;
288  }
289 
290  long long flops;
291  computeHISQForceQuda(milc_momentum, &flops, level2_coeff, fat7_coeff,
292  w_link, v_link, u_link,
293  quark_field, num_terms, num_naik_terms, coeff,
294  &gParam);
295  qudamilc_called<false>(__func__);
296  return;
297 }
298 
299 
300 void qudaAsqtadForce(int prec, const double act_path_coeff[6],
301  const void* const one_link_src[4], const void* const naik_src[4],
302  const void* const link, void* const milc_momentum)
303 {
304  errorQuda("This interface has been removed and is no longer supported");
305 }
306 
307 
308 
309 void qudaComputeOprod(int prec, int num_terms, int num_naik_terms, double** coeff, double scale,
310  void** quark_field, void* oprod[3])
311 {
312  errorQuda("This interface has been removed and is no longer supported");
313 }
314 
315 
316 void qudaUpdateU(int prec, double eps, QudaMILCSiteArg_t *arg)
317 {
318  qudamilc_called<true>(__func__);
319  QudaGaugeParam gaugeParam = newMILCGaugeParam(localDim,
322  void *gauge = arg->site ? arg->site : arg->link;
323  void *mom = arg->site ? arg->site : arg->mom;
324 
325  gaugeParam.gauge_offset = arg->link_offset;
326  gaugeParam.mom_offset = arg->mom_offset;
327  gaugeParam.site_size = arg->size;
329 
330  if (!invalidate_quda_mom) {
333  } else {
336  }
337 
338  updateGaugeFieldQuda(gauge, mom, eps, 0, 0, &gaugeParam);
339  qudamilc_called<false>(__func__);
340  return;
341 }
342 
343 void qudaRephase(int prec, void *gauge, int flag, double i_mu)
344 {
345  qudamilc_called<true>(__func__);
346  QudaGaugeParam gaugeParam = newMILCGaugeParam(localDim,
349 
352  gaugeParam.i_mu = i_mu;
354 
356  qudamilc_called<false>(__func__);
357  return;
358 }
359 
360 void qudaUnitarizeSU3(int prec, double tol, QudaMILCSiteArg_t *arg)
361 {
362  qudamilc_called<true>(__func__);
363  QudaGaugeParam gaugeParam = newMILCGaugeParam(localDim,
366 
367  void *gauge = arg->site ? arg->site : arg->link;
368  gaugeParam.gauge_offset = arg->link_offset;
369  gaugeParam.site_size = arg->size;
371 
372  projectSU3Quda(gauge, tol, &gaugeParam);
373  qudamilc_called<false>(__func__);
374  return;
375 }
376 
377 double qudaMomAction(int prec, void *momentum)
378 {
379  qudamilc_called<true>(__func__);
380 
381  QudaGaugeParam momParam = newMILCGaugeParam(localDim,
384 
385  if (MOM_PIPE) {
386  if (invalidate_quda_mom) {
387  // beginning of trajectory so download the momentum and make
388  // resident
389  momParam.use_resident_mom = false;
390  momParam.make_resident_mom = true;
391  invalidate_quda_mom = false;
392  } else {
393  // end of trajectory so use resident and then invalidate
394  momParam.use_resident_mom = true;
395  momParam.make_resident_mom = false;
396  invalidate_quda_mom = true;
397  }
398  } else { // no momentum residency
399  momParam.use_resident_mom = false;
400  momParam.make_resident_mom = false;
401  invalidate_quda_mom = true;
402  }
403 
404  double action = momActionQuda(momentum, &momParam);
405 
406  qudamilc_called<false>(__func__);
407 
408  return action;
409 }
410 
411 static inline int opp(int dir){
412  return 7-dir;
413 }
414 
415 
416 static void createGaugeForcePaths(int **paths, int dir, int num_loop_types){
417 
418  int index=0;
419  // Plaquette paths
420  if (num_loop_types >= 1)
421  for(int i=0; i<4; ++i){
422  if(i==dir) continue;
423  paths[index][0] = i; paths[index][1] = opp(dir); paths[index++][2] = opp(i);
424  paths[index][0] = opp(i); paths[index][1] = opp(dir); paths[index++][2] = i;
425  }
426 
427  // Rectangle Paths
428  if (num_loop_types >= 2)
429  for(int i=0; i<4; ++i){
430  if(i==dir) continue;
431  paths[index][0] = paths[index][1] = i; paths[index][2] = opp(dir); paths[index][3] = paths[index][4] = opp(i);
432  index++;
433  paths[index][0] = paths[index][1] = opp(i); paths[index][2] = opp(dir); paths[index][3] = paths[index][4] = i;
434  index++;
435  paths[index][0] = dir; paths[index][1] = i; paths[index][2] = paths[index][3] = opp(dir); paths[index][4] = opp(i);
436  index++;
437  paths[index][0] = dir; paths[index][1] = opp(i); paths[index][2] = paths[index][3] = opp(dir); paths[index][4] = i;
438  index++;
439  paths[index][0] = i; paths[index][1] = paths[index][2] = opp(dir); paths[index][3] = opp(i); paths[index][4] = dir;
440  index++;
441  paths[index][0] = opp(i); paths[index][1] = paths[index][2] = opp(dir); paths[index][3] = i; paths[index][4] = dir;
442  index++;
443  }
444 
445  if (num_loop_types >= 3) {
446  // Staple paths
447  for(int i=0; i<4; ++i){
448  for(int j=0; j<4; ++j){
449  if(i==dir || j==dir || i==j) continue;
450  paths[index][0] = i; paths[index][1] = j; paths[index][2] = opp(dir); paths[index][3] = opp(i), paths[index][4] = opp(j);
451  index++;
452  paths[index][0] = i; paths[index][1] = opp(j); paths[index][2] = opp(dir); paths[index][3] = opp(i), paths[index][4] = j;
453  index++;
454  paths[index][0] = opp(i); paths[index][1] = j; paths[index][2] = opp(dir); paths[index][3] = i, paths[index][4] = opp(j);
455  index++;
456  paths[index][0] = opp(i); paths[index][1] = opp(j); paths[index][2] = opp(dir); paths[index][3] = i, paths[index][4] = j;
457  index++;
458  }
459  }
460  }
461 
462 }
463 
464 
465 void qudaGaugeForce( int precision,
466  int num_loop_types,
467  double milc_loop_coeff[3],
468  double eb3,
470 {
471  qudamilc_called<true>(__func__);
472 
473  int numPaths = 0;
474  switch (num_loop_types) {
475  case 1:
476  numPaths = 6;
477  break;
478  case 2:
479  numPaths = 24;
480  break;
481  case 3:
482  numPaths = 48;
483  break;
484  default:
485  errorQuda("Invalid num_loop_types = %d\n", num_loop_types);
486  }
487 
488  QudaGaugeParam qudaGaugeParam = newMILCGaugeParam(localDim,
489  (precision==1) ? QUDA_SINGLE_PRECISION : QUDA_DOUBLE_PRECISION,
491  void *gauge = arg->site ? arg->site : arg->link;
492  void *mom = arg->site ? arg->site : arg->mom;
493 
494  qudaGaugeParam.gauge_offset = arg->link_offset;
495  qudaGaugeParam.mom_offset = arg->mom_offset;
496  qudaGaugeParam.site_size = arg->size;
498 
499  double *loop_coeff = static_cast<double*>(safe_malloc(numPaths*sizeof(double)));
500  int *length = static_cast<int*>(safe_malloc(numPaths*sizeof(int)));
501 
502  if (num_loop_types >= 1) for(int i= 0; i< 6; ++i) {
503  loop_coeff[i] = milc_loop_coeff[0];
504  length[i] = 3;
505  }
506  if (num_loop_types >= 2) for(int i= 6; i<24; ++i) {
507  loop_coeff[i] = milc_loop_coeff[1];
508  length[i] = 5;
509  }
510  if (num_loop_types >= 3) for(int i=24; i<48; ++i) {
511  loop_coeff[i] = milc_loop_coeff[2];
512  length[i] = 5;
513  }
514 
515  int** input_path_buf[4];
516  for(int dir=0; dir<4; ++dir){
517  input_path_buf[dir] = static_cast<int**>(safe_malloc(numPaths*sizeof(int*)));
518  for(int i=0; i<numPaths; ++i){
519  input_path_buf[dir][i] = static_cast<int*>(safe_malloc(length[i]*sizeof(int)));
520  }
521  createGaugeForcePaths(input_path_buf[dir], dir, num_loop_types);
522  }
523 
524  if (!invalidate_quda_mom) {
528 
529  // this means when we compute the momentum, we acummulate to the
530  // preexisting resident momentum instead of overwriting it
532  } else {
536 
537  // this means we compute momentum into a fresh field, copy it back
538  // and sum to current momentum in MILC. This saves an initial
539  // CPU->GPU download of the current momentum.
541  }
542 
543  int max_length = 6;
544 
545  computeGaugeForceQuda(mom, gauge, input_path_buf, length,
546  loop_coeff, numPaths, max_length, eb3, &qudaGaugeParam);
547 
548  for(int dir=0; dir<4; ++dir){
549  for(int i=0; i<numPaths; ++i) host_free(input_path_buf[dir][i]);
550  host_free(input_path_buf[dir]);
551  }
552 
553  host_free(length);
554  host_free(loop_coeff);
555 
556  qudamilc_called<false>(__func__);
557  return;
558 }
559 
560 
561 static int getFatLinkPadding(const int dim[4])
562 {
563  int padding = MAX(dim[1]*dim[2]*dim[3]/2, dim[0]*dim[2]*dim[3]/2);
564  padding = MAX(padding, dim[0]*dim[1]*dim[3]/2);
565  padding = MAX(padding, dim[0]*dim[1]*dim[2]/2);
566  return padding;
567 }
568 
569 
570 // set the params for the single mass solver
571 static void setInvertParams(const int dim[4],
576  double mass,
577  double target_residual,
578  double target_residual_hq,
579  int maxiter,
580  double reliable_delta,
583  QudaInverterType inverter,
584  QudaInvertParam *invertParam)
585 {
586  invertParam->use_sloppy_partial_accumulator = 0;
587  invertParam->verbosity = verbosity;
588  invertParam->mass = mass;
589  invertParam->tol = target_residual;
590  invertParam->tol_hq =target_residual_hq;
591  invertParam->num_offset = 0;
592 
593  invertParam->inv_type = inverter;
594  invertParam->maxiter = maxiter;
595  invertParam->reliable_delta = reliable_delta;
596 
598  invertParam->cpu_prec = cpu_prec;
599  invertParam->cuda_prec = cuda_prec;
600  invertParam->cuda_prec_sloppy = cuda_prec_sloppy;
601 
603  invertParam->solve_type = QUDA_NORMEQ_PC_SOLVE;
605  invertParam->gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS; // not used, but required by the code.
606  invertParam->dirac_order = QUDA_DIRAC_ORDER;
607 
608  invertParam->dslash_type = QUDA_ASQTAD_DSLASH;
609  invertParam->Ls = 1;
610  invertParam->gflops = 0.0;
611 
614 
615 
616  if(parity == QUDA_EVEN_PARITY){ // even parity
617  invertParam->matpc_type = QUDA_MATPC_EVEN_EVEN;
618  }else if(parity == QUDA_ODD_PARITY){
619  invertParam->matpc_type = QUDA_MATPC_ODD_ODD;
620  }else{
621  errorQuda("Invalid parity\n");
622  exit(1);
623  }
624 
625  invertParam->dagger = QUDA_DAG_NO;
626  invertParam->sp_pad = 0;
628 
629  // for the preconditioner
631  invertParam->tol_precondition = 1e-1;
632  invertParam->maxiter_precondition = 2;
633  invertParam->verbosity_precondition = QUDA_SILENT;
635 
636  invertParam->compute_action = 0;
637 
638  return;
639 }
640 
641 
642 
643 
644 // Set params for the multi-mass solver.
645 static void setInvertParams(const int dim[4],
650  int num_offset,
651  const double offset[],
652  const double target_residual_offset[],
653  const double target_residual_hq_offset[],
654  int maxiter,
655  double reliable_delta,
658  QudaInverterType inverter,
659  QudaInvertParam *invertParam)
660 {
661 
662  const double null_mass = -1;
663  const double null_residual = -1;
664 
665 
667  null_mass, null_residual, null_residual, maxiter, reliable_delta, parity, verbosity, inverter, invertParam);
668 
669  invertParam->num_offset = num_offset;
670  for(int i=0; i<num_offset; ++i){
671  invertParam->offset[i] = offset[i];
672  invertParam->tol_offset[i] = target_residual_offset[i];
673  //if(invertParam->residual_type & QUDA_HEAVY_QUARK_RESIDUAL){
674  invertParam->tol_hq_offset[i] = target_residual_hq_offset[i];
675  //}
676  }
677  return;
678 }
679 
680 
681 static void setGaugeParams(const int dim[4],
686  const double tadpole,
688 {
689 
690  for(int dir=0; dir<4; ++dir){
691  gaugeParam->X[dir] = dim[dir];
692  }
693 
699 
701  gaugeParam->anisotropy = 1.0;
702  gaugeParam->tadpole_coeff = tadpole;
703  gaugeParam->t_boundary = QUDA_PERIODIC_T; // anti-periodic boundary conditions are built into the gauge field
705  gaugeParam->ga_pad = getFatLinkPadding(dim);
707 
708 
709  // preconditioning...
712 
713  return;
714 }
715 
716 
717 
718 static void setColorSpinorParams(const int dim[4],
719  QudaPrecision precision,
721 {
722 
723  param->nColor = 3;
724  param->nSpin = 1;
725  param->nDim = 4;
726 
727  for(int dir=0; dir<4; ++dir) param->x[dir] = dim[dir];
728  param->x[0] /= 2;
729 
730  param->precision = precision;
731  param->pad = 0;
732  param->siteSubset = QUDA_PARITY_SITE_SUBSET;
733  param->siteOrder = QUDA_EVEN_ODD_SITE_ORDER;
735  param->gammaBasis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS; // meaningless, but required by the code.
736  param->create = QUDA_ZERO_FIELD_CREATE;
737  return;
738 }
739 
740 void setDeflationParam(QudaPrecision ritz_prec,
744  char vec_infile[],
745  char vec_outfile[],
746  QudaEigParam *df_param)
747 {
748 
750 
751  df_param->cuda_prec_ritz = ritz_prec;
752  df_param->location = location_ritz;
753  df_param->mem_type_ritz = mem_type_ritz;
754 
755 
756  df_param->run_verify = QUDA_BOOLEAN_NO;
757 
758  df_param->nk = df_param->invert_param->nev;
759  df_param->np = df_param->invert_param->nev*df_param->invert_param->deflation_grid;
760 
761  // set file i/o parameters
762  strcpy(df_param->vec_infile, vec_infile);
763  strcpy(df_param->vec_outfile, vec_outfile);
764 }
765 
766 
767 
768 static size_t getColorVectorOffset(QudaParity local_parity, bool even_odd_exchange, const int dim[4])
769 {
770  size_t offset;
771  int volume = dim[0]*dim[1]*dim[2]*dim[3];
772 
773  if(local_parity == QUDA_EVEN_PARITY){
774  offset = even_odd_exchange ? volume*6/2 : 0;
775  }else{
776  offset = even_odd_exchange ? 0 : volume*6/2;
777  }
778  return offset;
779 }
780 
781 
782 void qudaMultishiftInvert(int external_precision,
783  int quda_precision,
784  int num_offsets,
785  double* const offset,
786  QudaInvertArgs_t inv_args,
787  const double target_residual[],
788  const double target_fermilab_residual[],
789  const void* const fatlink,
790  const void* const longlink,
791  const double tadpole,
792  void* source,
793  void** solutionArray,
794  double* const final_residual,
795  double* const final_fermilab_residual,
796  int *num_iters)
797 {
798 
799  static const QudaVerbosity verbosity = getVerbosity();
800  qudamilc_called<true>(__func__, verbosity);
801 
802  if(target_residual[0] == 0){
803  errorQuda("qudaMultishiftInvert: zeroth target residual cannot be zero\n");
804  exit(1);
805  }
806 
807  QudaPrecision host_precision = (external_precision == 2) ? QUDA_DOUBLE_PRECISION : QUDA_SINGLE_PRECISION;
808  QudaPrecision device_precision = (quda_precision == 2) ? QUDA_DOUBLE_PRECISION : QUDA_SINGLE_PRECISION;
809  const bool use_mixed_precision = (((quda_precision==2) && inv_args.mixed_precision) ||
810  ((quda_precision==1) && (inv_args.mixed_precision==2)) ) ? true : false;
811  QudaPrecision device_precision_sloppy;
812  switch(inv_args.mixed_precision) {
813  case 2: device_precision_sloppy = QUDA_HALF_PRECISION; break;
814  case 1: device_precision_sloppy = QUDA_SINGLE_PRECISION; break;
815  default: device_precision_sloppy = device_precision;
816  }
817 
818  QudaPrecision device_precision_precondition = device_precision_sloppy;
819 
821  setGaugeParams(localDim, host_precision, device_precision, device_precision_sloppy, device_precision_precondition, tadpole, &gaugeParam);
822 
823  QudaInvertParam invertParam = newQudaInvertParam();
824 
825  invertParam.residual_type = static_cast<QudaResidualType_s>(0);
826  invertParam.residual_type = (target_residual[0] != 0) ? static_cast<QudaResidualType_s> ( invertParam.residual_type | QUDA_L2_RELATIVE_RESIDUAL) : invertParam.residual_type;
827  invertParam.residual_type = (target_fermilab_residual[0] != 0) ? static_cast<QudaResidualType_s> (invertParam.residual_type | QUDA_HEAVY_QUARK_RESIDUAL) : invertParam.residual_type;
828 
829  if (verbosity >= QUDA_VERBOSE) {
830  if (invertParam.residual_type & QUDA_L2_RELATIVE_RESIDUAL)
831  printfQuda("Using QUDA_L2_RELATIVE_RESIDUAL");
832  if (invertParam.residual_type & QUDA_HEAVY_QUARK_RESIDUAL)
833  printfQuda("Using QUDA_HEAVY_QUARK_RESIDUAL");
834  }
835 
836  invertParam.use_sloppy_partial_accumulator = 0;
837 
838  QudaParity local_parity = inv_args.evenodd;
839  {
840  const double reliable_delta = (use_mixed_precision ? 1e-1 : 0.0);
841  setInvertParams(localDim, host_precision, device_precision, device_precision_sloppy, device_precision_precondition,
842  num_offsets, offset, target_residual, target_fermilab_residual,
843  inv_args.max_iter, reliable_delta, local_parity, verbosity, QUDA_CG_INVERTER, &invertParam);
844  }
845 
847  setColorSpinorParams(localDim, host_precision, &csParam);
848 
849  // dirty hack to invalidate the cached gauge field without breaking interface compatability
850  if (*num_iters == -1) {
851  invalidateGaugeQuda();
852  }
853 
854  // set the solver
855  char *quda_reconstruct = getenv("QUDA_MILC_HISQ_RECONSTRUCT");
857  if (!quda_reconstruct || strcmp(quda_reconstruct,"18")==0) {
858  long_reconstruct = QUDA_RECONSTRUCT_NO;
859  } else if (strcmp(quda_reconstruct,"13")==0) {
860  long_reconstruct = QUDA_RECONSTRUCT_13;
861  } else if (strcmp(quda_reconstruct,"9")==0) {
862  long_reconstruct = QUDA_RECONSTRUCT_9;
863  } else {
864  errorQuda("reconstruct request %s not supported", quda_reconstruct);
865  }
866 
867 
868  if(invalidate_quda_gauge || !create_quda_gauge ){
869  const int fat_pad = getFatLinkPadding(localDim);
871  gaugeParam.ga_pad = fat_pad; // don't know if this is correct
873  loadGaugeQuda(const_cast<void*>(fatlink), &gaugeParam);
874 
875  const int long_pad = 3*fat_pad;
877  gaugeParam.ga_pad = long_pad;
879  loadGaugeQuda(const_cast<void*>(longlink), &gaugeParam);
880  invalidate_quda_gauge = false;
881  }
882 
883  void** sln_pointer = (void**)malloc(num_offsets*sizeof(void*));
884  int quark_offset = getColorVectorOffset(local_parity, false, gaugeParam.X)*host_precision;
885  void* src_pointer = static_cast<char*>(source) + quark_offset;
886 
887  for(int i=0; i<num_offsets; ++i) sln_pointer[i] = static_cast<char*>(solutionArray[i]) + quark_offset;
888 
889  invertMultiShiftQuda(sln_pointer, src_pointer, &invertParam);
890  free(sln_pointer);
891 
892  // return the number of iterations taken by the inverter
893  *num_iters = invertParam.iter;
894  for(int i=0; i<num_offsets; ++i){
895  final_residual[i] = invertParam.true_res_offset[i];
896  final_fermilab_residual[i] = invertParam.true_res_hq_offset[i];
897  } // end loop over number of offsets
898 
899  if(!create_quda_gauge) invalidateGaugeQuda();
900 
901  qudamilc_called<false>(__func__, verbosity);
902  return;
903 } // qudaMultiShiftInvert
904 
905 
906 
907 
908 void qudaInvert(int external_precision,
909  int quda_precision,
910  double mass,
911  QudaInvertArgs_t inv_args,
912  double target_residual,
913  double target_fermilab_residual,
914  const void* const fatlink,
915  const void* const longlink,
916  const double tadpole,
917  void* source,
918  void* solution,
919  double* const final_residual,
920  double* const final_fermilab_residual,
921  int* num_iters)
922 {
923  static const QudaVerbosity verbosity = getVerbosity();
924  qudamilc_called<true>(__func__, verbosity);
925 
926  if(target_fermilab_residual == 0 && target_residual == 0){
927  errorQuda("qudaInvert: requesting zero residual\n");
928  exit(1);
929  }
930 
931  // static const QudaVerbosity verbosity = getVerbosity();
932  QudaPrecision host_precision = (external_precision == 2) ? QUDA_DOUBLE_PRECISION : QUDA_SINGLE_PRECISION;
933  QudaPrecision device_precision = (quda_precision == 2) ? QUDA_DOUBLE_PRECISION : QUDA_SINGLE_PRECISION;
934  QudaPrecision device_precision_sloppy;
935 
936  switch(inv_args.mixed_precision) {
937  case 2: device_precision_sloppy = QUDA_HALF_PRECISION; break;
938  case 1: device_precision_sloppy = QUDA_SINGLE_PRECISION; break;
939  default: device_precision_sloppy = device_precision;
940  }
941 
942  QudaPrecision device_precision_precondition = device_precision_sloppy;
944  // a basic set routine for the gauge parameters
945  setGaugeParams(localDim, host_precision, device_precision, device_precision_sloppy, device_precision_precondition, tadpole, &gaugeParam);
946  QudaInvertParam invertParam = newQudaInvertParam();
947 
948  invertParam.residual_type = static_cast<QudaResidualType_s>(0);
949  invertParam.residual_type = (target_residual != 0) ? static_cast<QudaResidualType_s> ( invertParam.residual_type | QUDA_L2_RELATIVE_RESIDUAL) : invertParam.residual_type;
950  invertParam.residual_type = (target_fermilab_residual != 0) ? static_cast<QudaResidualType_s> (invertParam.residual_type | QUDA_HEAVY_QUARK_RESIDUAL) : invertParam.residual_type;
951 
952  QudaParity local_parity = inv_args.evenodd;
953  //double& target_res = (invertParam.residual_type == QUDA_L2_RELATIVE_RESIDUAL) ? target_residual : target_fermilab_residual;
954  double& target_res = target_residual;
955  double& target_res_hq = target_fermilab_residual;
956  const double reliable_delta = 1e-1;
957 
958  setInvertParams(localDim, host_precision, device_precision, device_precision_sloppy, device_precision_precondition,
959  mass, target_res, target_res_hq, inv_args.max_iter, reliable_delta, local_parity, verbosity, QUDA_CG_INVERTER, &invertParam);
960  invertParam.use_sloppy_partial_accumulator = 0;
961  if (invertParam.residual_type == QUDA_HEAVY_QUARK_RESIDUAL) invertParam.heavy_quark_check = 1;
962 
964  setColorSpinorParams(localDim, host_precision, &csParam);
965 
966  const int fat_pad = getFatLinkPadding(localDim);
967  const int long_pad = 3*fat_pad;
968 
969  // dirty hack to invalidate the cached gauge field without breaking interface compatability
970  if (*num_iters == -1 || !canReuseResidentGauge(&invertParam) ) {
971  invalidateGaugeQuda();
972  }
973 
974  if(invalidate_quda_gauge || !create_quda_gauge){
976  gaugeParam.ga_pad = fat_pad;
978  loadGaugeQuda(const_cast<void*>(fatlink), &gaugeParam);
979  if(longlink != nullptr) {
981  gaugeParam.ga_pad = long_pad;
983  loadGaugeQuda(const_cast<void*>(longlink), &gaugeParam);
984  }
985  invalidate_quda_gauge = false;
986  }
987  if(longlink == nullptr) {
988  invertParam.dslash_type = QUDA_STAGGERED_DSLASH;
989  }
990 
991  int quark_offset = getColorVectorOffset(local_parity, false, gaugeParam.X)*host_precision;
992 
993  invertQuda(static_cast<char*>(solution) + quark_offset,
994  static_cast<char*>(source) + quark_offset,
995  &invertParam);
996 
997  // return the number of iterations taken by the inverter
998  *num_iters = invertParam.iter;
999  *final_residual = invertParam.true_res;
1000  *final_fermilab_residual = invertParam.true_res_hq;
1001 
1002  if(!create_quda_gauge) invalidateGaugeQuda();
1003 
1004  qudamilc_called<false>(__func__, verbosity);
1005  return;
1006 } // qudaInvert
1007 
1008 
1009 void qudaDslash(int external_precision,
1010  int quda_precision,
1011  QudaInvertArgs_t inv_args,
1012  const void* const fatlink,
1013  const void* const longlink,
1014  const double tadpole,
1015  void* src,
1016  void* dst,
1017  int* num_iters)
1018 {
1019  static const QudaVerbosity verbosity = getVerbosity();
1020  qudamilc_called<true>(__func__, verbosity);
1021 
1022  // static const QudaVerbosity verbosity = getVerbosity();
1023  QudaPrecision host_precision = (external_precision == 2) ? QUDA_DOUBLE_PRECISION : QUDA_SINGLE_PRECISION;
1024  QudaPrecision device_precision = (quda_precision == 2) ? QUDA_DOUBLE_PRECISION : QUDA_SINGLE_PRECISION;
1025  QudaPrecision device_precision_sloppy = device_precision;
1026  QudaPrecision device_precision_precondition = device_precision_sloppy;
1027 
1029  // a basic set routine for the gauge parameters
1030  setGaugeParams(localDim, host_precision, device_precision, device_precision_sloppy, device_precision_precondition, tadpole, &gaugeParam);
1031  QudaInvertParam invertParam = newQudaInvertParam();
1032 
1033  QudaParity local_parity = inv_args.evenodd;
1034  QudaParity other_parity = local_parity == QUDA_EVEN_PARITY ? QUDA_ODD_PARITY : QUDA_EVEN_PARITY;
1035 
1036  setInvertParams(localDim, host_precision, device_precision, device_precision_sloppy, device_precision_precondition,
1037  0.0, 0, 0, 0, 0.0, local_parity, verbosity, QUDA_CG_INVERTER, &invertParam);
1038 
1040  setColorSpinorParams(localDim, host_precision, &csParam);
1041 
1042  const int fat_pad = getFatLinkPadding(localDim);
1043  const int long_pad = 3*fat_pad;
1044 
1045  // dirty hack to invalidate the cached gauge field without breaking interface compatability
1046  if (*num_iters == -1 || !canReuseResidentGauge(&invertParam) ) {
1047  invalidateGaugeQuda();
1048  }
1049 
1050  if(invalidate_quda_gauge || !create_quda_gauge){
1052  gaugeParam.ga_pad = fat_pad;
1054  loadGaugeQuda(const_cast<void*>(fatlink), &gaugeParam);
1055 
1057  gaugeParam.ga_pad = long_pad;
1059  loadGaugeQuda(const_cast<void*>(longlink), &gaugeParam);
1060 
1061  invalidate_quda_gauge = false;
1062  }
1063 
1064  int src_offset = getColorVectorOffset(other_parity, false, gaugeParam.X);
1065  int dst_offset = getColorVectorOffset(local_parity, false, gaugeParam.X);
1066 
1067  dslashQuda(static_cast<char*>(dst) + dst_offset*host_precision,
1068  static_cast<char*>(src) + src_offset*host_precision,
1069  &invertParam, local_parity);
1070 
1071  if(!create_quda_gauge) invalidateGaugeQuda();
1072 
1073  qudamilc_called<false>(__func__, verbosity);
1074  return;
1075 } // qudaDslash
1076 
1077 
1078 void qudaInvertMsrc(int external_precision,
1079  int quda_precision,
1080  double mass,
1081  QudaInvertArgs_t inv_args,
1082  double target_residual,
1083  double target_fermilab_residual,
1084  const void* const fatlink,
1085  const void* const longlink,
1086  const double tadpole,
1087  void** sourceArray,
1088  void** solutionArray,
1089  double* const final_residual,
1090  double* const final_fermilab_residual,
1091  int* num_iters,
1092  int num_src)
1093 {
1094 
1095  static const QudaVerbosity verbosity = getVerbosity();
1096  qudamilc_called<true>(__func__, verbosity);
1097 
1098  if(target_fermilab_residual == 0 && target_residual == 0){
1099  errorQuda("qudaInvert: requesting zero residual\n");
1100  exit(1);
1101  }
1102 
1103  // static const QudaVerbosity verbosity = getVerbosity();
1104  QudaPrecision host_precision = (external_precision == 2) ? QUDA_DOUBLE_PRECISION : QUDA_SINGLE_PRECISION;
1105  QudaPrecision device_precision = (quda_precision == 2) ? QUDA_DOUBLE_PRECISION : QUDA_SINGLE_PRECISION;
1106  QudaPrecision device_precision_sloppy;
1107 
1108  switch(inv_args.mixed_precision) {
1109  case 2: device_precision_sloppy = QUDA_HALF_PRECISION; break;
1110  case 1: device_precision_sloppy = QUDA_SINGLE_PRECISION; break;
1111  default: device_precision_sloppy = device_precision;
1112  }
1113 
1114  QudaPrecision device_precision_precondition = device_precision_sloppy;
1116  // a basic set routine for the gauge parameters
1117  setGaugeParams(localDim, host_precision, device_precision, device_precision_sloppy, device_precision_precondition, tadpole, &gaugeParam);
1118  QudaInvertParam invertParam = newQudaInvertParam();
1119 
1120  invertParam.residual_type = static_cast<QudaResidualType_s>(0);
1121  invertParam.residual_type = (target_residual != 0) ? static_cast<QudaResidualType_s> ( invertParam.residual_type | QUDA_L2_RELATIVE_RESIDUAL) : invertParam.residual_type;
1122  invertParam.residual_type = (target_fermilab_residual != 0) ? static_cast<QudaResidualType_s> (invertParam.residual_type | QUDA_HEAVY_QUARK_RESIDUAL) : invertParam.residual_type;
1123  invertParam.num_src = num_src;
1124 
1125  QudaParity local_parity = inv_args.evenodd;
1126  //double& target_res = (invertParam.residual_type == QUDA_L2_RELATIVE_RESIDUAL) ? target_residual : target_fermilab_residual;
1127  double& target_res = target_residual;
1128  double& target_res_hq = target_fermilab_residual;
1129  const double reliable_delta = 1e-1;
1130 
1131  setInvertParams(localDim, host_precision, device_precision, device_precision_sloppy, device_precision_precondition,
1132  mass, target_res, target_res_hq, inv_args.max_iter, reliable_delta, local_parity, verbosity, QUDA_CG_INVERTER, &invertParam);
1133  invertParam.use_sloppy_partial_accumulator = 0;
1134  if (invertParam.residual_type == QUDA_HEAVY_QUARK_RESIDUAL) invertParam.heavy_quark_check = 1;
1135 
1136 
1137 
1139  setColorSpinorParams(localDim, host_precision, &csParam);
1140 
1141  const int fat_pad = getFatLinkPadding(localDim);
1142  const int long_pad = 3*fat_pad;
1143 
1144  // dirty hack to invalidate the cached gauge field without breaking interface compatability
1145  if (*num_iters == -1 || !canReuseResidentGauge(&invertParam) ) {
1146  invalidateGaugeQuda();
1147  }
1148 
1149  if(invalidate_quda_gauge || !create_quda_gauge){
1151  gaugeParam.ga_pad = fat_pad;
1153  loadGaugeQuda(const_cast<void*>(fatlink), &gaugeParam);
1154 
1156  gaugeParam.ga_pad = long_pad;
1158  loadGaugeQuda(const_cast<void*>(longlink), &gaugeParam);
1159 
1160  invalidate_quda_gauge = false;
1161  }
1162 
1163  int quark_offset = getColorVectorOffset(local_parity, false, gaugeParam.X)*host_precision;
1164  void** sln_pointer = (void**)malloc(num_src*sizeof(void*));
1165  void** src_pointer = (void**)malloc(num_src*sizeof(void*));
1166 
1167  for(int i=0; i<num_src; ++i) sln_pointer[i] = static_cast<char*>(solutionArray[i]) + quark_offset;
1168  for(int i=0; i<num_src; ++i) src_pointer[i] = static_cast<char*>(sourceArray[i]) + quark_offset;
1169 
1170  invertMultiSrcQuda(sln_pointer, src_pointer, &invertParam);
1171  free(sln_pointer);
1172  free(src_pointer);
1173 
1174 
1175  // return the number of iterations taken by the inverter
1176  *num_iters = invertParam.iter;
1177  *final_residual = invertParam.true_res;
1178  *final_fermilab_residual = invertParam.true_res_hq;
1179 
1180  if(!create_quda_gauge) invalidateGaugeQuda();
1181 
1182  qudamilc_called<false>(__func__, verbosity);
1183  return;
1184 } // qudaInvert
1185 
1186 
1187 void qudaEigCGInvert(int external_precision,
1188  int quda_precision,
1189  double mass,
1190  QudaInvertArgs_t inv_args,
1191  double target_residual,
1192  double target_fermilab_residual,
1193  const void* const fatlink,
1194  const void* const longlink,
1195  const double tadpole,
1196  void* source,//array of source vectors -> overwritten on exit
1197  void* solution,//temporary
1198  QudaEigArgs_t eig_args,
1199  const int rhs_idx,//current rhs
1200  const int last_rhs_flag,//is this the last rhs to solve
1201  double* const final_residual,
1202  double* const final_fermilab_residual,
1203  int *num_iters)
1204 {
1205 
1206  static const QudaVerbosity verbosity = getVerbosity();
1207  qudamilc_called<true>(__func__, verbosity);
1208 
1209  if(target_fermilab_residual == 0 && target_residual == 0){
1210  errorQuda("qudaInvert: requesting zero residual\n");
1211  exit(1);
1212  }
1213 
1214  QudaPrecision host_precision = (external_precision == 2) ? QUDA_DOUBLE_PRECISION : QUDA_SINGLE_PRECISION;
1215  QudaPrecision device_precision = (quda_precision == 2) ? QUDA_DOUBLE_PRECISION : QUDA_SINGLE_PRECISION;
1216  QudaPrecision device_precision_sloppy;
1217 
1218  switch(inv_args.mixed_precision) {
1219  case 2: device_precision_sloppy = QUDA_HALF_PRECISION; break;
1220  case 1: device_precision_sloppy = QUDA_SINGLE_PRECISION; break;
1221  default: device_precision_sloppy = device_precision;
1222  }
1223 
1224  QudaPrecision device_precision_precondition = device_precision_sloppy;
1226  // a basic set routine for the gauge parameters
1227  setGaugeParams(localDim, host_precision, device_precision, device_precision_sloppy, device_precision_precondition, tadpole, &gaugeParam);
1228  QudaInvertParam invertParam = newQudaInvertParam();
1229 
1230  invertParam.residual_type = static_cast<QudaResidualType_s>(0);
1231  invertParam.residual_type = (target_residual != 0) ? static_cast<QudaResidualType_s> ( invertParam.residual_type | QUDA_L2_RELATIVE_RESIDUAL) : invertParam.residual_type;
1232  invertParam.residual_type = (target_fermilab_residual != 0) ? static_cast<QudaResidualType_s> (invertParam.residual_type | QUDA_HEAVY_QUARK_RESIDUAL) : invertParam.residual_type;
1233 
1234 
1235  QudaParity local_parity = inv_args.evenodd;
1236  double& target_res = target_residual;
1237  double& target_res_hq = target_fermilab_residual;
1238  const double reliable_delta = 1e-1;
1239 
1240  setInvertParams(localDim, host_precision, device_precision, device_precision_sloppy, device_precision_precondition,
1241  mass, target_res, target_res_hq, inv_args.max_iter, reliable_delta, local_parity, verbosity, QUDA_CG_INVERTER, &invertParam);
1242  invertParam.use_sloppy_partial_accumulator = 0;
1243  if (invertParam.residual_type == QUDA_HEAVY_QUARK_RESIDUAL) invertParam.heavy_quark_check = 1;
1245  QudaEigParam df_param = newQudaEigParam();
1246  df_param.invert_param = &invertParam;
1247 
1248  invertParam.solve_type = QUDA_NORMOP_PC_SOLVE;
1249  invertParam.nev = eig_args.nev;
1250  invertParam.max_search_dim = eig_args.max_search_dim;
1251  invertParam.deflation_grid = eig_args.deflation_grid;
1252  invertParam.cuda_prec_ritz = eig_args.prec_ritz;
1253  invertParam.tol_restart = eig_args.tol_restart;
1254  invertParam.eigcg_max_restarts = eig_args.eigcg_max_restarts;
1255  invertParam.max_restart_num = eig_args.max_restart_num;
1256  invertParam.inc_tol = eig_args.inc_tol;
1257  invertParam.eigenval_tol = eig_args.eigenval_tol;
1258  invertParam.rhs_idx = rhs_idx;
1259 
1260  if((inv_args.solver_type != QUDA_INC_EIGCG_INVERTER) && (inv_args.solver_type != QUDA_EIGCG_INVERTER)) errorQuda("Incorrect inverter type.\n");
1261  invertParam.inv_type = inv_args.solver_type;
1262 
1264 
1265  setDeflationParam(eig_args.prec_ritz, eig_args.location_ritz, eig_args.mem_type_ritz, eig_args.deflation_ext_lib, eig_args.vec_infile, eig_args.vec_outfile, &df_param);
1266 
1268  setColorSpinorParams(localDim, host_precision, &csParam);
1269 
1270  if((invalidate_quda_gauge || !create_quda_gauge) && (rhs_idx == 0)){//do this for the first RHS
1271 
1272  const int fat_pad = getFatLinkPadding(localDim);
1273  const int long_pad = 3*fat_pad;
1274 
1275  printfQuda("Initialize gauge field.\n");
1277  gaugeParam.ga_pad = fat_pad;
1279  loadGaugeQuda(const_cast<void*>(fatlink), &gaugeParam);
1280 
1282  gaugeParam.ga_pad = long_pad;
1284  loadGaugeQuda(const_cast<void*>(longlink), &gaugeParam);
1285 
1286  invalidate_quda_gauge = false;
1287  }
1288 
1289  int quark_offset = getColorVectorOffset(local_parity, false, gaugeParam.X)*host_precision;
1290 
1291  if(rhs_idx == 0) df_preconditioner = newDeflationQuda(&df_param);
1292 
1293  invertParam.deflation_op = df_preconditioner;
1294 
1295  invertQuda(static_cast<char*>(solution) + quark_offset,
1296  static_cast<char*>(source) + quark_offset,
1297  &invertParam);
1298 
1299  if(last_rhs_flag) destroyDeflationQuda(df_preconditioner);
1300 
1301  // return the number of iterations taken by the inverter
1302  *num_iters = invertParam.iter;
1303  *final_residual = invertParam.true_res;
1304  *final_fermilab_residual = invertParam.true_res_hq;
1305 
1306  if(!create_quda_gauge && last_rhs_flag) invalidateGaugeQuda();
1307 
1308  qudamilc_called<false>(__func__, verbosity);
1309 
1310  return;
1311 } // qudaEigCGInvert
1312 
1313 
1314 static int clover_alloc = 0;
1315 
1316 void* qudaCreateGaugeField(void* gauge, int geometry, int precision)
1317 {
1318  qudamilc_called<true>(__func__);
1319  QudaPrecision qudaPrecision = (precision==2) ? QUDA_DOUBLE_PRECISION : QUDA_SINGLE_PRECISION;
1320  QudaGaugeParam gaugeParam = newMILCGaugeParam(localDim, qudaPrecision,
1321  (geometry==1) ? QUDA_GENERAL_LINKS : QUDA_SU3_LINKS);
1322  qudamilc_called<false>(__func__);
1323  return createGaugeFieldQuda(gauge, geometry, &gaugeParam);
1324 }
1325 
1326 
1327 void qudaSaveGaugeField(void* gauge, void* inGauge)
1328 {
1329  qudamilc_called<true>(__func__);
1330  cudaGaugeField* cudaGauge = reinterpret_cast<cudaGaugeField*>(inGauge);
1331  QudaGaugeParam gaugeParam = newMILCGaugeParam(localDim, cudaGauge->Precision(), QUDA_GENERAL_LINKS);
1332  saveGaugeFieldQuda(gauge, inGauge, &gaugeParam);
1333  qudamilc_called<false>(__func__);
1334  return;
1335 }
1336 
1337 
1338 void qudaDestroyGaugeField(void* gauge)
1339 {
1340  qudamilc_called<true>(__func__);
1341  destroyGaugeFieldQuda(gauge);
1342  qudamilc_called<false>(__func__);
1343  return;
1344 }
1345 
1346 
1347 void setInvertParam(QudaInvertParam &invertParam, QudaInvertArgs_t &inv_args,
1348  int external_precision, int quda_precision, double kappa, double reliable_delta);
1349 
1350 void qudaCloverForce(void *mom, double dt, void **x, void **p, double *coeff, double kappa, double ck,
1351  int nvec, double multiplicity, void *gauge, int precision, QudaInvertArgs_t inv_args)
1352 {
1353  qudamilc_called<true>(__func__);
1354  QudaGaugeParam gaugeParam = newMILCGaugeParam(localDim,
1355  (precision==1) ? QUDA_SINGLE_PRECISION : QUDA_DOUBLE_PRECISION,
1357  gaugeParam.gauge_order = QUDA_MILC_GAUGE_ORDER; // refers to momentume gauge order
1358 
1359  QudaInvertParam invertParam = newQudaInvertParam();
1360  setInvertParam(invertParam, inv_args, precision, precision, kappa, 0);
1361  invertParam.num_offset = nvec;
1362  for (int i=0; i<nvec; ++i) invertParam.offset[i] = 0.0; // not needed
1363  invertParam.clover_coeff = 0.0; // not needed
1364 
1365  // solution types
1367  invertParam.solve_type = QUDA_NORMOP_PC_SOLVE;
1368  invertParam.inv_type = QUDA_CG_INVERTER;
1370 
1371  invertParam.verbosity = getVerbosity();
1372  invertParam.verbosity_precondition = QUDA_SILENT;
1373  invertParam.use_resident_solution = inv_args.use_resident_solution;
1374 
1375  computeCloverForceQuda(mom, dt, x, p, coeff, -kappa*kappa, ck, nvec, multiplicity,
1376  gauge, &gaugeParam, &invertParam);
1377  qudamilc_called<false>(__func__);
1378  return;
1379 }
1380 
1381 
1382 void setGaugeParams(QudaGaugeParam &gaugeParam, const int dim[4], QudaInvertArgs_t &inv_args,
1383  int external_precision, int quda_precision) {
1384 
1385  const QudaPrecision host_precision = (external_precision == 2) ? QUDA_DOUBLE_PRECISION : QUDA_SINGLE_PRECISION;
1386  const QudaPrecision device_precision = (quda_precision == 2) ? QUDA_DOUBLE_PRECISION : QUDA_SINGLE_PRECISION;
1387  QudaPrecision device_precision_sloppy;
1388 
1389  switch(inv_args.mixed_precision) {
1390  case 2: device_precision_sloppy = QUDA_HALF_PRECISION; break;
1391  case 1: device_precision_sloppy = QUDA_SINGLE_PRECISION; break;
1392  default: device_precision_sloppy = device_precision;
1393  }
1394 
1395  for(int dir=0; dir<4; ++dir) gaugeParam.X[dir] = dim[dir];
1396 
1397  gaugeParam.anisotropy = 1.0;
1400 
1401  // Check the boundary conditions
1402  // Can't have twisted or anti-periodic boundary conditions in the spatial
1403  // directions with 12 reconstruct at the moment.
1404  bool trivial_phase = true;
1405  for(int dir=0; dir<3; ++dir){
1406  if(inv_args.boundary_phase[dir] != 0) trivial_phase = false;
1407  }
1408  if(inv_args.boundary_phase[3] != 0 && inv_args.boundary_phase[3] != 1) trivial_phase = false;
1409 
1410  if(trivial_phase){
1414  }else{
1418  }
1419 
1420  gaugeParam.cpu_prec = host_precision;
1421  gaugeParam.cuda_prec = device_precision;
1422  gaugeParam.cuda_prec_sloppy = device_precision_sloppy;
1423  gaugeParam.cuda_prec_precondition = device_precision_sloppy;
1425  gaugeParam.ga_pad = getFatLinkPadding(dim);
1426 }
1427 
1428 
1429 
1430 void setInvertParam(QudaInvertParam &invertParam, QudaInvertArgs_t &inv_args,
1431  int external_precision, int quda_precision, double kappa, double reliable_delta) {
1432 
1433  const QudaPrecision host_precision = (external_precision == 2) ? QUDA_DOUBLE_PRECISION : QUDA_SINGLE_PRECISION;
1434  const QudaPrecision device_precision = (quda_precision == 2) ? QUDA_DOUBLE_PRECISION : QUDA_SINGLE_PRECISION;
1435  QudaPrecision device_precision_sloppy;
1436  switch(inv_args.mixed_precision) {
1437  case 2: device_precision_sloppy = QUDA_HALF_PRECISION; break;
1438  case 1: device_precision_sloppy = QUDA_SINGLE_PRECISION; break;
1439  default: device_precision_sloppy = device_precision;
1440  }
1441 
1442  static const QudaVerbosity verbosity = getVerbosity();
1443 
1444  invertParam.dslash_type = QUDA_CLOVER_WILSON_DSLASH;
1445  invertParam.kappa = kappa;
1446  invertParam.dagger = QUDA_DAG_NO;
1448  invertParam.gcrNkrylov = 30;
1449  invertParam.reliable_delta = reliable_delta;
1450  invertParam.maxiter = inv_args.max_iter;
1451 
1452  invertParam.cuda_prec_precondition = device_precision_sloppy;
1453  invertParam.verbosity_precondition = verbosity;
1454  invertParam.verbosity = verbosity;
1455  invertParam.cpu_prec = host_precision;
1456  invertParam.cuda_prec = device_precision;
1457  invertParam.cuda_prec_sloppy = device_precision_sloppy;
1460  invertParam.dirac_order = QUDA_DIRAC_ORDER;
1461  invertParam.sp_pad = 0;
1462  invertParam.cl_pad = 0;
1463  invertParam.clover_cpu_prec = host_precision;
1464  invertParam.clover_cuda_prec = device_precision;
1465  invertParam.clover_cuda_prec_sloppy = device_precision_sloppy;
1466  invertParam.clover_cuda_prec_precondition = device_precision_sloppy;
1467  invertParam.clover_order = QUDA_PACKED_CLOVER_ORDER;
1468 
1469  invertParam.compute_action = 0;
1470 }
1471 
1472 
1473 void qudaLoadGaugeField(int external_precision,
1474  int quda_precision,
1475  QudaInvertArgs_t inv_args,
1476  const void* milc_link) {
1477  qudamilc_called<true>(__func__);
1479  setGaugeParams(gaugeParam, localDim, inv_args, external_precision, quda_precision);
1480 
1481  loadGaugeQuda(const_cast<void*>(milc_link), &gaugeParam);
1482  qudamilc_called<false>(__func__);
1483 } // qudaLoadGaugeField
1484 
1485 
1486 void qudaFreeGaugeField() {
1487  qudamilc_called<true>(__func__);
1488  freeGaugeQuda();
1489  qudamilc_called<false>(__func__);
1490 } // qudaFreeGaugeField
1491 
1492 
1493 void qudaLoadCloverField(int external_precision,
1494  int quda_precision,
1495  QudaInvertArgs_t inv_args,
1496  void* milc_clover,
1497  void* milc_clover_inv,
1498  QudaSolutionType solution_type,
1500  double clover_coeff,
1501  int compute_trlog,
1502  double *trlog) {
1503  qudamilc_called<true>(__func__);
1504  QudaInvertParam invertParam = newQudaInvertParam();
1505  setInvertParam(invertParam, inv_args, external_precision, quda_precision, 0.0, 0.0);
1506  invertParam.solution_type = solution_type;
1507  invertParam.solve_type = solve_type;
1508  invertParam.matpc_type = QUDA_MATPC_EVEN_EVEN_ASYMMETRIC;
1509  invertParam.compute_clover_trlog = compute_trlog;
1510  invertParam.clover_coeff = clover_coeff;
1511 
1512  if(invertParam.dslash_type == QUDA_CLOVER_WILSON_DSLASH) {
1513  if (clover_alloc == 0) {
1514  loadCloverQuda(milc_clover, milc_clover_inv, &invertParam);
1515  clover_alloc = 1;
1516  } else {
1517  errorQuda("Clover term already allocated");
1518  }
1519  }
1520 
1521  if (compute_trlog) {
1522  trlog[0] = invertParam.trlogA[0];
1523  trlog[1] = invertParam.trlogA[1];
1524  }
1525  qudamilc_called<false>(__func__);
1526 } // qudaLoadCoverField
1527 
1528 
1529 
1530 void qudaFreeCloverField() {
1531  qudamilc_called<true>(__func__);
1532  if (clover_alloc==1) {
1533  freeCloverQuda();
1534  clover_alloc = 0;
1535  } else {
1536  errorQuda("Trying to free non-allocated clover term");
1537  }
1538  qudamilc_called<false>(__func__);
1539 } // qudaFreeCloverField
1540 
1541 
1542 void qudaCloverInvert(int external_precision,
1543  int quda_precision,
1544  double kappa,
1545  double clover_coeff,
1546  QudaInvertArgs_t inv_args,
1547  double target_residual,
1548  double target_fermilab_residual,
1549  const void* link,
1550  void* clover, // could be stored in Milc format
1551  void* cloverInverse,
1552  void* source,
1553  void* solution,
1554  double* const final_residual,
1555  double* const final_fermilab_residual,
1556  int* num_iters)
1557 {
1558  qudamilc_called<true>(__func__);
1559  if(target_fermilab_residual == 0 && target_residual == 0){
1560  errorQuda("qudaCloverInvert: requesting zero residual\n");
1561  exit(1);
1562  }
1563 
1564  if (link) qudaLoadGaugeField(external_precision, quda_precision, inv_args, link);
1565 
1566  if (clover || cloverInverse) {
1567  qudaLoadCloverField(external_precision, quda_precision, inv_args, clover, cloverInverse,
1569  }
1570 
1571  double reliable_delta = 1e-1;
1572 
1573  QudaInvertParam invertParam = newQudaInvertParam();
1574  setInvertParam(invertParam, inv_args, external_precision, quda_precision, kappa, reliable_delta);
1575  invertParam.residual_type = static_cast<QudaResidualType_s>(0);
1576  invertParam.residual_type = (target_residual != 0) ? static_cast<QudaResidualType_s> ( invertParam.residual_type | QUDA_L2_RELATIVE_RESIDUAL) : invertParam.residual_type;
1577  invertParam.residual_type = (target_fermilab_residual != 0) ? static_cast<QudaResidualType_s> (invertParam.residual_type | QUDA_HEAVY_QUARK_RESIDUAL) : invertParam.residual_type;
1578 
1579  invertParam.tol = target_residual;
1580  invertParam.tol_hq = target_fermilab_residual;
1581  if (invertParam.residual_type == QUDA_HEAVY_QUARK_RESIDUAL) invertParam.heavy_quark_check = 1;
1582  invertParam.clover_coeff = clover_coeff;
1583 
1584  // solution types
1585  invertParam.solution_type = QUDA_MAT_SOLUTION;
1588  invertParam.matpc_type = QUDA_MATPC_ODD_ODD;
1589 
1590  invertQuda(solution, source, &invertParam);
1591 
1592  *num_iters = invertParam.iter;
1593  *final_residual = invertParam.true_res;
1594  *final_fermilab_residual = invertParam.true_res_hq;
1595 
1596  if (clover || cloverInverse) qudaFreeCloverField();
1597  if (link) qudaFreeGaugeField();
1598  qudamilc_called<false>(__func__);
1599  return;
1600 } // qudaCloverInvert
1601 
1602 
1603 void qudaEigCGCloverInvert(int external_precision,
1604  int quda_precision,
1605  double kappa,
1606  double clover_coeff,
1607  QudaInvertArgs_t inv_args,
1608  double target_residual,
1609  double target_fermilab_residual,
1610  const void* link,
1611  void* clover, // could be stored in Milc format
1612  void* cloverInverse,
1613  void* source,//array of source vectors -> overwritten on exit!
1614  void* solution,//temporary
1615  QudaEigArgs_t eig_args,
1616  const int rhs_idx,//current rhs
1617  const int last_rhs_flag,//is this the last rhs to solve?
1618  double* const final_residual,
1619  double* const final_fermilab_residual,
1620  int *num_iters)
1621 {
1622  qudamilc_called<true>(__func__);
1623  if(target_fermilab_residual == 0 && target_residual == 0){
1624  errorQuda("qudaCloverInvert: requesting zero residual\n");
1625  exit(1);
1626  }
1627 
1628  if (link && (rhs_idx == 0)) qudaLoadGaugeField(external_precision, quda_precision, inv_args, link);
1629 
1630  if ( (clover || cloverInverse) && (rhs_idx == 0)) {
1631  qudaLoadCloverField(external_precision, quda_precision, inv_args, clover, cloverInverse,
1633  }
1634 
1635  double reliable_delta = 1e-1;
1636 
1637  QudaInvertParam invertParam = newQudaInvertParam();
1638  setInvertParam(invertParam, inv_args, external_precision, quda_precision, kappa, reliable_delta);
1639  invertParam.residual_type = static_cast<QudaResidualType_s>(0);
1640  invertParam.residual_type = (target_residual != 0) ? static_cast<QudaResidualType_s> ( invertParam.residual_type | QUDA_L2_RELATIVE_RESIDUAL) : invertParam.residual_type;
1641  invertParam.residual_type = (target_fermilab_residual != 0) ? static_cast<QudaResidualType_s> (invertParam.residual_type | QUDA_HEAVY_QUARK_RESIDUAL) : invertParam.residual_type;
1642 
1643  invertParam.tol = target_residual;
1644  invertParam.tol_hq = target_fermilab_residual;
1645  if (invertParam.residual_type == QUDA_HEAVY_QUARK_RESIDUAL) invertParam.heavy_quark_check = 1;
1646  invertParam.clover_coeff = clover_coeff;
1647 
1648  // solution types
1649  invertParam.solution_type = QUDA_MAT_SOLUTION;
1650  invertParam.matpc_type = QUDA_MATPC_ODD_ODD;
1651 
1653  QudaEigParam df_param = newQudaEigParam();
1654  df_param.invert_param = &invertParam;
1655 
1656  invertParam.solve_type = QUDA_NORMOP_PC_SOLVE;
1657  invertParam.nev = eig_args.nev;
1658  invertParam.max_search_dim = eig_args.max_search_dim;
1659  invertParam.deflation_grid = eig_args.deflation_grid;
1660  invertParam.cuda_prec_ritz = eig_args.prec_ritz;
1661  invertParam.tol_restart = eig_args.tol_restart;
1662  invertParam.eigcg_max_restarts = eig_args.eigcg_max_restarts;
1663  invertParam.max_restart_num = eig_args.max_restart_num;
1664  invertParam.inc_tol = eig_args.inc_tol;
1665  invertParam.eigenval_tol = eig_args.eigenval_tol;
1666  invertParam.rhs_idx = rhs_idx;
1667 
1668 
1669  if((inv_args.solver_type != QUDA_INC_EIGCG_INVERTER) && (inv_args.solver_type != QUDA_EIGCG_INVERTER)) errorQuda("Incorrect inverter type.\n");
1670  invertParam.inv_type = inv_args.solver_type;
1671 
1673 
1674  setDeflationParam(eig_args.prec_ritz, eig_args.location_ritz, eig_args.mem_type_ritz, eig_args.deflation_ext_lib, eig_args.vec_infile, eig_args.vec_outfile, &df_param);
1675 
1676  if(rhs_idx == 0) df_preconditioner = newDeflationQuda(&df_param);
1677  invertParam.deflation_op = df_preconditioner;
1678 
1679  invertQuda(solution, source, &invertParam);
1680 
1681  if(last_rhs_flag) destroyDeflationQuda(df_preconditioner);
1682 
1683  *num_iters = invertParam.iter;
1684  *final_residual = invertParam.true_res;
1685  *final_fermilab_residual = invertParam.true_res_hq;
1686 
1687  if ( (clover || cloverInverse) && last_rhs_flag) qudaFreeCloverField();
1688  if (link && last_rhs_flag) qudaFreeGaugeField();
1689  qudamilc_called<false>(__func__);
1690  return;
1691 } // qudaEigCGCloverInvert
1692 
1693 
1694 void qudaCloverMultishiftInvert(int external_precision,
1695  int quda_precision,
1696  int num_offsets,
1697  double* const offset,
1698  double kappa,
1699  double clover_coeff,
1700  QudaInvertArgs_t inv_args,
1701  const double* target_residual_offset,
1702  const void* milc_link,
1703  void* milc_clover,
1704  void* milc_clover_inv,
1705  void* source,
1706  void** solutionArray,
1707  double* const final_residual,
1708  int* num_iters)
1709 {
1710 
1711  static const QudaVerbosity verbosity = getVerbosity();
1712  qudamilc_called<true>(__func__, verbosity);
1713 
1714  for(int i=0; i<num_offsets; ++i){
1715  if(target_residual_offset[i] == 0){
1716  errorQuda("qudaCloverMultishiftInvert: target residual cannot be zero\n");
1717  exit(1);
1718  }
1719  }
1720 
1721  // if doing a pure double-precision multi-shift solve don't use reliable updates
1722  const bool use_mixed_precision = (((quda_precision==2) && inv_args.mixed_precision) ||
1723  ((quda_precision==1) && (inv_args.mixed_precision==2)) ) ? true : false;
1724  double reliable_delta = (use_mixed_precision) ? 1e-2 : 0.0;
1725  QudaInvertParam invertParam = newQudaInvertParam();
1726  setInvertParam(invertParam, inv_args, external_precision, quda_precision, kappa, reliable_delta);
1728  invertParam.num_offset = num_offsets;
1729  for(int i=0; i<num_offsets; ++i){
1730  invertParam.offset[i] = offset[i];
1731  invertParam.tol_offset[i] = target_residual_offset[i];
1732  }
1733  invertParam.tol = target_residual_offset[0];
1734  invertParam.clover_coeff = clover_coeff;
1735 
1736  // solution types
1738  invertParam.solve_type = QUDA_NORMOP_PC_SOLVE;
1739  invertParam.inv_type = QUDA_CG_INVERTER;
1741 
1742  invertParam.verbosity = verbosity;
1743  invertParam.verbosity_precondition = QUDA_SILENT;
1744 
1745  invertParam.make_resident_solution = inv_args.make_resident_solution;
1746  invertParam.compute_true_res = 0;
1747 
1748  if (num_offsets==1 && offset[0] == 0) {
1749  // set the solver
1750  char *quda_solver = getenv("QUDA_MILC_CLOVER_SOLVER");
1751 
1752  // default is chronological CG
1753  if (!quda_solver || strcmp(quda_solver,"CHRONO_CG_SOLVER")==0) {
1754  // use CG with chronological forecasting
1755  invertParam.use_resident_chrono = 1;
1756  invertParam.make_resident_chrono = 1;
1757  invertParam.max_chrono_dim = 10;
1758  } else if (strcmp(quda_solver,"BICGSTAB_SOLVER")==0){
1759  // use two-step BiCGStab
1760  invertParam.inv_type = QUDA_BICGSTAB_INVERTER;
1761  invertParam.solve_type = QUDA_DIRECT_PC_SOLVE;
1762  } else if (strcmp(quda_solver,"CG_SOLVER")==0){
1763  // regular CG
1764  invertParam.use_resident_chrono = 0;
1765  invertParam.make_resident_chrono = 0;
1766  }
1767 
1768  invertQuda(solutionArray[0], source, &invertParam);
1769  *final_residual = invertParam.true_res;
1770  } else {
1771  invertMultiShiftQuda(solutionArray, source, &invertParam);
1772  for (int i=0; i<num_offsets; ++i) final_residual[i] = invertParam.true_res_offset[i];
1773  }
1774 
1775  // return the number of iterations taken by the inverter
1776  *num_iters = invertParam.iter;
1777 
1778  qudamilc_called<false>(__func__, verbosity);
1779  return;
1780 } // qudaCloverMultishiftInvert
1781 
1782 
1783 void qudaGaugeFixingOVR( int precision,
1784  unsigned int gauge_dir,
1785  int Nsteps,
1786  int verbose_interval,
1787  double relax_boost,
1788  double tolerance,
1789  unsigned int reunit_interval,
1790  unsigned int stopWtheta,
1791  void* milc_sitelink
1792  )
1793 {
1794 
1795 
1796  QudaGaugeParam qudaGaugeParam = newMILCGaugeParam(localDim,
1797  (precision==1) ? QUDA_SINGLE_PRECISION : QUDA_DOUBLE_PRECISION,
1798  QUDA_SU3_LINKS);
1800  //qudaGaugeParam.reconstruct = QUDA_RECONSTRUCT_12;
1801 
1802 
1803  double timeinfo[3];
1804  computeGaugeFixingOVRQuda(milc_sitelink, gauge_dir, Nsteps, verbose_interval, relax_boost, tolerance, reunit_interval, stopWtheta, \
1805  &qudaGaugeParam, timeinfo);
1806 
1807  printfQuda("Time H2D: %lf\n", timeinfo[0]);
1808  printfQuda("Time to Compute: %lf\n", timeinfo[1]);
1809  printfQuda("Time D2H: %lf\n", timeinfo[2]);
1810  printfQuda("Time all: %lf\n", timeinfo[0]+timeinfo[1]+timeinfo[2]);
1811 
1812  return;
1813 }
1814 
1815 void qudaGaugeFixingFFT( int precision,
1816  unsigned int gauge_dir,
1817  int Nsteps,
1818  int verbose_interval,
1819  double alpha,
1820  unsigned int autotune,
1821  double tolerance,
1822  unsigned int stopWtheta,
1823  void* milc_sitelink
1824  )
1825 {
1826 
1827 
1828  QudaGaugeParam qudaGaugeParam = newMILCGaugeParam(localDim,
1829  (precision==1) ? QUDA_SINGLE_PRECISION : QUDA_DOUBLE_PRECISION,
1832  //qudaGaugeParam.reconstruct = QUDA_RECONSTRUCT_12;
1833 
1834 
1835  double timeinfo[3];
1836  computeGaugeFixingFFTQuda(milc_sitelink, gauge_dir, Nsteps, verbose_interval, alpha, autotune, tolerance, stopWtheta, \
1837  &qudaGaugeParam, timeinfo);
1838 
1839  printfQuda("Time H2D: %lf\n", timeinfo[0]);
1840  printfQuda("Time to Compute: %lf\n", timeinfo[1]);
1841  printfQuda("Time D2H: %lf\n", timeinfo[2]);
1842  printfQuda("Time all: %lf\n", timeinfo[0]+timeinfo[1]+timeinfo[2]);
1843 
1844  return;
1845 }
1846 
1847 #endif // BUILD_MILC_INTERFACE
void computeCloverForceQuda(void *mom, double dt, void **x, void **p, double *coeff, double kappa2, double ck, int nvector, double multiplicity, void *gauge, QudaGaugeParam *gauge_param, QudaInvertParam *inv_param)
int maxiter_precondition
Definition: quda.h:267
static QudaGaugeParam qudaGaugeParam
QudaTboundary t_boundary
Definition: gauge_field.h:18
QudaDiracFieldOrder dirac_order
Definition: quda.h:195
QudaMassNormalization mass_normalization
Definition: quda.h:185
double tol_hq_offset[QUDA_MAX_MULTI_SHIFT]
Definition: quda.h:159
QudaReconstructType reconstruct_sloppy
Definition: quda.h:46
double anisotropy
Definition: quda.h:31
void freeCloverQuda(void)
QudaGaugeParam gaugeParam
Definition: covdev_test.cpp:36
void invertMultiShiftQuda(void **_hp_x, void *_hp_b, QudaInvertParam *param)
void setVerbosityQuda(QudaVerbosity verbosity, const char prefix[], FILE *outfile)
void endQuda(void)
void free(void *)
#define pool_pinned_free(ptr)
Definition: malloc_quda.h:116
void qudaHisqParamsInit(QudaHisqParams_t hisq_params)
QudaSolveType solve_type
Definition: quda.h:182
QudaVerbosity verbosity_precondition
Definition: quda.h:261
QudaVerbosity verbosity
enum QudaPrecision_s QudaPrecision
void qudaUnitarizeSU3(int prec, double tol, QudaMILCSiteArg_t *arg)
int ga_pad
Definition: quda.h:53
void destroyDeflationQuda(void *df_instance)
int make_resident_mom
Definition: quda.h:74
void qudaGaugeFixingFFT(int precision, unsigned int gauge_dir, int Nsteps, int verbose_interval, double alpha, unsigned int autotune, double tolerance, unsigned int stopWtheta, void *milc_sitelink)
Gauge fixing with Steepest descent method with FFTs with support for single GPU only.
size_t gauge_offset
Definition: quda.h:78
QudaGaugeFixed gauge_fix
Definition: quda.h:51
QudaExtLibType deflation_ext_lib
void setUnitarizeForceConstants(double unitarize_eps, double hisq_force_filter, double max_det_error, bool allow_svd, bool svd_only, double svd_rel_error, double svd_abs_error)
Set the constant parameters for the force unitarization.
QudaInverterType inv_type_precondition
Definition: quda.h:248
QudaVerbosity getVerbosity()
Definition: util_quda.cpp:20
const void * func
QudaLinkType type
Definition: quda.h:35
const void * src
double kappa
Definition: quda.h:97
QudaPrecision cuda_prec_ritz
Definition: quda.h:290
void invertQuda(void *h_x, void *h_b, QudaInvertParam *param)
#define errorQuda(...)
Definition: util_quda.h:90
double tol
Definition: quda.h:110
void setUnitarizeLinksConstants(double unitarize_eps, double max_error, bool allow_svd, bool svd_only, double svd_rel_error, double svd_abs_error)
QudaDslashType dslash_type
Definition: quda.h:93
QudaReconstructType reconstruct_precondition
Definition: quda.h:49
QudaInverterType inv_type
Definition: quda.h:94
cudaEvent_t start
QudaPrecision prec_ritz
QudaPrecision cuda_prec
Definition: quda.h:191
#define host_free(ptr)
Definition: malloc_quda.h:59
enum QudaSolveType_s QudaSolveType
void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param)
QudaExtLibType deflation_ext_lib
Definition: test_util.cpp:1680
void qudaInit(QudaInitArgs_t input)
QudaPrecision cpu_prec
Definition: quda.h:190
QudaMemoryType mem_type_ritz
void setDeflationParam(QudaEigParam &df_param)
static __inline__ dim3 dim3 void size_t cudaStream_t int dim
static int rank
Definition: comm_mpi.cpp:42
double momActionQuda(void *momentum, QudaGaugeParam *param)
QudaStaggeredPhase staggered_phase_type
Definition: quda.h:62
char * strcpy(char *__dst, const char *__src)
void qudaLoadGaugeField(int external_precision, int quda_precision, QudaInvertArgs_t inv_args, const void *milc_link)
void qudaEigCGInvert(int external_precision, int quda_precision, double mass, QudaInvertArgs_t inv_args, double target_residual, double target_fermilab_residual, const void *const fatlink, const void *const longlink, const double tadpole, void *source, void *solution, QudaEigArgs_t eig_args, const int rhs_idx, const int last_rhs_flag, double *const final_residual, double *const final_fermilab_residual, int *num_iters)
QudaDagType dagger
Definition: quda.h:184
#define MAX(a, b)
void qudaCloverMultishiftInvert(int external_precision, int quda_precision, int num_offsets, double *const offset, double kappa, double clover_coeff, QudaInvertArgs_t inv_args, const double *target_residual, const void *milc_link, void *milc_clover, void *milc_clover_inv, void *source, void **solutionArray, double *const final_residual, int *num_iters)
void qudaEigCGCloverInvert(int external_precision, int quda_precision, double kappa, double clover_coeff, QudaInvertArgs_t inv_args, double target_residual, double target_fermilab_residual, const void *milc_link, void *milc_clover, void *milc_clover_inv, void *source, void *solution, QudaEigArgs_t eig_args, const int rhs_idx, const int last_rhs_flag, double *const final_residual, double *const final_fermilab_residual, int *num_iters)
QudaGaugeFieldOrder gauge_order
Definition: quda.h:36
int nvec
Definition: test_util.cpp:1635
double true_res
Definition: quda.h:115
void qudaInvertMsrc(int external_precision, int quda_precision, double mass, QudaInvertArgs_t inv_args, double target_residual, double target_fermilab_residual, const void *const fatlink, const void *const longlink, const double tadpole, void **sourceArray, void **solutionArray, double *const final_residual, double *const final_fermilab_residual, int *num_iters, int num_src)
size_t mom_offset
Definition: quda.h:79
void computeKSLinkQuda(void *fatlink, void *longlink, void *ulink, void *inlink, double *path_coeff, QudaGaugeParam *param)
void qudaGaugeFixingOVR(const int precision, const unsigned int gauge_dir, const int Nsteps, const int verbose_interval, const double relax_boost, const double tolerance, const unsigned int reunit_interval, const unsigned int stopWtheta, void *milc_sitelink)
Gauge fixing with overrelaxation with support for single and multi GPU.
void qudaSaveGaugeField(void *gauge, void *inGauge)
void loadCloverQuda(void *h_clover, void *h_clovinv, QudaInvertParam *inv_param)
QudaPrecision & cuda_prec_precondition
int make_resident_solution
Definition: quda.h:313
int overwrite_mom
Definition: quda.h:69
double qudaMomAction(int precision, void *momentum)
void qudaSetLayout(QudaLayout_t layout)
void exit(int) __attribute__((noreturn))
QudaPrecision clover_cuda_prec_sloppy
Definition: quda.h:202
int compute_action
Definition: quda.h:174
void dslashQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaParity parity)
QudaFieldLocation input_location
Definition: quda.h:90
void freeGaugeQuda(void)
void initCommsGridQuda(int nDim, const int *dims, QudaCommsMap func, void *fdata)
int staggered_phase_applied
Definition: quda.h:63
double true_res_hq_offset[QUDA_MAX_MULTI_SHIFT]
Definition: quda.h:168
double reliable_delta
Definition: quda.h:118
QudaPrecision cpu_prec
Definition: covdev_test.cpp:33
size_t site_size
Definition: quda.h:80
size_t size_t offset
QudaUseInitGuess use_init_guess
Definition: quda.h:206
int computeGaugeFixingOVRQuda(void *gauge, const unsigned int gauge_dir, const unsigned int Nsteps, const unsigned int verbose_interval, const double relax_boost, const double tolerance, const unsigned int reunit_interval, const unsigned int stopWtheta, QudaGaugeParam *param, double *timeinfo)
Gauge fixing with overrelaxation with support for single and multi GPU.
char * index(const char *, int)
QudaGaugeParam param
Definition: pack_test.cpp:17
void setInvertParam(QudaInvertParam &inv_param)
QudaSolutionType solution_type
Definition: quda.h:181
void projectSU3Quda(void *gauge_h, double tol, QudaGaugeParam *param)
QudaMemoryType mem_type_ritz
Definition: quda.h:367
int strcmp(const char *__s1, const char *__s2)
QudaPrecision clover_cuda_prec
Definition: quda.h:201
void * longlink[4]
int computeGaugeForceQuda(void *mom, void *sitelink, int ***input_path_buf, int *path_length, double *loop_coeff, int num_paths, int max_length, double dt, QudaGaugeParam *qudaGaugeParam)
QudaInvertParam * invert_param
Definition: quda.h:346
double scale
Definition: quda.h:33
void initQuda(int device)
double tol
Definition: test_util.cpp:1647
void qudaFreePinned(void *ptr)
void qudaUpdateU(int precision, double eps, QudaMILCSiteArg_t *arg)
QudaFieldLocation output_location
Definition: quda.h:91
void * malloc(size_t __size) __attribute__((__warn_unused_result__)) __attribute__((alloc_size(1)))
QudaPrecision clover_cuda_prec_precondition
Definition: quda.h:203
int printf(const char *,...) __attribute__((__format__(__printf__
char vec_infile[]
Definition: test_util.cpp:1636
bool canReuseResidentGauge(QudaInvertParam *inv_param)
QudaPrecision & cuda_prec_sloppy
VOLATILE spinorFloat kappa
QudaBoolean run_verify
Definition: quda.h:373
double i_mu
Definition: quda.h:65
void qudaFreeCloverField()
void * newDeflationQuda(QudaEigParam *param)
QudaPrecision cuda_prec
Definition: covdev_test.cpp:34
QudaPrecision cuda_prec_sloppy
Definition: quda.h:192
static bool initialized
Profiler for initQuda.
QudaVerbosity verbosity
Definition: quda.h:219
void qudaMultishiftInvert(int external_precision, int precision, int num_offsets, double *const offset, QudaInvertArgs_t inv_args, const double *target_residual, const double *target_fermilab_residual, const void *const milc_fatlink, const void *const milc_longlink, const double tadpole, void *source, void **solutionArray, double *const final_residual, double *const final_fermilab_residual, int *num_iters)
ColorSpinorParam csParam
Definition: pack_test.cpp:24
double tol_offset[QUDA_MAX_MULTI_SHIFT]
Definition: quda.h:156
double true_res_offset[QUDA_MAX_MULTI_SHIFT]
Definition: quda.h:162
QudaInvertParam newQudaInvertParam(void)
double gflops
Definition: quda.h:227
void * qudaCreateGaugeField(void *gauge, int geometry, int precision)
int eigcg_max_restarts
Definition: quda.h:306
int use_resident_chrono
Definition: quda.h:322
QudaPrecision cuda_prec_precondition
Definition: quda.h:48
QudaCloverFieldOrder clover_order
Definition: quda.h:205
static __inline__ size_t p
void saveGaugeFieldQuda(void *outGauge, void *inGauge, QudaGaugeParam *param)
double tol_hq
Definition: quda.h:112
QudaInverterType solver_type
void qudaRephase(int prec, void *gauge, int flag, double i_mu)
double true_res_hq
Definition: quda.h:116
void qudaInvert(int external_precision, int quda_precision, double mass, QudaInvertArgs_t inv_args, double target_residual, double target_fermilab_residual, const void *const milc_fatlink, const void *const milc_longlink, const double tadpole, void *source, void *solution, double *const final_resid, double *const final_rel_resid, int *num_iters)
enum QudaSolutionType_s QudaSolutionType
void qudaComputeOprod(int precision, int num_terms, int num_naik_terms, double **coeff, double scale, void **quark_field, void *oprod[3])
QudaGammaBasis gamma_basis
Definition: quda.h:197
const int * machsize
char vec_outfile[]
Definition: test_util.cpp:1637
QudaPrecision cuda_prec_sloppy
Definition: quda.h:45
int max_search_dim
Definition: quda.h:298
double tol_precondition
Definition: quda.h:264
#define PUSH_RANGE(name, cid)
double offset[QUDA_MAX_MULTI_SHIFT]
Definition: quda.h:153
void qudaFreeGaugeField()
int use_sloppy_partial_accumulator
Definition: quda.h:119
int heavy_quark_check
Definition: quda.h:142
enum QudaParity_s QudaParity
QudaReconstructType reconstruct
Definition: quda.h:43
enum QudaLinkType_s QudaLinkType
QudaPrecision cuda_prec
Definition: quda.h:42
void qudaDslash(int external_precision, int quda_precision, QudaInvertArgs_t inv_args, const void *const milc_fatlink, const void *const milc_longlink, const double tadpole, void *source, void *solution, int *num_iters)
int X[4]
Definition: quda.h:29
void qudaLoadCloverField(int external_precision, int quda_precision, QudaInvertArgs_t inv_args, void *milc_clover, void *milc_clover_inv, QudaSolutionType solution_type, QudaSolveType solve_type, double clover_coeff, int compute_trlog, double *trlog)
double mass
Definition: quda.h:96
QudaBoolean import_vectors
Definition: quda.h:361
void qudaFinalize()
QudaFieldLocation location
Definition: quda.h:370
int gcrNkrylov
Definition: quda.h:237
QudaFieldLocation location_ritz
const void * ptr
#define safe_malloc(size)
Definition: malloc_quda.h:54
void qudaCloverInvert(int external_precision, int quda_precision, double kappa, double clover_coeff, QudaInvertArgs_t inv_args, double target_residual, double target_fermilab_residual, const void *milc_link, void *milc_clover, void *milc_clover_inv, void *source, void *solution, double *const final_residual, double *const final_fermilab_residual, int *num_iters)
QudaSolveType solve_type
Definition: test_util.cpp:1653
int make_resident_chrono
Definition: quda.h:319
void staggeredPhaseQuda(void *gauge_h, QudaGaugeParam *param)
#define pool_pinned_malloc(size)
Definition: malloc_quda.h:115
int computeGaugeFixingFFTQuda(void *gauge, const unsigned int gauge_dir, const unsigned int Nsteps, const unsigned int verbose_interval, const double alpha, const unsigned int autotune, const double tolerance, const unsigned int stopWtheta, QudaGaugeParam *param, double *timeinfo)
Gauge fixing with Steepest descent method with FFTs with support for single GPU only.
void qudaLoadUnitarizedLink(int precision, QudaFatLinkArgs_t fatlink_args, const double path_coeff[6], void *inlink, void *fatlink, void *ulink)
char vec_outfile[256]
Definition: quda.h:379
void destroyGaugeFieldQuda(void *gauge)
QudaResidualType_s
Definition: enum_quda.h:165
enum QudaFieldLocation_s QudaFieldLocation
unsigned int uint32_t
double tadpole_coeff
Definition: quda.h:32
QudaPrecision cuda_prec_precondition
Definition: quda.h:193
int deflation_grid
Definition: quda.h:302
GaugeFieldParam gParam
double tol_restart
Definition: quda.h:111
void updateGaugeFieldQuda(void *gauge, void *momentum, double dt, int conj_mom, int exact, QudaGaugeParam *param)
void * fatlink[4]
double clover_coeff
Definition: test_util.cpp:1645
enum QudaReconstructType_s QudaReconstructType
Main header file for the QUDA library.
void invertMultiSrcQuda(void **_hp_x, void **_hp_b, QudaInvertParam *param)
const int * latsize
void qudaCloverForce(void *mom, double dt, void **x, void **p, double *coeff, double kappa, double ck, int nvec, double multiplicity, void *gauge, int precision, QudaInvertArgs_t inv_args)
void * qudaAllocatePinned(size_t bytes)
QudaMemoryType mem_type_ritz
Definition: test_util.cpp:1682
#define printfQuda(...)
Definition: util_quda.h:84
QudaTboundary t_boundary
Definition: quda.h:38
unsigned long long flops
Definition: blas_quda.cu:42
cudaGaugeField * cudaGauge
#define POP_RANGE
int max_restart_num
Definition: quda.h:308
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
Definition: complex_quda.h:880
void * createGaugeFieldQuda(void *gauge, int geometry, QudaGaugeParam *param)
int use_resident_mom
Definition: quda.h:72
void size_t length
QudaReconstructType reconstruct
Definition: gauge_field.h:14
void qudaHisqForce(int precision, int num_terms, int num_naik_terms, double **coeff, void **quark_field, const double level2_coeff[6], const double fat7_coeff[6], const void *const w_link, const void *const v_link, const void *const u_link, void *const milc_momentum)
QudaFieldLocation location_ritz
Definition: test_util.cpp:1681
int compute_true_res
Definition: quda.h:114
QudaResidualType residual_type
Definition: quda.h:286
double inc_tol
Definition: quda.h:310
int num_offset
Definition: quda.h:146
enum QudaVerbosity_s QudaVerbosity
int return_result_mom
Definition: quda.h:76
QudaVerbosity verbosity
int max_chrono_dim
Definition: quda.h:325
int use_resident_solution
Definition: quda.h:316
void computeHISQForceQuda(void *momentum, long long *flops, const double level2_coeff[6], const double fat7_coeff[6], const void *const w_link, const void *const v_link, const void *const u_link, void **quark, int num, int num_naik, double **coeff, QudaGaugeParam *param)
void qudaLoadKSLink(int precision, QudaFatLinkArgs_t fatlink_args, const double act_path_coeff[6], void *inlink, void *fatlink, void *longlink)
void * deflation_op
Definition: quda.h:254
double eigenval_tol
Definition: quda.h:304
QudaPrecision Precision() const
QudaPrecision clover_cpu_prec
Definition: quda.h:200
QudaPrecision cuda_prec_ritz
Definition: quda.h:364
QudaParity parity
Definition: covdev_test.cpp:53
void qudaGaugeForce(int precision, int num_loop_types, double milc_loop_coeff[3], double eb3, QudaMILCSiteArg_t *arg)
static int opp(int dir)
char * getenv(const char *)
QudaPrecision prec
Definition: test_util.cpp:1615
QudaMatPCType matpc_type
Definition: quda.h:183
QudaEigParam newQudaEigParam(void)
char vec_infile[256]
Definition: quda.h:376
enum QudaInverterType_s QudaInverterType
enum QudaMemoryType_s QudaMemoryType
void qudaAsqtadForce(int precision, const double act_path_coeff[6], const void *const one_link_src[4], const void *const naik_src[4], const void *const link, void *const milc_momentum)
static void createGaugeForcePaths(int **paths, int dir, int num_loop_types)
unsigned long long bytes
Definition: blas_quda.cu:43
QudaPrecision cpu_prec
Definition: quda.h:40
enum QudaExtLibType_s QudaExtLibType
QudaLayout_t layout
void qudaDestroyGaugeField(void *gauge)
QudaGaugeParam newQudaGaugeParam(void)
QudaPreserveSource preserve_source
Definition: quda.h:188
double clover_coeff
Definition: quda.h:208
enum cudaDeviceAttr attr int device