QUDA  v0.7.0
A library for QCD on GPUs
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
milc_interface.cpp
Go to the documentation of this file.
1 #include <cstdio>
2 #include <cstdlib>
3 #include <iostream>
4 #include <quda.h>
5 #include <quda_milc_interface.h>
6 #include <quda_internal.h>
7 #include <color_spinor_field.h>
8 #include <string.h>
9 #include <hisq_links_quda.h>
10 #include <ks_improved_force.h>
11 #include <dslash_quda.h>
12 
13 #define MAX(a,b) ((a)>(b)?(a):(b))
14 
15 #ifdef BUILD_MILC_INTERFACE
16 
17 static bool initialized = false;
18 static int gridDim[4];
19 static int localDim[4];
20 static int clover_alloc = 0;
21 static bool invalidate_quda_gauge = true;
22 static bool create_quda_gauge = false;
23 
24 
25 using namespace quda;
26 using namespace quda::fermion_force;
27 
28 
29 
30 #define QUDAMILC_VERBOSE 1
31 template <bool start>
32 void inline qudamilc_called(const char* func, QudaVerbosity verb){
33 #ifdef QUDAMILC_VERBOSE
34 if (verb >= QUDA_VERBOSE) {
35  if(start)
36  printf("QUDA_MILC_INTERFACE: %s (called) \n",func);
37  else
38  printf("QUDA_MILC_INTERFACE: %s (return) \n",func);
39  }
40 #endif
41 }
42 
43 template <bool start>
44 void inline qudamilc_called(const char * func){
45  qudamilc_called<start>(func, getVerbosity());
46 }
47 
48 
49 void qudaInit(QudaInitArgs_t input)
50 {
51  qudamilc_called<true>(__func__);
52  if(initialized) return;
53  setVerbosityQuda(input.verbosity, "", stdout);
54  qudaSetLayout(input.layout);
55  initialized = true;
56  qudamilc_called<false>(__func__);
57 
58 }
59 
60 void qudaFinalize()
61 {
62  qudamilc_called<true>(__func__);
63  endQuda();
64  qudamilc_called<false>(__func__);
65 }
66 
71 static int rankFromCoords(const int *coords, void *fdata)
72 {
73  int *dims = static_cast<int *>(fdata);
74 
75  int rank = coords[3];
76  for (int i = 2; i >= 0; i--) {
77  rank = dims[i] * rank + coords[i];
78  }
79  return rank;
80 }
81 
82 
83 void qudaSetLayout(QudaLayout_t input)
84 {
85  int local_dim[4];
86  for(int dir=0; dir<4; ++dir){ local_dim[dir] = input.latsize[dir]; }
87 #ifdef MULTI_GPU
88  for(int dir=0; dir<4; ++dir){ local_dim[dir] /= input.machsize[dir]; }
89 #endif
90  for(int dir=0; dir<4; ++dir){
91  if(local_dim[dir]%2 != 0){
92  printf("Error: Odd lattice dimensions are not supported\n");
93  exit(1);
94  }
95  }
96 
97  for(int dir=0; dir<4; ++dir) localDim[dir] = local_dim[dir];
98 
99 #ifdef GPU_COMMS
100  setKernelPackT(true);
101 #endif
102 
103 #ifdef MULTI_GPU
104  for(int dir=0; dir<4; ++dir) gridDim[dir] = input.machsize[dir];
105  initCommsGridQuda(4, gridDim, rankFromCoords, (void *)(gridDim));
106  static int device = -1;
107 #else
108  for(int dir=0; dir<4; ++dir) gridDim[dir] = 1;
109  static int device = input.device;
110 #endif
111 
112  initQuda(device);
113 }
114 
115 
117 {
118 
119  static bool initialized = false;
120 
121  if(initialized) return;
122  qudamilc_called<true>(__func__);
123 
124  const bool reunit_allow_svd = (params.reunit_allow_svd) ? true : false;
125  const bool reunit_svd_only = (params.reunit_svd_only) ? true : false;
126 
127 
128  const double unitarize_eps = 1e-14;
129  const double max_error = 1e-10;
130 
131 #ifdef GPU_HISQ_FORCE
133  params.force_filter,
134  max_error,
135  reunit_allow_svd,
136  reunit_svd_only,
137  params.reunit_svd_rel_error,
138  params.reunit_svd_abs_error);
139 #endif
140 
141 #ifdef GPU_UNITARIZE
142  setUnitarizeLinksConstants(unitarize_eps,
143  max_error,
144  reunit_allow_svd,
145  reunit_svd_only,
146  params.reunit_svd_rel_error,
147  params.reunit_svd_abs_error);
148 #endif // UNITARIZE_GPU
149 
150  initialized = true;
151  qudamilc_called<false>(__func__);
152  return;
153 }
154 
155 
156 
157 static QudaGaugeParam newMILCGaugeParam(const int* dim, QudaPrecision prec, QudaLinkType link_type)
158 {
160  for(int dir=0; dir<4; ++dir) gParam.X[dir] = dim[dir];
161  gParam.cuda_prec_sloppy = gParam.cpu_prec = gParam.cuda_prec = prec;
162  gParam.type = link_type;
163 
166  gParam.t_boundary = QUDA_PERIODIC_T;
168  gParam.scale = 1.0;
169  gParam.anisotropy = 1.0;
170  gParam.tadpole_coeff = 1.0;
171  gParam.scale = 0;
172  gParam.ga_pad = 0;
173  gParam.site_ga_pad = 0;
174  gParam.mom_ga_pad = 0;
175  gParam.llfat_ga_pad = 0;
176  return gParam;
177 }
178 
179 
180 static void invalidateGaugeQuda() {
181  freeGaugeQuda();
182  invalidate_quda_gauge = true;
183 }
184 
185 
186 
187 #ifdef GPU_FATLINK
188 
189 void qudaLoadKSLink(int prec, QudaFatLinkArgs_t fatlink_args,
190  const double act_path_coeff[6], void* inlink, void* fatlink, void* longlink)
191 {
192  qudamilc_called<true>(__func__);
193  // create_quda_gauge = true;
194 
195 #ifdef MULTI_GPU
197 #else
199 #endif
200 
201  QudaGaugeParam param = newMILCGaugeParam(localDim,
204 
205  computeKSLinkQuda(fatlink, longlink, NULL, inlink, const_cast<double*>(act_path_coeff), &param, method);
206  qudamilc_called<false>(__func__);
207  // require loadGaugeQuda to be called in subsequent solve
208 // invalidateGaugeQuda();
209 }
210 
211 
212 
213 void qudaLoadUnitarizedLink(int prec, QudaFatLinkArgs_t fatlink_args,
214  const double act_path_coeff[6], void* inlink, void* fatlink, void* ulink)
215 {
216  qudamilc_called<true>(__func__);
217 #ifdef MULTI_GPU
219 #else
221 #endif
222 
223  QudaGaugeParam param = newMILCGaugeParam(localDim,
226 
227  computeKSLinkQuda(fatlink, NULL, ulink, inlink, const_cast<double*>(act_path_coeff), &param, method);
228  qudamilc_called<false>(__func__);
229  return;
230 }
231 
232 #endif
233 
234 
235 #ifdef GPU_HISQ_FORCE
236 
237 void qudaHisqForce(int prec, const double level2_coeff[6], const double fat7_coeff[6],
238  const void* const staple_src[4], const void* const one_link_src[4], const void* const naik_src[4],
239  const void* const w_link, const void* const v_link, const void* const u_link,
240  void* const milc_momentum)
241 {
242  qudamilc_called<true>(__func__);
243 
244  QudaGaugeParam gParam = newMILCGaugeParam(localDim,
247 
248  long long flops;
249  computeHISQForceQuda(milc_momentum, &flops, level2_coeff, fat7_coeff,
250  staple_src, one_link_src, naik_src,
251  w_link, v_link, u_link, &gParam);
252  qudamilc_called<false>(__func__);
253  return;
254 }
255 
256 
257 void qudaAsqtadForce(int prec, const double act_path_coeff[6],
258  const void* const one_link_src[4], const void* const naik_src[4],
259  const void* const link, void* const milc_momentum)
260 {
261  qudamilc_called<true>(__func__);
262 
263 
264  QudaGaugeParam gParam = newMILCGaugeParam(localDim,
267 
268  long long flops;
269  computeAsqtadForceQuda(milc_momentum, &flops, act_path_coeff, one_link_src, naik_src, link, &gParam);
270 
271  qudamilc_called<false>(__func__);
272  return;
273 }
274 
275 
276 
277 void qudaComputeOprod(int prec, int num_terms, double** coeff,
278  void** quark_field, void* oprod[2])
279 {
280  qudamilc_called<true>(__func__);
281  QudaGaugeParam oprodParam = newMILCGaugeParam(localDim,
284 
285  computeStaggeredOprodQuda(oprod, quark_field, num_terms, coeff, &oprodParam);
286  qudamilc_called<false>(__func__);
287  return;
288 }
289 
290 #endif // GPU_HISQ_FORCE
291 
292 #ifdef GPU_GAUGE_FORCE
293 void qudaUpdateU(int prec, double eps, void* momentum, void* link)
294 {
295  qudamilc_called<true>(__func__);
296  QudaGaugeParam gaugeParam = newMILCGaugeParam(localDim,
299 
300 
301  updateGaugeFieldQuda(link, momentum, eps, 0, 0, &gaugeParam);
302  qudamilc_called<false>(__func__);
303  return;
304 }
305 
306 
307 // gauge force code
308 static int getVolume(const int dim[4])
309 {
310  return dim[0]*dim[1]*dim[2]*dim[3];
311 }
312 
313 
314  template<class Real>
315 static void setLoopCoeffs(const double milc_loop_coeff[3],
316  Real loop_coeff[48])
317 {
318  // 6 plaquette terms
319  for(int i=0; i<6; ++i) loop_coeff[i] = milc_loop_coeff[0];
320 
321  // 18 rectangle terms
322  for(int i=0; i<18; ++i) loop_coeff[i+6] = milc_loop_coeff[1];
323 
324  for(int i=0; i<24; ++i) loop_coeff[i+24] = milc_loop_coeff[2];
325 
326  return;
327 }
328 
329 
330 
331 static inline int opp(int dir){
332  return 7-dir;
333 }
334 
335 
336 static void createGaugeForcePaths(int **paths, int dir){
337 
338  int index=0;
339  // Plaquette paths
340  for(int i=0; i<4; ++i){
341  if(i==dir) continue;
342  paths[index][0] = i; paths[index][1] = opp(dir); paths[index++][2] = opp(i);
343  paths[index][0] = opp(i); paths[index][1] = opp(dir); paths[index++][2] = i;
344  }
345 
346  // Rectangle Paths
347  for(int i=0; i<4; ++i){
348  if(i==dir) continue;
349  paths[index][0] = paths[index][1] = i; paths[index][2] = opp(dir); paths[index][3] = paths[index][4] = opp(i);
350  index++;
351  paths[index][0] = paths[index][1] = opp(i); paths[index][2] = opp(dir); paths[index][3] = paths[index][4] = i;
352  index++;
353  paths[index][0] = dir; paths[index][1] = i; paths[index][2] = paths[index][3] = opp(dir); paths[index][4] = opp(i);
354  index++;
355  paths[index][0] = dir; paths[index][1] = opp(i); paths[index][2] = paths[index][3] = opp(dir); paths[index][4] = i;
356  index++;
357  paths[index][0] = i; paths[index][1] = paths[index][2] = opp(dir); paths[index][3] = opp(i); paths[index][4] = dir;
358  index++;
359  paths[index][0] = opp(i); paths[index][1] = paths[index][2] = opp(dir); paths[index][3] = i; paths[index][4] = dir;
360  index++;
361  }
362 
363 
364  // Staple paths
365  for(int i=0; i<4; ++i){
366  for(int j=0; j<4; ++j){
367  if(i==dir || j==dir || i==j) continue;
368 
369  paths[index][0] = i; paths[index][1] = j; paths[index][2] = opp(dir); paths[index][3] = opp(i), paths[index][4] = opp(j);
370  index++;
371  paths[index][0] = i; paths[index][1] = opp(j); paths[index][2] = opp(dir); paths[index][3] = opp(i), paths[index][4] = j;
372  index++;
373  paths[index][0] = opp(i); paths[index][1] = j; paths[index][2] = opp(dir); paths[index][3] = i, paths[index][4] = opp(j);
374  index++;
375  paths[index][0] = opp(i); paths[index][1] = opp(j); paths[index][2] = opp(dir); paths[index][3] = i, paths[index][4] = j;
376  index++;
377  }
378  }
379 
380 }
381 
382 
383 
384 void qudaGaugeForce( int precision,
385  int num_loop_types,
386  double milc_loop_coeff[3],
387  double eb3,
388  void* milc_sitelink,
389  void* milc_momentum
390  )
391 {
392  qudamilc_called<true>(__func__);
393  const int numPaths = 48;
394  QudaGaugeParam qudaGaugeParam = newMILCGaugeParam(localDim,
395  (precision==1) ? QUDA_SINGLE_PRECISION : QUDA_DOUBLE_PRECISION,
397 
398  static double loop_coeff[numPaths];
399  static int length[numPaths];
400 
401  if((milc_loop_coeff[0] != loop_coeff[0]) ||
402  (milc_loop_coeff[1] != loop_coeff[6]) ||
403  (milc_loop_coeff[2] != loop_coeff[24]) ){
404 
405  setLoopCoeffs(milc_loop_coeff, loop_coeff);
406  for(int i=0; i<6; ++i) length[i] = 3;
407  for(int i=6; i<48; ++i) length[i] = 5;
408  }
409 
410 
411  int** input_path_buf[4];
412  for(int dir=0; dir<4; ++dir){
413  input_path_buf[dir] = new int*[numPaths];
414  for(int i=0; i<numPaths; ++i){
415  input_path_buf[dir][i] = new int[length[i]];
416  }
417  createGaugeForcePaths(input_path_buf[dir], dir);
418  }
419 
420  int max_length = 6;
421  double timeinfo[3];
422 
423  memset(milc_momentum, 0, 4*getVolume(localDim)*10*qudaGaugeParam.cpu_prec);
424  computeGaugeForceQuda(milc_momentum, milc_sitelink, input_path_buf, length,
425  loop_coeff, numPaths, max_length, eb3,
426  &qudaGaugeParam, timeinfo);
427 
428  for(int dir=0; dir<4; ++dir){
429  for(int i=0; i<numPaths; ++i){
430  delete[] input_path_buf[dir][i];
431  }
432  delete[] input_path_buf[dir];
433  }
434  qudamilc_called<false>(__func__);
435  return;
436 }
437 
438 #endif // GPU_GAUGE_FORCE
439 
440 static int getFatLinkPadding(const int dim[4])
441 {
442  int padding = MAX(dim[1]*dim[2]*dim[3]/2, dim[0]*dim[2]*dim[3]/2);
443  padding = MAX(padding, dim[0]*dim[1]*dim[3]/2);
444  padding = MAX(padding, dim[0]*dim[1]*dim[2]/2);
445  return padding;
446 }
447 
448 
449 #ifdef GPU_STAGGERED_DIRAC
450 // set the params for the single mass solver
451 static void setInvertParams(const int dim[4],
454  QudaPrecision cuda_prec_sloppy,
455  QudaPrecision cuda_prec_precondition,
456  double mass,
457  double target_residual,
458  double target_residual_hq,
459  int maxiter,
460  double reliable_delta,
462  QudaVerbosity verbosity,
463  QudaInverterType inverter,
464  QudaInvertParam *invertParam)
465 {
466  invertParam->use_sloppy_partial_accumulator = 0;
467  invertParam->verbosity = verbosity;
468  invertParam->mass = mass;
469  invertParam->tol = target_residual;
470  invertParam->tol_hq =target_residual_hq;
471  invertParam->num_offset = 0;
472 
473  invertParam->inv_type = inverter;
474  invertParam->maxiter = maxiter;
475  invertParam->reliable_delta = reliable_delta;
476 
478  invertParam->cpu_prec = cpu_prec;
479  invertParam->cuda_prec = cuda_prec;
480  invertParam->cuda_prec_sloppy = cuda_prec_sloppy;
481 
483  invertParam->solve_type = QUDA_NORMEQ_PC_SOLVE;
485  invertParam->gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS; // not used, but required by the code.
486  invertParam->dirac_order = QUDA_DIRAC_ORDER;
487 
488  invertParam->dslash_type = QUDA_ASQTAD_DSLASH;
489  invertParam->tune = QUDA_TUNE_YES;
490  invertParam->gflops = 0.0;
491 
494 
495 
496  if(parity == QUDA_EVEN_PARITY){ // even parity
497  invertParam->matpc_type = QUDA_MATPC_EVEN_EVEN;
498  }else if(parity == QUDA_ODD_PARITY){
499  invertParam->matpc_type = QUDA_MATPC_ODD_ODD;
500  }else{
501  errorQuda("Invalid parity\n");
502  exit(1);
503  }
504 
505  invertParam->dagger = QUDA_DAG_NO;
506  invertParam->sp_pad = dim[0]*dim[1]*dim[2]/2;
507  invertParam->use_init_guess = QUDA_USE_INIT_GUESS_YES;
508 
509  // for the preconditioner
511  invertParam->tol_precondition = 1e-1;
512  invertParam->maxiter_precondition = 2;
513  invertParam->verbosity_precondition = QUDA_SILENT;
514  invertParam->cuda_prec_precondition = cuda_prec_precondition;
515 
516 
517  return;
518 }
519 
520 
521 
522 
523 // Set params for the multi-mass solver.
524 static void setInvertParams(const int dim[4],
525  QudaPrecision cpu_prec,
526  QudaPrecision cuda_prec,
527  QudaPrecision cuda_prec_sloppy,
528  QudaPrecision cuda_prec_precondition,
529  int num_offset,
530  const double offset[],
531  const double target_residual_offset[],
532  const double target_residual_hq_offset[],
533  int maxiter,
534  double reliable_delta,
535  QudaParity parity,
536  QudaVerbosity verbosity,
537  QudaInverterType inverter,
538  QudaInvertParam *invertParam)
539 {
540 
541  const double null_mass = -1;
542  const double null_residual = -1;
543 
544 
545  setInvertParams(dim, cpu_prec, cuda_prec, cuda_prec_sloppy, cuda_prec_precondition,
546  null_mass, null_residual, null_residual, maxiter, reliable_delta, parity, verbosity, inverter, invertParam);
547 
548  invertParam->num_offset = num_offset;
549  for(int i=0; i<num_offset; ++i){
550  invertParam->offset[i] = offset[i];
551  invertParam->tol_offset[i] = target_residual_offset[i];
552  //if(invertParam->residual_type & QUDA_HEAVY_QUARK_RESIDUAL){
553  invertParam->tol_hq_offset[i] = target_residual_hq_offset[i];
554  //}
555  }
556  return;
557 }
558 
559 
560 static void setGaugeParams(const int dim[4],
561  QudaPrecision cpu_prec,
562  QudaPrecision cuda_prec,
563  QudaPrecision cuda_prec_sloppy,
564  QudaPrecision cuda_prec_precondition,
565  const double tadpole,
566  QudaGaugeParam *gaugeParam)
567 {
568 
569  for(int dir=0; dir<4; ++dir){
570  gaugeParam->X[dir] = dim[dir];
571  }
572 
573  gaugeParam->cpu_prec = cpu_prec;
574  gaugeParam->cuda_prec = cuda_prec;
575  gaugeParam->cuda_prec_sloppy = cuda_prec_sloppy;
576  gaugeParam->reconstruct = QUDA_RECONSTRUCT_NO;
578 
579  gaugeParam->gauge_fix = QUDA_GAUGE_FIXED_NO;
580  gaugeParam->anisotropy = 1.0;
581  gaugeParam->tadpole_coeff = tadpole;
582  gaugeParam->t_boundary = QUDA_PERIODIC_T; // anti-periodic boundary conditions are built into the gauge field
583  gaugeParam->gauge_order = QUDA_MILC_GAUGE_ORDER;
584  gaugeParam->ga_pad = getFatLinkPadding(dim);
585  gaugeParam->scale = -1.0/(24.0*gaugeParam->tadpole_coeff*gaugeParam->tadpole_coeff);
586 
587 
588  // preconditioning...
589  gaugeParam->cuda_prec_precondition = cuda_prec_precondition;
591 
592  return;
593 }
594 
595 
596 
597 static void setColorSpinorParams(const int dim[4],
598  QudaPrecision precision,
599  ColorSpinorParam* param)
600 {
601 
602  param->nColor = 3;
603  param->nSpin = 1;
604  param->nDim = 4;
605 
606  for(int dir=0; dir<4; ++dir) param->x[dir] = dim[dir];
607  param->x[0] /= 2;
608 
609  param->precision = precision;
610  param->pad = 0;
614  param->gammaBasis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS; // meaningless, but required by the code.
616  return;
617 }
618 
619 
620 static size_t getColorVectorOffset(QudaParity local_parity, bool even_odd_exchange, const int dim[4])
621 {
622  size_t offset;
623  int volume = dim[0]*dim[1]*dim[2]*dim[3];
624 
625  if(local_parity == QUDA_EVEN_PARITY){
626  offset = even_odd_exchange ? volume*6/2 : 0;
627  }else{
628  offset = even_odd_exchange ? 0 : volume*6/2;
629  }
630  return offset;
631 }
632 
633 
634 void qudaMultishiftInvert(int external_precision,
635  int quda_precision,
636  int num_offsets,
637  double* const offset,
638  QudaInvertArgs_t inv_args,
639  const double target_residual[],
640  const double target_fermilab_residual[],
641  const void* const fatlink,
642  const void* const longlink,
643  const double tadpole,
644  void* source,
645  void** solutionArray,
646  double* const final_residual,
647  double* const final_fermilab_residual,
648  int *num_iters)
649 {
650 
651  // for(int i=0; i<num_offsets; ++i){
652  // if(target_residual[i] == 0){
653  // errorQuda("qudaMultishiftInvert: target residual cannot be zero\n");
654  // exit(1);
655  // }
656  // }
657  // for(int i=0; i<num_offsets; ++i){
658  static const QudaVerbosity verbosity = getVerbosity();
659  qudamilc_called<true>(__func__, verbosity);
660 
661  if(target_residual[0] == 0){
662  errorQuda("qudaMultishiftInvert: target residual cannot be zero\n");
663  exit(1);
664  }
665  // }
666 
667  QudaPrecision host_precision = (external_precision == 2) ? QUDA_DOUBLE_PRECISION : QUDA_SINGLE_PRECISION;
668  QudaPrecision device_precision = (quda_precision == 2) ? QUDA_DOUBLE_PRECISION : QUDA_SINGLE_PRECISION;
669  const bool use_mixed_precision = (((quda_precision==2) && inv_args.mixed_precision) ||
670  ((quda_precision==1) && (inv_args.mixed_precision==2)) ) ? true : false;
671  QudaPrecision device_precision_sloppy;
672  if(inv_args.mixed_precision == 2){
673  device_precision_sloppy = QUDA_HALF_PRECISION;
674  }else if(inv_args.mixed_precision == 1){
675  device_precision_sloppy = QUDA_SINGLE_PRECISION;
676  }else{
677  device_precision_sloppy = device_precision;
678  }
679 
680  QudaPrecision device_precision_precondition = device_precision_sloppy;
681 
682 
683 
684 
685 
686 /*
687  if(verbosity >= QUDA_VERBOSE){
688  if(quda_precision == 2){
689  printfQuda("Using %s double-precision multi-mass inverter\n", use_mixed_precision?"mixed":"pure");
690  }else if(quda_precision == 1){
691  printfQuda("Using %s single-precision multi-mass inverter\n", use_mixed_precision?"mixed":"pure");
692  }else{
693  errorQuda("Unrecognised precision\n");
694  exit(1);
695  }
696  }
697 */
698 
699  QudaGaugeParam gaugeParam = newQudaGaugeParam();
700  setGaugeParams(localDim, host_precision, device_precision, device_precision_sloppy, device_precision_precondition, tadpole, &gaugeParam);
701 
702  QudaInvertParam invertParam = newQudaInvertParam();
703 
704  invertParam.residual_type = static_cast<QudaResidualType_s>(0);
705  invertParam.residual_type = (target_residual[0] != 0) ? static_cast<QudaResidualType_s> ( invertParam.residual_type | QUDA_L2_RELATIVE_RESIDUAL) : invertParam.residual_type;
706  invertParam.residual_type = (target_fermilab_residual[0] != 0) ? static_cast<QudaResidualType_s> (invertParam.residual_type | QUDA_HEAVY_QUARK_RESIDUAL) : invertParam.residual_type;
707  if (invertParam.residual_type ==QUDA_L2_RELATIVE_RESIDUAL)
708  printfQuda("Using QUDA_L2_RELATIVE_RESIDUAL only");
709  else{
710  if (invertParam.residual_type &QUDA_L2_RELATIVE_RESIDUAL)
711  printfQuda("Using QUDA_L2_RELATIVE_RESIDUAL");
712  if (invertParam.residual_type &QUDA_L2_RELATIVE_RESIDUAL)
713  printfQuda("Using QUDA_HEAVY_QUARK_RESIDUAL");
714  }
715 
716  invertParam.use_sloppy_partial_accumulator = 0;
717 
718 
719  const double ignore_mass = 1.0;
720 
721  QudaParity local_parity = inv_args.evenodd;
722  {
723  // need to set this to zero until issue #146 is fixed
724  const double reliable_delta = (use_mixed_precision ? 1e-1 :0.0);//1e-1;
725  setInvertParams(localDim, host_precision, device_precision, device_precision_sloppy, device_precision_precondition,
726  num_offsets, offset, target_residual, target_fermilab_residual,
727  inv_args.max_iter, reliable_delta, local_parity, verbosity, QUDA_CG_INVERTER, &invertParam);
728  }
729 
731  setColorSpinorParams(localDim, host_precision, &csParam);
732 
733  const QudaPrecision milc_precision = (external_precision==2) ? QUDA_DOUBLE_PRECISION : QUDA_SINGLE_PRECISION;
734 
735 // if(invalidate_quda_gauge){
736  const int fat_pad = getFatLinkPadding(localDim);
737  gaugeParam.type = QUDA_GENERAL_LINKS;
738  gaugeParam.ga_pad = fat_pad; // don't know if this is correct
739  gaugeParam.reconstruct = gaugeParam.reconstruct_sloppy = QUDA_RECONSTRUCT_NO;
740  loadGaugeQuda(const_cast<void*>(fatlink), &gaugeParam);
741 
742  const int long_pad = 3*fat_pad;
743  gaugeParam.type = QUDA_THREE_LINKS;
744  gaugeParam.ga_pad = long_pad;
745  loadGaugeQuda(const_cast<void*>(longlink), &gaugeParam);
746 
747  // invalidate_quda_gauge = false;
748 // }
749 
750  void** sln_pointer = (void**)malloc(num_offsets*sizeof(void*));
751  int quark_offset = getColorVectorOffset(local_parity, false, gaugeParam.X)*host_precision;
752  void* src_pointer = (char*)source + quark_offset;
753 
754  for(int i=0; i<num_offsets; ++i) sln_pointer[i] = (char*)solutionArray[i] + quark_offset;
755 
756  invertMultiShiftQuda(sln_pointer, src_pointer, &invertParam);
757  free(sln_pointer);
758 
759  // return the number of iterations taken by the inverter
760  *num_iters = invertParam.iter;
761  for(int i=0; i<num_offsets; ++i){
762  final_residual[i] = invertParam.true_res_offset[i];
763  final_fermilab_residual[i] = invertParam.true_res_hq_offset[i];
764  } // end loop over number of offsets
765 
766  freeGaugeQuda();
767  // if(!create_quda_gauge) invalidateGaugeQuda();
768  qudamilc_called<false>(__func__, verbosity);
769  return;
770 } // qudaMultiShiftInvert
771 
772 
773 
774 
775 void qudaInvert(int external_precision,
776  int quda_precision,
777  double mass,
778  QudaInvertArgs_t inv_args,
779  double target_residual,
780  double target_fermilab_residual,
781  const void* const fatlink,
782  const void* const longlink,
783  const double tadpole,
784  void* source,
785  void* solution,
786  double* const final_residual,
787  double* const final_fermilab_residual,
788  int* num_iters)
789 {
790 
791  static const QudaVerbosity verbosity = getVerbosity();
792  qudamilc_called<true>(__func__, verbosity);
793 
794  if(target_fermilab_residual == 0 && target_residual == 0){
795  errorQuda("qudaInvert: requesting zero residual\n");
796  exit(1);
797  }
798 
799  const bool use_mixed_precision = (((quda_precision==2) && inv_args.mixed_precision) ||
800  ((quda_precision==1) && (inv_args.mixed_precision==2) ) ) ? true : false;
801 
802  // static const QudaVerbosity verbosity = getVerbosity();
803  QudaPrecision host_precision = (external_precision == 2) ? QUDA_DOUBLE_PRECISION : QUDA_SINGLE_PRECISION;
804  QudaPrecision device_precision = (quda_precision == 2) ? QUDA_DOUBLE_PRECISION : QUDA_SINGLE_PRECISION;
805  QudaPrecision device_precision_sloppy;
806 
807  if(inv_args.mixed_precision == 2){
808  device_precision_sloppy = QUDA_HALF_PRECISION;
809  }else if(inv_args.mixed_precision == 1){
810  device_precision_sloppy = QUDA_SINGLE_PRECISION;
811  }else{
812  device_precision_sloppy = device_precision;
813  }
814 
815 
816 
817  QudaPrecision device_precision_precondition = device_precision_sloppy;
818  QudaGaugeParam gaugeParam = newQudaGaugeParam();
819  // a basic set routine for the gauge parameters
820  setGaugeParams(localDim, host_precision, device_precision, device_precision_sloppy, device_precision_precondition, tadpole, &gaugeParam);
821  QudaInvertParam invertParam = newQudaInvertParam();
822 
823  invertParam.residual_type = static_cast<QudaResidualType_s>(0);
824  invertParam.residual_type = (target_residual != 0) ? static_cast<QudaResidualType_s> ( invertParam.residual_type | QUDA_L2_RELATIVE_RESIDUAL) : invertParam.residual_type;
825  invertParam.residual_type = (target_fermilab_residual != 0) ? static_cast<QudaResidualType_s> (invertParam.residual_type | QUDA_HEAVY_QUARK_RESIDUAL) : invertParam.residual_type;
826 
827 
828  QudaParity local_parity = inv_args.evenodd;
829  //double& target_res = (invertParam.residual_type == QUDA_L2_RELATIVE_RESIDUAL) ? target_residual : target_fermilab_residual;
830  double& target_res = target_residual;
831  double& target_res_hq = target_fermilab_residual;
832  const double reliable_delta = 1e-1;
833 
834  setInvertParams(localDim, host_precision, device_precision, device_precision_sloppy, device_precision_precondition,
835  mass, target_res, target_res_hq, inv_args.max_iter, reliable_delta, local_parity, verbosity, QUDA_CG_INVERTER, &invertParam);
836  invertParam.use_sloppy_partial_accumulator = 0;
837 
839  setColorSpinorParams(localDim, host_precision, &csParam);
840 
841 
842  const QudaPrecision milc_precision = (external_precision==2) ? QUDA_DOUBLE_PRECISION : QUDA_SINGLE_PRECISION;
843  const int fat_pad = getFatLinkPadding(localDim);
844  const int long_pad = 3*fat_pad;
845 
846 // if(invalidate_quda_gauge){
847  gaugeParam.type = QUDA_GENERAL_LINKS;
848  gaugeParam.ga_pad = fat_pad;
849  gaugeParam.reconstruct = gaugeParam.reconstruct_sloppy = QUDA_RECONSTRUCT_NO;
850  loadGaugeQuda(const_cast<void*>(fatlink), &gaugeParam);
851 
852  gaugeParam.type = QUDA_THREE_LINKS;
853  gaugeParam.ga_pad = long_pad;
854  gaugeParam.reconstruct = gaugeParam.reconstruct_sloppy = QUDA_RECONSTRUCT_NO;
855  loadGaugeQuda(const_cast<void*>(longlink), &gaugeParam);
856 
857  invalidate_quda_gauge = false;
858 // }
859 
860  int quark_offset = getColorVectorOffset(local_parity, false, gaugeParam.X);
861 
862  invertQuda(((char*)solution + quark_offset*host_precision),
863  ((char*)source + quark_offset*host_precision),
864  &invertParam);
865 
866  // return the number of iterations taken by the inverter
867  *num_iters = invertParam.iter;
868  *final_residual = invertParam.true_res;
869  *final_fermilab_residual = invertParam.true_res_hq;
870 
871  freeGaugeQuda();
872  qudamilc_called<false>(__func__, verbosity);
873 // if(!create_quda_gauge) invalidateGaugeQuda();
874  return;
875 } // qudaInvert
876 
877 #endif
878 
879 #ifdef GPU_CLOVER_DIRAC
880 
881 void* qudaCreateExtendedGaugeField(void* gauge, int geometry, int precision)
882 {
883  qudamilc_called<true>(__func__);
884  QudaPrecision qudaPrecision = (precision==2) ? QUDA_DOUBLE_PRECISION : QUDA_SINGLE_PRECISION;
885  QudaGaugeParam gaugeParam = newMILCGaugeParam(localDim, qudaPrecision,
886  (geometry==1) ? QUDA_GENERAL_LINKS : QUDA_SU3_LINKS);
887 
888  return createExtendedGaugeField(gauge, geometry, &gaugeParam);
889 }
890 
891 
892 void* qudaCreateGaugeField(void* gauge, int geometry, int precision)
893 {
894  qudamilc_called<true>(__func__);
895  QudaPrecision qudaPrecision = (precision==2) ? QUDA_DOUBLE_PRECISION : QUDA_SINGLE_PRECISION;
896  QudaGaugeParam gaugeParam = newMILCGaugeParam(localDim, qudaPrecision,
897  (geometry==1) ? QUDA_GENERAL_LINKS : QUDA_SU3_LINKS);
898 
899  return createGaugeField(gauge, geometry, &gaugeParam);
900 }
901 
902 
903 void qudaSaveGaugeField(void* gauge, void* inGauge)
904 {
905  qudamilc_called<true>(__func__);
906  cudaGaugeField* cudaGauge = reinterpret_cast<cudaGaugeField*>(inGauge);
907  QudaGaugeParam gaugeParam = newMILCGaugeParam(localDim, cudaGauge->Precision(), QUDA_GENERAL_LINKS);
908  saveGaugeField(gauge, inGauge, &gaugeParam);
909  qudamilc_called<false>(__func__);
910  return;
911 }
912 
913 
914 void qudaDestroyGaugeField(void* gauge)
915 {
916  qudamilc_called<true>(__func__);
917  destroyQudaGaugeField(gauge);
918  qudamilc_called<false>(__func__);
919  return;
920 }
921 
922 
923 void qudaCloverTrace(void* out, void* clover, int mu, int nu)
924 {
925  computeCloverTraceQuda(out, clover, mu, nu, const_cast<int*>(localDim));
926  return;
927 }
928 
929 
930 
931 void qudaCloverDerivative(void* out, void* gauge, void* oprod, int mu, int nu, double coeff, int precision, int parity, int conjugate)
932 {
933 
934  QudaParity qudaParity = (parity==2) ? QUDA_EVEN_PARITY : QUDA_ODD_PARITY;
935  QudaGaugeParam gaugeParam = newMILCGaugeParam(localDim,
936  (precision==1) ? QUDA_SINGLE_PRECISION : QUDA_DOUBLE_PRECISION,
938 
939  computeCloverDerivativeQuda(out, gauge, oprod, mu, nu, coeff, qudaParity, &gaugeParam, conjugate);
940 
941  return;
942 }
943 
944 
945 
946 void setGaugeParams(QudaGaugeParam &gaugeParam, const int dim[4], QudaInvertArgs_t &inv_args,
947  int external_precision, int quda_precision) {
948 
949  const QudaPrecision host_precision = (external_precision == 2) ? QUDA_DOUBLE_PRECISION : QUDA_SINGLE_PRECISION;
950  const QudaPrecision device_precision = (quda_precision == 2) ? QUDA_DOUBLE_PRECISION : QUDA_SINGLE_PRECISION;
951  QudaPrecision device_precision_sloppy;
952 
953  if(inv_args.mixed_precision == 2){
954  device_precision_sloppy = QUDA_HALF_PRECISION;
955  }else if(inv_args.mixed_precision == 1){
956  device_precision_sloppy = QUDA_SINGLE_PRECISION;
957  }else{
958  device_precision_sloppy = device_precision;
959  }
960 
961  for(int dir=0; dir<4; ++dir) gaugeParam.X[dir] = dim[dir];
962 
963  gaugeParam.anisotropy = 1.0;
964  gaugeParam.type = QUDA_WILSON_LINKS;
965  gaugeParam.gauge_order = QUDA_MILC_GAUGE_ORDER;
966 
967  // Check the boundary conditions
968  // Can't have twisted or anti-periodic boundary conditions in the spatial
969  // directions with 12 reconstruct at the moment.
970  bool trivial_phase = true;
971  for(int dir=0; dir<3; ++dir){
972  if(inv_args.boundary_phase[dir] != 0) trivial_phase = false;
973  }
974  if(inv_args.boundary_phase[3] != 0 && inv_args.boundary_phase[3] != 1) trivial_phase = false;
975 
976  if(trivial_phase){
977  gaugeParam.t_boundary = (inv_args.boundary_phase[3]) ? QUDA_ANTI_PERIODIC_T : QUDA_PERIODIC_T;
978  gaugeParam.reconstruct = QUDA_RECONSTRUCT_12;
980  }else{
981  gaugeParam.t_boundary = QUDA_PERIODIC_T;
982  gaugeParam.reconstruct = QUDA_RECONSTRUCT_NO;
984  }
985 
986  gaugeParam.cpu_prec = host_precision;
987  gaugeParam.cuda_prec = device_precision;
988  gaugeParam.cuda_prec_sloppy = device_precision_sloppy;
989  gaugeParam.cuda_prec_precondition = device_precision_sloppy;
990  gaugeParam.gauge_fix = QUDA_GAUGE_FIXED_NO;
991  gaugeParam.ga_pad = getFatLinkPadding(dim);
992 }
993 
994 
995 
996 void setInvertParam(QudaInvertParam &invertParam, QudaInvertArgs_t &inv_args,
997  int external_precision, int quda_precision, double kappa) {
998 
999  const QudaPrecision host_precision = (external_precision == 2) ? QUDA_DOUBLE_PRECISION : QUDA_SINGLE_PRECISION;
1000  const QudaPrecision device_precision = (quda_precision == 2) ? QUDA_DOUBLE_PRECISION : QUDA_SINGLE_PRECISION;
1001  QudaPrecision device_precision_sloppy;
1002  if(inv_args.mixed_precision == 2){
1003  device_precision_sloppy = QUDA_HALF_PRECISION;
1004  }else if(inv_args.mixed_precision == 1){
1005  device_precision_sloppy = QUDA_SINGLE_PRECISION;
1006  }else{
1007  device_precision_sloppy = device_precision;
1008  }
1009 
1010 
1011 
1012  invertParam.dslash_type = QUDA_CLOVER_WILSON_DSLASH;
1013  invertParam.kappa = kappa;
1014  invertParam.dagger = QUDA_DAG_NO;
1016  invertParam.gcrNkrylov = 30;
1017  invertParam.reliable_delta = 1e-1;
1018  invertParam.maxiter = inv_args.max_iter;
1019 
1020  invertParam.cuda_prec_precondition = device_precision_sloppy;
1021  invertParam.verbosity_precondition = QUDA_SILENT;
1022  invertParam.verbosity = QUDA_SILENT;
1023  invertParam.cpu_prec = host_precision;
1024  invertParam.cuda_prec = device_precision;
1025  invertParam.cuda_prec_sloppy = device_precision_sloppy;
1028  invertParam.dirac_order = QUDA_DIRAC_ORDER;
1029  invertParam.tune = QUDA_TUNE_YES;
1030  invertParam.sp_pad = 0;
1031  invertParam.cl_pad = 0;
1032  invertParam.clover_cpu_prec = host_precision;
1033  invertParam.clover_cuda_prec = device_precision;
1034  invertParam.clover_cuda_prec_sloppy = device_precision_sloppy;
1035  invertParam.clover_cuda_prec_precondition = device_precision_sloppy;
1036  invertParam.clover_order = QUDA_PACKED_CLOVER_ORDER;
1037 }
1038 
1039 
1040 void qudaLoadGaugeField(int external_precision,
1041  int quda_precision,
1042  QudaInvertArgs_t inv_args,
1043  const void* milc_link) {
1044  qudamilc_called<true>(__func__);
1045  QudaGaugeParam gaugeParam = newQudaGaugeParam();
1046  setGaugeParams(gaugeParam, localDim, inv_args, external_precision, quda_precision);
1047 
1048  loadGaugeQuda(const_cast<void*>(milc_link), &gaugeParam);
1049  qudamilc_called<false>(__func__);
1050 } // qudaLoadGaugeField
1051 
1052 
1053 void qudaFreeGaugeField() {
1054  qudamilc_called<true>(__func__);
1055  freeGaugeQuda();
1056  qudamilc_called<false>(__func__);
1057 } // qudaFreeGaugeField
1058 
1059 
1060 void qudaLoadCloverField(int external_precision,
1061  int quda_precision,
1062  QudaInvertArgs_t inv_args,
1063  void* milc_clover,
1064  void* milc_clover_inv,
1065  QudaSolutionType solution_type,
1066  QudaSolveType solve_type,
1067  double clover_coeff,
1068  int compute_trlog,
1069  double *trlog) {
1070 
1071  QudaInvertParam invertParam = newQudaInvertParam();
1072  setInvertParam(invertParam, inv_args, external_precision, quda_precision, 0.0);
1073  invertParam.solution_type = solution_type;
1074  invertParam.solve_type = solve_type;
1076  invertParam.compute_clover_trlog = compute_trlog;
1077  invertParam.clover_coeff = clover_coeff;
1078 
1079  if(invertParam.dslash_type == QUDA_CLOVER_WILSON_DSLASH) {
1080  if (clover_alloc == 0) {
1081  loadCloverQuda(milc_clover, milc_clover_inv, &invertParam);
1082  clover_alloc = 1;
1083  } else {
1084  errorQuda("Clover term already allocated");
1085  }
1086  }
1087 
1088  if (compute_trlog) {
1089  trlog[0] = invertParam.trlogA[0];
1090  trlog[1] = invertParam.trlogA[1];
1091  }
1092 } // qudaLoadCoverField
1093 
1094 
1095 
1096 void qudaFreeCloverField() {
1097  if (clover_alloc==1) {
1098  freeCloverQuda();
1099  clover_alloc = 0;
1100  } else {
1101  errorQuda("Trying to free non-allocated clover term");
1102  }
1103 } // qudaFreeCloverField
1104 
1105 
1106 
1107 void qudaCloverInvert(int external_precision,
1108  int quda_precision,
1109  double kappa,
1110  double clover_coeff,
1111  QudaInvertArgs_t inv_args,
1112  double target_residual,
1113  double target_fermilab_residual,
1114  const void* link,
1115  void* clover, // could be stored in Milc format
1116  void* cloverInverse,
1117  void* source,
1118  void* solution,
1119  double* const final_residual,
1120  double* const final_fermilab_residual,
1121  int* num_iters)
1122 {
1123 
1124  if(target_fermilab_residual !=0 && target_residual != 0){
1125  errorQuda("qudaCloverInvert: conflicting residuals requested\n");
1126  exit(1);
1127  }else if(target_fermilab_residual == 0 && target_residual == 0){
1128  errorQuda("qudaCloverInvert: requesting zero residual\n");
1129  exit(1);
1130  }
1131 
1132  qudaLoadGaugeField(external_precision, quda_precision, inv_args, link);
1133 
1134 // double clover_coeff = 0.0;
1135  qudaLoadCloverField(external_precision, quda_precision, inv_args, clover, cloverInverse,
1136  QUDA_MAT_SOLUTION, QUDA_DIRECT_PC_SOLVE, clover_coeff, 0, 0);
1137 
1138  QudaInvertParam invertParam = newQudaInvertParam();
1139  setInvertParam(invertParam, inv_args, external_precision, quda_precision, kappa);
1140  invertParam.residual_type = (target_residual != 0) ? QUDA_L2_RELATIVE_RESIDUAL : QUDA_HEAVY_QUARK_RESIDUAL;
1141  invertParam.tol = (target_residual != 0) ? target_residual : target_fermilab_residual;
1142 
1143  // solution types
1144  invertParam.solution_type = QUDA_MAT_SOLUTION;
1145  invertParam.solve_type = QUDA_DIRECT_PC_SOLVE;
1146  invertParam.inv_type = QUDA_BICGSTAB_INVERTER;
1147  invertParam.matpc_type = QUDA_MATPC_ODD_ODD;
1148 
1149  invertQuda(solution, source, &invertParam);
1150  *num_iters = invertParam.iter;
1151  *final_residual = invertParam.true_res;
1152  *final_fermilab_residual = invertParam.true_res_hq;
1153 
1156 
1157  return;
1158 } // qudaCloverInvert
1159 
1160 
1161 void qudaCloverMultishiftInvert(int external_precision,
1162  int quda_precision,
1163  int num_offsets,
1164  double* const offset,
1165  double kappa,
1166  double clover_coeff,
1167  QudaInvertArgs_t inv_args,
1168  const double* target_residual_offset,
1169  const void* milc_link,
1170  void* milc_clover,
1171  void* milc_clover_inv,
1172  void* source,
1173  void** solutionArray,
1174  double* const final_residual,
1175  int* num_iters)
1176 {
1177 
1178  static const QudaVerbosity verbosity = getVerbosity();
1179  qudamilc_called<true>(__func__, verbosity);
1180 
1181  for(int i=0; i<num_offsets; ++i){
1182  if(target_residual_offset[i] == 0){
1183  errorQuda("qudaMultishiftInvert: target residual cannot be zero\n");
1184  exit(1);
1185  }
1186  }
1187 
1188  QudaInvertParam invertParam = newQudaInvertParam();
1189  setInvertParam(invertParam, inv_args, external_precision, quda_precision, kappa);
1191  invertParam.num_offset = num_offsets;
1192  for(int i=0; i<num_offsets; ++i){
1193  invertParam.offset[i] = offset[i];
1194  invertParam.tol_offset[i] = target_residual_offset[i];
1195  }
1196  invertParam.tol = target_residual_offset[0];
1197  invertParam.clover_coeff = clover_coeff;
1198 
1199  // solution types
1201  invertParam.solve_type = QUDA_NORMOP_PC_SOLVE;
1202  invertParam.inv_type = QUDA_CG_INVERTER;
1204 
1205  invertParam.verbosity = verbosity;
1206  invertParam.verbosity_precondition = QUDA_SILENT;
1207 
1208  invertMultiShiftQuda(solutionArray, source, &invertParam);
1209 
1210  // return the number of iterations taken by the inverter
1211  *num_iters = invertParam.iter;
1212  for(int i=0; i<num_offsets; ++i) final_residual[i] = invertParam.true_res_offset[i];
1213 
1214  return;
1215 } // qudaCloverMultishiftInvert
1216 
1217 
1218 
1219 
1220 
1221 void qudaCloverMultishiftMDInvert(int external_precision,
1222  int quda_precision,
1223  int num_offsets,
1224  double* const offset,
1225  double kappa,
1226  double clover_coeff,
1227  QudaInvertArgs_t inv_args,
1228  const double* target_residual_offset,
1229  const void* milc_link,
1230  void* milc_clover,
1231  void* milc_clover_inv,
1232  void* source,
1233  void** psiEven,
1234  void** psiOdd,
1235  void** pEven,
1236  void** pOdd,
1237  double* const final_residual,
1238  int* num_iters)
1239 {
1240  static const QudaVerbosity verbosity = getVerbosity();
1241  qudamilc_called<true>(__func__, verbosity);
1242 
1243  for(int i=0; i<num_offsets; ++i){
1244  if(target_residual_offset[i] == 0){
1245  errorQuda("qudaMultishiftInvert: target residual cannot be zero\n");
1246  exit(1);
1247  }
1248  }
1249 
1250  QudaInvertParam invertParam = newQudaInvertParam();
1251  setInvertParam(invertParam, inv_args, external_precision, quda_precision, kappa);
1253  invertParam.num_offset = num_offsets;
1254  for(int i=0; i<num_offsets; ++i){
1255  invertParam.offset[i] = offset[i];
1256  invertParam.tol_offset[i] = target_residual_offset[i];
1257  }
1258  invertParam.tol = target_residual_offset[0];
1259  invertParam.clover_coeff = clover_coeff;
1260 
1261  // solution types
1263  invertParam.solve_type = QUDA_NORMOP_PC_SOLVE;
1264  invertParam.inv_type = QUDA_CG_INVERTER;
1266 
1267  invertParam.verbosity = verbosity;
1268  invertParam.verbosity_precondition = QUDA_SILENT;
1269 
1270  invertMultiShiftMDQuda(psiEven, psiOdd, pEven, pOdd, source, &invertParam);
1271 
1272  // return the number of iterations taken by the inverter
1273  *num_iters = invertParam.iter;
1274  for(int i=0; i<num_offsets; ++i) final_residual[i] = invertParam.true_res_offset[i];
1275 
1276  return;
1277 } // qudaCloverMultishiftMDInvert
1278 
1279 #endif // GPU_CLOVER_DIRAC
1280 
1281 
1282 #endif // BUILD_MILC_INTERFACE
int maxiter_precondition
Definition: quda.h:216
QudaDiracFieldOrder dirac_order
Definition: quda.h:156
void qudaGaugeForce(int precision, int num_loop_types, double milc_loop_coeff[3], double eb3, void *milc_sitelink, void *milc_momentum)
QudaMassNormalization mass_normalization
Definition: quda.h:146
double tol_hq_offset[QUDA_MAX_MULTI_SHIFT]
Definition: quda.h:134
QudaReconstructType reconstruct_sloppy
Definition: quda.h:46
double anisotropy
Definition: quda.h:31
void freeCloverQuda(void)
void destroyQudaGaugeField(void *gauge)
void computeCloverTraceQuda(void *out, void *clover, int mu, int nu, int dim[4])
void invertMultiShiftQuda(void **_hp_x, void *_hp_b, QudaInvertParam *param)
void setVerbosityQuda(QudaVerbosity verbosity, const char prefix[], FILE *outfile)
void endQuda(void)
void qudaHisqParamsInit(QudaHisqParams_t hisq_params)
QudaSolveType solve_type
Definition: quda.h:143
QudaVerbosity verbosity_precondition
Definition: quda.h:210
void computeHISQForceQuda(void *momentum, long long *flops, const double level2_coeff[6], const double fat7_coeff[6], const void *const staple_src[4], const void *const one_link_src[4], const void *const naik_src[4], const void *const w_link, const void *const v_link, const void *const u_link, const QudaGaugeParam *param)
enum QudaPrecision_s QudaPrecision
int ga_pad
Definition: quda.h:53
void * qudaCreateExtendedGaugeField(void *gauge, int geometry, int precision)
QudaGaugeFixed gauge_fix
Definition: quda.h:51
QudaTune tune
Definition: quda.h:185
void setUnitarizeForceConstants(double unitarize_eps, double hisq_force_filter, double max_det_error, bool allow_svd, bool svd_only, double svd_rel_error, double svd_abs_error)
QudaInverterType inv_type_precondition
Definition: quda.h:203
QudaVerbosity getVerbosity()
Definition: util_quda.cpp:20
QudaLinkType type
Definition: quda.h:35
double kappa
Definition: quda.h:89
void invertQuda(void *h_x, void *h_b, QudaInvertParam *param)
#define errorQuda(...)
Definition: util_quda.h:73
double tol
Definition: quda.h:102
QudaDslashType dslash_type
Definition: quda.h:85
void setUnitarizeLinksConstants(double unitarize_eps, double max_error, bool allow_svd, bool svd_only, double svd_rel_error, double svd_abs_error, bool check_unitarization=true)
QudaReconstructType reconstruct_precondition
Definition: quda.h:49
QudaInverterType inv_type
Definition: quda.h:86
void qudaComputeOprod(int precision, int num_terms, double **coeff, void **quark_field, void *oprod[2])
QudaGaugeParam gaugeParam
QudaPrecision cuda_prec
Definition: quda.h:152
void computeKSLinkQuda(void *fatlink, void *longlink, void *ulink, void *inlink, double *path_coeff, QudaGaugeParam *param, QudaComputeFatMethod method)
enum QudaSolveType_s QudaSolveType
void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param)
int computeGaugeForceQuda(void *mom, void *sitelink, int ***input_path_buf, int *path_length, double *loop_coeff, int num_paths, int max_length, double dt, QudaGaugeParam *qudaGaugeParam, double *timeinfo)
__global__ void const RealA *const const RealA *const const RealA *const const RealB *const const RealB *const int int mu
void qudaInit(QudaInitArgs_t input)
QudaPrecision cpu_prec
Definition: quda.h:151
double trlogA[2]
Definition: quda.h:172
void * createGaugeField(void *gauge, int geometry, QudaGaugeParam *param)
void qudaLoadGaugeField(int external_precision, int quda_precision, QudaInvertArgs_t inv_args, const void *milc_link)
QudaPrecision precision
Definition: lattice_field.h:41
QudaDagType dagger
Definition: quda.h:145
#define MAX(a, b)
void qudaCloverMultishiftInvert(int external_precision, int quda_precision, int num_offsets, double *const offset, double kappa, double clover_coeff, QudaInvertArgs_t inv_args, const double *target_residual, const void *milc_link, void *milc_clover, void *milc_clover_inv, void *source, void **solutionArray, double *const final_residual, int *num_iters)
QudaGaugeFieldOrder gauge_order
Definition: quda.h:36
double true_res
Definition: quda.h:105
void computeAsqtadForceQuda(void *const momentum, long long *flops, const double act_path_coeff[6], const void *const one_link_src[4], const void *const naik_src[4], const void *const link, const QudaGaugeParam *param)
void qudaSaveGaugeField(void *gauge, void *inGauge)
void qudaMultishiftInvert(int external_precision, int precision, int num_offsets, double *const offset, QudaInvertArgs_t inv_args, const double *target_residual, const double *target_relative_residual, const void *const milc_fatlink, const void *const milc_longlink, const double tadpole, void *source, void **solutionArray, double *const final_residual, double *const final_relative_residual, int *num_iters)
void loadCloverQuda(void *h_clover, void *h_clovinv, QudaInvertParam *inv_param)
int length[]
QudaPrecision cpu_prec
Definition: dslash_test.cpp:34
int compute_clover_trlog
Definition: quda.h:171
QudaSiteSubset siteSubset
Definition: lattice_field.h:42
cudaGaugeField * cudaGauge
void qudaSetLayout(QudaLayout_t layout)
QudaPrecision clover_cuda_prec_sloppy
Definition: quda.h:163
void invertMultiShiftMDQuda(void **_hp_xe, void **_hp_xo, void **_hp_ye, void **_hp_yo, void *_hp_b, QudaInvertParam *param)
QudaFieldLocation input_location
Definition: quda.h:82
void freeGaugeQuda(void)
void initCommsGridQuda(int nDim, const int *dims, QudaCommsMap func, void *fdata)
double true_res_hq_offset[QUDA_MAX_MULTI_SHIFT]
Definition: quda.h:140
double reliable_delta
Definition: quda.h:108
void * longlink[4]
QudaUseInitGuess use_init_guess
Definition: quda.h:167
QudaGaugeParam param
Definition: pack_test.cpp:17
int llfat_ga_pad
Definition: quda.h:58
QudaSolutionType solution_type
Definition: quda.h:142
int x[QUDA_MAX_DIM]
Definition: lattice_field.h:38
QudaPrecision clover_cuda_prec
Definition: quda.h:162
double scale
Definition: quda.h:33
void initQuda(int device)
QudaFieldLocation output_location
Definition: quda.h:83
QudaPrecision clover_cuda_prec_precondition
Definition: quda.h:164
int site_ga_pad
Definition: quda.h:55
void qudaCloverTrace(void *out, void *clover, int mu, int nu)
VOLATILE spinorFloat kappa
__device__ __host__ int index(int i, int j)
Definition: quda_matrix.h:342
void qudaFreeCloverField()
QudaPrecision cuda_prec_sloppy
Definition: quda.h:153
QudaVerbosity verbosity
Definition: quda.h:174
ColorSpinorParam csParam
Definition: pack_test.cpp:24
double tol_offset[QUDA_MAX_MULTI_SHIFT]
Definition: quda.h:131
double true_res_offset[QUDA_MAX_MULTI_SHIFT]
Definition: quda.h:137
QudaInvertParam newQudaInvertParam(void)
double gflops
Definition: quda.h:182
void * qudaCreateGaugeField(void *gauge, int geometry, int precision)
void computeCloverDerivativeQuda(void *out, void *gauge, void *oprod, int mu, int nu, double coeff, QudaParity parity, QudaGaugeParam *param, int conjugate)
QudaPrecision cuda_prec_precondition
Definition: quda.h:48
QudaCloverFieldOrder clover_order
Definition: quda.h:166
double tol_hq
Definition: quda.h:104
void qudaUpdateU(int precision, double eps, void *momentum, void *link)
double true_res_hq
Definition: quda.h:106
enum QudaSolutionType_s QudaSolutionType
QudaGammaBasis gamma_basis
Definition: quda.h:158
const int * machsize
__constant__ double coeff
QudaPrecision cuda_prec_sloppy
Definition: quda.h:45
double tol_precondition
Definition: quda.h:213
double offset[QUDA_MAX_MULTI_SHIFT]
Definition: quda.h:128
void qudaFreeGaugeField()
int use_sloppy_partial_accumulator
Definition: quda.h:109
enum QudaParity_s QudaParity
QudaReconstructType reconstruct
Definition: quda.h:43
enum QudaLinkType_s QudaLinkType
QudaPrecision cuda_prec
Definition: quda.h:42
int X[4]
Definition: quda.h:29
void qudaLoadCloverField(int external_precision, int quda_precision, QudaInvertArgs_t inv_args, void *milc_clover, void *milc_clover_inv, QudaSolutionType solution_type, QudaSolveType solve_type, double clover_coeff, int compute_trlog, double *trlog)
double mass
Definition: quda.h:88
void qudaFinalize()
int gcrNkrylov
Definition: quda.h:192
void qudaHisqForce(int precision, const double level2_coeff[6], const double fat7_coeff[6], const void *const staple_src[4], const void *const one_link_src[4], const void *const naik_src[4], const void *const w_link, const void *const v_link, const void *const u_link, void *const milc_momentum)
void qudaCloverInvert(int external_precision, int quda_precision, double kappa, double clover_coeff, QudaInvertArgs_t inv_args, double target_residual, double target_fermilab_residual, const void *milc_link, void *milc_clover, void *milc_clover_inv, void *source, void *solution, double *const final_residual, double *const final_fermilab_residual, int *num_iters)
void qudaCloverDerivative(void *out, void *gauge, void *oprod, int mu, int nu, double coeff, int precision, int parity, int conjugate)
QudaPrecision cuda_prec
Definition: dslash_test.cpp:35
void qudaLoadUnitarizedLink(int precision, QudaFatLinkArgs_t fatlink_args, const double path_coeff[6], void *inlink, void *fatlink, void *ulink)
QudaResidualType_s
Definition: enum_quda.h:146
void computeStaggeredOprodQuda(void **oprod, void **quark, int num, double **coeff, QudaGaugeParam *param)
double tadpole_coeff
Definition: quda.h:32
cpuColorSpinorField * out
QudaPrecision cuda_prec_precondition
Definition: quda.h:154
void * memset(void *s, int c, size_t n)
void * createExtendedGaugeField(void *gauge, int geometry, QudaGaugeParam *param)
GaugeFieldParam gParam
void updateGaugeFieldQuda(void *gauge, void *momentum, double dt, int conj_mom, int exact, QudaGaugeParam *param)
void qudaCloverMultishiftMDInvert(int external_precision, int quda_precision, int num_offsets, double *const offset, double kappa, double clover_coeff, QudaInvertArgs_t inv_args, const double *target_residual, const void *milc_link, void *milc_clover, void *milc_clover_inv, void *source, void **psiEven, void **psiOdd, void **pEven, void **pOdd, double *const final_residual, int *num_iters)
Main header file for the QUDA library.
const int * latsize
int mom_ga_pad
Definition: quda.h:59
void qudaInvert(int external_precision, int quda_precision, double mass, QudaInvertArgs_t inv_args, double target_resid, double target_relresid, const void *const milc_fatlink, const void *const milc_longlink, const double tadpole, void *source, void *solution, double *const final_resid, double *const final_rel_resid, int *num_iters)
#define printfQuda(...)
Definition: util_quda.h:67
QudaTboundary t_boundary
Definition: quda.h:38
void saveGaugeField(void *outGauge, void *inGauge, QudaGaugeParam *param)
void setKernelPackT(bool pack)
Definition: dslash_quda.cu:82
int device
Definition: test_util.cpp:1546
QudaResidualType residual_type
Definition: quda.h:235
int num_offset
Definition: quda.h:123
enum QudaVerbosity_s QudaVerbosity
QudaVerbosity verbosity
double mass
Definition: test_util.cpp:1569
QudaPrecision prec
Definition: test_util.cpp:1551
void qudaLoadKSLink(int precision, QudaFatLinkArgs_t fatlink_args, const double act_path_coeff[6], void *inlink, void *fatlink, void *longlink)
QudaPrecision clover_cpu_prec
Definition: quda.h:161
enum QudaComputeFatMethod_s QudaComputeFatMethod
const QudaParity parity
Definition: dslash_test.cpp:29
void * gauge[4]
Definition: su3_test.cpp:15
QudaMatPCType matpc_type
Definition: quda.h:144
enum QudaInverterType_s QudaInverterType
void qudaAsqtadForce(int precision, const double act_path_coeff[6], const void *const one_link_src[4], const void *const naik_src[4], const void *const link, void *const milc_momentum)
QudaPrecision cpu_prec
Definition: quda.h:40
QudaLayout_t layout
void qudaDestroyGaugeField(void *gauge)
QudaGaugeParam newQudaGaugeParam(void)
QudaPreserveSource preserve_source
Definition: quda.h:149
double clover_coeff
Definition: quda.h:169
void * fatlink[4]