QUDA  v0.7.0
A library for QCD on GPUs
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
invert_test.cpp
Go to the documentation of this file.
1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <time.h>
4 #include <math.h>
5 #include <string.h>
6 
7 #include <util_quda.h>
8 #include <test_util.h>
9 #include <dslash_util.h>
10 #include <blas_reference.h>
13 #include "misc.h"
14 
15 #include "face_quda.h"
16 
17 #if defined(QMP_COMMS)
18 #include <qmp.h>
19 #elif defined(MPI_COMMS)
20 #include <mpi.h>
21 #endif
22 
23 #include <gauge_qio.h>
24 
25 #define MAX(a,b) ((a)>(b)?(a):(b))
26 
27 // In a typical application, quda.h is the only QUDA header required.
28 #include <quda.h>
29 
30 // Wilson, clover-improved Wilson, twisted mass, and domain wall are supported.
32 
33 // Twisted mass flavor type
35 
36 extern bool tune;
37 extern int device;
38 extern int xdim;
39 extern int ydim;
40 extern int zdim;
41 extern int tdim;
42 extern int Lsdim;
43 extern int gridsize_from_cmdline[];
45 extern QudaPrecision prec;
50 extern int multishift; // whether to test multi-shift or standard solver
51 extern double mass; // mass of Dirac operator
52 extern QudaMassNormalization normalization; // mass normalization of Dirac operators
53 extern QudaMatPCType matpc_type; // preconditioning type
54 
55 extern char latfile[];
56 
57 extern void usage(char** );
58 
59 void
61 {
62  printfQuda("running the following test:\n");
63 
64  printfQuda("prec prec_sloppy multishift matpc_type recon recon_sloppy S_dimension T_dimension Ls_dimension dslash_type normalization\n");
65  printfQuda("%6s %6s %d %12s %2s %2s %3d/%3d/%3d %3d %2d %14s %8s\n",
69  xdim, ydim, zdim, tdim, Lsdim,
72 
73  printfQuda("Grid partition info: X Y Z T\n");
74  printfQuda(" %d %d %d %d\n",
75  dimPartitioned(0),
76  dimPartitioned(1),
77  dimPartitioned(2),
78  dimPartitioned(3));
79 
80  return ;
81 
82 }
83 
84 int main(int argc, char **argv)
85 {
86 
87  for (int i = 1; i < argc; i++){
88  if(process_command_line_option(argc, argv, &i) == 0){
89  continue;
90  }
91  printfQuda("ERROR: Invalid option:%s\n", argv[i]);
92  usage(argv);
93  }
94 
96  prec_sloppy = prec;
97  }
100  }
101 
102  // initialize QMP/MPI, QUDA comms grid and RNG (test_util.cpp)
103  initComms(argc, argv, gridsize_from_cmdline);
104 
106 
107  // *** QUDA parameters begin here.
108 
116  printfQuda("dslash_type %d not supported\n", dslash_type);
117  exit(0);
118  }
119 
122  QudaPrecision cuda_prec_sloppy = prec_sloppy;
123  QudaPrecision cuda_prec_precondition = QUDA_HALF_PRECISION;
124 
127 
128  double kappa5;
129 
130  gauge_param.X[0] = xdim;
131  gauge_param.X[1] = ydim;
132  gauge_param.X[2] = zdim;
133  gauge_param.X[3] = tdim;
134  inv_param.Ls = 1;
135 
136  gauge_param.anisotropy = 1.0;
137  gauge_param.type = QUDA_WILSON_LINKS;
138  gauge_param.gauge_order = QUDA_QDP_GAUGE_ORDER;
139  gauge_param.t_boundary = QUDA_ANTI_PERIODIC_T;
140 
141  gauge_param.cpu_prec = cpu_prec;
142  gauge_param.cuda_prec = cuda_prec;
143  gauge_param.reconstruct = link_recon;
144  gauge_param.cuda_prec_sloppy = cuda_prec_sloppy;
146  gauge_param.cuda_prec_precondition = cuda_prec_precondition;
148  gauge_param.gauge_fix = QUDA_GAUGE_FIXED_NO;
149 
150  inv_param.dslash_type = dslash_type;
151 
152  inv_param.mass = mass;
153  inv_param.kappa = 1.0 / (2.0 * (1 + 3/gauge_param.anisotropy + mass));
154 
156  inv_param.mu = 0.12;
157  inv_param.epsilon = 0.1385;
158  inv_param.twist_flavor = twist_flavor;
159  inv_param.Ls = (inv_param.twist_flavor == QUDA_TWIST_NONDEG_DOUBLET) ? 2 : 1;
160  } else if (dslash_type == QUDA_DOMAIN_WALL_DSLASH ||
162  inv_param.m5 = -1.8;
163  kappa5 = 0.5/(5 + inv_param.m5);
164  inv_param.Ls = Lsdim;
165  } else if (dslash_type == QUDA_MOBIUS_DWF_DSLASH) {
166  inv_param.m5 = -1.8;
167  kappa5 = 0.5/(5 + inv_param.m5);
168  inv_param.Ls = Lsdim;
169  for(int k = 0; k < Lsdim; k++)
170  {
171  // b5[k], c[k] values are chosen for arbitrary values,
172  // but the difference of them are same as 1.0
173  inv_param.b_5[k] = 1.452;
174  inv_param.c_5[k] = 0.452;
175  }
176  }
177 
178  // offsets used only by multi-shift solver
179  inv_param.num_offset = 4;
180  double offset[4] = {0.01, 0.02, 0.03, 0.04};
181  for (int i=0; i<inv_param.num_offset; i++) inv_param.offset[i] = offset[i];
182 
183  inv_param.inv_type = inv_type;
185  inv_param.solution_type = QUDA_MAT_SOLUTION;
186  } else {
188  }
189  inv_param.matpc_type = matpc_type;
190 
191  inv_param.dagger = QUDA_DAG_NO;
192  inv_param.mass_normalization = normalization;
194 
201  inv_param.solve_type = QUDA_NORMOP_PC_SOLVE;
202  } else {
203  inv_param.solve_type = QUDA_DIRECT_PC_SOLVE;
204  }
205 
206  inv_param.pipeline = 0;
207 
208  inv_param.Nsteps = 2;
209  inv_param.gcrNkrylov = 10;
210  inv_param.tol = 1e-7;
211  inv_param.tol_restart = 1e-3; //now theoretical background for this parameter...
212 #if __COMPUTE_CAPABILITY__ >= 200
213  // require both L2 relative and heavy quark residual to determine convergence
215  inv_param.tol_hq = 1e-3; // specify a tolerance for the residual for heavy quark residual
216 #else
217  // Pre Fermi architecture only supports L2 relative residual norm
219 #endif
220  // these can be set individually
221  for (int i=0; i<inv_param.num_offset; i++) {
222  inv_param.tol_offset[i] = inv_param.tol;
223  inv_param.tol_hq_offset[i] = inv_param.tol_hq;
224  }
225  inv_param.maxiter = 10000;
226  inv_param.reliable_delta = 1e-1;
227  inv_param.use_sloppy_partial_accumulator = 0;
228  inv_param.max_res_increase = 1;
229 
230  // domain decomposition preconditioner parameters
232 
234  inv_param.precondition_cycle = 1;
235  inv_param.tol_precondition = 1e-1;
236  inv_param.maxiter_precondition = 10;
238  inv_param.cuda_prec_precondition = cuda_prec_precondition;
239  inv_param.omega = 1.0;
240 
241  inv_param.cpu_prec = cpu_prec;
242  inv_param.cuda_prec = cuda_prec;
243  inv_param.cuda_prec_sloppy = cuda_prec_sloppy;
246  inv_param.dirac_order = QUDA_DIRAC_ORDER;
247 
250 
251  inv_param.tune = tune ? QUDA_TUNE_YES : QUDA_TUNE_NO;
252 
253  gauge_param.ga_pad = 0; // 24*24*24/2;
254  inv_param.sp_pad = 0; // 24*24*24/2;
255  inv_param.cl_pad = 0; // 24*24*24/2;
256 
257  // For multi-GPU, ga_pad must be large enough to store a time-slice
258 #ifdef MULTI_GPU
259  int x_face_size = gauge_param.X[1]*gauge_param.X[2]*gauge_param.X[3]/2;
260  int y_face_size = gauge_param.X[0]*gauge_param.X[2]*gauge_param.X[3]/2;
261  int z_face_size = gauge_param.X[0]*gauge_param.X[1]*gauge_param.X[3]/2;
262  int t_face_size = gauge_param.X[0]*gauge_param.X[1]*gauge_param.X[2]/2;
263  int pad_size =MAX(x_face_size, y_face_size);
264  pad_size = MAX(pad_size, z_face_size);
265  pad_size = MAX(pad_size, t_face_size);
266  gauge_param.ga_pad = pad_size;
267 #endif
268 
270  inv_param.clover_cpu_prec = cpu_prec;
271  inv_param.clover_cuda_prec = cuda_prec;
272  inv_param.clover_cuda_prec_sloppy = cuda_prec_sloppy;
273  inv_param.clover_cuda_prec_precondition = cuda_prec_precondition;
275  inv_param.clover_coeff = 1.5*inv_param.kappa;
276  }
277 
278  inv_param.verbosity = QUDA_VERBOSE;
279 
280  // *** Everything between here and the call to initQuda() is
281  // *** application-specific.
282 
283  // set parameters for the reference Dslash, and prepare fields to be loaded
287  dw_setDims(gauge_param.X, inv_param.Ls);
288  } else {
289  setDims(gauge_param.X);
290  }
291 
292  setSpinorSiteSize(24);
293 
294  size_t gSize = (gauge_param.cpu_prec == QUDA_DOUBLE_PRECISION) ? sizeof(double) : sizeof(float);
295  size_t sSize = (inv_param.cpu_prec == QUDA_DOUBLE_PRECISION) ? sizeof(double) : sizeof(float);
296 
297  void *gauge[4], *clover_inv=0, *clover=0;
298 
299  for (int dir = 0; dir < 4; dir++) {
300  gauge[dir] = malloc(V*gaugeSiteSize*gSize);
301  }
302 
303  if (strcmp(latfile,"")) { // load in the command line supplied gauge field
304  read_gauge_field(latfile, gauge, gauge_param.cpu_prec, gauge_param.X, argc, argv);
305  construct_gauge_field(gauge, 2, gauge_param.cpu_prec, &gauge_param);
306  } else { // else generate a random SU(3) field
307  construct_gauge_field(gauge, 1, gauge_param.cpu_prec, &gauge_param);
308  }
309 
311  double norm = 0.0; // clover components are random numbers in the range (-norm, norm)
312  double diag = 1.0; // constant added to the diagonal
313 
314  size_t cSize = (inv_param.clover_cpu_prec == QUDA_DOUBLE_PRECISION) ? sizeof(double) : sizeof(float);
315  clover_inv = malloc(V*cloverSiteSize*cSize);
316  construct_clover_field(clover_inv, norm, diag, inv_param.clover_cpu_prec);
317 
318  // The uninverted clover term is only needed when solving the unpreconditioned
319  // system or when using "asymmetric" even/odd preconditioning.
320  int preconditioned = (inv_param.solve_type == QUDA_DIRECT_PC_SOLVE ||
321  inv_param.solve_type == QUDA_NORMOP_PC_SOLVE);
322  int asymmetric = preconditioned &&
325  if (!preconditioned) {
326  clover = clover_inv;
327  clover_inv = NULL;
328  } else if (asymmetric) { // fake it by using the same random matrix
329  clover = clover_inv; // for both clover and clover_inv
330  } else {
331  clover = NULL;
332  }
333  }
334 
335  void *spinorIn = malloc(V*spinorSiteSize*sSize*inv_param.Ls);
336  void *spinorCheck = malloc(V*spinorSiteSize*sSize*inv_param.Ls);
337 
338  void *spinorOut = NULL, **spinorOutMulti = NULL;
339  if (multishift) {
340  spinorOutMulti = (void**)malloc(inv_param.num_offset*sizeof(void *));
341  for (int i=0; i<inv_param.num_offset; i++) {
342  spinorOutMulti[i] = malloc(V*spinorSiteSize*sSize*inv_param.Ls);
343  }
344  } else {
345  spinorOut = malloc(V*spinorSiteSize*sSize*inv_param.Ls);
346  }
347 
348  memset(spinorIn, 0, inv_param.Ls*V*spinorSiteSize*sSize);
349  memset(spinorCheck, 0, inv_param.Ls*V*spinorSiteSize*sSize);
350  if (multishift) {
351  for (int i=0; i<inv_param.num_offset; i++) memset(spinorOutMulti[i], 0, inv_param.Ls*V*spinorSiteSize*sSize);
352  } else {
353  memset(spinorOut, 0, inv_param.Ls*V*spinorSiteSize*sSize);
354  }
355 
356  // create a point source at 0 (in each subvolume... FIXME)
357 
358  // create a point source at 0 (in each subvolume... FIXME)
359 
360  if (inv_param.cpu_prec == QUDA_SINGLE_PRECISION) {
361  //((float*)spinorIn)[0] = 1.0;
362  for (int i=0; i<inv_param.Ls*V*spinorSiteSize; i++) ((float*)spinorIn)[i] = rand() / (float)RAND_MAX;
363  } else {
364  //((double*)spinorIn)[0] = 1.0;
365  for (int i=0; i<inv_param.Ls*V*spinorSiteSize; i++) ((double*)spinorIn)[i] = rand() / (double)RAND_MAX;
366  }
367 
368  // start the timer
369  double time0 = -((double)clock());
370 
371  // initialize the QUDA library
372  initQuda(device);
373 
374  // load the gauge field
375  loadGaugeQuda((void*)gauge, &gauge_param);
376 
377  // load the clover term, if desired
378  if (dslash_type == QUDA_CLOVER_WILSON_DSLASH) loadCloverQuda(clover, clover_inv, &inv_param);
379 
380  if (dslash_type == QUDA_TWISTED_CLOVER_DSLASH) loadCloverQuda(NULL, NULL, &inv_param);
381 
382  // perform the inversion
383  if (multishift) {
384  invertMultiShiftQuda(spinorOutMulti, spinorIn, &inv_param);
385  } else {
386  invertQuda(spinorOut, spinorIn, &inv_param);
387  }
388 
389  // stop the timer
390  time0 += clock();
391  time0 /= CLOCKS_PER_SEC;
392 
393  printfQuda("Device memory used:\n Spinor: %f GiB\n Gauge: %f GiB\n",
394  inv_param.spinorGiB, gauge_param.gaugeGiB);
396  printfQuda(" Clover: %f GiB\n", inv_param.cloverGiB);
397  }
398  printfQuda("\nDone: %i iter / %g secs = %g Gflops, total time = %g secs\n",
399  inv_param.iter, inv_param.secs, inv_param.gflops/inv_param.secs, time0);
400 
401  if (multishift) {
402  if (inv_param.mass_normalization == QUDA_MASS_NORMALIZATION) {
403  errorQuda("Mass normalization not supported for multi-shift solver in invert_test");
404  }
405 
406  void *spinorTmp = malloc(V*spinorSiteSize*sSize*inv_param.Ls);
407 
408  printfQuda("Host residuum checks: \n");
409  for(int i=0; i < inv_param.num_offset; i++) {
410  ax(0, spinorCheck, V*spinorSiteSize, inv_param.cpu_prec);
411 
413  if (inv_param.twist_flavor != QUDA_TWIST_MINUS && inv_param.twist_flavor != QUDA_TWIST_PLUS)
414  errorQuda("Twisted mass solution type not supported");
415  tm_matpc(spinorTmp, gauge, spinorOutMulti[i], inv_param.kappa, inv_param.mu, inv_param.twist_flavor,
416  inv_param.matpc_type, 0, inv_param.cpu_prec, gauge_param);
417  tm_matpc(spinorCheck, gauge, spinorTmp, inv_param.kappa, inv_param.mu, inv_param.twist_flavor,
418  inv_param.matpc_type, 1, inv_param.cpu_prec, gauge_param);
420  wil_matpc(spinorTmp, gauge, spinorOutMulti[i], inv_param.kappa, inv_param.matpc_type, 0,
421  inv_param.cpu_prec, gauge_param);
422  wil_matpc(spinorCheck, gauge, spinorTmp, inv_param.kappa, inv_param.matpc_type, 1,
423  inv_param.cpu_prec, gauge_param);
424  } else {
425  printfQuda("Domain wall not supported for multi-shift\n");
426  exit(-1);
427  }
428 
429  axpy(inv_param.offset[i], spinorOutMulti[i], spinorCheck, Vh*spinorSiteSize, inv_param.cpu_prec);
430  mxpy(spinorIn, spinorCheck, Vh*spinorSiteSize, inv_param.cpu_prec);
431  double nrm2 = norm_2(spinorCheck, Vh*spinorSiteSize, inv_param.cpu_prec);
432  double src2 = norm_2(spinorIn, Vh*spinorSiteSize, inv_param.cpu_prec);
433  double l2r = sqrt(nrm2 / src2);
434 
435  printfQuda("Shift %d residuals: (L2 relative) tol %g, QUDA = %g, host = %g; (heavy-quark) tol %g, QUDA = %g\n",
436  i, inv_param.tol_offset[i], inv_param.true_res_offset[i], l2r,
437  inv_param.tol_hq_offset[i], inv_param.true_res_hq_offset[i]);
438  }
439  free(spinorTmp);
440 
441  } else {
442 
443  if (inv_param.solution_type == QUDA_MAT_SOLUTION) {
444 
446  if(inv_param.twist_flavor == QUDA_TWIST_PLUS || inv_param.twist_flavor == QUDA_TWIST_MINUS)
447  tm_mat(spinorCheck, gauge, spinorOut, inv_param.kappa, inv_param.mu, inv_param.twist_flavor, 0, inv_param.cpu_prec, gauge_param);
448  else
449  {
450  int tm_offset = V*spinorSiteSize; //12*spinorRef->Volume();
451  void *evenOut = spinorCheck;
452  void *oddOut = cpu_prec == sizeof(double) ? (void*)((double*)evenOut + tm_offset): (void*)((float*)evenOut + tm_offset);
453 
454  void *evenIn = spinorOut;
455  void *oddIn = cpu_prec == sizeof(double) ? (void*)((double*)evenIn + tm_offset): (void*)((float*)evenIn + tm_offset);
456 
457  tm_ndeg_mat(evenOut, oddOut, gauge, evenIn, oddIn, inv_param.kappa, inv_param.mu, inv_param.epsilon, 0, inv_param.cpu_prec, gauge_param);
458  }
460  wil_mat(spinorCheck, gauge, spinorOut, inv_param.kappa, 0, inv_param.cpu_prec, gauge_param);
461  } else if (dslash_type == QUDA_DOMAIN_WALL_DSLASH) {
462  dw_mat(spinorCheck, gauge, spinorOut, kappa5, inv_param.dagger, inv_param.cpu_prec, gauge_param, inv_param.mass);
463 // } else if (dslash_type == QUDA_DOMAIN_WALL_4D_DSLASH) {
464 // dw_4d_mat(spinorCheck, gauge, spinorOut, kappa5, inv_param.dagger, inv_param.cpu_prec, gauge_param, inv_param.mass);
465 // } else if (dslash_type == QUDA_MOBIUS_DWF_DSLASH) {
466 // mdw_mat(spinorCheck, gauge, spinorOut, kappa5, inv_param.dagger, inv_param.cpu_prec, gauge_param, inv_param.mass);
467  } else {
468  printfQuda("Unsupported dslash_type\n");
469  exit(-1);
470  }
471  if (inv_param.mass_normalization == QUDA_MASS_NORMALIZATION) {
475  ax(0.5/kappa5, spinorCheck, V*spinorSiteSize*inv_param.Ls, inv_param.cpu_prec);
477  ax(0.5/inv_param.kappa, spinorCheck, 2*V*spinorSiteSize, inv_param.cpu_prec);
478  } else {
479  ax(0.5/inv_param.kappa, spinorCheck, V*spinorSiteSize, inv_param.cpu_prec);
480  }
481  }
482 
483  } else if(inv_param.solution_type == QUDA_MATPC_SOLUTION) {
484 
486  if (inv_param.twist_flavor != QUDA_TWIST_MINUS && inv_param.twist_flavor != QUDA_TWIST_PLUS)
487  errorQuda("Twisted mass solution type not supported");
488  tm_matpc(spinorCheck, gauge, spinorOut, inv_param.kappa, inv_param.mu, inv_param.twist_flavor,
489  inv_param.matpc_type, 0, inv_param.cpu_prec, gauge_param);
491  wil_matpc(spinorCheck, gauge, spinorOut, inv_param.kappa, inv_param.matpc_type, 0,
492  inv_param.cpu_prec, gauge_param);
493  } else if (dslash_type == QUDA_DOMAIN_WALL_DSLASH) {
494  dw_matpc(spinorCheck, gauge, spinorOut, kappa5, inv_param.matpc_type, 0, inv_param.cpu_prec, gauge_param, inv_param.mass);
495  } else if (dslash_type == QUDA_DOMAIN_WALL_4D_DSLASH) {
496  dw_4d_matpc(spinorCheck, gauge, spinorOut, kappa5, inv_param.matpc_type, 0, inv_param.cpu_prec, gauge_param, inv_param.mass);
497  } else if (dslash_type == QUDA_MOBIUS_DWF_DSLASH) {
498  double *kappa_b, *kappa_c;
499  kappa_b = (double*)malloc(Lsdim*sizeof(double));
500  kappa_c = (double*)malloc(Lsdim*sizeof(double));
501  for(int xs = 0 ; xs < Lsdim ; xs++)
502  {
503  kappa_b[xs] = 1.0/(2*(inv_param.b_5[xs]*(4.0 + inv_param.m5) + 1.0));
504  kappa_c[xs] = 1.0/(2*(inv_param.c_5[xs]*(4.0 + inv_param.m5) - 1.0));
505  }
506  mdw_matpc(spinorCheck, gauge, spinorOut, kappa_b, kappa_c, inv_param.matpc_type, 0, inv_param.cpu_prec, gauge_param, inv_param.mass, inv_param.b_5, inv_param.c_5);
507  free(kappa_b);
508  free(kappa_c);
509  } else {
510  printfQuda("Unsupported dslash_type\n");
511  exit(-1);
512  }
513 
514  if (inv_param.mass_normalization == QUDA_MASS_NORMALIZATION) {
518  ax(0.25/(kappa5*kappa5), spinorCheck, V*spinorSiteSize*inv_param.Ls, inv_param.cpu_prec);
519  } else {
520  ax(0.25/(inv_param.kappa*inv_param.kappa), spinorCheck, Vh*spinorSiteSize, inv_param.cpu_prec);
521 
522  }
523  }
524 
525  } else if (inv_param.solution_type == QUDA_MATPCDAG_MATPC_SOLUTION) {
526 
527  void *spinorTmp = malloc(V*spinorSiteSize*sSize*inv_param.Ls);
528 
529  ax(0, spinorCheck, V*spinorSiteSize, inv_param.cpu_prec);
530 
532  if (inv_param.twist_flavor != QUDA_TWIST_MINUS && inv_param.twist_flavor != QUDA_TWIST_PLUS)
533  errorQuda("Twisted mass solution type not supported");
534  tm_matpc(spinorTmp, gauge, spinorOut, inv_param.kappa, inv_param.mu, inv_param.twist_flavor,
535  inv_param.matpc_type, 0, inv_param.cpu_prec, gauge_param);
536  tm_matpc(spinorCheck, gauge, spinorTmp, inv_param.kappa, inv_param.mu, inv_param.twist_flavor,
537  inv_param.matpc_type, 1, inv_param.cpu_prec, gauge_param);
539  wil_matpc(spinorTmp, gauge, spinorOut, inv_param.kappa, inv_param.matpc_type, 0,
540  inv_param.cpu_prec, gauge_param);
541  wil_matpc(spinorCheck, gauge, spinorTmp, inv_param.kappa, inv_param.matpc_type, 1,
542  inv_param.cpu_prec, gauge_param);
543  } else {
544  printfQuda("Unsupported dslash_type\n");
545  exit(-1);
546  }
547 
548  if (inv_param.mass_normalization == QUDA_MASS_NORMALIZATION) {
549  errorQuda("Mass normalization not implemented");
550  }
551 
552  free(spinorTmp);
553  }
554 
555 
556  int vol = inv_param.solution_type == QUDA_MAT_SOLUTION ? V : Vh;
557  mxpy(spinorIn, spinorCheck, vol*spinorSiteSize*inv_param.Ls, inv_param.cpu_prec);
558  double nrm2 = norm_2(spinorCheck, vol*spinorSiteSize*inv_param.Ls, inv_param.cpu_prec);
559  double src2 = norm_2(spinorIn, vol*spinorSiteSize*inv_param.Ls, inv_param.cpu_prec);
560  double l2r = sqrt(nrm2 / src2);
561 
562  printfQuda("Residuals: (L2 relative) tol %g, QUDA = %g, host = %g; (heavy-quark) tol %g, QUDA = %g\n",
563  inv_param.tol, inv_param.true_res, l2r, inv_param.tol_hq, inv_param.true_res_hq);
564 
565  }
566 
567  freeGaugeQuda();
569 
570  // finalize the QUDA library
571  endQuda();
572 
573  finalizeComms();
574 
575  return 0;
576 }
int maxiter_precondition
Definition: quda.h:216
int device
Definition: test_util.cpp:1546
QudaGaugeParam gauge_param
Definition: dslash_test.cpp:37
double secs
Definition: quda.h:183
int dimPartitioned(int dim)
Definition: test_util.cpp:1577
QudaDiracFieldOrder dirac_order
Definition: quda.h:156
QudaMassNormalization mass_normalization
Definition: quda.h:146
double tol_hq_offset[QUDA_MAX_MULTI_SHIFT]
Definition: quda.h:134
enum QudaMassNormalization_s QudaMassNormalization
QudaReconstructType reconstruct_sloppy
Definition: quda.h:46
double anisotropy
Definition: quda.h:31
void freeCloverQuda(void)
__constant__ int Vh
QudaTwistFlavorType twist_flavor
Definition: test_util.cpp:1570
double b_5[QUDA_MAX_DWF_LS]
Definition: quda.h:94
void dw_4d_matpc(void *out, void **gauge, void *in, double kappa, QudaMatPCType matpc_type, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
void invertMultiShiftQuda(void **_hp_x, void *_hp_b, QudaInvertParam *param)
void endQuda(void)
void construct_gauge_field(void **gauge, int type, QudaPrecision precision, QudaGaugeParam *param)
Definition: test_util.cpp:1003
QudaSolveType solve_type
Definition: quda.h:143
QudaVerbosity verbosity_precondition
Definition: quda.h:210
enum QudaPrecision_s QudaPrecision
int V
Definition: test_util.cpp:29
int ga_pad
Definition: quda.h:53
int multishift
Definition: test_util.cpp:1567
int main(int argc, char **argv)
Definition: invert_test.cpp:84
void dw_setDims(int *X, const int L5)
Definition: test_util.cpp:125
cpuColorSpinorField * spinorTmp
Definition: dslash_test.cpp:40
double mu
Definition: quda.h:97
QudaGaugeFixed gauge_fix
Definition: quda.h:51
QudaTune tune
Definition: quda.h:185
QudaSchwarzType schwarz_type
Definition: quda.h:225
__host__ __device__ ValueType norm(const complex< ValueType > &z)
Returns the magnitude of z squared.
Definition: complex_quda.h:859
void tm_mat(void *out, void **gauge, void *in, double kappa, double mu, QudaTwistFlavorType flavor, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param)
enum QudaResidualType_s QudaResidualType
QudaInverterType inv_type_precondition
Definition: quda.h:203
int Lsdim
Definition: test_util.cpp:1557
QudaLinkType type
Definition: quda.h:35
void mdw_matpc(void *out, void **gauge, void *in, double *kappa_b, double *kappa_c, QudaMatPCType matpc_type, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double *b5, double *c5)
double kappa
Definition: quda.h:89
void invertQuda(void *h_x, void *h_b, QudaInvertParam *param)
#define errorQuda(...)
Definition: util_quda.h:73
double tol
Definition: quda.h:102
QudaPrecision prec
Definition: test_util.cpp:1551
void usage(char **)
Definition: test_util.cpp:1584
QudaDslashType dslash_type
Definition: quda.h:85
QudaReconstructType reconstruct_precondition
Definition: quda.h:49
QudaInverterType inv_type
Definition: quda.h:86
QudaPrecision cuda_prec
Definition: quda.h:152
double c_5[QUDA_MAX_DWF_LS]
Definition: quda.h:95
#define cloverSiteSize
Definition: test_util.h:8
void setDims(int *)
Definition: test_util.cpp:88
__host__ __device__ ValueType sqrt(ValueType x)
Definition: complex_quda.h:105
void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param)
QudaPrecision cpu_prec
Definition: quda.h:151
int process_command_line_option(int argc, char **argv, int *idx)
Definition: test_util.cpp:1635
void tm_ndeg_mat(void *evenOut, void *oddOut, void **gauge, void *evenIn, void *oddIn, double kappa, double mu, double epsilon, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
#define gaugeSiteSize
QudaDagType dagger
Definition: quda.h:145
const char * get_matpc_str(QudaMatPCType type)
Definition: misc.cpp:920
void finalizeComms()
Definition: test_util.cpp:65
QudaGaugeFieldOrder gauge_order
Definition: quda.h:36
void dw_mat(void *out, void **gauge, void *in, double kappa, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
double true_res
Definition: quda.h:105
QudaReconstructType link_recon
Definition: test_util.cpp:1549
void tm_matpc(void *outEven, void **gauge, void *inEven, double kappa, double mu, QudaTwistFlavorType flavor, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
const char * get_prec_str(QudaPrecision prec)
Definition: misc.cpp:658
void loadCloverQuda(void *h_clover, void *h_clovinv, QudaInvertParam *inv_param)
QudaPrecision cpu_prec
Definition: dslash_test.cpp:34
void display_test_info()
Definition: invert_test.cpp:60
#define spinorSiteSize
QudaPrecision clover_cuda_prec_sloppy
Definition: quda.h:163
QudaFieldLocation input_location
Definition: quda.h:82
void freeGaugeQuda(void)
double true_res_hq_offset[QUDA_MAX_MULTI_SHIFT]
Definition: quda.h:140
void ax(double a, void *x, int len, QudaPrecision precision)
double reliable_delta
Definition: quda.h:108
int xs
QudaSolutionType solution_type
Definition: quda.h:142
QudaSolverNormalization solver_normalization
Definition: quda.h:147
QudaPrecision clover_cuda_prec
Definition: quda.h:162
int precondition_cycle
Definition: quda.h:222
void initQuda(int device)
void dw_matpc(void *out, void **gauge, void *in, double kappa, QudaMatPCType matpc_type, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
double spinorGiB
Definition: quda.h:180
QudaFieldLocation output_location
Definition: quda.h:83
QudaPrecision clover_cuda_prec_precondition
Definition: quda.h:164
double m5
Definition: quda.h:91
QudaPrecision cuda_prec_sloppy
Definition: quda.h:153
QudaMassNormalization normalization
Definition: test_util.cpp:1572
QudaVerbosity verbosity
Definition: quda.h:174
void setSpinorSiteSize(int n)
Definition: test_util.cpp:150
const char * get_mass_normalization_str(QudaMassNormalization type)
Definition: misc.cpp:876
double tol_offset[QUDA_MAX_MULTI_SHIFT]
Definition: quda.h:131
double true_res_offset[QUDA_MAX_MULTI_SHIFT]
Definition: quda.h:137
QudaInvertParam newQudaInvertParam(void)
double gflops
Definition: quda.h:182
const char * get_recon_str(QudaReconstructType recon)
Definition: misc.cpp:724
QudaMatPCType matpc_type
Definition: test_util.cpp:1573
QudaPrecision cuda_prec_precondition
Definition: quda.h:48
QudaCloverFieldOrder clover_order
Definition: quda.h:166
double tol_hq
Definition: quda.h:104
enum QudaMatPCType_s QudaMatPCType
double true_res_hq
Definition: quda.h:106
QudaGammaBasis gamma_basis
Definition: quda.h:158
QudaPrecision cuda_prec_sloppy
Definition: quda.h:45
const char * get_dslash_str(QudaDslashType type)
Definition: misc.cpp:814
double tol_precondition
Definition: quda.h:213
void mxpy(void *x, void *y, int len, QudaPrecision precision)
double offset[QUDA_MAX_MULTI_SHIFT]
Definition: quda.h:128
int use_sloppy_partial_accumulator
Definition: quda.h:109
bool tune
Definition: test_util.cpp:1562
QudaReconstructType reconstruct
Definition: quda.h:43
void read_gauge_field(char *filename, void *gauge[], QudaPrecision precision, int *X, int argc, char *argv[])
Definition: gauge_qio.cpp:86
QudaPrecision cuda_prec
Definition: quda.h:42
int X[4]
Definition: quda.h:29
double mass
Definition: quda.h:88
int gcrNkrylov
Definition: quda.h:192
double norm_2(void *v, int len, QudaPrecision precision)
QudaPrecision cuda_prec
Definition: dslash_test.cpp:35
void axpy(double a, void *x, void *y, int len, QudaPrecision precision)
void construct_clover_field(void *clover, double norm, double diag, QudaPrecision precision)
Definition: test_util.cpp:1103
int max_res_increase
Definition: quda.h:114
QudaInvertParam inv_param
Definition: dslash_test.cpp:38
int xdim
Definition: test_util.cpp:1553
void wil_mat(void *out, void **gauge, void *in, double kappa, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param)
QudaInverterType inv_type
Definition: test_util.cpp:1565
double gaugeGiB
Definition: quda.h:60
QudaPrecision cuda_prec_precondition
Definition: quda.h:154
void * memset(void *s, int c, size_t n)
double tol_restart
Definition: quda.h:103
if(x2 >=X2) return
int zdim
Definition: test_util.cpp:1555
int tdim
Definition: test_util.cpp:1556
enum QudaReconstructType_s QudaReconstructType
Main header file for the QUDA library.
cpuColorSpinorField * spinorOut
Definition: dslash_test.cpp:40
char latfile[]
Definition: test_util.cpp:1561
#define printfQuda(...)
Definition: util_quda.h:67
QudaTboundary t_boundary
Definition: quda.h:38
QudaTwistFlavorType twist_flavor
Definition: quda.h:100
#define MAX(a, b)
Definition: invert_test.cpp:25
enum QudaDslashType_s QudaDslashType
QudaResidualType residual_type
Definition: quda.h:235
void wil_matpc(void *outEven, void **gauge, void *inEven, double kappa, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
int num_offset
Definition: quda.h:123
double cloverGiB
Definition: quda.h:181
QudaDslashType dslash_type
Definition: test_util.cpp:1560
QudaReconstructType link_recon_sloppy
Definition: test_util.cpp:1550
double epsilon
Definition: quda.h:98
int ydim
Definition: test_util.cpp:1554
double omega
Definition: quda.h:219
double mass
Definition: test_util.cpp:1569
QudaPrecision prec_sloppy
Definition: test_util.cpp:1552
QudaInverterType precon_type
Definition: test_util.cpp:1566
int gridsize_from_cmdline[]
Definition: test_util.cpp:1559
QudaPrecision clover_cpu_prec
Definition: quda.h:161
void initComms(int argc, char **argv, const int *commDims)
Definition: test_util.cpp:48
void * gauge[4]
Definition: su3_test.cpp:15
QudaMatPCType matpc_type
Definition: quda.h:144
enum QudaInverterType_s QudaInverterType
double kappa5
Definition: dslash_test.cpp:32
QudaPrecision cpu_prec
Definition: quda.h:40
QudaGaugeParam newQudaGaugeParam(void)
QudaPreserveSource preserve_source
Definition: quda.h:149
double clover_coeff
Definition: quda.h:169
enum QudaTwistFlavorType_s QudaTwistFlavorType