QUDA  1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
invertmsrc_test.cpp
Go to the documentation of this file.
1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <time.h>
4 #include <math.h>
5 #include <string.h>
6 
7 #include <util_quda.h>
8 #include <test_util.h>
9 #include <dslash_util.h>
10 #include <blas_reference.h>
13 #include "misc.h"
14 
15 #if defined(QMP_COMMS)
16 #include <qmp.h>
17 #elif defined(MPI_COMMS)
18 #include <mpi.h>
19 #endif
20 
21 #include <qio_field.h>
22 
23 #define MAX(a,b) ((a)>(b)?(a):(b))
24 
25 // In a typical application, quda.h is the only QUDA header required.
26 #include <quda.h>
27 
28 // Wilson, clover-improved Wilson, twisted mass, and domain wall are supported.
30 
31 // Twisted mass flavor type
33 
34 extern int device;
35 extern int xdim;
36 extern int ydim;
37 extern int zdim;
38 extern int tdim;
39 extern int Lsdim;
40 extern int gridsize_from_cmdline[];
42 extern QudaPrecision prec;
47 extern int multishift; // whether to test multi-shift or standard solver
48 extern double mass; // mass of Dirac operator
49 extern double anisotropy; // temporal anisotropy
50 extern double tol; // tolerance for inverter
51 extern double tol_hq; // heavy-quark tolerance for inverter
52 extern QudaMassNormalization normalization; // mass normalization of Dirac operators
53 extern QudaMatPCType matpc_type; // preconditioning type
54 
55 extern int niter; // max solver iterations
56 extern char latfile[];
57 extern bool unit_gauge;
58 
59 extern void usage(char** );
60 
61 
62 
63 void
65 {
66  printfQuda("running the following test:\n");
67 
68  printfQuda("prec prec_sloppy multishift matpc_type recon recon_sloppy S_dimension T_dimension Ls_dimension dslash_type normalization\n");
69  printfQuda("%6s %6s %d %12s %2s %2s %3d/%3d/%3d %3d %2d %14s %8s\n",
73  xdim, ydim, zdim, tdim, Lsdim,
76 
77  printfQuda("Grid partition info: X Y Z T\n");
78  printfQuda(" %d %d %d %d\n",
79  dimPartitioned(0),
80  dimPartitioned(1),
81  dimPartitioned(2),
82  dimPartitioned(3));
83 
84  return ;
85 
86 }
87 
88 
89 void
90 usage_extra(char** argv )
91 {
92 printfQuda("Extra options:\n");
93 printfQuda(" --num_src n # Numer of sources used\n");
94 
95 
96 return ;
97 }
98 
99 
100 int main(int argc, char **argv)
101 {
102 
103  int num_src=2;
104 
105  for (int i = 1; i < argc; i++){
106  if(process_command_line_option(argc, argv, &i) == 0){
107  continue;
108  }
109 
110 
111 
112 
113  if( strcmp(argv[i], "--num_src") == 0){
114  if (i+1 >= argc){
115  usage(argv);
116  }
117  num_src= atoi(argv[i+1]);
118  i++;
119  continue;
120  }
121 
122 
123  printfQuda("ERROR: Invalid option:%s\n", argv[i]);
124  usage(argv);
125  }
126 
128  prec_sloppy = prec;
129  }
132  }
133 
134  // initialize QMP/MPI, QUDA comms grid and RNG (test_util.cpp)
135  initComms(argc, argv, gridsize_from_cmdline);
136 
138 
139  // *** QUDA parameters begin here.
140 
148  printfQuda("dslash_type %d not supported\n", dslash_type);
149  exit(0);
150  }
151 
156 
159 
160  double kappa5;
161 
162  gauge_param.X[0] = xdim;
163  gauge_param.X[1] = ydim;
164  gauge_param.X[2] = zdim;
165  gauge_param.X[3] = tdim;
166  inv_param.Ls = 1;
167 
168  gauge_param.anisotropy = anisotropy;
169  gauge_param.type = QUDA_WILSON_LINKS;
170  gauge_param.gauge_order = QUDA_QDP_GAUGE_ORDER;
171  gauge_param.t_boundary = QUDA_PERIODIC_T;
172 
173  gauge_param.cpu_prec = cpu_prec;
174  gauge_param.cuda_prec = cuda_prec;
175  gauge_param.reconstruct = link_recon;
176  gauge_param.cuda_prec_sloppy = cuda_prec_sloppy;
180  gauge_param.gauge_fix = QUDA_GAUGE_FIXED_NO;
181 
182  inv_param.dslash_type = dslash_type;
183 
184  inv_param.mass = mass;
185  inv_param.kappa = 1.0 / (2.0 * (1 + 3/gauge_param.anisotropy + mass));
186 
188  inv_param.mu = 0.12;
189  inv_param.epsilon = 0.1385;
190  inv_param.twist_flavor = twist_flavor;
191  inv_param.Ls = (inv_param.twist_flavor == QUDA_TWIST_NONDEG_DOUBLET) ? 2 : 1;
192  } else if (dslash_type == QUDA_DOMAIN_WALL_DSLASH ||
194  inv_param.m5 = -1.8;
195  kappa5 = 0.5/(5 + inv_param.m5);
196  inv_param.Ls = Lsdim;
197  } else if (dslash_type == QUDA_MOBIUS_DWF_DSLASH) {
198  inv_param.m5 = -1.8;
199  kappa5 = 0.5/(5 + inv_param.m5);
200  inv_param.Ls = Lsdim;
201  for(int k = 0; k < Lsdim; k++)
202  {
203  // b5[k], c[k] values are chosen for arbitrary values,
204  // but the difference of them are same as 1.0
205  inv_param.b_5[k] = 1.452;
206  inv_param.c_5[k] = 0.452;
207  }
208  }
209 
210  // offsets used only by multi-shift solver
211  inv_param.num_offset = 12;
212  inv_param.num_src= num_src;
213  double offset[12] = {0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0.11, 0.12};
214  for (int i=0; i<inv_param.num_offset; i++) inv_param.offset[i] = offset[i];
215 
216  inv_param.inv_type = inv_type;
218  inv_param.solution_type = QUDA_MAT_SOLUTION;
219  } else {
221  }
222  inv_param.matpc_type = matpc_type;
223 
224  inv_param.dagger = QUDA_DAG_NO;
225  inv_param.mass_normalization = normalization;
227 
234  inv_param.solve_type = QUDA_NORMOP_PC_SOLVE;
235  } else {
236  inv_param.solve_type = QUDA_DIRECT_PC_SOLVE;
237  }
238 
239  inv_param.pipeline = 0;
240 
241  inv_param.Nsteps = 2;
242  inv_param.gcrNkrylov = 10;
243  inv_param.tol = tol;
244  inv_param.tol_restart = 1e-3; //now theoretical background for this parameter...
245  if(tol_hq == 0 && tol == 0){
246  errorQuda("qudaInvert: requesting zero residual\n");
247  exit(1);
248  }
249  // require both L2 relative and heavy quark residual to determine convergence
250  inv_param.residual_type = static_cast<QudaResidualType_s>(0);
251  inv_param.residual_type = (tol != 0) ? static_cast<QudaResidualType_s> ( inv_param.residual_type | QUDA_L2_RELATIVE_RESIDUAL) : inv_param.residual_type;
252  inv_param.residual_type = (tol_hq != 0) ? static_cast<QudaResidualType_s> (inv_param.residual_type | QUDA_HEAVY_QUARK_RESIDUAL) : inv_param.residual_type;
253 
254  inv_param.tol_hq = tol_hq; // specify a tolerance for the residual for heavy quark residual
255 
256  // these can be set individually
257  for (int i=0; i<inv_param.num_offset; i++) {
258  inv_param.tol_offset[i] = inv_param.tol;
259  inv_param.tol_hq_offset[i] = inv_param.tol_hq;
260  }
261  inv_param.maxiter = niter;
262  inv_param.reliable_delta = 1e-1;
263  inv_param.use_sloppy_partial_accumulator = 0;
264  inv_param.max_res_increase = 1;
265 
266  // domain decomposition preconditioner parameters
268 
270  inv_param.precondition_cycle = 1;
271  inv_param.tol_precondition = 1e-1;
272  inv_param.maxiter_precondition = 10;
275  inv_param.omega = 1.0;
276 
277  inv_param.cpu_prec = cpu_prec;
278  inv_param.cuda_prec = cuda_prec;
282  inv_param.dirac_order = QUDA_DIRAC_ORDER;
283 
286 
287  gauge_param.ga_pad = 0; // 24*24*24/2;
288  inv_param.sp_pad = 0; // 24*24*24/2;
289  inv_param.cl_pad = 0; // 24*24*24/2;
290 
291  // For multi-GPU, ga_pad must be large enough to store a time-slice
292 #ifdef MULTI_GPU
293  int x_face_size = gauge_param.X[1]*gauge_param.X[2]*gauge_param.X[3]/2;
294  int y_face_size = gauge_param.X[0]*gauge_param.X[2]*gauge_param.X[3]/2;
295  int z_face_size = gauge_param.X[0]*gauge_param.X[1]*gauge_param.X[3]/2;
296  int t_face_size = gauge_param.X[0]*gauge_param.X[1]*gauge_param.X[2]/2;
297  int pad_size =MAX(x_face_size, y_face_size);
298  pad_size = MAX(pad_size, z_face_size);
299  pad_size = MAX(pad_size, t_face_size);
300  gauge_param.ga_pad = pad_size;
301 #endif
302 
304  inv_param.clover_cpu_prec = cpu_prec;
305  inv_param.clover_cuda_prec = cuda_prec;
309  inv_param.clover_coeff = 1.5*inv_param.kappa;
310  }
311 
312  inv_param.verbosity = QUDA_VERBOSE;
313 
314  // *** Everything between here and the call to initQuda() is
315  // *** application-specific.
316 
317  // set parameters for the reference Dslash, and prepare fields to be loaded
321  dw_setDims(gauge_param.X, inv_param.Ls);
322  } else {
323  setDims(gauge_param.X);
324  }
325 
326  setSpinorSiteSize(24);
327 
328  size_t gSize = (gauge_param.cpu_prec == QUDA_DOUBLE_PRECISION) ? sizeof(double) : sizeof(float);
329  size_t sSize = (inv_param.cpu_prec == QUDA_DOUBLE_PRECISION) ? sizeof(double) : sizeof(float);
330 
331  void *gauge[4], *clover_inv=0, *clover=0;
332 
333  for (int dir = 0; dir < 4; dir++) {
334  gauge[dir] = malloc(V*gaugeSiteSize*gSize);
335  }
336 
337  if (strcmp(latfile,"")) { // load in the command line supplied gauge field
338  read_gauge_field(latfile, gauge, gauge_param.cpu_prec, gauge_param.X, argc, argv);
339  construct_gauge_field(gauge, 2, gauge_param.cpu_prec, &gauge_param);
340  } else { // else generate an SU(3) field
341  if (unit_gauge) {
342  // unit SU(3) field
343  construct_gauge_field(gauge, 0, gauge_param.cpu_prec, &gauge_param);
344  } else {
345  // random SU(3) field
346  construct_gauge_field(gauge, 1, gauge_param.cpu_prec, &gauge_param);
347  }
348  }
349 
351  double norm = 0.0; // clover components are random numbers in the range (-norm, norm)
352  double diag = 1.0; // constant added to the diagonal
353 
354  size_t cSize = (inv_param.clover_cpu_prec == QUDA_DOUBLE_PRECISION) ? sizeof(double) : sizeof(float);
355  clover_inv = malloc(V*cloverSiteSize*cSize);
356  construct_clover_field(clover_inv, norm, diag, inv_param.clover_cpu_prec);
357 
358  // The uninverted clover term is only needed when solving the unpreconditioned
359  // system or when using "asymmetric" even/odd preconditioning.
360  int preconditioned = (inv_param.solve_type == QUDA_DIRECT_PC_SOLVE ||
361  inv_param.solve_type == QUDA_NORMOP_PC_SOLVE);
362  int asymmetric = preconditioned &&
365  if (!preconditioned) {
366  clover = clover_inv;
367  clover_inv = NULL;
368  } else if (asymmetric) { // fake it by using the same random matrix
369  clover = clover_inv; // for both clover and clover_inv
370  } else {
371  clover = NULL;
372  }
373  }
374 
375 
376 
377  void **spinorIn = (void**)malloc(inv_param.num_src*sizeof(void *));
378  void **spinorCheck = (void**)malloc(inv_param.num_src*sizeof(void *));
379  for (int i=0; i<inv_param.num_src; i++) {
380  spinorIn[i] = malloc(V*spinorSiteSize*sSize*inv_param.Ls);
381  spinorCheck[i] = malloc(V*spinorSiteSize*sSize*inv_param.Ls);
382  }
383 
384  void **spinorOutMulti = NULL;
385 // if (multishift) {
386  spinorOutMulti = (void**)malloc(inv_param.num_src*sizeof(void *));
387  for (int i=0; i<inv_param.num_src; i++) {
388  spinorOutMulti[i] = malloc(V*spinorSiteSize*sSize*inv_param.Ls);
389  }
390 // } else {
391 // spinorOut = malloc(V*spinorSiteSize*sSize*inv_param.Ls);
392 // }
393 
394 
395 // if (multishift) {
396  for (int i=0; i<inv_param.num_src; i++) {
397  memset(spinorOutMulti[i], 0, inv_param.Ls*V*spinorSiteSize*sSize);
398  memset(spinorIn[i], 0, inv_param.Ls*V*spinorSiteSize*sSize);
399  memset(spinorCheck[i], 0, inv_param.Ls*V*spinorSiteSize*sSize);
400  }
401 // } else {
402 // memset(spinorOut, 0, inv_param.Ls*V*spinorSiteSize*sSize);
403 // }
404 
405  // create a point source at 0 (in each subvolume... FIXME)
406 
407  // create a point source at 0 (in each subvolume... FIXME)
408  for (int j=0; j<inv_param.num_src; j++) {
409  if (inv_param.cpu_prec == QUDA_SINGLE_PRECISION) {
410  //((float*)spinorIn)[0] = 1.0;
411  for (int i=0; i<inv_param.Ls*V; i++) ((float*)spinorIn[j])[i*spinorSiteSize+j] = rand() / (float)RAND_MAX;
412  } else {
413  //((double*)spinorIn)[0] = 1.0;
414  for (int i=0; i<inv_param.Ls*V; i++) ((double*)spinorIn[j])[i*spinorSiteSize+j] = rand() / (double)RAND_MAX;
415  }
416  }
417  // start the timer
418  double time0 = -((double)clock());
419 
420  // initialize the QUDA library
421  initQuda(device);
422 
423  // load the gauge field
424  loadGaugeQuda((void*)gauge, &gauge_param);
425 
426  // load the clover term, if desired
427  if (dslash_type == QUDA_CLOVER_WILSON_DSLASH) loadCloverQuda(clover, clover_inv, &inv_param);
428 
429  if (dslash_type == QUDA_TWISTED_CLOVER_DSLASH) loadCloverQuda(NULL, NULL, &inv_param);
430 
431  // perform the inversion
432 // if (multishift) {
433 // invertMultiShiftQuda(spinorOutMulti, spinorIn, &inv_param);
434 // } else {
435  invertMultiSrcQuda(spinorOutMulti, spinorIn, &inv_param);
436 
437 
438  // stop the timer
439  time0 += clock();
440  time0 /= CLOCKS_PER_SEC;
441 
442  printfQuda("\nDone: %i iter / %g secs = %g Gflops, total time = %g secs\n",
443  inv_param.iter, inv_param.secs, inv_param.gflops/inv_param.secs, time0);
444 
445  for(int i = 0; i < inv_param.num_src ; i++){
446  invertQuda(spinorOutMulti[i], spinorIn[i], &inv_param);
447  }
448 
449 // if (true) {
450 // if (inv_param.mass_normalization == QUDA_MASS_NORMALIZATION) {
451 // errorQuda("Mass normalization not supported for multi-shift solver in invert_test");
452 // }
453 //
454 // void *spinorTmp = malloc(V*spinorSiteSize*sSize*inv_param.Ls);
455 //
456 // printfQuda("Host residuum checks: \n");
457 // for(int i=0; i < inv_param.num_src; i++) {
458 // ax(0, spinorCheck[i], V*spinorSiteSize, inv_param.cpu_prec);
459 //
460 // if (dslash_type == QUDA_TWISTED_MASS_DSLASH || dslash_type == QUDA_TWISTED_CLOVER_DSLASH) {
461 // if (inv_param.twist_flavor != QUDA_TWIST_SINGLET)
462 // errorQuda("Twisted mass solution type not supported");
463 // tm_matpc(spinorTmp, gauge, spinorOutMulti[i], inv_param.kappa, inv_param.mu, inv_param.twist_flavor,
464 // inv_param.matpc_type, 0, inv_param.cpu_prec, gauge_param);
465 // tm_matpc(spinorCheck[i], gauge, spinorTmp, inv_param.kappa, inv_param.mu, inv_param.twist_flavor,
466 // inv_param.matpc_type, 1, inv_param.cpu_prec, gauge_param);
467 // } else if (dslash_type == QUDA_WILSON_DSLASH || dslash_type == QUDA_CLOVER_WILSON_DSLASH) {
468 // wil_matpc(spinorTmp, gauge, spinorOutMulti[i], inv_param.kappa, inv_param.matpc_type, 0,
469 // inv_param.cpu_prec, gauge_param);
470 // wil_matpc(spinorCheck[i], gauge, spinorTmp, inv_param.kappa, inv_param.matpc_type, 1,
471 // inv_param.cpu_prec, gauge_param);
472 // } else {
473 // printfQuda("Domain wall not supported for multi-shift\n");
474 // exit(-1);
475 // }
476 //
477 // axpy(inv_param.offset[i], spinorOutMulti[i], spinorCheck, Vh*spinorSiteSize, inv_param.cpu_prec);
478 // mxpy(spinorIn, spinorCheck, Vh*spinorSiteSize, inv_param.cpu_prec);
479 // double nrm2 = norm_2(spinorCheck, Vh*spinorSiteSize, inv_param.cpu_prec);
480 // double src2 = norm_2(spinorIn, Vh*spinorSiteSize, inv_param.cpu_prec);
481 // double l2r = sqrt(nrm2 / src2);
482 //
483 // printfQuda("Shift %d residuals: (L2 relative) tol %g, QUDA = %g, host = %g; (heavy-quark) tol %g, QUDA = %g\n",
484 // i, inv_param.tol_offset[i], inv_param.true_res_offset[i], l2r,
485 // inv_param.tol_hq_offset[i], inv_param.true_res_hq_offset[i]);
486 // }
487 // free(spinorTmp);
488 // }
489 // #if 0
490 // else
491  for (int i=0; i <inv_param.num_src; i++){
492 
493  if (inv_param.solution_type == QUDA_MAT_SOLUTION) {
494 
496  if(inv_param.twist_flavor == QUDA_TWIST_SINGLET)
497  tm_mat(spinorCheck[i], gauge, spinorOutMulti[i], inv_param.kappa, inv_param.mu, inv_param.twist_flavor, 0, inv_param.cpu_prec, gauge_param);
498  else
499  {
500  int tm_offset = V*spinorSiteSize; //12*spinorRef->Volume();
501  void *evenOut = spinorCheck[i];
502  void *oddOut = cpu_prec == sizeof(double) ? (void*)((double*)evenOut + tm_offset): (void*)((float*)evenOut + tm_offset);
503 
504  void *evenIn = spinorOutMulti[i];
505  void *oddIn = cpu_prec == sizeof(double) ? (void*)((double*)evenIn + tm_offset): (void*)((float*)evenIn + tm_offset);
506 
507  tm_ndeg_mat(evenOut, oddOut, gauge, evenIn, oddIn, inv_param.kappa, inv_param.mu, inv_param.epsilon, 0, inv_param.cpu_prec, gauge_param);
508  }
510  wil_mat(spinorCheck[i], gauge, spinorOutMulti[i], inv_param.kappa, 0, inv_param.cpu_prec, gauge_param);
511  } else if (dslash_type == QUDA_DOMAIN_WALL_DSLASH) {
512  dw_mat(spinorCheck[i], gauge, spinorOutMulti[i], kappa5, inv_param.dagger, inv_param.cpu_prec, gauge_param, inv_param.mass);
513 // } else if (dslash_type == QUDA_DOMAIN_WALL_4D_DSLASH) {
514 // dw_4d_mat(spinorCheck, gauge, spinorOut, kappa5, inv_param.dagger, inv_param.cpu_prec, gauge_param, inv_param.mass);
515 // } else if (dslash_type == QUDA_MOBIUS_DWF_DSLASH) {
516 // mdw_mat(spinorCheck, gauge, spinorOut, kappa5, inv_param.dagger, inv_param.cpu_prec, gauge_param, inv_param.mass);
517  } else {
518  printfQuda("Unsupported dslash_type\n");
519  exit(-1);
520  }
521  if (inv_param.mass_normalization == QUDA_MASS_NORMALIZATION) {
525  ax(0.5/kappa5, spinorCheck[i], V*spinorSiteSize*inv_param.Ls, inv_param.cpu_prec);
527  ax(0.5/inv_param.kappa, spinorCheck[i], 2*V*spinorSiteSize, inv_param.cpu_prec);
528  } else {
529  ax(0.5/inv_param.kappa, spinorCheck[i], V*spinorSiteSize, inv_param.cpu_prec);
530  }
531  }
532 
533  } else if(inv_param.solution_type == QUDA_MATPC_SOLUTION) {
534 
536  if (inv_param.twist_flavor != QUDA_TWIST_SINGLET)
537  errorQuda("Twisted mass solution type not supported");
538  tm_matpc(spinorCheck[i], gauge, spinorOutMulti[i], inv_param.kappa, inv_param.mu, inv_param.twist_flavor,
539  inv_param.matpc_type, 0, inv_param.cpu_prec, gauge_param);
541  wil_matpc(spinorCheck[i], gauge, spinorOutMulti[i], inv_param.kappa, inv_param.matpc_type, 0,
542  inv_param.cpu_prec, gauge_param);
543  } else if (dslash_type == QUDA_DOMAIN_WALL_DSLASH) {
544  dw_matpc(spinorCheck[i], gauge, spinorOutMulti[i], kappa5, inv_param.matpc_type, 0, inv_param.cpu_prec, gauge_param, inv_param.mass);
545  } else if (dslash_type == QUDA_DOMAIN_WALL_4D_DSLASH) {
546  dw_4d_matpc(spinorCheck[i], gauge, spinorOutMulti[i], kappa5, inv_param.matpc_type, 0, inv_param.cpu_prec, gauge_param, inv_param.mass);
547  } else if (dslash_type == QUDA_MOBIUS_DWF_DSLASH) {
548  double _Complex *kappa_b = (double _Complex *)malloc(Lsdim * sizeof(double _Complex));
549  double _Complex *kappa_c = (double _Complex *)malloc(Lsdim * sizeof(double _Complex));
550  for(int xs = 0 ; xs < Lsdim ; xs++)
551  {
552  kappa_b[xs] = 1.0/(2*(inv_param.b_5[xs]*(4.0 + inv_param.m5) + 1.0));
553  kappa_c[xs] = 1.0/(2*(inv_param.c_5[xs]*(4.0 + inv_param.m5) - 1.0));
554  }
555  mdw_matpc(spinorCheck[i], gauge, spinorOutMulti[i], kappa_b, kappa_c, inv_param.matpc_type, 0, inv_param.cpu_prec, gauge_param, inv_param.mass, inv_param.b_5, inv_param.c_5);
556  free(kappa_b);
557  free(kappa_c);
558  } else {
559  printfQuda("Unsupported dslash_type\n");
560  exit(-1);
561  }
562 
563  if (inv_param.mass_normalization == QUDA_MASS_NORMALIZATION) {
567  ax(0.25/(kappa5*kappa5), spinorCheck[i], V*spinorSiteSize*inv_param.Ls, inv_param.cpu_prec);
568  } else {
569  ax(0.25/(inv_param.kappa*inv_param.kappa), spinorCheck[i], Vh*spinorSiteSize, inv_param.cpu_prec);
570 
571  }
572  }
573 
574  } else if (inv_param.solution_type == QUDA_MATPCDAG_MATPC_SOLUTION) {
575 
576  void *spinorTmp = malloc(V*spinorSiteSize*sSize*inv_param.Ls);
577 
578  ax(0, spinorCheck[i], V*spinorSiteSize, inv_param.cpu_prec);
579 
581  if (inv_param.twist_flavor != QUDA_TWIST_SINGLET)
582  errorQuda("Twisted mass solution type not supported");
583  tm_matpc(spinorTmp, gauge, spinorOutMulti[i], inv_param.kappa, inv_param.mu, inv_param.twist_flavor,
584  inv_param.matpc_type, 0, inv_param.cpu_prec, gauge_param);
585  tm_matpc(spinorCheck[i], gauge, spinorTmp, inv_param.kappa, inv_param.mu, inv_param.twist_flavor,
586  inv_param.matpc_type, 1, inv_param.cpu_prec, gauge_param);
588  wil_matpc(spinorTmp, gauge, spinorOutMulti[i], inv_param.kappa, inv_param.matpc_type, 0,
589  inv_param.cpu_prec, gauge_param);
590  wil_matpc(spinorCheck[i], gauge, spinorTmp, inv_param.kappa, inv_param.matpc_type, 1,
591  inv_param.cpu_prec, gauge_param);
592  } else {
593  printfQuda("Unsupported dslash_type\n");
594  exit(-1);
595  }
596 
597  if (inv_param.mass_normalization == QUDA_MASS_NORMALIZATION) {
598  errorQuda("Mass normalization not implemented");
599  }
600 
601  free(spinorTmp);
602  }
603 
604 
605  int vol = inv_param.solution_type == QUDA_MAT_SOLUTION ? V : Vh;
606  mxpy(spinorIn[i], spinorCheck[i], vol*spinorSiteSize*inv_param.Ls, inv_param.cpu_prec);
607  double nrm2 = norm_2(spinorCheck[i], vol*spinorSiteSize*inv_param.Ls, inv_param.cpu_prec);
608  double src2 = norm_2(spinorIn[i], vol*spinorSiteSize*inv_param.Ls, inv_param.cpu_prec);
609  double l2r = sqrt(nrm2 / src2);
610 
611 // printfQuda("Residuals: (L2 relative) tol %g, QUDA = %g, host = %g; (heavy-quark) tol %g, QUDA = %g\n",
612 // inv_param.tol, inv_param.true_res, l2r, inv_param.tol_hq, inv_param.true_res_hq);
613  printfQuda("rhs %d residuals: (L2 relative) tol %g, QUDA = %g, host = %g; (heavy-quark) tol %g, QUDA = %g\n",
614  i, inv_param.tol_offset[i], inv_param.true_res_offset[i], l2r,
615  inv_param.tol_hq_offset[i], inv_param.true_res_hq_offset[i]);
616  }
617 
618  freeGaugeQuda();
620 
621  // finalize the QUDA library
622  endQuda();
623 
624  finalizeComms();
625 
626  return 0;
627 }
int maxiter_precondition
Definition: quda.h:292
static size_t gSize
double secs
Definition: quda.h:251
int dimPartitioned(int dim)
Definition: test_util.cpp:1776
void ax(double a, ColorSpinorField &x)
Definition: blas_quda.cu:508
QudaDiracFieldOrder dirac_order
Definition: quda.h:219
void usage_extra(char **argv)
QudaMassNormalization mass_normalization
Definition: quda.h:208
double tol_hq_offset[QUDA_MAX_MULTI_SHIFT]
Definition: quda.h:182
enum QudaMassNormalization_s QudaMassNormalization
QudaReconstructType reconstruct_sloppy
Definition: quda.h:53
double anisotropy
Definition: quda.h:38
void freeCloverQuda(void)
void mdw_matpc(void *out, void **gauge, void *in, double _Complex *kappa_b, double _Complex *kappa_c, QudaMatPCType matpc_type, int dagger, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double _Complex *b5, double _Complex *c5)
void dw_4d_matpc(void *out, void **gauge, void *in, double kappa, QudaMatPCType matpc_type, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
void endQuda(void)
void construct_gauge_field(void **gauge, int type, QudaPrecision precision, QudaGaugeParam *param)
Definition: test_util.cpp:1047
QudaDslashType dslash_type
Definition: test_util.cpp:1621
QudaSolveType solve_type
Definition: quda.h:205
QudaVerbosity verbosity_precondition
Definition: quda.h:286
enum QudaPrecision_s QudaPrecision
int ga_pad
Definition: quda.h:63
double_complex c_5[QUDA_MAX_DWF_LS]
Definition: quda.h:112
void dw_setDims(int *X, const int L5)
Definition: test_util.cpp:187
double mu
Definition: quda.h:114
QudaGaugeFixed gauge_fix
Definition: quda.h:61
QudaSchwarzType schwarz_type
Definition: quda.h:310
__host__ __device__ ValueType norm(const complex< ValueType > &z)
Returns the magnitude of z squared.
void tm_mat(void *out, void **gauge, void *in, double kappa, double mu, QudaTwistFlavorType flavor, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param)
QudaInverterType inv_type_precondition
Definition: quda.h:270
QudaLinkType type
Definition: quda.h:42
double kappa
Definition: quda.h:106
void invertQuda(void *h_x, void *h_b, QudaInvertParam *param)
#define errorQuda(...)
Definition: util_quda.h:121
double tol
Definition: quda.h:121
QudaDslashType dslash_type
Definition: quda.h:102
QudaReconstructType reconstruct_precondition
Definition: quda.h:59
QudaInverterType inv_type
Definition: quda.h:103
QudaPrecision cuda_prec
Definition: quda.h:214
int tdim
Definition: test_util.cpp:1618
#define cloverSiteSize
Definition: test_util.h:9
__host__ __device__ ValueType sqrt(ValueType x)
Definition: complex_quda.h:120
void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param)
double tol_hq
Definition: test_util.cpp:1657
QudaPrecision cpu_prec
Definition: quda.h:213
QudaPrecision & cuda_prec
int process_command_line_option(int argc, char **argv, int *idx)
Definition: test_util.cpp:2019
void tm_ndeg_mat(void *evenOut, void *oddOut, void **gauge, void *evenIn, void *oddIn, double kappa, double mu, double epsilon, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
QudaDagType dagger
Definition: quda.h:207
const char * get_matpc_str(QudaMatPCType type)
Definition: misc.cpp:1121
void finalizeComms()
Definition: test_util.cpp:128
QudaGaugeParam gauge_param
QudaGaugeFieldOrder gauge_order
Definition: quda.h:43
void dw_mat(void *out, void **gauge, void *in, double kappa, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
void tm_matpc(void *outEven, void **gauge, void *inEven, double kappa, double mu, QudaTwistFlavorType flavor, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
const char * get_prec_str(QudaPrecision prec)
Definition: misc.cpp:701
void loadCloverQuda(void *h_clover, void *h_clovinv, QudaInvertParam *inv_param)
#define spinorSiteSize
QudaPrecision & cuda_prec_precondition
void usage(char **)
Definition: test_util.cpp:1783
QudaPrecision clover_cuda_prec_sloppy
Definition: quda.h:226
void setDims(int *)
Definition: test_util.cpp:151
QudaFieldLocation input_location
Definition: quda.h:99
void freeGaugeQuda(void)
double true_res_hq_offset[QUDA_MAX_MULTI_SHIFT]
Definition: quda.h:191
double reliable_delta
Definition: quda.h:129
int xdim
Definition: test_util.cpp:1615
double_complex b_5[QUDA_MAX_DWF_LS]
Definition: quda.h:111
double anisotropy
Definition: test_util.cpp:1650
QudaSolutionType solution_type
Definition: quda.h:204
QudaSolverNormalization solver_normalization
Definition: quda.h:209
QudaPrecision clover_cuda_prec
Definition: quda.h:225
int precondition_cycle
Definition: quda.h:307
QudaPrecision & cuda_prec_sloppy
QudaInverterType precon_type
Definition: test_util.cpp:1641
void initQuda(int device)
void dw_matpc(void *out, void **gauge, void *in, double kappa, QudaMatPCType matpc_type, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
int Lsdim
Definition: test_util.cpp:1619
QudaFieldLocation output_location
Definition: quda.h:100
QudaPrecision clover_cuda_prec_precondition
Definition: quda.h:228
QudaInvertParam inv_param
Definition: covdev_test.cpp:37
double m5
Definition: quda.h:108
QudaPrecision cuda_prec_sloppy
Definition: quda.h:215
QudaVerbosity verbosity
Definition: quda.h:244
void setSpinorSiteSize(int n)
Definition: test_util.cpp:211
const char * get_mass_normalization_str(QudaMassNormalization type)
Definition: misc.cpp:1077
double tol_offset[QUDA_MAX_MULTI_SHIFT]
Definition: quda.h:179
double true_res_offset[QUDA_MAX_MULTI_SHIFT]
Definition: quda.h:185
QudaInvertParam newQudaInvertParam(void)
double gflops
Definition: quda.h:250
const char * get_recon_str(QudaReconstructType recon)
Definition: misc.cpp:768
QudaPrecision cuda_prec_precondition
Definition: quda.h:58
QudaCloverFieldOrder clover_order
Definition: quda.h:230
double tol_hq
Definition: quda.h:123
enum QudaMatPCType_s QudaMatPCType
int multishift
Definition: test_util.cpp:1642
QudaGammaBasis gamma_basis
Definition: quda.h:221
QudaPrecision cuda_prec_sloppy
Definition: quda.h:52
const char * get_dslash_str(QudaDslashType type)
Definition: misc.cpp:910
double tol_precondition
Definition: quda.h:289
int niter
Definition: test_util.cpp:1629
double offset[QUDA_MAX_MULTI_SHIFT]
Definition: quda.h:176
int use_sloppy_partial_accumulator
Definition: quda.h:132
QudaReconstructType reconstruct
Definition: quda.h:50
QudaPrecision cuda_prec
Definition: quda.h:49
int X[4]
Definition: quda.h:36
double mass
Definition: quda.h:105
int gcrNkrylov
Definition: quda.h:259
int V
Definition: test_util.cpp:27
char latfile[]
Definition: test_util.cpp:1623
double norm_2(void *v, int len, QudaPrecision precision)
QudaReconstructType link_recon_sloppy
Definition: test_util.cpp:1606
int ydim
Definition: test_util.cpp:1616
QudaMatPCType matpc_type
Definition: test_util.cpp:1662
void * memset(void *s, int c, size_t n)
int zdim
Definition: test_util.cpp:1617
void construct_clover_field(void *clover, double norm, double diag, QudaPrecision precision)
Definition: test_util.cpp:1167
int device
Definition: test_util.cpp:1602
int max_res_increase
Definition: quda.h:147
QudaResidualType_s
Definition: enum_quda.h:186
QudaPrecision prec
Definition: test_util.cpp:1608
void wil_mat(void *out, void **gauge, void *in, double kappa, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param)
QudaPrecision cuda_prec_precondition
Definition: quda.h:217
double tol
Definition: test_util.cpp:1656
QudaInverterType inv_type
Definition: test_util.cpp:1640
double tol_restart
Definition: quda.h:122
enum QudaReconstructType_s QudaReconstructType
Main header file for the QUDA library.
void invertMultiSrcQuda(void **_hp_x, void **_hp_b, QudaInvertParam *param)
QudaTwistFlavorType twist_flavor
Definition: test_util.cpp:1660
#define printfQuda(...)
Definition: util_quda.h:115
QudaTboundary t_boundary
Definition: quda.h:45
QudaTwistFlavorType twist_flavor
Definition: quda.h:117
enum QudaDslashType_s QudaDslashType
bool unit_gauge
Definition: test_util.cpp:1624
void mxpy(ColorSpinorField &x, ColorSpinorField &y)
Definition: blas_quda.h:34
QudaResidualType residual_type
Definition: quda.h:320
void wil_matpc(void *outEven, void **gauge, void *inEven, double kappa, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
int num_offset
Definition: quda.h:169
int main(int argc, char **argv)
QudaPrecision & cpu_prec
QudaMassNormalization normalization
Definition: test_util.cpp:1661
QudaReconstructType link_recon
Definition: test_util.cpp:1605
double epsilon
Definition: quda.h:115
cpuColorSpinorField * spinorTmp
double omega
Definition: quda.h:295
void initComms(int argc, char **argv, int *const commDims)
Definition: test_util.cpp:88
void read_gauge_field(const char *filename, void *gauge[], QudaPrecision prec, const int *X, int argc, char *argv[])
Definition: qio_field.h:14
#define MAX(a, b)
QudaPrecision clover_cpu_prec
Definition: quda.h:224
int gridsize_from_cmdline[]
Definition: test_util.cpp:49
double mass
Definition: test_util.cpp:1646
QudaMatPCType matpc_type
Definition: quda.h:206
enum QudaInverterType_s QudaInverterType
double kappa5
QudaPrecision cpu_prec
Definition: quda.h:47
void display_test_info()
QudaPrecision prec_sloppy
Definition: test_util.cpp:1609
#define gaugeSiteSize
Definition: face_gauge.cpp:34
QudaGaugeParam newQudaGaugeParam(void)
QudaPreserveSource preserve_source
Definition: quda.h:211
double clover_coeff
Definition: quda.h:233
int Vh
Definition: test_util.cpp:28
enum QudaTwistFlavorType_s QudaTwistFlavorType