QUDA  v0.7.0
A library for QCD on GPUs
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
deflation_test.cpp
Go to the documentation of this file.
1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <time.h>
4 #include <math.h>
5 #include <string.h>
6 
7 #include <util_quda.h>
8 #include <test_util.h>
9 #include <dslash_util.h>
10 #include <blas_reference.h>
13 #include "misc.h"
14 
15 #include "face_quda.h"
16 
17 #if defined(QMP_COMMS)
18 #include <qmp.h>
19 #elif defined(MPI_COMMS)
20 #include <mpi.h>
21 #endif
22 
23 #include <gauge_qio.h>
24 
25 #define MAX(a,b) ((a)>(b)?(a):(b))
26 
27 // In a typical application, quda.h is the only QUDA header required.
28 #include <quda.h>
29 
30 // Wilson, clover-improved Wilson, twisted mass, and domain wall are supported.
32 extern bool tune;
33 extern int device;
34 extern int xdim;
35 extern int ydim;
36 extern int zdim;
37 extern int tdim;
38 extern int Lsdim;
39 extern int gridsize_from_cmdline[];
41 extern QudaPrecision prec;
44 
45 extern char latfile[];
46 
47 extern void usage(char** );
48 
49 void
51 {
52  printfQuda("running the following test:\n");
53 
54  printfQuda("prec sloppy_prec link_recon sloppy_link_recon S_dimension T_dimension Ls_dimension\n");
55  printfQuda("%s %s %s %s %d/%d/%d %d %d\n",
59 
60  printfQuda("Grid partition info: X Y Z T\n");
61  printfQuda(" %d %d %d %d\n",
62  dimPartitioned(0),
63  dimPartitioned(1),
64  dimPartitioned(2),
65  dimPartitioned(3));
66 
67  return ;
68 
69 }
70 
71 int main(int argc, char **argv)
72 {
73 
74  for (int i = 1; i < argc; i++){
75  if(process_command_line_option(argc, argv, &i) == 0){
76  continue;
77  }
78  printfQuda("ERROR: Invalid option:%s\n", argv[i]);
79  usage(argv);
80  }
81 
83  prec_sloppy = prec;
84  }
87  }
88 
89  // initialize QMP/MPI, QUDA comms grid and RNG (test_util.cpp)
90  initComms(argc, argv, gridsize_from_cmdline);
91 
93 
94  // *** QUDA parameters begin here.
99  printfQuda("dslash_type %d not supported\n", dslash_type);
100  exit(0);
101  }
102 
105  QudaPrecision cuda_prec_sloppy = prec_sloppy;
106  QudaPrecision cuda_prec_precondition = QUDA_HALF_PRECISION;
107 
110 
111  double kappa5;
112 
113  gauge_param.X[0] = xdim;
114  gauge_param.X[1] = ydim;
115  gauge_param.X[2] = zdim;
116  gauge_param.X[3] = tdim;
117  inv_param.Ls = 1;
118 
119  gauge_param.anisotropy = 1.0;
120  gauge_param.type = QUDA_WILSON_LINKS;
121  gauge_param.gauge_order = QUDA_QDP_GAUGE_ORDER;
122  gauge_param.t_boundary = QUDA_ANTI_PERIODIC_T;
123 
124  gauge_param.cpu_prec = cpu_prec;
125  gauge_param.cuda_prec = cuda_prec;
126  gauge_param.reconstruct = link_recon;
127  gauge_param.cuda_prec_sloppy = cuda_prec_sloppy;
129  gauge_param.cuda_prec_precondition = cuda_prec_precondition;
131  gauge_param.gauge_fix = QUDA_GAUGE_FIXED_NO;
132 
133  inv_param.dslash_type = dslash_type;
134 
135  double mass = -0.4086;
136  inv_param.kappa = 1.0 / (2.0 * (1 + 3/gauge_param.anisotropy + mass));
137 
139  inv_param.mu = 0.12;
140  inv_param.epsilon = 0.1385;
141  //inv_param.twist_flavor = QUDA_TWIST_NONDEG_DOUBLET;
142  inv_param.twist_flavor = QUDA_TWIST_PLUS;
143  inv_param.Ls = (inv_param.twist_flavor == QUDA_TWIST_NONDEG_DOUBLET) ? 2 : 1;
144  } else if (dslash_type == QUDA_DOMAIN_WALL_DSLASH) {
145  inv_param.mass = 0.02;
146  inv_param.m5 = -1.8;
147  kappa5 = 0.5/(5 + inv_param.m5);
148  inv_param.Ls = Lsdim;
149  }
150 
151  // offsets used only by multi-shift solver
152  inv_param.num_offset = 4;
153  double offset[4] = {0.01, 0.02, 0.03, 0.04};
154  for (int i=0; i<inv_param.num_offset; i++) inv_param.offset[i] = offset[i];
155 
156  if (inv_param.dslash_type == QUDA_TWISTED_MASS_DSLASH) {
158  //inv_param.matpc_type = QUDA_MATPC_EVEN_EVEN;
159  //inv_param.solution_type = QUDA_MATPC_SOLUTION;
160  inv_param.solution_type = QUDA_MAT_SOLUTION;
161  } else {
162  inv_param.matpc_type = QUDA_MATPC_EVEN_EVEN;
164  }
165 
166  inv_param.dagger = QUDA_DAG_NO;
169 
170  inv_param.solve_type = QUDA_NORMOP_PC_SOLVE;
171 
172  inv_param.pipeline = 0;
173 
174  inv_param.gcrNkrylov = 10;
175  inv_param.tol = 1e-10;
176 
178  //inv_param.inv_type = QUDA_EIGCG_INVERTER;
179  inv_param.inv_type = QUDA_INC_EIGCG_INVERTER;
180 
181  inv_param.rhs_idx = 0;
182 
183  if(inv_param.inv_type == QUDA_EIGCG_INVERTER || inv_param.inv_type == QUDA_INC_EIGCG_INVERTER ){
184  inv_param.solve_type = QUDA_NORMOP_PC_SOLVE;
185  inv_param.nev = 8;
186  inv_param.max_search_dim = 128;
187  inv_param.deflation_grid = 24;//to test the stuff
188  inv_param.cuda_prec_ritz = cuda_prec;
189  inv_param.tol_restart = 5e+3*inv_param.tol;//think about this...
190  }else{
191  inv_param.nev = 0;
192  inv_param.max_search_dim = 0;
193  inv_param.tol_restart = 0.0;//restart is not requested...
194  }
195 
196 #if __COMPUTE_CAPABILITY__ >= 200
197  // require both L2 relative and heavy quark residual to determine convergence
199  inv_param.tol_hq = 1e-3; // specify a tolerance for the residual for heavy quark residual
200 #else
201  // Pre Fermi architecture only supports L2 relative residual norm
203 #endif
204  // these can be set individually
205  for (int i=0; i<inv_param.num_offset; i++) {
206  inv_param.tol_offset[i] = inv_param.tol;
207  inv_param.tol_hq_offset[i] = inv_param.tol_hq;
208  }
209 
210  inv_param.maxiter = 5000;
211  inv_param.reliable_delta = 1e-1; // ignored by multi-shift solver
212 
213  // domain decomposition preconditioner parameters
216  inv_param.precondition_cycle = 1;
217  inv_param.tol_precondition = 1e-1;
218  inv_param.maxiter_precondition = 10;
220  inv_param.cuda_prec_precondition = cuda_prec_precondition;
221  inv_param.omega = 1.0;
222 
223  inv_param.use_sloppy_partial_accumulator = 1;
224 
225  inv_param.cpu_prec = cpu_prec;
226  inv_param.cuda_prec = cuda_prec;
227  inv_param.cuda_prec_sloppy = cuda_prec_sloppy;
230  inv_param.dirac_order = QUDA_DIRAC_ORDER;
231 
234 
235  inv_param.tune = tune ? QUDA_TUNE_YES : QUDA_TUNE_NO;
236 
237  gauge_param.ga_pad = 0;//24*24*24/2;
238  inv_param.sp_pad = 0;//24*24*24/2;
239  inv_param.cl_pad = 0; // 24*24*24/2;
240 
241  // For multi-GPU, ga_pad must be large enough to store a time-slice
242 #ifdef MULTI_GPU
243  int x_face_size = gauge_param.X[1]*gauge_param.X[2]*gauge_param.X[3]/2;
244  int y_face_size = gauge_param.X[0]*gauge_param.X[2]*gauge_param.X[3]/2;
245  int z_face_size = gauge_param.X[0]*gauge_param.X[1]*gauge_param.X[3]/2;
246  int t_face_size = gauge_param.X[0]*gauge_param.X[1]*gauge_param.X[2]/2;
247  int pad_size =MAX(x_face_size, y_face_size);
248  pad_size = MAX(pad_size, z_face_size);
249  pad_size = MAX(pad_size, t_face_size);
250  gauge_param.ga_pad = pad_size;
251 #endif
252 
254  inv_param.clover_cpu_prec = cpu_prec;
255  inv_param.clover_cuda_prec = cuda_prec;
256  inv_param.clover_cuda_prec_sloppy = cuda_prec_sloppy;
257  inv_param.clover_cuda_prec_precondition = cuda_prec_precondition;
259  }
260 
261  inv_param.verbosity = QUDA_VERBOSE;
262 
263  // *** Everything between here and the call to initQuda() is
264  // *** application-specific.
265 
266  // set parameters for the reference Dslash, and prepare fields to be loaded
268  dw_setDims(gauge_param.X, inv_param.Ls);
269  } else {
270  setDims(gauge_param.X);
271  }
272 
273  setSpinorSiteSize(24);
274 
275  size_t gSize = (gauge_param.cpu_prec == QUDA_DOUBLE_PRECISION) ? sizeof(double) : sizeof(float);
276  size_t sSize = (inv_param.cpu_prec == QUDA_DOUBLE_PRECISION) ? sizeof(double) : sizeof(float);
277 
278  void *gauge[4], *clover_inv=0, *clover=0;
279 
280  for (int dir = 0; dir < 4; dir++) {
281  gauge[dir] = malloc(V*gaugeSiteSize*gSize);
282  }
283 
284  if (strcmp(latfile,"")) { // load in the command line supplied gauge field
285  read_gauge_field(latfile, gauge, gauge_param.cpu_prec, gauge_param.X, argc, argv);
286  construct_gauge_field(gauge, 2, gauge_param.cpu_prec, &gauge_param);
287  //printfQuda("Configuration load: done.");
288  } else { // else generate a random SU(3) field
289  construct_gauge_field(gauge, 1, gauge_param.cpu_prec, &gauge_param);
290  }
291 
293  double norm = 0.0; // clover components are random numbers in the range (-norm, norm)
294  double diag = 1.0; // constant added to the diagonal
295 
296  size_t cSize = (inv_param.clover_cpu_prec == QUDA_DOUBLE_PRECISION) ? sizeof(double) : sizeof(float);
297  clover_inv = malloc(V*cloverSiteSize*cSize);
298  construct_clover_field(clover_inv, norm, diag, inv_param.clover_cpu_prec);
299 
300  // The uninverted clover term is only needed when solving the unpreconditioned
301  // system or when using "asymmetric" even/odd preconditioning.
302  int preconditioned = (inv_param.solve_type == QUDA_DIRECT_PC_SOLVE ||
303  inv_param.solve_type == QUDA_NORMOP_PC_SOLVE);
304  int asymmetric = preconditioned &&
307  if (!preconditioned) {
308  clover = clover_inv;
309  clover_inv = NULL;
310  } else if (asymmetric) { // fake it by using the same random matrix
311  clover = clover_inv; // for both clover and clover_inv
312  } else {
313  clover = NULL;
314  }
315  }
316 
317  void *spinorIn = malloc(V*spinorSiteSize*sSize*inv_param.Ls);
318  void *spinorCheck = malloc(V*spinorSiteSize*sSize*inv_param.Ls);
319 
320  void *spinorOut = NULL;
321  spinorOut = malloc(V*spinorSiteSize*sSize*inv_param.Ls);
322 
323  void *ritzVects = 0;
324 
325  double *inverse_ritzVals = 0;
326 
327  const int defl_dim = inv_param.deflation_grid*inv_param.nev;
328 
329  ritzVects = malloc(defl_dim*(Vh)*spinorSiteSize*sSize*inv_param.Ls);
330 
331  memset(ritzVects, 0, defl_dim*inv_param.Ls*(Vh)*spinorSiteSize*sSize);
332 
333  inverse_ritzVals = (double*)malloc(defl_dim*sizeof(double));
334 
335  //printf("\nDeflation: %p :: %u\n", ritzVects, defl_size);
336 
337  // create a point source at 0 (in each subvolume... FIXME)
338  memset(spinorIn, 0, inv_param.Ls*V*spinorSiteSize*sSize);
339 
340  memset(spinorCheck, 0, inv_param.Ls*V*spinorSiteSize*sSize);
341 
342  memset(spinorOut, 0, inv_param.Ls*V*spinorSiteSize*sSize);
343 
344  if (inv_param.cpu_prec == QUDA_SINGLE_PRECISION)
345  {
346  //((float*)spinorIn)[0] = 1.0;
347  for (int i=0; i<inv_param.Ls*V*spinorSiteSize; i++) ((float*)spinorIn)[i] = rand() / (float)RAND_MAX;
348  }
349  else
350  {
351  //((double*)spinorIn)[0] = 1.0;
352  for (int i=0; i<inv_param.Ls*V*spinorSiteSize; i++) ((double*)spinorIn)[i] = rand() / (double)RAND_MAX;
353  //for (int i=0; i<inv_param.Ls*24*24*24*spinorSiteSize; i++) ((double*)spinorIn)[i] = comm_rank() == 0 ? rand() / (double)RAND_MAX: 0.0;
354  }
355 
356  // start the timer
357  double time0 = -((double)clock());
358 
359  // initialize the QUDA library
360  initQuda(device);
361 
362  printfQuda("\nOpen MAGMA...\n");
363 
364  openMagma();
365 
366  printfQuda("\n...done.\n");
367 
368  // load the gauge field
369  loadGaugeQuda((void*)gauge, &gauge_param);
370 
371  // load the clover term, if desired
372  if (dslash_type == QUDA_CLOVER_WILSON_DSLASH) loadCloverQuda(clover, clover_inv, &inv_param);
373 
374  // perform the inversions
375  printfQuda("\nStart the incremental stage.\n");
376 
377  for(int is = 0; is < inv_param.deflation_grid; is++)
378  {
379  if (inv_param.cpu_prec == QUDA_SINGLE_PRECISION)
380  {
381  for (int i=0; i<inv_param.Ls*V*spinorSiteSize; i++) ((float*)spinorIn)[i] = rand() / (float)RAND_MAX;
382  }
383  else
384  {
385  memset(spinorIn, 0, inv_param.Ls*V*spinorSiteSize*sSize);
386  memset(spinorOut, 0, inv_param.Ls*V*spinorSiteSize*sSize);
387 
388  for (int i=0; i<inv_param.Ls*V*spinorSiteSize; i++) ((double*)spinorIn)[i] = rand() / (double)RAND_MAX;
389  //for (int i=0; i<inv_param.Ls*24*24*24*spinorSiteSize; i++) ((double*)spinorIn)[i] = comm_rank() == 0 ? rand() / (double)RAND_MAX: 0.0;
390  }
391 
392  double time1 = -((double)clock());
393 
394  inv_param.cuda_prec_sloppy = cuda_prec_sloppy;
395  incrementalEigQuda(spinorOut, spinorIn, &inv_param, NULL, NULL, 0);
396 
397  time1 += clock();
398  time1 /= CLOCKS_PER_SEC;
399 
400  printfQuda("\nDone: %i iter / %g secs = %g Gflops, total time = %g secs\n",
401  inv_param.iter, inv_param.secs, inv_param.gflops/inv_param.secs, time1);
402 
403  printfQuda("\n Current RHS : %d\n", inv_param.rhs_idx);
404  }
405 
406  printfQuda("\n Total eigCG RHS : %d\n", inv_param.rhs_idx);
407 //***
408  printfQuda("\nStart the initCG stage.\n");
409 
410  const int initCGruns = 16;
411 
412  int last_rhs = 0;
413 
414  for(int is = inv_param.deflation_grid; is < (inv_param.deflation_grid+initCGruns); is++)
415  {
416  if (inv_param.cpu_prec == QUDA_SINGLE_PRECISION)
417  {
418  for (int i=0; i<inv_param.Ls*V*spinorSiteSize; i++) ((float*)spinorIn)[i] = rand() / (float)RAND_MAX;
419  }
420  else
421  {
422  memset(spinorIn, 0, inv_param.Ls*V*spinorSiteSize*sSize);
423  memset(spinorOut, 0, inv_param.Ls*V*spinorSiteSize*sSize);
424 
425  for (int i=0; i<inv_param.Ls*V*spinorSiteSize; i++) ((double*)spinorIn)[i] = rand() / (double)RAND_MAX;
426  //for (int i=0; i<inv_param.Ls*24*24*24*spinorSiteSize; i++) ((double*)spinorIn)[i] = comm_rank() == 0 ? rand() / (double)RAND_MAX: 0.0;
427  }
428 
429  if(is == (inv_param.deflation_grid+initCGruns-1)) last_rhs = 1;
430 
431  double time1 = -((double)clock());
432 
433  inv_param.cuda_prec_sloppy = cuda_prec_precondition;//QUDA_HALF_PRECISION
434  incrementalEigQuda(spinorOut, spinorIn, &inv_param, ritzVects, inverse_ritzVals, last_rhs);
435 
436  time1 += clock();
437  time1 /= CLOCKS_PER_SEC;
438 
439  printfQuda("\nDone: %i iter / %g secs = %g Gflops, total time = %g secs\n", inv_param.iter, inv_param.secs, inv_param.gflops/inv_param.secs, time1);
440  }
441 
442  printfQuda("\nTotal InitCG RHS : %d\n", inv_param.rhs_idx);
443 
444  // stop the timer
445  time0 += clock();
446  time0 /= CLOCKS_PER_SEC;
447 
448  closeMagma();
449 
450  printfQuda("Device memory used:\n Spinor: %f GiB\n Gauge: %f GiB\n",
451  inv_param.spinorGiB, gauge_param.gaugeGiB);
452  if (dslash_type == QUDA_CLOVER_WILSON_DSLASH) printfQuda(" Clover: %f GiB\n", inv_param.cloverGiB);
453  printfQuda("\nDone: %i iter / %g secs = %g Gflops, total time = %g secs\n",
454  inv_param.iter, inv_param.secs, inv_param.gflops/inv_param.secs, time0);
455 
456  if (inv_param.solution_type == QUDA_MAT_SOLUTION) {
457 
459  if(inv_param.twist_flavor == QUDA_TWIST_PLUS || inv_param.twist_flavor == QUDA_TWIST_MINUS)
460  tm_mat(spinorCheck, gauge, spinorOut, inv_param.kappa, inv_param.mu, inv_param.twist_flavor, 0, inv_param.cpu_prec, gauge_param);
461  else
462  {
463  int tm_offset = V*spinorSiteSize; //12*spinorRef->Volume();
464  void *evenOut = spinorCheck;
465  void *oddOut = cpu_prec == sizeof(double) ? (void*)((double*)evenOut + tm_offset): (void*)((float*)evenOut + tm_offset);
466 
467  void *evenIn = spinorOut;
468  void *oddIn = cpu_prec == sizeof(double) ? (void*)((double*)evenIn + tm_offset): (void*)((float*)evenIn + tm_offset);
469 
470  tm_ndeg_mat(evenOut, oddOut, gauge, evenIn, oddIn, inv_param.kappa, inv_param.mu, inv_param.epsilon, 0, inv_param.cpu_prec, gauge_param);
471  }
473  wil_mat(spinorCheck, gauge, spinorOut, inv_param.kappa, 0, inv_param.cpu_prec, gauge_param);
474  } else if (dslash_type == QUDA_DOMAIN_WALL_DSLASH) {
475  dw_mat(spinorCheck, gauge, spinorOut, kappa5, inv_param.dagger, inv_param.cpu_prec, gauge_param, inv_param.mass);
476  } else {
477  printfQuda("Unsupported dslash_type\n");
478  exit(-1);
479  }
480  if (inv_param.mass_normalization == QUDA_MASS_NORMALIZATION) {
482  ax(0.5/kappa5, spinorCheck, V*spinorSiteSize*inv_param.Ls, inv_param.cpu_prec);
483  } else {
484  ax(0.5/inv_param.kappa, spinorCheck, V*spinorSiteSize, inv_param.cpu_prec);
485  }
486  }
487 
488  } else if(inv_param.solution_type == QUDA_MATPC_SOLUTION) {
489 
491  if (inv_param.twist_flavor != QUDA_TWIST_MINUS && inv_param.twist_flavor != QUDA_TWIST_PLUS)
492  errorQuda("Twisted mass solution type not supported");
493  tm_matpc(spinorCheck, gauge, spinorOut, inv_param.kappa, inv_param.mu, inv_param.twist_flavor,
494  inv_param.matpc_type, 0, inv_param.cpu_prec, gauge_param);
496  wil_matpc(spinorCheck, gauge, spinorOut, inv_param.kappa, inv_param.matpc_type, 0,
497  inv_param.cpu_prec, gauge_param);
498  } else if (dslash_type == QUDA_DOMAIN_WALL_DSLASH) {
499  dw_matpc(spinorCheck, gauge, spinorOut, kappa5, inv_param.matpc_type, 0, inv_param.cpu_prec, gauge_param, inv_param.mass);
500  } else {
501  printfQuda("Unsupported dslash_type\n");
502  exit(-1);
503  }
504 
505  if (inv_param.mass_normalization == QUDA_MASS_NORMALIZATION) {
507  ax(0.25/(kappa5*kappa5), spinorCheck, Vh*spinorSiteSize*inv_param.Ls, inv_param.cpu_prec);
508  } else {
509  ax(0.25/(inv_param.kappa*inv_param.kappa), spinorCheck, Vh*spinorSiteSize, inv_param.cpu_prec);
510 
511  }
512  }
513 
514  }
515 
516  int vol = inv_param.solution_type == QUDA_MAT_SOLUTION ? V : Vh;
517  mxpy(spinorIn, spinorCheck, vol*spinorSiteSize*inv_param.Ls, inv_param.cpu_prec);
518  double nrm2 = norm_2(spinorCheck, vol*spinorSiteSize*inv_param.Ls, inv_param.cpu_prec);
519  double src2 = norm_2(spinorIn, vol*spinorSiteSize*inv_param.Ls, inv_param.cpu_prec);
520  double l2r = sqrt(nrm2 / src2);
521 
522  printfQuda("Residuals: (L2 relative) tol %g, QUDA = %g, host = %g; (heavy-quark) tol %g, QUDA = %g\n",
523  inv_param.tol, inv_param.true_res, l2r, inv_param.tol_hq, inv_param.true_res_hq);
524 
525  free(ritzVects);
526 
527  free(inverse_ritzVals);
528 
529  freeGaugeQuda();
531 
532  // finalize the QUDA library
533  endQuda();
534 
535  // finalize the communications layer
536  finalizeComms();
537 
538  return 0;
539 }
int maxiter_precondition
Definition: quda.h:216
QudaGaugeParam gauge_param
Definition: dslash_test.cpp:37
double secs
Definition: quda.h:183
int dimPartitioned(int dim)
Definition: test_util.cpp:1577
QudaDiracFieldOrder dirac_order
Definition: quda.h:156
QudaMassNormalization mass_normalization
Definition: quda.h:146
double tol_hq_offset[QUDA_MAX_MULTI_SHIFT]
Definition: quda.h:134
QudaReconstructType reconstruct_sloppy
Definition: quda.h:46
double anisotropy
Definition: quda.h:31
void freeCloverQuda(void)
__constant__ int Vh
char latfile[]
Definition: test_util.cpp:1561
void endQuda(void)
void construct_gauge_field(void **gauge, int type, QudaPrecision precision, QudaGaugeParam *param)
Definition: test_util.cpp:1003
QudaSolveType solve_type
Definition: quda.h:143
QudaVerbosity verbosity_precondition
Definition: quda.h:210
enum QudaPrecision_s QudaPrecision
int V
Definition: test_util.cpp:29
int ga_pad
Definition: quda.h:53
void dw_setDims(int *X, const int L5)
Definition: test_util.cpp:125
QudaReconstructType link_recon_sloppy
Definition: test_util.cpp:1550
double mu
Definition: quda.h:97
QudaGaugeFixed gauge_fix
Definition: quda.h:51
QudaTune tune
Definition: quda.h:185
QudaSchwarzType schwarz_type
Definition: quda.h:225
__host__ __device__ ValueType norm(const complex< ValueType > &z)
Returns the magnitude of z squared.
Definition: complex_quda.h:859
int ydim
Definition: test_util.cpp:1554
void tm_mat(void *out, void **gauge, void *in, double kappa, double mu, QudaTwistFlavorType flavor, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param)
enum QudaResidualType_s QudaResidualType
QudaInverterType inv_type_precondition
Definition: quda.h:203
QudaLinkType type
Definition: quda.h:35
double kappa
Definition: quda.h:89
QudaPrecision cuda_prec_ritz
Definition: quda.h:238
#define errorQuda(...)
Definition: util_quda.h:73
double tol
Definition: quda.h:102
QudaDslashType dslash_type
Definition: quda.h:85
QudaReconstructType reconstruct_precondition
Definition: quda.h:49
QudaInverterType inv_type
Definition: quda.h:86
QudaPrecision cuda_prec
Definition: quda.h:152
#define cloverSiteSize
Definition: test_util.h:8
void setDims(int *)
Definition: test_util.cpp:88
__host__ __device__ ValueType sqrt(ValueType x)
Definition: complex_quda.h:105
void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param)
QudaPrecision cpu_prec
Definition: quda.h:151
int process_command_line_option(int argc, char **argv, int *idx)
Definition: test_util.cpp:1635
void tm_ndeg_mat(void *evenOut, void *oddOut, void **gauge, void *evenIn, void *oddIn, double kappa, double mu, double epsilon, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
#define gaugeSiteSize
int gridsize_from_cmdline[]
Definition: test_util.cpp:1559
QudaDagType dagger
Definition: quda.h:145
void finalizeComms()
Definition: test_util.cpp:65
QudaGaugeFieldOrder gauge_order
Definition: quda.h:36
void dw_mat(void *out, void **gauge, void *in, double kappa, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
double true_res
Definition: quda.h:105
void tm_matpc(void *outEven, void **gauge, void *inEven, double kappa, double mu, QudaTwistFlavorType flavor, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
const char * get_prec_str(QudaPrecision prec)
Definition: misc.cpp:658
void loadCloverQuda(void *h_clover, void *h_clovinv, QudaInvertParam *inv_param)
QudaPrecision cpu_prec
Definition: dslash_test.cpp:34
void openMagma()
#define spinorSiteSize
QudaPrecision clover_cuda_prec_sloppy
Definition: quda.h:163
QudaFieldLocation input_location
Definition: quda.h:82
void freeGaugeQuda(void)
void ax(double a, void *x, int len, QudaPrecision precision)
double reliable_delta
Definition: quda.h:108
QudaSolutionType solution_type
Definition: quda.h:142
QudaSolverNormalization solver_normalization
Definition: quda.h:147
QudaPrecision clover_cuda_prec
Definition: quda.h:162
int precondition_cycle
Definition: quda.h:222
#define MAX(a, b)
void initQuda(int device)
void dw_matpc(void *out, void **gauge, void *in, double kappa, QudaMatPCType matpc_type, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
double spinorGiB
Definition: quda.h:180
int device
Definition: test_util.cpp:1546
QudaFieldLocation output_location
Definition: quda.h:83
QudaPrecision clover_cuda_prec_precondition
Definition: quda.h:164
bool tune
Definition: test_util.cpp:1562
double m5
Definition: quda.h:91
int Lsdim
Definition: test_util.cpp:1557
QudaPrecision cuda_prec_sloppy
Definition: quda.h:153
QudaVerbosity verbosity
Definition: quda.h:174
void setSpinorSiteSize(int n)
Definition: test_util.cpp:150
double tol_offset[QUDA_MAX_MULTI_SHIFT]
Definition: quda.h:131
QudaInvertParam newQudaInvertParam(void)
double gflops
Definition: quda.h:182
const char * get_recon_str(QudaReconstructType recon)
Definition: misc.cpp:724
QudaPrecision cuda_prec_precondition
Definition: quda.h:48
QudaCloverFieldOrder clover_order
Definition: quda.h:166
double tol_hq
Definition: quda.h:104
double true_res_hq
Definition: quda.h:106
QudaGammaBasis gamma_basis
Definition: quda.h:158
QudaReconstructType link_recon
Definition: test_util.cpp:1549
QudaPrecision cuda_prec_sloppy
Definition: quda.h:45
int max_search_dim
Definition: quda.h:242
double tol_precondition
Definition: quda.h:213
void mxpy(void *x, void *y, int len, QudaPrecision precision)
double offset[QUDA_MAX_MULTI_SHIFT]
Definition: quda.h:128
void incrementalEigQuda(void *_h_x, void *_h_b, QudaInvertParam *param, void *_h_u, double *inv_eigenvals, int last_rhs)
int use_sloppy_partial_accumulator
Definition: quda.h:109
QudaReconstructType reconstruct
Definition: quda.h:43
void read_gauge_field(char *filename, void *gauge[], QudaPrecision precision, int *X, int argc, char *argv[])
Definition: gauge_qio.cpp:86
QudaPrecision cuda_prec
Definition: quda.h:42
int X[4]
Definition: quda.h:29
double mass
Definition: quda.h:88
int gcrNkrylov
Definition: quda.h:192
double norm_2(void *v, int len, QudaPrecision precision)
QudaPrecision cuda_prec
Definition: dslash_test.cpp:35
void construct_clover_field(void *clover, double norm, double diag, QudaPrecision precision)
Definition: test_util.cpp:1103
void closeMagma()
QudaInvertParam inv_param
Definition: dslash_test.cpp:38
void wil_mat(void *out, void **gauge, void *in, double kappa, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param)
double gaugeGiB
Definition: quda.h:60
QudaPrecision cuda_prec_precondition
Definition: quda.h:154
void * memset(void *s, int c, size_t n)
int deflation_grid
Definition: quda.h:246
double tol_restart
Definition: quda.h:103
if(x2 >=X2) return
void display_test_info()
enum QudaReconstructType_s QudaReconstructType
Main header file for the QUDA library.
cpuColorSpinorField * spinorOut
Definition: dslash_test.cpp:40
#define printfQuda(...)
Definition: util_quda.h:67
QudaTboundary t_boundary
Definition: quda.h:38
QudaTwistFlavorType twist_flavor
Definition: quda.h:100
int main(int argc, char **argv)
enum QudaDslashType_s QudaDslashType
void usage(char **)
Definition: test_util.cpp:1584
QudaResidualType residual_type
Definition: quda.h:235
void wil_matpc(void *outEven, void **gauge, void *inEven, double kappa, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
int num_offset
Definition: quda.h:123
double cloverGiB
Definition: quda.h:181
double epsilon
Definition: quda.h:98
double omega
Definition: quda.h:219
QudaPrecision prec_sloppy
Definition: test_util.cpp:1552
double mass
Definition: test_util.cpp:1569
QudaPrecision prec
Definition: test_util.cpp:1551
int xdim
Definition: test_util.cpp:1553
int tdim
Definition: test_util.cpp:1556
QudaDslashType dslash_type
Definition: test_util.cpp:1560
QudaPrecision clover_cpu_prec
Definition: quda.h:161
void initComms(int argc, char **argv, const int *commDims)
Definition: test_util.cpp:48
void * gauge[4]
Definition: su3_test.cpp:15
QudaMatPCType matpc_type
Definition: quda.h:144
int zdim
Definition: test_util.cpp:1555
double kappa5
Definition: dslash_test.cpp:32
QudaPrecision cpu_prec
Definition: quda.h:40
QudaGaugeParam newQudaGaugeParam(void)
QudaPreserveSource preserve_source
Definition: quda.h:149