QUDA  v0.7.0
A library for QCD on GPUs
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
staggered_invert_test.cpp
Go to the documentation of this file.
1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <time.h>
4 #include <math.h>
5 
6 #include <test_util.h>
7 #include <dslash_util.h>
8 #include <blas_reference.h>
10 #include <quda.h>
11 #include <string.h>
12 #include <face_quda.h>
13 #include "misc.h"
14 #include <gauge_field.h>
15 #include <blas_quda.h>
16 
17 #if defined(QMP_COMMS)
18 #include <qmp.h>
19 #elif defined(MPI_COMMS)
20 #include <mpi.h>
21 #endif
22 
23 #ifdef MULTI_GPU
24 #include <face_quda.h>
25 #endif
26 
27 #define MAX(a,b) ((a)>(b)?(a):(b))
28 #define mySpinorSiteSize 6
29 
30 extern void usage(char** argv);
31 void *qdp_fatlink[4];
32 void *qdp_longlink[4];
33 
34 void *fatlink;
35 void *longlink;
36 
37 #ifdef MULTI_GPU
38 void** ghost_fatlink, **ghost_longlink;
39 #endif
40 
41 extern int device;
42 extern bool tune;
43 
45 extern QudaPrecision prec;
47 
54 
57 
58 static double tol = 1e-7;
59 
60 extern int test_type;
61 extern int xdim;
62 extern int ydim;
63 extern int zdim;
64 extern int tdim;
65 extern int gridsize_from_cmdline[];
66 
67 // Dirac operator type
69 
71 extern double mass; // the mass of the Dirac operator
72 
73 static void end();
74 
75 template<typename Float>
77  for(int i = 0; i < Vh; i++) {
78  for (int s = 0; s < 1; s++) {
79  for (int m = 0; m < 3; m++) {
80  res[i*(1*3*2) + s*(3*2) + m*(2) + 0] = rand() / (Float)RAND_MAX;
81  res[i*(1*3*2) + s*(3*2) + m*(2) + 1] = rand() / (Float)RAND_MAX;
82  }
83  }
84  }
85 }
86 
87 
// Populate the gauge and inverter parameter structs used by the staggered
// inversion test: lattice extents X1..X4 go to gaugeParam->X, the mass,
// tolerance and tadpole coefficient are copied from the arguments of the same
// name, and the remaining fields are set to fixed test defaults.
// NOTE(review): `maxiter` and `reliable_delta` are accepted as arguments, but
// the visible body assigns the literals 500000 and 1e-1 instead - confirm
// whether ignoring the caller's values is intended.
// NOTE(review): this listing omits several lines of the definition (the name
// line, the leading parameters and some assignments); consult the original
// file before relying on this text.
88 static void
90  int X1, int X2, int X3, int X4,
93  double mass, double tol, int maxiter, double reliable_delta,
94  double tadpole_coeff
95  )
96 {
97  gaugeParam->X[0] = X1;
98  gaugeParam->X[1] = X2;
99  gaugeParam->X[2] = X3;
100  gaugeParam->X[3] = X4;
101 
102  gaugeParam->cpu_prec = cpu_prec;
103  gaugeParam->cuda_prec = prec;
104  gaugeParam->reconstruct = link_recon;
105  gaugeParam->cuda_prec_sloppy = prec_sloppy;
107  gaugeParam->gauge_fix = QUDA_GAUGE_FIXED_NO;
108  gaugeParam->anisotropy = 1.0;
109  gaugeParam->tadpole_coeff = tadpole_coeff;
110  gaugeParam->scale = -1.0/(24.0*tadpole_coeff*tadpole_coeff);
111 
112  gaugeParam->t_boundary = QUDA_ANTI_PERIODIC_T;
113  gaugeParam->gauge_order = QUDA_MILC_GAUGE_ORDER;
// gauge-field padding: half of the X1*X2*X3 volume
114  gaugeParam->ga_pad = X1*X2*X3/2;
115 
116  inv_param->verbosity = QUDA_VERBOSE;
117  inv_param->mass = mass;
118 
119  // outer solver parameters
120  inv_param->inv_type = inv_type;
121  inv_param->tol = tol;
122  inv_param->tol_restart = 1e-3; // no theoretical background for this parameter...
// hard-coded: the `maxiter` argument is not used here
123  inv_param->maxiter = 500000;
// hard-coded: the `reliable_delta` argument is not used here
124  inv_param->reliable_delta = 1e-1;
125  inv_param->use_sloppy_partial_accumulator = false;
126  inv_param->pipeline = false;
127 
128 
129 
130 #if __COMPUTE_CAPABILITY__ >= 200
131  // require both L2 relative and heavy quark residual to determine convergence
133  inv_param->tol_hq = 1e-3; // specify a tolerance for the residual for heavy quark residual
134 #else
135  // Pre Fermi architecture only supports L2 relative residual norm
137 #endif
139 
140 
141 
142  inv_param->Nsteps = 2;
143 
144 
145  //inv_param->inv_type = QUDA_GCR_INVERTER;
146  //inv_param->gcrNkrylov = 10;
147 
148  // domain decomposition preconditioner parameters
150  inv_param->tol_precondition = 1e-1;
151  inv_param->maxiter_precondition = 10;
152  inv_param->verbosity_precondition = QUDA_SILENT;
154 
156  inv_param->solve_type = QUDA_NORMOP_PC_SOLVE;
157  inv_param->matpc_type = QUDA_MATPC_EVEN_EVEN;
158  inv_param->dagger = QUDA_DAG_NO;
160 
161  inv_param->cpu_prec = cpu_prec;
162  inv_param->cuda_prec = prec;
163  inv_param->cuda_prec_sloppy = prec_sloppy;
165  inv_param->gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS; // this is meaningless, but must be thus set
166  inv_param->dirac_order = QUDA_DIRAC_ORDER;
167 
170  inv_param->dslash_type = dslash_type;
171 
// `tune` is the file-level flag declared extern above
172  inv_param->tune = tune ? QUDA_TUNE_YES : QUDA_TUNE_NO;
// spinor padding: same half-volume value as ga_pad
173  inv_param->sp_pad = X1*X2*X3/2;
175 
178 }
179 
180 
181  int
183 {
184  QudaGaugeParam gaugeParam = newQudaGaugeParam();
185  QudaInvertParam inv_param = newQudaInvertParam();
186 
187  set_params(&gaugeParam, &inv_param,
188  xdim, ydim, zdim, tdim,
189  cpu_prec, prec, prec_sloppy,
190  link_recon, link_recon_sloppy, mass, tol, 500, 1e-3,
191  0.8);
192 
193  // this must be before the FaceBuffer is created (this is because it allocates pinned memory - FIXME)
194  initQuda(device);
195 
196  setDims(gaugeParam.X);
198 
199  size_t gSize = (gaugeParam.cpu_prec == QUDA_DOUBLE_PRECISION) ? sizeof(double) : sizeof(float);
200  for (int dir = 0; dir < 4; dir++) {
201  qdp_fatlink[dir] = malloc(V*gaugeSiteSize*gSize);
202  qdp_longlink[dir] = malloc(V*gaugeSiteSize*gSize);
203  }
204  fatlink = malloc(4*V*gaugeSiteSize*gSize);
205  longlink = malloc(4*V*gaugeSiteSize*gSize);
206 
208  &gaugeParam, dslash_type);
209 
210  const double cos_pi_3 = 0.5; // Cos(pi/3)
211  const double sin_pi_3 = sqrt(0.75); // Sin(pi/3)
212 
213  for(int dir=0; dir<4; ++dir){
214  for(int i=0; i<V; ++i){
215  for(int j=0; j<gaugeSiteSize; ++j){
216  if(gaugeParam.cpu_prec == QUDA_DOUBLE_PRECISION){
217  ((double*)qdp_fatlink[dir])[i*gaugeSiteSize + j] = 0.5*rand()/RAND_MAX;
218  if(link_recon != QUDA_RECONSTRUCT_8 && link_recon != QUDA_RECONSTRUCT_12){ // incorporate non-trivial phase into long links
219  if(j%2 == 0){
220  const double real = ((double*)qdp_longlink[dir])[i*gaugeSiteSize + j];
221  const double imag = ((double*)qdp_longlink[dir])[i*gaugeSiteSize + j + 1];
222  ((double*)qdp_longlink[dir])[i*gaugeSiteSize + j] = real*cos_pi_3 - imag*sin_pi_3;
223  ((double*)qdp_longlink[dir])[i*gaugeSiteSize + j + 1] = real*sin_pi_3 + imag*cos_pi_3;
224  }
225  }
226  ((double*)fatlink)[(i*4 + dir)*gaugeSiteSize + j] = ((double*)qdp_fatlink[dir])[i*gaugeSiteSize + j];
227  ((double*)longlink)[(i*4 + dir)*gaugeSiteSize + j] = ((double*)qdp_longlink[dir])[i*gaugeSiteSize + j];
228  }else{
229  ((float*)qdp_fatlink[dir])[i] = 0.5*rand()/RAND_MAX;
230  if(link_recon != QUDA_RECONSTRUCT_8 && link_recon != QUDA_RECONSTRUCT_12){ // incorporate non-trivial phase into long links
231  if(j%2 == 0){
232  const float real = ((float*)qdp_longlink[dir])[i*gaugeSiteSize + j];
233  const float imag = ((float*)qdp_longlink[dir])[i*gaugeSiteSize + j + 1];
234  ((float*)qdp_longlink[dir])[i*gaugeSiteSize + j] = real*cos_pi_3 - imag*sin_pi_3;
235  ((float*)qdp_longlink[dir])[i*gaugeSiteSize + j + 1] = real*sin_pi_3 + imag*cos_pi_3;
236  }
237  }
238  ((double*)fatlink)[(i*4 + dir)*gaugeSiteSize + j] = ((double*)qdp_fatlink[dir])[i*gaugeSiteSize + j];
239  ((float*)fatlink)[(i*4 + dir)*gaugeSiteSize + j] = ((float*)qdp_fatlink[dir])[i*gaugeSiteSize + j];
240  ((float*)longlink)[(i*4 + dir)*gaugeSiteSize + j] = ((float*)qdp_longlink[dir])[i*gaugeSiteSize + j];
241  }
242  }
243  }
244  }
245 
246 
248  csParam.nColor=3;
249  csParam.nSpin=1;
250  csParam.nDim=4;
251  for(int d = 0; d < 4; d++) {
252  csParam.x[d] = gaugeParam.X[d];
253  }
254  csParam.x[0] /= 2;
255 
256  csParam.precision = inv_param.cpu_prec;
257  csParam.pad = 0;
261  csParam.gammaBasis = inv_param.gamma_basis;
262  csParam.create = QUDA_ZERO_FIELD_CREATE;
263  in = new cpuColorSpinorField(csParam);
264  out = new cpuColorSpinorField(csParam);
265  ref = new cpuColorSpinorField(csParam);
266  tmp = new cpuColorSpinorField(csParam);
267 
268  if (inv_param.cpu_prec == QUDA_SINGLE_PRECISION){
269  constructSpinorField((float*)in->V());
270  }else{
271  constructSpinorField((double*)in->V());
272  }
273 
274 #ifdef MULTI_GPU
275  int tmp_value = MAX(ydim*zdim*tdim/2, xdim*zdim*tdim/2);
276  tmp_value = MAX(tmp_value, xdim*ydim*tdim/2);
277  tmp_value = MAX(tmp_value, xdim*ydim*zdim/2);
278 
279  int fat_pad = tmp_value;
280  int link_pad = 3*tmp_value;
281 
282  // FIXME: currently assume staggered is SU(3)
283  gaugeParam.type = dslash_type == QUDA_STAGGERED_DSLASH ?
285  gaugeParam.reconstruct = QUDA_RECONSTRUCT_NO;
286  GaugeFieldParam cpuFatParam(fatlink, gaugeParam);
287  cpuFat = new cpuGaugeField(cpuFatParam);
288  ghost_fatlink = (void**)cpuFat->Ghost();
289 
290  gaugeParam.type = QUDA_ASQTAD_LONG_LINKS;
291  GaugeFieldParam cpuLongParam(longlink, gaugeParam);
292  cpuLong = new cpuGaugeField(cpuLongParam);
293  ghost_longlink = (void**)cpuLong->Ghost();
294 
295  gaugeParam.type = dslash_type == QUDA_STAGGERED_DSLASH ?
297  gaugeParam.ga_pad = fat_pad;
298  gaugeParam.reconstruct= gaugeParam.reconstruct_sloppy = QUDA_RECONSTRUCT_NO;
300  loadGaugeQuda(fatlink, &gaugeParam);
301 
303  gaugeParam.type = QUDA_ASQTAD_LONG_LINKS;
304  gaugeParam.ga_pad = link_pad;
305  gaugeParam.reconstruct= link_recon;
307  loadGaugeQuda(longlink, &gaugeParam);
308  }
309 #else
310  gaugeParam.type = QUDA_ASQTAD_FAT_LINKS;
311  gaugeParam.reconstruct = gaugeParam.reconstruct_sloppy = QUDA_RECONSTRUCT_NO;
313  loadGaugeQuda(fatlink, &gaugeParam);
314 
316  gaugeParam.type = QUDA_ASQTAD_LONG_LINKS;
317  gaugeParam.reconstruct = link_recon;
319  loadGaugeQuda(longlink, &gaugeParam);
320  }
321 #endif
322 
323  double time0 = -((double)clock()); // Start the timer
324 
325  double nrm2=0;
326  double src2=0;
327  int ret = 0;
328 
329 
330 
331  switch(test_type){
332  case 0: //even
334  inv_param.inv_type = QUDA_GCR_INVERTER;
335  inv_param.gcrNkrylov = 50;
336  }else if(inv_type == QUDA_PCG_INVERTER){
337  inv_param.inv_type = QUDA_PCG_INVERTER;
338  }
339  inv_param.matpc_type = QUDA_MATPC_EVEN_EVEN;
340 
341  invertQuda(out->V(), in->V(), &inv_param);
342 
343  time0 += clock();
344  time0 /= CLOCKS_PER_SEC;
345 
346 
347 
348 #ifdef MULTI_GPU
349  matdagmat_mg4dir(ref, qdp_fatlink, qdp_longlink, ghost_fatlink, ghost_longlink,
350  out, mass, 0, inv_param.cpu_prec, gaugeParam.cpu_prec, tmp, QUDA_EVEN_PARITY);
351 #else
352  matdagmat(ref->V(), qdp_fatlink, qdp_longlink, out->V(), mass, 0, inv_param.cpu_prec, gaugeParam.cpu_prec, tmp->V(), QUDA_EVEN_PARITY);
353 #endif
354 
355  mxpy(in->V(), ref->V(), Vh*mySpinorSiteSize, inv_param.cpu_prec);
356  nrm2 = norm_2(ref->V(), Vh*mySpinorSiteSize, inv_param.cpu_prec);
357  src2 = norm_2(in->V(), Vh*mySpinorSiteSize, inv_param.cpu_prec);
358 
359  break;
360 
361  case 1: //odd
363  inv_param.inv_type = QUDA_GCR_INVERTER;
364  inv_param.gcrNkrylov = 50;
365  }else if(inv_type == QUDA_PCG_INVERTER){
366  inv_param.inv_type = QUDA_PCG_INVERTER;
367  }
368 
369  inv_param.matpc_type = QUDA_MATPC_ODD_ODD;
370  invertQuda(out->V(), in->V(), &inv_param);
371  time0 += clock(); // stop the timer
372  time0 /= CLOCKS_PER_SEC;
373 
374 #ifdef MULTI_GPU
375  matdagmat_mg4dir(ref, qdp_fatlink, qdp_longlink, ghost_fatlink, ghost_longlink,
376  out, mass, 0, inv_param.cpu_prec, gaugeParam.cpu_prec, tmp, QUDA_ODD_PARITY);
377 #else
378  matdagmat(ref->V(), qdp_fatlink, qdp_longlink, out->V(), mass, 0, inv_param.cpu_prec, gaugeParam.cpu_prec, tmp->V(), QUDA_ODD_PARITY);
379 #endif
380  mxpy(in->V(), ref->V(), Vh*mySpinorSiteSize, inv_param.cpu_prec);
381  nrm2 = norm_2(ref->V(), Vh*mySpinorSiteSize, inv_param.cpu_prec);
382  src2 = norm_2(in->V(), Vh*mySpinorSiteSize, inv_param.cpu_prec);
383 
384  break;
385 
386  case 2: //full spinor
387 
388  errorQuda("full spinor not supported\n");
389  break;
390 
391  case 3: //multi mass CG, even
392  case 4:
393 
394 #define NUM_OFFSETS 12
395 
396  {
397  double masses[NUM_OFFSETS] ={0.002, 0.0021, 0.0064, 0.070, 0.077, 0.081, 0.1, 0.11, 0.12, 0.13, 0.14, 0.205};
398  inv_param.num_offset = NUM_OFFSETS;
399  // these can be set independently
400  for (int i=0; i<inv_param.num_offset; i++) {
401  inv_param.tol_offset[i] = inv_param.tol;
402  inv_param.tol_hq_offset[i] = inv_param.tol_hq;
403  }
404  void* outArray[NUM_OFFSETS];
405  int len;
406 
407  cpuColorSpinorField* spinorOutArray[NUM_OFFSETS];
408  spinorOutArray[0] = out;
409  for(int i=1;i < inv_param.num_offset; i++){
410  spinorOutArray[i] = new cpuColorSpinorField(csParam);
411  }
412 
413  for(int i=0;i < inv_param.num_offset; i++){
414  outArray[i] = spinorOutArray[i]->V();
415  inv_param.offset[i] = 4*masses[i]*masses[i];
416  }
417 
418  len=Vh;
419 
420  if (test_type == 3) {
421  inv_param.matpc_type = QUDA_MATPC_EVEN_EVEN;
422  } else {
423  inv_param.matpc_type = QUDA_MATPC_ODD_ODD;
424  }
425 
426  invertMultiShiftQuda(outArray, in->V(), &inv_param);
427 
428  cudaDeviceSynchronize();
429  time0 += clock(); // stop the timer
430  time0 /= CLOCKS_PER_SEC;
431 
432  printfQuda("done: total time = %g secs, compute time = %g, %i iter / %g secs = %g gflops\n",
433  time0, inv_param.secs, inv_param.iter, inv_param.secs,
434  inv_param.gflops/inv_param.secs);
435 
436 
437  printfQuda("checking the solution\n");
439  if (inv_param.solve_type == QUDA_NORMOP_SOLVE){
440  //parity = QUDA_EVENODD_PARITY;
441  errorQuda("full parity not supported\n");
442  }else if (inv_param.matpc_type == QUDA_MATPC_EVEN_EVEN){
443  parity = QUDA_EVEN_PARITY;
444  }else if (inv_param.matpc_type == QUDA_MATPC_ODD_ODD){
445  parity = QUDA_ODD_PARITY;
446  }else{
447  errorQuda("ERROR: invalid spinor parity \n");
448  exit(1);
449  }
450  for(int i=0;i < inv_param.num_offset;i++){
451  printfQuda("%dth solution: mass=%f, ", i, masses[i]);
452 #ifdef MULTI_GPU
453  matdagmat_mg4dir(ref, qdp_fatlink, qdp_longlink, ghost_fatlink, ghost_longlink,
454  spinorOutArray[i], masses[i], 0, inv_param.cpu_prec,
455  gaugeParam.cpu_prec, tmp, parity);
456 #else
457  matdagmat(ref->V(), qdp_fatlink, qdp_longlink, outArray[i], masses[i], 0, inv_param.cpu_prec, gaugeParam.cpu_prec, tmp->V(), parity);
458 #endif
459  mxpy(in->V(), ref->V(), len*mySpinorSiteSize, inv_param.cpu_prec);
460  double nrm2 = norm_2(ref->V(), len*mySpinorSiteSize, inv_param.cpu_prec);
461  double src2 = norm_2(in->V(), len*mySpinorSiteSize, inv_param.cpu_prec);
462  double hqr = sqrt(HeavyQuarkResidualNormCpu(*spinorOutArray[i], *ref).z);
463  double l2r = sqrt(nrm2/src2);
464 
465  printfQuda("Shift %d residuals: (L2 relative) tol %g, QUDA = %g, host = %g; (heavy-quark) tol %g, QUDA = %g, host = %g\n",
466  i, inv_param.tol_offset[i], inv_param.true_res_offset[i], l2r,
467  inv_param.tol_hq_offset[i], inv_param.true_res_hq_offset[i], hqr);
468 
469  //empirical: if the CPU residual exceeds the target accuracy by more than one order of magnitude, the solve is deemed not to have converged
470  if (sqrt(nrm2/src2) > 10*inv_param.tol_offset[i]){
471  ret |=1;
472  }
473  }
474 
475  for(int i=1; i < inv_param.num_offset;i++) delete spinorOutArray[i];
476  }
477  break;
478 
479  default:
480  errorQuda("Unsupported test type");
481 
482  }//switch
483 
484  if (test_type <=2){
485 
486  double hqr = sqrt(HeavyQuarkResidualNormCpu(*out, *ref).z);
487  double l2r = sqrt(nrm2/src2);
488 
489  printfQuda("Residuals: (L2 relative) tol %g, QUDA = %g, host = %g; (heavy-quark) tol %g, QUDA = %g, host = %g\n",
490  inv_param.tol, inv_param.true_res, l2r, inv_param.tol_hq, inv_param.true_res_hq, hqr);
491 
492  printfQuda("done: total time = %g secs, compute time = %g secs, %i iter / %g secs = %g gflops, \n",
493  time0, inv_param.secs, inv_param.iter, inv_param.secs,
494  inv_param.gflops/inv_param.secs);
495  }
496 
497  end();
498  return ret;
499 }
500 
501 
502 
503  static void
504 end(void)
505 {
506  for(int i=0;i < 4;i++){
507  free(qdp_fatlink[i]);
508  free(qdp_longlink[i]);
509  }
510 
511  free(fatlink);
512  free(longlink);
513 
514  delete in;
515  delete out;
516  delete ref;
517  delete tmp;
518 
519  if (cpuFat) delete cpuFat;
520  if (cpuLong) delete cpuLong;
521 
522  endQuda();
523 }
524 
525 
526  void
528 {
529  printfQuda("running the following test:\n");
530 
531  printfQuda("prec sloppy_prec link_recon sloppy_link_recon test_type S_dimension T_dimension\n");
532  printfQuda("%s %s %s %s %s %d/%d/%d %d \n",
533  get_prec_str(prec),get_prec_str(prec_sloppy),
534  get_recon_str(link_recon),
535  get_recon_str(link_recon_sloppy), get_test_type(test_type), xdim, ydim, zdim, tdim);
536 
537  printfQuda("Grid partition info: X Y Z T\n");
538  printfQuda(" %d %d %d %d\n",
539  dimPartitioned(0),
540  dimPartitioned(1),
541  dimPartitioned(2),
542  dimPartitioned(3));
543 
544  return ;
545 
546 }
547 
548  void
549 usage_extra(char** argv )
550 {
551  printfQuda("Extra options:\n");
552  printfQuda(" --tol <resid_tol> # Set residual tolerance\n");
553  printfQuda(" --test <0/1> # Test method\n");
554  printfQuda(" 0: Even even spinor CG inverter\n");
555  printfQuda(" 1: Odd odd spinor CG inverter\n");
556  printfQuda(" 3: Even even spinor multishift CG inverter\n");
557  printfQuda(" 4: Odd odd spinor multishift CG inverter\n");
558  printfQuda(" --cpu_prec <double/single/half> # Set CPU precision\n");
559 
560  return ;
561 }
562 int main(int argc, char** argv)
563 {
564  for (int i = 1; i < argc; i++) {
565 
566  if(process_command_line_option(argc, argv, &i) == 0){
567  continue;
568  }
569 
570  if( strcmp(argv[i], "--tol") == 0){
571  float tmpf;
572  if (i+1 >= argc){
573  usage(argv);
574  }
575  sscanf(argv[i+1], "%f", &tmpf);
576  if (tmpf <= 0){
577  printf("ERROR: invalid tol(%f)\n", tmpf);
578  usage(argv);
579  }
580  tol = tmpf;
581  i++;
582  continue;
583  }
584 
585  if( strcmp(argv[i], "--cpu_prec") == 0){
586  if (i+1 >= argc){
587  usage(argv);
588  }
589  cpu_prec= get_prec(argv[i+1]);
590  i++;
591  continue;
592  }
593 
594  printf("ERROR: Invalid option:%s\n", argv[i]);
595  usage(argv);
596  }
597 
598  if (prec_sloppy == QUDA_INVALID_PRECISION){
599  prec_sloppy = prec;
600  }
601  if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID){
602  link_recon_sloppy = link_recon;
603  }
604 
605  if(inv_type != QUDA_CG_INVERTER){
606  if(test_type != 0 && test_type != 1) errorQuda("Preconditioning is currently not supported in multi-shift solver solvers");
607  }
608 
609 
610  // initialize QMP/MPI, QUDA comms grid and RNG (test_util.cpp)
611  initComms(argc, argv, gridsize_from_cmdline);
612 
614 
615  printfQuda("dslash_type = %d\n", dslash_type);
616 
617  int ret = invert_test();
618 
619  // finalize the communications layer
620  finalizeComms();
621 
622  return ret;
623 }
int maxiter_precondition
Definition: quda.h:216
int zdim
Definition: test_util.cpp:1555
double secs
Definition: quda.h:183
int dimPartitioned(int dim)
Definition: test_util.cpp:1577
QudaDiracFieldOrder dirac_order
Definition: quda.h:156
QudaMassNormalization mass_normalization
Definition: quda.h:146
double tol_hq_offset[QUDA_MAX_MULTI_SHIFT]
Definition: quda.h:134
QudaReconstructType reconstruct_sloppy
Definition: quda.h:46
double anisotropy
Definition: quda.h:31
__constant__ int Vh
void invertMultiShiftQuda(void **_hp_x, void *_hp_b, QudaInvertParam *param)
void endQuda(void)
const void ** Ghost() const
Definition: gauge_field.h:209
QudaSolveType solve_type
Definition: quda.h:143
QudaVerbosity verbosity_precondition
Definition: quda.h:210
__constant__ int X2
enum QudaPrecision_s QudaPrecision
int V
Definition: test_util.cpp:29
void matdagmat_mg4dir(cpuColorSpinorField *out, void **fatlink, void **longlink, void **ghost_fatlink, void **ghost_longlink, cpuColorSpinorField *in, double mass, int dagger_bit, QudaPrecision sPrecision, QudaPrecision gPrecision, cpuColorSpinorField *tmp, QudaParity parity)
int ga_pad
Definition: quda.h:53
QudaPrecision prec_sloppy
Definition: test_util.cpp:1552
QudaGaugeFixed gauge_fix
Definition: quda.h:51
QudaTune tune
Definition: quda.h:185
enum QudaResidualType_s QudaResidualType
QudaInverterType inv_type_precondition
Definition: quda.h:203
QudaLinkType type
Definition: quda.h:35
void invertQuda(void *h_x, void *h_b, QudaInvertParam *param)
void usage_extra(char **argv)
#define errorQuda(...)
Definition: util_quda.h:73
double tol
Definition: quda.h:102
QudaDslashType dslash_type
Definition: quda.h:85
QudaInverterType inv_type
Definition: quda.h:86
QudaGaugeParam gaugeParam
QudaPrecision cuda_prec
Definition: quda.h:152
int ydim
Definition: test_util.cpp:1554
QudaReconstructType link_recon_sloppy
Definition: test_util.cpp:1550
void setDims(int *)
Definition: test_util.cpp:88
__constant__ int X1
__host__ __device__ ValueType sqrt(ValueType x)
Definition: complex_quda.h:105
void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param)
int device
Definition: test_util.cpp:1546
QudaPrecision cpu_prec
Definition: quda.h:151
int process_command_line_option(int argc, char **argv, int *idx)
Definition: test_util.cpp:1635
void constructSpinorField(Float *res)
QudaPrecision precision
Definition: lattice_field.h:41
#define gaugeSiteSize
QudaDagType dagger
Definition: quda.h:145
void finalizeComms()
Definition: test_util.cpp:65
QudaReconstructType link_recon
Definition: test_util.cpp:1549
QudaGaugeFieldOrder gauge_order
Definition: quda.h:36
double true_res
Definition: quda.h:105
int test_type
Definition: test_util.cpp:1564
const char * get_prec_str(QudaPrecision prec)
Definition: misc.cpp:658
void * qdp_longlink[4]
int tdim
Definition: test_util.cpp:1556
QudaInverterType inv_type
Definition: test_util.cpp:1565
QudaSiteSubset siteSubset
Definition: lattice_field.h:42
const char * get_test_type(int t)
Definition: misc.cpp:752
QudaFieldLocation input_location
Definition: quda.h:82
double true_res_hq_offset[QUDA_MAX_MULTI_SHIFT]
Definition: quda.h:140
double reliable_delta
Definition: quda.h:108
QudaUseInitGuess use_init_guess
Definition: quda.h:167
QudaSolutionType solution_type
Definition: quda.h:142
int x[QUDA_MAX_DIM]
Definition: lattice_field.h:38
double scale
Definition: quda.h:33
void initQuda(int device)
QudaPrecision prec
Definition: test_util.cpp:1551
QudaFieldLocation output_location
Definition: quda.h:83
QudaPrecision cpu_prec
void * qdp_fatlink[4]
QudaPrecision cuda_prec_sloppy
Definition: quda.h:153
FloatingPoint< float > Float
Definition: gtest.h:7350
QudaVerbosity verbosity
Definition: quda.h:174
void setSpinorSiteSize(int n)
Definition: test_util.cpp:150
ColorSpinorParam csParam
Definition: pack_test.cpp:24
double tol_offset[QUDA_MAX_MULTI_SHIFT]
Definition: quda.h:131
double true_res_offset[QUDA_MAX_MULTI_SHIFT]
Definition: quda.h:137
cpuColorSpinorField * in
QudaInvertParam newQudaInvertParam(void)
double gflops
Definition: quda.h:182
const char * get_recon_str(QudaReconstructType recon)
Definition: misc.cpp:724
cpuColorSpinorField * tmp
QudaPrecision cuda_prec_precondition
Definition: quda.h:48
#define mySpinorSiteSize
double tol_hq
Definition: quda.h:104
int xdim
Definition: test_util.cpp:1553
double true_res_hq
Definition: quda.h:106
QudaGammaBasis gamma_basis
Definition: quda.h:158
QudaPrecision cuda_prec_sloppy
Definition: quda.h:45
double tol_precondition
Definition: quda.h:213
cpuGaugeField * cpuLong
void mxpy(void *x, void *y, int len, QudaPrecision precision)
double offset[QUDA_MAX_MULTI_SHIFT]
Definition: quda.h:128
int use_sloppy_partial_accumulator
Definition: quda.h:109
enum QudaParity_s QudaParity
QudaReconstructType reconstruct
Definition: quda.h:43
QudaPrecision cuda_prec
Definition: quda.h:42
int X[4]
Definition: quda.h:29
double mass
Definition: quda.h:88
int gcrNkrylov
Definition: quda.h:192
#define NUM_OFFSETS
double mass
Definition: test_util.cpp:1569
void * longlink
double norm_2(void *v, int len, QudaPrecision precision)
int main(int argc, char **argv)
void construct_fat_long_gauge_field(void **fatlink, void **longlink, int type, QudaPrecision precision, QudaGaugeParam *param, QudaDslashType dslash_type)
Definition: test_util.cpp:1018
cpuGaugeField * cpuFat
void display_test_info()
QudaInvertParam inv_param
Definition: dslash_test.cpp:38
QudaDslashType dslash_type
Definition: test_util.cpp:1560
double tadpole_coeff
Definition: quda.h:32
cpuColorSpinorField * out
QudaPrecision cuda_prec_precondition
Definition: quda.h:154
cpuColorSpinorField * ref
QudaPrecision get_prec(QIO_Reader *infile)
Definition: gauge_qio.cpp:38
double tol_restart
Definition: quda.h:103
enum QudaReconstructType_s QudaReconstructType
Main header file for the QUDA library.
#define MAX(a, b)
__constant__ int X3
#define printfQuda(...)
Definition: util_quda.h:67
QudaTboundary t_boundary
Definition: quda.h:38
enum QudaDslashType_s QudaDslashType
void matdagmat(void *out, void **gauge, void *in, double kappa, QudaPrecision sPrecision, QudaPrecision gPrecision, double mferm)
QudaResidualType residual_type
Definition: quda.h:235
int gridsize_from_cmdline[]
Definition: test_util.cpp:1559
int num_offset
Definition: quda.h:123
double3 HeavyQuarkResidualNormCpu(cpuColorSpinorField &x, cpuColorSpinorField &r)
Definition: blas_cpu.cpp:331
VOLATILE spinorFloat * s
void * fatlink
int invert_test(void)
void initComms(int argc, char **argv, const int *commDims)
Definition: test_util.cpp:48
const QudaParity parity
Definition: dslash_test.cpp:29
QudaMatPCType matpc_type
Definition: quda.h:144
bool tune
Definition: test_util.cpp:1562
void usage(char **argv)
Definition: test_util.cpp:1584
enum QudaInverterType_s QudaInverterType
QudaPrecision cpu_prec
Definition: quda.h:40
void end()
__constant__ int X4
QudaGaugeParam newQudaGaugeParam(void)
QudaPreserveSource preserve_source
Definition: quda.h:149