QUDA  1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
dslash_ctest.cpp
Go to the documentation of this file.
1 #include <iostream>
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include <string.h>
5 
6 #include <quda.h>
7 #include <quda_internal.h>
8 #include <dirac_quda.h>
9 #include <dslash_quda.h>
10 #include <invert_quda.h>
11 #include <util_quda.h>
12 #include <blas_quda.h>
13 
14 #include <test_util.h>
15 #include <dslash_util.h>
18 #include "misc.h"
19 
20 #include <qio_field.h>
21 // google test frame work
22 #include <gtest/gtest.h>
23 
24 #define MAX(a,b) ((a)>(b)?(a):(b))
25 
26 using namespace quda;
27 
28 const QudaParity parity = QUDA_EVEN_PARITY; // even or odd?
29 const int transfer = 0; // include transfer time in the benchmark?
30 
31 double kappa5;
32 
35 
38 
41 
43 
44 Dirac *dirac = NULL;
45 DiracMobiusPC *dirac_mdwf = NULL; // create the MDWF Dirac operator
46 DiracDomainWall4DPC *dirac_4dpc = NULL; // create the 4d preconditioned DWF Dirac operator
47 
48 // What test are we doing (0 = dslash, 1 = MatPC, 2 = Mat, 3 = MatPCDagMatPC, 4 = MatDagMat)
49 extern int test_type;
50 
51 // Dirac operator type
53 
54 // Twisted mass flavor type
57 
58 extern int device;
59 extern int xdim;
60 extern int ydim;
61 extern int zdim;
62 extern int tdim;
63 extern int Lsdim;
64 extern int gridsize_from_cmdline[];
65 extern QudaDagType dagger;
67 
68 extern bool compute_clover;
69 extern double clover_coeff;
70 
71 extern bool verify_results;
72 extern int niter;
73 extern char latfile[];
74 
75 extern double mass; // mass of Dirac operator
76 extern double mu;
77 extern double epsilon;
78 extern void usage(char**);
79 
81 
82 const char *prec_str[] = {"quarter", "half", "single", "double"};
83 const char *recon_str[] = {"r18", "r12", "r8"};
84 
85 // For googletest names must be non-empty, unique, and may only contain ASCII
86 // alphanumeric characters or underscore
87 
89 {
90  switch (prec) {
91  case QUDA_QUARTER_PRECISION: return 1e-1;
92  case QUDA_HALF_PRECISION: return 1e-3;
93  case QUDA_SINGLE_PRECISION: return 1e-4;
94  case QUDA_DOUBLE_PRECISION: return 1e-11;
95  case QUDA_INVALID_PRECISION: return 1.0;
96  }
97  return 1.0;
98 }
99 
100 void init(int precision, QudaReconstructType link_recon) {
101 
102  printfQuda("%s\n", __func__);
103  cuda_prec = getPrecision(precision);
104 
105  gauge_param = newQudaGaugeParam();
106  inv_param = newQudaInvertParam();
107 
108  gauge_param.X[0] = xdim;
109  gauge_param.X[1] = ydim;
110  gauge_param.X[2] = zdim;
111  gauge_param.X[3] = tdim;
112 
114  errorQuda("Asqtad not supported. Please try staggered_dslash_test instead");
117  dw_setDims(gauge_param.X, Lsdim);
118  } else {
119  setDims(gauge_param.X);
120  Ls = 1;
121  }
122 
123  setSpinorSiteSize(24);
124 
125  gauge_param.anisotropy = 1.0;
126 
127  gauge_param.type = QUDA_WILSON_LINKS;
128  gauge_param.gauge_order = QUDA_QDP_GAUGE_ORDER;
129  gauge_param.t_boundary = QUDA_ANTI_PERIODIC_T;
130 
131  gauge_param.cpu_prec = cpu_prec;
132  gauge_param.cuda_prec = cuda_prec;
133  gauge_param.reconstruct = link_recon;
134  gauge_param.reconstruct_sloppy = link_recon;
135  gauge_param.cuda_prec_sloppy = cuda_prec;
136  gauge_param.gauge_fix = QUDA_GAUGE_FIXED_NO;
137 
138  inv_param.kappa = 0.1;
139 
141  inv_param.epsilon = epsilon;
142  inv_param.twist_flavor = twist_flavor;
144  inv_param.m5 = -1.5;
145  kappa5 = 0.5/(5 + inv_param.m5);
146  } else if (dslash_type == QUDA_MOBIUS_DWF_DSLASH) {
147  inv_param.m5 = -1.5;
148  kappa5 = 0.5/(5 + inv_param.m5);
149  for(int k = 0; k < Lsdim; k++)
150  {
151  // b5[k], c[k] values are chosen for arbitrary values,
152  // but the difference of them are same as 1.0
153  inv_param.b_5[k] = 1.50;
154  inv_param.c_5[k] = 0.50;
155  }
156  }
157 
158  inv_param.mu = mu;
159  inv_param.mass = mass;
160  inv_param.Ls = (inv_param.twist_flavor != QUDA_TWIST_NONDEG_DOUBLET) ? Ls : 2;
161 
163  inv_param.matpc_type = matpc_type;
164  inv_param.dagger = dagger;
165  not_dagger = (QudaDagType)((dagger + 1)%2);
166 
167  inv_param.cpu_prec = cpu_prec;
168  if (inv_param.cpu_prec != gauge_param.cpu_prec) {
169  errorQuda("Gauge and spinor CPU precisions must match");
170  }
171  inv_param.cuda_prec = cuda_prec;
172 
175 
176 #ifndef MULTI_GPU // free parameter for single GPU
177  gauge_param.ga_pad = 0;
178 #else // must be this one c/b face for multi gpu
179  int x_face_size = gauge_param.X[1]*gauge_param.X[2]*gauge_param.X[3]/2;
180  int y_face_size = gauge_param.X[0]*gauge_param.X[2]*gauge_param.X[3]/2;
181  int z_face_size = gauge_param.X[0]*gauge_param.X[1]*gauge_param.X[3]/2;
182  int t_face_size = gauge_param.X[0]*gauge_param.X[1]*gauge_param.X[2]/2;
183  int pad_size =MAX(x_face_size, y_face_size);
184  pad_size = MAX(pad_size, z_face_size);
185  pad_size = MAX(pad_size, t_face_size);
186  gauge_param.ga_pad = pad_size;
187 #endif
188  inv_param.sp_pad = 0;
189  inv_param.cl_pad = 0;
190 
191  //inv_param.sp_pad = xdim*ydim*zdim/2;
192  //inv_param.cl_pad = 24*24*24;
193 
194  inv_param.gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS; // test code only supports DeGrand-Rossi Basis
195  inv_param.dirac_order = QUDA_DIRAC_ORDER;
196 
198  switch(test_type) {
199  case 0:
200  case 1:
201  case 2:
202  case 3:
204  break;
205  case 4:
207  break;
208  default:
209  errorQuda("Test type %d not defined QUDA_DOMAIN_WALL_4D_DSLASH\n", test_type);
210  }
211  } else if(dslash_type == QUDA_MOBIUS_DWF_DSLASH) {
212  switch(test_type) {
213  case 0:
214  case 1:
215  case 2:
216  case 3:
217  case 4:
219  break;
220  case 5:
222  break;
223  default:
224  errorQuda("Test type %d not defined on QUDA_MOBIUS_DWF_DSLASH\n", test_type);
225  }
226  }
227  else
228  {
229  switch(test_type) {
230  case 0:
231  case 1:
233  break;
234  case 2:
235  inv_param.solution_type = QUDA_MAT_SOLUTION;
236  break;
237  case 3:
239  break;
240  case 4:
242  break;
243  default:
244  errorQuda("Test type %d not defined\n", test_type);
245  }
246  }
247 
248  inv_param.dslash_type = dslash_type;
249 
251  inv_param.clover_cpu_prec = cpu_prec;
252  inv_param.clover_cuda_prec = cuda_prec;
253  inv_param.clover_cuda_prec_sloppy = inv_param.clover_cuda_prec;
257  inv_param.clover_coeff = clover_coeff;
258  hostClover = malloc((size_t)V*cloverSiteSize*inv_param.clover_cpu_prec);
259  hostCloverInv = malloc((size_t)V*cloverSiteSize*inv_param.clover_cpu_prec);
260  }
261 
262  // construct input fields
263  for (int dir = 0; dir < 4; dir++) hostGauge[dir] = malloc((size_t)V*gaugeSiteSize*gauge_param.cpu_prec);
264 
266 
267  csParam.nColor = 3;
268  csParam.nSpin = 4;
269  csParam.nDim = 4;
270  for (int d=0; d<4; d++) csParam.x[d] = gauge_param.X[d];
274  csParam.nDim = 5;
275  csParam.x[4] = Ls;
276  }
278  csParam.pc_type = QUDA_5D_PC;
279  } else {
280  csParam.pc_type = QUDA_4D_PC;
281  }
282 
283  //ndeg_tm
285  csParam.twistFlavor = inv_param.twist_flavor;
286  csParam.nDim = (inv_param.twist_flavor == QUDA_TWIST_SINGLET) ? 4 : 5;
287  csParam.x[4] = inv_param.Ls;
288  }
289 
290 
291  csParam.setPrecision(inv_param.cpu_prec);
292  csParam.pad = 0;
293 
296  csParam.x[0] /= 2;
297  } else {
298  if (test_type < 2 || test_type == 3) {
300  csParam.x[0] /= 2;
301  } else {
303  }
304  }
305 
308  csParam.gammaBasis = inv_param.gamma_basis;
309  csParam.create = QUDA_ZERO_FIELD_CREATE;
310 
311  spinor = new cpuColorSpinorField(csParam);
312  spinorOut = new cpuColorSpinorField(csParam);
313  spinorRef = new cpuColorSpinorField(csParam);
314  spinorTmp = new cpuColorSpinorField(csParam);
315 
316  csParam.x[0] = gauge_param.X[0];
317 
318  // printfQuda("Randomizing fields... ");
319 
320 
321  //FIXME
322  // if (strcmp(latfile,"")) { // load in the command line supplied gauge field
323  // read_gauge_field(latfile, hostGauge, gauge_param.cpu_prec, gauge_param.X, argc, argv);
324  // construct_gauge_field(hostGauge, 2, gauge_param.cpu_prec, &gauge_param);
325  // } else { // else generate a random SU(3) field
326  construct_gauge_field(hostGauge, 1, gauge_param.cpu_prec, &gauge_param);
327  // }
328 
329  spinor->Source(QUDA_RANDOM_SOURCE, 0);
330 
332  double norm = 0.1; // clover components are random numbers in the range (-norm, norm)
333  double diag = 1.0; // constant added to the diagonal
334  construct_clover_field(hostClover, norm, diag, inv_param.clover_cpu_prec);
335  memcpy(hostCloverInv, hostClover, (size_t)V*cloverSiteSize*inv_param.clover_cpu_prec);
336  }
337 
338  // printfQuda("done.\n"); fflush(stdout);
339 
340  // set verbosity prior to loadGaugeQuda
342  inv_param.verbosity = verbosity;
343 
344  // printfQuda("Sending gauge field to GPU\n");
345  loadGaugeQuda(hostGauge, &gauge_param);
346 
348  if (compute_clover) printfQuda("Computing clover field on GPU\n");
349  else printfQuda("Sending clover field to GPU\n");
350  inv_param.compute_clover = compute_clover;
351  inv_param.return_clover = compute_clover;
354  inv_param.return_clover_inverse = true;
355 
357  }
358 
359  if (!transfer) {
361  csParam.pad = inv_param.sp_pad;
362  csParam.setPrecision(inv_param.cuda_prec);
363  if (csParam.Precision() == QUDA_DOUBLE_PRECISION ) {
365  } else {
366  /* Single and half */
368  }
369 
372  csParam.x[0] /= 2;
373  } else {
374  if (test_type < 2 || test_type == 3) {
376  csParam.x[0] /= 2;
377  }
378  }
379 
380  // printfQuda("Creating cudaSpinor\n");
381  cudaSpinor = new cudaColorSpinorField(csParam);
382  // printfQuda("Creating cudaSpinorOut\n");
383  cudaSpinorOut = new cudaColorSpinorField(csParam);
384 
385  tmp1 = new cudaColorSpinorField(csParam);
386 
388  if (test_type == 2 || test_type == 4) csParam.x[0] /= 2;
389 
391  tmp2 = new cudaColorSpinorField(csParam);
392 
393  // printfQuda("Sending spinor field to GPU\n");
394  *cudaSpinor = *spinor;
395 
396  // double cpu_norm = blas::norm2(*spinor);
397  // double cuda_norm = blas::norm2(*cudaSpinor);
398  // printfQuda("Source: CPU = %e, CUDA = %e\n", cpu_norm, cuda_norm);
399 
400  bool pc;
402  pc = true;
403  else
404  pc = (test_type != 2 && test_type != 4);
405  DiracParam diracParam;
406  setDiracParam(diracParam, &inv_param, pc);
407  diracParam.tmp1 = tmp1;
408  diracParam.tmp2 = tmp2;
409 
411  dirac_4dpc = new DiracDomainWall4DPC(diracParam);
412  dirac = (Dirac*)dirac_4dpc;
413  }
414  else if (dslash_type == QUDA_MOBIUS_DWF_DSLASH){
415  dirac_mdwf = new DiracMobiusPC(diracParam);
416  dirac = (Dirac*)dirac_mdwf;
417  }
418  else {
419  dirac = Dirac::create(diracParam);
420  }
421  } else {
422  // double cpu_norm = blas::norm2(*spinor);
423  // printfQuda("Source: CPU = %e\n", cpu_norm);
424  }
425 
426 }
427 
428 void end() {
429  printfQuda("%s\n", __func__);
430  if (!transfer) {
431  if(dirac != NULL)
432  {
433  delete dirac;
434  dirac = NULL;
435  }
436  delete cudaSpinor;
437  delete cudaSpinorOut;
438  delete tmp1;
439  delete tmp2;
440  }
441 
442  // release memory
443  delete spinor;
444  delete spinorOut;
445  delete spinorRef;
446  delete spinorTmp;
447 
448  freeGaugeQuda();
449 
450  for (int dir = 0; dir < 4; dir++) free(hostGauge[dir]);
452  free(hostClover);
453  free(hostCloverInv);
454  }
456 
457  }
458 
459  struct DslashTime {
460  double event_time;
461  double cpu_time;
462  double cpu_min;
463  double cpu_max;
464 
465  DslashTime() : event_time(0.0), cpu_time(0.0), cpu_min(DBL_MAX), cpu_max(0.0) {}
466  };
467 
468 // execute kernel
470 
471  DslashTime dslash_time;
472  timeval tstart, tstop;
473 
474  cudaEvent_t start, end;
475  cudaEventCreate(&start);
476  cudaEventCreate(&end);
477 
478  comm_barrier();
479  cudaEventRecord(start, 0);
480 
481  for (int i = 0; i < niter; i++) {
482 
483  gettimeofday(&tstart, NULL);
484 
486  switch (test_type) {
487  case 0:
488  if (transfer) {
489  dslashQuda_4dpc(spinorOut->V(), spinor->V(), &inv_param, parity, test_type);
490  } else {
491  dirac_4dpc->Dslash4(*cudaSpinorOut, *cudaSpinor, parity);
492  }
493  break;
494  case 1:
495  if (transfer) {
496  dslashQuda_4dpc(spinorOut->V(), spinor->V(), &inv_param, parity, test_type);
497  } else {
498  dirac_4dpc->Dslash5(*cudaSpinorOut, *cudaSpinor, parity);
499  }
500  break;
501  case 2:
502  if (transfer) {
503  dslashQuda_4dpc(spinorOut->V(), spinor->V(), &inv_param, parity, test_type);
504  } else {
505  dirac_4dpc->Dslash5inv(*cudaSpinorOut, *cudaSpinor, parity, kappa5);
506  }
507  break;
508  case 3:
509  if (transfer) {
510  MatQuda(spinorOut->V(), spinor->V(), &inv_param);
511  } else {
512  dirac_4dpc->M(*cudaSpinorOut, *cudaSpinor);
513  }
514  break;
515  case 4:
516  if (transfer) {
517  MatDagMatQuda(spinorOut->V(), spinor->V(), &inv_param);
518  } else {
519  dirac_4dpc->MdagM(*cudaSpinorOut, *cudaSpinor);
520  }
521  break;
522  }
523  }
524  else if (dslash_type == QUDA_MOBIUS_DWF_DSLASH){
525  switch (test_type) {
526  case 0:
527  if (transfer) {
528  dslashQuda_mdwf(spinorOut->V(), spinor->V(), &inv_param, parity, test_type);
529  } else {
530  dirac_mdwf->Dslash4(*cudaSpinorOut, *cudaSpinor, parity);
531  }
532  break;
533  case 1:
534  if (transfer) {
535  dslashQuda_mdwf(spinorOut->V(), spinor->V(), &inv_param, parity, test_type);
536  } else {
537  dirac_mdwf->Dslash5(*cudaSpinorOut, *cudaSpinor, parity);
538  }
539  break;
540  case 2:
541  if (transfer) {
542  dslashQuda_mdwf(spinorOut->V(), spinor->V(), &inv_param, parity, test_type);
543  } else {
544  dirac_mdwf->Dslash4pre(*cudaSpinorOut, *cudaSpinor, parity);
545  }
546  break;
547  case 3:
548  if (transfer) {
549  dslashQuda_mdwf(spinorOut->V(), spinor->V(), &inv_param, parity, test_type);
550  } else {
551  dirac_mdwf->Dslash5inv(*cudaSpinorOut, *cudaSpinor, parity);
552  }
553  break;
554  case 4:
555  if (transfer) {
556  MatQuda(spinorOut->V(), spinor->V(), &inv_param);
557  } else {
558  dirac_mdwf->M(*cudaSpinorOut, *cudaSpinor);
559  }
560  break;
561  case 5:
562  if (transfer) {
563  MatDagMatQuda(spinorOut->V(), spinor->V(), &inv_param);
564  } else {
565  dirac_mdwf->MdagM(*cudaSpinorOut, *cudaSpinor);
566  }
567  break;
568  }
569  } else {
570  switch (test_type) {
571  case 0:
573  if (transfer) {
574  dslashQuda(spinorOut->V(), spinor->V(), &inv_param, parity);
575  } else {
576  dirac->Dslash(*cudaSpinorOut, *cudaSpinor, parity);
577  }
578  } else {
579  if (transfer) {
580  dslashQuda(spinorOut->V(), spinor->V(), &inv_param, parity);
581  } else {
582  dirac->Dslash(*cudaSpinorOut, *cudaSpinor, parity);
583  }
584  }
585  break;
586  case 1:
587  if (transfer) {
588  MatQuda(spinorOut->V(), spinor->V(), &inv_param);
589  } else {
590  dirac->M(*cudaSpinorOut, *cudaSpinor);
591  }
592  break;
593  case 2:
594  if (transfer) {
595  MatQuda(spinorOut->V(), spinor->V(), &inv_param);
596  } else {
597  dirac->M(*cudaSpinorOut, *cudaSpinor);
598  }
599  break;
600  case 3:
601  if (transfer) {
602  MatDagMatQuda(spinorOut->V(), spinor->V(), &inv_param);
603  } else {
604  dirac->MdagM(*cudaSpinorOut, *cudaSpinor);
605  }
606  break;
607  case 4:
608  if (transfer) {
609  MatDagMatQuda(spinorOut->V(), spinor->V(), &inv_param);
610  } else {
611  dirac->MdagM(*cudaSpinorOut, *cudaSpinor);
612  }
613  break;
614  }
615  }
616 
617  gettimeofday(&tstop, NULL);
618  long ds = tstop.tv_sec - tstart.tv_sec;
619  long dus = tstop.tv_usec - tstart.tv_usec;
620  double elapsed = ds + 0.000001*dus;
621 
622  dslash_time.cpu_time += elapsed;
623  // skip first and last iterations since they may skew these metrics if comms are not synchronous
624  if (i>0 && i<niter) {
625  if (elapsed < dslash_time.cpu_min) dslash_time.cpu_min = elapsed;
626  if (elapsed > dslash_time.cpu_max) dslash_time.cpu_max = elapsed;
627  }
628  }
629 
630  cudaEventRecord(end, 0);
631  cudaEventSynchronize(end);
632  float runTime;
633  cudaEventElapsedTime(&runTime, start, end);
634  cudaEventDestroy(start);
635  cudaEventDestroy(end);
636 
637  dslash_time.event_time = runTime / 1000;
638 
639  // check for errors
640  cudaError_t stat = cudaGetLastError();
641  if (stat != cudaSuccess)
642  printfQuda("with ERROR: %s\n", cudaGetErrorString(stat));
643 
644  return dslash_time;
645 }
646 
647 void dslashRef() {
648 
649  // compare to dslash reference implementation
650  // printfQuda("Calculating reference implementation...");
651  fflush(stdout);
652 
654  switch (test_type) {
655  case 0:
656  wil_dslash(spinorRef->V(), hostGauge, spinor->V(), parity, dagger, inv_param.cpu_prec, gauge_param);
657  break;
658  case 1:
659  wil_matpc(spinorRef->V(), hostGauge, spinor->V(), inv_param.kappa, inv_param.matpc_type, dagger,
660  inv_param.cpu_prec, gauge_param);
661  break;
662  case 2:
663  wil_mat(spinorRef->V(), hostGauge, spinor->V(), inv_param.kappa, dagger, inv_param.cpu_prec, gauge_param);
664  break;
665  case 3:
666  wil_matpc(spinorTmp->V(), hostGauge, spinor->V(), inv_param.kappa, inv_param.matpc_type, dagger,
667  inv_param.cpu_prec, gauge_param);
668  wil_matpc(spinorRef->V(), hostGauge, spinorTmp->V(), inv_param.kappa, inv_param.matpc_type, not_dagger,
669  inv_param.cpu_prec, gauge_param);
670  break;
671  case 4:
672  wil_mat(spinorTmp->V(), hostGauge, spinor->V(), inv_param.kappa, dagger, inv_param.cpu_prec, gauge_param);
673  wil_mat(spinorRef->V(), hostGauge, spinorTmp->V(), inv_param.kappa, not_dagger, inv_param.cpu_prec, gauge_param);
674  break;
675  default:
676  printfQuda("Test type not defined\n");
677  exit(-1);
678  }
679  } else if (dslash_type == QUDA_CLOVER_WILSON_DSLASH) {
680  switch (test_type) {
681  case 0:
682  clover_dslash(spinorRef->V(), hostGauge, hostCloverInv, spinor->V(), parity, dagger, inv_param.cpu_prec, gauge_param);
683  break;
684  case 1:
685  clover_matpc(spinorRef->V(), hostGauge, hostClover, hostCloverInv, spinor->V(), inv_param.kappa, inv_param.matpc_type,
686  dagger, inv_param.cpu_prec, gauge_param);
687  break;
688  case 2:
689  clover_mat(spinorRef->V(), hostGauge, hostClover, spinor->V(), inv_param.kappa, dagger, inv_param.cpu_prec, gauge_param);
690  break;
691  case 3:
692  clover_matpc(spinorTmp->V(), hostGauge, hostClover, hostCloverInv, spinor->V(), inv_param.kappa, inv_param.matpc_type,
693  dagger, inv_param.cpu_prec, gauge_param);
694  clover_matpc(spinorRef->V(), hostGauge, hostClover, hostCloverInv, spinorTmp->V(), inv_param.kappa, inv_param.matpc_type,
695  not_dagger, inv_param.cpu_prec, gauge_param);
696  break;
697  case 4:
698  clover_mat(spinorTmp->V(), hostGauge, hostClover, spinor->V(), inv_param.kappa, dagger, inv_param.cpu_prec, gauge_param);
699  clover_mat(spinorRef->V(), hostGauge, hostClover, spinorTmp->V(), inv_param.kappa, not_dagger,
700  inv_param.cpu_prec, gauge_param);
701  break;
702  default:
703  printfQuda("Test type not defined\n");
704  exit(-1);
705  }
706  } else if (dslash_type == QUDA_TWISTED_MASS_DSLASH) {
707  switch (test_type) {
708  case 0:
709  if(inv_param.twist_flavor == QUDA_TWIST_SINGLET)
710  tm_dslash(spinorRef->V(), hostGauge, spinor->V(), inv_param.kappa, inv_param.mu, inv_param.twist_flavor, parity, inv_param.matpc_type, dagger, inv_param.cpu_prec, gauge_param);
711  else
712  {
713  int tm_offset = 12*spinorRef->Volume();
714 
715  void *ref1 = spinorRef->V();
716  void *ref2 = (char*)ref1 + tm_offset*cpu_prec;
717 
718  void *flv1 = spinor->V();
719  void *flv2 = (char*)flv1 + tm_offset*cpu_prec;
720 
721  tm_ndeg_dslash(ref1, ref2, hostGauge, flv1, flv2, inv_param.kappa, inv_param.mu, inv_param.epsilon, parity,
722  dagger, inv_param.matpc_type, inv_param.cpu_prec, gauge_param);
723  }
724  break;
725  case 1:
726  if (inv_param.twist_flavor == QUDA_TWIST_SINGLET)
727  tm_matpc(spinorRef->V(), hostGauge, spinor->V(), inv_param.kappa, inv_param.mu, inv_param.twist_flavor,
728  inv_param.matpc_type, dagger, inv_param.cpu_prec, gauge_param);
729  else {
730  int tm_offset = 12 * spinorRef->Volume();
731 
732  void *ref1 = spinorRef->V();
733  void *ref2 = (char *)ref1 + tm_offset * cpu_prec;
734 
735  void *flv1 = spinor->V();
736  void *flv2 = (char *)flv1 + tm_offset * cpu_prec;
737 
738  tm_ndeg_matpc(ref1, ref2, hostGauge, flv1, flv2, inv_param.kappa, inv_param.mu, inv_param.epsilon,
739  inv_param.matpc_type, dagger, inv_param.cpu_prec, gauge_param);
740  }
741  break;
742  case 2:
743  if (inv_param.twist_flavor == QUDA_TWIST_SINGLET)
744  tm_mat(spinorRef->V(), hostGauge, spinor->V(), inv_param.kappa, inv_param.mu, inv_param.twist_flavor, dagger,
745  inv_param.cpu_prec, gauge_param);
746  else {
747  int tm_offset = 12 * spinorRef->Volume();
748 
749  void *evenOut = spinorRef->V();
750  void *oddOut = (char *)evenOut + tm_offset * cpu_prec;
751 
752  void *evenIn = spinor->V();
753  void *oddIn = (char *)evenIn + tm_offset * cpu_prec;
754 
755  tm_ndeg_mat(evenOut, oddOut, hostGauge, evenIn, oddIn, inv_param.kappa, inv_param.mu, inv_param.epsilon, dagger,
756  inv_param.cpu_prec, gauge_param);
757 }
758 break;
759  case 3:
760  if (inv_param.twist_flavor == QUDA_TWIST_SINGLET) {
761  tm_matpc(spinorTmp->V(), hostGauge, spinor->V(), inv_param.kappa, inv_param.mu, inv_param.twist_flavor,
762  inv_param.matpc_type, dagger, inv_param.cpu_prec, gauge_param);
763  tm_matpc(spinorRef->V(), hostGauge, spinorTmp->V(), inv_param.kappa, inv_param.mu, inv_param.twist_flavor,
764  inv_param.matpc_type, not_dagger, inv_param.cpu_prec, gauge_param);
765  } else {
766  int tm_offset = 12 * spinorRef->Volume();
767 
768  void *ref1 = spinorRef->V();
769  void *ref2 = (char *)ref1 + tm_offset * cpu_prec;
770 
771  void *flv1 = spinor->V();
772  void *flv2 = (char *)flv1 + tm_offset * cpu_prec;
773 
774  void *tmp1 = spinorTmp->V();
775  void *tmp2 = (char *)tmp1 + tm_offset * cpu_prec;
776 
777  tm_ndeg_matpc(tmp1, tmp2, hostGauge, flv1, flv2, inv_param.kappa, inv_param.mu, inv_param.epsilon,
778  inv_param.matpc_type, dagger, inv_param.cpu_prec, gauge_param);
779  tm_ndeg_matpc(ref1, ref2, hostGauge, tmp1, tmp2, inv_param.kappa, inv_param.mu, inv_param.epsilon,
780  inv_param.matpc_type, not_dagger, inv_param.cpu_prec, gauge_param);
781  }
782  break;
783  case 4:
784  if (inv_param.twist_flavor == QUDA_TWIST_SINGLET) {
785  tm_mat(spinorTmp->V(), hostGauge, spinor->V(), inv_param.kappa, inv_param.mu, inv_param.twist_flavor, dagger,
786  inv_param.cpu_prec, gauge_param);
787  tm_mat(spinorRef->V(), hostGauge, spinorTmp->V(), inv_param.kappa, inv_param.mu, inv_param.twist_flavor,
788  not_dagger, inv_param.cpu_prec, gauge_param);
789  } else {
790  int tm_offset = 12 * spinorRef->Volume();
791 
792  void *evenOut = spinorRef->V();
793  void *oddOut = (char *)evenOut + tm_offset * cpu_prec;
794 
795  void *evenIn = spinor->V();
796  void *oddIn = (char *)evenIn + tm_offset * cpu_prec;
797 
798  void *evenTmp = spinorTmp->V();
799  void *oddTmp = (char *)evenTmp + tm_offset * cpu_prec;
800 
801  tm_ndeg_mat(evenTmp, oddTmp, hostGauge, evenIn, oddIn, inv_param.kappa, inv_param.mu, inv_param.epsilon, dagger,
802  inv_param.cpu_prec, gauge_param);
803  tm_ndeg_mat(evenOut, oddOut, hostGauge, evenTmp, oddTmp, inv_param.kappa, inv_param.mu, inv_param.epsilon,
804  not_dagger, inv_param.cpu_prec, gauge_param);
805  }
806  break;
807  default: printfQuda("Test type not defined\n"); exit(-1);
808 }
810  switch (test_type) {
811  case 0:
812  if(inv_param.twist_flavor == QUDA_TWIST_SINGLET)
813  tmc_dslash(spinorRef->V(), hostGauge, spinor->V(), hostClover, hostCloverInv, inv_param.kappa, inv_param.mu, inv_param.twist_flavor, parity, inv_param.matpc_type, dagger, inv_param.cpu_prec, gauge_param);
814  else
815  errorQuda("Not supported\n");
816  break;
817  case 1:
818  if (inv_param.twist_flavor == QUDA_TWIST_SINGLET)
819  tmc_matpc(spinorRef->V(), hostGauge, spinor->V(), hostClover, hostCloverInv, inv_param.kappa, inv_param.mu,
820  inv_param.twist_flavor, inv_param.matpc_type, dagger, inv_param.cpu_prec, gauge_param);
821  else
822  errorQuda("Not supported\n");
823  break;
824 case 2:
825  if (inv_param.twist_flavor == QUDA_TWIST_SINGLET)
826  tmc_mat(spinorRef->V(), hostGauge, hostClover, spinor->V(), inv_param.kappa, inv_param.mu, inv_param.twist_flavor,
827  dagger, inv_param.cpu_prec, gauge_param);
828  else
829  errorQuda("Not supported\n");
830  break;
831 case 3:
832  if (inv_param.twist_flavor == QUDA_TWIST_SINGLET) {
833  tmc_matpc(spinorTmp->V(), hostGauge, spinor->V(), hostClover, hostCloverInv, inv_param.kappa, inv_param.mu,
834  inv_param.twist_flavor, inv_param.matpc_type, dagger, inv_param.cpu_prec, gauge_param);
835  tmc_matpc(spinorRef->V(), hostGauge, spinorTmp->V(), hostClover, hostCloverInv, inv_param.kappa, inv_param.mu,
836  inv_param.twist_flavor, inv_param.matpc_type, not_dagger, inv_param.cpu_prec, gauge_param);
837  } else
838  errorQuda("Not supported\n");
839  break;
840 case 4:
841 if(inv_param.twist_flavor == QUDA_TWIST_SINGLET) {
842  tmc_mat(spinorTmp->V(), hostGauge, hostClover, spinor->V(), inv_param.kappa, inv_param.mu, inv_param.twist_flavor, dagger, inv_param.cpu_prec, gauge_param);
843  tmc_mat(spinorRef->V(), hostGauge, hostClover, spinorTmp->V(), inv_param.kappa, inv_param.mu, inv_param.twist_flavor, not_dagger, inv_param.cpu_prec, gauge_param);
844 } else
845 errorQuda("Not supported\n");
846 break;
847 default:
848 printfQuda("Test type not defined\n");
849 exit(-1);
850 }
851 } else if (dslash_type == QUDA_DOMAIN_WALL_DSLASH ){
852  switch (test_type) {
853  case 0:
854  dw_dslash(spinorRef->V(), hostGauge, spinor->V(), parity, dagger, gauge_param.cpu_prec, gauge_param, inv_param.mass);
855  break;
856  case 1:
857  dw_matpc(spinorRef->V(), hostGauge, spinor->V(), kappa5, inv_param.matpc_type, dagger, gauge_param.cpu_prec,
858  gauge_param, inv_param.mass);
859  break;
860  case 2:
861  dw_mat(spinorRef->V(), hostGauge, spinor->V(), kappa5, dagger, gauge_param.cpu_prec, gauge_param, inv_param.mass);
862  break;
863  case 3:
864  dw_matpc(spinorTmp->V(), hostGauge, spinor->V(), kappa5, inv_param.matpc_type, dagger, gauge_param.cpu_prec,
865  gauge_param, inv_param.mass);
866  dw_matpc(spinorRef->V(), hostGauge, spinorTmp->V(), kappa5, inv_param.matpc_type, not_dagger,
867  gauge_param.cpu_prec, gauge_param, inv_param.mass);
868  break;
869  case 4:
870  dw_matdagmat(spinorRef->V(), hostGauge, spinor->V(), kappa5, dagger, gauge_param.cpu_prec, gauge_param, inv_param.mass);
871  break;
872  default:
873  printf("Test type not supported for domain wall\n");
874  exit(-1);
875  }
877  double *kappa_5 = (double*)malloc(Ls*sizeof(double));
878  for(int xs = 0; xs < Ls ; xs++)
879  kappa_5[xs] = kappa5;
880  switch (test_type) {
881  case 0:
882  dslash_4_4d(spinorRef->V(), hostGauge, spinor->V(), parity, dagger, gauge_param.cpu_prec, gauge_param, inv_param.mass);
883  break;
884  case 1:
885  dw_dslash_5_4d(spinorRef->V(), hostGauge, spinor->V(), parity, dagger, gauge_param.cpu_prec, gauge_param,
886  inv_param.mass, true);
887  break;
888  case 2:
889  dslash_5_inv(spinorRef->V(), hostGauge, spinor->V(), parity, dagger, gauge_param.cpu_prec, gauge_param,
890  inv_param.mass, kappa_5);
891  break;
892  case 3:
893  dw_4d_matpc(spinorRef->V(), hostGauge, spinor->V(), kappa5, inv_param.matpc_type, dagger, gauge_param.cpu_prec, gauge_param, inv_param.mass);
894  break;
895  case 4:
896  dw_4d_matpc(spinorTmp->V(), hostGauge, spinor->V(), kappa5, inv_param.matpc_type, dagger, gauge_param.cpu_prec,
897  gauge_param, inv_param.mass);
898  dw_4d_matpc(spinorRef->V(), hostGauge, spinorTmp->V(), kappa5, inv_param.matpc_type, not_dagger,
899  gauge_param.cpu_prec, gauge_param, inv_param.mass);
900  break;
901  break;
902  default:
903  printf("Test type not supported for domain wall\n");
904  exit(-1);
905  }
906  free(kappa_5);
907 } else if (dslash_type == QUDA_MOBIUS_DWF_DSLASH){
908  double _Complex *kappa_b = (double _Complex *)malloc(Lsdim * sizeof(double _Complex));
909  double _Complex *kappa_c = (double _Complex *)malloc(Lsdim * sizeof(double _Complex));
910  double _Complex *kappa_5 = (double _Complex *)malloc(Lsdim * sizeof(double _Complex));
911  double _Complex *kappa_mdwf = (double _Complex *)malloc(Lsdim * sizeof(double _Complex));
912  for(int xs = 0 ; xs < Lsdim ; xs++)
913  {
914  kappa_b[xs] = 1.0/(2*(inv_param.b_5[xs]*(4.0 + inv_param.m5) + 1.0));
915  kappa_c[xs] = 1.0/(2*(inv_param.c_5[xs]*(4.0 + inv_param.m5) - 1.0));
916  kappa_5[xs] = 0.5*kappa_b[xs]/kappa_c[xs];
917  kappa_mdwf[xs] = -kappa_5[xs];
918  }
919  switch (test_type) {
920  case 0:
921  dslash_4_4d(spinorRef->V(), hostGauge, spinor->V(), parity, dagger, gauge_param.cpu_prec, gauge_param, inv_param.mass);
922  break;
923  case 1:
924  mdw_dslash_5(spinorRef->V(), hostGauge, spinor->V(), parity, dagger, gauge_param.cpu_prec, gauge_param, inv_param.mass, kappa_5, true);
925  break;
926  case 2:
927  mdw_dslash_4_pre(spinorRef->V(), hostGauge, spinor->V(), parity, dagger, gauge_param.cpu_prec, gauge_param, inv_param.mass, inv_param.b_5, inv_param.c_5, true);
928  break;
929  case 3:
930  mdw_dslash_5_inv(spinorRef->V(), hostGauge, spinor->V(), parity, dagger, gauge_param.cpu_prec, gauge_param,
931  inv_param.mass, kappa_mdwf);
932  break;
933  case 4:
934  mdw_matpc(spinorRef->V(), hostGauge, spinor->V(), kappa_b, kappa_c, inv_param.matpc_type, dagger,
935  gauge_param.cpu_prec, gauge_param, inv_param.mass, inv_param.b_5, inv_param.c_5);
936  break;
937  case 5:
938  mdw_matpc(spinorTmp->V(), hostGauge, spinor->V(), kappa_b, kappa_c, inv_param.matpc_type, dagger,
939  gauge_param.cpu_prec, gauge_param, inv_param.mass, inv_param.b_5, inv_param.c_5);
940  mdw_matpc(spinorRef->V(), hostGauge, spinorTmp->V(), kappa_b, kappa_c, inv_param.matpc_type, not_dagger,
941  gauge_param.cpu_prec, gauge_param, inv_param.mass, inv_param.b_5, inv_param.c_5);
942  break;
943  break;
944  default:
945  printf("Test type not supported for domain wall\n");
946  exit(-1);
947  }
948  free(kappa_b);
949  free(kappa_c);
950  free(kappa_5);
951  free(kappa_mdwf);
952 } else {
953  printfQuda("Unsupported dslash_type\n");
954  exit(-1);
955 }
956 
957 // printfQuda("done.\n");
958 }
959 
960 
962 {
963  auto prec = getPrecision(precision);
964  // printfQuda("running the following test:\n");
965 
966  printfQuda("prec recon test_type matpc_type dagger S_dim T_dimension Ls_dimension dslash_type niter\n");
967  printfQuda("%6s %2s %d %12s %d %3d/%3d/%3d %3d %2d %14s %d\n",
970  // printfQuda("Grid partition info: X Y Z T\n");
971  // printfQuda(" %d %d %d %d\n",
972  // dimPartitioned(0),
973  // dimPartitioned(1),
974  // dimPartitioned(2),
975  // dimPartitioned(3));
976 
977  return ;
978 
979 }
980 
981 
982 
983 using ::testing::TestWithParam;
984 using ::testing::Bool;
985 using ::testing::Values;
986 using ::testing::Range;
987 using ::testing::Combine;
988 
989 class DslashTest : public ::testing::TestWithParam<::testing::tuple<int, int, int>> {
990 protected:
991  ::testing::tuple<int, int, int> param;
992 
993  bool skip()
994  {
995  QudaReconstructType recon = static_cast<QudaReconstructType>(::testing::get<1>(GetParam()));
996  if ((QUDA_PRECISION & getPrecision(::testing::get<0>(GetParam()))) == 0
997  || (QUDA_RECONSTRUCT & getReconstructNibble(recon)) == 0) {
998  return true;
999  }
1000  return false;
1001  }
1002 
1003  public:
1004  virtual ~DslashTest() { }
1005  virtual void SetUp() {
1006  int prec = ::testing::get<0>(GetParam());
1007  QudaReconstructType recon = static_cast<QudaReconstructType>(::testing::get<1>(GetParam()));
1008 
1009  if (skip()) GTEST_SKIP();
1010 
1011  int value = ::testing::get<2>(GetParam());
1012  for(int j=0; j < 4;j++){
1013  if (value & (1 << j)){
1015  }
1016 
1017  }
1018  updateR();
1019 
1020  init(prec, recon);
1021  display_test_info(prec, recon);
1022  }
1023 
1024  virtual void TearDown()
1025  {
1026  if (skip()) GTEST_SKIP();
1027  end();
1028  }
1029 
1030  static void SetUpTestCase() {
1031  initQuda(device);
1032  }
1033 
1034  // Per-test-case tear-down.
1035  // Called after the last test in this test case.
1036  // Can be omitted if not needed.
1037  static void TearDownTestCase() {
1038  endQuda();
1039  }
1040 
1041 };
1042 
1044 {
1045  dslashRef();
1046 
1047  dslashCUDA(1); // warm-up run
1048  dslashCUDA(2);
1049 
1050  if (!transfer) *spinorOut = *cudaSpinorOut;
1051 
1052  double norm2_cpu = blas::norm2(*spinorRef);
1053  double norm2_cpu_cuda= blas::norm2(*spinorOut);
1054  if (!transfer) {
1055  double norm2_cuda= blas::norm2(*cudaSpinorOut);
1056  printfQuda("Results: CPU = %f, CUDA=%f, CPU-CUDA = %f\n", norm2_cpu, norm2_cuda, norm2_cpu_cuda);
1057  } else {
1058  printfQuda("Result: CPU = %f, CPU-QUDA = %f\n", norm2_cpu, norm2_cpu_cuda);
1059  }
1060  double deviation = pow(10, -(double)(cpuColorSpinorField::Compare(*spinorRef, *spinorOut)));
1061  double tol = getTolerance(inv_param.cuda_prec);
1062  if (gauge_param.reconstruct == QUDA_RECONSTRUCT_8 && inv_param.cuda_prec >= QUDA_HALF_PRECISION)
1063  tol *= 10; // if recon 8, we tolerate a greater deviation
1064 
1065  ASSERT_LE(deviation, tol) << "CPU and CUDA implementations do not agree";
1066 }
1067 
1069 {
1070  dslashCUDA(1); // warm-up run
1071 
1072  if (!transfer) dirac->Flops();
1073  auto dslash_time = dslashCUDA(niter);
1074  printfQuda("%fus per kernel call\n", 1e6 * dslash_time.event_time / niter);
1075  // FIXME No flops count for twisted-clover yet
1076  unsigned long long flops = 0;
1077  if (!transfer) flops = dirac->Flops();
1078  double gflops = 1.0e-9 * flops / dslash_time.event_time;
1079  printfQuda("GFLOPS = %f\n", gflops);
1080  RecordProperty("Gflops", std::to_string(gflops));
1081  RecordProperty("Halo_bidirectitonal_BW_GPU", 1.0e-9 * 2 * cudaSpinor->GhostBytes() * niter / dslash_time.event_time);
1082  RecordProperty("Halo_bidirectitonal_BW_CPU", 1.0e-9 * 2 * cudaSpinor->GhostBytes() * niter / dslash_time.cpu_time);
1083  RecordProperty("Halo_bidirectitonal_BW_CPU_min", 1.0e-9 * 2 * cudaSpinor->GhostBytes() / dslash_time.cpu_max);
1084  RecordProperty("Halo_bidirectitonal_BW_CPU_max", 1.0e-9 * 2 * cudaSpinor->GhostBytes() / dslash_time.cpu_min);
1085  RecordProperty("Halo_message_size_bytes", 2 * cudaSpinor->GhostBytes());
1086 
1087  printfQuda("Effective halo bi-directional bandwidth (GB/s) GPU = %f ( CPU = %f, min = %f , max = %f ) for aggregate "
1088  "message size %lu bytes\n",
1089  1.0e-9 * 2 * cudaSpinor->GhostBytes() * niter / dslash_time.event_time,
1090  1.0e-9 * 2 * cudaSpinor->GhostBytes() * niter / dslash_time.cpu_time,
1091  1.0e-9 * 2 * cudaSpinor->GhostBytes() / dslash_time.cpu_max,
1092  1.0e-9 * 2 * cudaSpinor->GhostBytes() / dslash_time.cpu_min, 2 * cudaSpinor->GhostBytes());
1093 }
1094 
1095 int main(int argc, char **argv)
1096 {
1097  // initalize google test, includes command line options
1098  ::testing::InitGoogleTest(&argc, argv);
1099  // return code for google test
1100  int test_rc = 0;
1101  for (int i = 1; i < argc; i++) {
1102  if (process_command_line_option(argc, argv, &i) == 0) { continue; }
1103 
1104  fprintf(stderr, "ERROR: Invalid option:%s\n", argv[i]);
1105  usage(argv);
1106  }
1107 
1108  initComms(argc, argv, gridsize_from_cmdline);
1109 
1110  ::testing::TestEventListeners &listeners = ::testing::UnitTest::GetInstance()->listeners();
1111  if (comm_rank() != 0) { delete listeners.Release(listeners.default_result_printer()); }
1112  test_rc = RUN_ALL_TESTS();
1113 
1114  finalizeComms();
1115  return test_rc;
1116 }
1117 
1118 std::string getdslashtestname(testing::TestParamInfo<::testing::tuple<int, int, int>> param)
1119 {
1120  const int prec = ::testing::get<0>(param.param);
1121  const int recon = ::testing::get<1>(param.param);
1122  const int part = ::testing::get<2>(param.param);
1123  std::stringstream ss;
1124  // std::cout << "getdslashtestname" << get_dslash_str(dslash_type) << "_" << prec_str[prec] << "_r" << recon <<
1125  // "_partition" << part << std::endl; ss << get_dslash_str(dslash_type) << "_";
1126  ss << prec_str[prec];
1127  ss << "_r" << recon;
1128  ss << "_partition" << part;
1129  return ss.str();
1130 }
1131 
1132 #ifdef MULTI_GPU
1134  Combine(Range(0, 4), ::testing::Values(QUDA_RECONSTRUCT_NO, QUDA_RECONSTRUCT_12, QUDA_RECONSTRUCT_8), Range(0, 16)),
1136 #else
1138  Combine(Range(0, 4), ::testing::Values(QUDA_RECONSTRUCT_NO, QUDA_RECONSTRUCT_12, QUDA_RECONSTRUCT_8),
1139  ::testing::Values(0)),
1141 #endif
cudaColorSpinorField * tmp2
QudaDiracFieldOrder dirac_order
Definition: quda.h:219
int comm_rank(void)
Definition: comm_mpi.cpp:82
QudaReconstructType reconstruct_sloppy
Definition: quda.h:53
double anisotropy
Definition: quda.h:38
void mdw_matpc(void *out, void **gauge, void *in, double _Complex *kappa_b, double _Complex *kappa_c, QudaMatPCType matpc_type, int dagger, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double _Complex *b5, double _Complex *c5)
const int transfer
void setPrecision(QudaPrecision precision, QudaPrecision ghost_precision=QUDA_INVALID_PRECISION, bool force_native=false)
int main(int argc, char **argv)
cudaColorSpinorField * tmp1
double mu
Definition: test_util.cpp:1648
void dw_4d_matpc(void *out, void **gauge, void *in, double kappa, QudaMatPCType matpc_type, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
void endQuda(void)
void dw_dslash_5_4d(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, bool zero_initialize)
void construct_gauge_field(void **gauge, int type, QudaPrecision precision, QudaGaugeParam *param)
Definition: test_util.cpp:1047
int getReconstructNibble(QudaReconstructType recon)
Definition: test_util.h:140
QudaSolveType solve_type
Definition: quda.h:205
int gridsize_from_cmdline[]
Definition: test_util.cpp:49
QudaVerbosity verbosity
Definition: test_util.cpp:1614
enum QudaPrecision_s QudaPrecision
DiracMobiusPC * dirac_mdwf
void Dslash5(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
int ga_pad
Definition: quda.h:63
double_complex c_5[QUDA_MAX_DWF_LS]
Definition: quda.h:112
void dw_setDims(int *X, const int L5)
Definition: test_util.cpp:187
double mu
Definition: quda.h:114
QudaGaugeFixed gauge_fix
Definition: quda.h:61
const QudaParity parity
cpuColorSpinorField * spinorRef
__host__ __device__ ValueType norm(const complex< ValueType > &z)
Returns the magnitude of z squared.
void tm_mat(void *out, void **gauge, void *in, double kappa, double mu, QudaTwistFlavorType flavor, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param)
void tm_dslash(void *res, void **gaugeFull, void *spinorField, double kappa, double mu, QudaTwistFlavorType flavor, int oddBit, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
int niter
Definition: test_util.cpp:1629
QudaLinkType type
Definition: quda.h:42
double kappa
Definition: quda.h:106
void MatDagMatQuda(void *h_out, void *h_in, QudaInvertParam *inv_param)
int xdim
Definition: test_util.cpp:1615
#define errorQuda(...)
Definition: util_quda.h:121
double norm2(const ColorSpinorField &a)
Definition: reduce_quda.cu:721
QudaDslashType dslash_type
Definition: quda.h:102
QudaPrecision cuda_prec
Definition: quda.h:214
virtual void TearDown()
#define cloverSiteSize
Definition: test_util.h:9
int return_clover_inverse
Definition: quda.h:242
void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param)
double cpu_min
void tmc_dslash(void *out, void **gauge, void *in, void *clover, void *cInv, double kappa, double mu, QudaTwistFlavorType flavor, int parity, QudaMatPCType matpc_type, int dagger, QudaPrecision precision, QudaGaugeParam &param)
void commDimPartitionedSet(int dir)
QudaPrecision cpu_prec
Definition: quda.h:213
QudaDagType dagger
Definition: test_util.cpp:1620
int process_command_line_option(int argc, char **argv, int *idx)
Definition: test_util.cpp:2019
void Source(const QudaSourceType sourceType, const int st=0, const int s=0, const int c=0)
void tm_ndeg_mat(void *evenOut, void *oddOut, void **gauge, void *evenIn, void *oddIn, double kappa, double mu, double epsilon, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
QudaMatPCType matpc_type
Definition: test_util.cpp:1662
QudaDagType not_dagger
void clover_matpc(void *out, void **gauge, void *clover, void *clover_inv, void *in, double kappa, QudaMatPCType matpc_type, int dagger, QudaPrecision precision, QudaGaugeParam &gauge_param)
QudaDagType dagger
Definition: quda.h:207
const char * get_matpc_str(QudaMatPCType type)
Definition: misc.cpp:1121
void finalizeComms()
Definition: test_util.cpp:128
void dw_dslash(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
QudaGaugeParam gauge_param
QudaPrecision clover_cuda_prec_refinement_sloppy
Definition: quda.h:227
QudaGaugeFieldOrder gauge_order
Definition: quda.h:43
void dw_mat(void *out, void **gauge, void *in, double kappa, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
void tmc_mat(void *out, void **gauge, void *clover, void *in, double kappa, double mu, QudaTwistFlavorType flavor, int dagger, QudaPrecision precision, QudaGaugeParam &gauge_param)
bool compute_clover
Definition: test_util.cpp:1654
void tm_matpc(void *outEven, void **gauge, void *inEven, double kappa, double mu, QudaTwistFlavorType flavor, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
const char * get_prec_str(QudaPrecision prec)
Definition: misc.cpp:701
void loadCloverQuda(void *h_clover, void *h_clovinv, QudaInvertParam *inv_param)
int return_clover
Definition: quda.h:241
void M(ColorSpinorField &out, const ColorSpinorField &in) const
unsigned long long Flops() const
Definition: dirac_quda.h:177
QudaTwistFlavorType twist_flavor
Definition: test_util.cpp:1660
void dslashRef()
QudaDslashType dslash_type
Definition: test_util.cpp:1621
virtual void MdagM(ColorSpinorField &out, const ColorSpinorField &in) const =0
QudaSiteSubset siteSubset
Definition: lattice_field.h:71
QudaPrecision clover_cuda_prec_sloppy
Definition: quda.h:226
void setDims(int *)
Definition: test_util.cpp:151
void dslashQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaParity parity)
QudaFieldLocation input_location
Definition: quda.h:99
void freeGaugeQuda(void)
bool verify_results
Definition: test_util.cpp:1643
const char * recon_str[]
int Ls
Definition: test_util.cpp:38
QudaGaugeParam param
Definition: pack_test.cpp:17
double_complex b_5[QUDA_MAX_DWF_LS]
Definition: quda.h:111
void setDiracParam(DiracParam &diracParam, QudaInvertParam *inv_param, bool pc)
QudaSolutionType solution_type
Definition: quda.h:204
void dslashQuda_4dpc(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaParity parity, int test_type)
int x[QUDA_MAX_DIM]
Definition: lattice_field.h:67
QudaPrecision clover_cuda_prec
Definition: quda.h:225
void mdw_dslash_4_pre(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double _Complex *b5, double _Complex *c5, bool zero_initialize)
void Dslash4(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
void dw_matdagmat(void *out, void **gauge, void *in, double kappa, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
Dirac * dirac
void initQuda(int device)
void dw_matpc(void *out, void **gauge, void *in, double kappa, QudaMatPCType matpc_type, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
double tol
Definition: test_util.cpp:1656
QudaPrecision getPrecision(int i)
Definition: test_util.h:129
QudaFieldLocation output_location
Definition: quda.h:100
QudaPrecision clover_cuda_prec_precondition
Definition: quda.h:228
double benchmark(int kernel, const int niter)
Definition: blas_test.cu:303
virtual void SetUp()
double m5
Definition: quda.h:108
virtual ~DslashTest()
QudaReconstructType link_recon
Definition: test_util.cpp:1605
void Dslash5inv(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity, const double &kappa5) const
void mdw_dslash_5_inv(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double _Complex *kappa)
QudaVerbosity verbosity
Definition: quda.h:244
void setSpinorSiteSize(int n)
Definition: test_util.cpp:211
ColorSpinorParam csParam
Definition: pack_test.cpp:24
QudaInvertParam newQudaInvertParam(void)
int tdim
Definition: test_util.cpp:1618
const char * get_recon_str(QudaReconstructType recon)
Definition: misc.cpp:768
double event_time
QudaCloverFieldOrder clover_order
Definition: quda.h:230
enum QudaMatPCType_s QudaMatPCType
double cpu_time
__host__ __device__ ValueType pow(ValueType x, ExponentType e)
Definition: complex_quda.h:111
QudaGammaBasis gamma_basis
Definition: quda.h:221
char latfile[]
Definition: test_util.cpp:1623
QudaPrecision cuda_prec_sloppy
Definition: quda.h:52
void Dslash4pre(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
const char * get_dslash_str(QudaDslashType type)
Definition: misc.cpp:910
void display_test_info(int precision, QudaReconstructType link_recon)
std::string getdslashtestname(testing::TestParamInfo<::testing::tuple< int, int, int >> param)
void clover_mat(void *out, void **gauge, void *clover, void *in, double kappa, int dagger, QudaPrecision precision, QudaGaugeParam &gauge_param)
void tm_ndeg_matpc(void *outEven1, void *outEven2, void **gauge, void *inEven1, void *inEven2, double kappa, double mu, double epsilon, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
enum QudaDagType_s QudaDagType
enum QudaParity_s QudaParity
QudaReconstructType reconstruct
Definition: quda.h:50
QudaPrecision cuda_prec
Definition: quda.h:49
void Dslash4(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
int X[4]
Definition: quda.h:36
QudaPrecision cuda_prec
double mass
Definition: quda.h:105
double clover_coeff
Definition: test_util.cpp:1653
void end()
void * hostGauge[4]
int V
Definition: test_util.cpp:27
void * hostClover
int zdim
Definition: test_util.cpp:1617
static int Compare(const cpuColorSpinorField &a, const cpuColorSpinorField &b, const int resolution=1)
Perform a component by component comparison of two color-spinor fields. In doing we normalize with re...
void clover_dslash(void *out, void **gauge, void *clover, void *in, int parity, int dagger, QudaPrecision precision, QudaGaugeParam &param)
int compute_clover_inverse
Definition: quda.h:240
::testing::tuple< int, int, int > param
void tm_ndeg_dslash(void *res1, void *res2, void **gauge, void *spinorField1, void *spinorField2, double kappa, double mu, double epsilon, int oddBit, int daggerBit, QudaMatPCType matpc_type, QudaPrecision precision, QudaGaugeParam &gauge_param)
void * hostCloverInv
void construct_clover_field(void *clover, double norm, double diag, QudaPrecision precision)
Definition: test_util.cpp:1167
virtual void M(ColorSpinorField &out, const ColorSpinorField &in) const =0
double epsilon
Definition: test_util.cpp:1649
void wil_mat(void *out, void **gauge, void *in, double kappa, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param)
DslashTime dslashCUDA(int niter)
void mdw_dslash_5(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double _Complex *kappa, bool zero_initialize)
int Lsdim
Definition: test_util.cpp:1619
enum QudaReconstructType_s QudaReconstructType
void commDimPartitionedReset()
Reset the comm dim partioned array to zero,.
Main header file for the QUDA library.
QudaPrecision Precision() const
Definition: lattice_field.h:58
void tmc_matpc(void *out, void **gauge, void *in, void *clover, void *cInv, double kappa, double mu, QudaTwistFlavorType flavor, QudaMatPCType matpc_type, int dagger, QudaPrecision precision, QudaGaugeParam &gauge_param)
INSTANTIATE_TEST_SUITE_P(QUDA, DslashTest, Combine(Range(0, 4), ::testing::Values(QUDA_RECONSTRUCT_NO, QUDA_RECONSTRUCT_12, QUDA_RECONSTRUCT_8), ::testing::Values(0)), getdslashtestname)
void MatQuda(void *h_out, void *h_in, QudaInvertParam *inv_param)
#define printfQuda(...)
Definition: util_quda.h:115
QudaTboundary t_boundary
Definition: quda.h:45
QudaTwistFlavorType twist_flavor
Definition: quda.h:117
void dslash_4_4d(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
QudaTwistFlavorType twistFlavor
void usage(char **)
Definition: test_util.cpp:1783
unsigned long long flops
Definition: blas_quda.cu:22
double cpu_max
void MdagM(ColorSpinorField &out, const ColorSpinorField &in) const
enum QudaDslashType_s QudaDslashType
ColorSpinorField * tmp2
Definition: dirac_quda.h:42
static void SetUpTestCase()
QudaInvertParam inv_param
void wil_matpc(void *outEven, void **gauge, void *inEven, double kappa, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
static void TearDownTestCase()
double mass
Definition: test_util.cpp:1646
enum QudaVerbosity_s QudaVerbosity
int compute_clover
Definition: quda.h:239
double epsilon
Definition: quda.h:115
void wil_dslash(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
void MdagM(ColorSpinorField &out, const ColorSpinorField &in) const
cpuColorSpinorField * spinorTmp
static Dirac * create(const DiracParam &param)
Definition: dirac.cpp:159
void Dslash5(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
cudaColorSpinorField * cudaSpinorOut
QudaPrecision cpu_prec
void initComms(int argc, char **argv, int *const commDims)
Definition: test_util.cpp:88
void Dslash5inv(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
int device
Definition: test_util.cpp:1602
virtual void Dslash(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const =0
cudaColorSpinorField * cudaSpinor
cpuColorSpinorField * spinor
QudaPrecision clover_cpu_prec
Definition: quda.h:224
QudaPrecision prec
Definition: test_util.cpp:1608
void M(ColorSpinorField &out, const ColorSpinorField &in) const
void init(int precision, QudaReconstructType link_recon)
void setVerbosity(QudaVerbosity verbosity)
Definition: util_quda.cpp:25
QudaMatPCType matpc_type
Definition: quda.h:206
void dslashQuda_mdwf(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaParity parity, int test_type)
int test_type
Definition: test_util.cpp:1636
double getTolerance(QudaPrecision prec)
void dslash_5_inv(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double *kappa)
ColorSpinorField * tmp1
Definition: dirac_quda.h:41
const char * prec_str[]
#define MAX(a, b)
double kappa5
QudaPrecision cpu_prec
Definition: quda.h:47
int ydim
Definition: test_util.cpp:1616
void updateR()
update the radius for halos.
#define gaugeSiteSize
Definition: face_gauge.cpp:34
QudaGaugeParam newQudaGaugeParam(void)
double clover_coeff
Definition: quda.h:233
void comm_barrier(void)
Definition: comm_mpi.cpp:326
cpuColorSpinorField * spinorOut
enum QudaTwistFlavorType_s QudaTwistFlavorType
DiracDomainWall4DPC * dirac_4dpc
TEST_P(DslashTest, verify)