QUDA  v0.7.0
A library for QCD on GPUs
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
dslash_test.cpp
Go to the documentation of this file.
1 #include <iostream>
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include <string.h>
5 
6 #include <quda.h>
7 #include <quda_internal.h>
8 #include <dirac_quda.h>
9 #include <dslash_quda.h>
10 #include <invert_quda.h>
11 #include <util_quda.h>
12 #include <blas_quda.h>
13 
14 #include <test_util.h>
15 #include <dslash_util.h>
18 #include "misc.h"
19 
20 // Google Test framework
21 #include <gtest.h>
22 
23 #include <gauge_qio.h>
24 
// Larger of two values. Function-like macro: an argument may be evaluated twice,
// so do not pass expressions with side effects.
25 #define MAX(a,b) ((a)>(b)?(a):(b))
26 
27 using namespace quda;
28 
29 const QudaParity parity = QUDA_EVEN_PARITY; // even or odd?
// When zero, init() creates the device spinors and the Dirac operator, and dslashCUDA()
// takes the non-`transfer` branches instead of the host-interface calls.
30 const int transfer = 0; // include transfer time in the benchmark?
31 
// Assigned in init() as 0.5/(5 + inv_param.m5) in the domain-wall and Mobius branches.
32 double kappa5;
33 
36 
39 
42 
44 
// Operator under test; init() points it at dirac_4dpc, dirac_mdwf or the result of Dirac::create().
45 Dirac *dirac = NULL;
46 DiracMobiusDomainWallPC *dirac_mdwf = NULL; // create the MDWF Dirac operator
47 DiracDomainWall4DPC *dirac_4dpc = NULL; // create the 4d preconditioned DWF Dirac operator
48 
49 // What test are we doing (0 = dslash, 1 = MatPC, 2 = Mat, 3 = MatPCDagMatPC, 4 = MatDagMat)
50 extern int test_type;
51 
52 // Dirac operator type
54 
55 // Twisted mass flavor type
58 
// NOTE(review): the declarations that followed the two comments above (lines 53, 56-57) are
// not visible in this listing.
59 extern bool tune;
60 
61 extern int device;
// Lattice extents; init() copies xdim..tdim into gauge_param.X[0..3].
62 extern int xdim;
63 extern int ydim;
64 extern int zdim;
65 extern int tdim;
66 extern int Lsdim;
67 extern int gridsize_from_cmdline[];
69 extern QudaPrecision prec;
70 extern QudaDagType dagger;
72 
73 extern bool verify_results;
74 extern int niter;
75 extern char latfile[];
76 
77 extern bool kernel_pack_t;
78 
// Prepare everything the test needs: fill gauge_param / inv_param, allocate the host gauge
// and spinor fields, call initQuda(device) and -- when `transfer` is zero -- create the
// device spinors and the Dirac operator under test.
// NOTE(review): the embedded source line numbers in this listing are not contiguous
// (e.g. 82 -> 85, 90 -> 92), so some statements and branch conditions are not visible here.
// Comments next to such gaps are hedged on purpose.
79 void init(int argc, char **argv) {
80 
81  cuda_prec = prec;
82 
85 
86  gauge_param.X[0] = xdim;
87  gauge_param.X[1] = ydim;
88  gauge_param.X[2] = zdim;
89  gauge_param.X[3] = tdim;
90 
// (the condition opening this if/else chain, line 91, is not visible)
92  errorQuda("Asqtad not supported. Please try staggered_dslash_test instead");
93  } else if (dslash_type == QUDA_DOMAIN_WALL_DSLASH ||
96  // for these we always use kernel packing
98  setKernelPackT(true);
99  } else {
102  Ls = 1;
103  }
104 
105  setSpinorSiteSize(24);
106 
107  gauge_param.anisotropy = 1.0;
108 
112 
119 
120  inv_param.kappa = 0.1;
121 
// Per-operator parameters; the first branch's condition (line 122) is not visible.
123  inv_param.mu = 0.01;
124  inv_param.epsilon = 0.01;
126  } else if (dslash_type == QUDA_DOMAIN_WALL_DSLASH ||
128  inv_param.mass = 0.01;
129  inv_param.m5 = -1.5;
130  kappa5 = 0.5/(5 + inv_param.m5);
131  } else if (dslash_type == QUDA_MOBIUS_DWF_DSLASH ) {
132  inv_param.mass = 0.01;
133  inv_param.m5 = -1.5;
134  kappa5 = 0.5/(5 + inv_param.m5);
// NOTE(review): the loop bound is Lsdim, while the index at the end of this page lists b_5/c_5
// as arrays of QUDA_MAX_DWF_LS entries; no check that Lsdim fits is visible here -- confirm.
135  for(int k = 0; k < Lsdim; k++)
136  {
137  // b_5[k] and c_5[k] are arbitrary values,
138  // chosen so that their difference is 1.0
139  inv_param.b_5[k] = 1.50;
140  inv_param.c_5[k] = 0.50;
141  }
142  }
143 
145 
// Maps 0 -> 1 and 1 -> 0; assumes dagger only takes those two values -- TODO confirm.
148  not_dagger = (QudaDagType)((dagger + 1)%2);
149 
// (the precision comparison guarding this error, lines 150-151, is not visible)
152  errorQuda("Gauge and spinor CPU precisions must match");
153  }
155 
158 
159 #ifndef MULTI_GPU // free parameter for single GPU
160  gauge_param.ga_pad = 0;
161 #else // must be this one c/b face for multi gpu
// Each face size is the product of the other three extents, halved; the pad is the largest of them.
162  int x_face_size = gauge_param.X[1]*gauge_param.X[2]*gauge_param.X[3]/2;
163  int y_face_size = gauge_param.X[0]*gauge_param.X[2]*gauge_param.X[3]/2;
164  int z_face_size = gauge_param.X[0]*gauge_param.X[1]*gauge_param.X[3]/2;
165  int t_face_size = gauge_param.X[0]*gauge_param.X[1]*gauge_param.X[2]/2;
166  int pad_size =MAX(x_face_size, y_face_size);
167  pad_size = MAX(pad_size, z_face_size);
168  pad_size = MAX(pad_size, t_face_size);
169  gauge_param.ga_pad = pad_size;
170 #endif
171  inv_param.sp_pad = 0;
172  inv_param.cl_pad = 0;
173 
174  //inv_param.sp_pad = xdim*ydim*zdim/2;
175  //inv_param.cl_pad = 24*24*24;
176 
177  inv_param.gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS; // test code only supports DeGrand-Rossi Basis
179 
// Three switches on test_type follow, one per operator family. The statements inside the
// case labels are not visible in this listing; only the accepted test_type values and the
// error for unknown ones can be read here. The first switch's guarding condition (line 180)
// is not visible either; its error text names QUDA_DOMAIN_WALL_4D_DSLASH.
181  switch(test_type) {
182  case 0:
183  case 1:
184  case 2:
185  case 3:
187  break;
188  case 4:
190  break;
191  default:
192  errorQuda("Test type %d not defined QUDA_DOMAIN_WALL_4D_DSLASH\n", test_type);
193  }
194  } else if(dslash_type == QUDA_MOBIUS_DWF_DSLASH) {
195  switch(test_type) {
196  case 0:
197  case 1:
198  case 2:
199  case 3:
200  case 4:
202  break;
203  case 5:
205  break;
206  default:
207  errorQuda("Test type %d not defined on QUDA_MOBIUS_DWF_DSLASH\n", test_type);
208  }
209  }
210  else
211  {
212  switch(test_type) {
213  case 0:
214  case 1:
216  break;
217  case 2:
219  break;
220  case 3:
222  break;
223  case 4:
225  break;
226  default:
227  errorQuda("Test type %d not defined\n", test_type);
228  }
229  }
230 
232 
239  //if (test_type > 0) {
// hostCloverInv aliases hostClover here (same pointer); the allocation of hostClover is not visible.
241  hostCloverInv = hostClover; // fake it
242  /*} else {
243  hostClover = NULL;
244  hostCloverInv = malloc(V*cloverSiteSize*inv_param.clover_cpu_prec);
245  }*/
246  } else if (dslash_type == QUDA_TWISTED_MASS_DSLASH) {
247 
248  }
249 
251 
252  // construct input fields
// NOTE(review): the malloc results are not checked in the visible code.
253  for (int dir = 0; dir < 4; dir++) hostGauge[dir] = malloc(V*gaugeSiteSize*gauge_param.cpu_prec);
254 
256 
// Describe the host spinor fields: 3 colours, 4 spins, 4 dimensions taken from gauge_param.X.
257  csParam.nColor = 3;
258  csParam.nSpin = 4;
261  }
262  csParam.nDim = 4;
263  for (int d=0; d<4; d++) csParam.x[d] = gauge_param.X[d];
// (condition not visible) a fifth dimension of extent Ls is added in this branch
267  csParam.nDim = 5;
268  csParam.x[4] = Ls;
269  }
272  csParam.PCtype = QUDA_4D_PC;
273  } else {
274  csParam.PCtype = QUDA_5D_PC;
275  }
276 
277 //ndeg_tm
281  csParam.x[4] = inv_param.Ls;
282  }
283 
284 
285  csParam.precision = inv_param.cpu_prec;
286  csParam.pad = 0;
287 
// The x extent is halved in the branches below; presumably these are the single-parity
// cases -- the lines that would confirm this (291, 297, 300) are not visible.
290  {
292  csParam.x[0] /= 2;
293 
294  } else
295  {
296  if (test_type < 2 || test_type ==3) {
298  csParam.x[0] /= 2;
299  } else {
301  }
302  }
303 
306  csParam.gammaBasis = inv_param.gamma_basis;
307  csParam.create = QUDA_ZERO_FIELD_CREATE;
308 
// Four host fields sharing the same description: input, output, reference and scratch.
309  spinor = new cpuColorSpinorField(csParam);
310  spinorOut = new cpuColorSpinorField(csParam);
311  spinorRef = new cpuColorSpinorField(csParam);
312  spinorTmp = new cpuColorSpinorField(csParam);
313 
// Restore the full x extent after the possible halving above.
314  csParam.x[0] = gauge_param.X[0];
315 
316  printfQuda("Randomizing fields... ");
317 
// strcmp() is non-zero when latfile is not the empty string.
318  if (strcmp(latfile,"")) { // load in the command line supplied gauge field
321  } else { // else generate a random SU(3) field
323  }
324 
326 
328  double norm = 0.0; // clover components are random numbers in the range (-norm, norm)
329  double diag = 1.0; // constant added to the diagonal
330 
331  if (test_type == 2 || test_type == 4) {
333  } else {
335  }
336  }
337  printfQuda("done.\n"); fflush(stdout);
338 
339  initQuda(device);
340 
341  printfQuda("Sending gauge field to GPU\n");
343 
345  printfQuda("Sending clover field to GPU\n");
347  }
348 
350  printfQuda("Sending clover field to GPU\n");
351  loadCloverQuda(NULL, NULL, &inv_param);
352  }
353 
// Device-side setup, only when transfers are excluded from the benchmark.
354  if (!transfer) {
356  csParam.pad = inv_param.sp_pad;
357  csParam.precision = inv_param.cuda_prec;
358  if (csParam.precision == QUDA_DOUBLE_PRECISION ) {
360  } else {
361  /* Single and half */
363  }
364 
367  {
369  csParam.x[0] /= 2;
370  } else
371  {
372  if (test_type < 2 || test_type == 3) {
374  csParam.x[0] /= 2;
375  }
376  }
377 
378  printfQuda("Creating cudaSpinor\n");
379  cudaSpinor = new cudaColorSpinorField(csParam);
380  printfQuda("Creating cudaSpinorOut\n");
381  cudaSpinorOut = new cudaColorSpinorField(csParam);
382 
383  tmp1 = new cudaColorSpinorField(csParam);
384 
// For test types 2 and 4 the x extent is halved before tmp2 is created, so tmp2 can have a
// different extent from tmp1 (lines 385-386 and 389 are not visible).
387  if (test_type == 2 || test_type == 4) csParam.x[0] /= 2;
388 
390  tmp2 = new cudaColorSpinorField(csParam);
391 
392  printfQuda("Sending spinor field to GPU\n");
// Assignment from the host field to the device field.
393  *cudaSpinor = *spinor;
394 
395  double cpu_norm = norm2(*spinor);
396  double cuda_norm = norm2(*cudaSpinor);
397  printfQuda("Source: CPU = %e, CUDA = %e\n", cpu_norm, cuda_norm);
398 
// pc is passed to setDiracParam below; it is forced to true in one branch (condition not visible),
// otherwise it is true for every test type except 2 and 4.
399  bool pc;
402  pc = true;
403  else
404  pc = (test_type != 2 && test_type != 4);
405  DiracParam diracParam;
406  setDiracParam(diracParam, &inv_param, pc);
407  diracParam.tmp1 = tmp1;
408  diracParam.tmp2 = tmp2;
409 
// `dirac` ends up pointing at whichever operator was constructed; end() deletes it through `dirac`.
411  dirac_4dpc = new DiracDomainWall4DPC(diracParam);
412  dirac = (Dirac*)dirac_4dpc;
413  }
414  else if (dslash_type == QUDA_MOBIUS_DWF_DSLASH){
415  dirac_mdwf = new DiracMobiusDomainWallPC(diracParam);
416  dirac = (Dirac*)dirac_mdwf;
417  }
418  else {
419  dirac = Dirac::create(diracParam);
420  }
421  } else {
422  double cpu_norm = norm2(*spinor);
423  printfQuda("Source: CPU = %e\n", cpu_norm);
424  }
425 
426 }
427 
// Tear down what init() set up: the Dirac operator and device fields (only when `transfer`
// is zero), the four host spinor fields, the host gauge buffers, then endQuda().
// NOTE(review): lines 448-449 are not visible in this listing, so the closing brace on
// line 451 belongs to a conditional that cannot be seen here.
428 void end() {
429  if (!transfer) {
430  if(dirac != NULL)
431  {
432  delete dirac;
// Reset so a repeated call does not delete the operator twice.
433  dirac = NULL;
434  }
// NOTE(review): unlike `dirac`, these pointers are neither NULL-checked nor reset after delete.
435  delete cudaSpinor;
436  delete cudaSpinorOut;
437  delete tmp1;
438  delete tmp2;
439  }
440 
441  // release memory
442  delete spinor;
443  delete spinorOut;
444  delete spinorRef;
445  delete spinorTmp;
446 
447  for (int dir = 0; dir < 4; dir++) free(hostGauge[dir]);
// init() sets hostCloverInv = hostClover (same pointer) in its visible branch; presumably
// only one of the two is freed here -- the neighbouring lines are not visible, confirm.
450  free(hostCloverInv);
451  }
452  endQuda();
453 
454 }
455 
456 // execute kernel
// Run the operator selected by dslash_type / test_type `niter` times, bracketed by two CUDA
// events, and return the elapsed time in seconds (the event time divided by 1000).
// Most of the per-case calls are not visible in this listing (the embedded line numbers skip
// them); only the MatQuda() host-interface calls and one TwistCloverInv() call can be read.
// NOTE(review): the return codes of the cudaEvent* calls are not checked, and a CUDA error
// found at the end is only printed -- the function still returns the measured time.
457 double dslashCUDA(int niter) {
458 
// NOTE(review): the local `end` shadows the file-level function end() inside this function.
459  cudaEvent_t start, end;
460  cudaEventCreate(&start);
461  cudaEventCreate(&end);
462  cudaEventRecord(start, 0);
463 
464  for (int i = 0; i < niter; i++) {
// (the condition selecting this first switch, line 465, is not visible)
466  switch (test_type) {
467  case 0:
468  if (transfer) {
470  } else {
472  }
473  break;
474  case 1:
475  if (transfer) {
477  } else {
479  }
480  break;
481  case 2:
482  if (transfer) {
484  } else {
486  }
487  break;
488  case 3:
489  if (transfer) {
490  MatQuda(spinorOut->V(), spinor->V(), &inv_param);
491  } else {
493  }
494  break;
495  case 4:
496  if (transfer) {
498  } else {
500  }
501  break;
502  }
503  }
504  else if (dslash_type == QUDA_MOBIUS_DWF_DSLASH){
505  switch (test_type) {
506  case 0:
507  if (transfer) {
509  } else {
511  }
512  break;
513  case 1:
514  if (transfer) {
516  } else {
518  }
519  break;
520  case 2:
521  if (transfer) {
523  } else {
525  }
526  break;
527  case 3:
528  if (transfer) {
530  } else {
532  }
533  break;
534  case 4:
535  if (transfer) {
536  MatQuda(spinorOut->V(), spinor->V(), &inv_param);
537  } else {
539  }
540  break;
541  case 5:
542  if (transfer) {
544  } else {
546  }
547  break;
548  }
549  } else {
550  switch (test_type) {
551  case 0:
// (condition on line 552 not visible) this branch casts `dirac` to DiracTwistedCloverPC and
// applies TwistCloverInv on the parity opposite to `parity`.
553  if (transfer) {
555  } else {
556  ((DiracTwistedCloverPC *) dirac)->TwistCloverInv(*tmp1, *cudaSpinor, (parity+1)%2);
558  }
559  } else {
560  if (transfer) {
562  } else {
564  }
565  }
566  break;
567  case 1:
568  if (transfer) {
569  MatQuda(spinorOut->V(), spinor->V(), &inv_param);
570  } else {
572  }
573  break;
574  case 2:
575  if (transfer) {
576  MatQuda(spinorOut->V(), spinor->V(), &inv_param);
577  } else {
579  }
580  break;
581  case 3:
582  if (transfer) {
584  } else {
586  }
587  break;
588  case 4:
589  if (transfer) {
591  } else {
593  }
594  break;
595  }
596  }
597  }
598 
// Wait for the end event to complete before reading the elapsed time.
599  cudaEventRecord(end, 0);
600  cudaEventSynchronize(end);
601  float runTime;
602  cudaEventElapsedTime(&runTime, start, end);
603  cudaEventDestroy(start);
604  cudaEventDestroy(end);
605 
606  double secs = runTime / 1000; //stopwatchReadSeconds();
607 
608  // check for errors
609  cudaError_t stat = cudaGetLastError();
610  if (stat != cudaSuccess)
611  printfQuda("with ERROR: %s\n", cudaGetErrorString(stat));
612 
613  return secs;
614 }
615 
// Compute the host reference result for the selected dslash_type / test_type.
// The reference calls themselves are mostly not visible in this listing (the embedded line
// numbers skip them); what can be read is the case structure, the pointer set-up for the
// two-flavour twisted-mass path and the kappa arrays built for the domain-wall variants.
// Unknown test types print a message and terminate the process with exit(-1).
616 void dslashRef() {
617 
618  // compare to dslash reference implementation
619  printfQuda("Calculating reference implementation...");
620  fflush(stdout);
621 
// (the dslash_type condition for this first switch, lines 622-623, is not visible)
624  switch (test_type) {
625  case 0:
627  break;
628  case 1:
631  break;
632  case 2:
634  break;
635  case 3:
640  break;
641  case 4:
644  break;
645  default:
646  printfQuda("Test type not defined\n");
647  exit(-1);
648  }
// (second switch; its guarding condition, line 649, is not visible)
// In each `else` below, the second pointer of a pair is the first one advanced by tm_offset
// elements of the CPU floating type (double or float, chosen by comparing cpu_prec with
// sizeof(double)). tm_offset is 12 * spinorRef->Volume().
650  switch (test_type) {
651  case 0:
654  else
655  {
656  int tm_offset = 12*spinorRef->Volume();
657 
658  void *ref1 = spinorRef->V();
659  void *ref2 = cpu_prec == sizeof(double) ? (void*)((double*)ref1 + tm_offset): (void*)((float*)ref1 + tm_offset);
660 
661  void *flv1 = spinor->V();
662  void *flv2 = cpu_prec == sizeof(double) ? (void*)((double*)flv1 + tm_offset): (void*)((float*)flv1 + tm_offset);
663 
664  tm_ndeg_dslash(ref1, ref2, hostGauge, flv1, flv2, inv_param.kappa, inv_param.mu, inv_param.epsilon,
666  }
667  break;
668  case 1:
671  else
672  {
673  int tm_offset = 12*spinorRef->Volume();
674 
675  void *ref1 = spinorRef->V();
676  void *ref2 = cpu_prec == sizeof(double) ? (void*)((double*)ref1 + tm_offset): (void*)((float*)ref1 + tm_offset);
677 
678  void *flv1 = spinor->V();
679  void *flv2 = cpu_prec == sizeof(double) ? (void*)((double*)flv1 + tm_offset): (void*)((float*)flv1 + tm_offset);
680 
682  }
683  break;
684  case 2:
687  else
688  {
689  int tm_offset = 12*spinorRef->Volume();
690 
691  void *evenOut = spinorRef->V();
692  void *oddOut = cpu_prec == sizeof(double) ? (void*)((double*)evenOut + tm_offset): (void*)((float*)evenOut + tm_offset);
693 
694  void *evenIn = spinor->V();
695  void *oddIn = cpu_prec == sizeof(double) ? (void*)((double*)evenIn + tm_offset): (void*)((float*)evenIn + tm_offset);
696 
698  }
699  break;
700  case 3:
706  }
707  else
708  {
709  int tm_offset = 12*spinorRef->Volume();
710 
711  void *ref1 = spinorRef->V();
712  void *ref2 = cpu_prec == sizeof(double) ? (void*)((double*)ref1 + tm_offset): (void*)((float*)ref1 + tm_offset);
713 
714  void *flv1 = spinor->V();
715  void *flv2 = cpu_prec == sizeof(double) ? (void*)((double*)flv1 + tm_offset): (void*)((float*)flv1 + tm_offset);
716 
// These locals are host pointers into spinorTmp; they shadow the file-level device fields
// tmp1 / tmp2 within this scope.
717  void *tmp1 = spinorTmp->V();
718  void *tmp2 = cpu_prec == sizeof(double) ? (void*)((double*)tmp1 + tm_offset): (void*)((float*)tmp1 + tm_offset);
719 
722  }
723  break;
724  case 4:
730  }
731  else
732  {
733  int tm_offset = 12*spinorRef->Volume();
734 
735  void *evenOut = spinorRef->V();
736  void *oddOut = cpu_prec == sizeof(double) ? (void*)((double*)evenOut + tm_offset): (void*)((float*)evenOut + tm_offset);
737 
738  void *evenIn = spinor->V();
739  void *oddIn = cpu_prec == sizeof(double) ? (void*)((double*)evenIn + tm_offset): (void*)((float*)evenIn + tm_offset);
740 
741  void *evenTmp = spinorTmp->V();
742  void *oddTmp = cpu_prec == sizeof(double) ? (void*)((double*)evenTmp + tm_offset): (void*)((float*)evenTmp + tm_offset);
743 
746  }
747  break;
748  default:
749  printfQuda("Test type not defined\n");
750  exit(-1);
751  }
752  } else if (dslash_type == QUDA_DOMAIN_WALL_DSLASH ){
753  switch (test_type) {
754  case 0:
756  break;
757  case 1:
759  break;
760  case 2:
762  break;
763  case 3:
766  break;
767  case 4:
769  break;
770  default:
771  printf("Test type not supported for domain wall\n");
772  exit(-1);
773  }
// (branch condition, line 774, not visible) per-slice kappa array, every entry set to kappa5.
// NOTE(review): the malloc result is not checked before it is written.
775  double *kappa_5 = (double*)malloc(Ls*sizeof(double));
776  for(int xs = 0; xs < Ls ; xs++)
777  kappa_5[xs] = kappa5;
778  switch (test_type) {
779  case 0:
781  break;
782  case 1:
784  break;
785  case 2:
787  break;
788  case 3:
790  break;
791  case 4:
794  break;
// NOTE(review): the second break below is unreachable.
795  break;
796  default:
797  printf("Test type not supported for domain wall\n");
798  exit(-1);
799  }
800  free(kappa_5);
801  } else if (dslash_type == QUDA_MOBIUS_DWF_DSLASH){
// Four arrays of Lsdim doubles derived from inv_param.b_5 / c_5 / m5; all are freed below.
// NOTE(review): the malloc results are not checked before they are written.
802  double *kappa_b, *kappa_c, *kappa_5, *kappa_mdwf;
803  kappa_b = (double*)malloc(Lsdim*sizeof(double));
804  kappa_c = (double*)malloc(Lsdim*sizeof(double));
805  kappa_5 = (double*)malloc(Lsdim*sizeof(double));
806  kappa_mdwf = (double*)malloc(Lsdim*sizeof(double));
807  for(int xs = 0 ; xs < Lsdim ; xs++)
808  {
809  kappa_b[xs] = 1.0/(2*(inv_param.b_5[xs]*(4.0 + inv_param.m5) + 1.0));
810  kappa_c[xs] = 1.0/(2*(inv_param.c_5[xs]*(4.0 + inv_param.m5) - 1.0));
811  kappa_5[xs] = 0.5*kappa_b[xs]/kappa_c[xs];
812  kappa_mdwf[xs] = -kappa_5[xs];
813  }
814  switch (test_type) {
815  case 0:
817  break;
818  case 1:
820  break;
821  case 2:
823  break;
824  case 3:
826  break;
827  case 4:
829  break;
830  case 5:
833  break;
// NOTE(review): the second break below is unreachable.
834  break;
835  default:
836  printf("Test type not supported for domain wall\n");
837  exit(-1);
838  }
839  free(kappa_b);
840  free(kappa_c);
841  free(kappa_5);
842  free(kappa_mdwf);
843  } else {
844  printfQuda("Unsupported dslash_type\n");
845  exit(-1);
846  }
847 
848  printfQuda("done.\n");
849 }
850 
851 
// Body of a function whose signature line (852) is not visible in this listing; the index at
// the end of this page lists `void display_test_info()`, which is presumably this function.
// Prints a header row, one row of test parameters and the per-dimension dimPartitioned() values.
853 {
854  printfQuda("running the following test:\n");
855 
856  printfQuda("prec recon test_type matpc_type dagger S_dim T_dimension Ls_dimension dslash_type niter\n");
// (the arguments of this call, lines 858-860, are not visible)
857  printfQuda("%6s %2s %d %12s %d %3d/%3d/%3d %3d %2d %14s %d\n",
861  printfQuda("Grid partition info: X Y Z T\n");
862  printfQuda(" %d %d %d %d\n",
863  dimPartitioned(0),
864  dimPartitioned(1),
865  dimPartitioned(2),
866  dimPartitioned(3));
867 
868  return ;
869 
870 }
871 
// Declared here, defined in another file; main() calls it after reporting an option it could not parse.
872 extern void usage(char**);
873 
// Google Test case comparing the reference field (spinorRef) with the computed one (spinorOut).
// deviation is 10^(-c), where c is the integer returned by cpuColorSpinorField::Compare.
// The tolerance follows inv_param.cuda_prec: 1e-12 for double, 1e-3 for single, 1e-1 otherwise.
// ASSERT_LE fails the test when deviation exceeds the tolerance.
874 TEST(dslash, verify) {
875  double deviation = pow(10, -(double)(cpuColorSpinorField::Compare(*spinorRef, *spinorOut)));
876  double tol = (inv_param.cuda_prec == QUDA_DOUBLE_PRECISION ? 1e-12 :
877  (inv_param.cuda_prec == QUDA_SINGLE_PRECISION ? 1e-3 : 1e-1));
878  ASSERT_LE(deviation, tol) << "CPU and CUDA implementations do not agree";
879 }
880 
// Test driver: parse the command line, initialise communications and the test state, compute
// the host reference, time the device operator, print timing / bandwidth / norm figures,
// optionally run the Google Test comparison, then tear everything down.
// NOTE(review): lines 895, 909, 925 and 928 are not visible in this listing, so the closing
// brace on line 930 belongs to a conditional that cannot be seen here.
881 int main(int argc, char **argv)
882 {
883 
// A return value of 0 from process_command_line_option() is treated as "option handled".
884  for (int i =1;i < argc; i++){
885  if(process_command_line_option(argc, argv, &i) == 0){
886  continue;
887  }
888 
// NOTE(review): the loop carries on after usage() returns unless usage() itself terminates
// the program -- confirm against its definition.
889  fprintf(stderr, "ERROR: Invalid option:%s\n", argv[i]);
890  usage(argv);
891  }
892 
893  initComms(argc, argv, gridsize_from_cmdline);
894 
896 
897  init(argc, argv);
898 
// Size estimate in GiB: Vh * spinorSiteSize * cuda_prec divided by 2^30, computed in float.
899  float spinorGiB = (float)Vh*spinorSiteSize*inv_param.cuda_prec / (1 << 30);
900  printfQuda("\nSpinor mem: %.3f GiB\n", spinorGiB);
901  printfQuda("Gauge mem: %.3f GiB\n", gauge_param.gaugeGiB);
902 
// The reference is computed once, before the timing loop; the loop runs once (attempts == 1).
903  int attempts = 1;
904  dslashRef();
905  for (int i=0; i<attempts; i++) {
906 
907  if (tune) { // warm-up run
908  printfQuda("Tuning...\n");
910  dslashCUDA(1);
911  }
912  printfQuda("Executing %d kernel loops...\n", niter);
// Return value discarded here; presumably this call resets the operator's flop counter
// before the timed run -- TODO confirm against Dirac::Flops().
913  if (!transfer) dirac->Flops();
914  double secs = dslashCUDA(niter);
915  printfQuda("done.\n\n");
916 
// Copy the device result back into the host output field.
917  if (!transfer) *spinorOut = *cudaSpinorOut;
918 
919  // print timing information
920  printfQuda("%fus per kernel call\n", 1e6*secs / niter);
921  //FIXME No flops count for twisted-clover yet
// flops stays 0 when `transfer` is non-zero, so the GFLOPS line then prints 0.
922  unsigned long long flops = 0;
923  if (!transfer) flops = dirac->Flops();
// Per-site float counts used for the bandwidth figure; the larger count applies when
// test_type is non-zero.
924  int spinor_floats = test_type ? 2*(7*24+24)+24 : 7*24+24;
926  spinor_floats += test_type ? 2*(7*2 + 2) + 2 : 7*2 + 2; // relative size of norm is twice a short
927  int gauge_floats = (test_type ? 2 : 1) * (gauge_param.gauge_fix ? 6 : 8) * gauge_param.reconstruct;
929  gauge_floats += test_type ? 72*2 : 72;
930  }
931  printfQuda("GFLOPS = %f\n", 1.0e-9*flops/secs);
932  printfQuda("GB/s = %f\n\n",
933  (double)Vh*(Ls*spinor_floats+gauge_floats)*inv_param.cuda_prec/((secs/niter)*1e+9));
934 
// norm2_cpu_cuda is the norm of the host output field (spinorOut), not of a difference,
// although the messages below label it "CPU-CUDA" / "CPU-QUDA".
935  double norm2_cpu = norm2(*spinorRef);
936  double norm2_cpu_cuda= norm2(*spinorOut);
937  if (!transfer) {
938  double norm2_cuda= norm2(*cudaSpinorOut);
939  printfQuda("Results: CPU = %f, CUDA=%f, CPU-CUDA = %f\n", norm2_cpu, norm2_cuda, norm2_cpu_cuda);
940  } else {
941  printfQuda("Result: CPU = %f, CPU-QUDA = %f\n", norm2_cpu, norm2_cpu_cuda);
942  }
943 
// A failing test run only produces a warning; the process exit status is not changed here.
944  if (verify_results) {
945  ::testing::InitGoogleTest(&argc, argv);
946  if (RUN_ALL_TESTS() != 0) warningQuda("Tests failed");
947  }
948  }
949  end();
950 
951  finalizeComms();
952 }
QudaGaugeParam gauge_param
Definition: dslash_test.cpp:37
cudaColorSpinorField * cudaSpinorOut
Definition: dslash_test.cpp:41
void Dslash5(cudaColorSpinorField &out, const cudaColorSpinorField &in, const QudaParity parity) const
int dimPartitioned(int dim)
Definition: test_util.cpp:1577
QudaDiracFieldOrder dirac_order
Definition: quda.h:156
QudaReconstructType reconstruct_sloppy
Definition: quda.h:46
double anisotropy
Definition: quda.h:31
__constant__ int Vh
void MdagM(cudaColorSpinorField &out, const cudaColorSpinorField &in) const
int tdim
Definition: test_util.cpp:1556
cudaColorSpinorField * tmp1
Definition: dirac_quda.h:39
void dw_dslash_5_4d(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
double b_5[QUDA_MAX_DWF_LS]
Definition: quda.h:94
void dw_4d_matpc(void *out, void **gauge, void *in, double kappa, QudaMatPCType matpc_type, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
void endQuda(void)
void construct_gauge_field(void **gauge, int type, QudaPrecision precision, QudaGaugeParam *param)
Definition: test_util.cpp:1003
cpuColorSpinorField * spinorRef
Definition: dslash_test.cpp:40
enum QudaPrecision_s QudaPrecision
int V
Definition: test_util.cpp:29
int attempts
int ga_pad
Definition: quda.h:53
#define ASSERT_LE(val1, val2)
Definition: gtest.h:19789
void dw_setDims(int *X, const int L5)
Definition: test_util.cpp:125
cpuColorSpinorField * spinorTmp
Definition: dslash_test.cpp:40
double mu
Definition: quda.h:97
QudaGaugeFixed gauge_fix
Definition: quda.h:51
__host__ __device__ ValueType norm(const complex< ValueType > &z)
Returns the magnitude of z squared.
Definition: complex_quda.h:859
void tm_mat(void *out, void **gauge, void *in, double kappa, double mu, QudaTwistFlavorType flavor, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param)
void Dslash5inv(cudaColorSpinorField &out, const cudaColorSpinorField &in, const QudaParity parity, const double &k) const
void tm_dslash(void *res, void **gaugeFull, void *spinorField, double kappa, double mu, QudaTwistFlavorType flavor, int oddBit, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
QudaLinkType type
Definition: quda.h:35
void mdw_matpc(void *out, void **gauge, void *in, double *kappa_b, double *kappa_c, QudaMatPCType matpc_type, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double *b5, double *c5)
double kappa
Definition: quda.h:89
void MatDagMatQuda(void *h_out, void *h_in, QudaInvertParam *inv_param)
int xdim
Definition: test_util.cpp:1553
void MdagM(cudaColorSpinorField &out, const cudaColorSpinorField &in) const
#define errorQuda(...)
Definition: util_quda.h:73
QudaDslashType dslash_type
Definition: quda.h:85
void M(cudaColorSpinorField &out, const cudaColorSpinorField &in) const
QudaPrecision cuda_prec
Definition: quda.h:152
double c_5[QUDA_MAX_DWF_LS]
Definition: quda.h:95
#define cloverSiteSize
Definition: test_util.h:8
void setDims(int *)
Definition: test_util.cpp:88
void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param)
bool kernel_pack_t
Definition: test_util.cpp:1571
QudaPrecision cpu_prec
Definition: quda.h:151
int ydim
Definition: test_util.cpp:1554
int process_command_line_option(int argc, char **argv, int *idx)
Definition: test_util.cpp:1635
void Source(const QudaSourceType sourceType, const int st=0, const int s=0, const int c=0)
cudaColorSpinorField * tmp1
Definition: dslash_test.cpp:41
void tm_ndeg_mat(void *evenOut, void *oddOut, void **gauge, void *evenIn, void *oddIn, double kappa, double mu, double epsilon, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
QudaPrecision precision
Definition: lattice_field.h:41
#define gaugeSiteSize
QudaDagType dagger
Definition: quda.h:145
DiracMobiusDomainWallPC * dirac_mdwf
Definition: dslash_test.cpp:46
const char * get_matpc_str(QudaMatPCType type)
Definition: misc.cpp:920
void finalizeComms()
Definition: test_util.cpp:65
int test_type
Definition: test_util.cpp:1564
void Dslash5inv(cudaColorSpinorField &out, const cudaColorSpinorField &in, const QudaParity parity, const double &k) const
void dw_dslash(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
QudaGaugeFieldOrder gauge_order
Definition: quda.h:36
void dw_mat(void *out, void **gauge, void *in, double kappa, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
double dslashCUDA(int niter)
cpuColorSpinorField * spinor
Definition: dslash_test.cpp:40
void tm_matpc(void *outEven, void **gauge, void *inEven, double kappa, double mu, QudaTwistFlavorType flavor, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
const char * get_prec_str(QudaPrecision prec)
Definition: misc.cpp:658
TEST(dslash, verify)
void loadCloverQuda(void *h_clover, void *h_clovinv, QudaInvertParam *inv_param)
QudaPrecision cpu_prec
Definition: dslash_test.cpp:34
#define spinorSiteSize
QudaSiteSubset siteSubset
Definition: lattice_field.h:42
QudaPrecision clover_cuda_prec_sloppy
Definition: quda.h:163
void dslashQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaParity parity)
QudaFieldLocation input_location
Definition: quda.h:82
QudaDagType dagger
Definition: test_util.cpp:1558
int gridsize_from_cmdline[]
Definition: test_util.cpp:1559
int Ls
Definition: test_util.cpp:40
int xs
void setDiracParam(DiracParam &diracParam, QudaInvertParam *inv_param, bool pc)
QudaSolutionType solution_type
Definition: quda.h:142
int main(int argc, char **argv)
void dslashQuda_4dpc(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaParity parity, int test_type)
virtual void Dslash(cudaColorSpinorField &out, const cudaColorSpinorField &in, const QudaParity parity) const =0
int x[QUDA_MAX_DIM]
Definition: lattice_field.h:38
QudaPrecision clover_cuda_prec
Definition: quda.h:162
bool tune
Definition: test_util.cpp:1562
void dw_matdagmat(void *out, void **gauge, void *in, double kappa, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
void setTuning(QudaTune tune)
Definition: util_quda.cpp:33
void initQuda(int device)
void dw_matpc(void *out, void **gauge, void *in, double kappa, QudaMatPCType matpc_type, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
cudaColorSpinorField * tmp2
Definition: dslash_test.cpp:41
void Dslash4(cudaColorSpinorField &out, const cudaColorSpinorField &in, const QudaParity parity) const
void Dslash5(cudaColorSpinorField &out, const cudaColorSpinorField &in, const QudaParity parity) const
void M(cudaColorSpinorField &out, const cudaColorSpinorField &in) const
QudaFieldLocation output_location
Definition: quda.h:83
double m5
Definition: quda.h:91
char latfile[]
Definition: test_util.cpp:1561
unsigned long long Flops() const
Definition: dirac_quda.h:136
void setSpinorSiteSize(int n)
Definition: test_util.cpp:150
const int transfer
Definition: dslash_test.cpp:30
ColorSpinorParam csParam
Definition: pack_test.cpp:24
QudaInvertParam newQudaInvertParam(void)
const char * get_recon_str(QudaReconstructType recon)
Definition: misc.cpp:724
QudaCloverFieldOrder clover_order
Definition: quda.h:166
GTEST_API_ void InitGoogleTest(int *argc, char **argv)
enum QudaMatPCType_s QudaMatPCType
DiracDomainWall4DPC * dirac_4dpc
Definition: dslash_test.cpp:47
#define MAX(a, b)
Definition: dslash_test.cpp:25
void * hostGauge[4]
Definition: dslash_test.cpp:43
virtual void MdagM(cudaColorSpinorField &out, const cudaColorSpinorField &in) const =0
#define warningQuda(...)
Definition: util_quda.h:84
cudaColorSpinorField * tmp2
Definition: dirac_quda.h:40
int niter
Definition: test_util.cpp:1563
__host__ __device__ ValueType pow(ValueType x, ExponentType e)
Definition: complex_quda.h:100
QudaGammaBasis gamma_basis
Definition: quda.h:158
QudaDslashType dslash_type
Definition: test_util.cpp:1560
QudaPrecision cuda_prec_sloppy
Definition: quda.h:45
Dirac * dirac
Definition: dslash_test.cpp:45
const char * get_dslash_str(QudaDslashType type)
Definition: misc.cpp:814
bool verify_results
Definition: test_util.cpp:1568
void * hostClover
Definition: dslash_test.cpp:43
void tm_ndeg_matpc(void *outEven1, void *outEven2, void **gauge, void *inEven1, void *inEven2, double kappa, double mu, double epsilon, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
enum QudaDagType_s QudaDagType
enum QudaParity_s QudaParity
int device
Definition: test_util.cpp:1546
QudaReconstructType reconstruct
Definition: quda.h:43
void read_gauge_field(char *filename, void *gauge[], QudaPrecision precision, int *X, int argc, char *argv[])
Definition: gauge_qio.cpp:86
QudaPrecision cuda_prec
Definition: quda.h:42
int X[4]
Definition: quda.h:29
double mass
Definition: quda.h:88
void mdw_dslash_5(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double *kappa)
QudaTwistFlavorType twist_flavor
Definition: test_util.cpp:1570
void display_test_info()
static int Compare(const cpuColorSpinorField &a, const cpuColorSpinorField &b, const int resolution=1)
void Dslash4(cudaColorSpinorField &out, const cudaColorSpinorField &in, const QudaParity parity) const
void tm_ndeg_dslash(void *res1, void *res2, void **gauge, void *spinorField1, void *spinorField2, double kappa, double mu, double epsilon, int oddBit, int daggerBit, QudaMatPCType matpc_type, QudaPrecision precision, QudaGaugeParam &gauge_param)
void Dslash4pre(cudaColorSpinorField &out, const cudaColorSpinorField &in, const QudaParity parity) const
QudaPrecision cuda_prec
Definition: dslash_test.cpp:35
void construct_clover_field(void *clover, double norm, double diag, QudaPrecision precision)
Definition: test_util.cpp:1103
QudaInvertParam inv_param
Definition: dslash_test.cpp:38
void wil_mat(void *out, void **gauge, void *in, double kappa, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param)
double gaugeGiB
Definition: quda.h:60
void mdw_dslash_4_pre(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double *b5, double *c5)
if(x2 >=X2) return
QudaPrecision prec
Definition: test_util.cpp:1551
enum QudaReconstructType_s QudaReconstructType
Main header file for the QUDA library.
cpuColorSpinorField * spinorOut
Definition: dslash_test.cpp:40
virtual void M(cudaColorSpinorField &out, const cudaColorSpinorField &in) const =0
void MatQuda(void *h_out, void *h_in, QudaInvertParam *inv_param)
#define printfQuda(...)
Definition: util_quda.h:67
QudaTboundary t_boundary
Definition: quda.h:38
QudaTwistFlavorType twist_flavor
Definition: quda.h:100
void dslash_4_4d(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
QudaTwistFlavorType twistFlavor
int Lsdim
Definition: test_util.cpp:1557
int RUN_ALL_TESTS() GTEST_MUST_USE_RESULT_
Definition: gtest.h:20057
QudaDagType not_dagger
Definition: dslash_test.cpp:71
QudaMatPCType matpc_type
Definition: test_util.cpp:1573
enum QudaDslashType_s QudaDslashType
void setKernelPackT(bool pack)
Definition: dslash_quda.cu:82
void usage(char **)
Definition: test_util.cpp:1584
QudaReconstructType link_recon
Definition: test_util.cpp:1549
void wil_matpc(void *outEven, void **gauge, void *inEven, double kappa, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
void init(int argc, char **argv)
Definition: dslash_test.cpp:79
void dslashRef()
double epsilon
Definition: quda.h:98
void wil_dslash(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
static Dirac * create(const DiracParam &param)
Definition: dirac.cpp:134
int zdim
Definition: test_util.cpp:1555
double norm2(const ColorSpinorField &)
QudaPrecision clover_cpu_prec
Definition: quda.h:161
cudaColorSpinorField * cudaSpinor
Definition: dslash_test.cpp:41
void * hostCloverInv
Definition: dslash_test.cpp:43
void initComms(int argc, char **argv, const int *commDims)
Definition: test_util.cpp:48
const QudaParity parity
Definition: dslash_test.cpp:29
void setVerbosity(const QudaVerbosity verbosity)
Definition: util_quda.cpp:24
QudaMatPCType matpc_type
Definition: quda.h:144
void dslashQuda_mdwf(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaParity parity, int test_type)
void dslash_5_inv(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double *kappa)
double kappa5
Definition: dslash_test.cpp:32
void end()
QudaPrecision cpu_prec
Definition: quda.h:40
QudaGaugeParam newQudaGaugeParam(void)
double clover_coeff
Definition: quda.h:169
enum QudaTwistFlavorType_s QudaTwistFlavorType