QUDA  0.9.0
dslash_test.cpp
Go to the documentation of this file.
1 #include <iostream>
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include <string.h>
5 
6 #include <quda.h>
7 #include <quda_internal.h>
8 #include <dirac_quda.h>
9 #include <dslash_quda.h>
10 #include <invert_quda.h>
11 #include <util_quda.h>
12 #include <blas_quda.h>
13 
14 #include <test_util.h>
15 #include <dslash_util.h>
18 #include "misc.h"
19 
20 #include <qio_field.h>
21 // google test frame work
22 #include <gtest.h>
23 
24 #define MAX(a,b) ((a)>(b)?(a):(b))
25 
26 using namespace quda;
27 
28 const QudaParity parity = QUDA_EVEN_PARITY; // even or odd?
29 const int transfer = 0; // include transfer time in the benchmark?
30 
31 double kappa5;
32 
35 
38 
41 
43 
44 Dirac *dirac = NULL;
45 DiracMobiusPC *dirac_mdwf = NULL; // create the MDWF Dirac operator
46 DiracDomainWall4DPC *dirac_4dpc = NULL; // create the 4d preconditioned DWF Dirac operator
47 
48 // What test are we doing (0 = dslash, 1 = MatPC, 2 = Mat, 3 = MatPCDagMatPC, 4 = MatDagMat)
49 extern int test_type;
50 
51 // Dirac operator type
53 
54 // Twisted mass flavor type
57 
58 extern int device;
59 extern int xdim;
60 extern int ydim;
61 extern int zdim;
62 extern int tdim;
63 extern int Lsdim;
64 extern int gridsize_from_cmdline[];
66 extern QudaPrecision prec;
67 extern QudaDagType dagger;
69 
70 extern bool compute_clover;
71 extern double clover_coeff;
72 
73 extern bool verify_results;
74 extern int niter;
75 extern char latfile[];
76 
77 extern bool kernel_pack_t;
78 
79 extern double mass; // mass of Dirac operator
80 extern double mu;
81 
83 
84 void init(int argc, char **argv) {
85 
86  cuda_prec = prec;
87 
90 
91  gauge_param.X[0] = xdim;
92  gauge_param.X[1] = ydim;
93  gauge_param.X[2] = zdim;
94  gauge_param.X[3] = tdim;
95 
97  errorQuda("Asqtad not supported. Please try staggered_dslash_test instead");
98  } else if (dslash_type == QUDA_DOMAIN_WALL_DSLASH ||
101  // for these we always use kernel packing
103  setKernelPackT(true);
104  } else {
107  Ls = 1;
108  }
109 
110  setSpinorSiteSize(24);
111 
112  gauge_param.anisotropy = 1.0;
113 
117 
124 
125  inv_param.kappa = 0.1;
126 
128  inv_param.epsilon = 0.1;
130  } else if (dslash_type == QUDA_DOMAIN_WALL_DSLASH ||
132  inv_param.m5 = -1.5;
133  kappa5 = 0.5/(5 + inv_param.m5);
134  } else if (dslash_type == QUDA_MOBIUS_DWF_DSLASH ) {
135  inv_param.m5 = -1.5;
136  kappa5 = 0.5/(5 + inv_param.m5);
137  for(int k = 0; k < Lsdim; k++)
138  {
139  // b5[k], c[k] values are chosen for arbitrary values,
140  // but the difference of them are same as 1.0
141  inv_param.b_5[k] = 1.50;
142  inv_param.c_5[k] = 0.50;
143  }
144  }
145 
146  inv_param.mu = mu;
147  inv_param.mass = mass;
149 
153  not_dagger = (QudaDagType)((dagger + 1)%2);
154 
157  errorQuda("Gauge and spinor CPU precisions must match");
158  }
160 
163 
164 #ifndef MULTI_GPU // free parameter for single GPU
165  gauge_param.ga_pad = 0;
166 #else // must be this one c/b face for multi gpu
167  int x_face_size = gauge_param.X[1]*gauge_param.X[2]*gauge_param.X[3]/2;
168  int y_face_size = gauge_param.X[0]*gauge_param.X[2]*gauge_param.X[3]/2;
169  int z_face_size = gauge_param.X[0]*gauge_param.X[1]*gauge_param.X[3]/2;
170  int t_face_size = gauge_param.X[0]*gauge_param.X[1]*gauge_param.X[2]/2;
171  int pad_size =MAX(x_face_size, y_face_size);
172  pad_size = MAX(pad_size, z_face_size);
173  pad_size = MAX(pad_size, t_face_size);
174  gauge_param.ga_pad = pad_size;
175 #endif
176  inv_param.sp_pad = 0;
177  inv_param.cl_pad = 0;
178 
179  //inv_param.sp_pad = xdim*ydim*zdim/2;
180  //inv_param.cl_pad = 24*24*24;
181 
182  inv_param.gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS; // test code only supports DeGrand-Rossi Basis
184 
186  switch(test_type) {
187  case 0:
188  case 1:
189  case 2:
190  case 3:
192  break;
193  case 4:
195  break;
196  default:
197  errorQuda("Test type %d not defined QUDA_DOMAIN_WALL_4D_DSLASH\n", test_type);
198  }
199  } else if(dslash_type == QUDA_MOBIUS_DWF_DSLASH) {
200  switch(test_type) {
201  case 0:
202  case 1:
203  case 2:
204  case 3:
205  case 4:
207  break;
208  case 5:
210  break;
211  default:
212  errorQuda("Test type %d not defined on QUDA_MOBIUS_DWF_DSLASH\n", test_type);
213  }
214  }
215  else
216  {
217  switch(test_type) {
218  case 0:
219  case 1:
221  break;
222  case 2:
224  break;
225  case 3:
227  break;
228  case 4:
230  break;
231  default:
232  errorQuda("Test type %d not defined\n", test_type);
233  }
234  }
235 
237 
247  }
248 
249  // construct input fields
250  for (int dir = 0; dir < 4; dir++) hostGauge[dir] = malloc((size_t)V*gaugeSiteSize*gauge_param.cpu_prec);
251 
253 
254  csParam.nColor = 3;
255  csParam.nSpin = 4;
256  csParam.nDim = 4;
257  for (int d=0; d<4; d++) csParam.x[d] = gauge_param.X[d];
261  csParam.nDim = 5;
262  csParam.x[4] = Ls;
263  }
267  } else {
269  }
270 
271 //ndeg_tm
275  csParam.x[4] = inv_param.Ls;
276  }
277 
278 
280  csParam.pad = 0;
281 
284  csParam.x[0] /= 2;
285  } else {
286  if (test_type < 2 || test_type == 3) {
288  csParam.x[0] /= 2;
289  } else {
291  }
292  }
293 
298 
303 
304  csParam.x[0] = gauge_param.X[0];
305 
306  printfQuda("Randomizing fields... ");
307 
308  if (strcmp(latfile,"")) { // load in the command line supplied gauge field
311  } else { // else generate a random SU(3) field
313  }
314 
316 
318  double norm = 0.1; // clover components are random numbers in the range (-norm, norm)
319  double diag = 1.0; // constant added to the diagonal
322  }
323 
324  printfQuda("done.\n"); fflush(stdout);
325 
326  initQuda(device);
327 
328  // set verbosity prior to loadGaugeQuda
331 
332  printfQuda("Sending gauge field to GPU\n");
334 
336  if (compute_clover) printfQuda("Computing clover field on GPU\n");
337  else printfQuda("Sending clover field to GPU\n");
342 
344 
346  }
347 
348  if (!transfer) {
354  } else {
355  /* Single and half */
357  }
358 
361  {
363  csParam.x[0] /= 2;
364  } else
365  {
366  if (test_type < 2 || test_type == 3) {
368  csParam.x[0] /= 2;
369  }
370  }
371 
372  printfQuda("Creating cudaSpinor\n");
374  printfQuda("Creating cudaSpinorOut\n");
376 
378 
381  if (test_type == 2 || test_type == 4) csParam.x[0] /= 2;
382 
385 
386  printfQuda("Sending spinor field to GPU\n");
387  *cudaSpinor = *spinor;
388 
389  double cpu_norm = blas::norm2(*spinor);
390  double cuda_norm = blas::norm2(*cudaSpinor);
391  printfQuda("Source: CPU = %e, CUDA = %e\n", cpu_norm, cuda_norm);
392 
393  bool pc;
396  pc = true;
397  else
398  pc = (test_type != 2 && test_type != 4);
399  DiracParam diracParam;
400  setDiracParam(diracParam, &inv_param, pc);
401  diracParam.tmp1 = tmp1;
402  diracParam.tmp2 = tmp2;
403 
405  dirac_4dpc = new DiracDomainWall4DPC(diracParam);
406  dirac = (Dirac*)dirac_4dpc;
407  }
408  else if (dslash_type == QUDA_MOBIUS_DWF_DSLASH){
409  dirac_mdwf = new DiracMobiusPC(diracParam);
410  dirac = (Dirac*)dirac_mdwf;
411  }
412  else {
413  dirac = Dirac::create(diracParam);
414  }
415  } else {
416  double cpu_norm = blas::norm2(*spinor);
417  printfQuda("Source: CPU = %e\n", cpu_norm);
418  }
419 
420 }
421 
422 void end() {
423  if (!transfer) {
424  if(dirac != NULL)
425  {
426  delete dirac;
427  dirac = NULL;
428  }
429  delete cudaSpinor;
430  delete cudaSpinorOut;
431  delete tmp1;
432  delete tmp2;
433  }
434 
435  // release memory
436  delete spinor;
437  delete spinorOut;
438  delete spinorRef;
439  delete spinorTmp;
440 
441  for (int dir = 0; dir < 4; dir++) free(hostGauge[dir]);
443  free(hostClover);
445  }
446  endQuda();
447 
448 }
449 
450 struct DslashTime {
451  double event_time;
452  double cpu_time;
453  double cpu_min;
454  double cpu_max;
455 
456  DslashTime() : event_time(0.0), cpu_time(0.0), cpu_min(DBL_MAX), cpu_max(0.0) {}
457 };
458 
459 // execute kernel
461 
462  DslashTime dslash_time;
463  timeval tstart, tstop;
464 
465  cudaEvent_t start, end;
466  cudaEventCreate(&start);
467  cudaEventCreate(&end);
468 
469  comm_barrier();
470  cudaEventRecord(start, 0);
471 
472  for (int i = 0; i < niter; i++) {
473 
474  gettimeofday(&tstart, NULL);
475 
477  switch (test_type) {
478  case 0:
479  if (transfer) {
481  } else {
483  }
484  break;
485  case 1:
486  if (transfer) {
488  } else {
490  }
491  break;
492  case 2:
493  if (transfer) {
495  } else {
497  }
498  break;
499  case 3:
500  if (transfer) {
501  MatQuda(spinorOut->V(), spinor->V(), &inv_param);
502  } else {
504  }
505  break;
506  case 4:
507  if (transfer) {
509  } else {
511  }
512  break;
513  }
514  }
515  else if (dslash_type == QUDA_MOBIUS_DWF_DSLASH){
516  switch (test_type) {
517  case 0:
518  if (transfer) {
520  } else {
522  }
523  break;
524  case 1:
525  if (transfer) {
527  } else {
529  }
530  break;
531  case 2:
532  if (transfer) {
534  } else {
536  }
537  break;
538  case 3:
539  if (transfer) {
541  } else {
543  }
544  break;
545  case 4:
546  if (transfer) {
547  MatQuda(spinorOut->V(), spinor->V(), &inv_param);
548  } else {
550  }
551  break;
552  case 5:
553  if (transfer) {
555  } else {
557  }
558  break;
559  }
560  } else {
561  switch (test_type) {
562  case 0:
564  if (transfer) {
566  } else {
567  if (dagger) {
568  ((DiracTwistedCloverPC *) dirac)->TwistCloverInv(*tmp1, *cudaSpinor, (parity+1)%2);
570  } else {
572  }
573  }
574  } else {
575  if (transfer) {
577  } else {
579  }
580  }
581  break;
582  case 1:
583  if (transfer) {
584  MatQuda(spinorOut->V(), spinor->V(), &inv_param);
585  } else {
587  }
588  break;
589  case 2:
590  if (transfer) {
591  MatQuda(spinorOut->V(), spinor->V(), &inv_param);
592  } else {
594  }
595  break;
596  case 3:
597  if (transfer) {
599  } else {
601  }
602  break;
603  case 4:
604  if (transfer) {
606  } else {
608  }
609  break;
610  }
611  }
612 
613  gettimeofday(&tstop, NULL);
614  long ds = tstop.tv_sec - tstart.tv_sec;
615  long dus = tstop.tv_usec - tstart.tv_usec;
616  double elapsed = ds + 0.000001*dus;
617 
618  dslash_time.cpu_time += elapsed;
619  // skip first and last iterations since they may skew these metrics if comms are not synchronous
620  if (i>0 && i<niter) {
621  if (elapsed < dslash_time.cpu_min) dslash_time.cpu_min = elapsed;
622  if (elapsed > dslash_time.cpu_max) dslash_time.cpu_max = elapsed;
623  }
624  }
625 
626  cudaEventRecord(end, 0);
627  cudaEventSynchronize(end);
628  float runTime;
629  cudaEventElapsedTime(&runTime, start, end);
630  cudaEventDestroy(start);
631  cudaEventDestroy(end);
632 
633  dslash_time.event_time = runTime / 1000;
634 
635  // check for errors
636  cudaError_t stat = cudaGetLastError();
637  if (stat != cudaSuccess)
638  printfQuda("with ERROR: %s\n", cudaGetErrorString(stat));
639 
640  return dslash_time;
641 }
642 
643 void dslashRef() {
644 
645  // compare to dslash reference implementation
646  printfQuda("Calculating reference implementation...");
647  fflush(stdout);
648 
650  switch (test_type) {
651  case 0:
653  break;
654  case 1:
657  break;
658  case 2:
660  break;
661  case 3:
666  break;
667  case 4:
670  break;
671  default:
672  printfQuda("Test type not defined\n");
673  exit(-1);
674  }
675  } else if (dslash_type == QUDA_CLOVER_WILSON_DSLASH) {
676  switch (test_type) {
677  case 0:
679  break;
680  case 1:
683  break;
684  case 2:
686  break;
687  case 3:
692  break;
693  case 4:
697  break;
698  default:
699  printfQuda("Test type not defined\n");
700  exit(-1);
701  }
702  } else if (dslash_type == QUDA_TWISTED_MASS_DSLASH) {
703  switch (test_type) {
704  case 0:
707  else
708  {
709  int tm_offset = 12*spinorRef->Volume();
710 
711  void *ref1 = spinorRef->V();
712  void *ref2 = (char*)ref1 + tm_offset*cpu_prec;
713 
714  void *flv1 = spinor->V();
715  void *flv2 = (char*)flv1 + tm_offset*cpu_prec;
716 
717  tm_ndeg_dslash(ref1, ref2, hostGauge, flv1, flv2, inv_param.kappa, inv_param.mu, inv_param.epsilon,
719  }
720  break;
721  case 1:
724  else
725  {
726  int tm_offset = 12*spinorRef->Volume();
727 
728  void *ref1 = spinorRef->V();
729  void *ref2 = (char*)ref1 + tm_offset*cpu_prec;
730 
731  void *flv1 = spinor->V();
732  void *flv2 = (char*)flv1 + tm_offset*cpu_prec;
733 
735  }
736  break;
737  case 2:
740  else
741  {
742  int tm_offset = 12*spinorRef->Volume();
743 
744  void *evenOut = spinorRef->V();
745  void *oddOut = (char*)evenOut + tm_offset*cpu_prec;
746 
747  void *evenIn = spinor->V();
748  void *oddIn = (char*)evenIn + tm_offset*cpu_prec;
749 
751  }
752  break;
753  case 3:
759  }
760  else
761  {
762  int tm_offset = 12*spinorRef->Volume();
763 
764  void *ref1 = spinorRef->V();
765  void *ref2 = (char*)ref1 + tm_offset*cpu_prec;
766 
767  void *flv1 = spinor->V();
768  void *flv2 = (char*)flv1 + tm_offset*cpu_prec;
769 
770  void *tmp1 = spinorTmp->V();
771  void *tmp2 = (char*)tmp1 + tm_offset*cpu_prec;
772 
775  }
776  break;
777  case 4:
783  }
784  else
785  {
786  int tm_offset = 12*spinorRef->Volume();
787 
788  void *evenOut = spinorRef->V();
789  void *oddOut = (char*)evenOut + tm_offset*cpu_prec;
790 
791  void *evenIn = spinor->V();
792  void *oddIn = (char*)evenIn + tm_offset*cpu_prec;
793 
794  void *evenTmp = spinorTmp->V();
795  void *oddTmp = (char*)evenTmp + tm_offset*cpu_prec;
796 
799  }
800  break;
801  default:
802  printfQuda("Test type not defined\n");
803  exit(-1);
804  }
805  } else if (dslash_type == QUDA_TWISTED_CLOVER_DSLASH) {
806  switch (test_type) {
807  case 0:
810  else
811  errorQuda("Not supported\n");
812  break;
813  case 1:
816  else
817  errorQuda("Not supported\n");
818  break;
819  case 2:
822  else
823  errorQuda("Not supported\n");
824  break;
825  case 3:
831  } else
832  errorQuda("Not supported\n");
833  break;
834  case 4:
838  } else
839  errorQuda("Not supported\n");
840  break;
841  default:
842  printfQuda("Test type not defined\n");
843  exit(-1);
844  }
845  } else if (dslash_type == QUDA_DOMAIN_WALL_DSLASH ){
846  switch (test_type) {
847  case 0:
849  break;
850  case 1:
852  break;
853  case 2:
855  break;
856  case 3:
859  break;
860  case 4:
862  break;
863  default:
864  printf("Test type not supported for domain wall\n");
865  exit(-1);
866  }
868  double *kappa_5 = (double*)malloc(Ls*sizeof(double));
869  for(int xs = 0; xs < Ls ; xs++)
870  kappa_5[xs] = kappa5;
871  switch (test_type) {
872  case 0:
874  break;
875  case 1:
877  break;
878  case 2:
880  break;
881  case 3:
883  break;
884  case 4:
887  break;
888  break;
889  default:
890  printf("Test type not supported for domain wall\n");
891  exit(-1);
892  }
893  free(kappa_5);
894  } else if (dslash_type == QUDA_MOBIUS_DWF_DSLASH){
895  double *kappa_b, *kappa_c, *kappa_5, *kappa_mdwf;
896  kappa_b = (double*)malloc(Lsdim*sizeof(double));
897  kappa_c = (double*)malloc(Lsdim*sizeof(double));
898  kappa_5 = (double*)malloc(Lsdim*sizeof(double));
899  kappa_mdwf = (double*)malloc(Lsdim*sizeof(double));
900  for(int xs = 0 ; xs < Lsdim ; xs++)
901  {
902  kappa_b[xs] = 1.0/(2*(inv_param.b_5[xs]*(4.0 + inv_param.m5) + 1.0));
903  kappa_c[xs] = 1.0/(2*(inv_param.c_5[xs]*(4.0 + inv_param.m5) - 1.0));
904  kappa_5[xs] = 0.5*kappa_b[xs]/kappa_c[xs];
905  kappa_mdwf[xs] = -kappa_5[xs];
906  }
907  switch (test_type) {
908  case 0:
910  break;
911  case 1:
913  break;
914  case 2:
916  break;
917  case 3:
919  break;
920  case 4:
922  break;
923  case 5:
926  break;
927  break;
928  default:
929  printf("Test type not supported for domain wall\n");
930  exit(-1);
931  }
932  free(kappa_b);
933  free(kappa_c);
934  free(kappa_5);
935  free(kappa_mdwf);
936  } else {
937  printfQuda("Unsupported dslash_type\n");
938  exit(-1);
939  }
940 
941  printfQuda("done.\n");
942 }
943 
944 
946 {
947  printfQuda("running the following test:\n");
948 
949  printfQuda("prec recon test_type matpc_type dagger S_dim T_dimension Ls_dimension dslash_type niter\n");
950  printfQuda("%6s %2s %d %12s %d %3d/%3d/%3d %3d %2d %14s %d\n",
954  printfQuda("Grid partition info: X Y Z T\n");
955  printfQuda(" %d %d %d %d\n",
956  dimPartitioned(0),
957  dimPartitioned(1),
958  dimPartitioned(2),
959  dimPartitioned(3));
960 
961  return ;
962 
963 }
964 
965 extern void usage(char**);
966 
967 TEST(dslash, verify) {
968  double deviation = pow(10, -(double)(cpuColorSpinorField::Compare(*spinorRef, *spinorOut)));
969  double tol = (inv_param.cuda_prec == QUDA_DOUBLE_PRECISION ? 1e-12 :
970  (inv_param.cuda_prec == QUDA_SINGLE_PRECISION ? 1e-3 : 1e-1));
971  ASSERT_LE(deviation, tol) << "CPU and CUDA implementations do not agree";
972 }
973 
974 int main(int argc, char **argv)
975 {
976  // initalize google test, includes command line options
977  ::testing::InitGoogleTest(&argc, argv);
978  // return code for google test
979  int test_rc = 0;
980  for (int i =1;i < argc; i++) {
981  if(process_command_line_option(argc, argv, &i) == 0){
982  continue;
983  }
984 
985  fprintf(stderr, "ERROR: Invalid option:%s\n", argv[i]);
986  usage(argv);
987  }
988 
989  initComms(argc, argv, gridsize_from_cmdline);
990 
992 
993  init(argc, argv);
994 
995  float spinorGiB = (float)Vh*spinorSiteSize*inv_param.cuda_prec / (1 << 30);
996  printfQuda("\nSpinor mem: %.3f GiB\n", spinorGiB);
997  printfQuda("Gauge mem: %.3f GiB\n", gauge_param.gaugeGiB);
998 
999  int attempts = 1;
1000  dslashRef();
1001  for (int i=0; i<attempts; i++) {
1002 
1003  {
1004  printfQuda("Tuning...\n");
1005  dslashCUDA(1); // warm-up run
1006  }
1007  printfQuda("Executing %d kernel loops...\n", niter);
1008  if (!transfer) dirac->Flops();
1009  DslashTime dslash_time = dslashCUDA(niter);
1010  printfQuda("done.\n\n");
1011 
1012  if (!transfer) *spinorOut = *cudaSpinorOut;
1013 
1014  // print timing information
1015  printfQuda("%fus per kernel call\n", 1e6*dslash_time.event_time / niter);
1016  //FIXME No flops count for twisted-clover yet
1017  unsigned long long flops = 0;
1018  if (!transfer) flops = dirac->Flops();
1019  printfQuda("GFLOPS = %f\n", 1.0e-9*flops/dslash_time.event_time);
1020 
1021  printfQuda("Effective halo bi-directional bandwidth (GB/s) GPU = %f ( CPU = %f, min = %f , max = %f ) for aggregate message size %lu bytes\n",
1022  1.0e-9*2*cudaSpinor->GhostBytes()*niter/dslash_time.event_time, 1.0e-9*2*cudaSpinor->GhostBytes()*niter/dslash_time.cpu_time,
1023  1.0e-9*2*cudaSpinor->GhostBytes()/dslash_time.cpu_max, 1.0e-9*2*cudaSpinor->GhostBytes()/dslash_time.cpu_min,
1024  2*cudaSpinor->GhostBytes());
1025 
1026  double norm2_cpu = blas::norm2(*spinorRef);
1027  double norm2_cpu_cuda= blas::norm2(*spinorOut);
1028  if (!transfer) {
1029  double norm2_cuda= blas::norm2(*cudaSpinorOut);
1030  printfQuda("Results: CPU = %f, CUDA=%f, CPU-CUDA = %f\n", norm2_cpu, norm2_cuda, norm2_cpu_cuda);
1031  } else {
1032  printfQuda("Result: CPU = %f, CPU-QUDA = %f\n", norm2_cpu, norm2_cpu_cuda);
1033  }
1034 
1035  if (verify_results) {
1036  test_rc = RUN_ALL_TESTS();
1037  if (test_rc != 0) warningQuda("Tests failed");
1038  }
1039  }
1040  end();
1041 
1042  finalizeComms();
1043  return test_rc;
1044 }
QudaGaugeParam gauge_param
Definition: dslash_test.cpp:36
cudaColorSpinorField * cudaSpinorOut
Definition: dslash_test.cpp:40
int dimPartitioned(int dim)
Definition: test_util.cpp:1686
QudaDiracFieldOrder dirac_order
Definition: quda.h:195
QudaReconstructType reconstruct_sloppy
Definition: quda.h:46
double anisotropy
Definition: quda.h:31
int tdim
Definition: test_util.cpp:1623
double b_5[QUDA_MAX_DWF_LS]
Definition: quda.h:102
void dw_4d_matpc(void *out, void **gauge, void *in, double kappa, QudaMatPCType matpc_type, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
void dw_dslash_5_4d(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, bool zero_initialize)
void endQuda(void)
void free(void *)
void construct_gauge_field(void **gauge, int type, QudaPrecision precision, QudaGaugeParam *param)
Definition: test_util.cpp:1054
cpuColorSpinorField * spinorRef
Definition: dslash_test.cpp:39
QudaSolveType solve_type
Definition: quda.h:182
enum QudaPrecision_s QudaPrecision
void Dslash5(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
void Dslash5(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
int ga_pad
Definition: quda.h:53
void dw_setDims(int *X, const int L5)
Definition: test_util.cpp:167
cpuColorSpinorField * spinorTmp
Definition: dslash_test.cpp:39
double mu
Definition: quda.h:105
QudaGaugeFixed gauge_fix
Definition: quda.h:51
__host__ __device__ ValueType norm(const complex< ValueType > &z)
Returns the magnitude of z squared.
Definition: complex_quda.h:896
void tm_mat(void *out, void **gauge, void *in, double kappa, double mu, QudaTwistFlavorType flavor, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param)
void tm_dslash(void *res, void **gaugeFull, void *spinorField, double kappa, double mu, QudaTwistFlavorType flavor, int oddBit, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
__darwin_time_t tv_sec
QudaLinkType type
Definition: quda.h:35
int fflush(FILE *)
double kappa
Definition: quda.h:97
void MatDagMatQuda(void *h_out, void *h_in, QudaInvertParam *inv_param)
int xdim
Definition: test_util.cpp:1620
#define errorQuda(...)
Definition: util_quda.h:90
double norm2(const ColorSpinorField &a)
Definition: reduce_quda.cu:241
QudaDslashType dslash_type
Definition: quda.h:93
cudaEvent_t start
QudaPrecision cuda_prec
Definition: quda.h:191
double c_5[QUDA_MAX_DWF_LS]
Definition: quda.h:103
#define cloverSiteSize
Definition: test_util.h:8
int return_clover_inverse
Definition: quda.h:217
void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param)
double cpu_min
void tmc_dslash(void *out, void **gauge, void *in, void *clover, void *cInv, double kappa, double mu, QudaTwistFlavorType flavor, int parity, QudaMatPCType matpc_type, int dagger, QudaPrecision precision, QudaGaugeParam &param)
DiracMobiusPC * dirac_mdwf
Definition: dslash_test.cpp:45
bool kernel_pack_t
Definition: test_util.cpp:1650
QudaPrecision cpu_prec
Definition: quda.h:190
int ydim
Definition: test_util.cpp:1621
int process_command_line_option(int argc, char **argv, int *idx)
Definition: test_util.cpp:1795
void Source(const QudaSourceType sourceType, const int st=0, const int s=0, const int c=0)
cudaColorSpinorField * tmp1
Definition: dslash_test.cpp:40
void tm_ndeg_mat(void *evenOut, void *oddOut, void **gauge, void *evenIn, void *oddIn, double kappa, double mu, double epsilon, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
void end()
QudaPrecision precision
Definition: lattice_field.h:54
QudaVerbosity verbosity
Definition: dslash_test.cpp:82
void clover_matpc(void *out, void **gauge, void *clover, void *clover_inv, void *in, double kappa, QudaMatPCType matpc_type, int dagger, QudaPrecision precision, QudaGaugeParam &gauge_param)
QudaDagType dagger
Definition: quda.h:184
const char * get_matpc_str(QudaMatPCType type)
Definition: misc.cpp:987
void finalizeComms()
Definition: test_util.cpp:107
int test_type
Definition: test_util.cpp:1634
void dw_dslash(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
QudaGaugeFieldOrder gauge_order
Definition: quda.h:36
void dw_mat(void *out, void **gauge, void *in, double kappa, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
void tmc_mat(void *out, void **gauge, void *clover, void *in, double kappa, double mu, QudaTwistFlavorType flavor, int dagger, QudaPrecision precision, QudaGaugeParam &gauge_param)
cpuColorSpinorField * spinor
Definition: dslash_test.cpp:39
void tm_matpc(void *outEven, void **gauge, void *inEven, double kappa, double mu, QudaTwistFlavorType flavor, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
const char * get_prec_str(QudaPrecision prec)
Definition: misc.cpp:704
TEST(dslash, verify)
void loadCloverQuda(void *h_clover, void *h_clovinv, QudaInvertParam *inv_param)
int return_clover
Definition: quda.h:216
QudaPrecision cpu_prec
Definition: dslash_test.cpp:33
void M(ColorSpinorField &out, const ColorSpinorField &in) const
unsigned long long Flops() const
Definition: dirac_quda.h:148
#define spinorSiteSize
virtual void MdagM(ColorSpinorField &out, const ColorSpinorField &in) const =0
QudaSiteSubset siteSubset
Definition: lattice_field.h:55
void exit(int) __attribute__((noreturn))
QudaPrecision clover_cuda_prec_sloppy
Definition: quda.h:202
void setDims(int *)
Definition: test_util.cpp:130
void dslashQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaParity parity)
QudaFieldLocation input_location
Definition: quda.h:90
QudaDagType dagger
int gridsize_from_cmdline[]
Definition: test_util.cpp:50
__darwin_suseconds_t tv_usec
int Ls
Definition: test_util.cpp:39
bool compute_clover
Definition: test_util.cpp:1646
void setDiracParam(DiracParam &diracParam, QudaInvertParam *inv_param, bool pc)
QudaSolutionType solution_type
Definition: quda.h:181
else return(__swbuf(_c, _p))
int main(int argc, char **argv)
void dslashQuda_4dpc(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaParity parity, int test_type)
int strcmp(const char *__s1, const char *__s2)
int x[QUDA_MAX_DIM]
Definition: lattice_field.h:50
QudaPrecision clover_cuda_prec
Definition: quda.h:201
void Dslash4(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
void dw_matdagmat(void *out, void **gauge, void *in, double kappa, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
DslashTime dslashCUDA(int niter)
void initQuda(int device)
void dw_matpc(void *out, void **gauge, void *in, double kappa, QudaMatPCType matpc_type, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
cudaColorSpinorField * tmp2
Definition: dslash_test.cpp:40
double tol
Definition: test_util.cpp:1647
QudaFieldLocation output_location
Definition: quda.h:91
void * malloc(size_t __size) __attribute__((__warn_unused_result__)) __attribute__((alloc_size(1)))
QudaPrecision clover_cuda_prec_precondition
Definition: quda.h:203
int printf(const char *,...) __attribute__((__format__(__printf__
void mdw_dslash_5(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double *kappa, bool zero_initialize)
double m5
Definition: quda.h:99
void Dslash5inv(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity, const double &kappa5) const
char latfile[]
Definition: test_util.cpp:1627
QudaVerbosity verbosity
Definition: quda.h:219
void setSpinorSiteSize(int n)
Definition: test_util.cpp:192
const int transfer
Definition: dslash_test.cpp:29
ColorSpinorParam csParam
Definition: pack_test.cpp:24
QudaInvertParam newQudaInvertParam(void)
void mdw_dslash_4_pre(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double *b5, double *c5, bool zero_initialize)
const char * get_recon_str(QudaReconstructType recon)
Definition: misc.cpp:770
double event_time
QudaCloverFieldOrder clover_order
Definition: quda.h:205
int V
Definition: test_util.cpp:28
enum QudaMatPCType_s QudaMatPCType
DiracDomainWall4DPC * dirac_4dpc
Definition: dslash_test.cpp:46
#define gaugeSiteSize
Definition: test_util.h:6
#define MAX(a, b)
Definition: dslash_test.cpp:24
void * hostGauge[4]
Definition: dslash_test.cpp:42
double cpu_time
#define warningQuda(...)
Definition: util_quda.h:101
void mdw_matpc(void *out, void **gauge, void *in, double *kappa_b, double *kappa_c, QudaMatPCType matpc_type, int dagger, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double *b5, double *c5)
int niter
Definition: test_util.cpp:1630
__host__ __device__ ValueType pow(ValueType x, ExponentType e)
Definition: complex_quda.h:100
QudaGammaBasis gamma_basis
Definition: quda.h:197
QudaDslashType dslash_type
Definition: test_util.cpp:1626
QudaPrecision cuda_prec_sloppy
Definition: quda.h:45
Dirac * dirac
Definition: dslash_test.cpp:44
void Dslash4pre(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
const char * get_dslash_str(QudaDslashType type)
Definition: misc.cpp:878
bool verify_results
Definition: test_util.cpp:1641
void clover_mat(void *out, void **gauge, void *clover, void *in, double kappa, int dagger, QudaPrecision precision, QudaGaugeParam &gauge_param)
void * hostClover
Definition: dslash_test.cpp:42
void tm_ndeg_matpc(void *outEven1, void *outEven2, void **gauge, void *inEven1, void *inEven2, double kappa, double mu, double epsilon, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
enum QudaDagType_s QudaDagType
enum QudaParity_s QudaParity
QudaReconstructType reconstruct
Definition: quda.h:43
QudaPrecision cuda_prec
Definition: quda.h:42
int X[4]
Definition: quda.h:29
double mass
Definition: quda.h:96
int fprintf(FILE *, const char *,...) __attribute__((__format__(__printf__
QudaTwistFlavorType twist_flavor
Definition: test_util.cpp:1649
void display_test_info()
void * memcpy(void *__dst, const void *__src, size_t __n)
double mass
Definition: test_util.cpp:1642
double mu
Definition: test_util.cpp:1643
static int Compare(const cpuColorSpinorField &a, const cpuColorSpinorField &b, const int resolution=1)
void clover_dslash(void *out, void **gauge, void *clover, void *in, int parity, int dagger, QudaPrecision precision, QudaGaugeParam &param)
int compute_clover_inverse
Definition: quda.h:215
void Dslash4(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
void tm_ndeg_dslash(void *res1, void *res2, void **gauge, void *spinorField1, void *spinorField2, double kappa, double mu, double epsilon, int oddBit, int daggerBit, QudaMatPCType matpc_type, QudaPrecision precision, QudaGaugeParam &gauge_param)
QudaPrecision cuda_prec
Definition: dslash_test.cpp:34
void construct_clover_field(void *clover, double norm, double diag, QudaPrecision precision)
Definition: test_util.cpp:1166
virtual void M(ColorSpinorField &out, const ColorSpinorField &in) const =0
double clover_coeff
Definition: test_util.cpp:1645
QudaInvertParam inv_param
Definition: dslash_test.cpp:37
void wil_mat(void *out, void **gauge, void *in, double kappa, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param)
double gaugeGiB
Definition: quda.h:60
QudaPrecision prec
Definition: test_util.cpp:1615
enum QudaReconstructType_s QudaReconstructType
Main header file for the QUDA library.
cpuColorSpinorField * spinorOut
Definition: dslash_test.cpp:39
void tmc_matpc(void *out, void **gauge, void *in, void *clover, void *cInv, double kappa, double mu, QudaTwistFlavorType flavor, QudaMatPCType matpc_type, int dagger, QudaPrecision precision, QudaGaugeParam &gauge_param)
if(err !=cudaSuccess)
void MatQuda(void *h_out, void *h_in, QudaInvertParam *inv_param)
#define printfQuda(...)
Definition: util_quda.h:84
QudaTboundary t_boundary
Definition: quda.h:38
QudaTwistFlavorType twist_flavor
Definition: quda.h:108
int Vh
Definition: test_util.cpp:29
void dslash_4_4d(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
QudaTwistFlavorType twistFlavor
unsigned long long flops
Definition: blas_quda.cu:42
double cpu_max
void MdagM(ColorSpinorField &out, const ColorSpinorField &in) const
int Lsdim
Definition: test_util.cpp:1624
QudaDagType not_dagger
Definition: dslash_test.cpp:68
QudaMatPCType matpc_type
Definition: test_util.cpp:1652
enum QudaDslashType_s QudaDslashType
void setKernelPackT(bool pack)
Definition: dslash_quda.cu:59
void usage(char **)
Definition: test_util.cpp:1693
ColorSpinorField * tmp2
Definition: dirac_quda.h:41
QudaReconstructType link_recon
Definition: test_util.cpp:1612
void wil_matpc(void *outEven, void **gauge, void *inEven, double kappa, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
void init(int argc, char **argv)
Definition: dslash_test.cpp:84
enum QudaVerbosity_s QudaVerbosity
void dslashRef()
int compute_clover
Definition: quda.h:214
double epsilon
Definition: quda.h:106
void wil_dslash(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
void MdagM(ColorSpinorField &out, const ColorSpinorField &in) const
static Dirac * create(const DiracParam &param)
Definition: dirac.cpp:142
int zdim
Definition: test_util.cpp:1622
void Dslash5inv(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
void read_gauge_field(const char *filename, void *gauge[], QudaPrecision prec, const int *X, int argc, char *argv[])
Definition: qio_field.h:12
static __inline__ size_t size_t d
virtual void Dslash(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const =0
QudaPrecision clover_cpu_prec
Definition: quda.h:200
cudaColorSpinorField * cudaSpinor
Definition: dslash_test.cpp:40
void M(ColorSpinorField &out, const ColorSpinorField &in) const
void * hostCloverInv
Definition: dslash_test.cpp:42
void initComms(int argc, char **argv, const int *commDims)
Definition: test_util.cpp:72
const QudaParity parity
Definition: dslash_test.cpp:28
void setVerbosity(const QudaVerbosity verbosity)
Definition: util_quda.cpp:24
QudaMatPCType matpc_type
Definition: quda.h:183
void dslashQuda_mdwf(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaParity parity, int test_type)
void dslash_5_inv(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double *kappa)
ColorSpinorField * tmp1
Definition: dirac_quda.h:40
double kappa5
Definition: dslash_test.cpp:31
QudaPrecision cpu_prec
Definition: quda.h:40
QudaGaugeParam newQudaGaugeParam(void)
double clover_coeff
Definition: quda.h:208
void comm_barrier(void)
Definition: comm_mpi.cpp:328
enum QudaTwistFlavorType_s QudaTwistFlavorType