QUDA  1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
dslash_test.cpp
Go to the documentation of this file.
1 #include <iostream>
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include <string.h>
5 
6 #include <quda.h>
7 #include <quda_internal.h>
8 #include <dirac_quda.h>
9 #include <dslash_quda.h>
10 #include <invert_quda.h>
11 #include <util_quda.h>
12 #include <blas_quda.h>
13 
14 #include <test_util.h>
15 #include <dslash_util.h>
18 #include "misc.h"
19 
20 #include <qio_field.h>
21 // google test frame work
22 #include <gtest/gtest.h>
23 
24 #define MAX(a,b) ((a)>(b)?(a):(b))
25 
26 using namespace quda;
27 
28 const QudaParity parity = QUDA_EVEN_PARITY; // even or odd?
29 const int transfer = 0; // include transfer time in the benchmark?
30 
31 double kappa5;
32 
35 
38 
41 
43 
44 Dirac *dirac = NULL;
45 
46 // What test are we doing (0 = dslash, 1 = MatPC, 2 = Mat, 3 = MatPCDagMatPC, 4 = MatDagMat)
47 extern int test_type;
48 
49 // Dirac operator type
51 
52 // Twisted mass flavor type
55 
56 extern int device;
57 extern int xdim;
58 extern int ydim;
59 extern int zdim;
60 extern int tdim;
61 extern int Lsdim;
62 extern int gridsize_from_cmdline[];
64 extern QudaPrecision prec;
65 extern QudaDagType dagger;
67 
68 extern bool compute_clover;
69 extern double clover_coeff;
70 
71 extern bool verify_results;
72 extern int niter;
73 extern char latfile[];
74 extern bool unit_gauge;
75 
76 extern double mass; // mass of Dirac operator
77 extern double mu;
78 extern double epsilon;
79 
81 
83 {
84  switch (prec) {
85  case QUDA_QUARTER_PRECISION: return 1e-1;
86  case QUDA_HALF_PRECISION: return 1e-3;
87  case QUDA_SINGLE_PRECISION: return 1e-4;
88  case QUDA_DOUBLE_PRECISION: return 1e-11;
89  case QUDA_INVALID_PRECISION: return 1.0;
90  }
91  return 1.0;
92 }
93 
94 void init(int argc, char **argv) {
95 
96  cuda_prec = prec;
97 
98  gauge_param = newQudaGaugeParam();
99  inv_param = newQudaInvertParam();
100 
101  gauge_param.X[0] = xdim;
102  gauge_param.X[1] = ydim;
103  gauge_param.X[2] = zdim;
104  gauge_param.X[3] = tdim;
105 
107  errorQuda("Asqtad not supported. Please try staggered_dslash_test instead");
108  } else if (dslash_type == QUDA_DOMAIN_WALL_DSLASH ||
111  dw_setDims(gauge_param.X, Lsdim);
112  } else {
113  setDims(gauge_param.X);
114  Ls = 1;
115  }
116 
117  setSpinorSiteSize(24);
118 
119  gauge_param.anisotropy = 1.0;
120 
121  gauge_param.type = QUDA_WILSON_LINKS;
122  gauge_param.gauge_order = QUDA_QDP_GAUGE_ORDER;
123  gauge_param.t_boundary = QUDA_ANTI_PERIODIC_T;
124 
125  gauge_param.cpu_prec = cpu_prec;
126  gauge_param.cuda_prec = cuda_prec;
127  gauge_param.reconstruct = link_recon;
128  gauge_param.gauge_fix = QUDA_GAUGE_FIXED_NO;
129 
130  inv_param.kappa = 0.1;
131 
133  inv_param.epsilon = epsilon;
134  inv_param.twist_flavor = twist_flavor;
135  } else if (dslash_type == QUDA_DOMAIN_WALL_DSLASH ||
137  inv_param.m5 = -1.5;
138  kappa5 = 0.5/(5 + inv_param.m5);
139  } else if (dslash_type == QUDA_MOBIUS_DWF_DSLASH ) {
140  inv_param.m5 = -1.5;
141  kappa5 = 0.5/(5 + inv_param.m5);
142  for(int k = 0; k < Lsdim; k++)
143  {
144  // b5[k], c[k] values are chosen for arbitrary values,
145  // but the difference of them are same as 1.0
146  inv_param.b_5[k] = 1.50; // + 0.5*k;
147  inv_param.c_5[k] = 0.50; // - 0.5*k;
148  }
149  }
150 
151  inv_param.mu = mu;
152  inv_param.mass = mass;
153  inv_param.Ls = (inv_param.twist_flavor != QUDA_TWIST_NONDEG_DOUBLET) ? Ls : 2;
154 
156  inv_param.matpc_type = matpc_type;
157  inv_param.dagger = dagger;
158  not_dagger = (QudaDagType)((dagger + 1)%2);
159 
160  inv_param.cpu_prec = cpu_prec;
161  if (inv_param.cpu_prec != gauge_param.cpu_prec) {
162  errorQuda("Gauge and spinor CPU precisions must match");
163  }
164  inv_param.cuda_prec = cuda_prec;
165 
168 
169 #ifndef MULTI_GPU // free parameter for single GPU
170  gauge_param.ga_pad = 0;
171 #else // must be this one c/b face for multi gpu
172  int x_face_size = gauge_param.X[1]*gauge_param.X[2]*gauge_param.X[3]/2;
173  int y_face_size = gauge_param.X[0]*gauge_param.X[2]*gauge_param.X[3]/2;
174  int z_face_size = gauge_param.X[0]*gauge_param.X[1]*gauge_param.X[3]/2;
175  int t_face_size = gauge_param.X[0]*gauge_param.X[1]*gauge_param.X[2]/2;
176  int pad_size =MAX(x_face_size, y_face_size);
177  pad_size = MAX(pad_size, z_face_size);
178  pad_size = MAX(pad_size, t_face_size);
179  gauge_param.ga_pad = pad_size;
180 #endif
181  inv_param.sp_pad = 0;
182  inv_param.cl_pad = 0;
183 
184  //inv_param.sp_pad = xdim*ydim*zdim/2;
185  //inv_param.cl_pad = 24*24*24;
186 
187  inv_param.gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS; // test code only supports DeGrand-Rossi Basis
188  inv_param.dirac_order = QUDA_DIRAC_ORDER;
189 
191  switch(test_type) {
192  case 0:
193  case 1:
194  case 2:
195  case 3:
197  break;
198  case 4: inv_param.solution_type = QUDA_MAT_SOLUTION; break;
199  case 5: inv_param.solution_type = QUDA_MATPCDAG_MATPC_SOLUTION; break;
200  case 6: inv_param.solution_type = QUDA_MATDAG_MAT_SOLUTION; break;
201  default:
202  errorQuda("Test type %d not defined QUDA_DOMAIN_WALL_4D_DSLASH\n", test_type);
203  }
204  } else if(dslash_type == QUDA_MOBIUS_DWF_DSLASH) {
205  switch(test_type) {
206  case 0:
207  case 1:
208  case 2:
209  case 3:
210  case 4:
212  break;
213  case 5: inv_param.solution_type = QUDA_MAT_SOLUTION; break;
214  case 6: inv_param.solution_type = QUDA_MATPCDAG_MATPC_SOLUTION; break;
215  case 7: inv_param.solution_type = QUDA_MATDAG_MAT_SOLUTION; break;
216  default:
217  errorQuda("Test type %d not defined on QUDA_MOBIUS_DWF_DSLASH\n", test_type);
218  }
219  }
220  else
221  {
222  switch(test_type) {
223  case 0:
224  case 1:
226  break;
227  case 2:
228  inv_param.solution_type = QUDA_MAT_SOLUTION;
229  break;
230  case 3:
232  break;
233  case 4:
235  break;
236  default:
237  errorQuda("Test type %d not defined\n", test_type);
238  }
239  }
240 
241  inv_param.dslash_type = dslash_type;
242 
244  inv_param.clover_cpu_prec = cpu_prec;
245  inv_param.clover_cuda_prec = cuda_prec;
247  inv_param.clover_coeff = clover_coeff;
248  hostClover = malloc((size_t)V*cloverSiteSize*inv_param.clover_cpu_prec);
249  hostCloverInv = malloc((size_t)V*cloverSiteSize*inv_param.clover_cpu_prec);
250  }
251 
252  // construct input fields
253  for (int dir = 0; dir < 4; dir++) hostGauge[dir] = malloc((size_t)V*gaugeSiteSize*gauge_param.cpu_prec);
254 
256 
257  csParam.nColor = 3;
258  csParam.nSpin = 4;
259  csParam.nDim = 4;
260  for (int d=0; d<4; d++) csParam.x[d] = gauge_param.X[d];
264  csParam.nDim = 5;
265  csParam.x[4] = Ls;
266  }
268  csParam.pc_type = QUDA_5D_PC;
269  } else {
270  csParam.pc_type = QUDA_4D_PC;
271  }
272 
273 //ndeg_tm
275  csParam.twistFlavor = inv_param.twist_flavor;
276  csParam.nDim = (inv_param.twist_flavor == QUDA_TWIST_SINGLET) ? 4 : 5;
277  csParam.x[4] = inv_param.Ls;
278  }
279 
280  csParam.setPrecision(inv_param.cpu_prec);
281  csParam.pad = 0;
282 
283  if (inv_param.solution_type == QUDA_MAT_SOLUTION || inv_param.solution_type == QUDA_MATDAG_MAT_SOLUTION) {
285  } else {
287  csParam.x[0] /= 2;
288  }
289 
292  csParam.gammaBasis = inv_param.gamma_basis;
293  csParam.create = QUDA_ZERO_FIELD_CREATE;
294 
295  spinor = new cpuColorSpinorField(csParam);
296  spinorOut = new cpuColorSpinorField(csParam);
297  spinorRef = new cpuColorSpinorField(csParam);
298  spinorTmp = new cpuColorSpinorField(csParam);
299 
300  csParam.x[0] = gauge_param.X[0];
301 
302  printfQuda("Randomizing fields... ");
303 
304  if (strcmp(latfile,"")) { // load in the command line supplied gauge field
305  read_gauge_field(latfile, hostGauge, gauge_param.cpu_prec, gauge_param.X, argc, argv);
306  construct_gauge_field(hostGauge, 2, gauge_param.cpu_prec, &gauge_param);
307  } else { // else generate an SU(3) field
308  if (unit_gauge) {
309  // unit SU(3) field
310  construct_gauge_field(hostGauge, 0, gauge_param.cpu_prec, &gauge_param);
311  } else {
312  // random SU(3) field
313  construct_gauge_field(hostGauge, 1, gauge_param.cpu_prec, &gauge_param);
314  }
315  }
316 
317  spinor->Source(QUDA_RANDOM_SOURCE, 0);
318 
320  double norm = 0.1; // clover components are random numbers in the range (-norm, norm)
321  double diag = 1.0; // constant added to the diagonal
322  construct_clover_field(hostClover, norm, diag, inv_param.clover_cpu_prec);
323  memcpy(hostCloverInv, hostClover, (size_t)V*cloverSiteSize*inv_param.clover_cpu_prec);
324  }
325 
326  printfQuda("done.\n"); fflush(stdout);
327 
328  initQuda(device);
329 
330  // set verbosity prior to loadGaugeQuda
332  inv_param.verbosity = verbosity;
333 
334  printfQuda("Sending gauge field to GPU\n");
335  loadGaugeQuda(hostGauge, &gauge_param);
336 
338  if (compute_clover) printfQuda("Computing clover field on GPU\n");
339  else printfQuda("Sending clover field to GPU\n");
340  inv_param.compute_clover = compute_clover;
341  inv_param.return_clover = compute_clover;
344  inv_param.return_clover_inverse = true;
345 
347  }
348 
349  if (!transfer) {
351  csParam.pad = inv_param.sp_pad;
352  csParam.setPrecision(inv_param.cuda_prec);
353  if (csParam.Precision() == QUDA_DOUBLE_PRECISION ) {
355  } else {
356  /* Single and half */
358  }
359 
360  if (inv_param.solution_type == QUDA_MAT_SOLUTION || inv_param.solution_type == QUDA_MATDAG_MAT_SOLUTION) {
362  } else {
364  csParam.x[0] /= 2;
365  }
366 
367  printfQuda("Creating cudaSpinor with nParity = %d\n", csParam.siteSubset);
368  cudaSpinor = new cudaColorSpinorField(csParam);
369  printfQuda("Creating cudaSpinorOut with nParity = %d\n", csParam.siteSubset);
370  cudaSpinorOut = new cudaColorSpinorField(csParam);
371 
372  tmp1 = new cudaColorSpinorField(csParam);
373 
374  if (inv_param.solution_type == QUDA_MAT_SOLUTION || inv_param.solution_type == QUDA_MATDAG_MAT_SOLUTION) {
375  csParam.x[0] /= 2;
376  }
377 
379  tmp2 = new cudaColorSpinorField(csParam);
380 
381  printfQuda("Sending spinor field to GPU\n");
382  *cudaSpinor = *spinor;
383 
384  double cpu_norm = blas::norm2(*spinor);
385  double cuda_norm = blas::norm2(*cudaSpinor);
386  printfQuda("Source: CPU = %e, CUDA = %e\n", cpu_norm, cuda_norm);
387 
388  bool pc;
390  pc = (test_type != 4 && test_type != 6);
392  pc = (test_type != 5 && test_type != 7);
393  else
394  pc = (test_type != 2 && test_type != 4);
395 
396  DiracParam diracParam;
397  setDiracParam(diracParam, &inv_param, pc);
398  diracParam.tmp1 = tmp1;
399  diracParam.tmp2 = tmp2;
400  dirac = Dirac::create(diracParam);
401 
402  } else {
403  double cpu_norm = blas::norm2(*spinor);
404  printfQuda("Source: CPU = %e\n", cpu_norm);
405  }
406 
407 }
408 
409 void end() {
410  if (!transfer) {
411  if(dirac != NULL)
412  {
413  delete dirac;
414  dirac = NULL;
415  }
416  delete cudaSpinor;
417  delete cudaSpinorOut;
418  delete tmp1;
419  delete tmp2;
420  }
421 
422  // release memory
423  delete spinor;
424  delete spinorOut;
425  delete spinorRef;
426  delete spinorTmp;
427 
428  for (int dir = 0; dir < 4; dir++) free(hostGauge[dir]);
430  free(hostClover);
431  free(hostCloverInv);
432  }
433  endQuda();
434 
435 }
436 
437 struct DslashTime {
438  double event_time;
439  double cpu_time;
440  double cpu_min;
441  double cpu_max;
442 
443  DslashTime() : event_time(0.0), cpu_time(0.0), cpu_min(DBL_MAX), cpu_max(0.0) {}
444 };
445 
446 // execute kernel
448 
449  DslashTime dslash_time;
450  timeval tstart, tstop;
451 
452  cudaEvent_t start, end;
453  cudaEventCreate(&start);
454  cudaEventCreate(&end);
455 
456  comm_barrier();
457  cudaEventRecord(start, 0);
458 
459  for (int i = 0; i < niter; i++) {
460 
461  gettimeofday(&tstart, NULL);
462 
464  switch (test_type) {
465  case 0:
466  if (transfer) {
467  dslashQuda_4dpc(spinorOut->V(), spinor->V(), &inv_param, parity, test_type);
468  } else {
469  static_cast<DiracDomainWall4DPC *>(dirac)->Dslash4(*cudaSpinorOut, *cudaSpinor, parity);
470  }
471  break;
472  case 1:
473  if (transfer) {
474  dslashQuda_4dpc(spinorOut->V(), spinor->V(), &inv_param, parity, test_type);
475  } else {
476  static_cast<DiracDomainWall4DPC *>(dirac)->Dslash5(*cudaSpinorOut, *cudaSpinor, parity);
477  }
478  break;
479  case 2:
480  if (transfer) {
481  dslashQuda_4dpc(spinorOut->V(), spinor->V(), &inv_param, parity, test_type);
482  } else {
483  static_cast<DiracDomainWall4DPC *>(dirac)->Dslash5inv(*cudaSpinorOut, *cudaSpinor, parity, kappa5);
484  }
485  break;
486  case 3:
487  case 4:
488  if (transfer) {
489  MatQuda(spinorOut->V(), spinor->V(), &inv_param);
490  } else {
491  dirac->M(*cudaSpinorOut, *cudaSpinor);
492  }
493  break;
494  case 5:
495  case 6:
496  if (transfer) {
497  MatDagMatQuda(spinorOut->V(), spinor->V(), &inv_param);
498  } else {
499  dirac->MdagM(*cudaSpinorOut, *cudaSpinor);
500  }
501  break;
502  }
503  } else if (dslash_type == QUDA_MOBIUS_DWF_DSLASH) {
504  switch (test_type) {
505  case 0:
506  if (transfer) {
507  dslashQuda_mdwf(spinorOut->V(), spinor->V(), &inv_param, parity, test_type);
508  } else {
509  static_cast<DiracMobiusPC *>(dirac)->Dslash4(*cudaSpinorOut, *cudaSpinor, parity);
510  }
511  break;
512  case 1:
513  if (transfer) {
514  dslashQuda_mdwf(spinorOut->V(), spinor->V(), &inv_param, parity, test_type);
515  } else {
516  static_cast<DiracMobiusPC *>(dirac)->Dslash5(*cudaSpinorOut, *cudaSpinor, parity);
517  }
518  break;
519  case 2:
520  if (transfer) {
521  dslashQuda_mdwf(spinorOut->V(), spinor->V(), &inv_param, parity, test_type);
522  } else {
523  static_cast<DiracMobiusPC *>(dirac)->Dslash4pre(*cudaSpinorOut, *cudaSpinor, parity);
524  }
525  break;
526  case 3:
527  if (transfer) {
528  dslashQuda_mdwf(spinorOut->V(), spinor->V(), &inv_param, parity, test_type);
529  } else {
530  static_cast<DiracMobiusPC *>(dirac)->Dslash5inv(*cudaSpinorOut, *cudaSpinor, parity);
531  }
532  break;
533  case 4:
534  case 5:
535  if (transfer) {
536  MatQuda(spinorOut->V(), spinor->V(), &inv_param);
537  } else {
538  dirac->M(*cudaSpinorOut, *cudaSpinor);
539  }
540  break;
541  case 6:
542  case 7:
543  if (transfer) {
544  MatDagMatQuda(spinorOut->V(), spinor->V(), &inv_param);
545  } else {
546  dirac->MdagM(*cudaSpinorOut, *cudaSpinor);
547  }
548  break;
549  }
550  } else {
551  switch (test_type) {
552  case 0:
554  if (transfer) {
555  dslashQuda(spinorOut->V(), spinor->V(), &inv_param, parity);
556  } else {
557  dirac->Dslash(*cudaSpinorOut, *cudaSpinor, parity);
558  }
559  } else {
560  if (transfer) {
561  dslashQuda(spinorOut->V(), spinor->V(), &inv_param, parity);
562  } else {
563  dirac->Dslash(*cudaSpinorOut, *cudaSpinor, parity);
564  }
565  }
566  break;
567  case 1:
568  case 2:
569  if (transfer) {
570  MatQuda(spinorOut->V(), spinor->V(), &inv_param);
571  } else {
572  dirac->M(*cudaSpinorOut, *cudaSpinor);
573  }
574  break;
575  case 3:
576  case 4:
577  if (transfer) {
578  MatDagMatQuda(spinorOut->V(), spinor->V(), &inv_param);
579  } else {
580  dirac->MdagM(*cudaSpinorOut, *cudaSpinor);
581  }
582  break;
583  }
584  }
585 
586  gettimeofday(&tstop, NULL);
587  long ds = tstop.tv_sec - tstart.tv_sec;
588  long dus = tstop.tv_usec - tstart.tv_usec;
589  double elapsed = ds + 0.000001*dus;
590 
591  dslash_time.cpu_time += elapsed;
592  // skip first and last iterations since they may skew these metrics if comms are not synchronous
593  if (i>0 && i<niter) {
594  if (elapsed < dslash_time.cpu_min) dslash_time.cpu_min = elapsed;
595  if (elapsed > dslash_time.cpu_max) dslash_time.cpu_max = elapsed;
596  }
597  }
598 
599  cudaEventRecord(end, 0);
600  cudaEventSynchronize(end);
601  float runTime;
602  cudaEventElapsedTime(&runTime, start, end);
603  cudaEventDestroy(start);
604  cudaEventDestroy(end);
605 
606  dslash_time.event_time = runTime / 1000;
607 
608  // check for errors
609  cudaError_t stat = cudaGetLastError();
610  if (stat != cudaSuccess)
611  printfQuda("with ERROR: %s\n", cudaGetErrorString(stat));
612 
613  return dslash_time;
614 }
615 
616 void dslashRef() {
617 
618  // compare to dslash reference implementation
619  printfQuda("Calculating reference implementation...");
620  fflush(stdout);
621 
623  switch (test_type) {
624  case 0:
625  wil_dslash(spinorRef->V(), hostGauge, spinor->V(), parity, dagger, inv_param.cpu_prec, gauge_param);
626  break;
627  case 1:
628  wil_matpc(spinorRef->V(), hostGauge, spinor->V(), inv_param.kappa, inv_param.matpc_type, dagger,
629  inv_param.cpu_prec, gauge_param);
630  break;
631  case 2:
632  wil_mat(spinorRef->V(), hostGauge, spinor->V(), inv_param.kappa, dagger, inv_param.cpu_prec, gauge_param);
633  break;
634  case 3:
635  wil_matpc(spinorTmp->V(), hostGauge, spinor->V(), inv_param.kappa, inv_param.matpc_type, dagger,
636  inv_param.cpu_prec, gauge_param);
637  wil_matpc(spinorRef->V(), hostGauge, spinorTmp->V(), inv_param.kappa, inv_param.matpc_type, not_dagger,
638  inv_param.cpu_prec, gauge_param);
639  break;
640  case 4:
641  wil_mat(spinorTmp->V(), hostGauge, spinor->V(), inv_param.kappa, dagger, inv_param.cpu_prec, gauge_param);
642  wil_mat(spinorRef->V(), hostGauge, spinorTmp->V(), inv_param.kappa, not_dagger, inv_param.cpu_prec, gauge_param);
643  break;
644  default:
645  printfQuda("Test type not defined\n");
646  exit(-1);
647  }
648  } else if (dslash_type == QUDA_CLOVER_WILSON_DSLASH) {
649  switch (test_type) {
650  case 0:
651  clover_dslash(spinorRef->V(), hostGauge, hostCloverInv, spinor->V(), parity, dagger, inv_param.cpu_prec, gauge_param);
652  break;
653  case 1:
654  clover_matpc(spinorRef->V(), hostGauge, hostClover, hostCloverInv, spinor->V(), inv_param.kappa, inv_param.matpc_type,
655  dagger, inv_param.cpu_prec, gauge_param);
656  break;
657  case 2:
658  clover_mat(spinorRef->V(), hostGauge, hostClover, spinor->V(), inv_param.kappa, dagger, inv_param.cpu_prec, gauge_param);
659  break;
660  case 3:
661  clover_matpc(spinorTmp->V(), hostGauge, hostClover, hostCloverInv, spinor->V(), inv_param.kappa, inv_param.matpc_type,
662  dagger, inv_param.cpu_prec, gauge_param);
663  clover_matpc(spinorRef->V(), hostGauge, hostClover, hostCloverInv, spinorTmp->V(), inv_param.kappa, inv_param.matpc_type,
664  not_dagger, inv_param.cpu_prec, gauge_param);
665  break;
666  case 4:
667  clover_mat(spinorTmp->V(), hostGauge, hostClover, spinor->V(), inv_param.kappa, dagger, inv_param.cpu_prec, gauge_param);
668  clover_mat(spinorRef->V(), hostGauge, hostClover, spinorTmp->V(), inv_param.kappa, not_dagger,
669  inv_param.cpu_prec, gauge_param);
670  break;
671  default:
672  printfQuda("Test type not defined\n");
673  exit(-1);
674  }
675  } else if (dslash_type == QUDA_TWISTED_MASS_DSLASH) {
676  switch (test_type) {
677  case 0:
678  if(inv_param.twist_flavor == QUDA_TWIST_SINGLET)
679  tm_dslash(spinorRef->V(), hostGauge, spinor->V(), inv_param.kappa, inv_param.mu, inv_param.twist_flavor, parity,
680  inv_param.matpc_type, dagger, inv_param.cpu_prec, gauge_param);
681  else
682  {
683  int tm_offset = 12*spinorRef->Volume();
684 
685  void *ref1 = spinorRef->V();
686  void *ref2 = (char*)ref1 + tm_offset*cpu_prec;
687 
688  void *flv1 = spinor->V();
689  void *flv2 = (char*)flv1 + tm_offset*cpu_prec;
690 
691  tm_ndeg_dslash(ref1, ref2, hostGauge, flv1, flv2, inv_param.kappa, inv_param.mu, inv_param.epsilon,
692  parity, dagger, inv_param.matpc_type, inv_param.cpu_prec, gauge_param);
693  }
694  break;
695  case 1:
696  if(inv_param.twist_flavor == QUDA_TWIST_SINGLET)
697  tm_matpc(spinorRef->V(), hostGauge, spinor->V(), inv_param.kappa, inv_param.mu, inv_param.twist_flavor, inv_param.matpc_type, dagger, inv_param.cpu_prec, gauge_param);
698  else
699  {
700  int tm_offset = 12*spinorRef->Volume();
701 
702  void *ref1 = spinorRef->V();
703  void *ref2 = (char*)ref1 + tm_offset*cpu_prec;
704 
705  void *flv1 = spinor->V();
706  void *flv2 = (char*)flv1 + tm_offset*cpu_prec;
707 
708  tm_ndeg_matpc(ref1, ref2, hostGauge, flv1, flv2, inv_param.kappa, inv_param.mu, inv_param.epsilon, inv_param.matpc_type, dagger, inv_param.cpu_prec, gauge_param);
709  }
710  break;
711  case 2:
712  if(inv_param.twist_flavor == QUDA_TWIST_SINGLET)
713  tm_mat(spinorRef->V(), hostGauge, spinor->V(), inv_param.kappa, inv_param.mu, inv_param.twist_flavor, dagger, inv_param.cpu_prec, gauge_param);
714  else
715  {
716  int tm_offset = 12*spinorRef->Volume();
717 
718  void *evenOut = spinorRef->V();
719  void *oddOut = (char*)evenOut + tm_offset*cpu_prec;
720 
721  void *evenIn = spinor->V();
722  void *oddIn = (char*)evenIn + tm_offset*cpu_prec;
723 
724  tm_ndeg_mat(evenOut, oddOut, hostGauge, evenIn, oddIn, inv_param.kappa, inv_param.mu, inv_param.epsilon, dagger, inv_param.cpu_prec, gauge_param);
725  }
726  break;
727  case 3:
728  if(inv_param.twist_flavor == QUDA_TWIST_SINGLET) {
729  tm_matpc(spinorTmp->V(), hostGauge, spinor->V(), inv_param.kappa, inv_param.mu, inv_param.twist_flavor,
730  inv_param.matpc_type, dagger, inv_param.cpu_prec, gauge_param);
731  tm_matpc(spinorRef->V(), hostGauge, spinorTmp->V(), inv_param.kappa, inv_param.mu, inv_param.twist_flavor,
732  inv_param.matpc_type, not_dagger, inv_param.cpu_prec, gauge_param);
733  }
734  else
735  {
736  int tm_offset = 12*spinorRef->Volume();
737 
738  void *ref1 = spinorRef->V();
739  void *ref2 = (char*)ref1 + tm_offset*cpu_prec;
740 
741  void *flv1 = spinor->V();
742  void *flv2 = (char*)flv1 + tm_offset*cpu_prec;
743 
744  void *tmp1 = spinorTmp->V();
745  void *tmp2 = (char*)tmp1 + tm_offset*cpu_prec;
746 
747  tm_ndeg_matpc(tmp1, tmp2, hostGauge, flv1, flv2, inv_param.kappa, inv_param.mu, inv_param.epsilon, inv_param.matpc_type, dagger, inv_param.cpu_prec, gauge_param);
748  tm_ndeg_matpc(ref1, ref2, hostGauge, tmp1, tmp2, inv_param.kappa, inv_param.mu, inv_param.epsilon, inv_param.matpc_type, not_dagger, inv_param.cpu_prec, gauge_param);
749  }
750  break;
751  case 4:
752  if(inv_param.twist_flavor == QUDA_TWIST_SINGLET) {
753  tm_mat(spinorTmp->V(), hostGauge, spinor->V(), inv_param.kappa, inv_param.mu, inv_param.twist_flavor,
754  dagger, inv_param.cpu_prec, gauge_param);
755  tm_mat(spinorRef->V(), hostGauge, spinorTmp->V(), inv_param.kappa, inv_param.mu, inv_param.twist_flavor,
756  not_dagger, inv_param.cpu_prec, gauge_param);
757  }
758  else
759  {
760  int tm_offset = 12*spinorRef->Volume();
761 
762  void *evenOut = spinorRef->V();
763  void *oddOut = (char*)evenOut + tm_offset*cpu_prec;
764 
765  void *evenIn = spinor->V();
766  void *oddIn = (char*)evenIn + tm_offset*cpu_prec;
767 
768  void *evenTmp = spinorTmp->V();
769  void *oddTmp = (char*)evenTmp + tm_offset*cpu_prec;
770 
771  tm_ndeg_mat(evenTmp, oddTmp, hostGauge, evenIn, oddIn, inv_param.kappa, inv_param.mu, inv_param.epsilon, dagger, inv_param.cpu_prec, gauge_param);
772  tm_ndeg_mat(evenOut, oddOut, hostGauge, evenTmp, oddTmp, inv_param.kappa, inv_param.mu, inv_param.epsilon, not_dagger, inv_param.cpu_prec, gauge_param);
773  }
774  break;
775  default:
776  printfQuda("Test type not defined\n");
777  exit(-1);
778  }
779  } else if (dslash_type == QUDA_TWISTED_CLOVER_DSLASH) {
780  switch (test_type) {
781  case 0:
782  if(inv_param.twist_flavor == QUDA_TWIST_SINGLET)
783  tmc_dslash(spinorRef->V(), hostGauge, spinor->V(), hostClover, hostCloverInv, inv_param.kappa, inv_param.mu, inv_param.twist_flavor, parity, inv_param.matpc_type, dagger, inv_param.cpu_prec, gauge_param);
784  else
785  errorQuda("Not supported\n");
786  break;
787  case 1:
788  if(inv_param.twist_flavor == QUDA_TWIST_SINGLET)
789  tmc_matpc(spinorRef->V(), hostGauge, spinor->V(), hostClover, hostCloverInv, inv_param.kappa, inv_param.mu, inv_param.twist_flavor, inv_param.matpc_type, dagger, inv_param.cpu_prec, gauge_param);
790  else
791  errorQuda("Not supported\n");
792  break;
793  case 2:
794  if(inv_param.twist_flavor == QUDA_TWIST_SINGLET)
795  tmc_mat(spinorRef->V(), hostGauge, hostClover, spinor->V(), inv_param.kappa, inv_param.mu, inv_param.twist_flavor, dagger, inv_param.cpu_prec, gauge_param);
796  else
797  errorQuda("Not supported\n");
798  break;
799  case 3:
800  if(inv_param.twist_flavor == QUDA_TWIST_SINGLET) {
801  tmc_matpc(spinorTmp->V(), hostGauge, spinor->V(), hostClover, hostCloverInv, inv_param.kappa, inv_param.mu, inv_param.twist_flavor,
802  inv_param.matpc_type, dagger, inv_param.cpu_prec, gauge_param);
803  tmc_matpc(spinorRef->V(), hostGauge, spinorTmp->V(), hostClover, hostCloverInv, inv_param.kappa, inv_param.mu, inv_param.twist_flavor,
804  inv_param.matpc_type, not_dagger, inv_param.cpu_prec, gauge_param);
805  } else
806  errorQuda("Not supported\n");
807  break;
808  case 4:
809  if(inv_param.twist_flavor == QUDA_TWIST_SINGLET) {
810  tmc_mat(spinorTmp->V(), hostGauge, hostClover, spinor->V(), inv_param.kappa, inv_param.mu, inv_param.twist_flavor, dagger, inv_param.cpu_prec, gauge_param);
811  tmc_mat(spinorRef->V(), hostGauge, hostClover, spinorTmp->V(), inv_param.kappa, inv_param.mu, inv_param.twist_flavor, not_dagger, inv_param.cpu_prec, gauge_param);
812  } else
813  errorQuda("Not supported\n");
814  break;
815  default:
816  printfQuda("Test type not defined\n");
817  exit(-1);
818  }
819  } else if (dslash_type == QUDA_DOMAIN_WALL_DSLASH ){
820  switch (test_type) {
821  case 0:
822  dw_dslash(spinorRef->V(), hostGauge, spinor->V(), parity, dagger, gauge_param.cpu_prec, gauge_param, inv_param.mass);
823  break;
824  case 1:
825  dw_matpc(spinorRef->V(), hostGauge, spinor->V(), kappa5, inv_param.matpc_type, dagger, gauge_param.cpu_prec, gauge_param, inv_param.mass);
826  break;
827  case 2:
828  dw_mat(spinorRef->V(), hostGauge, spinor->V(), kappa5, dagger, gauge_param.cpu_prec, gauge_param, inv_param.mass);
829  break;
830  case 3:
831  dw_matpc(spinorTmp->V(), hostGauge, spinor->V(), kappa5, inv_param.matpc_type, dagger, gauge_param.cpu_prec, gauge_param, inv_param.mass);
832  dw_matpc(spinorRef->V(), hostGauge, spinorTmp->V(), kappa5, inv_param.matpc_type, not_dagger, gauge_param.cpu_prec, gauge_param, inv_param.mass);
833  break;
834  case 4:
835  dw_matdagmat(spinorRef->V(), hostGauge, spinor->V(), kappa5, dagger, gauge_param.cpu_prec, gauge_param, inv_param.mass);
836  break;
837  default:
838  printf("Test type not supported for domain wall\n");
839  exit(-1);
840  }
842  double *kappa_5 = (double*)malloc(Ls*sizeof(double));
843  for(int xs = 0; xs < Ls ; xs++)
844  kappa_5[xs] = kappa5;
845  switch (test_type) {
846  case 0:
847  dslash_4_4d(spinorRef->V(), hostGauge, spinor->V(), parity, dagger, gauge_param.cpu_prec, gauge_param, inv_param.mass);
848  break;
849  case 1:
850  dw_dslash_5_4d(spinorRef->V(), hostGauge, spinor->V(), parity, dagger, gauge_param.cpu_prec, gauge_param, inv_param.mass, true);
851  break;
852  case 2:
853  dslash_5_inv(spinorRef->V(), hostGauge, spinor->V(), parity, dagger, gauge_param.cpu_prec, gauge_param, inv_param.mass, kappa_5);
854  break;
855  case 3:
856  dw_4d_matpc(spinorRef->V(), hostGauge, spinor->V(), kappa5, inv_param.matpc_type, dagger, gauge_param.cpu_prec, gauge_param, inv_param.mass);
857  break;
858  case 4:
859  dw_4d_mat(
860  spinorRef->V(), hostGauge, spinor->V(), kappa5, dagger, gauge_param.cpu_prec, gauge_param, inv_param.mass);
861  break;
862  case 5:
863  dw_4d_matpc(spinorTmp->V(), hostGauge, spinor->V(), kappa5, inv_param.matpc_type, dagger, gauge_param.cpu_prec, gauge_param, inv_param.mass);
864  dw_4d_matpc(spinorRef->V(), hostGauge, spinorTmp->V(), kappa5, inv_param.matpc_type, not_dagger, gauge_param.cpu_prec, gauge_param, inv_param.mass);
865  break;
866  case 6:
867  dw_4d_mat(
868  spinorTmp->V(), hostGauge, spinor->V(), kappa5, dagger, gauge_param.cpu_prec, gauge_param, inv_param.mass);
869  dw_4d_mat(spinorRef->V(), hostGauge, spinorTmp->V(), kappa5, not_dagger, gauge_param.cpu_prec, gauge_param,
870  inv_param.mass);
871  break;
872  default:
873  printf("Test type not supported for domain wall\n");
874  exit(-1);
875  }
876  free(kappa_5);
877  } else if (dslash_type == QUDA_MOBIUS_DWF_DSLASH){
878  double _Complex *kappa_b = (double _Complex *)malloc(Lsdim * sizeof(double _Complex));
879  double _Complex *kappa_c = (double _Complex *)malloc(Lsdim * sizeof(double _Complex));
880  double _Complex *kappa_5 = (double _Complex *)malloc(Lsdim * sizeof(double _Complex));
881  double _Complex *kappa_mdwf = (double _Complex *)malloc(Lsdim * sizeof(double _Complex));
882  for(int xs = 0 ; xs < Lsdim ; xs++)
883  {
884  kappa_b[xs] = 1.0/(2*(inv_param.b_5[xs]*(4.0 + inv_param.m5) + 1.0));
885  kappa_c[xs] = 1.0/(2*(inv_param.c_5[xs]*(4.0 + inv_param.m5) - 1.0));
886  kappa_5[xs] = 0.5*kappa_b[xs]/kappa_c[xs];
887  kappa_mdwf[xs] = -kappa_5[xs];
888  }
889  switch (test_type) {
890  case 0:
891  dslash_4_4d(spinorRef->V(), hostGauge, spinor->V(), parity, dagger, gauge_param.cpu_prec, gauge_param, inv_param.mass);
892  break;
893  case 1:
894  mdw_dslash_5(spinorRef->V(), hostGauge, spinor->V(), parity, dagger, gauge_param.cpu_prec, gauge_param, inv_param.mass, kappa_5, true);
895  break;
896  case 2:
897  mdw_dslash_4_pre(spinorRef->V(), hostGauge, spinor->V(), parity, dagger, gauge_param.cpu_prec, gauge_param, inv_param.mass, inv_param.b_5, inv_param.c_5, true);
898  break;
899  case 3:
900  mdw_dslash_5_inv(spinorRef->V(), hostGauge, spinor->V(), parity, dagger, gauge_param.cpu_prec, gauge_param,
901  inv_param.mass, kappa_mdwf);
902  break;
903  case 4:
904  mdw_matpc(spinorRef->V(), hostGauge, spinor->V(), kappa_b, kappa_c, inv_param.matpc_type, dagger, gauge_param.cpu_prec, gauge_param, inv_param.mass, inv_param.b_5, inv_param.c_5);
905  break;
906  case 5:
907  mdw_mat(spinorRef->V(), hostGauge, spinor->V(), kappa_b, kappa_c, dagger, gauge_param.cpu_prec, gauge_param,
908  inv_param.mass, inv_param.b_5, inv_param.c_5);
909  break;
910  case 6:
911  mdw_matpc(spinorTmp->V(), hostGauge, spinor->V(), kappa_b, kappa_c, inv_param.matpc_type, dagger, gauge_param.cpu_prec, gauge_param, inv_param.mass, inv_param.b_5, inv_param.c_5);
912  mdw_matpc(spinorRef->V(), hostGauge, spinorTmp->V(), kappa_b, kappa_c, inv_param.matpc_type, not_dagger, gauge_param.cpu_prec, gauge_param, inv_param.mass, inv_param.b_5, inv_param.c_5);
913  break;
914  case 7:
915  mdw_mat(spinorTmp->V(), hostGauge, spinor->V(), kappa_b, kappa_c, dagger, gauge_param.cpu_prec, gauge_param,
916  inv_param.mass, inv_param.b_5, inv_param.c_5);
917  mdw_mat(spinorRef->V(), hostGauge, spinorTmp->V(), kappa_b, kappa_c, not_dagger, gauge_param.cpu_prec,
918  gauge_param, inv_param.mass, inv_param.b_5, inv_param.c_5);
919  break;
920  default:
921  printf("Test type not supported for domain wall\n");
922  exit(-1);
923  }
924  free(kappa_b);
925  free(kappa_c);
926  free(kappa_5);
927  free(kappa_mdwf);
928  } else {
929  printfQuda("Unsupported dslash_type\n");
930  exit(-1);
931  }
932 
933  printfQuda("done.\n");
934 }
935 
936 
938 {
939  printfQuda("running the following test:\n");
940 
941  printfQuda("prec recon test_type matpc_type dagger S_dim T_dimension Ls_dimension dslash_type niter\n");
942  printfQuda("%6s %2s %d %12s %d %3d/%3d/%3d %3d %2d %14s %d\n",
946  printfQuda("Grid partition info: X Y Z T\n");
947  printfQuda(" %d %d %d %d\n",
948  dimPartitioned(0),
949  dimPartitioned(1),
950  dimPartitioned(2),
951  dimPartitioned(3));
952 
953  return ;
954 
955 }
956 
957 extern void usage(char**);
958 
959 TEST(dslash, verify) {
960  double deviation = pow(10, -(double)(cpuColorSpinorField::Compare(*spinorRef, *spinorOut)));
961  double tol = getTolerance(inv_param.cuda_prec);
962  if (gauge_param.reconstruct == QUDA_RECONSTRUCT_8) tol *= 10; // if recon 8, we tolerate a greater deviation
963 
964  ASSERT_LE(deviation, tol) << "CPU and CUDA implementations do not agree";
965 }
966 
967 int main(int argc, char **argv)
968 {
969  // initalize google test, includes command line options
970  ::testing::InitGoogleTest(&argc, argv);
971 
972  // return code for google test
973  int test_rc = 0;
974  for (int i =1;i < argc; i++) {
975  if(process_command_line_option(argc, argv, &i) == 0){
976  continue;
977  }
978 
979  fprintf(stderr, "ERROR: Invalid option:%s\n", argv[i]);
980  usage(argv);
981  }
982 
983  initComms(argc, argv, gridsize_from_cmdline);
984 
986 
987  init(argc, argv);
988 
989  int attempts = 1;
990  dslashRef();
991  for (int i=0; i<attempts; i++) {
992 
993  {
994  printfQuda("Tuning...\n");
995  dslashCUDA(1); // warm-up run
996  }
997  printfQuda("Executing %d kernel loops...\n", niter);
998  if (!transfer) dirac->Flops();
999  DslashTime dslash_time = dslashCUDA(niter);
1000  printfQuda("done.\n\n");
1001 
1002  if (!transfer) *spinorOut = *cudaSpinorOut;
1003 
1004  // print timing information
1005  printfQuda("%fus per kernel call\n", 1e6*dslash_time.event_time / niter);
1006  //FIXME No flops count for twisted-clover yet
1007  unsigned long long flops = 0;
1008  if (!transfer) flops = dirac->Flops();
1009  printfQuda(
1010  "%llu flops per kernel call, %llu flops per site\n", flops / niter, (flops / niter) / cudaSpinor->Volume());
1011  printfQuda("GFLOPS = %f\n", 1.0e-9*flops/dslash_time.event_time);
1012 
1013  printfQuda("Effective halo bi-directional bandwidth (GB/s) GPU = %f ( CPU = %f, min = %f , max = %f ) for aggregate message size %lu bytes\n",
1014  1.0e-9*2*cudaSpinor->GhostBytes()*niter/dslash_time.event_time, 1.0e-9*2*cudaSpinor->GhostBytes()*niter/dslash_time.cpu_time,
1015  1.0e-9*2*cudaSpinor->GhostBytes()/dslash_time.cpu_max, 1.0e-9*2*cudaSpinor->GhostBytes()/dslash_time.cpu_min,
1016  2*cudaSpinor->GhostBytes());
1017 
1018  double norm2_cpu = blas::norm2(*spinorRef);
1019  double norm2_cpu_cuda = blas::norm2(*spinorOut);
1020  if (!transfer) {
1021  double norm2_cuda= blas::norm2(*cudaSpinorOut);
1022  printfQuda("Results: CPU = %f, CUDA=%f, CPU-CUDA = %f\n", norm2_cpu, norm2_cuda, norm2_cpu_cuda);
1023  } else {
1024  printfQuda("Result: CPU = %f, CPU-QUDA = %f\n", norm2_cpu, norm2_cpu_cuda);
1025  }
1026 
1027  if (verify_results) {
1028  ::testing::TestEventListeners &listeners = ::testing::UnitTest::GetInstance()->listeners();
1029  if (comm_rank() != 0) { delete listeners.Release(listeners.default_result_printer()); }
1030 
1031  test_rc = RUN_ALL_TESTS();
1032  if (test_rc != 0) warningQuda("Tests failed");
1033  }
1034  }
1035  end();
1036 
1037  finalizeComms();
1038  return test_rc;
1039 }
QudaGaugeParam gauge_param
Definition: dslash_test.cpp:36
cudaColorSpinorField * cudaSpinorOut
Definition: dslash_test.cpp:40
int dimPartitioned(int dim)
Definition: test_util.cpp:1776
QudaDiracFieldOrder dirac_order
Definition: quda.h:219
int comm_rank(void)
Definition: comm_mpi.cpp:82
double anisotropy
Definition: quda.h:38
double getTolerance(QudaPrecision prec)
Definition: dslash_test.cpp:82
int tdim
Definition: test_util.cpp:1618
void mdw_matpc(void *out, void **gauge, void *in, double _Complex *kappa_b, double _Complex *kappa_c, QudaMatPCType matpc_type, int dagger, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double _Complex *b5, double _Complex *c5)
void setPrecision(QudaPrecision precision, QudaPrecision ghost_precision=QUDA_INVALID_PRECISION, bool force_native=false)
void dw_4d_matpc(void *out, void **gauge, void *in, double kappa, QudaMatPCType matpc_type, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
void dw_dslash_5_4d(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, bool zero_initialize)
void endQuda(void)
void construct_gauge_field(void **gauge, int type, QudaPrecision precision, QudaGaugeParam *param)
Definition: test_util.cpp:1047
cpuColorSpinorField * spinorRef
Definition: dslash_test.cpp:39
QudaSolveType solve_type
Definition: quda.h:205
enum QudaPrecision_s QudaPrecision
int ga_pad
Definition: quda.h:63
double_complex c_5[QUDA_MAX_DWF_LS]
Definition: quda.h:112
void mdw_mat(void *out, void **gauge, void *in, double _Complex *kappa_b, double _Complex *kappa_c, int dagger, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double _Complex *b5, double _Complex *c5)
void dw_setDims(int *X, const int L5)
Definition: test_util.cpp:187
cpuColorSpinorField * spinorTmp
Definition: dslash_test.cpp:39
double mu
Definition: quda.h:114
QudaGaugeFixed gauge_fix
Definition: quda.h:61
__host__ __device__ ValueType norm(const complex< ValueType > &z)
Returns the magnitude of z squared.
void tm_mat(void *out, void **gauge, void *in, double kappa, double mu, QudaTwistFlavorType flavor, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param)
void tm_dslash(void *res, void **gaugeFull, void *spinorField, double kappa, double mu, QudaTwistFlavorType flavor, int oddBit, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
QudaLinkType type
Definition: quda.h:42
double kappa
Definition: quda.h:106
void MatDagMatQuda(void *h_out, void *h_in, QudaInvertParam *inv_param)
int xdim
Definition: test_util.cpp:1615
#define errorQuda(...)
Definition: util_quda.h:121
double norm2(const ColorSpinorField &a)
Definition: reduce_quda.cu:721
QudaDslashType dslash_type
Definition: quda.h:102
QudaPrecision cuda_prec
Definition: quda.h:214
#define cloverSiteSize
Definition: test_util.h:9
int return_clover_inverse
Definition: quda.h:242
void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param)
double cpu_min
void tmc_dslash(void *out, void **gauge, void *in, void *clover, void *cInv, double kappa, double mu, QudaTwistFlavorType flavor, int parity, QudaMatPCType matpc_type, int dagger, QudaPrecision precision, QudaGaugeParam &param)
QudaPrecision cpu_prec
Definition: quda.h:213
int ydim
Definition: test_util.cpp:1616
int process_command_line_option(int argc, char **argv, int *idx)
Definition: test_util.cpp:2019
void Source(const QudaSourceType sourceType, const int st=0, const int s=0, const int c=0)
cudaColorSpinorField * tmp1
Definition: dslash_test.cpp:40
void tm_ndeg_mat(void *evenOut, void *oddOut, void **gauge, void *evenIn, void *oddIn, double kappa, double mu, double epsilon, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
void end()
QudaVerbosity verbosity
Definition: test_util.cpp:1614
void clover_matpc(void *out, void **gauge, void *clover, void *clover_inv, void *in, double kappa, QudaMatPCType matpc_type, int dagger, QudaPrecision precision, QudaGaugeParam &gauge_param)
bool unit_gauge
Definition: test_util.cpp:1624
QudaDagType dagger
Definition: quda.h:207
const char * get_matpc_str(QudaMatPCType type)
Definition: misc.cpp:1121
void finalizeComms()
Definition: test_util.cpp:128
int test_type
Definition: test_util.cpp:1636
void dw_dslash(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
QudaGaugeFieldOrder gauge_order
Definition: quda.h:43
void dw_mat(void *out, void **gauge, void *in, double kappa, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
void tmc_mat(void *out, void **gauge, void *clover, void *in, double kappa, double mu, QudaTwistFlavorType flavor, int dagger, QudaPrecision precision, QudaGaugeParam &gauge_param)
cpuColorSpinorField * spinor
Definition: dslash_test.cpp:39
void tm_matpc(void *outEven, void **gauge, void *inEven, double kappa, double mu, QudaTwistFlavorType flavor, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
const char * get_prec_str(QudaPrecision prec)
Definition: misc.cpp:701
TEST(dslash, verify)
void loadCloverQuda(void *h_clover, void *h_clovinv, QudaInvertParam *inv_param)
int return_clover
Definition: quda.h:241
QudaPrecision cpu_prec
Definition: dslash_test.cpp:33
unsigned long long Flops() const
Definition: dirac_quda.h:177
virtual void MdagM(ColorSpinorField &out, const ColorSpinorField &in) const =0
QudaSiteSubset siteSubset
Definition: lattice_field.h:71
void setDims(int *)
Definition: test_util.cpp:151
void dslashQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaParity parity)
QudaFieldLocation input_location
Definition: quda.h:99
QudaDagType dagger
Definition: test_util.cpp:1620
int gridsize_from_cmdline[]
Definition: test_util.cpp:49
int Ls
Definition: test_util.cpp:38
bool compute_clover
Definition: test_util.cpp:1654
double_complex b_5[QUDA_MAX_DWF_LS]
Definition: quda.h:111
void setDiracParam(DiracParam &diracParam, QudaInvertParam *inv_param, bool pc)
QudaSolutionType solution_type
Definition: quda.h:204
int main(int argc, char **argv)
void dslashQuda_4dpc(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaParity parity, int test_type)
int x[QUDA_MAX_DIM]
Definition: lattice_field.h:67
QudaPrecision clover_cuda_prec
Definition: quda.h:225
void mdw_dslash_4_pre(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double _Complex *b5, double _Complex *c5, bool zero_initialize)
void dw_matdagmat(void *out, void **gauge, void *in, double kappa, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
DslashTime dslashCUDA(int niter)
void initQuda(int device)
void dw_matpc(void *out, void **gauge, void *in, double kappa, QudaMatPCType matpc_type, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
cudaColorSpinorField * tmp2
Definition: dslash_test.cpp:40
double tol
Definition: test_util.cpp:1656
QudaFieldLocation output_location
Definition: quda.h:100
double m5
Definition: quda.h:108
char latfile[]
Definition: test_util.cpp:1623
void mdw_dslash_5_inv(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double _Complex *kappa)
QudaVerbosity verbosity
Definition: quda.h:244
void setSpinorSiteSize(int n)
Definition: test_util.cpp:211
const int transfer
Definition: dslash_test.cpp:29
ColorSpinorParam csParam
Definition: pack_test.cpp:24
QudaInvertParam newQudaInvertParam(void)
const char * get_recon_str(QudaReconstructType recon)
Definition: misc.cpp:768
double event_time
QudaCloverFieldOrder clover_order
Definition: quda.h:230
void dw_4d_mat(void *out, void **gauge, void *in, double kappa, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
enum QudaMatPCType_s QudaMatPCType
#define MAX(a, b)
Definition: dslash_test.cpp:24
void * hostGauge[4]
Definition: dslash_test.cpp:42
double cpu_time
#define warningQuda(...)
Definition: util_quda.h:133
int niter
Definition: test_util.cpp:1629
__host__ __device__ ValueType pow(ValueType x, ExponentType e)
Definition: complex_quda.h:111
QudaGammaBasis gamma_basis
Definition: quda.h:221
QudaDslashType dslash_type
Definition: test_util.cpp:1621
Dirac * dirac
Definition: dslash_test.cpp:44
const char * get_dslash_str(QudaDslashType type)
Definition: misc.cpp:910
bool verify_results
Definition: test_util.cpp:1643
void clover_mat(void *out, void **gauge, void *clover, void *in, double kappa, int dagger, QudaPrecision precision, QudaGaugeParam &gauge_param)
void * hostClover
Definition: dslash_test.cpp:42
void tm_ndeg_matpc(void *outEven1, void *outEven2, void **gauge, void *inEven1, void *inEven2, double kappa, double mu, double epsilon, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
enum QudaDagType_s QudaDagType
enum QudaParity_s QudaParity
int device
Definition: test_util.cpp:1602
QudaReconstructType reconstruct
Definition: quda.h:50
QudaPrecision cuda_prec
Definition: quda.h:49
int X[4]
Definition: quda.h:36
double mass
Definition: quda.h:105
QudaTwistFlavorType twist_flavor
Definition: test_util.cpp:1660
void display_test_info()
double mass
Definition: test_util.cpp:1646
int V
Definition: test_util.cpp:27
double mu
Definition: test_util.cpp:1648
static int Compare(const cpuColorSpinorField &a, const cpuColorSpinorField &b, const int resolution=1)
Perform a component by component comparison of two color-spinor fields. In doing we normalize with re...
void clover_dslash(void *out, void **gauge, void *clover, void *in, int parity, int dagger, QudaPrecision precision, QudaGaugeParam &param)
int compute_clover_inverse
Definition: quda.h:240
void tm_ndeg_dslash(void *res1, void *res2, void **gauge, void *spinorField1, void *spinorField2, double kappa, double mu, double epsilon, int oddBit, int daggerBit, QudaMatPCType matpc_type, QudaPrecision precision, QudaGaugeParam &gauge_param)
QudaPrecision cuda_prec
Definition: dslash_test.cpp:34
void construct_clover_field(void *clover, double norm, double diag, QudaPrecision precision)
Definition: test_util.cpp:1167
virtual void M(ColorSpinorField &out, const ColorSpinorField &in) const =0
double epsilon
Definition: test_util.cpp:1649
double clover_coeff
Definition: test_util.cpp:1653
QudaInvertParam inv_param
Definition: dslash_test.cpp:37
void wil_mat(void *out, void **gauge, void *in, double kappa, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param)
void mdw_dslash_5(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double _Complex *kappa, bool zero_initialize)
QudaPrecision prec
Definition: test_util.cpp:1608
enum QudaReconstructType_s QudaReconstructType
Main header file for the QUDA library.
cpuColorSpinorField * spinorOut
Definition: dslash_test.cpp:39
QudaPrecision Precision() const
Definition: lattice_field.h:58
void tmc_matpc(void *out, void **gauge, void *in, void *clover, void *cInv, double kappa, double mu, QudaTwistFlavorType flavor, QudaMatPCType matpc_type, int dagger, QudaPrecision precision, QudaGaugeParam &gauge_param)
void MatQuda(void *h_out, void *h_in, QudaInvertParam *inv_param)
#define printfQuda(...)
Definition: util_quda.h:115
QudaTboundary t_boundary
Definition: quda.h:45
QudaTwistFlavorType twist_flavor
Definition: quda.h:117
void dslash_4_4d(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
QudaTwistFlavorType twistFlavor
unsigned long long flops
Definition: blas_quda.cu:22
double cpu_max
int Lsdim
Definition: test_util.cpp:1619
QudaDagType not_dagger
Definition: dslash_test.cpp:66
QudaMatPCType matpc_type
Definition: test_util.cpp:1662
enum QudaDslashType_s QudaDslashType
void usage(char **)
Definition: test_util.cpp:1783
ColorSpinorField * tmp2
Definition: dirac_quda.h:42
QudaReconstructType link_recon
Definition: test_util.cpp:1605
void wil_matpc(void *outEven, void **gauge, void *inEven, double kappa, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
void init(int argc, char **argv)
Definition: dslash_test.cpp:94
enum QudaVerbosity_s QudaVerbosity
void dslashRef()
int compute_clover
Definition: quda.h:239
double epsilon
Definition: quda.h:115
void wil_dslash(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
static Dirac * create(const DiracParam &param)
Definition: dirac.cpp:159
void initComms(int argc, char **argv, int *const commDims)
Definition: test_util.cpp:88
int zdim
Definition: test_util.cpp:1617
void read_gauge_field(const char *filename, void *gauge[], QudaPrecision prec, const int *X, int argc, char *argv[])
Definition: qio_field.h:14
virtual void Dslash(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const =0
QudaPrecision clover_cpu_prec
Definition: quda.h:224
cudaColorSpinorField * cudaSpinor
Definition: dslash_test.cpp:40
void * hostCloverInv
Definition: dslash_test.cpp:42
const QudaParity parity
Definition: dslash_test.cpp:28
void setVerbosity(QudaVerbosity verbosity)
Definition: util_quda.cpp:25
QudaMatPCType matpc_type
Definition: quda.h:206
void dslashQuda_mdwf(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaParity parity, int test_type)
void dslash_5_inv(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double *kappa)
ColorSpinorField * tmp1
Definition: dirac_quda.h:41
double kappa5
Definition: dslash_test.cpp:31
QudaPrecision cpu_prec
Definition: quda.h:47
#define gaugeSiteSize
Definition: face_gauge.cpp:34
QudaGaugeParam newQudaGaugeParam(void)
double clover_coeff
Definition: quda.h:233
void comm_barrier(void)
Definition: comm_mpi.cpp:326
enum QudaTwistFlavorType_s QudaTwistFlavorType