QUDA  0.9.0
staggered_dslash_ctest.cpp
Go to the documentation of this file.
1 #include <iostream>
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include <string.h>
5 
6 #include <quda.h>
7 #include <quda_internal.h>
8 #include <dirac_quda.h>
9 #include <dslash_quda.h>
10 #include <invert_quda.h>
11 #include <util_quda.h>
12 #include <blas_quda.h>
13 
14 #include <misc.h>
15 #include <test_util.h>
16 #include <dslash_util.h>
18 #include <gauge_field.h>
19 
20 #include <assert.h>
21 #include <gtest.h>
22 
23 using namespace quda;
24 
25 #define MAX(a,b) ((a)>(b)?(a):(b))
26 #define staggeredSpinorSiteSize 6
27 // What test are we doing (0 = dslash, 1 = MatPC, 2 = Mat)
28 
29 extern void usage(char** argv );
30 
32 
33 extern int test_type;
34 
37 
40 
43 
45 
46 void *hostGauge[4];
47 void *fatlink[4], *longlink[4];
48 
49 #ifdef MULTI_GPU
50 void **ghost_fatlink, **ghost_longlink;
51 #endif
52 
54 extern QudaDagType dagger;
55 int transfer = 0; // include transfer time in the benchmark?
56 extern int xdim;
57 extern int ydim;
58 extern int zdim;
59 extern int tdim;
60 extern int gridsize_from_cmdline[];
61 
62 extern int device;
63 extern bool verify_results;
64 extern int niter;
65 
66 extern bool kernel_pack_t;
67 
68 extern double mass; // the mass of the Dirac operator
69 
70 int X[4];
71 extern int Nsrc; // number of spinors to apply to simultaneously
72 
74 
75 const char *prec_str[] = {"half", "single", "double"};
76 const char *recon_str[] = {"r18", "r13", "r9"};
77 
78 void init(int precision, QudaReconstructType link_recon) {
79 
80  auto prec = precision == 2 ? QUDA_DOUBLE_PRECISION : precision == 1 ? QUDA_SINGLE_PRECISION : QUDA_HALF_PRECISION;
81 
83 
85 
88 
89  gaugeParam.X[0] = X[0] = xdim;
90  gaugeParam.X[1] = X[1] = ydim;
91  gaugeParam.X[2] = X[2] = zdim;
92  gaugeParam.X[3] = X[3] = tdim;
93 
95  dw_setDims(gaugeParam.X,Nsrc); // so we can use 5-d indexing from dwf
97 
103 
104  // ensure that the default is improved staggered
108 
109  gaugeParam.anisotropy = 1.0;
115  gaugeParam.gaugeGiB = 0;
116 
124  inv_param.mass = mass;
125 
126  // ensure that the default is improved staggered
130 
133 
134  int tmpint = MAX(X[1]*X[2]*X[3], X[0]*X[2]*X[3]);
135  tmpint = MAX(tmpint, X[0]*X[1]*X[3]);
136  tmpint = MAX(tmpint, X[0]*X[1]*X[2]);
137 
138 
139  gaugeParam.ga_pad = tmpint;
140  inv_param.sp_pad = tmpint;
141 
143  csParam.nColor=3;
144  csParam.nSpin=1;
145  csParam.nDim=5;
146  for(int d = 0; d < 4; d++) {
147  csParam.x[d] = gaugeParam.X[d];
148  }
149  csParam.x[4] = Nsrc; // number of sources becomes the fifth dimension
150 
152  csParam.pad = 0;
153  if (test_type < 2) {
156  csParam.x[0] /= 2;
157  } else {
160  }
161 
164  csParam.gammaBasis = inv_param.gamma_basis; // this parameter is meaningless for staggered
166 
171 
173  csParam.x[0] = gaugeParam.X[0];
174 
175  // printfQuda("Randomizing fields ...\n");
176 
178 
179  size_t gSize = (gaugeParam.cpu_prec == QUDA_DOUBLE_PRECISION) ? sizeof(double) : sizeof(float);
180 
181  for (int dir = 0; dir < 4; dir++) {
184 
185  if (fatlink[dir] == NULL || longlink[dir] == NULL){
186  errorQuda("ERROR: malloc failed for fatlink/longlink");
187  }
188  }
189 
191 
192 #ifdef MULTI_GPU
195  GaugeFieldParam cpuFatParam(fatlink, gaugeParam);
197  cpuFat = new cpuGaugeField(cpuFatParam);
198  ghost_fatlink = cpuFat->Ghost();
199 
201  GaugeFieldParam cpuLongParam(longlink, gaugeParam);
202  cpuLongParam.ghostExchange = QUDA_GHOST_EXCHANGE_PAD;
203  cpuLong = new cpuGaugeField(cpuLongParam);
204  ghost_longlink = cpuLong->Ghost();
205 
206  int x_face_size = X[1]*X[2]*X[3]/2;
207  int y_face_size = X[0]*X[2]*X[3]/2;
208  int z_face_size = X[0]*X[1]*X[3]/2;
209  int t_face_size = X[0]*X[1]*X[2]/2;
210  int pad_size = MAX(x_face_size, y_face_size);
211  pad_size = MAX(pad_size, z_face_size);
212  pad_size = MAX(pad_size, t_face_size);
213  gaugeParam.ga_pad = pad_size;
214 #endif
215 
219  } else {
221  }
222 
223  // printfQuda("Fat links sending...");
225  // printfQuda("Fat links sent\n");
226 
228 
229 #ifdef MULTI_GPU
230  gaugeParam.ga_pad = 3*pad_size;
231 #endif
232 
234 
236  // printfQuda("Long links sending...");
238  // printfQuda("Long links sent...\n");
239  }
240 
241  // printfQuda("Sending fields to GPU...");
242 
243  if (!transfer) {
244 
248  if (test_type < 2){
250  csParam.x[0] /=2;
251  }
252 
253  // printfQuda("Creating cudaSpinor\n");
255 
256  // printfQuda("Creating cudaSpinorOut\n");
258 
259  // printfQuda("Sending spinor field to GPU\n");
260  *cudaSpinor = *spinor;
261 
262  cudaDeviceSynchronize();
263  checkCudaError();
264 
265  // double spinor_norm2 = blas::norm2(*spinor);
266  // double cuda_spinor_norm2= blas::norm2(*cudaSpinor);
267  // printfQuda("Source CPU = %f, CUDA=%f\n", spinor_norm2, cuda_spinor_norm2);
268 
269  if(test_type == 2) csParam.x[0] /=2;
270 
273 
274  bool pc = (test_type != 2);
275  DiracParam diracParam;
276  setDiracParam(diracParam, &inv_param, pc);
277 
278  diracParam.tmp1=tmp;
279 
280  dirac = Dirac::create(diracParam);
281 
282  } else {
283  errorQuda("Error not suppported");
284  }
285 
286  return;
287 }
288 
289 void end(void)
290 {
291  for (int dir = 0; dir < 4; dir++) {
292  free(fatlink[dir]);
293  free(longlink[dir]);
294  }
295 
296  if (!transfer){
297  delete dirac;
298  delete cudaSpinor;
299  delete cudaSpinorOut;
300  delete tmp;
301  }
302 
303  delete spinor;
304  delete spinorOut;
305  delete spinorRef;
306  delete tmpCpu;
307 
308  freeGaugeQuda();
309 
310  if (cpuFat) delete cpuFat;
311  if (cpuLong) delete cpuLong;
313 
314 }
315 
// Timing summary produced by one dslashCUDA() run.
struct DslashTime {
  // Total time between the CUDA start/end events, stored as runTime / 1000.
  double event_time = 0.0;
  // Sum of the per-iteration host wall-clock times measured with gettimeofday().
  double cpu_time = 0.0;
  // Smallest per-iteration host time seen; starts at DBL_MAX so any sample lowers it.
  double cpu_min = DBL_MAX;
  // Largest per-iteration host time seen; starts at zero so any sample raises it.
  double cpu_max = 0.0;

  // All members take the initial values given above.
  DslashTime() = default;
};
324 
326 
327  DslashTime dslash_time;
328  timeval tstart, tstop;
329 
330  cudaEvent_t start, end;
331  cudaEventCreate(&start);
332  cudaEventRecord(start, 0);
333  cudaEventSynchronize(start);
334 
335  comm_barrier();
336  cudaEventRecord(start, 0);
337 
338  for (int i = 0; i < niter; i++) {
339 
340  gettimeofday(&tstart, NULL);
341 
342  switch (test_type) {
343  case 0:
344  if (transfer){
345  //dslashQuda(spinorOdd, spinorEven, &inv_param, parity);
346  } else {
348  }
349  break;
350  case 1:
351  if (transfer){
352  //MatPCDagMatPcQuda(spinorOdd, spinorEven, &inv_param);
353  } else {
355  }
356  break;
357  case 2:
358  errorQuda("Staggered operator acting on full-site not supported");
359  if (transfer){
360  //MatQuda(spinorGPU, spinor, &inv_param);
361  } else {
363  }
364  }
365 
366  gettimeofday(&tstop, NULL);
367  long ds = tstop.tv_sec - tstart.tv_sec;
368  long dus = tstop.tv_usec - tstart.tv_usec;
369  double elapsed = ds + 0.000001*dus;
370 
371  dslash_time.cpu_time += elapsed;
372  // skip first and last iterations since they may skew these metrics if comms are not synchronous
373  if (i>0 && i<niter) {
374  if (elapsed < dslash_time.cpu_min) dslash_time.cpu_min = elapsed;
375  if (elapsed > dslash_time.cpu_max) dslash_time.cpu_max = elapsed;
376  }
377  }
378 
379  cudaEventCreate(&end);
380  cudaEventRecord(end, 0);
381  cudaEventSynchronize(end);
382  float runTime;
383  cudaEventElapsedTime(&runTime, start, end);
384  cudaEventDestroy(start);
385  cudaEventDestroy(end);
386 
387  dslash_time.event_time = runTime / 1000;
388 
389  // check for errors
390  cudaError_t stat = cudaGetLastError();
391  if (stat != cudaSuccess)
392  errorQuda("with ERROR: %s\n", cudaGetErrorString(stat));
393 
394  return dslash_time;
395 }
396 
398 {
399 
400  // compare to dslash reference implementation
401  // printfQuda("Calculating reference implementation...");
402  fflush(stdout);
403  switch (test_type) {
404  case 0:
405 #ifdef MULTI_GPU
406  staggered_dslash_mg4dir(spinorRef, fatlink, longlink, ghost_fatlink, ghost_longlink,
408 #else
410 #endif
411  break;
412  case 1:
413 #ifdef MULTI_GPU
414  matdagmat_mg4dir(spinorRef, fatlink, longlink, ghost_fatlink, ghost_longlink,
416 #else
418 #endif
419  break;
420  case 2:
421  //mat(spinorRef->V(), fatlink, longlink, spinor->V(), kappa, dagger,
422  //inv_param.cpu_prec, gaugeParam.cpu_prec);
423  break;
424  default:
425  errorQuda("Test type not defined");
426  }
427 
428  // printfQuda("done.\n");
429 
430 }
431 
432 
434 {
435  auto prec = precision == 2 ? QUDA_DOUBLE_PRECISION : precision == 1 ? QUDA_SINGLE_PRECISION : QUDA_HALF_PRECISION;
436  // printfQuda("running the following test:\n");
437  // auto linkrecon = dslash_type == QUDA_ASQTAD_DSLASH ? (link_recon == QUDA_RECONSTRUCT_12 ? QUDA_RECONSTRUCT_13 : (link_recon == QUDA_RECONSTRUCT_8 ? QUDA_RECONSTRUCT_9: link_recon)) : link_recon;
438  printfQuda("prec recon test_type dagger S_dim T_dimension\n");
439  printfQuda("%s %s %d %d %d/%d/%d %d \n",
442  // printfQuda("Grid partition info: X Y Z T\n");
443  // printfQuda(" %d %d %d %d\n",
444  // dimPartitioned(0),
445  // dimPartitioned(1),
446  // dimPartitioned(2),
447  // dimPartitioned(3));
448 
449  return ;
450 
451 }
452 
453 using ::testing::TestWithParam;
454 using ::testing::Bool;
455 using ::testing::Values;
456 using ::testing::Range;
457 using ::testing::Combine;
458 
459 
460 void usage_extra(char** argv )
461 {
462  printfQuda("Extra options:\n");
463  printfQuda(" --test <0/1> # Test method\n");
464  printfQuda(" 0: Even destination spinor\n");
465  printfQuda(" 1: Odd destination spinor\n");
466  return ;
467 }
468 
469 using ::testing::TestWithParam;
470 using ::testing::Bool;
471 using ::testing::Values;
472 using ::testing::Range;
473 using ::testing::Combine;
474 
475 class StaggeredDslashTest : public ::testing::TestWithParam<::testing::tuple<int, int, int>> {
476 protected:
477  ::testing::tuple<int, int, int> param;
478 
479 public:
480  virtual ~StaggeredDslashTest() { }
481  virtual void SetUp() {
482  int prec = ::testing::get<0>(GetParam());
483  QudaReconstructType recon = static_cast<QudaReconstructType>(::testing::get<1>(GetParam()));
484 
485 
486  int value = ::testing::get<2>(GetParam());
487  for(int j=0; j < 4;j++){
488  if (value & (1 << j)){
490  }
491 
492  }
493  updateR();
494  init(prec, recon);
495  display_test_info(prec, recon);
496  }
497  virtual void TearDown() { end(); }
498 
499  static void SetUpTestCase() {
500  initQuda(device);
501  }
502 
503  // Per-test-case tear-down.
504  // Called after the last test in this test case.
505  // Can be omitted if not needed.
506  static void TearDownTestCase() {
507  endQuda();
508  }
509 
510 };
511 
513  { // warm-up run
514  // printfQuda("Tuning...\n");
515  dslashCUDA(1);
516  }
517 
518  dslashCUDA(2);
519 
520  if (!transfer) *spinorOut = *cudaSpinorOut;
521 
523  double spinor_ref_norm2 = blas::norm2(*spinorRef);
524  double spinor_out_norm2 = blas::norm2(*spinorOut);
525 
526  if (!transfer) {
527  double cuda_spinor_out_norm2 = blas::norm2(*cudaSpinorOut);
528  printfQuda("Results: CPU=%f, CUDA=%f, CPU-CUDA=%f\n", spinor_ref_norm2, cuda_spinor_out_norm2,
529  spinor_out_norm2);
530  } else {
531  printfQuda("Result: CPU=%f , CPU-CUDA=%f", spinor_ref_norm2, spinor_out_norm2);
532  }
533 
534  double deviation = pow(10, -(double)(cpuColorSpinorField::Compare(*spinorRef, *spinorOut)));
535  double tol = (inv_param.cuda_prec == QUDA_DOUBLE_PRECISION ? 1e-12 :
536  (inv_param.cuda_prec == QUDA_SINGLE_PRECISION ? 1e-3 : 1e-1));
537  ASSERT_LE(deviation, tol) << "CPU and CUDA implementations do not agree";
538  }
539 
541  { // warm-up run
542  // printfQuda("Tuning...\n");
543  dslashCUDA(1);
544  }
545 
546  // reset flop counter
547  dirac->Flops();
548 
549  DslashTime dslash_time = dslashCUDA(niter);
550 
551  if (!transfer) *spinorOut = *cudaSpinorOut;
552 
553  printfQuda("%fus per kernel call\n", 1e6*dslash_time.event_time / niter);
554 
555  unsigned long long flops = dirac->Flops();
556  double gflops=1.0e-9*flops/dslash_time.event_time;
557  printfQuda("GFLOPS = %f\n", gflops );
558  RecordProperty("Gflops", std::to_string(gflops));
559 
560  RecordProperty("Halo_bidirectitonal_BW_GPU", 1.0e-9*2*cudaSpinor->GhostBytes()*niter/dslash_time.event_time);
561  RecordProperty("Halo_bidirectitonal_BW_CPU", 1.0e-9*2*cudaSpinor->GhostBytes()*niter/dslash_time.cpu_time);
562  RecordProperty("Halo_bidirectitonal_BW_CPU_min", 1.0e-9*2*cudaSpinor->GhostBytes()/dslash_time.cpu_max);
563  RecordProperty("Halo_bidirectitonal_BW_CPU_max", 1.0e-9*2*cudaSpinor->GhostBytes()/dslash_time.cpu_min);
564  RecordProperty("Halo_message_size_bytes",2*cudaSpinor->GhostBytes());
565 
566  printfQuda("Effective halo bi-directional bandwidth (GB/s) GPU = %f ( CPU = %f, min = %f , max = %f ) for aggregate message size %lu bytes\n",
567  1.0e-9*2*cudaSpinor->GhostBytes()*niter/dslash_time.event_time, 1.0e-9*2*cudaSpinor->GhostBytes()*niter/dslash_time.cpu_time,
568  1.0e-9*2*cudaSpinor->GhostBytes()/dslash_time.cpu_max, 1.0e-9*2*cudaSpinor->GhostBytes()/dslash_time.cpu_min,
569  2*cudaSpinor->GhostBytes());
570 
571  }
572 
573  int main(int argc, char **argv)
574  {
575  // initalize google test
576  ::testing::InitGoogleTest(&argc, argv);
577  for (int i=1 ;i < argc; i++){
578 
579  if(process_command_line_option(argc, argv, &i) == 0){
580  continue;
581  }
582 
583  fprintf(stderr, "ERROR: Invalid option:%s\n", argv[i]);
584  usage(argv);
585  }
586 
587  initComms(argc, argv, gridsize_from_cmdline);
588 
589 
590  // return result of RUN_ALL_TESTS
591  int test_rc = RUN_ALL_TESTS();
592 
593  finalizeComms();
594 
595  return test_rc;
596  }
597 
598  std::string getstaggereddslashtestname(testing::TestParamInfo<::testing::tuple<int, int, int>> param){
599  const int prec = ::testing::get<0>(param.param);
600  const int recon = ::testing::get<1>(param.param);
601  const int part = ::testing::get<2>(param.param);
602  std::stringstream ss;
603  // ss << get_dslash_str(dslash_type) << "_";
604  ss << prec_str[prec];
605  ss << "_r" << recon;
606  ss << "_partition" << part;
607  return ss.str();
608  }
609 
610 
611 #ifdef MULTI_GPU
613 #else
614  INSTANTIATE_TEST_CASE_P(QUDA, StaggeredDslashTest, Combine( Range(0,3), ::testing::Values(QUDA_RECONSTRUCT_NO,QUDA_RECONSTRUCT_12,QUDA_RECONSTRUCT_8), ::testing::Values(0) ),getstaggereddslashtestname);
615 #endif
616 
int Nsrc
Definition: test_util.cpp:1628
INSTANTIATE_TEST_CASE_P(QUDA, StaggeredDslashTest, Combine(Range(0, 3), ::testing::Values(QUDA_RECONSTRUCT_NO, QUDA_RECONSTRUCT_12, QUDA_RECONSTRUCT_8), ::testing::Values(0)), getstaggereddslashtestname)
QudaDiracFieldOrder dirac_order
Definition: quda.h:195
QudaDagType dagger
QudaReconstructType reconstruct_sloppy
Definition: quda.h:46
double anisotropy
Definition: quda.h:31
void usage_extra(char **argv)
cpuColorSpinorField * spinorOut
QudaGhostExchange ghostExchange
Definition: lattice_field.h:60
void endQuda(void)
void free(void *)
int ga_pad
Definition: quda.h:53
void dw_setDims(int *X, const int L5)
Definition: test_util.cpp:167
QudaGaugeFixed gauge_fix
Definition: quda.h:51
QudaParity parity
__darwin_time_t tv_sec
cpuColorSpinorField * tmpCpu
QudaInvertParam inv_param
QudaLinkType type
Definition: quda.h:35
int fflush(FILE *)
#define errorQuda(...)
Definition: util_quda.h:90
double norm2(const ColorSpinorField &a)
Definition: reduce_quda.cu:241
QudaDslashType dslash_type
Definition: quda.h:93
cudaEvent_t start
QudaPrecision cuda_prec
Definition: quda.h:191
int ydim
Definition: test_util.cpp:1621
QudaDslashType dslash_type
Definition: test_util.cpp:1626
void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param)
double cpu_min
void commDimPartitionedSet(int dir)
void staggered_dslash(void *res, void **fatlink, void **longlink, void *spinorField, int oddBit, int daggerBit, QudaPrecision sPrecision, QudaPrecision gPrecision)
QudaPrecision cpu_prec
Definition: quda.h:190
int process_command_line_option(int argc, char **argv, int *idx)
Definition: test_util.cpp:1795
void Source(const QudaSourceType sourceType, const int st=0, const int s=0, const int c=0)
int xdim
Definition: test_util.cpp:1620
std::string getstaggereddslashtestname(testing::TestParamInfo<::testing::tuple< int, int, int >> param)
QudaPrecision precision
Definition: lattice_field.h:54
QudaDagType dagger
Definition: quda.h:184
void matdagmat_mg4dir(cpuColorSpinorField *out, void **link, void **ghostLink, cpuColorSpinorField *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision, cpuColorSpinorField *tmp, QudaParity parity)
const char * recon_str[]
double mass
Definition: test_util.cpp:1642
void finalizeComms()
Definition: test_util.cpp:107
QudaGaugeFieldOrder gauge_order
Definition: quda.h:36
DslashTime dslashCUDA(int niter)
const char * get_prec_str(QudaPrecision prec)
Definition: misc.cpp:704
unsigned long long Flops() const
Definition: dirac_quda.h:148
cudaColorSpinorField * cudaSpinor
virtual void MdagM(ColorSpinorField &out, const ColorSpinorField &in) const =0
QudaSiteSubset siteSubset
Definition: lattice_field.h:55
void setDims(int *)
Definition: test_util.cpp:130
QudaFieldLocation input_location
Definition: quda.h:90
void freeGaugeQuda(void)
cpuColorSpinorField * spinor
__darwin_suseconds_t tv_usec
bool verify_results
Definition: test_util.cpp:1641
static size_t gSize
Definition: llfat_test.cpp:36
QudaGaugeParam param
Definition: pack_test.cpp:17
void setDiracParam(DiracParam &diracParam, QudaInvertParam *inv_param, bool pc)
QudaSolutionType solution_type
Definition: quda.h:181
else return(__swbuf(_c, _p))
void end(void)
int x[QUDA_MAX_DIM]
Definition: lattice_field.h:50
void * longlink[4]
double scale
Definition: quda.h:33
void initQuda(int device)
double tol
Definition: test_util.cpp:1647
QudaFieldLocation output_location
Definition: quda.h:91
void * malloc(size_t __size) __attribute__((__warn_unused_result__)) __attribute__((alloc_size(1)))
double benchmark(int kernel, const int niter)
Definition: blas_test.cu:283
void usage(char **argv)
Definition: test_util.cpp:1693
bool kernel_pack_t
Definition: test_util.cpp:1650
QudaReconstructType link_recon
Definition: test_util.cpp:1612
void setSpinorSiteSize(int n)
Definition: test_util.cpp:192
ColorSpinorParam csParam
Definition: pack_test.cpp:24
void * hostGauge[4]
QudaInvertParam newQudaInvertParam(void)
int test_type
Definition: test_util.cpp:1634
const char * get_recon_str(QudaReconstructType recon)
Definition: misc.cpp:770
QudaGaugeParam gaugeParam
double event_time
int V
Definition: test_util.cpp:28
#define gaugeSiteSize
Definition: test_util.h:6
void init(int precision, QudaReconstructType link_recon)
double cpu_time
::testing::tuple< int, int, int > param
__host__ __device__ ValueType pow(ValueType x, ExponentType e)
Definition: complex_quda.h:100
void matdagmat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision, void *tmp, QudaParity parity)
QudaGammaBasis gamma_basis
Definition: quda.h:197
QudaPrecision cuda_prec_sloppy
Definition: quda.h:45
const void ** Ghost() const
Definition: gauge_field.h:254
enum QudaDagType_s QudaDagType
enum QudaParity_s QudaParity
QudaReconstructType reconstruct
Definition: quda.h:43
QudaPrecision cuda_prec
Definition: quda.h:42
int X[4]
Definition: quda.h:29
void staggeredDslashRef()
double mass
Definition: quda.h:96
int fprintf(FILE *, const char *,...) __attribute__((__format__(__printf__
static int Compare(const cpuColorSpinorField &a, const cpuColorSpinorField &b, const int resolution=1)
cpuGaugeField * cpuLong
void construct_fat_long_gauge_field(void **fatlink, void **longlink, int type, QudaPrecision precision, QudaGaugeParam *param, QudaDslashType dslash_type)
Definition: test_util.cpp:1069
virtual void M(ColorSpinorField &out, const ColorSpinorField &in) const =0
const char * prec_str[]
int X[4]
double tadpole_coeff
Definition: quda.h:32
double gaugeGiB
Definition: quda.h:60
Dirac * dirac
void * fatlink[4]
void display_test_info(int precision, QudaReconstructType link_recon)
enum QudaReconstructType_s QudaReconstructType
void commDimPartitionedReset()
Reset the comm dim partitioned array to zero.
Main header file for the QUDA library.
#define MAX(a, b)
cpuGaugeField * cpuFat
#define printfQuda(...)
Definition: util_quda.h:84
QudaTboundary t_boundary
Definition: quda.h:38
unsigned long long flops
Definition: blas_quda.cu:42
double cpu_max
int main(int argc, char **argv)
void staggered_dslash_mg4dir(cpuColorSpinorField *out, void **fatlink, void **longlink, void **ghost_fatlink, void **ghost_longlink, cpuColorSpinorField *in, int oddBit, int daggerBit, QudaPrecision sPrecision, QudaPrecision gPrecision)
enum QudaDslashType_s QudaDslashType
TEST_P(StaggeredDslashTest, verify)
cudaColorSpinorField * cudaSpinorOut
void setKernelPackT(bool pack)
Definition: dslash_quda.cu:59
int tdim
Definition: test_util.cpp:1623
int zdim
Definition: test_util.cpp:1622
#define checkCudaError()
Definition: util_quda.h:129
static Dirac * create(const DiracParam &param)
Definition: dirac.cpp:142
cpuColorSpinorField * spinorRef
cudaColorSpinorField * tmp
static __inline__ size_t size_t d
virtual void Dslash(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const =0
QudaPrecision prec
Definition: test_util.cpp:1615
int niter
Definition: test_util.cpp:1630
void initComms(int argc, char **argv, const int *commDims)
Definition: test_util.cpp:72
void setVerbosity(const QudaVerbosity verbosity)
Definition: util_quda.cpp:24
QudaMatPCType matpc_type
Definition: quda.h:183
ColorSpinorField * tmp1
Definition: dirac_quda.h:40
QudaPrecision cpu_prec
Definition: quda.h:40
void updateR()
update the radius for halos.
QudaGaugeParam newQudaGaugeParam(void)
int gridsize_from_cmdline[]
Definition: test_util.cpp:50
void comm_barrier(void)
Definition: comm_mpi.cpp:328