QUDA  0.9.0
covdev_test.cpp
Go to the documentation of this file.
1 #include <iostream>
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include <string.h>
5 
6 #include <quda.h>
7 #include <quda_internal.h>
8 #include <dirac_quda.h>
9 #include <dslash_quda.h>
10 #include <invert_quda.h>
11 #include <util_quda.h>
12 #include <blas_quda.h>
13 
14 #include <misc.h>
15 #include <test_util.h>
16 #include <dslash_util.h>
17 #include <covdev_reference.h>
18 #include <gauge_field.h>
19 
20 #include <assert.h>
21 #include <gtest.h>
22 
23 using namespace quda;
24 
25 #define MAX(a,b) ((a)>(b)?(a):(b))
26 
27 extern void usage(char** argv );
28 
30 
31 extern int test_type;
32 
35 
38 
40 
43 
45 
46 void *hostGauge[4];
47 void *links[4];
48 
49 #ifdef MULTI_GPU
50 void **ghostLink;
51 #endif
52 
54 extern QudaDagType dagger;
55 int transfer = 0; // include transfer time in the benchmark?
56 extern int xdim;
57 extern int ydim;
58 extern int zdim;
59 extern int tdim;
60 extern int gridsize_from_cmdline[];
62 extern QudaPrecision prec;
63 
64 extern int device;
65 extern bool verify_results;
66 extern int niter;
67 
68 extern bool kernel_pack_t;
69 
70 extern double mass; // the mass of the Dirac operator
71 
72 int X[4];
73 extern int Nsrc; // number of spinors to apply to simultaneously
74 
76 
77 const int nColor = 3;
78 
79 void init()
80 {
81 
83 
85 
87 
90 
91  cuda_prec = prec;
92 
93  gaugeParam.X[0] = X[0] = xdim;
94  gaugeParam.X[1] = X[1] = ydim;
95  gaugeParam.X[2] = X[2] = zdim;
96  gaugeParam.X[3] = X[3] = tdim;
97 
99  Ls = 1;
100 
101  if (Nsrc != 1)
102  printfQuda ("The covariant derivative doesn't support 5-d indexing, only source 0 will be tested.\n");
103 
104  setSpinorSiteSize(24);
105 
111 
112  // ensure we use the right dslash
114 
115  gaugeParam.anisotropy = 1.0;
117  gaugeParam.scale = 1.0;
122  gaugeParam.gaugeGiB = 0;
123 
131  inv_param.mass = mass;
132 
135 
136  int tmpint = MAX(X[1]*X[2]*X[3], X[0]*X[2]*X[3]);
137  tmpint = MAX(tmpint, X[0]*X[1]*X[3]);
138  tmpint = MAX(tmpint, X[0]*X[1]*X[2]);
139 
140 
141  gaugeParam.ga_pad = tmpint;
142  inv_param.sp_pad = tmpint;
143 
146  csParam.nSpin=4;
147  csParam.nDim=4;
148  for(int d = 0; d < 4; d++) {
149  csParam.x[d] = gaugeParam.X[d];
150  }
151 // csParam.x[4] = Nsrc; // number of sources becomes the fifth dimension
152 
154  csParam.pad = 0;
157 
160  csParam.gammaBasis = inv_param.gamma_basis; // this parameter is meaningless for staggered
162 
167 
169  csParam.x[0] = gaugeParam.X[0];
170 
171  printfQuda("Randomizing fields ...\n");
172 
174 
175  size_t gSize = (gaugeParam.cpu_prec == QUDA_DOUBLE_PRECISION) ? sizeof(double) : sizeof(float);
176 
177  for (int dir = 0; dir < 4; dir++) {
178  links[dir] = malloc(V*gaugeSiteSize*gSize);
179 
180  if (links[dir] == NULL) {
181  errorQuda("ERROR: malloc failed for gauge links");
182  }
183  }
184 
186 
187 #ifdef MULTI_GPU
190  GaugeFieldParam cpuParam(links, gaugeParam);
192  cpuLink = new cpuGaugeField(cpuParam);
193  ghostLink = cpuLink->Ghost();
194 
195  int x_face_size = X[1]*X[2]*X[3]/2;
196  int y_face_size = X[0]*X[2]*X[3]/2;
197  int z_face_size = X[0]*X[1]*X[3]/2;
198  int t_face_size = X[0]*X[1]*X[2]/2;
199  int pad_size = MAX(x_face_size, y_face_size);
200  pad_size = MAX(pad_size, z_face_size);
201  pad_size = MAX(pad_size, t_face_size);
202  gaugeParam.ga_pad = pad_size;
203 #endif
204 
207 
208  printfQuda("Links sending...");
210  printfQuda("Links sent\n");
211 
212  printfQuda("Sending fields to GPU...");
213 
214  if (!transfer) {
220  } else {
221  /* Single and half */
223  }
224 
225  printfQuda("Creating cudaSpinor\n");
227 
228  printfQuda("Creating cudaSpinorOut\n");
230 
231  printfQuda("Sending spinor field to GPU\n");
232  *cudaSpinor = *spinor;
233 
234  cudaDeviceSynchronize();
235  checkCudaError();
236 
237  double spinor_norm2 = blas::norm2(*spinor);
238  double cuda_spinor_norm2= blas::norm2(*cudaSpinor);
239  printfQuda("Source CPU = %f, CUDA=%f\n", spinor_norm2, cuda_spinor_norm2);
240 
243 
244  DiracParam diracParam;
245  setDiracParam(diracParam, &inv_param, false);
246 
247  diracParam.tmp1=tmp;
248 
249  dirac = new GaugeCovDev(diracParam);
250 
251  } else {
252  errorQuda("Error not suppported");
253  }
254 
255  return;
256 }
257 
258 void end(void)
259 {
260  for (int dir = 0; dir < 4; dir++) {
261  free(links[dir]);
262  }
263 
264  if (!transfer){
265  delete dirac;
266  delete cudaSpinor;
267  delete cudaSpinorOut;
268  delete tmp;
269  }
270 
271  delete spinor;
272  delete spinorOut;
273  delete spinorRef;
274  delete tmpCpu;
275 
276  if (cpuLink) delete cpuLink;
277 
278  endQuda();
279 }
280 
// Time `niter` passes of the benchmark loop with CUDA events and return the
// elapsed time in seconds.
//
// niter: number of loop iterations placed between the two recorded events.
// mu:    direction index supplied by the caller. No line visible in this
//        listing reads it. NOTE(review): source line 292 is absent from this
//        listing and presumably held the operator call that consumes mu -
//        confirm against the complete file.
//
// Returns the event-measured time converted from milliseconds to seconds.
// Calls errorQuda() if cudaGetLastError() reports anything but cudaSuccess.
281 double dslashCUDA(int niter, int mu) {
282 
283  cudaEvent_t start, end;
// Record the start event on stream 0 and block until it has been reached.
284  cudaEventCreate(&start);
285  cudaEventRecord(start, 0);
286  cudaEventSynchronize(start);
287 
288  for (int i = 0; i < niter; i++) {
289  if (transfer){
290  //MatQuda(spinorGPU, spinor, &inv_param);
291  } else {
// NOTE(review): this branch is empty in the listing (line 292 missing).
293  }
294  }
295 
// Record the end event and wait for it so the elapsed time can be queried.
296  cudaEventCreate(&end);
297  cudaEventRecord(end, 0);
298  cudaEventSynchronize(end);
// cudaEventElapsedTime reports milliseconds.
299  float runTime;
300  cudaEventElapsedTime(&runTime, start, end);
301  cudaEventDestroy(start);
302  cudaEventDestroy(end);
303 
304  double secs = runTime / 1000; //stopwatchReadSeconds();
305 
306  // check for errors
307  cudaError_t stat = cudaGetLastError();
308  if (stat != cudaSuccess)
309  errorQuda("with ERROR: %s\n", cudaGetErrorString(stat));
310 
311  return secs;
312 }
313 
// Announce and run the host reference calculation for one direction.
//
// mu: direction index passed in by the caller. No line visible in this
//     listing reads it. NOTE(review): source lines 321 and 323 (the bodies of
//     the MULTI_GPU and single-GPU branches) are absent from this listing and
//     presumably held the reference calls - confirm against the complete file.
314 void covdevRef(int mu)
315 {
316 
317  // compare to dslash reference implementation
318  printfQuda("Calculating reference implementation...");
// Flush so the progress message appears before the reference work starts.
319  fflush(stdout);
320 #ifdef MULTI_GPU
322 #else
324 #endif
325  printfQuda("done.\n");
326 
327 }
328 
329 TEST(dslash, verify) {
330  double deviation = pow(10, -(double)(cpuColorSpinorField::Compare(*spinorRef, *spinorOut)));
331  double tol = (inv_param.cuda_prec == QUDA_DOUBLE_PRECISION ? 1e-12 :
332  (inv_param.cuda_prec == QUDA_SINGLE_PRECISION ? 1e-3 : 1e-1));
333  ASSERT_LE(deviation, tol) << "CPU and CUDA implementations do not agree";
334 }
335 
336 static int dslashTest()
337 {
338  // return code for google test
339  int test_rc = 0;
340  init();
341 
342  int attempts = 1;
343 
344  for (int i=0; i<attempts; i++) {
345  for (int mu=0; mu<4; mu++) { // We test all directions in one go
346  int muCuda = mu + (dagger ? 4 : 0);
347  int muCpu = mu*2 + (dagger ? 1 : 0);
348 
349  { // warm-up run
350  printfQuda("Tuning...\n");
351  dslashCUDA(1,muCuda);
352  }
353  printfQuda("Executing %d kernel loops...", niter);
354 
355  double secs = dslashCUDA(niter, muCuda);
356 
357  if (!transfer) *spinorOut = *cudaSpinorOut;
358 
359  printfQuda("\n%fms per loop\n", 1000*secs);
360  covdevRef(muCpu);
361 
362  unsigned long long flops = niter * 8*nColor*nColor*2*(long long)cudaSpinor->VolumeCB();
363  printfQuda("GFLOPS = %f\n", 1.0e-9*flops/secs);
364  printfQuda("Effective halo bi-directional bandwidth = %f for aggregate message size %lu bytes\n",
365  1.0e-9*2*cudaSpinor->GhostBytes()*niter/secs, 2*cudaSpinor->GhostBytes());
366 
367  double spinor_ref_norm2 = blas::norm2(*spinorRef);
368  double spinor_out_norm2 = blas::norm2(*spinorOut);
369 
370  if (!transfer) {
371  double cuda_spinor_out_norm2 = blas::norm2(*cudaSpinorOut);
372  printfQuda("Results mu = %d: CPU=%f, CUDA=%f, CPU-CUDA=%f\n", mu, spinor_ref_norm2, cuda_spinor_out_norm2,
373  spinor_out_norm2);
374  } else {
375  printfQuda("Result mu = %d: CPU=%f , CPU-CUDA=%f", mu, spinor_ref_norm2, spinor_out_norm2);
376  }
377 
378  if (verify_results) {
379  test_rc = RUN_ALL_TESTS();
380  if (test_rc != 0) warningQuda("Tests failed");
381  }
382  } // Directions
383  }
384 
385  end();
386 
387  return test_rc;
388 }
389 
390 
392 {
393  printfQuda("running the following test:\n");
394 
395  printfQuda("prec recon test_type dagger S_dim T_dimension\n");
396  printfQuda("%s %s %d %d %d/%d/%d %d \n",
399  printfQuda("Grid partition info: X Y Z T\n");
400  printfQuda(" %d %d %d %d\n",
401  dimPartitioned(0),
402  dimPartitioned(1),
403  dimPartitioned(2),
404  dimPartitioned(3));
405 
406  return ;
407 
408 }
409 
410 
// Intentionally empty: the function performs no work and ignores argv.
void usage_extra(char** argv)
{
  (void)argv;
}
416 
// Program entry point.
//
// Initializes Google Test, passes each command-line argument to
// process_command_line_option(), sets up communications, runs dslashTest()
// and returns its result as the process exit code after finalizeComms().
//
// NOTE(review): source line 433 is absent from this listing; confirm against
// the complete file what runs between initComms() and dslashTest().
417 int main(int argc, char **argv)
418 {
419  // initialize google test
420  ::testing::InitGoogleTest(&argc, argv);
// A return value of 0 from process_command_line_option() means the option
// was accepted; the index is passed by pointer so the callee may advance it.
421  for (int i=1 ;i < argc; i++){
422 
423  if(process_command_line_option(argc, argv, &i) == 0){
424  continue;
425  }
426 
// Any other return value is reported on stderr, followed by usage().
427  fprintf(stderr, "ERROR: Invalid option:%s\n", argv[i]);
428  usage(argv);
429  }
430 
431  initComms(argc, argv, gridsize_from_cmdline);
432 
434 
435  // return the result code produced by dslashTest()
436  int test_rc = dslashTest();
437 
438  finalizeComms();
439 
440  return test_rc;
441 }
442 
int dimPartitioned(int dim)
Definition: test_util.cpp:1686
QudaDiracFieldOrder dirac_order
Definition: quda.h:195
QudaReconstructType reconstruct_sloppy
Definition: quda.h:46
double anisotropy
Definition: quda.h:31
QudaGhostExchange ghostExchange
Definition: lattice_field.h:60
QudaGaugeParam gaugeParam
Definition: covdev_test.cpp:36
void endQuda(void)
void free(void *)
void construct_gauge_field(void **gauge, int type, QudaPrecision precision, QudaGaugeParam *param)
Definition: test_util.cpp:1054
double mu
Definition: test_util.cpp:1643
enum QudaPrecision_s QudaPrecision
void init()
Definition: covdev_test.cpp:79
int ga_pad
Definition: quda.h:53
QudaGaugeFixed gauge_fix
Definition: quda.h:51
QudaLinkType type
Definition: quda.h:35
int fflush(FILE *)
void usage(char **argv)
Definition: test_util.cpp:1693
bool verify_results
Definition: test_util.cpp:1641
#define errorQuda(...)
Definition: util_quda.h:90
double norm2(const ColorSpinorField &a)
Definition: reduce_quda.cu:241
QudaDslashType dslash_type
Definition: quda.h:93
cudaEvent_t start
QudaPrecision cuda_prec
Definition: quda.h:191
cpuColorSpinorField * tmpCpu
Definition: covdev_test.cpp:41
void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param)
void * links[4]
Definition: covdev_test.cpp:47
QudaPrecision cpu_prec
Definition: quda.h:190
int process_command_line_option(int argc, char **argv, int *idx)
Definition: test_util.cpp:1795
void Source(const QudaSourceType sourceType, const int st=0, const int s=0, const int c=0)
cudaColorSpinorField * tmp
Definition: covdev_test.cpp:44
QudaPrecision precision
Definition: lattice_field.h:54
QudaDagType dagger
Definition: quda.h:184
void finalizeComms()
Definition: test_util.cpp:107
void end(void)
static int dslashTest()
QudaGaugeFieldOrder gauge_order
Definition: quda.h:36
void usage_extra(char **argv)
const char * get_prec_str(QudaPrecision prec)
Definition: misc.cpp:704
QudaDslashType dslash_type
Definition: test_util.cpp:1626
QudaSiteSubset siteSubset
Definition: lattice_field.h:55
void setDims(int *)
Definition: test_util.cpp:130
QudaFieldLocation input_location
Definition: quda.h:90
QudaPrecision cpu_prec
Definition: covdev_test.cpp:33
static size_t gSize
Definition: llfat_test.cpp:36
int Ls
Definition: test_util.cpp:39
QudaPrecision prec
Definition: test_util.cpp:1615
void setDiracParam(DiracParam &diracParam, QudaInvertParam *inv_param, bool pc)
QudaSolutionType solution_type
Definition: quda.h:181
else return(__swbuf(_c, _p))
int x[QUDA_MAX_DIM]
Definition: lattice_field.h:50
double scale
Definition: quda.h:33
void initQuda(int device)
double tol
Definition: test_util.cpp:1647
QudaFieldLocation output_location
Definition: quda.h:91
void * malloc(size_t __size) __attribute__((__warn_unused_result__)) __attribute__((alloc_size(1)))
QudaInvertParam inv_param
Definition: covdev_test.cpp:37
QudaReconstructType link_recon
Definition: test_util.cpp:1612
QudaPrecision cuda_prec
Definition: covdev_test.cpp:34
const int nColor
Definition: covdev_test.cpp:77
void setSpinorSiteSize(int n)
Definition: test_util.cpp:192
#define MAX(a, b)
Definition: covdev_test.cpp:25
ColorSpinorParam csParam
Definition: pack_test.cpp:24
int zdim
Definition: test_util.cpp:1622
QudaInvertParam newQudaInvertParam(void)
const char * get_recon_str(QudaReconstructType recon)
Definition: misc.cpp:770
void display_test_info()
int V
Definition: test_util.cpp:28
cpuColorSpinorField * spinorOut
Definition: covdev_test.cpp:41
#define gaugeSiteSize
Definition: test_util.h:6
#define warningQuda(...)
Definition: util_quda.h:101
__host__ __device__ ValueType pow(ValueType x, ExponentType e)
Definition: complex_quda.h:100
QudaGammaBasis gamma_basis
Definition: quda.h:197
QudaPrecision cuda_prec_sloppy
Definition: quda.h:45
const void ** Ghost() const
Definition: gauge_field.h:254
cudaColorSpinorField * cudaSpinor
Definition: covdev_test.cpp:42
int niter
Definition: test_util.cpp:1630
int main(int argc, char **argv)
cudaColorSpinorField * cudaSpinorOut
Definition: covdev_test.cpp:42
int X[4]
Definition: covdev_test.cpp:72
enum QudaDagType_s QudaDagType
enum QudaParity_s QudaParity
QudaReconstructType reconstruct
Definition: quda.h:43
QudaPrecision cuda_prec
Definition: quda.h:42
int X[4]
Definition: quda.h:29
double mass
Definition: quda.h:96
int fprintf(FILE *, const char *,...) __attribute__((__format__(__printf__
void covdevRef(int mu)
static int Compare(const cpuColorSpinorField &a, const cpuColorSpinorField &b, const int resolution=1)
TEST(dslash, verify)
GaugeCovDev * dirac
Definition: covdev_test.cpp:75
double tadpole_coeff
Definition: quda.h:32
double gaugeGiB
Definition: quda.h:60
int tdim
Definition: test_util.cpp:1623
int ydim
Definition: test_util.cpp:1621
enum QudaReconstructType_s QudaReconstructType
Main header file for the QUDA library.
int Nsrc
Definition: test_util.cpp:1628
#define printfQuda(...)
Definition: util_quda.h:84
QudaTboundary t_boundary
Definition: quda.h:38
bool kernel_pack_t
Definition: test_util.cpp:1650
unsigned long long flops
Definition: blas_quda.cu:42
int transfer
Definition: covdev_test.cpp:55
enum QudaDslashType_s QudaDslashType
void setKernelPackT(bool pack)
Definition: dslash_quda.cu:59
int xdim
Definition: test_util.cpp:1620
cpuColorSpinorField * spinorRef
Definition: covdev_test.cpp:41
void * hostGauge[4]
Definition: covdev_test.cpp:46
int gridsize_from_cmdline[]
Definition: test_util.cpp:50
cpuGaugeField * cpuLink
Definition: covdev_test.cpp:39
#define checkCudaError()
Definition: util_quda.h:129
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
void mat_mg4dir(cpuColorSpinorField *out, void **link, void **ghostLink, cpuColorSpinorField *in, int daggerBit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
static __inline__ size_t size_t d
QudaDagType dagger
double dslashCUDA(int niter, int mu)
QudaParity parity
Definition: covdev_test.cpp:53
Full Covariant Derivative operator. Although not a Dirac operator per se, it's a linear operator so i...
Definition: dirac_quda.h:948
void initComms(int argc, char **argv, const int *commDims)
Definition: test_util.cpp:72
void setVerbosity(const QudaVerbosity verbosity)
Definition: util_quda.cpp:24
QudaMatPCType matpc_type
Definition: quda.h:183
ColorSpinorField * tmp1
Definition: dirac_quda.h:40
cpuColorSpinorField * spinor
Definition: covdev_test.cpp:41
QudaPrecision cpu_prec
Definition: quda.h:40
virtual void MCD(ColorSpinorField &out, const ColorSpinorField &in, const int mu) const
QudaGaugeParam newQudaGaugeParam(void)
int test_type
Definition: test_util.cpp:1634