QUDA  1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
covdev_test.cpp
Go to the documentation of this file.
1 #include <iostream>
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include <string.h>
5 
6 #include <quda.h>
7 #include <quda_internal.h>
8 #include <dirac_quda.h>
9 #include <dslash_quda.h>
10 #include <invert_quda.h>
11 #include <util_quda.h>
12 #include <blas_quda.h>
13 
14 #include <misc.h>
15 #include <test_util.h>
16 #include <dslash_util.h>
17 #include <covdev_reference.h>
18 #include <gauge_field.h>
19 
20 #include <assert.h>
21 #include <gtest/gtest.h>
22 
23 using namespace quda;
24 
// File-scope macro, extern declarations and globals shared by the functions below.
// NOTE(review): the listing numbers embedded in these lines skip values (29, 32-45,
// 54, 61, 72-73), so some declarations of the original file are not shown here.

// Larger of a and b. Each argument can be evaluated twice, so do not pass
// expressions with side effects.
25 #define MAX(a,b) ((a)>(b)?(a):(b))
26 
27 extern void usage(char** argv );
28 
30 
31 extern int test_type;
32 
35 
38 
40 
43 
45 
// NOTE(review): hostGauge is not referenced by any function visible in this listing.
46 void *hostGauge[4];
// Host gauge links, one buffer per direction: allocated in init(), freed in end().
47 void *links[4];
48 
49 #ifdef MULTI_GPU
// Set from cpuLink->Ghost() in init() and passed to mat_mg4dir() in covdevRef().
50 void **ghostLink;
51 #endif
52 
53 extern QudaDagType dagger;
55 int transfer = 0; // include transfer time in the benchmark?
56 extern int xdim;
57 extern int ydim;
58 extern int zdim;
59 extern int tdim;
60 extern int gridsize_from_cmdline[];
62 extern QudaPrecision prec;
63 
64 extern int device;
65 extern bool verify_results;
66 extern int niter;
67 
68 extern double mass; // the mass of the Dirac operator
69 
// Lattice extents, filled from xdim, ydim, zdim and tdim in init().
70 int X[4];
71 extern int Nsrc; // number of spinors to apply to simultaneously
72 
74 
// Number of colours: used for csParam.nColor in init() and in the flop estimate in main().
75 const int nColor = 3;
76 
// Set up what the test needs: the QUDA gauge and inverter parameter structs, the
// host spinors, the host gauge links, the gauge field loaded through
// loadGaugeQuda() and, when transfer == 0, the device spinors and the GaugeCovDev
// operator. argc and argv are not used by the statements visible here.
// NOTE(review): the listing numbers embedded in these lines skip values (80, 82,
// 109, 122, 128-129, 139, 151, 153-154, 162, 185, 209, 213, 216, 235), so some
// statements of the original function are not shown. The comments below describe
// only what is visible.
77 void init(int argc, char **argv)
78 {
79 
81 
83 
84  gaugeParam = newQudaGaugeParam();
85  inv_param = newQudaInvertParam();
86 
87  cuda_prec = prec;
88 
 // Lattice extents are taken from the externally defined xdim, ydim, zdim, tdim
89  gaugeParam.X[0] = X[0] = xdim;
90  gaugeParam.X[1] = X[1] = ydim;
91  gaugeParam.X[2] = X[2] = zdim;
92  gaugeParam.X[3] = X[3] = tdim;
93 
94  setDims(gaugeParam.X);
95  Ls = 1;
96 
 // Only a warning: execution continues whatever the value of Nsrc
97  if (Nsrc != 1)
98  printfQuda ("The covariant derivative doesn't support 5-d indexing, only source 0 will be tested.\n");
99 
100  setSpinorSiteSize(24);
101 
102  gaugeParam.cpu_prec = cpu_prec;
103  gaugeParam.cuda_prec = cuda_prec;
104  gaugeParam.reconstruct = link_recon;
 // The sloppy settings simply mirror the regular ones
105  gaugeParam.reconstruct_sloppy = gaugeParam.reconstruct;
106  gaugeParam.cuda_prec_sloppy = gaugeParam.cuda_prec;
107 
108  // ensure we use the right dslash
110 
111  gaugeParam.anisotropy = 1.0;
112  gaugeParam.tadpole_coeff = 0.8;
113  gaugeParam.scale = 1.0;
114  gaugeParam.type = QUDA_WILSON_LINKS;
115  gaugeParam.gauge_order = QUDA_QDP_GAUGE_ORDER;
116  gaugeParam.t_boundary = QUDA_ANTI_PERIODIC_T;
117  gaugeParam.gauge_fix = QUDA_GAUGE_FIXED_NO;
118 
119  inv_param.cpu_prec = cpu_prec;
120  inv_param.cuda_prec = cuda_prec;
121  inv_param.dirac_order = QUDA_DIRAC_ORDER;
123  inv_param.dagger = QUDA_DAG_NO;
124  inv_param.matpc_type = QUDA_MATPC_EVEN_EVEN;
125  inv_param.dslash_type = dslash_type;
126  inv_param.mass = mass;
127 
130 
 // tmpint ends up as the largest product of three of the four extents; it is
 // used for both the gauge pad and the spinor pad
131  int tmpint = MAX(X[1]*X[2]*X[3], X[0]*X[2]*X[3]);
132  tmpint = MAX(tmpint, X[0]*X[1]*X[3]);
133  tmpint = MAX(tmpint, X[0]*X[1]*X[2]);
134 
135 
136  gaugeParam.ga_pad = tmpint;
137  inv_param.sp_pad = tmpint;
138 
 // Spinor description: nColor colours, 4 spins, 4 dimensions, same extents as the gauge field
140  csParam.nColor=nColor;
141  csParam.nSpin=4;
142  csParam.nDim=4;
143  for(int d = 0; d < 4; d++) {
144  csParam.x[d] = gaugeParam.X[d];
145  }
146 // csParam.x[4] = Nsrc; // number of sources becomes the fifth dimension
147 
148  csParam.setPrecision(inv_param.cpu_prec);
149  csParam.pad = 0;
150  inv_param.solution_type = QUDA_MAT_SOLUTION;
152  csParam.pc_type = QUDA_4D_PC;
155  csParam.gammaBasis = inv_param.gamma_basis; // this parameter is meaningless for staggered
156  csParam.create = QUDA_ZERO_FIELD_CREATE;
157 
 // Host fields: source, device result copied back, and reference result
158  spinor = new cpuColorSpinorField(csParam);
159  spinorOut = new cpuColorSpinorField(csParam);
160  spinorRef = new cpuColorSpinorField(csParam);
161 
 // NOTE(review): stores the same value the loop above already wrote to csParam.x[0]
163  csParam.x[0] = gaugeParam.X[0];
164 
165  printfQuda("Randomizing fields ...\n");
166 
167  spinor->Source(QUDA_RANDOM_SOURCE);
168 
 // Size in bytes of one real number of the host gauge field
169  size_t gSize = (gaugeParam.cpu_prec == QUDA_DOUBLE_PRECISION) ? sizeof(double) : sizeof(float);
170 
 // One malloc'ed buffer per direction; these are released in end()
171  for (int dir = 0; dir < 4; dir++) {
172  links[dir] = malloc(V*gaugeSiteSize*gSize);
173 
174  if (links[dir] == NULL) {
175  errorQuda("ERROR: malloc failed for gauge links");
176  }
177  }
178 
179  construct_gauge_field(links, 1, gaugeParam.cpu_prec, &gaugeParam);
180 
181 #ifdef MULTI_GPU
 // Wrap the host links in a cpuGaugeField to obtain ghostLink, then replace
 // ga_pad by the largest of the four halved face sizes
182  gaugeParam.type = QUDA_SU3_LINKS;
183  gaugeParam.reconstruct = QUDA_RECONSTRUCT_NO;
184  GaugeFieldParam cpuParam(links, gaugeParam);
186  cpuLink = new cpuGaugeField(cpuParam);
187  ghostLink = cpuLink->Ghost();
188 
189  int x_face_size = X[1]*X[2]*X[3]/2;
190  int y_face_size = X[0]*X[2]*X[3]/2;
191  int z_face_size = X[0]*X[1]*X[3]/2;
192  int t_face_size = X[0]*X[1]*X[2]/2;
193  int pad_size = MAX(x_face_size, y_face_size);
194  pad_size = MAX(pad_size, z_face_size);
195  pad_size = MAX(pad_size, t_face_size);
196  gaugeParam.ga_pad = pad_size;
197 #endif
198 
 // Restore the link type and reconstruction before handing the links to QUDA
199  gaugeParam.type = QUDA_SU3_LINKS;
200  gaugeParam.reconstruct = gaugeParam.reconstruct_sloppy = link_recon;
201 
202  printfQuda("Links sending...");
203  loadGaugeQuda(links, &gaugeParam);
204  printfQuda("Links sent\n");
205 
206  printfQuda("Sending fields to GPU...");
207 
 // Only the transfer == 0 path is implemented; the else branch below aborts
208  if (!transfer) {
210  csParam.pad = inv_param.sp_pad;
211  csParam.setPrecision(inv_param.cuda_prec);
 // NOTE(review): both branches are empty in this listing (numbers 213 and 216
 // are skipped), so whatever they set is not visible here
212  if (csParam.Precision() == QUDA_DOUBLE_PRECISION ) {
214  } else {
215  /* Single and half */
217  }
218 
219  printfQuda("Creating cudaSpinor\n");
220  cudaSpinor = new cudaColorSpinorField(csParam);
221 
222  printfQuda("Creating cudaSpinorOut\n");
223  cudaSpinorOut = new cudaColorSpinorField(csParam);
224 
225  printfQuda("Sending spinor field to GPU\n");
226  *cudaSpinor = *spinor;
227 
228  cudaDeviceSynchronize();
229  checkCudaError();
230 
 // Print the norm of the source on both sides so a bad copy is visible in the log
231  double spinor_norm2 = blas::norm2(*spinor);
232  double cuda_spinor_norm2= blas::norm2(*cudaSpinor);
233  printfQuda("Source CPU = %f, CUDA=%f\n", spinor_norm2, cuda_spinor_norm2);
234 
236  tmp = new cudaColorSpinorField(csParam);
237 
238  DiracParam diracParam;
239  setDiracParam(diracParam, &inv_param, false);
240 
 // tmp is handed to the operator as its tmp1 field; end() deletes dirac before tmp
241  diracParam.tmp1=tmp;
242 
243  dirac = new GaugeCovDev(diracParam);
244 
245  } else {
246  errorQuda("Error not suppported");
247  }
248 
249  return;
250 }
251 
252 void end(void)
253 {
254  for (int dir = 0; dir < 4; dir++) {
255  free(links[dir]);
256  }
257 
258  if (!transfer){
259  delete dirac;
260  delete cudaSpinor;
261  delete cudaSpinorOut;
262  delete tmp;
263  }
264 
265  delete spinor;
266  delete spinorOut;
267  delete spinorRef;
268 
269  if (cpuLink) delete cpuLink;
270 
271  endQuda();
272 }
273 
// Abort through errorQuda() when a CUDA runtime call made by dslashCUDA() failed.
//   status: return code of the CUDA runtime call
//   what:   short name of the call, included in the error message
static void checkTimingCall(cudaError_t status, const char *what)
{
  if (status != cudaSuccess) errorQuda("%s failed: %s\n", what, cudaGetErrorString(status));
}

// Apply the covariant derivative niter times and time the whole loop with CUDA events.
//   niter: number of back-to-back applications
//   mu:    direction index, forwarded unchanged to dirac->MCD()
// Returns the elapsed time in seconds between the two recorded events.
// Nothing is applied when transfer is non-zero (that path is not implemented).
double dslashCUDA(int niter, int mu) {

  // Both events are created before timing starts so that event creation is not
  // part of the measured region
  cudaEvent_t start, stop;
  checkTimingCall(cudaEventCreate(&start), "cudaEventCreate");
  checkTimingCall(cudaEventCreate(&stop), "cudaEventCreate");

  checkTimingCall(cudaEventRecord(start, 0), "cudaEventRecord");
  checkTimingCall(cudaEventSynchronize(start), "cudaEventSynchronize");

  for (int i = 0; i < niter; i++) {
    if (transfer){
      //MatQuda(spinorGPU, spinor, &inv_param);
    } else {
      dirac->MCD(*cudaSpinorOut, *cudaSpinor, mu);
    }
  }

  checkTimingCall(cudaEventRecord(stop, 0), "cudaEventRecord");
  checkTimingCall(cudaEventSynchronize(stop), "cudaEventSynchronize");

  // Initialised so that a value is never read uninitialised
  float runTime = 0.0f;
  checkTimingCall(cudaEventElapsedTime(&runTime, start, stop), "cudaEventElapsedTime");
  checkTimingCall(cudaEventDestroy(start), "cudaEventDestroy");
  checkTimingCall(cudaEventDestroy(stop), "cudaEventDestroy");

  // cudaEventElapsedTime reports milliseconds
  double secs = runTime / 1000; //stopwatchReadSeconds();

  // check for errors
  cudaError_t stat = cudaGetLastError();
  if (stat != cudaSuccess)
    errorQuda("with ERROR: %s\n", cudaGetErrorString(stat));

  return secs;
}
306 
307 void covdevRef(int mu)
308 {
309 
310  // compare to dslash reference implementation
311  printfQuda("Calculating reference implementation...");
312  fflush(stdout);
313 #ifdef MULTI_GPU
314  mat_mg4dir(spinorRef, links, ghostLink, spinor, dagger, mu, inv_param.cpu_prec, gaugeParam.cpu_prec);
315 #else
316  mat(spinorRef->V(), links, spinor->V(), dagger, mu, inv_param.cpu_prec, gaugeParam.cpu_prec);
317 #endif
318  printfQuda("done.\n");
319 
320 }
321 
322 TEST(dslash, verify) {
323  double deviation = pow(10, -(double)(cpuColorSpinorField::Compare(*spinorRef, *spinorOut)));
324  double tol = (inv_param.cuda_prec == QUDA_DOUBLE_PRECISION ? 1e-12 :
325  (inv_param.cuda_prec == QUDA_SINGLE_PRECISION ? 1e-3 : 1e-1));
326  ASSERT_LE(deviation, tol) << "CPU and CUDA implementations do not agree";
327 }
328 
329 
// Body of a function that prints a short description of the test being run and
// the grid partitioning reported by dimPartitioned() for dimensions 0-3.
// NOTE(review): the signature line (listing number 330) is absent from this
// listing; presumably this is display_test_info() - confirm against the original file.
331 {
332  printfQuda("running the following test:\n");
333 
334  printfQuda("prec recon test_type dagger S_dim T_dimension\n");
 // NOTE(review): the arguments and closing parenthesis of the next call (listing
 // numbers 336-337) are missing here, so the printed values cannot be told from this view
335  printfQuda("%s %s %d %d %d/%d/%d %d \n",
338  printfQuda("Grid partition info: X Y Z T\n");
339  printfQuda(" %d %d %d %d\n",
340  dimPartitioned(0),
341  dimPartitioned(1),
342  dimPartitioned(2),
343  dimPartitioned(3));
344 
345  return ;
346 
347 }
348 
// Intentionally does nothing; argv is ignored.
void usage_extra(char **argv)
{
}
350 
351 int main(int argc, char **argv)
352 {
353  // initalize google test
354  ::testing::InitGoogleTest(&argc, argv);
355  // return code for google test
356  int test_rc = 0;
357  for (int i = 1; i < argc; i++) {
358  if(process_command_line_option(argc, argv, &i) == 0){
359  continue;
360  }
361 
362  fprintf(stderr, "ERROR: Invalid option:%s\n", argv[i]);
363  usage(argv);
364  }
365 
366  initComms(argc, argv, gridsize_from_cmdline);
367 
369 
370  init(argc, argv);
371 
372  int attempts = 1;
373  for (int i = 0; i < attempts; i++) {
374 
375  // Test forward directions, then backward
376  for (int dag = 0; dag < 2; dag++) {
377  dag == 0 ? dagger = QUDA_DAG_NO : dagger = QUDA_DAG_YES;
378 
379  for (int mu = 0; mu < 4; mu++) { // We test all directions in one go
380  int muCuda = mu + (dagger ? 4 : 0);
381  int muCpu = mu * 2 + (dagger ? 1 : 0);
382 
383  // Reference computation
384  covdevRef(muCpu);
385  printfQuda("\n\nChecking muQuda = %d\n", muCuda);
386 
387  { // warm-up run
388  printfQuda("Tuning...\n");
389  dslashCUDA(1, muCuda);
390  }
391 
392  printfQuda("Executing %d kernel loop(s)...", niter);
393 
394  double secs = dslashCUDA(niter, muCuda);
395  if (!transfer) *spinorOut = *cudaSpinorOut;
396  printfQuda("\n%fms per loop\n", 1000 * secs);
397 
398  unsigned long long flops
399  = niter * cudaSpinor->Nspin() * (8 * nColor - 2) * nColor * (long long)cudaSpinor->Volume();
400  printfQuda("GFLOPS = %f\n", 1.0e-9 * flops / secs);
401 
402  double spinor_ref_norm2 = blas::norm2(*spinorRef);
403  double spinor_out_norm2 = blas::norm2(*spinorOut);
404 
405  if (!transfer) {
406  double cuda_spinor_out_norm2 = blas::norm2(*cudaSpinorOut);
407  printfQuda("Results mu = %d: CPU=%f, CUDA=%f, CPU-CUDA=%f\n", muCuda, spinor_ref_norm2, cuda_spinor_out_norm2,
408  spinor_out_norm2);
409  } else {
410  printfQuda("Result mu = %d: CPU=%f , CPU-CUDA=%f", mu, spinor_ref_norm2, spinor_out_norm2);
411  }
412 
413  if (verify_results) {
414  ::testing::TestEventListeners &listeners = ::testing::UnitTest::GetInstance()->listeners();
415  if (comm_rank() != 0) { delete listeners.Release(listeners.default_result_printer()); }
416 
417  test_rc = RUN_ALL_TESTS();
418  if (test_rc != 0) warningQuda("Tests failed");
419  }
420  } // Directions
421  } // Dagger
422  }
423 
424  end();
425 
426  finalizeComms();
427  return test_rc;
428 }
429 
static size_t gSize
int dimPartitioned(int dim)
Definition: test_util.cpp:1776
QudaDiracFieldOrder dirac_order
Definition: quda.h:219
int comm_rank(void)
Definition: comm_mpi.cpp:82
QudaReconstructType reconstruct_sloppy
Definition: quda.h:53
double anisotropy
Definition: quda.h:38
QudaGhostExchange ghostExchange
Definition: lattice_field.h:76
QudaGaugeParam gaugeParam
Definition: covdev_test.cpp:36
void setPrecision(QudaPrecision precision, QudaPrecision ghost_precision=QUDA_INVALID_PRECISION, bool force_native=false)
void endQuda(void)
void construct_gauge_field(void **gauge, int type, QudaPrecision precision, QudaGaugeParam *param)
Definition: test_util.cpp:1047
double mu
Definition: test_util.cpp:1648
enum QudaPrecision_s QudaPrecision
int ga_pad
Definition: quda.h:63
QudaGaugeFixed gauge_fix
Definition: quda.h:61
QudaLinkType type
Definition: quda.h:42
void usage(char **argv)
Definition: test_util.cpp:1783
bool verify_results
Definition: test_util.cpp:1643
#define errorQuda(...)
Definition: util_quda.h:121
double norm2(const ColorSpinorField &a)
Definition: reduce_quda.cu:721
QudaDslashType dslash_type
Definition: quda.h:102
QudaPrecision cuda_prec
Definition: quda.h:214
void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param)
void * links[4]
Definition: covdev_test.cpp:47
QudaPrecision cpu_prec
Definition: quda.h:213
int process_command_line_option(int argc, char **argv, int *idx)
Definition: test_util.cpp:2019
void Source(const QudaSourceType sourceType, const int st=0, const int s=0, const int c=0)
cudaColorSpinorField * tmp
Definition: covdev_test.cpp:44
QudaDagType dagger
Definition: quda.h:207
void finalizeComms()
Definition: test_util.cpp:128
QudaGaugeFieldOrder gauge_order
Definition: quda.h:43
void usage_extra(char **argv)
const char * get_prec_str(QudaPrecision prec)
Definition: misc.cpp:701
QudaDslashType dslash_type
Definition: test_util.cpp:1621
QudaSiteSubset siteSubset
Definition: lattice_field.h:71
void setDims(int *)
Definition: test_util.cpp:151
QudaFieldLocation input_location
Definition: quda.h:99
QudaPrecision cpu_prec
Definition: covdev_test.cpp:33
int Ls
Definition: test_util.cpp:38
QudaPrecision prec
Definition: test_util.cpp:1608
void setDiracParam(DiracParam &diracParam, QudaInvertParam *inv_param, bool pc)
QudaSolutionType solution_type
Definition: quda.h:204
int x[QUDA_MAX_DIM]
Definition: lattice_field.h:67
double scale
Definition: quda.h:40
void initQuda(int device)
double tol
Definition: test_util.cpp:1656
QudaFieldLocation output_location
Definition: quda.h:100
QudaInvertParam inv_param
Definition: covdev_test.cpp:37
QudaReconstructType link_recon
Definition: test_util.cpp:1605
QudaPrecision cuda_prec
Definition: covdev_test.cpp:34
const int nColor
Definition: covdev_test.cpp:75
void setSpinorSiteSize(int n)
Definition: test_util.cpp:211
#define MAX(a, b)
Definition: covdev_test.cpp:25
ColorSpinorParam csParam
Definition: pack_test.cpp:24
int zdim
Definition: test_util.cpp:1617
QudaInvertParam newQudaInvertParam(void)
const char * get_recon_str(QudaReconstructType recon)
Definition: misc.cpp:768
void display_test_info()
cpuColorSpinorField * spinorOut
Definition: covdev_test.cpp:41
#define warningQuda(...)
Definition: util_quda.h:133
__host__ __device__ ValueType pow(ValueType x, ExponentType e)
Definition: complex_quda.h:111
QudaGammaBasis gamma_basis
Definition: quda.h:221
QudaPrecision cuda_prec_sloppy
Definition: quda.h:52
const void ** Ghost() const
Definition: gauge_field.h:323
cudaColorSpinorField * cudaSpinor
Definition: covdev_test.cpp:42
int niter
Definition: test_util.cpp:1629
int main(int argc, char **argv)
cudaColorSpinorField * cudaSpinorOut
Definition: covdev_test.cpp:42
int X[4]
Definition: covdev_test.cpp:70
enum QudaDagType_s QudaDagType
enum QudaParity_s QudaParity
QudaReconstructType reconstruct
Definition: quda.h:50
QudaPrecision cuda_prec
Definition: quda.h:49
int X[4]
Definition: quda.h:36
double mass
Definition: quda.h:105
void covdevRef(int mu)
int device
Definition: test_util.cpp:1602
int V
Definition: test_util.cpp:27
static int Compare(const cpuColorSpinorField &a, const cpuColorSpinorField &b, const int resolution=1)
Perform a component by component comparison of two color-spinor fields. In doing we normalize with re...
TEST(dslash, verify)
GaugeCovDev * dirac
Definition: covdev_test.cpp:73
double tadpole_coeff
Definition: quda.h:39
int tdim
Definition: test_util.cpp:1618
int ydim
Definition: test_util.cpp:1616
enum QudaReconstructType_s QudaReconstructType
Main header file for the QUDA library.
QudaPrecision Precision() const
Definition: lattice_field.h:58
int Nsrc
Definition: test_util.cpp:1627
#define printfQuda(...)
Definition: util_quda.h:115
QudaTboundary t_boundary
Definition: quda.h:45
unsigned long long flops
Definition: blas_quda.cu:22
int transfer
Definition: covdev_test.cpp:55
enum QudaDslashType_s QudaDslashType
int xdim
Definition: test_util.cpp:1615
cpuColorSpinorField * spinorRef
Definition: covdev_test.cpp:41
void end(void)
void * hostGauge[4]
Definition: covdev_test.cpp:46
int gridsize_from_cmdline[]
Definition: test_util.cpp:49
cpuGaugeField * cpuLink
Definition: covdev_test.cpp:39
#define checkCudaError()
Definition: util_quda.h:161
void initComms(int argc, char **argv, int *const commDims)
Definition: test_util.cpp:88
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
void mat_mg4dir(cpuColorSpinorField *out, void **link, void **ghostLink, cpuColorSpinorField *in, int daggerBit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
QudaDagType dagger
Definition: test_util.cpp:1620
double dslashCUDA(int niter, int mu)
QudaParity parity
Definition: covdev_test.cpp:54
Full Covariant Derivative operator. Although not a Dirac operator per se, it's a linear operator so i...
Definition: dirac_quda.h:1069
void setVerbosity(QudaVerbosity verbosity)
Definition: util_quda.cpp:25
QudaMatPCType matpc_type
Definition: quda.h:206
ColorSpinorField * tmp1
Definition: dirac_quda.h:41
cpuColorSpinorField * spinor
Definition: covdev_test.cpp:41
void init(int argc, char **argv)
Definition: covdev_test.cpp:77
QudaPrecision cpu_prec
Definition: quda.h:47
virtual void MCD(ColorSpinorField &out, const ColorSpinorField &in, const int mu) const
#define gaugeSiteSize
Definition: face_gauge.cpp:34
QudaGaugeParam newQudaGaugeParam(void)
int test_type
Definition: test_util.cpp:1636