QUDA  v1.1.0
A library for QCD on GPUs
covdev_test.cpp
Go to the documentation of this file.
1 #include <iostream>
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include <string.h>
5 
6 #include <quda.h>
7 #include <quda_internal.h>
8 #include <dirac_quda.h>
9 #include <dslash_quda.h>
10 #include <invert_quda.h>
11 #include <util_quda.h>
12 #include <blas_quda.h>
13 
14 #include <misc.h>
15 #include <host_utils.h>
16 #include <command_line_params.h>
17 #include <dslash_reference.h>
18 #include <covdev_reference.h>
19 #include <gauge_field.h>
20 
21 #include <assert.h>
22 #include <gtest/gtest.h>
23 
24 using namespace quda;
25 
28 
30 
33 
35 
// Host gauge-link buffers, one per direction: init() checks each entry for NULL
// and end() releases each entry with free().
36 void *links[4];
37 
// Ghost-link pointers; init() assigns this from cpuLink->Ghost().
38 void **ghostLink;
39 
41 
43 
// Number of colors; main() uses it in its flop-count estimate.
44 const int nColor = 3;
45 
// Prepare the global state used by the test: spinor-field parameters, the host
// gauge links and their cpuGaugeField wrapper, the device copy of the source
// spinor, and the GaugeCovDev operator stored in `dirac`.
// NOTE(review): a number of statements of this function are not visible in this
// listing (gaps in the embedded line numbers), so the comments below describe
// only the statements that are shown.
46 void init(int argc, char **argv)
47 {
49 
51 
54 
// Force Ls to 1; the warning below states that 5-d indexing is not supported.
56  Ls = 1;
57 
58  if (Nsrc != 1) warningQuda("The covariant derivative doesn't support 5-d indexing, only source 0 will be tested");
59 
62  inv_param.dslash_type = QUDA_COVDEV_DSLASH; // ensure we use the correct dslash
63 
// Describe the spinor field: 4 spin components on a 4-d lattice whose extents
// are copied from the gauge parameters.
66  csParam.nSpin=4;
67  csParam.nDim=4;
68  for (int d = 0; d < 4; d++) { csParam.x[d] = gauge_param.X[d]; }
69  // csParam.x[4] = Nsrc; // number of sources becomes the fifth dimension
70 
72  csParam.pad = 0;
78  csParam.gammaBasis = inv_param.gamma_basis; // this parameter is meaningless for staggered
80 
84 
// NOTE(review): x[0] was already set to this value by the loop above; the
// statements in between are not visible here, so confirm whether this restores
// x[0] after an intermediate change.
86  csParam.x[0] = gauge_param.X[0];
87 
88  printfQuda("Randomizing fields ...\n");
90 
91  // Allocate host side memory for the gauge field.
92  //----------------------------------------------------------------------------
// NOTE(review): the allocation of links[dir] itself is not visible in this
// listing; only the NULL check that follows it is shown.
93  for (int dir = 0; dir < 4; dir++) {
95  if (links[dir] == NULL) {
96  errorQuda("ERROR: malloc failed for gauge links");
97  }
98  }
100 
101  // cpuLink is only used for ghost allocation
102  GaugeFieldParam cpuParam(links, gauge_param);
104  cpuLink = new cpuGaugeField(cpuParam);
105  ghostLink = cpuLink->Ghost();
106 
107  printfQuda("Links sending...");
109  printfQuda("Links sent\n");
110 
111  printfQuda("Sending fields to GPU...");
112 
116 
117  printfQuda("Creating cudaSpinor\n");
119 
120  printfQuda("Creating cudaSpinorOut\n");
122 
123  printfQuda("Sending spinor field to GPU\n");
// Copy the contents of the host source spinor into the device spinor.
124  *cudaSpinor = *spinor;
125 
// Log blas::norm2 of the host and device sources side by side.
126  double spinor_norm2 = blas::norm2(*spinor);
127  double cuda_spinor_norm2 = blas::norm2(*cudaSpinor);
128  printfQuda("Source CPU = %f, CUDA=%f\n", spinor_norm2, cuda_spinor_norm2);
129 
132 
// Fill the operator parameters from inv_param (third argument `false`), hand
// `tmp` to the operator as its tmp1 field, then construct the operator.
133  DiracParam diracParam;
134  setDiracParam(diracParam, &inv_param, false);
135 
136  diracParam.tmp1 = tmp;
137 
138  dirac = new GaugeCovDev(diracParam);
139 }
140 
141 void end(void)
142 {
143  for (int dir = 0; dir < 4; dir++) {
144  free(links[dir]);
145  }
146 
147  delete dirac;
148  delete cudaSpinor;
149  delete cudaSpinorOut;
150  delete tmp;
151  delete spinor;
152  delete spinorOut;
153  delete spinorRef;
154 
155  if (cpuLink) delete cpuLink;
156 
157  endQuda();
158 }
159 
160 double dslashCUDA(int niter, int mu)
161 {
162  cudaEvent_t start, end;
163  cudaEventCreate(&start);
164  cudaEventRecord(start, 0);
165  cudaEventSynchronize(start);
166 
167  for (int i = 0; i < niter; i++) dirac->MCD(*cudaSpinorOut, *cudaSpinor, mu);
168 
169  cudaEventCreate(&end);
170  cudaEventRecord(end, 0);
171  cudaEventSynchronize(end);
172  float runTime;
173  cudaEventElapsedTime(&runTime, start, end);
174  cudaEventDestroy(start);
175  cudaEventDestroy(end);
176 
177  double secs = runTime / 1000; //stopwatchReadSeconds();
178 
179  return secs;
180 }
181 
// Run the host reference implementation for direction index `mu`, logging a
// progress message before and after.
// NOTE(review): the reference call inside each preprocessor branch is not
// visible in this listing (embedded lines 187 and 189 are missing), so as shown
// both branches are empty; confirm against the original file.
182 void covdevRef(int mu)
183 {
184  // compare to dslash reference implementation
185  printfQuda("Calculating reference implementation...");
// Multi-GPU and single-GPU builds take different branches here.
186 #ifdef MULTI_GPU
188 #else
190 #endif
191  printfQuda("done.\n");
192 }
193 
194 TEST(dslash, verify)
195 {
196  double deviation = pow(10, -(double)(cpuColorSpinorField::Compare(*spinorRef, *spinorOut)));
197  double tol = (inv_param.cuda_prec == QUDA_DOUBLE_PRECISION ? 1e-12 :
198  (inv_param.cuda_prec == QUDA_SINGLE_PRECISION ? 1e-3 : 1e-1));
199  ASSERT_LE(deviation, tol) << "CPU and CUDA implementations do not agree";
200 }
201 
203 {
// Print a short description of the test being run, followed by the
// per-dimension values returned by dimPartitioned() for X, Y, Z and T.
// NOTE(review): the function signature line and the argument lines of the
// "%s %s %d ..." printfQuda call (embedded lines 202, 208 and 209) are not
// visible in this listing.
204  printfQuda("running the following test:\n");
205 
206  printfQuda("prec recon test_type dagger S_dim T_dimension\n");
207  printfQuda("%s %s %d %d %d/%d/%d %d \n",
210  printfQuda("Grid partition info: X Y Z T\n");
211  printfQuda(" %d %d %d %d\n",
212  dimPartitioned(0),
213  dimPartitioned(1),
214  dimPartitioned(2),
215  dimPartitioned(3));
216 }
217 
// Test driver: parses the command line, initializes communications and the
// test state, then for each dagger setting and each of the four directions
// runs the host reference, a warm-up and a timed device run, prints timing and
// norms, and optionally runs the gtest comparison.
// Returns the value held in test_rc at the end, or the result of app->exit()
// when command-line parsing fails.
// NOTE(review): some statements are not visible in this listing (embedded
// lines 235, 238, 265 and 280 are missing), including the declaration of
// `listeners`.
218 int main(int argc, char **argv)
219 {
220  // initialize google test
221  ::testing::InitGoogleTest(&argc, argv);
222  // return code for google test
223  int test_rc = 0;
224  // command line options
225  auto app = make_app();
226  try {
227  app->parse(argc, argv);
228  } catch (const CLI::ParseError &e) {
229  return app->exit(e);
230  }
231 
232  initComms(argc, argv, gridsize_from_cmdline);
233 
234  // Ensure gtest prints only from rank 0
236  if (comm_rank() != 0) { delete listeners.Release(listeners.default_result_printer()); }
237 
239 
240  init(argc, argv);
241 
// attempts is fixed at 1, so the outer loop body runs once.
242  int attempts = 1;
243  for (int i = 0; i < attempts; i++) {
244 
245  // Test forward directions, then backward
246  for (int dag = 0; dag < 2; dag++) {
// First pass uses QUDA_DAG_NO, second pass QUDA_DAG_YES.
247  dag == 0 ? dagger = QUDA_DAG_NO : dagger = QUDA_DAG_YES;
248 
249  for (int mu = 0; mu < 4; mu++) { // We test all directions in one go
// The device operator and the host reference use different direction indices:
// mu (+4 when dagger is set) for the device, 2*mu (+1 when dagger is set) for
// the reference.
250  int muCuda = mu + (dagger ? 4 : 0);
251  int muCpu = mu * 2 + (dagger ? 1 : 0);
252 
253  // Reference computation
254  covdevRef(muCpu);
255  printfQuda("\n\nChecking muQuda = %d\n", muCuda);
256 
257  { // warm-up run
258  printfQuda("Tuning...\n");
259  dslashCUDA(1, muCuda);
260  }
261 
262  printfQuda("Executing %d kernel loop(s)...", niter);
263 
264  double secs = dslashCUDA(niter, muCuda);
266  printfQuda("\n%fms per loop\n", 1000 * secs);
267 
// Flop estimate: niter * Nspin * (8 * nColor - 2) * nColor, times the volume.
268  unsigned long long flops
269  = niter * cudaSpinor->Nspin() * (8 * nColor - 2) * nColor * (long long)cudaSpinor->Volume();
270  printfQuda("GFLOPS = %f\n", 1.0e-9 * flops / secs);
271 
272  double spinor_ref_norm2 = blas::norm2(*spinorRef);
273  double spinor_out_norm2 = blas::norm2(*spinorOut);
274 
275  double cuda_spinor_out_norm2 = blas::norm2(*cudaSpinorOut);
// NOTE(review): the value printed under the "CPU-CUDA" label is
// blas::norm2(*spinorOut), not a difference; the statement that fills
// spinorOut is not visible in this listing.
276  printfQuda("Results mu = %d: CPU=%f, CUDA=%f, CPU-CUDA=%f\n", muCuda, spinor_ref_norm2, cuda_spinor_out_norm2,
277  spinor_out_norm2);
278 
279  if (verify_results) {
281  if (comm_rank() != 0) { delete listeners.Release(listeners.default_result_printer()); }
282 
// NOTE(review): test_rc is overwritten for every direction, so the value
// returned by main() reflects only the last RUN_ALL_TESTS() call; an earlier
// failure is reported only through the warning below.
283  test_rc = RUN_ALL_TESTS();
284  if (test_rc != 0) warningQuda("Tests failed");
285  }
286  } // Directions
287  } // Dagger
288  }
289 
290  end();
291 
292  finalizeComms();
293  return test_rc;
294 }
295 
void setPrecision(QudaPrecision precision, QudaPrecision ghost_precision=QUDA_INVALID_PRECISION, bool force_native=false)
ColorSpinorField * tmp1
Definition: dirac_quda.h:52
Full Covariant Derivative operator. Although not a Dirac operator per se, it's a linear operator so i...
Definition: dirac_quda.h:1858
virtual void MCD(ColorSpinorField &out, const ColorSpinorField &in, const int mu) const
const void ** Ghost() const
Definition: gauge_field.h:368
void Source(const QudaSourceType sourceType, const int st=0, const int s=0, const int c=0)
static int Compare(const cpuColorSpinorField &a, const cpuColorSpinorField &b, const int resolution=1)
Perform a component by component comparison of two color-spinor fields. In doing we normalize with re...
TestEventListener * Release(TestEventListener *listener)
TestEventListener * default_result_printer() const
Definition: gtest.h:1186
TestEventListeners & listeners()
static UnitTest * GetInstance()
int comm_rank(void)
std::shared_ptr< QUDAApp > make_app(std::string app_description, std::string app_name)
double tol
QudaReconstructType link_recon
int niter
int test_type
int device_ordinal
int & ydim
bool verify_results
double mu
int & zdim
int Nsrc
QudaPrecision prec
int & tdim
int & xdim
std::array< int, 4 > gridsize_from_cmdline
bool dagger
int V
Definition: host_utils.cpp:37
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
void mat_mg4dir(cpuColorSpinorField *out, void **link, void **ghostLink, cpuColorSpinorField *in, int daggerBit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
void setDims(int *)
Definition: host_utils.cpp:315
void end(void)
GaugeCovDev * dirac
Definition: covdev_test.cpp:42
void ** ghostLink
Definition: covdev_test.cpp:38
int main(int argc, char **argv)
double dslashCUDA(int niter, int mu)
cudaColorSpinorField * cudaSpinor
Definition: covdev_test.cpp:32
QudaParity parity
Definition: covdev_test.cpp:40
cudaColorSpinorField * cudaSpinorOut
Definition: covdev_test.cpp:32
cudaColorSpinorField * tmp
Definition: covdev_test.cpp:34
const int nColor
Definition: covdev_test.cpp:44
TEST(dslash, verify)
void * links[4]
Definition: covdev_test.cpp:36
void init(int argc, char **argv)
Definition: covdev_test.cpp:46
cpuColorSpinorField * spinor
Definition: covdev_test.cpp:31
void covdevRef(int mu)
cpuColorSpinorField * spinorOut
Definition: covdev_test.cpp:31
void display_test_info()
cpuColorSpinorField * spinorRef
Definition: covdev_test.cpp:31
QudaGaugeParam gauge_param
Definition: covdev_test.cpp:26
cpuGaugeField * cpuLink
Definition: covdev_test.cpp:29
QudaInvertParam inv_param
Definition: covdev_test.cpp:27
@ QUDA_RANDOM_SOURCE
Definition: enum_quda.h:376
@ QUDA_COVDEV_DSLASH
Definition: enum_quda.h:102
@ QUDA_DAG_NO
Definition: enum_quda.h:223
@ QUDA_DAG_YES
Definition: enum_quda.h:223
@ QUDA_VERBOSE
Definition: enum_quda.h:267
@ QUDA_FULL_SITE_SUBSET
Definition: enum_quda.h:333
@ QUDA_UKQCD_GAMMA_BASIS
Definition: enum_quda.h:369
@ QUDA_EVEN_PARITY
Definition: enum_quda.h:284
@ QUDA_GHOST_EXCHANGE_PAD
Definition: enum_quda.h:509
@ QUDA_EVEN_ODD_SITE_ORDER
Definition: enum_quda.h:340
@ QUDA_MAT_SOLUTION
Definition: enum_quda.h:157
@ QUDA_DOUBLE_PRECISION
Definition: enum_quda.h:65
@ QUDA_SINGLE_PRECISION
Definition: enum_quda.h:64
@ QUDA_4D_PC
Definition: enum_quda.h:397
@ QUDA_SPACE_SPIN_COLOR_FIELD_ORDER
Definition: enum_quda.h:351
@ QUDA_ZERO_FIELD_CREATE
Definition: enum_quda.h:361
enum QudaParity_s QudaParity
#define gauge_site_size
Definition: face_gauge.cpp:34
#define ASSERT_LE(val1, val2)
Definition: gtest.h:2055
int RUN_ALL_TESTS() GTEST_MUST_USE_RESULT_
Definition: gtest.h:2468
size_t host_gauge_data_type_size
Definition: host_utils.cpp:65
int dimPartitioned(int dim)
Definition: host_utils.cpp:376
void initComms(int argc, char **argv, std::array< int, 4 > &commDims)
Definition: host_utils.cpp:255
void finalizeComms()
Definition: host_utils.cpp:292
void constructHostGaugeField(void **gauge, QudaGaugeParam &gauge_param, int argc, char **argv)
Definition: host_utils.cpp:166
int Ls
Definition: host_utils.cpp:48
void setWilsonGaugeParam(QudaGaugeParam &gauge_param)
Definition: set_params.cpp:37
void setInvertParam(QudaInvertParam &invertParam, QudaInvertArgs_t &inv_args, int external_precision, int quda_precision, double kappa, double reliable_delta)
const char * get_prec_str(QudaPrecision prec)
Definition: misc.cpp:26
const char * get_recon_str(QudaReconstructType recon)
Definition: misc.cpp:68
unsigned long long flops
double norm2(const ColorSpinorField &a)
void start()
Start profiling.
Definition: device.cpp:226
__host__ __device__ ValueType pow(ValueType x, ExponentType e)
Definition: complex_quda.h:111
void setDiracParam(DiracParam &diracParam, QudaInvertParam *inv_param, bool pc)
GTEST_API_ void InitGoogleTest(int *argc, char **argv)
ColorSpinorParam csParam
Definition: pack_test.cpp:25
Main header file for the QUDA library.
QudaGaugeParam newQudaGaugeParam(void)
void initQuda(int device)
void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param)
QudaInvertParam newQudaInvertParam(void)
void endQuda(void)
int X[4]
Definition: quda.h:35
QudaPrecision cpu_prec
Definition: quda.h:46
QudaSolutionType solution_type
Definition: quda.h:228
QudaDslashType dslash_type
Definition: quda.h:106
QudaPrecision cuda_prec
Definition: quda.h:238
QudaPrecision cpu_prec
Definition: quda.h:237
QudaGammaBasis gamma_basis
Definition: quda.h:246
QudaGhostExchange ghostExchange
Definition: lattice_field.h:77
int x[QUDA_MAX_DIM]
Definition: lattice_field.h:68
QudaSiteSubset siteSubset
Definition: lattice_field.h:72
#define printfQuda(...)
Definition: util_quda.h:114
#define warningQuda(...)
Definition: util_quda.h:132
void setVerbosity(QudaVerbosity verbosity)
Definition: util_quda.cpp:25
#define errorQuda(...)
Definition: util_quda.h:120