QUDA  1.0.0
multigrid_benchmark_test.cu
Go to the documentation of this file.
1 #include <stdio.h>
2 #include <stdlib.h>
3 
4 #include <quda_internal.h>
5 #include <color_spinor_field.h>
6 #include <blas_quda.h>
7 
8 #include <test_util.h>
9 #include <misc.h>
10 
11 // include because of nasty globals used in the tests
12 #include <dslash_util.h>
13 #include <dirac_quda.h>
14 
15 #define MAX(a,b) ((a)>(b)?(a):(b))
16 
19 extern int nvec;
20 extern int device;
21 extern int xdim;
22 extern int ydim;
23 extern int zdim;
24 extern int tdim;
25 extern int gridsize_from_cmdline[];
26 extern int niter;
27 
28 extern int Nsrc; // number of spinors to apply to simultaneously
29 
30 extern bool verify_results;
31 
32 extern int test_type;
33 
34 extern QudaPrecision prec;
37 
38 extern void usage(char** );
39 
40 using namespace quda;
41 
44 
47 
// Number of spin components given to the test fields (copied into param.nSpin
// by the field-setup code); main() assigns it before any fields are created.
48 int Nspin;
// Number of colors given to the test fields (copied into param.nColor by the
// field-setup code); main() overwrites it on every pass of its benchmark loop.
49 int Ncolor;
50 
51 #define MAX(a,b) ((a)>(b)?(a):(b))
52 
53 void
55 {
56  printfQuda("running the following test:\n");
57  printfQuda("S_dimension T_dimension Nspin Ncolor\n");
58  printfQuda("%3d /%3d / %3d %3d %d %d\n", xdim, ydim, zdim, tdim, Nspin, Ncolor);
59  printfQuda("Grid partition info: X Y Z T\n");
60  printfQuda(" %d %d %d %d\n",
61  dimPartitioned(0),
62  dimPartitioned(1),
63  dimPartitioned(2),
64  dimPartitioned(3));
65  return;
66 }
67 
// Body of the field-setup routine. The file index lists it as
// `void initFields(QudaPrecision prec)`, but the signature line and a number
// of interior lines (see the gaps in the embedded numbering) are absent from
// this listing -- including whatever declares `param` and `gParam`.
// NOTE(review): restore those lines from the real source before relying on
// this block.
//
// What is visible: it allocates two host spinor fields (xH, yH), two device
// spinor fields (xD, yD), four host gauge fields (Y_h, Yhat_h, X_h, Xinv_h)
// and their four device counterparts, copying each host gauge field into its
// device partner.
69 {
71  param.nColor = Ncolor;
72  param.nSpin = Nspin;
73  param.nDim = 5; // four lattice dimensions plus a fifth whose extent is Nsrc (see x[4] below)
74 
75  param.pad = 0; // padding must be zero for cpu fields
77  param.x[0] = xdim;
78  param.x[1] = ydim;
79  param.x[2] = zdim;
80  param.x[3] = tdim;
81  param.x[4] = Nsrc;
82  param.pc_type = QUDA_4D_PC;
83 
88 
90 
// Host-side spinor fields, built from the parameters as they stand here.
91  xH = new cpuColorSpinorField(param);
92  yH = new cpuColorSpinorField(param);
93 
94  //static_cast<cpuColorSpinorField*>(xH)->Source(QUDA_RANDOM_SOURCE, 0, 0, 0);
95  //static_cast<cpuColorSpinorField*>(yH)->Source(QUDA_RANDOM_SOURCE, 0, 0, 0);
96 
97  // Now set the parameters for the cuda fields
98  //param.pad = xdim*ydim*zdim/2;
99 
// The gamma basis is only overridden for 4-spin fields.
100  if (param.nSpin == 4) param.gammaBasis = QUDA_UKQCD_GAMMA_BASIS;
// Device spinors use the precision named `prec` in scope here.
102  param.setPrecision(prec);
104 
105  xD = new cudaColorSpinorField(param);
106  yD = new cudaColorSpinorField(param);
107 
108  // check for successful allocation
109  checkCudaError();
110 
// The host-to-device spinor copies are disabled, so nothing visible in this
// listing copies xH/yH into xD/yD.
111  //*xD = *xH;
112  //*yD = *yH;
113 
// Gauge-field parameters: same four lattice extents as the spinors, with
// nColor set to the product of the spinor nColor and nSpin.
115  gParam.x[0] = xdim;
116  gParam.x[1] = ydim;
117  gParam.x[2] = zdim;
118  gParam.x[3] = tdim;
119  gParam.nColor = param.nColor*param.nSpin;
121  gParam.order = QUDA_QDP_GAUGE_ORDER;
122  gParam.link_type = QUDA_COARSE_LINKS;
123  gParam.t_boundary = QUDA_PERIODIC_T;
// Host gauge fields take whatever precision `param` currently reports
// (set from `prec` just above).
125  gParam.setPrecision(param.Precision());
126  gParam.nDim = 4;
129  gParam.nFace = 1;
130 
// Y and Yhat are created with nFace = 1 ...
132  Y_h = new cpuGaugeField(gParam);
133  Yhat_h = new cpuGaugeField(gParam);
134 
// ... while X and Xinv are created with nFace = 0.
136  gParam.nFace = 0;
137  X_h = new cpuGaugeField(gParam);
138  Xinv_h = new cpuGaugeField(gParam);
139 
142  gParam.nFace = 1;
143 
// Device padding: take the largest of the four halved face sizes (product of
// the other three extents, divided by 2) and scale it by nFace * 2.
144  int x_face_size = gParam.x[1]*gParam.x[2]*gParam.x[3]/2;
145  int y_face_size = gParam.x[0]*gParam.x[2]*gParam.x[3]/2;
146  int z_face_size = gParam.x[0]*gParam.x[1]*gParam.x[3]/2;
147  int t_face_size = gParam.x[0]*gParam.x[1]*gParam.x[2]/2;
148  int pad = MAX(x_face_size, y_face_size);
149  pad = MAX(pad, z_face_size);
150  pad = MAX(pad, t_face_size);
151  gParam.pad = gParam.nFace * pad * 2;
152 
// Device gauge fields are created in prec_sloppy rather than prec.
153  gParam.setPrecision(prec_sloppy);
154 
155  Y_d = new cudaGaugeField(gParam);
156  Yhat_d = new cudaGaugeField(gParam);
// NOTE(review): no visible line fills the host gauge fields before these
// copies -- confirm how they are initialised (a missing line may set this).
157  Y_d->copy(*Y_h);
158  Yhat_d->copy(*Yhat_h);
159 
// nFace is reset to 0 for X/Xinv; gParam.pad is not visibly recomputed, so it
// keeps the value assigned above.
162  gParam.nFace = 0;
163  X_d = new cudaGaugeField(gParam);
164  Xinv_d = new cudaGaugeField(gParam);
165  X_d->copy(*X_h);
166  Xinv_d->copy(*Xinv_h);
167 }
168 
169 
171 {
172  delete xD;
173  delete yD;
174 
175  delete xH;
176  delete yH;
177 
178  delete Y_h;
179  delete X_h;
180  delete Xinv_h;
181  delete Yhat_h;
182 
183  delete Y_d;
184  delete X_d;
185  delete Xinv_d;
186  delete Yhat_d;
187 }
188 
190 
/**
 * Abort through errorQuda when a CUDA runtime call made by the timer fails.
 *
 * @param status Return code of the CUDA runtime call
 * @param what   Name of the call, used in the error message
 */
static void checkTimerCall(cudaError_t status, const char *what)
{
  if (status != cudaSuccess) errorQuda("%s failed: %s", what, cudaGetErrorString(status));
}

/**
 * Time niter consecutive applications of one operation of the global `dirac`
 * operator on the global device fields xD / yD.
 *
 * A pair of CUDA events recorded on stream 0 brackets the whole loop, so the
 * returned value is the time for all niter applications together rather than
 * a per-iteration figure.
 *
 * @param test  Operation to run: 0 = Dslash (even-parity output from the odd
 *              half of yD), 1 = M on the full fields, 2 = Clover on the even
 *              halves. Any other value is reported through errorQuda.
 * @param niter Number of times the operation is applied
 * @return Elapsed time in seconds as measured by the CUDA events
 */
double benchmark(int test, const int niter) {

  cudaEvent_t start, end;
  checkTimerCall(cudaEventCreate(&start), "cudaEventCreate(start)");
  checkTimerCall(cudaEventCreate(&end), "cudaEventCreate(end)");
  checkTimerCall(cudaEventRecord(start, 0), "cudaEventRecord(start)");

  switch(test) {
  case 0:
    for (int i=0; i < niter; ++i) dirac->Dslash(xD->Even(), yD->Odd(), QUDA_EVEN_PARITY);
    break;
  case 1:
    for (int i=0; i < niter; ++i) dirac->M(*xD, *yD);
    break;
  case 2:
    for (int i=0; i < niter; ++i) dirac->Clover(xD->Even(), yD->Even(), QUDA_EVEN_PARITY);
    break;
  default:
    errorQuda("Undefined test %d", test);
  }

  checkTimerCall(cudaEventRecord(end, 0), "cudaEventRecord(end)");
  // Block until the end event has completed so the elapsed time covers all queued work.
  checkTimerCall(cudaEventSynchronize(end), "cudaEventSynchronize(end)");

  // Initialised so that a failed query can never hand back an indeterminate value.
  float runTime = 0.0f;
  checkTimerCall(cudaEventElapsedTime(&runTime, start, end), "cudaEventElapsedTime");
  checkTimerCall(cudaEventDestroy(start), "cudaEventDestroy(start)");
  checkTimerCall(cudaEventDestroy(end), "cudaEventDestroy(end)");

  // cudaEventElapsedTime reports milliseconds
  double secs = runTime / 1000;
  return secs;
}
222 
223 
// Printable label for each benchmark, indexed by test number:
// 0 -> "Dslash", 1 -> "Mat", 2 -> "Clover".
const char *names[] = {"Dslash", "Mat", "Clover"};
229 
// Benchmark driver. Parses the command line, initialises communications and
// QUDA, then for Ncolor = 24 and Ncolor = 32 (with Nspin fixed at 2) builds
// the fields and a DiracCoarse operator, runs one warm-up application followed
// by niter timed applications of the operation selected by test_type, and
// prints the resulting Gflop/s.
//
// NOTE(review): several statements are absent from this listing (the embedded
// numbering skips 243, 246, 250 and 260-261), among them whatever declares the
// `param` handed to DiracCoarse -- confirm against the real source.
230 int main(int argc, char** argv)
231 {
232  // Set some defaults that let the benchmark fit in memory if you run it
233  // with default parameters.
234  xdim = ydim = zdim = tdim = 8;
235 
// Every argument must be accepted by process_command_line_option (which is
// handed &i); anything it rejects is reported and passed on to usage().
236  for (int i = 1; i < argc; i++){
237  if(process_command_line_option(argc, argv, &i) == 0){
238  continue;
239  }
240  printfQuda("ERROR: Invalid option:%s\n", argv[i]);
241  usage(argv);
242  }
244 
245  initComms(argc, argv, gridsize_from_cmdline);
247  initQuda(device);
248 
249  // enable the tuning
251 
252  Nspin = 2;
253 
254  printfQuda("\nBenchmarking %s precision with %d iterations...\n\n", get_prec_str(prec), niter);
// Two passes: Ncolor = 24, then Ncolor = 32. Fields and the operator are
// rebuilt from scratch on each pass and released at the end of it.
255  for (int c=24; c<=32; c+=8) {
256  Ncolor = c;
257 
258  initFields(prec);
259 
262  dirac = new DiracCoarse(param, Y_h, X_h, Xinv_h, Yhat_h, Y_d, X_d, Xinv_d, Yhat_d);
263 
264  // do the initial tune
265  benchmark(test_type, 1);
266 
267  // now rerun with more iterations to get accurate speed measurements
268  dirac->Flops(); // reset flops counter
269 
// Rate = flops reported by the operator for the timed run, scaled to Gflop,
// divided by the measured seconds.
270  double secs = benchmark(test_type, niter);
271  double gflops = (dirac->Flops()*1e-9)/(secs);
272 
273  printfQuda("Ncolor = %2d, %-31s: Gflop/s = %6.1f\n", Ncolor, names[test_type], gflops);
274 
275  delete dirac;
276  freeFields();
277  }
278 
279  // clear the error state
280  cudaGetLastError();
281 
282  endQuda();
283 
284  finalizeComms();
285 }
DiracCoarse * dirac
QudaTboundary t_boundary
Definition: gauge_field.h:20
void display_test_info()
int dimPartitioned(int dim)
Definition: test_util.cpp:1776
QudaGhostExchange ghostExchange
Definition: lattice_field.h:76
void setPrecision(QudaPrecision precision, QudaPrecision ghost_precision=QUDA_INVALID_PRECISION, bool force_native=false)
void endQuda(void)
enum QudaPrecision_s QudaPrecision
double test(int kernel)
Definition: blas_test.cu:504
void freeFields()
void end(void)
Definition: blas_quda.cu:489
#define errorQuda(...)
Definition: util_quda.h:121
int tdim
Definition: test_util.cpp:1618
int process_command_line_option(int argc, char **argv, int *idx)
Definition: test_util.cpp:2019
const ColorSpinorField & Even() const
const ColorSpinorField & Odd() const
cudaGaugeField * Yhat_d
QudaPrecision smoother_halo_prec
Definition: test_util.cpp:1694
void finalizeComms()
Definition: test_util.cpp:128
bool verify_results
Definition: test_util.cpp:1643
const char * get_prec_str(QudaPrecision prec)
Definition: misc.cpp:701
void Clover(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
Apply the coarse clover operator.
unsigned long long Flops() const
Definition: dirac_quda.h:177
QudaSiteSubset siteSubset
Definition: lattice_field.h:71
cpuGaugeField * Y_h
QudaPrecision prec_sloppy
Definition: test_util.cpp:1609
QudaGaugeParam param
Definition: pack_test.cpp:17
int x[QUDA_MAX_DIM]
Definition: lattice_field.h:67
virtual void Dslash(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
Apply DslashXpay out = (D * in)
ColorSpinorField * xH
int niter
Definition: test_util.cpp:1629
void initQuda(int device)
const char * names[]
cudaGaugeField * Xinv_d
cpuGaugeField * Yhat_h
int Nsrc
Definition: test_util.cpp:1627
cpuGaugeField * Xinv_h
int test_type
Definition: test_util.cpp:1636
ColorSpinorField * yH
QudaGaugeFieldOrder order
Definition: gauge_field.h:17
QudaPrecision prec
Definition: test_util.cpp:1608
QudaPrecision halo_precision
Definition: dirac_quda.h:46
cudaGaugeField * X_d
double benchmark(int test, const int niter)
int nvec
Definition: test_util.cpp:1637
#define MAX(a, b)
void setPrecision(QudaPrecision precision, bool force_native=false)
Helper function for setting the precision and corresponding field order for QUDA internal fields...
Definition: gauge_field.h:131
GaugeFieldParam gParam
QudaDslashType dslash_type
Definition: test_util.cpp:1621
void usage(char **)
Definition: test_util.cpp:1783
cpuGaugeField * X_h
QudaLinkType link_type
Definition: gauge_field.h:19
ColorSpinorField * yD
int device
Definition: test_util.cpp:1602
QudaPrecision Precision() const
Definition: lattice_field.h:58
#define printfQuda(...)
Definition: util_quda.h:115
cudaGaugeField * Y_d
enum QudaDslashType_s QudaDslashType
QudaReconstructType reconstruct
Definition: gauge_field.h:16
QudaFieldCreate create
Definition: gauge_field.h:26
#define checkCudaError()
Definition: util_quda.h:161
QudaFieldGeometry geometry
Definition: gauge_field.h:28
int ydim
Definition: test_util.cpp:1616
QudaInverterType inv_type
Definition: test_util.cpp:1640
virtual void M(ColorSpinorField &out, const ColorSpinorField &in) const
Apply the full operator.
void initComms(int argc, char **argv, int *const commDims)
Definition: test_util.cpp:88
void copy(const GaugeField &src)
int gridsize_from_cmdline[]
Definition: test_util.cpp:49
int xdim
Definition: test_util.cpp:1615
int zdim
Definition: test_util.cpp:1617
void setVerbosity(QudaVerbosity verbosity)
Definition: util_quda.cpp:25
void initFields(QudaPrecision prec)
int main(int argc, char **argv)
ColorSpinorField * xD
enum QudaInverterType_s QudaInverterType