QUDA  v1.1.0
A library for QCD on GPUs
multigrid_benchmark_test.cpp
Go to the documentation of this file.
1 #include <stdio.h>
2 #include <stdlib.h>
3 
4 #include <quda_internal.h>
5 #include <color_spinor_field.h>
6 #include <blas_quda.h>
7 
8 #include <host_utils.h>
9 #include <command_line_params.h>
10 #include <misc.h>
11 
12 // include because of nasty globals used in the tests
13 #include <dslash_reference.h>
14 #include <dirac_quda.h>
15 
16 #define MAX(a,b) ((a)>(b)?(a):(b))
17 
18 
19 extern void usage(char** );
20 
21 using namespace quda;
22 
25 
28 
// Spin and colour dimensions of the fields under test. main() assigns both
// before each initFields() call, and the field set-up code copies them into
// param.nSpin / param.nColor.
29 int Nspin;
30 int Ncolor;
31 
32 #define MAX(a,b) ((a)>(b)?(a):(b))
33 
// Body of the test-banner routine. Its signature line is not present in this
// listing (presumably display_test_info() -- confirm against the real file).
// Prints the lattice extents together with Nspin and Ncolor, then the value
// returned by dimPartitioned() for each of the four dimensions.
35 {
36  printfQuda("running the following test:\n");
37  printfQuda("S_dimension T_dimension Nspin Ncolor\n");
// NOTE(review): the header above names four columns while six values
// (xdim, ydim, zdim, tdim, Nspin, Ncolor) are printed here.
38  printfQuda("%3d /%3d / %3d %3d %d %d\n", xdim, ydim, zdim, tdim, Nspin, Ncolor);
39  printfQuda("Grid partition info: X Y Z T\n");
40  printfQuda(" %d %d %d %d\n",
41  dimPartitioned(0),
42  dimPartitioned(1),
43  dimPartitioned(2),
44  dimPartitioned(3));
45 }
46 
// Body of the field set-up routine. Its signature line is not present in this
// listing (presumably initFields(QudaPrecision prec) -- confirm against the
// real file). The embedded numbering also skips several lines inside the
// body, so the declarations of param / gParam and some allocations are not
// visible here.
48 {
// Spinor-field parameters: colour and spin counts come from the file-scope
// Ncolor / Nspin globals.
50  param.nColor = Ncolor;
51  param.nSpin = Nspin;
52  param.nDim = 5; // number of spacetime dimensions
53 
54  param.pad = 0; // padding must be zero for cpu fields
55  param.siteSubset = QUDA_FULL_SITE_SUBSET;
56  param.x[0] = xdim;
57  param.x[1] = ydim;
58  param.x[2] = zdim;
59  param.x[3] = tdim;
// The fifth extent is the number of sources.
60  param.x[4] = Nsrc;
61  param.pc_type = QUDA_4D_PC;
62 
63  param.siteOrder = QUDA_EVEN_ODD_SITE_ORDER;
65  param.setPrecision(QUDA_DOUBLE_PRECISION);
67 
// NOTE(review): embedded lines 68 and 70-71 are missing from this listing;
// presumably the xH / yH fields are created there -- confirm.
69 
72 
73  //static_cast<cpuColorSpinorField*>(xH)->Source(QUDA_RANDOM_SOURCE, 0, 0, 0);
74  //static_cast<cpuColorSpinorField*>(yH)->Source(QUDA_RANDOM_SOURCE, 0, 0, 0);
75 
76  // Now set the parameters for the cuda fields
77  //param.pad = xdim*ydim*zdim/2;
78 
// The gamma basis is only overridden for four-spin fields.
79  if (param.nSpin == 4) param.gammaBasis = QUDA_UKQCD_GAMMA_BASIS;
// Switch from double precision to the caller-requested precision.
81  param.setPrecision(prec);
82  param.fieldOrder = QUDA_FLOAT2_FIELD_ORDER;
83 
// NOTE(review): embedded lines 84-85 are missing from this listing;
// presumably xD / yD are created there -- confirm.
86 
87  //*xD = *xH;
88  //*yD = *yH;
89 
// Gauge-field parameters: four-dimensional, same lattice extents, with the
// colour count set to the product nColor * nSpin of the spinor parameters.
91  gParam.x[0] = xdim;
92  gParam.x[1] = ydim;
93  gParam.x[2] = zdim;
94  gParam.x[3] = tdim;
95  gParam.nColor = param.nColor*param.nSpin;
101  gParam.setPrecision(param.Precision());
102  gParam.nDim = 4;
105  gParam.nFace = 1;
106 
// Y and Yhat are built with nFace = 1 ...
108  Y_h = new cpuGaugeField(gParam);
109  Yhat_h = new cpuGaugeField(gParam);
110 
// ... whereas X and Xinv are built with nFace = 0.
112  gParam.nFace = 0;
113  X_h = new cpuGaugeField(gParam);
114  Xinv_h = new cpuGaugeField(gParam);
115 
118  gParam.nFace = 1;
119 
// Each face size is half the product of the three other extents; the pad is
// nFace * 2 times the largest of the four.
120  int x_face_size = gParam.x[1]*gParam.x[2]*gParam.x[3]/2;
121  int y_face_size = gParam.x[0]*gParam.x[2]*gParam.x[3]/2;
122  int z_face_size = gParam.x[0]*gParam.x[1]*gParam.x[3]/2;
123  int t_face_size = gParam.x[0]*gParam.x[1]*gParam.x[2]/2;
124  int pad = MAX(x_face_size, y_face_size);
125  pad = MAX(pad, z_face_size);
126  pad = MAX(pad, t_face_size);
127  gParam.pad = gParam.nFace * pad * 2;
128 
130 
// Create the cudaGaugeField counterparts and fill them from the cpu fields.
// NOTE(review): the allocation of Yhat_d (embedded line 132) and of Xinv_d
// (embedded line 140) is not visible in this listing -- confirm both exist
// before the copy() calls below.
131  Y_d = new cudaGaugeField(gParam);
133  Y_d->copy(*Y_h);
134  Yhat_d->copy(*Yhat_h);
135 
138  gParam.nFace = 0;
139  X_d = new cudaGaugeField(gParam);
141  X_d->copy(*X_h);
142  Xinv_d->copy(*Xinv_h);
143 }
144 
145 
// Body of the tear-down routine. Its signature line is not present in this
// listing (presumably freeFields() -- confirm against the real file).
// Deletes the four spinor-field pointers and the eight gauge-field pointers;
// the pointers themselves are left unchanged after deletion.
147 {
148  delete xD;
149  delete yD;
150 
151  delete xH;
152  delete yH;
153 
// cpuGaugeField objects.
154  delete Y_h;
155  delete X_h;
156  delete Xinv_h;
157  delete Yhat_h;
158 
// cudaGaugeField objects.
159  delete Y_d;
160  delete X_d;
161  delete Xinv_d;
162  delete Yhat_d;
163 }
164 
166 
167 double benchmark(int test, const int niter) {
168 
169  cudaEvent_t start, end;
170  cudaEventCreate(&start);
171  cudaEventCreate(&end);
172  cudaEventRecord(start, 0);
173 
174  switch(test) {
175  case 0:
176  for (int i=0; i < niter; ++i) dirac->Dslash(xD->Even(), yD->Odd(), QUDA_EVEN_PARITY);
177  break;
178  case 1:
179  for (int i=0; i < niter; ++i) dirac->M(*xD, *yD);
180  break;
181  case 2:
182  for (int i=0; i < niter; ++i) dirac->Clover(xD->Even(), yD->Even(), QUDA_EVEN_PARITY);
183  break;
184  default:
185  errorQuda("Undefined test %d", test);
186  }
187 
188  cudaEventRecord(end, 0);
189  cudaEventSynchronize(end);
190  float runTime;
191  cudaEventElapsedTime(&runTime, start, end);
192  cudaEventDestroy(start);
193  cudaEventDestroy(end);
194 
195  double secs = runTime / 1000;
196  return secs;
197 }
198 
199 
// Display labels for the benchmark routines, indexed by the integer test id
// (0, 1, 2) that the --test option maps "Dslash", "Mat" and "Clover" onto.
const char *names[] = {"Dslash", "Mat", "Clover"};
205 
// Entry point: parses the command line, initialises communications, then for
// Ncolor = 24 and 32 builds the fields, runs the selected benchmark and
// prints the measured Gflop/s.
// NOTE(review): the embedded numbering skips lines 216, 225, 228-229, 231,
// 240-241 and 243, so some statements of the real file are not visible here;
// in particular neither the declaration of `param` nor the creation of
// `dirac` appears in this listing.
206 int main(int argc, char** argv)
207 {
208  // Set some defaults that lets the benchmark fit in memory if you run it
209  // with default parameters.
210  xdim = ydim = zdim = tdim = 8;
211 
212  // command line options
213  auto app = make_app();
214  // add_eigen_option_group(app);
215  // add_deflation_option_group(app);
// --test accepts the names "Dslash", "Mat" and "Clover" and stores the
// matching integer (0, 1, 2) in test_type.
217  CLI::TransformPairs<int> test_type_map {{"Dslash", 0}, {"Mat", 1}, {"Clover", 2}};
218  app->add_option("--test", test_type, "Test method")->transform(CLI::CheckedTransformer(test_type_map));
219 
// On a parse error, return whatever exit code the app object produces.
220  try {
221  app->parse(argc, argv);
222  } catch (const CLI::ParseError &e) {
223  return app->exit(e);
224  }
226 
227  initComms(argc, argv, gridsize_from_cmdline);
230 
232 
// Nspin is fixed at 2 for every run; only Ncolor varies in the loop below.
233  Nspin = 2;
234 
235  printfQuda("\nBenchmarking %s precision with %d iterations...\n\n", get_prec_str(prec), niter);
236  for (int c=24; c<=32; c+=8) {
237  Ncolor = c;
238 
239  initFields(prec);
240 
242  param.halo_precision = smoother_halo_prec;
244 
245  // do the initial tune
246  benchmark(test_type, 1);
247 
248  // now rerun with more iterations to get accurate speed measurements
// Flops() returns and then zeroes the counter, so this call discards the
// flops accumulated by the single warm-up application above.
249  dirac->Flops(); // reset flops counter
250 
251  double secs = benchmark(test_type, niter);
// NOTE(review): secs is used as a divisor without a zero check.
252  double gflops = (dirac->Flops()*1e-9)/(secs);
253 
254  printfQuda("Ncolor = %2d, %-31s: Gflop/s = %6.1f\n", Ncolor, names[test_type], gflops);
255 
// Release the operator and all fields before the next Ncolor value.
256  delete dirac;
257  freeFields();
258  }
259 
260  endQuda();
261 
262  finalizeComms();
263 }
double test(int data_type)
const ColorSpinorField & Odd() const
const ColorSpinorField & Even() const
void Clover(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
Apply the coarse clover operator.
virtual void Dslash(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
Apply DslashXpay out = (D * in)
virtual void M(ColorSpinorField &out, const ColorSpinorField &in) const
Apply the full operator.
unsigned long long Flops() const
returns and then zeroes flopcount
Definition: dirac_quda.h:313
void copy(const GaugeField &src)
std::shared_ptr< QUDAApp > make_app(std::string app_description, std::string app_name)
int niter
int test_type
int device_ordinal
QudaVerbosity verbosity
int & ydim
void add_multigrid_option_group(std::shared_ptr< QUDAApp > quda_app)
int & zdim
int Nsrc
QudaPrecision prec
int & tdim
int & xdim
QudaPrecision smoother_halo_prec
std::array< int, 4 > gridsize_from_cmdline
QudaPrecision prec_sloppy
void end(void)
enum QudaPrecision_s QudaPrecision
@ QUDA_FULL_SITE_SUBSET
Definition: enum_quda.h:333
@ QUDA_DEGRAND_ROSSI_GAMMA_BASIS
Definition: enum_quda.h:368
@ QUDA_UKQCD_GAMMA_BASIS
Definition: enum_quda.h:369
@ QUDA_RECONSTRUCT_NO
Definition: enum_quda.h:70
@ QUDA_PERIODIC_T
Definition: enum_quda.h:57
@ QUDA_EVEN_PARITY
Definition: enum_quda.h:284
@ QUDA_SCALAR_GEOMETRY
Definition: enum_quda.h:500
@ QUDA_COARSE_GEOMETRY
Definition: enum_quda.h:503
@ QUDA_GHOST_EXCHANGE_NO
Definition: enum_quda.h:508
@ QUDA_GHOST_EXCHANGE_PAD
Definition: enum_quda.h:509
@ QUDA_EVEN_ODD_SITE_ORDER
Definition: enum_quda.h:340
@ QUDA_DOUBLE_PRECISION
Definition: enum_quda.h:65
@ QUDA_INVALID_PRECISION
Definition: enum_quda.h:66
@ QUDA_4D_PC
Definition: enum_quda.h:397
@ QUDA_FLOAT2_GAUGE_ORDER
Definition: enum_quda.h:40
@ QUDA_QDP_GAUGE_ORDER
Definition: enum_quda.h:44
@ QUDA_FLOAT2_FIELD_ORDER
Definition: enum_quda.h:348
@ QUDA_SPACE_SPIN_COLOR_FIELD_ORDER
Definition: enum_quda.h:351
@ QUDA_ZERO_FIELD_CREATE
Definition: enum_quda.h:361
@ QUDA_COARSE_LINKS
Definition: enum_quda.h:28
GaugeFieldParam gParam
int dimPartitioned(int dim)
Definition: host_utils.cpp:376
void initComms(int argc, char **argv, std::array< int, 4 > &commDims)
Definition: host_utils.cpp:255
void finalizeComms()
Definition: host_utils.cpp:292
const char * get_prec_str(QudaPrecision prec)
Definition: misc.cpp:26
const char * names[]
ColorSpinorField * xH
cudaGaugeField * Xinv_d
void freeFields()
DiracCoarse * dirac
int main(int argc, char **argv)
ColorSpinorField * yH
cpuGaugeField * Xinv_h
ColorSpinorField * xD
cudaGaugeField * X_d
cudaGaugeField * Y_d
void initFields(QudaPrecision prec)
void usage(char **)
ColorSpinorField * yD
void display_test_info()
cpuGaugeField * X_h
cpuGaugeField * Yhat_h
double benchmark(int test, const int niter)
cpuGaugeField * Y_h
#define MAX(a, b)
cudaGaugeField * Yhat_d
void start()
Start profiling.
Definition: device.cpp:226
QudaGaugeParam param
Definition: pack_test.cpp:18
void initQuda(int device)
void endQuda(void)
QudaReconstructType reconstruct
Definition: gauge_field.h:50
QudaGaugeFieldOrder order
Definition: gauge_field.h:51
QudaFieldGeometry geometry
Definition: gauge_field.h:62
void setPrecision(QudaPrecision precision, bool force_native=false)
Helper function for setting the precision and corresponding field order for QUDA internal fields.
Definition: gauge_field.h:173
QudaLinkType link_type
Definition: gauge_field.h:53
QudaFieldCreate create
Definition: gauge_field.h:60
QudaTboundary t_boundary
Definition: gauge_field.h:54
QudaGhostExchange ghostExchange
Definition: lattice_field.h:77
int x[QUDA_MAX_DIM]
Definition: lattice_field.h:68
QudaSiteSubset siteSubset
Definition: lattice_field.h:72
#define printfQuda(...)
Definition: util_quda.h:114
void setVerbosity(QudaVerbosity verbosity)
Definition: util_quda.cpp:25
#define errorQuda(...)
Definition: util_quda.h:120