QUDA  0.9.0
multigrid_benchmark_test.cu
Go to the documentation of this file.
1 #include <stdio.h>
2 #include <stdlib.h>
3 
4 #include <quda_internal.h>
5 #include <color_spinor_field.h>
6 #include <blas_quda.h>
7 
8 #include <test_util.h>
9 #include <misc.h>
10 
11 // include because of nasty globals used in the tests
12 #include <dslash_util.h>
13 #include <dirac_quda.h>
14 #include <algorithm>
15 
18 extern int nvec;
19 extern int device;
20 extern int xdim;
21 extern int ydim;
22 extern int zdim;
23 extern int tdim;
24 extern int gridsize_from_cmdline[];
25 extern int niter;
26 
27 extern int Nsrc; // number of spinors to apply to simultaneously
28 
29 extern bool verify_results;
30 
31 extern int test_type;
32 
33 extern QudaPrecision prec;
34 
35 extern void usage(char** );
36 
37 using namespace quda;
38 
41 
44 
// Number of spin components of the benchmarked fields; main() assigns 2 before the benchmark loop,
// and initFields copies it into param.nSpin.
45 int Nspin;
// Number of colors of the benchmarked fields; main() assigns 24 and then 32,
// and initFields copies it into param.nColor.
46 int Ncolor;
47 
// Print a summary of the test configuration through printfQuda: the lattice extents
// (xdim, ydim, zdim, tdim), Nspin, Ncolor, and the value of dimPartitioned(d) for d = 0..3.
// Takes no values from the caller beyond the file-level globals and returns nothing.
// NOTE(review): the embedded listing numbers jump from 48 to 50, so the line carrying the
// function name and parameter list is not visible in this view.
48 void
50 {
51  printfQuda("running the following test:\n");
52  printfQuda("S_dimension T_dimension Nspin Ncolor\n");
// NOTE(review): the header line above names four columns, while the format below prints six
// values (four lattice extents, then Nspin and Ncolor) -- confirm the intended labelling.
53  printfQuda("%3d /%3d / %3d %3d %d %d\n", xdim, ydim, zdim, tdim, Nspin, Ncolor);
54  printfQuda("Grid partition info: X Y Z T\n");
55  printfQuda(" %d %d %d %d\n",
56  dimPartitioned(0),
57  dimPartitioned(1),
58  dimPartitioned(2),
59  dimPartitioned(3));
60  return;
61 }
62 
// Set up the spinor-field parameters (param) and gauge-field parameters (gParam) from the
// file-level globals, allocate the host gauge fields (Y_h, Yhat_h, X_h, Xinv_h) and the device
// gauge fields (Y_d, X_d), and copy the host gauge fields into their device counterparts.
// NOTE(review): this listing has gaps in its embedded line numbers (e.g. 63, 65, 80, 82, 84,
// 86-87, 100-101, 109, 115-119, 144, 152). The function header, the declarations of param and
// gParam, the construction of xH/yH/xD/yD, and the allocation of Yhat_d and Xinv_d are therefore
// not visible here -- consult the original file before changing this function.
64 {
// Spinor field description: color/spin counts come from the Ncolor/Nspin globals.
66  param.nColor = Ncolor;
67  param.nSpin = Nspin;
68  param.nDim = 5; // number of spacetime dimensions
69 
70  param.pad = 0; // padding must be zero for cpu fields
71  param.siteSubset = QUDA_FULL_SITE_SUBSET;
// x[0..3] are the lattice extents; the fifth extent x[4] is Nsrc.
72  param.x[0] = xdim;
73  param.x[1] = ydim;
74  param.x[2] = zdim;
75  param.x[3] = tdim;
76  param.x[4] = Nsrc;
77  param.PCtype = QUDA_4D_PC;
78 
79  param.siteOrder = QUDA_EVEN_ODD_SITE_ORDER;
// Double precision is set here first; param.precision is overwritten with prec further down.
81  param.precision = QUDA_DOUBLE_PRECISION;
83 
85 
88 
89  //static_cast<cpuColorSpinorField*>(xH)->Source(QUDA_RANDOM_SOURCE, 0, 0, 0);
90  //static_cast<cpuColorSpinorField*>(yH)->Source(QUDA_RANDOM_SOURCE, 0, 0, 0);
91 
92  // Now set the parameters for the cuda fields
93  //param.pad = xdim*ydim*zdim/2;
94 
// The gamma basis is only assigned when nSpin is 4.
95  if (param.nSpin == 4) param.gammaBasis = QUDA_UKQCD_GAMMA_BASIS;
97  param.precision = prec;
98  param.fieldOrder = QUDA_FLOAT2_FIELD_ORDER;
99 
102 
103  // check for successful allocation
104  checkCudaError();
105 
106  //*xD = *xH;
107  //*yD = *yH;
108 
// Gauge field description: same four lattice extents as the spinors, with
// nColor = param.nColor * param.nSpin and the precision taken from param.
110  gParam.x[0] = xdim;
111  gParam.x[1] = ydim;
112  gParam.x[2] = zdim;
113  gParam.x[3] = tdim;
114  gParam.nColor = param.nColor*param.nSpin;
120  gParam.precision = param.precision;
121  gParam.nDim = 4;
124  gParam.nFace = 1;
125 
// Host fields Y_h and Yhat_h are created with nFace = 1.
127  Y_h = new cpuGaugeField(gParam);
128  Yhat_h = new cpuGaugeField(gParam);
129 
// Host fields X_h and Xinv_h are created with nFace = 0.
131  gParam.nFace = 0;
132  X_h = new cpuGaugeField(gParam);
133  Xinv_h = new cpuGaugeField(gParam);
134 
// Device Y field: pad is nFace * 2 * (largest of the four half three-dimensional volumes).
137  gParam.nFace = 1;
138  int pad = std::max( { (gParam.x[0]*gParam.x[1]*gParam.x[2])/2,
139  (gParam.x[1]*gParam.x[2]*gParam.x[3])/2,
140  (gParam.x[0]*gParam.x[2]*gParam.x[3])/2,
141  (gParam.x[0]*gParam.x[1]*gParam.x[3])/2 } );
142  gParam.pad = gParam.nFace * pad * 2;
143  Y_d = new cudaGaugeField(gParam);
// NOTE(review): Yhat_d is copied into below, but its allocation is not visible (listing line 144 is absent).
145  Y_d->copy(*Y_h);
146  Yhat_d->copy(*Yhat_h);
147 
// NOTE(review): nFace is reset to 0 here; no recomputation of gParam.pad is visible
// (listing lines 148-149 are absent) -- confirm against the original file.
150  gParam.nFace = 0;
151  X_d = new cudaGaugeField(gParam);
// NOTE(review): Xinv_d is copied into below, but its allocation is not visible (listing line 152 is absent).
153  X_d->copy(*X_h);
154  Xinv_d->copy(*Xinv_h);
155 }
156 
157 
// Release every field object used by the benchmark with delete: the device and host spinor
// fields (xD, yD, xH, yH), the host gauge fields (Y_h, X_h, Xinv_h, Yhat_h) and the device
// gauge fields (Y_d, X_d, Xinv_d, Yhat_d). The pointers are not reset afterwards.
// NOTE(review): the function header is not visible in this listing (embedded line 158 is absent).
159 {
160  delete xD;
161  delete yD;
162 
163  delete xH;
164  delete yH;
165 
166  delete Y_h;
167  delete X_h;
168  delete Xinv_h;
169  delete Yhat_h;
170 
171  delete Y_d;
172  delete X_d;
173  delete Xinv_d;
174  delete Yhat_d;
175 }
176 
178 
/**
 * Time niter consecutive applications of one operator of the global dirac object.
 *
 * The selected operator is applied niter times between two CUDA events recorded on
 * stream 0, and the elapsed time between those events is returned.
 *
 * @param test   Which operator to apply: 0 = Dslash, 1 = M, 2 = Clover;
 *               any other value is reported through errorQuda
 * @param niter  Number of back-to-back applications to time
 * @return Elapsed time in seconds (the event time in milliseconds divided by 1000)
 */
double benchmark(int test, const int niter) {

  cudaEvent_t evBegin, evFinish;
  cudaEventCreate(&evBegin);
  cudaEventCreate(&evFinish);
  cudaEventRecord(evBegin, 0);

  // The operator is chosen once, outside the timed loops, so an unknown test id is
  // reported even when niter is zero.
  if (test == 0) {
    for (int remaining = niter; remaining > 0; --remaining) {
      dirac->Dslash(xD->Even(), yD->Odd(), QUDA_EVEN_PARITY);
    }
  } else if (test == 1) {
    for (int remaining = niter; remaining > 0; --remaining) {
      dirac->M(*xD, *yD);
    }
  } else if (test == 2) {
    for (int remaining = niter; remaining > 0; --remaining) {
      dirac->Clover(xD->Even(), yD->Even(), QUDA_EVEN_PARITY);
    }
  } else {
    errorQuda("Undefined test %d", test);
  }

  // Wait for the closing event so that all queued work is included in the measurement.
  cudaEventRecord(evFinish, 0);
  cudaEventSynchronize(evFinish);

  float elapsedMs;
  cudaEventElapsedTime(&elapsedMs, evBegin, evFinish);

  cudaEventDestroy(evBegin);
  cudaEventDestroy(evFinish);

  // Single-precision division first, then widening to double, as in the original arithmetic.
  return static_cast<double>(elapsedMs / 1000);
}
210 
211 
// Display names of the benchmarked operators, indexed by test id
// (0 = Dslash, 1 = Mat, 2 = Clover); main() looks these up with test_type.
const char *names[] = { "Dslash", "Mat", "Clover" };
217 
// Program entry point: parse the command line, initialise communications and QUDA, then for
// Ncolor = 24 and 32 (with Nspin = 2) build the fields, run one warm-up pass of the selected
// test, time niter iterations, and print the achieved Gflop/s.
// NOTE(review): the embedded listing numbers skip 229, 233 and 243-244, so some statements
// of the original (including whatever constructs the dirac object deleted below) are not
// visible in this view.
218 int main(int argc, char** argv)
219 {
// A return value of 0 from process_command_line_option is treated as "option accepted";
// anything else is reported as an invalid option followed by a call to usage().
220  for (int i = 1; i < argc; i++){
221  if(process_command_line_option(argc, argv, &i) == 0){
222  continue;
223  }
224  printfQuda("ERROR: Invalid option:%s\n", argv[i]);
225  usage(argv);
226  }
227 
228  initComms(argc, argv, gridsize_from_cmdline);
230  initQuda(device);
231 
232  // enable the tuning
234 
235  Nspin = 2;
236 
237  printfQuda("\nBenchmarking %s precision with %d iterations...\n\n", get_prec_str(prec), niter);
// Two passes: Ncolor = 24 and Ncolor = 32. Fields are rebuilt and released on every pass.
238  for (int c=24; c<=32; c+=8) {
239  Ncolor = c;
240 
241  initFields(prec);
242 
245 
246  // do the initial tune
247  benchmark(test_type, 1);
248 
249  // now rerun with more iterations to get accurate speed measurements
250  dirac->Flops(); // reset flops counter
251 
252  double secs = benchmark(test_type, niter);
// Flops() is scaled by 1e-9 and divided by the measured seconds to give Gflop/s.
253  double gflops = (dirac->Flops()*1e-9)/(secs);
254 
// NOTE(review): names has three entries, so test_type must be 0..2 here; benchmark() calls
// errorQuda for other values -- presumably that aborts before this line is reached; confirm.
255  printfQuda("Ncolor = %2d, %-31s: Gflop/s = %6.1f\n", Ncolor, names[test_type], gflops);
256 
257  delete dirac;
258  freeFields();
259  }
260 
261  // clear the error state
262  cudaGetLastError();
263 
264  endQuda();
265 
266  finalizeComms();
267 }
DiracCoarse * dirac
QudaTboundary t_boundary
Definition: gauge_field.h:18
void display_test_info()
int dimPartitioned(int dim)
Definition: test_util.cpp:1686
QudaGhostExchange ghostExchange
Definition: lattice_field.h:60
void endQuda(void)
enum QudaPrecision_s QudaPrecision
double test(int kernel)
Definition: blas_test.cu:492
void freeFields()
#define errorQuda(...)
Definition: util_quda.h:90
cudaEvent_t start
int tdim
Definition: test_util.cpp:1623
int process_command_line_option(int argc, char **argv, int *idx)
Definition: test_util.cpp:1795
const ColorSpinorField & Even() const
const ColorSpinorField & Odd() const
cudaGaugeField * Yhat_d
QudaPrecision precision
Definition: lattice_field.h:54
void finalizeComms()
Definition: test_util.cpp:107
bool verify_results
Definition: test_util.cpp:1641
const char * get_prec_str(QudaPrecision prec)
Definition: misc.cpp:704
void Clover(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
Apply the coarse clover operator.
unsigned long long Flops() const
Definition: dirac_quda.h:148
QudaSiteSubset siteSubset
Definition: lattice_field.h:55
cpuGaugeField * Y_h
QudaGaugeParam param
Definition: pack_test.cpp:17
int x[QUDA_MAX_DIM]
Definition: lattice_field.h:50
virtual void Dslash(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
Apply DslashXpay out = (D * in)
ColorSpinorField * xH
int niter
Definition: test_util.cpp:1630
void initQuda(int device)
const char * names[]
cudaGaugeField * Xinv_d
cpuGaugeField * Yhat_h
int Nsrc
Definition: test_util.cpp:1628
cpuGaugeField * Xinv_h
int test_type
Definition: test_util.cpp:1634
ColorSpinorField * yH
QudaGaugeFieldOrder order
Definition: gauge_field.h:15
QudaPrecision prec
Definition: test_util.cpp:1615
cudaGaugeField * X_d
double benchmark(int test, const int niter)
int nvec
Definition: test_util.cpp:1635
GaugeFieldParam gParam
QudaDslashType dslash_type
Definition: test_util.cpp:1626
void usage(char **)
Definition: test_util.cpp:1693
cpuGaugeField * X_h
QudaLinkType link_type
Definition: gauge_field.h:17
ColorSpinorField * yD
#define printfQuda(...)
Definition: util_quda.h:84
cudaGaugeField * Y_d
enum QudaDslashType_s QudaDslashType
QudaReconstructType reconstruct
Definition: gauge_field.h:14
QudaFieldCreate create
Definition: gauge_field.h:25
const void * c
#define checkCudaError()
Definition: util_quda.h:129
QudaFieldGeometry geometry
Definition: gauge_field.h:27
int ydim
Definition: test_util.cpp:1621
QudaInverterType inv_type
Definition: test_util.cpp:1638
virtual void M(ColorSpinorField &out, const ColorSpinorField &in) const
Apply the full operator.
void copy(const GaugeField &src)
int gridsize_from_cmdline[]
Definition: test_util.cpp:50
int xdim
Definition: test_util.cpp:1620
int zdim
Definition: test_util.cpp:1622
void initComms(int argc, char **argv, const int *commDims)
Definition: test_util.cpp:72
void setVerbosity(const QudaVerbosity verbosity)
Definition: util_quda.cpp:24
void initFields(QudaPrecision prec)
int main(int argc, char **argv)
ColorSpinorField * xD
enum QudaInverterType_s QudaInverterType
cudaEvent_t cudaEvent_t end