QUDA  v1.1.0
A library for QCD on GPUs
gauge_force_test.cpp
Go to the documentation of this file.
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <string.h>
4 
5 #include <quda.h>
6 #include <host_utils.h>
7 #include <command_line_params.h>
8 #include <gauge_field.h>
9 #include "misc.h"
10 #include "gauge_force_reference.h"
11 #include "gauge_force_quda.h"
12 #include <sys/time.h>
13 #include <dslash_quda.h>
14 #include <gtest/gtest.h>
15 
16 static QudaGaugeFieldOrder gauge_order = QUDA_QDP_GAUGE_ORDER;
17 
18 int length[] = {
19  3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
20  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
21 };
22 
23 float loop_coeff_f[]={
24  1.1,
25  1.2,
26  1.3,
27  1.4,
28  1.5,
29  1.6,
30  2.5,
31  2.6,
32  2.7,
33  2.8,
34  2.9,
35  3.0,
36  3.1,
37  3.2,
38  3.3,
39  3.4,
40  3.5,
41  3.6,
42  3.7,
43  3.8,
44  3.9,
45  4.0,
46  4.1,
47  4.2,
48  4.3,
49  4.4,
50  4.5,
51  4.6,
52  4.7,
53  4.8,
54  4.9,
55  5.0,
56  5.1,
57  5.2,
58  5.3,
59  5.4,
60  5.5,
61  5.6,
62  5.7,
63  5.8,
64  5.9,
65  5.0,
66  6.1,
67  6.2,
68  6.3,
69  6.4,
70  6.5,
71  6.6,
72 };
73 
74 int path_dir_x[][5] = {
75  {1, 7, 6 },
76  {6, 7, 1 },
77  {2, 7, 5 },
78  {5, 7, 2 },
79  {3, 7, 4 },
80  {4, 7, 3 },
81  {0, 1, 7, 7, 6 },
82  {1, 7, 7, 6, 0 },
83  {6, 7, 7, 1, 0 },
84  {0, 6, 7, 7, 1 },
85  {0, 2, 7, 7, 5 },
86  {2, 7, 7, 5, 0 },
87  {5, 7, 7, 2, 0 },
88  {0, 5, 7, 7, 2 },
89  {0, 3, 7, 7, 4 },
90  {3, 7, 7, 4, 0 },
91  {4, 7, 7, 3, 0 },
92  {0, 4, 7, 7, 3 },
93  {6, 6, 7, 1, 1 },
94  {1, 1, 7, 6, 6 },
95  {5, 5, 7, 2, 2 },
96  {2, 2, 7, 5, 5 },
97  {4, 4, 7, 3, 3 },
98  {3, 3, 7, 4, 4 },
99  {1, 2, 7, 6, 5 },
100  {5, 6, 7, 2, 1 },
101  {1, 5, 7, 6, 2 },
102  {2, 6, 7, 5, 1 },
103  {6, 2, 7, 1, 5 },
104  {5, 1, 7, 2, 6 },
105  {6, 5, 7, 1, 2 },
106  {2, 1, 7, 5, 6 },
107  {1, 3, 7, 6, 4 },
108  {4, 6, 7, 3, 1 },
109  {1, 4, 7, 6, 3 },
110  {3, 6, 7, 4, 1 },
111  {6, 3, 7, 1, 4 },
112  {4, 1, 7, 3, 6 },
113  {6, 4, 7, 1, 3 },
114  {3, 1, 7, 4, 6 },
115  {2, 3, 7, 5, 4 },
116  {4, 5, 7, 3, 2 },
117  {2, 4, 7, 5, 3 },
118  {3, 5, 7, 4, 2 },
119  {5, 3, 7, 2, 4 },
120  {4, 2, 7, 3, 5 },
121  {5, 4, 7, 2, 3 },
122  {3, 2, 7, 4, 5 },
123 };
124 
125 
126 int path_dir_y[][5] = {
127  { 2 ,6 ,5 },
128  { 5 ,6 ,2 },
129  { 3 ,6 ,4 },
130  { 4 ,6 ,3 },
131  { 0 ,6 ,7 },
132  { 7 ,6 ,0 },
133  { 1 ,2 ,6 ,6 ,5 },
134  { 2 ,6 ,6 ,5 ,1 },
135  { 5 ,6 ,6 ,2 ,1 },
136  { 1 ,5 ,6 ,6 ,2 },
137  { 1 ,3 ,6 ,6 ,4 },
138  { 3 ,6 ,6 ,4 ,1 },
139  { 4 ,6 ,6 ,3 ,1 },
140  { 1 ,4 ,6 ,6 ,3 },
141  { 1 ,0 ,6 ,6 ,7 },
142  { 0 ,6 ,6 ,7 ,1 },
143  { 7 ,6 ,6 ,0 ,1 },
144  { 1 ,7 ,6 ,6 ,0 },
145  { 5 ,5 ,6 ,2 ,2 },
146  { 2 ,2 ,6 ,5 ,5 },
147  { 4 ,4 ,6 ,3 ,3 },
148  { 3 ,3 ,6 ,4 ,4 },
149  { 7 ,7 ,6 ,0 ,0 },
150  { 0 ,0 ,6 ,7 ,7 },
151  { 2 ,3 ,6 ,5 ,4 },
152  { 4 ,5 ,6 ,3 ,2 },
153  { 2 ,4 ,6 ,5 ,3 },
154  { 3 ,5 ,6 ,4 ,2 },
155  { 5 ,3 ,6 ,2 ,4 },
156  { 4 ,2 ,6 ,3 ,5 },
157  { 5 ,4 ,6 ,2 ,3 },
158  { 3 ,2 ,6 ,4 ,5 },
159  { 2 ,0 ,6 ,5 ,7 },
160  { 7 ,5 ,6 ,0 ,2 },
161  { 2 ,7 ,6 ,5 ,0 },
162  { 0 ,5 ,6 ,7 ,2 },
163  { 5 ,0 ,6 ,2 ,7 },
164  { 7 ,2 ,6 ,0 ,5 },
165  { 5 ,7 ,6 ,2 ,0 },
166  { 0 ,2 ,6 ,7 ,5 },
167  { 3 ,0 ,6 ,4 ,7 },
168  { 7 ,4 ,6 ,0 ,3 },
169  { 3 ,7 ,6 ,4 ,0 },
170  { 0 ,4 ,6 ,7 ,3 },
171  { 4 ,0 ,6 ,3 ,7 },
172  { 7 ,3 ,6 ,0 ,4 },
173  { 4 ,7 ,6 ,3 ,0 },
174  { 0 ,3 ,6 ,7 ,4 }
175 };
176 
177 int path_dir_z[][5] = {
178  {3, 5, 4}, {4, 5, 3}, {0, 5, 7}, {7, 5, 0}, {1, 5, 6}, {6, 5, 1}, {2, 3, 5, 5, 4},
179  {3, 5, 5, 4, 2}, {4, 5, 5, 3, 2}, {2, 4, 5, 5, 3}, {2, 0, 5, 5, 7}, {0, 5, 5, 7, 2}, {7, 5, 5, 0, 2}, {2, 7, 5, 5, 0},
180  {2, 1, 5, 5, 6}, {1, 5, 5, 6, 2}, {6, 5, 5, 1, 2}, {2, 6, 5, 5, 1}, {4, 4, 5, 3, 3}, {3, 3, 5, 4, 4}, {7, 7, 5, 0, 0},
181  {0, 0, 5, 7, 7}, {6, 6, 5, 1, 1}, {1, 1, 5, 6, 6}, {3, 0, 5, 4, 7}, {7, 4, 5, 0, 3}, {3, 7, 5, 4, 0}, {0, 4, 5, 7, 3},
182  {4, 0, 5, 3, 7}, {7, 3, 5, 0, 4}, {4, 7, 5, 3, 0}, {0, 3, 5, 7, 4}, {3, 1, 5, 4, 6}, {6, 4, 5, 1, 3}, {3, 6, 5, 4, 1},
183  {1, 4, 5, 6, 3}, {4, 1, 5, 3, 6}, {6, 3, 5, 1, 4}, {4, 6, 5, 3, 1}, {1, 3, 5, 6, 4}, {0, 1, 5, 7, 6}, {6, 7, 5, 1, 0},
184  {0, 6, 5, 7, 1}, {1, 7, 5, 6, 0}, {7, 1, 5, 0, 6}, {6, 0, 5, 1, 7}, {7, 6, 5, 0, 1}, {1, 0, 5, 6, 7}};
185 
186 int path_dir_t[][5] = {
187  { 0 ,4 ,7 },
188  { 7 ,4 ,0 },
189  { 1 ,4 ,6 },
190  { 6 ,4 ,1 },
191  { 2 ,4 ,5 },
192  { 5 ,4 ,2 },
193  { 3 ,0 ,4 ,4 ,7 },
194  { 0 ,4 ,4 ,7 ,3 },
195  { 7 ,4 ,4 ,0 ,3 },
196  { 3 ,7 ,4 ,4 ,0 },
197  { 3 ,1 ,4 ,4 ,6 },
198  { 1 ,4 ,4 ,6 ,3 },
199  { 6 ,4 ,4 ,1 ,3 },
200  { 3 ,6 ,4 ,4 ,1 },
201  { 3 ,2 ,4 ,4 ,5 },
202  { 2 ,4 ,4 ,5 ,3 },
203  { 5 ,4 ,4 ,2 ,3 },
204  { 3 ,5 ,4 ,4 ,2 },
205  { 7 ,7 ,4 ,0 ,0 },
206  { 0 ,0 ,4 ,7 ,7 },
207  { 6 ,6 ,4 ,1 ,1 },
208  { 1 ,1 ,4 ,6 ,6 },
209  { 5 ,5 ,4 ,2 ,2 },
210  { 2 ,2 ,4 ,5 ,5 },
211  { 0 ,1 ,4 ,7 ,6 },
212  { 6 ,7 ,4 ,1 ,0 },
213  { 0 ,6 ,4 ,7 ,1 },
214  { 1 ,7 ,4 ,6 ,0 },
215  { 7 ,1 ,4 ,0 ,6 },
216  { 6 ,0 ,4 ,1 ,7 },
217  { 7 ,6 ,4 ,0 ,1 },
218  { 1 ,0 ,4 ,6 ,7 },
219  { 0 ,2 ,4 ,7 ,5 },
220  { 5 ,7 ,4 ,2 ,0 },
221  { 0 ,5 ,4 ,7 ,2 },
222  { 2 ,7 ,4 ,5 ,0 },
223  { 7 ,2 ,4 ,0 ,5 },
224  { 5 ,0 ,4 ,2 ,7 },
225  { 7 ,5 ,4 ,0 ,2 },
226  { 2 ,0 ,4 ,5 ,7 },
227  { 1 ,2 ,4 ,6 ,5 },
228  { 5 ,6 ,4 ,2 ,1 },
229  { 1 ,5 ,4 ,6 ,2 },
230  { 2 ,6 ,4 ,5 ,1 },
231  { 6 ,2 ,4 ,1 ,5 },
232  { 5 ,1 ,4 ,2 ,6 },
233  { 6 ,5 ,4 ,1 ,2 },
234  { 2 ,1 ,4 ,5 ,6 }
235 };
236 
237 static double force_check;
238 static double deviation;
239 
241 {
242  int max_length = 6;
243 
244  setVerbosityQuda(QUDA_VERBOSE,"",stdout);
245 
248 
249  gauge_param.gauge_order = gauge_order;
251 
253 
254  double loop_coeff_d[sizeof(loop_coeff_f)/sizeof(float)];
255  for(unsigned int i=0;i < sizeof(loop_coeff_f)/sizeof(float); i++){
256  loop_coeff_d[i] = loop_coeff_f[i];
257  }
258 
259  void* loop_coeff;
261  loop_coeff = (void*)&loop_coeff_f[0];
262  } else {
263  loop_coeff = loop_coeff_d;
264  }
265  double eb3 = 0.3;
266  int num_paths = sizeof(path_dir_x)/sizeof(path_dir_x[0]);
267 
268  int** input_path_buf[4];
269  for (int dir = 0; dir < 4; dir++) {
270  input_path_buf[dir] = (int **)safe_malloc(num_paths * sizeof(int *));
271  for (int i = 0; i < num_paths; i++) {
272  input_path_buf[dir][i] = (int*)safe_malloc(length[i]*sizeof(int));
273  if (dir == 0)
274  memcpy(input_path_buf[dir][i], path_dir_x[i], length[i] * sizeof(int));
275  else if (dir == 1)
276  memcpy(input_path_buf[dir][i], path_dir_y[i], length[i] * sizeof(int));
277  else if (dir == 2)
278  memcpy(input_path_buf[dir][i], path_dir_z[i], length[i] * sizeof(int));
279  else if (dir == 3)
280  memcpy(input_path_buf[dir][i], path_dir_t[i], length[i] * sizeof(int));
281  }
282  }
283 
285  param.create = QUDA_NULL_FIELD_CREATE;
286  param.order = QUDA_QDP_GAUGE_ORDER;
287  auto U_qdp = new quda::cpuGaugeField(param);
288 
289  // fills the gauge field with random numbers
290  createSiteLinkCPU((void **)U_qdp->Gauge_p(), gauge_param.cpu_prec, 0);
291 
293  auto U_milc = new quda::cpuGaugeField(param);
294  if (gauge_order == QUDA_MILC_GAUGE_ORDER) U_milc->copy(*U_qdp);
296  param.create = QUDA_ZERO_FIELD_CREATE;
297  param.link_type = QUDA_ASQTAD_MOM_LINKS;
298  auto Mom_milc = new quda::cpuGaugeField(param);
299  auto Mom_ref_milc = new quda::cpuGaugeField(param);
300 
301  param.order = QUDA_QDP_GAUGE_ORDER;
302  auto Mom_qdp = new quda::cpuGaugeField(param);
303 
304  // initialize some data in cpuMom
305  createMomCPU(Mom_ref_milc->Gauge_p(), gauge_param.cpu_prec);
306  if (gauge_order == QUDA_MILC_GAUGE_ORDER) Mom_milc->copy(*Mom_ref_milc);
307  if (gauge_order == QUDA_QDP_GAUGE_ORDER) Mom_qdp->copy(*Mom_ref_milc);
308  void *mom = nullptr;
309  void *sitelink = nullptr;
310 
311  if (gauge_order == QUDA_MILC_GAUGE_ORDER) {
312  sitelink = U_milc->Gauge_p();
313  mom = Mom_milc->Gauge_p();
314  } else if (gauge_order == QUDA_QDP_GAUGE_ORDER) {
315  sitelink = U_qdp->Gauge_p();
316  mom = Mom_qdp->Gauge_p();
317  } else {
318  errorQuda("Unsupported gauge order %d", gauge_order);
319  }
320 
321  if (getTuning() == QUDA_TUNE_YES)
322  computeGaugeForceQuda(mom, sitelink, input_path_buf, length, loop_coeff_d, num_paths, max_length, eb3, &gauge_param);
323 
324  struct timeval t0, t1;
325  double total_time = 0.0;
326  // Multiple execution to exclude warmup time in the first run
327 
328  auto &Mom_ = gauge_order == QUDA_MILC_GAUGE_ORDER ? Mom_milc : Mom_qdp;
329  for (int i = 0; i < niter; i++) {
330  Mom_->copy(*Mom_ref_milc); // restore initial momentum for correctness
331  gettimeofday(&t0, NULL);
332  computeGaugeForceQuda(mom, sitelink, input_path_buf, length, loop_coeff_d, num_paths, max_length, eb3, &gauge_param);
333  gettimeofday(&t1, NULL);
334  total_time += t1.tv_sec - t0.tv_sec + 0.000001*(t1.tv_usec - t0.tv_usec);
335  }
336  if (gauge_order == QUDA_QDP_GAUGE_ORDER) Mom_milc->copy(*Mom_qdp);
337 
338  //The number comes from CPU implementation in MILC, gauge_force_imp.c
339  int flops = 153004;
340 
341  void *refmom = Mom_ref_milc->Gauge_p();
342  if (verify_results) {
343  gauge_force_reference(refmom, eb3, (void **)U_qdp->Gauge_p(), gauge_param.cpu_prec, input_path_buf, length,
344  loop_coeff, num_paths);
345  force_check = compare_floats(Mom_milc->Gauge_p(), refmom, 4 * V * mom_site_size, getTolerance(cuda_prec),
347  strong_check_mom(Mom_milc->Gauge_p(), refmom, 4 * V, gauge_param.cpu_prec);
348  }
349 
350  printfQuda("\nComputing momentum action\n");
351  auto action_quda = momActionQuda(mom, &gauge_param);
352  auto action_ref = mom_action(refmom, gauge_param.cpu_prec, 4 * V);
353  deviation = std::abs(action_quda - action_ref) / std::abs(action_ref);
354  printfQuda("QUDA action = %e, reference = %e relative deviation = %e\n", action_quda, action_ref, deviation);
355 
356  double perf = 1.0*niter*flops*V/(total_time*1e+9);
357  printfQuda("total time = %.2f ms\n", total_time * 1e+3);
358  printfQuda("overall performance : %.2f GFLOPS\n",perf);
359 
360  for(int dir = 0; dir < 4; dir++){
361  for(int i=0;i < num_paths; i++) host_free(input_path_buf[dir][i]);
362  host_free(input_path_buf[dir]);
363  }
364 
365  delete U_qdp;
366  delete U_milc;
367  delete Mom_qdp;
368  delete Mom_milc;
369  delete Mom_ref_milc;
370 }
371 
372 TEST(force, verify) { ASSERT_EQ(force_check, 1) << "CPU and QUDA implementations do not agree"; }
373 
374 TEST(action, verify) { ASSERT_LE(deviation, getTolerance(cuda_prec)) << "CPU and QUDA implementations do not agree"; }
375 
376 static void display_test_info()
377 {
378  printfQuda("running the following test:\n");
379 
380  printfQuda("link_precision link_reconstruct space_dim(x/y/z) T_dimension "
381  "Gauge_order niter\n");
382  printfQuda("%s %s %d/%d/%d %d "
383  "%s %d\n",
385  niter);
386 }
387 
388 int main(int argc, char **argv)
389 {
390  // initalize google test
391  ::testing::InitGoogleTest(&argc, argv);
392  // return code for google test
393  int test_rc = 0;
394 
395  // command line options
396  auto app = make_app();
397  CLI::TransformPairs<QudaGaugeFieldOrder> gauge_order_map {{"milc", QUDA_MILC_GAUGE_ORDER},
398  {"qdp", QUDA_QDP_GAUGE_ORDER}};
399  app->add_option("--gauge-order", gauge_order, "")->transform(CLI::QUDACheckedTransformer(gauge_order_map));
400  try {
401  app->parse(argc, argv);
402  } catch (const CLI::ParseError &e) {
403  return app->exit(e);
404  }
405 
406  initComms(argc, argv, gridsize_from_cmdline);
407 
409 
411 
413 
414  if (verify_results) {
415  // Ensure gtest prints only from rank 0
417  if (comm_rank() != 0) { delete listeners.Release(listeners.default_result_printer()); }
418 
419  test_rc = RUN_ALL_TESTS();
420  if (test_rc != 0) warningQuda("Tests failed");
421  }
422 
423  endQuda();
424  finalizeComms();
425  return test_rc;
426 }
void display_test_info()
TestEventListener * Release(TestEventListener *listener)
TestEventListener * default_result_printer() const
Definition: gtest.h:1186
TestEventListeners & listeners()
static UnitTest * GetInstance()
int comm_rank(void)
std::shared_ptr< QUDAApp > make_app(std::string app_description, std::string app_name)
QudaReconstructType link_recon
int niter
int device_ordinal
int & ydim
bool verify_results
int & zdim
QudaPrecision prec
int & tdim
int & xdim
std::array< int, 4 > gridsize_from_cmdline
int V
Definition: host_utils.cpp:37
void setDims(int *)
Definition: host_utils.cpp:315
QudaGaugeParam gauge_param
Definition: covdev_test.cpp:26
@ QUDA_VERBOSE
Definition: enum_quda.h:267
enum QudaGaugeFieldOrder_s QudaGaugeFieldOrder
@ QUDA_RECONSTRUCT_10
Definition: enum_quda.h:75
@ QUDA_PERIODIC_T
Definition: enum_quda.h:57
@ QUDA_TUNE_YES
Definition: enum_quda.h:272
@ QUDA_SINGLE_PRECISION
Definition: enum_quda.h:64
@ QUDA_QDP_GAUGE_ORDER
Definition: enum_quda.h:44
@ QUDA_MILC_GAUGE_ORDER
Definition: enum_quda.h:47
@ QUDA_ZERO_FIELD_CREATE
Definition: enum_quda.h:361
@ QUDA_NULL_FIELD_CREATE
Definition: enum_quda.h:360
@ QUDA_ASQTAD_MOM_LINKS
Definition: enum_quda.h:33
void gauge_force_reference(void *refMom, double eb3, void **sitelink, QudaPrecision prec, int ***path_dir, int *length, void *loop_coeff, int num_paths)
TEST(force, verify)
int path_dir_t[][5]
int length[]
int main(int argc, char **argv)
int path_dir_x[][5]
float loop_coeff_f[]
int path_dir_z[][5]
int path_dir_y[][5]
void gauge_force_test(void)
#define ASSERT_EQ(val1, val2)
Definition: gtest.h:2047
#define ASSERT_LE(val1, val2)
Definition: gtest.h:2055
int RUN_ALL_TESTS() GTEST_MUST_USE_RESULT_
Definition: gtest.h:2468
int compare_floats(void *a, void *b, int len, double epsilon, QudaPrecision precision)
Definition: host_utils.cpp:889
double mom_action(real *mom_, int len)
QudaPrecision & cuda_prec
Definition: host_utils.cpp:58
void initComms(int argc, char **argv, std::array< int, 4 > &commDims)
Definition: host_utils.cpp:255
void createMomCPU(void *mom, QudaPrecision precision)
void finalizeComms()
Definition: host_utils.cpp:292
int strong_check_mom(void *momA, void *momB, int len, QudaPrecision prec)
void createSiteLinkCPU(void **link, QudaPrecision precision, int phase)
#define mom_site_size
Definition: host_utils.h:12
double getTolerance(QudaPrecision prec)
Definition: host_utils.h:245
#define safe_malloc(size)
Definition: malloc_quda.h:106
#define host_free(ptr)
Definition: malloc_quda.h:115
const char * get_prec_str(QudaPrecision prec)
Definition: misc.cpp:26
const char * get_gauge_order_str(QudaGaugeFieldOrder order)
Definition: misc.cpp:54
const char * get_recon_str(QudaReconstructType recon)
Definition: misc.cpp:68
unsigned long long flops
__host__ __device__ ValueType abs(ValueType x)
Definition: complex_quda.h:125
GTEST_API_ void InitGoogleTest(int *argc, char **argv)
QudaGaugeParam param
Definition: pack_test.cpp:18
Main header file for the QUDA library.
double momActionQuda(void *momentum, QudaGaugeParam *param)
int computeGaugeForceQuda(void *mom, void *sitelink, int ***input_path_buf, int *path_length, double *loop_coeff, int num_paths, int max_length, double dt, QudaGaugeParam *qudaGaugeParam)
QudaGaugeParam newQudaGaugeParam(void)
void setVerbosityQuda(QudaVerbosity verbosity, const char prefix[], FILE *outfile)
void initQuda(int device)
void endQuda(void)
QudaReconstructType reconstruct
Definition: quda.h:49
QudaGaugeFieldOrder gauge_order
Definition: quda.h:42
int X[4]
Definition: quda.h:35
QudaPrecision cpu_prec
Definition: quda.h:46
QudaTboundary t_boundary
Definition: quda.h:44
void setGaugeParam(QudaGaugeParam &gauge_param)
Definition: su3_test.cpp:64
QudaTune getTuning()
Query whether autotuning is enabled or not. Default is enabled but can be overridden by setting QUDA_...
Definition: util_quda.cpp:52
#define printfQuda(...)
Definition: util_quda.h:114
#define warningQuda(...)
Definition: util_quda.h:132
#define errorQuda(...)
Definition: util_quda.h:120