QUDA  v1.1.0
A library for QCD on GPUs
unitarize_link_test.cpp
Go to the documentation of this file.
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <string.h>
4 #include <sys/time.h>
5 
6 #include "quda.h"
7 #include "gauge_field.h"
8 #include "host_utils.h"
9 #include <command_line_params.h>
10 #include "misc.h"
11 #include "util_quda.h"
12 #include "llfat_quda.h"
13 #include <unitarization_links.h>
14 #include "dslash_quda.h"
15 #include "ks_improved_force.h"
16 
17 #ifdef MULTI_GPU
18 #include "comm_quda.h"
19 #endif
20 
21 // google test frame work
22 #include <gtest/gtest.h>
23 
24 #define TDIFF(a,b) (b.tv_sec - a.tv_sec + 0.000001*(b.tv_usec - a.tv_usec))
25 
26 using namespace quda;
27 
28 static double unitarize_eps = 1e-6;
29 static bool reunit_allow_svd = true;
30 static bool reunit_svd_only = false;
31 static double svd_rel_error = 1e-4;
32 static double svd_abs_error = 1e-4;
33 static double max_allowed_error = 1e-11;
34 
36 
39 
40 const double unittol = (prec == QUDA_DOUBLE_PRECISION) ? 1e-10 : 1e-6;
41 
42 TEST(unitarization, verify) {
45 
47  unittol, cpu_prec);
48 
49 #ifdef MULTI_GPU
50  comm_allreduce_int(&res);
51  res /= comm_size();
52 #endif
53 
54  ASSERT_EQ(res,1) << "CPU and CUDA implementations do not agree";
55 }
56 
57 static int unitarize_link_test(int &test_rc)
58 {
59  QudaGaugeParam qudaGaugeParam = newQudaGaugeParam();
60 
61  qudaGaugeParam.anisotropy = 1.0;
62 
63  qudaGaugeParam.X[0] = xdim;
64  qudaGaugeParam.X[1] = ydim;
65  qudaGaugeParam.X[2] = zdim;
66  qudaGaugeParam.X[3] = tdim;
67 
68  setDims(qudaGaugeParam.X);
69 
70  qudaGaugeParam.type = QUDA_WILSON_LINKS;
71 
72  qudaGaugeParam.t_boundary = QUDA_PERIODIC_T;
73  qudaGaugeParam.anisotropy = 1.0;
74  qudaGaugeParam.gauge_fix = QUDA_GAUGE_FIXED_NO;
75  qudaGaugeParam.ga_pad = 0;
76  qudaGaugeParam.cpu_prec = cpu_prec;
77  qudaGaugeParam.cuda_prec = prec;
78  qudaGaugeParam.cuda_prec_sloppy = prec;
79 
80  if (gauge_order != QUDA_MILC_GAUGE_ORDER)
81  errorQuda("Unsupported gauge order %d", gauge_order);
82 
83  qudaGaugeParam.gauge_order = gauge_order;
84  qudaGaugeParam.type=QUDA_WILSON_LINKS;
85  qudaGaugeParam.reconstruct = link_recon;
86  qudaGaugeParam.reconstruct_sloppy = qudaGaugeParam.reconstruct;
87 
88  qudaGaugeParam.llfat_ga_pad = qudaGaugeParam.site_ga_pad = qudaGaugeParam.ga_pad = qudaGaugeParam.staple_pad = 0;
89 
90  GaugeFieldParam gParam(0, qudaGaugeParam);
91  gParam.pad = 0;
95 
96  TimeProfile profile("dummy");
97 
98  void *inlink = (void *)safe_malloc(4 * V * gauge_site_size * cpu_prec);
99  void *fatlink = (void *)safe_malloc(4 * V * gauge_site_size * cpu_prec);
100 
101  void* sitelink[4];
102  for (int i = 0; i < 4; i++) sitelink[i] = pinned_malloc(V * gauge_site_size * cpu_prec);
103 
104  createSiteLinkCPU(sitelink, qudaGaugeParam.cpu_prec, 1);
105 
107  double* link = reinterpret_cast<double*>(inlink);
108  for(int dir=0; dir<4; ++dir){
109  double* slink = reinterpret_cast<double*>(sitelink[dir]);
110  for(int i=0; i<V; ++i){
111  for (int j = 0; j < gauge_site_size; j++) {
112  link[(i * 4 + dir) * gauge_site_size + j] = slink[i * gauge_site_size + j];
113  }
114  }
115  }
116  } else if(cpu_prec == QUDA_SINGLE_PRECISION){
117  float* link = reinterpret_cast<float*>(inlink);
118  for(int dir=0; dir<4; ++dir){
119  float* slink = reinterpret_cast<float*>(sitelink[dir]);
120  for(int i=0; i<V; ++i){
121  for (int j = 0; j < gauge_site_size; j++) {
122  link[(i * 4 + dir) * gauge_site_size + j] = slink[i * gauge_site_size + j];
123  }
124  }
125  }
126  }
127 
129  gParam.gauge = fatlink;
131 
134 
137 
138  gParam.pad = 0;
141  gParam.setPrecision(prec, true);
144 
145  { // create fat links
146  double act_path_coeff[6];
147  act_path_coeff[0] = 0.625000;
148  act_path_coeff[1] = -0.058479;
149  act_path_coeff[2] = -0.087719;
150  act_path_coeff[3] = 0.030778;
151  act_path_coeff[4] = -0.007200;
152  act_path_coeff[5] = -0.123113;
153 
154  computeKSLinkQuda(fatlink, NULL, NULL, inlink, act_path_coeff, &qudaGaugeParam);
155 
157  }
158 
159  setUnitarizeLinksConstants(unitarize_eps,
160  max_allowed_error,
161  reunit_allow_svd,
162  reunit_svd_only,
163  svd_rel_error,
164  svd_abs_error);
165 
166  int *num_failures_h = static_cast<int *>(mapped_malloc(sizeof(int)));
167  int *num_failures_d = static_cast<int *>(get_mapped_device_pointer(num_failures_h));
168  *num_failures_h = 0;
169 
170  struct timeval t0, t1;
171 
172  gettimeofday(&t0,NULL);
173  unitarizeLinks(*cudaULink, *cudaFatLink, num_failures_d);
174  gettimeofday(&t1,NULL);
175 
176  if (verify_results) {
177  test_rc = RUN_ALL_TESTS();
178  if (test_rc != 0) warningQuda("Tests failed");
179  }
180 
181  delete cudaResult;
182  delete cpuULink;
183  delete cpuFatLink;
184  delete cudaFatLink;
185  delete cudaULink;
186  for (int dir = 0; dir < 4; ++dir) host_free(sitelink[dir]);
187 
188  host_free(fatlink);
189 
190  int num_failures = *num_failures_h;
191  host_free(num_failures_h);
192 
193  host_free(inlink);
194 #ifdef MULTI_GPU
196 #endif
197 
198  printfQuda("Unitarization time: %g ms\n", TDIFF(t0,t1)*1000);
199  return num_failures;
200 }
201 
202 static void display_test_info()
203 {
204  printfQuda("running the following test:\n");
205 
206  printfQuda("link_precision link_reconstruct space_dimension T_dimension algorithm max allowed error deviation tolerance\n");
207  printfQuda("%8s %s %d/%d/%d/ %d %s %g "
208  " %g\n",
210  get_unitarization_str(reunit_svd_only), max_allowed_error, unittol);
211 
212 #ifdef MULTI_GPU
213  printfQuda("Grid partition info: X Y Z T\n");
214  printfQuda(" %d %d %d %d\n",
215  dimPartitioned(0),
216  dimPartitioned(1),
217  dimPartitioned(2),
218  dimPartitioned(3));
219 #endif
220 }
221 
222 int main(int argc, char **argv)
223 {
224  // initalize google test, includes command line options
225  ::testing::InitGoogleTest(&argc, argv);
226  int test_rc;
227 
228  //default to 18 reconstruct, 8^3 x 8
230  xdim=ydim=zdim=tdim=8;
231 
232  auto app = make_app();
233  try {
234  app->parse(argc, argv);
235  } catch (const CLI::ParseError &e) {
236  return app->exit(e);
237  }
238 
239  initComms(argc, argv, gridsize_from_cmdline);
241 
242  // Ensure gtest prints only from rank 0
244  if (comm_rank() != 0) { delete listeners.Release(listeners.default_result_printer()); }
245 
247  int num_failures = unitarize_link_test(test_rc);
248  int num_procs = 1;
249 #ifdef MULTI_GPU
250  comm_allreduce_int(&num_failures);
251  num_procs = comm_size();
252 #endif
253 
254  printfQuda("Number of failures = %d\n", num_failures);
255  if(num_failures > 0){
256  printfQuda("Failure rate = %lf\n", num_failures/(4.0*V*num_procs));
257  printfQuda("You may want to increase the error tolerance or vary the unitarization parameters\n");
258  }else{
259  printfQuda("Unitarization successfull!\n");
260  }
261 
262  endQuda();
263  finalizeComms();
264 
265  return test_rc;
266 }
267 
268 
void display_test_info()
size_t Volume() const
void loadCPUField(const cpuGaugeField &cpu)
Download into this field from a CPU field.
void saveCPUField(cpuGaugeField &cpu) const
Upload from this field into a CPU field.
TestEventListener * Release(TestEventListener *listener)
TestEventListener * default_result_printer() const
Definition: gtest.h:1186
TestEventListeners & listeners()
static UnitTest * GetInstance()
int comm_rank(void)
int comm_size(void)
void comm_allreduce_int(int *data)
std::shared_ptr< QUDAApp > make_app(std::string app_description, std::string app_name)
QudaReconstructType link_recon
int device_ordinal
int & ydim
bool verify_results
int & zdim
QudaPrecision prec
int & tdim
int & xdim
std::array< int, 4 > gridsize_from_cmdline
int V
Definition: host_utils.cpp:37
void setDims(int *)
Definition: host_utils.cpp:315
enum QudaGaugeFieldOrder_s QudaGaugeFieldOrder
@ QUDA_RECONSTRUCT_NO
Definition: enum_quda.h:70
@ QUDA_PERIODIC_T
Definition: enum_quda.h:57
@ QUDA_GHOST_EXCHANGE_NO
Definition: enum_quda.h:508
@ QUDA_GAUGE_FIXED_NO
Definition: enum_quda.h:80
@ QUDA_DOUBLE_PRECISION
Definition: enum_quda.h:65
@ QUDA_SINGLE_PRECISION
Definition: enum_quda.h:64
@ QUDA_MILC_GAUGE_ORDER
Definition: enum_quda.h:47
@ QUDA_ZERO_FIELD_CREATE
Definition: enum_quda.h:361
@ QUDA_REFERENCE_FIELD_CREATE
Definition: enum_quda.h:363
@ QUDA_NULL_FIELD_CREATE
Definition: enum_quda.h:360
@ QUDA_GENERAL_LINKS
Definition: enum_quda.h:25
@ QUDA_WILSON_LINKS
Definition: enum_quda.h:30
void exchange_llfat_cleanup(void)
#define gauge_site_size
Definition: face_gauge.cpp:34
#define ASSERT_EQ(val1, val2)
Definition: gtest.h:2047
int RUN_ALL_TESTS() GTEST_MUST_USE_RESULT_
Definition: gtest.h:2468
QudaGaugeFieldOrder gauge_order
GaugeFieldParam gParam
int compare_floats(void *a, void *b, int len, double epsilon, QudaPrecision precision)
Definition: host_utils.cpp:889
int dimPartitioned(int dim)
Definition: host_utils.cpp:376
void initComms(int argc, char **argv, std::array< int, 4 > &commDims)
Definition: host_utils.cpp:255
void finalizeComms()
Definition: host_utils.cpp:292
void createSiteLinkCPU(void **link, QudaPrecision precision, int phase)
QudaPrecision & cpu_prec
Definition: host_utils.cpp:57
#define safe_malloc(size)
Definition: malloc_quda.h:106
#define pinned_malloc(size)
Definition: malloc_quda.h:107
#define get_mapped_device_pointer(ptr)
Definition: malloc_quda.h:116
#define host_free(ptr)
Definition: malloc_quda.h:115
#define mapped_malloc(size)
Definition: malloc_quda.h:108
const char * get_prec_str(QudaPrecision prec)
Definition: misc.cpp:26
const char * get_unitarization_str(bool svd_only)
Definition: misc.cpp:41
const char * get_recon_str(QudaReconstructType recon)
Definition: misc.cpp:68
void setUnitarizeLinksConstants(double unitarize_eps, double max_error, bool allow_svd, bool svd_only, double svd_rel_error, double svd_abs_error)
void unitarizeLinksCPU(GaugeField &outfield, const GaugeField &infield)
void unitarizeLinks(GaugeField &outfield, const GaugeField &infield, int *fails)
GTEST_API_ void InitGoogleTest(int *argc, char **argv)
Main header file for the QUDA library.
QudaGaugeParam newQudaGaugeParam(void)
void initQuda(int device)
void computeKSLinkQuda(void *fatlink, void *longlink, void *ulink, void *inlink, double *path_coeff, QudaGaugeParam *param)
void endQuda(void)
int llfat_ga_pad
Definition: quda.h:70
double anisotropy
Definition: quda.h:37
QudaReconstructType reconstruct
Definition: quda.h:49
int ga_pad
Definition: quda.h:65
QudaLinkType type
Definition: quda.h:41
QudaPrecision cuda_prec_sloppy
Definition: quda.h:51
QudaReconstructType reconstruct_sloppy
Definition: quda.h:52
QudaGaugeFixed gauge_fix
Definition: quda.h:63
int staple_pad
Definition: quda.h:69
QudaGaugeFieldOrder gauge_order
Definition: quda.h:42
QudaPrecision cuda_prec
Definition: quda.h:48
int X[4]
Definition: quda.h:35
QudaPrecision cpu_prec
Definition: quda.h:46
int site_ga_pad
Definition: quda.h:67
QudaTboundary t_boundary
Definition: quda.h:44
QudaReconstructType reconstruct
Definition: gauge_field.h:50
QudaGaugeFieldOrder order
Definition: gauge_field.h:51
void setPrecision(QudaPrecision precision, bool force_native=false)
Helper function for setting the precision and corresponding field order for QUDA internal fields.
Definition: gauge_field.h:173
QudaLinkType link_type
Definition: gauge_field.h:53
QudaFieldCreate create
Definition: gauge_field.h:60
QudaGhostExchange ghostExchange
Definition: lattice_field.h:77
#define printfQuda(...)
Definition: util_quda.h:114
#define warningQuda(...)
Definition: util_quda.h:132
#define errorQuda(...)
Definition: util_quda.h:120