QUDA  1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
unitarize_link_test.cpp
Go to the documentation of this file.
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <string.h>
4 #include <sys/time.h>
5 
6 #include <cuda.h>
7 #include <cuda_runtime.h>
8 
9 #include "quda.h"
10 #include "gauge_field.h"
11 #include "test_util.h"
12 #include "llfat_reference.h"
13 #include "misc.h"
14 #include "util_quda.h"
15 #include "llfat_quda.h"
16 #include <unitarization_links.h>
17 #include "dslash_quda.h"
18 #include "ks_improved_force.h"
19 
20 #ifdef MULTI_GPU
21 #include "comm_quda.h"
22 #endif
23 
24 // google test frame work
25 #include <gtest/gtest.h>
26 
27 #define TDIFF(a,b) (b.tv_sec - a.tv_sec + 0.000001*(b.tv_usec - a.tv_usec))
28 
29 using namespace quda;
30 
31 
32 extern void usage(char** argv);
33 
34 extern int device;
35 
36 static double unitarize_eps = 1e-6;
37 static bool reunit_allow_svd = true;
38 static bool reunit_svd_only = false;
39 static double svd_rel_error = 1e-4;
40 static double svd_abs_error = 1e-4;
41 static double max_allowed_error = 1e-11;
42 
43 extern int xdim, ydim, zdim, tdim;
44 extern int gridsize_from_cmdline[];
45 
46 extern bool verify_results;
47 
49 extern QudaPrecision prec;
52 
55 
56 const double tol = (prec == QUDA_DOUBLE_PRECISION) ? 1e-10 : 1e-6;
57 
58 TEST(unitarization, verify) {
59  unitarizeLinksCPU(*cpuULink, *cpuFatLink);
60  cudaULink->saveCPUField(*cudaResult);
61 
62  int res = compare_floats(cudaResult->Gauge_p(), cpuULink->Gauge_p(),
63  4*cudaResult->Volume()*gaugeSiteSize, tol, cpu_prec);
64 
65 #ifdef MULTI_GPU
66  comm_allreduce_int(&res);
67  res /= comm_size();
68 #endif
69 
70  ASSERT_EQ(res,1) << "CPU and CUDA implementations do not agree";
71 }
72 
73 static int unitarize_link_test(int &test_rc)
74 {
76 
78 
79  qudaGaugeParam.anisotropy = 1.0;
80 
81  qudaGaugeParam.X[0] = xdim;
82  qudaGaugeParam.X[1] = ydim;
83  qudaGaugeParam.X[2] = zdim;
84  qudaGaugeParam.X[3] = tdim;
85 
86  setDims(qudaGaugeParam.X);
87 
88  qudaGaugeParam.type = QUDA_WILSON_LINKS;
89 
90  qudaGaugeParam.t_boundary = QUDA_PERIODIC_T;
91  qudaGaugeParam.anisotropy = 1.0;
92  qudaGaugeParam.gauge_fix = QUDA_GAUGE_FIXED_NO;
93  qudaGaugeParam.ga_pad = 0;
94  qudaGaugeParam.cpu_prec = cpu_prec;
95  qudaGaugeParam.cuda_prec = prec;
96  qudaGaugeParam.cuda_prec_sloppy = prec;
97 
99  errorQuda("Unsupported gauge order %d", gauge_order);
100 
101  qudaGaugeParam.gauge_order = gauge_order;
102  qudaGaugeParam.type=QUDA_WILSON_LINKS;
103  qudaGaugeParam.reconstruct = link_recon;
104  qudaGaugeParam.reconstruct_sloppy = qudaGaugeParam.reconstruct;
105 
106  qudaGaugeParam.llfat_ga_pad = qudaGaugeParam.site_ga_pad = qudaGaugeParam.ga_pad = qudaGaugeParam.staple_pad = 0;
107 
108  GaugeFieldParam gParam(0, qudaGaugeParam);
109  gParam.pad = 0;
110  gParam.link_type = QUDA_GENERAL_LINKS;
112  gParam.order = gauge_order;
113 
114  TimeProfile profile("dummy");
115 
116  void* fatlink = (void*)malloc(4*V*gaugeSiteSize*cpu_prec);
117  if(fatlink == NULL){
118  errorQuda("ERROR: allocating fatlink failed\n");
119  }
120 
121  void* sitelink[4];
122  for(int i=0;i < 4;i++){
123  cudaMallocHost((void**)&sitelink[i], V*gaugeSiteSize*cpu_prec);
124  if(sitelink[i] == NULL){
125  errorQuda("ERROR; allocate sitelink[%d] failed\n", i);
126  }
127  }
128 
129  createSiteLinkCPU(sitelink, qudaGaugeParam.cpu_prec, 1);
130  void* inlink = (void*)malloc(4*V*gaugeSiteSize*cpu_prec);
131 
132  if (cpu_prec == QUDA_DOUBLE_PRECISION){
133  double* link = reinterpret_cast<double*>(inlink);
134  for(int dir=0; dir<4; ++dir){
135  double* slink = reinterpret_cast<double*>(sitelink[dir]);
136  for(int i=0; i<V; ++i){
137  for(int j=0; j<gaugeSiteSize; j++){
138  link[(i*4 + dir)*gaugeSiteSize + j] = slink[i*gaugeSiteSize + j];
139  }
140  }
141  }
142  } else if(cpu_prec == QUDA_SINGLE_PRECISION){
143  float* link = reinterpret_cast<float*>(inlink);
144  for(int dir=0; dir<4; ++dir){
145  float* slink = reinterpret_cast<float*>(sitelink[dir]);
146  for(int i=0; i<V; ++i){
147  for(int j=0; j<gaugeSiteSize; j++){
148  link[(i*4 + dir)*gaugeSiteSize + j] = slink[i*gaugeSiteSize + j];
149  }
150  }
151  }
152  }
153 
155  gParam.gauge = fatlink;
156  cpuFatLink = new cpuGaugeField(gParam);
157 
159  cpuULink = new cpuGaugeField(gParam);
160 
162  cudaResult = new cpuGaugeField(gParam);
163 
164  gParam.pad = 0;
167  gParam.setPrecision(prec, true);
168  cudaFatLink = new cudaGaugeField(gParam);
169  cudaULink = new cudaGaugeField(gParam);
170 
171  { // create fat links
172  double act_path_coeff[6];
173  act_path_coeff[0] = 0.625000;
174  act_path_coeff[1] = -0.058479;
175  act_path_coeff[2] = -0.087719;
176  act_path_coeff[3] = 0.030778;
177  act_path_coeff[4] = -0.007200;
178  act_path_coeff[5] = -0.123113;
179 
180  computeKSLinkQuda(fatlink, NULL, NULL, inlink, act_path_coeff, &qudaGaugeParam);
181 
182  cudaFatLink->loadCPUField(*cpuFatLink);
183  }
184 
190  svd_abs_error);
191 
192  int *num_failures_h = (int*)mapped_malloc(sizeof(int));
193  int *num_failures_d = nullptr;
194  cudaError_t error = cudaHostGetDevicePointer(&num_failures_d, num_failures_h, 0);
195  if (error != cudaSuccess) errorQuda("cudaHostGetDevicePointer failed with error: %s", cudaGetErrorString(error));
196  *num_failures_h = 0;
197 
198  struct timeval t0, t1;
199 
200  gettimeofday(&t0,NULL);
201  unitarizeLinks(*cudaULink, *cudaFatLink, num_failures_d);
202  cudaDeviceSynchronize();
203  gettimeofday(&t1,NULL);
204 
205  if (verify_results) {
206  test_rc = RUN_ALL_TESTS();
207  if (test_rc != 0) warningQuda("Tests failed");
208  }
209 
210  delete cudaResult;
211  delete cpuULink;
212  delete cpuFatLink;
213  delete cudaFatLink;
214  delete cudaULink;
215  for(int dir=0; dir<4; ++dir) cudaFreeHost(sitelink[dir]);
216 
217  free(fatlink);
218 
220  host_free(num_failures_h);
221 
222  free(inlink);
223 #ifdef MULTI_GPU
225 #endif
226  endQuda();
227 
228  printfQuda("Unitarization time: %g ms\n", TDIFF(t0,t1)*1000);
229  return num_failures;
230 }
231 
232  static void
234 {
235  printfQuda("running the following test:\n");
236 
237  printfQuda("link_precision link_reconstruct space_dimension T_dimension algorithm max allowed error deviation tolerance\n");
238  printfQuda("%8s %s %d/%d/%d/ %d %s %g %g\n",
241  xdim, ydim, zdim, tdim,
244  tol);
245 
246 #ifdef MULTI_GPU
247  printfQuda("Grid partition info: X Y Z T\n");
248  printfQuda(" %d %d %d %d\n",
249  dimPartitioned(0),
250  dimPartitioned(1),
251  dimPartitioned(2),
252  dimPartitioned(3));
253 #endif
254 
255  return ;
256 
257 }
258 
259 
260 int main(int argc, char **argv)
261 {
262  // initalize google test, includes command line options
263  ::testing::InitGoogleTest(&argc, argv);
264  int test_rc;
265 
266  //default to 18 reconstruct, 8^3 x 8
268  xdim=ydim=zdim=tdim=8;
269 
270  int i;
271  for (i=1; i<argc; i++){
272  if(process_command_line_option(argc, argv, &i) == 0){
273  continue;
274  }
275 
276  fprintf(stderr, "ERROR: Invalid option:%s\n", argv[i]);
277  usage(argv);
278  }
279 
280  initComms(argc, argv, gridsize_from_cmdline);
281 
283  int num_failures = unitarize_link_test(test_rc);
284  int num_procs = 1;
285 #ifdef MULTI_GPU
286  comm_allreduce_int(&num_failures);
287  num_procs = comm_size();
288 #endif
289 
290  printfQuda("Number of failures = %d\n", num_failures);
291  if(num_failures > 0){
292  printfQuda("Failure rate = %lf\n", num_failures/(4.0*V*num_procs));
293  printfQuda("You may want to increase the error tolerance or vary the unitarization parameters\n");
294  }else{
295  printfQuda("Unitarization successfull!\n");
296  }
297  finalizeComms();
298 
299  return test_rc;
300 }
301 
302 
static QudaGaugeParam qudaGaugeParam
int dimPartitioned(int dim)
Definition: test_util.cpp:1776
QudaReconstructType reconstruct_sloppy
Definition: quda.h:53
double anisotropy
Definition: quda.h:38
QudaGhostExchange ghostExchange
Definition: lattice_field.h:76
void endQuda(void)
enum QudaPrecision_s QudaPrecision
int ga_pad
Definition: quda.h:63
void saveCPUField(cpuGaugeField &cpu) const
Upload from this field into a CPU field.
QudaGaugeFixed gauge_fix
Definition: quda.h:61
QudaLinkType type
Definition: quda.h:42
#define errorQuda(...)
Definition: util_quda.h:121
void setUnitarizeLinksConstants(double unitarize_eps, double max_error, bool allow_svd, bool svd_only, double svd_rel_error, double svd_abs_error)
#define host_free(ptr)
Definition: malloc_quda.h:71
int process_command_line_option(int argc, char **argv, int *idx)
Definition: test_util.cpp:2019
void finalizeComms()
Definition: test_util.cpp:128
void exchange_llfat_cleanup(void)
QudaGaugeFieldOrder gauge_order
Definition: quda.h:43
void computeKSLinkQuda(void *fatlink, void *longlink, void *ulink, void *inlink, double *path_coeff, QudaGaugeParam *param)
int compare_floats(void *a, void *b, int len, double epsilon, QudaPrecision precision)
Definition: test_util.cpp:434
const char * get_prec_str(QudaPrecision prec)
Definition: misc.cpp:701
void createSiteLinkCPU(void **link, QudaPrecision precision, int phase)
Definition: test_util.cpp:1227
int num_failures
void setDims(int *)
Definition: test_util.cpp:151
int llfat_ga_pad
Definition: quda.h:68
void loadCPUField(const cpuGaugeField &cpu)
Download into this field from a CPU field.
const char * get_unitarization_str(bool svd_only)
Definition: misc.cpp:728
int comm_size(void)
Definition: comm_mpi.cpp:88
void initQuda(int device)
void unitarizeLinks(cudaGaugeField &outfield, const cudaGaugeField &infield, int *fails)
int site_ga_pad
Definition: quda.h:65
const char * get_recon_str(QudaReconstructType recon)
Definition: misc.cpp:768
int staple_pad
Definition: quda.h:67
QudaGaugeFieldOrder order
Definition: gauge_field.h:17
#define warningQuda(...)
Definition: util_quda.h:133
QudaPrecision cuda_prec_sloppy
Definition: quda.h:52
enum QudaGaugeFieldOrder_s QudaGaugeFieldOrder
int Volume() const
QudaReconstructType reconstruct
Definition: quda.h:50
QudaPrecision cuda_prec
Definition: quda.h:49
static int * num_failures_h
int X[4]
Definition: quda.h:36
int V
Definition: test_util.cpp:27
static int * num_failures_d
void setPrecision(QudaPrecision precision, bool force_native=false)
Helper function for setting the precision and corresponding field order for QUDA internal fields...
Definition: gauge_field.h:131
void unitarizeLinksCPU(cpuGaugeField &outfield, const cpuGaugeField &infield)
GaugeFieldParam gParam
enum QudaReconstructType_s QudaReconstructType
Main header file for the QUDA library.
QudaLinkType link_type
Definition: gauge_field.h:19
#define printfQuda(...)
Definition: util_quda.h:115
QudaTboundary t_boundary
Definition: quda.h:45
void comm_allreduce_int(int *data)
Definition: comm_mpi.cpp:304
QudaReconstructType reconstruct
Definition: gauge_field.h:16
QudaFieldCreate create
Definition: gauge_field.h:26
void * fatlink
#define mapped_malloc(size)
Definition: malloc_quda.h:68
void initComms(int argc, char **argv, int *const commDims)
Definition: test_util.cpp:88
QudaPrecision cpu_prec
Definition: quda.h:47
#define gaugeSiteSize
Definition: face_gauge.cpp:34
QudaGaugeParam newQudaGaugeParam(void)