QUDA  v0.5.0
A library for QCD on GPUs
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
unitarize_link_test.cpp
Go to the documentation of this file.
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <string.h>
4 #include <sys/time.h>
5 #include <cuda.h>
6 #include <cuda_runtime.h>
7 
8 #include "quda.h"
9 #include "gauge_field.h"
10 #include "test_util.h"
11 #include "llfat_reference.h"
12 #include "misc.h"
13 #include "util_quda.h"
14 #include "llfat_quda.h"
15 #include "fat_force_quda.h"
16 #include "hisq_links_quda.h"
17 #include "dslash_quda.h"
18 #include "hisq_force_quda.h"
19 
20 #ifdef MULTI_GPU
21 #include "face_quda.h"
22 #include "comm_quda.h"
23 #endif
24 
// Elapsed time in seconds between two struct timeval values: a is the earlier
// sample, b the later one. The tv_usec difference is scaled by 1e-6 to seconds.
// The arguments are pasted without parentheses, so pass plain variable names.
25 #define TDIFF(a,b) (b.tv_sec - a.tv_sec + 0.000001*(b.tv_usec - a.tv_usec))
26 
27 using namespace quda;
28 
29 
// Defined outside this file; called from main() when an option is not recognised.
30 extern void usage(char** argv);
31 
// Defined outside this file; not referenced by any line visible in this listing.
32 extern int device;
33 
// Tunables forwarded as a group to setUnitarizeLinksConstants() inside
// unitarize_link_test(). reunit_svd_only and max_allowed_error are also echoed
// by the test-info printer further down.
34 static double unitarize_eps = 1e-6;
35 static bool reunit_allow_svd = true;
36 static bool reunit_svd_only = false;
37 static double svd_rel_error = 1e-4;
38 static double svd_abs_error = 1e-5;
39 static double max_allowed_error = 1e-11;
// Not read by any line visible in this listing.
40 static bool check_unitarization = true;
41 
42 
// Lattice extents; copied into qudaGaugeParam.X[0..3] by unitarize_link_test()
// and given defaults in main().
43 extern int xdim, ydim, zdim, tdim;
// Handed to initComms() in main().
44 extern int gridsize_from_cmdline[];
45 
// NOTE(review): the listing's own numbering skips 46, 48 and 49 around here, so
// some declarations appear to have been dropped during extraction (cpu_prec,
// link_prec, link_recon and gauge_order are used below but not declared in the
// visible text) -- confirm against the real source file.
47 extern QudaPrecision prec;
50 
// Set from cpu_prec in unitarize_link_test() and used there as a multiplicative
// factor in host allocation sizes (presumably bytes per real number -- TODO confirm).
51 static size_t gSize;
52 
53 
54 static int
55 unitarize_link_test()
56 {
57 
58  QudaGaugeParam qudaGaugeParam = newQudaGaugeParam();
59 
60  initQuda(0);
61 
62  cpu_prec = prec;
63  gSize = cpu_prec;
64  qudaGaugeParam.anisotropy = 1.0;
65 
66  qudaGaugeParam.X[0] = xdim;
67  qudaGaugeParam.X[1] = ydim;
68  qudaGaugeParam.X[2] = zdim;
69  qudaGaugeParam.X[3] = tdim;
70 
71  setDims(qudaGaugeParam.X);
72 
75 
76  qudaGaugeParam.cpu_prec = link_prec;
77  qudaGaugeParam.cuda_prec = link_prec;
78  qudaGaugeParam.reconstruct = link_recon;
79  qudaGaugeParam.type = QUDA_WILSON_LINKS;
80 
81 
82 
83  qudaGaugeParam.t_boundary = QUDA_PERIODIC_T;
84  qudaGaugeParam.anisotropy = 1.0;
85  qudaGaugeParam.cuda_prec_sloppy = prec;
86  qudaGaugeParam.reconstruct_sloppy = QUDA_RECONSTRUCT_NO;
87  qudaGaugeParam.gauge_fix = QUDA_GAUGE_FIXED_NO;
88  qudaGaugeParam.ga_pad = 0;
89  qudaGaugeParam.gaugeGiB = 0;
90  qudaGaugeParam.preserve_gauge = false;
91 
92 
93  qudaGaugeParam.cpu_prec = cpu_prec;
94  qudaGaugeParam.cuda_prec = prec;
95  qudaGaugeParam.gauge_order = gauge_order;
96  qudaGaugeParam.type=QUDA_WILSON_LINKS;
97  qudaGaugeParam.reconstruct = link_recon;
101 
103 
104  GaugeFieldParam gParam(0, qudaGaugeParam);
105  gParam.pad = 0;
108 
109  gParam.pad = 0;
115  cudaGaugeField *cudaULink = new cudaGaugeField(gParam);
116 
117 #define QUDA_VER ((10000*QUDA_VERSION_MAJOR) + (100*QUDA_VERSION_MINOR) + QUDA_VERSION_SUBMINOR)
118 #if (QUDA_VER > 400)
119  quda::initLatticeConstants(*cudaFatLink);
120 #else
121  quda::initCommonConstants(*cudaFatLink);
122 #endif
123 
124 
125 
126 
127 
128  void* fatlink = (void*)malloc(4*V*gaugeSiteSize*gSize);
129  if(fatlink == NULL){
130  errorQuda("ERROR: allocating fatlink failed\n");
131  }
132 
133  void* sitelink[4];
134  for(int i=0;i < 4;i++){
135  cudaMallocHost((void**)&sitelink[i], V*gaugeSiteSize*gSize);
136  if(sitelink[i] == NULL){
137  errorQuda("ERROR; allocate sitelink[%d] failed\n", i);
138  }
139  }
140 
141 
142  createSiteLinkCPU(sitelink, qudaGaugeParam.cpu_prec, 1);
143 
144  double act_path_coeff[6];
145  act_path_coeff[0] = 0.625000;
146  act_path_coeff[1] = -0.058479;
147  act_path_coeff[2] = -0.087719;
148  act_path_coeff[3] = 0.030778;
149  act_path_coeff[4] = -0.007200;
150  act_path_coeff[5] = -0.123113;
151 
152 
153  //only record the last call's performance
154  //the first one is for creating the cpu/cuda data structures
155 
157  computeFatLinkQuda(fatlink, sitelink, act_path_coeff, &qudaGaugeParam,
159  } // gauge order is QDP_GAUGE_ORDER
160 
161 
162  void* fatlink_2d[4];
163  for(int dir=0; dir<4; ++dir){
164  fatlink_2d[dir] = (char*)fatlink + dir*V*gaugeSiteSize*gSize;
165  }
166 
167 
169  gParam.gauge = fatlink_2d;
170  cpuGaugeField *cpuOutLink = new cpuGaugeField(gParam);
171 
172  cudaFatLink->loadCPUField(*cpuOutLink, QUDA_CPU_FIELD_LOCATION);
173 
174  delete cpuOutLink;
175 
176  setUnitarizeLinksConstants(unitarize_eps,
177  max_allowed_error,
178  reunit_allow_svd,
179  reunit_svd_only,
180  svd_rel_error,
181  svd_abs_error);
182 
184 
185  int* num_failures_dev;
186  if(cudaMalloc(&num_failures_dev, sizeof(int)) != cudaSuccess){
187  errorQuda("cudaMalloc failed for num_failures_dev\n");
188  }
189  cudaMemset(num_failures_dev, 0, sizeof(int));
190 
191  struct timeval t0, t1;
192 
193  gettimeofday(&t0,NULL);
194  unitarizeLinksCuda(qudaGaugeParam,*cudaFatLink, cudaULink, num_failures_dev);
195  cudaDeviceSynchronize();
196  gettimeofday(&t1,NULL);
197 
198  int num_failures=0;
199  cudaMemcpy(&num_failures, num_failures_dev, sizeof(int), cudaMemcpyDeviceToHost);
200 
201  delete cpuOutLink;
202  delete cudaFatLink;
203  delete cudaULink;
204  for(int dir=0; dir<4; ++dir) cudaFreeHost(sitelink[dir]);
205  cudaFree(num_failures_dev);
206 #ifdef MULTI_GPU
208 #endif
209  endQuda();
210 
211  printfQuda("Unitarization time: %g ms\n", TDIFF(t0,t1)*1000);
212  return num_failures;
213 }
214 
// Prints a summary of the test configuration via printfQuda(): a header row,
// then one row with the reconstruct string, the four lattice extents, the
// unitarization-algorithm string and max_allowed_error. When MULTI_GPU is
// defined it also prints dimPartitioned(0..3) under an "X Y Z T" header.
//
// NOTE(review): the listing's own numbering skips 216 and 222. The line
// carrying the function name and parameter list is therefore missing, and so is
// the first argument of the second printfQuda() call below -- restore both from
// the real source file.
215 static void
217 {
218  printfQuda("running the following test:\n");
219 
220  printfQuda("link_precision link_reconstruct space_dimension T_dimension algorithm max allowed error\n");
 // The format has eight conversions but only seven arguments are visible; the
 // argument for the leading %s sat on the dropped listing line 222.
221  printfQuda("%s %s %d/%d/%d/ %d %s %g \n",
223  get_recon_str(link_recon),
224  xdim, ydim, zdim, tdim,
225  get_unitarization_str(reunit_svd_only),
226  max_allowed_error);
227 
228 #ifdef MULTI_GPU
229  printfQuda("Grid partition info: X Y Z T\n");
230  printfQuda(" %d %d %d %d\n",
231  dimPartitioned(0),
232  dimPartitioned(1),
233  dimPartitioned(2),
234  dimPartitioned(3));
235 #endif
236 
237  return ;
238 
239 }
240 
241 
// Test driver: sets default lattice/reconstruct values, lets
// process_command_line_option() consume the command line, initialises the
// communications layer, runs unitarize_link_test() and reports the number of
// failures (summed with comm_allreduce_int() when MULTI_GPU is defined).
//
// Returns EXIT_SUCCESS on every visible path, including when failures were
// counted.
//
// Change relative to the listed code: the "Failure rate" format string ended in
// a lone '%', which is not a valid printf conversion specification (undefined
// behaviour). The stray character is removed; the value printed is a fraction
// of 4*V*num_procs, not a percentage.
//
// NOTE(review): the listing's own numbering skips 248 and 262, so two
// statements of this function are not visible here -- restore them from the
// real source file.
242 int
243 main(int argc, char **argv)
244 {
245  //default to 18 reconstruct, 8^3 x 8
246  link_recon = QUDA_RECONSTRUCT_NO;
247  xdim=ydim=zdim=tdim=8;
249 
 // The index is passed by address to process_command_line_option(); a return
 // value of 0 is treated as "option accepted". Anything else is reported and
 // usage() is called.
250  int i;
251  for (i=1; i<argc; i++){
252  if(process_command_line_option(argc, argv, &i) == 0){
253  continue;
254  }
255 
256  fprintf(stderr, "ERROR: Invalid option:%s\n", argv[i]);
257  usage(argv);
258  }
259 
260  initComms(argc, argv, gridsize_from_cmdline);
261 
263  int num_failures = unitarize_link_test();
264  int num_procs = 1;
265 #ifdef MULTI_GPU
266  comm_allreduce_int(&num_failures);
267  num_procs = comm_size();
268 #endif
269 
270  printfQuda("Number of failures = %d\n", num_failures);
271  if(num_failures > 0){
 // Failures divided by 4*V*num_procs.
272  printfQuda("Failure rate = %lf\n", num_failures/(4.0*V*num_procs));
273  printfQuda("You may want to increase your error tolerance or vary the unitarization parameters\n");
274  }else{
275  printfQuda("Unitarization successfull!\n");
276  }
277  finalizeComms();
278 
279  return EXIT_SUCCESS;
280 }
281 
282