QUDA  0.9.0
unitarize_link_test.cpp
Go to the documentation of this file.
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <string.h>
4 #include <sys/time.h>
5 
6 #include <cuda.h>
7 #include <cuda_runtime.h>
8 
9 #include "quda.h"
10 #include "gauge_field.h"
11 #include "test_util.h"
12 #include "llfat_reference.h"
13 #include "misc.h"
14 #include "util_quda.h"
15 #include "llfat_quda.h"
16 #include <unitarization_links.h>
17 #include "dslash_quda.h"
18 #include "ks_improved_force.h"
19 
20 #ifdef MULTI_GPU
21 #include "comm_quda.h"
22 #endif
23 
24 // google test frame work
25 #include <gtest.h>
26 
27 #define TDIFF(a,b) (b.tv_sec - a.tv_sec + 0.000001*(b.tv_usec - a.tv_usec))
28 
29 using namespace quda;
30 
31 
32 extern void usage(char** argv);
33 
34 extern int device;
35 
36 static double unitarize_eps = 1e-6;
37 static bool reunit_allow_svd = true;
38 static bool reunit_svd_only = false;
39 static double svd_rel_error = 1e-4;
40 static double svd_abs_error = 1e-4;
41 static double max_allowed_error = 1e-11;
42 
43 extern int xdim, ydim, zdim, tdim;
44 extern int gridsize_from_cmdline[];
45 
46 extern bool verify_results;
47 
49 extern QudaPrecision prec;
52 
55 
56 const double tol = (prec == QUDA_DOUBLE_PRECISION) ? 1e-10 : 1e-6;
57 
58 TEST(unitarization, verify) {
61 
64 
65 #ifdef MULTI_GPU
66  comm_allreduce_int(&res);
67  res /= comm_size();
68 #endif
69 
70  ASSERT_EQ(res,1) << "CPU and CUDA implementations do not agree";
71 }
72 
73 static int unitarize_link_test(int &test_rc)
74 {
76 
78 
80 
81  qudaGaugeParam.X[0] = xdim;
82  qudaGaugeParam.X[1] = ydim;
83  qudaGaugeParam.X[2] = zdim;
84  qudaGaugeParam.X[3] = tdim;
85 
87 
89 
98 
100  errorQuda("Unsupported gauge order %d", gauge_order);
101 
106 
108 
110  gParam.pad = 0;
114 
115  TimeProfile profile("dummy");
116 
117  void* fatlink = (void*)malloc(4*V*gaugeSiteSize*cpu_prec);
118  if(fatlink == NULL){
119  errorQuda("ERROR: allocating fatlink failed\n");
120  }
121 
122  void* sitelink[4];
123  for(int i=0;i < 4;i++){
124  cudaMallocHost((void**)&sitelink[i], V*gaugeSiteSize*cpu_prec);
125  if(sitelink[i] == NULL){
126  errorQuda("ERROR; allocate sitelink[%d] failed\n", i);
127  }
128  }
129 
131  void* inlink = (void*)malloc(4*V*gaugeSiteSize*cpu_prec);
132 
134  double* link = reinterpret_cast<double*>(inlink);
135  for(int dir=0; dir<4; ++dir){
136  double* slink = reinterpret_cast<double*>(sitelink[dir]);
137  for(int i=0; i<V; ++i){
138  for(int j=0; j<gaugeSiteSize; j++){
139  link[(i*4 + dir)*gaugeSiteSize + j] = slink[i*gaugeSiteSize + j];
140  }
141  }
142  }
143  } else if(cpu_prec == QUDA_SINGLE_PRECISION){
144  float* link = reinterpret_cast<float*>(inlink);
145  for(int dir=0; dir<4; ++dir){
146  float* slink = reinterpret_cast<float*>(sitelink[dir]);
147  for(int i=0; i<V; ++i){
148  for(int j=0; j<gaugeSiteSize; j++){
149  link[(i*4 + dir)*gaugeSiteSize + j] = slink[i*gaugeSiteSize + j];
150  }
151  }
152  }
153  }
154 
156  gParam.gauge = fatlink;
158 
161 
164 
165  gParam.pad = 0;
171 
172  { // create fat links
173  double act_path_coeff[6];
174  act_path_coeff[0] = 0.625000;
175  act_path_coeff[1] = -0.058479;
176  act_path_coeff[2] = -0.087719;
177  act_path_coeff[3] = 0.030778;
178  act_path_coeff[4] = -0.007200;
179  act_path_coeff[5] = -0.123113;
180 
181  computeKSLinkQuda(fatlink, NULL, NULL, inlink, act_path_coeff, &qudaGaugeParam);
182 
184  }
185 
191  svd_abs_error);
192 
193  int* num_failures_dev;
194  if(cudaMalloc(&num_failures_dev, sizeof(int)) != cudaSuccess){
195  errorQuda("cudaMalloc failed for num_failures_dev\n");
196  }
197  cudaMemset(num_failures_dev, 0, sizeof(int));
198 
199  struct timeval t0, t1;
200 
201  gettimeofday(&t0,NULL);
203  cudaDeviceSynchronize();
204  gettimeofday(&t1,NULL);
205 
206  int num_failures=0;
207  cudaMemcpy(&num_failures, num_failures_dev, sizeof(int), cudaMemcpyDeviceToHost);
208 
209  if (verify_results) {
210  test_rc = RUN_ALL_TESTS();
211  if (test_rc != 0) warningQuda("Tests failed");
212  }
213 
214  delete cudaResult;
215  delete cpuULink;
216  delete cpuFatLink;
217  delete cudaFatLink;
218  delete cudaULink;
219  for(int dir=0; dir<4; ++dir) cudaFreeHost(sitelink[dir]);
220 
221  free(fatlink);
222 
223  cudaFree(num_failures_dev);
224 
225  free(inlink);
226 #ifdef MULTI_GPU
228 #endif
229  endQuda();
230 
231  printfQuda("Unitarization time: %g ms\n", TDIFF(t0,t1)*1000);
232  return num_failures;
233 }
234 
235  static void
237 {
238  printfQuda("running the following test:\n");
239 
240  printfQuda("link_precision link_reconstruct space_dimension T_dimension algorithm max allowed error deviation tolerance\n");
241  printfQuda("%8s %s %d/%d/%d/ %d %s %g %g\n",
244  xdim, ydim, zdim, tdim,
247  tol);
248 
249 #ifdef MULTI_GPU
250  printfQuda("Grid partition info: X Y Z T\n");
251  printfQuda(" %d %d %d %d\n",
252  dimPartitioned(0),
253  dimPartitioned(1),
254  dimPartitioned(2),
255  dimPartitioned(3));
256 #endif
257 
258  return ;
259 
260 }
261 
262 
263 int main(int argc, char **argv)
264 {
265  // initalize google test, includes command line options
266  ::testing::InitGoogleTest(&argc, argv);
267  int test_rc;
268 
269  //default to 18 reconstruct, 8^3 x 8
271  xdim=ydim=zdim=tdim=8;
272 
273  int i;
274  for (i=1; i<argc; i++){
275  if(process_command_line_option(argc, argv, &i) == 0){
276  continue;
277  }
278 
279  fprintf(stderr, "ERROR: Invalid option:%s\n", argv[i]);
280  usage(argv);
281  }
282 
283  initComms(argc, argv, gridsize_from_cmdline);
284 
286  int num_failures = unitarize_link_test(test_rc);
287  int num_procs = 1;
288 #ifdef MULTI_GPU
290  num_procs = comm_size();
291 #endif
292 
293  printfQuda("Number of failures = %d\n", num_failures);
294  if(num_failures > 0){
295  printfQuda("Failure rate = %lf\n", num_failures/(4.0*V*num_procs));
296  printfQuda("You may want to increase the error tolerance or vary the unitarization parameters\n");
297  }else{
298  printfQuda("Unitarization successfull!\n");
299  }
300  finalizeComms();
301 
302  return test_rc;
303 }
304 
305 
static QudaGaugeParam qudaGaugeParam
int dimPartitioned(int dim)
Definition: test_util.cpp:1686
QudaReconstructType reconstruct_sloppy
Definition: quda.h:46
double anisotropy
Definition: quda.h:31
QudaGhostExchange ghostExchange
Definition: lattice_field.h:60
void endQuda(void)
void free(void *)
enum QudaPrecision_s QudaPrecision
int ga_pad
Definition: quda.h:53
void saveCPUField(cpuGaugeField &cpu) const
Upload from this field into a CPU field.
QudaGaugeFixed gauge_fix
Definition: quda.h:51
QudaLinkType type
Definition: quda.h:35
#define errorQuda(...)
Definition: util_quda.h:90
void setUnitarizeLinksConstants(double unitarize_eps, double max_error, bool allow_svd, bool svd_only, double svd_rel_error, double svd_abs_error)
void setPrecision(QudaPrecision precision)
Helper function for setting the precision and corresponding field order for QUDA internal fields...
Definition: gauge_field.h:113
int * num_failures_dev
int process_command_line_option(int argc, char **argv, int *idx)
Definition: test_util.cpp:1795
void finalizeComms()
Definition: test_util.cpp:107
QudaGaugeFieldOrder gauge_order
Definition: quda.h:36
void computeKSLinkQuda(void *fatlink, void *longlink, void *ulink, void *inlink, double *path_coeff, QudaGaugeParam *param)
int compare_floats(void *a, void *b, int len, double epsilon, QudaPrecision precision)
Definition: test_util.cpp:437
const char * get_prec_str(QudaPrecision prec)
Definition: misc.cpp:704
void createSiteLinkCPU(void **link, QudaPrecision precision, int phase)
Definition: test_util.cpp:1229
int num_failures
void setDims(int *)
Definition: test_util.cpp:130
int llfat_ga_pad
Definition: quda.h:58
else return(__swbuf(_c, _p))
void loadCPUField(const cpuGaugeField &cpu)
Download into this field from a CPU field.
const char * get_unitarization_str(bool svd_only)
Definition: misc.cpp:730
int comm_size(void)
Definition: comm_mpi.cpp:126
void initQuda(int device)
void unitarizeLinks(cudaGaugeField &outfield, const cudaGaugeField &infield, int *fails)
void * malloc(size_t __size) __attribute__((__warn_unused_result__)) __attribute__((alloc_size(1)))
int site_ga_pad
Definition: quda.h:55
const char * get_recon_str(QudaReconstructType recon)
Definition: misc.cpp:770
int staple_pad
Definition: quda.h:57
int V
Definition: test_util.cpp:28
#define gaugeSiteSize
Definition: test_util.h:6
QudaGaugeFieldOrder order
Definition: gauge_field.h:15
#define warningQuda(...)
Definition: util_quda.h:101
QudaPrecision cuda_prec_sloppy
Definition: quda.h:45
enum QudaGaugeFieldOrder_s QudaGaugeFieldOrder
int Volume() const
QudaReconstructType reconstruct
Definition: quda.h:43
QudaPrecision cuda_prec
Definition: quda.h:42
int X[4]
Definition: quda.h:29
int fprintf(FILE *, const char *,...) __attribute__((__format__(__printf__
void exchange_llfat_cleanup(void)
void unitarizeLinksCPU(cpuGaugeField &outfield, const cpuGaugeField &infield)
double gaugeGiB
Definition: quda.h:60
GaugeFieldParam gParam
void * fatlink[4]
enum QudaReconstructType_s QudaReconstructType
Main header file for the QUDA library.
QudaLinkType link_type
Definition: gauge_field.h:17
#define printfQuda(...)
Definition: util_quda.h:84
QudaTboundary t_boundary
Definition: quda.h:38
void comm_allreduce_int(int *data)
Definition: comm_mpi.cpp:305
QudaReconstructType reconstruct
Definition: gauge_field.h:14
QudaFieldCreate create
Definition: gauge_field.h:25
void initComms(int argc, char **argv, const int *commDims)
Definition: test_util.cpp:72
QudaPrecision cpu_prec
Definition: quda.h:40
QudaGaugeParam newQudaGaugeParam(void)