QUDA  v0.5.0
A library for QCD on GPUs
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
hisq_unitarize_force_test.cpp
Go to the documentation of this file.
1 #include <cstdio>
2 #include <cstdlib>
3 #include <cstring>
4 
5 #include <quda.h>
6 #include "test_util.h"
7 #include "gauge_field.h"
8 #include "fat_force_quda.h"
9 #include "misc.h"
10 #include "hisq_force_reference.h"
11 #include "hisq_force_quda.h"
12 #include "hw_quda.h"
13 #include <sys/time.h>
14 #include <dslash_quda.h>
15 
16 using namespace quda;
17 extern void usage(char** argv);
20 
23 
26 
27 
29 
31 
32 
34 double accuracy = 1e-5;
35 int ODD_BIT = 1;
36 extern int device;
37 extern int xdim, ydim, zdim, tdim;
38 extern int gridsize_from_cmdline[];
39 
41 extern QudaPrecision prec;
46 
47 void setPrecision(QudaPrecision precision)
48 {
49  link_prec = precision;
50  return;
51 }
52 
53 
54 
55 
56 // Create a field of links that are not su3_matrices
57 void createNoisyLinkCPU(void** field, QudaPrecision prec, int seed)
58 {
59  createSiteLinkCPU(field, prec, 0);
60 
61  srand(seed);
62  for(int dir=0; dir<4; ++dir){
63  for(int i=0; i<V*18; ++i){
64  if(prec == QUDA_DOUBLE_PRECISION){
65  double* ptr = ((double**)field)[dir] + i;
66  *ptr += (rand() - RAND_MAX/2.0)/(20.0*RAND_MAX);
67  }else if(prec == QUDA_SINGLE_PRECISION){
68  float* ptr = ((float**)field)[dir]+i;
69  *ptr += (rand() - RAND_MAX/2.0)/(20.0*RAND_MAX);
70  }
71  }
72  }
73  return;
74 }
75 
76 
77 
78 // allocate memory
79 // set the layout, etc.
// Test set-up. In the lines visible here it copies the lattice extents
// (xdim, ydim, zdim, tdim) into gaugeParam.X, sets gParam.anisotropy to 1,
// computes a seed and calls loadCPUField on cudaFatLink with cpuFatLink.
// NOTE(review): this listing omits several original lines (the embedded
// numbering jumps, e.g. 82 -> 84, 91 -> 99, 110 -> 113), so the declarations
// and allocations of gaugeParam, gParam and the cpu*/cuda* fields are not
// visible here.
80 static void
81 hisq_force_init()
82 {
84 
85  gaugeParam.X[0] = xdim;
86  gaugeParam.X[1] = ydim;
87  gaugeParam.X[2] = zdim;
88  gaugeParam.X[3] = tdim;
89 
91 
99  gParam.anisotropy = 1;
100 
104 
105  // create "gauge fields"
// The seed starts at 0 and, in MULTI_GPU builds, is offset by comm_rank().
// Its consumer is on lines absent from this listing -- presumably the field
// creation calls; confirm against the full source.
106  int seed=0;
107 #ifdef MULTI_GPU
108  seed += comm_rank();
109 #endif
110 
113 
117 
// Presumably uploads the host fat-link field into the device-side field
// (inferred from the cpu*/cuda* naming and QUDA_CPU_FIELD_LOCATION) -- confirm.
118  cudaFatLink->loadCPUField(*cpuFatLink, QUDA_CPU_FIELD_LOCATION);
120 
121 
123  return;
124 }
125 
126 
127 static void
128 hisq_force_end()
129 {
130  delete cpuFatLink;
131  delete cpuOprod;
132  delete cpuResult;
133 
134  delete cudaFatLink;
135  delete cudaOprod;
136  delete cudaResult;
137 
138  delete cpuReference;
139 
140  endQuda();
141  return;
142 }
143 
// Runs the unitarize-force test end to end: set-up via hisq_force_init(),
// constant initialisation, a call to fermion_force::unitarizeForceCuda, an
// optional per-direction comparison of cpuResult against cpuReference, and
// tear-down via hisq_force_end().
// NOTE(review): this listing omits some original lines (embedded numbers 148,
// 183 and 185 are absent), so the CPU reference computation and the copy of
// the GPU result back into cpuResult are not visible here.
144 static void
145 hisq_force_test()
146 {
147  hisq_force_init();
149 
// QUDA_VER packs the version as 10000*major + 100*minor + subminor; values
// above 400 select initLatticeConstants, otherwise initCommonConstants.
150 #define QUDA_VER ((10000*QUDA_VERSION_MAJOR) + (100*QUDA_VERSION_MINOR) + QUDA_VERSION_SUBMINOR)
151 #if (QUDA_VER > 400)
152  initLatticeConstants(*cudaFatLink);
153 #else
154  initCommonConstants(*cudaFatLink);
155 #endif
156  initGaugeConstants(*cudaFatLink);
157 
158 
// Parameters forwarded unchanged to setUnitarizeForceConstants below.
159  double unitarize_eps = 1e-5;
160  const double hisq_force_filter = 5e-5;
161  const double max_det_error = 1e-12;
162  const bool allow_svd = true;
163  const bool svd_only = false;
164  const double svd_rel_err = 1e-8;
165  const double svd_abs_err = 1e-8;
166 
167  fermion_force::setUnitarizeForceConstants(unitarize_eps, hisq_force_filter, max_det_error, allow_svd, svd_only, svd_rel_err, svd_abs_err);
168 
169 
170 
// Single int in device memory, zeroed and passed to unitarizeForceCuda.
// NOTE(review): the cudaMemset return code is not checked, and no visible
// line reads the counter back or calls cudaFree(num_failures_dev) -- confirm
// against the full source and release the allocation if it really is leaked.
171  int* num_failures_dev;
172  if(cudaMalloc(&num_failures_dev, sizeof(int)) != cudaSuccess){
173  errorQuda("cudaMalloc failed for num_failures_dev\n");
174  }
175  cudaMemset(num_failures_dev, 0, sizeof(int));
176 
177  printfQuda("Calling unitarizeForceCuda\n");
178  fermion_force::unitarizeForceCuda(gaugeParam, *cudaOprod, *cudaFatLink, cudaResult, num_failures_dev);
179 
180 
// Only the log message is visible in this branch; the statement that follows
// it in the original (listing line 183) is absent from this listing.
181  if(verify_results){
182  printfQuda("Calling unitarizeForceCPU\n");
184  }
186 
187  if(verify_results){
188  printfQuda("Comparing CPU and GPU results\n");
189  for(int dir=0; dir<4; ++dir){
190  int res = compare_floats(((char**)cpuReference->Gauge_p())[dir], ((char**)cpuResult->Gauge_p())[dir], cpuReference->Volume()*gaugeSiteSize, accuracy, gaugeParam.cpu_prec);
191 #ifdef MULTI_GPU
// Presumably comm_allreduce_int sums res over all ranks; the integer division
// by comm_size() then yields 1 only if every rank reported 1 -- confirm.
192  comm_allreduce_int(&res);
193  res /= comm_size();
194 #endif
195  printfQuda("Dir:%d Test %s\n",dir,(1 == res) ? "PASSED" : "FAILED");
196  }
197 
198  }
199 
200  hisq_force_end();
201 }
202 
203 
// Prints a banner, a column header and one row describing the test
// configuration through printfQuda.
// NOTE(review): the line carrying this function's name (listing line 205) and
// the two arguments matching the "%s %s" conversions (listing lines 211-212)
// are absent from this listing.
204 static void
206 {
207  printfQuda("running the following fermion force computation test:\n");
208 
209  printfQuda("link_precision link_reconstruct space_dim(x/y/z) T_dimension\n");
210  printfQuda("%s %s %d/%d/%d %d \n",
// The four %d conversions receive the lattice extents.
213  xdim, ydim, zdim, tdim);
214  return ;
215 
216 }
217 
// Print the command-line options specific to this test to stdout.
// argv is accepted for signature compatibility and is not used.
void
usage_extra(char** argv )
{
  fputs("Extra options: \n"
        " --no_verify # Do not verify the GPU results using CPU results\n",
        stdout);
}
225 
// Program entry point: parses the command line, initialises communications,
// runs hisq_force_test() and finalises communications.
// NOTE(review): listing lines 247-250 are absent here, so any calls made
// between initComms() and hisq_force_test() are not visible.
226 int
227 main(int argc, char **argv)
228 {
229  int i;
230  for (i =1;i < argc; i++){
231 
// A return value of 0 is treated as "option consumed"; &i is passed so the
// helper can presumably advance past an option's value -- confirm.
232  if(process_command_line_option(argc, argv, &i) == 0){
233  continue;
234  }
235 
// Test-specific switch: disables the CPU/GPU comparison.
236  if( strcmp(argv[i], "--no_verify") == 0){
237  verify_results=0;
238  continue;
239  }
240 
241 
// Anything else is reported as invalid.
// NOTE(review): unless usage() terminates the process, the loop carries on
// with the next argument -- confirm.
242  fprintf(stderr, "ERROR: Invalid option:%s\n", argv[i]);
243  usage(argv);
244  }
245 
246  initComms(argc, argv, gridsize_from_cmdline);
247 
249 
251 
252  hisq_force_test();
253 
254  finalizeComms();
255 
256  return EXIT_SUCCESS;
257 }
258