QUDA  v0.5.0
A library for QCD on GPUs
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
fermion_force_test.cpp
Go to the documentation of this file.
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <string.h>
4 
5 #include <quda.h>
6 #include "test_util.h"
7 #include "gauge_field.h"
8 #include "fat_force_quda.h"
9 #include "misc.h"
11 #include "fermion_force_quda.h"
12 #include "hw_quda.h"
13 #include <sys/time.h>
14 #include <dslash_quda.h>
15 
16 using namespace quda;
17 
18 extern void usage(char** argv);
19 extern int device;
22 
26 
27 static FullHw cudaHw;
29 static void* hw; //the array of half_wilson_vector
30 
31 extern int gridsize_from_cmdline[];
32 
34 
35 int ODD_BIT = 1;
36 extern int xdim, ydim, zdim, tdim;
37 
40 extern QudaPrecision prec;
43 
45 
46 static void
47 fermion_force_init()
48 {
50  //cudaSetDevice(dev); CUERR;
51 
52  gaugeParam.X[0] = xdim;
53  gaugeParam.X[1] = ydim;
54  gaugeParam.X[2] = zdim;
55  gaugeParam.X[3] = tdim;
57 
61 
63 
66 
67  cpuGauge = new cpuGaugeField(gParam);
68 
69  // this is a hack to have site link generated in 2d
70  // then copied to 1d array in "MILC" format
71  void* siteLink_2d[4];
72  for(int i=0;i < 4;i++){
73  siteLink_2d[i] = malloc(cpuGauge->Volume()*gaugeSiteSize*gaugeParam.cpu_prec);
74  if (siteLink_2d[i] == NULL){
75  errorQuda("ERROR: malloc failed for siteLink_2d\n");
76  }
77  }
78 
79  // fills the gauge field with random numbers
80  createSiteLinkCPU(siteLink_2d, gaugeParam.cpu_prec, 1);
81 
82  //copy the 2d sitelink to 1d milc format
83  for(int dir=0;dir < 4; dir++){
84  for(int i=0;i < cpuGauge->Volume(); i++){
85  char* src = ((char*)siteLink_2d[dir]) + i * gaugeSiteSize* gaugeParam.cpu_prec;
86  char* dst = ((char*)cpuGauge->Gauge_p()) + (4*i+dir)*gaugeSiteSize*gaugeParam.cpu_prec ;
87  memcpy(dst, src, gaugeSiteSize*gaugeParam.cpu_prec);
88  }
89  }
90 
91  for(int i=0;i < 4;i++){
92  free(siteLink_2d[i]);
93  }
94 
95 #if 0
97 #endif
98 
99  //gaugeParam.site_ga_pad = gaugeParam.ga_pad = 0;
100  //gaugeParam.reconstruct = link_recon;
101 
102  gParam.precision = gaugeParam.cuda_prec;
103  gParam.reconstruct = link_recon;
104  cudaGauge = new cudaGaugeField(gParam);
105 
107  gParam.precision = gaugeParam.cpu_prec;
108  cpuMom = new cpuGaugeField(gParam);
109  refMom = new cpuGaugeField(gParam);
110 
112  //memset(cpuMom->Gauge_p(), 0, 4*cpuMom->Volume()*momSiteSize*gaugeParam.cpu_prec);
113 
115 
116  gParam.precision = gaugeParam.cuda_prec;
117  cudaMom = new cudaGaugeField(gParam);
118 
119  hw = malloc(4*cpuGauge->Volume()*hwSiteSize*gaugeParam.cpu_prec);
120  if (hw == NULL){
121  fprintf(stderr, "ERROR: malloc failed for hw\n");
122  exit(1);
123  }
124  createHwCPU(hw, hw_prec);
125 
126  cudaHw = createHwQuda(gaugeParam.X, hw_prec);
127 
128  return;
129 }
130 
131 static void
132 fermion_force_end()
133 {
134  delete cudaMom;
135  delete cudaGauge;
136 
137  delete cpuGauge;
138  delete cpuMom;
139  delete refMom;
140 
141  freeHwQuda(cudaHw);
142  free(hw);
143 
144  endQuda();
145 }
146 
147 
148 static int
149 fermion_force_test(void)
150 {
151 
152  fermion_force_init();
155 
156 
157  float eps= 0.02;
158  float weight1 =1.0;
159  float weight2 =1.0;
160  float act_path_coeff[6];
161 
162  act_path_coeff[0] = 0.625000;
163  act_path_coeff[1] = -0.058479;
164  act_path_coeff[2] = -0.087719;
165  act_path_coeff[3] = 0.030778;
166  act_path_coeff[4] = -0.007200;
167  act_path_coeff[5] = -0.123113;
168 
169  // download the momentum field to the GPU
171 
172  // download the gauge field to the GPU
174 
175  loadHwToGPU(cudaHw, hw, cpu_hw_prec);
176 
177 
178  if (verify_results){
179  fermion_force_reference(eps, weight1, weight2, act_path_coeff, hw, cpuGauge->Gauge_p(), refMom->Gauge_p());
180  }
181 
182 
183  /*
184  * The flops number comes from CPU implementation in MILC
185  * function eo_fermion_force_twoterms_field(), fermion_force_asqtad.c
186  *
187  */
188  int flops = 433968;
189 
190  struct timeval t0, t1;
191  cudaDeviceSynchronize();
192 
193  gettimeofday(&t0, NULL);
194  fermion_force_cuda(eps, weight1, weight2, act_path_coeff, cudaHw, *cudaGauge, *cudaMom, &gaugeParam);
195  cudaDeviceSynchronize();
196  gettimeofday(&t1, NULL);
197  double secs = t1.tv_sec - t0.tv_sec + 0.000001*(t1.tv_usec - t0.tv_usec);
198 
199  // copy the new momentum back on the CPU
201 
202  int res;
204 
205  int accuracy_level;
206  accuracy_level = strong_check_mom(cpuMom->Gauge_p(), refMom->Gauge_p(), 4*cpuMom->Volume(), gaugeParam.cpu_prec);
207 
208  printf("Test %s\n",(1 == res) ? "PASSED" : "FAILED");
209 
210  int volume = gaugeParam.X[0]*gaugeParam.X[1]*gaugeParam.X[2]*gaugeParam.X[3];
211  double perf = 1.0* flops*volume/(secs*1024*1024*1024);
212  printf("GPU runtime =%.2f ms, kernel performance= %.2f GFLOPS\n", secs*1000, perf);
213 
214  fermion_force_end();
215 
216  if (res == 0){//failed
217  printf("\n");
218  printf("Warning: you test failed. \n");
219  printf(" Did you use --verify?\n");
220  printf(" Did you check the GPU health by running cuda memtest?\n");
221  }
222 
223  return accuracy_level;
224 }
225 
226 
227 static void
229 {
230  printf("running the following fermion force computation test:\n");
231 
232  printf("link_precision link_reconstruct space_dim(x/y/z) T_dimension\n");
233  printf("%s %s %d/%d/%d %d \n",
236  xdim, ydim, zdim, tdim);
237  return ;
238 
239 }
240 
241 void
242 usage_extra(char** argv )
243 {
244  printf("Extra options: \n");
245  printf(" --verify # Verify the GPU results using CPU results\n");
246  return ;
247 }
248 
249 int
250 main(int argc, char **argv)
251 {
252  int i;
253  for (i =1;i < argc; i++){
254  if(process_command_line_option(argc, argv, &i) == 0){
255  continue;
256  }
257 
258  if( strcmp(argv[i], "--verify") == 0){
259  verify_results=1;
260  continue;
261  }
262  fprintf(stderr, "ERROR: Invalid option:%s\n", argv[i]);
263  usage(argv);
264  }
265 
266  initComms(argc, argv, gridsize_from_cmdline);
267 
268  link_prec = prec;
269 
271 
272  int accuracy_level = fermion_force_test();
273  printfQuda("accuracy_level=%d\n", accuracy_level);
274 
275  finalizeComms();
276 
277  int ret;
278  if(accuracy_level >=3 ){
279  ret = 0;
280  }else{
281  ret = 1; //we delclare the test failed
282  }
283 
284  return ret;
285 }