QUDA  v0.7.0
A library for QCD on GPUs
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
fermion_force_test.cpp
Go to the documentation of this file.
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <string.h>
4 
5 #include <quda.h>
6 #include "test_util.h"
7 #include "gauge_field.h"
8 #include "fat_force_quda.h"
9 #include "misc.h"
11 #include "fermion_force_quda.h"
12 #include "hw_quda.h"
13 #include <sys/time.h>
14 #include <dslash_quda.h>
15 
// File-scope declarations for the fermion force test.
// NOTE(review): the embedded listing numbers skip several lines in this
// region (20-21, 23-25, 28, 37-39), so further declarations appear to have
// been dropped from this listing -- confirm against the original file.
16 using namespace quda;
17 
// Declared here, defined in another translation unit.
18 extern void usage(char** argv);
19 extern int device;
22 
26 
// Half-Wilson data: cudaHw is the handle returned by createHwQuda() and
// released by freeHwQuda(); hw is the malloc'ed host buffer that is passed to
// createHwCPU() and loadHwToGPU().
27 static FullHw cudaHw;
29 static void* hw; //the array of half_wilson_vector
30 
// Passed to initComms() in main().
31 extern int gridsize_from_cmdline[];
32 
// When true, fermion_force_test() also runs fermion_force_reference().
33 extern bool verify_results;
34 
// NOTE(review): ODD_BIT is not read anywhere in the visible part of this file.
35 int ODD_BIT = 1;
// Lattice extents; copied into gaugeParam.X[0..3] by fermion_force_init().
36 extern int xdim, ydim, zdim, tdim;
43 
45 
// Allocates and fills every field the test needs: the host gauge field
// (cpuGauge), its device counterpart (cudaGauge), the host momentum fields
// (cpuMom, refMom), the device momentum field (cudaMom) and the half-Wilson
// data (hw on the host, cudaHw from createHwQuda()).
// All of these are released again by fermion_force_end().
//
// NOTE(review): the embedded listing numbers skip lines 49, 56, 58-60, 63,
// 65-66, 97, 103-104, 109, 111, 115 and 118-120, so statements are missing
// from this listing (e.g. nothing visible here declares gParam) -- confirm
// against the original file before changing any logic.
46 static void
47 fermion_force_init()
48 {
50  //cudaSetDevice(dev); CUERR;
51 
// Lattice extents come from the globals xdim/ydim/zdim/tdim.
52  gaugeParam.X[0] = xdim;
53  gaugeParam.X[1] = ydim;
54  gaugeParam.X[2] = zdim;
55  gaugeParam.X[3] = tdim;
57 
61  gaugeParam.anisotropy = 1.0;
62 
64 
67 
68  cpuGauge = new cpuGaugeField(gParam);
69 
70  // this is a hack to have site link generated in 2d
71  // then copied to 1d array in "MILC" format
// One temporary buffer per direction; gaugeParam.cpu_prec is used directly as
// the per-element byte multiplier in the size computation.
72  void* siteLink_2d[4];
73  for(int i=0;i < 4;i++){
74  siteLink_2d[i] = malloc(cpuGauge->Volume()*gaugeSiteSize*gaugeParam.cpu_prec);
75  if (siteLink_2d[i] == NULL){
76  errorQuda("ERROR: malloc failed for siteLink_2d\n");
77  }
78  }
79 
80  // fills the gauge field with random numbers
81  createSiteLinkCPU(siteLink_2d, gaugeParam.cpu_prec, 1);
82 
83  //copy the 2d sitelink to 1d milc format
// Destination element index is (4*i + dir): the four directions of site i are
// stored next to each other in cpuGauge's buffer.
84  for(int dir=0;dir < 4; dir++){
85  for(int i=0;i < cpuGauge->Volume(); i++){
86  char* src = ((char*)siteLink_2d[dir]) + i * gaugeSiteSize* gaugeParam.cpu_prec;
87  char* dst = ((char*)cpuGauge->Gauge_p()) + (4*i+dir)*gaugeSiteSize*gaugeParam.cpu_prec ;
88  memcpy(dst, src, gaugeSiteSize*gaugeParam.cpu_prec);
89  }
90  }
91 
// The per-direction staging buffers are no longer needed after the copy.
92  for(int i=0;i < 4;i++){
93  free(siteLink_2d[i]);
94  }
95 
96 #if 0
98 #endif
99 
// gParam is reused for each field below; only the members assigned here are
// changed between the successive constructor calls visible in this listing.
100  gParam.pad = 0;
101  gParam.precision = gaugeParam.cuda_prec;
102  gParam.reconstruct = link_recon;
105  cudaGauge = new cudaGaugeField(gParam);
106 
107  gParam.order = QUDA_MILC_GAUGE_ORDER;
108 
// Host momentum fields are created at the CPU precision.
110  gParam.precision = gaugeParam.cpu_prec;
112  cpuMom = new cpuGaugeField(gParam);
113  refMom = new cpuGaugeField(gParam);
114 
116  //memset(cpuMom->Gauge_p(), 0, 4*cpuMom->Volume()*momSiteSize*gaugeParam.cpu_prec);
117 
119 
// Device momentum field is created at the GPU precision.
121  gParam.precision = gaugeParam.cuda_prec;
122  cudaMom = new cudaGaugeField(gParam);
123 
// NOTE(review): the buffer is sized with gaugeParam.cpu_prec but filled by
// createHwCPU() with hw_prec -- confirm the two always agree.
// NOTE(review): this failure path uses fprintf+exit while the siteLink_2d
// allocation above reports through errorQuda().
124  hw = malloc(4*cpuGauge->Volume()*hwSiteSize*gaugeParam.cpu_prec);
125  if (hw == NULL){
126  fprintf(stderr, "ERROR: malloc failed for hw\n");
127  exit(1);
128  }
129  createHwCPU(hw, hw_prec);
130 
131  cudaHw = createHwQuda(gaugeParam.X, hw_prec);
132 
133  return;
134 }
135 
136 static void
137 fermion_force_end()
138 {
139  delete cudaMom;
140  delete cudaGauge;
141 
142  delete cpuGauge;
143  delete cpuMom;
144  delete refMom;
145 
146  freeHwQuda(cudaHw);
147  free(hw);
148 
149  endQuda();
150 }
151 
// Local forward declaration of quda::fermionforce::initLatticeConstants() so
// that this file can name it without a dedicated header.
// NOTE(review): no call to it survives in the visible lines of this listing
// (embedded lines 164-165 are missing) -- confirm it is still used.
152 namespace quda {
153  namespace fermionforce {
154  void initLatticeConstants(const LatticeField &lat, TimeProfile &profile);
155  }
156 }
157 
// Runs one fermion-force computation on the GPU, times it, optionally
// computes the CPU reference, and compares the momentum fields.
//
// Returns the value produced by strong_check_mom(); main() treats a value
// of 3 or more as success.
//
// NOTE(review): the embedded listing numbers skip lines 164-165, 181, 184,
// 211 and 214, so several statements are missing from this listing. In
// particular `res` is read below but never assigned in the visible text;
// presumably the assignment was on the dropped line 214 -- confirm against
// the original file.
158 static int
159 fermion_force_test(void)
160 {
161 
162  fermion_force_init();
163  TimeProfile profile("dummy");
166 
167 
// Step size, weights and path coefficients handed unchanged to both the
// reference and the GPU implementation.
168  float eps= 0.02;
169  float weight1 =1.0;
170  float weight2 =1.0;
171  float act_path_coeff[6];
172 
173  act_path_coeff[0] = 0.625000;
174  act_path_coeff[1] = -0.058479;
175  act_path_coeff[2] = -0.087719;
176  act_path_coeff[3] = 0.030778;
177  act_path_coeff[4] = -0.007200;
178  act_path_coeff[5] = -0.123113;
179 
180  // download the momentum field to the GPU
182 
183  // download the gauge field to the GPU
185 
// NOTE(review): hw was filled with hw_prec in fermion_force_init() but is
// uploaded here with cpu_hw_prec -- confirm the two always agree.
186  loadHwToGPU(cudaHw, hw, cpu_hw_prec);
187 
188 
// The CPU reference result (written into refMom) is only produced when
// verification was requested.
189  if (verify_results){
190  fermion_force_reference(eps, weight1, weight2, act_path_coeff, hw, cpuGauge->Gauge_p(), refMom->Gauge_p());
191  }
192 
193 
194  /*
195  * The flops number comes from CPU implementation in MILC
196  * function eo_fermion_force_twoterms_field(), fermion_force_asqtad.c
197  *
198  */
199  int flops = 433968;
200 
// Synchronize before and after the call so the wall-clock interval covers
// the whole GPU computation.
201  struct timeval t0, t1;
202  cudaDeviceSynchronize();
203 
204  gettimeofday(&t0, NULL);
205  fermion_force_cuda(eps, weight1, weight2, act_path_coeff, cudaHw, *cudaGauge, *cudaMom, &gaugeParam);
206  cudaDeviceSynchronize();
207  gettimeofday(&t1, NULL);
// Elapsed time in seconds (tv_usec difference scaled by 1e-6).
208  double secs = t1.tv_sec - t0.tv_sec + 0.000001*(t1.tv_usec - t0.tv_usec);
209 
210  // copy the new momentum back on the CPU
212 
213  int res;
215 
// The comparison runs regardless of verify_results; without the reference
// run above refMom has not been written by fermion_force_reference().
216  int accuracy_level;
217  accuracy_level = strong_check_mom(cpuMom->Gauge_p(), refMom->Gauge_p(), 4*cpuMom->Volume(), gaugeParam.cpu_prec);
218 
219  printf("Test %s\n",(1 == res) ? "PASSED" : "FAILED");
220 
// flops is treated as a per-site count: it is multiplied by the lattice
// volume. The leading 1.0* makes the product a double; the divisor is
// 1024^3 rather than 10^9.
221  int volume = gaugeParam.X[0]*gaugeParam.X[1]*gaugeParam.X[2]*gaugeParam.X[3];
222  double perf = 1.0* flops*volume/(secs*1024*1024*1024);
223  printf("GPU runtime =%.2f ms, kernel performance= %.2f GFLOPS\n", secs*1000, perf);
224 
// Release all fields before printing the failure hints.
225  fermion_force_end();
226 
227  if (res == 0){//failed
228  printf("\n");
229  printf("Warning: you test failed. \n");
230  printf(" Did you use --verify?\n");
231  printf(" Did you check the GPU health by running cuda memtest?\n");
232  }
233 
234  return accuracy_level;
235 }
236 
237 
// Prints a short banner describing the test configuration: a header row and
// one row with the lattice extents xdim/ydim/zdim and tdim.
//
// NOTE(review): the embedded listing numbers skip line 239 (the function-name
// line) and lines 245-246 (presumably the two arguments for the leading "%s"
// conversions) -- restore them from the original file before compiling.
238 static void
240 {
241  printf("running the following fermion force computation test:\n");
242 
243  printf("link_precision link_reconstruct space_dim(x/y/z) T_dimension\n");
244  printf("%s %s %d/%d/%d %d \n",
247  xdim, ydim, zdim, tdim);
248  return ;
249 
250 }
251 
/**
 * Print the command-line options that are specific to this test.
 *
 * @param argv  Program argument vector; accepted for signature compatibility
 *              and not read here.
 */
void usage_extra(char** argv)
{
  (void)argv;  // intentionally unused

  printf("Extra options: \n");
  printf(" --verify # Verify the GPU results using CPU results\n");
}
259 
260 int
261 main(int argc, char **argv)
262 {
263  int i;
264  for (i =1;i < argc; i++){
265  if(process_command_line_option(argc, argv, &i) == 0){
266  continue;
267  }
268 
269  fprintf(stderr, "ERROR: Invalid option:%s\n", argv[i]);
270  usage(argv);
271  }
272 
273  initComms(argc, argv, gridsize_from_cmdline);
274 
275  link_prec = prec;
276 
278 
279  int accuracy_level = fermion_force_test();
280  printfQuda("accuracy_level=%d\n", accuracy_level);
281 
282  finalizeComms();
283 
284  int ret;
285  if(accuracy_level >=3 ){
286  ret = 0;
287  }else{
288  ret = 1; //we delclare the test failed
289  }
290 
291  return ret;
292 }
int ODD_BIT
double anisotropy
Definition: quda.h:31
void endQuda(void)
enum QudaPrecision_s QudaPrecision
int V
Definition: test_util.cpp:29
FullHw createHwQuda(int *X, QudaPrecision precision)
Definition: hw_quda.cpp:41
QudaPrecision mom_prec
void display_test_info()
Definition: blas_test.cu:56
int site_link_sanity_check(void *link, int len, int precision, QudaGaugeParam *gaugeParam)
Definition: misc.cpp:549
void usage(char **argv)
Definition: test_util.cpp:1584
void saveCPUField(cpuGaugeField &, const QudaFieldLocation &) const
#define errorQuda(...)
Definition: util_quda.h:73
void createHwCPU(void *hw, QudaPrecision precision)
Definition: test_util.cpp:1429
int device
Definition: test_util.cpp:1546
QudaGaugeParam gaugeParam
void setDims(int *)
Definition: test_util.cpp:88
void createMomCPU(void *mom, QudaPrecision precision)
Definition: test_util.cpp:1391
QudaReconstructType link_recon
Definition: test_util.cpp:1549
int process_command_line_option(int argc, char **argv, int *idx)
Definition: test_util.cpp:1635
QudaPrecision precision
Definition: lattice_field.h:41
#define gaugeSiteSize
void finalizeComms()
Definition: test_util.cpp:65
int tdim
Definition: test_util.cpp:1556
void fermion_force_cuda(double eps, double weight1, double weight2, void *act_path_coeff, FullHw cudaHw, cudaGaugeField &cudaSiteLink, cudaGaugeField &cudaMom, QudaGaugeParam *param)
QudaGaugeFieldOrder gauge_order
Definition: quda.h:36
void fermion_force_reference(float eps, float weight1, float weight2, void *act_path_coeff, void *temp_x, void *sitelink, void *mom)
int compare_floats(void *a, void *b, int len, double epsilon, QudaPrecision precision)
Definition: test_util.cpp:395
const char * get_prec_str(QudaPrecision prec)
Definition: misc.cpp:658
cpuGaugeField * cpuMom
QudaPrecision link_prec
void createSiteLinkCPU(void **link, QudaPrecision precision, int phase)
Definition: test_util.cpp:1166
cudaGaugeField * cudaGauge
cpuGaugeField * refMom
void loadHwToGPU(FullHw ret, void *hw, QudaPrecision cpu_prec)
Definition: hw_quda.cpp:251
void fermion_force_init_cuda(QudaGaugeParam *param)
void * siteLink_2d[4]
void initQuda(int device)
int Volume() const
int main(int argc, char **argv)
void freeHwQuda(FullHw hw)
Definition: hw_quda.cpp:61
QudaPrecision prec
Definition: test_util.cpp:1551
#define momSiteSize
const char * get_recon_str(QudaReconstructType recon)
Definition: misc.cpp:724
QudaGaugeFieldOrder order
Definition: gauge_field.h:15
int ydim
Definition: test_util.cpp:1554
void loadCPUField(const cpuGaugeField &, const QudaFieldLocation &)
cpuGaugeField * cpuGauge
QudaReconstructType reconstruct
Definition: quda.h:43
QudaPrecision cuda_prec
Definition: quda.h:42
int X[4]
Definition: quda.h:29
cudaGaugeField * cudaMom
int strong_check_mom(void *momA, void *momB, int len, QudaPrecision prec)
Definition: test_util.cpp:1502
QudaPrecision cpu_hw_prec
int zdim
Definition: test_util.cpp:1555
#define hwSiteSize
Definition: hw_quda.cpp:8
GaugeFieldParam gParam
void initLatticeConstants(const LatticeField &lat, TimeProfile &profile)
enum QudaReconstructType_s QudaReconstructType
Main header file for the QUDA library.
QudaLinkType link_type
Definition: gauge_field.h:17
#define printfQuda(...)
Definition: util_quda.h:67
int gridsize_from_cmdline[]
Definition: test_util.cpp:1559
QudaReconstructType reconstruct
Definition: gauge_field.h:14
QudaFieldCreate create
Definition: gauge_field.h:26
void usage_extra(char **argv)
bool verify_results
Definition: test_util.cpp:1568
int xdim
Definition: test_util.cpp:1553
void initComms(int argc, char **argv, const int *commDims)
Definition: test_util.cpp:48
QudaPrecision cpu_prec
Definition: quda.h:40
QudaPrecision hw_prec