QUDA  v0.7.0
A library for QCD on GPUs
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
gauge_force_test.cpp
Go to the documentation of this file.
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <string.h>
4 
5 #include <quda.h>
6 #include <test_util.h>
7 #include <gauge_field.h>
8 #include "misc.h"
10 #include "gauge_force_quda.h"
11 #include <sys/time.h>
12 #include "fat_force_quda.h"
13 #include <dslash_quda.h>
14 
15 #ifdef MULTI_GPU
16 #include <face_quda.h>
17 #endif
18 
19 extern int device;
20 
21 static QudaGaugeParam qudaGaugeParam;
23 extern bool verify_results;
24 extern int tdim;
25 extern QudaPrecision prec;
26 extern int xdim;
27 extern int ydim;
28 extern int zdim;
29 extern int tdim;
30 extern void usage(char** argv);
31 extern bool tune;
32 
33 int attempts = 1;
34 
37 
38 extern int gridsize_from_cmdline[];
39 
40 
// Number of meaningful entries in each row of the path_dir_* tables:
// the first 6 paths have 3 entries, the remaining 42 have 5.
int length[] = {
  3, 3, 3, 3, 3, 3,
  5, 5, 5, 5, 5, 5,
  5, 5, 5, 5, 5, 5,
  5, 5, 5, 5, 5, 5,
  5, 5, 5, 5, 5, 5,
  5, 5, 5, 5, 5, 5,
  5, 5, 5, 5, 5, 5,
  5, 5, 5, 5, 5, 5,
};
91 
92 
// One coefficient per path (48 entries, same count as length[]).
// NOTE(review): entry 41 is 5.0 where the surrounding sequence suggests 6.0;
// kept as-is, confirm whether that is intentional.
float loop_coeff_f[] = {
  1.1, 1.2, 1.3, 1.4, 1.5, 1.6,
  2.5, 2.6, 2.7, 2.8, 2.9, 3.0,
  3.1, 3.2, 3.3, 3.4, 3.5, 3.6,
  3.7, 3.8, 3.9, 4.0, 4.1, 4.2,
  4.3, 4.4, 4.5, 4.6, 4.7, 4.8,
  4.9, 5.0, 5.1, 5.2, 5.3, 5.4,
  5.5, 5.6, 5.7, 5.8, 5.9, 5.0,
  6.1, 6.2, 6.3, 6.4, 6.5, 6.6,
};
143 
// Link paths used for the first direction (dir == 0 in gauge_force_test()).
// Each row holds up to 5 direction codes; length[] gives how many are used.
// Rows written with 3 values have their last two slots zero-filled.
// NOTE(review): codes appear to pair d with 7-d as opposite directions - confirm.
int path_dir_x[][5] = {
  // 3-entry paths
  {1, 7, 6}, {6, 7, 1}, {2, 7, 5}, {5, 7, 2}, {3, 7, 4}, {4, 7, 3},
  // 5-entry paths
  {0, 1, 7, 7, 6}, {1, 7, 7, 6, 0}, {6, 7, 7, 1, 0}, {0, 6, 7, 7, 1},
  {0, 2, 7, 7, 5}, {2, 7, 7, 5, 0}, {5, 7, 7, 2, 0}, {0, 5, 7, 7, 2},
  {0, 3, 7, 7, 4}, {3, 7, 7, 4, 0}, {4, 7, 7, 3, 0}, {0, 4, 7, 7, 3},
  {6, 6, 7, 1, 1}, {1, 1, 7, 6, 6}, {5, 5, 7, 2, 2}, {2, 2, 7, 5, 5},
  {4, 4, 7, 3, 3}, {3, 3, 7, 4, 4},
  {1, 2, 7, 6, 5}, {5, 6, 7, 2, 1}, {1, 5, 7, 6, 2}, {2, 6, 7, 5, 1},
  {6, 2, 7, 1, 5}, {5, 1, 7, 2, 6}, {6, 5, 7, 1, 2}, {2, 1, 7, 5, 6},
  {1, 3, 7, 6, 4}, {4, 6, 7, 3, 1}, {1, 4, 7, 6, 3}, {3, 6, 7, 4, 1},
  {6, 3, 7, 1, 4}, {4, 1, 7, 3, 6}, {6, 4, 7, 1, 3}, {3, 1, 7, 4, 6},
  {2, 3, 7, 5, 4}, {4, 5, 7, 3, 2}, {2, 4, 7, 5, 3}, {3, 5, 7, 4, 2},
  {5, 3, 7, 2, 4}, {4, 2, 7, 3, 5}, {5, 4, 7, 2, 3}, {3, 2, 7, 4, 5},
};
194 
195 
// Link paths used for the second direction (dir == 1 in gauge_force_test()).
// Each row holds up to 5 direction codes; length[] gives how many are used.
// Rows written with 3 values have their last two slots zero-filled.
int path_dir_y[][5] = {
  // 3-entry paths
  {2, 6, 5}, {5, 6, 2}, {3, 6, 4}, {4, 6, 3}, {0, 6, 7}, {7, 6, 0},
  // 5-entry paths
  {1, 2, 6, 6, 5}, {2, 6, 6, 5, 1}, {5, 6, 6, 2, 1}, {1, 5, 6, 6, 2},
  {1, 3, 6, 6, 4}, {3, 6, 6, 4, 1}, {4, 6, 6, 3, 1}, {1, 4, 6, 6, 3},
  {1, 0, 6, 6, 7}, {0, 6, 6, 7, 1}, {7, 6, 6, 0, 1}, {1, 7, 6, 6, 0},
  {5, 5, 6, 2, 2}, {2, 2, 6, 5, 5}, {4, 4, 6, 3, 3}, {3, 3, 6, 4, 4},
  {7, 7, 6, 0, 0}, {0, 0, 6, 7, 7},
  {2, 3, 6, 5, 4}, {4, 5, 6, 3, 2}, {2, 4, 6, 5, 3}, {3, 5, 6, 4, 2},
  {5, 3, 6, 2, 4}, {4, 2, 6, 3, 5}, {5, 4, 6, 2, 3}, {3, 2, 6, 4, 5},
  {2, 0, 6, 5, 7}, {7, 5, 6, 0, 2}, {2, 7, 6, 5, 0}, {0, 5, 6, 7, 2},
  {5, 0, 6, 2, 7}, {7, 2, 6, 0, 5}, {5, 7, 6, 2, 0}, {0, 2, 6, 7, 5},
  {3, 0, 6, 4, 7}, {7, 4, 6, 0, 3}, {3, 7, 6, 4, 0}, {0, 4, 6, 7, 3},
  {4, 0, 6, 3, 7}, {7, 3, 6, 0, 4}, {4, 7, 6, 3, 0}, {0, 3, 6, 7, 4}
};
246 
// Link paths used for the third direction (dir == 2 in gauge_force_test()).
// Each row holds up to 5 direction codes; length[] gives how many are used.
// Rows written with 3 values have their last two slots zero-filled.
int path_dir_z[][5] = {
  // 3-entry paths
  {3, 5, 4}, {4, 5, 3}, {0, 5, 7}, {7, 5, 0}, {1, 5, 6}, {6, 5, 1},
  // 5-entry paths
  {2, 3, 5, 5, 4}, {3, 5, 5, 4, 2}, {4, 5, 5, 3, 2}, {2, 4, 5, 5, 3},
  {2, 0, 5, 5, 7}, {0, 5, 5, 7, 2}, {7, 5, 5, 0, 2}, {2, 7, 5, 5, 0},
  {2, 1, 5, 5, 6}, {1, 5, 5, 6, 2}, {6, 5, 5, 1, 2}, {2, 6, 5, 5, 1},
  {4, 4, 5, 3, 3}, {3, 3, 5, 4, 4}, {7, 7, 5, 0, 0}, {0, 0, 5, 7, 7},
  {6, 6, 5, 1, 1}, {1, 1, 5, 6, 6},
  {3, 0, 5, 4, 7}, {7, 4, 5, 0, 3}, {3, 7, 5, 4, 0}, {0, 4, 5, 7, 3},
  {4, 0, 5, 3, 7}, {7, 3, 5, 0, 4}, {4, 7, 5, 3, 0}, {0, 3, 5, 7, 4},
  {3, 1, 5, 4, 6}, {6, 4, 5, 1, 3}, {3, 6, 5, 4, 1}, {1, 4, 5, 6, 3},
  {4, 1, 5, 3, 6}, {6, 3, 5, 1, 4}, {4, 6, 5, 3, 1}, {1, 3, 5, 6, 4},
  {0, 1, 5, 7, 6}, {6, 7, 5, 1, 0}, {0, 6, 5, 7, 1}, {1, 7, 5, 6, 0},
  {7, 1, 5, 0, 6}, {6, 0, 5, 1, 7}, {7, 6, 5, 0, 1}, {1, 0, 5, 6, 7}
};
297 
// Link paths used for the fourth direction (dir == 3 in gauge_force_test()).
// Each row holds up to 5 direction codes; length[] gives how many are used.
// Rows written with 3 values have their last two slots zero-filled.
int path_dir_t[][5] = {
  // 3-entry paths
  {0, 4, 7}, {7, 4, 0}, {1, 4, 6}, {6, 4, 1}, {2, 4, 5}, {5, 4, 2},
  // 5-entry paths
  {3, 0, 4, 4, 7}, {0, 4, 4, 7, 3}, {7, 4, 4, 0, 3}, {3, 7, 4, 4, 0},
  {3, 1, 4, 4, 6}, {1, 4, 4, 6, 3}, {6, 4, 4, 1, 3}, {3, 6, 4, 4, 1},
  {3, 2, 4, 4, 5}, {2, 4, 4, 5, 3}, {5, 4, 4, 2, 3}, {3, 5, 4, 4, 2},
  {7, 7, 4, 0, 0}, {0, 0, 4, 7, 7}, {6, 6, 4, 1, 1}, {1, 1, 4, 6, 6},
  {5, 5, 4, 2, 2}, {2, 2, 4, 5, 5},
  {0, 1, 4, 7, 6}, {6, 7, 4, 1, 0}, {0, 6, 4, 7, 1}, {1, 7, 4, 6, 0},
  {7, 1, 4, 0, 6}, {6, 0, 4, 1, 7}, {7, 6, 4, 0, 1}, {1, 0, 4, 6, 7},
  {0, 2, 4, 7, 5}, {5, 7, 4, 2, 0}, {0, 5, 4, 7, 2}, {2, 7, 4, 5, 0},
  {7, 2, 4, 0, 5}, {5, 0, 4, 2, 7}, {7, 5, 4, 0, 2}, {2, 0, 4, 5, 7},
  {1, 2, 4, 6, 5}, {5, 6, 4, 2, 1}, {1, 5, 4, 6, 2}, {2, 6, 4, 5, 1},
  {6, 2, 4, 1, 5}, {5, 1, 4, 2, 6}, {6, 5, 4, 1, 2}, {2, 1, 4, 5, 6}
};
348 
349 
350 
351 static void
352 gauge_force_test(void)
353 {
354  int max_length = 6;
355 
356  initQuda(device);
357  setVerbosityQuda(QUDA_VERBOSE,"",stdout);
358 
359  qudaGaugeParam = newQudaGaugeParam();
360 
361  qudaGaugeParam.X[0] = xdim;
362  qudaGaugeParam.X[1] = ydim;
363  qudaGaugeParam.X[2] = zdim;
364  qudaGaugeParam.X[3] = tdim;
365 
366  setDims(qudaGaugeParam.X);
367 
368  qudaGaugeParam.anisotropy = 1.0;
369  qudaGaugeParam.cpu_prec = link_prec;
370  qudaGaugeParam.cuda_prec = link_prec;
371  qudaGaugeParam.cuda_prec_sloppy = link_prec;
372  qudaGaugeParam.reconstruct = link_recon;
373  qudaGaugeParam.reconstruct_sloppy = link_recon;
374  qudaGaugeParam.type = QUDA_SU3_LINKS; // in this context, just means these are site links
375 
376  qudaGaugeParam.gauge_order = gauge_order;
377  qudaGaugeParam.t_boundary = QUDA_PERIODIC_T;
378  qudaGaugeParam.gauge_fix = QUDA_GAUGE_FIXED_NO;
379  qudaGaugeParam.ga_pad = 0;
380  qudaGaugeParam.mom_ga_pad = 0;
381 
382  size_t gSize = qudaGaugeParam.cpu_prec;
383 
384  void* sitelink;
385  void* sitelink_1d;
386 
387 #ifdef GPU_DIRECT
388  sitelink_1d = pinned_malloc(4*V*gaugeSiteSize*gSize);
389 #else
390  sitelink_1d = safe_malloc(4*V*gaugeSiteSize*gSize);
391 #endif
392 
393  // this is a hack to have site link generated in 2d
394  // then copied to 1d array in "MILC" format
395  void* sitelink_2d[4];
396 #ifdef GPU_DIRECT
397  for(int i=0;i<4;i++) sitelink_2d[i] = pinned_malloc(V*gaugeSiteSize*qudaGaugeParam.cpu_prec);
398 #else
399  for(int i=0;i<4;i++) sitelink_2d[i] = safe_malloc(V*gaugeSiteSize*qudaGaugeParam.cpu_prec);
400 #endif
401 
402  // fills the gauge field with random numbers
403  createSiteLinkCPU(sitelink_2d, qudaGaugeParam.cpu_prec, 0);
404 
405  //copy the 2d sitelink to 1d milc format
406 
407  for(int dir = 0; dir < 4; dir++){
408  for(int i=0; i < V; i++){
409  char* src = ((char*)sitelink_2d[dir]) + i * gaugeSiteSize* qudaGaugeParam.cpu_prec;
410  char* dst = ((char*)sitelink_1d) + (4*i+dir)*gaugeSiteSize*qudaGaugeParam.cpu_prec ;
411  memcpy(dst, src, gaugeSiteSize*qudaGaugeParam.cpu_prec);
412  }
413  }
414  if (qudaGaugeParam.gauge_order == QUDA_MILC_GAUGE_ORDER){
415  sitelink = sitelink_1d;
416  }else if (qudaGaugeParam.gauge_order == QUDA_QDP_GAUGE_ORDER) {
417  sitelink = (void**)sitelink_2d;
418  } else {
419  errorQuda("Unsupported gauge order %d", qudaGaugeParam.gauge_order);
420  }
421 
422 #ifdef MULTI_GPU
423  void* sitelink_ex_2d[4];
424  void* sitelink_ex_1d;
425 
426  sitelink_ex_1d = pinned_malloc(4*V_ex*gaugeSiteSize*gSize);
427  for(int i=0;i < 4;i++) sitelink_ex_2d[i] = pinned_malloc(V_ex*gaugeSiteSize*gSize);
428 
429  int X1= Z[0];
430  int X2= Z[1];
431  int X3= Z[2];
432  int X4= Z[3];
433 
434  for(int i=0; i < V_ex; i++){
435  int sid = i;
436  int oddBit=0;
437  if(i >= Vh_ex){
438  sid = i - Vh_ex;
439  oddBit = 1;
440  }
441 
442  int za = sid/E1h;
443  int x1h = sid - za*E1h;
444  int zb = za/E2;
445  int x2 = za - zb*E2;
446  int x4 = zb/E3;
447  int x3 = zb - x4*E3;
448  int x1odd = (x2 + x3 + x4 + oddBit) & 1;
449  int x1 = 2*x1h + x1odd;
450 
451  if( x1< 2 || x1 >= X1 +2
452  || x2< 2 || x2 >= X2 +2
453  || x3< 2 || x3 >= X3 +2
454  || x4< 2 || x4 >= X4 +2){
455  continue;
456  }
457 
458  x1 = (x1 - 2 + X1) % X1;
459  x2 = (x2 - 2 + X2) % X2;
460  x3 = (x3 - 2 + X3) % X3;
461  x4 = (x4 - 2 + X4) % X4;
462 
463  int idx = (x4*X3*X2*X1+x3*X2*X1+x2*X1+x1)>>1;
464  if(oddBit){
465  idx += Vh;
466  }
467  for(int dir= 0; dir < 4; dir++){
468  char* src = (char*)sitelink_2d[dir];
469  char* dst = (char*)sitelink_ex_2d[dir];
470  memcpy(dst+i*gaugeSiteSize*gSize, src+idx*gaugeSiteSize*gSize, gaugeSiteSize*gSize);
471  }//dir
472  }//i
473 
474 
475  for(int dir = 0; dir < 4; dir++){
476  for(int i=0; i < V_ex; i++){
477  char* src = ((char*)sitelink_ex_2d[dir]) + i * gaugeSiteSize* qudaGaugeParam.cpu_prec;
478  char* dst = ((char*)sitelink_ex_1d) + (4*i+dir)*gaugeSiteSize*qudaGaugeParam.cpu_prec ;
479  memcpy(dst, src, gaugeSiteSize*qudaGaugeParam.cpu_prec);
480  }
481  }
482 
483 #endif
484 
485  void* mom = safe_malloc(4*V*momSiteSize*gSize);
486  void* refmom = safe_malloc(4*V*momSiteSize*gSize);
487 
488  memset(mom, 0, 4*V*momSiteSize*gSize);
489  //initialize some data in cpuMom
490  createMomCPU(mom, qudaGaugeParam.cpu_prec);
491  memcpy(refmom, mom, 4*V*momSiteSize*gSize);
492 
493 
494  double loop_coeff_d[sizeof(loop_coeff_f)/sizeof(float)];
495  for(unsigned int i=0;i < sizeof(loop_coeff_f)/sizeof(float); i++){
496  loop_coeff_d[i] = loop_coeff_f[i];
497  }
498 
499  void* loop_coeff;
500  if(qudaGaugeParam.cuda_prec == QUDA_SINGLE_PRECISION){
501  loop_coeff = (void*)&loop_coeff_f[0];
502  }else{
503  loop_coeff = loop_coeff_d;
504  }
505  double eb3 = 0.3;
506  int num_paths = sizeof(path_dir_x)/sizeof(path_dir_x[0]);
507 
508  int** input_path_buf[4];
509  for(int dir =0; dir < 4; dir++){
510  input_path_buf[dir] = (int**)safe_malloc(num_paths*sizeof(int*));
511  for(int i=0;i < num_paths;i++){
512  input_path_buf[dir][i] = (int*)safe_malloc(length[i]*sizeof(int));
513  if(dir == 0) memcpy(input_path_buf[dir][i], path_dir_x[i], length[i]*sizeof(int));
514  else if(dir ==1) memcpy(input_path_buf[dir][i], path_dir_y[i], length[i]*sizeof(int));
515  else if(dir ==2) memcpy(input_path_buf[dir][i], path_dir_z[i], length[i]*sizeof(int));
516  else if(dir ==3) memcpy(input_path_buf[dir][i], path_dir_t[i], length[i]*sizeof(int));
517  }
518  }
519 
520  if (tune) {
521  printfQuda("Tuning...\n");
523  }
524 
525  struct timeval t0, t1;
526  double timeinfo[3];
527  /* Multiple execution to exclude warmup time in the first run*/
528  for (int i =0;i < attempts; i++){
529  gettimeofday(&t0, NULL);
530  computeGaugeForceQuda(mom, sitelink, input_path_buf, length,
531  loop_coeff_d, num_paths, max_length, eb3,
532  &qudaGaugeParam, timeinfo);
533  gettimeofday(&t1, NULL);
534  }
535 
536  double total_time = t1.tv_sec - t0.tv_sec + 0.000001*(t1.tv_usec - t0.tv_usec);
537  //The number comes from CPU implementation in MILC, gauge_force_imp.c
538  int flops=153004;
539 
540  if (verify_results){
541  for(int i = 0;i < attempts;i++){
542 #ifdef MULTI_GPU
543  //last arg=0 means no optimization for communication, i.e. exchange data in all directions
544  //even they are not partitioned
545  int R[4] = {2, 2, 2, 2};
546  exchange_cpu_sitelink_ex(qudaGaugeParam.X, R, (void**)sitelink_ex_2d,
547  QUDA_QDP_GAUGE_ORDER, qudaGaugeParam.cpu_prec, 0, 4);
548  gauge_force_reference(refmom, eb3, sitelink_2d, sitelink_ex_2d, qudaGaugeParam.cpu_prec,
549  input_path_buf, length, loop_coeff, num_paths);
550 #else
551  gauge_force_reference(refmom, eb3, sitelink_2d, NULL, qudaGaugeParam.cpu_prec,
552  input_path_buf, length, loop_coeff, num_paths);
553 #endif
554  }
555 
556  int res;
557  res = compare_floats(mom, refmom, 4*V*momSiteSize, 1e-3, qudaGaugeParam.cpu_prec);
558 
559  strong_check_mom(mom, refmom, 4*V, qudaGaugeParam.cpu_prec);
560 
561  printf("Test %s\n",(1 == res) ? "PASSED" : "FAILED");
562  }
563 
564  double perf = 1.0* flops*V/(total_time*1e+9);
565  double kernel_perf = 1.0*flops*V/(timeinfo[1]*1e+9);
566  printf("init and cpu->gpu time: %.2f ms, kernel time: %.2f ms, gpu->cpu and cleanup time: %.2f total time =%.2f ms\n",
567  timeinfo[0]*1e+3, timeinfo[1]*1e+3, timeinfo[2]*1e+3, total_time*1e+3);
568  printf("kernel performance: %.2f GFLOPS, overall performance : %.2f GFLOPS\n", kernel_perf, perf);
569 
570  for(int dir = 0; dir < 4; dir++){
571  for(int i=0;i < num_paths; i++) host_free(input_path_buf[dir][i]);
572  host_free(input_path_buf[dir]);
573  }
574 
575  host_free(sitelink_1d);
576  for(int dir=0;dir < 4;dir++) host_free(sitelink_2d[dir]);
577 
578 #ifdef MULTI_GPU
579  host_free(sitelink_ex_1d);
580  for(int dir=0; dir < 4; dir++) host_free(sitelink_ex_2d[dir]);
581 #endif
582 
583 
584  host_free(mom);
585  host_free(refmom);
586  endQuda();
587 }
588 
589 
590 static void
592 {
593  printf("running the following test:\n");
594 
595  printf("link_precision link_reconstruct space_dim(x/y/z) T_dimension Gauge_order Attempts\n");
596  printf("%s %s %d/%d/%d %d %s %d\n",
599  xdim,ydim,zdim, tdim,
601  attempts);
602  return ;
603 
604 }
605 
// Prints the command-line options specific to this test to stdout.
// argv is accepted for interface compatibility and is not used.
void
usage_extra(char** argv )
{
  static const char* const help_lines[] = {
    "Extra options:\n",
    " --gauge-order <qdp/milc> # Gauge storing order in CPU\n",
    " --attempts <n> # Number of tests\n",
  };
  const unsigned int num_lines = sizeof(help_lines)/sizeof(help_lines[0]);

  for(unsigned int k = 0; k < num_lines; k++){
    fputs(help_lines[k], stdout);
  }
}
614 
615 int
616 main(int argc, char **argv)
617 {
618  int i;
619  for (i =1;i < argc; i++){
620 
621  if(process_command_line_option(argc, argv, &i) == 0){
622  continue;
623  }
624 
625  if( strcmp(argv[i], "--gauge-order") == 0){
626  if(i+1 >= argc){
627  usage(argv);
628  }
629 
630  if(strcmp(argv[i+1], "milc") == 0){
632  }else if(strcmp(argv[i+1], "qdp") == 0){
634  }else{
635  fprintf(stderr, "Error: unsupported gauge-field order\n");
636  exit(1);
637  }
638  i++;
639  continue;
640  }
641  if( strcmp(argv[i], "--attempts") == 0){
642  if(i+1 >= argc){
643  usage(argv);
644  }
645 
646  attempts = atoi(argv[i+1]);
647  if(attempts <= 0){
648  printf("ERROR: invalid number of attempts(%d)\n", attempts);
649  }
650  i++;
651  continue;
652  }
653 
654  if( strcmp(argv[i], "--verify") == 0){
655  verify_results=1;
656  continue;
657  }
658 
659  fprintf(stderr, "ERROR: Invalid option:%s\n", argv[i]);
660  usage(argv);
661  }
662 
663 
664  link_prec = prec;
665 
666  initComms(argc, argv, gridsize_from_cmdline);
667 
669 
670  gauge_force_test();
671 
672  finalizeComms();
673 }
QudaReconstructType reconstruct_sloppy
Definition: quda.h:46
double anisotropy
Definition: quda.h:31
__constant__ int Vh
int main(int argc, char **argv)
void usage_extra(char **argv)
void setVerbosityQuda(QudaVerbosity verbosity, const char prefix[], FILE *outfile)
void endQuda(void)
float loop_coeff_f[]
__constant__ int X2
#define pinned_malloc(size)
Definition: malloc_quda.h:26
enum QudaPrecision_s QudaPrecision
int V
Definition: test_util.cpp:29
int attempts
int ga_pad
Definition: quda.h:53
QudaGaugeFixed gauge_fix
Definition: quda.h:51
__constant__ int Vh_ex
void display_test_info()
Definition: blas_test.cu:56
QudaLinkType type
Definition: quda.h:35
bool tune
Definition: test_util.cpp:1562
#define errorQuda(...)
Definition: util_quda.h:73
#define host_free(ptr)
Definition: malloc_quda.h:29
void setDims(int *)
Definition: test_util.cpp:88
void createMomCPU(void *mom, QudaPrecision precision)
Definition: test_util.cpp:1391
__constant__ int X1
int computeGaugeForceQuda(void *mom, void *sitelink, int ***input_path_buf, int *path_length, double *loop_coeff, int num_paths, int max_length, double dt, QudaGaugeParam *qudaGaugeParam, double *timeinfo)
int V_ex
Definition: test_util.cpp:38
int process_command_line_option(int argc, char **argv, int *idx)
Definition: test_util.cpp:1635
int path_dir_t[][5]
#define gaugeSiteSize
void finalizeComms()
Definition: test_util.cpp:65
const char * get_gauge_order_str(QudaGaugeFieldOrder order)
Definition: misc.cpp:697
QudaGaugeFieldOrder gauge_order
Definition: quda.h:36
int compare_floats(void *a, void *b, int len, double epsilon, QudaPrecision precision)
Definition: test_util.cpp:395
const char * get_prec_str(QudaPrecision prec)
Definition: misc.cpp:658
int length[]
void createSiteLinkCPU(void **link, QudaPrecision precision, int phase)
Definition: test_util.cpp:1166
QudaReconstructType link_recon
Definition: test_util.cpp:1549
int device
Definition: test_util.cpp:1546
void gauge_force_reference(void *refMom, double eb3, void **sitelink, void **sitelink_ex_2d, QudaPrecision prec, int ***path_dir, int *length, void *loop_coeff, int num_paths)
void setTuning(QudaTune tune)
Definition: util_quda.cpp:33
void initQuda(int device)
int ydim
Definition: test_util.cpp:1554
void exchange_cpu_sitelink_ex(int *X, int *R, void **sitelink, QudaGaugeFieldOrder cpu_order, QudaPrecision gPrecision, int optflag, int geometry)
QudaPrecision prec
Definition: test_util.cpp:1551
#define momSiteSize
const char * get_recon_str(QudaReconstructType recon)
Definition: misc.cpp:724
int path_dir_y[][5]
QudaPrecision cuda_prec_sloppy
Definition: quda.h:45
int gridsize_from_cmdline[]
Definition: test_util.cpp:1559
enum QudaGaugeFieldOrder_s QudaGaugeFieldOrder
bool verify_results
Definition: test_util.cpp:1568
QudaReconstructType reconstruct
Definition: quda.h:43
QudaPrecision cuda_prec
Definition: quda.h:42
int X[4]
Definition: quda.h:29
int xdim
Definition: test_util.cpp:1553
int strong_check_mom(void *momA, void *momB, int len, QudaPrecision prec)
Definition: test_util.cpp:1502
#define safe_malloc(size)
Definition: malloc_quda.h:25
short x1h
Definition: llfat_core.h:815
void * memset(void *s, int c, size_t n)
int Z[4]
Definition: test_util.cpp:28
QudaGaugeFieldOrder gauge_order
enum QudaReconstructType_s QudaReconstructType
Main header file for the QUDA library.
__constant__ int X3
int mom_ga_pad
Definition: quda.h:59
#define printfQuda(...)
Definition: util_quda.h:67
QudaTboundary t_boundary
Definition: quda.h:38
short x1odd
Definition: llfat_core.h:821
int zdim
Definition: test_util.cpp:1555
int tdim
void usage(char **argv)
Definition: test_util.cpp:1584
int path_dir_z[][5]
QudaPrecision link_prec
void initComms(int argc, char **argv, const int *commDims)
Definition: test_util.cpp:48
__constant__ int E1h
int path_dir_x[][5]
int oddBit
__constant__ int E3
QudaPrecision cpu_prec
Definition: quda.h:40
__constant__ int E2
__constant__ int X4
QudaGaugeParam newQudaGaugeParam(void)