QUDA  1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
gauge_force_test.cpp
Go to the documentation of this file.
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <string.h>
4 
5 #include <quda.h>
6 #include <test_util.h>
7 #include <gauge_field.h>
8 #include "misc.h"
10 #include "gauge_force_quda.h"
11 #include <sys/time.h>
12 #include <dslash_quda.h>
13 
14 extern int device;
15 
18 extern bool verify_results;
19 extern int tdim;
20 extern QudaPrecision prec;
21 extern int xdim;
22 extern int ydim;
23 extern int zdim;
24 extern int tdim;
25 extern int niter;
26 extern void usage(char** argv);
27 
30 
31 extern int gridsize_from_cmdline[];
32 
33 
34 int length[]={
35  3,
36  3,
37  3,
38  3,
39  3,
40  3,
41  5,
42  5,
43  5,
44  5,
45  5,
46  5,
47  5,
48  5,
49  5,
50  5,
51  5,
52  5,
53  5,
54  5,
55  5,
56  5,
57  5,
58  5,
59  5,
60  5,
61  5,
62  5,
63  5,
64  5,
65  5,
66  5,
67  5,
68  5,
69  5,
70  5,
71  5,
72  5,
73  5,
74  5,
75  5,
76  5,
77  5,
78  5,
79  5,
80  5,
81  5,
82  5,
83 };
84 
85 
86 float loop_coeff_f[]={
87  1.1,
88  1.2,
89  1.3,
90  1.4,
91  1.5,
92  1.6,
93  2.5,
94  2.6,
95  2.7,
96  2.8,
97  2.9,
98  3.0,
99  3.1,
100  3.2,
101  3.3,
102  3.4,
103  3.5,
104  3.6,
105  3.7,
106  3.8,
107  3.9,
108  4.0,
109  4.1,
110  4.2,
111  4.3,
112  4.4,
113  4.5,
114  4.6,
115  4.7,
116  4.8,
117  4.9,
118  5.0,
119  5.1,
120  5.2,
121  5.3,
122  5.4,
123  5.5,
124  5.6,
125  5.7,
126  5.8,
127  5.9,
128  5.0,
129  6.1,
130  6.2,
131  6.3,
132  6.4,
133  6.5,
134  6.6,
135 };
136 
137 int path_dir_x[][5] = {
138  {1, 7, 6 },
139  {6, 7, 1 },
140  {2, 7, 5 },
141  {5, 7, 2 },
142  {3, 7, 4 },
143  {4, 7, 3 },
144  {0, 1, 7, 7, 6 },
145  {1, 7, 7, 6, 0 },
146  {6, 7, 7, 1, 0 },
147  {0, 6, 7, 7, 1 },
148  {0, 2, 7, 7, 5 },
149  {2, 7, 7, 5, 0 },
150  {5, 7, 7, 2, 0 },
151  {0, 5, 7, 7, 2 },
152  {0, 3, 7, 7, 4 },
153  {3, 7, 7, 4, 0 },
154  {4, 7, 7, 3, 0 },
155  {0, 4, 7, 7, 3 },
156  {6, 6, 7, 1, 1 },
157  {1, 1, 7, 6, 6 },
158  {5, 5, 7, 2, 2 },
159  {2, 2, 7, 5, 5 },
160  {4, 4, 7, 3, 3 },
161  {3, 3, 7, 4, 4 },
162  {1, 2, 7, 6, 5 },
163  {5, 6, 7, 2, 1 },
164  {1, 5, 7, 6, 2 },
165  {2, 6, 7, 5, 1 },
166  {6, 2, 7, 1, 5 },
167  {5, 1, 7, 2, 6 },
168  {6, 5, 7, 1, 2 },
169  {2, 1, 7, 5, 6 },
170  {1, 3, 7, 6, 4 },
171  {4, 6, 7, 3, 1 },
172  {1, 4, 7, 6, 3 },
173  {3, 6, 7, 4, 1 },
174  {6, 3, 7, 1, 4 },
175  {4, 1, 7, 3, 6 },
176  {6, 4, 7, 1, 3 },
177  {3, 1, 7, 4, 6 },
178  {2, 3, 7, 5, 4 },
179  {4, 5, 7, 3, 2 },
180  {2, 4, 7, 5, 3 },
181  {3, 5, 7, 4, 2 },
182  {5, 3, 7, 2, 4 },
183  {4, 2, 7, 3, 5 },
184  {5, 4, 7, 2, 3 },
185  {3, 2, 7, 4, 5 },
186 };
187 
188 
189 int path_dir_y[][5] = {
190  { 2 ,6 ,5 },
191  { 5 ,6 ,2 },
192  { 3 ,6 ,4 },
193  { 4 ,6 ,3 },
194  { 0 ,6 ,7 },
195  { 7 ,6 ,0 },
196  { 1 ,2 ,6 ,6 ,5 },
197  { 2 ,6 ,6 ,5 ,1 },
198  { 5 ,6 ,6 ,2 ,1 },
199  { 1 ,5 ,6 ,6 ,2 },
200  { 1 ,3 ,6 ,6 ,4 },
201  { 3 ,6 ,6 ,4 ,1 },
202  { 4 ,6 ,6 ,3 ,1 },
203  { 1 ,4 ,6 ,6 ,3 },
204  { 1 ,0 ,6 ,6 ,7 },
205  { 0 ,6 ,6 ,7 ,1 },
206  { 7 ,6 ,6 ,0 ,1 },
207  { 1 ,7 ,6 ,6 ,0 },
208  { 5 ,5 ,6 ,2 ,2 },
209  { 2 ,2 ,6 ,5 ,5 },
210  { 4 ,4 ,6 ,3 ,3 },
211  { 3 ,3 ,6 ,4 ,4 },
212  { 7 ,7 ,6 ,0 ,0 },
213  { 0 ,0 ,6 ,7 ,7 },
214  { 2 ,3 ,6 ,5 ,4 },
215  { 4 ,5 ,6 ,3 ,2 },
216  { 2 ,4 ,6 ,5 ,3 },
217  { 3 ,5 ,6 ,4 ,2 },
218  { 5 ,3 ,6 ,2 ,4 },
219  { 4 ,2 ,6 ,3 ,5 },
220  { 5 ,4 ,6 ,2 ,3 },
221  { 3 ,2 ,6 ,4 ,5 },
222  { 2 ,0 ,6 ,5 ,7 },
223  { 7 ,5 ,6 ,0 ,2 },
224  { 2 ,7 ,6 ,5 ,0 },
225  { 0 ,5 ,6 ,7 ,2 },
226  { 5 ,0 ,6 ,2 ,7 },
227  { 7 ,2 ,6 ,0 ,5 },
228  { 5 ,7 ,6 ,2 ,0 },
229  { 0 ,2 ,6 ,7 ,5 },
230  { 3 ,0 ,6 ,4 ,7 },
231  { 7 ,4 ,6 ,0 ,3 },
232  { 3 ,7 ,6 ,4 ,0 },
233  { 0 ,4 ,6 ,7 ,3 },
234  { 4 ,0 ,6 ,3 ,7 },
235  { 7 ,3 ,6 ,0 ,4 },
236  { 4 ,7 ,6 ,3 ,0 },
237  { 0 ,3 ,6 ,7 ,4 }
238 };
239 
240 int path_dir_z[][5] = {
241  { 3 ,5 ,4 },
242  { 4 ,5 ,3 },
243  { 0 ,5 ,7 },
244  { 7 ,5 ,0 },
245  { 1 ,5 ,6 },
246  { 6 ,5 ,1 },
247  { 2 ,3 ,5 ,5 ,4 },
248  { 3 ,5 ,5 ,4 ,2 },
249  { 4 ,5 ,5 ,3 ,2 },
250  { 2 ,4 ,5 ,5 ,3 },
251  { 2 ,0 ,5 ,5 ,7 },
252  { 0 ,5 ,5 ,7 ,2 },
253  { 7 ,5 ,5 ,0 ,2 },
254  { 2 ,7 ,5 ,5 ,0 },
255  { 2 ,1 ,5 ,5 ,6 },
256  { 1 ,5 ,5 ,6 ,2 },
257  { 6 ,5 ,5 ,1 ,2 },
258  { 2 ,6 ,5 ,5 ,1 },
259  { 4 ,4 ,5 ,3 ,3 },
260  { 3 ,3 ,5 ,4 ,4 },
261  { 7 ,7 ,5 ,0 ,0 },
262  { 0 ,0 ,5 ,7 ,7 },
263  { 6 ,6 ,5 ,1 ,1 },
264  { 1 ,1 ,5 ,6 ,6 },
265  { 3 ,0 ,5 ,4 ,7 },
266  { 7 ,4 ,5 ,0 ,3 },
267  { 3 ,7 ,5 ,4 ,0 },
268  { 0 ,4 ,5 ,7 ,3 },
269  { 4 ,0 ,5 ,3 ,7 },
270  { 7 ,3 ,5 ,0 ,4 },
271  { 4 ,7 ,5 ,3 ,0 },
272  { 0 ,3 ,5 ,7 ,4 },
273  { 3 ,1 ,5 ,4 ,6 },
274  { 6 ,4 ,5 ,1 ,3 },
275  { 3 ,6 ,5 ,4 ,1 },
276  { 1 ,4 ,5 ,6 ,3 },
277  { 4 ,1 ,5 ,3 ,6 },
278  { 6 ,3 ,5 ,1 ,4 },
279  { 4 ,6 ,5 ,3 ,1 },
280  { 1 ,3 ,5 ,6 ,4 },
281  { 0 ,1 ,5 ,7 ,6 },
282  { 6 ,7 ,5 ,1 ,0 },
283  { 0 ,6 ,5 ,7 ,1 },
284  { 1 ,7 ,5 ,6 ,0 },
285  { 7 ,1 ,5 ,0 ,6 },
286  { 6 ,0 ,5 ,1 ,7 },
287  { 7 ,6 ,5 ,0 ,1 },
288  { 1 ,0 ,5 ,6 ,7 }
289 };
290 
291 int path_dir_t[][5] = {
292  { 0 ,4 ,7 },
293  { 7 ,4 ,0 },
294  { 1 ,4 ,6 },
295  { 6 ,4 ,1 },
296  { 2 ,4 ,5 },
297  { 5 ,4 ,2 },
298  { 3 ,0 ,4 ,4 ,7 },
299  { 0 ,4 ,4 ,7 ,3 },
300  { 7 ,4 ,4 ,0 ,3 },
301  { 3 ,7 ,4 ,4 ,0 },
302  { 3 ,1 ,4 ,4 ,6 },
303  { 1 ,4 ,4 ,6 ,3 },
304  { 6 ,4 ,4 ,1 ,3 },
305  { 3 ,6 ,4 ,4 ,1 },
306  { 3 ,2 ,4 ,4 ,5 },
307  { 2 ,4 ,4 ,5 ,3 },
308  { 5 ,4 ,4 ,2 ,3 },
309  { 3 ,5 ,4 ,4 ,2 },
310  { 7 ,7 ,4 ,0 ,0 },
311  { 0 ,0 ,4 ,7 ,7 },
312  { 6 ,6 ,4 ,1 ,1 },
313  { 1 ,1 ,4 ,6 ,6 },
314  { 5 ,5 ,4 ,2 ,2 },
315  { 2 ,2 ,4 ,5 ,5 },
316  { 0 ,1 ,4 ,7 ,6 },
317  { 6 ,7 ,4 ,1 ,0 },
318  { 0 ,6 ,4 ,7 ,1 },
319  { 1 ,7 ,4 ,6 ,0 },
320  { 7 ,1 ,4 ,0 ,6 },
321  { 6 ,0 ,4 ,1 ,7 },
322  { 7 ,6 ,4 ,0 ,1 },
323  { 1 ,0 ,4 ,6 ,7 },
324  { 0 ,2 ,4 ,7 ,5 },
325  { 5 ,7 ,4 ,2 ,0 },
326  { 0 ,5 ,4 ,7 ,2 },
327  { 2 ,7 ,4 ,5 ,0 },
328  { 7 ,2 ,4 ,0 ,5 },
329  { 5 ,0 ,4 ,2 ,7 },
330  { 7 ,5 ,4 ,0 ,2 },
331  { 2 ,0 ,4 ,5 ,7 },
332  { 1 ,2 ,4 ,6 ,5 },
333  { 5 ,6 ,4 ,2 ,1 },
334  { 1 ,5 ,4 ,6 ,2 },
335  { 2 ,6 ,4 ,5 ,1 },
336  { 6 ,2 ,4 ,1 ,5 },
337  { 5 ,1 ,4 ,2 ,6 },
338  { 6 ,5 ,4 ,1 ,2 },
339  { 2 ,1 ,4 ,5 ,6 }
340 };
341 
342 
343 
344 static void
346 {
347  int max_length = 6;
348 
349  initQuda(device);
350  setVerbosityQuda(QUDA_VERBOSE,"",stdout);
351 
352  qudaGaugeParam = newQudaGaugeParam();
353 
354  qudaGaugeParam.X[0] = xdim;
355  qudaGaugeParam.X[1] = ydim;
356  qudaGaugeParam.X[2] = zdim;
357  qudaGaugeParam.X[3] = tdim;
358 
359  setDims(qudaGaugeParam.X);
360 
361  qudaGaugeParam.anisotropy = 1.0;
362  qudaGaugeParam.cpu_prec = link_prec;
363  qudaGaugeParam.cuda_prec = link_prec;
364  qudaGaugeParam.cuda_prec_sloppy = link_prec;
365  qudaGaugeParam.reconstruct = link_recon;
366  qudaGaugeParam.reconstruct_sloppy = link_recon;
367  qudaGaugeParam.type = QUDA_SU3_LINKS; // in this context, just means these are site links
368 
369  qudaGaugeParam.gauge_order = gauge_order;
370  qudaGaugeParam.t_boundary = QUDA_PERIODIC_T;
371  qudaGaugeParam.gauge_fix = QUDA_GAUGE_FIXED_NO;
372  qudaGaugeParam.ga_pad = 0;
373  qudaGaugeParam.mom_ga_pad = 0;
374 
375  size_t gSize = qudaGaugeParam.cpu_prec;
376 
377  void* sitelink = nullptr;
378  void* sitelink_1d = nullptr;
379 
380  sitelink_1d = pinned_malloc(4*V*gaugeSiteSize*gSize);
381 
382  // this is a hack to have site link generated in 2d
383  // then copied to 1d array in "MILC" format
384  void* sitelink_2d[4];
385  for(int i=0;i<4;i++) sitelink_2d[i] = pinned_malloc(V*gaugeSiteSize*qudaGaugeParam.cpu_prec);
386 
387  // fills the gauge field with random numbers
388  createSiteLinkCPU(sitelink_2d, qudaGaugeParam.cpu_prec, 0);
389 
390  //copy the 2d sitelink to 1d milc format
391 
392  for(int dir = 0; dir < 4; dir++){
393  for(int i=0; i < V; i++){
394  char* src = ((char*)sitelink_2d[dir]) + i * gaugeSiteSize* qudaGaugeParam.cpu_prec;
395  char* dst = ((char*)sitelink_1d) + (4*i+dir)*gaugeSiteSize*qudaGaugeParam.cpu_prec ;
396  memcpy(dst, src, gaugeSiteSize*qudaGaugeParam.cpu_prec);
397  }
398  }
399  if (qudaGaugeParam.gauge_order == QUDA_MILC_GAUGE_ORDER){
400  sitelink = sitelink_1d;
401  }else if (qudaGaugeParam.gauge_order == QUDA_QDP_GAUGE_ORDER) {
402  sitelink = (void**)sitelink_2d;
403  } else {
404  errorQuda("Unsupported gauge order %d", qudaGaugeParam.gauge_order);
405  }
406 
407 #ifdef MULTI_GPU
408  void* sitelink_ex_2d[4];
409  void* sitelink_ex_1d;
410 
411  sitelink_ex_1d = pinned_malloc(4*V_ex*gaugeSiteSize*gSize);
412  for(int i=0;i < 4;i++) sitelink_ex_2d[i] = pinned_malloc(V_ex*gaugeSiteSize*gSize);
413 
414  int X1= Z[0];
415  int X2= Z[1];
416  int X3= Z[2];
417  int X4= Z[3];
418 
419  for(int i=0; i < V_ex; i++){
420  int sid = i;
421  int oddBit=0;
422  if(i >= Vh_ex){
423  sid = i - Vh_ex;
424  oddBit = 1;
425  }
426 
427  int za = sid/E1h;
428  int x1h = sid - za*E1h;
429  int zb = za/E2;
430  int x2 = za - zb*E2;
431  int x4 = zb/E3;
432  int x3 = zb - x4*E3;
433  int x1odd = (x2 + x3 + x4 + oddBit) & 1;
434  int x1 = 2*x1h + x1odd;
435 
436  if( x1< 2 || x1 >= X1 +2
437  || x2< 2 || x2 >= X2 +2
438  || x3< 2 || x3 >= X3 +2
439  || x4< 2 || x4 >= X4 +2){
440  continue;
441  }
442 
443  x1 = (x1 - 2 + X1) % X1;
444  x2 = (x2 - 2 + X2) % X2;
445  x3 = (x3 - 2 + X3) % X3;
446  x4 = (x4 - 2 + X4) % X4;
447 
448  int idx = (x4*X3*X2*X1+x3*X2*X1+x2*X1+x1)>>1;
449  if(oddBit){
450  idx += Vh;
451  }
452  for(int dir= 0; dir < 4; dir++){
453  char* src = (char*)sitelink_2d[dir];
454  char* dst = (char*)sitelink_ex_2d[dir];
455  memcpy(dst+i*gaugeSiteSize*gSize, src+idx*gaugeSiteSize*gSize, gaugeSiteSize*gSize);
456  }//dir
457  }//i
458 
459 
460  for(int dir = 0; dir < 4; dir++){
461  for(int i=0; i < V_ex; i++){
462  char* src = ((char*)sitelink_ex_2d[dir]) + i * gaugeSiteSize* qudaGaugeParam.cpu_prec;
463  char* dst = ((char*)sitelink_ex_1d) + (4*i+dir)*gaugeSiteSize*qudaGaugeParam.cpu_prec ;
464  memcpy(dst, src, gaugeSiteSize*qudaGaugeParam.cpu_prec);
465  }
466  }
467 
468 #endif
469 
470  void* mom = pinned_malloc(4*V*momSiteSize*gSize);
471  void* refmom = safe_malloc(4*V*momSiteSize*gSize);
472 
473  memset(mom, 0, 4*V*momSiteSize*gSize);
474  //initialize some data in cpuMom
475  createMomCPU(mom, qudaGaugeParam.cpu_prec);
476 
477 
478  double loop_coeff_d[sizeof(loop_coeff_f)/sizeof(float)];
479  for(unsigned int i=0;i < sizeof(loop_coeff_f)/sizeof(float); i++){
480  loop_coeff_d[i] = loop_coeff_f[i];
481  }
482 
483  void* loop_coeff;
484  if(qudaGaugeParam.cuda_prec == QUDA_SINGLE_PRECISION){
485  loop_coeff = (void*)&loop_coeff_f[0];
486  }else{
487  loop_coeff = loop_coeff_d;
488  }
489  double eb3 = 0.3;
490  int num_paths = sizeof(path_dir_x)/sizeof(path_dir_x[0]);
491 
492  int** input_path_buf[4];
493  for(int dir =0; dir < 4; dir++){
494  input_path_buf[dir] = (int**)safe_malloc(num_paths*sizeof(int*));
495  for(int i=0;i < num_paths;i++){
496  input_path_buf[dir][i] = (int*)safe_malloc(length[i]*sizeof(int));
497  if(dir == 0) memcpy(input_path_buf[dir][i], path_dir_x[i], length[i]*sizeof(int));
498  else if(dir ==1) memcpy(input_path_buf[dir][i], path_dir_y[i], length[i]*sizeof(int));
499  else if(dir ==2) memcpy(input_path_buf[dir][i], path_dir_z[i], length[i]*sizeof(int));
500  else if(dir ==3) memcpy(input_path_buf[dir][i], path_dir_t[i], length[i]*sizeof(int));
501  }
502  }
503 
504  if (getTuning() == QUDA_TUNE_YES) {
505  printfQuda("Tuning...\n");
506  memcpy(refmom, mom, 4*V*momSiteSize*gSize);
507  computeGaugeForceQuda(mom, sitelink, input_path_buf, length,
508  loop_coeff_d, num_paths, max_length, eb3,
509  &qudaGaugeParam);
510  printfQuda("...done\n");
511  }
512 
513  struct timeval t0, t1;
514  double total_time = 0.0;
515  /* Multiple execution to exclude warmup time in the first run*/
516  for (int i =0; i<niter; i++){
517  memcpy(mom, refmom, 4*V*momSiteSize*gSize); // restore initial momentum for correctness
518  gettimeofday(&t0, NULL);
519  computeGaugeForceQuda(mom, sitelink, input_path_buf, length,
520  loop_coeff_d, num_paths, max_length, eb3,
521  &qudaGaugeParam);
522  gettimeofday(&t1, NULL);
523  total_time += t1.tv_sec - t0.tv_sec + 0.000001*(t1.tv_usec - t0.tv_usec);
524  }
525 
526  //The number comes from CPU implementation in MILC, gauge_force_imp.c
527  int flops=153004;
528 
529  if (verify_results){
530 #ifdef MULTI_GPU
531  //last arg=0 means no optimization for communication, i.e. exchange data in all directions
532  //even they are not partitioned
533  int R[4] = {2, 2, 2, 2};
534  exchange_cpu_sitelink_ex(qudaGaugeParam.X, R, (void**)sitelink_ex_2d,
535  QUDA_QDP_GAUGE_ORDER, qudaGaugeParam.cpu_prec, 0, 4);
536  gauge_force_reference(refmom, eb3, sitelink_2d, sitelink_ex_2d, qudaGaugeParam.cpu_prec,
537  input_path_buf, length, loop_coeff, num_paths);
538 #else
539  gauge_force_reference(refmom, eb3, sitelink_2d, NULL, qudaGaugeParam.cpu_prec,
540  input_path_buf, length, loop_coeff, num_paths);
541 #endif
542 
543  int res;
544  res = compare_floats(mom, refmom, 4*V*momSiteSize, 1e-3, qudaGaugeParam.cpu_prec);
545 
546  strong_check_mom(mom, refmom, 4*V, qudaGaugeParam.cpu_prec);
547 
548  printfQuda("Test %s\n",(1 == res) ? "PASSED" : "FAILED");
549  }
550 
551  double perf = 1.0*niter*flops*V/(total_time*1e+9);
552  printfQuda("total time =%.2f ms\n", total_time*1e+3);
553  printfQuda("overall performance : %.2f GFLOPS\n",perf);
554 
555  for(int dir = 0; dir < 4; dir++){
556  for(int i=0;i < num_paths; i++) host_free(input_path_buf[dir][i]);
557  host_free(input_path_buf[dir]);
558  }
559 
560  host_free(sitelink_1d);
561  for(int dir=0;dir < 4;dir++) host_free(sitelink_2d[dir]);
562 
563 #ifdef MULTI_GPU
564  host_free(sitelink_ex_1d);
565  for(int dir=0; dir < 4; dir++) host_free(sitelink_ex_2d[dir]);
566 #endif
567 
568 
569  host_free(mom);
570  host_free(refmom);
571  endQuda();
572 }
573 
574 
575 static void
577 {
578  printfQuda("running the following test:\n");
579 
580  printfQuda("link_precision link_reconstruct space_dim(x/y/z) T_dimension Gauge_order niter\n");
581  printfQuda("%s %s %d/%d/%d %d %s %d\n",
584  xdim,ydim,zdim, tdim,
586  niter);
587  return ;
588 
589 }
590 
591 void
592 usage_extra(char** argv )
593 {
594  printfQuda("Extra options:\n");
595  printfQuda(" --gauge-order <qdp/milc> # Gauge storing order in CPU\n");
596  return ;
597 }
598 
599 int
600 main(int argc, char **argv)
601 {
602  int i;
603  for (i =1;i < argc; i++){
604 
605  if(process_command_line_option(argc, argv, &i) == 0){
606  continue;
607  }
608 
609  if( strcmp(argv[i], "--gauge-order") == 0){
610  if(i+1 >= argc){
611  usage(argv);
612  }
613 
614  if(strcmp(argv[i+1], "milc") == 0){
616  }else if(strcmp(argv[i+1], "qdp") == 0){
618  }else{
619  fprintf(stderr, "Error: unsupported gauge-field order\n");
620  exit(1);
621  }
622  i++;
623  continue;
624  }
625 
626  if( strcmp(argv[i], "--verify") == 0){
627  verify_results=1;
628  continue;
629  }
630 
631  fprintf(stderr, "ERROR: Invalid option:%s\n", argv[i]);
632  usage(argv);
633  }
634 
635 
636  link_prec = prec;
637 
638  initComms(argc, argv, gridsize_from_cmdline);
639 
641 
643 
644  finalizeComms();
645 }
static QudaGaugeParam qudaGaugeParam
static size_t gSize
QudaReconstructType reconstruct_sloppy
Definition: quda.h:53
double anisotropy
Definition: quda.h:38
int main(int argc, char **argv)
void usage_extra(char **argv)
int Z[4]
Definition: test_util.cpp:26
void setVerbosityQuda(QudaVerbosity verbosity, const char prefix[], FILE *outfile)
void endQuda(void)
float loop_coeff_f[]
#define pinned_malloc(size)
Definition: malloc_quda.h:67
enum QudaPrecision_s QudaPrecision
int ga_pad
Definition: quda.h:63
QudaGaugeFixed gauge_fix
Definition: quda.h:61
QudaLinkType type
Definition: quda.h:42
static int X2
Definition: face_gauge.cpp:42
#define errorQuda(...)
Definition: util_quda.h:121
#define host_free(ptr)
Definition: malloc_quda.h:71
void createMomCPU(void *mom, QudaPrecision precision)
Definition: test_util.cpp:1451
int V_ex
Definition: test_util.cpp:36
static void gauge_force_test(void)
int process_command_line_option(int argc, char **argv, int *idx)
Definition: test_util.cpp:2019
int path_dir_t[][5]
static int R[4]
void finalizeComms()
Definition: test_util.cpp:128
const char * get_gauge_order_str(QudaGaugeFieldOrder order)
Definition: misc.cpp:741
QudaGaugeFieldOrder gauge_order
Definition: quda.h:43
int compare_floats(void *a, void *b, int len, double epsilon, QudaPrecision precision)
Definition: test_util.cpp:434
const char * get_prec_str(QudaPrecision prec)
Definition: misc.cpp:701
int length[]
void createSiteLinkCPU(void **link, QudaPrecision precision, int phase)
Definition: test_util.cpp:1227
QudaReconstructType link_recon
Definition: test_util.cpp:1605
void setDims(int *)
Definition: test_util.cpp:151
int device
Definition: test_util.cpp:1602
int niter
Definition: test_util.cpp:1629
void gauge_force_reference(void *refMom, double eb3, void **sitelink, void **sitelink_ex_2d, QudaPrecision prec, int ***path_dir, int *length, void *loop_coeff, int num_paths)
int computeGaugeForceQuda(void *mom, void *sitelink, int ***input_path_buf, int *path_length, double *loop_coeff, int num_paths, int max_length, double dt, QudaGaugeParam *qudaGaugeParam)
void initQuda(int device)
int ydim
Definition: test_util.cpp:1616
QudaPrecision prec
Definition: test_util.cpp:1608
int Vh_ex
Definition: test_util.cpp:36
static void display_test_info()
const char * get_recon_str(QudaReconstructType recon)
Definition: misc.cpp:768
int path_dir_y[][5]
QudaPrecision cuda_prec_sloppy
Definition: quda.h:52
int gridsize_from_cmdline[]
Definition: test_util.cpp:49
enum QudaGaugeFieldOrder_s QudaGaugeFieldOrder
bool verify_results
Definition: test_util.cpp:1643
QudaReconstructType reconstruct
Definition: quda.h:50
QudaPrecision cuda_prec
Definition: quda.h:49
int X[4]
Definition: quda.h:36
int xdim
Definition: test_util.cpp:1615
int strong_check_mom(void *momA, void *momB, int len, QudaPrecision prec)
Definition: test_util.cpp:1559
static int X3
Definition: face_gauge.cpp:42
#define safe_malloc(size)
Definition: malloc_quda.h:66
int V
Definition: test_util.cpp:27
static int X1
Definition: face_gauge.cpp:42
void * memset(void *s, int c, size_t n)
QudaGaugeFieldOrder gauge_order
enum QudaReconstructType_s QudaReconstructType
Main header file for the QUDA library.
int E2
Definition: test_util.cpp:34
int mom_ga_pad
Definition: quda.h:69
#define printfQuda(...)
Definition: util_quda.h:115
QudaTboundary t_boundary
Definition: quda.h:45
unsigned long long flops
Definition: blas_quda.cu:22
int zdim
Definition: test_util.cpp:1617
int tdim
void usage(char **argv)
Definition: test_util.cpp:1783
int path_dir_z[][5]
QudaPrecision link_prec
void initComms(int argc, char **argv, int *const commDims)
Definition: test_util.cpp:88
QudaTune getTuning()
Query whether autotuning is enabled or not. Default is enabled but can be overridden by setting QUDA_...
Definition: util_quda.cpp:52
int E3
Definition: test_util.cpp:34
int path_dir_x[][5]
QudaPrecision cpu_prec
Definition: quda.h:47
void exchange_cpu_sitelink_ex(int *X, int *R, void **sitelink, QudaGaugeFieldOrder cpu_order, QudaPrecision gPrecision, int optflag, int geometry)
Definition: face_gauge.cpp:644
int E1h
Definition: test_util.cpp:34
static int X4
Definition: face_gauge.cpp:42
#define gaugeSiteSize
Definition: face_gauge.cpp:34
QudaGaugeParam newQudaGaugeParam(void)
#define momSiteSize
Definition: test_util.h:10
int Vh
Definition: test_util.cpp:28