QUDA  v1.1.0
A library for QCD on GPUs
llfat_utils.cpp
Go to the documentation of this file.
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <math.h>
4 
5 #include <quda.h>
6 #include <gauge_field.h>
7 #include <host_utils.h>
8 #include <llfat_utils.h>
9 #include <unitarization_links.h>
10 #include <misc.h>
11 #include <string.h>
12 
13 #include <quda_internal.h>
14 #include <complex>
15 
// Direction indices. The fattening loops in this file iterate their
// dir/nu/rho/sig counters from XUP through TUP inclusive.
16 #define XUP 0
17 #define YUP 1
18 #define ZUP 2
19 #define TUP 3
20 
21 using namespace quda;
22 
// Per-direction sizes copied from Vs_x..Vs_t at the top of llfat_reference()
// and llfat_reference_mg(). The multi-GPU path uses them to size the ghost
// staple buffers and as strides when indexing into ghost buffers.
23 static int Vs[4];
// Per-direction sizes copied from Vsh_x..Vsh_t by the same entry points. The
// multi-GPU path uses them as the parity offset when indexing ghost buffers.
24 static int Vsh[4];
25 
26 template <typename su3_matrix, typename Real>
27 void llfat_compute_gen_staple_field(su3_matrix *staple, int mu, int nu, su3_matrix *mulink, su3_matrix **sitelink,
28  void **fatlink, Real coef, int use_staple)
29 {
30  su3_matrix tmat1, tmat2;
31  int i;
32  su3_matrix *fat1;
33 
34  /* Upper staple */
35  /* Computes the staple :
36  * mu (B)
37  * +-------+
38  * nu | |
39  * (A) | |(C)
40  * X X
41  *
42  * Where the mu link can be any su3_matrix. The result is saved in staple.
43  * if staple==NULL then the result is not saved.
44  * It also adds the computed staple to the fatlink[mu] with weight coef.
45  */
46 
47  int dx[4];
48 
49  /* upper staple */
50 
51  for (i = 0; i < V; i++) {
52 
53  fat1 = ((su3_matrix *)fatlink[mu]) + i;
54  su3_matrix *A = sitelink[nu] + i;
55 
56  memset(dx, 0, sizeof(dx));
57  dx[nu] = 1;
58  int nbr_idx = neighborIndexFullLattice(i, dx[3], dx[2], dx[1], dx[0]);
59  su3_matrix *B;
60  if (use_staple) {
61  B = mulink + nbr_idx;
62  } else {
63  B = mulink + nbr_idx;
64  }
65 
66  memset(dx, 0, sizeof(dx));
67  dx[mu] = 1;
68  nbr_idx = neighborIndexFullLattice(i, dx[3], dx[2], dx[1], dx[0]);
69  su3_matrix *C = sitelink[nu] + nbr_idx;
70 
71  llfat_mult_su3_nn(A, B, &tmat1);
72 
73  if (staple != NULL) { /* Save the staple */
74  llfat_mult_su3_na(&tmat1, C, &staple[i]);
75  } else { /* No need to save the staple. Add it to the fatlinks */
76  llfat_mult_su3_na(&tmat1, C, &tmat2);
77  llfat_scalar_mult_add_su3_matrix(fat1, &tmat2, coef, fat1);
78  }
79  }
80  /***************lower staple****************
81  *
82  * X X
83  * nu | |
84  * (A) | |(C)
85  * +-------+
86  * mu (B)
87  *
88  *********************************************/
89 
90  for (i = 0; i < V; i++) {
91 
92  fat1 = ((su3_matrix *)fatlink[mu]) + i;
93  memset(dx, 0, sizeof(dx));
94  dx[nu] = -1;
95  int nbr_idx = neighborIndexFullLattice(i, dx[3], dx[2], dx[1], dx[0]);
96  if (nbr_idx >= V || nbr_idx < 0) {
97  fprintf(stderr, "ERROR: invliad nbr_idx(%d), line=%d\n", nbr_idx, __LINE__);
98  exit(1);
99  }
100  su3_matrix *A = sitelink[nu] + nbr_idx;
101 
102  su3_matrix *B;
103  if (use_staple) {
104  B = mulink + nbr_idx;
105  } else {
106  B = mulink + nbr_idx;
107  }
108 
109  memset(dx, 0, sizeof(dx));
110  dx[mu] = 1;
111  nbr_idx = neighborIndexFullLattice(nbr_idx, dx[3], dx[2], dx[1], dx[0]);
112  su3_matrix *C = sitelink[nu] + nbr_idx;
113 
114  llfat_mult_su3_an(A, B, &tmat1);
115  llfat_mult_su3_nn(&tmat1, C, &tmat2);
116 
117  if (staple != NULL) { /* Save the staple */
118  llfat_add_su3_matrix(&staple[i], &tmat2, &staple[i]);
119  llfat_scalar_mult_add_su3_matrix(fat1, &staple[i], coef, fat1);
120 
121  } else { /* No need to save the staple. Add it to the fatlinks */
122  llfat_scalar_mult_add_su3_matrix(fat1, &tmat2, coef, fat1);
123  }
124  }
125 } /* compute_gen_staple_site */
126 
127 /* Optimized fattening code for the Asq and Asqtad actions.
128  * I assume that:
129  * path 0 is the one link
130  * path 2 the 3-staple
131  * path 3 the 5-staple
132  * path 4 the 7-staple
133  * path 5 the Lapage term.
134  * Path 1 is the Naik term
135  *
136  */
137 template <typename su3_matrix, typename Float>
138 void llfat_cpu(void **fatlink, su3_matrix **sitelink, Float *act_path_coeff)
139 {
140  su3_matrix *staple = (su3_matrix *)malloc(V * sizeof(su3_matrix));
141  if (staple == NULL) {
142  fprintf(stderr, "Error: malloc failed for staple in function %s\n", __FUNCTION__);
143  exit(1);
144  }
145 
146  su3_matrix *tempmat1 = (su3_matrix *)malloc(V * sizeof(su3_matrix));
147  if (tempmat1 == NULL) {
148  fprintf(stderr, "ERROR: malloc failed for tempmat1 in function %s\n", __FUNCTION__);
149  exit(1);
150  }
151 
152  // to fix up the Lepage term, included by a trick below
153  Float one_link = (act_path_coeff[0] - 6.0 * act_path_coeff[5]);
154 
155  for (int dir = XUP; dir <= TUP; dir++) {
156 
157  // Intialize fat links with c_1*U_\mu(x)
158  for (int i = 0; i < V; i++) {
159  su3_matrix *fat1 = ((su3_matrix *)fatlink[dir]) + i;
160  llfat_scalar_mult_su3_matrix(sitelink[dir] + i, one_link, fat1);
161  }
162  }
163 
164  for (int dir = XUP; dir <= TUP; dir++) {
165  for (int nu = XUP; nu <= TUP; nu++) {
166  if (nu != dir) {
167  llfat_compute_gen_staple_field(staple, dir, nu, sitelink[dir], sitelink, fatlink, act_path_coeff[2], 0);
168 
169  // The Lepage term
170  // Note this also involves modifying c_1 (above)
171 
172  llfat_compute_gen_staple_field((su3_matrix *)NULL, dir, nu, staple, sitelink, fatlink, act_path_coeff[5], 1);
173 
174  for (int rho = XUP; rho <= TUP; rho++) {
175  if ((rho != dir) && (rho != nu)) {
176  llfat_compute_gen_staple_field(tempmat1, dir, rho, staple, sitelink, fatlink, act_path_coeff[3], 1);
177 
178  for (int sig = XUP; sig <= TUP; sig++) {
179  if ((sig != dir) && (sig != nu) && (sig != rho)) {
180  llfat_compute_gen_staple_field((su3_matrix *)NULL, dir, sig, tempmat1, sitelink, fatlink,
181  act_path_coeff[4], 1);
182  }
183  } // sig
184  }
185  } // rho
186  }
187  } // nu
188  } // dir
189 
190  free(staple);
191  free(tempmat1);
192 }
193 
194 void llfat_reference(void **fatlink, void **sitelink, QudaPrecision prec, void *act_path_coeff)
195 {
196  Vs[0] = Vs_x;
197  Vs[1] = Vs_y;
198  Vs[2] = Vs_z;
199  Vs[3] = Vs_t;
200 
201  Vsh[0] = Vsh_x;
202  Vsh[1] = Vsh_y;
203  Vsh[2] = Vsh_z;
204  Vsh[3] = Vsh_t;
205 
206  switch (prec) {
208  llfat_cpu((void **)fatlink, (su3_matrix<double> **)sitelink, (double *)act_path_coeff);
209  break;
210 
212  llfat_cpu((void **)fatlink, (su3_matrix<float> **)sitelink, (float *)act_path_coeff);
213  break;
214 
215  default:
216  fprintf(stderr, "ERROR: unsupported precision(%d)\n", prec);
217  exit(1);
218  break;
219  }
220  return;
221 }
222 
223 #ifdef MULTI_GPU
224 
/**
 * Multi-process variant of the staple kernel.
 *
 * For each of the V local sites it forms the upper and the lower staple in the
 * (mu, nu) plane. Whenever an operand would lie outside the local extents Z[]
 * it is read from a ghost buffer instead of the local field.
 *
 * @param staple              If non-NULL, the upper result is stored in
 *                            staple[i], the lower result is added to it and
 *                            coef * staple[i] is then added to fatlink[mu][i].
 *                            If NULL, each result is added to fatlink[mu][i]
 *                            with weight coef directly.
 * @param mu                  Direction of the link being fattened.
 * @param nu                  Second direction of the staple plane.
 * @param mulink              Local field used for the B operand when
 *                            use_staple is nonzero.
 * @param ghost_mulink        Ghost data for mulink, indexed by direction; only
 *                            dereferenced when use_staple is nonzero.
 * @param sitelink            Per-direction local link fields.
 * @param ghost_sitelink      Per-direction ghost data for the link fields.
 * @param ghost_sitelink_diag Ghost data indexed by nu * 4 + mu, used in the
 *                            lower pass when the site leaves the local volume
 *                            both backward in nu and forward in mu.
 * @param fatlink             Per-direction output fields; fatlink[mu] is
 *                            accumulated into.
 * @param coef                Weight applied when accumulating into fatlink[mu].
 * @param use_staple          Nonzero: B comes from mulink / ghost_mulink.
 *                            Zero: B comes from sitelink[mu] / ghost_sitelink.
 *
 * Reads the globals V, Vh, Z and the file-scope tables Vs[] and Vsh[].
 */
225 template <typename su3_matrix, typename Real>
226 void llfat_compute_gen_staple_field_mg(su3_matrix *staple, int mu, int nu, su3_matrix *mulink,
227  su3_matrix **ghost_mulink, su3_matrix **sitelink, su3_matrix **ghost_sitelink,
228  su3_matrix **ghost_sitelink_diag, void **fatlink, Real coef, int use_staple)
229 {
230  su3_matrix tmat1, tmat2;
231  int i;
232  su3_matrix *fat1;
233 
     // Local extents and the products used to linearise face coordinates.
234  int X1 = Z[0];
235  int X2 = Z[1];
236  int X3 = Z[2];
237  // int X4 = Z[3];
238  int X1h = X1 / 2;
239 
240  int X2X1 = X1 * X2;
241  int X3X2 = X3 * X2;
242  int X3X1 = X3 * X1;
243 
244  /* Upper staple */
245  /* Computes the staple :
246  * mu (B)
247  * +-------+
248  * nu | |
249  * (A) | |(C)
250  * X X
251  *
252  * Where the mu link can be any su3_matrix. The result is saved in staple.
253  * if staple==NULL then the result is not saved.
254  * It also adds the computed staple to the fatlink[mu] with weight coef.
255  */
256 
257  int dx[4];
258 
259  // upper staple
260 
261  for (i = 0; i < V; i++) {
262 
       // Indices at or above Vh are treated as odd-parity sites.
263  int half_index = i;
264  int oddBit = 0;
265  if (i >= Vh) {
266  oddBit = 1;
267  half_index = i - Vh;
268  }
269  // int x4 = x4_from_full_index(i);
270 
       // Recover the coordinates (x1, x2, x3, x4) from the half-lattice index.
271  int sid = half_index;
272  int za = sid / X1h;
273  int x1h = sid - za * X1h;
274  int zb = za / X2;
275  int x2 = za - zb * X2;
276  int x4 = zb / X3;
277  int x3 = zb - x4 * X3;
278  int x1odd = (x2 + x3 + x4 + oddBit) & 1;
279  int x1 = 2 * x1h + x1odd;
280  int x[4] = {x1, x2, x3, x4};
       // space_con[d]: halved linear index built from the three coordinates other than d;
       // used below as the offset within a direction-d ghost buffer.
281  int space_con[4] = {(x4 * X3X2 + x3 * X2 + x2) / 2, (x4 * X3X1 + x3 * X1 + x1) / 2, (x4 * X2X1 + x2 * X1 + x1) / 2,
282  (x3 * X2X1 + x2 * X1 + x1) / 2};
283 
284  fat1 = ((su3_matrix *)fatlink[mu]) + i;
285  su3_matrix *A = sitelink[nu] + i;
286 
287  memset(dx, 0, sizeof(dx));
288  dx[nu] = 1;
289  int nbr_idx;
290 
       // B sits one step forward in nu; past the local extent it is read from a ghost buffer.
       // NOTE(review): the Vs[nu] / 4 * Vs[nu] offsets suggest the forward-face data is stored
       // after the backward-face data in each ghost buffer - confirm against the exchange code.
291  su3_matrix *B;
292  if (use_staple) {
293  if (x[nu] + dx[nu] >= Z[nu]) {
294  B = ghost_mulink[nu] + Vs[nu] + (1 - oddBit) * Vsh[nu] + space_con[nu];
295  } else {
296  nbr_idx = neighborIndexFullLattice_mg(i, dx[3], dx[2], dx[1], dx[0]);
297  B = mulink + nbr_idx;
298  }
299  } else {
300  if (x[nu] + dx[nu] >= Z[nu]) { // out of boundary, use ghost data
301  B = ghost_sitelink[nu] + 4 * Vs[nu] + mu * Vs[nu] + (1 - oddBit) * Vsh[nu] + space_con[nu];
302  } else {
303  nbr_idx = neighborIndexFullLattice_mg(i, dx[3], dx[2], dx[1], dx[0]);
304  B = sitelink[mu] + nbr_idx;
305  }
306  }
307 
308  // C sits one step forward in mu; it is read from the ghost data when the site is on the upper mu boundary
309  su3_matrix *C;
310  memset(dx, 0, sizeof(dx));
311  dx[mu] = 1;
312  if (x[mu] + dx[mu] >= Z[mu]) { // out of boundary, use ghost data
313  C = ghost_sitelink[mu] + 4 * Vs[mu] + nu * Vs[mu] + (1 - oddBit) * Vsh[mu] + space_con[mu];
314  } else {
315  nbr_idx = neighborIndexFullLattice_mg(i, dx[3], dx[2], dx[1], dx[0]);
316  C = sitelink[nu] + nbr_idx;
317  }
318 
319  llfat_mult_su3_nn(A, B, &tmat1);
320 
321  if (staple != NULL) { /* Save the staple */
322  llfat_mult_su3_na(&tmat1, C, &staple[i]);
323  } else { /* No need to save the staple. Add it to the fatlinks */
324  llfat_mult_su3_na(&tmat1, C, &tmat2);
325  llfat_scalar_mult_add_su3_matrix(fat1, &tmat2, coef, fat1);
326  }
327  }
328  /***************lower staple****************
329  *
330  * X X
331  * nu | |
332  * (A) | |(C)
333  * +-------+
334  * mu (B)
335  *
336  *********************************************/
337 
338  for (i = 0; i < V; i++) {
339 
       // Same parity / coordinate decoding as in the upper pass.
340  int half_index = i;
341  int oddBit = 0;
342  if (i >= Vh) {
343  oddBit = 1;
344  half_index = i - Vh;
345  }
346 
347  int sid = half_index;
348  int za = sid / X1h;
349  int x1h = sid - za * X1h;
350  int zb = za / X2;
351  int x2 = za - zb * X2;
352  int x4 = zb / X3;
353  int x3 = zb - x4 * X3;
354  int x1odd = (x2 + x3 + x4 + oddBit) & 1;
355  int x1 = 2 * x1h + x1odd;
356  int x[4] = {x1, x2, x3, x4};
357  int space_con[4] = {(x4 * X3X2 + x3 * X2 + x2) / 2, (x4 * X3X1 + x3 * X1 + x1) / 2, (x4 * X2X1 + x2 * X1 + x1) / 2,
358  (x3 * X2X1 + x2 * X1 + x1) / 2};
359 
360  // int x4 = x4_from_full_index(i);
361 
362  fat1 = ((su3_matrix *)fatlink[mu]) + i;
363 
364  // A sits one step backward in nu; it is read from the ghost data when the site is on the lower nu boundary
365  su3_matrix *A;
366  memset(dx, 0, sizeof(dx));
367  dx[nu] = -1;
368 
369  int nbr_idx;
370  if (x[nu] + dx[nu] < 0) { // out of boundary, use ghost data
371  A = ghost_sitelink[nu] + nu * Vs[nu] + (1 - oddBit) * Vsh[nu] + space_con[nu];
372  } else {
373  nbr_idx = neighborIndexFullLattice_mg(i, dx[3], dx[2], dx[1], dx[0]);
374  A = sitelink[nu] + nbr_idx;
375  }
376 
       // B sits at the same backward-nu position as A (dx is unchanged since A was located).
377  su3_matrix *B;
378  if (use_staple) {
379  nbr_idx = neighborIndexFullLattice_mg(i, dx[3], dx[2], dx[1], dx[0]);
380  if (x[nu] + dx[nu] < 0) {
381  B = ghost_mulink[nu] + (1 - oddBit) * Vsh[nu] + space_con[nu];
382  } else {
383  B = mulink + nbr_idx;
384  }
385  } else {
386  if (x[nu] + dx[nu] < 0) { // out of boundary, use ghost data
387  B = ghost_sitelink[nu] + mu * Vs[nu] + (1 - oddBit) * Vsh[nu] + space_con[nu];
388  } else {
389  nbr_idx = neighborIndexFullLattice_mg(i, dx[3], dx[2], dx[1], dx[0]);
390  B = sitelink[mu] + nbr_idx;
391  }
392  }
393 
394  // C sits one step backward in nu and one step forward in mu, so it may leave the
395  // local volume through the lower nu boundary, the upper mu boundary, or both
396  su3_matrix *C;
397  memset(dx, 0, sizeof(dx));
398  dx[nu] = -1;
399  dx[mu] = 1;
400  nbr_idx = neighborIndexFullLattice_mg(i, dx[3], dx[2], dx[1], dx[0]);
401 
402  // space_con must be recomputed because the coordinates change in two directions
403  int new_x1, new_x2, new_x3, new_x4;
404  new_x1 = (x[0] + dx[0] + Z[0]) % Z[0];
405  new_x2 = (x[1] + dx[1] + Z[1]) % Z[1];
406  new_x3 = (x[2] + dx[2] + Z[2]) % Z[2];
407  new_x4 = (x[3] + dx[3] + Z[3]) % Z[3];
408  int new_x[4] = {new_x1, new_x2, new_x3, new_x4};
409  space_con[0] = (new_x4 * X3X2 + new_x3 * X2 + new_x2) / 2;
410  space_con[1] = (new_x4 * X3X1 + new_x3 * X1 + new_x1) / 2;
411  space_con[2] = (new_x4 * X2X1 + new_x2 * X1 + new_x1) / 2;
412  space_con[3] = (new_x3 * X2X1 + new_x2 * X1 + new_x1) / 2;
413 
       // Note: the C lookups below offset by oddBit * ..., whereas A and B use (1 - oddBit) * ...
414  if ((x[nu] + dx[nu]) < 0 && (x[mu] + dx[mu] >= Z[mu])) {
415  // find the other 2 directions, dir1, dir2
416  // with dir2 the slowest changing direction
417  int dir1, dir2; // other two dimensions
418  for (dir1 = 0; dir1 < 4; dir1++) {
419  if (dir1 != nu && dir1 != mu) { break; }
420  }
421  for (dir2 = 0; dir2 < 4; dir2++) {
422  if (dir2 != nu && dir2 != mu && dir2 != dir1) { break; }
423  }
424  C = ghost_sitelink_diag[nu * 4 + mu] + oddBit * Z[dir1] * Z[dir2] / 2 + (new_x[dir2] * Z[dir1] + new_x[dir1]) / 2;
425  } else if (x[nu] + dx[nu] < 0) {
426  C = ghost_sitelink[nu] + nu * Vs[nu] + oddBit * Vsh[nu] + space_con[nu];
427  } else if (x[mu] + dx[mu] >= Z[mu]) {
428  C = ghost_sitelink[mu] + 4 * Vs[mu] + nu * Vs[mu] + oddBit * Vsh[mu] + space_con[mu];
429  } else {
430  C = sitelink[nu] + nbr_idx;
431  }
432  llfat_mult_su3_an(A, B, &tmat1);
433  llfat_mult_su3_nn(&tmat1, C, &tmat2);
434 
435  if (staple != NULL) { /* Save the staple */
436  llfat_add_su3_matrix(&staple[i], &tmat2, &staple[i]);
437  llfat_scalar_mult_add_su3_matrix(fat1, &staple[i], coef, fat1);
438 
439  } else { /* No need to save the staple. Add it to the fatlinks */
440  llfat_scalar_mult_add_su3_matrix(fat1, &tmat2, coef, fat1);
441  }
442  }
443 
444 } // llfat_compute_gen_staple_field_mg
445 
446 template <typename su3_matrix, typename Float>
447 void llfat_cpu_mg(void **fatlink, su3_matrix **sitelink, su3_matrix **ghost_sitelink, su3_matrix **ghost_sitelink_diag,
448  Float *act_path_coeff)
449 {
451  if (sizeof(Float) == 4) {
453  } else {
455  }
456 
457  su3_matrix *staple = (su3_matrix *)malloc(V * sizeof(su3_matrix));
458  if (staple == NULL) {
459  fprintf(stderr, "Error: malloc failed for staple in function %s\n", __FUNCTION__);
460  exit(1);
461  }
462 
463  su3_matrix *ghost_staple[4];
464  su3_matrix *ghost_staple1[4];
465 
466  for (int i = 0; i < 4; i++) {
467  ghost_staple[i] = (su3_matrix *)malloc(2 * Vs[i] * sizeof(su3_matrix));
468  if (ghost_staple[i] == NULL) {
469  fprintf(stderr, "Error: malloc failed for ghost staple in function %s\n", __FUNCTION__);
470  exit(1);
471  }
472 
473  ghost_staple1[i] = (su3_matrix *)malloc(2 * Vs[i] * sizeof(su3_matrix));
474  if (ghost_staple1[i] == NULL) {
475  fprintf(stderr, "Error: malloc failed for ghost staple1 in function %s\n", __FUNCTION__);
476  exit(1);
477  }
478  }
479 
480  su3_matrix *tempmat1 = (su3_matrix *)malloc(V * sizeof(su3_matrix));
481  if (tempmat1 == NULL) {
482  fprintf(stderr, "ERROR: malloc failed for tempmat1 in function %s\n", __FUNCTION__);
483  exit(1);
484  }
485 
486  // to fix up the Lepage term, included by a trick below
487  Float one_link = (act_path_coeff[0] - 6.0 * act_path_coeff[5]);
488 
489  for (int dir = XUP; dir <= TUP; dir++) {
490 
491  // Intialize fat links with c_1*U_\mu(x)
492  for (int i = 0; i < V; i++) {
493  su3_matrix *fat1 = ((su3_matrix *)fatlink[dir]) + i;
494  llfat_scalar_mult_su3_matrix(sitelink[dir] + i, one_link, fat1);
495  }
496  }
497 
498  for (int dir = XUP; dir <= TUP; dir++) {
499  for (int nu = XUP; nu <= TUP; nu++) {
500  if (nu != dir) {
501  llfat_compute_gen_staple_field_mg(staple, dir, nu, sitelink[dir], (su3_matrix **)NULL, sitelink, ghost_sitelink,
502  ghost_sitelink_diag, fatlink, act_path_coeff[2], 0);
503  // The Lepage term */
504  // Note this also involves modifying c_1 (above)
505 
506  exchange_cpu_staple(Z, staple, (void **)ghost_staple, prec);
507 
508  llfat_compute_gen_staple_field_mg((su3_matrix *)NULL, dir, nu, staple, ghost_staple, sitelink, ghost_sitelink,
509  ghost_sitelink_diag, fatlink, act_path_coeff[5], 1);
510 
511  for (int rho = XUP; rho <= TUP; rho++) {
512  if ((rho != dir) && (rho != nu)) {
513  llfat_compute_gen_staple_field_mg(tempmat1, dir, rho, staple, ghost_staple, sitelink, ghost_sitelink,
514  ghost_sitelink_diag, fatlink, act_path_coeff[3], 1);
515 
516  exchange_cpu_staple(Z, tempmat1, (void **)ghost_staple1, prec);
517 
518  for (int sig = XUP; sig <= TUP; sig++) {
519  if ((sig != dir) && (sig != nu) && (sig != rho)) {
520 
521  llfat_compute_gen_staple_field_mg((su3_matrix *)NULL, dir, sig, tempmat1, ghost_staple1, sitelink,
522  ghost_sitelink, ghost_sitelink_diag, fatlink, act_path_coeff[4], 1);
523  // FIXME
524  // return;
525  }
526  } // sig
527  }
528  } // rho
529  }
530  } // nu
531  } // dir
532 
533  free(staple);
534  for (int i = 0; i < 4; i++) {
535  free(ghost_staple[i]);
536  free(ghost_staple1[i]);
537  }
538  free(tempmat1);
539 }
540 
541 void llfat_reference_mg(void **fatlink, void **sitelink, void **ghost_sitelink, void **ghost_sitelink_diag,
542  QudaPrecision prec, void *act_path_coeff)
543 {
544  Vs[0] = Vs_x;
545  Vs[1] = Vs_y;
546  Vs[2] = Vs_z;
547  Vs[3] = Vs_t;
548 
549  Vsh[0] = Vsh_x;
550  Vsh[1] = Vsh_y;
551  Vsh[2] = Vsh_z;
552  Vsh[3] = Vsh_t;
553 
554  switch (prec) {
555  case QUDA_DOUBLE_PRECISION: {
556  llfat_cpu_mg((void **)fatlink, (su3_matrix<double> **)sitelink, (su3_matrix<double> **)ghost_sitelink,
557  (su3_matrix<double> **)ghost_sitelink_diag, (double *)act_path_coeff);
558  break;
559  }
560  case QUDA_SINGLE_PRECISION: {
561  llfat_cpu_mg((void **)fatlink, (su3_matrix<float> **)sitelink, (su3_matrix<float> **)ghost_sitelink,
562  (su3_matrix<float> **)ghost_sitelink_diag, (float *)act_path_coeff);
563  break;
564  }
565  default:
566  fprintf(stderr, "ERROR: unsupported precision(%d)\n", prec);
567  exit(1);
568  break;
569  }
570  return;
571 }
572 #endif
double mu
QudaPrecision prec
int Vh
Definition: host_utils.cpp:38
int Z[4]
Definition: host_utils.cpp:36
int V
Definition: host_utils.cpp:37
void * memset(void *s, int c, size_t n)
enum QudaPrecision_s QudaPrecision
@ QUDA_DOUBLE_PRECISION
Definition: enum_quda.h:65
@ QUDA_SINGLE_PRECISION
Definition: enum_quda.h:64
void exchange_cpu_staple(int *X, void *staple, void **ghost_staple, QudaPrecision gPrecision)
Definition: face_gauge.cpp:976
int neighborIndexFullLattice(int i, int dx4, int dx3, int dx2, int dx1)
Definition: host_utils.cpp:490
int Vs_y
Definition: host_utils.cpp:39
int Vsh_y
Definition: host_utils.cpp:40
int Vsh_x
Definition: host_utils.cpp:40
int neighborIndexFullLattice_mg(int i, int dx4, int dx3, int dx2, int dx1)
Definition: host_utils.cpp:528
int Vs_x
Definition: host_utils.cpp:39
int Vs_t
Definition: host_utils.cpp:39
int Vs_z
Definition: host_utils.cpp:39
int Vsh_z
Definition: host_utils.cpp:40
int Vsh_t
Definition: host_utils.cpp:40
void llfat_compute_gen_staple_field(su3_matrix *staple, int mu, int nu, su3_matrix *mulink, su3_matrix **sitelink, void **fatlink, Real coef, int use_staple)
Definition: llfat_utils.cpp:27
#define XUP
Definition: llfat_utils.cpp:16
#define TUP
Definition: llfat_utils.cpp:19
void llfat_reference(void **fatlink, void **sitelink, QudaPrecision prec, void *act_path_coeff)
void llfat_cpu(void **fatlink, su3_matrix **sitelink, Float *act_path_coeff)
void llfat_mult_su3_nn(su3_matrix *a, su3_matrix *b, su3_matrix *c)
Definition: llfat_utils.h:41
void llfat_scalar_mult_su3_matrix(su3_matrix *a, Real s, su3_matrix *b)
Definition: llfat_utils.h:14
void llfat_mult_su3_an(su3_matrix *a, su3_matrix *b, su3_matrix *c)
Definition: llfat_utils.h:55
void llfat_scalar_mult_add_su3_matrix(su3_matrix *a, su3_matrix *b, Real s, su3_matrix *c)
Definition: llfat_utils.h:21
void llfat_add_su3_matrix(su3_matrix *a, su3_matrix *b, su3_matrix *c)
Definition: llfat_utils.h:69
void llfat_reference_mg(void **fatlink, void **sitelink, void **ghost_sitelink, void **ghost_sitelink_diag, QudaPrecision prec, void *act_path_coeff)
void llfat_mult_su3_na(su3_matrix *a, su3_matrix *b, su3_matrix *c)
Definition: llfat_utils.h:27
FloatingPoint< float > Float
Main header file for the QUDA library.