QUDA  1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
staggered_gauge_utils.cpp
Go to the documentation of this file.
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <math.h>
4 #include <string.h>
5 
6 #include <test_util.h>
7 #include <quda_internal.h>
8 #include <quda.h>
9 #include <util_quda.h>
10 #include <staggered_gauge_utils.h>
11 #include <llfat_reference.h>
12 #include <unitarization_links.h>
13 #include "misc.h"
14 
15 extern double tadpole_factor;
16 // Unitarization coefficients
17 static double unitarize_eps = 1e-6;
18 static bool reunit_allow_svd = true;
19 static bool reunit_svd_only = false;
20 static double svd_rel_error = 1e-4;
21 static double svd_abs_error = 1e-4;
22 static double max_allowed_error = 1e-11;
23 
24 // Wrap everything for the GPU construction of fat/long links here
25 void computeHISQLinksGPU(void **qdp_fatlink, void **qdp_longlink, void **qdp_fatlink_eps, void **qdp_longlink_eps,
26  void **qdp_inlink, QudaGaugeParam &gauge_param_in, double **act_path_coeffs, double eps_naik,
27  size_t gSize, int n_naiks)
28 {
29 
30  // since a lot of intermediaries can be general matrices, override the recon in `gauge_param_in`
31  auto gauge_param = gauge_param_in;
33  gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_NO; // probably irrelevant
34 
35  // inlink in different format
36  void *milc_inlink = pinned_malloc(4 * V * gaugeSiteSize * gSize);
38 
39  // Paths for step 1:
40  void *milc_vlink = pinned_malloc(4 * V * gaugeSiteSize * gSize); // V links
41  void *milc_wlink = pinned_malloc(4 * V * gaugeSiteSize * gSize); // W links
42 
43  // Paths for step 2:
44  void *milc_fatlink = pinned_malloc(4 * V * gaugeSiteSize * gSize); // final fat ("X") links
45  void *milc_longlink = pinned_malloc(4 * V * gaugeSiteSize * gSize); // final long links
46 
47  // Place to accumulate Naiks, step 3:
48  void *milc_fatlink_eps = nullptr;
49  void *milc_longlink_eps = nullptr;
50  if (n_naiks > 1) {
51  milc_fatlink_eps = pinned_malloc(4 * V * gaugeSiteSize * gSize); // epsilon fat links
52  milc_longlink_eps = pinned_malloc(4 * V * gaugeSiteSize * gSize); // epsilon long naiks
53  }
54 
55  // Create V links (fat7 links) and W links (unitarized V links), 1st path table set
56  computeKSLinkQuda(milc_vlink, nullptr, milc_wlink, milc_inlink, act_path_coeffs[0], &gauge_param);
57 
58  if (n_naiks > 1) {
59  // Create Naiks, 3rd path table set
60  computeKSLinkQuda(milc_fatlink, milc_longlink, nullptr, milc_wlink, act_path_coeffs[2], &gauge_param);
61 
62  // Rescale+copy Naiks into Naik field
63  cpu_axy(gauge_param.cpu_prec, eps_naik, milc_fatlink, milc_fatlink_eps, V * 4 * gaugeSiteSize);
64  cpu_axy(gauge_param.cpu_prec, eps_naik, milc_longlink, milc_longlink_eps, V * 4 * gaugeSiteSize);
65  } else {
66  memset(milc_fatlink, 0, V * 4 * gaugeSiteSize * gSize);
67  memset(milc_longlink, 0, V * 4 * gaugeSiteSize * gSize);
68  }
69 
70  // Create X and long links, 2nd path table set
71  computeKSLinkQuda(milc_fatlink, milc_longlink, nullptr, milc_wlink, act_path_coeffs[1], &gauge_param);
72 
73  if (n_naiks > 1) {
74  // Add into Naik field
75  cpu_xpy(gauge_param.cpu_prec, milc_fatlink, milc_fatlink_eps, V * 4 * gaugeSiteSize);
76  cpu_xpy(gauge_param.cpu_prec, milc_longlink, milc_longlink_eps, V * 4 * gaugeSiteSize);
77  }
78 
79  // Copy back
81  reorderMILCtoQDP(qdp_longlink, milc_longlink, V, gaugeSiteSize, gauge_param.cpu_prec, gauge_param.cpu_prec);
82 
83  if (n_naiks > 1) {
84  // Add into Naik field
85  reorderMILCtoQDP(qdp_fatlink_eps, milc_fatlink_eps, V, gaugeSiteSize, gauge_param.cpu_prec, gauge_param.cpu_prec);
86  reorderMILCtoQDP(qdp_longlink_eps, milc_longlink_eps, V, gaugeSiteSize, gauge_param.cpu_prec, gauge_param.cpu_prec);
87  }
88 
89  // Clean up GPU compute links
90  host_free(milc_inlink);
91  host_free(milc_vlink);
92  host_free(milc_wlink);
93  host_free(milc_fatlink);
94  host_free(milc_longlink);
95 
96  if (n_naiks > 1) {
97  host_free(milc_fatlink_eps);
98  host_free(milc_longlink_eps);
99  }
100 }
101 
102 void setActionPaths(double **act_paths)
103 {
104 
106  // Set path coefficients //
108 
109  // Reference: "generic_ks/imp_actions/hisq/hisq_action.h",
110  // in QHMC: https://github.com/jcosborn/qhmc/blob/master/lib/qopqdp/hisq.c
111 
112  double u1 = 1.0 / tadpole_factor;
113  double u2 = u1 * u1;
114  double u4 = u2 * u2;
115  double u6 = u4 * u2;
116 
117  // First path: create V, W links
118  double act_path_coeff_1[6] = {
119  (1.0 / 8.0), // one link
120  u2 * (0.0), // Naik
121  u2 * (-1.0 / 8.0) * 0.5, // simple staple
122  u4 * (1.0 / 8.0) * 0.25 * 0.5, // displace link in two directions
123  u6 * (-1.0 / 8.0) * 0.125 * (1.0 / 6.0), // displace link in three directions
124  u4 * (0.0) // Lepage term
125  };
126 
127  // Second path: create X, long links
128  double act_path_coeff_2[6] = {
129  ((1.0 / 8.0) + (2.0 * 6.0 / 16.0) + (1.0 / 8.0)), // one link
130  // One link is 1/8 as in fat7 + 2*3/8 for Lepage + 1/8 for Naik
131  (-1.0 / 24.0), // Naik
132  (-1.0 / 8.0) * 0.5, // simple staple
133  (1.0 / 8.0) * 0.25 * 0.5, // displace link in two directions
134  (-1.0 / 8.0) * 0.125 * (1.0 / 6.0), // displace link in three directions
135  (-2.0 / 16.0) // Lepage term, correct O(a^2) 2x ASQTAD
136  };
137 
138  // Paths for epsilon corrections. Not used if n_naiks = 1.
139  double act_path_coeff_3[6] = {
140  (1.0 / 8.0), // one link b/c of Naik
141  (-1.0 / 24.0), // Naik
142  0.0, // simple staple
143  0.0, // displace link in two directions
144  0.0, // displace link in three directions
145  0.0 // Lepage term
146  };
147 
148  for (int i = 0; i < 6; i++) {
149  act_paths[0][i] = act_path_coeff_1[i];
150  act_paths[1][i] = act_path_coeff_2[i];
151  act_paths[2][i] = act_path_coeff_3[i];
152  }
153 
155  // Set unitarization coefficients //
157 
159  svd_abs_error);
160 }
161 
163  size_t gSize, int n_naiks, double eps_naik)
164 {
165 
166  double **act_paths;
167  act_paths = new double *[3];
168  for (int i = 0; i < 3; i++) act_paths[i] = new double[6];
169  setActionPaths(act_paths);
170 
172  // Create some temporary space if we want to test the epsilon fields //
174 
175  void *qdp_fatlink_naik_temp[4];
176  void *qdp_longlink_naik_temp[4];
177  if (n_naiks == 2) {
178  for (int dir = 0; dir < 4; dir++) {
179  qdp_fatlink_naik_temp[dir] = malloc(V * gaugeSiteSize * gSize);
180  qdp_longlink_naik_temp[dir] = malloc(V * gaugeSiteSize * gSize);
181  }
182  }
183 
185  // Create the GPU links //
187 
188  // Skip eps field for now
189  // Note: GPU link creation only works for single and double precision
190  computeHISQLinksGPU(qdp_fatlink, qdp_longlink, (n_naiks == 2) ? qdp_fatlink_naik_temp : nullptr,
191  (n_naiks == 2) ? qdp_longlink_naik_temp : nullptr, qdp_inlink, gauge_param, act_paths, eps_naik,
192  gSize, n_naiks);
193 
194  if (n_naiks == 2) {
195  // Override the naik fields into the fat/long link fields
196  for (int dir = 0; dir < 4; dir++) {
197  memcpy(qdp_fatlink[dir], qdp_fatlink_naik_temp[dir], V * gaugeSiteSize * gSize);
198  memcpy(qdp_longlink[dir], qdp_longlink_naik_temp[dir], V * gaugeSiteSize * gSize);
199  free(qdp_fatlink_naik_temp[dir]);
200  qdp_fatlink_naik_temp[dir] = nullptr;
201  free(qdp_longlink_naik_temp[dir]);
202  qdp_longlink_naik_temp[dir] = nullptr;
203  }
204  }
205 
206  for (int i = 0; i < 3; i++) delete act_paths[i];
207  delete act_paths;
208 }
209 
210 void computeFatLongGPUandCPU(void **qdp_fatlink_gpu, void **qdp_longlink_gpu, void **qdp_fatlink_cpu,
212  int n_naiks, double eps_naik)
213 {
214 
215  double **act_paths;
216  act_paths = new double *[3];
217  for (int i = 0; i < 3; i++) act_paths[i] = new double[6];
218  setActionPaths(act_paths);
219 
221  // Create some temporary space if we want to test the epsilon fields //
223 
224  void *qdp_fatlink_naik_temp[4];
225  void *qdp_longlink_naik_temp[4];
226  if (n_naiks == 2) {
227  for (int dir = 0; dir < 4; dir++) {
228  qdp_fatlink_naik_temp[dir] = malloc(V * gaugeSiteSize * gSize);
229  qdp_longlink_naik_temp[dir] = malloc(V * gaugeSiteSize * gSize);
230  memset(qdp_fatlink_naik_temp[dir], 0, V * gaugeSiteSize * gSize);
231  memset(qdp_longlink_naik_temp[dir], 0, V * gaugeSiteSize * gSize);
232  }
233  }
234 
236  // Create the CPU links //
238 
239  // defined in "llfat_reference.cpp"
240  computeHISQLinksCPU(qdp_fatlink_cpu, qdp_longlink_cpu, (n_naiks == 2) ? qdp_fatlink_naik_temp : nullptr,
241  (n_naiks == 2) ? qdp_longlink_naik_temp : nullptr, qdp_inlink, &gauge_param, act_paths, eps_naik);
242 
243  if (n_naiks == 2) {
244  // Override the naik fields into the fat/long link fields
245  for (int dir = 0; dir < 4; dir++) {
246  memcpy(qdp_fatlink_cpu[dir], qdp_fatlink_naik_temp[dir], V * gaugeSiteSize * gSize);
247  memcpy(qdp_longlink_cpu[dir], qdp_longlink_naik_temp[dir], V * gaugeSiteSize * gSize);
248  memset(qdp_fatlink_naik_temp[dir], 0, V * gaugeSiteSize * gSize);
249  memset(qdp_longlink_naik_temp[dir], 0, V * gaugeSiteSize * gSize);
250  }
251  }
252 
254  // Create the GPU links //
256 
257  // Skip eps field for now
258  // Note: GPU link creation only works for single and double precision
259  computeHISQLinksGPU(qdp_fatlink_gpu, qdp_longlink_gpu, (n_naiks == 2) ? qdp_fatlink_naik_temp : nullptr,
260  (n_naiks == 2) ? qdp_longlink_naik_temp : nullptr, qdp_inlink, gauge_param, act_paths, eps_naik,
261  gSize, n_naiks);
262 
263  if (n_naiks == 2) {
264  // Override the naik fields into the fat/long link fields
265  for (int dir = 0; dir < 4; dir++) {
266  memcpy(qdp_fatlink_gpu[dir], qdp_fatlink_naik_temp[dir], V * gaugeSiteSize * gSize);
267  memcpy(qdp_longlink_gpu[dir], qdp_longlink_naik_temp[dir], V * gaugeSiteSize * gSize);
268  free(qdp_fatlink_naik_temp[dir]);
269  qdp_fatlink_naik_temp[dir] = nullptr;
270  free(qdp_longlink_naik_temp[dir]);
271  qdp_longlink_naik_temp[dir] = nullptr;
272  }
273  }
274 
275  for (int i = 0; i < 3; i++) delete act_paths[i];
276  delete act_paths;
277 }
278 
279 
280 // Routine that takes in a QDP-ordered field and outputs the plaquette.
281 // Assumes the gauge fields already have phases on them (unless it's the Laplace op),
282 // so it corrects the sign as appropriate.
283 void computeStaggeredPlaquetteQDPOrder(void** qdp_link, double plaq[3], const QudaGaugeParam& gauge_param_in,
285 {
286  if (dslash_type != QUDA_STAGGERED_DSLASH && dslash_type != QUDA_ASQTAD_DSLASH && dslash_type != QUDA_LAPLACE_DSLASH) {
287  errorQuda("computeStaggeredPlaquetteQDPOrder does not support dslash type %d\n", dslash_type);
288  }
289 
290  // Make no assumptions about any part of gauge_param_in beyond what we need to grab.
292  for (int d = 0; d < 4; d++) {
293  gauge_param.X[d] = gauge_param_in.X[d];
294  }
295 
296  gauge_param.type = QUDA_WILSON_LINKS;
297  gauge_param.gauge_order = QUDA_QDP_GAUGE_ORDER;
298  gauge_param.t_boundary = QUDA_ANTI_PERIODIC_T;
299 
300  gauge_param.cpu_prec = gauge_param_in.cpu_prec;
301 
302  gauge_param.cuda_prec = gauge_param_in.cuda_prec;
303  gauge_param.reconstruct = gauge_param_in.reconstruct;
304 
305  gauge_param.cuda_prec_sloppy = gauge_param_in.cuda_prec; // for ease of use
306  gauge_param.reconstruct_sloppy = gauge_param_in.reconstruct_sloppy;
307 
308  gauge_param.anisotropy = 1;
309  gauge_param.gauge_fix = QUDA_GAUGE_FIXED_NO;
310 
311  gauge_param.ga_pad = 0;
312  // For multi-GPU, ga_pad must be large enough to store a time-slice
313 #ifdef MULTI_GPU
314  int x_face_size = gauge_param.X[1]*gauge_param.X[2]*gauge_param.X[3]/2;
315  int y_face_size = gauge_param.X[0]*gauge_param.X[2]*gauge_param.X[3]/2;
316  int z_face_size = gauge_param.X[0]*gauge_param.X[1]*gauge_param.X[3]/2;
317  int t_face_size = gauge_param.X[0]*gauge_param.X[1]*gauge_param.X[2]/2;
318  int pad_size = x_face_size > y_face_size ? x_face_size : y_face_size;
319  pad_size = pad_size > z_face_size ? pad_size : z_face_size;
320  pad_size = pad_size > t_face_size ? pad_size : t_face_size;
321  gauge_param.ga_pad = pad_size;
322 #endif
323 
324  loadGaugeQuda(qdp_link, &gauge_param);
325  plaqQuda(plaq);
326 
327  if (dslash_type == QUDA_STAGGERED_DSLASH || dslash_type == QUDA_ASQTAD_DSLASH) {
328  plaq[0] = -plaq[0];
329  plaq[1] = -plaq[1];
330  plaq[2] = -plaq[2];
331  }
332 
333 }
334 
QudaDslashType dslash_type
Definition: test_util.cpp:1621
void computeFatLongGPUandCPU(void **qdp_fatlink_gpu, void **qdp_longlink_gpu, void **qdp_fatlink_cpu, void **qdp_longlink_cpu, void **qdp_inlink, QudaGaugeParam &gauge_param, size_t gSize, int n_naiks, double eps_naik)
static size_t gSize
void * qdp_longlink[4]
QudaReconstructType reconstruct_sloppy
Definition: quda.h:53
double anisotropy
Definition: quda.h:38
#define pinned_malloc(size)
Definition: malloc_quda.h:67
int ga_pad
Definition: quda.h:63
QudaGaugeFixed gauge_fix
Definition: quda.h:61
QudaLinkType type
Definition: quda.h:42
static bool reunit_allow_svd
#define errorQuda(...)
Definition: util_quda.h:121
void setUnitarizeLinksConstants(double unitarize_eps, double max_error, bool allow_svd, bool svd_only, double svd_rel_error, double svd_abs_error)
#define host_free(ptr)
Definition: malloc_quda.h:71
void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param)
void plaqQuda(double plaq[3])
QudaGaugeParam gauge_param
QudaGaugeFieldOrder gauge_order
Definition: quda.h:43
void computeKSLinkQuda(void *fatlink, void *longlink, void *ulink, void *inlink, double *path_coeff, QudaGaugeParam *param)
void * qdp_fatlink_cpu[4]
void * qdp_fatlink[4]
static double max_allowed_error
void computeStaggeredPlaquetteQDPOrder(void **qdp_link, double plaq[3], const QudaGaugeParam &gauge_param_in, const QudaDslashType dslash_type)
double eps_naik
Definition: test_util.cpp:1652
static double unitarize_eps
void reorderQDPtoMILC(Out *milc_out, In **qdp_in, int V, int siteSize)
void cpu_xpy(QudaPrecision prec, void *x, void *y, int size)
QudaPrecision cuda_prec_sloppy
Definition: quda.h:52
static double svd_rel_error
void computeHISQLinksCPU(void **fatlink, void **longlink, void **fatlink_eps, void **longlink_eps, void **sitelink, void *qudaGaugeParamPtr, double **act_path_coeffs, double eps_naik)
QudaReconstructType reconstruct
Definition: quda.h:50
QudaPrecision cuda_prec
Definition: quda.h:49
int X[4]
Definition: quda.h:36
void setActionPaths(double **act_paths)
int V
Definition: test_util.cpp:27
void reorderMILCtoQDP(Out **qdp_out, In *milc_in, int V, int siteSize)
static int n_naiks
void computeFatLongGPU(void **qdp_fatlink, void **qdp_longlink, void **qdp_inlink, QudaGaugeParam &gauge_param, size_t gSize, int n_naiks, double eps_naik)
void * memset(void *s, int c, size_t n)
double tadpole_factor
Definition: test_util.cpp:1651
Main header file for the QUDA library.
void * qdp_inlink[4]
void cpu_axy(QudaPrecision prec, double a, void *x, void *y, int size)
QudaTboundary t_boundary
Definition: quda.h:45
enum QudaDslashType_s QudaDslashType
static double svd_abs_error
void computeHISQLinksGPU(void **qdp_fatlink, void **qdp_longlink, void **qdp_fatlink_eps, void **qdp_longlink_eps, void **qdp_inlink, QudaGaugeParam &gauge_param_in, double **act_path_coeffs, double eps_naik, size_t gSize, int n_naiks)
void * qdp_longlink_cpu[4]
static bool reunit_svd_only
QudaPrecision cpu_prec
Definition: quda.h:47
#define gaugeSiteSize
Definition: face_gauge.cpp:34
QudaGaugeParam newQudaGaugeParam(void)