QUDA  v1.1.0
A library for QCD on GPUs
staggered_gauge_utils.cpp
Go to the documentation of this file.
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <math.h>
4 #include <string.h>
5 
6 #include <host_utils.h>
7 #include <quda_internal.h>
8 #include <quda.h>
9 #include <util_quda.h>
10 #include <staggered_gauge_utils.h>
11 #include <llfat_reference.h>
12 #include <unitarization_links.h>
13 #include "misc.h"
14 
15 extern double tadpole_factor;
16 // Unitarization coefficients
17 static double unitarize_eps = 1e-6;
18 static bool reunit_allow_svd = true;
19 static bool reunit_svd_only = false;
20 static double svd_rel_error = 1e-4;
21 static double svd_abs_error = 1e-4;
22 static double max_allowed_error = 1e-11;
23 
24 // Wrap everything for the GPU construction of fat/long links here
25 void computeHISQLinksGPU(void **qdp_fatlink, void **qdp_longlink, void **qdp_fatlink_eps, void **qdp_longlink_eps,
26  void **qdp_inlink, QudaGaugeParam &gauge_param_in, double **act_path_coeffs, double eps_naik,
27  size_t gauge_data_type_size, int n_naiks)
28 {
29  // since a lot of intermediaries can be general matrices, override the recon in `gauge_param_in`
30  auto gauge_param = gauge_param_in;
32  gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_NO; // probably irrelevant
33 
34  // inlink in different format
35  void *milc_inlink = pinned_malloc(4 * V * gauge_site_size * gauge_data_type_size);
37 
38  // Paths for step 1:
39  void *milc_vlink = pinned_malloc(4 * V * gauge_site_size * gauge_data_type_size); // V links
40  void *milc_wlink = pinned_malloc(4 * V * gauge_site_size * gauge_data_type_size); // W links
41 
42  // Paths for step 2:
43  void *milc_fatlink = pinned_malloc(4 * V * gauge_site_size * gauge_data_type_size); // final fat ("X") links
44  void *milc_longlink = pinned_malloc(4 * V * gauge_site_size * gauge_data_type_size); // final long links
45 
46  // Place to accumulate Naiks, step 3:
47  void *milc_fatlink_eps = nullptr;
48  void *milc_longlink_eps = nullptr;
49  if (n_naiks > 1) {
50  milc_fatlink_eps = pinned_malloc(4 * V * gauge_site_size * gauge_data_type_size); // epsilon fat links
51  milc_longlink_eps = pinned_malloc(4 * V * gauge_site_size * gauge_data_type_size); // epsilon long naiks
52  }
53 
54  // Create V links (fat7 links) and W links (unitarized V links), 1st path table set
55  computeKSLinkQuda(milc_vlink, nullptr, milc_wlink, milc_inlink, act_path_coeffs[0], &gauge_param);
56 
57  if (n_naiks > 1) {
58  // Create Naiks, 3rd path table set
59  computeKSLinkQuda(milc_fatlink, milc_longlink, nullptr, milc_wlink, act_path_coeffs[2], &gauge_param);
60 
61  // Rescale+copy Naiks into Naik field
62  cpu_axy(gauge_param.cpu_prec, eps_naik, milc_fatlink, milc_fatlink_eps, V * 4 * gauge_site_size);
63  cpu_axy(gauge_param.cpu_prec, eps_naik, milc_longlink, milc_longlink_eps, V * 4 * gauge_site_size);
64  } else {
65  memset(milc_fatlink, 0, V * 4 * gauge_site_size * gauge_data_type_size);
66  memset(milc_longlink, 0, V * 4 * gauge_site_size * gauge_data_type_size);
67  }
68 
69  // Create X and long links, 2nd path table set
70  computeKSLinkQuda(milc_fatlink, milc_longlink, nullptr, milc_wlink, act_path_coeffs[1], &gauge_param);
71 
72  if (n_naiks > 1) {
73  // Add into Naik field
74  cpu_xpy(gauge_param.cpu_prec, milc_fatlink, milc_fatlink_eps, V * 4 * gauge_site_size);
75  cpu_xpy(gauge_param.cpu_prec, milc_longlink, milc_longlink_eps, V * 4 * gauge_site_size);
76  }
77 
78  // Copy back
81 
82  if (n_naiks > 1) {
83  // Add into Naik field
84  reorderMILCtoQDP(qdp_fatlink_eps, milc_fatlink_eps, V, gauge_site_size, gauge_param.cpu_prec, gauge_param.cpu_prec);
85  reorderMILCtoQDP(qdp_longlink_eps, milc_longlink_eps, V, gauge_site_size, gauge_param.cpu_prec, gauge_param.cpu_prec);
86  }
87 
88  // Clean up GPU compute links
89  host_free(milc_inlink);
90  host_free(milc_vlink);
91  host_free(milc_wlink);
92  host_free(milc_fatlink);
93  host_free(milc_longlink);
94 
95  if (n_naiks > 1) {
96  host_free(milc_fatlink_eps);
97  host_free(milc_longlink_eps);
98  }
99 }
100 
101 void setActionPaths(double **act_paths)
102 {
104  // Set path coefficients //
106 
107  // Reference: "generic_ks/imp_actions/hisq/hisq_action.h",
108  // in QHMC: https://github.com/jcosborn/qhmc/blob/master/lib/qopqdp/hisq.c
109 
110  double u1 = 1.0 / tadpole_factor;
111  double u2 = u1 * u1;
112  double u4 = u2 * u2;
113  double u6 = u4 * u2;
114 
115  // First path: create V, W links
116  double act_path_coeff_1[6] = {
117  (1.0 / 8.0), // one link
118  u2 * (0.0), // Naik
119  u2 * (-1.0 / 8.0) * 0.5, // simple staple
120  u4 * (1.0 / 8.0) * 0.25 * 0.5, // displace link in two directions
121  u6 * (-1.0 / 8.0) * 0.125 * (1.0 / 6.0), // displace link in three directions
122  u4 * (0.0) // Lepage term
123  };
124 
125  // Second path: create X, long links
126  double act_path_coeff_2[6] = {
127  ((1.0 / 8.0) + (2.0 * 6.0 / 16.0) + (1.0 / 8.0)), // one link
128  // One link is 1/8 as in fat7 + 2*3/8 for Lepage + 1/8 for Naik
129  (-1.0 / 24.0), // Naik
130  (-1.0 / 8.0) * 0.5, // simple staple
131  (1.0 / 8.0) * 0.25 * 0.5, // displace link in two directions
132  (-1.0 / 8.0) * 0.125 * (1.0 / 6.0), // displace link in three directions
133  (-2.0 / 16.0) // Lepage term, correct O(a^2) 2x ASQTAD
134  };
135 
136  // Paths for epsilon corrections. Not used if n_naiks = 1.
137  double act_path_coeff_3[6] = {
138  (1.0 / 8.0), // one link b/c of Naik
139  (-1.0 / 24.0), // Naik
140  0.0, // simple staple
141  0.0, // displace link in two directions
142  0.0, // displace link in three directions
143  0.0 // Lepage term
144  };
145 
146  for (int i = 0; i < 6; i++) {
147  act_paths[0][i] = act_path_coeff_1[i];
148  act_paths[1][i] = act_path_coeff_2[i];
149  act_paths[2][i] = act_path_coeff_3[i];
150  }
151 
153  // Set unitarization coefficients //
155 
156  setUnitarizeLinksConstants(unitarize_eps, max_allowed_error, reunit_allow_svd, reunit_svd_only, svd_rel_error,
157  svd_abs_error);
158 }
159 
160 void computeFatLongGPU(void **qdp_fatlink, void **qdp_longlink, void **qdp_inlink, QudaGaugeParam &gauge_param,
161  size_t gauge_data_type_size, int n_naiks, double eps_naik)
162 {
163  double **act_paths = new double *[3];
164  for (int i = 0; i < 3; i++) act_paths[i] = new double[6];
165  setActionPaths(act_paths);
166 
168  // Create some temporary space if we want to test the epsilon fields //
170 
171  void *qdp_fatlink_naik_temp[4];
172  void *qdp_longlink_naik_temp[4];
173  if (n_naiks == 2) {
174  for (int dir = 0; dir < 4; dir++) {
175  qdp_fatlink_naik_temp[dir] = malloc(V * gauge_site_size * gauge_data_type_size);
176  qdp_longlink_naik_temp[dir] = malloc(V * gauge_site_size * gauge_data_type_size);
177  }
178  }
179 
181  // Create the GPU links //
183 
184  // Skip eps field for now
185  // Note: GPU link creation only works for single and double precision
186  computeHISQLinksGPU(qdp_fatlink, qdp_longlink, (n_naiks == 2) ? qdp_fatlink_naik_temp : nullptr,
187  (n_naiks == 2) ? qdp_longlink_naik_temp : nullptr, qdp_inlink, gauge_param, act_paths, eps_naik,
188  gauge_data_type_size, n_naiks);
189 
190  if (n_naiks == 2) {
191  // Override the naik fields into the fat/long link fields
192  for (int dir = 0; dir < 4; dir++) {
193  memcpy(qdp_fatlink[dir], qdp_fatlink_naik_temp[dir], V * gauge_site_size * gauge_data_type_size);
194  memcpy(qdp_longlink[dir], qdp_longlink_naik_temp[dir], V * gauge_site_size * gauge_data_type_size);
195  free(qdp_fatlink_naik_temp[dir]);
196  qdp_fatlink_naik_temp[dir] = nullptr;
197  free(qdp_longlink_naik_temp[dir]);
198  qdp_longlink_naik_temp[dir] = nullptr;
199  }
200  }
201 
202  for (int i = 0; i < 3; i++) delete[] act_paths[i];
203  delete[] act_paths;
204 }
205 
206 void computeFatLongGPUandCPU(void **qdp_fatlink_gpu, void **qdp_longlink_gpu, void **qdp_fatlink_cpu,
207  void **qdp_longlink_cpu, void **qdp_inlink, QudaGaugeParam &gauge_param,
208  size_t gauge_data_type_size, int n_naiks, double eps_naik)
209 {
210  double **act_paths = new double *[3];
211  for (int i = 0; i < 3; i++) act_paths[i] = new double[6];
212  setActionPaths(act_paths);
213 
215  // Create some temporary space if we want to test the epsilon fields //
217 
218  void *qdp_fatlink_naik_temp[4];
219  void *qdp_longlink_naik_temp[4];
220  if (n_naiks == 2) {
221  for (int dir = 0; dir < 4; dir++) {
222  qdp_fatlink_naik_temp[dir] = malloc(V * gauge_site_size * gauge_data_type_size);
223  qdp_longlink_naik_temp[dir] = malloc(V * gauge_site_size * gauge_data_type_size);
224  memset(qdp_fatlink_naik_temp[dir], 0, V * gauge_site_size * gauge_data_type_size);
225  memset(qdp_longlink_naik_temp[dir], 0, V * gauge_site_size * gauge_data_type_size);
226  }
227  }
228 
230  // Create the CPU links //
232 
233  // defined in "llfat_reference.cpp"
234  computeHISQLinksCPU(qdp_fatlink_cpu, qdp_longlink_cpu, (n_naiks == 2) ? qdp_fatlink_naik_temp : nullptr,
235  (n_naiks == 2) ? qdp_longlink_naik_temp : nullptr, qdp_inlink, &gauge_param, act_paths, eps_naik);
236 
237  if (n_naiks == 2) {
238  // Override the naik fields into the fat/long link fields
239  for (int dir = 0; dir < 4; dir++) {
240  memcpy(qdp_fatlink_cpu[dir], qdp_fatlink_naik_temp[dir], V * gauge_site_size * gauge_data_type_size);
241  memcpy(qdp_longlink_cpu[dir], qdp_longlink_naik_temp[dir], V * gauge_site_size * gauge_data_type_size);
242  memset(qdp_fatlink_naik_temp[dir], 0, V * gauge_site_size * gauge_data_type_size);
243  memset(qdp_longlink_naik_temp[dir], 0, V * gauge_site_size * gauge_data_type_size);
244  }
245  }
246 
248  // Create the GPU links //
250 
251  // Skip eps field for now
252  // Note: GPU link creation only works for single and double precision
253  computeHISQLinksGPU(qdp_fatlink_gpu, qdp_longlink_gpu, (n_naiks == 2) ? qdp_fatlink_naik_temp : nullptr,
254  (n_naiks == 2) ? qdp_longlink_naik_temp : nullptr, qdp_inlink, gauge_param, act_paths, eps_naik,
255  gauge_data_type_size, n_naiks);
256 
257  if (n_naiks == 2) {
258  // Override the naik fields into the fat/long link fields
259  for (int dir = 0; dir < 4; dir++) {
260  memcpy(qdp_fatlink_gpu[dir], qdp_fatlink_naik_temp[dir], V * gauge_site_size * gauge_data_type_size);
261  memcpy(qdp_longlink_gpu[dir], qdp_longlink_naik_temp[dir], V * gauge_site_size * gauge_data_type_size);
262  free(qdp_fatlink_naik_temp[dir]);
263  qdp_fatlink_naik_temp[dir] = nullptr;
264  free(qdp_longlink_naik_temp[dir]);
265  qdp_longlink_naik_temp[dir] = nullptr;
266  }
267  }
268 
269  for (int i = 0; i < 3; i++) delete[] act_paths[i];
270  delete[] act_paths;
271 }
272 
273 
274 // Routine that takes in a QDP-ordered field and outputs the plaquette.
275 // Assumes the gauge fields already have phases on them (unless it's the Laplace op),
276 // so it corrects the sign as appropriate.
277 void computeStaggeredPlaquetteQDPOrder(void** qdp_link, double plaq[3], const QudaGaugeParam& gauge_param_in,
279 {
281  errorQuda("computeStaggeredPlaquetteQDPOrder does not support dslash type %d\n", dslash_type);
282  }
283 
284  // Make no assumptions about any part of gauge_param_in beyond what we need to grab.
286  for (int d = 0; d < 4; d++) {
287  gauge_param.X[d] = gauge_param_in.X[d];
288  }
289 
293 
294  gauge_param.cpu_prec = gauge_param_in.cpu_prec;
295 
296  gauge_param.cuda_prec = gauge_param_in.cuda_prec;
297  gauge_param.reconstruct = gauge_param_in.reconstruct;
298 
299  gauge_param.cuda_prec_sloppy = gauge_param_in.cuda_prec; // for ease of use
301 
304 
305  gauge_param.ga_pad = 0;
306  // For multi-GPU, ga_pad must be large enough to store a time-slice
307 #ifdef MULTI_GPU
308  int x_face_size = gauge_param.X[1]*gauge_param.X[2]*gauge_param.X[3]/2;
309  int y_face_size = gauge_param.X[0]*gauge_param.X[2]*gauge_param.X[3]/2;
310  int z_face_size = gauge_param.X[0]*gauge_param.X[1]*gauge_param.X[3]/2;
311  int t_face_size = gauge_param.X[0]*gauge_param.X[1]*gauge_param.X[2]/2;
312  int pad_size = x_face_size > y_face_size ? x_face_size : y_face_size;
313  pad_size = pad_size > z_face_size ? pad_size : z_face_size;
314  pad_size = pad_size > t_face_size ? pad_size : t_face_size;
315  gauge_param.ga_pad = pad_size;
316 #endif
317 
318  loadGaugeQuda(qdp_link, &gauge_param);
319  plaqQuda(plaq);
320 
322  plaq[0] = -plaq[0];
323  plaq[1] = -plaq[1];
324  plaq[2] = -plaq[2];
325  }
326 
327 }
328 
double eps_naik
QudaDslashType dslash_type
int n_naiks
int V
Definition: host_utils.cpp:37
void * memset(void *s, int c, size_t n)
QudaGaugeParam gauge_param
Definition: covdev_test.cpp:26
@ QUDA_STAGGERED_DSLASH
Definition: enum_quda.h:97
@ QUDA_ASQTAD_DSLASH
Definition: enum_quda.h:98
@ QUDA_LAPLACE_DSLASH
Definition: enum_quda.h:101
@ QUDA_RECONSTRUCT_NO
Definition: enum_quda.h:70
@ QUDA_ANTI_PERIODIC_T
Definition: enum_quda.h:56
enum QudaDslashType_s QudaDslashType
@ QUDA_GAUGE_FIXED_NO
Definition: enum_quda.h:80
@ QUDA_QDP_GAUGE_ORDER
Definition: enum_quda.h:44
@ QUDA_WILSON_LINKS
Definition: enum_quda.h:30
#define gauge_site_size
Definition: face_gauge.cpp:34
void cpu_axy(QudaPrecision prec, double a, void *x, void *y, int size)
Definition: host_blas.cpp:74
void cpu_xpy(QudaPrecision prec, void *x, void *y, int size)
Definition: host_blas.cpp:87
void reorderQDPtoMILC(void *milc_out, void **qdp_in, int V, int siteSize, QudaPrecision out_precision, QudaPrecision in_precision)
void computeHISQLinksCPU(void **fatlink, void **longlink, void **fatlink_eps, void **longlink_eps, void **sitelink, void *qudaGaugeParamPtr, double **act_path_coeffs, double eps_naik)
void reorderMILCtoQDP(void **qdp_out, void *milc_in, int V, int siteSize, QudaPrecision out_precision, QudaPrecision in_precision)
#define pinned_malloc(size)
Definition: malloc_quda.h:107
#define host_free(ptr)
Definition: malloc_quda.h:115
void setUnitarizeLinksConstants(double unitarize_eps, double max_error, bool allow_svd, bool svd_only, double svd_rel_error, double svd_abs_error)
Main header file for the QUDA library.
void plaqQuda(double plaq[3])
QudaGaugeParam newQudaGaugeParam(void)
void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param)
void computeKSLinkQuda(void *fatlink, void *longlink, void *ulink, void *inlink, double *path_coeff, QudaGaugeParam *param)
double tadpole_factor
void computeFatLongGPU(void **qdp_fatlink, void **qdp_longlink, void **qdp_inlink, QudaGaugeParam &gauge_param, size_t gauge_data_type_size, int n_naiks, double eps_naik)
void computeFatLongGPUandCPU(void **qdp_fatlink_gpu, void **qdp_longlink_gpu, void **qdp_fatlink_cpu, void **qdp_longlink_cpu, void **qdp_inlink, QudaGaugeParam &gauge_param, size_t gauge_data_type_size, int n_naiks, double eps_naik)
void setActionPaths(double **act_paths)
void computeHISQLinksGPU(void **qdp_fatlink, void **qdp_longlink, void **qdp_fatlink_eps, void **qdp_longlink_eps, void **qdp_inlink, QudaGaugeParam &gauge_param_in, double **act_path_coeffs, double eps_naik, size_t gauge_data_type_size, int n_naiks)
void computeStaggeredPlaquetteQDPOrder(void **qdp_link, double plaq[3], const QudaGaugeParam &gauge_param_in, const QudaDslashType dslash_type)
double anisotropy
Definition: quda.h:37
QudaReconstructType reconstruct
Definition: quda.h:49
int ga_pad
Definition: quda.h:65
QudaLinkType type
Definition: quda.h:41
QudaPrecision cuda_prec_sloppy
Definition: quda.h:51
QudaReconstructType reconstruct_sloppy
Definition: quda.h:52
QudaGaugeFixed gauge_fix
Definition: quda.h:63
QudaGaugeFieldOrder gauge_order
Definition: quda.h:42
QudaPrecision cuda_prec
Definition: quda.h:48
int X[4]
Definition: quda.h:35
QudaPrecision cpu_prec
Definition: quda.h:46
QudaTboundary t_boundary
Definition: quda.h:44
#define errorQuda(...)
Definition: util_quda.h:120