QUDA  v0.5.0
A library for QCD on GPUs
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
dslash_constants.h
Go to the documentation of this file.
1 #define MAX(a,b) ((a)>(b) ? (a):(b))
2 
3 typedef struct fat_force_stride_s {
11 
12 __constant__ int X1h;
13 __constant__ int X2h;
14 __constant__ int X1;
15 __constant__ int X2;
16 __constant__ int X3;
17 __constant__ int X4;
18 
19 __constant__ int X1_3;
20 __constant__ int X2_3;
21 __constant__ int X3_3;
22 __constant__ int X4_3;
23 
24 __constant__ int X1m1;
25 __constant__ int X2m1;
26 __constant__ int X3m1;
27 __constant__ int X4m1;
28 
29 __constant__ int X1m3;
30 __constant__ int X2m3;
31 __constant__ int X3m3;
32 __constant__ int X4m3;
33 
34 __constant__ int X2X1mX1;
35 __constant__ int X3X2X1mX2X1;
36 __constant__ int X4X3X2X1mX3X2X1;
37 __constant__ int X4X3X2X1hmX3X2X1h;
38 
39 __constant__ int X2X1m3X1;
40 __constant__ int X3X2X1m3X2X1;
41 __constant__ int X4X3X2X1m3X3X2X1;
42 __constant__ int X4X3X2X1hm3X3X2X1h;
43 
44 __constant__ int X2X1;
45 __constant__ int X3X1;
46 __constant__ int X3X2;
47 __constant__ int X3X2X1;
48 __constant__ int X4X2X1;
49 __constant__ int X4X2X1h;
50 __constant__ int X4X3X1;
51 __constant__ int X4X3X1h;
52 __constant__ int X4X3X2;
53 __constant__ int X4X3X2h;
54 
55 __constant__ int Vh_2d_max;
56 
57 __constant__ int X2X1_3;
58 __constant__ int X3X2X1_3;
59 
60 __constant__ int Vh;
61 __constant__ int Vs;
62 __constant__ int Vsh;
63 __constant__ int sp_stride;
64 __constant__ int ga_stride;
65 __constant__ int cl_stride;
66 __constant__ int ghostFace[QUDA_MAX_DIM+1];
67 
68 __constant__ int fat_ga_stride;
69 __constant__ int long_ga_stride;
70 __constant__ float fat_ga_max;
71 
72 __constant__ int gauge_fixed;
73 
74 // domain wall constants
75 __constant__ int Ls;
76 
77 // single precision constants
78 __constant__ float anisotropy_f;
79 __constant__ float coeff_f;
80 __constant__ float t_boundary_f;
81 __constant__ float pi_f;
82 
83 // double precision constants
84 __constant__ double anisotropy;
85 __constant__ double t_boundary;
86 __constant__ double coeff;
87 
88 __constant__ float2 An2;
89 __constant__ float2 TB2;
90 __constant__ float2 No2;
91 
92 // Are we processor 0 in time?
93 __constant__ bool Pt0;
94 
95 // Are we processor Nt-1 in time?
96 __constant__ bool PtNm1;
97 
98 // factor of 2 (or 1) for T-dimensional spin projection
99 __constant__ double tProjScale;
100 __constant__ float tProjScale_f;
101 
102 //for link fattening/gauge force/fermion force code
103 __constant__ int E1, E2, E3, E4, E1h;
104 __constant__ int Vh_ex;
105 __constant__ int E2E1;
106 __constant__ int E3E2E1;
107 
108 __constant__ fat_force_const_t fl; //fatlink
109 __constant__ fat_force_const_t gf; //gauge force
110 __constant__ fat_force_const_t hf; //hisq force
111 
113 __constant__ int fl_stride;
114 
115 void initLatticeConstants(const LatticeField &lat)
116 {
117  checkCudaError();
118 
119  int volumeCB = lat.VolumeCB();
120  cudaMemcpyToSymbol(Vh, &volumeCB, sizeof(int));
121 
122  Vspatial = lat.X()[0]*lat.X()[1]*lat.X()[2]/2; // FIXME - this should not be called Vs, rather Vsh
123  cudaMemcpyToSymbol(Vs, &Vspatial, sizeof(int));
124 
125  int half_Vspatial = Vspatial;
126  cudaMemcpyToSymbol(Vsh, &half_Vspatial, sizeof(int));
127 
128  int L1 = lat.X()[0];
129  cudaMemcpyToSymbol(X1, &L1, sizeof(int));
130 
131  int L2 = lat.X()[1];
132  cudaMemcpyToSymbol(X2, &L2, sizeof(int));
133 
134  int L3 = lat.X()[2];
135  cudaMemcpyToSymbol(X3, &L3, sizeof(int));
136 
137  int L4 = lat.X()[3];
138  cudaMemcpyToSymbol(X4, &L4, sizeof(int));
139 
140  int ghostFace_h[4];
141  ghostFace_h[0] = L2*L3*L4/2;
142  ghostFace_h[1] = L1*L3*L4/2;
143  ghostFace_h[2] = L1*L2*L4/2;
144  ghostFace_h[3] = L1*L2*L3/2;
145  cudaMemcpyToSymbol(ghostFace, ghostFace_h, 4*sizeof(int));
146 
147  int L1_3 = 3*L1;
148  cudaMemcpyToSymbol(X1_3, &L1_3, sizeof(int));
149 
150  int L2_3 = 3*L2;
151  cudaMemcpyToSymbol(X2_3, &L2_3, sizeof(int));
152 
153  int L3_3 = 3*L3;
154  cudaMemcpyToSymbol(X3_3, &L3_3, sizeof(int));
155 
156  int L4_3 = 3*L4;
157  cudaMemcpyToSymbol(X4_3, &L4_3, sizeof(int));
158 
159  int L2L1 = L2*L1;
160  cudaMemcpyToSymbol(X2X1, &L2L1, sizeof(int));
161 
162  int L3L1 = L3*L1;
163  cudaMemcpyToSymbol(X3X1, &L3L1, sizeof(int));
164 
165  int L3L2 = L3*L2;
166  cudaMemcpyToSymbol(X3X2, &L3L2, sizeof(int));
167 
168  int L3L2L1 = L3*L2*L1;
169  cudaMemcpyToSymbol(X3X2X1, &L3L2L1, sizeof(int));
170 
171  int L4L2L1 = L4*L2*L1;
172  cudaMemcpyToSymbol(X4X2X1, &L4L2L1, sizeof(int));
173 
174  int L4L2L1h = L4*L2*L1/2;
175  cudaMemcpyToSymbol(X4X2X1h, &L4L2L1h, sizeof(int));
176 
177  int L4L3L1 = L4*L3*L1;
178  cudaMemcpyToSymbol(X4X3X1, &L4L3L1, sizeof(int));
179 
180  int L4L3L1h = L4*L3*L1/2;
181  cudaMemcpyToSymbol(X4X3X1h, &L4L3L1h, sizeof(int));
182 
183  int L4L3L2 = L4*L3*L2;
184  cudaMemcpyToSymbol(X4X3X2, &L4L3L2, sizeof(int));
185 
186  int L4L3L2h = L4*L3*L2/2;
187  cudaMemcpyToSymbol(X4X3X2h, &L4L3L2h, sizeof(int));
188 
189  int L2L1_3 = 3*L2*L1;
190  cudaMemcpyToSymbol(X2X1_3, &L2L1_3, sizeof(int));
191 
192  int L3L2L1_3 = 3*L3*L2*L1;
193  cudaMemcpyToSymbol(X3X2X1_3, &L3L2L1_3, sizeof(int));
194 
195  int L1h = L1/2;
196  cudaMemcpyToSymbol(X1h, &L1h, sizeof(int));
197 
198  int L2h = L2/2;
199  cudaMemcpyToSymbol(X2h, &L2h, sizeof(int));
200 
201  int L1m1 = L1 - 1;
202  cudaMemcpyToSymbol(X1m1, &L1m1, sizeof(int));
203 
204  int L2m1 = L2 - 1;
205  cudaMemcpyToSymbol(X2m1, &L2m1, sizeof(int));
206 
207  int L3m1 = L3 - 1;
208  cudaMemcpyToSymbol(X3m1, &L3m1, sizeof(int));
209 
210  int L4m1 = L4 - 1;
211  cudaMemcpyToSymbol(X4m1, &L4m1, sizeof(int));
212 
213  int L1m3 = L1 - 3;
214  cudaMemcpyToSymbol(X1m3, &L1m3, sizeof(int));
215 
216  int L2m3 = L2 - 3;
217  cudaMemcpyToSymbol(X2m3, &L2m3, sizeof(int));
218 
219  int L3m3 = L3 - 3;
220  cudaMemcpyToSymbol(X3m3, &L3m3, sizeof(int));
221 
222  int L4m3 = L4 - 3;
223  cudaMemcpyToSymbol(X4m3, &L4m3, sizeof(int));
224 
225  int L2L1mL1 = L2L1 - L1;
226  cudaMemcpyToSymbol(X2X1mX1, &L2L1mL1, sizeof(int));
227 
228  int L3L2L1mL2L1 = L3L2L1 - L2L1;
229  cudaMemcpyToSymbol(X3X2X1mX2X1, &L3L2L1mL2L1, sizeof(int));
230 
231  int L4L3L2L1mL3L2L1 = (L4-1)*L3L2L1;
232  cudaMemcpyToSymbol(X4X3X2X1mX3X2X1, &L4L3L2L1mL3L2L1, sizeof(int));
233 
234  int L4L3L2L1hmL3L2L1h = (L4-1)*L3*L2*L1h;
235  cudaMemcpyToSymbol(X4X3X2X1hmX3X2X1h, &L4L3L2L1hmL3L2L1h, sizeof(int));
236 
237  int L2L1m3L1 = L2L1 - 3*L1;
238  cudaMemcpyToSymbol(X2X1m3X1, &L2L1m3L1, sizeof(int));
239 
240  int L3L2L1m3L2L1 = L3L2L1 - 3*L2L1;
241  cudaMemcpyToSymbol(X3X2X1m3X2X1, &L3L2L1m3L2L1, sizeof(int));
242 
243  int L4L3L2L1m3L3L2L1 = (L4-3)*L3L2L1;
244  cudaMemcpyToSymbol(X4X3X2X1m3X3X2X1, &L4L3L2L1m3L3L2L1, sizeof(int));
245 
246  int L4L3L2L1hm3L3L2L1h = (L4-3)*L3*L2*L1h;
247  cudaMemcpyToSymbol(X4X3X2X1hm3X3X2X1h, &L4L3L2L1hm3L3L2L1h, sizeof(int));
248  int Vh_2d_max_h = MAX(L1*L2/2, L1*L3/2);
249  Vh_2d_max_h = MAX(Vh_2d_max_h, L1*L4/2);
250  Vh_2d_max_h = MAX(Vh_2d_max_h, L2*L3/2);
251  Vh_2d_max_h = MAX(Vh_2d_max_h, L2*L4/2);
252  Vh_2d_max_h = MAX(Vh_2d_max_h, L3*L4/2);
253  cudaMemcpyToSymbol(Vh_2d_max, &Vh_2d_max_h, sizeof(int));
254 
255 #ifdef MULTI_GPU
256  bool first_node_in_t = (commCoords(3) == 0);
257  bool last_node_in_t = (commCoords(3) == commDim(3)-1);
258 #else
259  bool first_node_in_t = true;
260  bool last_node_in_t = true;
261 #endif
262 
263  cudaMemcpyToSymbol(Pt0, &(first_node_in_t), sizeof(bool));
264  cudaMemcpyToSymbol(PtNm1, &(last_node_in_t), sizeof(bool));
265 
266  //constants used by fatlink/gauge force/hisq force code
267  int E1_h = L1+4;
268  int E1h_h = E1_h/2;
269  int E2_h = L2+4;
270  int E3_h = L3+4;
271  int E4_h = L4+4;
272  int E2E1_h = E2_h*E1_h;
273  int E3E2E1_h = E3_h*E2_h*E1_h;
274  int Vh_ex_h = E1_h*E2_h*E3_h*E4_h/2;
275 
276  cudaMemcpyToSymbol(E1, &E1_h, sizeof(int));
277  cudaMemcpyToSymbol(E1h, &E1h_h, sizeof(int));
278  cudaMemcpyToSymbol(E2, &E2_h, sizeof(int));
279  cudaMemcpyToSymbol(E3, &E3_h, sizeof(int));
280  cudaMemcpyToSymbol(E4, &E4_h, sizeof(int));
281  cudaMemcpyToSymbol(E2E1, &E2E1_h, sizeof(int));
282  cudaMemcpyToSymbol(E3E2E1, &E3E2E1_h, sizeof(int));
283  cudaMemcpyToSymbol(Vh_ex, &Vh_ex_h, sizeof(int));
284 
285  // copy a few of the constants needed by tuneLaunch()
286  dslashConstants.x[0] = L1;
287  dslashConstants.x[1] = L2;
288  dslashConstants.x[2] = L3;
289  dslashConstants.x[3] = L4;
290 
291  checkCudaError();
292 }
293 
294 
295 void initGaugeConstants(const cudaGaugeField &gauge)
296 {
297  int ga_stride_h = gauge.Stride();
298  cudaMemcpyToSymbol(ga_stride, &ga_stride_h, sizeof(int));
299 
300  int gf = (gauge.GaugeFixed() == QUDA_GAUGE_FIXED_YES);
301  cudaMemcpyToSymbol(gauge_fixed, &(gf), sizeof(int));
302 
303  double anisotropy_ = gauge.Anisotropy();
304  cudaMemcpyToSymbol(anisotropy, &(anisotropy_), sizeof(double));
305 
306  double t_bc = (gauge.TBoundary() == QUDA_PERIODIC_T) ? 1.0 : -1.0;
307  cudaMemcpyToSymbol(t_boundary, &(t_bc), sizeof(double));
308 
309  double coeff_h = -24.0*gauge.Tadpole()*gauge.Tadpole();
310  cudaMemcpyToSymbol(coeff, &(coeff_h), sizeof(double));
311 
312  float anisotropy_fh = gauge.Anisotropy();
313  cudaMemcpyToSymbol(anisotropy_f, &(anisotropy_fh), sizeof(float));
314 
315  float t_bc_f = (gauge.TBoundary() == QUDA_PERIODIC_T) ? 1.0 : -1.0;
316  cudaMemcpyToSymbol(t_boundary_f, &(t_bc_f), sizeof(float));
317 
318  float coeff_fh = -24.0*gauge.Tadpole()*gauge.Tadpole();
319  cudaMemcpyToSymbol(coeff_f, &(coeff_fh), sizeof(float));
320 
321  // constants used by the READ_GAUGE() macros in read_gauge.h
322  float2 An2_h = make_float2(gauge.Anisotropy(), 1.0 / (gauge.Anisotropy()*gauge.Anisotropy()));
323  cudaMemcpyToSymbol(An2, &(An2_h), sizeof(float2));
324  float2 TB2_h = make_float2(t_bc_f, 1.0 / (t_bc_f * t_bc_f));
325  cudaMemcpyToSymbol(TB2, &(TB2_h), sizeof(float2));
326  float2 No2_h = make_float2(1.0, 1.0);
327  cudaMemcpyToSymbol(No2, &(No2_h), sizeof(float2));
328 
329  checkCudaError();
330 }
331 
332 
337 void initSpinorConstants(const cudaColorSpinorField &spinor)
338 {
339  static int last_sp_stride = -1;
340  static int last_Ls = -1;
341 
342  int sp_stride_h = spinor.Stride();
343  if (sp_stride_h != last_sp_stride) {
344  cudaMemcpyToSymbol(sp_stride, &sp_stride_h, sizeof(int));
345  checkCudaError();
346  last_sp_stride = sp_stride_h;
347  }
348 
349  // for domain wall:
350  if (spinor.Ndim() == 5) {
351  int Ls_h = spinor.X(4);
352  if (Ls_h != last_Ls) {
353  cudaMemcpyToSymbol(Ls, &Ls_h, sizeof(int));
354  dslashConstants.Ls = Ls_h; // needed by tuneLaunch()
355  checkCudaError();
356  last_Ls = Ls_h;
357  }
358  }
359 }
360 
361 
363 {
364  float pi_f_h = M_PI;
365  cudaMemcpyToSymbol(pi_f, &pi_f_h, sizeof(float));
366 
367  // temporary additions (?) for checking Ron's T-packing kernel with old multi-gpu kernel
368 
369  double tProjScale_h = (kernelPackT ? 1.0 : 2.0);
370  cudaMemcpyToSymbol(tProjScale, &tProjScale_h, sizeof(double));
371 
372  float tProjScale_fh = (float)tProjScale_h;
373  cudaMemcpyToSymbol(tProjScale_f, &tProjScale_fh, sizeof(float));
374 
375  checkCudaError();
376 }
377 
378 
379 void initCloverConstants (const cudaCloverField &clover)
380 {
381  int cl_stride_h = clover.Stride();
382  cudaMemcpyToSymbol(cl_stride, &cl_stride_h, sizeof(int));
383 
384  checkCudaError();
385 }
386 
387 
388 void initStaggeredConstants(const cudaGaugeField &fatgauge, const cudaGaugeField &longgauge)
389 {
390  int fat_ga_stride_h = fatgauge.Stride();
391  int long_ga_stride_h = longgauge.Stride();
392  float fat_link_max_h = fatgauge.LinkMax();
393 
394  cudaMemcpyToSymbol(fat_ga_stride, &fat_ga_stride_h, sizeof(int));
395  cudaMemcpyToSymbol(long_ga_stride, &long_ga_stride_h, sizeof(int));
396  cudaMemcpyToSymbol(fat_ga_max, &fat_link_max_h, sizeof(float));
397 
398  checkCudaError();
399 }
400 
402 void initTwistedMassConstants(const int fl_stride_h)
403 {
404  cudaMemcpyToSymbol(fl_stride, &fl_stride_h, sizeof(int));
405 
406  checkCudaError();
407 }