// MAX(a,b): expands to the larger of its two arguments. Each argument is
// parenthesized, but the selected one appears twice in the expansion, so
// arguments with side effects (e.g. i++) would be evaluated more than once.
1 #define MAX(a,b) ((a)>(b) ? (a):(b))
// Constant-memory symbols that the host code in this file fills with
// cudaMemcpyToSymbol.

// An2: written with (anisotropy, 1 / anisotropy^2) taken from the gauge field.
88 __constant__ float2
An2;
// TB2: written with (t_bc_f, 1 / t_bc_f^2), the single-precision temporal
// boundary value and its inverse square.
89 __constant__ float2
TB2;
// No2: written with (1.0, 1.0).
90 __constant__ float2
No2;
// Pt0: written from the host flag first_node_in_t, which the visible code
// initializes to true.
93 __constant__
bool Pt0;
119 int volumeCB = lat.VolumeCB();
120 cudaMemcpyToSymbol(
Vh, &volumeCB,
sizeof(
int));
122 Vspatial = lat.X()[0]*lat.X()[1]*lat.X()[2]/2;
123 cudaMemcpyToSymbol(
Vs, &
Vspatial,
sizeof(
int));
126 cudaMemcpyToSymbol(
Vsh, &half_Vspatial,
sizeof(
int));
129 cudaMemcpyToSymbol(
X1, &L1,
sizeof(
int));
132 cudaMemcpyToSymbol(
X2, &L2,
sizeof(
int));
135 cudaMemcpyToSymbol(
X3, &L3,
sizeof(
int));
138 cudaMemcpyToSymbol(
X4, &L4,
sizeof(
int));
141 ghostFace_h[0] = L2*L3*L4/2;
142 ghostFace_h[1] = L1*L3*L4/2;
143 ghostFace_h[2] = L1*L2*L4/2;
144 ghostFace_h[3] = L1*L2*L3/2;
145 cudaMemcpyToSymbol(
ghostFace, ghostFace_h, 4*
sizeof(
int));
148 cudaMemcpyToSymbol(
X1_3, &L1_3,
sizeof(
int));
151 cudaMemcpyToSymbol(
X2_3, &L2_3,
sizeof(
int));
154 cudaMemcpyToSymbol(
X3_3, &L3_3,
sizeof(
int));
157 cudaMemcpyToSymbol(
X4_3, &L4_3,
sizeof(
int));
160 cudaMemcpyToSymbol(
X2X1, &L2L1,
sizeof(
int));
163 cudaMemcpyToSymbol(
X3X1, &L3L1,
sizeof(
int));
166 cudaMemcpyToSymbol(
X3X2, &L3L2,
sizeof(
int));
168 int L3L2L1 = L3*L2*L1;
169 cudaMemcpyToSymbol(
X3X2X1, &L3L2L1,
sizeof(
int));
171 int L4L2L1 = L4*L2*L1;
172 cudaMemcpyToSymbol(
X4X2X1, &L4L2L1,
sizeof(
int));
174 int L4L2L1h = L4*L2*L1/2;
175 cudaMemcpyToSymbol(
X4X2X1h, &L4L2L1h,
sizeof(
int));
177 int L4L3L1 = L4*L3*L1;
178 cudaMemcpyToSymbol(
X4X3X1, &L4L3L1,
sizeof(
int));
180 int L4L3L1h = L4*L3*L1/2;
181 cudaMemcpyToSymbol(
X4X3X1h, &L4L3L1h,
sizeof(
int));
183 int L4L3L2 = L4*L3*L2;
184 cudaMemcpyToSymbol(
X4X3X2, &L4L3L2,
sizeof(
int));
186 int L4L3L2h = L4*L3*L2/2;
187 cudaMemcpyToSymbol(
X4X3X2h, &L4L3L2h,
sizeof(
int));
189 int L2L1_3 = 3*L2*L1;
190 cudaMemcpyToSymbol(
X2X1_3, &L2L1_3,
sizeof(
int));
192 int L3L2L1_3 = 3*L3*L2*L1;
193 cudaMemcpyToSymbol(
X3X2X1_3, &L3L2L1_3,
sizeof(
int));
196 cudaMemcpyToSymbol(
X1h, &L1h,
sizeof(
int));
199 cudaMemcpyToSymbol(
X2h, &L2h,
sizeof(
int));
202 cudaMemcpyToSymbol(
X1m1, &L1m1,
sizeof(
int));
205 cudaMemcpyToSymbol(
X2m1, &L2m1,
sizeof(
int));
208 cudaMemcpyToSymbol(
X3m1, &L3m1,
sizeof(
int));
211 cudaMemcpyToSymbol(
X4m1, &L4m1,
sizeof(
int));
214 cudaMemcpyToSymbol(
X1m3, &L1m3,
sizeof(
int));
217 cudaMemcpyToSymbol(
X2m3, &L2m3,
sizeof(
int));
220 cudaMemcpyToSymbol(
X3m3, &L3m3,
sizeof(
int));
223 cudaMemcpyToSymbol(
X4m3, &L4m3,
sizeof(
int));
225 int L2L1mL1 = L2L1 - L1;
226 cudaMemcpyToSymbol(
X2X1mX1, &L2L1mL1,
sizeof(
int));
228 int L3L2L1mL2L1 = L3L2L1 - L2L1;
229 cudaMemcpyToSymbol(
X3X2X1mX2X1, &L3L2L1mL2L1,
sizeof(
int));
231 int L4L3L2L1mL3L2L1 = (L4-1)*L3L2L1;
234 int L4L3L2L1hmL3L2L1h = (L4-1)*L3*L2*L1h;
237 int L2L1m3L1 = L2L1 - 3*L1;
238 cudaMemcpyToSymbol(
X2X1m3X1, &L2L1m3L1,
sizeof(
int));
240 int L3L2L1m3L2L1 = L3L2L1 - 3*L2L1;
241 cudaMemcpyToSymbol(
X3X2X1m3X2X1, &L3L2L1m3L2L1,
sizeof(
int));
243 int L4L3L2L1m3L3L2L1 = (L4-3)*L3L2L1;
246 int L4L3L2L1hm3L3L2L1h = (L4-3)*L3*L2*L1h;
248 int Vh_2d_max_h =
MAX(L1*L2/2, L1*L3/2);
249 Vh_2d_max_h =
MAX(Vh_2d_max_h, L1*L4/2);
250 Vh_2d_max_h =
MAX(Vh_2d_max_h, L2*L3/2);
251 Vh_2d_max_h =
MAX(Vh_2d_max_h, L2*L4/2);
252 Vh_2d_max_h =
MAX(Vh_2d_max_h, L3*L4/2);
253 cudaMemcpyToSymbol(
Vh_2d_max, &Vh_2d_max_h,
sizeof(
int));
259 bool first_node_in_t =
true;
260 bool last_node_in_t =
true;
263 cudaMemcpyToSymbol(
Pt0, &(first_node_in_t),
sizeof(
bool));
264 cudaMemcpyToSymbol(
PtNm1, &(last_node_in_t),
sizeof(
bool));
272 int E2E1_h = E2_h*E1_h;
273 int E3E2E1_h = E3_h*E2_h*E1_h;
274 int Vh_ex_h = E1_h*E2_h*E3_h*E4_h/2;
276 cudaMemcpyToSymbol(
E1, &E1_h,
sizeof(
int));
277 cudaMemcpyToSymbol(
E1h, &E1h_h,
sizeof(
int));
278 cudaMemcpyToSymbol(
E2, &E2_h,
sizeof(
int));
279 cudaMemcpyToSymbol(
E3, &E3_h,
sizeof(
int));
280 cudaMemcpyToSymbol(
E4, &E4_h,
sizeof(
int));
281 cudaMemcpyToSymbol(
E2E1, &E2E1_h,
sizeof(
int));
282 cudaMemcpyToSymbol(
E3E2E1, &E3E2E1_h,
sizeof(
int));
283 cudaMemcpyToSymbol(
Vh_ex, &Vh_ex_h,
sizeof(
int));
286 dslashConstants.x[0] = L1;
287 dslashConstants.x[1] = L2;
288 dslashConstants.x[2] = L3;
289 dslashConstants.x[3] = L4;
297 int ga_stride_h = gauge.Stride();
298 cudaMemcpyToSymbol(
ga_stride, &ga_stride_h,
sizeof(
int));
301 cudaMemcpyToSymbol(
gauge_fixed, &(gf),
sizeof(
int));
303 double anisotropy_ = gauge.Anisotropy();
304 cudaMemcpyToSymbol(
anisotropy, &(anisotropy_),
sizeof(
double));
307 cudaMemcpyToSymbol(
t_boundary, &(t_bc),
sizeof(
double));
309 double coeff_h = -24.0*gauge.Tadpole()*gauge.Tadpole();
310 cudaMemcpyToSymbol(
coeff, &(coeff_h),
sizeof(
double));
312 float anisotropy_fh = gauge.Anisotropy();
313 cudaMemcpyToSymbol(
anisotropy_f, &(anisotropy_fh),
sizeof(
float));
316 cudaMemcpyToSymbol(
t_boundary_f, &(t_bc_f),
sizeof(
float));
318 float coeff_fh = -24.0*gauge.Tadpole()*gauge.Tadpole();
319 cudaMemcpyToSymbol(
coeff_f, &(coeff_fh),
sizeof(
float));
322 float2 An2_h = make_float2(gauge.Anisotropy(), 1.0 / (gauge.Anisotropy()*gauge.Anisotropy()));
323 cudaMemcpyToSymbol(
An2, &(An2_h),
sizeof(float2));
324 float2 TB2_h = make_float2(t_bc_f, 1.0 / (t_bc_f * t_bc_f));
325 cudaMemcpyToSymbol(
TB2, &(TB2_h),
sizeof(float2));
326 float2 No2_h = make_float2(1.0, 1.0);
327 cudaMemcpyToSymbol(
No2, &(No2_h),
sizeof(float2));
339 static int last_sp_stride = -1;
340 static int last_Ls = -1;
342 int sp_stride_h = spinor.Stride();
343 if (sp_stride_h != last_sp_stride) {
344 cudaMemcpyToSymbol(
sp_stride, &sp_stride_h,
sizeof(
int));
346 last_sp_stride = sp_stride_h;
350 if (spinor.Ndim() == 5) {
351 int Ls_h = spinor.X(4);
352 if (Ls_h != last_Ls) {
353 cudaMemcpyToSymbol(
Ls, &Ls_h,
sizeof(
int));
354 dslashConstants.Ls = Ls_h;
365 cudaMemcpyToSymbol(
pi_f, &pi_f_h,
sizeof(
float));
369 double tProjScale_h = (kernelPackT ? 1.0 : 2.0);
370 cudaMemcpyToSymbol(
tProjScale, &tProjScale_h,
sizeof(
double));
372 float tProjScale_fh = (float)tProjScale_h;
373 cudaMemcpyToSymbol(
tProjScale_f, &tProjScale_fh,
sizeof(
float));
381 int cl_stride_h = clover.Stride();
382 cudaMemcpyToSymbol(
cl_stride, &cl_stride_h,
sizeof(
int));
390 int fat_ga_stride_h = fatgauge.Stride();
391 int long_ga_stride_h = longgauge.Stride();
392 float fat_link_max_h = fatgauge.LinkMax();
394 cudaMemcpyToSymbol(
fat_ga_stride, &fat_ga_stride_h,
sizeof(
int));
395 cudaMemcpyToSymbol(
long_ga_stride, &long_ga_stride_h,
sizeof(
int));
396 cudaMemcpyToSymbol(
fat_ga_max, &fat_link_max_h,
sizeof(
float));
404 cudaMemcpyToSymbol(
fl_stride, &fl_stride_h,
sizeof(
int));