QUDA  v0.7.0
A library for QCD on GPUs
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
dslash_constants.h
Go to the documentation of this file.
1 enum KernelType {
8 };
9 
/**
 * Bundle of per-launch dslash parameters: thread count, parity, local
 * dimensions, kernel type, communication / ghost-zone flags, field strides
 * and (optionally) texture objects.  Which members exist depends on the
 * build-time macros guarding them below.
 */
struct DslashParam {
#ifndef STAGGERED_TESLA_HACK
  char do_not_delete; // work around for bug in CUDA 6.5
#endif
  int threads; // the desired number of active threads
  int parity;  // Even-Odd or Odd-Even
  int X[4];
#ifdef GPU_DOMAIN_WALL_DIRAC
  int Ls;
#endif
  KernelType kernel_type; //is it INTERIOR_KERNEL, EXTERIOR_KERNEL_X/Y/Z/T
#ifndef STAGGERED_TESLA_HACK
  int commDim[4]; // Whether to do comms or not
#endif
  int ghostDim[4]; // Whether a ghost zone has been allocated for a given dimension
  int ghostOffset[QUDA_MAX_DIM];     // read by print(); presumably per-dimension ghost-zone offsets -- confirm
  int ghostNormOffset[QUDA_MAX_DIM]; // read by print(); presumably per-dimension ghost-zone norm offsets -- confirm
  int sp_stride; // spinor stride
#ifdef GPU_CLOVER_DIRAC
  int cl_stride; // clover stride
#endif
#if (defined GPU_TWISTED_MASS_DIRAC) || (defined GPU_NDEG_TWISTED_MASS_DIRAC)
  int fl_stride; // twisted-mass flavor stride
#endif
#ifdef GPU_STAGGERED_DIRAC
  int gauge_stride;
  int long_gauge_stride;
  float fat_link_max;
#endif
#ifdef MULTI_GPU
  int threadDimMapLower[4];
  int threadDimMapUpper[4];
#endif

#ifdef USE_TEXTURE_OBJECTS
  cudaTextureObject_t inTex;
  cudaTextureObject_t inTexNorm;
  cudaTextureObject_t xTex;
  cudaTextureObject_t xTexNorm;
  cudaTextureObject_t outTex;
  cudaTextureObject_t outTexNorm;
  cudaTextureObject_t gauge0Tex; // also applies to fat gauge
  cudaTextureObject_t gauge1Tex; // also applies to fat gauge
  cudaTextureObject_t longGauge0Tex;
  cudaTextureObject_t longGauge1Tex;
  cudaTextureObject_t longPhase0Tex;
  cudaTextureObject_t longPhase1Tex;
  cudaTextureObject_t cloverTex;
  cudaTextureObject_t cloverNormTex;
  cudaTextureObject_t cloverInvTex;
  cudaTextureObject_t cloverInvNormTex;
#endif

  /**
   * Dump the scalar and per-dimension members through printfQuda.
   * Members that only exist under a build-time macro are printed under
   * the same macro, so print() compiles in every configuration.
   */
  void print() {
    printfQuda("threads = %d\n", threads);
    printfQuda("parity = %d\n", parity);
    printfQuda("X = {%d, %d, %d, %d}\n", X[0], X[1], X[2], X[3]);
#ifdef GPU_DOMAIN_WALL_DIRAC
    printfQuda("Ls = %d\n", Ls);
#endif
#ifndef STAGGERED_TESLA_HACK
    // commDim is only a member when STAGGERED_TESLA_HACK is not defined
    printfQuda("commDim = {%d, %d, %d, %d}\n", commDim[0], commDim[1], commDim[2], commDim[3]);
#endif
    printfQuda("ghostDim = {%d, %d, %d, %d}\n", ghostDim[0], ghostDim[1], ghostDim[2], ghostDim[3]);
    printfQuda("ghostOffset = {%d, %d, %d, %d}\n", ghostOffset[0], ghostOffset[1], ghostOffset[2], ghostOffset[3]);
    printfQuda("ghostNormOffset = {%d, %d, %d, %d}\n", ghostNormOffset[0], ghostNormOffset[1], ghostNormOffset[2], ghostNormOffset[3]);
    printfQuda("kernel_type = %d\n", kernel_type);
    printfQuda("sp_stride = %d\n", sp_stride);
#ifdef GPU_CLOVER_DIRAC
    printfQuda("cl_stride = %d\n", cl_stride);
#endif
  }
};
81 
// File-scope DslashParam instance; being static, each translation unit that
// includes this header gets its own copy.
82  static DslashParam dslashParam;
83 
84 
85 
86 #ifdef MULTI_GPU
// Only declared in multi-GPU builds; not referenced elsewhere in this header.
// NOTE(review): presumably hold the a/b values produced by setTwistParam -- confirm.
87  static double twist_a = 0.0;
88  static double twist_b = 0.0;
89 #endif
90 
91 
// Function-like maximum; each argument may be evaluated twice, so avoid
// passing expressions with side effects.
92 #define MAX(a,b) ((a)>(b) ? (a):(b))
93 
94 typedef struct fat_force_stride_s {
103 
// Device constants uploaded by initLatticeConstants().  X1..X4 are the local
// lattice extents lat.X()[0..3]; X1h and X2h are X1/2 and X2/2.
104 __constant__ int X1h;
105 __constant__ int X2h;
106 __constant__ int X1;
107 __constant__ int X2;
108 __constant__ int X3;
109 __constant__ int X4;
110 
// Xn_3 = 3*Xn
111 __constant__ int X1_3;
112 __constant__ int X2_3;
113 __constant__ int X3_3;
114 __constant__ int X4_3;
115 
// Xnm1 = Xn - 1
116 __constant__ int X1m1;
117 __constant__ int X2m1;
118 __constant__ int X3m1;
119 __constant__ int X4m1;
120 
// Xnm3 = Xn - 3
121 __constant__ int X1m3;
122 __constant__ int X2m3;
123 __constant__ int X3m3;
124 __constant__ int X4m3;
125 
// Differences of products: X2*X1 - X1, X3*X2*X1 - X2*X1, (X4-1)*X3*X2*X1,
// and (X4-1)*X3*X2*X1h respectively.
126 __constant__ int X2X1mX1;
127 __constant__ int X3X2X1mX2X1;
128 __constant__ int X4X3X2X1mX3X2X1;
129 __constant__ int X4X3X2X1hmX3X2X1h;
130 
// Same pattern with a factor of three: X2*X1 - 3*X1, X3*X2*X1 - 3*X2*X1,
// (X4-3)*X3*X2*X1, and (X4-3)*X3*X2*X1h respectively.
131 __constant__ int X2X1m3X1;
132 __constant__ int X3X2X1m3X2X1;
133 __constant__ int X4X3X2X1m3X3X2X1;
134 __constant__ int X4X3X2X1hm3X3X2X1h;
135 
// Plain products of the named extents; a trailing 'h' means the product is
// divided by two (integer division).
136 __constant__ int X2X1;
137 __constant__ int X3X1;
138 __constant__ int X3X2;
139 __constant__ int X3X2X1;
140 __constant__ int X4X2X1;
141 __constant__ int X4X2X1h;
142 __constant__ int X4X3X1;
143 __constant__ int X4X3X1h;
144 __constant__ int X4X3X2;
145 __constant__ int X4X3X2h;
146 
// Largest of Xi*Xj/2 over all six pairs of distinct dimensions.
147 __constant__ int Vh_2d_max;
148 
// 3*X2*X1 and 3*X3*X2*X1
149 __constant__ int X2X1_3;
150 __constant__ int X3X2X1_3;
151 
// Vh = lat.VolumeCB().  Vs and Vsh are both set to X1*X2*X3/2 by
// initLatticeConstants() (see the FIXME there about the Vs naming).
152 __constant__ int Vh;
153 __constant__ int Vs;
154 __constant__ int Vsh;
155 //__constant__ int sp_stride;
// ga_stride = gauge.Stride(), uploaded by initGaugeConstants().
156 __constant__ int ga_stride;
157 //__constant__ int cl_stride;
// Only the first four entries are written by initLatticeConstants():
// the product of the other three extents, divided by two, per dimension.
158 __constant__ int ghostFace[QUDA_MAX_DIM+1];
159 
// Written by initGaugeConstants() (fat_ga_stride, fat_ga_max) and by
// initStaggeredConstants() (all three); whichever runs last wins.
160 __constant__ int fat_ga_stride;
161 __constant__ int long_ga_stride;
162 __constant__ float fat_ga_max;
163 
// 1 when gauge.GaugeFixed() == QUDA_GAUGE_FIXED_YES, else 0.
164 __constant__ int gauge_fixed;
165 
166 // domain wall constants
167 //__constant__ int Ls;
// Uploaded by initMDWFConstants() from its m5h argument.
168 __constant__ double m5_d;
169 __constant__ float m5_f;
170 
171 // single precision constants
// coeff_f = 1/longgauge.Scale(); t_boundary_f is +1 for QUDA_PERIODIC_T and
// -1 otherwise; pi_f = M_PI.
172 __constant__ float anisotropy_f;
173 __constant__ float coeff_f;
174 __constant__ float t_boundary_f;
175 __constant__ float pi_f;
176 
177 // double precision constants
178 __constant__ double anisotropy;
179 __constant__ double t_boundary;
180 __constant__ double coeff;
181 
// (value, 1/value^2) pairs: An2 for the anisotropy, TB2 for the temporal
// boundary factor, No2 = (1, 1).  Uploaded by initGaugeConstants().
182 __constant__ float2 An2;
183 __constant__ float2 TB2;
184 __constant__ float2 No2;
185 
186 // Are we processor 0 in time?
187 __constant__ bool Pt0;
188 
189 // Are we processor Nt-1 in time?
190 __constant__ bool PtNm1;
191 
192 // factor of 2 (or 1) for T-dimensional spin projection
// Set by initDslashConstants(): 1.0 when getKernelPackT() is true, else 2.0.
193 __constant__ double tProjScale;
194 __constant__ float tProjScale_f;
195 
196 //for link fattening/gauge force/fermion force code
// En = Xn + 4, E1h = E1/2, Vh_ex = E1*E2*E3*E4/2, E2E1 = E2*E1,
// E3E2E1 = E3*E2*E1 (all uploaded by initLatticeConstants()).
197 __constant__ int E1, E2, E3, E4, E1h;
198 __constant__ int Vh_ex;
199 __constant__ int E2E1;
200 __constant__ int E3E2E1;
201 
// Not written by any function visible in this header.
202 __constant__ fat_force_const_t fl; //fatlink
203 __constant__ fat_force_const_t gf; //gauge force
204 __constant__ fat_force_const_t hf; //hisq force
205 
// Uploads one host int into an int __constant__ symbol.
static inline void setLatticeInt(const int &symbol, int value)
{
  cudaMemcpyToSymbol(symbol, &value, sizeof(int));
}

// Uploads one host bool into a bool __constant__ symbol.
static inline void setLatticeBool(const bool &symbol, bool value)
{
  cudaMemcpyToSymbol(symbol, &value, sizeof(bool));
}

/**
 * Derive every geometry-dependent device constant from the local lattice
 * dimensions and upload it to constant memory.
 *
 * @param lat      field supplying the local extents (X()) and the
 *                 checkerboarded volume (VolumeCB())
 * @param profile  profiler; the whole call is charged to QUDA_PROFILE_CONSTANT
 *
 * Upload failures are not tested per copy; checkCudaError() is called once
 * before and once after the batch.
 */
void initLatticeConstants(const LatticeField &lat, TimeProfile &profile)
{
  profile.Start(QUDA_PROFILE_CONSTANT);

  checkCudaError();

  setLatticeInt(Vh, lat.VolumeCB());

  // FIXME - this should not be called Vs, rather Vsh
  const int spatial_half = lat.X()[0]*lat.X()[1]*lat.X()[2]/2;
  setLatticeInt(Vs, spatial_half);
  setLatticeInt(Vsh, spatial_half); // same value as Vs

  // local extents
  const int nx = lat.X()[0];
  const int ny = lat.X()[1];
  const int nz = lat.X()[2];
  const int nt = lat.X()[3];
  setLatticeInt(X1, nx);
  setLatticeInt(X2, ny);
  setLatticeInt(X3, nz);
  setLatticeInt(X4, nt);

  // half the face volume orthogonal to each dimension
  int face_half[4] = { ny*nz*nt/2, nx*nz*nt/2, nx*ny*nt/2, nx*ny*nz/2 };
  cudaMemcpyToSymbol(ghostFace, face_half, 4*sizeof(int));

  // three times each extent
  setLatticeInt(X1_3, 3*nx);
  setLatticeInt(X2_3, 3*ny);
  setLatticeInt(X3_3, 3*nz);
  setLatticeInt(X4_3, 3*nt);

  // products of extents, and their halves
  const int yx  = ny*nx;
  const int zyx = nz*ny*nx;
  setLatticeInt(X2X1, yx);
  setLatticeInt(X3X1, nz*nx);
  setLatticeInt(X3X2, nz*ny);
  setLatticeInt(X3X2X1, zyx);
  setLatticeInt(X4X2X1, nt*ny*nx);
  setLatticeInt(X4X2X1h, nt*ny*nx/2);
  setLatticeInt(X4X3X1, nt*nz*nx);
  setLatticeInt(X4X3X1h, nt*nz*nx/2);
  setLatticeInt(X4X3X2, nt*nz*ny);
  setLatticeInt(X4X3X2h, nt*nz*ny/2);

  setLatticeInt(X2X1_3, 3*ny*nx);
  setLatticeInt(X3X2X1_3, 3*nz*ny*nx);

  // half extents
  const int nx_half = nx/2;
  setLatticeInt(X1h, nx_half);
  setLatticeInt(X2h, ny/2);

  // extent minus one
  setLatticeInt(X1m1, nx - 1);
  setLatticeInt(X2m1, ny - 1);
  setLatticeInt(X3m1, nz - 1);
  setLatticeInt(X4m1, nt - 1);

  // extent minus three
  setLatticeInt(X1m3, nx - 3);
  setLatticeInt(X2m3, ny - 3);
  setLatticeInt(X3m3, nz - 3);
  setLatticeInt(X4m3, nt - 3);

  // offsets one slice short of a full wrap in each direction
  setLatticeInt(X2X1mX1, yx - nx);
  setLatticeInt(X3X2X1mX2X1, zyx - yx);
  setLatticeInt(X4X3X2X1mX3X2X1, (nt-1)*zyx);
  setLatticeInt(X4X3X2X1hmX3X2X1h, (nt-1)*nz*ny*nx_half);

  // offsets three slices short of a full wrap in each direction
  setLatticeInt(X2X1m3X1, yx - 3*nx);
  setLatticeInt(X3X2X1m3X2X1, zyx - 3*yx);
  setLatticeInt(X4X3X2X1m3X3X2X1, (nt-3)*zyx);
  setLatticeInt(X4X3X2X1hm3X3X2X1h, (nt-3)*nz*ny*nx_half);

  // largest half 2-d plane over all pairs of dimensions
  const int plane_half[6] = { nx*ny/2, nx*nz/2, nx*nt/2, ny*nz/2, ny*nt/2, nz*nt/2 };
  int largest_plane = plane_half[0];
  for (int i = 1; i < 6; i++) largest_plane = MAX(largest_plane, plane_half[i]);
  setLatticeInt(Vh_2d_max, largest_plane);

#ifdef MULTI_GPU
  const bool on_first_t_node = (commCoords(3) == 0);
  const bool on_last_t_node = (commCoords(3) == commDim(3)-1);
#else
  const bool on_first_t_node = true;
  const bool on_last_t_node = true;
#endif

  setLatticeBool(Pt0, on_first_t_node);
  setLatticeBool(PtNm1, on_last_t_node);

  //constants used by fatlink/gauge force/hisq force code
  const int ex = nx+4;
  const int ey = ny+4;
  const int ez = nz+4;
  const int et = nt+4;

  setLatticeInt(E1, ex);
  setLatticeInt(E1h, ex/2);
  setLatticeInt(E2, ey);
  setLatticeInt(E3, ez);
  setLatticeInt(E4, et);
  setLatticeInt(E2E1, ey*ex);
  setLatticeInt(E3E2E1, ez*ey*ex);
  setLatticeInt(Vh_ex, ex*ey*ez*et/2);

  checkCudaError();

  profile.Stop(QUDA_PROFILE_CONSTANT);
}
382 
383 
/**
 * Upload the gauge-field-dependent device constants: stride, link maximum,
 * gauge-fixing flag, anisotropy and temporal boundary factor (in double,
 * float and float2 form).
 *
 * @param gauge    gauge field queried for Stride(), LinkMax(), GaugeFixed(),
 *                 Anisotropy() and TBoundary()
 * @param profile  profiler; the whole call is charged to QUDA_PROFILE_CONSTANT
 */
void initGaugeConstants(const cudaGaugeField &gauge, TimeProfile &profile)
{
  profile.Start(QUDA_PROFILE_CONSTANT);

  // the same stride feeds both ga_stride and fat_ga_stride (naive staggered)
  const int stride_h = gauge.Stride();
  cudaMemcpyToSymbol(ga_stride, &stride_h, sizeof(int));
  cudaMemcpyToSymbol(fat_ga_stride, &stride_h, sizeof(int));

  const float max_link_h = gauge.LinkMax();
  cudaMemcpyToSymbol(fat_ga_max, &max_link_h, sizeof(float));

  const int is_gauge_fixed = (gauge.GaugeFixed() == QUDA_GAUGE_FIXED_YES);
  cudaMemcpyToSymbol(gauge_fixed, &is_gauge_fixed, sizeof(int));

  // temporal boundary factor: +1 for periodic, -1 otherwise
  const bool periodic_t = (gauge.TBoundary() == QUDA_PERIODIC_T);

  // double precision copies
  const double aniso_d = gauge.Anisotropy();
  cudaMemcpyToSymbol(anisotropy, &aniso_d, sizeof(double));

  const double t_sign_d = periodic_t ? 1.0 : -1.0;
  cudaMemcpyToSymbol(t_boundary, &t_sign_d, sizeof(double));

  // single precision copies
  const float aniso_s = gauge.Anisotropy();
  cudaMemcpyToSymbol(anisotropy_f, &aniso_s, sizeof(float));

  const float t_sign_s = periodic_t ? 1.0 : -1.0;
  cudaMemcpyToSymbol(t_boundary_f, &t_sign_s, sizeof(float));

  // constants used by the READ_GAUGE() macros in read_gauge.h
  const float2 aniso_pair = make_float2(gauge.Anisotropy(), 1.0 / (gauge.Anisotropy()*gauge.Anisotropy()));
  cudaMemcpyToSymbol(An2, &aniso_pair, sizeof(float2));

  const float2 t_sign_pair = make_float2(t_sign_s, 1.0 / (t_sign_s * t_sign_s));
  cudaMemcpyToSymbol(TB2, &t_sign_pair, sizeof(float2));

  const float2 unit_pair = make_float2(1.0, 1.0);
  cudaMemcpyToSymbol(No2, &unit_pair, sizeof(float2));

  checkCudaError();

  profile.Stop(QUDA_PROFILE_CONSTANT);
}
424 
/**
 * Upload the dslash constants that do not depend on any field: pi (float)
 * and the T-projection scale in double and float.
 *
 * @param profile  profiler; the whole call is charged to QUDA_PROFILE_CONSTANT
 */
void initDslashConstants(TimeProfile &profile)
{
  profile.Start(QUDA_PROFILE_CONSTANT);

  const float pi_single = M_PI;
  cudaMemcpyToSymbol(pi_f, &pi_single, sizeof(float));

  // temporary additions (?) for checking Ron's T-packing kernel with old multi-gpu kernel

  // scale is 1 when the T dimension is packed by the kernel, 2 otherwise
  double scale_d;
  if (getKernelPackT()) {
    scale_d = 1.0;
  } else {
    scale_d = 2.0;
  }
  cudaMemcpyToSymbol(tProjScale, &scale_d, sizeof(double));

  const float scale_s = (float)scale_d;
  cudaMemcpyToSymbol(tProjScale_f, &scale_s, sizeof(float));

  checkCudaError();

  profile.Stop(QUDA_PROFILE_CONSTANT);
}
444 
/**
 * Upload the staggered-fermion device constants: the long-link coefficient
 * 1/longgauge.Scale() (float and double), the fat and long link strides,
 * and the fat-link maximum.
 *
 * @param fatgauge   fat-link field queried for Stride() and LinkMax()
 * @param longgauge  long-link field queried for Stride() and Scale()
 * @param profile    profiler; the whole call is charged to QUDA_PROFILE_CONSTANT
 */
void initStaggeredConstants(const cudaGaugeField &fatgauge, const cudaGaugeField &longgauge,
                            TimeProfile &profile)
{
  profile.Start(QUDA_PROFILE_CONSTANT);

  // long-link coefficient, single then double precision
  const double inv_scale_d = 1.0/longgauge.Scale();
  const float inv_scale_s = inv_scale_d;
  cudaMemcpyToSymbol(coeff_f, &inv_scale_s, sizeof(float));
  cudaMemcpyToSymbol(coeff, &inv_scale_d, sizeof(double));

  // link strides
  const int fat_stride = fatgauge.Stride();
  cudaMemcpyToSymbol(fat_ga_stride, &fat_stride, sizeof(int));

  const int long_stride = longgauge.Stride();
  cudaMemcpyToSymbol(long_ga_stride, &long_stride, sizeof(int));

  // fat-link maximum
  const float fat_max = fatgauge.LinkMax();
  cudaMemcpyToSymbol(fat_ga_max, &fat_max, sizeof(float));

  checkCudaError();

  profile.Stop(QUDA_PROFILE_CONSTANT);
}
468 
469 // Device-side copies of the MDWF b_5 / c_5 coefficient arrays, filled by
// initMDWFConstants(); each holds up to QUDA_MAX_DWF_LS entries.
// Double precision copies:
470 __constant__ double mdwf_b5_d[QUDA_MAX_DWF_LS];
471 __constant__ double mdwf_c5_d[QUDA_MAX_DWF_LS];
472 
// Single precision copies (host values cast to float before upload):
473 __constant__ float mdwf_b5_f[QUDA_MAX_DWF_LS];
474 __constant__ float mdwf_c5_f[QUDA_MAX_DWF_LS];
475 
/**
 * Upload the MDWF coefficients b_5 / c_5 (double and float copies) and the
 * m5 parameter to constant memory.
 *
 * Uploads are skipped when the device already holds the requested values.
 * The coefficient check compares both Ls and the coefficient values of the
 * previous upload, so changing the coefficients at fixed Ls is picked up.
 *
 * @param b_5      host array of at least dim_s b_5 coefficients
 * @param c_5      host array of at least dim_s c_5 coefficients
 * @param dim_s    number of coefficients (Ls); must lie in
 *                 [0, QUDA_MAX_DWF_LS], otherwise errorQuda is raised
 * @param m5h      m5 value to upload
 * @param profile  profiler; the call is charged to QUDA_PROFILE_CONSTANT
 */
void initMDWFConstants(const double *b_5, const double *c_5, int dim_s, const double m5h, TimeProfile &profile)
{
  // The staging buffers and the device arrays hold QUDA_MAX_DWF_LS entries;
  // refuse anything that would overrun them.
  if (dim_s < 0 || dim_s > QUDA_MAX_DWF_LS) {
    errorQuda("Ls = %d is outside the supported range [0, %d]\n", dim_s, (int)QUDA_MAX_DWF_LS);
    return; // defensive: nothing below is safe with this dim_s
  }

  profile.Start(QUDA_PROFILE_CONSTANT);

  // host-side record of what was last uploaded
  static int last_Ls = -1;
  static double last_b5[QUDA_MAX_DWF_LS];
  static double last_c5[QUDA_MAX_DWF_LS];

  bool coeffs_changed = (dim_s != last_Ls);
  for (int i=0; i<dim_s && !coeffs_changed; i++) {
    coeffs_changed = (b_5[i] != last_b5[i]) || (c_5[i] != last_c5[i]);
  }

  if (coeffs_changed) {
    float b_5_f[QUDA_MAX_DWF_LS];
    float c_5_f[QUDA_MAX_DWF_LS];
    for (int i=0; i<dim_s; i++) {
      b_5_f[i] = (float)b_5[i];
      c_5_f[i] = (float)c_5[i];
    }

    cudaMemcpyToSymbol(mdwf_b5_d, b_5, dim_s*sizeof(double));
    cudaMemcpyToSymbol(mdwf_c5_d, c_5, dim_s*sizeof(double));
    cudaMemcpyToSymbol(mdwf_b5_f, b_5_f, dim_s*sizeof(float));
    cudaMemcpyToSymbol(mdwf_c5_f, c_5_f, dim_s*sizeof(float));
    checkCudaError();

    // remember the uploaded values only after the copies were checked
    for (int i=0; i<dim_s; i++) {
      last_b5[i] = b_5[i];
      last_c5[i] = c_5[i];
    }
    last_Ls = dim_s;
  }

  // An explicit flag replaces a magic sentinel value, so every m5 value
  // (including the old sentinel 99999) is uploaded on first use.
  static bool m5_uploaded = false;
  static double last_m5 = 0.0;
  if (!m5_uploaded || m5h != last_m5) {
    float m5h_f = (float)m5h;
    cudaMemcpyToSymbol(m5_d, &m5h, sizeof(double));
    cudaMemcpyToSymbol(m5_f, &m5h_f, sizeof(float));
    checkCudaError();
    last_m5 = m5h;
    m5_uploaded = true;
  }

  profile.Stop(QUDA_PROFILE_CONSTANT);
}
508 
/**
 * Compute the twist coefficients a and b from kappa and mu.
 *
 *   QUDA_TWIST_GAMMA5_DIRECT :  a =  2*kappa*mu,  b = 1
 *   QUDA_TWIST_GAMMA5_INVERSE:  a = -2*kappa*mu,  b = 1/(1 + a*a)
 *
 * For any other twist type errorQuda is invoked and a, b are not assigned.
 * When dagger is non-zero the sign of a is flipped after b was computed.
 *
 * @param a       output coefficient
 * @param b       output coefficient
 * @param kappa   input kappa
 * @param mu      input mu
 * @param dagger  non-zero to negate a
 * @param twist   which of the two forms above to produce
 */
void setTwistParam(double &a, double &b, const double &kappa, const double &mu,
                   const int dagger, const QudaTwistGamma5Type twist) {
  switch (twist) {
  case QUDA_TWIST_GAMMA5_DIRECT:
    a = 2.0 * kappa * mu;
    b = 1.0;
    break;
  case QUDA_TWIST_GAMMA5_INVERSE:
    a = -2.0 * kappa * mu;
    b = 1.0 / (1.0 + a*a);
    break;
  default:
    errorQuda("Twist type %d not defined\n", twist);
  }

  if (dagger) {
    a *= -1.0;
  }
}
int commDim(int)
__constant__ int X4X3X2X1hm3X3X2X1h
__constant__ float coeff_f
__constant__ int X4m3
void initStaggeredConstants(const cudaGaugeField &fatgauge, const cudaGaugeField &longgauge, TimeProfile &profile)
__constant__ int Vh
__constant__ float t_boundary_f
__constant__ int X3X2X1m3X2X1
__constant__ int X1h
__constant__ float m5_f
__constant__ int X4X3X1
__constant__ int X2
__constant__ int X4X3X2h
__constant__ fat_force_const_t hf
struct fat_force_stride_s fat_force_const_t
__constant__ int X2X1mX1
bool getKernelPackT()
Definition: dslash_quda.cu:84
__constant__ int Vh_ex
void initMDWFConstants(const double *b_5, const double *c_5, int dim_s, const double m5h, TimeProfile &profile)
__constant__ fat_force_const_t gf
__constant__ int X3m3
__constant__ int Vsh
#define errorQuda(...)
Definition: util_quda.h:73
__constant__ int X3X2X1mX2X1
__constant__ float fat_ga_max
__constant__ int X1
__global__ void const RealA *const const RealA *const const RealA *const const RealB *const const RealB *const int int mu
#define MAX(a, b)
__constant__ int X4X3X2X1m3X3X2X1
__constant__ int X3X2X1
__constant__ int X3X2
__constant__ int E3E2E1
__constant__ int X4X3X1h
__constant__ bool Pt0
int ghostOffset[QUDA_MAX_DIM]
KernelType
__constant__ int E4
__constant__ bool PtNm1
__constant__ int E2E1
QudaDagType dagger
Definition: test_util.cpp:1558
__constant__ float mdwf_b5_f[QUDA_MAX_DWF_LS]
int Ls
Definition: test_util.cpp:40
int ghostNormOffset[QUDA_MAX_DIM]
__constant__ int Vs
__constant__ int fat_ga_stride
__constant__ int ghostFace[QUDA_MAX_DIM+1]
__constant__ int long_ga_stride
__constant__ int X2h
__constant__ int X4_3
__constant__ int X2X1_3
__constant__ int X3_3
__constant__ int X3X2X1_3
VOLATILE spinorFloat kappa
KernelType kernel_type
__constant__ int X2X1m3X1
__constant__ int X4X3X2
__constant__ double anisotropy
__constant__ double m5_d
__constant__ double coeff
__constant__ float anisotropy_f
void setTwistParam(double &a, double &b, const double &kappa, const double &mu, const int dagger, const QudaTwistGamma5Type twist)
__constant__ float pi_f
__constant__ int X2m1
void initGaugeConstants(const cudaGaugeField &gauge, TimeProfile &profile)
__constant__ int gauge_fixed
int commCoords(int)
__constant__ float2 An2
__constant__ int X4X3X2X1mX3X2X1
__constant__ double mdwf_b5_d[QUDA_MAX_DWF_LS]
__constant__ int X2_3
__constant__ float2 TB2
__constant__ int X1m3
__constant__ int ga_stride
__constant__ fat_force_const_t fl
__constant__ int Vh_2d_max
__constant__ int X1m1
__constant__ int X3
#define QUDA_MAX_DWF_LS
Maximum length of the Ls dimension for domain-wall fermions.
#define printfQuda(...)
Definition: util_quda.h:67
__constant__ float tProjScale_f
__constant__ int X4X2X1h
__constant__ double tProjScale
__constant__ double t_boundary
__constant__ int X4m1
enum QudaTwistGamma5Type_s QudaTwistGamma5Type
#define QUDA_MAX_DIM
Maximum number of dimensions supported by QUDA. In practice, no routines make use of more than 5...
__constant__ int E1
__constant__ int X1_3
#define checkCudaError()
Definition: util_quda.h:110
__constant__ float mdwf_c5_f[QUDA_MAX_DWF_LS]
__constant__ int X3X1
void initLatticeConstants(const LatticeField &lat, TimeProfile &profile)
__constant__ int X4X3X2X1hmX3X2X1h
__constant__ int X4X2X1
__constant__ int E1h
void * gauge[4]
Definition: su3_test.cpp:15
float fat_link_max
__constant__ double mdwf_c5_d[QUDA_MAX_DWF_LS]
void initDslashConstants(TimeProfile &profile)
__constant__ int E3
__constant__ int E2
__constant__ int X4
__constant__ int X3m1
__constant__ float2 No2
__constant__ int X2X1
__constant__ int X2m3