QUDA  0.9.0
dslash_constants.h
Go to the documentation of this file.
1 #include <unistd.h>
2 #include <fast_intdiv.h>
3 #include <convert.h>
4 
// Tag type stored in DslashParam::kernel_type. That member's comment names
// INTERIOR_KERNEL and EXTERIOR_KERNEL_X/Y/Z/T as values.
// NOTE(review): the enumerator lines (listing lines 6-12) are not shown in this
// rendered listing; take names and numeric values from the original header.
5 enum KernelType {
13 };
14 
15  struct DslashParam {
16  int threads; // the desired number of active threads
17  int parity; // Even-Odd or Odd-Even
18 
19  int_fastdiv block[4]; // dslash tile block parameter
20  int_fastdiv grid[4]; // dslash tile grid parameter
21  int_fastdiv swizzle; // block index swizzle factor
22 
23  DslashConstant dc;
24 
25  KernelType kernel_type; //is it INTERIOR_KERNEL, EXTERIOR_KERNEL_X/Y/Z/T
26 
27  int commDim[QUDA_MAX_DIM]; // Whether to do comms or not
28  int ghostDim[QUDA_MAX_DIM]; // Whether a ghost zone has been allocated for a given dimension
31  int sp_stride; // spinor stride
32 
33 #ifdef GPU_CLOVER_DIRAC
34  int cl_stride; // clover stride
35 #endif
36 #if (defined GPU_TWISTED_MASS_DIRAC) || (defined GPU_NDEG_TWISTED_MASS_DIRAC)
37  int fl_stride; // twisted-mass flavor stride
38 #endif
40 #ifdef GPU_STAGGERED_DIRAC
41  int long_gauge_stride;
42  float fat_link_max;
43 #endif
44 
45  int gauge_fixed; // whether the gauge field is fixed to axial gauge
46 
47  double t_boundary;
48  float t_boundary_f;
49 
50  bool Pt0;
51  bool PtNm1;
52 
53  double anisotropy;
54  float anisotropy_f;
55 
56  float2 An2;
57  float2 TB2;
58  float2 No2;
59 
62 
63  double coeff; // used as a gauge field scaling factor by the staggered kernels
64  float coeff_f;
65 
66  double a;
67  float a_f;
68 
69  double b;
70  float b_f;
71 
72  double c;
73  float c_f;
74 
75  double d;
76  float d_f;
77 
78  double a_inv;
79  float a_inv_f;
80 
81  double rho;
82  float rho_f;
83 
84  double mferm;
85  float mferm_f;
86 
87  // domain wall constants
88  double m5_d;
89  float m5_f;
90 
91  // the coefficients used in MDWF
94 
97 
98  double tProjScale;
99  float tProjScale_f;
100 
101  void *out;
102  float *outNorm;
103 
104  void *in;
105  float *inNorm;
106 
107  void *ghost[2*QUDA_MAX_DIM];
109 
110  void *x;
111  float *xNorm;
112 
113  void *gauge0;
114  void *gauge1;
115 
116  void *longGauge0;
117  void *longGauge1;
118 
119  void *longPhase0;
120  void *longPhase1;
121 
122  void *clover;
123  float *cloverNorm;
124 
125  void *cloverInv;
127 
128  double twist_a;
129  double twist_b;
130 
131  int Vsh; // used by contraction kernels
132 
133 #ifdef USE_TEXTURE_OBJECTS
134  cudaTextureObject_t inTex;
135  cudaTextureObject_t inTexNorm;
136  cudaTextureObject_t ghostTex[2*QUDA_MAX_DIM];
137  cudaTextureObject_t ghostTexNorm[2*QUDA_MAX_DIM];
138  cudaTextureObject_t xTex;
139  cudaTextureObject_t xTexNorm;
140  cudaTextureObject_t outTex;
141  cudaTextureObject_t outTexNorm;
142  cudaTextureObject_t gauge0Tex; // also applies to fat gauge
143  cudaTextureObject_t gauge1Tex; // also applies to fat gauge
144  cudaTextureObject_t longGauge0Tex;
145  cudaTextureObject_t longGauge1Tex;
146  cudaTextureObject_t longPhase0Tex;
147  cudaTextureObject_t longPhase1Tex;
148  cudaTextureObject_t cloverTex;
149  cudaTextureObject_t cloverNormTex;
150  cudaTextureObject_t cloverInvTex;
151  cudaTextureObject_t cloverInvNormTex;
152 #endif
153 
154  // used by the autotuner to switch on/off remote writing vs using copy engines
156 
157  void print() {
158  printfQuda("threads = %d\n", threads);
159  printfQuda("parity = %d\n", parity);
160  printfQuda("X = {%d, %d, %d, %d}\n", (int)dc.X[0], (int)dc.X[1], (int)dc.X[2], (int)dc.X[3]);
161  printfQuda("Xh = {%d, %d, %d, %d}\n", (int)dc.Xh[0], (int)dc.Xh[1], (int)dc.Xh[2], (int)dc.Xh[3]);
162  printfQuda("volume4CB = %d\n", (int)dc.volume_4d_cb);
163  printfQuda("Ls = %d\n", dc.Ls);
164  printfQuda("kernel_type = %d\n", kernel_type);
165  printfQuda("commDim = {%d, %d, %d, %d}\n", commDim[0], commDim[1], commDim[2], commDim[3]);
166  printfQuda("ghostDim = {%d, %d, %d, %d}\n", ghostDim[0], ghostDim[1], ghostDim[2], ghostDim[3]);
167  printfQuda("ghostOffset = {{%d, %d}, {%d, %d}, {%d, %d}, {%d, %d}}\n", ghostOffset[0][0], ghostOffset[0][1],
168  ghostOffset[1][0], ghostOffset[1][1],
169  ghostOffset[2][0], ghostOffset[2][1],
170  ghostOffset[3][0], ghostOffset[3][1]);
171  printfQuda("ghostNormOffset = {{%d, %d}, {%d, %d}, {%d, %d}, {%d, %d}}\n", ghostNormOffset[0][0], ghostNormOffset[0][1],
172  ghostNormOffset[1][0], ghostNormOffset[1][1],
173  ghostNormOffset[2][0], ghostNormOffset[2][1],
174  ghostNormOffset[3][0], ghostNormOffset[3][1]);
175  printfQuda("sp_stride = %d\n", sp_stride);
176 #ifdef GPU_CLOVER_DIRAC
177  printfQuda("cl_stride = %d\n", cl_stride);
178 #endif
179 #if (defined GPU_TWISTED_MASS_DIRAC) || (defined GPU_NDEG_TWISTED_MASS_DIRAC)
180  printfQuda("fl_stride = %d\n", fl_stride);
181 #endif
182 #ifdef GPU_STAGGERED_DIRAC
183  printfQuda("gauge_stride = %d\n", gauge_stride);
184  printfQuda("long_gauge_stride = %d\n", long_gauge_stride);
185  printfQuda("fat_link_max = %e\n", fat_link_max);
186 #endif
187  printfQuda("threadDimMapLower = {%d, %d, %d, %d}\n", threadDimMapLower[0], threadDimMapLower[1],
189  printfQuda("threadDimMapUpper = {%d, %d, %d, %d}\n", threadDimMapUpper[0], threadDimMapUpper[1],
191  printfQuda("a = %e\n", a);
192  printfQuda("b = %e\n", b);
193  printfQuda("c = %e\n", c);
194  printfQuda("d = %e\n", d);
195  printfQuda("a_inv = %e\n", a_inv);
196  printfQuda("rho = %e\n", rho);
197  printfQuda("mferm = %e\n", mferm);
198  printfQuda("tProjScale = %e\n", tProjScale);
199  printfQuda("twist_a = %e\n", twist_a);
200  printfQuda("twist_b = %e\n", twist_b);
201  }
202  };
int commDim[QUDA_MAX_DIM]
int threadDimMapLower[4]
int_fastdiv grid[4]
float * ghostNorm[2 *QUDA_MAX_DIM]
int ghostOffset[QUDA_MAX_DIM+1][2]
KernelType
double mdwf_b5_d[QUDA_MAX_DWF_LS]
double mdwf_c5_d[QUDA_MAX_DWF_LS]
void * ghost[2 *QUDA_MAX_DIM]
float mdwf_c5_f[QUDA_MAX_DWF_LS]
int_fastdiv block[4]
DslashConstant dc
KernelType kernel_type
float mdwf_b5_f[QUDA_MAX_DWF_LS]
int threadDimMapUpper[4]
#define QUDA_MAX_DWF_LS
Maximum length of the Ls dimension for domain-wall fermions.
float * cloverNorm
#define printfQuda(...)
Definition: util_quda.h:84
int_fastdiv swizzle
int ghostDim[QUDA_MAX_DIM]
#define QUDA_MAX_DIM
Maximum number of dimensions supported by QUDA. In practice, no routines make use of more than 5.
float * cloverInvNorm
int ghostNormOffset[QUDA_MAX_DIM+1][2]
float fat_link_max