QUDA  1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
dslash_constants.h
Go to the documentation of this file.
1 #include <unistd.h>
2 #include <fast_intdiv.h>
3 #include <convert.h>
4 
// DslashParam: aggregate of the values handed to the dslash kernels -- launch
// geometry (threads, block/grid tiling, swizzle), communication flags, field
// strides, scalar coefficients, raw field pointers and (optionally) texture
// objects. No constructor is visible here, so members carry no defaults from
// this definition itself.
//
// Many scalar coefficients are declared twice: a double and a float whose name
// carries an `_f` suffix (e.g. coeff / coeff_f). Presumably both hold the same
// value at two precisions -- TODO confirm against the code that fills them in.
//
// NOTE(review): this listing is incomplete. print() below reads ghostOffset,
// ghostNormOffset, gauge_stride, threadDimMapLower and threadDimMapUpper, but
// their declarations are not visible in this block; likewise the comments
// "the coefficients used in MDWF" and "used by the autotuner ..." are not
// followed by any member. Recover the missing declarations from the real
// header before compiling or editing the layout.
5  struct DslashParam {
6  int threads; // the desired number of active threads
7  int parity; // Even-Odd or Odd-Even
8 
9  int_fastdiv block[4]; // dslash tile block parameter
10  int_fastdiv grid[4]; // dslash tile grid parameter
11  int_fastdiv swizzle; // block index swizzle factor
12 
   // lattice constants; print() reads dc.X, dc.Xh, dc.volume_4d_cb and dc.Ls
13  DslashConstant dc;
14 
15  KernelType kernel_type; //is it INTERIOR_KERNEL, EXTERIOR_KERNEL_X/Y/Z/T
16 
17  int commDim[QUDA_MAX_DIM]; // Whether to do comms or not
18  int ghostDim[QUDA_MAX_DIM]; // Whether a ghost zone has been allocated for a given dimension
21  int sp_stride; // spinor stride
22 
   // the following strides exist only when the matching operator is compiled in
23 #ifdef GPU_CLOVER_DIRAC
24  int cl_stride; // clover stride
25 #endif
26 #if (defined GPU_TWISTED_MASS_DIRAC) || (defined GPU_NDEG_TWISTED_MASS_DIRAC)
27  int fl_stride; // twisted-mass flavor stride
28 #endif
30 #ifdef GPU_STAGGERED_DIRAC
31  int long_gauge_stride;
32  float fat_link_max;
33 #endif
34 
35  bool spin_project; // If using covDev, turn off spin projection.
36 
37  int gauge_fixed; // whether the gauge field is fixed to axial gauge
38 
39  double t_boundary;
40  float t_boundary_f;
41 
42  bool Pt0;
43  bool PtNm1;
44 
45  double anisotropy;
46  float anisotropy_f;
47 
48  float2 An2;
49  float2 TB2;
50  float2 No2;
51 
54 
55  double coeff; // used as a gauge field scaling factor by the staggered kernels
56  float coeff_f;
57 
   // generic coefficients a, b, c, d, a_inv, rho, mferm: their meaning is not
   // stated in this listing -- presumably operator-dependent; verify per kernel
58  double a;
59  float a_f;
60 
61  double b;
62  float b_f;
63 
64  double c;
65  float c_f;
66 
67  double d;
68  float d_f;
69 
70  double a_inv;
71  float a_inv_f;
72 
73  double rho;
74  float rho_f;
75 
76  double mferm;
77  float mferm_f;
78 
79  // domain wall constants
80  double m5_d;
81  float m5_f;
82 
83  // the coefficients used in MDWF
86 
89 
90  double tProjScale;
91  float tProjScale_f;
92 
   // untyped field pointers; out, in, x and clover each have a companion
   // float* "...Norm" pointer declared alongside
93  void *out;
94  float *outNorm;
95 
96  void *in;
97  float *inNorm;
98 
99  void *ghost[2*QUDA_MAX_DIM];
101 
102  void *x;
103  float *xNorm;
104 
105  void *gauge0;
106  void *gauge1;
107 
108  void *longGauge0;
109  void *longGauge1;
110 
111  void *longPhase0;
112  void *longPhase1;
113 
114  void *clover;
115  float *cloverNorm;
116 
117  void *cloverInv;
119 
120  double twist_a;
121  double twist_b;
122  double twist_c;
123 
124  int Vsh; // used by contraction kernels
125 
   // texture handles mirroring the pointer members above; compiled in only
   // when USE_TEXTURE_OBJECTS is defined
126 #ifdef USE_TEXTURE_OBJECTS
127  cudaTextureObject_t inTex;
128  cudaTextureObject_t inTexNorm;
129  cudaTextureObject_t ghostTex[2*QUDA_MAX_DIM];
130  cudaTextureObject_t ghostTexNorm[2*QUDA_MAX_DIM];
131  cudaTextureObject_t xTex;
132  cudaTextureObject_t xTexNorm;
133  cudaTextureObject_t outTex;
134  cudaTextureObject_t outTexNorm;
135  cudaTextureObject_t gauge0Tex; // also applies to fat gauge
136  cudaTextureObject_t gauge1Tex; // also applies to fat gauge
137  cudaTextureObject_t longGauge0Tex;
138  cudaTextureObject_t longGauge1Tex;
139  cudaTextureObject_t longPhase0Tex;
140  cudaTextureObject_t longPhase1Tex;
141  cudaTextureObject_t cloverTex;
142  cudaTextureObject_t cloverNormTex;
143  cudaTextureObject_t cloverInvTex;
144  cudaTextureObject_t cloverInvNormTex;
145 #endif
146 
147  // used by the autotuner to switch on/off remote writing vs using copy engines
149 
   // Debug dump: writes a selection of the scalar members through printfQuda,
   // one "name = value" line per field. Pointer members, texture objects and
   // the float "_f" duplicates are not printed. Only the first four entries of
   // each per-dimension array are shown. The stride lines are guarded by the
   // same preprocessor conditions as the corresponding member declarations.
150  void print() {
151  printfQuda("threads = %d\n", threads);
152  printfQuda("parity = %d\n", parity);
   // these dc members are cast to int explicitly so they match the %d specifier
153  printfQuda("X = {%d, %d, %d, %d}\n", (int)dc.X[0], (int)dc.X[1], (int)dc.X[2], (int)dc.X[3]);
154  printfQuda("Xh = {%d, %d, %d, %d}\n", (int)dc.Xh[0], (int)dc.Xh[1], (int)dc.Xh[2], (int)dc.Xh[3]);
155  printfQuda("volume4CB = %d\n", (int)dc.volume_4d_cb);
156  printfQuda("Ls = %d\n", dc.Ls);
157  printfQuda("kernel_type = %d\n", kernel_type);
158  printfQuda("commDim = {%d, %d, %d, %d}\n", commDim[0], commDim[1], commDim[2], commDim[3]);
159  printfQuda("ghostDim = {%d, %d, %d, %d}\n", ghostDim[0], ghostDim[1], ghostDim[2], ghostDim[3]);
160  printfQuda("ghostOffset = {{%d, %d}, {%d, %d}, {%d, %d}, {%d, %d}}\n", ghostOffset[0][0], ghostOffset[0][1],
161  ghostOffset[1][0], ghostOffset[1][1],
162  ghostOffset[2][0], ghostOffset[2][1],
163  ghostOffset[3][0], ghostOffset[3][1]);
164  printfQuda("ghostNormOffset = {{%d, %d}, {%d, %d}, {%d, %d}, {%d, %d}}\n", ghostNormOffset[0][0], ghostNormOffset[0][1],
165  ghostNormOffset[1][0], ghostNormOffset[1][1],
166  ghostNormOffset[2][0], ghostNormOffset[2][1],
167  ghostNormOffset[3][0], ghostNormOffset[3][1]);
168  printfQuda("sp_stride = %d\n", sp_stride);
169 #ifdef GPU_CLOVER_DIRAC
170  printfQuda("cl_stride = %d\n", cl_stride);
171 #endif
172 #if (defined GPU_TWISTED_MASS_DIRAC) || (defined GPU_NDEG_TWISTED_MASS_DIRAC)
173  printfQuda("fl_stride = %d\n", fl_stride);
174 #endif
175 #ifdef GPU_STAGGERED_DIRAC
   // NOTE(review): gauge_stride is printed only in staggered builds and its
   // declaration is not visible in this listing -- confirm where it is declared
176  printfQuda("gauge_stride = %d\n", gauge_stride);
177  printfQuda("long_gauge_stride = %d\n", long_gauge_stride);
178  printfQuda("fat_link_max = %e\n", fat_link_max);
179 #endif
180  printfQuda("spin_project = %s\n", spin_project ? "true" : "false");
181  printfQuda("threadDimMapLower = {%d, %d, %d, %d}\n", threadDimMapLower[0], threadDimMapLower[1],
182  threadDimMapLower[2], threadDimMapLower[3]);
183  printfQuda("threadDimMapUpper = {%d, %d, %d, %d}\n", threadDimMapUpper[0], threadDimMapUpper[1],
184  threadDimMapUpper[2], threadDimMapUpper[3]);
   // only the double-precision copy of each coefficient is reported
185  printfQuda("a = %e\n", a);
186  printfQuda("b = %e\n", b);
187  printfQuda("c = %e\n", c);
188  printfQuda("d = %e\n", d);
189  printfQuda("a_inv = %e\n", a_inv);
190  printfQuda("rho = %e\n", rho);
191  printfQuda("mferm = %e\n", mferm);
192  printfQuda("tProjScale = %e\n", tProjScale);
193  printfQuda("twist_a = %e\n", twist_a);
194  printfQuda("twist_b = %e\n", twist_b);
195  printfQuda("twist_c = %e\n", twist_c);
196  }
197  };
int commDim[QUDA_MAX_DIM]
int threadDimMapLower[4]
int_fastdiv grid[4]
float * ghostNorm[2 *QUDA_MAX_DIM]
int ghostOffset[QUDA_MAX_DIM+1][2]
double mdwf_b5_d[QUDA_MAX_DWF_LS]
double mdwf_c5_d[QUDA_MAX_DWF_LS]
void * ghost[2 *QUDA_MAX_DIM]
float mdwf_c5_f[QUDA_MAX_DWF_LS]
int_fastdiv block[4]
DslashConstant dc
KernelType kernel_type
float mdwf_b5_f[QUDA_MAX_DWF_LS]
int threadDimMapUpper[4]
#define QUDA_MAX_DWF_LS
Maximum length of the Ls dimension for domain-wall fermions.
float * cloverNorm
#define printfQuda(...)
Definition: util_quda.h:115
int_fastdiv swizzle
int ghostDim[QUDA_MAX_DIM]
#define QUDA_MAX_DIM
Maximum number of dimensions supported by QUDA. In practice, no routines make use of more than 5.
float * cloverInvNorm
int ghostNormOffset[QUDA_MAX_DIM+1][2]
float fat_link_max