QUDA  v0.5.0
A library for QCD on GPUs
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
dslash_textures.h
Go to the documentation of this file.
1 #include <typeinfo>
2 
3 // Use this macro for texture fetching for supporting either texture objects of texture references
4 #ifdef USE_TEXTURE_OBJECTS
5 #define TEX1DFETCH(type, tex, idx) tex1Dfetch<type>((tex), idx)
6 #else
7 #define TEX1DFETCH(type, tex, idx) tex1Dfetch((tex), idx)
8 #endif
9 
10 #if (__COMPUTE_CAPABILITY__ >= 130)
11 template <typename Tex>
12 static __inline__ __device__ double2 fetch_double2(Tex t, int i)
13 {
14  int4 v = TEX1DFETCH(int4, t, i);
15  return make_double2(__hiloint2double(v.y, v.x), __hiloint2double(v.w, v.z));
16 }
17 
18 static __inline__ __device__ double2 fetch_double2_old(texture<int4, 1> t, int i)
19 {
20  int4 v = tex1Dfetch(t,i);
21  return make_double2(__hiloint2double(v.y, v.x), __hiloint2double(v.w, v.z));
22 }
23 #endif //__COMPUTE_CAPABILITY__ >= 130
24 
25 #ifndef USE_TEXTURE_OBJECTS
26 // Double precision gauge field
27 texture<int4, 1> gauge0TexDouble2;
28 texture<int4, 1> gauge1TexDouble2;
29 
30 // Single precision gauge field
31 texture<float4, 1, cudaReadModeElementType> gauge0TexSingle4;
32 texture<float4, 1, cudaReadModeElementType> gauge1TexSingle4;
33 texture<float2, 1, cudaReadModeElementType> gauge0TexSingle2;
34 texture<float2, 1, cudaReadModeElementType> gauge1TexSingle2;
35 
36 // Half precision gauge field
37 texture<short4, 1, cudaReadModeNormalizedFloat> gauge0TexHalf4;
38 texture<short4, 1, cudaReadModeNormalizedFloat> gauge1TexHalf4;
39 texture<short2, 1, cudaReadModeNormalizedFloat> gauge0TexHalf2;
40 texture<short2, 1, cudaReadModeNormalizedFloat> gauge1TexHalf2;
41 
42 texture<int4, 1> longGauge0TexDouble;
43 texture<int4, 1> longGauge1TexDouble;
44 texture<float4, 1, cudaReadModeElementType> longGauge0TexSingle;
45 texture<float4, 1, cudaReadModeElementType> longGauge1TexSingle;
46 texture<float2, 1, cudaReadModeElementType> longGauge0TexSingle_norecon;
47 texture<float2, 1, cudaReadModeElementType> longGauge1TexSingle_norecon;
48 
49 texture<short4, 1, cudaReadModeNormalizedFloat> longGauge0TexHalf;
50 texture<short4, 1, cudaReadModeNormalizedFloat> longGauge1TexHalf;
51 texture<short2, 1, cudaReadModeNormalizedFloat> longGauge0TexHalf_norecon;
52 texture<short2, 1, cudaReadModeNormalizedFloat> longGauge1TexHalf_norecon;
53 
54 // Double precision input spinor field
55 texture<int4, 1> spinorTexDouble;
56 
57 // Single precision input spinor field
58 texture<float4, 1, cudaReadModeElementType> spinorTexSingle;
59 texture<float2, 1, cudaReadModeElementType> spinorTexSingle2;
60 
61 // Half precision input spinor field
62 texture<short4, 1, cudaReadModeNormalizedFloat> spinorTexHalf;
63 texture<short2, 1, cudaReadModeNormalizedFloat> spinorTexHalf2;
64 texture<float, 1, cudaReadModeElementType> spinorTexHalfNorm;
65 texture<float, 1, cudaReadModeElementType> spinorTexHalf2Norm;
66 
67 // Double precision accumulate spinor field
68 texture<int4, 1> accumTexDouble;
69 
70 // Single precision accumulate spinor field
71 texture<float4, 1, cudaReadModeElementType> accumTexSingle;
72 texture<float2, 1, cudaReadModeElementType> accumTexSingle2;
73 
74 // Half precision accumulate spinor field
75 texture<short4, 1, cudaReadModeNormalizedFloat> accumTexHalf;
76 texture<short2, 1, cudaReadModeNormalizedFloat> accumTexHalf2;
77 texture<float, 1, cudaReadModeElementType> accumTexHalfNorm;
78 texture<float, 1, cudaReadModeElementType> accumTexHalf2Norm;
79 
80 // Double precision intermediate spinor field (used by exterior Dslash kernels)
81 texture<int4, 1> interTexDouble;
82 
83 // Single precision intermediate spinor field
84 texture<float4, 1, cudaReadModeElementType> interTexSingle;
85 texture<float2, 1, cudaReadModeElementType> interTexSingle2;
86 
87 // Half precision intermediate spinor field
88 texture<short4, 1, cudaReadModeNormalizedFloat> interTexHalf;
89 texture<short2, 1, cudaReadModeNormalizedFloat> interTexHalf2;
90 texture<float, 1, cudaReadModeElementType> interTexHalfNorm;
91 texture<float, 1, cudaReadModeElementType> interTexHalf2Norm;
92 #endif // not defined USE_TEXTURE_OBJECTS
93 
94 // FIXME update the below textures for texture objects
95 
96 // fatGauge textures are still used by llfat so we need to define
97 texture<int4, 1> fatGauge0TexDouble;
98 texture<int4, 1> fatGauge1TexDouble;
99 texture<float2, 1, cudaReadModeElementType> fatGauge0TexSingle;
100 texture<float2, 1, cudaReadModeElementType> fatGauge1TexSingle;
101 texture<short2, 1, cudaReadModeNormalizedFloat> fatGauge0TexHalf;
102 texture<short2, 1, cudaReadModeNormalizedFloat> fatGauge1TexHalf;
103 
104 //Double precision for site link
105 texture<int4, 1> siteLink0TexDouble;
106 texture<int4, 1> siteLink1TexDouble;
107 
108 //Single precision for site link
109 texture<float2, 1, cudaReadModeElementType> siteLink0TexSingle;
110 texture<float2, 1, cudaReadModeElementType> siteLink1TexSingle;
111 
112 texture<float4, 1, cudaReadModeElementType> siteLink0TexSingle_recon;
113 texture<float4, 1, cudaReadModeElementType> siteLink1TexSingle_recon;
114 
115 texture<float2, 1, cudaReadModeElementType> siteLink0TexSingle_norecon;
116 texture<float2, 1, cudaReadModeElementType> siteLink1TexSingle_norecon;
117 
118 
119 texture<int4, 1> muLink0TexDouble;
120 texture<int4, 1> muLink1TexDouble;
121 // Single precision mulink field
122 texture<float2, 1, cudaReadModeElementType> muLink0TexSingle;
123 texture<float2, 1, cudaReadModeElementType> muLink1TexSingle;
124 
125 void bindGaugeTex(const cudaGaugeField &gauge, const int oddBit, void **gauge0, void **gauge1)
126 {
127  if(oddBit) {
128  *gauge0 = gauge.odd;
129  *gauge1 = gauge.even;
130  } else {
131  *gauge0 = gauge.even;
132  *gauge1 = gauge.odd;
133  }
134 
135 #ifdef USE_TEXTURE_OBJECTS
136  dslashParam.gauge0Tex = oddBit ? gauge.OddTex() : gauge.EvenTex();
137  dslashParam.gauge1Tex = oddBit ? gauge.EvenTex() : gauge.OddTex();
138 #else
139  if (gauge.reconstruct == QUDA_RECONSTRUCT_NO) {
140  if (gauge.precision == QUDA_DOUBLE_PRECISION) {
141  cudaBindTexture(0, gauge0TexDouble2, *gauge0, gauge.bytes/2);
142  cudaBindTexture(0, gauge1TexDouble2, *gauge1, gauge.bytes/2);
143  } else if (gauge.precision == QUDA_SINGLE_PRECISION) {
144  cudaBindTexture(0, gauge0TexSingle2, *gauge0, gauge.bytes/2);
145  cudaBindTexture(0, gauge1TexSingle2, *gauge1, gauge.bytes/2);
146  } else {
147  cudaBindTexture(0, gauge0TexHalf2, *gauge0, gauge.bytes/2);
148  cudaBindTexture(0, gauge1TexHalf2, *gauge1, gauge.bytes/2);
149  }
150  } else {
151  if (gauge.precision == QUDA_DOUBLE_PRECISION) {
152  cudaBindTexture(0, gauge0TexDouble2, *gauge0, gauge.bytes/2);
153  cudaBindTexture(0, gauge1TexDouble2, *gauge1, gauge.bytes/2);
154  } else if (gauge.precision == QUDA_SINGLE_PRECISION) {
155  cudaBindTexture(0, gauge0TexSingle4, *gauge0, gauge.bytes/2);
156  cudaBindTexture(0, gauge1TexSingle4, *gauge1, gauge.bytes/2);
157  } else {
158  cudaBindTexture(0, gauge0TexHalf4, *gauge0, gauge.bytes/2);
159  cudaBindTexture(0, gauge1TexHalf4, *gauge1, gauge.bytes/2);
160  }
161  }
162 #endif // USE_TEXTURE_OBJECTS
163 
164 }
165 
166 void unbindGaugeTex(const cudaGaugeField &gauge)
167 {
168 #if (!defined USE_TEXTURE_OBJECTS)
169  if (gauge.reconstruct == QUDA_RECONSTRUCT_NO) {
170  if (gauge.precision == QUDA_DOUBLE_PRECISION) {
171  cudaUnbindTexture(gauge0TexDouble2);
172  cudaUnbindTexture(gauge1TexDouble2);
173  } else if (gauge.precision == QUDA_SINGLE_PRECISION) {
174  cudaUnbindTexture(gauge0TexSingle2);
175  cudaUnbindTexture(gauge1TexSingle2);
176  } else {
177  cudaUnbindTexture(gauge0TexHalf2);
178  cudaUnbindTexture(gauge1TexHalf2);
179  }
180  } else {
181  if (gauge.precision == QUDA_DOUBLE_PRECISION) {
182  cudaUnbindTexture(gauge0TexDouble2);
183  cudaUnbindTexture(gauge1TexDouble2);
184  } else if (gauge.precision == QUDA_SINGLE_PRECISION) {
185  cudaUnbindTexture(gauge0TexSingle4);
186  cudaUnbindTexture(gauge1TexSingle4);
187  } else {
188  cudaUnbindTexture(gauge0TexHalf4);
189  cudaUnbindTexture(gauge1TexHalf4);
190  }
191  }
192 #endif
193 }
194 
195 void bindFatGaugeTex(const cudaGaugeField &gauge, const int oddBit, void **gauge0, void **gauge1)
196 {
197  if(oddBit) {
198  *gauge0 = gauge.odd;
199  *gauge1 = gauge.even;
200  } else {
201  *gauge0 = gauge.even;
202  *gauge1 = gauge.odd;
203  }
204 
205 #ifdef USE_TEXTURE_OBJECTS
206  dslashParam.gauge0Tex = oddBit ? gauge.OddTex() : gauge.EvenTex();
207  dslashParam.gauge1Tex = oddBit ? gauge.EvenTex() : gauge.OddTex();
208 #else
209  if (gauge.precision == QUDA_DOUBLE_PRECISION) {
210  cudaBindTexture(0, fatGauge0TexDouble, *gauge0, gauge.bytes/2);
211  cudaBindTexture(0, fatGauge1TexDouble, *gauge1, gauge.bytes/2);
212  } else if (gauge.precision == QUDA_SINGLE_PRECISION) {
213  cudaBindTexture(0, fatGauge0TexSingle, *gauge0, gauge.bytes/2);
214  cudaBindTexture(0, fatGauge1TexSingle, *gauge1, gauge.bytes/2);
215  } else {
216  cudaBindTexture(0, fatGauge0TexHalf, *gauge0, gauge.bytes/2);
217  cudaBindTexture(0, fatGauge1TexHalf, *gauge1, gauge.bytes/2);
218  }
219 #endif // USE_TEXTURE_OBJECTS
220 
221 }
222 
223 void unbindFatGaugeTex(const cudaGaugeField &gauge)
224 {
225 #if (!defined USE_TEXTURE_OBJECTS)
226  if (gauge.precision == QUDA_DOUBLE_PRECISION) {
227  cudaUnbindTexture(fatGauge0TexDouble);
228  cudaUnbindTexture(fatGauge1TexDouble);
229  } else if (gauge.precision == QUDA_SINGLE_PRECISION) {
230  cudaUnbindTexture(fatGauge0TexSingle);
231  cudaUnbindTexture(fatGauge1TexSingle);
232  } else {
233  cudaUnbindTexture(fatGauge0TexHalf);
234  cudaUnbindTexture(fatGauge1TexHalf);
235  }
236 #endif
237 }
238 
239 void bindLongGaugeTex(const cudaGaugeField &gauge, const int oddBit, void **gauge0, void **gauge1)
240 {
241  if(oddBit) {
242  *gauge0 = gauge.odd;
243  *gauge1 = gauge.even;
244  } else {
245  *gauge0 = gauge.even;
246  *gauge1 = gauge.odd;
247  }
248 
249 #ifdef USE_TEXTURE_OBJECTS
250  dslashParam.longGauge0Tex = oddBit ? gauge.OddTex() : gauge.EvenTex();
251  dslashParam.longGauge1Tex = oddBit ? gauge.EvenTex() : gauge.OddTex();
252 #else
253  if (gauge.precision == QUDA_DOUBLE_PRECISION) {
254  cudaBindTexture(0, longGauge0TexDouble, *gauge0, gauge.bytes/2);
255  cudaBindTexture(0, longGauge1TexDouble, *gauge1, gauge.bytes/2);
256  } else if (gauge.precision == QUDA_SINGLE_PRECISION) {
257  if (gauge.reconstruct == QUDA_RECONSTRUCT_NO) { //18 reconstruct
258  cudaBindTexture(0, longGauge0TexSingle_norecon, *gauge0, gauge.bytes/2);
259  cudaBindTexture(0, longGauge1TexSingle_norecon, *gauge1, gauge.bytes/2);
260  } else {
261  cudaBindTexture(0, longGauge0TexSingle, *gauge0, gauge.bytes/2);
262  cudaBindTexture(0, longGauge1TexSingle, *gauge1, gauge.bytes/2);
263  }
264  } else {
265  if (gauge.reconstruct == QUDA_RECONSTRUCT_NO) { //18 reconstruct
266  cudaBindTexture(0, longGauge0TexHalf_norecon, *gauge0, gauge.bytes/2);
267  cudaBindTexture(0, longGauge1TexHalf_norecon, *gauge1, gauge.bytes/2);
268  } else {
269  cudaBindTexture(0, longGauge0TexHalf, *gauge0, gauge.bytes/2);
270  cudaBindTexture(0, longGauge1TexHalf, *gauge1, gauge.bytes/2);
271  }
272  }
273 #endif // USE_TEXTURE_OBJECTS
274 }
275 
276 void unbindLongGaugeTex(const cudaGaugeField &gauge)
277 {
278 #if (!defined USE_TEXTURE_OBJECTS)
279  if (gauge.precision == QUDA_DOUBLE_PRECISION) {
280  cudaUnbindTexture(longGauge0TexDouble);
281  cudaUnbindTexture(longGauge1TexDouble);
282  } else if (gauge.precision == QUDA_SINGLE_PRECISION) {
283  if (gauge.reconstruct == QUDA_RECONSTRUCT_NO) { //18 reconstruct
284  cudaUnbindTexture(longGauge0TexSingle_norecon);
285  cudaUnbindTexture(longGauge1TexSingle_norecon);
286  } else {
287  cudaUnbindTexture(longGauge0TexSingle);
288  cudaUnbindTexture(longGauge1TexSingle);
289  }
290  } else {
291  if (gauge.reconstruct == QUDA_RECONSTRUCT_NO) { //18 reconstruct
292  cudaUnbindTexture(longGauge0TexHalf_norecon);
293  cudaUnbindTexture(longGauge1TexHalf_norecon);
294  } else {
295  cudaUnbindTexture(longGauge0TexHalf);
296  cudaUnbindTexture(longGauge1TexHalf);
297  }
298  }
299 #endif
300 }
301 
302 
303 template <typename spinorFloat>
304 int bindSpinorTex(const cudaColorSpinorField *in, const cudaColorSpinorField *out=0,
305  const cudaColorSpinorField *x=0) {
306  int size = (sizeof(((spinorFloat*)0)->x) < sizeof(float)) ? sizeof(float) :
307  sizeof(((spinorFloat*)0)->x);
308 
309 #ifdef USE_TEXTURE_OBJECTS
310  dslashParam.inTex = in->Tex();
311  dslashParam.inTexNorm = in->TexNorm();
312  if (out) dslashParam.outTex = out->Tex();
313  if (out) dslashParam.outTexNorm = out->TexNorm();
314  if (x) dslashParam.xTex = x->Tex();
315  if (x) dslashParam.xTexNorm = x->TexNorm();
316 #else
317  if (typeid(spinorFloat) == typeid(double2)) {
318  cudaBindTexture(0, spinorTexDouble, in->V(), in->Bytes());
319  if (out) cudaBindTexture(0, interTexDouble, out->V(), in->Bytes());
320  if (x) cudaBindTexture(0, accumTexDouble, x->V(), in->Bytes());
321  } else if (typeid(spinorFloat) == typeid(float4)) {
322  cudaBindTexture(0, spinorTexSingle, in->V(), in->Bytes());
323  if (out) cudaBindTexture(0, interTexSingle, out->V(), in->Bytes());
324  if (x) cudaBindTexture(0, accumTexSingle, x->V(), in->Bytes());
325  } else if (typeid(spinorFloat) == typeid(float2)) {
326  cudaBindTexture(0, spinorTexSingle2, in->V(), in->Bytes());
327  if (out) cudaBindTexture(0, interTexSingle2, out->V(), in->Bytes());
328  if (x) cudaBindTexture(0, accumTexSingle2, x->V(), in->Bytes());
329  } else if (typeid(spinorFloat) == typeid(short4)) {
330  cudaBindTexture(0, spinorTexHalf, in->V(), in->Bytes());
331  cudaBindTexture(0, spinorTexHalfNorm, in->Norm(), in->NormBytes());
332  if (out) cudaBindTexture(0, interTexHalf, out->V(), in->Bytes());
333  if (out) cudaBindTexture(0, interTexHalfNorm, out->Norm(), in->NormBytes());
334  if (x) cudaBindTexture(0, accumTexHalf, x->V(), in->Bytes());
335  if (x) cudaBindTexture(0, accumTexHalfNorm, x->Norm(), in->NormBytes());
336  } else if (typeid(spinorFloat) == typeid(short2)) {
337  cudaBindTexture(0, spinorTexHalf2, in->V(), in->Bytes());
338  cudaBindTexture(0, spinorTexHalf2Norm, in->Norm(), in->NormBytes());
339  if (out) cudaBindTexture(0, interTexHalf2, out->V(), in->Bytes());
340  if (out) cudaBindTexture(0, interTexHalf2Norm, out->Norm(), in->NormBytes());
341  if (x) cudaBindTexture(0, accumTexHalf2, x->V(), in->Bytes());
342  if (x) cudaBindTexture(0, accumTexHalf2Norm, x->Norm(), in->NormBytes());
343  } else {
344  errorQuda("Unsupported precision and short vector type");
345  }
346 #endif // USE_TEXTURE_OBJECTS
347 
348  return size;
349 }
350 
351 template <typename spinorFloat>
352 void unbindSpinorTex(const cudaColorSpinorField *in, const cudaColorSpinorField *out=0,
353  const cudaColorSpinorField *x=0) {
354 #ifndef USE_TEXTURE_OBJECTS
355  if (typeid(spinorFloat) == typeid(double2)) {
356  cudaUnbindTexture(spinorTexDouble);
357  if (out) cudaUnbindTexture(interTexDouble);
358  if (x) cudaUnbindTexture(accumTexDouble);
359  } else if (typeid(spinorFloat) == typeid(float4)) {
360  cudaUnbindTexture(spinorTexSingle);
361  if (out) cudaUnbindTexture(interTexSingle);
362  if (x) cudaUnbindTexture(accumTexSingle);
363  } else if (typeid(spinorFloat) == typeid(float2)) {
364  cudaUnbindTexture(spinorTexSingle2);
365  if (out) cudaUnbindTexture(interTexSingle2);
366  if (x) cudaUnbindTexture(accumTexSingle2);
367  } else if (typeid(spinorFloat) == typeid(short4)) {
368  cudaUnbindTexture(spinorTexHalf);
369  cudaUnbindTexture(spinorTexHalfNorm);
370  if (out) cudaUnbindTexture(interTexHalf);
371  if (out) cudaUnbindTexture(interTexHalfNorm);
372  if (x) cudaUnbindTexture(accumTexHalf);
373  if (x) cudaUnbindTexture(accumTexHalfNorm);
374  } else if (typeid(spinorFloat) == typeid(short2)) {
375  cudaUnbindTexture(spinorTexHalf2);
376  cudaUnbindTexture(spinorTexHalf2Norm);
377  if (out) cudaUnbindTexture(interTexHalf2);
378  if (out) cudaUnbindTexture(interTexHalf2Norm);
379  if (x) cudaUnbindTexture(accumTexHalf2);
380  if (x) cudaUnbindTexture(accumTexHalf2Norm);
381  } else {
382  errorQuda("Unsupported precision and short vector type");
383  }
384 #endif // USE_TEXTURE_OBJECTS
385 }
386 
387 // Double precision clover term
388 texture<int4, 1> cloverTexDouble;
389 
390 // Single precision clover term
391 texture<float4, 1, cudaReadModeElementType> cloverTexSingle;
392 
393 // Half precision clover term
394 texture<short4, 1, cudaReadModeNormalizedFloat> cloverTexHalf;
395 texture<float, 1, cudaReadModeElementType> cloverTexNorm;
396 
397 QudaPrecision bindCloverTex(const FullClover clover, const int oddBit,
398  void **cloverP, void **cloverNormP)
399 {
400 
401  if (oddBit) {
402  *cloverP = clover.odd;
403  *cloverNormP = clover.oddNorm;
404  } else {
405  *cloverP = clover.even;
406  *cloverNormP = clover.evenNorm;
407  }
408 
409 #ifdef USE_TEXTURE_OBJECTS
410  dslashParam.cloverTex = oddBit ? clover.OddTex() : clover.EvenTex();
411  if (clover.precision == QUDA_HALF_PRECISION) dslashParam.cloverNormTex = oddBit ? clover.OddNormTex() : clover.EvenNormTex();
412 #else
413  if (clover.precision == QUDA_DOUBLE_PRECISION) {
414  cudaBindTexture(0, cloverTexDouble, *cloverP, clover.bytes);
415  } else if (clover.precision == QUDA_SINGLE_PRECISION) {
416  cudaBindTexture(0, cloverTexSingle, *cloverP, clover.bytes);
417  } else {
418  cudaBindTexture(0, cloverTexHalf, *cloverP, clover.bytes);
419  cudaBindTexture(0, cloverTexNorm, *cloverNormP, clover.norm_bytes);
420  }
421 #endif // USE_TEXTURE_OBJECTS
422 
423  return clover.precision;
424 }
425 
426 void unbindCloverTex(const FullClover clover)
427 {
428 #if (!defined USE_TEXTURE_OBJECTS)
429  if (clover.precision == QUDA_DOUBLE_PRECISION) {
430  cudaUnbindTexture(cloverTexDouble);
431  } else if (clover.precision == QUDA_SINGLE_PRECISION) {
432  cudaUnbindTexture(cloverTexSingle);
433  } else {
434  cudaUnbindTexture(cloverTexHalf);
435  cudaUnbindTexture(cloverTexNorm);
436  }
437 #endif // not defined USE_TEXTURE_OBJECTS
438 }
439