QUDA  v0.7.0
A library for QCD on GPUs
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
dslash_textures.h
Go to the documentation of this file.
1 #include <typeinfo>
2 
3 // Use this macro for texture fetching for supporting either texture objects of texture references
4 #ifdef USE_TEXTURE_OBJECTS
5 #define TEX1DFETCH(type, tex, idx) tex1Dfetch<type>((tex), idx)
6 #else
7 #define TEX1DFETCH(type, tex, idx) tex1Dfetch((tex), idx)
8 #endif
9 
10 #if (__COMPUTE_CAPABILITY__ >= 130)
11 
12 template<typename Tex>
13 static __inline__ __device__ double fetch_double(Tex t, int i)
14 {
15  int2 v = TEX1DFETCH(int2, t, i);
16  return __hiloint2double(v.y, v.x);
17 }
18 
19 template <typename Tex>
20 static __inline__ __device__ double2 fetch_double2(Tex t, int i)
21 {
22  int4 v = TEX1DFETCH(int4, t, i);
23  return make_double2(__hiloint2double(v.y, v.x), __hiloint2double(v.w, v.z));
24 }
25 
26 static __inline__ __device__ double2 fetch_double2_old(texture<int4, 1> t, int i)
27 {
28  int4 v = tex1Dfetch(t,i);
29  return make_double2(__hiloint2double(v.y, v.x), __hiloint2double(v.w, v.z));
30 }
31 #endif //__COMPUTE_CAPABILITY__ >= 130
32 
33 
34 #ifndef USE_TEXTURE_OBJECTS
35 // Double precision gauge field
36 texture<int4, 1> gauge0TexDouble2;
37 texture<int4, 1> gauge1TexDouble2;
38 
39 // Single precision gauge field
40 texture<float4, 1, cudaReadModeElementType> gauge0TexSingle4;
41 texture<float4, 1, cudaReadModeElementType> gauge1TexSingle4;
42 texture<float2, 1, cudaReadModeElementType> gauge0TexSingle2;
43 texture<float2, 1, cudaReadModeElementType> gauge1TexSingle2;
44 
45 // Half precision gauge field
46 texture<short4, 1, cudaReadModeNormalizedFloat> gauge0TexHalf4;
47 texture<short4, 1, cudaReadModeNormalizedFloat> gauge1TexHalf4;
48 texture<short2, 1, cudaReadModeNormalizedFloat> gauge0TexHalf2;
49 texture<short2, 1, cudaReadModeNormalizedFloat> gauge1TexHalf2;
50 
51 texture<int4, 1> longGauge0TexDouble;
52 texture<int4, 1> longGauge1TexDouble;
53 texture<int2, 1> longPhase0TexDouble;
54 texture<int2, 1> longPhase1TexDouble;
55 
56 texture<float4, 1, cudaReadModeElementType> longGauge0TexSingle;
57 texture<float4, 1, cudaReadModeElementType> longGauge1TexSingle;
58 texture<float2, 1, cudaReadModeElementType> longGauge0TexSingle_norecon;
59 texture<float2, 1, cudaReadModeElementType> longGauge1TexSingle_norecon;
60 texture<float, 1, cudaReadModeElementType> longPhase0TexSingle;
61 texture<float, 1, cudaReadModeElementType> longPhase1TexSingle;
62 
63 
64 texture<short4, 1, cudaReadModeNormalizedFloat> longGauge0TexHalf;
65 texture<short4, 1, cudaReadModeNormalizedFloat> longGauge1TexHalf;
66 texture<short2, 1, cudaReadModeNormalizedFloat> longGauge0TexHalf_norecon;
67 texture<short2, 1, cudaReadModeNormalizedFloat> longGauge1TexHalf_norecon;
68 texture<short, 1, cudaReadModeNormalizedFloat> longPhase0TexHalf;
69 texture<short, 1, cudaReadModeNormalizedFloat> longPhase1TexHalf;
70 
71 
72 // Double precision input spinor field
73 texture<int4, 1> spinorTexDouble;
74 
75 // Single precision input spinor field
76 texture<float4, 1, cudaReadModeElementType> spinorTexSingle;
77 texture<float2, 1, cudaReadModeElementType> spinorTexSingle2;
78 
79 // Half precision input spinor field
80 texture<short4, 1, cudaReadModeNormalizedFloat> spinorTexHalf;
81 texture<short2, 1, cudaReadModeNormalizedFloat> spinorTexHalf2;
82 texture<float, 1, cudaReadModeElementType> spinorTexHalfNorm;
83 texture<float, 1, cudaReadModeElementType> spinorTexHalf2Norm;
84 
85 // Double precision accumulate spinor field
86 texture<int4, 1> accumTexDouble;
87 
88 // Single precision accumulate spinor field
89 texture<float4, 1, cudaReadModeElementType> accumTexSingle;
90 texture<float2, 1, cudaReadModeElementType> accumTexSingle2;
91 
92 // Half precision accumulate spinor field
93 texture<short4, 1, cudaReadModeNormalizedFloat> accumTexHalf;
94 texture<short2, 1, cudaReadModeNormalizedFloat> accumTexHalf2;
95 texture<float, 1, cudaReadModeElementType> accumTexHalfNorm;
96 texture<float, 1, cudaReadModeElementType> accumTexHalf2Norm;
97 
98 // Double precision intermediate spinor field (used by exterior Dslash kernels)
99 texture<int4, 1> interTexDouble;
100 
101 // Single precision intermediate spinor field
102 texture<float4, 1, cudaReadModeElementType> interTexSingle;
103 texture<float2, 1, cudaReadModeElementType> interTexSingle2;
104 
105 // Half precision intermediate spinor field
106 texture<short4, 1, cudaReadModeNormalizedFloat> interTexHalf;
107 texture<short2, 1, cudaReadModeNormalizedFloat> interTexHalf2;
108 texture<float, 1, cudaReadModeElementType> interTexHalfNorm;
109 texture<float, 1, cudaReadModeElementType> interTexHalf2Norm;
110 #endif // not defined USE_TEXTURE_OBJECTS
111 
112 // FIXME update the below textures for texture objects
113 
114 // fatGauge textures are still used by llfat so we need to define
115 texture<int4, 1> fatGauge0TexDouble;
116 texture<int4, 1> fatGauge1TexDouble;
117 texture<float2, 1, cudaReadModeElementType> fatGauge0TexSingle;
118 texture<float2, 1, cudaReadModeElementType> fatGauge1TexSingle;
119 texture<short2, 1, cudaReadModeNormalizedFloat> fatGauge0TexHalf;
120 texture<short2, 1, cudaReadModeNormalizedFloat> fatGauge1TexHalf;
121 
122 //Double precision for site link
123 texture<int4, 1> siteLink0TexDouble;
124 texture<int4, 1> siteLink1TexDouble;
125 
126 //Single precision for site link
127 texture<float2, 1, cudaReadModeElementType> siteLink0TexSingle;
128 texture<float2, 1, cudaReadModeElementType> siteLink1TexSingle;
129 
130 texture<float4, 1, cudaReadModeElementType> siteLink0TexSingle_recon;
131 texture<float4, 1, cudaReadModeElementType> siteLink1TexSingle_recon;
132 
133 texture<float2, 1, cudaReadModeElementType> siteLink0TexSingle_norecon;
134 texture<float2, 1, cudaReadModeElementType> siteLink1TexSingle_norecon;
135 
136 
137 texture<int4, 1> muLink0TexDouble;
138 texture<int4, 1> muLink1TexDouble;
139 // Single precision mulink field
140 texture<float2, 1, cudaReadModeElementType> muLink0TexSingle;
141 texture<float2, 1, cudaReadModeElementType> muLink1TexSingle;
142 
143 void bindGaugeTex(const cudaGaugeField &gauge, const int oddBit, void **gauge0, void **gauge1)
144 {
145  if(oddBit) {
146  *gauge0 = const_cast<void*>(gauge.Odd_p());
147  *gauge1 = const_cast<void*>(gauge.Even_p());
148  } else {
149  *gauge0 = const_cast<void*>(gauge.Even_p());
150  *gauge1 = const_cast<void*>(gauge.Odd_p());
151  }
152 
153 #ifdef USE_TEXTURE_OBJECTS
154  dslashParam.gauge0Tex = oddBit ? gauge.OddTex() : gauge.EvenTex();
155  dslashParam.gauge1Tex = oddBit ? gauge.EvenTex() : gauge.OddTex();
156 #else
157  if (gauge.Reconstruct() == QUDA_RECONSTRUCT_NO) {
158  if (gauge.Precision() == QUDA_DOUBLE_PRECISION) {
159  cudaBindTexture(0, gauge0TexDouble2, *gauge0, gauge.Bytes()/2);
160  cudaBindTexture(0, gauge1TexDouble2, *gauge1, gauge.Bytes()/2);
161  } else if (gauge.Precision() == QUDA_SINGLE_PRECISION) {
162  cudaBindTexture(0, gauge0TexSingle2, *gauge0, gauge.Bytes()/2);
163  cudaBindTexture(0, gauge1TexSingle2, *gauge1, gauge.Bytes()/2);
164  } else {
165  cudaBindTexture(0, gauge0TexHalf2, *gauge0, gauge.Bytes()/2);
166  cudaBindTexture(0, gauge1TexHalf2, *gauge1, gauge.Bytes()/2);
167  }
168  } else {
169  if (gauge.Precision() == QUDA_DOUBLE_PRECISION) {
170  cudaBindTexture(0, gauge0TexDouble2, *gauge0, gauge.Bytes()/2);
171  cudaBindTexture(0, gauge1TexDouble2, *gauge1, gauge.Bytes()/2);
172  } else if (gauge.Precision() == QUDA_SINGLE_PRECISION) {
173  cudaBindTexture(0, gauge0TexSingle4, *gauge0, gauge.Bytes()/2);
174  cudaBindTexture(0, gauge1TexSingle4, *gauge1, gauge.Bytes()/2);
175  } else {
176  cudaBindTexture(0, gauge0TexHalf4, *gauge0, gauge.Bytes()/2);
177  cudaBindTexture(0, gauge1TexHalf4, *gauge1, gauge.Bytes()/2);
178  }
179  }
180 #endif // USE_TEXTURE_OBJECTS
181 
182 }
183 
184 void unbindGaugeTex(const cudaGaugeField &gauge)
185 {
186 #if (!defined USE_TEXTURE_OBJECTS)
187  if (gauge.Reconstruct() == QUDA_RECONSTRUCT_NO) {
188  if (gauge.Precision() == QUDA_DOUBLE_PRECISION) {
189  cudaUnbindTexture(gauge0TexDouble2);
190  cudaUnbindTexture(gauge1TexDouble2);
191  } else if (gauge.Precision() == QUDA_SINGLE_PRECISION) {
192  cudaUnbindTexture(gauge0TexSingle2);
193  cudaUnbindTexture(gauge1TexSingle2);
194  } else {
195  cudaUnbindTexture(gauge0TexHalf2);
196  cudaUnbindTexture(gauge1TexHalf2);
197  }
198  } else {
199  if (gauge.Precision() == QUDA_DOUBLE_PRECISION) {
200  cudaUnbindTexture(gauge0TexDouble2);
201  cudaUnbindTexture(gauge1TexDouble2);
202  } else if (gauge.Precision() == QUDA_SINGLE_PRECISION) {
203  cudaUnbindTexture(gauge0TexSingle4);
204  cudaUnbindTexture(gauge1TexSingle4);
205  } else {
206  cudaUnbindTexture(gauge0TexHalf4);
207  cudaUnbindTexture(gauge1TexHalf4);
208  }
209  }
210 #endif
211 }
212 
213 void bindFatGaugeTex(const cudaGaugeField &gauge, const int oddBit, void **gauge0, void **gauge1)
214 {
215  if(oddBit) {
216  *gauge0 = const_cast<void*>(gauge.Odd_p());
217  *gauge1 = const_cast<void*>(gauge.Even_p());
218  } else {
219  *gauge0 = const_cast<void*>(gauge.Even_p());
220  *gauge1 = const_cast<void*>(gauge.Odd_p());
221  }
222 
223 #ifdef USE_TEXTURE_OBJECTS
224  dslashParam.gauge0Tex = oddBit ? gauge.OddTex() : gauge.EvenTex();
225  dslashParam.gauge1Tex = oddBit ? gauge.EvenTex() : gauge.OddTex();
226 #else
227  if (gauge.Precision() == QUDA_DOUBLE_PRECISION) {
228  cudaBindTexture(0, fatGauge0TexDouble, *gauge0, gauge.Bytes()/2);
229  cudaBindTexture(0, fatGauge1TexDouble, *gauge1, gauge.Bytes()/2);
230  } else if (gauge.Precision() == QUDA_SINGLE_PRECISION) {
231  cudaBindTexture(0, fatGauge0TexSingle, *gauge0, gauge.Bytes()/2);
232  cudaBindTexture(0, fatGauge1TexSingle, *gauge1, gauge.Bytes()/2);
233  } else {
234  cudaBindTexture(0, fatGauge0TexHalf, *gauge0, gauge.Bytes()/2);
235  cudaBindTexture(0, fatGauge1TexHalf, *gauge1, gauge.Bytes()/2);
236  }
237 #endif // USE_TEXTURE_OBJECTS
238 
239 }
240 
241 void unbindFatGaugeTex(const cudaGaugeField &gauge)
242 {
243 #if (!defined USE_TEXTURE_OBJECTS)
244  if (gauge.Precision() == QUDA_DOUBLE_PRECISION) {
245  cudaUnbindTexture(fatGauge0TexDouble);
246  cudaUnbindTexture(fatGauge1TexDouble);
247  } else if (gauge.Precision() == QUDA_SINGLE_PRECISION) {
248  cudaUnbindTexture(fatGauge0TexSingle);
249  cudaUnbindTexture(fatGauge1TexSingle);
250  } else {
251  cudaUnbindTexture(fatGauge0TexHalf);
252  cudaUnbindTexture(fatGauge1TexHalf);
253  }
254 #endif
255 }
256 
257 void bindLongGaugeTex(const cudaGaugeField &gauge, const int oddBit, void **gauge0, void **gauge1)
258 {
259  if(oddBit) {
260  *gauge0 = const_cast<void*>(gauge.Odd_p());
261  *gauge1 = const_cast<void*>(gauge.Even_p());
262  } else {
263  *gauge0 = const_cast<void*>(gauge.Even_p());
264  *gauge1 = const_cast<void*>(gauge.Odd_p());
265  }
266 
267 #ifdef USE_TEXTURE_OBJECTS
268  dslashParam.longGauge0Tex = oddBit ? gauge.OddTex() : gauge.EvenTex();
269  dslashParam.longGauge1Tex = oddBit ? gauge.EvenTex() : gauge.OddTex();
270 
271  if(gauge.Reconstruct() == QUDA_RECONSTRUCT_13 || gauge.Reconstruct() == QUDA_RECONSTRUCT_9){
272  dslashParam.longPhase0Tex = oddBit ? gauge.OddPhaseTex() : gauge.EvenPhaseTex();
273  dslashParam.longPhase1Tex = oddBit ? gauge.EvenPhaseTex() : gauge.OddPhaseTex();
274  }
275 #else
276  if (gauge.Precision() == QUDA_DOUBLE_PRECISION) {
277  cudaBindTexture(0, longGauge0TexDouble, *gauge0, gauge.Bytes()/2);
278  cudaBindTexture(0, longGauge1TexDouble, *gauge1, gauge.Bytes()/2);
279  if(gauge.Reconstruct() == QUDA_RECONSTRUCT_13 || gauge.Reconstruct() == QUDA_RECONSTRUCT_9){
280  cudaBindTexture(0, longPhase0TexDouble, (char*)(*gauge0) + gauge.PhaseOffset(), gauge.PhaseBytes()/2);
281  cudaBindTexture(0, longPhase1TexDouble, (char*)(*gauge1) + gauge.PhaseOffset(), gauge.PhaseBytes()/2);
282  }
283  } else if (gauge.Precision() == QUDA_SINGLE_PRECISION) {
284  if (gauge.Reconstruct() == QUDA_RECONSTRUCT_NO) { //18 reconstruct
285  cudaBindTexture(0, longGauge0TexSingle_norecon, *gauge0, gauge.Bytes()/2);
286  cudaBindTexture(0, longGauge1TexSingle_norecon, *gauge1, gauge.Bytes()/2);
287  } else {
288  cudaBindTexture(0, longGauge0TexSingle, *gauge0, gauge.Bytes()/2);
289  cudaBindTexture(0, longGauge1TexSingle, *gauge1, gauge.Bytes()/2);
290  if(gauge.Reconstruct() == QUDA_RECONSTRUCT_13 || gauge.Reconstruct() == QUDA_RECONSTRUCT_9){
291  cudaBindTexture(0, longPhase0TexSingle, (char*)(*gauge0) + gauge.PhaseOffset(), gauge.PhaseBytes()/2);
292  cudaBindTexture(0, longPhase1TexSingle, (char*)(*gauge1) + gauge.PhaseOffset(), gauge.PhaseBytes()/2);
293  }
294  }
295  } else {
296  if (gauge.Reconstruct() == QUDA_RECONSTRUCT_NO) { //18 reconstruct
297  cudaBindTexture(0, longGauge0TexHalf_norecon, *gauge0, gauge.Bytes()/2);
298  cudaBindTexture(0, longGauge1TexHalf_norecon, *gauge1, gauge.Bytes()/2);
299  } else {
300  cudaBindTexture(0, longGauge0TexHalf, *gauge0, gauge.Bytes()/2);
301  cudaBindTexture(0, longGauge1TexHalf, *gauge1, gauge.Bytes()/2);
302  if(gauge.Reconstruct() == QUDA_RECONSTRUCT_13 || gauge.Reconstruct() == QUDA_RECONSTRUCT_9){
303  cudaBindTexture(0, longPhase0TexHalf, (char*)(*gauge0) + gauge.PhaseOffset(), gauge.PhaseBytes()/2);
304  cudaBindTexture(0, longPhase1TexHalf, (char*)(*gauge1) + gauge.PhaseOffset(), gauge.PhaseBytes()/2);
305  }
306  }
307  }
308 #endif // USE_TEXTURE_OBJECTS
309 }
310 
311 void unbindLongGaugeTex(const cudaGaugeField &gauge)
312 {
313 #if (!defined USE_TEXTURE_OBJECTS)
314  if (gauge.Precision() == QUDA_DOUBLE_PRECISION) {
315  cudaUnbindTexture(longGauge0TexDouble);
316  cudaUnbindTexture(longGauge1TexDouble);
317  if(gauge.Reconstruct() == QUDA_RECONSTRUCT_13 || gauge.Reconstruct() == QUDA_RECONSTRUCT_9){
318  cudaUnbindTexture(longPhase0TexDouble);
319  cudaUnbindTexture(longPhase1TexDouble);
320  }
321  } else if (gauge.Precision() == QUDA_SINGLE_PRECISION) {
322  if (gauge.Reconstruct() == QUDA_RECONSTRUCT_NO) { //18 reconstruct
323  cudaUnbindTexture(longGauge0TexSingle_norecon);
324  cudaUnbindTexture(longGauge1TexSingle_norecon);
325  } else {
326  cudaUnbindTexture(longGauge0TexSingle);
327  cudaUnbindTexture(longGauge1TexSingle);
328  if(gauge.Reconstruct() == QUDA_RECONSTRUCT_13 || gauge.Reconstruct() == QUDA_RECONSTRUCT_9){
329  cudaUnbindTexture(longPhase0TexSingle);
330  cudaUnbindTexture(longPhase1TexSingle);
331  }
332  }
333  } else { // half precision
334  if (gauge.Reconstruct() == QUDA_RECONSTRUCT_NO) { //18 reconstruct
335  cudaUnbindTexture(longGauge0TexHalf_norecon);
336  cudaUnbindTexture(longGauge1TexHalf_norecon);
337  } else {
338  cudaUnbindTexture(longGauge0TexHalf);
339  cudaUnbindTexture(longGauge1TexHalf);
340  if(gauge.Reconstruct() == QUDA_RECONSTRUCT_13 || gauge.Reconstruct() == QUDA_RECONSTRUCT_9){
341  cudaUnbindTexture(longPhase0TexHalf);
342  cudaUnbindTexture(longPhase1TexHalf);
343  }
344  }
345  }
346 #endif
347 }
348 
349 
350 template <typename spinorFloat>
351 int bindSpinorTex(const cudaColorSpinorField *in, const cudaColorSpinorField *out=0,
352  const cudaColorSpinorField *x=0) {
353  int size = (sizeof(((spinorFloat*)0)->x) < sizeof(float)) ? sizeof(float) :
354  sizeof(((spinorFloat*)0)->x);
355 
356 #ifdef USE_TEXTURE_OBJECTS
357  dslashParam.inTex = in->Tex();
358  dslashParam.inTexNorm = in->TexNorm();
359  if (out) dslashParam.outTex = out->Tex();
360  if (out) dslashParam.outTexNorm = out->TexNorm();
361  if (x) dslashParam.xTex = x->Tex();
362  if (x) dslashParam.xTexNorm = x->TexNorm();
363 #else
364  if (typeid(spinorFloat) == typeid(double2)) {
365  cudaBindTexture(0, spinorTexDouble, in->V(), in->Bytes());
366  if (out) cudaBindTexture(0, interTexDouble, out->V(), in->Bytes());
367  if (x) cudaBindTexture(0, accumTexDouble, x->V(), in->Bytes());
368  } else if (typeid(spinorFloat) == typeid(float4)) {
369  cudaBindTexture(0, spinorTexSingle, in->V(), in->Bytes());
370  if (out) cudaBindTexture(0, interTexSingle, out->V(), in->Bytes());
371  if (x) cudaBindTexture(0, accumTexSingle, x->V(), in->Bytes());
372  } else if (typeid(spinorFloat) == typeid(float2)) {
373  cudaBindTexture(0, spinorTexSingle2, in->V(), in->Bytes());
374  if (out) cudaBindTexture(0, interTexSingle2, out->V(), in->Bytes());
375  if (x) cudaBindTexture(0, accumTexSingle2, x->V(), in->Bytes());
376  } else if (typeid(spinorFloat) == typeid(short4)) {
377  cudaBindTexture(0, spinorTexHalf, in->V(), in->Bytes());
378  cudaBindTexture(0, spinorTexHalfNorm, in->Norm(), in->NormBytes());
379  if (out) cudaBindTexture(0, interTexHalf, out->V(), in->Bytes());
380  if (out) cudaBindTexture(0, interTexHalfNorm, out->Norm(), in->NormBytes());
381  if (x) cudaBindTexture(0, accumTexHalf, x->V(), in->Bytes());
382  if (x) cudaBindTexture(0, accumTexHalfNorm, x->Norm(), in->NormBytes());
383  } else if (typeid(spinorFloat) == typeid(short2)) {
384  cudaBindTexture(0, spinorTexHalf2, in->V(), in->Bytes());
385  cudaBindTexture(0, spinorTexHalf2Norm, in->Norm(), in->NormBytes());
386  if (out) cudaBindTexture(0, interTexHalf2, out->V(), in->Bytes());
387  if (out) cudaBindTexture(0, interTexHalf2Norm, out->Norm(), in->NormBytes());
388  if (x) cudaBindTexture(0, accumTexHalf2, x->V(), in->Bytes());
389  if (x) cudaBindTexture(0, accumTexHalf2Norm, x->Norm(), in->NormBytes());
390  } else {
391  errorQuda("Unsupported precision and short vector type");
392  }
393 #endif // USE_TEXTURE_OBJECTS
394 
395  return size;
396 }
397 
398 template <typename spinorFloat>
399 void unbindSpinorTex(const cudaColorSpinorField *in, const cudaColorSpinorField *out=0,
400  const cudaColorSpinorField *x=0) {
401 #ifndef USE_TEXTURE_OBJECTS
402  if (typeid(spinorFloat) == typeid(double2)) {
403  cudaUnbindTexture(spinorTexDouble);
404  if (out) cudaUnbindTexture(interTexDouble);
405  if (x) cudaUnbindTexture(accumTexDouble);
406  } else if (typeid(spinorFloat) == typeid(float4)) {
407  cudaUnbindTexture(spinorTexSingle);
408  if (out) cudaUnbindTexture(interTexSingle);
409  if (x) cudaUnbindTexture(accumTexSingle);
410  } else if (typeid(spinorFloat) == typeid(float2)) {
411  cudaUnbindTexture(spinorTexSingle2);
412  if (out) cudaUnbindTexture(interTexSingle2);
413  if (x) cudaUnbindTexture(accumTexSingle2);
414  } else if (typeid(spinorFloat) == typeid(short4)) {
415  cudaUnbindTexture(spinorTexHalf);
416  cudaUnbindTexture(spinorTexHalfNorm);
417  if (out) cudaUnbindTexture(interTexHalf);
418  if (out) cudaUnbindTexture(interTexHalfNorm);
419  if (x) cudaUnbindTexture(accumTexHalf);
420  if (x) cudaUnbindTexture(accumTexHalfNorm);
421  } else if (typeid(spinorFloat) == typeid(short2)) {
422  cudaUnbindTexture(spinorTexHalf2);
423  cudaUnbindTexture(spinorTexHalf2Norm);
424  if (out) cudaUnbindTexture(interTexHalf2);
425  if (out) cudaUnbindTexture(interTexHalf2Norm);
426  if (x) cudaUnbindTexture(accumTexHalf2);
427  if (x) cudaUnbindTexture(accumTexHalf2Norm);
428  } else {
429  errorQuda("Unsupported precision and short vector type");
430  }
431 #endif // USE_TEXTURE_OBJECTS
432 }
433 
434 // Double precision clover term
435 texture<int4, 1> cloverTexDouble;
436 texture<int4, 1> cloverInvTexDouble;
437 
438 // Single precision clover term
439 texture<float4, 1, cudaReadModeElementType> cloverTexSingle;
440 texture<float4, 1, cudaReadModeElementType> cloverInvTexSingle;
441 
442 // Half precision clover term
443 texture<short4, 1, cudaReadModeNormalizedFloat> cloverTexHalf;
444 texture<float, 1, cudaReadModeElementType> cloverTexNorm;
445 
446 texture<short4, 1, cudaReadModeNormalizedFloat> cloverInvTexHalf;
447 texture<float, 1, cudaReadModeElementType> cloverInvTexNorm;
448 
449 QudaPrecision bindCloverTex(const FullClover clover, const int oddBit,
450  void **cloverP, void **cloverNormP)
451 {
452 
453  if (oddBit) {
454  *cloverP = clover.odd;
455  *cloverNormP = clover.oddNorm;
456  } else {
457  *cloverP = clover.even;
458  *cloverNormP = clover.evenNorm;
459  }
460 
461 #ifdef USE_TEXTURE_OBJECTS
462  dslashParam.cloverTex = oddBit ? clover.OddTex() : clover.EvenTex();
463  if (clover.precision == QUDA_HALF_PRECISION) dslashParam.cloverNormTex = oddBit ? clover.OddNormTex() : clover.EvenNormTex();
464 #else
465  if (clover.precision == QUDA_DOUBLE_PRECISION) {
466  cudaBindTexture(0, cloverTexDouble, *cloverP, clover.bytes);
467  } else if (clover.precision == QUDA_SINGLE_PRECISION) {
468  cudaBindTexture(0, cloverTexSingle, *cloverP, clover.bytes);
469  } else {
470  cudaBindTexture(0, cloverTexHalf, *cloverP, clover.bytes);
471  cudaBindTexture(0, cloverTexNorm, *cloverNormP, clover.norm_bytes);
472  }
473 #endif // USE_TEXTURE_OBJECTS
474 
475  return clover.precision;
476 }
477 
478 void unbindCloverTex(const FullClover clover)
479 {
480 #if (!defined USE_TEXTURE_OBJECTS)
481  if (clover.precision == QUDA_DOUBLE_PRECISION) {
482  cudaUnbindTexture(cloverTexDouble);
483  } else if (clover.precision == QUDA_SINGLE_PRECISION) {
484  cudaUnbindTexture(cloverTexSingle);
485  } else {
486  cudaUnbindTexture(cloverTexHalf);
487  cudaUnbindTexture(cloverTexNorm);
488  }
489 #endif // not defined USE_TEXTURE_OBJECTS
490 }
491 
492 QudaPrecision bindTwistedCloverTex(const FullClover clover, const FullClover cloverInv, const int oddBit, void **cloverP, void **cloverNormP, void **cloverInvP, void **cloverInvNormP)
493 {
494  if (oddBit)
495  {
496  *cloverP = clover.odd;
497  *cloverNormP = clover.oddNorm;
498  *cloverInvP = cloverInv.odd;
499  *cloverInvNormP = cloverInv.oddNorm;
500  }
501  else
502  {
503  *cloverP = clover.even;
504  *cloverNormP = clover.evenNorm;
505  *cloverInvP = cloverInv.even;
506  *cloverInvNormP = cloverInv.evenNorm;
507  }
508 
509 #ifdef USE_TEXTURE_OBJECTS
510  dslashParam.cloverTex = oddBit ? clover.OddTex() : clover.EvenTex();
511  if (clover.precision == QUDA_HALF_PRECISION) dslashParam.cloverNormTex = oddBit ? clover.OddNormTex() : clover.EvenNormTex();
512  dslashParam.cloverInvTex = oddBit ? cloverInv.OddTex() : cloverInv.EvenTex();
513  if (cloverInv.precision == QUDA_HALF_PRECISION) dslashParam.cloverInvNormTex = oddBit ? cloverInv.OddNormTex() : cloverInv.EvenNormTex();
514 #else
515  if (clover.precision == QUDA_DOUBLE_PRECISION) //I assume that the clover and cloverInv fields have the same precision
516  {
517  cudaBindTexture(0, cloverTexDouble, *cloverP, clover.bytes);
518  cudaBindTexture(0, cloverInvTexDouble, *cloverInvP, cloverInv.bytes);
519  }
520  else if (clover.precision == QUDA_SINGLE_PRECISION)
521  {
522  cudaBindTexture(0, cloverTexSingle, *cloverP, clover.bytes);
523  cudaBindTexture(0, cloverInvTexSingle, *cloverInvP, cloverInv.bytes);
524  }
525  else
526  {
527  cudaBindTexture(0, cloverTexHalf, *cloverP, clover.bytes);
528  cudaBindTexture(0, cloverTexNorm, *cloverNormP, clover.norm_bytes);
529  cudaBindTexture(0, cloverInvTexHalf, *cloverInvP, cloverInv.bytes);
530  cudaBindTexture(0, cloverInvTexNorm, *cloverInvNormP, cloverInv.norm_bytes);
531  }
532 #endif // USE_TEXTURE_OBJECTS
533 
534  return clover.precision;
535 }
536 
537 void unbindTwistedCloverTex(const FullClover clover) //We don't really need this function, but for the shake of completeness...
538 {
539 #if (!defined USE_TEXTURE_OBJECTS)
540  if (clover.precision == QUDA_DOUBLE_PRECISION) //Again we assume that the precision of the clover and cloverInv are the same
541  {
542  cudaUnbindTexture(cloverTexDouble);
543  cudaUnbindTexture(cloverInvTexDouble);
544  }
545  else if (clover.precision == QUDA_SINGLE_PRECISION)
546  {
547  cudaUnbindTexture(cloverTexSingle);
548  cudaUnbindTexture(cloverInvTexSingle);
549  }
550  else
551  {
552  cudaUnbindTexture(cloverTexHalf);
553  cudaUnbindTexture(cloverTexNorm);
554  cudaUnbindTexture(cloverInvTexHalf);
555  cudaUnbindTexture(cloverInvTexNorm);
556  }
557 #endif // not defined USE_TEXTURE_OBJECTS
558 }
559 
560 // define some function if we're not using textures (direct access)
561 #if defined(DIRECT_ACCESS_LINK) || defined(DIRECT_ACCESS_WILSON_SPINOR) || \
562  defined(DIRECT_ACCESS_WILSON_ACCUM) || defined(DIRECT_ACCESS_WILSON_PACK_SPINOR) || \
563  defined(DIRECT_ACCESS_WILSON_INTER) || defined(DIRECT_ACCESS_WILSON_PACK_SPINOR) || \
564  defined(DIRECT_ACCESS_CLOVER)
565 
566  static inline __device__ float short2float(short a) {
567  return (float)a/MAX_SHORT;
568  }
569 
570  static inline __device__ short float2short(float c, float a) {
571  return (short)(a*c*MAX_SHORT);
572  }
573 
574  static inline __device__ short4 float42short4(float c, float4 a) {
575  return make_short4(float2short(c, a.x), float2short(c, a.y), float2short(c, a.z), float2short(c, a.w));
576  }
577 
578  static inline __device__ float4 short42float4(short4 a) {
579  return make_float4(short2float(a.x), short2float(a.y), short2float(a.z), short2float(a.w));
580  }
581 
582  static inline __device__ float2 short22float2(short2 a) {
583  return make_float2(short2float(a.x), short2float(a.y));
584  }
585 #endif // DIRECT_ACCESS inclusions
586 
texture< short4, 1, cudaReadModeNormalizedFloat > cloverInvTexHalf
QudaPrecision bindTwistedCloverTex(const FullClover clover, const FullClover cloverInv, const int oddBit, void **cloverP, void **cloverNormP, void **cloverInvP, void **cloverInvNormP)
void unbindGaugeTex(const cudaGaugeField &gauge)
texture< short, 1, cudaReadModeNormalizedFloat > longPhase0TexHalf
enum QudaPrecision_s QudaPrecision
texture< short4, 1, cudaReadModeNormalizedFloat > gauge1TexHalf4
void bindFatGaugeTex(const cudaGaugeField &gauge, const int oddBit, void **gauge0, void **gauge1)
texture< int4, 1 > muLink0TexDouble
texture< float2, 1, cudaReadModeElementType > longGauge0TexSingle_norecon
texture< int4, 1 > cloverInvTexDouble
texture< float, 1, cudaReadModeElementType > longPhase1TexSingle
texture< float4, 1, cudaReadModeElementType > gauge0TexSingle4
#define errorQuda(...)
Definition: util_quda.h:73
texture< float2, 1, cudaReadModeElementType > fatGauge1TexSingle
texture< int4, 1 > fatGauge0TexDouble
texture< int4, 1 > accumTexDouble
void bindLongGaugeTex(const cudaGaugeField &gauge, const int oddBit, void **gauge0, void **gauge1)
texture< short2, 1, cudaReadModeNormalizedFloat > fatGauge0TexHalf
texture< int2, 1 > longPhase0TexDouble
texture< float2, 1, cudaReadModeElementType > longGauge1TexSingle_norecon
texture< short4, 1, cudaReadModeNormalizedFloat > spinorTexHalf
texture< short2, 1, cudaReadModeNormalizedFloat > spinorTexHalf2
texture< float, 1, cudaReadModeElementType > interTexHalf2Norm
texture< short2, 1, cudaReadModeNormalizedFloat > gauge1TexHalf2
texture< float2, 1, cudaReadModeElementType > gauge1TexSingle2
texture< float2, 1, cudaReadModeElementType > siteLink0TexSingle_norecon
void unbindSpinorTex(const cudaColorSpinorField *in, const cudaColorSpinorField *out=0, const cudaColorSpinorField *x=0)
texture< float2, 1, cudaReadModeElementType > siteLink1TexSingle_norecon
texture< short2, 1, cudaReadModeNormalizedFloat > fatGauge1TexHalf
__inline__ __device__ double fetch_double(texture< int2, 1 > t, int i)
Definition: texture.h:88
texture< float4, 1, cudaReadModeElementType > longGauge1TexSingle
texture< int4, 1 > interTexDouble
texture< float, 1, cudaReadModeElementType > cloverInvTexNorm
texture< float4, 1, cudaReadModeElementType > siteLink1TexSingle_recon
texture< short2, 1, cudaReadModeNormalizedFloat > accumTexHalf2
texture< float2, 1, cudaReadModeElementType > spinorTexSingle2
texture< int4, 1 > siteLink1TexDouble
texture< float4, 1, cudaReadModeElementType > gauge1TexSingle4
texture< float4, 1, cudaReadModeElementType > cloverTexSingle
void unbindLongGaugeTex(const cudaGaugeField &gauge)
texture< short2, 1, cudaReadModeNormalizedFloat > longGauge0TexHalf_norecon
texture< int4, 1 > gauge0TexDouble2
texture< float2, 1, cudaReadModeElementType > siteLink0TexSingle
texture< float2, 1, cudaReadModeElementType > accumTexSingle2
texture< int4, 1 > spinorTexDouble
texture< float4, 1, cudaReadModeElementType > spinorTexSingle
texture< float4, 1, cudaReadModeElementType > accumTexSingle
texture< int4, 1 > cloverTexDouble
int bindSpinorTex(const cudaColorSpinorField *in, const cudaColorSpinorField *out=0, const cudaColorSpinorField *x=0)
texture< float2, 1, cudaReadModeElementType > siteLink1TexSingle
cpuColorSpinorField * in
texture< float4, 1, cudaReadModeElementType > interTexSingle
texture< int4, 1 > muLink1TexDouble
texture< short2, 1, cudaReadModeNormalizedFloat > interTexHalf2
texture< float4, 1, cudaReadModeElementType > longGauge0TexSingle
texture< float2, 1, cudaReadModeElementType > interTexSingle2
texture< float2, 1, cudaReadModeElementType > muLink1TexSingle
texture< float2, 1, cudaReadModeElementType > muLink0TexSingle
texture< int4, 1 > fatGauge1TexDouble
void unbindCloverTex(const FullClover clover)
__inline__ __device__ double2 fetch_double2(texture< int4, 1 > t, int i)
Definition: texture.h:90
texture< int2, 1 > longPhase1TexDouble
texture< float, 1, cudaReadModeElementType > longPhase0TexSingle
texture< float4, 1, cudaReadModeElementType > siteLink0TexSingle_recon
int x[4]
texture< short4, 1, cudaReadModeNormalizedFloat > longGauge0TexHalf
texture< float, 1, cudaReadModeElementType > accumTexHalfNorm
texture< float2, 1, cudaReadModeElementType > fatGauge0TexSingle
texture< short4, 1, cudaReadModeNormalizedFloat > longGauge1TexHalf
cpuColorSpinorField * out
texture< float, 1, cudaReadModeElementType > interTexHalfNorm
texture< short, 1, cudaReadModeNormalizedFloat > longPhase1TexHalf
texture< float, 1, cudaReadModeElementType > cloverTexNorm
texture< float, 1, cudaReadModeElementType > spinorTexHalfNorm
texture< short4, 1, cudaReadModeNormalizedFloat > interTexHalf
texture< int4, 1 > longGauge0TexDouble
texture< float, 1, cudaReadModeElementType > accumTexHalf2Norm
texture< short4, 1, cudaReadModeNormalizedFloat > accumTexHalf
texture< short2, 1, cudaReadModeNormalizedFloat > gauge0TexHalf2
void bindGaugeTex(const cudaGaugeField &gauge, const int oddBit, void **gauge0, void **gauge1)
#define MAX_SHORT
Definition: quda_internal.h:30
QudaPrecision bindCloverTex(const FullClover clover, const int oddBit, void **cloverP, void **cloverNormP)
texture< int4, 1 > longGauge1TexDouble
texture< short4, 1, cudaReadModeNormalizedFloat > cloverTexHalf
texture< int4, 1 > siteLink0TexDouble
void unbindTwistedCloverTex(const FullClover clover)
texture< float4, 1, cudaReadModeElementType > cloverInvTexSingle
texture< short4, 1, cudaReadModeNormalizedFloat > gauge0TexHalf4
texture< int4, 1 > gauge1TexDouble2
texture< float, 1, cudaReadModeElementType > spinorTexHalf2Norm
void * gauge[4]
Definition: su3_test.cpp:15
texture< float2, 1, cudaReadModeElementType > gauge0TexSingle2
int oddBit
#define TEX1DFETCH(type, tex, idx)
void unbindFatGaugeTex(const cudaGaugeField &gauge)
texture< short2, 1, cudaReadModeNormalizedFloat > longGauge1TexHalf_norecon