QUDA  0.9.0
dslash_textures.h
Go to the documentation of this file.
1 #include <typeinfo>
2 
3 // Use this macro for texture fetching for supporting either texture objects of texture references
4 #ifdef USE_TEXTURE_OBJECTS
5 #define TEX1DFETCH(type, tex, idx) tex1Dfetch<type>((tex), idx)
6 #else
7 #define TEX1DFETCH(type, tex, idx) tex1Dfetch((tex), idx)
8 #endif
9 
10 template<typename Tex>
11 static __inline__ __device__ double fetch_double(Tex t, int i)
12 {
13  int2 v = TEX1DFETCH(int2, t, i);
14  return __hiloint2double(v.y, v.x);
15 }
16 
17 template <typename Tex>
18 static __inline__ __device__ double2 fetch_double2(Tex t, int i)
19 {
20  int4 v = TEX1DFETCH(int4, t, i);
21  return make_double2(__hiloint2double(v.y, v.x), __hiloint2double(v.w, v.z));
22 }
23 
24 static __inline__ __device__ double2 fetch_double2_old(texture<int4, 1> t, int i)
25 {
26  int4 v = tex1Dfetch(t,i);
27  return make_double2(__hiloint2double(v.y, v.x), __hiloint2double(v.w, v.z));
28 }
29 
30 
31 #ifndef USE_TEXTURE_OBJECTS
32 // Double precision gauge field
33 texture<int4, 1> gauge0TexDouble2;
34 texture<int4, 1> gauge1TexDouble2;
35 
36 // Single precision gauge field
37 texture<float4, 1, cudaReadModeElementType> gauge0TexSingle4;
38 texture<float4, 1, cudaReadModeElementType> gauge1TexSingle4;
39 texture<float2, 1, cudaReadModeElementType> gauge0TexSingle2;
40 texture<float2, 1, cudaReadModeElementType> gauge1TexSingle2;
41 
42 // Half precision gauge field
43 texture<short4, 1, cudaReadModeNormalizedFloat> gauge0TexHalf4;
44 texture<short4, 1, cudaReadModeNormalizedFloat> gauge1TexHalf4;
45 texture<short2, 1, cudaReadModeNormalizedFloat> gauge0TexHalf2;
46 texture<short2, 1, cudaReadModeNormalizedFloat> gauge1TexHalf2;
47 
48 texture<int4, 1> longGauge0TexDouble;
49 texture<int4, 1> longGauge1TexDouble;
50 texture<int2, 1> longPhase0TexDouble;
51 texture<int2, 1> longPhase1TexDouble;
52 
53 texture<float4, 1, cudaReadModeElementType> longGauge0TexSingle;
54 texture<float4, 1, cudaReadModeElementType> longGauge1TexSingle;
55 texture<float2, 1, cudaReadModeElementType> longGauge0TexSingle_norecon;
56 texture<float2, 1, cudaReadModeElementType> longGauge1TexSingle_norecon;
57 texture<float, 1, cudaReadModeElementType> longPhase0TexSingle;
58 texture<float, 1, cudaReadModeElementType> longPhase1TexSingle;
59 
60 
61 texture<short4, 1, cudaReadModeNormalizedFloat> longGauge0TexHalf;
62 texture<short4, 1, cudaReadModeNormalizedFloat> longGauge1TexHalf;
63 texture<short2, 1, cudaReadModeNormalizedFloat> longGauge0TexHalf_norecon;
64 texture<short2, 1, cudaReadModeNormalizedFloat> longGauge1TexHalf_norecon;
65 texture<short, 1, cudaReadModeNormalizedFloat> longPhase0TexHalf;
66 texture<short, 1, cudaReadModeNormalizedFloat> longPhase1TexHalf;
67 
68 
69 // Double precision input spinor field
70 texture<int4, 1> spinorTexDouble;
71 texture<int4, 1> ghostSpinorTexDouble;
72 
73 // Single precision input spinor field
74 texture<float4, 1, cudaReadModeElementType> spinorTexSingle;
75 texture<float2, 1, cudaReadModeElementType> spinorTexSingle2;
76 texture<float4, 1, cudaReadModeElementType> ghostSpinorTexSingle;
77 texture<float2, 1, cudaReadModeElementType> ghostSpinorTexSingle2;
78 
79 // Half precision input spinor field
80 texture<short4, 1, cudaReadModeNormalizedFloat> spinorTexHalf;
81 texture<short2, 1, cudaReadModeNormalizedFloat> spinorTexHalf2;
82 texture<float, 1, cudaReadModeElementType> spinorTexHalfNorm;
83 texture<float, 1, cudaReadModeElementType> spinorTexHalf2Norm;
84 texture<short4, 1, cudaReadModeNormalizedFloat> ghostSpinorTexHalf;
85 texture<short2, 1, cudaReadModeNormalizedFloat> ghostSpinorTexHalf2;
86 texture<float, 1, cudaReadModeElementType> ghostSpinorTexHalfNorm;
87 texture<float, 1, cudaReadModeElementType> ghostSpinorTexHalf2Norm;
88 
89 // Double precision accumulate spinor field
90 texture<int4, 1> accumTexDouble;
91 
92 // Single precision accumulate spinor field
93 texture<float4, 1, cudaReadModeElementType> accumTexSingle;
94 texture<float2, 1, cudaReadModeElementType> accumTexSingle2;
95 
96 // Half precision accumulate spinor field
97 texture<short4, 1, cudaReadModeNormalizedFloat> accumTexHalf;
98 texture<short2, 1, cudaReadModeNormalizedFloat> accumTexHalf2;
99 texture<float, 1, cudaReadModeElementType> accumTexHalfNorm;
100 texture<float, 1, cudaReadModeElementType> accumTexHalf2Norm;
101 
102 // Double precision intermediate spinor field (used by exterior Dslash kernels)
103 texture<int4, 1> interTexDouble;
104 
105 // Single precision intermediate spinor field
106 texture<float4, 1, cudaReadModeElementType> interTexSingle;
107 texture<float2, 1, cudaReadModeElementType> interTexSingle2;
108 
109 // Half precision intermediate spinor field
110 texture<short4, 1, cudaReadModeNormalizedFloat> interTexHalf;
111 texture<short2, 1, cudaReadModeNormalizedFloat> interTexHalf2;
112 texture<float, 1, cudaReadModeElementType> interTexHalfNorm;
113 texture<float, 1, cudaReadModeElementType> interTexHalf2Norm;
114 #endif // not defined USE_TEXTURE_OBJECTS
115 
116 // FIXME update the below textures for texture objects
117 
118 // fatGauge textures are still used by llfat so we need to define
119 texture<int4, 1> fatGauge0TexDouble;
120 texture<int4, 1> fatGauge1TexDouble;
121 texture<float2, 1, cudaReadModeElementType> fatGauge0TexSingle;
122 texture<float2, 1, cudaReadModeElementType> fatGauge1TexSingle;
123 texture<short2, 1, cudaReadModeNormalizedFloat> fatGauge0TexHalf;
124 texture<short2, 1, cudaReadModeNormalizedFloat> fatGauge1TexHalf;
125 
126 //Double precision for site link
127 texture<int4, 1> siteLink0TexDouble;
128 texture<int4, 1> siteLink1TexDouble;
129 
130 //Single precision for site link
131 texture<float2, 1, cudaReadModeElementType> siteLink0TexSingle;
132 texture<float2, 1, cudaReadModeElementType> siteLink1TexSingle;
133 
134 texture<float4, 1, cudaReadModeElementType> siteLink0TexSingle_recon;
135 texture<float4, 1, cudaReadModeElementType> siteLink1TexSingle_recon;
136 
137 texture<float2, 1, cudaReadModeElementType> siteLink0TexSingle_norecon;
138 texture<float2, 1, cudaReadModeElementType> siteLink1TexSingle_norecon;
139 
140 
141 texture<int4, 1> muLink0TexDouble;
142 texture<int4, 1> muLink1TexDouble;
143 // Single precision mulink field
144 texture<float2, 1, cudaReadModeElementType> muLink0TexSingle;
145 texture<float2, 1, cudaReadModeElementType> muLink1TexSingle;
146 
147 template<typename T>
148 void bindGaugeTex(const cudaGaugeField &gauge, const int oddBit, T &dslashParam)
149 {
150  if(oddBit) {
151  dslashParam.gauge0 = const_cast<void*>(gauge.Odd_p());
152  dslashParam.gauge1 = const_cast<void*>(gauge.Even_p());
153  } else {
154  dslashParam.gauge0 = const_cast<void*>(gauge.Even_p());
155  dslashParam.gauge1 = const_cast<void*>(gauge.Odd_p());
156  }
157 
158 #ifdef USE_TEXTURE_OBJECTS
159  dslashParam.gauge0Tex = oddBit ? gauge.OddTex() : gauge.EvenTex();
160  dslashParam.gauge1Tex = oddBit ? gauge.EvenTex() : gauge.OddTex();
161 #else
162  if (gauge.Reconstruct() == QUDA_RECONSTRUCT_NO) {
163  if (gauge.Precision() == QUDA_DOUBLE_PRECISION) {
164  cudaBindTexture(0, gauge0TexDouble2, dslashParam.gauge0, gauge.Bytes()/2);
165  cudaBindTexture(0, gauge1TexDouble2, dslashParam.gauge1, gauge.Bytes()/2);
166  } else if (gauge.Precision() == QUDA_SINGLE_PRECISION) {
167  cudaBindTexture(0, gauge0TexSingle2, dslashParam.gauge0, gauge.Bytes()/2);
168  cudaBindTexture(0, gauge1TexSingle2, dslashParam.gauge1, gauge.Bytes()/2);
169  } else {
170  cudaBindTexture(0, gauge0TexHalf2, dslashParam.gauge0, gauge.Bytes()/2);
171  cudaBindTexture(0, gauge1TexHalf2, dslashParam.gauge1, gauge.Bytes()/2);
172  }
173  } else {
174  if (gauge.Precision() == QUDA_DOUBLE_PRECISION) {
175  cudaBindTexture(0, gauge0TexDouble2, dslashParam.gauge0, gauge.Bytes()/2);
176  cudaBindTexture(0, gauge1TexDouble2, dslashParam.gauge1, gauge.Bytes()/2);
177  } else if (gauge.Precision() == QUDA_SINGLE_PRECISION) {
178  cudaBindTexture(0, gauge0TexSingle4, dslashParam.gauge0, gauge.Bytes()/2);
179  cudaBindTexture(0, gauge1TexSingle4, dslashParam.gauge1, gauge.Bytes()/2);
180  } else {
181  cudaBindTexture(0, gauge0TexHalf4, dslashParam.gauge0, gauge.Bytes()/2);
182  cudaBindTexture(0, gauge1TexHalf4, dslashParam.gauge1, gauge.Bytes()/2);
183  }
184  }
185 #endif // USE_TEXTURE_OBJECTS
186 
187 }
188 
189 void unbindGaugeTex(const cudaGaugeField &gauge)
190 {
191 #if (!defined USE_TEXTURE_OBJECTS)
192  if (gauge.Reconstruct() == QUDA_RECONSTRUCT_NO) {
193  if (gauge.Precision() == QUDA_DOUBLE_PRECISION) {
194  cudaUnbindTexture(gauge0TexDouble2);
195  cudaUnbindTexture(gauge1TexDouble2);
196  } else if (gauge.Precision() == QUDA_SINGLE_PRECISION) {
197  cudaUnbindTexture(gauge0TexSingle2);
198  cudaUnbindTexture(gauge1TexSingle2);
199  } else {
200  cudaUnbindTexture(gauge0TexHalf2);
201  cudaUnbindTexture(gauge1TexHalf2);
202  }
203  } else {
204  if (gauge.Precision() == QUDA_DOUBLE_PRECISION) {
205  cudaUnbindTexture(gauge0TexDouble2);
206  cudaUnbindTexture(gauge1TexDouble2);
207  } else if (gauge.Precision() == QUDA_SINGLE_PRECISION) {
208  cudaUnbindTexture(gauge0TexSingle4);
209  cudaUnbindTexture(gauge1TexSingle4);
210  } else {
211  cudaUnbindTexture(gauge0TexHalf4);
212  cudaUnbindTexture(gauge1TexHalf4);
213  }
214  }
215 #endif
216 }
217 
218 template <typename T>
219 void bindFatGaugeTex(const cudaGaugeField &gauge, const int oddBit, T &dslashParam)
220 {
221  if(oddBit) {
222  dslashParam.gauge0 = const_cast<void*>(gauge.Odd_p());
223  dslashParam.gauge1 = const_cast<void*>(gauge.Even_p());
224  } else {
225  dslashParam.gauge0 = const_cast<void*>(gauge.Even_p());
226  dslashParam.gauge1 = const_cast<void*>(gauge.Odd_p());
227  }
228 
229 #ifdef USE_TEXTURE_OBJECTS
230  dslashParam.gauge0Tex = oddBit ? gauge.OddTex() : gauge.EvenTex();
231  dslashParam.gauge1Tex = oddBit ? gauge.EvenTex() : gauge.OddTex();
232 #else
233  if (gauge.Precision() == QUDA_DOUBLE_PRECISION) {
234  cudaBindTexture(0, fatGauge0TexDouble, dslashParam.gauge0, gauge.Bytes()/2);
235  cudaBindTexture(0, fatGauge1TexDouble, dslashParam.gauge1, gauge.Bytes()/2);
236  } else if (gauge.Precision() == QUDA_SINGLE_PRECISION) {
237  cudaBindTexture(0, fatGauge0TexSingle, dslashParam.gauge0, gauge.Bytes()/2);
238  cudaBindTexture(0, fatGauge1TexSingle, dslashParam.gauge1, gauge.Bytes()/2);
239  } else {
240  cudaBindTexture(0, fatGauge0TexHalf, dslashParam.gauge0, gauge.Bytes()/2);
241  cudaBindTexture(0, fatGauge1TexHalf, dslashParam.gauge1, gauge.Bytes()/2);
242  }
243 #endif // USE_TEXTURE_OBJECTS
244 
245 }
246 
247 void unbindFatGaugeTex(const cudaGaugeField &gauge)
248 {
249 #if (!defined USE_TEXTURE_OBJECTS)
250  if (gauge.Precision() == QUDA_DOUBLE_PRECISION) {
251  cudaUnbindTexture(fatGauge0TexDouble);
252  cudaUnbindTexture(fatGauge1TexDouble);
253  } else if (gauge.Precision() == QUDA_SINGLE_PRECISION) {
254  cudaUnbindTexture(fatGauge0TexSingle);
255  cudaUnbindTexture(fatGauge1TexSingle);
256  } else {
257  cudaUnbindTexture(fatGauge0TexHalf);
258  cudaUnbindTexture(fatGauge1TexHalf);
259  }
260 #endif
261 }
262 
263 template <typename T>
264 void bindLongGaugeTex(const cudaGaugeField &gauge, const int oddBit, T &dslashParam)
265 {
266  if (oddBit) {
267  dslashParam.gauge0 = const_cast<void*>(gauge.Odd_p());
268  dslashParam.gauge1 = const_cast<void*>(gauge.Even_p());
269  } else {
270  dslashParam.gauge0 = const_cast<void*>(gauge.Even_p());
271  dslashParam.gauge1 = const_cast<void*>(gauge.Odd_p());
272  }
273 
274  dslashParam.longPhase0 = static_cast<char*>(dslashParam.longGauge0) + gauge.PhaseOffset();
275  dslashParam.longPhase1 = static_cast<char*>(dslashParam.longGauge1) + gauge.PhaseOffset();
276 
277 
278 #ifdef USE_TEXTURE_OBJECTS
279  dslashParam.longGauge0Tex = oddBit ? gauge.OddTex() : gauge.EvenTex();
280  dslashParam.longGauge1Tex = oddBit ? gauge.EvenTex() : gauge.OddTex();
281 
282  if(gauge.Reconstruct() == QUDA_RECONSTRUCT_13 || gauge.Reconstruct() == QUDA_RECONSTRUCT_9){
283  dslashParam.longPhase0Tex = oddBit ? gauge.OddPhaseTex() : gauge.EvenPhaseTex();
284  dslashParam.longPhase1Tex = oddBit ? gauge.EvenPhaseTex() : gauge.OddPhaseTex();
285  }
286 #else
287  if (gauge.Precision() == QUDA_DOUBLE_PRECISION) {
288  cudaBindTexture(0, longGauge0TexDouble, dslashParam.gauge0, gauge.Bytes()/2);
289  cudaBindTexture(0, longGauge1TexDouble, dslashParam.gauge1, gauge.Bytes()/2);
290  if(gauge.Reconstruct() == QUDA_RECONSTRUCT_13 || gauge.Reconstruct() == QUDA_RECONSTRUCT_9){
291  cudaBindTexture(0, longPhase0TexDouble, (char*)(dslashParam.gauge0) + gauge.PhaseOffset(), gauge.PhaseBytes()/2);
292  cudaBindTexture(0, longPhase1TexDouble, (char*)(dslashParam.gauge1) + gauge.PhaseOffset(), gauge.PhaseBytes()/2);
293  }
294  } else if (gauge.Precision() == QUDA_SINGLE_PRECISION) {
295  if (gauge.Reconstruct() == QUDA_RECONSTRUCT_NO) { //18 reconstruct
296  cudaBindTexture(0, longGauge0TexSingle_norecon, dslashParam.gauge0, gauge.Bytes()/2);
297  cudaBindTexture(0, longGauge1TexSingle_norecon, dslashParam.gauge1, gauge.Bytes()/2);
298  } else {
299  cudaBindTexture(0, longGauge0TexSingle, dslashParam.gauge0, gauge.Bytes()/2);
300  cudaBindTexture(0, longGauge1TexSingle, dslashParam.gauge1, gauge.Bytes()/2);
301  if(gauge.Reconstruct() == QUDA_RECONSTRUCT_13 || gauge.Reconstruct() == QUDA_RECONSTRUCT_9){
302  cudaBindTexture(0, longPhase0TexSingle, (char*)(dslashParam.gauge0) + gauge.PhaseOffset(), gauge.PhaseBytes()/2);
303  cudaBindTexture(0, longPhase1TexSingle, (char*)(dslashParam.gauge1) + gauge.PhaseOffset(), gauge.PhaseBytes()/2);
304  }
305  }
306  } else {
307  if (gauge.Reconstruct() == QUDA_RECONSTRUCT_NO) { //18 reconstruct
308  cudaBindTexture(0, longGauge0TexHalf_norecon, dslashParam.gauge0, gauge.Bytes()/2);
309  cudaBindTexture(0, longGauge1TexHalf_norecon, dslashParam.gauge1, gauge.Bytes()/2);
310  } else {
311  cudaBindTexture(0, longGauge0TexHalf, dslashParam.gauge0, gauge.Bytes()/2);
312  cudaBindTexture(0, longGauge1TexHalf, dslashParam.gauge1, gauge.Bytes()/2);
313  if(gauge.Reconstruct() == QUDA_RECONSTRUCT_13 || gauge.Reconstruct() == QUDA_RECONSTRUCT_9){
314  cudaBindTexture(0, longPhase0TexHalf, (char*)(dslashParam.gauge0) + gauge.PhaseOffset(), gauge.PhaseBytes()/2);
315  cudaBindTexture(0, longPhase1TexHalf, (char*)(dslashParam.gauge1) + gauge.PhaseOffset(), gauge.PhaseBytes()/2);
316  }
317  }
318  }
319 #endif // USE_TEXTURE_OBJECTS
320 }
321 
322 void unbindLongGaugeTex(const cudaGaugeField &gauge)
323 {
324 #if (!defined USE_TEXTURE_OBJECTS)
325  if (gauge.Precision() == QUDA_DOUBLE_PRECISION) {
326  cudaUnbindTexture(longGauge0TexDouble);
327  cudaUnbindTexture(longGauge1TexDouble);
328  if(gauge.Reconstruct() == QUDA_RECONSTRUCT_13 || gauge.Reconstruct() == QUDA_RECONSTRUCT_9){
329  cudaUnbindTexture(longPhase0TexDouble);
330  cudaUnbindTexture(longPhase1TexDouble);
331  }
332  } else if (gauge.Precision() == QUDA_SINGLE_PRECISION) {
333  if (gauge.Reconstruct() == QUDA_RECONSTRUCT_NO) { //18 reconstruct
334  cudaUnbindTexture(longGauge0TexSingle_norecon);
335  cudaUnbindTexture(longGauge1TexSingle_norecon);
336  } else {
337  cudaUnbindTexture(longGauge0TexSingle);
338  cudaUnbindTexture(longGauge1TexSingle);
339  if(gauge.Reconstruct() == QUDA_RECONSTRUCT_13 || gauge.Reconstruct() == QUDA_RECONSTRUCT_9){
340  cudaUnbindTexture(longPhase0TexSingle);
341  cudaUnbindTexture(longPhase1TexSingle);
342  }
343  }
344  } else { // half precision
345  if (gauge.Reconstruct() == QUDA_RECONSTRUCT_NO) { //18 reconstruct
346  cudaUnbindTexture(longGauge0TexHalf_norecon);
347  cudaUnbindTexture(longGauge1TexHalf_norecon);
348  } else {
349  cudaUnbindTexture(longGauge0TexHalf);
350  cudaUnbindTexture(longGauge1TexHalf);
351  if(gauge.Reconstruct() == QUDA_RECONSTRUCT_13 || gauge.Reconstruct() == QUDA_RECONSTRUCT_9){
352  cudaUnbindTexture(longPhase0TexHalf);
353  cudaUnbindTexture(longPhase1TexHalf);
354  }
355  }
356  }
357 #endif
358 }
359 
360 
361 template <typename spinorFloat>
362 int bindSpinorTex(const cudaColorSpinorField *in, const cudaColorSpinorField *out=0,
363  const cudaColorSpinorField *x=0) {
364  int size = (sizeof(((spinorFloat*)0)->x) < sizeof(float)) ? sizeof(float) :
365  sizeof(((spinorFloat*)0)->x);
366 
367 #ifndef USE_TEXTURE_OBJECTS
368  if (typeid(spinorFloat) == typeid(double2)) {
369  cudaBindTexture(0, spinorTexDouble, in->V(), in->Bytes());
370  if (in->GhostBytes()) cudaBindTexture(0, ghostSpinorTexDouble, in->Ghost2(), in->GhostBytes());
371  if (out) cudaBindTexture(0, interTexDouble, out->V(), in->Bytes());
372  if (x) cudaBindTexture(0, accumTexDouble, x->V(), in->Bytes());
373  } else if (typeid(spinorFloat) == typeid(float4)) {
374  cudaBindTexture(0, spinorTexSingle, in->V(), in->Bytes());
375  if (in->GhostBytes()) cudaBindTexture(0, ghostSpinorTexSingle, in->Ghost2(), in->GhostBytes());
376  if (out) cudaBindTexture(0, interTexSingle, out->V(), in->Bytes());
377  if (x) cudaBindTexture(0, accumTexSingle, x->V(), in->Bytes());
378  } else if (typeid(spinorFloat) == typeid(float2)) {
379  cudaBindTexture(0, spinorTexSingle2, in->V(), in->Bytes());
380  if (in->GhostBytes()) cudaBindTexture(0, ghostSpinorTexSingle2, in->Ghost2(), in->GhostBytes());
381  if (out) cudaBindTexture(0, interTexSingle2, out->V(), in->Bytes());
382  if (x) cudaBindTexture(0, accumTexSingle2, x->V(), in->Bytes());
383  } else if (typeid(spinorFloat) == typeid(short4)) {
384  cudaBindTexture(0, spinorTexHalf, in->V(), in->Bytes());
385  cudaBindTexture(0, spinorTexHalfNorm, in->Norm(), in->NormBytes());
386  if (in->GhostBytes()) cudaBindTexture(0, ghostSpinorTexHalf, in->Ghost2(), in->GhostBytes());
387  if (in->GhostBytes()) cudaBindTexture(0, ghostSpinorTexHalfNorm, in->Ghost2(), in->GhostBytes());
388  if (out) cudaBindTexture(0, interTexHalf, out->V(), in->Bytes());
389  if (out) cudaBindTexture(0, interTexHalfNorm, out->Norm(), in->NormBytes());
390  if (x) cudaBindTexture(0, accumTexHalf, x->V(), in->Bytes());
391  if (x) cudaBindTexture(0, accumTexHalfNorm, x->Norm(), in->NormBytes());
392  } else if (typeid(spinorFloat) == typeid(short2)) {
393  cudaBindTexture(0, spinorTexHalf2, in->V(), in->Bytes());
394  cudaBindTexture(0, spinorTexHalf2Norm, in->Norm(), in->NormBytes());
395  if (in->GhostBytes()) cudaBindTexture(0, ghostSpinorTexHalf2, in->Ghost2(), in->GhostBytes());
396  if (in->GhostBytes()) cudaBindTexture(0, ghostSpinorTexHalf2Norm, in->Ghost2(), in->GhostBytes());
397  if (out) cudaBindTexture(0, interTexHalf2, out->V(), in->Bytes());
398  if (out) cudaBindTexture(0, interTexHalf2Norm, out->Norm(), in->NormBytes());
399  if (x) cudaBindTexture(0, accumTexHalf2, x->V(), in->Bytes());
400  if (x) cudaBindTexture(0, accumTexHalf2Norm, x->Norm(), in->NormBytes());
401  } else {
402  errorQuda("Unsupported precision and short vector type");
403  }
404 #endif // !USE_TEXTURE_OBJECTS
405 
406  return size;
407 }
408 
409 template <typename spinorFloat>
410 void unbindSpinorTex(const cudaColorSpinorField *in, const cudaColorSpinorField *out=0,
411  const cudaColorSpinorField *x=0) {
412 #ifndef USE_TEXTURE_OBJECTS
413  if (typeid(spinorFloat) == typeid(double2)) {
414  cudaUnbindTexture(spinorTexDouble);
415  if (in->GhostBytes()) cudaUnbindTexture(ghostSpinorTexDouble);
416  if (out) cudaUnbindTexture(interTexDouble);
417  if (x) cudaUnbindTexture(accumTexDouble);
418  } else if (typeid(spinorFloat) == typeid(float4)) {
419  cudaUnbindTexture(spinorTexSingle);
420  if (in->GhostBytes()) cudaUnbindTexture(ghostSpinorTexSingle);
421  if (out) cudaUnbindTexture(interTexSingle);
422  if (x) cudaUnbindTexture(accumTexSingle);
423  } else if (typeid(spinorFloat) == typeid(float2)) {
424  cudaUnbindTexture(spinorTexSingle2);
425  if (in->GhostBytes()) cudaUnbindTexture(ghostSpinorTexSingle2);
426  if (out) cudaUnbindTexture(interTexSingle2);
427  if (x) cudaUnbindTexture(accumTexSingle2);
428  } else if (typeid(spinorFloat) == typeid(short4)) {
429  cudaUnbindTexture(spinorTexHalf);
430  cudaUnbindTexture(spinorTexHalfNorm);
431  if (in->GhostBytes()) cudaUnbindTexture(ghostSpinorTexHalf);
432  if (in->GhostBytes()) cudaUnbindTexture(ghostSpinorTexHalfNorm);
433  if (out) cudaUnbindTexture(interTexHalf);
434  if (out) cudaUnbindTexture(interTexHalfNorm);
435  if (x) cudaUnbindTexture(accumTexHalf);
436  if (x) cudaUnbindTexture(accumTexHalfNorm);
437  } else if (typeid(spinorFloat) == typeid(short2)) {
438  cudaUnbindTexture(spinorTexHalf2);
439  cudaUnbindTexture(spinorTexHalf2Norm);
440  if (in->GhostBytes()) cudaUnbindTexture(ghostSpinorTexHalf2);
441  if (in->GhostBytes()) cudaUnbindTexture(ghostSpinorTexHalf2Norm);
442  if (out) cudaUnbindTexture(interTexHalf2);
443  if (out) cudaUnbindTexture(interTexHalf2Norm);
444  if (x) cudaUnbindTexture(accumTexHalf2);
445  if (x) cudaUnbindTexture(accumTexHalf2Norm);
446  } else {
447  errorQuda("Unsupported precision and short vector type");
448  }
449 #endif // USE_TEXTURE_OBJECTS
450 }
451 
452 // Double precision clover term
453 texture<int4, 1> cloverTexDouble;
454 texture<int4, 1> cloverInvTexDouble;
455 
456 // Single precision clover term
457 texture<float4, 1, cudaReadModeElementType> cloverTexSingle;
458 texture<float4, 1, cudaReadModeElementType> cloverInvTexSingle;
459 
460 // Half precision clover term
461 texture<short4, 1, cudaReadModeNormalizedFloat> cloverTexHalf;
462 texture<float, 1, cudaReadModeElementType> cloverTexNorm;
463 
464 texture<short4, 1, cudaReadModeNormalizedFloat> cloverInvTexHalf;
465 texture<float, 1, cudaReadModeElementType> cloverInvTexNorm;
466 
467 template <typename T>
468 QudaPrecision bindCloverTex(const FullClover &clover, const int oddBit, T &dslashParam)
469 {
470  if (oddBit) {
471  dslashParam.clover = clover.odd;
472  dslashParam.cloverNorm = (float*)clover.oddNorm;
473  } else {
474  dslashParam.clover = clover.even;
475  dslashParam.cloverNorm = (float*)clover.evenNorm;
476  }
477 
478 #ifdef USE_TEXTURE_OBJECTS
479  dslashParam.cloverTex = oddBit ? clover.OddTex() : clover.EvenTex();
480  if (clover.precision == QUDA_HALF_PRECISION) dslashParam.cloverNormTex = oddBit ? clover.OddNormTex() : clover.EvenNormTex();
481 #else
482  if (clover.precision == QUDA_DOUBLE_PRECISION) {
483  cudaBindTexture(0, cloverTexDouble, dslashParam.clover, clover.bytes);
484  } else if (clover.precision == QUDA_SINGLE_PRECISION) {
485  cudaBindTexture(0, cloverTexSingle, dslashParam.clover, clover.bytes);
486  } else {
487  cudaBindTexture(0, cloverTexHalf, dslashParam.clover, clover.bytes);
488  cudaBindTexture(0, cloverTexNorm, dslashParam.cloverNorm, clover.norm_bytes);
489  }
490 #endif // USE_TEXTURE_OBJECTS
491 
492  return clover.precision;
493 }
494 
495 void unbindCloverTex(const FullClover clover)
496 {
497 #if (!defined USE_TEXTURE_OBJECTS)
498  if (clover.precision == QUDA_DOUBLE_PRECISION) {
499  cudaUnbindTexture(cloverTexDouble);
500  } else if (clover.precision == QUDA_SINGLE_PRECISION) {
501  cudaUnbindTexture(cloverTexSingle);
502  } else {
503  cudaUnbindTexture(cloverTexHalf);
504  cudaUnbindTexture(cloverTexNorm);
505  }
506 #endif // not defined USE_TEXTURE_OBJECTS
507 }
508 
509 template <typename T>
510 QudaPrecision bindTwistedCloverTex(const FullClover clover, const FullClover cloverInv, const int oddBit, T &dslashParam)
511 {
512  if (oddBit) {
513  dslashParam.clover = clover.odd;
514  dslashParam.cloverNorm = (float*)clover.oddNorm;
515 #ifndef DYNAMIC_CLOVER
516  dslashParam.cloverInv = cloverInv.odd;
517  dslashParam.cloverInvNorm = (float*)cloverInv.oddNorm;
518 #endif
519  } else {
520  dslashParam.clover = clover.even;
521  dslashParam.cloverNorm = (float*)clover.evenNorm;
522 #ifndef DYNAMIC_CLOVER
523  dslashParam.clover = cloverInv.even;
524  dslashParam.cloverInvNorm = (float*)cloverInv.evenNorm;
525 #endif
526  }
527 
528 #ifdef USE_TEXTURE_OBJECTS
529  dslashParam.cloverTex = oddBit ? clover.OddTex() : clover.EvenTex();
530  if (clover.precision == QUDA_HALF_PRECISION) dslashParam.cloverNormTex = oddBit ? clover.OddNormTex() : clover.EvenNormTex();
531 #ifndef DYNAMIC_CLOVER
532  dslashParam.cloverInvTex = oddBit ? cloverInv.OddTex() : cloverInv.EvenTex();
533  if (cloverInv.precision == QUDA_HALF_PRECISION) dslashParam.cloverInvNormTex = oddBit ? cloverInv.OddNormTex() : cloverInv.EvenNormTex();
534 #endif
535 #else
536  if (clover.precision == QUDA_DOUBLE_PRECISION) { //I assume that the clover and cloverInv fields have the same precision
537  cudaBindTexture(0, cloverTexDouble, dslashParam.clover, clover.bytes);
538 #ifndef DYNAMIC_CLOVER
539  cudaBindTexture(0, cloverInvTexDouble, dslashParam.cloverInv, cloverInv.bytes);
540 #endif
541  } else if (clover.precision == QUDA_SINGLE_PRECISION) {
542  cudaBindTexture(0, cloverTexSingle, dslashParam.clover, clover.bytes);
543 #ifndef DYNAMIC_CLOVER
544  cudaBindTexture(0, cloverInvTexSingle, dslashParam.cloverInv, cloverInv.bytes);
545 #endif
546  } else {
547  cudaBindTexture(0, cloverTexHalf, dslashParam.clover, clover.bytes);
548  cudaBindTexture(0, cloverTexNorm, dslashParam.cloverNorm, clover.norm_bytes);
549 #ifndef DYNAMIC_CLOVER
550  cudaBindTexture(0, cloverInvTexHalf, dslashParam.cloverInv, cloverInv.bytes);
551  cudaBindTexture(0, cloverInvTexNorm, dslashParam.cloverInvNorm, cloverInv.norm_bytes);
552 #endif
553  }
554 #endif // USE_TEXTURE_OBJECTS
555 
556  return clover.precision;
557 }
558 
559 void unbindTwistedCloverTex(const FullClover clover) //We don't really need this function, but for the shake of completeness...
560 {
561 #if (!defined USE_TEXTURE_OBJECTS)
562  if (clover.precision == QUDA_DOUBLE_PRECISION) //Again we assume that the precision of the clover and cloverInv are the same
563  {
564  cudaUnbindTexture(cloverTexDouble);
565 #ifndef DYNAMIC_CLOVER
566  cudaUnbindTexture(cloverInvTexDouble);
567 #endif
568  }
569  else if (clover.precision == QUDA_SINGLE_PRECISION)
570  {
571  cudaUnbindTexture(cloverTexSingle);
572 #ifndef DYNAMIC_CLOVER
573  cudaUnbindTexture(cloverInvTexSingle);
574 #endif
575  }
576  else
577  {
578  cudaUnbindTexture(cloverTexHalf);
579  cudaUnbindTexture(cloverTexNorm);
580 #ifndef DYNAMIC_CLOVER
581  cudaUnbindTexture(cloverInvTexHalf);
582  cudaUnbindTexture(cloverInvTexNorm);
583 #endif
584  }
585 #endif // not defined USE_TEXTURE_OBJECTS
586 }
587 
588 // define some function if we're not using textures (direct access)
589 #if defined(DIRECT_ACCESS_LINK) || defined(DIRECT_ACCESS_WILSON_SPINOR) || \
590  defined(DIRECT_ACCESS_WILSON_ACCUM) || defined(DIRECT_ACCESS_WILSON_PACK_SPINOR) || \
591  defined(DIRECT_ACCESS_WILSON_INTER) || defined(DIRECT_ACCESS_WILSON_PACK_SPINOR) || \
592  defined(DIRECT_ACCESS_CLOVER) || defined(DIRECT_ACCESS_PACK) || \
593  defined(DIRECT_ACCESS_LONG_LINK) || defined(DIRECT_ACCESS_FAT_LINK)
594 
595  static inline __device__ float short2float(short a) {
596  return (float)a/MAX_SHORT;
597  }
598 
599  static inline __device__ short float2short(float c, float a) {
600  return (short)(a*c*MAX_SHORT);
601  }
602 
603  static inline __device__ short4 float42short4(float c, float4 a) {
604  return make_short4(float2short(c, a.x), float2short(c, a.y), float2short(c, a.z), float2short(c, a.w));
605  }
606 
607  static inline __device__ float4 short42float4(short4 a) {
608  return make_float4(short2float(a.x), short2float(a.y), short2float(a.z), short2float(a.w));
609  }
610 
611  static inline __device__ float2 short22float2(short2 a) {
612  return make_float2(short2float(a.x), short2float(a.y));
613  }
614 #endif // DIRECT_ACCESS inclusions
texture< short4, 1, cudaReadModeNormalizedFloat > cloverInvTexHalf
void unbindGaugeTex(const cudaGaugeField &gauge)
void bindFatGaugeTex(const cudaGaugeField &gauge, const int oddBit, T &dslashParam)
texture< short, 1, cudaReadModeNormalizedFloat > longPhase0TexHalf
QudaPrecision bindCloverTex(const FullClover &clover, const int oddBit, T &dslashParam)
enum QudaPrecision_s QudaPrecision
texture< short4, 1, cudaReadModeNormalizedFloat > gauge1TexHalf4
texture< int4, 1 > muLink0TexDouble
texture< float2, 1, cudaReadModeElementType > longGauge0TexSingle_norecon
texture< int4, 1 > cloverInvTexDouble
texture< float, 1, cudaReadModeElementType > longPhase1TexSingle
texture< short4, 1, cudaReadModeNormalizedFloat > ghostSpinorTexHalf
texture< float4, 1, cudaReadModeElementType > gauge0TexSingle4
#define errorQuda(...)
Definition: util_quda.h:90
texture< float2, 1, cudaReadModeElementType > fatGauge1TexSingle
texture< int4, 1 > fatGauge0TexDouble
QudaPrecision bindTwistedCloverTex(const FullClover clover, const FullClover cloverInv, const int oddBit, T &dslashParam)
texture< int4, 1 > accumTexDouble
texture< short2, 1, cudaReadModeNormalizedFloat > fatGauge0TexHalf
texture< int2, 1 > longPhase0TexDouble
texture< float2, 1, cudaReadModeElementType > longGauge1TexSingle_norecon
texture< short4, 1, cudaReadModeNormalizedFloat > spinorTexHalf
texture< short2, 1, cudaReadModeNormalizedFloat > spinorTexHalf2
texture< float, 1, cudaReadModeElementType > interTexHalf2Norm
texture< short2, 1, cudaReadModeNormalizedFloat > gauge1TexHalf2
texture< float2, 1, cudaReadModeElementType > gauge1TexSingle2
texture< float2, 1, cudaReadModeElementType > siteLink0TexSingle_norecon
void unbindSpinorTex(const cudaColorSpinorField *in, const cudaColorSpinorField *out=0, const cudaColorSpinorField *x=0)
texture< float2, 1, cudaReadModeElementType > siteLink1TexSingle_norecon
texture< int4, 1 > ghostSpinorTexDouble
texture< short2, 1, cudaReadModeNormalizedFloat > fatGauge1TexHalf
texture< float4, 1, cudaReadModeElementType > longGauge1TexSingle
static __inline__ __device__ double fetch_double(Tex t, int i)
texture< int4, 1 > interTexDouble
texture< float, 1, cudaReadModeElementType > cloverInvTexNorm
texture< float4, 1, cudaReadModeElementType > siteLink1TexSingle_recon
texture< short2, 1, cudaReadModeNormalizedFloat > accumTexHalf2
texture< short2, 1, cudaReadModeNormalizedFloat > ghostSpinorTexHalf2
texture< float2, 1, cudaReadModeElementType > spinorTexSingle2
texture< int4, 1 > siteLink1TexDouble
texture< float4, 1, cudaReadModeElementType > gauge1TexSingle4
texture< float4, 1, cudaReadModeElementType > cloverTexSingle
void unbindLongGaugeTex(const cudaGaugeField &gauge)
static __inline__ __device__ double2 fetch_double2_old(texture< int4, 1 > t, int i)
texture< short2, 1, cudaReadModeNormalizedFloat > longGauge0TexHalf_norecon
texture< int4, 1 > gauge0TexDouble2
static __inline__ __device__ double2 fetch_double2(Tex t, int i)
texture< float2, 1, cudaReadModeElementType > siteLink0TexSingle
texture< float2, 1, cudaReadModeElementType > accumTexSingle2
texture< int4, 1 > spinorTexDouble
texture< float4, 1, cudaReadModeElementType > spinorTexSingle
texture< float, 1, cudaReadModeElementType > ghostSpinorTexHalf2Norm
texture< float4, 1, cudaReadModeElementType > accumTexSingle
texture< int4, 1 > cloverTexDouble
int bindSpinorTex(const cudaColorSpinorField *in, const cudaColorSpinorField *out=0, const cudaColorSpinorField *x=0)
texture< float2, 1, cudaReadModeElementType > siteLink1TexSingle
cpuColorSpinorField * in
texture< float4, 1, cudaReadModeElementType > interTexSingle
texture< int4, 1 > muLink1TexDouble
texture< float4, 1, cudaReadModeElementType > ghostSpinorTexSingle
texture< short2, 1, cudaReadModeNormalizedFloat > interTexHalf2
texture< float4, 1, cudaReadModeElementType > longGauge0TexSingle
texture< float2, 1, cudaReadModeElementType > interTexSingle2
texture< float2, 1, cudaReadModeElementType > muLink1TexSingle
texture< float2, 1, cudaReadModeElementType > muLink0TexSingle
texture< int4, 1 > fatGauge1TexDouble
void unbindCloverTex(const FullClover clover)
texture< int2, 1 > longPhase1TexDouble
texture< float, 1, cudaReadModeElementType > longPhase0TexSingle
texture< float4, 1, cudaReadModeElementType > siteLink0TexSingle_recon
texture< short4, 1, cudaReadModeNormalizedFloat > longGauge0TexHalf
texture< float, 1, cudaReadModeElementType > accumTexHalfNorm
texture< float2, 1, cudaReadModeElementType > fatGauge0TexSingle
texture< short4, 1, cudaReadModeNormalizedFloat > longGauge1TexHalf
cpuColorSpinorField * out
void bindLongGaugeTex(const cudaGaugeField &gauge, const int oddBit, T &dslashParam)
texture< float, 1, cudaReadModeElementType > interTexHalfNorm
texture< short, 1, cudaReadModeNormalizedFloat > longPhase1TexHalf
texture< float, 1, cudaReadModeElementType > cloverTexNorm
texture< float, 1, cudaReadModeElementType > spinorTexHalfNorm
texture< short4, 1, cudaReadModeNormalizedFloat > interTexHalf
texture< int4, 1 > longGauge0TexDouble
texture< float, 1, cudaReadModeElementType > accumTexHalf2Norm
texture< short4, 1, cudaReadModeNormalizedFloat > accumTexHalf
texture< short2, 1, cudaReadModeNormalizedFloat > gauge0TexHalf2
texture< float2, 1, cudaReadModeElementType > ghostSpinorTexSingle2
const void * c
#define MAX_SHORT
Definition: quda_internal.h:29
texture< int4, 1 > longGauge1TexDouble
texture< short4, 1, cudaReadModeNormalizedFloat > cloverTexHalf
texture< int4, 1 > siteLink0TexDouble
void unbindTwistedCloverTex(const FullClover clover)
texture< float4, 1, cudaReadModeElementType > cloverInvTexSingle
texture< short4, 1, cudaReadModeNormalizedFloat > gauge0TexHalf4
texture< int4, 1 > gauge1TexDouble2
texture< float, 1, cudaReadModeElementType > spinorTexHalf2Norm
#define a
texture< float2, 1, cudaReadModeElementType > gauge0TexSingle2
texture< float, 1, cudaReadModeElementType > ghostSpinorTexHalfNorm
#define TEX1DFETCH(type, tex, idx)
void bindGaugeTex(const cudaGaugeField &gauge, const int oddBit, T &dslashParam)
void unbindFatGaugeTex(const cudaGaugeField &gauge)
texture< short2, 1, cudaReadModeNormalizedFloat > longGauge1TexHalf_norecon