4 #ifdef USE_TEXTURE_OBJECTS
5 #define TEX1DFETCH(type, tex, idx) tex1Dfetch<type>((tex), idx)
7 #define TEX1DFETCH(type, tex, idx) tex1Dfetch((tex), idx)
10 #if (__COMPUTE_CAPABILITY__ >= 130)
11 template <
typename Tex>
12 static __inline__ __device__ double2
fetch_double2(Tex t,
int i)
15 return make_double2(__hiloint2double(v.y, v.x), __hiloint2double(v.w, v.z));
18 static __inline__ __device__ double2 fetch_double2_old(texture<int4, 1> t,
int i)
20 int4 v = tex1Dfetch(t,i);
21 return make_double2(__hiloint2double(v.y, v.x), __hiloint2double(v.w, v.z));
23 #endif //__COMPUTE_CAPABILITY__ >= 130
25 #ifndef USE_TEXTURE_OBJECTS
92 #endif // not defined USE_TEXTURE_OBJECTS
129 *gauge1 = gauge.even;
131 *gauge0 = gauge.even;
135 #ifdef USE_TEXTURE_OBJECTS
136 dslashParam.gauge0Tex = oddBit ? gauge.OddTex() : gauge.EvenTex();
137 dslashParam.gauge1Tex = oddBit ? gauge.EvenTex() : gauge.OddTex();
141 cudaBindTexture(0, gauge0TexDouble2, *gauge0, gauge.bytes/2);
142 cudaBindTexture(0, gauge1TexDouble2, *gauge1, gauge.bytes/2);
144 cudaBindTexture(0, gauge0TexSingle2, *gauge0, gauge.bytes/2);
145 cudaBindTexture(0, gauge1TexSingle2, *gauge1, gauge.bytes/2);
147 cudaBindTexture(0, gauge0TexHalf2, *gauge0, gauge.bytes/2);
148 cudaBindTexture(0, gauge1TexHalf2, *gauge1, gauge.bytes/2);
152 cudaBindTexture(0, gauge0TexDouble2, *gauge0, gauge.bytes/2);
153 cudaBindTexture(0, gauge1TexDouble2, *gauge1, gauge.bytes/2);
155 cudaBindTexture(0, gauge0TexSingle4, *gauge0, gauge.bytes/2);
156 cudaBindTexture(0, gauge1TexSingle4, *gauge1, gauge.bytes/2);
158 cudaBindTexture(0, gauge0TexHalf4, *gauge0, gauge.bytes/2);
159 cudaBindTexture(0, gauge1TexHalf4, *gauge1, gauge.bytes/2);
162 #endif // USE_TEXTURE_OBJECTS
168 #if (!defined USE_TEXTURE_OBJECTS)
171 cudaUnbindTexture(gauge0TexDouble2);
172 cudaUnbindTexture(gauge1TexDouble2);
174 cudaUnbindTexture(gauge0TexSingle2);
175 cudaUnbindTexture(gauge1TexSingle2);
177 cudaUnbindTexture(gauge0TexHalf2);
178 cudaUnbindTexture(gauge1TexHalf2);
182 cudaUnbindTexture(gauge0TexDouble2);
183 cudaUnbindTexture(gauge1TexDouble2);
185 cudaUnbindTexture(gauge0TexSingle4);
186 cudaUnbindTexture(gauge1TexSingle4);
188 cudaUnbindTexture(gauge0TexHalf4);
189 cudaUnbindTexture(gauge1TexHalf4);
199 *gauge1 = gauge.even;
201 *gauge0 = gauge.even;
205 #ifdef USE_TEXTURE_OBJECTS
206 dslashParam.gauge0Tex = oddBit ? gauge.OddTex() : gauge.EvenTex();
207 dslashParam.gauge1Tex = oddBit ? gauge.EvenTex() : gauge.OddTex();
210 cudaBindTexture(0, fatGauge0TexDouble, *gauge0, gauge.bytes/2);
211 cudaBindTexture(0, fatGauge1TexDouble, *gauge1, gauge.bytes/2);
213 cudaBindTexture(0, fatGauge0TexSingle, *gauge0, gauge.bytes/2);
214 cudaBindTexture(0, fatGauge1TexSingle, *gauge1, gauge.bytes/2);
216 cudaBindTexture(0, fatGauge0TexHalf, *gauge0, gauge.bytes/2);
217 cudaBindTexture(0, fatGauge1TexHalf, *gauge1, gauge.bytes/2);
219 #endif // USE_TEXTURE_OBJECTS
225 #if (!defined USE_TEXTURE_OBJECTS)
227 cudaUnbindTexture(fatGauge0TexDouble);
228 cudaUnbindTexture(fatGauge1TexDouble);
230 cudaUnbindTexture(fatGauge0TexSingle);
231 cudaUnbindTexture(fatGauge1TexSingle);
233 cudaUnbindTexture(fatGauge0TexHalf);
234 cudaUnbindTexture(fatGauge1TexHalf);
243 *gauge1 = gauge.even;
245 *gauge0 = gauge.even;
249 #ifdef USE_TEXTURE_OBJECTS
250 dslashParam.longGauge0Tex = oddBit ? gauge.OddTex() : gauge.EvenTex();
251 dslashParam.longGauge1Tex = oddBit ? gauge.EvenTex() : gauge.OddTex();
254 cudaBindTexture(0, longGauge0TexDouble, *gauge0, gauge.bytes/2);
255 cudaBindTexture(0, longGauge1TexDouble, *gauge1, gauge.bytes/2);
258 cudaBindTexture(0, longGauge0TexSingle_norecon, *gauge0, gauge.bytes/2);
259 cudaBindTexture(0, longGauge1TexSingle_norecon, *gauge1, gauge.bytes/2);
261 cudaBindTexture(0, longGauge0TexSingle, *gauge0, gauge.bytes/2);
262 cudaBindTexture(0, longGauge1TexSingle, *gauge1, gauge.bytes/2);
266 cudaBindTexture(0, longGauge0TexHalf_norecon, *gauge0, gauge.bytes/2);
267 cudaBindTexture(0, longGauge1TexHalf_norecon, *gauge1, gauge.bytes/2);
269 cudaBindTexture(0, longGauge0TexHalf, *gauge0, gauge.bytes/2);
270 cudaBindTexture(0, longGauge1TexHalf, *gauge1, gauge.bytes/2);
273 #endif // USE_TEXTURE_OBJECTS
278 #if (!defined USE_TEXTURE_OBJECTS)
280 cudaUnbindTexture(longGauge0TexDouble);
281 cudaUnbindTexture(longGauge1TexDouble);
284 cudaUnbindTexture(longGauge0TexSingle_norecon);
285 cudaUnbindTexture(longGauge1TexSingle_norecon);
287 cudaUnbindTexture(longGauge0TexSingle);
288 cudaUnbindTexture(longGauge1TexSingle);
292 cudaUnbindTexture(longGauge0TexHalf_norecon);
293 cudaUnbindTexture(longGauge1TexHalf_norecon);
295 cudaUnbindTexture(longGauge0TexHalf);
296 cudaUnbindTexture(longGauge1TexHalf);
303 template <
typename spinorFloat>
305 const cudaColorSpinorField *
x=0) {
306 int size = (
sizeof(((
spinorFloat*)0)->x) <
sizeof(
float)) ?
sizeof(
float) :
309 #ifdef USE_TEXTURE_OBJECTS
318 cudaBindTexture(0, spinorTexDouble, in->V(), in->Bytes());
319 if (
out) cudaBindTexture(0, interTexDouble,
out->V(), in->Bytes());
320 if (
x) cudaBindTexture(0, accumTexDouble,
x->V(), in->Bytes());
321 }
else if (
typeid(
spinorFloat) ==
typeid(float4)) {
322 cudaBindTexture(0, spinorTexSingle, in->V(), in->Bytes());
323 if (
out) cudaBindTexture(0, interTexSingle,
out->V(), in->Bytes());
324 if (
x) cudaBindTexture(0, accumTexSingle,
x->V(), in->Bytes());
325 }
else if (
typeid(
spinorFloat) ==
typeid(float2)) {
326 cudaBindTexture(0, spinorTexSingle2, in->V(), in->Bytes());
327 if (
out) cudaBindTexture(0, interTexSingle2,
out->V(), in->Bytes());
328 if (
x) cudaBindTexture(0, accumTexSingle2,
x->V(), in->Bytes());
329 }
else if (
typeid(
spinorFloat) ==
typeid(short4)) {
330 cudaBindTexture(0, spinorTexHalf, in->V(), in->Bytes());
331 cudaBindTexture(0, spinorTexHalfNorm, in->Norm(), in->NormBytes());
332 if (
out) cudaBindTexture(0, interTexHalf,
out->V(), in->Bytes());
333 if (
out) cudaBindTexture(0, interTexHalfNorm,
out->Norm(), in->NormBytes());
334 if (
x) cudaBindTexture(0, accumTexHalf,
x->V(), in->Bytes());
335 if (
x) cudaBindTexture(0, accumTexHalfNorm,
x->Norm(), in->NormBytes());
336 }
else if (
typeid(
spinorFloat) ==
typeid(short2)) {
337 cudaBindTexture(0, spinorTexHalf2, in->V(), in->Bytes());
338 cudaBindTexture(0, spinorTexHalf2Norm, in->Norm(), in->NormBytes());
339 if (
out) cudaBindTexture(0, interTexHalf2,
out->V(), in->Bytes());
340 if (
out) cudaBindTexture(0, interTexHalf2Norm,
out->Norm(), in->NormBytes());
341 if (
x) cudaBindTexture(0, accumTexHalf2,
x->V(), in->Bytes());
342 if (
x) cudaBindTexture(0, accumTexHalf2Norm,
x->Norm(), in->NormBytes());
344 errorQuda(
"Unsupported precision and short vector type");
346 #endif // USE_TEXTURE_OBJECTS
351 template <
typename spinorFloat>
353 const cudaColorSpinorField *
x=0) {
354 #ifndef USE_TEXTURE_OBJECTS
356 cudaUnbindTexture(spinorTexDouble);
357 if (
out) cudaUnbindTexture(interTexDouble);
358 if (
x) cudaUnbindTexture(accumTexDouble);
359 }
else if (
typeid(
spinorFloat) ==
typeid(float4)) {
360 cudaUnbindTexture(spinorTexSingle);
361 if (
out) cudaUnbindTexture(interTexSingle);
362 if (
x) cudaUnbindTexture(accumTexSingle);
363 }
else if (
typeid(
spinorFloat) ==
typeid(float2)) {
364 cudaUnbindTexture(spinorTexSingle2);
365 if (
out) cudaUnbindTexture(interTexSingle2);
366 if (
x) cudaUnbindTexture(accumTexSingle2);
367 }
else if (
typeid(
spinorFloat) ==
typeid(short4)) {
368 cudaUnbindTexture(spinorTexHalf);
369 cudaUnbindTexture(spinorTexHalfNorm);
370 if (
out) cudaUnbindTexture(interTexHalf);
371 if (
out) cudaUnbindTexture(interTexHalfNorm);
372 if (
x) cudaUnbindTexture(accumTexHalf);
373 if (
x) cudaUnbindTexture(accumTexHalfNorm);
374 }
else if (
typeid(
spinorFloat) ==
typeid(short2)) {
375 cudaUnbindTexture(spinorTexHalf2);
376 cudaUnbindTexture(spinorTexHalf2Norm);
377 if (
out) cudaUnbindTexture(interTexHalf2);
378 if (
out) cudaUnbindTexture(interTexHalf2Norm);
379 if (
x) cudaUnbindTexture(accumTexHalf2);
380 if (
x) cudaUnbindTexture(accumTexHalf2Norm);
382 errorQuda(
"Unsupported precision and short vector type");
384 #endif // USE_TEXTURE_OBJECTS
398 void **cloverP,
void **cloverNormP)
402 *cloverP = clover.odd;
403 *cloverNormP = clover.oddNorm;
405 *cloverP = clover.even;
406 *cloverNormP = clover.evenNorm;
409 #ifdef USE_TEXTURE_OBJECTS
410 dslashParam.cloverTex = oddBit ? clover.OddTex() : clover.EvenTex();
414 cudaBindTexture(0, cloverTexDouble, *cloverP, clover.bytes);
416 cudaBindTexture(0, cloverTexSingle, *cloverP, clover.bytes);
418 cudaBindTexture(0, cloverTexHalf, *cloverP, clover.bytes);
419 cudaBindTexture(0, cloverTexNorm, *cloverNormP, clover.norm_bytes);
421 #endif // USE_TEXTURE_OBJECTS
423 return clover.precision;
428 #if (!defined USE_TEXTURE_OBJECTS)
430 cudaUnbindTexture(cloverTexDouble);
432 cudaUnbindTexture(cloverTexSingle);
434 cudaUnbindTexture(cloverTexHalf);
435 cudaUnbindTexture(cloverTexNorm);
437 #endif // not defined USE_TEXTURE_OBJECTS