4 #ifdef USE_TEXTURE_OBJECTS 5 #define TEX1DFETCH(type, tex, idx) tex1Dfetch<type>((tex), idx) 7 #define TEX1DFETCH(type, tex, idx) tex1Dfetch((tex), idx) 10 template<
typename Tex>
14 return __hiloint2double(v.y, v.x);
17 template <
typename Tex>
21 return make_double2(__hiloint2double(v.y, v.x), __hiloint2double(v.w, v.z));
26 int4 v = tex1Dfetch(
t,
i);
27 return make_double2(__hiloint2double(v.y, v.x), __hiloint2double(v.w, v.z));
31 #ifndef USE_TEXTURE_OBJECTS 114 #endif // not defined USE_TEXTURE_OBJECTS 148 void bindGaugeTex(
const cudaGaugeField &gauge,
const int oddBit, T &dslashParam)
151 dslashParam.gauge0 =
const_cast<void*
>(gauge.Odd_p());
152 dslashParam.gauge1 =
const_cast<void*
>(gauge.Even_p());
154 dslashParam.gauge0 =
const_cast<void*
>(gauge.Even_p());
155 dslashParam.gauge1 =
const_cast<void*
>(gauge.Odd_p());
158 #ifdef USE_TEXTURE_OBJECTS 159 dslashParam.gauge0Tex = oddBit ? gauge.OddTex() : gauge.EvenTex();
160 dslashParam.gauge1Tex = oddBit ? gauge.EvenTex() : gauge.OddTex();
170 cudaBindTexture(0,
gauge0TexHalf2, dslashParam.gauge0, gauge.Bytes()/2);
171 cudaBindTexture(0,
gauge1TexHalf2, dslashParam.gauge1, gauge.Bytes()/2);
181 cudaBindTexture(0,
gauge0TexHalf4, dslashParam.gauge0, gauge.Bytes()/2);
182 cudaBindTexture(0,
gauge1TexHalf4, dslashParam.gauge1, gauge.Bytes()/2);
185 #endif // USE_TEXTURE_OBJECTS 191 #if (!defined USE_TEXTURE_OBJECTS) 218 template <
typename T>
222 dslashParam.gauge0 =
const_cast<void*
>(gauge.Odd_p());
223 dslashParam.gauge1 =
const_cast<void*
>(gauge.Even_p());
225 dslashParam.gauge0 =
const_cast<void*
>(gauge.Even_p());
226 dslashParam.gauge1 =
const_cast<void*
>(gauge.Odd_p());
229 #ifdef USE_TEXTURE_OBJECTS 230 dslashParam.gauge0Tex = oddBit ? gauge.OddTex() : gauge.EvenTex();
231 dslashParam.gauge1Tex = oddBit ? gauge.EvenTex() : gauge.OddTex();
243 #endif // USE_TEXTURE_OBJECTS 249 #if (!defined USE_TEXTURE_OBJECTS) 263 template <
typename T>
267 dslashParam.gauge0 =
const_cast<void*
>(gauge.Odd_p());
268 dslashParam.gauge1 =
const_cast<void*
>(gauge.Even_p());
270 dslashParam.gauge0 =
const_cast<void*
>(gauge.Even_p());
271 dslashParam.gauge1 =
const_cast<void*
>(gauge.Odd_p());
274 dslashParam.longPhase0 =
static_cast<char*
>(dslashParam.longGauge0) + gauge.PhaseOffset();
275 dslashParam.longPhase1 =
static_cast<char*
>(dslashParam.longGauge1) + gauge.PhaseOffset();
278 #ifdef USE_TEXTURE_OBJECTS 279 dslashParam.longGauge0Tex = oddBit ? gauge.OddTex() : gauge.EvenTex();
280 dslashParam.longGauge1Tex = oddBit ? gauge.EvenTex() : gauge.OddTex();
283 dslashParam.longPhase0Tex = oddBit ? gauge.OddPhaseTex() : gauge.EvenPhaseTex();
284 dslashParam.longPhase1Tex = oddBit ? gauge.EvenPhaseTex() : gauge.OddPhaseTex();
291 cudaBindTexture(0,
longPhase0TexDouble, (
char*)(dslashParam.gauge0) + gauge.PhaseOffset(), gauge.PhaseBytes()/2);
292 cudaBindTexture(0,
longPhase1TexDouble, (
char*)(dslashParam.gauge1) + gauge.PhaseOffset(), gauge.PhaseBytes()/2);
302 cudaBindTexture(0,
longPhase0TexSingle, (
char*)(dslashParam.gauge0) + gauge.PhaseOffset(), gauge.PhaseBytes()/2);
303 cudaBindTexture(0,
longPhase1TexSingle, (
char*)(dslashParam.gauge1) + gauge.PhaseOffset(), gauge.PhaseBytes()/2);
314 cudaBindTexture(0,
longPhase0TexHalf, (
char*)(dslashParam.gauge0) + gauge.PhaseOffset(), gauge.PhaseBytes()/2);
315 cudaBindTexture(0,
longPhase1TexHalf, (
char*)(dslashParam.gauge1) + gauge.PhaseOffset(), gauge.PhaseBytes()/2);
319 #endif // USE_TEXTURE_OBJECTS 324 #if (!defined USE_TEXTURE_OBJECTS) 361 template <
typename spinorFloat>
363 const cudaColorSpinorField *
x=0) {
364 int size = (
sizeof(((
spinorFloat*)0)->x) <
sizeof(
float)) ?
sizeof(
float) :
367 #ifndef USE_TEXTURE_OBJECTS 373 }
else if (
typeid(
spinorFloat) ==
typeid(float4)) {
378 }
else if (
typeid(
spinorFloat) ==
typeid(float2)) {
383 }
else if (
typeid(
spinorFloat) ==
typeid(short4)) {
392 }
else if (
typeid(
spinorFloat) ==
typeid(short2)) {
402 errorQuda(
"Unsupported precision and short vector type");
404 #endif // !USE_TEXTURE_OBJECTS 409 template <
typename spinorFloat>
411 const cudaColorSpinorField *
x=0) {
412 #ifndef USE_TEXTURE_OBJECTS 418 }
else if (
typeid(
spinorFloat) ==
typeid(float4)) {
423 }
else if (
typeid(
spinorFloat) ==
typeid(float2)) {
428 }
else if (
typeid(
spinorFloat) ==
typeid(short4)) {
437 }
else if (
typeid(
spinorFloat) ==
typeid(short2)) {
447 errorQuda(
"Unsupported precision and short vector type");
449 #endif // USE_TEXTURE_OBJECTS 467 template <
typename T>
471 dslashParam.clover =
clover.odd;
472 dslashParam.cloverNorm = (
float*)
clover.oddNorm;
474 dslashParam.clover =
clover.even;
475 dslashParam.cloverNorm = (
float*)
clover.evenNorm;
478 #ifdef USE_TEXTURE_OBJECTS 479 dslashParam.cloverTex = oddBit ?
clover.OddTex() :
clover.EvenTex();
490 #endif // USE_TEXTURE_OBJECTS 497 #if (!defined USE_TEXTURE_OBJECTS) 506 #endif // not defined USE_TEXTURE_OBJECTS 509 template <
typename T>
513 dslashParam.clover =
clover.odd;
514 dslashParam.cloverNorm = (
float*)
clover.oddNorm;
515 #ifndef DYNAMIC_CLOVER
516 dslashParam.cloverInv = cloverInv.odd;
517 dslashParam.cloverInvNorm = (
float*)cloverInv.oddNorm;
520 dslashParam.clover =
clover.even;
521 dslashParam.cloverNorm = (
float*)
clover.evenNorm;
522 #ifndef DYNAMIC_CLOVER
523 dslashParam.clover = cloverInv.even;
524 dslashParam.cloverInvNorm = (
float*)cloverInv.evenNorm;
528 #ifdef USE_TEXTURE_OBJECTS 529 dslashParam.cloverTex = oddBit ?
clover.OddTex() :
clover.EvenTex();
531 #ifndef DYNAMIC_CLOVER 532 dslashParam.cloverInvTex = oddBit ? cloverInv.OddTex() : cloverInv.EvenTex();
533 if (cloverInv.precision ==
QUDA_HALF_PRECISION) dslashParam.cloverInvNormTex = oddBit ? cloverInv.OddNormTex() : cloverInv.EvenNormTex();
538 #ifndef DYNAMIC_CLOVER 543 #ifndef DYNAMIC_CLOVER 549 #ifndef DYNAMIC_CLOVER 550 cudaBindTexture(0,
cloverInvTexHalf, dslashParam.cloverInv, cloverInv.bytes);
551 cudaBindTexture(0,
cloverInvTexNorm, dslashParam.cloverInvNorm, cloverInv.norm_bytes);
554 #endif // USE_TEXTURE_OBJECTS 561 #if (!defined USE_TEXTURE_OBJECTS) 565 #ifndef DYNAMIC_CLOVER 572 #ifndef DYNAMIC_CLOVER 580 #ifndef DYNAMIC_CLOVER 585 #endif // not defined USE_TEXTURE_OBJECTS 589 #if defined(DIRECT_ACCESS_LINK) || defined(DIRECT_ACCESS_WILSON_SPINOR) || \ 590 defined(DIRECT_ACCESS_WILSON_ACCUM) || defined(DIRECT_ACCESS_WILSON_PACK_SPINOR) || \ 591 defined(DIRECT_ACCESS_WILSON_INTER) || defined(DIRECT_ACCESS_WILSON_PACK_SPINOR) || \ 592 defined(DIRECT_ACCESS_CLOVER) || defined(DIRECT_ACCESS_PACK) || \ 593 defined(DIRECT_ACCESS_LONG_LINK) || defined(DIRECT_ACCESS_FAT_LINK) 595 static inline __device__
float short2float(
short a) {
599 static inline __device__
short float2short(
float c,
float a) {
603 static inline __device__ short4 float42short4(
float c, float4
a) {
604 return make_short4(float2short(
c,
a.x), float2short(
c,
a.y), float2short(
c,
a.z), float2short(
c,
a.w));
607 static inline __device__ float4 short42float4(short4
a) {
608 return make_float4(short2float(
a.x), short2float(
a.y), short2float(
a.z), short2float(
a.w));
611 static inline __device__ float2 short22float2(short2
a) {
612 return make_float2(short2float(
a.x), short2float(
a.y));
614 #endif // DIRECT_ACCESS inclusions texture< short4, 1, cudaReadModeNormalizedFloat > cloverInvTexHalf
void unbindGaugeTex(const cudaGaugeField &gauge)
void bindFatGaugeTex(const cudaGaugeField &gauge, const int oddBit, T &dslashParam)
texture< short, 1, cudaReadModeNormalizedFloat > longPhase0TexHalf
QudaPrecision bindCloverTex(const FullClover &clover, const int oddBit, T &dslashParam)
enum QudaPrecision_s QudaPrecision
texture< short4, 1, cudaReadModeNormalizedFloat > gauge1TexHalf4
texture< int4, 1 > muLink0TexDouble
texture< float2, 1, cudaReadModeElementType > longGauge0TexSingle_norecon
texture< int4, 1 > cloverInvTexDouble
texture< float, 1, cudaReadModeElementType > longPhase1TexSingle
texture< short4, 1, cudaReadModeNormalizedFloat > ghostSpinorTexHalf
texture< float4, 1, cudaReadModeElementType > gauge0TexSingle4
texture< float2, 1, cudaReadModeElementType > fatGauge1TexSingle
texture< int4, 1 > fatGauge0TexDouble
QudaPrecision bindTwistedCloverTex(const FullClover clover, const FullClover cloverInv, const int oddBit, T &dslashParam)
texture< int4, 1 > accumTexDouble
texture< short2, 1, cudaReadModeNormalizedFloat > fatGauge0TexHalf
texture< int2, 1 > longPhase0TexDouble
texture< float2, 1, cudaReadModeElementType > longGauge1TexSingle_norecon
texture< short4, 1, cudaReadModeNormalizedFloat > spinorTexHalf
texture< short2, 1, cudaReadModeNormalizedFloat > spinorTexHalf2
texture< float, 1, cudaReadModeElementType > interTexHalf2Norm
texture< short2, 1, cudaReadModeNormalizedFloat > gauge1TexHalf2
texture< float2, 1, cudaReadModeElementType > gauge1TexSingle2
texture< float2, 1, cudaReadModeElementType > siteLink0TexSingle_norecon
void unbindSpinorTex(const cudaColorSpinorField *in, const cudaColorSpinorField *out=0, const cudaColorSpinorField *x=0)
texture< float2, 1, cudaReadModeElementType > siteLink1TexSingle_norecon
texture< int4, 1 > ghostSpinorTexDouble
texture< short2, 1, cudaReadModeNormalizedFloat > fatGauge1TexHalf
texture< float4, 1, cudaReadModeElementType > longGauge1TexSingle
static __inline__ __device__ double fetch_double(Tex t, int i)
texture< int4, 1 > interTexDouble
texture< float, 1, cudaReadModeElementType > cloverInvTexNorm
texture< float4, 1, cudaReadModeElementType > siteLink1TexSingle_recon
texture< short2, 1, cudaReadModeNormalizedFloat > accumTexHalf2
texture< short2, 1, cudaReadModeNormalizedFloat > ghostSpinorTexHalf2
texture< float2, 1, cudaReadModeElementType > spinorTexSingle2
texture< int4, 1 > siteLink1TexDouble
texture< float4, 1, cudaReadModeElementType > gauge1TexSingle4
texture< float4, 1, cudaReadModeElementType > cloverTexSingle
void unbindLongGaugeTex(const cudaGaugeField &gauge)
static __inline__ __device__ double2 fetch_double2_old(texture< int4, 1 > t, int i)
texture< short2, 1, cudaReadModeNormalizedFloat > longGauge0TexHalf_norecon
texture< int4, 1 > gauge0TexDouble2
static __inline__ __device__ double2 fetch_double2(Tex t, int i)
texture< float2, 1, cudaReadModeElementType > siteLink0TexSingle
texture< float2, 1, cudaReadModeElementType > accumTexSingle2
texture< int4, 1 > spinorTexDouble
texture< float4, 1, cudaReadModeElementType > spinorTexSingle
texture< float, 1, cudaReadModeElementType > ghostSpinorTexHalf2Norm
texture< float4, 1, cudaReadModeElementType > accumTexSingle
texture< int4, 1 > cloverTexDouble
int bindSpinorTex(const cudaColorSpinorField *in, const cudaColorSpinorField *out=0, const cudaColorSpinorField *x=0)
texture< float2, 1, cudaReadModeElementType > siteLink1TexSingle
texture< float4, 1, cudaReadModeElementType > interTexSingle
texture< int4, 1 > muLink1TexDouble
texture< float4, 1, cudaReadModeElementType > ghostSpinorTexSingle
texture< short2, 1, cudaReadModeNormalizedFloat > interTexHalf2
texture< float4, 1, cudaReadModeElementType > longGauge0TexSingle
texture< float2, 1, cudaReadModeElementType > interTexSingle2
texture< float2, 1, cudaReadModeElementType > muLink1TexSingle
texture< float2, 1, cudaReadModeElementType > muLink0TexSingle
texture< int4, 1 > fatGauge1TexDouble
void unbindCloverTex(const FullClover clover)
texture< int2, 1 > longPhase1TexDouble
texture< float, 1, cudaReadModeElementType > longPhase0TexSingle
texture< float4, 1, cudaReadModeElementType > siteLink0TexSingle_recon
texture< short4, 1, cudaReadModeNormalizedFloat > longGauge0TexHalf
texture< float, 1, cudaReadModeElementType > accumTexHalfNorm
texture< float2, 1, cudaReadModeElementType > fatGauge0TexSingle
texture< short4, 1, cudaReadModeNormalizedFloat > longGauge1TexHalf
cpuColorSpinorField * out
void bindLongGaugeTex(const cudaGaugeField &gauge, const int oddBit, T &dslashParam)
texture< float, 1, cudaReadModeElementType > interTexHalfNorm
texture< short, 1, cudaReadModeNormalizedFloat > longPhase1TexHalf
texture< float, 1, cudaReadModeElementType > cloverTexNorm
texture< float, 1, cudaReadModeElementType > spinorTexHalfNorm
texture< short4, 1, cudaReadModeNormalizedFloat > interTexHalf
texture< int4, 1 > longGauge0TexDouble
texture< float, 1, cudaReadModeElementType > accumTexHalf2Norm
texture< short4, 1, cudaReadModeNormalizedFloat > accumTexHalf
texture< short2, 1, cudaReadModeNormalizedFloat > gauge0TexHalf2
texture< float2, 1, cudaReadModeElementType > ghostSpinorTexSingle2
texture< int4, 1 > longGauge1TexDouble
texture< short4, 1, cudaReadModeNormalizedFloat > cloverTexHalf
texture< int4, 1 > siteLink0TexDouble
void unbindTwistedCloverTex(const FullClover clover)
texture< float4, 1, cudaReadModeElementType > cloverInvTexSingle
texture< short4, 1, cudaReadModeNormalizedFloat > gauge0TexHalf4
texture< int4, 1 > gauge1TexDouble2
texture< float, 1, cudaReadModeElementType > spinorTexHalf2Norm
texture< float2, 1, cudaReadModeElementType > gauge0TexSingle2
texture< float, 1, cudaReadModeElementType > ghostSpinorTexHalfNorm
#define TEX1DFETCH(type, tex, idx)
void bindGaugeTex(const cudaGaugeField &gauge, const int oddBit, T &dslashParam)
void unbindFatGaugeTex(const cudaGaugeField &gauge)
texture< short2, 1, cudaReadModeNormalizedFloat > longGauge1TexHalf_norecon