// ---------------------------------------------------------------------------
// NOTE(review): this chunk is a garbled extraction -- the leading integers on
// many lines are original source line numbers fused into the text, and a
// number of lines (the class header, #else branches, #endif lines, braces)
// were dropped.  Comments below annotate the surviving fragments of the
// texture-object flavored Texture<OutputType, InputType> accessor.
// ---------------------------------------------------------------------------
12 #ifdef USE_TEXTURE_OBJECTS
// Accessor parameterized on the register-level output type and the in-memory
// input type.
14 template<
typename OutputType,
typename InputType>
// Data member: a cudaTextureObject_t when texture fetches are enabled;
// presumably a raw InputType* in the DIRECT_ACCESS_BLAS branch (that
// alternative declaration was dropped by the extraction) -- TODO confirm.
20 #ifndef DIRECT_ACCESS_BLAS
21 cudaTextureObject_t
spinor;
28 #ifndef DIRECT_ACCESS_BLAS
// Constructor: captures the field's data pointer, cast to InputType*.
31 Texture(
const cudaColorSpinorField *
x) :
spinor((InputType*)(x->
V())) { }
// Fragment of the copy-assignment operator: self-assignment guard, then
// member-wise copy of the spinor handle/pointer.
37 if (
this != &tex)
spinor = tex.spinor;
41 #ifndef DIRECT_ACCESS_BLAS
// fetch(): two variants (texture path vs direct path); both bodies were
// dropped by the extraction -- only the signatures survive.
42 __device__
inline OutputType
fetch(
unsigned int idx)
49 __device__
inline OutputType
fetch(
unsigned int idx)
// operator[] forwards to fetch().
53 __device__
inline OutputType
operator[](
unsigned int idx) {
return fetch(idx); }
56 #ifndef DIRECT_ACCESS_BLAS
// Fragments reassembling a double / double2 from int2 / int4 texture words
// via __hiloint2double (the enclosing function signatures were dropped).
58 {
return __hiloint2double(v.y, v.x); }
61 {
return make_double2(__hiloint2double(v.y, v.x), __hiloint2double(v.w, v.z)); }
// Double-precision texture fetches.  Texture hardware cannot return doubles
// directly, so the 64-bit words are fetched as int2/int4 and reassembled with
// __hiloint2double.  Requires compute capability >= 1.3 (first with double
// support); older devices get stubs that return zero.
// NOTE(review): the extraction had dropped the closing braces, the #else, the
// #endif, and the stub fetch_double2 signature; they are restored here from
// the surviving fragments and the signature cross-reference listing.
#if (__COMPUTE_CAPABILITY__ >= 130)

// Fetch one double from a 1-D int2 texture at index i.
__inline__ __device__ double fetch_double(texture<int2, 1> t, int i)
{
  int2 v = tex1Dfetch(t, i);
  return __hiloint2double(v.y, v.x);
}

// Fetch one double2 from a 1-D int4 texture at index i.
__inline__ __device__ double2 fetch_double2(texture<int4, 1> t, int i)
{
  int4 v = tex1Dfetch(t, i);
  return make_double2(__hiloint2double(v.y, v.x), __hiloint2double(v.w, v.z));
}

#else // no double support: keep the code compiling, return zeros

__inline__ __device__ double fetch_double(texture<int2, 1> t, int i){ return 0.0; }

__inline__ __device__ double2 fetch_double2(texture<int4, 1> t, int i){ return make_double2(0.0, 0.0); }

#endif
// Maximum number of texels addressable through a 1-D linear texture (2^27).
97 #define MAX_TEXELS (1<<27)
// Fragments of the texture-reference flavored Texture class (used when
// texture objects are unavailable), parameterized additionally on a static
// tex_id that indexes a global tex_id_table[] of bound references.
// NOTE(review): the class header, member declarations, and several
// #else/#endif lines were dropped by the extraction.
104 template<
typename OutputType,
typename InputType,
int tex_
id>
108 #ifdef DIRECT_ACCESS_BLAS
117 #ifdef DIRECT_ACCESS_BLAS
123 #ifdef DIRECT_ACCESS_BLAS
// Constructor fragment: capture the field pointer and byte count ...
124 :
spinor((
const InputType*)x->V()), bytes(x->Bytes())
// ... then guard against double-binding the same texture reference,
// mark the id as in use, and bind the data to it.
129 if (tex_id > 0 && tex_id <=
MAX_TEX_ID && tex_id_table[tex_id]) {
130 errorQuda(
"Already bound to this texture reference");
132 tex_id_table[tex_id] =
true;
134 bind((
const InputType*)x->V(), x->Bytes()); bound =
true;
140 #ifdef DIRECT_ACCESS_BLAS
// Copy-constructor fragment: shares pointer and size, does not rebind.
141 :
spinor(tex.spinor), bytes(tex.bytes)
// Destructor fragment: unbind the texture and release the id slot.
146 unbind(); bound =
false; tex_id_table[tex_id]=
false;
150 #ifdef DIRECT_ACCESS_BLAS
// Direct-access stubs: bind is a no-op ...
157 inline void bind(
const InputType*,
size_t bytes){ }
// ... and fetch returns an OutputType with only its .x component set to 0
// (specializations below replace this with real direct loads).
161 __device__
inline OutputType
fetch(
unsigned int idx) { OutputType
x; x.x =0;
return x; };
// Dangling template headers -- presumably for the bind/unbind and fetch
// member declarations that the specialization macros below define; the
// declarations themselves were dropped by the extraction.
165 template<
typename OutputType,
typename InputType,
int tex_
id>
168 template<
typename OutputType,
typename InputType,
int tex_
id>
// Declares one set of texture references for spinor access, tagged by `id`:
// short2/short4 are normalized to float on fetch; double2 data is declared
// as an int4 texture and reassembled with fetch_double2().
171 #define DECL_TEX(id) \
172 texture<short2,1,cudaReadModeNormalizedFloat> tex_short2_##id; \
173 texture<short4,1,cudaReadModeNormalizedFloat> tex_short4_##id; \
174 texture<float,1> tex_float_##id; \
175 texture<float2,1> tex_float2_##id; \
176 texture<float4,1> tex_float4_##id; \
177 texture<int4,1> tex_double2_##id;
// Specializes Texture<outtype,intype,id>::bind/unbind to (un)bind the
// matching tex_<intype>_<id> texture reference.
180 #define DEF_BIND_UNBIND(outtype, intype, id) \
181 template<> inline void Texture<outtype,intype,id>::bind(const intype *ptr, size_t bytes) \
182 { cudaBindTexture(0,tex_##intype##_##id, ptr, bytes); } \
183 template<> inline void Texture<outtype,intype,id>::unbind() { cudaUnbindTexture(tex_##intype##_##id); }
// fetch() specialization that reads through the bound texture reference.
186 #define DEF_FETCH_TEX(outtype, intype, id) \
187 template<> __device__ inline outtype Texture<outtype,intype,id>::fetch(unsigned int idx) \
188 { return tex1Dfetch(tex_##intype##_##id,idx); }
// fetch() specialization that loads straight from global memory (spinor[]),
// converting between vector types via copyFloatN.
191 #define DEF_FETCH_DIRECT(outtype, intype, id) \
192 template<> __device__ inline outtype Texture<outtype,intype,id>::fetch(unsigned int idx) \
193 { outtype out; copyFloatN(out, spinor[idx]); return out; }
// Select the fetch flavor: direct global-memory loads when DIRECT_ACCESS_BLAS
// is defined, texture fetches otherwise.
// NOTE(review): the #else/#endif were dropped by the extraction (two
// unconditional same-name #defines make no sense); restored here.
#if defined(DIRECT_ACCESS_BLAS)
#define DEF_FETCH DEF_FETCH_DIRECT
#else
#define DEF_FETCH DEF_FETCH_TEX
#endif
// Double-precision fetch: direct loads when textures cannot be used for
// doubles (DIRECT_ACCESS_BLAS, or the Fermi double-texture workaround);
// otherwise fetch through the int4 texture and reassemble via fetch_double2.
// Note the specialization is always on double2 storage regardless of intype.
// NOTE(review): the #else/#endif were dropped by the extraction; restored.
#if defined(DIRECT_ACCESS_BLAS) || defined(FERMI_NO_DBLE_TEX)
#define DEF_FETCH_DBLE DEF_FETCH_DIRECT
#else
#define DEF_FETCH_DBLE(outtype, intype, id)                                                \
  template<> __device__ inline outtype Texture<outtype,double2,id>::fetch(unsigned int idx) \
  { outtype out; copyFloatN(out, fetch_double2(tex_double2_##id,idx)); return out; }
#endif
// Mixed-precision double fetch: direct loads when textures are unusable for
// doubles; otherwise fetch the lower-precision texture and widen into the
// double-based outtype via copyFloatN.
// NOTE(review): the #else/#endif were dropped by the extraction; restored.
#if defined(DIRECT_ACCESS_BLAS) || defined(FERMI_NO_DBLE_TEX)
#define DEF_FETCH_DBLE_MIXED DEF_FETCH_DIRECT
#else
#define DEF_FETCH_DBLE_MIXED(outtype, intype, id)                                          \
  template<> __device__ inline outtype Texture<outtype,intype,id>::fetch(unsigned int idx) \
  { outtype out; copyFloatN(out, tex1Dfetch(tex_##intype##_##id,idx)); return out; }
#endif
// Convenience: define bind/unbind plus the matching fetch in one macro.
220 #define DEF_BIND_UNBIND_FETCH(outtype, intype, id) \
221 DEF_BIND_UNBIND(outtype, intype, id) \
222 DEF_FETCH(outtype, intype, id)
// Instantiates every supported (register type, storage type) combination for
// one texture id: same-precision paths get bind/unbind + fetch together;
// double storage uses the DBLE fetch variant, and double registers over
// float/short storage use the DBLE_MIXED variant.
225 #define DEF_ALL(id) \
227 DEF_BIND_UNBIND_FETCH(float2, short2, id) \
228 DEF_BIND_UNBIND_FETCH(float4, short4, id) \
229 DEF_BIND_UNBIND_FETCH(float, float, id) \
230 DEF_BIND_UNBIND_FETCH(float2, float2, id) \
231 DEF_BIND_UNBIND_FETCH(float4, float4, id) \
232 DEF_BIND_UNBIND(double2, double2, id) \
233 DEF_BIND_UNBIND(float2, double2, id) \
234 DEF_FETCH_DBLE(double2, double2, id) \
235 DEF_FETCH_DBLE(float2, double2, id) \
236 DEF_BIND_UNBIND(double2, float2, id) \
237 DEF_BIND_UNBIND(double4, float4, id) \
238 DEF_BIND_UNBIND(double2, short2, id) \
239 DEF_BIND_UNBIND(double4, short4, id) \
240 DEF_FETCH_DBLE_MIXED(double2, float2, id) \
241 DEF_FETCH_DBLE_MIXED(double4, float4, id) \
242 DEF_FETCH_DBLE_MIXED(double2, short2, id) \
243 DEF_FETCH_DBLE_MIXED(double4, short4, id)
// Tear down the helper macros after the DEF_ALL instantiations.
// NOTE(review): the extraction appears to have dropped some #undef lines
// (original lines 255-256 and 259) from this run -- confirm against upstream.
253 #undef DEF_BIND_UNBIND
254 #undef DEF_FETCH_DIRECT
257 #undef DEF_FETCH_DBLE
258 #undef DEF_BIND_UNBIND_FETCH
261 #endif // USE_TEXTURE_OBJECTS
// Sanity checks on the (register, intermediate, storage) type triple used by
// Spinor: aborts via errorQuda on an unsupported combination.  Component
// sizes are probed with the sizeof(((T*)0)->x) idiom, so each type must be a
// CUDA vector type with an .x member.
// NOTE(review): the function signature line and closing brace were dropped by
// the extraction; the name is grounded by the
// checkTypes<RegType,InterType,StoreType>() call sites visible elsewhere in
// this chunk.
template <typename RegType, typename InterType, typename StoreType>
void checkTypes() {

  const size_t reg_size = sizeof(((RegType*)0)->x);
  const size_t inter_size = sizeof(((InterType*)0)->x);
  const size_t store_size = sizeof(((StoreType*)0)->x);

  // NOTE(review): the && chain means a register/intermediate precision
  // mismatch is tolerated when the storage component is 2 bytes (short) or
  // the intermediate component is 4 bytes (float) -- confirm this is the
  // intended condition rather than ||.
  if (reg_size != inter_size && store_size != 2 && inter_size != 4)
    errorQuda("Precision of register (%lu) and intermediate (%lu) types must match\n",
              (unsigned long)reg_size, (unsigned long)inter_size);

  // Intermediate and storage types must have the same vector length.
  if (vecLength<InterType>() != vecLength<StoreType>()) {
    errorQuda("Vector lengths intermediate and register types must match\n");
  }

  // All three types must be recognized vector types (vecLength() > 0).
  if (vecLength<RegType>() == 0) errorQuda("Vector type not supported\n");
  if (vecLength<InterType>() == 0) errorQuda("Vector type not supported\n");
  if (vecLength<StoreType>() == 0) errorQuda("Vector type not supported\n");
}
// Fragment of store_norm(float *norm, FloatN x[M], int i) (signature per the
// cross-reference listing at the end of this chunk): computes the maximum
// absolute component over the M vector elements by a per-element max_fabs
// followed by an fmaxf reduction into c[0].  The declaration of c[], the
// store into norm[i], and the return value were dropped by the extraction --
// do not infer them from here.
296 template <
typename FloatN,
int M>
300 for (
int j=0; j<M; j++) c[j] =
max_fabs(x[j]);
302 for (
int j=1; j<M; j++) c[0] = fmaxf(c[j],c[0]);
// Number of scalar components in RegType (total size over component size).
308 #define REG_LENGTH (sizeof(RegType) / sizeof(((RegType*)0)->x))
// True when the component type of `type` is short (2-byte storage format).
311 #define IS_SHORT(type) (sizeof( ((type*)0)->x ) == sizeof(short) )
// Fragmented template header and constructors of the Spinor accessor, which
// bundles a storage pointer, a Texture, a float norm array, and the field
// stride.  N appears in the stride and load-width computations; `write` and
// tex_id presumably select write-back / texture-reference behavior elsewhere
// -- TODO confirm.  The class header line itself was dropped by the
// extraction.
321 template <
typename RegType,
typename InterType,
typename StoreType,
int N,
int write,
int tex_id=-1>
326 #ifdef USE_TEXTURE_OBJECTS // texture objects
// Default constructor: all pointers null, stride zero.
336 : spinor(0), tex(), norm(0), stride(0) { }
// Construct from a cudaColorSpinorField: capture data, texture, and norm
// pointers, and derive the stride from the field length; validates types.
339 : spinor((StoreType*)x.
V()), tex(&x), norm((float*)x.
Norm()),
340 stride(x.Length()/(N*
REG_LENGTH)) { checkTypes<RegType,InterType,StoreType>(); }
// Copy constructor: member-wise copy, no revalidation.
343 : spinor(st.spinor), tex(st.tex), norm(st.norm), stride(st.stride) { }
// Raw-pointer constructor; validates types.
345 Spinor(StoreType* spinor,
float* norm,
int stride)
346 : spinor(spinor), norm(norm), stride(stride) { checkTypes<RegType, InterType, StoreType>(); }
// set(): rebind this Spinor to another field's data and norm pointers, then
// revalidate the type triple.  Fragment: the USE_TEXTURE_OBJECTS branch body
// and the closing brace were dropped by the extraction.
358 void set(
const cudaColorSpinorField &x){
359 spinor = (StoreType*)x.V();
360 #ifdef USE_TEXTURE_OBJECTS
365 norm = (
float*)x.Norm();
368 checkTypes<RegType,InterType,StoreType>();
// Device-side load: gathers M InterType words at stride intervals (via the
// texture or direct spinor[] access) into y[], then converts them into N
// RegType values in x[].  M = N*sizeof(RegType)/sizeof(InterType).
// Fragment: the declarations of y[] and xN, the branch conditions, and the
// closing braces were dropped by the extraction.
373 __device__
inline void load(RegType x[],
const int i) {
375 const int M = (N *
sizeof(RegType)) /
sizeof(InterType);
379 #ifndef USE_TEXTURE_OBJECTS
// Texture gather scaled by xN (presumably the per-site norm factor for
// short storage -- TODO confirm; xN's declaration was dropped).
386 for (
int j=0; j<M; j++) y[j] = xN*tex[i + j*stride];
// Unscaled texture gather.
389 for (
int j=0; j<M; j++)
copyFloatN(y[j], tex[i + j*stride]);
391 #ifndef USE_TEXTURE_OBJECTS
397 for (
int j=0; j<M; j++) {
// Direct global-memory gather.
403 for (
int j=0; j<M; j++)
copyFloatN(y[j],spinor[i + j*stride]);
// Convert intermediate y[] into the register representation x[].
409 convert<RegType, InterType>(
x,
y, N);
// Device-side save: converts x[] (RegType) into y[] (InterType), then
// scatters to global memory at stride intervals.  One branch computes a
// per-site factor C via store_norm and scales y[] by it on the way out
// (presumably the short-storage path -- TODO confirm); the other writes y[]
// unscaled.  Fragment: y[]'s declaration, the branch conditions, and the
// closing braces were dropped by the extraction.
413 __device__
inline void save(RegType x[],
int i) {
415 const int M = (N *
sizeof(RegType)) /
sizeof(InterType);
417 convert<InterType, RegType>(
y,
x, M);
420 float C = store_norm<InterType, M>(norm,
y, i);
422 for (
int j=0; j<M; j++)
copyFloatN(spinor[i+j*stride], C*y[j]);
425 for (
int j=0; j<M; j++)
copyFloatN(spinor[i+j*stride], y[j]);
// Host-side backup: allocates host buffers (ownership passes to the caller
// via spinor_h / norm_h) and copies the device spinor -- and the norm array
// when norm_bytes > 0 -- back to them.  Fragment: the closing braces were
// dropped by the extraction.  NOTE(review): the cudaMemcpy return codes are
// not checked.
431 void save(
char **spinor_h,
char **norm_h,
size_t bytes,
size_t norm_bytes) {
433 *spinor_h =
new char[bytes];
434 cudaMemcpy(*spinor_h, spinor, bytes, cudaMemcpyDeviceToHost);
435 if (norm_bytes > 0) {
436 *norm_h =
new char[norm_bytes];
437 cudaMemcpy(*norm_h, norm, norm_bytes, cudaMemcpyDeviceToHost);
// Host-side restore: copies the backed-up host buffers produced by save()
// back to the device (norm only when norm_bytes > 0).  Fragment: the tail --
// presumably the delete[] of the host buffers and the closing braces -- was
// dropped by the extraction.  NOTE(review): cudaMemcpy return codes are not
// checked.
444 void load(
char **spinor_h,
char **norm_h,
size_t bytes,
size_t norm_bytes) {
446 cudaMemcpy(spinor, *spinor_h, bytes, cudaMemcpyHostToDevice);
447 if (norm_bytes > 0) {
448 cudaMemcpy(norm, *norm_h, norm_bytes, cudaMemcpyHostToDevice);
// Accessor returning the raw device data pointer, followed by a dangling
// error fragment -- presumably the fall-through case of Precision() (per the
// cross-reference listing at the end of this chunk); its surrounding
// if/else-if chain was dropped by the extraction.
458 void*
V() {
return (
void*)spinor; }
466 else errorQuda(
"Unknown precision type\n");
477 #ifndef USE_TEXTURE_OBJECTS
enum QudaPrecision_s QudaPrecision
__host__ __device__ ValueType norm(const complex< ValueType > &z)
Returns the magnitude of z squared.
Spinor(StoreType *spinor, float *norm, int stride)
void load(char **spinor_h, char **norm_h, size_t bytes, size_t norm_bytes)
Texture(const cudaColorSpinorField *x)
void set(const cudaColorSpinorField &x)
cpuColorSpinorField * spinor
__inline__ __device__ double fetch_double(texture< int2, 1 > t, int i)
__device__ void load(RegType x[], const int i)
__device__ void copyFloatN(FloatN &a, const FloatN &b)
QudaPrecision Precision()
Spinor(const cudaColorSpinorField &x)
__device__ float store_norm(float *norm, FloatN x[M], int i)
__inline__ __device__ double2 fetch_double2(texture< int4, 1 > t, int i)
Texture(const Texture &tex)
__device__ void save(RegType x[], int i)
cpuColorSpinorField * out
void save(char **spinor_h, char **norm_h, size_t bytes, size_t norm_bytes)
__forceinline__ __host__ __device__ float max_fabs(const float4 &c)
Texture & operator=(const Texture &tex)
__device__ OutputType operator[](unsigned int idx)
Spinor & operator=(const Spinor &src)
void setStride(int stride_)
__device__ OutputType fetch(unsigned int idx)
bool tex_id_table[MAX_TEX_ID]
void bind(const InputType *, size_t bytes)