v0.9.0/doc/register__traits_8h_source.html

 #ifndef _REGISTER_TRAITS_H
 #define _REGISTER_TRAITS_H

 #include <quda_internal.h>
 #include <generics/ldg.h>
 #include <complex_quda.h>
 #include <inline_ptx.h>

 namespace quda {

   /*
     PromoteTypeId<T,U>::Type names the type to use for a computation
     that mixes operands of type T and type U.  Unless one of the
     specializations below applies, the result is simply T.
   */
   template<class T, class U> struct PromoteTypeId {
     typedef T Type;
   };

   // floating point combined with int -> the floating-point type
   template<> struct PromoteTypeId<float,int> {
     typedef float Type;
   };
   template<> struct PromoteTypeId<int,float> {
     typedef float Type;
   };
   template<> struct PromoteTypeId<double,int> {
     typedef double Type;
   };
   template<> struct PromoteTypeId<int,double> {
     typedef double Type;
   };

   // float combined with double -> double
   template<> struct PromoteTypeId<float,double> {
     typedef double Type;
   };
   template<> struct PromoteTypeId<double,float> {
     typedef double Type;
   };

   // real combined with complex of the same precision -> complex
   template<> struct PromoteTypeId<float, complex<float> > {
     typedef complex<float> Type;
   };
   template<> struct PromoteTypeId<complex<float>, float> {
     typedef complex<float> Type;
   };
   template<> struct PromoteTypeId<double, complex<double> > {
     typedef complex<double> Type;
   };
   template<> struct PromoteTypeId<complex<double>, double> {
     typedef complex<double> Type;
   };

   /*
     mapper<Storage>::type gives the register type that corresponds to
     a storage type:
       double  -> double     double2 -> double2     double4 -> double4
       float   -> float      float2  -> float2      float4  -> float4
       short   -> float      short2  -> float2      short4  -> float4
     This lets code templated on the storage type look up the matching
     register type.  Types not listed have no nested "type".
    */
   template<typename> struct mapper { };

   // scalars
   template<> struct mapper<short> {
     typedef float type;
   };
   template<> struct mapper<float> {
     typedef float type;
   };
   template<> struct mapper<double> {
     typedef double type;
   };

   // two-component vectors
   template<> struct mapper<short2> {
     typedef float2 type;
   };
   template<> struct mapper<float2> {
     typedef float2 type;
   };
   template<> struct mapper<double2> {
     typedef double2 type;
   };

   // four-component vectors
   template<> struct mapper<short4> {
     typedef float4 type;
   };
   template<> struct mapper<float4> {
     typedef float4 type;
   };
   template<> struct mapper<double4> {
     typedef double4 type;
   };

   /*
     bridge_mapper<A,B>::type is a two-key type lookup table.  Only the
     (A,B) pairs listed below define "type"; every other combination
     resolves to the empty primary template.
   */
   template<typename,typename> struct bridge_mapper { };

   // first key: float2
   template<> struct bridge_mapper<float2,short2> {
     typedef float2 type;
   };
   template<> struct bridge_mapper<float2,float2> {
     typedef float2 type;
   };
   template<> struct bridge_mapper<float2,double2> {
     typedef float2 type;
   };

   // first key: float4
   template<> struct bridge_mapper<float4,short4> {
     typedef float4 type;
   };
   template<> struct bridge_mapper<float4,float4> {
     typedef float4 type;
   };
   template<> struct bridge_mapper<float4,double2> {
     typedef float2 type;
   };

   // first key: double2
   template<> struct bridge_mapper<double2,short2> {
     typedef float2 type;
   };
   template<> struct bridge_mapper<double2,short4> {
     typedef float4 type;
   };
   template<> struct bridge_mapper<double2,float2> {
     typedef double2 type;
   };
   template<> struct bridge_mapper<double2,float4> {
     typedef double4 type;
   };
   template<> struct bridge_mapper<double2,double2> {
     typedef double2 type;
   };

   /*
     vec_length<T>::value is the number of components in T: 1 for the
     scalars short/float/double, 2 or 4 for the matching vector types,
     and 0 for any type that is not listed.
   */
   template<typename> struct vec_length {
     static const int value = 0;
   };

   // scalars
   template<> struct vec_length<short> {
     static const int value = 1;
   };
   template<> struct vec_length<float> {
     static const int value = 1;
   };
   template<> struct vec_length<double> {
     static const int value = 1;
   };

   // two-component vectors
   template<> struct vec_length<short2> {
     static const int value = 2;
   };
   template<> struct vec_length<float2> {
     static const int value = 2;
   };
   template<> struct vec_length<double2> {
     static const int value = 2;
   };

   // four-component vectors
   template<> struct vec_length<short4> {
     static const int value = 4;
   };
   template<> struct vec_length<float4> {
     static const int value = 4;
   };
   template<> struct vec_length<double4> {
     static const int value = 4;
   };

   /*
     vector<Scalar,N> wraps the N-component vector of the given scalar
     type.  Only the two-component double and float versions are
     defined; both are constructed from a double2-compatible argument
     and convert implicitly back to the wrapped type.
   */
   template<typename, int N> struct vector { };

   // wrapper around double2
   template<> struct vector<double, 2> {
     typedef double2 type;
     type a;

     // store a component-wise copy of the argument
     vector(const type &a) : a(a) { }

     operator type() const { return a; }
   };

   // wrapper around float2; construction narrows each double to float
   template<> struct vector<float, 2> {
     typedef float2 type;
     float2 a;

     vector(const double2 &a) {
       this->a.x = static_cast<float>(a.x);
       this->a.y = static_cast<float>(a.y);
     }

     operator float2() const { return a; }
   };

   /*
     scalar<T>::type is the component type of T.  It is defined for
     short/float/double and their 2-, 3- and 4-component vector types;
     any other T hits the empty primary template.
   */
   template<typename> struct scalar { };

   // short family
   template<> struct scalar<short> {
     typedef short type;
   };
   template<> struct scalar<short2> {
     typedef short type;
   };
   template<> struct scalar<short3> {
     typedef short type;
   };
   template<> struct scalar<short4> {
     typedef short type;
   };

   // float family
   template<> struct scalar<float> {
     typedef float type;
   };
   template<> struct scalar<float2> {
     typedef float type;
   };
   template<> struct scalar<float3> {
     typedef float type;
   };
   template<> struct scalar<float4> {
     typedef float type;
   };

   // double family
   template<> struct scalar<double> {
     typedef double type;
   };
   template<> struct scalar<double2> {
     typedef double type;
   };
   template<> struct scalar<double3> {
     typedef double type;
   };
   template<> struct scalar<double4> {
     typedef double type;
   };

   /*
     isHalf<T>::value is true when T is one of the 16-bit storage types
     used for half precision (short, short2, short4), false otherwise.
   */
   template< typename T > struct isHalf {
     static const bool value = false;
   };
   template<> struct isHalf<short> {
     static const bool value = true;
   };
   template<> struct isHalf<short2> {
     static const bool value = true;
   };
   template<> struct isHalf<short4> {
     static const bool value = true;
   };

   // Generic copy: plain assignment of b to a, using whatever
   // conversion from T2 to T1 the language provides.  Type pairs that
   // need special handling are covered by explicit specializations.
   template<typename T1, typename T2>
   __host__ __device__ inline void copy (T1 &a, const T2 &b)
   {
     a = b;
   }

   // Rebuild a double from the two 32-bit words held in an int2:
   // b.y is passed as the high word and b.x as the low word.
   // Only implemented for device code; on the host it reports an error.
   template<> __host__ __device__ inline void copy(double &a, const int2 &b) {
 #ifndef __CUDA_ARCH__
     errorQuda("Undefined");
 #else
     a = __hiloint2double(b.y, b.x);
 #endif
   }

   // Rebuild a double2 from the four 32-bit words held in an int4:
   // (b.y, b.x) form the high/low words of a.x and (b.w, b.z) those
   // of a.y.  Only implemented for device code; on the host it reports
   // an error.
   template<> __host__ __device__ inline void copy(double2 &a, const int4 &b) {
 #ifndef __CUDA_ARCH__
     errorQuda("Undefined");
 #else
     a.x = __hiloint2double(b.y, b.x);
     a.y = __hiloint2double(b.w, b.z);
 #endif
   }

   // Conversion from short (fixed-point) storage to floating point:
   // the value is scaled by MAX_SHORT_INV, which is approximately 1/32767.
   // MAX_SHORT_INV is a double literal, so the product in s2f is formed
   // in double precision and narrowed to float on return.
 #define MAX_SHORT_INV 3.051850948e-5
   static inline __host__ __device__ float s2f(const short &a)
   {
     const float widened = static_cast<float>(a);
     return widened * MAX_SHORT_INV;
   }

   static inline __host__ __device__ double s2d(const short &a)
   {
     const double widened = static_cast<double>(a);
     return widened * MAX_SHORT_INV;
   }

   /*
     Fast float to integer conversion.

     Device: adds 12582912.0f (1.5 * 2^23) so that the rounded integer
     ends up in the low-order mantissa bits of the sum, then returns the
     raw bit pattern of that sum.  Only the low-order bits of the
     returned int hold the integer; the high-order bits still contain
     the exponent and offset, so the result is meant to be narrowed by
     the caller (the copy() specializations in this file assign it to a
     short).  The bits are read with __float_as_int rather than by
     casting a float lvalue to int&, which avoids type-punning through
     a reference.

     Host: plain static_cast<int>, i.e. truncation toward zero.

     NOTE(review): the device path rounds via the floating-point add
     while the host path truncates, so the two can differ by one for
     non-integral inputs -- confirm this is acceptable to callers.
   */
   __device__ __host__ inline int f2i(float f) {
 #ifdef __CUDA_ARCH__
     f += 12582912.0f;
     return __float_as_int(f);
 #else
     return static_cast<int>(f);
 #endif
   }

   /*
     Fast double to integer conversion.

     Device: adds 6755399441055744.0 (1.5 * 2^52) so that the rounded
     integer ends up in the low-order mantissa bits of the sum, then
     returns the low 32 bits of that sum.  The low word is extracted
     with __double2loint rather than by casting a double lvalue to
     int&, which avoids type-punning through a reference and does not
     depend on the byte order of the double in memory.

     Host: plain static_cast<int>, i.e. truncation toward zero.

     NOTE(review): the device path rounds via the floating-point add
     while the host path truncates, so the two can differ by one for
     non-integral inputs -- confirm this is acceptable to callers.
   */
   __device__ __host__ inline int d2i(double d) {
 #ifdef __CUDA_ARCH__
     d += 6755399441055744.0;
     return __double2loint(d);
 #else
     return static_cast<int>(d);
 #endif
   }

   // short -> float: scale the stored value with s2f
   template<> __host__ __device__ inline void copy(float &a, const short &b)
   {
     a = s2f(b);
   }

   // float -> short: scale by MAX_SHORT, convert with f2i and narrow
   // the resulting int to short
   template<> __host__ __device__ inline void copy(short &a, const float &b)
   {
     a = f2i(b*MAX_SHORT);
   }

   // short2 -> float2: convert each component with s2f
   template<> __host__ __device__ inline void copy(float2 &a, const short2 &b)
   {
     a.x = s2f(b.x);
     a.y = s2f(b.y);
   }

   // float2 -> short2: scale each component by MAX_SHORT and convert
   // it with f2i
   template<> __host__ __device__ inline void copy(short2 &a, const float2 &b)
   {
     a.x = f2i(b.x*MAX_SHORT);
     a.y = f2i(b.y*MAX_SHORT);
   }

   // short4 -> float4: convert each component with s2f
   template<> __host__ __device__ inline void copy(float4 &a, const short4 &b)
   {
     a.x = s2f(b.x);
     a.y = s2f(b.y);
     a.z = s2f(b.z);
     a.w = s2f(b.w);
   }

   // float4 -> short4: scale each component by MAX_SHORT and convert
   // it with f2i
   template<> __host__ __device__ inline void copy(short4 &a, const float4 &b)
   {
     a.x = f2i(b.x*MAX_SHORT);
     a.y = f2i(b.y*MAX_SHORT);
     a.z = f2i(b.z*MAX_SHORT);
     a.w = f2i(b.w*MAX_SHORT);
   }


   /*
     Generic trigonometric helpers.  The primary template ignores the
     isHalf flag and forwards straight to atan2 / sin / cos for type T;
     the float specializations provide the flag-dependent behaviour.
   */
   template <bool isHalf, typename T>
     struct Trig {
       // sine of a
       __device__ __host__ static T Sin( const T &a )
       {
         return sin(a);
       }

       // cosine of a
       __device__ __host__ static T Cos( const T &a )
       {
         return cos(a);
       }

       // atan2(a, b)
       __device__ __host__ static T Atan2( const T &a, const T &b)
       {
         return atan2(a,b);
       }

       // writes sin(a) to *s, then cos(a) to *c
       __device__ __host__ static void SinCos(const T& a, T *s, T *c)
       {
         *s = sin(a);
         *c = cos(a);
       }
     };

   /*
     Single-precision helpers for the non-half case.  Device code uses
     the fast intrinsics (__sinf, __cosf, __sincosf); host code uses
     sinf, cosf and sincosf.  Atan2 uses atan2f on both sides.
   */
   template <>
     struct Trig<false,float> {
     __device__ __host__ static float Atan2( const float &a, const float &b) {
       return atan2f(a,b);
     }

     __device__ __host__ static float Sin( const float &a ) {
 #ifndef __CUDA_ARCH__
       return sinf(a);
 #else
       return __sinf(a);
 #endif
     }

     __device__ __host__ static float Cos( const float &a ) {
 #ifndef __CUDA_ARCH__
       return cosf(a);
 #else
       return __cosf(a);
 #endif
     }

     // writes the sine of a to *s and the cosine of a to *c
     __device__ __host__ static void SinCos(const float& a, float *s, float *c) {
 #ifndef __CUDA_ARCH__
        sincosf(a, s, c);
 #else
        __sincosf(a, s, c);
 #endif
     }

   };

   /*
     Single-precision helpers for the half-precision case (isHalf ==
     true).  Angles are exchanged in units of pi: Atan2 divides its
     result by M_PI, and Sin / Cos / SinCos multiply their argument by
     M_PI before evaluating.  (Presumably this keeps the angle within
     [-1,1] for fixed-point storage -- confirm against callers.)

     Device code uses the fast intrinsics; host code uses sinf / cosf.

     SinCos is provided so that this specialization offers the same
     set of members as the primary Trig template and Trig<false,float>.
   */
   template <>
     struct Trig<true,float> {
     // atan2f(a,b) expressed in units of pi
     __device__ __host__ static float Atan2( const float &a, const float &b) { return atan2f(a,b)/M_PI; }

     // sine of a*pi
     __device__ __host__ static float Sin( const float &a ) {
 #ifdef __CUDA_ARCH__
       return __sinf(a*M_PI);
 #else
       return sinf(a*M_PI);
 #endif
     }

     // cosine of a*pi
     __device__ __host__ static float Cos( const float &a ) {
 #ifdef __CUDA_ARCH__
       return __cosf(a*M_PI);
 #else
       return cosf(a*M_PI);
 #endif
     }

     // writes the sine of a*pi to *s and the cosine of a*pi to *c;
     // the angle is formed exactly as in Sin and Cos above (product in
     // double, narrowed to float when passed on)
     __device__ __host__ static void SinCos(const float& a, float *s, float *c) {
       const float angle = a*M_PI;
 #ifdef __CUDA_ARCH__
       __sincosf(angle, s, c);
 #else
       *s = sinf(angle);
       *c = cosf(angle);
 #endif
     }
   };


   /*
     VectorType<Float,number>::type is the vector type with "number"
     components of scalar type Float.  Defined for double, float and
     short with 1, 2 or 4 components; the primary template is only
     declared, so any other combination is a compile error.
   */
   template <typename Float, int number> struct VectorType;

   // half precision (stored as short)
   template <> struct VectorType<short, 1> {
     typedef short type;
   };
   template <> struct VectorType<short, 2> {
     typedef short2 type;
   };
   template <> struct VectorType<short, 4> {
     typedef short4 type;
   };

   // single precision
   template <> struct VectorType<float, 1> {
     typedef float type;
   };
   template <> struct VectorType<float, 2> {
     typedef float2 type;
   };
   template <> struct VectorType<float, 4> {
     typedef float4 type;
   };

   // double precision
   template <> struct VectorType<double, 1> {
     typedef double type;
   };
   template <> struct VectorType<double, 2> {
     typedef double2 type;
   };
   template <> struct VectorType<double, 4> {
     typedef double4 type;
   };

   /*
     TexVectorType<Float,number>::type is the type used when the data
     is read through a texture.  For float and short it is the ordinary
     vector type; for double it is an integer vector of the same size
     (int2 for one double, int4 for two).  There is no entry for four
     doubles.  The primary template is only declared.
   */
   template <typename Float, int number> struct TexVectorType;

   // half precision (stored as short)
   template <> struct TexVectorType<short, 1> {
     typedef short type;
   };
   template <> struct TexVectorType<short, 2> {
     typedef short2 type;
   };
   template <> struct TexVectorType<short, 4> {
     typedef short4 type;
   };

   // single precision
   template <> struct TexVectorType<float, 1> {
     typedef float type;
   };
   template <> struct TexVectorType<float, 2> {
     typedef float2 type;
   };
   template <> struct TexVectorType<float, 4> {
     typedef float4 type;
   };

   // double precision: fetched as 32-bit integer words
   template <> struct TexVectorType<double, 1> {
     typedef int2 type;
   };
   template <> struct TexVectorType<double, 2> {
     typedef int4 type;
   };

   /*
     Return element idx of the array of VectorType that starts at ptr.
     The function defines USE_LDG itself, so in device code the element
     is read through __ldg; in host code it is an ordinary indexed read.
   */
   template <typename VectorType>
     __device__ __host__ inline VectorType vector_load(void *ptr, int idx) {
 #define USE_LDG
     VectorType *base = reinterpret_cast< VectorType* >(ptr);
 #if defined(USE_LDG) && defined(__CUDA_ARCH__)
     return __ldg(base + idx);
 #else
     return base[idx];
 #endif
   }

   /*
     Store value as element idx of the array of VectorType that starts
     at ptr.  This is the generic version (an ordinary indexed write);
     vector types with a dedicated streaming store are handled by the
     explicit specializations.

     The cast is to plain VectorType*.  A __restrict__ qualifier placed
     in front of the pointee type inside the cast does not qualify the
     pointer and has no effect on a temporary, so it is not used.
   */
   template <typename VectorType>
     __device__ __host__ inline void vector_store(void *ptr, int idx, const VectorType &value) {
     reinterpret_cast< VectorType* >(ptr)[idx] = value;
   }

   // double2 store: device code hands the two components to
   // store_streaming_double2; host code does an ordinary assignment.
   template <>
     __device__ __host__ inline void vector_store(void *ptr, int idx, const double2 &value) {
     double2 *dst = reinterpret_cast<double2*>(ptr) + idx;
 #ifndef __CUDA_ARCH__
     *dst = value;
 #else
     store_streaming_double2(dst, value.x, value.y);
 #endif
   }

   // float4 store: device code hands the four components to
   // store_streaming_float4; host code does an ordinary assignment.
   template <>
     __device__ __host__ inline void vector_store(void *ptr, int idx, const float4 &value) {
     float4 *dst = reinterpret_cast<float4*>(ptr) + idx;
 #ifndef __CUDA_ARCH__
     *dst = value;
 #else
     store_streaming_float4(dst, value.x, value.y, value.z, value.w);
 #endif
   }

   // float2 store: device code hands the two components to
   // store_streaming_float2; host code does an ordinary assignment.
   template <>
     __device__ __host__ inline void vector_store(void *ptr, int idx, const float2 &value) {
     float2 *dst = reinterpret_cast<float2*>(ptr) + idx;
 #ifndef __CUDA_ARCH__
     *dst = value;
 #else
     store_streaming_float2(dst, value.x, value.y);
 #endif
   }

   // short4 store: device code hands the four components to
   // store_streaming_short4; host code does an ordinary assignment.
   template <>
     __device__ __host__ inline void vector_store(void *ptr, int idx, const short4 &value) {
     short4 *dst = reinterpret_cast<short4*>(ptr) + idx;
 #ifndef __CUDA_ARCH__
     *dst = value;
 #else
     store_streaming_short4(dst, value.x, value.y, value.z, value.w);
 #endif
   }

   // short2 store: device code hands the two components to
   // store_streaming_short2; host code does an ordinary assignment.
   template <>
     __device__ __host__ inline void vector_store(void *ptr, int idx, const short2 &value) {
     short2 *dst = reinterpret_cast<short2*>(ptr) + idx;
 #ifndef __CUDA_ARCH__
     *dst = value;
 #else
     store_streaming_short2(dst, value.x, value.y);
 #endif
   }

   /*
     AllocType<large_alloc>::type selects an integer type by a
     compile-time flag: size_t when large_alloc is true, int when it
     is false.
   */
   template<bool large_alloc> struct AllocType { };

   template<> struct AllocType<false> {
     typedef int type;
   };

   template<> struct AllocType<true> {
     typedef size_t type;
   };

 } // namespace quda

 #endif // _REGISTER_TRAITS_H
quda::TexVectorType< float, 2 >::type
float2 type
Definition: register_traits.h:266

cosf
float cosf(float)

quda::TexVectorType< short, 4 >::type
short4 type
Definition: register_traits.h:272

quda::bridge_mapper< double2, double2 >::type
double2 type
Definition: register_traits.h:55

inline_ptx.h

quda::mapper< short >::type
float type
Definition: register_traits.h:44

quda::mapper< short2 >::type
float2 type
Definition: register_traits.h:48

quda::scalar< double >::type
double type
Definition: register_traits.h:98

quda::mapper< float4 >::type
float4 type
Definition: register_traits.h:51

quda::vector< double, 2 >::vector
vector(const type &a)
Definition: register_traits.h:83

quda::vector< float, 2 >::type
float2 type
Definition: register_traits.h:88

quda::bridge_mapper< float4, double2 >::type
float2 type
Definition: register_traits.h:60

quda::Trig< false, float >::Cos
__device__ static __host__ float Cos(const float &a)
Definition: register_traits.h:199

quda::d2i
__device__ __host__ int d2i(double d)
Definition: register_traits.h:147

quda::TexVectorType< double, 1 >::type
int2 type
Definition: register_traits.h:261

errorQuda
#define errorQuda(...)
Definition: util_quda.h:90

quda::PromoteTypeId< double, int >::Type
double Type
Definition: register_traits.h:26

quda::TexVectorType< float, 1 >::type
float type
Definition: register_traits.h:265

quda::Trig::Atan2
__device__ static __host__ T Atan2(const T &a, const T &b)
Definition: register_traits.h:180

quda::VectorType< double, 2 >::type
double2 type
Definition: register_traits.h:244

quda::vector< double, 2 >::a
type a
Definition: register_traits.h:82

quda::scalar< short4 >::type
short type
Definition: register_traits.h:103

quda::Trig< false, float >::Sin
__device__ static __host__ float Sin(const float &a)
Definition: register_traits.h:192

quda::Trig< false, float >::SinCos
__device__ static __host__ void SinCos(const float &a, float *s, float *c)
Definition: register_traits.h:207

quda::VectorType< short, 4 >::type
short4 type
Definition: register_traits.h:255

quda::PromoteTypeId< int, float >::Type
float Type
Definition: register_traits.h:29

quda::Trig::Sin
__device__ static __host__ T Sin(const T &a)
Definition: register_traits.h:181

quda::s2f
static __host__ __device__ float s2f(const short &a)
Definition: register_traits.h:134

quda::copy
__host__ __device__ void copy(T1 &a, const T2 &b)
Definition: register_traits.h:114

quda::bridge_mapper< float4, short4 >::type
float4 type
Definition: register_traits.h:62

quda::complex< float >
Definition: complex_quda.h:443

quda::mapper< double2 >::type
double2 type
Definition: register_traits.h:46

quda::AllocType< false >::type
int type
Definition: register_traits.h:336

quda::PromoteTypeId< double, complex< double > >::Type
complex< double > Type
Definition: register_traits.h:25

quda::VectorType< short, 2 >::type
short2 type
Definition: register_traits.h:254

quda::PromoteTypeId< complex< double >, double >::Type
complex< double > Type
Definition: register_traits.h:24

quda::mapper< short4 >::type
float4 type
Definition: register_traits.h:52

quda
Definition: blas_cublas.h:6

quda::bridge_mapper< double2, short2 >::type
float2 type
Definition: register_traits.h:57

quda::vector< float, 2 >::vector
vector(const double2 &a)
Definition: register_traits.h:90

quda::scalar
Definition: register_traits.h:94

quda::isHalf::value
static const bool value
Definition: register_traits.h:109

quda::TexVectorType< short, 2 >::type
short2 type
Definition: register_traits.h:271

quda::bridge_mapper< float2, double2 >::type
float2 type
Definition: register_traits.h:63

quda::TexVectorType
Definition: register_traits.h:258

quda::bridge_mapper< float4, float4 >::type
float4 type
Definition: register_traits.h:61

b
#define b
Definition: dw_dslash4_core.h:83

quda::scalar< short >::type
short type
Definition: register_traits.h:106

quda::vector< double, 2 >::type
double2 type
Definition: register_traits.h:81

quda::VectorType
Definition: register_traits.h:240

quda::scalar< double4 >::type
double type
Definition: register_traits.h:95

quda::bridge_mapper< float2, short2 >::type
float2 type
Definition: register_traits.h:65

quda::sin
__host__ __device__ ValueType sin(ValueType x)
Definition: complex_quda.h:40

quda::Trig< true, float >::Sin
__device__ static __host__ float Sin(const float &a)
Definition: register_traits.h:223

quda::PromoteTypeId< double, float >::Type
double Type
Definition: register_traits.h:30

atan2f
float atan2f(float, float)

quda::vector_store
__device__ __host__ void vector_store(void *ptr, int idx, const VectorType &value)
Definition: register_traits.h:285

quda::scalar< short2 >::type
short type
Definition: register_traits.h:105

quda::atan2
__host__ __device__ ValueType atan2(ValueType x, ValueType y)
Definition: complex_quda.h:65

quda::PromoteTypeId< float, complex< float > >::Type
complex< float > Type
Definition: register_traits.h:23

quda::PromoteTypeId< float, int >::Type
float Type
Definition: register_traits.h:28

double
double
Definition: CMakeCUDACompilerId.cpp1.ii:8010

quda::Trig< false, float >::Atan2
__device__ static __host__ float Atan2(const float &a, const float &b)
Definition: register_traits.h:191

quda::Trig::SinCos
__device__ static __host__ void SinCos(const T &a, T *s, T *c)
Definition: register_traits.h:183

f
int int int enum cudaChannelFormatKind f
Definition: CMakeCUDACompilerId.cpp1.ii:2637

quda::vec_length
Definition: register_traits.h:67

quda::scalar< double2 >::type
double type
Definition: register_traits.h:97

quda::store_streaming_double2
__device__ void store_streaming_double2(double2 *addr, double x, double y)
Definition: inline_ptx.h:49

quda::bridge_mapper< double2, float4 >::type
double4 type
Definition: register_traits.h:58

quda::bridge_mapper< double2, short4 >::type
float4 type
Definition: register_traits.h:59

quda::Trig::Cos
__device__ static __host__ T Cos(const T &a)
Definition: register_traits.h:182

quda::scalar< float4 >::type
float type
Definition: register_traits.h:99

quda::scalar< double3 >::type
double type
Definition: register_traits.h:96

ptr
const void * ptr
Definition: CMakeCUDACompilerId.cpp1.ii:2613

quda::PromoteTypeId< complex< float >, float >::Type
complex< float > Type
Definition: register_traits.h:22

quda::TexVectorType< double, 2 >::type
int4 type
Definition: register_traits.h:262

quda::vector< float, 2 >::a
float2 a
Definition: register_traits.h:89

quda::vec_length::value
static const int value
Definition: register_traits.h:67

quda::bridge_mapper< double2, float2 >::type
double2 type
Definition: register_traits.h:56

quda::store_streaming_float4
__device__ void store_streaming_float4(float4 *addr, float x, float y, float z, float w)
Definition: inline_ptx.h:39

quda::s2d
static __host__ __device__ double s2d(const short &a)
Definition: register_traits.h:135

quda::VectorType< double, 4 >::type
double4 type
Definition: register_traits.h:245

idx
int idx
Definition: staggered_fused_exterior_dslash_core.h:355

quda::Trig< true, float >::Atan2
__device__ static __host__ float Atan2(const float &a, const float &b)
Definition: register_traits.h:222

quda::mapper
Definition: register_traits.h:41

quda::PromoteTypeId
Definition: register_traits.h:21

quda::PromoteTypeId::Type
T Type
Definition: register_traits.h:21

int
int
Definition: CMakeCUDACompilerId.cpp1.ii:3962

s
size_t s
Definition: CMakeCUDACompilerId.cpp1.ii:2229

sinf
float sinf(float)

quda::VectorType< float, 1 >::type
float type
Definition: register_traits.h:248

quda::Trig< true, float >::Cos
__device__ static __host__ float Cos(const float &a)
Definition: register_traits.h:230

quda::mapper< float >::type
float type
Definition: register_traits.h:43

quda::store_streaming_float2
__device__ void store_streaming_float2(float2 *addr, float x, float y)
Definition: inline_ptx.h:54

complex_quda.h

quda::PromoteTypeId< float, double >::Type
double Type
Definition: register_traits.h:31

quda::vector_load
__device__ __host__ VectorType vector_load(void *ptr, int idx)
Definition: register_traits.h:275

quda::bridge_mapper
Definition: register_traits.h:54

c
const void * c
Definition: CMakeCUDACompilerId.cpp1.ii:2234

quda::cos
__host__ __device__ ValueType cos(ValueType x)
Definition: complex_quda.h:35

MAX_SHORT
#define MAX_SHORT
Definition: quda_internal.h:29

quda::mapper< double4 >::type
double4 type
Definition: register_traits.h:50

quda::complex< double >
Definition: complex_quda.h:554

quda::scalar< float3 >::type
float type
Definition: register_traits.h:100

MAX_SHORT_INV
#define MAX_SHORT_INV
Definition: register_traits.h:133

quda::store_streaming_short2
__device__ void store_streaming_short2(short2 *addr, short x, short y)
Definition: inline_ptx.h:59

quda::TexVectorType< float, 4 >::type
float4 type
Definition: register_traits.h:267

float
float
Definition: CMakeCUDACompilerId.cpp1.ii:12791

quda::isHalf
Definition: register_traits.h:109

quda::VectorType< double, 1 >::type
double type
Definition: register_traits.h:243

quda::AllocType< true >::type
size_t type
Definition: register_traits.h:335

quda::AllocType
Definition: register_traits.h:334

quda::scalar< float >::type
float type
Definition: register_traits.h:102

value
int value
Definition: CMakeCUDACompilerId.cpp1.ii:2296

d
static __inline__ size_t size_t d
Definition: CMakeCUDACompilerId.cpp1.ii:3019

quda::TexVectorType< short, 1 >::type
short type
Definition: register_traits.h:270

quda::store_streaming_short4
__device__ void store_streaming_short4(short4 *addr, short x, short y, short z, short w)
Definition: inline_ptx.h:44

quda::VectorType< short, 1 >::type
short type
Definition: register_traits.h:253

a
#define a
Definition: dw_dslash4_core.h:82

quda::mapper< float2 >::type
float2 type
Definition: register_traits.h:47

quda::mapper< double >::type
double type
Definition: register_traits.h:42

quda::f2i
__device__ __host__ int f2i(float f)
Definition: register_traits.h:138

quda::scalar< short3 >::type
short type
Definition: register_traits.h:104

quda::VectorType< float, 4 >::type
float4 type
Definition: register_traits.h:250

quda::PromoteTypeId< int, double >::Type
double Type
Definition: register_traits.h:27

quda::bridge_mapper< float2, float2 >::type
float2 type
Definition: register_traits.h:64

quda::VectorType< float, 2 >::type
float2 type
Definition: register_traits.h:249

quda::scalar< float2 >::type
float type
Definition: register_traits.h:101

quda_internal.h

quda::Trig
Definition: register_traits.h:179