QUDA  0.9.0
register_traits.h
Go to the documentation of this file.
1 #ifndef _REGISTER_TRAITS_H
2 #define _REGISTER_TRAITS_H
3 
11 #include <quda_internal.h>
12 #include <generics/ldg.h>
13 #include <complex_quda.h>
14 #include <inline_ptx.h>
15 
16 namespace quda {
17 
18  /*
19  Here we use traits to define the greater type used for mixing types of computation involving these types
20  */
21  template<class T, class U> struct PromoteTypeId { typedef T Type; };
22  template<> struct PromoteTypeId<complex<float>, float> { typedef complex<float> Type; };
23  template<> struct PromoteTypeId<float, complex<float> > { typedef complex<float> Type; };
24  template<> struct PromoteTypeId<complex<double>, double> { typedef complex<double> Type; };
25  template<> struct PromoteTypeId<double, complex<double> > { typedef complex<double> Type; };
26  template<> struct PromoteTypeId<double,int> { typedef double Type; };
27  template<> struct PromoteTypeId<int,double> { typedef double Type; };
28  template<> struct PromoteTypeId<float,int> { typedef float Type; };
29  template<> struct PromoteTypeId<int,float> { typedef float Type; };
30  template<> struct PromoteTypeId<double,float> { typedef double Type; };
31  template<> struct PromoteTypeId<float,double> { typedef double Type; };
32 
33  /*
34  Here we use traits to define the mapping between storage type and
35  register type:
36  double -> double
37  float -> float
38  short -> float
39  This allows us to wrap the encapsulate the register type into the storage template type
40  */
41  template<typename> struct mapper { };
42  template<> struct mapper<double> { typedef double type; };
43  template<> struct mapper<float> { typedef float type; };
44  template<> struct mapper<short> { typedef float type; };
45 
46  template<> struct mapper<double2> { typedef double2 type; };
47  template<> struct mapper<float2> { typedef float2 type; };
48  template<> struct mapper<short2> { typedef float2 type; };
49 
50  template<> struct mapper<double4> { typedef double4 type; };
51  template<> struct mapper<float4> { typedef float4 type; };
52  template<> struct mapper<short4> { typedef float4 type; };
53 
54  template<typename,typename> struct bridge_mapper { };
55  template<> struct bridge_mapper<double2,double2> { typedef double2 type; };
56  template<> struct bridge_mapper<double2,float2> { typedef double2 type; };
57  template<> struct bridge_mapper<double2,short2> { typedef float2 type; };
58  template<> struct bridge_mapper<double2,float4> { typedef double4 type; };
59  template<> struct bridge_mapper<double2,short4> { typedef float4 type; };
60  template<> struct bridge_mapper<float4,double2> { typedef float2 type; };
61  template<> struct bridge_mapper<float4,float4> { typedef float4 type; };
62  template<> struct bridge_mapper<float4,short4> { typedef float4 type; };
63  template<> struct bridge_mapper<float2,double2> { typedef float2 type; };
64  template<> struct bridge_mapper<float2,float2> { typedef float2 type; };
65  template<> struct bridge_mapper<float2,short2> { typedef float2 type; };
66 
67  template<typename> struct vec_length { static const int value = 0; };
68  template<> struct vec_length<double4> { static const int value = 4; };
69  template<> struct vec_length<double2> { static const int value = 2; };
70  template<> struct vec_length<double> { static const int value = 1; };
71  template<> struct vec_length<float4> { static const int value = 4; };
72  template<> struct vec_length<float2> { static const int value = 2; };
73  template<> struct vec_length<float> { static const int value = 1; };
74  template<> struct vec_length<short4> { static const int value = 4; };
75  template<> struct vec_length<short2> { static const int value = 2; };
76  template<> struct vec_length<short> { static const int value = 1; };
77 
78  template<typename, int N> struct vector { };
79 
80  template<> struct vector<double, 2> {
81  typedef double2 type;
83  vector(const type &a) { this->a.x = a.x; this->a.y = a.y; }
84  operator type() const { return a; }
85  };
86 
87  template<> struct vector<float, 2> {
88  typedef float2 type;
89  float2 a;
90  vector(const double2 &a) { this->a.x = a.x; this->a.y = a.y; }
91  operator float2() const { return a; }
92  };
93 
94  template<typename> struct scalar { };
95  template<> struct scalar<double4> { typedef double type; };
96  template<> struct scalar<double3> { typedef double type; };
97  template<> struct scalar<double2> { typedef double type; };
98  template<> struct scalar<double> { typedef double type; };
99  template<> struct scalar<float4> { typedef float type; };
100  template<> struct scalar<float3> { typedef float type; };
101  template<> struct scalar<float2> { typedef float type; };
102  template<> struct scalar<float> { typedef float type; };
103  template<> struct scalar<short4> { typedef short type; };
104  template<> struct scalar<short3> { typedef short type; };
105  template<> struct scalar<short2> { typedef short type; };
106  template<> struct scalar<short> { typedef short type; };
107 
108  /* Traits used to determine if a variable is half precision or not */
109  template< typename T > struct isHalf{ static const bool value = false; };
110  template<> struct isHalf<short>{ static const bool value = true; };
111  template<> struct isHalf<short2>{ static const bool value = true; };
112  template<> struct isHalf<short4>{ static const bool value = true; };
113 
114  template<typename T1, typename T2> __host__ __device__ inline void copy (T1 &a, const T2 &b) { a = b; }
115 
116  template<> __host__ __device__ inline void copy(double &a, const int2 &b) {
117 #ifdef __CUDA_ARCH__
118  a = __hiloint2double(b.y, b.x);
119 #else
120  errorQuda("Undefined");
121 #endif
122  }
123 
124  template<> __host__ __device__ inline void copy(double2 &a, const int4 &b) {
125 #ifdef __CUDA_ARCH__
126  a.x = __hiloint2double(b.y, b.x); a.y = __hiloint2double(b.w, b.z);
127 #else
128  errorQuda("Undefined");
129 #endif
130  }
131 
132  // specializations for short-float conversion
133 #define MAX_SHORT_INV 3.051850948e-5
134  static inline __host__ __device__ float s2f(const short &a) { return static_cast<float>(a) * MAX_SHORT_INV; }
135  static inline __host__ __device__ double s2d(const short &a) { return static_cast<double>(a) * MAX_SHORT_INV; }
136 
137  // Fast float to integer round
138  __device__ __host__ inline int f2i(float f) {
139 #ifdef __CUDA_ARCH__
140  f += 12582912.0f; return reinterpret_cast<int&>(f);
141 #else
142  return static_cast<int>(f);
143 #endif
144  }
145 
146  // Fast double to integer round
147  __device__ __host__ inline int d2i(double d) {
148 #ifdef __CUDA_ARCH__
149  d += 6755399441055744.0; return reinterpret_cast<int&>(d);
150 #else
151  return static_cast<int>(d);
152 #endif
153  }
154 
155  template<> __host__ __device__ inline void copy(float &a, const short &b) { a = s2f(b); }
156  template<> __host__ __device__ inline void copy(short &a, const float &b) { a = f2i(b*MAX_SHORT); }
157 
158  template<> __host__ __device__ inline void copy(float2 &a, const short2 &b) {
159  a.x = s2f(b.x); a.y = s2f(b.y);
160  }
161 
162  template<> __host__ __device__ inline void copy(short2 &a, const float2 &b) {
163  a.x = f2i(b.x*MAX_SHORT); a.y = f2i(b.y*MAX_SHORT);
164  }
165 
166  template<> __host__ __device__ inline void copy(float4 &a, const short4 &b) {
167  a.x = s2f(b.x); a.y = s2f(b.y); a.z = s2f(b.z); a.w = s2f(b.w);
168  }
169 
170  template<> __host__ __device__ inline void copy(short4 &a, const float4 &b) {
171  a.x = f2i(b.x*MAX_SHORT); a.y = f2i(b.y*MAX_SHORT); a.z = f2i(b.z*MAX_SHORT); a.w = f2i(b.w*MAX_SHORT);
172  }
173 
174 
178  template <bool isHalf, typename T>
179  struct Trig {
180  __device__ __host__ static T Atan2( const T &a, const T &b) { return atan2(a,b); }
181  __device__ __host__ static T Sin( const T &a ) { return sin(a); }
182  __device__ __host__ static T Cos( const T &a ) { return cos(a); }
183  __device__ __host__ static void SinCos(const T& a, T *s, T *c) { *s = sin(a); *c = cos(a); }
184  };
185 
189  template <>
190  struct Trig<false,float> {
191  __device__ __host__ static float Atan2( const float &a, const float &b) { return atan2f(a,b); }
192  __device__ __host__ static float Sin( const float &a ) {
193 #ifdef __CUDA_ARCH__
194  return __sinf(a);
195 #else
196  return sinf(a);
197 #endif
198  }
199  __device__ __host__ static float Cos( const float &a ) {
200 #ifdef __CUDA_ARCH__
201  return __cosf(a);
202 #else
203  return cosf(a);
204 #endif
205  }
206 
207  __device__ __host__ static void SinCos(const float& a, float *s, float *c) {
208 #ifdef __CUDA_ARCH__
209  __sincosf(a, s, c);
210 #else
211  sincosf(a, s, c);
212 #endif
213  }
214 
215  };
216 
220  template <>
221  struct Trig<true,float> {
222  __device__ __host__ static float Atan2( const float &a, const float &b) { return atan2f(a,b)/M_PI; }
223  __device__ __host__ static float Sin( const float &a ) {
224 #ifdef __CUDA_ARCH__
225  return __sinf(a*M_PI);
226 #else
227  return sinf(a*M_PI);
228 #endif
229  }
230  __device__ __host__ static float Cos( const float &a ) {
231 #ifdef __CUDA_ARCH__
232  return __cosf(a*M_PI);
233 #else
234  return cosf(a*M_PI);
235 #endif
236  }
237  };
238 
239 
240  template <typename Float, int number> struct VectorType;
241 
242  // double precision
243  template <> struct VectorType<double, 1>{typedef double type; };
244  template <> struct VectorType<double, 2>{typedef double2 type; };
245  template <> struct VectorType<double, 4>{typedef double4 type; };
246 
247  // single precision
248  template <> struct VectorType<float, 1>{typedef float type; };
249  template <> struct VectorType<float, 2>{typedef float2 type; };
250  template <> struct VectorType<float, 4>{typedef float4 type; };
251 
252  // half precision
253  template <> struct VectorType<short, 1>{typedef short type; };
254  template <> struct VectorType<short, 2>{typedef short2 type; };
255  template <> struct VectorType<short, 4>{typedef short4 type; };
256 
257  // This trait returns the matching texture type (needed for double precision)
258  template <typename Float, int number> struct TexVectorType;
259 
260  // double precision
261  template <> struct TexVectorType<double, 1>{typedef int2 type; };
262  template <> struct TexVectorType<double, 2>{typedef int4 type; };
263 
264  // single precision
265  template <> struct TexVectorType<float, 1>{typedef float type; };
266  template <> struct TexVectorType<float, 2>{typedef float2 type; };
267  template <> struct TexVectorType<float, 4>{typedef float4 type; };
268 
269  // half precision
270  template <> struct TexVectorType<short, 1>{typedef short type; };
271  template <> struct TexVectorType<short, 2>{typedef short2 type; };
272  template <> struct TexVectorType<short, 4>{typedef short4 type; };
273 
274  template <typename VectorType>
275  __device__ __host__ inline VectorType vector_load(void *ptr, int idx) {
276 #define USE_LDG
277 #if defined(__CUDA_ARCH__) && defined(USE_LDG)
278  return __ldg(reinterpret_cast< VectorType* >(ptr) + idx);
279 #else
280  return reinterpret_cast< VectorType* >(ptr)[idx];
281 #endif
282  }
283 
284  template <typename VectorType>
285  __device__ __host__ inline void vector_store(void *ptr, int idx, const VectorType &value) {
286  reinterpret_cast< __restrict__ VectorType* >(ptr)[idx] = value;
287  }
288 
289  template <>
290  __device__ __host__ inline void vector_store(void *ptr, int idx, const double2 &value) {
291 #if defined(__CUDA_ARCH__)
292  store_streaming_double2(reinterpret_cast<double2*>(ptr)+idx, value.x, value.y);
293 #else
294  reinterpret_cast<double2*>(ptr)[idx] = value;
295 #endif
296  }
297 
298  template <>
299  __device__ __host__ inline void vector_store(void *ptr, int idx, const float4 &value) {
300 #if defined(__CUDA_ARCH__)
301  store_streaming_float4(reinterpret_cast<float4*>(ptr)+idx, value.x, value.y, value.z, value.w);
302 #else
303  reinterpret_cast<float4*>(ptr)[idx] = value;
304 #endif
305  }
306 
307  template <>
308  __device__ __host__ inline void vector_store(void *ptr, int idx, const float2 &value) {
309 #if defined(__CUDA_ARCH__)
310  store_streaming_float2(reinterpret_cast<float2*>(ptr)+idx, value.x, value.y);
311 #else
312  reinterpret_cast<float2*>(ptr)[idx] = value;
313 #endif
314  }
315 
316  template <>
317  __device__ __host__ inline void vector_store(void *ptr, int idx, const short4 &value) {
318 #if defined(__CUDA_ARCH__)
319  store_streaming_short4(reinterpret_cast<short4*>(ptr)+idx, value.x, value.y, value.z, value.w);
320 #else
321  reinterpret_cast<short4*>(ptr)[idx] = value;
322 #endif
323  }
324 
325  template <>
326  __device__ __host__ inline void vector_store(void *ptr, int idx, const short2 &value) {
327 #if defined(__CUDA_ARCH__)
328  store_streaming_short2(reinterpret_cast<short2*>(ptr)+idx, value.x, value.y);
329 #else
330  reinterpret_cast<short2*>(ptr)[idx] = value;
331 #endif
332  }
333 
334  template<bool large_alloc> struct AllocType { };
335  template<> struct AllocType<true> { typedef size_t type; };
336  template<> struct AllocType<false> { typedef int type; };
337 
338 } // namespace quda
339 
340 #endif // _REGISTER_TRAITS_H
float cosf(float)
__device__ static __host__ float Cos(const float &a)
__device__ __host__ int d2i(double d)
#define errorQuda(...)
Definition: util_quda.h:90
__device__ static __host__ T Atan2(const T &a, const T &b)
__device__ static __host__ float Sin(const float &a)
__device__ static __host__ void SinCos(const float &a, float *s, float *c)
__device__ static __host__ T Sin(const T &a)
static __host__ __device__ float s2f(const short &a)
__host__ __device__ void copy(T1 &a, const T2 &b)
vector(const double2 &a)
static const bool value
#define b
__host__ __device__ ValueType sin(ValueType x)
Definition: complex_quda.h:40
__device__ static __host__ float Sin(const float &a)
float atan2f(float, float)
__device__ __host__ void vector_store(void *ptr, int idx, const VectorType &value)
__host__ __device__ ValueType atan2(ValueType x, ValueType y)
Definition: complex_quda.h:65
__device__ static __host__ float Atan2(const float &a, const float &b)
__device__ static __host__ void SinCos(const T &a, T *s, T *c)
int int int enum cudaChannelFormatKind f
__device__ void store_streaming_double2(double2 *addr, double x, double y)
Definition: inline_ptx.h:49
__device__ static __host__ T Cos(const T &a)
const void * ptr
static const int value
__device__ void store_streaming_float4(float4 *addr, float x, float y, float z, float w)
Definition: inline_ptx.h:39
static __host__ __device__ double s2d(const short &a)
__device__ static __host__ float Atan2(const float &a, const float &b)
float sinf(float)
__device__ static __host__ float Cos(const float &a)
__device__ void store_streaming_float2(float2 *addr, float x, float y)
Definition: inline_ptx.h:54
__device__ __host__ VectorType vector_load(void *ptr, int idx)
const void * c
__host__ __device__ ValueType cos(ValueType x)
Definition: complex_quda.h:35
#define MAX_SHORT
Definition: quda_internal.h:29
#define MAX_SHORT_INV
__device__ void store_streaming_short2(short2 *addr, short x, short y)
Definition: inline_ptx.h:59
static __inline__ size_t size_t d
__device__ void store_streaming_short4(short4 *addr, short x, short y, short z, short w)
Definition: inline_ptx.h:44
#define a
__device__ __host__ int f2i(float f)