QUDA  1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
register_traits.h
Go to the documentation of this file.
1 #ifndef _REGISTER_TRAITS_H
2 #define _REGISTER_TRAITS_H
3 
11 #include <quda_internal.h>
12 #include <convert.h>
13 #include <generics/ldg.h>
14 #include <complex_quda.h>
15 #include <inline_ptx.h>
16 
17 namespace quda {
18 
19  /*
20  Here we use traits to define the greater type used for mixing types of computation involving these types
21  */
22  template<class T, class U> struct PromoteTypeId { typedef T Type; };
23  template<> struct PromoteTypeId<complex<float>, float> { typedef complex<float> Type; };
24  template<> struct PromoteTypeId<float, complex<float> > { typedef complex<float> Type; };
25  template<> struct PromoteTypeId<complex<double>, double> { typedef complex<double> Type; };
26  template<> struct PromoteTypeId<double, complex<double> > { typedef complex<double> Type; };
27  template<> struct PromoteTypeId<double,int> { typedef double Type; };
28  template<> struct PromoteTypeId<int,double> { typedef double Type; };
29  template<> struct PromoteTypeId<float,int> { typedef float Type; };
30  template<> struct PromoteTypeId<int,float> { typedef float Type; };
31  template<> struct PromoteTypeId<double,float> { typedef double Type; };
32  template<> struct PromoteTypeId<float,double> { typedef double Type; };
33 
34  /*
35  Here we use traits to define the mapping between storage type and
36  register type:
37  double -> double
38  float -> float
39  short -> float
40  quarter -> float
41  This allows us to wrap the encapsulate the register type into the storage template type
42  */
43  template<typename> struct mapper { };
44  template<> struct mapper<double> { typedef double type; };
45  template<> struct mapper<float> { typedef float type; };
46  template<> struct mapper<short> { typedef float type; };
47  template<> struct mapper<char> { typedef float type; };
48 
49  template<> struct mapper<double2> { typedef double2 type; };
50  template<> struct mapper<float2> { typedef float2 type; };
51  template<> struct mapper<short2> { typedef float2 type; };
52  template<> struct mapper<char2> { typedef float2 type; };
53 
54  template<> struct mapper<double4> { typedef double4 type; };
55  template<> struct mapper<float4> { typedef float4 type; };
56  template<> struct mapper<short4> { typedef float4 type; };
57  template<> struct mapper<char4> { typedef float4 type; };
58 
59  template<typename,typename> struct bridge_mapper { };
60  template<> struct bridge_mapper<double2,double2> { typedef double2 type; };
61  template<> struct bridge_mapper<double2,float2> { typedef double2 type; };
62  template<> struct bridge_mapper<double2,short2> { typedef float2 type; };
63  template<> struct bridge_mapper<double2,char2> { typedef float2 type; };
64  template<> struct bridge_mapper<double2,float4> { typedef double4 type; };
65  template<> struct bridge_mapper<double2,short4> { typedef float4 type; };
66  template<> struct bridge_mapper<double2,char4> { typedef float4 type; };
67  template<> struct bridge_mapper<float4,double2> { typedef float2 type; };
68  template<> struct bridge_mapper<float4,float4> { typedef float4 type; };
69  template<> struct bridge_mapper<float4,short4> { typedef float4 type; };
70  template<> struct bridge_mapper<float4,char4> { typedef float4 type; };
71  template<> struct bridge_mapper<float2,double2> { typedef float2 type; };
72  template<> struct bridge_mapper<float2,float2> { typedef float2 type; };
73  template<> struct bridge_mapper<float2,short2> { typedef float2 type; };
74  template<> struct bridge_mapper<float2,char2> { typedef float2 type; };
75 
76  template<typename> struct vec_length { static const int value = 0; };
77  template<> struct vec_length<double4> { static const int value = 4; };
78  template<> struct vec_length<double2> { static const int value = 2; };
79  template<> struct vec_length<double> { static const int value = 1; };
80  template<> struct vec_length<float4> { static const int value = 4; };
81  template<> struct vec_length<float2> { static const int value = 2; };
82  template<> struct vec_length<float> { static const int value = 1; };
83  template<> struct vec_length<short4> { static const int value = 4; };
84  template<> struct vec_length<short2> { static const int value = 2; };
85  template<> struct vec_length<short> { static const int value = 1; };
86  template<> struct vec_length<char4> { static const int value = 4; };
87  template<> struct vec_length<char2> { static const int value = 2; };
88  template<> struct vec_length<char> { static const int value = 1; };
89 
90  template<typename, int N> struct vector { };
91 
92  template<> struct vector<double, 2> {
93  typedef double2 type;
94  type a;
95  vector(const type &a) { this->a.x = a.x; this->a.y = a.y; }
96  operator type() const { return a; }
97  };
98 
99  template<> struct vector<float, 2> {
100  typedef float2 type;
101  float2 a;
102  vector(const double2 &a) { this->a.x = a.x; this->a.y = a.y; }
103  operator type() const { return a; }
104  };
105 
106  template<> struct vector<int, 2> {
107  typedef int2 type;
108  int2 a;
109  vector(const int2 &a) { this->a.x = a.x; this->a.y = a.y; }
110  operator type() const { return a; }
111  };
112 
113  template<typename> struct scalar { };
114  template<> struct scalar<double4> { typedef double type; };
115  template<> struct scalar<double3> { typedef double type; };
116  template<> struct scalar<double2> { typedef double type; };
117  template<> struct scalar<double> { typedef double type; };
118  template<> struct scalar<float4> { typedef float type; };
119  template<> struct scalar<float3> { typedef float type; };
120  template<> struct scalar<float2> { typedef float type; };
121  template<> struct scalar<float> { typedef float type; };
122  template<> struct scalar<short4> { typedef short type; };
123  template<> struct scalar<short3> { typedef short type; };
124  template<> struct scalar<short2> { typedef short type; };
125  template<> struct scalar<short> { typedef short type; };
126  template<> struct scalar<char4> { typedef char type; };
127  template<> struct scalar<char3> { typedef char type; };
128  template<> struct scalar<char2> { typedef char type; };
129  template<> struct scalar<char> { typedef char type; };
130 
131  /* Traits used to determine if a variable is half precision or not */
132  template< typename T > struct isHalf{ static const bool value = false; };
133  template<> struct isHalf<short>{ static const bool value = true; };
134  template<> struct isHalf<short2>{ static const bool value = true; };
135  template<> struct isHalf<short4>{ static const bool value = true; };
136 
137  /* Traits used to determine if a variable is quarter precision or not */
138  template< typename T > struct isQuarter{ static const bool value = false; };
139  template<> struct isQuarter<char>{ static const bool value = true; };
140  template<> struct isQuarter<char2>{ static const bool value = true; };
141  template<> struct isQuarter<char4>{ static const bool value = true; };
142 
143  /* Traits used to determine if a variable is fixed precision or not */
144  template< typename T > struct isFixed{ static const bool value = false; };
145  template<> struct isFixed<short>{ static const bool value = true; };
146  template<> struct isFixed<short2>{ static const bool value = true; };
147  template<> struct isFixed<short4>{ static const bool value = true; };
148  template<> struct isFixed<char>{ static const bool value = true; };
149  template<> struct isFixed<char2>{ static const bool value = true; };
150  template<> struct isFixed<char4>{ static const bool value = true; };
151 
152  template<typename T1, typename T2> __host__ __device__ inline void copy (T1 &a, const T2 &b) { a = b; }
153 
154  template<> __host__ __device__ inline void copy(double &a, const int2 &b) {
155 #ifdef __CUDA_ARCH__
156  a = __hiloint2double(b.y, b.x);
157 #else
158  errorQuda("Undefined");
159 #endif
160  }
161 
162  template<> __host__ __device__ inline void copy(double2 &a, const int4 &b) {
163 #ifdef __CUDA_ARCH__
164  a.x = __hiloint2double(b.y, b.x); a.y = __hiloint2double(b.w, b.z);
165 #else
166  errorQuda("Undefined");
167 #endif
168  }
169 
170  template<> __host__ __device__ inline void copy(float &a, const short &b) { a = s2f(b); }
171  template<> __host__ __device__ inline void copy(short &a, const float &b) { a = f2i(b*fixedMaxValue<short>::value); }
172 
173  template<> __host__ __device__ inline void copy(float2 &a, const short2 &b) {
174  a.x = s2f(b.x); a.y = s2f(b.y);
175  }
176 
177  template<> __host__ __device__ inline void copy(short2 &a, const float2 &b) {
179  }
180 
181  template<> __host__ __device__ inline void copy(float4 &a, const short4 &b) {
182  a.x = s2f(b.x); a.y = s2f(b.y); a.z = s2f(b.z); a.w = s2f(b.w);
183  }
184 
185  template<> __host__ __device__ inline void copy(short4 &a, const float4 &b) {
187  }
188 
189  template<> __host__ __device__ inline void copy(float &a, const char &b) { a = c2f(b); }
190  template<> __host__ __device__ inline void copy(char &a, const float &b) { a = f2i(b*fixedMaxValue<char>::value); }
191 
192  template<> __host__ __device__ inline void copy(float2 &a, const char2 &b) {
193  a.x = c2f(b.x); a.y = c2f(b.y);
194  }
195 
196  template<> __host__ __device__ inline void copy(char2 &a, const float2 &b) {
198  }
199 
200  template<> __host__ __device__ inline void copy(float4 &a, const char4 &b) {
201  a.x = c2f(b.x); a.y = c2f(b.y); a.z = c2f(b.z); a.w = c2f(b.w);
202  }
203 
204  template<> __host__ __device__ inline void copy(char4 &a, const float4 &b) {
206  }
207 
208  // specialized variants of the copy function that assumes fixed-point scaling already done
209  template <typename T1, typename T2> __host__ __device__ inline void copy_scaled(T1 &a, const T2 &b) { copy(a, b); }
210 
211  template <> __host__ __device__ inline void copy_scaled(short4 &a, const float4 &b)
212  {
213  a.x = f2i(b.x);
214  a.y = f2i(b.y);
215  a.z = f2i(b.z);
216  a.w = f2i(b.w);
217  }
218 
219  template <> __host__ __device__ inline void copy_scaled(char4 &a, const float4 &b)
220  {
221  a.x = f2i(b.x);
222  a.y = f2i(b.y);
223  a.z = f2i(b.z);
224  a.w = f2i(b.w);
225  }
226 
227  template <> __host__ __device__ inline void copy_scaled(short2 &a, const float2 &b)
228  {
229  a.x = f2i(b.x);
230  a.y = f2i(b.y);
231  }
232 
233  template <> __host__ __device__ inline void copy_scaled(char2 &a, const float2 &b)
234  {
235  a.x = f2i(b.x);
236  a.y = f2i(b.y);
237  }
238 
239  template <> __host__ __device__ inline void copy_scaled(short &a, const float &b) { a = f2i(b); }
240 
241  template <> __host__ __device__ inline void copy_scaled(char &a, const float &b) { a = f2i(b); }
242 
248  template <typename T1, typename T2, typename T3>
249  __host__ __device__ inline void copy_and_scale(T1 &a, const T2 &b, const T3 &c)
250  {
251  copy(a, b);
252  }
253 
254  template <> __host__ __device__ inline void copy_and_scale(float4 &a, const short4 &b, const float &c)
255  {
256  a.x = s2f(b.x, c);
257  a.y = s2f(b.y, c);
258  a.z = s2f(b.z, c);
259  a.w = s2f(b.w, c);
260  }
261 
262  template <> __host__ __device__ inline void copy_and_scale(float4 &a, const char4 &b, const float &c)
263  {
264  a.x = c2f(b.x, c);
265  a.y = c2f(b.y, c);
266  a.z = c2f(b.z, c);
267  a.w = c2f(b.w, c);
268  }
269 
270  template <> __host__ __device__ inline void copy_and_scale(float2 &a, const short2 &b, const float &c)
271  {
272  a.x = s2f(b.x, c);
273  a.y = s2f(b.y, c);
274  }
275 
276  template <> __host__ __device__ inline void copy_and_scale(float2 &a, const char2 &b, const float &c)
277  {
278  a.x = c2f(b.x, c);
279  a.y = c2f(b.y, c);
280  }
281 
282  template <> __host__ __device__ inline void copy_and_scale(float &a, const short &b, const float &c)
283  {
284  a = s2f(b, c);
285  }
286 
287  template <> __host__ __device__ inline void copy_and_scale(float &a, const char &b, const float &c) { a = c2f(b, c); }
288 
292  template <bool isFixed, typename T>
293  struct Trig {
294  __device__ __host__ static T Atan2( const T &a, const T &b) { return atan2(a,b); }
295  __device__ __host__ static T Sin( const T &a ) { return sin(a); }
296  __device__ __host__ static T Cos( const T &a ) { return cos(a); }
297  __device__ __host__ static void SinCos(const T &a, T *s, T *c) { sincos(a, s, c); }
298  };
299 
303  template <>
304  struct Trig<false,float> {
305  __device__ __host__ static float Atan2( const float &a, const float &b) { return atan2f(a,b); }
306  __device__ __host__ static float Sin(const float &a)
307  {
308 #ifdef __CUDA_ARCH__
309  return __sinf(a);
310 #else
311  return sinf(a);
312 #endif
313  }
314  __device__ __host__ static float Cos(const float &a)
315  {
316 #ifdef __CUDA_ARCH__
317  return __cosf(a);
318 #else
319  return cosf(a);
320 #endif
321  }
322 
323  __device__ __host__ static void SinCos(const float &a, float *s, float *c)
324  {
325 #ifdef __CUDA_ARCH__
326  __sincosf(a, s, c);
327 #else
328  sincosf(a, s, c);
329 #endif
330  }
331  };
332 
336  template <>
337  struct Trig<true,float> {
338  __device__ __host__ static float Atan2( const float &a, const float &b) { return atan2f(a,b)/M_PI; }
339  __device__ __host__ static float Sin(const float &a)
340  {
341 #ifdef __CUDA_ARCH__
342  return __sinf(a * static_cast<float>(M_PI));
343 #else
344  return sinf(a * static_cast<float>(M_PI));
345 #endif
346  }
347  __device__ __host__ static float Cos(const float &a)
348  {
349 #ifdef __CUDA_ARCH__
350  return __cosf(a * static_cast<float>(M_PI));
351 #else
352  return cosf(a * static_cast<float>(M_PI));
353 #endif
354  }
355 
356  __device__ __host__ static void SinCos(const float &a, float *s, float *c)
357  {
358 #ifdef __CUDA_ARCH__
359  __sincosf(a * static_cast<float>(M_PI), s, c);
360 #else
361  sincosf(a * static_cast<float>(M_PI), s, c);
362 #endif
363  }
364  };
365 
366 
367  template <typename Float, int number> struct VectorType;
368 
369  // double precision
370  template <> struct VectorType<double, 1>{typedef double type; };
371  template <> struct VectorType<double, 2>{typedef double2 type; };
372  template <> struct VectorType<double, 4>{typedef double4 type; };
373 
374  // single precision
375  template <> struct VectorType<float, 1>{typedef float type; };
376  template <> struct VectorType<float, 2>{typedef float2 type; };
377  template <> struct VectorType<float, 4>{typedef float4 type; };
378 
379  // half precision
380  template <> struct VectorType<short, 1>{typedef short type; };
381  template <> struct VectorType<short, 2>{typedef short2 type; };
382  template <> struct VectorType<short, 4>{typedef short4 type; };
383 
384  // quarter precision
385  template <> struct VectorType<char, 1>{typedef char type; };
386  template <> struct VectorType<char, 2>{typedef char2 type; };
387  template <> struct VectorType<char, 4>{typedef char4 type; };
388 
389  // This trait returns the matching texture type (needed for double precision)
390  template <typename Float, int number> struct TexVectorType;
391 
392  // double precision
393  template <> struct TexVectorType<double, 1>{typedef int2 type; };
394  template <> struct TexVectorType<double, 2>{typedef int4 type; };
395 
396  // single precision
397  template <> struct TexVectorType<float, 1>{typedef float type; };
398  template <> struct TexVectorType<float, 2>{typedef float2 type; };
399  template <> struct TexVectorType<float, 4>{typedef float4 type; };
400 
401  // half precision
402  template <> struct TexVectorType<short, 1>{typedef short type; };
403  template <> struct TexVectorType<short, 2>{typedef short2 type; };
404  template <> struct TexVectorType<short, 4>{typedef short4 type; };
405 
406  // quarter precision
407  template <> struct TexVectorType<char, 1>{typedef char type; };
408  template <> struct TexVectorType<char, 2>{typedef char2 type; };
409  template <> struct TexVectorType<char, 4>{typedef char4 type; };
410 
411  template <typename VectorType>
412  __device__ __host__ inline VectorType vector_load(void *ptr, int idx) {
413 #define USE_LDG
414 #if defined(__CUDA_ARCH__) && defined(USE_LDG)
415  return __ldg(reinterpret_cast< VectorType* >(ptr) + idx);
416 #else
417  return reinterpret_cast< VectorType* >(ptr)[idx];
418 #endif
419  }
420 
421  template <typename VectorType>
422  __device__ __host__ inline void vector_store(void *ptr, int idx, const VectorType &value) {
423  reinterpret_cast< VectorType* >(ptr)[idx] = value;
424  }
425 
426  template <>
427  __device__ __host__ inline void vector_store(void *ptr, int idx, const double2 &value) {
428 #if defined(__CUDA_ARCH__)
429  store_streaming_double2(reinterpret_cast<double2*>(ptr)+idx, value.x, value.y);
430 #else
431  reinterpret_cast<double2*>(ptr)[idx] = value;
432 #endif
433  }
434 
435  template <>
436  __device__ __host__ inline void vector_store(void *ptr, int idx, const float4 &value) {
437 #if defined(__CUDA_ARCH__)
438  store_streaming_float4(reinterpret_cast<float4*>(ptr)+idx, value.x, value.y, value.z, value.w);
439 #else
440  reinterpret_cast<float4*>(ptr)[idx] = value;
441 #endif
442  }
443 
444  template <>
445  __device__ __host__ inline void vector_store(void *ptr, int idx, const float2 &value) {
446 #if defined(__CUDA_ARCH__)
447  store_streaming_float2(reinterpret_cast<float2*>(ptr)+idx, value.x, value.y);
448 #else
449  reinterpret_cast<float2*>(ptr)[idx] = value;
450 #endif
451  }
452 
453  template <>
454  __device__ __host__ inline void vector_store(void *ptr, int idx, const short4 &value) {
455 #if defined(__CUDA_ARCH__)
456  store_streaming_short4(reinterpret_cast<short4*>(ptr)+idx, value.x, value.y, value.z, value.w);
457 #else
458  reinterpret_cast<short4*>(ptr)[idx] = value;
459 #endif
460  }
461 
462  template <>
463  __device__ __host__ inline void vector_store(void *ptr, int idx, const short2 &value) {
464 #if defined(__CUDA_ARCH__)
465  store_streaming_short2(reinterpret_cast<short2*>(ptr)+idx, value.x, value.y);
466 #else
467  reinterpret_cast<short2*>(ptr)[idx] = value;
468 #endif
469  }
470 
471  // A char4 is the same size as a short2
472  template <>
473  __device__ __host__ inline void vector_store(void *ptr, int idx, const char4 &value) {
474 #if defined(__CUDA_ARCH__)
475 
476  store_streaming_short2(reinterpret_cast<short2*>(ptr)+idx, reinterpret_cast<const short2*>(&value)->x, reinterpret_cast<const short2*>(&value)->y);
477 #else
478  reinterpret_cast<char4*>(ptr)[idx] = value;
479  //reinterpret_cast<short2*>(ptr)[idx] = *reinterpret_cast<const short2*>(&value);
480 #endif
481  }
482 
483  template <>
484  __device__ __host__ inline void vector_store(void *ptr, int idx, const char2 &value) {
485 #if defined(__CUDA_ARCH__)
486  vector_store(ptr, idx, *reinterpret_cast<const short*>(&value));
487  //store_streaming_char2(reinterpret_cast<char2*>(ptr)+idx, reinterpret_cast<const char2*>(&value)->x, reinterpret_cast<const char2*>(&value)->y);
488 #else
489  reinterpret_cast<char2*>(ptr)[idx] = value;
490 #endif
491  }
492 
493  template<bool large_alloc> struct AllocType { };
494  template<> struct AllocType<true> { typedef size_t type; };
495  template<> struct AllocType<false> { typedef int type; };
496 
497 } // namespace quda
498 
499 #endif // _REGISTER_TRAITS_H
__device__ static __host__ void SinCos(const T &a, T *s, T *c)
__device__ static __host__ T Cos(const T &a)
__device__ static __host__ float Cos(const float &a)
#define errorQuda(...)
Definition: util_quda.h:121
__host__ __device__ void copy_scaled(T1 &a, const T2 &b)
__device__ static __host__ void SinCos(const float &a, float *s, float *c)
__device__ static __host__ float Sin(const float &a)
__device__ static __host__ void SinCos(const float &a, float *s, float *c)
__host__ __device__ void copy(T1 &a, const T2 &b)
vector(const double2 &a)
__host__ __device__ ValueType sin(ValueType x)
Definition: complex_quda.h:51
__device__ static __host__ float Sin(const float &a)
__device__ __host__ void vector_store(void *ptr, int idx, const VectorType &value)
__host__ __device__ ValueType atan2(ValueType x, ValueType y)
Definition: complex_quda.h:76
__device__ static __host__ float Atan2(const float &a, const float &b)
__device__ void store_streaming_double2(double2 *addr, double x, double y)
Definition: inline_ptx.h:88
__device__ void store_streaming_float4(float4 *addr, float x, float y, float z, float w)
Definition: inline_ptx.h:78
__device__ static __host__ float Atan2(const float &a, const float &b)
__shared__ float s[]
__device__ static __host__ float Cos(const float &a)
__device__ void store_streaming_float2(float2 *addr, float x, float y)
Definition: inline_ptx.h:93
__device__ static __host__ T Sin(const T &a)
__host__ __device__ float s2f(short a)
Definition: convert.h:34
__device__ __host__ VectorType vector_load(void *ptr, int idx)
__host__ __device__ ValueType cos(ValueType x)
Definition: complex_quda.h:46
__device__ void store_streaming_short2(short2 *addr, short x, short y)
Definition: inline_ptx.h:98
__device__ void store_streaming_short4(short4 *addr, short x, short y, short z, short w)
Definition: inline_ptx.h:83
__device__ __host__ int f2i(float f)
Definition: convert.h:93
__host__ __device__ float c2f(char a)
Definition: convert.h:38
__device__ static __host__ T Atan2(const T &a, const T &b)
__host__ __device__ void copy_and_scale(T1 &a, const T2 &b, const T3 &c)
Specialized variants of the copy function that include an additional scale factor. Note the scale factor is ignored unless the input type (b) is either a short or char vector.