1 #ifndef _REGISTER_TRAITS_H 2 #define _REGISTER_TRAITS_H 13 #include <generics/ldg.h> 43 template<
typename>
struct mapper { };
/**
   Specializations of mapper.  Each one publishes, through the nested
   typedef `type`, the type associated with its template argument:
   double and the double/float vector types map to themselves, while
   the short and char vector types map to the float vector with the
   same number of components.
*/
template <> struct mapper<double> { typedef double type; };

// two-component vector types
template <> struct mapper<double2> { typedef double2 type; };
template <> struct mapper<float2> { typedef float2 type; };
template <> struct mapper<short2> { typedef float2 type; };
template <> struct mapper<char2> { typedef float2 type; };

// four-component vector types
template <> struct mapper<double4> { typedef double4 type; };
template <> struct mapper<float4> { typedef float4 type; };
template <> struct mapper<short4> { typedef float4 type; };
template <> struct mapper<char4> { typedef float4 type; };
/**
   Trait reporting the number of components of a type through the
   static constant `value`.  This primary template is the fallback used
   for any type without an explicit specialization and reports 0.
*/
template <typename T>
struct vec_length {
  static const int value = 0;
};
/**
   Specializations of vec_length: `value` is 1 for the scalar types and
   2 or 4 for the corresponding two- and four-component vector types.
*/

// double precision
template <> struct vec_length<double> { static const int value = 1; };
template <> struct vec_length<double2> { static const int value = 2; };
template <> struct vec_length<double4> { static const int value = 4; };

// single precision
template <> struct vec_length<float> { static const int value = 1; };
template <> struct vec_length<float2> { static const int value = 2; };
template <> struct vec_length<float4> { static const int value = 4; };

// short-based types
template <> struct vec_length<short> { static const int value = 1; };
template <> struct vec_length<short2> { static const int value = 2; };
template <> struct vec_length<short4> { static const int value = 4; };

// char-based types
template <> struct vec_length<char> { static const int value = 1; };
template <> struct vec_length<char2> { static const int value = 2; };
template <> struct vec_length<char4> { static const int value = 4; };
/**
   Primary template of the `vector` wrapper, parameterized on an
   element type and a component count N.  It declares no members; the
   usable functionality is supplied by explicit specializations such as
   vector<double, 2>.
*/
template <typename T, int N>
struct vector {
};
92 template<>
struct vector<double, 2> {
95 vector(
const type &a) { this->a.x = a.x; this->a.y = a.y; }
96 operator type()
const {
return a; }
99 template<>
struct vector<float, 2> {
102 vector(
const double2 &a) { this->a.x = a.x; this->a.y = a.y; }
103 operator type()
const {
return a; }
106 template<>
struct vector<int, 2> {
109 vector(
const int2 &a) { this->a.x = a.x; this->a.y = a.y; }
110 operator type()
const {
return a; }
/**
   Compile-time predicate whose `value` is false for every type that
   has no explicit specialization (the specializations flag the
   short-based types).
*/
template <typename T>
struct isHalf {
  static const bool value = false;
};
/**
   isHalf is true for short and for the short2 / short4 vector types.
   NOTE(review): presumably these are the half-precision storage
   formats - confirm against the callers.
*/
template <> struct isHalf<short> { static const bool value = true; };
template <> struct isHalf<short2> { static const bool value = true; };
template <> struct isHalf<short4> { static const bool value = true; };
/**
   Compile-time predicate whose `value` is false for every type that
   has no explicit specialization (the specializations flag the
   char-based types).
*/
template <typename T>
struct isQuarter {
  static const bool value = false;
};
/**
   isQuarter is true for char and for the char2 / char4 vector types.
   NOTE(review): presumably these are the quarter-precision storage
   formats - confirm against the callers.
*/
template <> struct isQuarter<char> { static const bool value = true; };
template <> struct isQuarter<char2> { static const bool value = true; };
template <> struct isQuarter<char4> { static const bool value = true; };
/**
   Compile-time predicate whose `value` is false for every type that
   has no explicit specialization (the specializations flag the short-
   and char-based types).
*/
template <typename T>
struct isFixed {
  static const bool value = false;
};
/**
   isFixed is true for the short- and char-based scalar and vector
   types, i.e. exactly the types flagged by isHalf or isQuarter.
*/

// short-based types
template <> struct isFixed<short> { static const bool value = true; };
template <> struct isFixed<short2> { static const bool value = true; };
template <> struct isFixed<short4> { static const bool value = true; };

// char-based types
template <> struct isFixed<char> { static const bool value = true; };
template <> struct isFixed<char2> { static const bool value = true; };
template <> struct isFixed<char4> { static const bool value = true; };
/**
   Generic copy: assigns b to a with the destination type's assignment
   operator, so whatever implicit conversion exists from T2 to T1 is
   applied.  Type pairs that need an explicit conversion are covered by
   specializations of this template.
   @param a Destination, overwritten with the value of b
   @param b Source value
*/
template <typename T1, typename T2>
__host__ __device__ inline void copy(T1 &a, const T2 &b)
{
  a = b;
}
154 template<> __host__ __device__
inline void copy(
double &a,
const int2 &b) {
156 a = __hiloint2double(b.y, b.x);
162 template<> __host__ __device__
inline void copy(double2 &a,
const int4 &b) {
164 a.x = __hiloint2double(b.y, b.x); a.y = __hiloint2double(b.w, b.z);
/**
   Specialization of copy for a float destination and a short source:
   the value stored in a is the result of s2f(b).
   NOTE(review): s2f is defined elsewhere; presumably it converts the
   fixed-point short back to floating point - confirm.
   @param a Destination float
   @param b Source short
*/
template <>
__host__ __device__ inline void copy(float &a, const short &b)
{
  a = s2f(b);
}
173 template<> __host__ __device__
inline void copy(float2 &a,
const short2 &b) {
174 a.x =
s2f(b.x); a.y =
s2f(b.y);
177 template<> __host__ __device__
inline void copy(short2 &a,
const float2 &b) {
181 template<> __host__ __device__
inline void copy(float4 &a,
const short4 &b) {
182 a.x =
s2f(b.x); a.y =
s2f(b.y); a.z =
s2f(b.z); a.w =
s2f(b.w);
185 template<> __host__ __device__
inline void copy(short4 &a,
const float4 &b) {
/**
   Specialization of copy for a float destination and a char source:
   the value stored in a is the result of c2f(b).
   NOTE(review): c2f is defined elsewhere; presumably it converts the
   fixed-point char back to floating point - confirm.
   @param a Destination float
   @param b Source char
*/
template <>
__host__ __device__ inline void copy(float &a, const char &b)
{
  a = c2f(b);
}
192 template<> __host__ __device__
inline void copy(float2 &a,
const char2 &b) {
193 a.x =
c2f(b.x); a.y =
c2f(b.y);
196 template<> __host__ __device__
inline void copy(char2 &a,
const float2 &b) {
200 template<> __host__ __device__
inline void copy(float4 &a,
const char4 &b) {
201 a.x =
c2f(b.x); a.y =
c2f(b.y); a.z =
c2f(b.z); a.w =
c2f(b.w);
204 template<> __host__ __device__
inline void copy(char4 &a,
const float4 &b) {
/**
   Generic copy_scaled: for type pairs without a dedicated
   specialization this simply forwards to copy(a, b).
   @param a Destination, written by copy()
   @param b Source value
*/
template <typename T1, typename T2>
__host__ __device__ inline void copy_scaled(T1 &a, const T2 &b)
{
  copy(a, b);
}
211 template <> __host__ __device__
inline void copy_scaled(short4 &a,
const float4 &b)
219 template <> __host__ __device__
inline void copy_scaled(char4 &a,
const float4 &b)
227 template <> __host__ __device__
inline void copy_scaled(short2 &a,
const float2 &b)
233 template <> __host__ __device__
inline void copy_scaled(char2 &a,
const float2 &b)
/**
   Specialization of copy_scaled for a short destination and a float
   source: the float is passed through f2i() and the resulting integer
   is narrowed to short.
   NOTE(review): no scale factor is applied here, so b is presumably
   expected to be scaled already - confirm against the callers.
   @param a Destination short
   @param b Source float
*/
template <>
__host__ __device__ inline void copy_scaled(short &a, const float &b)
{
  a = static_cast<short>(f2i(b));
}
/**
   Specialization of copy_scaled for a char destination and a float
   source: the float is passed through f2i() and the resulting integer
   is narrowed to char.
   NOTE(review): no scale factor is applied here, so b is presumably
   expected to be scaled already - confirm against the callers.
   @param a Destination char
   @param b Source float
*/
template <>
__host__ __device__ inline void copy_scaled(char &a, const float &b)
{
  a = static_cast<char>(f2i(b));
}
248 template <
typename T1,
typename T2,
typename T3>
254 template <> __host__ __device__
inline void copy_and_scale(float4 &a,
const short4 &b,
const float &c)
262 template <> __host__ __device__
inline void copy_and_scale(float4 &a,
const char4 &b,
const float &c)
270 template <> __host__ __device__
inline void copy_and_scale(float2 &a,
const short2 &b,
const float &c)
276 template <> __host__ __device__
inline void copy_and_scale(float2 &a,
const char2 &b,
const float &c)
282 template <> __host__ __device__
inline void copy_and_scale(
float &a,
const short &b,
const float &c)
/**
   Specialization of copy_and_scale for a float destination and a char
   source: a receives the result of the two-argument c2f(b, c).
   NOTE(review): the two-argument c2f overload is defined elsewhere;
   presumably c is the scale applied during the conversion - confirm.
   @param a Destination float
   @param b Source char
   @param c Scale factor forwarded to c2f
*/
template <>
__host__ __device__ inline void copy_and_scale(float &a, const char &b, const float &c)
{
  a = c2f(b, c);
}
292 template <
bool isFixed,
typename T>
/**
   Generic two-argument arctangent: forwards to atan2(a, b) for the
   enclosing template's value type T.
   @param a First argument passed to atan2
   @param b Second argument passed to atan2
   @return The value of atan2(a, b)
*/
__device__ __host__ static T Atan2(const T &a, const T &b)
{
  return atan2(a, b);
}
/**
   Generic sine: forwards to sin(a) for the enclosing template's value
   type T.
   @param a Argument passed to sin
   @return The value of sin(a)
*/
__device__ __host__ static T Sin(const T &a)
{
  return sin(a);
}
/**
   Generic cosine: forwards to cos(a) for the enclosing template's
   value type T.
   @param a Argument passed to cos
   @return The value of cos(a)
*/
__device__ __host__ static T Cos(const T &a)
{
  return cos(a);
}
/**
   Generic combined sine/cosine: forwards to sincos(a, s, c), which
   writes both results through the supplied pointers.
   @param a Argument passed to sincos
   @param s Output location for the sine
   @param c Output location for the cosine
*/
__device__ __host__ static void SinCos(const T &a, T *s, T *c)
{
  sincos(a, s, c);
}
/**
   Single-precision two-argument arctangent: forwards to atan2f so the
   evaluation stays in float.
   @param a First argument passed to atan2f
   @param b Second argument passed to atan2f
   @return The value of atan2f(a, b)
*/
__device__ __host__ static float Atan2(const float &a, const float &b)
{
  return atan2f(a, b);
}
306 __device__ __host__
static float Sin(
const float &a)
314 __device__ __host__
static float Cos(
const float &a)
323 __device__ __host__
static void SinCos(
const float &a,
float *
s,
float *c)
/**
   Two-argument arctangent expressed in units of pi: the result of
   atan2f(a, b) is divided by pi, giving a value in [-1, 1].

   The divisor is cast to float so the whole expression is evaluated in
   single precision.  A bare M_PI is a double and would promote the
   division to double precision before the result is narrowed back to
   float.  The cast also matches the neighbouring Sin/Cos/SinCos
   members, which multiply by static_cast<float>(M_PI).

   @param a First argument passed to atan2f
   @param b Second argument passed to atan2f
   @return atan2f(a, b) / pi
*/
__device__ __host__ static float Atan2(const float &a, const float &b)
{
  return atan2f(a, b) / static_cast<float>(M_PI);
}
339 __device__ __host__
static float Sin(
const float &a)
342 return __sinf(a * static_cast<float>(M_PI));
344 return sinf(a * static_cast<float>(M_PI));
347 __device__ __host__
static float Cos(
const float &a)
350 return __cosf(a * static_cast<float>(M_PI));
352 return cosf(a * static_cast<float>(M_PI));
356 __device__ __host__
static void SinCos(
const float &a,
float *
s,
float *c)
359 __sincosf(a * static_cast<float>(M_PI), s, c);
361 sincosf(a * static_cast<float>(M_PI), s, c);
411 template <
typename VectorType>
414 #if defined(__CUDA_ARCH__) && defined(USE_LDG) 415 return __ldg(reinterpret_cast< VectorType* >(ptr) + idx);
417 return reinterpret_cast< VectorType*
>(ptr)[idx];
421 template <
typename VectorType>
423 reinterpret_cast< VectorType*
>(ptr)[idx] = value;
427 __device__ __host__
inline void vector_store(
void *ptr,
int idx,
const double2 &value) {
428 #if defined(__CUDA_ARCH__) 431 reinterpret_cast<double2*
>(ptr)[idx] = value;
436 __device__ __host__
inline void vector_store(
void *ptr,
int idx,
const float4 &value) {
437 #if defined(__CUDA_ARCH__) 440 reinterpret_cast<float4*
>(ptr)[idx] = value;
445 __device__ __host__
inline void vector_store(
void *ptr,
int idx,
const float2 &value) {
446 #if defined(__CUDA_ARCH__) 449 reinterpret_cast<float2*
>(ptr)[idx] = value;
454 __device__ __host__
inline void vector_store(
void *ptr,
int idx,
const short4 &value) {
455 #if defined(__CUDA_ARCH__) 458 reinterpret_cast<short4*
>(ptr)[idx] = value;
463 __device__ __host__
inline void vector_store(
void *ptr,
int idx,
const short2 &value) {
464 #if defined(__CUDA_ARCH__) 467 reinterpret_cast<short2*
>(ptr)[idx] = value;
473 __device__ __host__
inline void vector_store(
void *ptr,
int idx,
const char4 &value) {
474 #if defined(__CUDA_ARCH__) 476 store_streaming_short2(reinterpret_cast<short2*>(ptr)+idx, reinterpret_cast<const short2*>(&value)->x, reinterpret_cast<const short2*>(&value)->y);
478 reinterpret_cast<char4*
>(ptr)[idx] = value;
484 __device__ __host__
inline void vector_store(
void *ptr,
int idx,
const char2 &value) {
485 #if defined(__CUDA_ARCH__) 486 vector_store(ptr, idx, *reinterpret_cast<const short*>(&value));
489 reinterpret_cast<char2*
>(ptr)[idx] = value;
499 #endif // _REGISTER_TRAITS_H
__device__ static __host__ void SinCos(const T &a, T *s, T *c)
__device__ static __host__ T Cos(const T &a)
__device__ static __host__ float Cos(const float &a)
__host__ __device__ void copy_scaled(T1 &a, const T2 &b)
__device__ static __host__ void SinCos(const float &a, float *s, float *c)
__device__ static __host__ float Sin(const float &a)
__device__ static __host__ void SinCos(const float &a, float *s, float *c)
__host__ __device__ void copy(T1 &a, const T2 &b)
__host__ __device__ ValueType sin(ValueType x)
__device__ static __host__ float Sin(const float &a)
__device__ __host__ void vector_store(void *ptr, int idx, const VectorType &value)
__host__ __device__ ValueType atan2(ValueType x, ValueType y)
__device__ static __host__ float Atan2(const float &a, const float &b)
__device__ void store_streaming_double2(double2 *addr, double x, double y)
__device__ void store_streaming_float4(float4 *addr, float x, float y, float z, float w)
__device__ static __host__ float Atan2(const float &a, const float &b)
__device__ static __host__ float Cos(const float &a)
__device__ void store_streaming_float2(float2 *addr, float x, float y)
__device__ static __host__ T Sin(const T &a)
__host__ __device__ float s2f(short a)
__device__ __host__ VectorType vector_load(void *ptr, int idx)
__host__ __device__ ValueType cos(ValueType x)
__device__ void store_streaming_short2(short2 *addr, short x, short y)
__device__ void store_streaming_short4(short4 *addr, short x, short y, short z, short w)
__device__ __host__ int f2i(float f)
__host__ __device__ float c2f(char a)
__device__ static __host__ T Atan2(const T &a, const T &b)
__host__ __device__ void copy_and_scale(T1 &a, const T2 &b, const T3 &c)
Specialized variants of the copy function that include an additional scale factor. Note the scale factor is ignored unless the input type (b) is either a short or char vector.