QUDA  v0.7.0
A library for QCD on GPUs
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
convert.h
Go to the documentation of this file.
1 #ifndef _CONVERT_H
2 #define _CONVERT_H
3 
12 #include <quda_internal.h> // for MAX_SHORT
13 
// Number of scalar components held by a type.  Any type that has no explicit
// specialization reports 0.
template <typename T>
int vecLength()
{
  return 0;
}
15 
// Scalar types hold a single component.  Explicit specializations of a
// function template are not implicitly inline, so they are marked inline to
// keep this header safe to include from more than one translation unit.
template<> inline int vecLength<short>() { return 1; }
template<> inline int vecLength<float>() { return 1; }
template<> inline int vecLength<double>() { return 1; }
19 
// Two-component vector types.  Explicit specializations of a function
// template are not implicitly inline, so they are marked inline to keep this
// header safe to include from more than one translation unit.
template<> inline int vecLength<short2>() { return 2; }
template<> inline int vecLength<float2>() { return 2; }
template<> inline int vecLength<double2>() { return 2; }
23 
// Four-component vector types.  Explicit specializations of a function
// template are not implicitly inline, so they are marked inline to keep this
// header safe to include from more than one translation unit.
template<> inline int vecLength<short4>() { return 4; }
template<> inline int vecLength<float4>() { return 4; }
template<> inline int vecLength<double4>() { return 4; }
27 
// MAX_SHORT 32767
// MAX_SHORT_INV is 1 / MAX_SHORT; multiplying a short by it rescales the
// value to roughly the interval [-1, 1].
#define MAX_SHORT_INV 3.051850948e-5

// Rescale a short to single precision.  MAX_SHORT_INV is a double literal, so
// the product is formed in double and narrowed to float on return.
static inline __device__ float s2f(const short &a) { return static_cast<float>(a) * MAX_SHORT_INV; }

// Rescale a short to double precision.  The return type is double so that the
// double-precision product is not narrowed to float before reaching the caller.
static inline __device__ double s2d(const short &a) { return static_cast<double>(a) * MAX_SHORT_INV; }
32 
// Same-type copy: a plain assignment with no conversion or rescaling.
template <typename FloatN>
__device__ inline void copyFloatN(FloatN &a, const FloatN &b)
{
  a = b;
}
35 
// Short -> floating point.  Every component is rescaled through s2f / s2d,
// which emulates the normalized value a texture read would return.
__device__ inline void copyFloatN(float2 &a, const short2 &b)
{
  const float x = s2f(b.x);
  const float y = s2f(b.y);
  a = make_float2(x, y);
}

__device__ inline void copyFloatN(float4 &a, const short4 &b)
{
  const float x = s2f(b.x);
  const float y = s2f(b.y);
  const float z = s2f(b.z);
  const float w = s2f(b.w);
  a = make_float4(x, y, z, w);
}

__device__ inline void copyFloatN(double2 &a, const short2 &b)
{
  const double x = s2d(b.x);
  const double y = s2d(b.y);
  a = make_double2(x, y);
}

__device__ inline void copyFloatN(double4 &a, const short4 &b)
{
  const double x = s2d(b.x);
  const double y = s2d(b.y);
  const double z = s2d(b.z);
  const double w = s2d(b.w);
  a = make_double4(x, y, z, w);
}
41 
// Precision change between float and double vectors of equal length; each
// component is converted individually, with no rescaling.
__device__ inline void copyFloatN(float2 &a, const double2 &b)
{
  a = make_float2(static_cast<float>(b.x), static_cast<float>(b.y));
}

__device__ inline void copyFloatN(double2 &a, const float2 &b)
{
  a = make_double2(static_cast<double>(b.x), static_cast<double>(b.y));
}

__device__ inline void copyFloatN(float4 &a, const double4 &b)
{
  a = make_float4(static_cast<float>(b.x), static_cast<float>(b.y),
                  static_cast<float>(b.z), static_cast<float>(b.w));
}

__device__ inline void copyFloatN(double4 &a, const float4 &b)
{
  a = make_double4(static_cast<double>(b.x), static_cast<double>(b.y),
                   static_cast<double>(b.z), static_cast<double>(b.w));
}
46 
/* Floating point -> short.  The input is assumed to have been normalized and
   shifted already; each component is simply converted to short, which drops
   the fractional part. */
__device__ inline void copyFloatN(short2 &a, const float2 &b)
{
  a = make_short2(static_cast<short>(b.x), static_cast<short>(b.y));
}

__device__ inline void copyFloatN(short4 &a, const float4 &b)
{
  a = make_short4(static_cast<short>(b.x), static_cast<short>(b.y),
                  static_cast<short>(b.z), static_cast<short>(b.w));
}

__device__ inline void copyFloatN(short2 &a, const double2 &b)
{
  a = make_short2(static_cast<short>(b.x), static_cast<short>(b.y));
}

__device__ inline void copyFloatN(short4 &a, const double4 &b)
{
  a = make_short4(static_cast<short>(b.x), static_cast<short>(b.y),
                  static_cast<short>(b.z), static_cast<short>(b.w));
}
52 
53 
// Default conversion: a one-to-one, element-wise copy of N entries from y
// into x through copyFloatN, e.g., for matching vector lengths and precisions.
template<typename OutputType, typename InputType>
__device__ inline void convert(OutputType x[], InputType y[], const int N)
{
#pragma unroll
  for (int i = 0; i < N; ++i) {
    copyFloatN(x[i], y[i]);
  }
}
71 
// short2 -> float2, element by element.  The short components are converted
// to float as they are; no MAX_SHORT_INV rescaling is applied here.
template<> __device__ inline void convert<float2,short2>(float2 x[], short2 y[], const int N)
{
#pragma unroll
  for (int i = 0; i < N; ++i) {
    const short2 &src = y[i];
    x[i] = make_float2(static_cast<float>(src.x), static_cast<float>(src.y));
  }
}
76 
// short4 -> float4, element by element.  The short components are converted
// to float as they are; no MAX_SHORT_INV rescaling is applied here.
template<> __device__ inline void convert<float4,short4>(float4 x[], short4 y[], const int N)
{
#pragma unroll
  for (int i = 0; i < N; ++i) {
    const short4 &src = y[i];
    x[i] = make_float4(static_cast<float>(src.x), static_cast<float>(src.y),
                       static_cast<float>(src.z), static_cast<float>(src.w));
  }
}
81 
82 // 4 <-> 2 vector conversion
83 
// double2 -> double4: each output packs two consecutive inputs.
// Writes x[0..N-1] and reads y[0..2N-1].
template<> __device__ inline void convert<double4,double2>(double4 x[], double2 y[], const int N)
{
#pragma unroll
  for (int i = 0; i < N; ++i) {
    const double2 &lo = y[2*i];
    const double2 &hi = y[2*i + 1];
    x[i] = make_double4(lo.x, lo.y, hi.x, hi.y);
  }
}
88 
// double4 -> double2: each input is split into two consecutive outputs.
// N counts the outputs; N/2 inputs are read, so for odd N the last entry of x
// is left unwritten.
template<> __device__ inline void convert<double2,double4>(double2 x[], double4 y[], const int N)
{
  const int pairs = N / 2;
#pragma unroll
  for (int i = 0; i < pairs; ++i) {
    const double4 &src = y[i];
    x[2*i]     = make_double2(src.x, src.y);
    x[2*i + 1] = make_double2(src.z, src.w);
  }
}
96 
// float2 -> float4: each output packs two consecutive inputs.
// Writes x[0..N-1] and reads y[0..2N-1].
template<> __device__ inline void convert<float4,float2>(float4 x[], float2 y[], const int N)
{
#pragma unroll
  for (int i = 0; i < N; ++i) {
    const float2 &lo = y[2*i];
    const float2 &hi = y[2*i + 1];
    x[i] = make_float4(lo.x, lo.y, hi.x, hi.y);
  }
}
101 
// float4 -> float2: each input is split into two consecutive outputs.
// N counts the outputs; N/2 inputs are read, so for odd N the last entry of x
// is left unwritten.
template<> __device__ inline void convert<float2,float4>(float2 x[], float4 y[], const int N)
{
  const int pairs = N / 2;
#pragma unroll
  for (int i = 0; i < pairs; ++i) {
    const float4 &src = y[i];
    x[2*i]     = make_float2(src.x, src.y);
    x[2*i + 1] = make_float2(src.z, src.w);
  }
}
109 
// float2 -> short4: each output packs two consecutive inputs, with every
// component converted from float to short (no rescaling is done here).
// Writes x[0..N-1] and reads y[0..2N-1].
template<> __device__ inline void convert<short4,float2>(short4 x[], float2 y[], const int N)
{
#pragma unroll
  for (int i = 0; i < N; ++i) {
    const float2 &lo = y[2*i];
    const float2 &hi = y[2*i + 1];
    x[i] = make_short4(static_cast<short>(lo.x), static_cast<short>(lo.y),
                       static_cast<short>(hi.x), static_cast<short>(hi.y));
  }
}
114 
// short4 -> float2: each input is split into two consecutive outputs, with
// every component converted from short to float (no rescaling is done here).
// N counts the outputs; N/2 inputs are read, so for odd N the last entry of x
// is left unwritten.
template<> __device__ inline void convert<float2,short4>(float2 x[], short4 y[], const int N)
{
  const int pairs = N / 2;
#pragma unroll
  for (int i = 0; i < pairs; ++i) {
    const short4 &src = y[i];
    x[2*i]     = make_float2(static_cast<float>(src.x), static_cast<float>(src.y));
    x[2*i + 1] = make_float2(static_cast<float>(src.z), static_cast<float>(src.w));
  }
}
122 
// short2 -> float4: each output packs two consecutive inputs, with every
// component converted from short to float (no rescaling is done here).
// Writes x[0..N-1] and reads y[0..2N-1].
template<> __device__ inline void convert<float4,short2>(float4 x[], short2 y[], const int N)
{
#pragma unroll
  for (int i = 0; i < N; ++i) {
    const short2 &lo = y[2*i];
    const short2 &hi = y[2*i + 1];
    x[i] = make_float4(static_cast<float>(lo.x), static_cast<float>(lo.y),
                       static_cast<float>(hi.x), static_cast<float>(hi.y));
  }
}
127 
// float4 -> short2: each input is split into two consecutive outputs, with
// every component converted from float to short (no rescaling is done here).
// N counts the outputs; N/2 inputs are read, so for odd N the last entry of x
// is left unwritten.
template<> __device__ inline void convert<short2,float4>(short2 x[], float4 y[], const int N)
{
  const int pairs = N / 2;
#pragma unroll
  for (int i = 0; i < pairs; ++i) {
    const float4 &src = y[i];
    x[2*i]     = make_short2(static_cast<short>(src.x), static_cast<short>(src.y));
    x[2*i + 1] = make_short2(static_cast<short>(src.z), static_cast<short>(src.w));
  }
}
135 
// double2 -> short4: each output packs two consecutive inputs, with every
// component converted from double to short (no rescaling is done here).
// Writes x[0..N-1] and reads y[0..2N-1].
template<> __device__ inline void convert<short4,double2>(short4 x[], double2 y[], const int N)
{
#pragma unroll
  for (int i = 0; i < N; ++i) {
    const double2 &lo = y[2*i];
    const double2 &hi = y[2*i + 1];
    x[i] = make_short4(static_cast<short>(lo.x), static_cast<short>(lo.y),
                       static_cast<short>(hi.x), static_cast<short>(hi.y));
  }
}
140 
// short4 -> double2: each input is split into two consecutive outputs, with
// every component converted from short to double (no rescaling is done here).
// N counts the outputs; N/2 inputs are read, so for odd N the last entry of x
// is left unwritten.
template<> __device__ inline void convert<double2,short4>(double2 x[], short4 y[], const int N)
{
  const int pairs = N / 2;
#pragma unroll
  for (int i = 0; i < pairs; ++i) {
    const short4 &src = y[i];
    x[2*i]     = make_double2(static_cast<double>(src.x), static_cast<double>(src.y));
    x[2*i + 1] = make_double2(static_cast<double>(src.z), static_cast<double>(src.w));
  }
}
148 
// short2 -> double4: each output packs two consecutive inputs, with every
// component converted from short to double (no rescaling is done here).
// Writes x[0..N-1] and reads y[0..2N-1].
template<> __device__ inline void convert<double4,short2>(double4 x[], short2 y[], const int N)
{
#pragma unroll
  for (int i = 0; i < N; ++i) {
    const short2 &lo = y[2*i];
    const short2 &hi = y[2*i + 1];
    x[i] = make_double4(static_cast<double>(lo.x), static_cast<double>(lo.y),
                        static_cast<double>(hi.x), static_cast<double>(hi.y));
  }
}
153 
// double4 -> short2: each input is split into two consecutive outputs, with
// every component converted from double to short (no rescaling is done here).
// N counts the outputs; N/2 inputs are read, so for odd N the last entry of x
// is left unwritten.
template<> __device__ inline void convert<short2,double4>(short2 x[], double4 y[], const int N)
{
  const int pairs = N / 2;
#pragma unroll
  for (int i = 0; i < pairs; ++i) {
    const double4 &src = y[i];
    x[2*i]     = make_short2(static_cast<short>(src.x), static_cast<short>(src.y));
    x[2*i + 1] = make_short2(static_cast<short>(src.z), static_cast<short>(src.w));
  }
}
161 
// double2 -> float4: each output packs two consecutive inputs, with every
// component narrowed from double to float.
// Writes x[0..N-1] and reads y[0..2N-1].
template<> __device__ inline void convert<float4,double2>(float4 x[], double2 y[], const int N)
{
#pragma unroll
  for (int i = 0; i < N; ++i) {
    const double2 &lo = y[2*i];
    const double2 &hi = y[2*i + 1];
    x[i] = make_float4(static_cast<float>(lo.x), static_cast<float>(lo.y),
                       static_cast<float>(hi.x), static_cast<float>(hi.y));
  }
}
166 
// float4 -> double2: each input is split into two consecutive outputs, with
// every component widened from float to double.
// N counts the outputs; N/2 inputs are read, so for odd N the last entry of x
// is left unwritten.
template<> __device__ inline void convert<double2,float4>(double2 x[], float4 y[], const int N)
{
  const int pairs = N / 2;
#pragma unroll
  for (int i = 0; i < pairs; ++i) {
    const float4 &src = y[i];
    x[2*i]     = make_double2(static_cast<double>(src.x), static_cast<double>(src.y));
    x[2*i + 1] = make_double2(static_cast<double>(src.z), static_cast<double>(src.w));
  }
}
174 
// float2 -> double4: each output packs two consecutive inputs, with every
// component widened from float to double.
// Writes x[0..N-1] and reads y[0..2N-1].
template<> __device__ inline void convert<double4,float2>(double4 x[], float2 y[], const int N)
{
#pragma unroll
  for (int i = 0; i < N; ++i) {
    const float2 &lo = y[2*i];
    const float2 &hi = y[2*i + 1];
    x[i] = make_double4(static_cast<double>(lo.x), static_cast<double>(lo.y),
                        static_cast<double>(hi.x), static_cast<double>(hi.y));
  }
}
179 
// double4 -> float2: each input is split into two consecutive outputs, with
// every component narrowed from double to float.
// N counts the outputs; N/2 inputs are read, so for odd N the last entry of x
// is left unwritten.
template<> __device__ inline void convert<float2,double4>(float2 x[], double4 y[], const int N)
{
  const int pairs = N / 2;
#pragma unroll
  for (int i = 0; i < pairs; ++i) {
    const double4 &src = y[i];
    x[2*i]     = make_float2(static_cast<float>(src.x), static_cast<float>(src.y));
    x[2*i + 1] = make_float2(static_cast<float>(src.z), static_cast<float>(src.w));
  }
}
187 
188 #endif // _CONVERT_H
int vecLength< short >()
Definition: convert.h:16
int y[4]
#define MAX_SHORT_INV
Definition: convert.h:29
int vecLength< float >()
Definition: convert.h:17
int vecLength< float4 >()
Definition: convert.h:25
int vecLength< double4 >()
Definition: convert.h:26
__device__ void convert< float4, short4 >(float4 x[], short4 y[], const int N)
Definition: convert.h:77
int vecLength< double2 >()
Definition: convert.h:22
__device__ void convert< float2, short2 >(float2 x[], short2 y[], const int N)
Definition: convert.h:72
int vecLength< float2 >()
Definition: convert.h:21
__device__ void copyFloatN(FloatN &a, const FloatN &b)
Definition: convert.h:34
__device__ void convert< double4, short2 >(double4 x[], short2 y[], const int N)
Definition: convert.h:149
__device__ void convert(OutputType x[], InputType y[], const int N)
Definition: convert.h:66
__device__ void convert< short2, float4 >(short2 x[], float4 y[], const int N)
Definition: convert.h:128
__device__ void convert< float4, short2 >(float4 x[], short2 y[], const int N)
Definition: convert.h:123
__device__ void convert< float4, float2 >(float4 x[], float2 y[], const int N)
Definition: convert.h:97
__device__ void convert< float2, float4 >(float2 x[], float4 y[], const int N)
Definition: convert.h:102
__device__ void convert< double4, double2 >(double4 x[], double2 y[], const int N)
Definition: convert.h:84
__device__ void convert< float2, short4 >(float2 x[], short4 y[], const int N)
Definition: convert.h:115
int x[4]
int vecLength()
Definition: convert.h:14
__device__ void convert< double2, double4 >(double2 x[], double4 y[], const int N)
Definition: convert.h:89
__device__ void convert< double4, float2 >(double4 x[], float2 y[], const int N)
Definition: convert.h:175
__device__ void convert< double2, float4 >(double2 x[], float4 y[], const int N)
Definition: convert.h:167
__device__ void convert< float4, double2 >(float4 x[], double2 y[], const int N)
Definition: convert.h:162
__device__ void convert< double2, short4 >(double2 x[], short4 y[], const int N)
Definition: convert.h:141
int vecLength< short4 >()
Definition: convert.h:24
__device__ void convert< short4, float2 >(short4 x[], float2 y[], const int N)
Definition: convert.h:110
int vecLength< short2 >()
Definition: convert.h:20
int vecLength< double >()
Definition: convert.h:18
__device__ void convert< short4, double2 >(short4 x[], double2 y[], const int N)
Definition: convert.h:136
__device__ void convert< float2, double4 >(float2 x[], double4 y[], const int N)
Definition: convert.h:180
__device__ void convert< short2, double4 >(short2 x[], double4 y[], const int N)
Definition: convert.h:154