/**
 * Generic version of vecLength.
 *
 * This primary template reports a length of 0 regardless of the type it
 * is instantiated with; the template argument is not inspected.
 *
 * @tparam type  Type being queried (unused here).
 * @return Always 0.
 */
template <typename type>
inline int vecLength()
{
  return 0;
}
42 inline __host__ __device__
float s2f(
short a,
float c)
46 inline __host__ __device__
double s2d(
short a,
double c)
52 inline __host__ __device__
float c2f(
char a,
float c)
56 inline __host__ __device__
double c2d(
char a,
double c)
/**
 * Generic copyFloatN for the case where destination and source have the
 * same type: the value is transferred with a plain assignment.
 *
 * @tparam FloatN  Common type of both arguments.
 * @param a  Destination, overwritten with b.
 * @param b  Source value.
 */
template <typename FloatN>
__device__ inline void copyFloatN(FloatN &a, const FloatN &b)
{
  a = b;
}
/**
 * Copy a char2 into a float2, passing each component through c2f.
 *
 * @param a  Destination pair, overwritten with the converted values.
 * @param b  Source pair of char components.
 */
__device__ inline void copyFloatN(float2 &a, const char2 &b)
{
  const float first = c2f(b.x);
  const float second = c2f(b.y);
  a = make_float2(first, second);
}
65 __device__
inline void copyFloatN(float4 &a,
const char4 &b)
/**
 * Copy a char2 into a double2, passing each component through c2d.
 *
 * @param a  Destination pair, overwritten with the converted values.
 * @param b  Source pair of char components.
 */
__device__ inline void copyFloatN(double2 &a, const char2 &b)
{
  const double first = c2d(b.x);
  const double second = c2d(b.y);
  a = make_double2(first, second);
}
70 __device__
inline void copyFloatN(double4 &a,
const char4 &b)
72 a = make_double4(
c2d(b.x),
c2d(b.y),
c2d(b.z),
c2d(b.w));
/**
 * Copy a short2 into a float2, passing each component through s2f.
 *
 * @param a  Destination pair, overwritten with the converted values.
 * @param b  Source pair of short components.
 */
__device__ inline void copyFloatN(float2 &a, const short2 &b)
{
  const float first = s2f(b.x);
  const float second = s2f(b.y);
  a = make_float2(first, second);
}
77 __device__
inline void copyFloatN(float4 &a,
const short4 &b)
/**
 * Copy a short2 into a double2, passing each component through s2d.
 *
 * @param a  Destination pair, overwritten with the converted values.
 * @param b  Source pair of short components.
 */
__device__ inline void copyFloatN(double2 &a, const short2 &b)
{
  const double first = s2d(b.x);
  const double second = s2d(b.y);
  a = make_double2(first, second);
}
82 __device__
inline void copyFloatN(double4 &a,
const short4 &b)
84 a = make_double4(
s2d(b.x),
s2d(b.y),
s2d(b.z),
s2d(b.w));
/**
 * Copy a double2 into a float2. Each component is converted from double
 * to float by the ordinary floating-point conversion.
 *
 * @param a  Destination pair, overwritten.
 * @param b  Source pair of double components.
 */
__device__ inline void copyFloatN(float2 &a, const double2 &b)
{
  const float first = static_cast<float>(b.x);
  const float second = static_cast<float>(b.y);
  a = make_float2(first, second);
}
/**
 * Copy a float2 into a double2. Each component is widened from float to
 * double.
 *
 * @param a  Destination pair, overwritten.
 * @param b  Source pair of float components.
 */
__device__ inline void copyFloatN(double2 &a, const float2 &b)
{
  const double first = static_cast<double>(b.x);
  const double second = static_cast<double>(b.y);
  a = make_double2(first, second);
}
/**
 * Copy a double4 into a float4. Each of the four components is converted
 * from double to float by the ordinary floating-point conversion.
 *
 * @param a  Destination quadruple, overwritten.
 * @param b  Source quadruple of double components.
 */
__device__ inline void copyFloatN(float4 &a, const double4 &b)
{
  const float vx = static_cast<float>(b.x);
  const float vy = static_cast<float>(b.y);
  const float vz = static_cast<float>(b.z);
  const float vw = static_cast<float>(b.w);
  a = make_float4(vx, vy, vz, vw);
}
/**
 * Copy a float4 into a double4. Each of the four components is widened
 * from float to double.
 *
 * @param a  Destination quadruple, overwritten.
 * @param b  Source quadruple of float components.
 */
__device__ inline void copyFloatN(double4 &a, const float4 &b)
{
  const double vx = static_cast<double>(b.x);
  const double vy = static_cast<double>(b.y);
  const double vz = static_cast<double>(b.z);
  const double vw = static_cast<double>(b.w);
  a = make_double4(vx, vy, vz, vw);
}
93 __device__ __host__
inline int f2i(
float f)
97 return reinterpret_cast<int &
>(f);
99 return static_cast<int>(f);
104 __device__ __host__
inline int d2i(
double d)
107 d += 6755399441055744.0;
108 return reinterpret_cast<int &
>(d);
110 return static_cast<int>(d);
/**
 * Copy a float2 into a short2. Each component is first turned into an
 * int by f2i, and the two ints are then handed to make_short2.
 *
 * @param a  Destination pair, overwritten.
 * @param b  Source pair of float components.
 */
__device__ inline void copyFloatN(short2 &a, const float2 &b)
{
  const int first = f2i(b.x);
  const int second = f2i(b.y);
  a = make_short2(first, second);
}
116 __device__
inline void copyFloatN(short4 &a,
const float4 &b)
118 a = make_short4(
f2i(b.x),
f2i(b.y),
f2i(b.z),
f2i(b.w));
/**
 * Copy a double2 into a short2. Each component is first turned into an
 * int by d2i, and the two ints are then handed to make_short2.
 *
 * @param a  Destination pair, overwritten.
 * @param b  Source pair of double components.
 */
__device__ inline void copyFloatN(short2 &a, const double2 &b)
{
  const int first = d2i(b.x);
  const int second = d2i(b.y);
  a = make_short2(first, second);
}
121 __device__
inline void copyFloatN(short4 &a,
const double4 &b)
123 a = make_short4(
d2i(b.x),
d2i(b.y),
d2i(b.z),
d2i(b.w));
/**
 * Copy a float2 into a char2. Each component is first turned into an int
 * by f2i, and the two ints are then handed to make_char2.
 *
 * @param a  Destination pair, overwritten.
 * @param b  Source pair of float components.
 */
__device__ inline void copyFloatN(char2 &a, const float2 &b)
{
  const int first = f2i(b.x);
  const int second = f2i(b.y);
  a = make_char2(first, second);
}
/**
 * Copy a double2 into a char2. Each component is first turned into an
 * int by d2i, and the two ints are then handed to make_char2.
 *
 * @param a  Destination pair, overwritten.
 * @param b  Source pair of double components.
 */
__device__ inline void copyFloatN(char2 &a, const double2 &b)
{
  const int first = d2i(b.x);
  const int second = d2i(b.y);
  a = make_char2(first, second);
}
132 __device__
inline void copyFloatN(char4 &a,
const double4 &b)
148 template <
typename OutputType,
typename InputType>
149 __device__
inline void convert(OutputType x[], InputType y[],
const int N)
153 for (
int j = 0; j < N; j++)
copyFloatN(x[j], y[j]);
159 for (
int j = 0; j < N; j++) x[j] = make_float2(y[j].x, y[j].y);
165 for (
int j = 0; j < N; j++) x[j] = make_float4(y[j].x, y[j].y, y[j].z, y[j].w);
173 for (
int j = 0; j < N; j++) x[j] = make_double4(y[2 * j].x, y[2 * j].y, y[2 * j + 1].x, y[2 * j + 1].y);
179 for (
int j = 0; j < N / 2; j++) {
180 x[2 * j] = make_double2(y[j].x, y[j].y);
181 x[2 * j + 1] = make_double2(y[j].z, y[j].w);
188 for (
int j = 0; j < N; j++) x[j] = make_float4(y[2 * j].x, y[2 * j].y, y[2 * j + 1].x, y[2 * j + 1].y);
194 for (
int j = 0; j < N / 2; j++) {
195 x[2 * j] = make_float2(y[j].x, y[j].y);
196 x[2 * j + 1] = make_float2(y[j].z, y[j].w);
203 for (
int j = 0; j < N; j++)
204 x[j] = make_short4(
f2i(y[2 * j].x),
f2i(y[2 * j].y),
f2i(y[2 * j + 1].x),
f2i(y[2 * j + 1].y));
210 for (
int j = 0; j < N / 2; j++) {
211 x[2 * j] = make_float2(y[j].x, y[j].y);
212 x[2 * j + 1] = make_float2(y[j].z, y[j].w);
219 for (
int j = 0; j < N; j++) x[j] = make_float4(y[2 * j].x, y[2 * j].y, y[2 * j + 1].x, y[2 * j + 1].y);
225 for (
int j = 0; j < N / 2; j++) {
226 x[2 * j] = make_short2(
f2i(y[j].x),
f2i(y[j].y));
227 x[2 * j + 1] = make_short2(
f2i(y[j].z),
f2i(y[j].w));
234 for (
int j = 0; j < N; j++)
235 x[j] = make_short4(
d2i(y[2 * j].x),
d2i(y[2 * j].y),
d2i(y[2 * j + 1].x),
d2i(y[2 * j + 1].y));
241 for (
int j = 0; j < N / 2; j++) {
242 x[2 * j] = make_double2(y[j].x, y[j].y);
243 x[2 * j + 1] = make_double2(y[j].z, y[j].w);
250 for (
int j = 0; j < N; j++) x[j] = make_double4(y[2 * j].x, y[2 * j].y, y[2 * j + 1].x, y[2 * j + 1].y);
256 for (
int j = 0; j < N / 2; j++) {
257 x[2 * j] = make_short2(
d2i(y[j].x),
d2i(y[j].y));
258 x[2 * j + 1] = make_short2(
d2i(y[j].z),
d2i(y[j].w));
265 for (
int j = 0; j < N; j++) x[j] = make_float4(y[2 * j].x, y[2 * j].y, y[2 * j + 1].x, y[2 * j + 1].y);
271 for (
int j = 0; j < N / 2; j++) {
272 x[2 * j] = make_double2(y[j].x, y[j].y);
273 x[2 * j + 1] = make_double2(y[j].z, y[j].w);
280 for (
int j = 0; j < N; j++) x[j] = make_double4(y[2 * j].x, y[2 * j].y, y[2 * j + 1].x, y[2 * j + 1].y);
286 for (
int j = 0; j < N / 2; j++) {
287 x[2 * j] = make_float2(y[j].x, y[j].y);
288 x[2 * j + 1] = make_float2(y[j].z, y[j].w);
int vecLength< float2 >()
int vecLength< short4 >()
int vecLength< double2 >()
__device__ __host__ int d2i(double d)
int vecLength< double4 >()
__device__ void convert< float4, double2 >(float4 x[], double2 y[], const int N)
__device__ void convert< float2, double4 >(float2 x[], double4 y[], const int N)
__host__ __device__ double s2d(short a)
int vecLength< float4 >()
__device__ void convert< double2, double4 >(double2 x[], double4 y[], const int N)
__device__ void convert< double2, float4 >(double2 x[], float4 y[], const int N)
__device__ void convert< double4, float2 >(double4 x[], float2 y[], const int N)
__device__ void convert< short2, double4 >(short2 x[], double4 y[], const int N)
__device__ void convert< short2, float4 >(short2 x[], float4 y[], const int N)
__device__ void convert< double2, short4 >(double2 x[], short4 y[], const int N)
__device__ void convert< double4, double2 >(double4 x[], double2 y[], const int N)
__device__ void copyFloatN(FloatN &a, const FloatN &b)
__device__ void convert< float2, float4 >(float2 x[], float4 y[], const int N)
__device__ void convert< short4, float2 >(short4 x[], float2 y[], const int N)
__device__ void convert< float2, short2 >(float2 x[], short2 y[], const int N)
__device__ void convert< float4, short4 >(float4 x[], short4 y[], const int N)
__device__ void convert< float4, float2 >(float4 x[], float2 y[], const int N)
__host__ __device__ float s2f(short a)
__device__ void convert< float4, short2 >(float4 x[], short2 y[], const int N)
__device__ void convert< short4, double2 >(short4 x[], double2 y[], const int N)
int vecLength< short2 >()
int vecLength< double >()
__host__ __device__ double c2d(char a)
__device__ void convert< float2, short4 >(float2 x[], short4 y[], const int N)
__device__ void convert< double4, short2 >(double4 x[], short2 y[], const int N)
__device__ __host__ int f2i(float f)
__host__ __device__ float c2f(char a)
__device__ void convert(OutputType x[], InputType y[], const int N)