/**
 * Primary template for the vector-length query.
 *
 * This fallback reports a length of 0 for any type it is instantiated with.
 *
 * @tparam type  Type being queried.
 * @return Always 0 from this primary template.
 */
template <typename type>
int vecLength() {
  const int length = 0;  // the unspecialized case reports zero
  return length;
}
// Scale factor applied when turning a short into a floating-point value
// (numerically about 1/32767). Note that this is a double-precision literal.
#define MAX_SHORT_INV 3.051850948e-5

/**
 * Convert a short to a float scaled by MAX_SHORT_INV.
 *
 * @param a  Input value.
 * @return   a * MAX_SHORT_INV, narrowed to float.
 */
static inline __device__ float s2f(const short &a) {
  const float widened = static_cast<float>(a);
  // MAX_SHORT_INV is a double literal, so the product is formed in double
  // precision and only narrowed to float by the return conversion.
  return widened * MAX_SHORT_INV;
}
/**
 * Convert a short to a double scaled by MAX_SHORT_INV.
 *
 * The return type is double so the double-precision product reaches the
 * caller intact; returning float here would round the value to single
 * precision before it is stored into a double2/double4 component.
 *
 * @param a  Input value.
 * @return   a * MAX_SHORT_INV in double precision.
 */
static inline __device__ double s2d(const short &a) {
  return static_cast<double>(a) * MAX_SHORT_INV;
}
/**
 * Copy between two values of the same type.
 *
 * @tparam FloatN  Type shared by source and destination.
 * @param a  Destination, overwritten with b.
 * @param b  Source value.
 */
template <typename FloatN>
__device__ inline void copyFloatN(FloatN &a, const FloatN &b) {
  // identical types on both sides: plain assignment, no conversion involved
  a = b;
}
/**
 * Fill a float2 from a short2, passing each component through s2f.
 *
 * @param a  Destination float2.
 * @param b  Source short2.
 */
__device__ inline void copyFloatN(float2 &a, const short2 &b) {
  a.x = s2f(b.x);
  a.y = s2f(b.y);
}
/**
 * Fill a float4 from a short4, passing each component through s2f.
 *
 * @param a  Destination float4.
 * @param b  Source short4.
 */
__device__ inline void copyFloatN(float4 &a, const short4 &b) {
  a.x = s2f(b.x);
  a.y = s2f(b.y);
  a.z = s2f(b.z);
  a.w = s2f(b.w);
}
/**
 * Fill a double2 from a short2, passing each component through s2d.
 *
 * @param a  Destination double2.
 * @param b  Source short2.
 */
__device__ inline void copyFloatN(double2 &a, const short2 &b) {
  a.x = s2d(b.x);
  a.y = s2d(b.y);
}
/**
 * Fill a double4 from a short4, passing each component through s2d.
 *
 * @param a  Destination double4.
 * @param b  Source short4.
 */
__device__ inline void copyFloatN(double4 &a, const short4 &b) {
  a.x = s2d(b.x);
  a.y = s2d(b.y);
  a.z = s2d(b.z);
  a.w = s2d(b.w);
}
/**
 * Fill a float2 from a double2, narrowing each component to float.
 *
 * @param a  Destination float2.
 * @param b  Source double2.
 */
__device__ inline void copyFloatN(float2 &a, const double2 &b) {
  a.x = static_cast<float>(b.x);
  a.y = static_cast<float>(b.y);
}
/**
 * Fill a double2 from a float2, widening each component to double.
 *
 * @param a  Destination double2.
 * @param b  Source float2.
 */
__device__ inline void copyFloatN(double2 &a, const float2 &b) {
  a.x = static_cast<double>(b.x);
  a.y = static_cast<double>(b.y);
}
/**
 * Fill a float4 from a double4, narrowing each component to float.
 *
 * @param a  Destination float4.
 * @param b  Source double4.
 */
__device__ inline void copyFloatN(float4 &a, const double4 &b) {
  a.x = static_cast<float>(b.x);
  a.y = static_cast<float>(b.y);
  a.z = static_cast<float>(b.z);
  a.w = static_cast<float>(b.w);
}
/**
 * Fill a double4 from a float4, widening each component to double.
 *
 * @param a  Destination double4.
 * @param b  Source float4.
 */
__device__ inline void copyFloatN(double4 &a, const float4 &b) {
  a.x = static_cast<double>(b.x);
  a.y = static_cast<double>(b.y);
  a.z = static_cast<double>(b.z);
  a.w = static_cast<double>(b.w);
}
/**
 * Fill a short2 from a float2 by converting each component to short.
 *
 * The conversion is the plain float-to-integer one: the fractional part is
 * dropped, and no scaling, rounding or clamping is applied here.
 *
 * @param a  Destination short2.
 * @param b  Source float2.
 */
__device__ inline void copyFloatN(short2 &a, const float2 &b) {
  a.x = static_cast<short>(b.x);
  a.y = static_cast<short>(b.y);
}
/**
 * Fill a short4 from a float4 by converting each component to short.
 *
 * The conversion is the plain float-to-integer one: the fractional part is
 * dropped, and no scaling, rounding or clamping is applied here.
 *
 * @param a  Destination short4.
 * @param b  Source float4.
 */
__device__ inline void copyFloatN(short4 &a, const float4 &b) {
  a.x = static_cast<short>(b.x);
  a.y = static_cast<short>(b.y);
  a.z = static_cast<short>(b.z);
  a.w = static_cast<short>(b.w);
}
/**
 * Fill a short2 from a double2 by converting each component to short.
 *
 * The conversion is the plain floating-to-integer one: the fractional part
 * is dropped, and no scaling, rounding or clamping is applied here.
 *
 * @param a  Destination short2.
 * @param b  Source double2.
 */
__device__ inline void copyFloatN(short2 &a, const double2 &b) {
  a.x = static_cast<short>(b.x);
  a.y = static_cast<short>(b.y);
}
/**
 * Fill a short4 from a double4 by converting each component to short.
 *
 * The conversion is the plain floating-to-integer one: the fractional part
 * is dropped, and no scaling, rounding or clamping is applied here.
 *
 * @param a  Destination short4.
 * @param b  Source double4.
 */
__device__ inline void copyFloatN(short4 &a, const double4 &b) {
  a.x = static_cast<short>(b.x);
  a.y = static_cast<short>(b.y);
  a.z = static_cast<short>(b.z);
  a.w = static_cast<short>(b.w);
}
/**
 * Convert the array y into the array x.
 *
 * N is the number of elements written to x in every variant:
 *  - same vector length:  x[j] is produced from y[j] for j in [0, N)
 *  - 2-vector -> 4-vector: x[j] packs y[2j] and y[2j+1], so 2*N elements of y are read
 *  - 4-vector -> 2-vector: y[j] is split into x[2j] and x[2j+1] for j in [0, N/2);
 *    with an odd N the last element of x is left unwritten
 *
 * The generic template forwards each element to copyFloatN. The explicit
 * specializations below build the output with the make_<type> constructors
 * directly, i.e. components are converted by value with no rescaling.
 *
 * NOTE(review): the specialization headers were restored by pairing each loop
 * body with the convert<...> signatures listed for this file. Bodies whose
 * text is identical are interchangeable, so the pairing within such a group
 * does not affect behavior -- confirm against the upstream source.
 *
 * @param x  Output array (N elements).
 * @param y  Input array.
 * @param N  Number of output elements.
 */
template<typename OutputType, typename InputType>
__device__ inline void convert(OutputType x[], InputType y[], const int N) {
  // default: one-to-one conversion through the copyFloatN overload set
  for (int j=0; j<N; j++) copyFloatN(x[j], y[j]);
}

// ---- same vector length, short -> float (plain value conversion) ----

template<> __device__ inline void convert<float2, short2>(float2 x[], short2 y[], const int N) {
  for (int j=0; j<N; j++) x[j] = make_float2(y[j].x, y[j].y);
}

template<> __device__ inline void convert<float4, short4>(float4 x[], short4 y[], const int N) {
  for (int j=0; j<N; j++) x[j] = make_float4(y[j].x, y[j].y, y[j].z, y[j].w);
}

// ---- 4 <-> 2 vector conversion, double <-> double ----

template<> __device__ inline void convert<double4, double2>(double4 x[], double2 y[], const int N) {
  for (int j=0; j<N; j++) x[j] = make_double4(y[2*j].x, y[2*j].y, y[2*j+1].x, y[2*j+1].y);
}

template<> __device__ inline void convert<double2, double4>(double2 x[], double4 y[], const int N) {
  for (int j=0; j<N/2; j++) {
    x[2*j] = make_double2(y[j].x, y[j].y);
    x[2*j+1] = make_double2(y[j].z, y[j].w);
  }
}

// ---- 4 <-> 2 vector conversion, float <-> float ----

template<> __device__ inline void convert<float4, float2>(float4 x[], float2 y[], const int N) {
  for (int j=0; j<N; j++) x[j] = make_float4(y[2*j].x, y[2*j].y, y[2*j+1].x, y[2*j+1].y);
}

template<> __device__ inline void convert<float2, float4>(float2 x[], float4 y[], const int N) {
  for (int j=0; j<N/2; j++) {
    x[2*j] = make_float2(y[j].x, y[j].y);
    x[2*j+1] = make_float2(y[j].z, y[j].w);
  }
}

// ---- 4 <-> 2 vector conversion, short <-> float ----

template<> __device__ inline void convert<short4, float2>(short4 x[], float2 y[], const int N) {
  for (int j=0; j<N; j++) x[j] = make_short4(y[2*j].x, y[2*j].y, y[2*j+1].x, y[2*j+1].y);
}

template<> __device__ inline void convert<float2, short4>(float2 x[], short4 y[], const int N) {
  for (int j=0; j<N/2; j++) {
    x[2*j] = make_float2(y[j].x, y[j].y);
    x[2*j+1] = make_float2(y[j].z, y[j].w);
  }
}

template<> __device__ inline void convert<float4, short2>(float4 x[], short2 y[], const int N) {
  for (int j=0; j<N; j++) x[j] = make_float4(y[2*j].x, y[2*j].y, y[2*j+1].x, y[2*j+1].y);
}

template<> __device__ inline void convert<short2, float4>(short2 x[], float4 y[], const int N) {
  for (int j=0; j<N/2; j++) {
    x[2*j] = make_short2(y[j].x, y[j].y);
    x[2*j+1] = make_short2(y[j].z, y[j].w);
  }
}

// ---- 4 <-> 2 vector conversion, short <-> double ----

template<> __device__ inline void convert<short4, double2>(short4 x[], double2 y[], const int N) {
  for (int j=0; j<N; j++) x[j] = make_short4(y[2*j].x, y[2*j].y, y[2*j+1].x, y[2*j+1].y);
}

template<> __device__ inline void convert<double2, short4>(double2 x[], short4 y[], const int N) {
  for (int j=0; j<N/2; j++) {
    x[2*j] = make_double2(y[j].x, y[j].y);
    x[2*j+1] = make_double2(y[j].z, y[j].w);
  }
}

template<> __device__ inline void convert<double4, short2>(double4 x[], short2 y[], const int N) {
  for (int j=0; j<N; j++) x[j] = make_double4(y[2*j].x, y[2*j].y, y[2*j+1].x, y[2*j+1].y);
}

template<> __device__ inline void convert<short2, double4>(short2 x[], double4 y[], const int N) {
  for (int j=0; j<N/2; j++) {
    x[2*j] = make_short2(y[j].x, y[j].y);
    x[2*j+1] = make_short2(y[j].z, y[j].w);
  }
}

// ---- 4 <-> 2 vector conversion, float <-> double ----

template<> __device__ inline void convert<float4, double2>(float4 x[], double2 y[], const int N) {
  for (int j=0; j<N; j++) x[j] = make_float4(y[2*j].x, y[2*j].y, y[2*j+1].x, y[2*j+1].y);
}

template<> __device__ inline void convert<double2, float4>(double2 x[], float4 y[], const int N) {
  for (int j=0; j<N/2; j++) {
    x[2*j] = make_double2(y[j].x, y[j].y);
    x[2*j+1] = make_double2(y[j].z, y[j].w);
  }
}

template<> __device__ inline void convert<double4, float2>(double4 x[], float2 y[], const int N) {
  for (int j=0; j<N; j++) x[j] = make_double4(y[2*j].x, y[2*j].y, y[2*j+1].x, y[2*j+1].y);
}

template<> __device__ inline void convert<float2, double4>(float2 x[], double4 y[], const int N) {
  for (int j=0; j<N/2; j++) {
    x[2*j] = make_float2(y[j].x, y[j].y);
    x[2*j+1] = make_float2(y[j].z, y[j].w);
  }
}
int vecLength< float4 >()
int vecLength< double4 >()
__device__ void convert< float4, short4 >(float4 x[], short4 y[], const int N)
int vecLength< double2 >()
__device__ void convert< float2, short2 >(float2 x[], short2 y[], const int N)
int vecLength< float2 >()
__device__ void copyFloatN(FloatN &a, const FloatN &b)
__device__ void convert< double4, short2 >(double4 x[], short2 y[], const int N)
__device__ void convert(OutputType x[], InputType y[], const int N)
__device__ void convert< short2, float4 >(short2 x[], float4 y[], const int N)
__device__ void convert< float4, short2 >(float4 x[], short2 y[], const int N)
__device__ void convert< float4, float2 >(float4 x[], float2 y[], const int N)
__device__ void convert< float2, float4 >(float2 x[], float4 y[], const int N)
__device__ void convert< double4, double2 >(double4 x[], double2 y[], const int N)
__device__ void convert< float2, short4 >(float2 x[], short4 y[], const int N)
__device__ void convert< double2, double4 >(double2 x[], double4 y[], const int N)
__device__ void convert< double4, float2 >(double4 x[], float2 y[], const int N)
__device__ void convert< double2, float4 >(double2 x[], float4 y[], const int N)
__device__ void convert< float4, double2 >(float4 x[], double2 y[], const int N)
__device__ void convert< double2, short4 >(double2 x[], short4 y[], const int N)
int vecLength< short4 >()
__device__ void convert< short4, float2 >(short4 x[], float2 y[], const int N)
int vecLength< short2 >()
int vecLength< double >()
__device__ void convert< short4, double2 >(short4 x[], double2 y[], const int N)
__device__ void convert< float2, double4 >(float2 x[], double4 y[], const int N)
__device__ void convert< short2, double4 >(short2 x[], double4 y[], const int N)