/**
 * Primary template for querying the number of components of a type.
 *
 * This generic version reports 0 for whatever type it is instantiated with.
 * NOTE(review): type-specific specializations are expected to supply the real
 * component counts; they are not part of this block.
 *
 * @tparam type  Type being queried (unused by this generic version).
 * @return       Always 0.
 */
template <typename type>
inline int vecLength() {
  const int unknownLength = 0;
  return unknownLength;
}
// Scale factor applied when expanding a short to a floating-point value
// (3.051850948e-5 is approximately 1/32767). Note this is a double literal.
#define MAX_SHORT_INV 3.051850948e-5

/**
 * Convert a short to a float scaled by MAX_SHORT_INV.
 *
 * @param a  Value to convert.
 * @return   a * MAX_SHORT_INV, narrowed to float.
 *
 * NOTE(review): because MAX_SHORT_INV is a double literal, the product is
 * formed in double precision and only then narrowed to float. That is kept
 * here so results stay bit-for-bit the same; switching to a float constant
 * would avoid the double multiply but may change the last bit of the result.
 */
static inline __device__ float s2f(const short &a) {
  // short -> double is exact, so this is the same product the implicit
  // float -> double promotion would produce.
  const double scaled = static_cast<double>(a) * MAX_SHORT_INV;
  return static_cast<float>(scaled);
}
/**
 * Convert a short to a double scaled by MAX_SHORT_INV.
 *
 * @param a  Value to convert.
 * @return   a * MAX_SHORT_INV, computed in double precision.
 */
static inline __device__ double s2d(const short &a) {
  const double widened = static_cast<double>(a);
  return widened * MAX_SHORT_INV;
}
32 template <
typename FloatN>
/**
 * Copy a double-precision 2-vector into a single-precision 2-vector.
 *
 * Each component is narrowed from double to float.
 *
 * @param a  Destination (overwritten).
 * @param b  Source.
 */
__device__ inline void copyFloatN(float2 &a, const double2 &b) {
  a.x = static_cast<float>(b.x);
  a.y = static_cast<float>(b.y);
}
/**
 * Copy a single-precision 2-vector into a double-precision 2-vector.
 *
 * Each component is widened from float to double.
 *
 * @param a  Destination (overwritten).
 * @param b  Source.
 */
__device__ inline void copyFloatN(double2 &a, const float2 &b) {
  a.x = static_cast<double>(b.x);
  a.y = static_cast<double>(b.y);
}
/**
 * Copy a double-precision 4-vector into a single-precision 4-vector.
 *
 * Each component is narrowed from double to float.
 *
 * @param a  Destination (overwritten).
 * @param b  Source.
 */
__device__ inline void copyFloatN(float4 &a, const double4 &b) {
  a.x = static_cast<float>(b.x);
  a.y = static_cast<float>(b.y);
  a.z = static_cast<float>(b.z);
  a.w = static_cast<float>(b.w);
}
/**
 * Copy a single-precision 4-vector into a double-precision 4-vector.
 *
 * Each component is widened from float to double.
 *
 * @param a  Destination (overwritten).
 * @param b  Source.
 */
__device__ inline void copyFloatN(double4 &a, const float4 &b) {
  a.x = static_cast<double>(b.x);
  a.y = static_cast<double>(b.y);
  a.z = static_cast<double>(b.z);
  a.w = static_cast<double>(b.w);
}
/**
 * Fast float-to-integer rounding via the "magic number" trick.
 *
 * Adds 12582912.0f (1.5 * 2^23) to f, which pushes the rounded integer value
 * of f into the low mantissa bits of the sum, and returns the raw 32-bit
 * pattern of that sum.
 *
 * The return value is therefore NOT the integer itself: it is
 * 0x4B400000 + round(f) (valid for |f| < 2^22). The low 16 bits of
 * 0x4B400000 are zero, so narrowing the result to a 16-bit integer yields the
 * rounded value directly.
 *
 * @param f  Value to round.
 * @return   Bit pattern of (f + 1.5 * 2^23) as a signed int.
 */
__device__ inline int f2i(float f) {
  f += 12582912.0f;
  // Use the bit-cast intrinsic rather than reinterpret_cast<int&>, which reads
  // a float object through an int lvalue (a strict-aliasing violation).
  return __float_as_int(f);
}
/**
 * Fast double-to-integer rounding via the "magic number" trick.
 *
 * Adds 6755399441055744.0 (1.5 * 2^52) to d, which pushes the rounded integer
 * value of d into the low mantissa bits of the sum, and returns the low
 * 32 bits of that sum's bit pattern as a signed int. For |d| < 2^31 this is
 * the rounded value of d.
 *
 * @param d  Value to round.
 * @return   Low 32 bits of the bit pattern of (d + 1.5 * 2^52).
 */
__device__ inline int d2i(double d) {
  d += 6755399441055744.0;
  // __double2loint names the low word explicitly. The previous
  // reinterpret_cast<int&>(d) read the first four bytes of the double through
  // an int lvalue, which is an aliasing violation and only selects the low
  // word because of little-endian byte order.
  return __double2loint(d);
}
70 template<
typename OutputType,
typename InputType>
71 __device__
inline void convert(OutputType
x[], InputType
y[],
const int N) {
79 for (
int j=0; j<N; j++)
x[j] = make_float2(
y[j].
x,
y[j].
y);
84 for (
int j=0; j<N; j++)
x[j] = make_float4(
y[j].
x,
y[j].
y,
y[j].
z,
y[j].
w);
91 for (
int j=0; j<N; j++)
x[j] = make_double4(
y[2*j].
x,
y[2*j].
y,
y[2*j+1].
x,
y[2*j+1].
y);
96 for (
int j=0; j<N/2; j++) {
97 x[2*j] = make_double2(
y[j].
x,
y[j].
y);
98 x[2*j+1] = make_double2(
y[j].
z,
y[j].
w);
104 for (
int j=0; j<N; j++)
x[j] = make_float4(
y[2*j].
x,
y[2*j].
y,
y[2*j+1].
x,
y[2*j+1].
y);
109 for (
int j=0; j<N/2; j++) {
110 x[2*j] = make_float2(
y[j].
x,
y[j].
y);
111 x[2*j+1] = make_float2(
y[j].
z,
y[j].
w);
117 for (
int j=0; j<N; j++)
x[j] = make_short4(
f2i(
y[2*j].
x),
f2i(
y[2*j].
y),
f2i(
y[2*j+1].
x),
f2i(
y[2*j+1].
y));
122 for (
int j=0; j<N/2; j++) {
123 x[2*j] = make_float2(
y[j].
x,
y[j].
y);
124 x[2*j+1] = make_float2(
y[j].
z,
y[j].
w);
130 for (
int j=0; j<N; j++)
x[j] = make_float4(
y[2*j].
x,
y[2*j].
y,
y[2*j+1].
x,
y[2*j+1].
y);
135 for (
int j=0; j<N/2; j++) {
143 for (
int j=0; j<N; j++)
x[j] = make_short4(
d2i(
y[2*j].
x),
d2i(
y[2*j].
y),
d2i(
y[2*j+1].
x),
d2i(
y[2*j+1].
y));
148 for (
int j=0; j<N/2; j++) {
149 x[2*j] = make_double2(
y[j].
x,
y[j].
y);
150 x[2*j+1] = make_double2(
y[j].
z,
y[j].
w);
156 for (
int j=0; j<N; j++)
x[j] = make_double4(
y[2*j].
x,
y[2*j].
y,
y[2*j+1].
x,
y[2*j+1].
y);
161 for (
int j=0; j<N/2; j++) {
169 for (
int j=0; j<N; j++)
x[j] = make_float4(
y[2*j].
x,
y[2*j].
y,
y[2*j+1].
x,
y[2*j+1].
y);
174 for (
int j=0; j<N/2; j++) {
175 x[2*j] = make_double2(
y[j].
x,
y[j].
y);
176 x[2*j+1] = make_double2(
y[j].
z,
y[j].
w);
182 for (
int j=0; j<N; j++)
x[j] = make_double4(
y[2*j].
x,
y[2*j].
y,
y[2*j+1].
x,
y[2*j+1].
y);
187 for (
int j=0; j<N/2; j++) {
188 x[2*j] = make_float2(
y[j].
x,
y[j].
y);
189 x[2*j+1] = make_float2(
y[j].
z,
y[j].
w);
__device__ int f2i(float f)
int vecLength< float4 >()
int vecLength< double4 >()
__device__ void convert< float4, short4 >(float4 x[], short4 y[], const int N)
static __device__ double s2d(const short &a)
int vecLength< double2 >()
__device__ void convert< float2, short2 >(float2 x[], short2 y[], const int N)
__device__ int d2i(double d)
int vecLength< float2 >()
__device__ void copyFloatN(FloatN &a, const FloatN &b)
__device__ void convert< double4, short2 >(double4 x[], short2 y[], const int N)
__device__ void convert(OutputType x[], InputType y[], const int N)
__device__ void convert< short2, float4 >(short2 x[], float4 y[], const int N)
__device__ void convert< float4, short2 >(float4 x[], short2 y[], const int N)
static __device__ float s2f(const short &a)
__device__ void convert< float4, float2 >(float4 x[], float2 y[], const int N)
__device__ void convert< float2, float4 >(float2 x[], float4 y[], const int N)
int int int enum cudaChannelFormatKind f
__device__ void convert< double4, double2 >(double4 x[], double2 y[], const int N)
__device__ void convert< float2, short4 >(float2 x[], short4 y[], const int N)
__device__ void convert< double2, double4 >(double2 x[], double4 y[], const int N)
__device__ void convert< double4, float2 >(double4 x[], float2 y[], const int N)
__device__ void convert< double2, float4 >(double2 x[], float4 y[], const int N)
__device__ void convert< float4, double2 >(float4 x[], double2 y[], const int N)
__device__ void convert< double2, short4 >(double2 x[], short4 y[], const int N)
int vecLength< short4 >()
__device__ void convert< short4, float2 >(short4 x[], float2 y[], const int N)
int vecLength< short2 >()
int vecLength< double >()
static __inline__ size_t size_t d
__device__ void convert< short4, double2 >(short4 x[], double2 y[], const int N)
__device__ void convert< float2, double4 >(float2 x[], double4 y[], const int N)
__device__ void convert< short2, double4 >(short2 x[], double4 y[], const int N)