// QUDA: quda/include/convert.h source file
//
// QUDA v0.4.0
// A library for QCD on GPUs
00001 
00010 #pragma once
00011 
/**
 * Number of scalar components held by a scalar or CUDA vector type.
 *
 * The generic version returns 0, which marks a type for which no
 * length has been registered below.
 *
 * @return 1 for short/float/double, 2 for the *2 vector types,
 *         4 for the *4 vector types, 0 otherwise.
 */
template <typename type> inline int vecLength() { return 0; }

// An explicit specialization is an ordinary function, not a template, so it
// must be declared inline to be defined in a header that may be included
// from several translation units (otherwise the linker sees duplicates).
template<> inline int vecLength<short>() { return 1; }
template<> inline int vecLength<float>() { return 1; }
template<> inline int vecLength<double>() { return 1; }

template<> inline int vecLength<short2>() { return 2; }
template<> inline int vecLength<float2>() { return 2; }
template<> inline int vecLength<double2>() { return 2; }

template<> inline int vecLength<short4>() { return 4; }
template<> inline int vecLength<float4>() { return 4; }
template<> inline int vecLength<double4>() { return 4; }
00025 
/** Copy a two-component single-precision vector b into a. */
__device__ inline void copyFloatN(float2 &a, const float2 &b) {
  a.x = b.x;
  a.y = b.y;
}

/** Copy a two-component double-precision vector b into a. */
__device__ inline void copyFloatN(double2 &a, const double2 &b) {
  a.x = b.x;
  a.y = b.y;
}
00028 
/** Copy a four-component single-precision vector b into a. */
__device__ inline void copyFloatN(float4 &a, const float4 &b) {
  a.x = b.x;
  a.y = b.y;
  a.z = b.z;
  a.w = b.w;
}

/** Copy a four-component double-precision vector b into a. */
__device__ inline void copyFloatN(double4 &a, const double4 &b) {
  a.x = b.x;
  a.y = b.y;
  a.z = b.z;
  a.w = b.w;
}
00031 
/** Copy double2 b into float2 a, narrowing each component to float. */
__device__ inline void copyFloatN(float2 &a, const double2 &b) {
  a.x = static_cast<float>(b.x);
  a.y = static_cast<float>(b.y);
}

/** Copy float2 b into double2 a, widening each component to double. */
__device__ inline void copyFloatN(double2 &a, const float2 &b) {
  a.x = static_cast<double>(b.x);
  a.y = static_cast<double>(b.y);
}

/** Copy double4 b into float4 a, narrowing each component to float. */
__device__ inline void copyFloatN(float4 &a, const double4 &b) {
  a.x = static_cast<float>(b.x);
  a.y = static_cast<float>(b.y);
  a.z = static_cast<float>(b.z);
  a.w = static_cast<float>(b.w);
}

/** Copy float4 b into double4 a, widening each component to double. */
__device__ inline void copyFloatN(double4 &a, const float4 &b) {
  a.x = static_cast<double>(b.x);
  a.y = static_cast<double>(b.y);
  a.z = static_cast<double>(b.z);
  a.w = static_cast<double>(b.w);
}
00036 
/**
 * Generic element-by-element conversion of an array of vectors.
 *
 * Used when input and output have the same vector length; each element
 * is handed to the matching copyFloatN overload.
 *
 * @param x Output array; elements 0..N-1 are written.
 * @param y Input array; elements 0..N-1 are read.
 * @param N Number of elements to convert.
 */
template<typename OutputType, typename InputType>
__device__ inline void convert(OutputType x[], InputType y[], const int N) {
#pragma unroll
  for (int i = 0; i < N; ++i) {
    copyFloatN(x[i], y[i]);
  }
}
00054 
/**
 * Convert N short2 elements of y into float2 elements of x.
 * Each component goes through a plain short -> float cast; no rescaling is applied here.
 */
template<> __device__ inline void convert<float2,short2>(float2 x[], short2 y[], const int N) {
#pragma unroll
  for (int i = 0; i < N; ++i) {
    const short2 &src = y[i];
    x[i] = make_float2(static_cast<float>(src.x), static_cast<float>(src.y));
  }
}
00059 
/**
 * Convert N short4 elements of y into float4 elements of x.
 * Each component goes through a plain short -> float cast; no rescaling is applied here.
 */
template<> __device__ inline void convert<float4,short4>(float4 x[], short4 y[], const int N) {
#pragma unroll
  for (int i = 0; i < N; ++i) {
    const short4 &src = y[i];
    x[i] = make_float4(static_cast<float>(src.x), static_cast<float>(src.y),
                       static_cast<float>(src.z), static_cast<float>(src.w));
  }
}
00064 
00065 // 4 <-> 2 vector conversion
00066 
/**
 * Pack pairs of double2 into double4.
 * Output element i is built from inputs 2*i and 2*i+1, so N outputs
 * are written and 2*N inputs are read.
 */
template<> __device__ inline void convert<double4,double2>(double4 x[], double2 y[], const int N) {
#pragma unroll
  for (int i = 0; i < N; ++i) {
    const double2 &lo = y[2*i];
    const double2 &hi = y[2*i + 1];
    x[i] = make_double4(lo.x, lo.y, hi.x, hi.y);
  }
}
00071 
/**
 * Split each double4 into two double2.
 * Input element i fills outputs 2*i and 2*i+1; N/2 inputs are read.
 * With an odd N the integer division leaves the last output untouched.
 */
template<> __device__ inline void convert<double2,double4>(double2 x[], double4 y[], const int N) {
#pragma unroll
  for (int i = 0; i < N/2; ++i) {
    const double4 &src = y[i];
    x[2*i]     = make_double2(src.x, src.y);
    x[2*i + 1] = make_double2(src.z, src.w);
  }
}
00079 
/**
 * Pack pairs of float2 into float4.
 * Output element i is built from inputs 2*i and 2*i+1, so N outputs
 * are written and 2*N inputs are read.
 */
template<> __device__ inline void convert<float4,float2>(float4 x[], float2 y[], const int N) {
#pragma unroll
  for (int i = 0; i < N; ++i) {
    const float2 &lo = y[2*i];
    const float2 &hi = y[2*i + 1];
    x[i] = make_float4(lo.x, lo.y, hi.x, hi.y);
  }
}
00084 
/**
 * Split each float4 into two float2.
 * Input element i fills outputs 2*i and 2*i+1; N/2 inputs are read.
 * With an odd N the integer division leaves the last output untouched.
 */
template<> __device__ inline void convert<float2,float4>(float2 x[], float4 y[], const int N) {
#pragma unroll
  for (int i = 0; i < N/2; ++i) {
    const float4 &src = y[i];
    x[2*i]     = make_float2(src.x, src.y);
    x[2*i + 1] = make_float2(src.z, src.w);
  }
}
00092 
/**
 * Pack pairs of float2 into short4.
 * Output element i is built from inputs 2*i and 2*i+1 (N outputs, 2*N inputs).
 * Components use the built-in float -> short conversion, which drops the
 * fractional part; nothing is scaled or clamped here.
 */
template<> __device__ inline void convert<short4,float2>(short4 x[], float2 y[], const int N) {
#pragma unroll
  for (int i = 0; i < N; ++i) {
    const float2 &lo = y[2*i];
    const float2 &hi = y[2*i + 1];
    x[i] = make_short4(static_cast<short>(lo.x), static_cast<short>(lo.y),
                       static_cast<short>(hi.x), static_cast<short>(hi.y));
  }
}
00097 
/**
 * Split each short4 into two float2.
 * Input element i fills outputs 2*i and 2*i+1; N/2 inputs are read.
 * With an odd N the integer division leaves the last output untouched.
 * Components are cast short -> float without rescaling.
 */
template<> __device__ inline void convert<float2,short4>(float2 x[], short4 y[], const int N) {
#pragma unroll
  for (int i = 0; i < N/2; ++i) {
    const short4 &src = y[i];
    x[2*i]     = make_float2(static_cast<float>(src.x), static_cast<float>(src.y));
    x[2*i + 1] = make_float2(static_cast<float>(src.z), static_cast<float>(src.w));
  }
}
00105 
/**
 * Pack pairs of short2 into float4.
 * Output element i is built from inputs 2*i and 2*i+1 (N outputs, 2*N inputs).
 * Components are cast short -> float without rescaling.
 */
template<> __device__ inline void convert<float4,short2>(float4 x[], short2 y[], const int N) {
#pragma unroll
  for (int i = 0; i < N; ++i) {
    const short2 &lo = y[2*i];
    const short2 &hi = y[2*i + 1];
    x[i] = make_float4(static_cast<float>(lo.x), static_cast<float>(lo.y),
                       static_cast<float>(hi.x), static_cast<float>(hi.y));
  }
}
00110 
/**
 * Split each float4 into two short2.
 * Input element i fills outputs 2*i and 2*i+1; N/2 inputs are read.
 * With an odd N the integer division leaves the last output untouched.
 * Components use the built-in float -> short conversion, which drops the
 * fractional part; nothing is scaled or clamped here.
 */
template<> __device__ inline void convert<short2,float4>(short2 x[], float4 y[], const int N) {
#pragma unroll
  for (int i = 0; i < N/2; ++i) {
    const float4 &src = y[i];
    x[2*i]     = make_short2(static_cast<short>(src.x), static_cast<short>(src.y));
    x[2*i + 1] = make_short2(static_cast<short>(src.z), static_cast<short>(src.w));
  }
}
00118 
/**
 * Pack pairs of double2 into short4.
 * Output element i is built from inputs 2*i and 2*i+1 (N outputs, 2*N inputs).
 * Components use the built-in double -> short conversion, which drops the
 * fractional part; nothing is scaled or clamped here.
 */
template<> __device__ inline void convert<short4,double2>(short4 x[], double2 y[], const int N) {
#pragma unroll
  for (int i = 0; i < N; ++i) {
    const double2 &lo = y[2*i];
    const double2 &hi = y[2*i + 1];
    x[i] = make_short4(static_cast<short>(lo.x), static_cast<short>(lo.y),
                       static_cast<short>(hi.x), static_cast<short>(hi.y));
  }
}
00123 
/**
 * Split each short4 into two double2.
 * Input element i fills outputs 2*i and 2*i+1; N/2 inputs are read.
 * With an odd N the integer division leaves the last output untouched.
 * Components are cast short -> double without rescaling.
 */
template<> __device__ inline void convert<double2,short4>(double2 x[], short4 y[], const int N) {
#pragma unroll
  for (int i = 0; i < N/2; ++i) {
    const short4 &src = y[i];
    x[2*i]     = make_double2(static_cast<double>(src.x), static_cast<double>(src.y));
    x[2*i + 1] = make_double2(static_cast<double>(src.z), static_cast<double>(src.w));
  }
}
00131 
/**
 * Pack pairs of short2 into double4.
 * Output element i is built from inputs 2*i and 2*i+1 (N outputs, 2*N inputs).
 * Components are cast short -> double without rescaling.
 */
template<> __device__ inline void convert<double4,short2>(double4 x[], short2 y[], const int N) {
#pragma unroll
  for (int i = 0; i < N; ++i) {
    const short2 &lo = y[2*i];
    const short2 &hi = y[2*i + 1];
    x[i] = make_double4(static_cast<double>(lo.x), static_cast<double>(lo.y),
                        static_cast<double>(hi.x), static_cast<double>(hi.y));
  }
}
00136 
/**
 * Split each double4 into two short2.
 * Input element i fills outputs 2*i and 2*i+1; N/2 inputs are read.
 * With an odd N the integer division leaves the last output untouched.
 * Components use the built-in double -> short conversion, which drops the
 * fractional part; nothing is scaled or clamped here.
 */
template<> __device__ inline void convert<short2,double4>(short2 x[], double4 y[], const int N) {
#pragma unroll
  for (int i = 0; i < N/2; ++i) {
    const double4 &src = y[i];
    x[2*i]     = make_short2(static_cast<short>(src.x), static_cast<short>(src.y));
    x[2*i + 1] = make_short2(static_cast<short>(src.z), static_cast<short>(src.w));
  }
}
00144 
/**
 * Pack pairs of double2 into float4, narrowing each component to float.
 * Output element i is built from inputs 2*i and 2*i+1 (N outputs, 2*N inputs).
 */
template<> __device__ inline void convert<float4,double2>(float4 x[], double2 y[], const int N) {
#pragma unroll
  for (int i = 0; i < N; ++i) {
    const double2 &lo = y[2*i];
    const double2 &hi = y[2*i + 1];
    x[i] = make_float4(static_cast<float>(lo.x), static_cast<float>(lo.y),
                       static_cast<float>(hi.x), static_cast<float>(hi.y));
  }
}
00149 
/**
 * Split each float4 into two double2, widening each component to double.
 * Input element i fills outputs 2*i and 2*i+1; N/2 inputs are read.
 * With an odd N the integer division leaves the last output untouched.
 */
template<> __device__ inline void convert<double2,float4>(double2 x[], float4 y[], const int N) {
#pragma unroll
  for (int i = 0; i < N/2; ++i) {
    const float4 &src = y[i];
    x[2*i]     = make_double2(static_cast<double>(src.x), static_cast<double>(src.y));
    x[2*i + 1] = make_double2(static_cast<double>(src.z), static_cast<double>(src.w));
  }
}
00157 
/**
 * Pack pairs of float2 into double4, widening each component to double.
 * Output element i is built from inputs 2*i and 2*i+1 (N outputs, 2*N inputs).
 */
template<> __device__ inline void convert<double4,float2>(double4 x[], float2 y[], const int N) {
#pragma unroll
  for (int i = 0; i < N; ++i) {
    const float2 &lo = y[2*i];
    const float2 &hi = y[2*i + 1];
    x[i] = make_double4(static_cast<double>(lo.x), static_cast<double>(lo.y),
                        static_cast<double>(hi.x), static_cast<double>(hi.y));
  }
}
00162 
/**
 * Split each double4 into two float2, narrowing each component to float.
 * Input element i fills outputs 2*i and 2*i+1; N/2 inputs are read.
 * With an odd N the integer division leaves the last output untouched.
 */
template<> __device__ inline void convert<float2,double4>(float2 x[], double4 y[], const int N) {
#pragma unroll
  for (int i = 0; i < N/2; ++i) {
    const double4 &src = y[i];
    x[2*i]     = make_float2(static_cast<float>(src.x), static_cast<float>(src.y));
    x[2*i + 1] = make_float2(static_cast<float>(src.z), static_cast<float>(src.w));
  }
}