QUDA: quda/include/float_vector.h Source File

QUDA v0.4.0
A library for QCD on GPUs
00001 
00009 #pragma once
00010 
/*
  Component-wise sum of two double2 values.

  Declared inline because this definition lives in a header: without it,
  every translation unit that includes the header emits its own external
  host definition and linking fails with duplicate symbols.
 */
__host__ __device__ inline double2 operator+(const double2 &x, const double2 &y) {
  return make_double2(x.x + y.x, x.y + y.y);
}
00014 
/*
  Component-wise difference (x - y) of two double2 values.

  Declared inline because this definition lives in a header: a non-inline
  host definition would be duplicated in every including translation unit.
 */
__host__ __device__ inline double2 operator-(const double2 &x, const double2 &y) {
  return make_double2(x.x - y.x, x.y - y.y);
}
00018 
/*
  Component-wise difference (x - y) of two float2 values.

  Declared inline because this definition lives in a header: a non-inline
  host definition would be duplicated in every including translation unit.
 */
__host__ __device__ inline float2 operator-(const float2 &x, const float2 &y) {
  return make_float2(x.x - y.x, x.y - y.y);
}
00022 
/*
  Component-wise difference (x - y) of two float4 values.

  Declared inline because this definition lives in a header: a non-inline
  host definition would be duplicated in every including translation unit.
 */
__host__ __device__ inline float4 operator-(const float4 &x, const float4 &y) {
  return make_float4(x.x - y.x, x.y - y.y, x.z - y.z, x.w - y.w);
}
00026 
/*
  Component-wise sum of two double3 values.

  Declared inline because this definition lives in a header (a non-inline
  host definition would be duplicated in every including translation unit),
  and marked __device__ as well as __host__ so that it is callable from the
  same places as the matching operator+= for double3.
 */
__host__ __device__ inline double3 operator+(const double3 &x, const double3 &y) {
  double3 z;
  z.x = x.x + y.x;
  z.y = x.y + y.y;
  z.z = x.z + y.z;
  return z;
}
00032 
/* Scales every component of the float4 x by the scalar a (device only). */
__device__ float4 operator*(const float a, const float4 x) {
  return make_float4(a * x.x, a * x.y, a * x.z, a * x.w);
}
00041 
/* Scales both components of the float2 x by the scalar a (device only). */
__device__ float2 operator*(const float a, const float2 x) {
  return make_float2(a * x.x, a * x.y);
}
00048 
/* Scales both components of the double2 x by the scalar a (device only). */
__device__ double2 operator*(const double a, const double2 x) {
  return make_double2(a * x.x, a * x.y);
}
00055 
/* Scales every component of the double4 x by the scalar a (device only). */
__device__ double4 operator*(const double a, const double4 x) {
  return make_double4(a * x.x, a * x.y, a * x.z, a * x.w);
}
00064 
/* Component-wise sum of two float2 values (device only). */
__device__ float2 operator+(const float2 x, const float2 y) {
  return make_float2(x.x + y.x, x.y + y.y);
}
00071 
/* Component-wise sum of two float4 values (device only). */
__device__ float4 operator+(const float4 x, const float4 y) {
  return make_float4(x.x + y.x, x.y + y.y, x.z + y.z, x.w + y.w);
}
00080 
/*
  Adds y to x component-wise, in place (device only).
  Returns a copy of the updated x (by value, not by reference).
 */
__device__ float4 operator+=(float4 &x, const float4 y) {
  x.x = x.x + y.x;
  x.y = x.y + y.y;
  x.z = x.z + y.z;
  x.w = x.w + y.w;
  return x;
}
00088 
/*
  Adds y to x component-wise, in place (device only).
  Returns a copy of the updated x (by value, not by reference).
 */
__device__ float2 operator+=(float2 &x, const float2 y) {
  x.x = x.x + y.x;
  x.y = x.y + y.y;
  return x;
}
00094 
/*
  Adds y to x component-wise, in place.
  Returns a copy of the updated x (by value, not by reference).

  Declared inline because this definition lives in a header: a non-inline
  host definition would be duplicated in every including translation unit.
 */
__host__ __device__ inline double2 operator+=(double2 &x, const double2 y) {
  x.x += y.x;
  x.y += y.y;
  return x;
}
00100 
/*
  Adds y to x component-wise, in place.
  Returns a copy of the updated x (by value, not by reference).

  Declared inline because this definition lives in a header: a non-inline
  host definition would be duplicated in every including translation unit.
 */
__host__ __device__ inline double3 operator+=(double3 &x, const double3 y) {
  x.x += y.x;
  x.y += y.y;
  x.z += y.z;
  return x;
}
00107 
/*
  Subtracts y from x component-wise, in place (device only).
  Returns a copy of the updated x (by value, not by reference).
 */
__device__ float4 operator-=(float4 &x, const float4 y) {
  x.x = x.x - y.x;
  x.y = x.y - y.y;
  x.z = x.z - y.z;
  x.w = x.w - y.w;
  return x;
}
00115 
/*
  Subtracts y from x component-wise, in place (device only).
  Returns a copy of the updated x (by value, not by reference).
 */
__device__ float2 operator-=(float2 &x, const float2 y) {
  x.x = x.x - y.x;
  x.y = x.y - y.y;
  return x;
}
00121 
/*
  Subtracts y from x component-wise, in place (device only).
  Returns a copy of the updated x (by value, not by reference).
 */
__device__ double2 operator-=(double2 &x, const double2 y) {
  x.x = x.x - y.x;
  x.y = x.y - y.y;
  return x;
}
00127 
/*
  Scales both components of x by the scalar a, in place (device only).
  Returns a copy of the updated x (by value, not by reference).
 */
__device__ float2 operator*=(float2 &x, const float a) {
  x.x = x.x * a;
  x.y = x.y * a;
  return x;
}
00133 
/*
  Scales every component of a by the scalar b, in place (device only).
  Returns a copy of the updated a (by value, not by reference).

  b is taken by reference, so the components are updated one after another
  and each multiply reads b afresh.
 */
__device__ float4 operator*=(float4 &a, const float &b) {
  a.x = a.x * b;
  a.y = a.y * b;
  a.z = a.z * b;
  a.w = a.w * b;
  return a;
}
00141 
/*
  Scales both components of a by the scalar b, in place (device only).
  Returns a copy of the updated a (by value, not by reference).

  NOTE(review): the scalar is single precision although the vector is
  double precision, so a double argument is narrowed to float at the call
  site -- confirm this is intended.
 */
__device__ double2 operator*=(double2 &a, const float &b) {
  a.x = a.x * b;
  a.y = a.y * b;
  return a;
}
00147 
/*
  Scales every component of a by the scalar b, in place (device only).
  Returns a copy of the updated a (by value, not by reference).

  NOTE(review): the scalar is single precision although the vector is
  double precision, so a double argument is narrowed to float at the call
  site -- confirm this is intended.
 */
__device__ double4 operator*=(double4 &a, const float &b) {
  a.x = a.x * b;
  a.y = a.y * b;
  a.z = a.z * b;
  a.w = a.w * b;
  return a;
}
00155 
/* Unary minus: negates both components of a float2 (device only). */
__device__ float2 operator-(const float2 &x) {
  float2 neg;
  neg.x = -x.x;
  neg.y = -x.y;
  return neg;
}
00159 
/* Unary minus: negates both components of a double2 (device only). */
__device__ double2 operator-(const double2 &x) {
  double2 neg;
  neg.x = -x.x;
  neg.y = -x.y;
  return neg;
}
00163 
00164 
00165 /*
00166   Operations to return the maximum absolute value of a FloatN vector
00167  */
00168 
/* Returns the largest absolute value among the four components of c. */
__forceinline__ __device__ float max_fabs(const float4 &c) {
  const float xy = fmaxf(fabsf(c.x), fabsf(c.y));
  const float zw = fmaxf(fabsf(c.z), fabsf(c.w));
  return fmaxf(xy, zw);
}
00174 
/* Returns the larger absolute value of the two components of b. */
__forceinline__ __device__ float max_fabs(const float2 &b) {
  const float ax = fabsf(b.x);
  const float ay = fabsf(b.y);
  return fmaxf(ax, ay);
}
00178 
/*
  Returns the largest absolute value among the four components of c.

  Uses the double-precision fabs/fmax: the single-precision fabsf/fmaxf
  would first narrow each component to float, rounding the result and
  turning magnitudes beyond the float range into infinity.
 */
__forceinline__ __device__ double max_fabs(const double4 &c) {
  const double a = fmax(fabs(c.x), fabs(c.y));
  const double b = fmax(fabs(c.z), fabs(c.w));
  return fmax(a, b);
}
00184 
/*
  Returns the larger absolute value of the two components of b.

  Uses the double-precision fabs/fmax: the single-precision fabsf/fmaxf
  would first narrow each component to float, rounding the result and
  turning magnitudes beyond the float range into infinity.
 */
__forceinline__ __device__ double max_fabs(const double2 &b) {
  return fmax(fabs(b.x), fabs(b.y));
}
00188 
00189 /*
00190   Precision conversion routines for vector types
00191  */
00192 
/* Narrows a double2 to a float2, converting each component to float. */
__forceinline__ __device__ float2 make_FloatN(const double2 &a) {
  float2 out;
  out.x = a.x;
  out.y = a.y;
  return out;
}
00196 
/* Narrows a double4 to a float4, converting each component to float. */
__forceinline__ __device__ float4 make_FloatN(const double4 &a) {
  float4 out;
  out.x = a.x;
  out.y = a.y;
  out.z = a.z;
  out.w = a.w;
  return out;
}
00200 
/* Widens a float2 to a double2, converting each component to double. */
__forceinline__ __device__ double2 make_FloatN(const float2 &a) {
  double2 out;
  out.x = a.x;
  out.y = a.y;
  return out;
}
00204 
/* Widens a float4 to a double4, converting each component to double. */
__forceinline__ __device__ double4 make_FloatN(const float4 &a) {
  double4 out;
  out.x = a.x;
  out.y = a.y;
  out.z = a.z;
  out.w = a.w;
  return out;
}
00208 
/*
  Converts a float4 to a short4 component by component using the plain
  float-to-short conversion; no scaling or clamping is applied here.
 */
__forceinline__ __device__ short4 make_shortN(const float4 &a) {
  short4 out;
  out.x = a.x;
  out.y = a.y;
  out.z = a.z;
  out.w = a.w;
  return out;
}
00212 
/*
  Converts a float2 to a short2 component by component using the plain
  float-to-short conversion; no scaling or clamping is applied here.
 */
__forceinline__ __device__ short2 make_shortN(const float2 &a) {
  short2 out;
  out.x = a.x;
  out.y = a.y;
  return out;
}
00216 
/*
  Converts a double4 to a short4 component by component using the plain
  double-to-short conversion; no scaling or clamping is applied here.
 */
__forceinline__ __device__ short4 make_shortN(const double4 &a) {
  short4 out;
  out.x = a.x;
  out.y = a.y;
  out.z = a.z;
  out.w = a.w;
  return out;
}
00220 
/*
  Converts a double2 to a short2 component by component using the plain
  double-to-short conversion; no scaling or clamping is applied here.
 */
__forceinline__ __device__ short2 make_shortN(const double2 &a) {
  short2 out;
  out.x = a.x;
  out.y = a.y;
  return out;
}
00224