QUDA v0.4.0
A library for QCD on GPUs
|
#pragma once

/*
  Component-wise arithmetic helpers for the CUDA built-in vector types
  (float2, float4, double2, double3, double4), plus max-abs reductions
  and precision conversion routines.

  The operator overloads are declared inline so that this header can be
  included from more than one translation unit without producing
  duplicate definitions.

  The compound assignment operators update their left operand in place
  and return a copy of the updated value (by value, not by reference).
*/

/*
  Binary addition and subtraction
*/

// Component-wise sum of two double2 values.
__host__ __device__ inline double2 operator+(const double2& x, const double2 &y) {
  return make_double2(x.x + y.x, x.y + y.y);
}

// Component-wise difference of two double2 values.
__host__ __device__ inline double2 operator-(const double2& x, const double2 &y) {
  return make_double2(x.x - y.x, x.y - y.y);
}

// Component-wise difference of two float2 values.
__host__ __device__ inline float2 operator-(const float2& x, const float2 &y) {
  return make_float2(x.x - y.x, x.y - y.y);
}

// Component-wise difference of two float4 values.
__host__ __device__ inline float4 operator-(const float4& x, const float4 &y) {
  return make_float4(x.x - y.x, x.y - y.y, x.z - y.z, x.w - y.w);
}

// Component-wise sum of two double3 values (host only).
__host__ inline double3 operator+(const double3& x, const double3 &y) {
  double3 z;
  z.x = x.x + y.x; z.y = x.y + y.y; z.z = x.z + y.z;
  return z;
}

/*
  Scalar * vector products: every component of x is multiplied by a
*/

__device__ inline float4 operator*(const float a, const float4 x) {
  float4 y;
  y.x = a*x.x;
  y.y = a*x.y;
  y.z = a*x.z;
  y.w = a*x.w;
  return y;
}

__device__ inline float2 operator*(const float a, const float2 x) {
  float2 y;
  y.x = a*x.x;
  y.y = a*x.y;
  return y;
}

__device__ inline double2 operator*(const double a, const double2 x) {
  double2 y;
  y.x = a*x.x;
  y.y = a*x.y;
  return y;
}

__device__ inline double4 operator*(const double a, const double4 x) {
  double4 y;
  y.x = a*x.x;
  y.y = a*x.y;
  y.z = a*x.z;
  y.w = a*x.w;
  return y;
}

// Component-wise sum of two float2 values (device only).
__device__ inline float2 operator+(const float2 x, const float2 y) {
  float2 z;
  z.x = x.x + y.x;
  z.y = x.y + y.y;
  return z;
}

// Component-wise sum of two float4 values (device only).
__device__ inline float4 operator+(const float4 x, const float4 y) {
  float4 z;
  z.x = x.x + y.x;
  z.y = x.y + y.y;
  z.z = x.z + y.z;
  z.w = x.w + y.w;
  return z;
}

/*
  Compound addition: x is updated in place, a copy of it is returned
*/

__device__ inline float4 operator+=(float4 &x, const float4 y) {
  x.x += y.x;
  x.y += y.y;
  x.z += y.z;
  x.w += y.w;
  return x;
}

__device__ inline float2 operator+=(float2 &x, const float2 y) {
  x.x += y.x;
  x.y += y.y;
  return x;
}

__host__ __device__ inline double2 operator+=(double2 &x, const double2 y) {
  x.x += y.x;
  x.y += y.y;
  return x;
}

__host__ __device__ inline double3 operator+=(double3 &x, const double3 y) {
  x.x += y.x;
  x.y += y.y;
  x.z += y.z;
  return x;
}

/*
  Compound subtraction: x is updated in place, a copy of it is returned
*/

__device__ inline float4 operator-=(float4 &x, const float4 y) {
  x.x -= y.x;
  x.y -= y.y;
  x.z -= y.z;
  x.w -= y.w;
  return x;
}

__device__ inline float2 operator-=(float2 &x, const float2 y) {
  x.x -= y.x;
  x.y -= y.y;
  return x;
}

__device__ inline double2 operator-=(double2 &x, const double2 y) {
  x.x -= y.x;
  x.y -= y.y;
  return x;
}

/*
  Compound scaling: every component of the vector is multiplied by the
  scalar in place, a copy of the updated vector is returned
*/

__device__ inline float2 operator*=(float2 &x, const float a) {
  x.x *= a;
  x.y *= a;
  return x;
}

__device__ inline float4 operator*=(float4 &a, const float &b) {
  a.x *= b;
  a.y *= b;
  a.z *= b;
  a.w *= b;
  return a;
}

// NOTE(review): the scale factor is a float even though the vector is
// double precision, so a double argument is narrowed to float at the
// call site -- confirm this is intended before relying on it.
__device__ inline double2 operator*=(double2 &a, const float &b) {
  a.x *= b;
  a.y *= b;
  return a;
}

// NOTE(review): same float scale factor as the double2 overload above
// in this block -- confirm the narrowing is intended.
__device__ inline double4 operator*=(double4 &a, const float &b) {
  a.x *= b;
  a.y *= b;
  a.z *= b;
  a.w *= b;
  return a;
}

/*
  Unary negation
*/

__device__ inline float2 operator-(const float2 &x) {
  return make_float2(-x.x, -x.y);
}

__device__ inline double2 operator-(const double2 &x) {
  return make_double2(-x.x, -x.y);
}


/*
  Operations to return the maximum absolute value of a FloatN vector.

  The single precision overloads use fabsf/fmaxf; the double precision
  overloads use fabs/fmax so that the components are never narrowed to
  float (which would lose precision and overflow to infinity for
  magnitudes above the float range).
*/

__forceinline__ __host__ __device__ float max_fabs(const float4 &c) {
  float a = fmaxf(fabsf(c.x), fabsf(c.y));
  float b = fmaxf(fabsf(c.z), fabsf(c.w));
  return fmaxf(a, b);
}

__forceinline__ __host__ __device__ float max_fabs(const float2 &b) {
  return fmaxf(fabsf(b.x), fabsf(b.y));
}

__forceinline__ __host__ __device__ double max_fabs(const double4 &c) {
  double a = fmax(fabs(c.x), fabs(c.y));
  double b = fmax(fabs(c.z), fabs(c.w));
  return fmax(a, b);
}

__forceinline__ __host__ __device__ double max_fabs(const double2 &b) {
  return fmax(fabs(b.x), fabs(b.y));
}

/*
  Precision conversion routines for vector types.

  make_FloatN maps a double vector to the float vector of the same
  width and vice versa; make_shortN converts each component of a float
  or double vector to short.
*/

__forceinline__ __device__ float2 make_FloatN(const double2 &a) {
  return make_float2(a.x, a.y);
}

__forceinline__ __device__ float4 make_FloatN(const double4 &a) {
  return make_float4(a.x, a.y, a.z, a.w);
}

__forceinline__ __device__ double2 make_FloatN(const float2 &a) {
  return make_double2(a.x, a.y);
}

__forceinline__ __device__ double4 make_FloatN(const float4 &a) {
  return make_double4(a.x, a.y, a.z, a.w);
}

__forceinline__ __device__ short4 make_shortN(const float4 &a) {
  return make_short4(a.x, a.y, a.z, a.w);
}

__forceinline__ __device__ short2 make_shortN(const float2 &a) {
  return make_short2(a.x, a.y);
}

__forceinline__ __device__ short4 make_shortN(const double4 &a) {
  return make_short4(a.x, a.y, a.z, a.w);
}

__forceinline__ __device__ short2 make_shortN(const double2 &a) {
  return make_short2(a.x, a.y);
}