/**
 * @brief Reset a double-precision scalar to zero.
 *
 * Callable from both host and device code.
 *
 * @param[out] a Value that is overwritten with 0.0.
 */
__device__ __host__ inline void zero(double &a)
{
  a = 0.0;
}
/**
 * @brief Reset both components of a double2 to zero.
 *
 * Callable from both host and device code.
 *
 * @param[out] a Vector whose x and y members are overwritten with 0.0.
 */
__device__ __host__ inline void zero(double2 &a)
{
  a.x = 0.0;
  a.y = 0.0;
}
/**
 * @brief Reset all three components of a double3 to zero.
 *
 * Callable from both host and device code.
 *
 * @param[out] a Vector whose x, y and z members are overwritten with 0.0.
 */
__device__ __host__ inline void zero(double3 &a)
{
  a.x = 0.0;
  a.y = 0.0;
  a.z = 0.0;
}
/**
 * @brief Reset all four components of a double4 to zero.
 *
 * Callable from both host and device code.
 *
 * @param[out] a Vector whose x, y, z and w members are overwritten with 0.0.
 */
__device__ __host__ inline void zero(double4 &a)
{
  a.x = 0.0;
  a.y = 0.0;
  a.z = 0.0;
  a.w = 0.0;
}
/**
 * @brief Reset a single-precision scalar to zero.
 *
 * Callable from both host and device code.
 *
 * @param[out] a Value that is overwritten with 0.0f.
 */
__device__ __host__ inline void zero(float &a)
{
  // Single-precision literal: no double constant is involved in the store.
  a = 0.0f;
}
/**
 * @brief Reset both components of a float2 to zero.
 *
 * Callable from both host and device code.
 *
 * @param[out] a Vector whose x and y members are overwritten with 0.0f.
 */
__device__ __host__ inline void zero(float2 &a)
{
  // Single-precision literals keep the stores free of double constants.
  a.x = 0.0f;
  a.y = 0.0f;
}
/**
 * @brief Reset all three components of a float3 to zero.
 *
 * Callable from both host and device code.
 *
 * @param[out] a Vector whose x, y and z members are overwritten with 0.0f.
 */
__device__ __host__ inline void zero(float3 &a)
{
  // Single-precision literals keep the stores free of double constants.
  a.x = 0.0f;
  a.y = 0.0f;
  a.z = 0.0f;
}
/**
 * @brief Reset all four components of a float4 to zero.
 *
 * Callable from both host and device code.
 *
 * @param[out] a Vector whose x, y, z and w members are overwritten with 0.0f.
 */
__device__ __host__ inline void zero(float4 &a)
{
  // Single-precision literals keep the stores free of double constants.
  a.x = 0.0f;
  a.y = 0.0f;
  a.z = 0.0f;
  a.w = 0.0f;
}
/**
 * @brief Component-wise sum of two double2 values.
 *
 * Callable from both host and device code.
 *
 * @param x Left operand.
 * @param y Right operand.
 * @return double2 holding (x.x + y.x, x.y + y.y).
 */
__host__ __device__ inline double2 operator+(const double2 &x, const double2 &y)
{
  return make_double2(x.x + y.x, x.y + y.y);
}
/**
 * @brief Component-wise difference of two double2 values.
 *
 * Callable from both host and device code.
 *
 * @param x Minuend.
 * @param y Subtrahend.
 * @return double2 holding (x.x - y.x, x.y - y.y).
 */
__host__ __device__ inline double2 operator-(const double2 &x, const double2 &y)
{
  return make_double2(x.x - y.x, x.y - y.y);
}
/**
 * @brief Component-wise difference of two float2 values.
 *
 * Callable from both host and device code.
 *
 * @param x Minuend.
 * @param y Subtrahend.
 * @return float2 holding (x.x - y.x, x.y - y.y).
 */
__host__ __device__ inline float2 operator-(const float2 &x, const float2 &y)
{
  return make_float2(x.x - y.x, x.y - y.y);
}
/**
 * @brief Component-wise difference of two float4 values.
 *
 * Callable from both host and device code.
 *
 * @param x Minuend.
 * @param y Subtrahend.
 * @return float4 holding (x.x - y.x, x.y - y.y, x.z - y.z, x.w - y.w).
 */
__host__ __device__ inline float4 operator-(const float4 &x, const float4 &y)
{
  return make_float4(x.x - y.x, x.y - y.y, x.z - y.z, x.w - y.w);
}
/**
 * @brief Component-wise sum of two double3 values.
 *
 * Callable from both host and device code.
 *
 * @param x Left operand.
 * @param y Right operand.
 * @return double3 holding (x.x + y.x, x.y + y.y, x.z + y.z).
 */
__host__ __device__ inline double3 operator+(const double3 &x, const double3 &y)
{
  return make_double3(x.x + y.x, x.y + y.y, x.z + y.z);
}
/**
 * @brief Component-wise sum of two double4 values.
 *
 * Callable from both host and device code.
 *
 * @param x Left operand.
 * @param y Right operand.
 * @return double4 holding (x.x + y.x, x.y + y.y, x.z + y.z, x.w + y.w).
 */
__host__ __device__ inline double4 operator+(const double4 &x, const double4 &y)
{
  return make_double4(x.x + y.x, x.y + y.y, x.z + y.z, x.w + y.w);
}
48 __host__ __device__
inline float4
operator*(
const float a,
const float4
x) {
57 __host__ __device__
inline float2
operator*(
const float a,
const float2
x) {
64 __host__ __device__
inline double2
operator*(
const double a,
const double2
x) {
71 __host__ __device__
inline double4
operator*(
const double a,
const double4
x) {
80 __host__ __device__
inline float2
operator+(
const float2
x,
const float2
y) {
87 __host__ __device__
inline float4
operator+(
const float4
x,
const float4
y) {
96 __host__ __device__
inline float4
operator+=(float4 &
x,
const float4
y) {
104 __host__ __device__
inline float2
operator+=(float2 &
x,
const float2
y) {
110 __host__ __device__
inline double2
operator+=(double2 &
x,
const double2
y) {
116 __host__ __device__
inline double3
operator+=(double3 &
x,
const double3
y) {
123 __host__ __device__
inline double4
operator+=(double4 &
x,
const double4
y) {
131 __host__ __device__
inline float4
operator-=(float4 &
x,
const float4
y) {
139 __host__ __device__
inline float2
operator-=(float2 &
x,
const float2
y) {
145 __host__ __device__
inline double2
operator-=(double2 &
x,
const double2
y) {
151 __host__ __device__
inline float2
operator*=(float2 &
x,
const float a) {
157 __host__ __device__
inline double2
operator*=(double2 &
x,
const float a) {
163 __host__ __device__
inline float4
operator*=(float4 &
a,
const float &
b) {
171 __host__ __device__
inline double2
operator*=(double2 &
a,
const double &
b) {
177 __host__ __device__
inline double4
operator*=(double4 &
a,
const double &
b) {
/**
 * @brief Unary negation of a float2.
 *
 * Callable from both host and device code.
 *
 * @param x Value to negate.
 * @return float2 holding (-x.x, -x.y).
 */
__host__ __device__ inline float2 operator-(const float2 &x)
{
  return make_float2(-x.x, -x.y);
}
/**
 * @brief Unary negation of a double2.
 *
 * Callable from both host and device code.
 *
 * @param x Value to negate.
 * @return double2 holding (-x.x, -x.y).
 */
__host__ __device__ inline double2 operator-(const double2 &x)
{
  return make_double2(-x.x, -x.y);
}
198 __forceinline__ __host__ __device__
float max_fabs(
const float4 &
c) {
204 __forceinline__ __host__ __device__
float max_fabs(
const float2 &
b) {
208 __forceinline__ __host__ __device__
double max_fabs(
const double4 &
c) {
214 __forceinline__ __host__ __device__
double max_fabs(
const double2 &
b) {
/**
 * @brief Convert a double2 to a float2.
 *
 * Each component is narrowed from double to single precision.
 * Callable from both host and device code.
 *
 * @param a Double-precision input.
 * @return float2 holding the narrowed (a.x, a.y).
 */
__forceinline__ __host__ __device__ float2 make_FloatN(const double2 &a)
{
  // Explicit casts spell out the double-to-float narrowing.
  return make_float2(static_cast<float>(a.x), static_cast<float>(a.y));
}
/**
 * @brief Convert a double4 to a float4.
 *
 * Each component is narrowed from double to single precision.
 * Callable from both host and device code.
 *
 * @param a Double-precision input.
 * @return float4 holding the narrowed (a.x, a.y, a.z, a.w).
 */
__forceinline__ __host__ __device__ float4 make_FloatN(const double4 &a)
{
  // Explicit casts spell out the double-to-float narrowing.
  return make_float4(static_cast<float>(a.x), static_cast<float>(a.y),
                     static_cast<float>(a.z), static_cast<float>(a.w));
}
/**
 * @brief Convert a float2 to a double2.
 *
 * Each component is widened from single to double precision.
 * Callable from both host and device code.
 *
 * @param a Single-precision input.
 * @return double2 holding (a.x, a.y).
 */
__forceinline__ __host__ __device__ double2 make_FloatN(const float2 &a)
{
  return make_double2(a.x, a.y);
}
/**
 * @brief Convert a float4 to a double4.
 *
 * Each component is widened from single to double precision.
 * Callable from both host and device code.
 *
 * @param a Single-precision input.
 * @return double4 holding (a.x, a.y, a.z, a.w).
 */
__forceinline__ __host__ __device__ double4 make_FloatN(const float4 &a)
{
  return make_double4(a.x, a.y, a.z, a.w);
}
/**
 * @brief Convert a float4 to a short4.
 *
 * Each component goes through a float-to-short conversion, which discards
 * the fractional part. No scaling, rounding or range clamping is applied here.
 * Callable from both host and device code.
 *
 * @param a Single-precision input.
 * @return short4 built from the converted (a.x, a.y, a.z, a.w).
 */
__forceinline__ __host__ __device__ short4 make_shortN(const float4 &a)
{
  // Explicit casts spell out the floating-point-to-integer conversion.
  return make_short4(static_cast<short>(a.x), static_cast<short>(a.y),
                     static_cast<short>(a.z), static_cast<short>(a.w));
}
/**
 * @brief Convert a float2 to a short2.
 *
 * Each component goes through a float-to-short conversion, which discards
 * the fractional part. No scaling, rounding or range clamping is applied here.
 * Callable from both host and device code.
 *
 * @param a Single-precision input.
 * @return short2 built from the converted (a.x, a.y).
 */
__forceinline__ __host__ __device__ short2 make_shortN(const float2 &a)
{
  // Explicit casts spell out the floating-point-to-integer conversion.
  return make_short2(static_cast<short>(a.x), static_cast<short>(a.y));
}
/**
 * @brief Convert a double4 to a short4.
 *
 * Each component goes through a double-to-short conversion, which discards
 * the fractional part. No scaling, rounding or range clamping is applied here.
 * Callable from both host and device code.
 *
 * @param a Double-precision input.
 * @return short4 built from the converted (a.x, a.y, a.z, a.w).
 */
__forceinline__ __host__ __device__ short4 make_shortN(const double4 &a)
{
  // Explicit casts spell out the floating-point-to-integer conversion.
  return make_short4(static_cast<short>(a.x), static_cast<short>(a.y),
                     static_cast<short>(a.z), static_cast<short>(a.w));
}
/**
 * @brief Convert a double2 to a short2.
 *
 * Each component goes through a double-to-short conversion, which discards
 * the fractional part. No scaling, rounding or range clamping is applied here.
 * Callable from both host and device code.
 *
 * @param a Double-precision input.
 * @return short2 built from the converted (a.x, a.y).
 */
__forceinline__ __host__ __device__ short2 make_shortN(const double2 &a)
{
  // Explicit casts spell out the floating-point-to-integer conversion.
  return make_short2(static_cast<short>(a.x), static_cast<short>(a.y));
}
/**
 * @brief Convert a std::complex value into a two-component CUDA vector.
 *
 * The result type cannot be deduced from the argument, so callers select it
 * explicitly, e.g. make_Float2<float2>(z); the complex type is deduced.
 * The real part is stored in .x and the imaginary part in .y.
 *
 * These helpers carry no __device__ qualifier and are therefore host-only.
 *
 * NOTE(review): only the template header of the generic version is visible
 * in this excerpt, so the primary template is declared here without a body;
 * confirm against the full header whether a generic definition exists.
 *
 * @tparam Float2  Destination vector type (double2 or float2 below).
 * @tparam Complex Source complex type.
 * @param a Complex value to convert.
 * @return Float2 holding (real(a), imag(a)).
 */
template <typename Float2, typename Complex>
inline Float2 make_Float2(const Complex &a);

/** @brief std::complex<double> to double2 (no precision change). */
template <>
inline double2 make_Float2<double2, std::complex<double> >(const std::complex<double> &a)
{
  return make_double2(a.real(), a.imag());
}

/** @brief std::complex<float> to double2 (components widened to double). */
template <>
inline double2 make_Float2<double2, std::complex<float> >(const std::complex<float> &a)
{
  return make_double2(a.real(), a.imag());
}

/** @brief std::complex<double> to float2 (components narrowed to float). */
template <>
inline float2 make_Float2<float2, std::complex<double> >(const std::complex<double> &a)
{
  // Explicit casts spell out the double-to-float narrowing.
  return make_float2(static_cast<float>(a.real()), static_cast<float>(a.imag()));
}

/** @brief std::complex<float> to float2 (no precision change). */
template <>
inline float2 make_Float2<float2, std::complex<float> >(const std::complex<float> &a)
{
  return make_float2(a.real(), a.imag());
}
__host__ __device__ float4 operator-=(float4 &x, const float4 y)
std::complex< double > Complex
double fmax(double, double)
__forceinline__ __host__ __device__ short4 make_shortN(const float4 &a)
__host__ __device__ float2 operator*=(float2 &x, const float a)
complex< double > make_Complex(const double2 &a)
__device__ __host__ ColorSpinor< Float, Nc, Ns > operator-(const ColorSpinor< Float, Nc, Ns > &x, const ColorSpinor< Float, Nc, Ns > &y)
ColorSpinor subtraction operator.
float fmaxf(float, float)
__forceinline__ __host__ __device__ float2 make_FloatN(const double2 &a)
__device__ __host__ ColorSpinor< Float, Nc, Ns > operator+(const ColorSpinor< Float, Nc, Ns > &x, const ColorSpinor< Float, Nc, Ns > &y)
ColorSpinor addition operator.
__forceinline__ __host__ __device__ float max_fabs(const float4 &c)
Float2 make_Float2(const Complex &a)
__host__ __device__ float4 operator+=(float4 &x, const float4 y)
__device__ __host__ ColorSpinor< Float, Nc, Ns > operator*(const S &a, const ColorSpinor< Float, Nc, Ns > &x)
Compute the scalar-vector product y = a * x.
__device__ __host__ void zero(vector_type< scalar, n > &v)