QUDA  0.9.0
float_vector.h
Go to the documentation of this file.
1 #include <complex_quda.h>
2 
10 #pragma once
11 
12 namespace quda {
13 
/** @brief Reset a double-precision scalar to zero. */
__device__ __host__ inline void zero(double &a)
{
  a = 0.0;
}

/** @brief Reset both components of a double2 to zero. */
__device__ __host__ inline void zero(double2 &a)
{
  a.x = a.y = 0.0;
}

/** @brief Reset all three components of a double3 to zero. */
__device__ __host__ inline void zero(double3 &a)
{
  a.x = a.y = a.z = 0.0;
}

/** @brief Reset all four components of a double4 to zero. */
__device__ __host__ inline void zero(double4 &a)
{
  a.x = a.y = a.z = a.w = 0.0;
}
18 
/**
   @brief Reset a single-precision scalar to zero.
   Uses a float literal so that no double-precision constant is
   introduced into single-precision code.
*/
__device__ __host__ inline void zero(float &a) { a = 0.0f; }

/** @brief Reset both components of a float2 to zero. */
__device__ __host__ inline void zero(float2 &a) { a.x = 0.0f; a.y = 0.0f; }

/** @brief Reset all three components of a float3 to zero. */
__device__ __host__ inline void zero(float3 &a) { a.x = 0.0f; a.y = 0.0f; a.z = 0.0f; }

/** @brief Reset all four components of a float4 to zero. */
__device__ __host__ inline void zero(float4 &a) { a.x = 0.0f; a.y = 0.0f; a.z = 0.0f; a.w = 0.0f; }
23 
/**
   @brief Component-wise sum of two double2 values.
   @param x Left operand
   @param y Right operand
   @return double2 holding (x.x + y.x, x.y + y.y)
*/
__host__ __device__ inline double2 operator+(const double2& x, const double2 &y)
{
  double2 sum;
  sum.x = x.x + y.x;
  sum.y = x.y + y.y;
  return sum;
}
27 
/**
   @brief Component-wise difference of two double2 values.
   @return double2 holding (x.x - y.x, x.y - y.y)
*/
__host__ __device__ inline double2 operator-(const double2& x, const double2 &y)
{
  double2 diff;
  diff.x = x.x - y.x;
  diff.y = x.y - y.y;
  return diff;
}

/**
   @brief Component-wise difference of two float2 values.
   @return float2 holding (x.x - y.x, x.y - y.y)
*/
__host__ __device__ inline float2 operator-(const float2& x, const float2 &y)
{
  float2 diff;
  diff.x = x.x - y.x;
  diff.y = x.y - y.y;
  return diff;
}

/**
   @brief Component-wise difference of two float4 values.
   @return float4 holding the four per-component differences x - y
*/
__host__ __device__ inline float4 operator-(const float4& x, const float4 &y)
{
  float4 diff;
  diff.x = x.x - y.x;
  diff.y = x.y - y.y;
  diff.z = x.z - y.z;
  diff.w = x.w - y.w;
  return diff;
}
39 
/**
   @brief Component-wise sum of two double3 values.
   @return double3 holding the three per-component sums x + y
*/
__host__ __device__ inline double3 operator+(const double3& x, const double3 &y)
{
  double3 sum;
  sum.x = x.x + y.x;
  sum.y = x.y + y.y;
  sum.z = x.z + y.z;
  return sum;
}

/**
   @brief Component-wise sum of two double4 values.
   @return double4 holding the four per-component sums x + y
*/
__host__ __device__ inline double4 operator+(const double4& x, const double4 &y)
{
  double4 sum;
  sum.x = x.x + y.x;
  sum.y = x.y + y.y;
  sum.z = x.z + y.z;
  sum.w = x.w + y.w;
  return sum;
}
47 
/**
   @brief Scalar-vector product a * x for float4.
   @param a Scalar multiplier
   @param x Vector operand
   @return float4 with every component of x scaled by a
*/
__host__ __device__ inline float4 operator*(const float a, const float4 x)
{
  return make_float4(a * x.x, a * x.y, a * x.z, a * x.w);
}

/**
   @brief Scalar-vector product a * x for float2.
   @return float2 with both components of x scaled by a
*/
__host__ __device__ inline float2 operator*(const float a, const float2 x)
{
  return make_float2(a * x.x, a * x.y);
}

/**
   @brief Scalar-vector product a * x for double2.
   @return double2 with both components of x scaled by a
*/
__host__ __device__ inline double2 operator*(const double a, const double2 x)
{
  return make_double2(a * x.x, a * x.y);
}

/**
   @brief Scalar-vector product a * x for double4.
   @return double4 with every component of x scaled by a
*/
__host__ __device__ inline double4 operator*(const double a, const double4 x)
{
  return make_double4(a * x.x, a * x.y, a * x.z, a * x.w);
}
79 
/**
   @brief Component-wise sum of two float2 values.
   @return float2 holding (x.x + y.x, x.y + y.y)
*/
__host__ __device__ inline float2 operator+(const float2 x, const float2 y)
{
  return make_float2(x.x + y.x, x.y + y.y);
}

/**
   @brief Component-wise sum of two float4 values.
   @return float4 holding the four per-component sums x + y
*/
__host__ __device__ inline float4 operator+(const float4 x, const float4 y)
{
  return make_float4(x.x + y.x, x.y + y.y, x.z + y.z, x.w + y.w);
}
95 
/**
   @brief In-place component-wise addition for float4.
   @param x Accumulator, updated in place
   @param y Value added to x
   @return Copy of x after the update
*/
__host__ __device__ inline float4 operator+=(float4 &x, const float4 y)
{
  x.x = x.x + y.x;
  x.y = x.y + y.y;
  x.z = x.z + y.z;
  x.w = x.w + y.w;
  return x;
}

/**
   @brief In-place component-wise addition for float2.
   @return Copy of x after the update
*/
__host__ __device__ inline float2 operator+=(float2 &x, const float2 y)
{
  x.x = x.x + y.x;
  x.y = x.y + y.y;
  return x;
}

/**
   @brief In-place component-wise addition for double2.
   @return Copy of x after the update
*/
__host__ __device__ inline double2 operator+=(double2 &x, const double2 y)
{
  x.x = x.x + y.x;
  x.y = x.y + y.y;
  return x;
}

/**
   @brief In-place component-wise addition for double3.
   @return Copy of x after the update
*/
__host__ __device__ inline double3 operator+=(double3 &x, const double3 y)
{
  x.x = x.x + y.x;
  x.y = x.y + y.y;
  x.z = x.z + y.z;
  return x;
}

/**
   @brief In-place component-wise addition for double4.
   @return Copy of x after the update
*/
__host__ __device__ inline double4 operator+=(double4 &x, const double4 y)
{
  x.x = x.x + y.x;
  x.y = x.y + y.y;
  x.z = x.z + y.z;
  x.w = x.w + y.w;
  return x;
}
130 
/**
   @brief In-place component-wise subtraction for float4.
   @param x Value updated in place
   @param y Value subtracted from x
   @return Copy of x after the update
*/
__host__ __device__ inline float4 operator-=(float4 &x, const float4 y)
{
  x.x = x.x - y.x;
  x.y = x.y - y.y;
  x.z = x.z - y.z;
  x.w = x.w - y.w;
  return x;
}

/**
   @brief In-place component-wise subtraction for float2.
   @return Copy of x after the update
*/
__host__ __device__ inline float2 operator-=(float2 &x, const float2 y)
{
  x.x = x.x - y.x;
  x.y = x.y - y.y;
  return x;
}

/**
   @brief In-place component-wise subtraction for double2.
   @return Copy of x after the update
*/
__host__ __device__ inline double2 operator-=(double2 &x, const double2 y)
{
  x.x = x.x - y.x;
  x.y = x.y - y.y;
  return x;
}
150 
/**
   @brief In-place scaling of a float2 by a single-precision scalar.
   @param x Vector updated in place
   @param a Scale factor
   @return Copy of x after the update
*/
__host__ __device__ inline float2 operator*=(float2 &x, const float a)
{
  x.x = x.x * a;
  x.y = x.y * a;
  return x;
}

/**
   @brief In-place scaling of a double2 by a single-precision scalar.
   The scalar is promoted to double by the multiplication.
   @return Copy of x after the update
*/
__host__ __device__ inline double2 operator*=(double2 &x, const float a)
{
  x.x = x.x * a;
  x.y = x.y * a;
  return x;
}

/**
   @brief In-place scaling of a float4 by a single-precision scalar.
   @param a Vector updated in place
   @param b Scale factor
   @return Copy of a after the update
*/
__host__ __device__ inline float4 operator*=(float4 &a, const float &b)
{
  a.x = a.x * b;
  a.y = a.y * b;
  a.z = a.z * b;
  a.w = a.w * b;
  return a;
}

/**
   @brief In-place scaling of a double2 by a double-precision scalar.
   @return Copy of a after the update
*/
__host__ __device__ inline double2 operator*=(double2 &a, const double &b)
{
  a.x = a.x * b;
  a.y = a.y * b;
  return a;
}

/**
   @brief In-place scaling of a double4 by a double-precision scalar.
   @return Copy of a after the update
*/
__host__ __device__ inline double4 operator*=(double4 &a, const double &b)
{
  a.x = a.x * b;
  a.y = a.y * b;
  a.z = a.z * b;
  a.w = a.w * b;
  return a;
}
184 
/**
   @brief Unary negation of a float2.
   @return float2 holding (-x.x, -x.y)
*/
__host__ __device__ inline float2 operator-(const float2 &x)
{
  float2 neg;
  neg.x = -x.x;
  neg.y = -x.y;
  return neg;
}

/**
   @brief Unary negation of a double2.
   @return double2 holding (-x.x, -x.y)
*/
__host__ __device__ inline double2 operator-(const double2 &x)
{
  double2 neg;
  neg.x = -x.x;
  neg.y = -x.y;
  return neg;
}
192 
193 
194  /*
195  Operations to return the maximum absolute value of a FloatN vector
196  */
197 
/**
   @brief Largest absolute value among the four components of a float4.
   @param c Input vector
   @return max(|c.x|, |c.y|, |c.z|, |c.w|), evaluated with fmaxf / fabsf
*/
__forceinline__ __host__ __device__ float max_fabs(const float4 &c)
{
  return fmaxf(fmaxf(fabsf(c.x), fabsf(c.y)), fmaxf(fabsf(c.z), fabsf(c.w)));
}

/**
   @brief Larger absolute value of the two components of a float2.
   @param b Input vector
   @return max(|b.x|, |b.y|), evaluated with fmaxf / fabsf
*/
__forceinline__ __host__ __device__ float max_fabs(const float2 &b)
{
  const float abs_x = fabsf(b.x);
  const float abs_y = fabsf(b.y);
  return fmaxf(abs_x, abs_y);
}

/**
   @brief Largest absolute value among the four components of a double4.
   @param c Input vector
   @return max(|c.x|, |c.y|, |c.z|, |c.w|), evaluated with fmax / fabs
*/
__forceinline__ __host__ __device__ double max_fabs(const double4 &c)
{
  return fmax(fmax(fabs(c.x), fabs(c.y)), fmax(fabs(c.z), fabs(c.w)));
}

/**
   @brief Larger absolute value of the two components of a double2.
   @param b Input vector
   @return max(|b.x|, |b.y|), evaluated with fmax / fabs
*/
__forceinline__ __host__ __device__ double max_fabs(const double2 &b)
{
  const double abs_x = fabs(b.x);
  const double abs_y = fabs(b.y);
  return fmax(abs_x, abs_y);
}
217 
218  /*
219  Precision conversion routines for vector types
220  */
221 
/**
   @brief Convert a double2 to the float2 with the same components.
   Each component is narrowed from double to float.
*/
__forceinline__ __host__ __device__ float2 make_FloatN(const double2 &a)
{
  float2 out;
  out.x = static_cast<float>(a.x);
  out.y = static_cast<float>(a.y);
  return out;
}

/**
   @brief Convert a double4 to the float4 with the same components.
   Each component is narrowed from double to float.
*/
__forceinline__ __host__ __device__ float4 make_FloatN(const double4 &a)
{
  float4 out;
  out.x = static_cast<float>(a.x);
  out.y = static_cast<float>(a.y);
  out.z = static_cast<float>(a.z);
  out.w = static_cast<float>(a.w);
  return out;
}

/**
   @brief Convert a float2 to the double2 with the same components.
   Each component is widened from float to double.
*/
__forceinline__ __host__ __device__ double2 make_FloatN(const float2 &a)
{
  double2 out;
  out.x = static_cast<double>(a.x);
  out.y = static_cast<double>(a.y);
  return out;
}

/**
   @brief Convert a float4 to the double4 with the same components.
   Each component is widened from float to double.
*/
__forceinline__ __host__ __device__ double4 make_FloatN(const float4 &a)
{
  double4 out;
  out.x = static_cast<double>(a.x);
  out.y = static_cast<double>(a.y);
  out.z = static_cast<double>(a.z);
  out.w = static_cast<double>(a.w);
  return out;
}
237 
/**
   @brief Convert a float4 to a short4.
   Each component goes through a floating-point to short conversion,
   which discards the fractional part.
*/
__forceinline__ __host__ __device__ short4 make_shortN(const float4 &a)
{
  short4 out;
  out.x = static_cast<short>(a.x);
  out.y = static_cast<short>(a.y);
  out.z = static_cast<short>(a.z);
  out.w = static_cast<short>(a.w);
  return out;
}

/**
   @brief Convert a float2 to a short2.
   Each component goes through a floating-point to short conversion,
   which discards the fractional part.
*/
__forceinline__ __host__ __device__ short2 make_shortN(const float2 &a)
{
  short2 out;
  out.x = static_cast<short>(a.x);
  out.y = static_cast<short>(a.y);
  return out;
}

/**
   @brief Convert a double4 to a short4.
   Each component goes through a floating-point to short conversion,
   which discards the fractional part.
*/
__forceinline__ __host__ __device__ short4 make_shortN(const double4 &a)
{
  short4 out;
  out.x = static_cast<short>(a.x);
  out.y = static_cast<short>(a.y);
  out.z = static_cast<short>(a.z);
  out.w = static_cast<short>(a.w);
  return out;
}

/**
   @brief Convert a double2 to a short2.
   Each component goes through a floating-point to short conversion,
   which discards the fractional part.
*/
__forceinline__ __host__ __device__ short2 make_shortN(const double2 &a)
{
  short2 out;
  out.x = static_cast<short>(a.x);
  out.y = static_cast<short>(a.y);
  return out;
}
253 
254 
255  /* Helper functions for converting between float2/double2 and complex */
/**
   @brief Generic fallback for converting a complex-like value to a
   two-component vector type; it returns 0 cast to Float2.
   NOTE(review): the cast only compiles for Float2 types that 0 can be
   cast to, so the specializations below are presumably the intended
   entry points - confirm against callers.
   @tparam Float2 Requested two-component result type
   @tparam Complex Type of the input value
*/
template<typename Float2, typename Complex>
inline Float2 make_Float2(const Complex &a) { return (Float2)0; }

/** @brief complex<double> -> double2 as (real, imag). */
template<>
inline double2 make_Float2(const complex<double> &a)
{
  double2 out;
  out.x = a.real();
  out.y = a.imag();
  return out;
}

/** @brief complex<float> -> double2 as (real, imag), widening each part. */
template<>
inline double2 make_Float2(const complex<float> &a)
{
  double2 out;
  out.x = a.real();
  out.y = a.imag();
  return out;
}

/** @brief complex<double> -> float2 as (real, imag), narrowing each part. */
template<>
inline float2 make_Float2(const complex<double> &a)
{
  float2 out;
  out.x = a.real();
  out.y = a.imag();
  return out;
}

/** @brief complex<float> -> float2 as (real, imag). */
template<>
inline float2 make_Float2(const complex<float> &a)
{
  float2 out;
  out.x = a.real();
  out.y = a.imag();
  return out;
}

/** @brief std::complex<double> -> double2 as (real, imag). */
template<>
inline double2 make_Float2(const std::complex<double> &a)
{
  double2 out;
  out.x = a.real();
  out.y = a.imag();
  return out;
}

/** @brief std::complex<float> -> double2 as (real, imag), widening each part. */
template<>
inline double2 make_Float2(const std::complex<float> &a)
{
  double2 out;
  out.x = a.real();
  out.y = a.imag();
  return out;
}

/** @brief std::complex<double> -> float2 as (real, imag), narrowing each part. */
template<>
inline float2 make_Float2(const std::complex<double> &a)
{
  float2 out;
  out.x = a.real();
  out.y = a.imag();
  return out;
}

/** @brief std::complex<float> -> float2 as (real, imag). */
template<>
inline float2 make_Float2(const std::complex<float> &a)
{
  float2 out;
  out.x = a.real();
  out.y = a.imag();
  return out;
}
276 
277 
/**
   @brief Build a complex<double> from a double2.
   @param a Input pair; a.x supplies the real part and a.y the imaginary part
*/
inline complex<double> make_Complex(const double2 &a)
{
  const complex<double> z(a.x, a.y);
  return z;
}

/**
   @brief Build a complex<float> from a float2.
   @param a Input pair; a.x supplies the real part and a.y the imaginary part
*/
inline complex<float> make_Complex(const float2 &a)
{
  const complex<float> z(a.x, a.y);
  return z;
}
280 
/**
   @brief Trait mapping a scalar, vector or complex type T to its
   underlying real scalar type, exposed as RealType<T>::type.
   The primary template is empty, so RealType<T>::type does not exist
   for any T without a specialization below.
*/
template<typename T> struct RealType {};

/* double-precision family */
template<> struct RealType<double> {
  typedef double type;
};
template<> struct RealType<double2> {
  typedef double type;
};
template<> struct RealType<complex<double> > {
  typedef double type;
};

/* single-precision family */
template<> struct RealType<float> {
  typedef float type;
};
template<> struct RealType<float2> {
  typedef float type;
};
template<> struct RealType<float4> {
  typedef float type;
};
template<> struct RealType<complex<float> > {
  typedef float type;
};

/* short (16-bit integer) family */
template<> struct RealType<short> {
  typedef short type;
};
template<> struct RealType<short2> {
  typedef short type;
};
template<> struct RealType<short4> {
  typedef short type;
};
template<> struct RealType<complex<short> > {
  typedef short type;
};
293 
294 }
__host__ __device__ float4 operator-=(float4 &x, const float4 y)
Definition: float_vector.h:131
float fabsf(float)
std::complex< double > Complex
Definition: eig_variables.h:13
double fmax(double, double)
__forceinline__ __host__ __device__ short4 make_shortN(const float4 &a)
Definition: float_vector.h:238
__host__ __device__ float2 operator*=(float2 &x, const float a)
Definition: float_vector.h:151
#define b
complex< double > make_Complex(const double2 &a)
Definition: float_vector.h:278
__device__ __host__ ColorSpinor< Float, Nc, Ns > operator-(const ColorSpinor< Float, Nc, Ns > &x, const ColorSpinor< Float, Nc, Ns > &y)
ColorSpinor subtraction operator.
Definition: color_spinor.h:907
float fmaxf(float, float)
__forceinline__ __host__ __device__ float2 make_FloatN(const double2 &a)
Definition: float_vector.h:222
__device__ __host__ ColorSpinor< Float, Nc, Ns > operator+(const ColorSpinor< Float, Nc, Ns > &x, const ColorSpinor< Float, Nc, Ns > &y)
ColorSpinor addition operator.
Definition: color_spinor.h:885
double fabs(double)
__forceinline__ __host__ __device__ float max_fabs(const float4 &c)
Definition: float_vector.h:198
const void * c
Float2 make_Float2(const Complex &a)
Definition: float_vector.h:257
__host__ __device__ float4 operator+=(float4 &x, const float4 y)
Definition: float_vector.h:96
__device__ __host__ ColorSpinor< Float, Nc, Ns > operator*(const S &a, const ColorSpinor< Float, Nc, Ns > &x)
Compute the scalar-vector product y = a * x.
Definition: color_spinor.h:929
#define a
__device__ __host__ void zero(vector_type< scalar, n > &v)
Definition: cub_helper.cuh:82