QUDA  1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
float_vector.h
Go to the documentation of this file.
1 #include <complex_quda.h>
2 
10 #pragma once
11 
12 namespace quda {
13 
/** Set a double-precision scalar to zero. */
__device__ __host__ inline void zero(double &a)
{
  a = 0.0;
}

/** Set both components of a double2 to zero. */
__device__ __host__ inline void zero(double2 &a)
{
  a = make_double2(0.0, 0.0);
}

/** Set all three components of a double3 to zero. */
__device__ __host__ inline void zero(double3 &a)
{
  a = make_double3(0.0, 0.0, 0.0);
}

/** Set all four components of a double4 to zero. */
__device__ __host__ inline void zero(double4 &a)
{
  a = make_double4(0.0, 0.0, 0.0, 0.0);
}
18 
/** Set a single-precision scalar to zero. */
__device__ __host__ inline void zero(float &a)
{
  a = 0.0f; // float literal: no double constant involved
}

/** Set both components of a float2 to zero. */
__device__ __host__ inline void zero(float2 &a)
{
  a.x = 0.0f;
  a.y = 0.0f;
}

/** Set all three components of a float3 to zero. */
__device__ __host__ inline void zero(float3 &a)
{
  a.x = 0.0f;
  a.y = 0.0f;
  a.z = 0.0f;
}

/** Set all four components of a float4 to zero. */
__device__ __host__ inline void zero(float4 &a)
{
  a.x = 0.0f;
  a.y = 0.0f;
  a.z = 0.0f;
  a.w = 0.0f;
}
23 
/** Component-wise sum of two double2 values. */
__host__ __device__ inline double2 operator+(const double2 &x, const double2 &y)
{
  double2 sum;
  sum.x = x.x + y.x;
  sum.y = x.y + y.y;
  return sum;
}
27 
/** Component-wise difference x - y of two double2 values. */
__host__ __device__ inline double2 operator-(const double2 &x, const double2 &y)
{
  double2 diff;
  diff.x = x.x - y.x;
  diff.y = x.y - y.y;
  return diff;
}

/** Component-wise difference x - y of two float2 values. */
__host__ __device__ inline float2 operator-(const float2 &x, const float2 &y)
{
  float2 diff;
  diff.x = x.x - y.x;
  diff.y = x.y - y.y;
  return diff;
}

/** Component-wise difference x - y of two float4 values. */
__host__ __device__ inline float4 operator-(const float4 &x, const float4 &y)
{
  float4 diff;
  diff.x = x.x - y.x;
  diff.y = x.y - y.y;
  diff.z = x.z - y.z;
  diff.w = x.w - y.w;
  return diff;
}
39 
/** Component-wise sum of two double3 values. */
__host__ __device__ inline double3 operator+(const double3 &x, const double3 &y)
{
  double3 sum;
  sum.x = x.x + y.x;
  sum.y = x.y + y.y;
  sum.z = x.z + y.z;
  return sum;
}

/** Component-wise sum of two double4 values. */
__host__ __device__ inline double4 operator+(const double4 &x, const double4 &y)
{
  double4 sum;
  sum.x = x.x + y.x;
  sum.y = x.y + y.y;
  sum.z = x.z + y.z;
  sum.w = x.w + y.w;
  return sum;
}
47 
/** Scale every component of a float4 by the scalar a. */
__host__ __device__ inline float4 operator*(const float a, const float4 x)
{
  return make_float4(a * x.x, a * x.y, a * x.z, a * x.w);
}

/** Scale both components of a float2 by the scalar a. */
__host__ __device__ inline float2 operator*(const float a, const float2 x)
{
  return make_float2(a * x.x, a * x.y);
}

/** Scale both components of a double2 by the scalar a. */
__host__ __device__ inline double2 operator*(const double a, const double2 x)
{
  return make_double2(a * x.x, a * x.y);
}

/** Scale every component of a double4 by the scalar a. */
__host__ __device__ inline double4 operator*(const double a, const double4 x)
{
  return make_double4(a * x.x, a * x.y, a * x.z, a * x.w);
}
79 
/** Component-wise sum of two float2 values (arguments taken by value). */
__host__ __device__ inline float2 operator+(const float2 x, const float2 y)
{
  return make_float2(x.x + y.x, x.y + y.y);
}

/** Component-wise sum of two float4 values (arguments taken by value). */
__host__ __device__ inline float4 operator+(const float4 x, const float4 y)
{
  return make_float4(x.x + y.x, x.y + y.y, x.z + y.z, x.w + y.w);
}
95 
/**
   Add y to x component-wise, in place.
   Returns a copy of the updated x (by value, not a reference).
*/
__host__ __device__ inline float4 operator+=(float4 &x, const float4 y)
{
  x = make_float4(x.x + y.x, x.y + y.y, x.z + y.z, x.w + y.w);
  return x;
}

/** Add y to x component-wise, in place; returns a copy of the updated x. */
__host__ __device__ inline float2 operator+=(float2 &x, const float2 y)
{
  x = make_float2(x.x + y.x, x.y + y.y);
  return x;
}

/** Add y to x component-wise, in place; returns a copy of the updated x. */
__host__ __device__ inline double2 operator+=(double2 &x, const double2 y)
{
  x = make_double2(x.x + y.x, x.y + y.y);
  return x;
}

/** Add y to x component-wise, in place; returns a copy of the updated x. */
__host__ __device__ inline double3 operator+=(double3 &x, const double3 y)
{
  x = make_double3(x.x + y.x, x.y + y.y, x.z + y.z);
  return x;
}

/** Add y to x component-wise, in place; returns a copy of the updated x. */
__host__ __device__ inline double4 operator+=(double4 &x, const double4 y)
{
  x = make_double4(x.x + y.x, x.y + y.y, x.z + y.z, x.w + y.w);
  return x;
}
130 
/**
   Subtract y from x component-wise, in place.
   Returns a copy of the updated x (by value, not a reference).
*/
__host__ __device__ inline float4 operator-=(float4 &x, const float4 y)
{
  x = make_float4(x.x - y.x, x.y - y.y, x.z - y.z, x.w - y.w);
  return x;
}

/** Subtract y from x component-wise, in place; returns a copy of the updated x. */
__host__ __device__ inline float2 operator-=(float2 &x, const float2 y)
{
  x = make_float2(x.x - y.x, x.y - y.y);
  return x;
}

/** Subtract y from x component-wise, in place; returns a copy of the updated x. */
__host__ __device__ inline double2 operator-=(double2 &x, const double2 y)
{
  x = make_double2(x.x - y.x, x.y - y.y);
  return x;
}
150 
/**
   Scale x by the scalar a, in place.
   Returns a copy of the updated x (by value, not a reference).
*/
__host__ __device__ inline float2 operator*=(float2 &x, const float a)
{
  x = make_float2(x.x * a, x.y * a);
  return x;
}

/**
   Scale a double2 by a single-precision scalar, in place.  The products are
   formed in double precision (a is promoted); returns a copy of the updated x.
*/
__host__ __device__ inline double2 operator*=(double2 &x, const float a)
{
  x = make_double2(x.x * a, x.y * a);
  return x;
}
162 
/**
   Scale a by the scalar b, in place.
   b is received by reference and may alias a component of a (for example
   `a *= a.x`), so its value is copied before any component is modified;
   every component is then scaled by the same factor.
   Returns a copy of the updated a (by value, not a reference).
*/
__host__ __device__ inline float4 operator*=(float4 &a, const float &b)
{
  const float scale = b; // snapshot before a is written (b may refer into a)
  a.x *= scale;
  a.y *= scale;
  a.z *= scale;
  a.w *= scale;
  return a;
}

/**
   Scale a by the scalar b, in place.  The scalar is copied first so that
   an aliasing call such as `a *= a.x` scales both components uniformly.
   Returns a copy of the updated a.
*/
__host__ __device__ inline double2 operator*=(double2 &a, const double &b)
{
  const double scale = b; // snapshot before a is written (b may refer into a)
  a.x *= scale;
  a.y *= scale;
  return a;
}

/**
   Scale a by the scalar b, in place.  The scalar is copied first so that
   an aliasing call such as `a *= a.x` scales all components uniformly.
   Returns a copy of the updated a.
*/
__host__ __device__ inline double4 operator*=(double4 &a, const double &b)
{
  const double scale = b; // snapshot before a is written (b may refer into a)
  a.x *= scale;
  a.y *= scale;
  a.z *= scale;
  a.w *= scale;
  return a;
}
184 
/** Unary minus: negate both components of a float2. */
__host__ __device__ inline float2 operator-(const float2 &x)
{
  float2 neg;
  neg.x = -x.x;
  neg.y = -x.y;
  return neg;
}

/** Unary minus: negate both components of a double2. */
__host__ __device__ inline double2 operator-(const double2 &x)
{
  double2 neg;
  neg.x = -x.x;
  neg.y = -x.y;
  return neg;
}
192 
193 
/*
  Operations to return the maximum absolute value of a FloatN vector
*/

/** Largest absolute value among the four components of a float4. */
__forceinline__ __host__ __device__ float max_fabs(const float4 &c)
{
  const float xy = fmaxf(fabsf(c.x), fabsf(c.y));
  const float zw = fmaxf(fabsf(c.z), fabsf(c.w));
  return fmaxf(xy, zw);
}

/** Larger of the absolute values of the two components of a float2. */
__forceinline__ __host__ __device__ float max_fabs(const float2 &b)
{
  return fmaxf(fabsf(b.x), fabsf(b.y));
}

/** Largest absolute value among the four components of a double4. */
__forceinline__ __host__ __device__ double max_fabs(const double4 &c)
{
  const double xy = fmax(fabs(c.x), fabs(c.y));
  const double zw = fmax(fabs(c.z), fabs(c.w));
  return fmax(xy, zw);
}

/** Larger of the absolute values of the two components of a double2. */
__forceinline__ __host__ __device__ double max_fabs(const double2 &b)
{
  return fmax(fabs(b.x), fabs(b.y));
}
217 
218 
219  /*
220  Precision conversion routines for vector types
221  */
222 
/** Convert a double2 to a float2 (each component narrowed to float). */
__forceinline__ __host__ __device__ float2 make_FloatN(const double2 &a)
{
  return make_float2(static_cast<float>(a.x), static_cast<float>(a.y));
}

/** Convert a double4 to a float4 (each component narrowed to float). */
__forceinline__ __host__ __device__ float4 make_FloatN(const double4 &a)
{
  return make_float4(static_cast<float>(a.x), static_cast<float>(a.y),
                     static_cast<float>(a.z), static_cast<float>(a.w));
}

/** Convert a float2 to a double2 (each component widened to double). */
__forceinline__ __host__ __device__ double2 make_FloatN(const float2 &a)
{
  return make_double2(static_cast<double>(a.x), static_cast<double>(a.y));
}

/** Convert a float4 to a double4 (each component widened to double). */
__forceinline__ __host__ __device__ double4 make_FloatN(const float4 &a)
{
  return make_double4(static_cast<double>(a.x), static_cast<double>(a.y),
                      static_cast<double>(a.z), static_cast<double>(a.w));
}
238 
/** Convert a char4 to a short4, component by component. */
__forceinline__ __host__ __device__ short4 make_shortN(const char4 &a)
{
  return make_short4(static_cast<short>(a.x), static_cast<short>(a.y),
                     static_cast<short>(a.z), static_cast<short>(a.w));
}

/** Convert a char2 to a short2, component by component. */
__forceinline__ __host__ __device__ short2 make_shortN(const char2 &a)
{
  return make_short2(static_cast<short>(a.x), static_cast<short>(a.y));
}

/** Convert a float4 to a short4 using the built-in float-to-short conversion. */
__forceinline__ __host__ __device__ short4 make_shortN(const float4 &a)
{
  return make_short4(static_cast<short>(a.x), static_cast<short>(a.y),
                     static_cast<short>(a.z), static_cast<short>(a.w));
}

/** Convert a float2 to a short2 using the built-in float-to-short conversion. */
__forceinline__ __host__ __device__ short2 make_shortN(const float2 &a)
{
  return make_short2(static_cast<short>(a.x), static_cast<short>(a.y));
}

/** Convert a double4 to a short4 using the built-in double-to-short conversion. */
__forceinline__ __host__ __device__ short4 make_shortN(const double4 &a)
{
  return make_short4(static_cast<short>(a.x), static_cast<short>(a.y),
                     static_cast<short>(a.z), static_cast<short>(a.w));
}

/** Convert a double2 to a short2 using the built-in double-to-short conversion. */
__forceinline__ __host__ __device__ short2 make_shortN(const double2 &a)
{
  return make_short2(static_cast<short>(a.x), static_cast<short>(a.y));
}
262 
/** Convert a short4 to a char4, narrowing each component. */
__forceinline__ __host__ __device__ char4 make_charN(const short4 &a)
{
  return make_char4(static_cast<signed char>(a.x), static_cast<signed char>(a.y),
                    static_cast<signed char>(a.z), static_cast<signed char>(a.w));
}

/** Convert a short2 to a char2, narrowing each component. */
__forceinline__ __host__ __device__ char2 make_charN(const short2 &a)
{
  return make_char2(static_cast<signed char>(a.x), static_cast<signed char>(a.y));
}

/** Convert a float4 to a char4 using the built-in float-to-integer conversion. */
__forceinline__ __host__ __device__ char4 make_charN(const float4 &a)
{
  return make_char4(static_cast<signed char>(a.x), static_cast<signed char>(a.y),
                    static_cast<signed char>(a.z), static_cast<signed char>(a.w));
}

/** Convert a float2 to a char2 using the built-in float-to-integer conversion. */
__forceinline__ __host__ __device__ char2 make_charN(const float2 &a)
{
  return make_char2(static_cast<signed char>(a.x), static_cast<signed char>(a.y));
}

/** Convert a double4 to a char4 using the built-in double-to-integer conversion. */
__forceinline__ __host__ __device__ char4 make_charN(const double4 &a)
{
  return make_char4(static_cast<signed char>(a.x), static_cast<signed char>(a.y),
                    static_cast<signed char>(a.z), static_cast<signed char>(a.w));
}

/** Convert a double2 to a char2 using the built-in double-to-integer conversion. */
__forceinline__ __host__ __device__ char2 make_charN(const double2 &a)
{
  return make_char2(static_cast<signed char>(a.x), static_cast<signed char>(a.y));
}
/* Helper functions for converting between float2/double2 and complex */

/**
   Generic fallback: returns the literal 0 C-style-cast to Float2.
   The explicit specialisations below handle the (Float2, Complex)
   pairs built from double2/float2 and complex<double>/complex<float>.
*/
template <typename Float2, typename Complex>
inline Float2 make_Float2(const Complex &a)
{
  return (Float2)0;
}

// --- unqualified complex<T> -> double2 / float2 (x = real part, y = imaginary part) ---

template <>
inline double2 make_Float2<double2, complex<double> >(const complex<double> &a)
{
  return make_double2(a.real(), a.imag());
}

template <>
inline double2 make_Float2<double2, complex<float> >(const complex<float> &a)
{
  return make_double2(a.real(), a.imag());
}

template <>
inline float2 make_Float2<float2, complex<double> >(const complex<double> &a)
{
  return make_float2(a.real(), a.imag());
}

template <>
inline float2 make_Float2<float2, complex<float> >(const complex<float> &a)
{
  return make_float2(a.real(), a.imag());
}

// --- std::complex<T> -> double2 / float2 (x = real part, y = imaginary part) ---

template <>
inline double2 make_Float2<double2, std::complex<double> >(const std::complex<double> &a)
{
  return make_double2(a.real(), a.imag());
}

template <>
inline double2 make_Float2<double2, std::complex<float> >(const std::complex<float> &a)
{
  return make_double2(a.real(), a.imag());
}

template <>
inline float2 make_Float2<float2, std::complex<double> >(const std::complex<double> &a)
{
  return make_float2(a.real(), a.imag());
}

template <>
inline float2 make_Float2<float2, std::complex<float> >(const std::complex<float> &a)
{
  return make_float2(a.real(), a.imag());
}
307 
308 
/** Build a complex<double> with real part a.x and imaginary part a.y. */
inline complex<double> make_Complex(const double2 &a)
{
  const complex<double> z(a.x, a.y);
  return z;
}

/** Build a complex<float> with real part a.x and imaginary part a.y. */
inline complex<float> make_Complex(const float2 &a)
{
  const complex<float> z(a.x, a.y);
  return z;
}
311 
/**
   Trait that maps a scalar, vector or complex type to its underlying
   component type, exposed as the nested typedef `type`.  The primary
   template defines no `type`; only the specialisations below do.

   NOTE(review): float4, short4 and char4 are specialised but double4 is
   not -- confirm whether that omission is intentional.
*/
template <typename T> struct RealType {
};

// double-based types
template <> struct RealType<double> {
  typedef double type;
};
template <> struct RealType<double2> {
  typedef double type;
};
template <> struct RealType<complex<double> > {
  typedef double type;
};

// float-based types
template <> struct RealType<float> {
  typedef float type;
};
template <> struct RealType<float2> {
  typedef float type;
};
template <> struct RealType<complex<float> > {
  typedef float type;
};
template <> struct RealType<float4> {
  typedef float type;
};

// short-based types
template <> struct RealType<short> {
  typedef short type;
};
template <> struct RealType<short2> {
  typedef short type;
};
template <> struct RealType<complex<short> > {
  typedef short type;
};
template <> struct RealType<short4> {
  typedef short type;
};

// char-based types
template <> struct RealType<char> {
  typedef char type;
};
template <> struct RealType<char2> {
  typedef char type;
};
template <> struct RealType<complex<char> > {
  typedef char type;
};
template <> struct RealType<char4> {
  typedef char type;
};
328 
329 }
__host__ __device__ float4 operator-=(float4 &x, const float4 y)
Definition: float_vector.h:131
__forceinline__ __host__ __device__ char4 make_charN(const short4 &a)
Definition: float_vector.h:263
__host__ __device__ float2 operator*=(float2 &x, const float a)
Definition: float_vector.h:151
complex< double > make_Complex(const double2 &a)
Definition: float_vector.h:309
__device__ __host__ ColorSpinor< Float, Nc, Ns > operator-(const ColorSpinor< Float, Nc, Ns > &x, const ColorSpinor< Float, Nc, Ns > &y)
ColorSpinor subtraction operator.
__host__ __device__ float imag() const volatile
Definition: complex_quda.h:553
std::complex< double > Complex
Definition: quda_internal.h:46
__forceinline__ __host__ __device__ float2 make_FloatN(const double2 &a)
Definition: float_vector.h:223
__host__ __device__ double imag() const volatile
Definition: complex_quda.h:683
__device__ __host__ ColorSpinor< Float, Nc, Ns > operator+(const ColorSpinor< Float, Nc, Ns > &x, const ColorSpinor< Float, Nc, Ns > &y)
ColorSpinor addition operator.
__forceinline__ __host__ __device__ float max_fabs(const float4 &c)
Definition: float_vector.h:198
__forceinline__ __host__ __device__ short4 make_shortN(const char4 &a)
Definition: float_vector.h:239
Float2 make_Float2(const Complex &a)
Definition: float_vector.h:288
__host__ __device__ float4 operator+=(float4 &x, const float4 y)
Definition: float_vector.h:96
__device__ __host__ ColorSpinor< Float, Nc, Ns > operator*(const S &a, const ColorSpinor< Float, Nc, Ns > &x)
Compute the scalar-vector product y = a * x.
__device__ __host__ void zero(vector_type< scalar, n > &v)
Definition: cub_helper.cuh:54
__host__ __device__ double real() const volatile
Definition: complex_quda.h:682
__host__ __device__ float real() const volatile
Definition: complex_quda.h:552