QUDA  v0.7.0
A library for QCD on GPUs
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
float_vector.h
Go to the documentation of this file.
1 
9 #pragma once
10 
11 namespace quda {
12 
/**
 * Component-wise sum of two double2 values (usable on host and device).
 * @param x left operand
 * @param y right operand
 * @return double2 holding (x.x + y.x, x.y + y.y)
 */
__host__ __device__ inline double2 operator+(const double2& x, const double2 &y) {
  double2 sum;
  sum.x = x.x + y.x;
  sum.y = x.y + y.y;
  return sum;
}
16 
/**
 * Component-wise difference of two double2 values.
 * @param x minuend
 * @param y subtrahend
 * @return double2 holding (x.x - y.x, x.y - y.y)
 */
__host__ __device__ inline double2 operator-(const double2& x, const double2 &y) {
  double2 diff;
  diff.x = x.x - y.x;
  diff.y = x.y - y.y;
  return diff;
}

/**
 * Component-wise difference of two float2 values.
 * @param x minuend
 * @param y subtrahend
 * @return float2 holding (x.x - y.x, x.y - y.y)
 */
__host__ __device__ inline float2 operator-(const float2& x, const float2 &y) {
  float2 diff;
  diff.x = x.x - y.x;
  diff.y = x.y - y.y;
  return diff;
}

/**
 * Component-wise difference of two float4 values.
 * @param x minuend
 * @param y subtrahend
 * @return float4 holding the four per-component differences x - y
 */
__host__ __device__ inline float4 operator-(const float4& x, const float4 &y) {
  float4 diff;
  diff.x = x.x - y.x;
  diff.y = x.y - y.y;
  diff.z = x.z - y.z;
  diff.w = x.w - y.w;
  return diff;
}
28 
/**
 * Component-wise sum of two double3 values.
 * @param x left operand
 * @param y right operand
 * @return double3 holding (x.x + y.x, x.y + y.y, x.z + y.z)
 */
__host__ __device__ inline double3 operator+(const double3& x, const double3 &y) {
  return make_double3(x.x + y.x, x.y + y.y, x.z + y.z);
}
34 
/**
 * Scale every component of a float4 by a scalar.
 * @param a scalar factor (left operand)
 * @param x vector to scale
 * @return float4 holding (a*x.x, a*x.y, a*x.z, a*x.w)
 */
__host__ __device__ inline float4 operator*(const float a, const float4 x) {
  return make_float4(a*x.x, a*x.y, a*x.z, a*x.w);
}

/**
 * Scale every component of a float2 by a scalar.
 * @param a scalar factor (left operand)
 * @param x vector to scale
 * @return float2 holding (a*x.x, a*x.y)
 */
__host__ __device__ inline float2 operator*(const float a, const float2 x) {
  return make_float2(a*x.x, a*x.y);
}

/**
 * Scale every component of a double2 by a scalar.
 * @param a scalar factor (left operand)
 * @param x vector to scale
 * @return double2 holding (a*x.x, a*x.y)
 */
__host__ __device__ inline double2 operator*(const double a, const double2 x) {
  return make_double2(a*x.x, a*x.y);
}

/**
 * Scale every component of a double4 by a scalar.
 * @param a scalar factor (left operand)
 * @param x vector to scale
 * @return double4 holding (a*x.x, a*x.y, a*x.z, a*x.w)
 */
__host__ __device__ inline double4 operator*(const double a, const double4 x) {
  return make_double4(a*x.x, a*x.y, a*x.z, a*x.w);
}
66 
/**
 * Component-wise sum of two float2 values.
 * @param x left operand
 * @param y right operand
 * @return float2 holding (x.x + y.x, x.y + y.y)
 */
__host__ __device__ inline float2 operator+(const float2 x, const float2 y) {
  return make_float2(x.x + y.x, x.y + y.y);
}

/**
 * Component-wise sum of two float4 values.
 * @param x left operand
 * @param y right operand
 * @return float4 holding the four per-component sums x + y
 */
__host__ __device__ inline float4 operator+(const float4 x, const float4 y) {
  return make_float4(x.x + y.x, x.y + y.y, x.z + y.z, x.w + y.w);
}
82 
/**
 * In-place component-wise addition for float4.
 * @param x accumulator, updated in place
 * @param y addend (taken by value)
 * @return copy of the updated x (returned by value, not by reference)
 */
__host__ __device__ inline float4 operator+=(float4 &x, const float4 y) {
  x = make_float4(x.x + y.x, x.y + y.y, x.z + y.z, x.w + y.w);
  return x;
}

/**
 * In-place component-wise addition for float2.
 * @param x accumulator, updated in place
 * @param y addend (taken by value)
 * @return copy of the updated x (returned by value, not by reference)
 */
__host__ __device__ inline float2 operator+=(float2 &x, const float2 y) {
  x = make_float2(x.x + y.x, x.y + y.y);
  return x;
}

/**
 * In-place component-wise addition for double2.
 * @param x accumulator, updated in place
 * @param y addend (taken by value)
 * @return copy of the updated x (returned by value, not by reference)
 */
__host__ __device__ inline double2 operator+=(double2 &x, const double2 y) {
  x = make_double2(x.x + y.x, x.y + y.y);
  return x;
}

/**
 * In-place component-wise addition for double3.
 * @param x accumulator, updated in place
 * @param y addend (taken by value)
 * @return copy of the updated x (returned by value, not by reference)
 */
__host__ __device__ inline double3 operator+=(double3 &x, const double3 y) {
  x = make_double3(x.x + y.x, x.y + y.y, x.z + y.z);
  return x;
}
109 
/**
 * In-place component-wise subtraction for float4.
 * @param x value to subtract from, updated in place
 * @param y subtrahend (taken by value)
 * @return copy of the updated x (returned by value, not by reference)
 */
__host__ __device__ inline float4 operator-=(float4 &x, const float4 y) {
  x = make_float4(x.x - y.x, x.y - y.y, x.z - y.z, x.w - y.w);
  return x;
}

/**
 * In-place component-wise subtraction for float2.
 * @param x value to subtract from, updated in place
 * @param y subtrahend (taken by value)
 * @return copy of the updated x (returned by value, not by reference)
 */
__host__ __device__ inline float2 operator-=(float2 &x, const float2 y) {
  x = make_float2(x.x - y.x, x.y - y.y);
  return x;
}

/**
 * In-place component-wise subtraction for double2.
 * @param x value to subtract from, updated in place
 * @param y subtrahend (taken by value)
 * @return copy of the updated x (returned by value, not by reference)
 */
__host__ __device__ inline double2 operator-=(double2 &x, const double2 y) {
  x = make_double2(x.x - y.x, x.y - y.y);
  return x;
}
129 
/**
 * In-place scaling of a float2 by a single-precision scalar.
 * @param x vector scaled in place
 * @param a scalar factor (taken by value)
 * @return copy of the updated x (returned by value, not by reference)
 */
__host__ __device__ inline float2 operator*=(float2 &x, const float a) {
  x = make_float2(x.x * a, x.y * a);
  return x;
}

/**
 * In-place scaling of a double2 by a single-precision scalar.
 * The float factor is promoted to double in each multiplication.
 * @param x vector scaled in place
 * @param a scalar factor (taken by value)
 * @return copy of the updated x (returned by value, not by reference)
 */
__host__ __device__ inline double2 operator*=(double2 &x, const float a) {
  x = make_double2(x.x * a, x.y * a);
  return x;
}
141 
/**
 * In-place scaling of a float4 by a scalar.
 *
 * The scalar arrives by const reference and may alias a component of the
 * vector being scaled (e.g. `v *= v.x`). It is therefore copied to a local
 * first, so every component is multiplied by the same original value.
 *
 * @param a vector scaled in place
 * @param b scalar factor
 * @return copy of the updated a (returned by value, not by reference)
 */
__host__ __device__ inline float4 operator*=(float4 &a, const float &b) {
  const float scale = b;  // snapshot: b may refer to a.x/a.y/a.z/a.w
  a.x *= scale;
  a.y *= scale;
  a.z *= scale;
  a.w *= scale;
  return a;
}

/**
 * In-place scaling of a double2 by a scalar.
 *
 * The scalar is copied before use so that a call such as `v *= v.x` scales
 * both components by the original value of v.x.
 *
 * @param a vector scaled in place
 * @param b scalar factor
 * @return copy of the updated a (returned by value, not by reference)
 */
__host__ __device__ inline double2 operator*=(double2 &a, const double &b) {
  const double scale = b;  // snapshot: b may refer to a.x/a.y
  a.x *= scale;
  a.y *= scale;
  return a;
}

/**
 * In-place scaling of a double4 by a scalar.
 *
 * The scalar is copied before use so that a call such as `v *= v.x` scales
 * all four components by the original value of v.x.
 *
 * @param a vector scaled in place
 * @param b scalar factor
 * @return copy of the updated a (returned by value, not by reference)
 */
__host__ __device__ inline double4 operator*=(double4 &a, const double &b) {
  const double scale = b;  // snapshot: b may refer to a.x/a.y/a.z/a.w
  a.x *= scale;
  a.y *= scale;
  a.z *= scale;
  a.w *= scale;
  return a;
}
163 
/**
 * Unary negation of a float2.
 * @param x value to negate
 * @return float2 holding (-x.x, -x.y)
 */
__host__ __device__ inline float2 operator-(const float2 &x) {
  float2 neg;
  neg.x = -x.x;
  neg.y = -x.y;
  return neg;
}

/**
 * Unary negation of a double2.
 * @param x value to negate
 * @return double2 holding (-x.x, -x.y)
 */
__host__ __device__ inline double2 operator-(const double2 &x) {
  double2 neg;
  neg.x = -x.x;
  neg.y = -x.y;
  return neg;
}
171 
172 
173  /*
174  Operations to return the maximum absolute value of a FloatN vector
175  */
176 
/**
 * Largest absolute value among the four components of a float4.
 * @param c input vector
 * @return max(|c.x|, |c.y|, |c.z|, |c.w|), computed pairwise with fmaxf
 */
__forceinline__ __host__ __device__ float max_fabs(const float4 &c) {
  return fmaxf(fmaxf(fabsf(c.x), fabsf(c.y)),
               fmaxf(fabsf(c.z), fabsf(c.w)));
}

/**
 * Larger absolute value of the two components of a float2.
 * @param b input vector
 * @return max(|b.x|, |b.y|)
 */
__forceinline__ __host__ __device__ float max_fabs(const float2 &b) {
  const float abs_x = fabsf(b.x);
  const float abs_y = fabsf(b.y);
  return fmaxf(abs_x, abs_y);
}
186 
/**
 * Largest absolute value among the four components of a double4.
 *
 * Uses the double-precision fabs/fmax so the result keeps full double
 * precision and range; the single-precision fabsf/fmaxf would round the
 * inputs to float first and overflow for magnitudes above FLT_MAX.
 *
 * @param c input vector
 * @return max(|c.x|, |c.y|, |c.z|, |c.w|) in double precision
 */
__forceinline__ __host__ __device__ double max_fabs(const double4 &c) {
  double a = fmax(fabs(c.x), fabs(c.y));
  double b = fmax(fabs(c.z), fabs(c.w));
  return fmax(a, b);
}

/**
 * Larger absolute value of the two components of a double2.
 *
 * Uses the double-precision fabs/fmax so no precision or range is lost to
 * an intermediate float conversion.
 *
 * @param b input vector
 * @return max(|b.x|, |b.y|) in double precision
 */
__forceinline__ __host__ __device__ double max_fabs(const double2 &b) {
  return fmax(fabs(b.x), fabs(b.y));
}
196 
197  /*
198  Precision conversion routines for vector types
199  */
200 
/**
 * Convert a double2 to single precision.
 * @param a double-precision input
 * @return float2 whose components are a.x and a.y converted to float
 */
__forceinline__ __host__ __device__ float2 make_FloatN(const double2 &a) {
  float2 out;
  out.x = static_cast<float>(a.x);
  out.y = static_cast<float>(a.y);
  return out;
}

/**
 * Convert a double4 to single precision.
 * @param a double-precision input
 * @return float4 whose components are those of a converted to float
 */
__forceinline__ __host__ __device__ float4 make_FloatN(const double4 &a) {
  float4 out;
  out.x = static_cast<float>(a.x);
  out.y = static_cast<float>(a.y);
  out.z = static_cast<float>(a.z);
  out.w = static_cast<float>(a.w);
  return out;
}

/**
 * Convert a float2 to double precision.
 * @param a single-precision input
 * @return double2 whose components are a.x and a.y widened to double
 */
__forceinline__ __host__ __device__ double2 make_FloatN(const float2 &a) {
  double2 out;
  out.x = static_cast<double>(a.x);
  out.y = static_cast<double>(a.y);
  return out;
}

/**
 * Convert a float4 to double precision.
 * @param a single-precision input
 * @return double4 whose components are those of a widened to double
 */
__forceinline__ __host__ __device__ double4 make_FloatN(const float4 &a) {
  double4 out;
  out.x = static_cast<double>(a.x);
  out.y = static_cast<double>(a.y);
  out.z = static_cast<double>(a.z);
  out.w = static_cast<double>(a.w);
  return out;
}
216 
/**
 * Convert a float4 to a short4 by casting each component to short.
 * No scaling or rounding is applied here; the cast discards the
 * fractional part.
 * @param a floating-point input
 * @return short4 of the converted components
 */
__forceinline__ __host__ __device__ short4 make_shortN(const float4 &a) {
  short4 out;
  out.x = static_cast<short>(a.x);
  out.y = static_cast<short>(a.y);
  out.z = static_cast<short>(a.z);
  out.w = static_cast<short>(a.w);
  return out;
}

/**
 * Convert a float2 to a short2 by casting each component to short.
 * @param a floating-point input
 * @return short2 of the converted components
 */
__forceinline__ __host__ __device__ short2 make_shortN(const float2 &a) {
  short2 out;
  out.x = static_cast<short>(a.x);
  out.y = static_cast<short>(a.y);
  return out;
}

/**
 * Convert a double4 to a short4 by casting each component to short.
 * @param a floating-point input
 * @return short4 of the converted components
 */
__forceinline__ __host__ __device__ short4 make_shortN(const double4 &a) {
  short4 out;
  out.x = static_cast<short>(a.x);
  out.y = static_cast<short>(a.y);
  out.z = static_cast<short>(a.z);
  out.w = static_cast<short>(a.w);
  return out;
}

/**
 * Convert a double2 to a short2 by casting each component to short.
 * @param a floating-point input
 * @return short2 of the converted components
 */
__forceinline__ __host__ __device__ short2 make_shortN(const double2 &a) {
  short2 out;
  out.x = static_cast<short>(a.x);
  out.y = static_cast<short>(a.y);
  return out;
}
232 
233 }
__host__ __device__ float4 operator-=(float4 &x, const float4 y)
Definition: float_vector.h:110
int y[4]
__forceinline__ __host__ __device__ short4 make_shortN(const float4 &a)
Definition: float_vector.h:217
__host__ __device__ float2 operator*=(float2 &x, const float a)
Definition: float_vector.h:130
__host__ __device__ complex< ValueType > operator-(const complex< ValueType > &lhs, const complex< ValueType > &rhs)
Definition: complex_quda.h:673
int x[4]
__forceinline__ __host__ __device__ float2 make_FloatN(const double2 &a)
Definition: float_vector.h:201
__host__ __device__ complex< ValueType > operator*(const complex< ValueType > &lhs, const complex< ValueType > &rhs)
Definition: complex_quda.h:692
__forceinline__ __host__ __device__ float max_fabs(const float4 &c)
Definition: float_vector.h:177
__host__ __device__ float4 operator+=(float4 &x, const float4 y)
Definition: float_vector.h:83
__host__ __device__ complex< ValueType > operator+(const complex< ValueType > &lhs, const complex< ValueType > &rhs)
Definition: complex_quda.h:644