QUDA  v0.7.0
A library for QCD on GPUs
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
blas_cpu.cpp
Go to the documentation of this file.
1 #include <color_spinor_field.h>
2 #include <blas_quda.h>
3 #include <face_quda.h>
4 
5 namespace quda {
6 
7  template <typename Float>
8  void axpby(const Float &a, const Float *x, const Float &b, Float *y, const int N) {
9  for (int i=0; i<N; i++) y[i] = a*x[i] + b*y[i];
10  }
11 
12  void axpbyCpu(const double &a, const cpuColorSpinorField &x,
13  const double &b, cpuColorSpinorField &y) {
15  axpby(a, (double*)x.V(), b, (double*)y.V(), x.Length());
16  else if (x.Precision() == QUDA_SINGLE_PRECISION)
17  axpby((float)a, (float*)x.V(), (float)b, (float*)y.V(), x.Length());
18  else
19  errorQuda("Precision type %d not implemented", x.Precision());
20  }
21 
24  axpby(1.0, (double*)x.V(), 1.0, (double*)y.V(), x.Length());
25  else if (x.Precision() == QUDA_SINGLE_PRECISION)
26  axpby(1.0f, (float*)x.V(), 1.0f, (float*)y.V(), x.Length());
27  else
28  errorQuda("Precision type %d not implemented", x.Precision());
29  }
30 
31  void axpyCpu(const double &a, const cpuColorSpinorField &x,
34  axpby(a, (double*)x.V(), 1.0, (double*)y.V(), x.Length());
35  else if (x.Precision() == QUDA_SINGLE_PRECISION)
36  axpby((float)a, (float*)x.V(), 1.0f, (float*)y.V(), x.Length());
37  else
38  errorQuda("Precision type %d not implemented", x.Precision());
39  }
40 
41  void xpayCpu(const cpuColorSpinorField &x, const double &a,
44  axpby(1.0, (double*)x.V(), a, (double*)y.V(), x.Length());
45  else if (x.Precision() == QUDA_SINGLE_PRECISION)
46  axpby(1.0f, (float*)x.V(), (float)a, (float*)y.V(), x.Length());
47  else
48  errorQuda("Precision type %d not implemented", x.Precision());
49  }
50 
53  axpby(-1.0, (double*)x.V(), 1.0, (double*)y.V(), x.Length());
54  else if (x.Precision() == QUDA_SINGLE_PRECISION)
55  axpby(-1.0f, (float*)x.V(), 1.0f, (float*)y.V(), x.Length());
56  else
57  errorQuda("Precision type %d not implemented", x.Precision());
58  }
59 
60  void axCpu(const double &a, cpuColorSpinorField &x) {
62  axpby(0.0, (double*)x.V(), a, (double*)x.V(), x.Length());
63  else if (x.Precision() == QUDA_SINGLE_PRECISION)
64  axpby(0.0f, (float*)x.V(), (float)a, (float*)x.V(), x.Length());
65  else
66  errorQuda("Precision type %d not implemented", x.Precision());
67  }
68 
69  template <typename Float>
70  void caxpby(const std::complex<Float> &a, const std::complex<Float> *x,
71  const std::complex<Float> &b, std::complex<Float> *y, int N) {
72 
73  for (int i=0; i<N; i++) {
74  y[i] = a*x[i] + b*y[i];
75  }
76 
77  }
78 
79  void caxpyCpu(const Complex &a, const cpuColorSpinorField &x,
81 
82  if ( x.Precision() == QUDA_DOUBLE_PRECISION)
83  caxpby(a, (Complex*)x.V(), Complex(1.0),
84  (Complex*)y.V(), x.Length()/2);
85  else if (x.Precision() == QUDA_SINGLE_PRECISION)
86  caxpby((std::complex<float>)a, (std::complex<float>*)x.V(), std::complex<float>(1.0),
87  (std::complex<float>*)y.V(), x.Length()/2);
88  else
89  errorQuda("Precision type %d not implemented", x.Precision());
90  }
91 
92  void caxpbyCpu(const Complex &a, const cpuColorSpinorField &x,
93  const Complex &b, cpuColorSpinorField &y) {
94 
95  if ( x.Precision() == QUDA_DOUBLE_PRECISION)
96  caxpby(a, (Complex*)x.V(), b, (Complex*)y.V(), x.Length()/2);
97  else if (x.Precision() == QUDA_SINGLE_PRECISION)
98  caxpby((std::complex<float>)a, (std::complex<float>*)x.V(), (std::complex<float>)b,
99  (std::complex<float>*)y.V(), x.Length()/2);
100  else
101  errorQuda("Precision type %d not implemented", x.Precision());
102  }
103 
104  template <typename Float>
105  void caxpbypcz(const std::complex<Float> &a, const std::complex<Float> *x,
106  const std::complex<Float> &b, const std::complex<Float> *y,
107  const std::complex<Float> &c, std::complex<Float> *z, int N) {
108 
109  for (int i=0; i<N; i++) {
110  z[i] = a*x[i] + b*y[i] + c*z[i];
111  }
112 
113  }
114 
115  void cxpaypbzCpu(const cpuColorSpinorField &x, const Complex &a,
116  const cpuColorSpinorField &y, const Complex &b,
117  cpuColorSpinorField &z) {
118 
119  if (x.Precision() == QUDA_DOUBLE_PRECISION)
120  caxpbypcz(Complex(1, 0), (Complex*)x.V(), a, (Complex*)y.V(),
121  b, (Complex*)z.V(), x.Length()/2);
122  else if (x.Precision() == QUDA_SINGLE_PRECISION)
123  caxpbypcz(std::complex<float>(1, 0), (std::complex<float>*)x.V(), (std::complex<float>)a, (std::complex<float>*)y.V(),
124  (std::complex<float>)b, (std::complex<float>*)z.V(), x.Length()/2);
125  else
126  errorQuda("Precision type %d not implemented", x.Precision());
127  }
128 
130  const double &b, const cpuColorSpinorField& z, const double &c) {
131  axpyCpu(a, x, y);
132  axpbyCpu(b, z, c, x);
133  }
134 
135  // performs the operations: {y[i] = a*x[i] + y[i]; x[i] = z[i] + b*x[i]}
137  const cpuColorSpinorField &z, const double &b) {
138  axpyCpu(a, x, y);
139  xpayCpu(z, b, x);
140  }
141 
142  // performs the operation z[i] = a*x[i] + b*y[i] + z[i] and y[i] -= b*w[i]
143  void caxpbypzYmbwCpu(const Complex &a, const cpuColorSpinorField &x, const Complex &b,
145 
146  if (x.Precision() == QUDA_DOUBLE_PRECISION)
147  caxpbypcz(a, (Complex*)x.V(), b, (Complex*)y.V(),
148  Complex(1, 0), (Complex*)z.V(), x.Length()/2);
149  else if (x.Precision() == QUDA_SINGLE_PRECISION)
150  caxpbypcz((std::complex<float>)a, (std::complex<float>*)x.V(),
151  (std::complex<float>)b, (std::complex<float>*)y.V(),
152  (std::complex<float>)(1.0f), (std::complex<float>*)z.V(), x.Length()/2);
153  else
154  errorQuda("Precision type %d not implemented", x.Precision());
155 
156  caxpyCpu(-b, w, y);
157  }
158 
159  template <typename Float>
160  double norm(const Float *a, const int N) {
161  double norm2 = 0;
162  for (int i=0; i<N; i++) norm2 += a[i]*a[i];
163  return norm2;
164  }
165 
166  double normCpu(const cpuColorSpinorField &a) {
167  double norm2 = 0.0;
168  if (a.Precision() == QUDA_DOUBLE_PRECISION)
169  norm2 = norm((double*)a.V(), a.Length());
170  else if (a.Precision() == QUDA_SINGLE_PRECISION)
171  norm2 = norm((float*)a.V(), a.Length());
172  else
173  errorQuda("Precision type %d not implemented", a.Precision());
174  reduceDouble(norm2);
175  return norm2;
176  }
177 
178  double axpyNormCpu(const double &a, const cpuColorSpinorField &x,
180  axpyCpu(a, x, y);
181  return normCpu(y);
182  }
183 
184  template <typename Float>
185  double reDotProduct(const Float *a, const Float *b, const int N) {
186  double dot = 0;
187  for (int i=0; i<N; i++) dot += a[i]*b[i];
188  return dot;
189  }
190 
192  double dot = 0.0;
193  if (a.Precision() == QUDA_DOUBLE_PRECISION)
194  dot = reDotProduct((double*)a.V(), (double*)b.V(), a.Length());
195  else if (a.Precision() == QUDA_SINGLE_PRECISION)
196  dot = reDotProduct((float*)a.V(), (float*)b.V(), a.Length());
197  else
198  errorQuda("Precision type %d not implemented", a.Precision());
199  reduceDouble(dot);
200  return dot;
201  }
202 
203  // First performs the operation y[i] = x[i] - y[i]
204  // Second returns the norm of y
206  xpayCpu(x, -1, y);
207  return normCpu(y);
208  }
209 
210  template <typename Float>
211  Complex cDotProduct(const std::complex<Float> *a, const std::complex<Float> *b, const int N) {
212  quda::Complex dot = 0;
213  for (int i=0; i<N; i++) dot += conj(a[i])*b[i];
214  return dot;
215  }
216 
218  Complex dot = 0.0;
219  if (a.Precision() == QUDA_DOUBLE_PRECISION)
220  dot = cDotProduct((Complex*)a.V(), (Complex*)b.V(), a.Length()/2);
221  else if (a.Precision() == QUDA_SINGLE_PRECISION)
222  dot = cDotProduct((std::complex<float>*)a.V(), (std::complex<float>*)b.V(), a.Length()/2);
223  else
224  errorQuda("Precision type %d not implemented", a.Precision());
225  reduceDoubleArray((double*)&dot, 2);
226  return dot;
227  }
228 
229  // First performs the operation y = x + a*y
230  // Second returns complex dot product (z,y)
231  Complex xpaycDotzyCpu(const cpuColorSpinorField &x, const double &a,
233  xpayCpu(x, a, y);
234  return cDotProductCpu(z,y);
235  }
236 
238  Complex dot = cDotProductCpu(a, b);
239  double norm = normCpu(a);
240  return make_double3(real(dot), imag(dot), norm);
241  }
242 
244  Complex dot = cDotProductCpu(a, b);
245  double norm = normCpu(b);
246  return make_double3(real(dot), imag(dot), norm);
247  }
248 
249  // This convoluted kernel does the following: z += a*x + b*y, y -= b*w, norm = (y,y), dot = (u, y)
251  const Complex &b, cpuColorSpinorField &y,
253  const cpuColorSpinorField &u) {
254 
255  caxpbypzYmbwCpu(a, x, b, y, z, w);
256  return cDotProductNormBCpu(u, y);
257  }
258 
259  void cabxpyAxCpu(const double &a, const Complex &b, cpuColorSpinorField &x, cpuColorSpinorField &y) {
260  axCpu(a, x);
261  caxpyCpu(b, x, y);
262  }
263 
266  caxpyCpu(a, x, y);
267  return norm2(y);
268  }
269 
272  caxpyCpu(a, x, y);
273  caxpyCpu(-a, z, x);
274  return norm2(x);
275  }
276 
279  caxpyCpu(a, x, y);
280  caxpyCpu(-a, z, x);
281  }
282 
283  double cabxpyAxNormCpu(const double &a, const Complex &b, cpuColorSpinorField &x, cpuColorSpinorField &y) {
284  axCpu(a, x);
285  caxpyCpu(b, x, y);
286  return norm2(y);
287  }
288 
290  cpuColorSpinorField &z) {
291  caxpyCpu(a, x, z);
292  caxpyCpu(b, y, z);
293  }
294 
297  caxpyCpu(a, x, w);
298  caxpyCpu(b, y, w);
299  caxpyCpu(c, z, w);
300 
301  }
302 
304  cpuColorSpinorField &z) {
305  caxpyCpu(a, x, y);
306  return cDotProductCpu(z, y);
307  }
308 
309  template <typename Float>
310  double3 HeavyQuarkResidualNorm(const Float *x, const Float *r, const int volume, const int Nint) {
311 
312  double3 sum = make_double3(0.0, 0.0, 0.0);
313  for (int i = 0; i<volume; i++) {
314  double x2 = 0;
315  double r2 = 0;
316 
317  for (int j=0; j<Nint; j++) { // loop over internal degrees of freedom
318  int k = i*Nint + j;
319  x2 += x[k]*x[k];
320  r2 += r[k]*r[k];
321  }
322 
323  sum.x += x2;
324  sum.y += r2;
325  sum.z += (x2 > 0.0) ? (r2 / x2) : 1.0;
326  }
327  return sum;
328  }
329 
330 
332  double3 rtn;
333  if (x.Precision() == QUDA_DOUBLE_PRECISION) {
334  rtn = HeavyQuarkResidualNorm<double>((const double*)(x.V()), (const double*)(r.V()),
335  x.Volume(), 2*x.Ncolor()*x.Nspin());
336  } else if (x.Precision() == QUDA_SINGLE_PRECISION) {
337  rtn = HeavyQuarkResidualNorm<float>((const float*)(x.V()), (const float*)(r.V()),
338  x.Volume(), 2*x.Ncolor()*x.Nspin());
339  } else {
340  errorQuda("Precision type %d not implemented", x.Precision());
341  }
342 #ifdef MULTI_GPU
343  rtn.z /= (x.Volume()*comm_size());
344 #else
345  rtn.z /= x.Volume();
346 #endif
347  reduceDoubleArray((double*)&rtn, 3);
348 
349  return rtn;
350  }
351 
354  xpyCpu(y, tmp);
355  return HeavyQuarkResidualNormCpu(tmp, r);
356  }
357 
358 } // namespace quda
359 
void caxpbypcz(const std::complex< Float > &a, const std::complex< Float > *x, const std::complex< Float > &b, const std::complex< Float > *y, const std::complex< Float > &c, std::complex< Float > *z, int N)
Definition: blas_cpu.cpp:105
void caxpbypzYmbwCpu(const Complex &, const cpuColorSpinorField &, const Complex &, cpuColorSpinorField &, cpuColorSpinorField &, const cpuColorSpinorField &)
Definition: blas_cpu.cpp:143
void mxpyCpu(const cpuColorSpinorField &x, cpuColorSpinorField &y)
Definition: blas_cpu.cpp:51
int y[4]
Complex xpaycDotzyCpu(const cpuColorSpinorField &x, const double &a, cpuColorSpinorField &y, const cpuColorSpinorField &z)
Definition: blas_cpu.cpp:231
__host__ __device__ ValueType norm(const complex< ValueType > &z)
Returns the magnitude of z squared.
Definition: complex_quda.h:859
void cabxpyAxCpu(const double &a, const Complex &b, cpuColorSpinorField &x, cpuColorSpinorField &y)
Definition: blas_cpu.cpp:259
#define errorQuda(...)
Definition: util_quda.h:73
void axpby(const Float &a, const Float *x, const Float &b, Float *y, const int N)
Definition: blas_cpu.cpp:8
void xpayCpu(const cpuColorSpinorField &x, const double &a, cpuColorSpinorField &y)
Definition: blas_cpu.cpp:41
std::complex< double > Complex
Definition: eig_variables.h:13
void xpyCpu(const cpuColorSpinorField &x, cpuColorSpinorField &y)
Definition: blas_cpu.cpp:22
void caxpby(const std::complex< Float > &a, const std::complex< Float > *x, const std::complex< Float > &b, std::complex< Float > *y, int N)
Definition: blas_cpu.cpp:70
void axpyZpbxCpu(const double &a, cpuColorSpinorField &x, cpuColorSpinorField &y, const cpuColorSpinorField &z, const double &b)
Definition: blas_cpu.cpp:136
Complex cDotProduct(const std::complex< Float > *a, const std::complex< Float > *b, const int N)
Definition: blas_cpu.cpp:211
int comm_size(void)
Definition: comm_mpi.cpp:86
cudaColorSpinorField * tmp
void caxpbyCpu(const Complex &a, const cpuColorSpinorField &x, const Complex &b, cpuColorSpinorField &y)
Definition: blas_cpu.cpp:92
double3 caxpbypzYmbwcDotProductUYNormYCpu(const Complex &a, const cpuColorSpinorField &x, const Complex &b, cpuColorSpinorField &y, cpuColorSpinorField &z, const cpuColorSpinorField &w, const cpuColorSpinorField &u)
Definition: blas_cpu.cpp:250
FloatingPoint< float > Float
Definition: gtest.h:7350
Complex cDotProductCpu(const cpuColorSpinorField &, const cpuColorSpinorField &)
Definition: blas_cpu.cpp:217
void reduceDoubleArray(double *, const int len)
void cxpaypbzCpu(const cpuColorSpinorField &x, const Complex &b, const cpuColorSpinorField &y, const Complex &c, cpuColorSpinorField &z)
Definition: blas_cpu.cpp:115
void caxpbypzCpu(const Complex &, cpuColorSpinorField &, const Complex &, cpuColorSpinorField &, cpuColorSpinorField &)
Definition: blas_cpu.cpp:289
void caxpyCpu(const Complex &a, const cpuColorSpinorField &x, cpuColorSpinorField &y)
Definition: blas_cpu.cpp:79
void axpbyCpu(const double &a, const cpuColorSpinorField &x, const double &b, cpuColorSpinorField &y)
Definition: blas_cpu.cpp:12
double reDotProduct(const Float *a, const Float *b, const int N)
Definition: blas_cpu.cpp:185
void axpyBzpcxCpu(const double &a, cpuColorSpinorField &x, cpuColorSpinorField &y, const double &b, const cpuColorSpinorField &z, const double &c)
Definition: blas_cpu.cpp:129
int x[4]
void axCpu(const double &a, cpuColorSpinorField &x)
Definition: blas_cpu.cpp:60
double cabxpyAxNormCpu(const double &a, const Complex &b, cpuColorSpinorField &x, cpuColorSpinorField &y)
Definition: blas_cpu.cpp:283
void caxpyXmazCpu(const Complex &a, cpuColorSpinorField &x, cpuColorSpinorField &y, cpuColorSpinorField &z)
Definition: blas_cpu.cpp:277
void caxpbypczpwCpu(const Complex &, cpuColorSpinorField &, const Complex &, cpuColorSpinorField &, const Complex &, cpuColorSpinorField &, cpuColorSpinorField &)
Definition: blas_cpu.cpp:295
double caxpyNormCpu(const Complex &a, cpuColorSpinorField &x, cpuColorSpinorField &y)
Definition: blas_cpu.cpp:264
QudaPrecision Precision() const
void reduceDouble(double &)
double normCpu(const cpuColorSpinorField &b)
Definition: blas_cpu.cpp:166
double3 HeavyQuarkResidualNorm(const Float *x, const Float *r, const int volume, const int Nint)
Definition: blas_cpu.cpp:310
double axpyNormCpu(const double &a, const cpuColorSpinorField &x, cpuColorSpinorField &y)
Definition: blas_cpu.cpp:178
double reDotProductCpu(const cpuColorSpinorField &a, const cpuColorSpinorField &b)
Definition: blas_cpu.cpp:191
double3 cDotProductNormACpu(const cpuColorSpinorField &a, const cpuColorSpinorField &b)
Definition: blas_cpu.cpp:237
double caxpyXmazNormXCpu(const Complex &a, cpuColorSpinorField &x, cpuColorSpinorField &y, cpuColorSpinorField &z)
Definition: blas_cpu.cpp:270
double3 HeavyQuarkResidualNormCpu(cpuColorSpinorField &x, cpuColorSpinorField &r)
Definition: blas_cpu.cpp:331
double xmyNormCpu(const cpuColorSpinorField &a, cpuColorSpinorField &b)
Definition: blas_cpu.cpp:205
__host__ __device__ ValueType conj(ValueType x)
Definition: complex_quda.h:115
Complex caxpyDotzyCpu(const Complex &a, cpuColorSpinorField &x, cpuColorSpinorField &y, cpuColorSpinorField &z)
Definition: blas_cpu.cpp:303
double3 cDotProductNormBCpu(const cpuColorSpinorField &a, const cpuColorSpinorField &b)
Definition: blas_cpu.cpp:243
double norm2(const ColorSpinorField &)
void axpyCpu(const double &a, const cpuColorSpinorField &x, cpuColorSpinorField &y)
Definition: blas_cpu.cpp:31