QUDA  v1.1.0
A library for QCD on GPUs
blas_quda.h
Go to the documentation of this file.
1 #ifndef _QUDA_BLAS_H
2 #define _QUDA_BLAS_H
3 
4 #include <quda_internal.h>
5 #include <color_spinor_field.h>
6 
7 // ---------- blas_quda.cu ----------
8 
9 namespace quda {
10 
11  namespace blas {
12 
13  // creates and destroys reduction buffers
14  void init();
15  void destroy();
16 
17  void setParam(int kernel, int prec, int threads, int blocks);
18 
19  extern unsigned long long flops;
20  extern unsigned long long bytes;
21 
23 
24  inline void copy(ColorSpinorField &dst, const ColorSpinorField &src)
25  {
26  if (&dst == &src) return;
27 
29  static_cast<cudaColorSpinorField &>(dst).copy(static_cast<const cudaColorSpinorField &>(src));
30  } else if (dst.Location() == QUDA_CPU_FIELD_LOCATION && src.Location() == QUDA_CPU_FIELD_LOCATION) {
31  static_cast<cpuColorSpinorField &>(dst).copy(static_cast<const cpuColorSpinorField &>(src));
32  } else {
33  errorQuda("Cannot call copy with fields with different locations");
34  }
35  }
36 
37  void ax(double a, ColorSpinorField &x);
38 
39  void axpbyz(double a, ColorSpinorField &x, double b, ColorSpinorField &y, ColorSpinorField &z);
40 
41  inline void xpy(ColorSpinorField &x, ColorSpinorField &y) { axpbyz(1.0, x, 1.0, y, y); }
42  inline void mxpy(ColorSpinorField &x, ColorSpinorField &y) { axpbyz(-1.0, x, 1.0, y, y); }
43  inline void axpy(double a, ColorSpinorField &x, ColorSpinorField &y) { axpbyz(a, x, 1.0, y, y); }
44  inline void axpby(double a, ColorSpinorField &x, double b, ColorSpinorField &y) { axpbyz(a, x, b, y, y); }
45  inline void xpay(ColorSpinorField &x, double a, ColorSpinorField &y) { axpbyz(1.0, x, a, y, y); }
46  inline void xpayz(ColorSpinorField &x, double a, ColorSpinorField &y, ColorSpinorField &z) { axpbyz(1.0, x, a, y, z); }
47 
48  void axpyZpbx(double a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, double b);
49  void axpyBzpcx(double a, ColorSpinorField& x, ColorSpinorField& y, double b, ColorSpinorField& z, double c);
50 
51  void caxpby(const Complex &a, ColorSpinorField &x, const Complex &b, ColorSpinorField &y);
53  void caxpbypczw(const Complex &a, ColorSpinorField &x, const Complex &b, ColorSpinorField &y, const Complex &c,
59 
60  void cabxpyAx(double a, const Complex &b, ColorSpinorField &x, ColorSpinorField &y);
61  void caxpyXmaz(const Complex &a, ColorSpinorField &x,
64 
65  void tripleCGUpdate(double alpha, double beta, ColorSpinorField &q,
67 
68  // reduction kernels - defined in reduce_quda.cu
69 
70  double norm1(const ColorSpinorField &b);
71  double norm2(const ColorSpinorField &a);
72 
73  double axpyReDot(double a, ColorSpinorField &x, ColorSpinorField &y);
74 
76 
77  double axpbyzNorm(double a, ColorSpinorField &x, double b, ColorSpinorField &y, ColorSpinorField &z);
78  inline double axpyNorm(double a, ColorSpinorField &x, ColorSpinorField &y) { return axpbyzNorm(a, x, 1.0, y, y); }
79  inline double xmyNorm(ColorSpinorField &x, ColorSpinorField &y) { return axpbyzNorm(1.0, x, -1.0, y, y); }
80 
83 
88  double3 a3 = cDotProductNormA(b, a);
89  return make_double3(a3.x, -a3.y, a3.z);
90  }
91 
94 
99 
101  ColorSpinorField &z);
105 
108 
111 
112  // multi-blas kernels - defined in multi_blas.cu
113 
122  void axpy(const double *a, std::vector<ColorSpinorField *> &x, std::vector<ColorSpinorField *> &y);
123 
132  void axpy(const double *a, ColorSpinorField &x, ColorSpinorField &y);
133 
146  void axpy_U(const double *a, std::vector<ColorSpinorField *> &x, std::vector<ColorSpinorField *> &y);
147 
158  void axpy_U(const double *a, ColorSpinorField &x, ColorSpinorField &y);
159 
172  void axpy_L(const double *a, std::vector<ColorSpinorField *> &x, std::vector<ColorSpinorField *> &y);
173 
184  void axpy_L(const double *a, ColorSpinorField &x, ColorSpinorField &y);
185 
199  void caxpy(const Complex *a, std::vector<ColorSpinorField*> &x, std::vector<ColorSpinorField*> &y);
200 
212 
225  void caxpy_U(const Complex *a, std::vector<ColorSpinorField*> &x, std::vector<ColorSpinorField*> &y);
226 
238 
251  void caxpy_L(const Complex *a, std::vector<ColorSpinorField*> &x, std::vector<ColorSpinorField*> &y);
252 
264 
280  void caxpyz(const Complex *a, std::vector<ColorSpinorField*> &x, std::vector<ColorSpinorField*> &y, std::vector<ColorSpinorField*> &z);
281 
294 
308  void caxpyz_U(const Complex *a, std::vector<ColorSpinorField*> &x, std::vector<ColorSpinorField*> &y, std::vector<ColorSpinorField*> &z);
309 
322 
336  void caxpyz_L(const Complex *a, std::vector<ColorSpinorField*> &x, std::vector<ColorSpinorField*> &y, std::vector<ColorSpinorField*> &z);
337 
350 
369  void axpyBzpcx(const double *a, std::vector<ColorSpinorField*> &x, std::vector<ColorSpinorField*> &y,
370  const double *b, ColorSpinorField &z, const double *c);
371 
389  void caxpyBxpz(const Complex *a_, std::vector<ColorSpinorField*> &x_, ColorSpinorField &y_,
390  const Complex *b_, ColorSpinorField &z_);
391 
392 
393  // multi-reduce kernels - defined in multi_reduce.cu
394 
395  void reDotProduct(double* result, std::vector<ColorSpinorField*>& a, std::vector<ColorSpinorField*>& b);
396 
404  void cDotProduct(Complex* result, std::vector<ColorSpinorField*>& a, std::vector<ColorSpinorField*>& b);
405 
416  void hDotProduct(Complex* result, std::vector<ColorSpinorField*>& a, std::vector<ColorSpinorField*>& b);
417 
430  void hDotProduct_Anorm(Complex* result, std::vector<ColorSpinorField*>& a, std::vector<ColorSpinorField*>& b);
431 
432 
441  void cDotProductCopy(Complex* result, std::vector<ColorSpinorField*>& a, std::vector<ColorSpinorField*>& b, std::vector<ColorSpinorField*>& c);
442 
443  } // namespace blas
444 
445 } // namespace quda
446 
447 #endif // _QUDA_BLAS_H
QudaFieldLocation Location() const
QudaPrecision prec
@ QUDA_CUDA_FIELD_LOCATION
Definition: enum_quda.h:326
@ QUDA_CPU_FIELD_LOCATION
Definition: enum_quda.h:325
void destroy()
double axpbyzNorm(double a, ColorSpinorField &x, double b, ColorSpinorField &y, ColorSpinorField &z)
double axpyReDot(double a, ColorSpinorField &x, ColorSpinorField &y)
double4 quadrupleCGReduction(ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
Complex axpyCGNorm(double a, ColorSpinorField &x, ColorSpinorField &y)
Complex caxpyDotzy(const Complex &a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
void setParam(int kernel, int prec, int threads, int blocks)
double quadrupleCG3UpdateNorm(double a, double b, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &v)
void caxpbypzYmbw(const Complex &, ColorSpinorField &, const Complex &, ColorSpinorField &, ColorSpinorField &, ColorSpinorField &)
void axpyZpbx(double a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, double b)
void caxpyz_L(const Complex *a, std::vector< ColorSpinorField * > &x, std::vector< ColorSpinorField * > &y, std::vector< ColorSpinorField * > &z)
Compute the block "caxpyz" with over the set of ColorSpinorFields. E.g., it computes.
double norm1(const ColorSpinorField &b)
void xpayz(ColorSpinorField &x, double a, ColorSpinorField &y, ColorSpinorField &z)
Definition: blas_quda.h:46
void init()
void cabxpyAx(double a, const Complex &b, ColorSpinorField &x, ColorSpinorField &y)
void caxpby(const Complex &a, ColorSpinorField &x, const Complex &b, ColorSpinorField &y)
double3 HeavyQuarkResidualNorm(ColorSpinorField &x, ColorSpinorField &r)
void cDotProductCopy(Complex *result, std::vector< ColorSpinorField * > &a, std::vector< ColorSpinorField * > &b, std::vector< ColorSpinorField * > &c)
Computes the matrix of inner products between the vector set a and the vector set b,...
void caxpyz(const Complex *a, std::vector< ColorSpinorField * > &x, std::vector< ColorSpinorField * > &y, std::vector< ColorSpinorField * > &z)
Compute the block "caxpyz" with over the set of ColorSpinorFields. E.g., it computes.
double xmyNorm(ColorSpinorField &x, ColorSpinorField &y)
Definition: blas_quda.h:79
double3 tripleCGReduction(ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
void caxpyBzpx(const Complex &, ColorSpinorField &, ColorSpinorField &, const Complex &, ColorSpinorField &)
unsigned long long flops
double caxpyXmazNormX(const Complex &a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
void axpbyz(double a, ColorSpinorField &x, double b, ColorSpinorField &y, ColorSpinorField &z)
double3 caxpbypzYmbwcDotProductUYNormY(const Complex &a, ColorSpinorField &x, const Complex &b, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &u)
unsigned long long bytes
void caxpbypczw(const Complex &a, ColorSpinorField &x, const Complex &b, ColorSpinorField &y, const Complex &c, ColorSpinorField &z, ColorSpinorField &w)
void axpyBzpcx(double a, ColorSpinorField &x, ColorSpinorField &y, double b, ColorSpinorField &z, double c)
void xpay(ColorSpinorField &x, double a, ColorSpinorField &y)
Definition: blas_quda.h:45
void caxpyBxpz(const Complex &, ColorSpinorField &, ColorSpinorField &, const Complex &, ColorSpinorField &)
double cabxpyzAxNorm(double a, const Complex &b, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
double quadrupleCG3InitNorm(double a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &v)
void hDotProduct(Complex *result, std::vector< ColorSpinorField * > &a, std::vector< ColorSpinorField * > &b)
Computes the matrix of inner products between the vector set a and the vector set b....
double3 xpyHeavyQuarkResidualNorm(ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &r)
void caxpyXmaz(const Complex &a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
void ax(double a, ColorSpinorField &x)
void caxpyXmazMR(const double &a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
void axpy_L(const double *a, std::vector< ColorSpinorField * > &x, std::vector< ColorSpinorField * > &y)
Compute the block "axpy_L" with over the set of ColorSpinorFields. E.g., it computes.
double caxpyNorm(const Complex &a, ColorSpinorField &x, ColorSpinorField &y)
void zero(ColorSpinorField &a)
double norm2(const ColorSpinorField &a)
void caxpy_U(const Complex *a, std::vector< ColorSpinorField * > &x, std::vector< ColorSpinorField * > &y)
Compute the block "caxpy_U" with over the set of ColorSpinorFields. E.g., it computes.
void caxpy_L(const Complex *a, std::vector< ColorSpinorField * > &x, std::vector< ColorSpinorField * > &y)
Compute the block "caxpy_L" with over the set of ColorSpinorFields. E.g., it computes.
void tripleCGUpdate(double alpha, double beta, ColorSpinorField &q, ColorSpinorField &r, ColorSpinorField &x, ColorSpinorField &p)
double reDotProduct(ColorSpinorField &x, ColorSpinorField &y)
void axpy_U(const double *a, std::vector< ColorSpinorField * > &x, std::vector< ColorSpinorField * > &y)
Compute the block "axpy_U" with over the set of ColorSpinorFields. E.g., it computes.
double axpyNorm(double a, ColorSpinorField &x, ColorSpinorField &y)
Definition: blas_quda.h:78
void axpy(double a, ColorSpinorField &x, ColorSpinorField &y)
Definition: blas_quda.h:43
double3 cDotProductNormA(ColorSpinorField &a, ColorSpinorField &b)
double3 cDotProductNormB(ColorSpinorField &a, ColorSpinorField &b)
Return (a,b) and ||b||^2 - implemented using cDotProductNormA.
Definition: blas_quda.h:87
void hDotProduct_Anorm(Complex *result, std::vector< ColorSpinorField * > &a, std::vector< ColorSpinorField * > &b)
Computes the matrix of inner products between the vector set a and the vector set b....
void caxpy(const Complex &a, ColorSpinorField &x, ColorSpinorField &y)
void cxpaypbz(ColorSpinorField &, const Complex &b, ColorSpinorField &y, const Complex &c, ColorSpinorField &z)
void xpy(ColorSpinorField &x, ColorSpinorField &y)
Definition: blas_quda.h:41
void mxpy(ColorSpinorField &x, ColorSpinorField &y)
Definition: blas_quda.h:42
void copy(ColorSpinorField &dst, const ColorSpinorField &src)
Definition: blas_quda.h:24
Complex cDotProduct(ColorSpinorField &, ColorSpinorField &)
void axpby(double a, ColorSpinorField &x, double b, ColorSpinorField &y)
Definition: blas_quda.h:44
void caxpyz_U(const Complex *a, std::vector< ColorSpinorField * > &x, std::vector< ColorSpinorField * > &y, std::vector< ColorSpinorField * > &z)
Compute the block "caxpyz" with over the set of ColorSpinorFields. E.g., it computes.
std::complex< double > Complex
Definition: quda_internal.h:86
#define errorQuda(...)
Definition: util_quda.h:120