QUDA  1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
blas_quda.h
Go to the documentation of this file.
1 #ifndef _QUDA_BLAS_H
2 #define _QUDA_BLAS_H
3 
4 #include <quda_internal.h>
5 #include <color_spinor_field.h>
6 
7 // ---------- blas_quda.cu ----------
8 
9 namespace quda {
10 
11  namespace blas {
12 
13  // creates and destroys reduction buffers
14  void init();
15  void end(void);
16 
17  void* getDeviceReduceBuffer();
19  void* getHostReduceBuffer();
20 
21  void setParam(int kernel, int prec, int threads, int blocks);
22 
23  extern unsigned long long flops;
24  extern unsigned long long bytes;
25 
26  void zero(ColorSpinorField &a);
27  void copy(ColorSpinorField &dst, const ColorSpinorField &src);
28 
29  void ax(double a, ColorSpinorField &x);
30 
31  void axpbyz(double a, ColorSpinorField &x, double b, ColorSpinorField &y, ColorSpinorField &z);
32 
33  inline void xpy(ColorSpinorField &x, ColorSpinorField &y) { axpbyz(1.0, x, 1.0, y, y); }
34  inline void mxpy(ColorSpinorField &x, ColorSpinorField &y) { axpbyz(-1.0, x, 1.0, y, y); }
35  inline void axpy(double a, ColorSpinorField &x, ColorSpinorField &y) { axpbyz(a, x, 1.0, y, y); }
36  inline void axpby(double a, ColorSpinorField &x, double b, ColorSpinorField &y) { axpbyz(a, x, b, y, y); }
37  inline void xpay(ColorSpinorField &x, double a, ColorSpinorField &y) { axpbyz(1.0, x, a, y, y); }
38  inline void xpayz(ColorSpinorField &x, double a, ColorSpinorField &y, ColorSpinorField &z) { axpbyz(1.0, x, a, y, z); }
39 
40  void axpyZpbx(double a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, double b);
41  void axpyBzpcx(double a, ColorSpinorField& x, ColorSpinorField& y, double b, ColorSpinorField& z, double c);
42 
43  void caxpby(const Complex &a, ColorSpinorField &x, const Complex &b, ColorSpinorField &y);
44  void caxpy(const Complex &a, ColorSpinorField &x, ColorSpinorField &y);
45  void caxpbypczw(const Complex &a, ColorSpinorField &x, const Complex &b, ColorSpinorField &y, const Complex &c,
47  void cxpaypbz(ColorSpinorField &, const Complex &b, ColorSpinorField &y, const Complex &c, ColorSpinorField &z);
51 
52  void cabxpyAx(double a, const Complex &b, ColorSpinorField &x, ColorSpinorField &y);
53  void caxpyXmaz(const Complex &a, ColorSpinorField &x,
55  void caxpyXmazMR(const Complex &a, ColorSpinorField &x,
57 
58  void tripleCGUpdate(double alpha, double beta, ColorSpinorField &q,
61  void doubleCG3Update(double a, double b, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z);
62 
63 
64  // reduction kernels - defined in reduce_quda.cu
65 
66  double norm1(const ColorSpinorField &b);
67  double norm2(const ColorSpinorField &a);
68 
69  double axpyReDot(double a, ColorSpinorField &x, ColorSpinorField &y);
70 
72 
73  double axpbyzNorm(double a, ColorSpinorField &x, double b, ColorSpinorField &y, ColorSpinorField &z);
74  inline double axpyNorm(double a, ColorSpinorField &x, ColorSpinorField &y) { return axpbyzNorm(a, x, 1.0, y, y); }
75  inline double xmyNorm(ColorSpinorField &x, ColorSpinorField &y) { return axpbyzNorm(1.0, x, -1.0, y, y); }
76 
79 
84  double3 a3 = cDotProductNormA(b, a);
85  return make_double3(a3.x, -a3.y, a3.z);
86  }
87 
90 
91  double caxpyNorm(const Complex &a, ColorSpinorField &x, ColorSpinorField &y);
92  double caxpyXmazNormX(const Complex &a, ColorSpinorField &x,
94  double cabxpyzAxNorm(double a, const Complex &b, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z);
95 
97  ColorSpinorField &z);
101 
104 
107 
109  double doubleCG3UpdateNorm(double a, double b, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z);
110 
111 
112  // multi-blas kernels - defined in multi_blas.cu
113 
127  void caxpy(const Complex *a, std::vector<ColorSpinorField*> &x, std::vector<ColorSpinorField*> &y);
128 
139  void caxpy(const Complex *a, ColorSpinorField &x, ColorSpinorField &y);
140 
153  void caxpy_U(const Complex *a, std::vector<ColorSpinorField*> &x, std::vector<ColorSpinorField*> &y);
154 
165  void caxpy_U(const Complex *a, ColorSpinorField &x, ColorSpinorField &y);
166 
179  void caxpy_L(const Complex *a, std::vector<ColorSpinorField*> &x, std::vector<ColorSpinorField*> &y);
180 
191  void caxpy_L(const Complex *a, ColorSpinorField &x, ColorSpinorField &y);
192 
208  void caxpyz(const Complex *a, std::vector<ColorSpinorField*> &x, std::vector<ColorSpinorField*> &y, std::vector<ColorSpinorField*> &z);
209 
222 
236  void caxpyz_U(const Complex *a, std::vector<ColorSpinorField*> &x, std::vector<ColorSpinorField*> &y, std::vector<ColorSpinorField*> &z);
237 
250 
264  void caxpyz_L(const Complex *a, std::vector<ColorSpinorField*> &x, std::vector<ColorSpinorField*> &y, std::vector<ColorSpinorField*> &z);
265 
278 
297  void axpyBzpcx(const double *a, std::vector<ColorSpinorField*> &x, std::vector<ColorSpinorField*> &y,
298  const double *b, ColorSpinorField &z, const double *c);
299 
317  void caxpyBxpz(const Complex *a_, std::vector<ColorSpinorField*> &x_, ColorSpinorField &y_,
318  const Complex *b_, ColorSpinorField &z_);
319 
320 
321  // multi-reduce kernels - defined in multi_reduce.cu
322 
323  void reDotProduct(double* result, std::vector<ColorSpinorField*>& a, std::vector<ColorSpinorField*>& b);
324 
332  void cDotProduct(Complex* result, std::vector<ColorSpinorField*>& a, std::vector<ColorSpinorField*>& b);
333 
344  void hDotProduct(Complex* result, std::vector<ColorSpinorField*>& a, std::vector<ColorSpinorField*>& b);
345 
358  void hDotProduct_Anorm(Complex* result, std::vector<ColorSpinorField*>& a, std::vector<ColorSpinorField*>& b);
359 
360 
369  void cDotProductCopy(Complex* result, std::vector<ColorSpinorField*>& a, std::vector<ColorSpinorField*>& b, std::vector<ColorSpinorField*>& c);
370 
371  } // namespace blas
372 
373 } // namespace quda
374 
375 #endif // _QUDA_BLAS_H
void ax(double a, ColorSpinorField &x)
Definition: blas_quda.cu:508
void caxpyz(const Complex *a, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, std::vector< ColorSpinorField *> &z)
Compute the block "caxpyz" with over the set of ColorSpinorFields. E.g., it computes.
void caxpyz_U(const Complex *a, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, std::vector< ColorSpinorField *> &z)
Compute the block "caxpyz" with over the set of ColorSpinorFields. E.g., it computes.
void caxpyXmazMR(const Complex &a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
Definition: blas_quda.cu:603
void * getHostReduceBuffer()
Definition: reduce_quda.cu:28
double3 cDotProductNormA(ColorSpinorField &a, ColorSpinorField &b)
Definition: reduce_quda.cu:778
double caxpyNorm(const Complex &a, ColorSpinorField &x, ColorSpinorField &y)
Definition: reduce_quda.cu:746
void axpyZpbx(double a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, double b)
Definition: blas_quda.cu:552
double quadrupleCG3InitNorm(double a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &v)
Definition: reduce_quda.cu:838
void setParam(int kernel, int prec, int threads, int blocks)
void end(void)
Definition: blas_quda.cu:489
double norm2(const ColorSpinorField &a)
Definition: reduce_quda.cu:721
void init()
Definition: blas_quda.cu:483
Complex cDotProduct(ColorSpinorField &, ColorSpinorField &)
Definition: reduce_quda.cu:764
void cDotProductCopy(Complex *result, std::vector< ColorSpinorField *> &a, std::vector< ColorSpinorField *> &b, std::vector< ColorSpinorField *> &c)
Computes the matrix of inner products between the vector set a and the vector set b...
void cabxpyAx(double a, const Complex &b, ColorSpinorField &x, ColorSpinorField &y)
Definition: blas_quda.cu:591
double3 xpyHeavyQuarkResidualNorm(ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &r)
Definition: reduce_quda.cu:818
void caxpbypczw(const Complex &a, ColorSpinorField &x, const Complex &b, ColorSpinorField &y, const Complex &c, ColorSpinorField &z, ColorSpinorField &w)
Definition: blas_quda.cu:528
double reDotProduct(ColorSpinorField &x, ColorSpinorField &y)
Definition: reduce_quda.cu:728
Complex axpyCGNorm(double a, ColorSpinorField &x, ColorSpinorField &y)
Definition: reduce_quda.cu:796
void copy(ColorSpinorField &dst, const ColorSpinorField &src)
Definition: copy_quda.cu:355
void caxpy_U(const Complex *a, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y)
Compute the block "caxpy_U" with over the set of ColorSpinorFields. E.g., it computes.
void * getMappedHostReduceBuffer()
Definition: reduce_quda.cu:27
double xmyNorm(ColorSpinorField &x, ColorSpinorField &y)
Definition: blas_quda.h:75
void caxpyBzpx(const Complex &, ColorSpinorField &, ColorSpinorField &, const Complex &, ColorSpinorField &)
Definition: blas_quda.cu:563
void caxpyBxpz(const Complex &, ColorSpinorField &, ColorSpinorField &, const Complex &, ColorSpinorField &)
Definition: blas_quda.cu:574
void doubleCG3Update(double a, double b, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
Definition: blas_quda.cu:631
void xpay(ColorSpinorField &x, double a, ColorSpinorField &y)
Definition: blas_quda.h:37
void axpy(double a, ColorSpinorField &x, ColorSpinorField &y)
Definition: blas_quda.h:35
void caxpbypzYmbw(const Complex &, ColorSpinorField &, const Complex &, ColorSpinorField &, ColorSpinorField &, ColorSpinorField &)
Definition: blas_quda.cu:585
double4 quadrupleCGReduction(ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
Definition: reduce_quda.cu:833
void axpyBzpcx(double a, ColorSpinorField &x, ColorSpinorField &y, double b, ColorSpinorField &z, double c)
Definition: blas_quda.cu:541
double cabxpyzAxNorm(double a, const Complex &b, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
Definition: reduce_quda.cu:758
double3 HeavyQuarkResidualNorm(ColorSpinorField &x, ColorSpinorField &r)
Definition: reduce_quda.cu:809
void hDotProduct_Anorm(Complex *result, std::vector< ColorSpinorField *> &a, std::vector< ColorSpinorField *> &b)
Computes the matrix of inner products between the vector set a and the vector set b...
std::complex< double > Complex
Definition: quda_internal.h:46
double caxpyXmazNormX(const Complex &a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
Definition: reduce_quda.cu:752
void tripleCGUpdate(double alpha, double beta, ColorSpinorField &q, ColorSpinorField &r, ColorSpinorField &x, ColorSpinorField &p)
Definition: blas_quda.cu:614
double axpyReDot(double a, ColorSpinorField &x, ColorSpinorField &y)
Definition: reduce_quda.cu:740
Complex caxpyDotzy(const Complex &a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
Definition: reduce_quda.cu:771
void caxpy(const Complex &a, ColorSpinorField &x, ColorSpinorField &y)
Definition: blas_quda.cu:512
void axpbyz(double a, ColorSpinorField &x, double b, ColorSpinorField &y, ColorSpinorField &z)
Definition: blas_quda.cu:496
void zero(ColorSpinorField &a)
Definition: blas_quda.cu:472
void doubleCG3Init(double a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
Definition: blas_quda.cu:626
void caxpy_L(const Complex *a, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y)
Compute the block "caxpy_L" with over the set of ColorSpinorFields. E.g., it computes.
double axpbyzNorm(double a, ColorSpinorField &x, double b, ColorSpinorField &y, ColorSpinorField &z)
Definition: reduce_quda.cu:734
double3 caxpbypzYmbwcDotProductUYNormY(const Complex &a, ColorSpinorField &x, const Complex &b, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &u)
Definition: reduce_quda.cu:783
void caxpyz_L(const Complex *a, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, std::vector< ColorSpinorField *> &z)
Compute the block "caxpyz" with over the set of ColorSpinorFields. E.g., it computes.
double norm1(const ColorSpinorField &b)
Definition: reduce_quda.cu:714
void hDotProduct(Complex *result, std::vector< ColorSpinorField *> &a, std::vector< ColorSpinorField *> &b)
Computes the matrix of inner products between the vector set a and the vector set b...
void caxpyXmaz(const Complex &a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
Definition: blas_quda.cu:597
void * getDeviceReduceBuffer()
Definition: reduce_quda.cu:26
double doubleCG3UpdateNorm(double a, double b, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
Definition: reduce_quda.cu:853
unsigned long long flops
Definition: blas_quda.cu:22
void xpy(ColorSpinorField &x, ColorSpinorField &y)
Definition: blas_quda.h:33
void axpby(double a, ColorSpinorField &x, double b, ColorSpinorField &y)
Definition: blas_quda.h:36
void caxpby(const Complex &a, ColorSpinorField &x, const Complex &b, ColorSpinorField &y)
Definition: blas_quda.cu:523
double axpyNorm(double a, ColorSpinorField &x, ColorSpinorField &y)
Definition: blas_quda.h:74
void mxpy(ColorSpinorField &x, ColorSpinorField &y)
Definition: blas_quda.h:34
void cxpaypbz(ColorSpinorField &, const Complex &b, ColorSpinorField &y, const Complex &c, ColorSpinorField &z)
Definition: blas_quda.cu:535
double doubleCG3InitNorm(double a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
Definition: reduce_quda.cu:848
double3 cDotProductNormB(ColorSpinorField &a, ColorSpinorField &b)
Return (a,b) and ||b||^2 - implemented using cDotProductNormA.
Definition: blas_quda.h:83
QudaPrecision prec
Definition: test_util.cpp:1608
void xpayz(ColorSpinorField &x, double a, ColorSpinorField &y, ColorSpinorField &z)
Definition: blas_quda.h:38
unsigned long long bytes
Definition: blas_quda.cu:23
double3 tripleCGReduction(ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
Definition: reduce_quda.cu:828
double quadrupleCG3UpdateNorm(double a, double b, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &v)
Definition: reduce_quda.cu:843