|
QUDA
0.9.0
|
Namespaces | |
| copy_ns | |
| multi | |
| reduce | |
Classes | |
| struct | ax_ |
| struct | axpby_ |
| struct | axpy_ |
| struct | axpyBzpcx_ |
| struct | axpyCGNorm2 |
| struct | axpyNorm2 |
| struct | AxpyReDot |
| struct | axpyZpbx_ |
| struct | BlasFunctor |
| struct | cabxpyAx_ |
| struct | cabxpyaxnorm |
| struct | caxpby_ |
| struct | caxpbypczpw_ |
| struct | caxpbypz_ |
| struct | caxpbypzYmbw_ |
| struct | caxpbypzYmbwcDotProductUYNormY_ |
| struct | caxpy_ |
| struct | caxpyBxpz_ |
| struct | caxpyBzpx_ |
| struct | caxpydotzy |
| struct | caxpyNorm2 |
| struct | caxpyxmaz_ |
| struct | caxpyxmazMR_ |
| struct | caxpyxmaznormx |
| struct | Cdot |
| struct | CdotCopy |
| struct | CdotNormA |
| struct | CdotNormB |
| struct | cxpaypbz_ |
| struct | Dot |
| struct | DotNormA |
| struct | HeavyQuarkResidualNorm_ |
| struct | multi_axpyBzpcx_ |
| struct | multi_caxpyBxpz_ |
| struct | MultiBlasFunctor |
| struct | multicaxpy_ |
| struct | multicaxpyz_ |
| struct | MultiReduceFunctor |
| struct | mxpy_ |
| struct | Norm1 |
| struct | Norm2 |
| struct | ReduceFunctor |
| class | TileSizeTune |
| struct | tripleCGReduction_ |
| struct | tripleCGUpdate_ |
| struct | write |
| struct | xmyNorm2 |
| struct | xpaycdotzy |
| struct | xpayz_ |
| struct | xpy_ |
| struct | xpyHeavyQuarkResidualNorm_ |
Functions | |
| void | init () |
| void | end (void) |
| void * | getDeviceReduceBuffer () |
| void * | getMappedHostReduceBuffer () |
| void * | getHostReduceBuffer () |
| void | setParam (int kernel, int prec, int threads, int blocks) |
| double | norm2 (const ColorSpinorField &a) |
| double | norm1 (const ColorSpinorField &b) |
| void | zero (ColorSpinorField &a) |
| void | copy (ColorSpinorField &dst, const ColorSpinorField &src) |
| double | axpyNorm (const double &a, ColorSpinorField &x, ColorSpinorField &y) |
| double | axpyReDot (const double &a, ColorSpinorField &x, ColorSpinorField &y) |
| double | reDotProduct (ColorSpinorField &x, ColorSpinorField &y) |
| double2 | reDotProductNormA (ColorSpinorField &a, ColorSpinorField &b) |
| double | xmyNorm (ColorSpinorField &x, ColorSpinorField &y) |
| void | axpby (const double &a, ColorSpinorField &x, const double &b, ColorSpinorField &y) |
| void | axpy (const double &a, ColorSpinorField &x, ColorSpinorField &y) |
| void | ax (const double &a, ColorSpinorField &x) |
| void | xpy (ColorSpinorField &x, ColorSpinorField &y) |
| void | xpay (ColorSpinorField &x, const double &a, ColorSpinorField &y) |
| void | xpayz (ColorSpinorField &x, const double &a, ColorSpinorField &y, ColorSpinorField &z) |
| void | mxpy (ColorSpinorField &x, ColorSpinorField &y) |
| void | axpyZpbx (const double &a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, const double &b) |
| void | axpyBzpcx (const double &a, ColorSpinorField &x, ColorSpinorField &y, const double &b, ColorSpinorField &z, const double &c) |
| void | caxpby (const Complex &a, ColorSpinorField &x, const Complex &b, ColorSpinorField &y) |
| void | caxpy (const Complex &a, ColorSpinorField &x, ColorSpinorField &y) |
| void | cxpaypbz (ColorSpinorField &, const Complex &b, ColorSpinorField &y, const Complex &c, ColorSpinorField &z) |
| void | caxpbypzYmbw (const Complex &, ColorSpinorField &, const Complex &, ColorSpinorField &, ColorSpinorField &, ColorSpinorField &) |
| void | caxpyBzpx (const Complex &, ColorSpinorField &, ColorSpinorField &, const Complex &, ColorSpinorField &) |
| void | caxpyBxpz (const Complex &, ColorSpinorField &, ColorSpinorField &, const Complex &, ColorSpinorField &) |
| Complex | cDotProduct (ColorSpinorField &, ColorSpinorField &) |
| Complex | xpaycDotzy (ColorSpinorField &x, const double &a, ColorSpinorField &y, ColorSpinorField &z) |
| double3 | cDotProductNormA (ColorSpinorField &a, ColorSpinorField &b) |
| double3 | cDotProductNormB (ColorSpinorField &a, ColorSpinorField &b) |
| double3 | caxpbypzYmbwcDotProductUYNormY (const Complex &a, ColorSpinorField &x, const Complex &b, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &u) |
| void | cabxpyAx (const double &a, const Complex &b, ColorSpinorField &x, ColorSpinorField &y) |
| double | caxpyNorm (const Complex &a, ColorSpinorField &x, ColorSpinorField &y) |
| void | caxpyXmaz (const Complex &a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z) |
| void | caxpyXmazMR (const Complex &a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z) |
| double | caxpyXmazNormX (const Complex &a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z) |
| double | cabxpyAxNorm (const double &a, const Complex &b, ColorSpinorField &x, ColorSpinorField &y) |
| void | caxpbypz (const Complex &, ColorSpinorField &, const Complex &, ColorSpinorField &, ColorSpinorField &) |
| void | caxpbypczpw (const Complex &, ColorSpinorField &, const Complex &, ColorSpinorField &, const Complex &, ColorSpinorField &, ColorSpinorField &) |
| Complex | caxpyDotzy (const Complex &a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z) |
| Complex | axpyCGNorm (const double &a, ColorSpinorField &x, ColorSpinorField &y) |
| double3 | HeavyQuarkResidualNorm (ColorSpinorField &x, ColorSpinorField &r) |
| double3 | xpyHeavyQuarkResidualNorm (ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &r) |
| void | tripleCGUpdate (const double &alpha, const double &beta, ColorSpinorField &q, ColorSpinorField &r, ColorSpinorField &x, ColorSpinorField &p) |
| double3 | tripleCGReduction (ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z) |
| double4 | quadrupleCGReduction (ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z) |
| void | caxpy (const Complex *a, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y) |
| Compute the block "caxpy" over the set of ColorSpinorFields. E.g., it computes. More... | |
| void | caxpy (const Complex *a, ColorSpinorField &x, ColorSpinorField &y) |
| This is a wrapper for calling the block "caxpy" with a composite ColorSpinorField. E.g., it computes. More... | |
| void | caxpy_U (const Complex *a, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y) |
| Compute the block "caxpy_U" over the set of ColorSpinorFields. E.g., it computes. More... | |
| void | caxpy_U (const Complex *a, ColorSpinorField &x, ColorSpinorField &y) |
| This is a wrapper for calling the block "caxpy_U" with a composite ColorSpinorField. E.g., it computes. More... | |
| void | caxpy_L (const Complex *a, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y) |
| Compute the block "caxpy_L" over the set of ColorSpinorFields. E.g., it computes. More... | |
| void | caxpy_L (const Complex *a, ColorSpinorField &x, ColorSpinorField &y) |
| This is a wrapper for calling the block "caxpy_L" with a composite ColorSpinorField. E.g., it computes. More... | |
| void | caxpyz (const Complex *a, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, std::vector< ColorSpinorField *> &z) |
| Compute the block "caxpyz" over the set of ColorSpinorFields. E.g., it computes. More... | |
| void | caxpyz (const Complex *a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z) |
| This is a wrapper for calling the block "caxpyz" with a composite ColorSpinorField. E.g., it computes. More... | |
| void | caxpyz_U (const Complex *a, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, std::vector< ColorSpinorField *> &z) |
| Compute the block "caxpyz_U" over the set of ColorSpinorFields. E.g., it computes. More... | |
| void | caxpyz_U (const Complex *a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z) |
| This is a wrapper for calling the block "caxpyz_U" with a composite ColorSpinorField. E.g., it computes. More... | |
| void | caxpyz_L (const Complex *a, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, std::vector< ColorSpinorField *> &z) |
| Compute the block "caxpyz_L" over the set of ColorSpinorFields. E.g., it computes. More... | |
| void | caxpyz_L (const Complex *a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z) |
| This is a wrapper for calling the block "caxpyz_L" with a composite ColorSpinorField. E.g., it computes. More... | |
| void | axpyBzpcx (const double *a, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, const double *b, ColorSpinorField &z, const double *c) |
| Compute the vectorized "axpyBzpcx" over the set of ColorSpinorFields, where the third vector, z, is constant over the batch. E.g., it computes. More... | |
| void | caxpyBxpz (const Complex *a_, std::vector< ColorSpinorField *> &x_, ColorSpinorField &y_, const Complex *b_, ColorSpinorField &z_) |
| Compute the vectorized "caxpyBxpz" over the set of ColorSpinorFields, where the second and third vectors, y and z, are constant over the batch. E.g., it computes. More... | |
| void | reDotProduct (double *result, std::vector< ColorSpinorField *> &a, std::vector< ColorSpinorField *> &b) |
| void | cDotProduct (Complex *result, std::vector< ColorSpinorField *> &a, std::vector< ColorSpinorField *> &b) |
| Computes the matrix of inner products between the vector set a and the vector set b. More... | |
| void | hDotProduct (Complex *result, std::vector< ColorSpinorField *> &a, std::vector< ColorSpinorField *> &b) |
| Computes the matrix of inner products between the vector set a and the vector set b. This routine is specifically for the case where the result matrix is guaranteed to be Hermitian. Requires a.size()==b.size(). More... | |
| void | hDotProduct_Anorm (Complex *result, std::vector< ColorSpinorField *> &a, std::vector< ColorSpinorField *> &b) |
| Computes the matrix of inner products between the vector set a and the vector set b. This routine is specifically for the case where the result matrix is guaranteed to be Hermitian. Uniquely defined for cases like (p, Ap) where the output is Hermitian, but there's an A-norm instead of an L2 norm. Requires a.size()==b.size(). More... | |
| void | cDotProductCopy (Complex *result, std::vector< ColorSpinorField *> &a, std::vector< ColorSpinorField *> &b, std::vector< ColorSpinorField *> &c) |
| Computes the matrix of inner products between the vector set a and the vector set b, and copies b into c. More... | |
| template<typename Float > | |
| void | axpby (const Float &a, const Float *x, const Float &b, Float *y, const int N) |
| void | axpbyCpu (const double &a, const cpuColorSpinorField &x, const double &b, cpuColorSpinorField &y) |
| void | xpyCpu (const cpuColorSpinorField &x, cpuColorSpinorField &y) |
| void | axpyCpu (const double &a, const cpuColorSpinorField &x, cpuColorSpinorField &y) |
| void | xpayCpu (const cpuColorSpinorField &x, const double &a, cpuColorSpinorField &y) |
| void | mxpyCpu (const cpuColorSpinorField &x, cpuColorSpinorField &y) |
| void | axCpu (const double &a, cpuColorSpinorField &x) |
| template<typename Float > | |
| void | caxpby (const std::complex< Float > &a, const std::complex< Float > *x, const std::complex< Float > &b, std::complex< Float > *y, int N) |
| void | caxpyCpu (const Complex &a, const cpuColorSpinorField &x, cpuColorSpinorField &y) |
| void | caxpbyCpu (const Complex &a, const cpuColorSpinorField &x, const Complex &b, cpuColorSpinorField &y) |
| template<typename Float > | |
| void | caxpbypcz (const std::complex< Float > &a, const std::complex< Float > *x, const std::complex< Float > &b, const std::complex< Float > *y, const std::complex< Float > &c, std::complex< Float > *z, int N) |
| void | cxpaypbzCpu (const cpuColorSpinorField &x, const Complex &a, const cpuColorSpinorField &y, const Complex &b, cpuColorSpinorField &z) |
| void | axpyBzpcxCpu (const double &a, cpuColorSpinorField &x, cpuColorSpinorField &y, const double &b, const cpuColorSpinorField &z, const double &c) |
| void | axpyZpbxCpu (const double &a, cpuColorSpinorField &x, cpuColorSpinorField &y, const cpuColorSpinorField &z, const double &b) |
| void | caxpbypzYmbwCpu (const Complex &a, const cpuColorSpinorField &x, const Complex &b, cpuColorSpinorField &y, cpuColorSpinorField &z, const cpuColorSpinorField &w) |
| template<typename Float > | |
| double | norm (const Float *a, const int N) |
| double | normCpu (const cpuColorSpinorField &a) |
| double | axpyNormCpu (const double &a, const cpuColorSpinorField &x, cpuColorSpinorField &y) |
| template<typename Float > | |
| double | reDotProduct (const Float *a, const Float *b, const int N) |
| double | reDotProductCpu (const cpuColorSpinorField &a, const cpuColorSpinorField &b) |
| double | xmyNormCpu (const cpuColorSpinorField &x, cpuColorSpinorField &y) |
| template<typename Float > | |
| Complex | cDotProduct (const std::complex< Float > *a, const std::complex< Float > *b, const int N) |
| Complex | cDotProductCpu (const cpuColorSpinorField &a, const cpuColorSpinorField &b) |
| Complex | xpaycDotzyCpu (const cpuColorSpinorField &x, const double &a, cpuColorSpinorField &y, const cpuColorSpinorField &z) |
| double3 | cDotProductNormACpu (const cpuColorSpinorField &a, const cpuColorSpinorField &b) |
| double3 | cDotProductNormBCpu (const cpuColorSpinorField &a, const cpuColorSpinorField &b) |
| double3 | caxpbypzYmbwcDotProductUYNormYCpu (const Complex &a, const cpuColorSpinorField &x, const Complex &b, cpuColorSpinorField &y, cpuColorSpinorField &z, const cpuColorSpinorField &w, const cpuColorSpinorField &u) |
| void | cabxpyAxCpu (const double &a, const Complex &b, cpuColorSpinorField &x, cpuColorSpinorField &y) |
| double | caxpyNormCpu (const Complex &a, cpuColorSpinorField &x, cpuColorSpinorField &y) |
| double | caxpyXmazNormXCpu (const Complex &a, cpuColorSpinorField &x, cpuColorSpinorField &y, cpuColorSpinorField &z) |
| void | caxpyXmazCpu (const Complex &a, cpuColorSpinorField &x, cpuColorSpinorField &y, cpuColorSpinorField &z) |
| double | cabxpyAxNormCpu (const double &a, const Complex &b, cpuColorSpinorField &x, cpuColorSpinorField &y) |
| void | caxpbypzCpu (const Complex &a, cpuColorSpinorField &x, const Complex &b, cpuColorSpinorField &y, cpuColorSpinorField &z) |
| void | caxpbypczpwCpu (const Complex &a, cpuColorSpinorField &x, const Complex &b, cpuColorSpinorField &y, const Complex &c, cpuColorSpinorField &z, cpuColorSpinorField &w) |
| Complex | caxpyDotzyCpu (const Complex &a, cpuColorSpinorField &x, cpuColorSpinorField &y, cpuColorSpinorField &z) |
| template<typename Float > | |
| double3 | HeavyQuarkResidualNorm (const Float *x, const Float *r, const int volume, const int Nint) |
| double3 | HeavyQuarkResidualNormCpu (cpuColorSpinorField &x, cpuColorSpinorField &r) |
| double3 | HeavyQuarkResidualNormCpu (cpuColorSpinorField &x, cpuColorSpinorField &y, cpuColorSpinorField &r) |
| void | initReduce () |
| void | endReduce () |
| cudaStream_t * | getStream () |
| __device__ __host__ void | _caxpy (const float2 &a, const float4 &x, float4 &y) |
| __device__ __host__ void | _caxpy (const float2 &a, const float2 &x, float2 &y) |
| __device__ __host__ void | _caxpy (const double2 &a, const double2 &x, double2 &y) |
| __device__ __host__ void | _caxpby (const float2 &a, const float4 &x, const float2 &b, float4 &y) |
| __device__ __host__ void | _caxpby (const float2 &a, const float2 &x, const float2 &b, float2 &y) |
| __device__ __host__ void | _caxpby (const double2 &a, const double2 &x, const double2 &b, double2 &y) |
| __device__ __host__ void | _cxpaypbz (const float4 &x, const float2 &a, const float4 &y, const float2 &b, float4 &z) |
| __device__ __host__ void | _cxpaypbz (const float2 &x, const float2 &a, const float2 &y, const float2 &b, float2 &z) |
| __device__ __host__ void | _cxpaypbz (const double2 &x, const double2 &a, const double2 &y, const double2 &b, double2 &z) |
| void | caxpy_recurse (const Complex *a_, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, int i_idx, int j_idx, int upper) |
| void | caxpyz_recurse (const Complex *a_, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, std::vector< ColorSpinorField *> &z, int i, int j, int pass, int upper) |
| cudaEvent_t * | getReduceEvent () |
| template<typename ReduceType > | |
| __device__ __host__ void | dot_ (ReduceType &sum, const double2 &a, const double2 &b) |
| template<typename ReduceType > | |
| __device__ __host__ void | dot_ (ReduceType &sum, const float2 &a, const float2 &b) |
| template<typename ReduceType > | |
| __device__ __host__ void | dot_ (ReduceType &sum, const float4 &a, const float4 &b) |
| template<typename ReduceType > | |
| __device__ __host__ void | cdot_ (ReduceType &sum, const double2 &a, const double2 &b) |
| template<typename ReduceType > | |
| __device__ __host__ void | cdot_ (ReduceType &sum, const float2 &a, const float2 &b) |
| template<typename ReduceType > | |
| __device__ __host__ void | cdot_ (ReduceType &sum, const float4 &a, const float4 &b) |
| template<template< int MXZ, typename ReducerType, typename Float, typename FloatN > class ReducerDiagonal, typename writeDiagonal , template< int MXZ, typename ReducerType, typename Float, typename FloatN > class ReducerOffDiagonal, typename writeOffDiagonal > | |
| void | multiReduce_recurse (Complex *result, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, std::vector< ColorSpinorField *> &z, std::vector< ColorSpinorField *> &w, int i_idx, int j_idx, bool hermitian, unsigned int tile_size) |
| template<typename ReduceType > | |
| __device__ __host__ ReduceType | norm1_ (const double2 &a) |
| template<typename ReduceType > | |
| __device__ __host__ ReduceType | norm1_ (const float2 &a) |
| template<typename ReduceType > | |
| __device__ __host__ ReduceType | norm1_ (const float4 &a) |
| template<typename ReduceType > | |
| __device__ __host__ void | norm2_ (ReduceType &sum, const double2 &a) |
| template<typename ReduceType > | |
| __device__ __host__ void | norm2_ (ReduceType &sum, const float2 &a) |
| template<typename ReduceType > | |
| __device__ __host__ void | norm2_ (ReduceType &sum, const float4 &a) |
| template<typename ReduceType , typename InputType > | |
| __device__ __host__ ReduceType | dotNormA_ (const InputType &a, const InputType &b) |
| __device__ __host__ void | Caxpy_ (const double2 &a, const double2 &x, double2 &y) |
| __device__ __host__ void | Caxpy_ (const float2 &a, const float2 &x, float2 &y) |
| __device__ __host__ void | Caxpy_ (const float2 &a, const float4 &x, float4 &y) |
| template<typename ReduceType , typename InputType > | |
| __device__ __host__ void | cdotNormA_ (ReduceType &sum, const InputType &a, const InputType &b) |
| template<typename ReduceType , typename InputType > | |
| __device__ __host__ void | cdotNormB_ (ReduceType &sum, const InputType &a, const InputType &b) |
Variables | |
| unsigned long long | flops |
| unsigned long long | bytes |
| static cudaStream_t * | blasStream |
| struct { | |
| const char * quda::blas::vol_str | |
| const char * quda::blas::aux_str | |
| char quda::blas::aux_tmp [TuneKey::aux_n] | |
| } | blasStrings |
| struct { | |
| const char * quda::blas::vol_str | |
| const char * quda::blas::aux_str | |
| char quda::blas::aux_tmp [TuneKey::aux_n] | |
| } | blasStrings |
| __device__ __host__ void quda::blas::_caxpby | ( | const float2 & | a, |
| const float4 & | x, | ||
| const float2 & | b, | ||
| float4 & | y | ||
| ) |
Functor to perform the operation y = a*x + b*y (complex-valued)
Definition at line 261 of file blas_quda.cu.
Referenced by quda::blas::caxpby_< Float2, FloatN >::operator()().

| __device__ __host__ void quda::blas::_caxpby | ( | const float2 & | a, |
| const float2 & | x, | ||
| const float2 & | b, | ||
| float2 & | y | ||
| ) |
| __device__ __host__ void quda::blas::_caxpby | ( | const double2 & | a, |
| const double2 & | x, | ||
| const double2 & | b, | ||
| double2 & | y | ||
| ) |
|
inline |
Functor to perform the operation y += a * x (complex-valued)
Definition at line 219 of file blas_quda.cu.
Referenced by quda::blas::multicaxpy_< NXZ, Float2, FloatN >::operator()(), quda::blas::caxpy_< Float2, FloatN >::operator()(), quda::blas::multicaxpyz_< NXZ, Float2, FloatN >::operator()(), quda::blas::caxpyBzpx_< Float2, FloatN >::operator()(), quda::blas::caxpyBxpz_< Float2, FloatN >::operator()(), quda::blas::caxpbypzYmbw_< Float2, FloatN >::operator()(), quda::blas::cabxpyAx_< Float2, FloatN >::operator()(), quda::blas::caxpbypz_< Float2, FloatN >::operator()(), quda::blas::caxpbypczpw_< Float2, FloatN >::operator()(), quda::blas::caxpyxmaz_< Float2, FloatN >::operator()(), quda::blas::caxpyxmazMR_< Float2, FloatN >::operator()(), and quda::blas::multi_caxpyBxpz_< NXZ, Float2, FloatN >::operator()().

|
inline |
Definition at line 226 of file blas_quda.cu.
|
inline |
Definition at line 231 of file blas_quda.cu.
| __device__ __host__ void quda::blas::_cxpaypbz | ( | const float4 & | x, |
| const float2 & | a, | ||
| const float4 & | y, | ||
| const float2 & | b, | ||
| float4 & | z | ||
| ) |
Functor to perform the operation z[i] = x[i] + a*y[i] + b*z[i]
Definition at line 301 of file blas_quda.cu.
Referenced by quda::blas::cxpaypbz_< Float2, FloatN >::operator()().

| __device__ __host__ void quda::blas::_cxpaypbz | ( | const float2 & | x, |
| const float2 & | a, | ||
| const float2 & | y, | ||
| const float2 & | b, | ||
| float2 & | z | ||
| ) |
| __device__ __host__ void quda::blas::_cxpaypbz | ( | const double2 & | x, |
| const double2 & | a, | ||
| const double2 & | y, | ||
| const double2 & | b, | ||
| double2 & | z | ||
| ) |
| void quda::blas::ax | ( | const double & | a, |
| ColorSpinorField & | x | ||
| ) |
Definition at line 209 of file blas_quda.cu.
Referenced by quda::ax(), benchmark(), cloverQuda(), dslashQuda(), dslashQuda_4dpc(), dslashQuda_mdwf(), quda::GMResDR::FlexArnoldiProcedure(), quda::Deflation::increment(), invertMultiShiftQuda(), invertMultiSrcQuda(), invertQuda(), main(), quda::massRescale(), MatDagMatQuda(), MatQuda(), quda::MPCG::operator()(), quda::MR::operator()(), quda::MinResExt::operator()(), quda::GMResDR::RestartVZH(), test(), and quda::IncEigCG::UpdateVm().

| void quda::blas::axCpu | ( | const double & | a, |
| cpuColorSpinorField & | x | ||
| ) |
Definition at line 61 of file blas_cpu.cpp.
References a, axpby(), errorQuda, f, float, QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION, and x.
Referenced by cabxpyAxCpu(), and cabxpyAxNormCpu().


| void quda::blas::axpby | ( | const Float & | a, |
| const Float * | x, | ||
| const Float & | b, | ||
| Float * | y, | ||
| const int | N | ||
| ) |
Definition at line 9 of file blas_cpu.cpp.
References a, b, fused_exterior_ndeg_tm_dslash_cuda_gen::i, x, and y.
| void quda::blas::axpby | ( | const double & | a, |
| ColorSpinorField & | x, | ||
| const double & | b, | ||
| ColorSpinorField & | y | ||
| ) |
Definition at line 106 of file blas_quda.cu.
Referenced by axCpu(), axpbyCpu(), axpyCpu(), benchmark(), mxpyCpu(), quda::MR::operator()(), quda::MultiShiftCG::operator()(), test(), xpayCpu(), and xpyCpu().

| void quda::blas::axpbyCpu | ( | const double & | a, |
| const cpuColorSpinorField & | x, | ||
| const double & | b, | ||
| cpuColorSpinorField & | y | ||
| ) |
Definition at line 13 of file blas_cpu.cpp.
References a, axpby(), b, errorQuda, float, QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION, x, and y.
Referenced by axpyBzpcxCpu().


| void quda::blas::axpy | ( | const double & | a, |
| ColorSpinorField & | x, | ||
| ColorSpinorField & | y | ||
| ) |
Definition at line 150 of file blas_quda.cu.
Referenced by benchmark(), quda::DiracMobius::M(), main(), quda::MPCG::operator()(), quda::GCR::operator()(), quda::MR::operator()(), quda::MultiShiftCG::operator()(), quda::GMResDR::operator()(), quda::DiracMdagM::operator()(), quda::DiracMMdag::operator()(), and test().

| void quda::blas::axpyBzpcx | ( | const double & | a, |
| ColorSpinorField & | x, | ||
| ColorSpinorField & | y, | ||
| const double & | b, | ||
| ColorSpinorField & | z, | ||
| const double & | c | ||
| ) |
Definition at line 356 of file blas_quda.cu.
References a, b, c, x, y, and z.
Referenced by quda::ShiftUpdate::apply(), axpyBzpcx(), benchmark(), and test().

| void quda::blas::axpyBzpcx | ( | const double * | a, |
| std::vector< ColorSpinorField *> & | x, | ||
| std::vector< ColorSpinorField *> & | y, | ||
| const double * | b, | ||
| ColorSpinorField & | z, | ||
| const double * | c | ||
| ) |
Compute the vectorized "axpyBzpcx" over the set of ColorSpinorFields, where the third vector, z, is constant over the batch. E.g., it computes.
y = a * x + y; x = b * z + c * x
The dimensions of a, b, c are the same as the size of x and y, with a maximum size of 16.
| a[in] | Array of coefficients |
| b[in] | Array of coefficients |
| c[in] | Array of coefficients |
| x[in,out] | vector of ColorSpinorFields |
| y[in,out] | vector of ColorSpinorFields |
| z[in] | input ColorSpinorField |
Definition at line 718 of file multi_blas_quda.cu.
References a, fused_exterior_ndeg_tm_dslash_cuda_gen::a1, axpyBzpcx(), b, c, MAX_MULTI_BLAS_N, w, x, y, y0(), and y1().

| void quda::blas::axpyBzpcxCpu | ( | const double & | a, |
| cpuColorSpinorField & | x, | ||
| cpuColorSpinorField & | y, | ||
| const double & | b, | ||
| const cpuColorSpinorField & | z, | ||
| const double & | c | ||
| ) |
Definition at line 130 of file blas_cpu.cpp.
References a, axpbyCpu(), axpyCpu(), b, c, x, y, and z.

| Complex quda::blas::axpyCGNorm | ( | const double & | a, |
| ColorSpinorField & | x, | ||
| ColorSpinorField & | y | ||
| ) |
Definition at line 654 of file reduce_quda.cu.
Referenced by quda::CG::operator()(), quda::PreconCG::operator()(), and quda::MultiShiftCG::operator()().

| void quda::blas::axpyCpu | ( | const double & | a, |
| const cpuColorSpinorField & | x, | ||
| cpuColorSpinorField & | y | ||
| ) |
Definition at line 32 of file blas_cpu.cpp.
References a, axpby(), errorQuda, QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION, x, and y.
Referenced by axpyBzpcxCpu(), axpyNormCpu(), and axpyZpbxCpu().


| double quda::blas::axpyNorm | ( | const double & | a, |
| ColorSpinorField & | x, | ||
| ColorSpinorField & | y | ||
| ) |
Definition at line 325 of file reduce_quda.cu.
Referenced by benchmark(), quda::IncEigCG::eigCGsolve(), quda::CG::operator()(), and test().

| double quda::blas::axpyNormCpu | ( | const double & | a, |
| const cpuColorSpinorField & | x, | ||
| cpuColorSpinorField & | y | ||
| ) |
| double quda::blas::axpyReDot | ( | const double & | a, |
| ColorSpinorField & | x, | ||
| ColorSpinorField & | y | ||
| ) |
Definition at line 345 of file reduce_quda.cu.
Referenced by benchmark(), quda::MultiShiftCG::operator()(), and test().

| void quda::blas::axpyZpbx | ( | const double & | a, |
| ColorSpinorField & | x, | ||
| ColorSpinorField & | y, | ||
| ColorSpinorField & | z, | ||
| const double & | b | ||
| ) |
Definition at line 384 of file blas_quda.cu.
Referenced by benchmark(), quda::IncEigCG::eigCGsolve(), quda::CG::operator()(), quda::PreconCG::operator()(), quda::MultiShiftCG::operator()(), and test().

| void quda::blas::axpyZpbxCpu | ( | const double & | a, |
| cpuColorSpinorField & | x, | ||
| cpuColorSpinorField & | y, | ||
| const cpuColorSpinorField & | z, | ||
| const double & | b | ||
| ) |
| void quda::blas::cabxpyAx | ( | const double & | a, |
| const Complex & | b, | ||
| ColorSpinorField & | x, | ||
| ColorSpinorField & | y | ||
| ) |
Definition at line 484 of file blas_quda.cu.
References a, b, IMAG, REAL, x, and y.
Referenced by benchmark(), and test().

| void quda::blas::cabxpyAxCpu | ( | const double & | a, |
| const Complex & | b, | ||
| cpuColorSpinorField & | x, | ||
| cpuColorSpinorField & | y | ||
| ) |
Definition at line 260 of file blas_cpu.cpp.
References a, axCpu(), b, caxpyCpu(), x, and y.

| double quda::blas::cabxpyAxNorm | ( | const double & | a, |
| const Complex & | b, | ||
| ColorSpinorField & | x, | ||
| ColorSpinorField & | y | ||
| ) |
Definition at line 449 of file reduce_quda.cu.
References a, b, IMAG, REAL, x, and y.
Referenced by benchmark(), quda::GCR::operator()(), and test().

| double quda::blas::cabxpyAxNormCpu | ( | const double & | a, |
| const Complex & | b, | ||
| cpuColorSpinorField & | x, | ||
| cpuColorSpinorField & | y | ||
| ) |
Definition at line 284 of file blas_cpu.cpp.
References a, axCpu(), b, caxpyCpu(), norm2(), x, and y.

| void quda::blas::caxpby | ( | const Complex & | a, |
| ColorSpinorField & | x, | ||
| const Complex & | b, | ||
| ColorSpinorField & | y | ||
| ) |
Definition at line 292 of file blas_quda.cu.
References a, b, IMAG, REAL, x, and y.
Referenced by quda::BiCGstabLUpdate::apply(), benchmark(), caxpbyCpu(), caxpyCpu(), quda::BiCGstabL::operator()(), and test().

| void quda::blas::caxpby | ( | const std::complex< Float > & | a, |
| const std::complex< Float > * | x, | ||
| const std::complex< Float > & | b, | ||
| std::complex< Float > * | y, | ||
| int | N | ||
| ) |
Definition at line 71 of file blas_cpu.cpp.
References a, b, fused_exterior_ndeg_tm_dslash_cuda_gen::i, x, and y.
| void quda::blas::caxpbyCpu | ( | const Complex & | a, |
| const cpuColorSpinorField & | x, | ||
| const Complex & | b, | ||
| cpuColorSpinorField & | y | ||
| ) |
Definition at line 93 of file blas_cpu.cpp.
References a, b, caxpby(), errorQuda, QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION, x, and y.

| void quda::blas::caxpbypcz | ( | const std::complex< Float > & | a, |
| const std::complex< Float > * | x, | ||
| const std::complex< Float > & | b, | ||
| const std::complex< Float > * | y, | ||
| const std::complex< Float > & | c, | ||
| std::complex< Float > * | z, | ||
| int | N | ||
| ) |
Definition at line 106 of file blas_cpu.cpp.
References a, b, c, fused_exterior_ndeg_tm_dslash_cuda_gen::i, x, y, and z.
Referenced by caxpbypzYmbwCpu(), and cxpaypbzCpu().

| void quda::blas::caxpbypczpw | ( | const Complex & | a, |
| ColorSpinorField & | x, | ||
| const Complex & | b, | ||
| ColorSpinorField & | y, | ||
| const Complex & | c, | ||
| ColorSpinorField & | z, | ||
| ColorSpinorField & | w | ||
| ) |
| void quda::blas::caxpbypczpwCpu | ( | const Complex & | a, |
| cpuColorSpinorField & | x, | ||
| const Complex & | b, | ||
| cpuColorSpinorField & | y, | ||
| const Complex & | c, | ||
| cpuColorSpinorField & | z, | ||
| cpuColorSpinorField & | w | ||
| ) |
Definition at line 296 of file blas_cpu.cpp.
References a, b, c, caxpyCpu(), w, x, y, and z.

| void quda::blas::caxpbypz | ( | const Complex & | a, |
| ColorSpinorField & | x, | ||
| const Complex & | b, | ||
| ColorSpinorField & | y, | ||
| ColorSpinorField & | z | ||
| ) |
| void quda::blas::caxpbypzCpu | ( | const Complex & | a, |
| cpuColorSpinorField & | x, | ||
| const Complex & | b, | ||
| cpuColorSpinorField & | y, | ||
| cpuColorSpinorField & | z | ||
| ) |
Definition at line 290 of file blas_cpu.cpp.
References a, b, caxpyCpu(), x, y, and z.

| void quda::blas::caxpbypzYmbw | ( | const Complex & | a, |
| ColorSpinorField & | x, | ||
| const Complex & | b, | ||
| ColorSpinorField & | y, | ||
| ColorSpinorField & | z, | ||
| ColorSpinorField & | w | ||
| ) |
Definition at line 464 of file blas_quda.cu.
References a, b, IMAG, REAL, w, x, y, and z.
Referenced by benchmark(), quda::BiCGstab::operator()(), and test().

| double3 quda::blas::caxpbypzYmbwcDotProductUYNormY | ( | const Complex & | a, |
| ColorSpinorField & | x, | ||
| const Complex & | b, | ||
| ColorSpinorField & | y, | ||
| ColorSpinorField & | z, | ||
| ColorSpinorField & | w, | ||
| ColorSpinorField & | u | ||
| ) |
Definition at line 619 of file reduce_quda.cu.
References a, b, IMAG, REAL, w, x, y, and z.
Referenced by benchmark(), quda::BiCGstab::operator()(), and test().

| double3 quda::blas::caxpbypzYmbwcDotProductUYNormYCpu | ( | const Complex & | a, |
| const cpuColorSpinorField & | x, | ||
| const Complex & | b, | ||
| cpuColorSpinorField & | y, | ||
| cpuColorSpinorField & | z, | ||
| const cpuColorSpinorField & | w, | ||
| const cpuColorSpinorField & | u | ||
| ) |
Definition at line 251 of file blas_cpu.cpp.
References a, b, caxpbypzYmbwCpu(), cDotProductNormBCpu(), w, x, y, and z.

| void quda::blas::caxpbypzYmbwCpu | ( | const Complex & | a, |
| const cpuColorSpinorField & | x, | ||
| const Complex & | b, | ||
| cpuColorSpinorField & | y, | ||
| cpuColorSpinorField & | z, | ||
| const cpuColorSpinorField & | w | ||
| ) |
Definition at line 144 of file blas_cpu.cpp.
References a, b, caxpbypcz(), caxpyCpu(), errorQuda, f, QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION, w, x, y, and z.
Referenced by caxpbypzYmbwcDotProductUYNormYCpu().


| void quda::blas::caxpy | ( | const Complex & | a, |
| ColorSpinorField & | x, | ||
| ColorSpinorField & | y | ||
| ) |
Definition at line 246 of file blas_quda.cu.
Referenced by quda::BiCGstabLUpdate::apply(), benchmark(), caxpy(), caxpyz_recurse(), quda::GMResDR::FlexArnoldiProcedure(), quda::MG::generateNullVectors(), quda::Eig_Solver::GrandSchm_test(), quda::Deflation::increment(), quda::Deflation::operator()(), quda::CG::operator()(), quda::BiCGstab::operator()(), quda::MPBiCGstab::operator()(), quda::BiCGstabL::operator()(), quda::MultiShiftCG::operator()(), quda::MinResExt::operator()(), quda::orthoDir(), quda::BiCGstabL::orthoDir(), quda::Deflation::reduce(), quda::IncEigCG::RestartVT(), quda::GMResDR::RestartVZH(), quda::CG::solve(), test(), quda::updateAp(), quda::BiCGstabL::updateR(), quda::updateSolution(), quda::GMResDR::UpdateSolution(), quda::BiCGstabL::updateUend(), and quda::Deflation::verify().

| void quda::blas::caxpy | ( | const Complex * | a, |
| std::vector< ColorSpinorField *> & | x, | ||
| std::vector< ColorSpinorField *> & | y | ||
| ) |
Compute the block "caxpy" over the set of ColorSpinorFields. E.g., it computes:
y = x * a + y
The dimensions of a can be rectangular, e.g., the widths of x and y need not be the same.
| a[in] | Matrix of coefficients |
| x[in] | vector of input ColorSpinorFields |
| y[in,out] | vector of input/output ColorSpinorFields |
Definition at line 351 of file multi_blas_quda.cu.
References caxpy_recurse(), x, and y.

| void quda::blas::caxpy | ( | const Complex * | a, |
| ColorSpinorField & | x, | ||
| ColorSpinorField & | y | ||
| ) |
This is a wrapper for calling the block "caxpy" with a composite ColorSpinorField. E.g., it computes.
y = x * a + y
| a[in] | Matrix of coefficients |
| x[in] | Input matrix |
| y[in,out] | Computed output matrix |
Definition at line 382 of file multi_blas_quda.cu.
References a, caxpy(), x, and y.

| __device__ __host__ void quda::blas::Caxpy_ | ( | const double2 & | a, |
| const double2 & | x, | ||
| double2 & | y | ||
| ) |
Functor to perform the operation y += a * x (complex-valued)
Definition at line 373 of file reduce_quda.cu.
Referenced by quda::blas::caxpyNorm2< ReduceType, Float2, FloatN >::operator()(), quda::blas::caxpyxmaznormx< ReduceType, Float2, FloatN >::operator()(), quda::blas::cabxpyaxnorm< ReduceType, Float2, FloatN >::operator()(), quda::blas::caxpydotzy< ReduceType, Float2, FloatN >::operator()(), and quda::blas::caxpbypzYmbwcDotProductUYNormY_< ReduceType, Float2, FloatN >::operator()().

| __device__ __host__ void quda::blas::Caxpy_ | ( | const float2 & | a, |
| const float2 & | x, | ||
| float2 & | y | ||
| ) |
Definition at line 377 of file reduce_quda.cu.
| __device__ __host__ void quda::blas::Caxpy_ | ( | const float2 & | a, |
| const float4 & | x, | ||
| float4 & | y | ||
| ) |
Definition at line 381 of file reduce_quda.cu.
| void quda::blas::caxpy_L | ( | const Complex * | a, |
| std::vector< ColorSpinorField *> & | x, | ||
| std::vector< ColorSpinorField *> & | y | ||
| ) |
Compute the block "caxpy_L" over the set of ColorSpinorFields. E.g., it computes:
y = x * a + y
Where 'a' must be a square, lower triangular matrix.
| a[in] | Matrix of coefficients |
| x[in] | vector of input ColorSpinorFields |
| y[in,out] | vector of input/output ColorSpinorFields |
Definition at line 369 of file multi_blas_quda.cu.
References caxpy_recurse(), errorQuda, x, and y.
Referenced by caxpy_L().


| void quda::blas::caxpy_L | ( | const Complex * | a, |
| ColorSpinorField & | x, | ||
| ColorSpinorField & | y | ||
| ) |
This is a wrapper for calling the block "caxpy_L" with a composite ColorSpinorField. E.g., it computes:
y = x * a + y
| a[in] | Matrix of coefficients |
| x[in] | Input matrix |
| y[in,out] | Computed output matrix |
Definition at line 386 of file multi_blas_quda.cu.
References a, caxpy_L(), x, and y.

| void quda::blas::caxpy_recurse | ( | const Complex * | a_, |
| std::vector< ColorSpinorField *> & | x, | ||
| std::vector< ColorSpinorField *> & | y, | ||
| int | i_idx, | ||
| int | j_idx, | ||
| int | upper | ||
| ) |
Definition at line 117 of file multi_blas_quda.cu.
References a, fused_exterior_ndeg_tm_dslash_cuda_gen::a1, b, c, quda::count, fused_exterior_ndeg_tm_dslash_cuda_gen::i, MAX_MULTI_BLAS_N, x, y, y0(), and y1().
Referenced by caxpy(), caxpy_L(), and caxpy_U().


| void quda::blas::caxpy_U | ( | const Complex * | a, |
| std::vector< ColorSpinorField *> & | x, | ||
| std::vector< ColorSpinorField *> & | y | ||
| ) |
Compute the block "caxpy_U" over the set of ColorSpinorFields. E.g., it computes:
y = x * a + y
Where 'a' must be a square, upper triangular matrix.
| a[in] | Matrix of coefficients |
| x[in] | vector of input ColorSpinorFields |
| y[in,out] | vector of input/output ColorSpinorFields |
Definition at line 357 of file multi_blas_quda.cu.
References caxpy_recurse(), errorQuda, x, and y.
Referenced by caxpy_U().


| void quda::blas::caxpy_U | ( | const Complex * | a, |
| ColorSpinorField & | x, | ||
| ColorSpinorField & | y | ||
| ) |
This is a wrapper for calling the block "caxpy_U" with a composite ColorSpinorField. E.g., it computes.
y = x * a + y
| a[in] | Matrix of coefficients |
| x[in] | Input matrix |
| y[in,out] | Computed output matrix |
Definition at line 384 of file multi_blas_quda.cu.
References a, caxpy_U(), x, and y.

| void quda::blas::caxpyBxpz | ( | const Complex & | a, |
| ColorSpinorField & | x, | ||
| ColorSpinorField & | y, | ||
| const Complex & | b, | ||
| ColorSpinorField & | z | ||
| ) |
Definition at line 438 of file blas_quda.cu.
References a, b, IMAG, REAL, x, y, and z.
Referenced by benchmark(), caxpyBxpz(), test(), and quda::BiCGstabL::updateXRend().

| void quda::blas::caxpyBxpz | ( | const Complex * | a_, |
| std::vector< ColorSpinorField *> & | x_, | ||
| ColorSpinorField & | y_, | ||
| const Complex * | b_, | ||
| ColorSpinorField & | z_ | ||
| ) |
Compute the vectorized "caxpyBxpz" over the set of ColorSpinorFields, where the second and third vectors, y and z, are constant over the batch. E.g., it computes:
y = a * x + y, z = b * x + z
The dimensions of a, b are the same as the size of x, with a maximum size of 16.
| a[in] | Array of coefficients |
| b[in] | Array of coefficients |
| x[in] | vector of ColorSpinorFields |
| y[in,out] | input ColorSpinorField |
| z[in,out] | input ColorSpinorField |
Definition at line 791 of file multi_blas_quda.cu.
References a, fused_exterior_ndeg_tm_dslash_cuda_gen::a1, b, c, caxpyBxpz(), MAX_MULTI_BLAS_N, w, x, xsize, and y.

| void quda::blas::caxpyBzpx | ( | const Complex & | a, |
| ColorSpinorField & | x, | ||
| ColorSpinorField & | y, | ||
| const Complex & | b, | ||
| ColorSpinorField & | z | ||
| ) |
| void quda::blas::caxpyCpu | ( | const Complex & | a, |
| const cpuColorSpinorField & | x, | ||
| cpuColorSpinorField & | y | ||
| ) |
Definition at line 80 of file blas_cpu.cpp.
References a, caxpby(), errorQuda, QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION, x, and y.
Referenced by cabxpyAxCpu(), cabxpyAxNormCpu(), caxpbypczpwCpu(), caxpbypzCpu(), caxpbypzYmbwCpu(), caxpyDotzyCpu(), caxpyNormCpu(), caxpyXmazCpu(), and caxpyXmazNormXCpu().


| Complex quda::blas::caxpyDotzy | ( | const Complex & | a, |
| ColorSpinorField & | x, | ||
| ColorSpinorField & | y, | ||
| ColorSpinorField & | z | ||
| ) |
Definition at line 544 of file reduce_quda.cu.
References a, IMAG, REAL, x, y, and z.
Referenced by benchmark(), quda::orthoDir(), quda::BiCGstabL::orthoDir(), and test().

| Complex quda::blas::caxpyDotzyCpu | ( | const Complex & | a, |
| cpuColorSpinorField & | x, | ||
| cpuColorSpinorField & | y, | ||
| cpuColorSpinorField & | z | ||
| ) |
Definition at line 304 of file blas_cpu.cpp.
References a, caxpyCpu(), cDotProductCpu(), x, y, and z.

| double quda::blas::caxpyNorm | ( | const Complex & | a, |
| ColorSpinorField & | x, | ||
| ColorSpinorField & | y | ||
| ) |
Definition at line 402 of file reduce_quda.cu.
References a, IMAG, REAL, x, and y.
Referenced by benchmark(), and test().

| double quda::blas::caxpyNormCpu | ( | const Complex & | a, |
| cpuColorSpinorField & | x, | ||
| cpuColorSpinorField & | y | ||
| ) |
Definition at line 265 of file blas_cpu.cpp.
References a, caxpyCpu(), norm2(), x, and y.

| void quda::blas::caxpyXmaz | ( | const Complex & | a, |
| ColorSpinorField & | x, | ||
| ColorSpinorField & | y, | ||
| ColorSpinorField & | z | ||
| ) |
Definition at line 549 of file blas_quda.cu.
References a, IMAG, REAL, x, y, and z.
Referenced by benchmark(), quda::MR::operator()(), and test().

| void quda::blas::caxpyXmazCpu | ( | const Complex & | a, |
| cpuColorSpinorField & | x, | ||
| cpuColorSpinorField & | y, | ||
| cpuColorSpinorField & | z | ||
| ) |
Definition at line 278 of file blas_cpu.cpp.
References a, caxpyCpu(), x, y, and z.

| void quda::blas::caxpyXmazMR | ( | const Complex & | a, |
| ColorSpinorField & | x, | ||
| ColorSpinorField & | y, | ||
| ColorSpinorField & | z | ||
| ) |
Definition at line 583 of file blas_quda.cu.
References a, commAsyncReduction(), errorQuda, IMAG, QUDA_CPU_FIELD_LOCATION, REAL, x, y, and z.
Referenced by quda::MR::operator()().


| double quda::blas::caxpyXmazNormX | ( | const Complex & | a, |
| ColorSpinorField & | x, | ||
| ColorSpinorField & | y, | ||
| ColorSpinorField & | z | ||
| ) |
Definition at line 424 of file reduce_quda.cu.
References a, IMAG, REAL, x, y, and z.
Referenced by benchmark(), and test().

| double quda::blas::caxpyXmazNormXCpu | ( | const Complex & | a, |
| cpuColorSpinorField & | x, | ||
| cpuColorSpinorField & | y, | ||
| cpuColorSpinorField & | z | ||
| ) |
Definition at line 271 of file blas_cpu.cpp.
References a, caxpyCpu(), norm2(), x, y, and z.

| void quda::blas::caxpyz | ( | const Complex * | a, |
| std::vector< ColorSpinorField *> & | x, | ||
| std::vector< ColorSpinorField *> & | y, | ||
| std::vector< ColorSpinorField *> & | z | ||
| ) |
Compute the block "caxpyz" over the set of ColorSpinorFields. E.g., it computes:
z = x * a + y
The dimensions of a can be rectangular, e.g., the widths of x and y need not be the same, though the maximum width for both is 16.
| a[in] | Matrix of coefficients |
| x[in] | vector of input ColorSpinorFields |
| y[in] | vector of input ColorSpinorFields |
| z[out] | vector of output ColorSpinorFields |
Definition at line 659 of file multi_blas_quda.cu.
References a, caxpyz_recurse(), x, y, and z.
Referenced by caxpyz().


| void quda::blas::caxpyz | ( | const Complex * | a, |
| ColorSpinorField & | x, | ||
| ColorSpinorField & | y, | ||
| ColorSpinorField & | z | ||
| ) |
This is a wrapper for calling the block "caxpyz" with a composite ColorSpinorField. E.g., it computes.
z = x * a + y
| a[in] | Matrix of coefficients |
| x[in] | Input matrix |
| y[in] | Input matrix |
| z[out] | Computed output matrix |
Definition at line 683 of file multi_blas_quda.cu.
References a, caxpyz(), x, y, and z.

| void quda::blas::caxpyz_L | ( | const Complex * | a, |
| std::vector< ColorSpinorField *> & | x, | ||
| std::vector< ColorSpinorField *> & | y, | ||
| std::vector< ColorSpinorField *> & | z | ||
| ) |
Compute the block "caxpyz" over the set of ColorSpinorFields. E.g., it computes:
z = x * a + y
Where 'a' is assumed to be lower triangular.
| a[in] | Matrix of coefficients |
| x[in] | vector of input ColorSpinorFields |
| y[in] | vector of input ColorSpinorFields |
| z[out] | vector of output ColorSpinorFields |
Definition at line 674 of file multi_blas_quda.cu.
References a, caxpyz_recurse(), x, y, and z.
Referenced by caxpyz_L().


| void quda::blas::caxpyz_L | ( | const Complex * | a, |
| ColorSpinorField & | x, | ||
| ColorSpinorField & | y, | ||
| ColorSpinorField & | z | ||
| ) |
This is a wrapper for calling the block "caxpyz" with a composite ColorSpinorField. E.g., it computes.
z = x * a + y
| a[in] | Matrix of coefficients |
| x[in] | Input matrix |
| y[in] | Input matrix |
| z[out] | Computed output matrix |
Definition at line 691 of file multi_blas_quda.cu.
References a, caxpyz_L(), x, y, and z.

| void quda::blas::caxpyz_recurse | ( | const Complex * | a_, |
| std::vector< ColorSpinorField *> & | x, | ||
| std::vector< ColorSpinorField *> & | y, | ||
| std::vector< ColorSpinorField *> & | z, | ||
| int | i, | ||
| int | j, | ||
| int | pass, | ||
| int | upper | ||
| ) |
Definition at line 416 of file multi_blas_quda.cu.
References a, fused_exterior_ndeg_tm_dslash_cuda_gen::a1, b, c, caxpy(), quda::count, fused_exterior_ndeg_tm_dslash_cuda_gen::i, MAX_MULTI_BLAS_N, x, y, y0(), y1(), and z.
Referenced by caxpyz(), caxpyz_L(), and caxpyz_U().


| void quda::blas::caxpyz_U | ( | const Complex * | a, |
| std::vector< ColorSpinorField *> & | x, | ||
| std::vector< ColorSpinorField *> & | y, | ||
| std::vector< ColorSpinorField *> & | z | ||
| ) |
Compute the block "caxpyz" over the set of ColorSpinorFields. E.g., it computes:
z = x * a + y
Where 'a' is assumed to be upper triangular.
| a[in] | Matrix of coefficients |
| x[in] | vector of input ColorSpinorFields |
| y[in] | vector of input ColorSpinorFields |
| z[out] | vector of output ColorSpinorFields |
Definition at line 666 of file multi_blas_quda.cu.
References a, caxpyz_recurse(), x, y, and z.
Referenced by caxpyz_U().


| void quda::blas::caxpyz_U | ( | const Complex * | a, |
| ColorSpinorField & | x, | ||
| ColorSpinorField & | y, | ||
| ColorSpinorField & | z | ||
| ) |
This is a wrapper for calling the block "caxpyz" with a composite ColorSpinorField. E.g., it computes.
z = x * a + y
| a[in] | Matrix of coefficients |
| x[in] | Input matrix |
| y[in] | Input matrix |
| z[out] | Computed output matrix |
Definition at line 687 of file multi_blas_quda.cu.
References a, caxpyz_U(), x, y, and z.

| __device__ __host__ void quda::blas::cdot_ | ( | ReduceType & | sum, |
| const double2 & | a, | ||
| const double2 & | b | ||
| ) |
Returns complex-valued dot product of x and y
Definition at line 226 of file multi_reduce_quda.cu.

| __device__ __host__ void quda::blas::cdot_ | ( | ReduceType & | sum, |
| const float2 & | a, | ||
| const float2 & | b | ||
| ) |
| __device__ __host__ void quda::blas::cdot_ | ( | ReduceType & | sum, |
| const float4 & | a, | ||
| const float4 & | b | ||
| ) |
| __device__ __host__ void quda::blas::cdotNormA_ | ( | ReduceType & | sum, |
| const InputType & | a, | ||
| const InputType & | b | ||
| ) |
First returns the dot product (x,y), then returns the norm of x.
Definition at line 556 of file reduce_quda.cu.

| __device__ __host__ void quda::blas::cdotNormB_ | ( | ReduceType & | sum, |
| const InputType & | a, | ||
| const InputType & | b | ||
| ) |
First returns the dot product (x,y), then returns the norm of y.
Definition at line 583 of file reduce_quda.cu.

| Complex quda::blas::cDotProduct | ( | ColorSpinorField & | x, |
| ColorSpinorField & | y | ||
| ) |
Definition at line 500 of file reduce_quda.cu.
Referenced by benchmark(), cDotProductCpu(), quda::computeBeta(), quda::BiCGstabL::computeTau(), quda::GMResDR::FlexArnoldiProcedure(), quda::MG::generateNullVectors(), quda::Eig_Solver::GrandSchm_test(), quda::Deflation::increment(), invertMultiShiftQuda(), invertQuda(), quda::Deflation::operator()(), quda::CG::operator()(), quda::BiCGstab::operator()(), quda::BiCGstabL::operator()(), quda::MultiShiftCG::operator()(), quda::MinResExt::operator()(), quda::GMResDR::operator()(), quda::orthoDir(), quda::BiCGstabL::orthoDir(), quda::EigCGArgs::RestartLanczos(), quda::GMResDR::RestartVZH(), quda::solve(), quda::CG::solve(), test(), and quda::MG::verify().

| Complex quda::blas::cDotProduct | ( | const std::complex< Float > * | a, |
| const std::complex< Float > * | b, | ||
| const int | N | ||
| ) |
Definition at line 212 of file blas_cpu.cpp.
References a, b, quda::conj(), dot(), and fused_exterior_ndeg_tm_dslash_cuda_gen::i.

| void quda::blas::cDotProduct | ( | Complex * | result, |
| std::vector< ColorSpinorField *> & | a, | ||
| std::vector< ColorSpinorField *> & | b | ||
| ) |
Computes the matrix of inner products between the vector set a and the vector set b.
| result[out] | Matrix of inner product result[i][j] = (a[j],b[i]) |
| a[in] | set of input ColorSpinorFields |
| b[in] | set of input ColorSpinorFields |
Definition at line 594 of file multi_reduce_quda.cu.
References quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >::apply(), errorQuda, fused_exterior_ndeg_tm_dslash_cuda_gen::i, reduceDoubleArray(), x, and y.

| void quda::blas::cDotProductCopy | ( | Complex * | result, |
| std::vector< ColorSpinorField *> & | a, | ||
| std::vector< ColorSpinorField *> & | b, | ||
| std::vector< ColorSpinorField *> & | c | ||
| ) |
Computes the matrix of inner products between the vector set a and the vector set b, and copies b into c.
| result[out] | Matrix of inner product result[i][j] = (a[j],b[i]) |
| a[in] | set of input ColorSpinorFields |
| b[in] | set of input ColorSpinorFields |
| c[out] | set of output ColorSpinorFields |
Definition at line 673 of file multi_reduce_quda.cu.
References quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >::apply(), errorQuda, fused_exterior_ndeg_tm_dslash_cuda_gen::i, reduceDoubleArray(), x, y, and z.

| Complex quda::blas::cDotProductCpu | ( | const cpuColorSpinorField & | a, |
| const cpuColorSpinorField & | b | ||
| ) |
Definition at line 218 of file blas_cpu.cpp.
References a, b, cDotProduct(), dot(), errorQuda, QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION, and reduceDoubleArray().
Referenced by caxpyDotzyCpu(), cDotProductNormACpu(), cDotProductNormBCpu(), and xpaycDotzyCpu().


| double3 quda::blas::cDotProductNormA | ( | ColorSpinorField & | a, |
| ColorSpinorField & | b | ||
| ) |
Definition at line 572 of file reduce_quda.cu.
Referenced by benchmark(), quda::CG::operator()(), quda::BiCGstab::operator()(), quda::BiCGstabL::operator()(), quda::GCR::operator()(), quda::MR::operator()(), quda::Deflation::reduce(), test(), and quda::Deflation::verify().

| double3 quda::blas::cDotProductNormACpu | ( | const cpuColorSpinorField & | a, |
| const cpuColorSpinorField & | b | ||
| ) |
Definition at line 238 of file blas_cpu.cpp.
References a, b, cDotProductCpu(), dot(), norm(), and normCpu().

| double3 quda::blas::cDotProductNormB | ( | ColorSpinorField & | a, |
| ColorSpinorField & | b | ||
| ) |
Definition at line 599 of file reduce_quda.cu.
Referenced by benchmark(), quda::MR::operator()(), and test().

| double3 quda::blas::cDotProductNormBCpu | ( | const cpuColorSpinorField & | a, |
| const cpuColorSpinorField & | b | ||
| ) |
Definition at line 244 of file blas_cpu.cpp.
References a, b, cDotProductCpu(), dot(), norm(), and normCpu().
Referenced by caxpbypzYmbwcDotProductUYNormYCpu().


| void quda::blas::copy | ( | ColorSpinorField & | dst, |
| const ColorSpinorField & | src | ||
| ) |
Definition at line 263 of file copy_quda.cu.
References quda::blas::copy_ns::copy(), quda::LatticeField::Location(), QUDA_CUDA_FIELD_LOCATION, and src.
Referenced by benchmark(), comm_declare_send_relative_(), comm_declare_strided_send_relative_(), quda::cudaColorSpinorField::copy(), quda::IncEigCG::eigCGsolve(), quda::Deflation::increment(), invertMultiShiftQuda(), invertMultiSrcQuda(), invertQuda(), quda::CG::operator()(), quda::BiCGstab::operator()(), quda::BiCGstabL::operator()(), quda::GCR::operator()(), quda::MR::operator()(), quda::MultiShiftCG::operator()(), quda::Deflation::reduce(), quda::IncEigCG::RestartVT(), quda::GMResDR::RestartVZH(), quda::CG::solve(), test(), and quda::IncEigCG::UpdateVm().


| void quda::blas::cxpaypbz | ( | ColorSpinorField & | x, |
| const Complex & | b, | ||
| ColorSpinorField & | y, | ||
| const Complex & | c, | ||
| ColorSpinorField & | z | ||
| ) |
Definition at line 335 of file blas_quda.cu.
References a, b, IMAG, REAL, x, y, and z.
Referenced by benchmark(), quda::BiCGstab::operator()(), and test().

| void quda::blas::cxpaypbzCpu | ( | const cpuColorSpinorField & | x, |
| const Complex & | a, | ||
| const cpuColorSpinorField & | y, | ||
| const Complex & | b, | ||
| cpuColorSpinorField & | z | ||
| ) |
Definition at line 116 of file blas_cpu.cpp.
References a, b, caxpbypcz(), errorQuda, QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION, x, y, and z.

| __device__ __host__ void quda::blas::dot_ | ( | ReduceType & | sum, |
| const double2 & | a, | ||
| const double2 & | b | ||
| ) |
Return the real dot product of x and y. Broken at the moment: reDotProduct needs to be updated with the permuting, etc. of cDotProduct below.
Return the real dot product of x and y
Definition at line 114 of file multi_reduce_quda.cu.

| __device__ __host__ void quda::blas::dot_ | ( | ReduceType & | sum, |
| const float2 & | a, | ||
| const float2 & | b | ||
| ) |
| __device__ __host__ void quda::blas::dot_ | ( | ReduceType & | sum, |
| const float4 & | a, | ||
| const float4 & | b | ||
| ) |
| __device__ __host__ ReduceType quda::blas::dotNormA_ | ( | const InputType & | a, |
| const InputType & | b | ||
| ) |
Returns the real component of the dot product of a and b and the norm of a
Definition at line 288 of file reduce_quda.cu.
| void quda::blas::end | ( | void | ) |
Definition at line 70 of file blas_quda.cu.
References endReduce().
Referenced by endQuda(), and quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >::TileSizeTune().


| void quda::blas::endReduce | ( | void | ) |
Definition at line 134 of file reduce_quda.cu.
References d_reduce, device_free, h_reduce, hd_reduce, host_free, and reduceEnd.
Referenced by end().

| void * quda::blas::getDeviceReduceBuffer | ( | ) |
Definition at line 73 of file reduce_quda.cu.
References d_reduce.
| void * quda::blas::getHostReduceBuffer | ( | ) |
Definition at line 75 of file reduce_quda.cu.
References h_reduce.
Referenced by multiReduceLaunch().

| void * quda::blas::getMappedHostReduceBuffer | ( | ) |
Definition at line 74 of file reduce_quda.cu.
References hd_reduce.
Referenced by multiReduceLaunch().

| cudaEvent_t * quda::blas::getReduceEvent | ( | ) |
Definition at line 76 of file reduce_quda.cu.
References reduceEnd.
Referenced by multiReduceLaunch().

| cudaStream_t * quda::blas::getStream | ( | ) |
Definition at line 75 of file blas_quda.cu.
References blasStream.
Referenced by quda::blas::copy_ns::copy(), multiblasCuda(), multiReduceCuda(), and reduceCuda().

| void quda::blas::hDotProduct | ( | Complex * | result, |
| std::vector< ColorSpinorField *> & | a, | ||
| std::vector< ColorSpinorField *> & | b | ||
| ) |
Computes the matrix of inner products between the vector set a and the vector set b. This routine is specifically for the case where the result matrix is guaranteed to be Hermitian. Requires a.size()==b.size().
| result[out] | Matrix of inner product result[i][j] = (a[j],b[i]) |
| a[in] | set of input ColorSpinorFields |
| b[in] | set of input ColorSpinorFields |
Definition at line 619 of file multi_reduce_quda.cu.
References quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >::apply(), quda::conj(), errorQuda, fused_exterior_ndeg_tm_dslash_cuda_gen::i, reduceDoubleArray(), x, and y.

| void quda::blas::hDotProduct_Anorm | ( | Complex * | result, |
| std::vector< ColorSpinorField *> & | a, | ||
| std::vector< ColorSpinorField *> & | b | ||
| ) |
Computes the matrix of inner products between the vector set a and the vector set b. This routine is specifically for the case where the result matrix is guaranteed to be Hermitian. Uniquely defined for cases like (p, Ap) where the output is Hermitian, but there's an A-norm instead of an L2 norm. Requires a.size()==b.size().
| result[out] | Matrix of inner product result[i][j] = (a[j],b[i]) |
| a[in] | set of input ColorSpinorFields |
| b[in] | set of input ColorSpinorFields |
Definition at line 646 of file multi_reduce_quda.cu.
References quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >::apply(), quda::conj(), errorQuda, fused_exterior_ndeg_tm_dslash_cuda_gen::i, reduceDoubleArray(), x, and y.

| double3 quda::blas::HeavyQuarkResidualNorm | ( | ColorSpinorField & | x, |
| ColorSpinorField & | r | ||
| ) |
Definition at line 703 of file reduce_quda.cu.
References comm_size(), and x.
Referenced by benchmark(), quda::IncEigCG::eigCGsolve(), invert_test(), quda::CG::operator()(), quda::CGNR::operator()(), quda::PreconCG::operator()(), quda::BiCGstab::operator()(), quda::BiCGstabL::operator()(), quda::GCR::operator()(), quda::MultiShiftCG::operator()(), quda::IncEigCG::operator()(), quda::GMResDR::operator()(), quda::CG::solve(), and test().


| double3 quda::blas::HeavyQuarkResidualNorm | ( | const Float * | x, |
| const Float * | r, | ||
| const int | volume, | ||
| const int | Nint | ||
| ) |
Definition at line 311 of file blas_cpu.cpp.
References fused_exterior_ndeg_tm_dslash_cuda_gen::i, sum(), and x.

| double3 quda::blas::HeavyQuarkResidualNormCpu | ( | cpuColorSpinorField & | x, |
| cpuColorSpinorField & | r | ||
| ) |
Definition at line 332 of file blas_cpu.cpp.
References comm_size(), errorQuda, QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION, reduceDoubleArray(), quda::ColorSpinorField::V(), and x.
Referenced by HeavyQuarkResidualNormCpu().


| double3 quda::blas::HeavyQuarkResidualNormCpu | ( | cpuColorSpinorField & | x, |
| cpuColorSpinorField & | y, | ||
| cpuColorSpinorField & | r | ||
| ) |
Definition at line 353 of file blas_cpu.cpp.
References HeavyQuarkResidualNormCpu(), tmp, x, xpyCpu(), and y.

| void quda::blas::init | ( | ) |
Definition at line 64 of file blas_quda.cu.
References blasStream, initReduce(), quda::Nstream, and streams.
Referenced by comm_peer2peer_enabled_global(), getRankVerbosity(), getTuning(), initQudaMemory(), and quda::traceEnabled().


| void quda::blas::initReduce | ( | ) |
Definition at line 78 of file reduce_quda.cu.
References bytes, checkCudaError, d_reduce, device_malloc, deviceProp, h_reduce, hd_reduce, mapped_malloc, MAX_MULTI_BLAS_N, memset(), pinned_malloc, QudaSumFloat, and reduceEnd.
Referenced by init().


| void quda::blas::multiReduce_recurse | ( | Complex * | result, |
| std::vector< ColorSpinorField *> & | x, | ||
| std::vector< ColorSpinorField *> & | y, | ||
| std::vector< ColorSpinorField *> & | z, | ||
| std::vector< ColorSpinorField *> & | w, | ||
| int | i_idx, | ||
| int | j_idx, | ||
| bool | hermitian, | ||
| unsigned int | tile_size | ||
| ) |
Definition at line 282 of file multi_reduce_quda.cu.
References a, b, c, quda::count, fused_exterior_ndeg_tm_dslash_cuda_gen::i, w, x, y, y0(), y1(), and z.

| void quda::blas::mxpy | ( | ColorSpinorField & | x, |
| ColorSpinorField & | y | ||
| ) |
Definition at line 192 of file blas_quda.cu.
Referenced by benchmark(), invert_test(), main(), and test().

| void quda::blas::mxpyCpu | ( | const cpuColorSpinorField & | x, |
| cpuColorSpinorField & | y | ||
| ) |
Definition at line 52 of file blas_cpu.cpp.
References axpby(), errorQuda, f, QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION, x, and y.

Definition at line 161 of file blas_cpu.cpp.
References a, fused_exterior_ndeg_tm_dslash_cuda_gen::i, and norm2().
Referenced by cDotProductNormACpu(), cDotProductNormBCpu(), quda::ComputeHarmonicRitz< libtype::eigen_lib >(), quda::ComputeHarmonicRitz< libtype::magma_lib >(), and normCpu().


| double quda::blas::norm1 | ( | const ColorSpinorField & | b | ) |
Definition at line 200 of file reduce_quda.cu.
References errorQuda, x, and y.
Referenced by getLambdaMax(), getRealBidiagMatrix(), and quda::norm1().

| __device__ __host__ ReduceType quda::blas::norm1_ | ( | const double2 & | a | ) |
Return the L1 norm of x
Definition at line 179 of file reduce_quda.cu.

| __device__ __host__ ReduceType quda::blas::norm1_ | ( | const float2 & | a | ) |
| __device__ __host__ ReduceType quda::blas::norm1_ | ( | const float4 & | a | ) |
| double quda::blas::norm2 | ( | const ColorSpinorField & | a | ) |
Definition at line 241 of file reduce_quda.cu.
Referenced by benchmark(), cabxpyAxNormCpu(), caxpyNormCpu(), caxpyXmazNormXCpu(), cloverQuda(), dslashQuda(), dslashQuda_4dpc(), dslashQuda_mdwf(), dslashTest(), quda::IncEigCG::eigCGsolve(), quda::Deflation::increment(), init(), invertMultiShiftQuda(), invertMultiSrcQuda(), invertQuda(), lanczosQuda(), main(), quda::massRescale(), MatDagMatQuda(), MatQuda(), norm(), quda::norm2(), normCpu(), quda::CG::operator()(), quda::CGNR::operator()(), quda::MPCG::operator()(), quda::BiCGstab::operator()(), quda::MPBiCGstab::operator()(), quda::BiCGstabL::operator()(), quda::GCR::operator()(), quda::MR::operator()(), quda::MultiShiftCG::operator()(), quda::MinResExt::operator()(), packTest(), performWuppertalnStep(), quda::GMResDR::RestartVZH(), quda::CG::solve(), test(), and TEST_P().

| __device__ __host__ void quda::blas::norm2_ | ( | ReduceType & | sum, |
| const double2 & | a | ||
| ) |
Return the L2 norm of x
Definition at line 214 of file reduce_quda.cu.

| __device__ __host__ void quda::blas::norm2_ | ( | ReduceType & | sum, |
| const float2 & | a | ||
| ) |
| __device__ __host__ void quda::blas::norm2_ | ( | ReduceType & | sum, |
| const float4 & | a | ||
| ) |
| double quda::blas::normCpu | ( | const cpuColorSpinorField & | a | ) |
Definition at line 167 of file blas_cpu.cpp.
References a, errorQuda, norm(), norm2(), QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION, and reduceDouble().
Referenced by axpyNormCpu(), cDotProductNormACpu(), cDotProductNormBCpu(), and xmyNormCpu().


| double4 quda::blas::quadrupleCGReduction | ( | ColorSpinorField & | x, |
| ColorSpinorField & | y, | ||
| ColorSpinorField & | z | ||
| ) |
| double quda::blas::reDotProduct | ( | ColorSpinorField & | x, |
| ColorSpinorField & | y | ||
| ) |
Definition at line 277 of file reduce_quda.cu.
Referenced by benchmark(), quda::IncEigCG::eigCGsolve(), quda::Lanczos::operator()(), quda::CG::operator()(), quda::PreconCG::operator()(), quda::MultiShiftCG::operator()(), reDotProductCpu(), and test().

| double quda::blas::reDotProduct | ( | const Float * | a, |
| const Float * | b, | ||
| const int | N | ||
| ) |
Definition at line 186 of file blas_cpu.cpp.
References a, b, dot(), and fused_exterior_ndeg_tm_dslash_cuda_gen::i.

| void quda::blas::reDotProduct | ( | double * | result, |
| std::vector< ColorSpinorField *> & | a, | ||
| std::vector< ColorSpinorField *> & | b | ||
| ) |
Definition at line 142 of file multi_reduce_quda.cu.
References errorQuda, reduceDoubleArray(), x, and y.

| double quda::blas::reDotProductCpu | ( | const cpuColorSpinorField & | a, |
| const cpuColorSpinorField & | b | ||
| ) |
Definition at line 192 of file blas_cpu.cpp.
References a, b, dot(), errorQuda, QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION, reDotProduct(), and reduceDouble().

| double2 quda::blas::reDotProductNormA | ( | ColorSpinorField & | a, |
| ColorSpinorField & | b | ||
| ) |
Definition at line 305 of file reduce_quda.cu.
Referenced by quda::SD::operator()().

| double3 quda::blas::tripleCGReduction | ( | ColorSpinorField & | x, |
| ColorSpinorField & | y, | ||
| ColorSpinorField & | z | ||
| ) |
Definition at line 767 of file reduce_quda.cu.
Referenced by benchmark(), quda::CG::operator()(), and test().

| void quda::blas::tripleCGUpdate | ( | const double & | alpha, |
| const double & | beta, | ||
| ColorSpinorField & | q, | ||
| ColorSpinorField & | r, | ||
| ColorSpinorField & | x, | ||
| ColorSpinorField & | p | ||
| ) |
Definition at line 610 of file blas_quda.cu.
References a, b, w, x, y, and z.
Referenced by benchmark(), quda::CG::operator()(), and test().

| double quda::blas::xmyNorm | ( | ColorSpinorField & | x, |
| ColorSpinorField & | y | ||
| ) |
Definition at line 364 of file reduce_quda.cu.
Referenced by benchmark(), quda::IncEigCG::eigCGsolve(), quda::MG::operator()(), quda::CG::operator()(), quda::CGNR::operator()(), quda::MPCG::operator()(), quda::PreconCG::operator()(), quda::BiCGstab::operator()(), quda::MPBiCGstab::operator()(), quda::BiCGstabL::operator()(), quda::GCR::operator()(), quda::MR::operator()(), quda::SD::operator()(), quda::MultiShiftCG::operator()(), quda::IncEigCG::operator()(), quda::GMResDR::operator()(), quda::CG::solve(), test(), and quda::MG::verify().

| double quda::blas::xmyNormCpu | ( | const cpuColorSpinorField & | x, |
| cpuColorSpinorField & | y | ||
| ) |

| void quda::blas::xpay | ( | ColorSpinorField & | x, |
| const double & | a, | ||
| ColorSpinorField & | y | ||
| ) |
Definition at line 173 of file blas_quda.cu.
Referenced by benchmark(), clover_mat(), clover_matpc(), quda::DiracCoarsePC::DslashXpay(), dw_4d_mat(), dw_4d_matpc(), dw_mat(), dw_matpc(), quda::IncEigCG::initCGsolve(), quda::LaplaceArg< Float, nColor, reconstruct, xpay >::isXpay(), quda::DiracCoarsePC::M(), mat(), mdw_dslash_5(), mdw_mat(), mdw_matpc(), quda::PreconCG::operator()(), quda::MultiShiftCG::operator()(), quda::DiracCoarsePC::prepare(), quda::DiracCoarsePC::reconstruct(), quda::Deflation::reduce(), quda::IncEigCG::RestartVT(), staggered_matpc(), test(), tm_mat(), tm_matpc(), tm_ndeg_mat(), tm_ndeg_matpc(), tmc_mat(), tmc_matpc(), quda::Deflation::verify(), wil_mat(), and wil_matpc().

| Complex quda::blas::xpaycDotzy | ( | ColorSpinorField & | x, |
| const double & | a, | ||
| ColorSpinorField & | y, | ||
| ColorSpinorField & | z | ||
| ) |
Definition at line 521 of file reduce_quda.cu.
Referenced by benchmark(), and test().

| Complex quda::blas::xpaycDotzyCpu | ( | const cpuColorSpinorField & | x, |
| const double & | a, | ||
| cpuColorSpinorField & | y, | ||
| const cpuColorSpinorField & | z | ||
| ) |
Definition at line 232 of file blas_cpu.cpp.
References a, cDotProductCpu(), x, xpayCpu(), y, and z.

| void quda::blas::xpayCpu | ( | const cpuColorSpinorField & | x, |
| const double & | a, | ||
| cpuColorSpinorField & | y | ||
| ) |
Definition at line 42 of file blas_cpu.cpp.
References a, axpby(), errorQuda, f, float, QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION, x, and y.
Referenced by axpyZpbxCpu(), xmyNormCpu(), and xpaycDotzyCpu().


| void quda::blas::xpayz | ( | ColorSpinorField & | x, |
| const double & | a, | ||
| ColorSpinorField & | y, | ||
| ColorSpinorField & | z | ||
| ) |
Definition at line 177 of file blas_quda.cu.
Referenced by quda::CG::operator()().

| void quda::blas::xpy | ( | ColorSpinorField & | x, |
| ColorSpinorField & | y | ||
| ) |
Definition at line 128 of file blas_quda.cu.
Referenced by benchmark(), quda::IncEigCG::eigCGsolve(), quda::MG::loadVectors(), quda::MG::operator()(), quda::CG::operator()(), quda::PreconCG::operator()(), quda::BiCGstab::operator()(), quda::BiCGstabL::operator()(), quda::GCR::operator()(), quda::MultiShiftCG::operator()(), quda::IncEigCG::operator()(), quda::GMResDR::operator()(), quda::CG::solve(), and test().

| void quda::blas::xpyCpu | ( | const cpuColorSpinorField & | x, |
| cpuColorSpinorField & | y | ||
| ) |
Definition at line 23 of file blas_cpu.cpp.
References axpby(), errorQuda, f, QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION, x, and y.
Referenced by HeavyQuarkResidualNormCpu().


| double3 quda::blas::xpyHeavyQuarkResidualNorm | ( | ColorSpinorField & | x, |
| ColorSpinorField & | y, | ||
| ColorSpinorField & | r | ||
| ) |
Definition at line 742 of file reduce_quda.cu.
References comm_size(), x, and y.
Referenced by benchmark(), quda::CG::operator()(), quda::BiCGstab::operator()(), quda::BiCGstabL::operator()(), and test().


| void quda::blas::zero | ( | ColorSpinorField & | a | ) |
Definition at line 45 of file blas_quda.cu.
References a.
Referenced by quda::IncEigCG::eigCGsolve(), genericReduce(), invertQuda(), quda::CG::operator()(), quda::MPCG::operator()(), quda::BiCGstab::operator()(), quda::MPBiCGstab::operator()(), quda::BiCGstabL::operator()(), quda::GCR::operator()(), quda::MR::operator()(), quda::MultiShiftCG::operator()(), quda::MinResExt::operator()(), quda::IncEigCG::operator()(), quda::GMResDR::operator()(), quda::Deflation::reduce(), reduceCuda(), reduceKernel(), quda::IncEigCG::RestartVT(), quda::GMResDR::RestartVZH(), and quda::CG::solve().

| const char* quda::blas::aux_str |
Definition at line 57 of file blas_quda.cu.
| char quda::blas::aux_tmp[TuneKey::aux_n] |
Definition at line 58 of file blas_quda.cu.
Referenced by quda::ColorSpinorField::setTuningString().
| cudaStream_t* quda::blas::blasStream | static |
Definition at line 53 of file blas_quda.cu.
Referenced by blasCuda(), getStream(), and init().
| struct { ... } quda::blas::blasStrings |
| struct { ... } quda::blas::blasStrings |
Referenced by blasCuda(), multiblasCuda(), multiReduceCuda(), reduceCuda(), BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Functor >::tuneKey(), ReduceCuda< doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer >::tuneKey(), MultiBlasCuda< NXZ, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Functor >::tuneKey(), and MultiReduceCuda< NXZ, doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Reducer >::tuneKey().
| unsigned long long quda::blas::bytes |
Definition at line 43 of file blas_quda.cu.
Referenced by Spinor< RegType, StoreType, N, write, tex_id >::backup(), blasCuda(), quda::blas::copy_ns::copy(), quda::create_gauge_buffer(), quda::create_ghost_buffer(), initReduce(), multiblasCuda(), multiReduceCuda(), Texture< InterType, StoreType, -1 >::operator=(), mixed::reduceCuda(), reduceCuda(), register_pinned_quda_(), Spinor< RegType, StoreType, N, write, tex_id >::restore(), and TEST_P().
| unsigned long long quda::blas::flops |
Definition at line 42 of file blas_quda.cu.
Referenced by quda::cublas::BatchInvertMatrix(), blasCuda(), quda::calculateY(), quda::completeKSForce(), computeHISQForceQuda(), quda::DiracTwistedCloverPC::Dslash(), dslashTest(), quda::DiracTwistedCloverPC::DslashXpay(), quda::IncEigCG::eigCGsolve(), gauge_force_test(), llfat_test(), main(), multiblasCuda(), multiReduceCuda(), quda::CG::operator()(), quda::PreconCG::operator()(), quda::BiCGstab::operator()(), quda::BiCGstabL::operator()(), quda::GCR::operator()(), quda::MR::operator()(), quda::MultiShiftCG::operator()(), quda::GMResDR::operator()(), reduceCuda(), quda::CG::solve(), TEST_P(), and total_staple_io_flops().
| const char* quda::blas::vol_str |
Definition at line 56 of file blas_quda.cu.
Generated by Doxygen 1.8.14