QUDA  0.9.0
Namespaces | Classes | Functions | Variables
quda::blas Namespace Reference

Namespaces

 copy_ns
 
 multi
 
 reduce
 

Classes

struct  ax_
 
struct  axpby_
 
struct  axpy_
 
struct  axpyBzpcx_
 
struct  axpyCGNorm2
 
struct  axpyNorm2
 
struct  AxpyReDot
 
struct  axpyZpbx_
 
struct  BlasFunctor
 
struct  cabxpyAx_
 
struct  cabxpyaxnorm
 
struct  caxpby_
 
struct  caxpbypczpw_
 
struct  caxpbypz_
 
struct  caxpbypzYmbw_
 
struct  caxpbypzYmbwcDotProductUYNormY_
 
struct  caxpy_
 
struct  caxpyBxpz_
 
struct  caxpyBzpx_
 
struct  caxpydotzy
 
struct  caxpyNorm2
 
struct  caxpyxmaz_
 
struct  caxpyxmazMR_
 
struct  caxpyxmaznormx
 
struct  Cdot
 
struct  CdotCopy
 
struct  CdotNormA
 
struct  CdotNormB
 
struct  cxpaypbz_
 
struct  Dot
 
struct  DotNormA
 
struct  HeavyQuarkResidualNorm_
 
struct  multi_axpyBzpcx_
 
struct  multi_caxpyBxpz_
 
struct  MultiBlasFunctor
 
struct  multicaxpy_
 
struct  multicaxpyz_
 
struct  MultiReduceFunctor
 
struct  mxpy_
 
struct  Norm1
 
struct  Norm2
 
struct  ReduceFunctor
 
class  TileSizeTune
 
struct  tripleCGReduction_
 
struct  tripleCGUpdate_
 
struct  write
 
struct  xmyNorm2
 
struct  xpaycdotzy
 
struct  xpayz_
 
struct  xpy_
 
struct  xpyHeavyQuarkResidualNorm_
 

Functions

void init ()
 
void end (void)
 
void * getDeviceReduceBuffer ()
 
void * getMappedHostReduceBuffer ()
 
void * getHostReduceBuffer ()
 
void setParam (int kernel, int prec, int threads, int blocks)
 
double norm2 (const ColorSpinorField &a)
 
double norm1 (const ColorSpinorField &b)
 
void zero (ColorSpinorField &a)
 
void copy (ColorSpinorField &dst, const ColorSpinorField &src)
 
double axpyNorm (const double &a, ColorSpinorField &x, ColorSpinorField &y)
 
double axpyReDot (const double &a, ColorSpinorField &x, ColorSpinorField &y)
 
double reDotProduct (ColorSpinorField &x, ColorSpinorField &y)
 
double2 reDotProductNormA (ColorSpinorField &a, ColorSpinorField &b)
 
double xmyNorm (ColorSpinorField &x, ColorSpinorField &y)
 
void axpby (const double &a, ColorSpinorField &x, const double &b, ColorSpinorField &y)
 
void axpy (const double &a, ColorSpinorField &x, ColorSpinorField &y)
 
void ax (const double &a, ColorSpinorField &x)
 
void xpy (ColorSpinorField &x, ColorSpinorField &y)
 
void xpay (ColorSpinorField &x, const double &a, ColorSpinorField &y)
 
void xpayz (ColorSpinorField &x, const double &a, ColorSpinorField &y, ColorSpinorField &z)
 
void mxpy (ColorSpinorField &x, ColorSpinorField &y)
 
void axpyZpbx (const double &a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, const double &b)
 
void axpyBzpcx (const double &a, ColorSpinorField &x, ColorSpinorField &y, const double &b, ColorSpinorField &z, const double &c)
 
void caxpby (const Complex &a, ColorSpinorField &x, const Complex &b, ColorSpinorField &y)
 
void caxpy (const Complex &a, ColorSpinorField &x, ColorSpinorField &y)
 
void cxpaypbz (ColorSpinorField &, const Complex &b, ColorSpinorField &y, const Complex &c, ColorSpinorField &z)
 
void caxpbypzYmbw (const Complex &, ColorSpinorField &, const Complex &, ColorSpinorField &, ColorSpinorField &, ColorSpinorField &)
 
void caxpyBzpx (const Complex &, ColorSpinorField &, ColorSpinorField &, const Complex &, ColorSpinorField &)
 
void caxpyBxpz (const Complex &, ColorSpinorField &, ColorSpinorField &, const Complex &, ColorSpinorField &)
 
Complex cDotProduct (ColorSpinorField &, ColorSpinorField &)
 
Complex xpaycDotzy (ColorSpinorField &x, const double &a, ColorSpinorField &y, ColorSpinorField &z)
 
double3 cDotProductNormA (ColorSpinorField &a, ColorSpinorField &b)
 
double3 cDotProductNormB (ColorSpinorField &a, ColorSpinorField &b)
 
double3 caxpbypzYmbwcDotProductUYNormY (const Complex &a, ColorSpinorField &x, const Complex &b, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &u)
 
void cabxpyAx (const double &a, const Complex &b, ColorSpinorField &x, ColorSpinorField &y)
 
double caxpyNorm (const Complex &a, ColorSpinorField &x, ColorSpinorField &y)
 
void caxpyXmaz (const Complex &a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
 
void caxpyXmazMR (const Complex &a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
 
double caxpyXmazNormX (const Complex &a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
 
double cabxpyAxNorm (const double &a, const Complex &b, ColorSpinorField &x, ColorSpinorField &y)
 
void caxpbypz (const Complex &, ColorSpinorField &, const Complex &, ColorSpinorField &, ColorSpinorField &)
 
void caxpbypczpw (const Complex &, ColorSpinorField &, const Complex &, ColorSpinorField &, const Complex &, ColorSpinorField &, ColorSpinorField &)
 
Complex caxpyDotzy (const Complex &a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
 
Complex axpyCGNorm (const double &a, ColorSpinorField &x, ColorSpinorField &y)
 
double3 HeavyQuarkResidualNorm (ColorSpinorField &x, ColorSpinorField &r)
 
double3 xpyHeavyQuarkResidualNorm (ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &r)
 
void tripleCGUpdate (const double &alpha, const double &beta, ColorSpinorField &q, ColorSpinorField &r, ColorSpinorField &x, ColorSpinorField &p)
 
double3 tripleCGReduction (ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
 
double4 quadrupleCGReduction (ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
 
void caxpy (const Complex *a, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y)
 Compute the block "caxpy" over the set of ColorSpinorFields. E.g., it computes. More...
 
void caxpy (const Complex *a, ColorSpinorField &x, ColorSpinorField &y)
 This is a wrapper for calling the block "caxpy" with a composite ColorSpinorField. E.g., it computes. More...
 
void caxpy_U (const Complex *a, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y)
 Compute the block "caxpy_U" over the set of ColorSpinorFields. E.g., it computes. More...
 
void caxpy_U (const Complex *a, ColorSpinorField &x, ColorSpinorField &y)
 This is a wrapper for calling the block "caxpy_U" with a composite ColorSpinorField. E.g., it computes. More...
 
void caxpy_L (const Complex *a, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y)
 Compute the block "caxpy_L" over the set of ColorSpinorFields. E.g., it computes. More...
 
void caxpy_L (const Complex *a, ColorSpinorField &x, ColorSpinorField &y)
 This is a wrapper for calling the block "caxpy_L" with a composite ColorSpinorField. E.g., it computes. More...
 
void caxpyz (const Complex *a, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, std::vector< ColorSpinorField *> &z)
 Compute the block "caxpyz" over the set of ColorSpinorFields. E.g., it computes. More...
 
void caxpyz (const Complex *a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
 This is a wrapper for calling the block "caxpyz" with a composite ColorSpinorField. E.g., it computes. More...
 
void caxpyz_U (const Complex *a, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, std::vector< ColorSpinorField *> &z)
 Compute the block "caxpyz_U" over the set of ColorSpinorFields. E.g., it computes. More...
 
void caxpyz_U (const Complex *a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
 This is a wrapper for calling the block "caxpyz_U" with a composite ColorSpinorField. E.g., it computes. More...
 
void caxpyz_L (const Complex *a, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, std::vector< ColorSpinorField *> &z)
 Compute the block "caxpyz_L" over the set of ColorSpinorFields. E.g., it computes. More...
 
void caxpyz_L (const Complex *a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
 This is a wrapper for calling the block "caxpyz_L" with a composite ColorSpinorField. E.g., it computes. More...
 
void axpyBzpcx (const double *a, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, const double *b, ColorSpinorField &z, const double *c)
 Compute the vectorized "axpyBzpcx" over the set of ColorSpinorFields, where the third vector, z, is constant over the batch. E.g., it computes. More...
 
void caxpyBxpz (const Complex *a_, std::vector< ColorSpinorField *> &x_, ColorSpinorField &y_, const Complex *b_, ColorSpinorField &z_)
 Compute the vectorized "caxpyBxpz" over the set of ColorSpinorFields, where the second and third vectors, y and z, are constant over the batch. E.g., it computes. More...
 
void reDotProduct (double *result, std::vector< ColorSpinorField *> &a, std::vector< ColorSpinorField *> &b)
 
void cDotProduct (Complex *result, std::vector< ColorSpinorField *> &a, std::vector< ColorSpinorField *> &b)
 Computes the matrix of inner products between the vector set a and the vector set b. More...
 
void hDotProduct (Complex *result, std::vector< ColorSpinorField *> &a, std::vector< ColorSpinorField *> &b)
 Computes the matrix of inner products between the vector set a and the vector set b. This routine is specifically for the case where the result matrix is guaranteed to be Hermitian. Requires a.size()==b.size(). More...
 
void hDotProduct_Anorm (Complex *result, std::vector< ColorSpinorField *> &a, std::vector< ColorSpinorField *> &b)
 Computes the matrix of inner products between the vector set a and the vector set b. This routine is specifically for the case where the result matrix is guaranteed to be Hermitian. Uniquely defined for cases like (p, Ap) where the output is Hermitian, but there's an A-norm instead of an L2 norm. Requires a.size()==b.size(). More...
 
void cDotProductCopy (Complex *result, std::vector< ColorSpinorField *> &a, std::vector< ColorSpinorField *> &b, std::vector< ColorSpinorField *> &c)
 Computes the matrix of inner products between the vector set a and the vector set b, and copies b into c. More...
 
template<typename Float >
void axpby (const Float &a, const Float *x, const Float &b, Float *y, const int N)
 
void axpbyCpu (const double &a, const cpuColorSpinorField &x, const double &b, cpuColorSpinorField &y)
 
void xpyCpu (const cpuColorSpinorField &x, cpuColorSpinorField &y)
 
void axpyCpu (const double &a, const cpuColorSpinorField &x, cpuColorSpinorField &y)
 
void xpayCpu (const cpuColorSpinorField &x, const double &a, cpuColorSpinorField &y)
 
void mxpyCpu (const cpuColorSpinorField &x, cpuColorSpinorField &y)
 
void axCpu (const double &a, cpuColorSpinorField &x)
 
template<typename Float >
void caxpby (const std::complex< Float > &a, const std::complex< Float > *x, const std::complex< Float > &b, std::complex< Float > *y, int N)
 
void caxpyCpu (const Complex &a, const cpuColorSpinorField &x, cpuColorSpinorField &y)
 
void caxpbyCpu (const Complex &a, const cpuColorSpinorField &x, const Complex &b, cpuColorSpinorField &y)
 
template<typename Float >
void caxpbypcz (const std::complex< Float > &a, const std::complex< Float > *x, const std::complex< Float > &b, const std::complex< Float > *y, const std::complex< Float > &c, std::complex< Float > *z, int N)
 
void cxpaypbzCpu (const cpuColorSpinorField &x, const Complex &a, const cpuColorSpinorField &y, const Complex &b, cpuColorSpinorField &z)
 
void axpyBzpcxCpu (const double &a, cpuColorSpinorField &x, cpuColorSpinorField &y, const double &b, const cpuColorSpinorField &z, const double &c)
 
void axpyZpbxCpu (const double &a, cpuColorSpinorField &x, cpuColorSpinorField &y, const cpuColorSpinorField &z, const double &b)
 
void caxpbypzYmbwCpu (const Complex &a, const cpuColorSpinorField &x, const Complex &b, cpuColorSpinorField &y, cpuColorSpinorField &z, const cpuColorSpinorField &w)
 
template<typename Float >
double norm (const Float *a, const int N)
 
double normCpu (const cpuColorSpinorField &a)
 
double axpyNormCpu (const double &a, const cpuColorSpinorField &x, cpuColorSpinorField &y)
 
template<typename Float >
double reDotProduct (const Float *a, const Float *b, const int N)
 
double reDotProductCpu (const cpuColorSpinorField &a, const cpuColorSpinorField &b)
 
double xmyNormCpu (const cpuColorSpinorField &x, cpuColorSpinorField &y)
 
template<typename Float >
Complex cDotProduct (const std::complex< Float > *a, const std::complex< Float > *b, const int N)
 
Complex cDotProductCpu (const cpuColorSpinorField &a, const cpuColorSpinorField &b)
 
Complex xpaycDotzyCpu (const cpuColorSpinorField &x, const double &a, cpuColorSpinorField &y, const cpuColorSpinorField &z)
 
double3 cDotProductNormACpu (const cpuColorSpinorField &a, const cpuColorSpinorField &b)
 
double3 cDotProductNormBCpu (const cpuColorSpinorField &a, const cpuColorSpinorField &b)
 
double3 caxpbypzYmbwcDotProductUYNormYCpu (const Complex &a, const cpuColorSpinorField &x, const Complex &b, cpuColorSpinorField &y, cpuColorSpinorField &z, const cpuColorSpinorField &w, const cpuColorSpinorField &u)
 
void cabxpyAxCpu (const double &a, const Complex &b, cpuColorSpinorField &x, cpuColorSpinorField &y)
 
double caxpyNormCpu (const Complex &a, cpuColorSpinorField &x, cpuColorSpinorField &y)
 
double caxpyXmazNormXCpu (const Complex &a, cpuColorSpinorField &x, cpuColorSpinorField &y, cpuColorSpinorField &z)
 
void caxpyXmazCpu (const Complex &a, cpuColorSpinorField &x, cpuColorSpinorField &y, cpuColorSpinorField &z)
 
double cabxpyAxNormCpu (const double &a, const Complex &b, cpuColorSpinorField &x, cpuColorSpinorField &y)
 
void caxpbypzCpu (const Complex &a, cpuColorSpinorField &x, const Complex &b, cpuColorSpinorField &y, cpuColorSpinorField &z)
 
void caxpbypczpwCpu (const Complex &a, cpuColorSpinorField &x, const Complex &b, cpuColorSpinorField &y, const Complex &c, cpuColorSpinorField &z, cpuColorSpinorField &w)
 
Complex caxpyDotzyCpu (const Complex &a, cpuColorSpinorField &x, cpuColorSpinorField &y, cpuColorSpinorField &z)
 
template<typename Float >
double3 HeavyQuarkResidualNorm (const Float *x, const Float *r, const int volume, const int Nint)
 
double3 HeavyQuarkResidualNormCpu (cpuColorSpinorField &x, cpuColorSpinorField &r)
 
double3 HeavyQuarkResidualNormCpu (cpuColorSpinorField &x, cpuColorSpinorField &y, cpuColorSpinorField &r)
 
void initReduce ()
 
void endReduce ()
 
cudaStream_t * getStream ()
 
__device__ __host__ void _caxpy (const float2 &a, const float4 &x, float4 &y)
 
__device__ __host__ void _caxpy (const float2 &a, const float2 &x, float2 &y)
 
__device__ __host__ void _caxpy (const double2 &a, const double2 &x, double2 &y)
 
__device__ __host__ void _caxpby (const float2 &a, const float4 &x, const float2 &b, float4 &y)
 
__device__ __host__ void _caxpby (const float2 &a, const float2 &x, const float2 &b, float2 &y)
 
__device__ __host__ void _caxpby (const double2 &a, const double2 &x, const double2 &b, double2 &y)
 
__device__ __host__ void _cxpaypbz (const float4 &x, const float2 &a, const float4 &y, const float2 &b, float4 &z)
 
__device__ __host__ void _cxpaypbz (const float2 &x, const float2 &a, const float2 &y, const float2 &b, float2 &z)
 
__device__ __host__ void _cxpaypbz (const double2 &x, const double2 &a, const double2 &y, const double2 &b, double2 &z)
 
void caxpy_recurse (const Complex *a_, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, int i_idx, int j_idx, int upper)
 
void caxpyz_recurse (const Complex *a_, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, std::vector< ColorSpinorField *> &z, int i, int j, int pass, int upper)
 
cudaEvent_t * getReduceEvent ()
 
template<typename ReduceType >
__device__ __host__ void dot_ (ReduceType &sum, const double2 &a, const double2 &b)
 
template<typename ReduceType >
__device__ __host__ void dot_ (ReduceType &sum, const float2 &a, const float2 &b)
 
template<typename ReduceType >
__device__ __host__ void dot_ (ReduceType &sum, const float4 &a, const float4 &b)
 
template<typename ReduceType >
__device__ __host__ void cdot_ (ReduceType &sum, const double2 &a, const double2 &b)
 
template<typename ReduceType >
__device__ __host__ void cdot_ (ReduceType &sum, const float2 &a, const float2 &b)
 
template<typename ReduceType >
__device__ __host__ void cdot_ (ReduceType &sum, const float4 &a, const float4 &b)
 
template<template< int MXZ, typename ReducerType, typename Float, typename FloatN > class ReducerDiagonal, typename writeDiagonal , template< int MXZ, typename ReducerType, typename Float, typename FloatN > class ReducerOffDiagonal, typename writeOffDiagonal >
void multiReduce_recurse (Complex *result, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, std::vector< ColorSpinorField *> &z, std::vector< ColorSpinorField *> &w, int i_idx, int j_idx, bool hermitian, unsigned int tile_size)
 
template<typename ReduceType >
__device__ __host__ ReduceType norm1_ (const double2 &a)
 
template<typename ReduceType >
__device__ __host__ ReduceType norm1_ (const float2 &a)
 
template<typename ReduceType >
__device__ __host__ ReduceType norm1_ (const float4 &a)
 
template<typename ReduceType >
__device__ __host__ void norm2_ (ReduceType &sum, const double2 &a)
 
template<typename ReduceType >
__device__ __host__ void norm2_ (ReduceType &sum, const float2 &a)
 
template<typename ReduceType >
__device__ __host__ void norm2_ (ReduceType &sum, const float4 &a)
 
template<typename ReduceType , typename InputType >
__device__ __host__ ReduceType dotNormA_ (const InputType &a, const InputType &b)
 
__device__ __host__ void Caxpy_ (const double2 &a, const double2 &x, double2 &y)
 
__device__ __host__ void Caxpy_ (const float2 &a, const float2 &x, float2 &y)
 
__device__ __host__ void Caxpy_ (const float2 &a, const float4 &x, float4 &y)
 
template<typename ReduceType , typename InputType >
__device__ __host__ void cdotNormA_ (ReduceType &sum, const InputType &a, const InputType &b)
 
template<typename ReduceType , typename InputType >
__device__ __host__ void cdotNormB_ (ReduceType &sum, const InputType &a, const InputType &b)
 

Variables

unsigned long long flops
 
unsigned long long bytes
 
static cudaStream_t * blasStream
 
struct {
   const char *   quda::blas::vol_str
 
   const char *   quda::blas::aux_str
 
   char   quda::blas::aux_tmp [TuneKey::aux_n]
 
blasStrings
 
struct {
   const char *   quda::blas::vol_str
 
   const char *   quda::blas::aux_str
 
   char   quda::blas::aux_tmp [TuneKey::aux_n]
 
blasStrings
 

Function Documentation

◆ _caxpby() [1/3]

__device__ __host__ void quda::blas::_caxpby ( const float2 &  a,
const float4 &  x,
const float2 &  b,
float4 &  y 
)

Functor to perform the operation y = a*x + b*y (complex-valued)

Definition at line 261 of file blas_quda.cu.

References a, b, x, and y.

Referenced by quda::blas::caxpby_< Float2, FloatN >::operator()().

Here is the caller graph for this function:

◆ _caxpby() [2/3]

__device__ __host__ void quda::blas::_caxpby ( const float2 &  a,
const float2 &  x,
const float2 &  b,
float2 &  y 
)

Definition at line 269 of file blas_quda.cu.

References a, b, x, and y.

◆ _caxpby() [3/3]

__device__ __host__ void quda::blas::_caxpby ( const double2 &  a,
const double2 &  x,
const double2 &  b,
double2 &  y 
)

Definition at line 275 of file blas_quda.cu.

References a, b, x, and y.

◆ _caxpy() [1/3]

__device__ __host__ void quda::blas::_caxpy ( const float2 &  a,
const float4 &  x,
float4 &  y 
)
inline

◆ _caxpy() [2/3]

__device__ __host__ void quda::blas::_caxpy ( const float2 &  a,
const float2 &  x,
float2 &  y 
)
inline

Definition at line 226 of file blas_quda.cu.

References a, x, and y.

◆ _caxpy() [3/3]

__device__ __host__ void quda::blas::_caxpy ( const double2 &  a,
const double2 &  x,
double2 &  y 
)
inline

Definition at line 231 of file blas_quda.cu.

References a, x, and y.

◆ _cxpaypbz() [1/3]

__device__ __host__ void quda::blas::_cxpaypbz ( const float4 &  x,
const float2 &  a,
const float4 &  y,
const float2 &  b,
float4 &  z 
)

Functor to perform the operation z[i] = x[i] + a*y[i] + b*z[i]

Definition at line 301 of file blas_quda.cu.

References a, b, x, y, and z.

Referenced by quda::blas::cxpaypbz_< Float2, FloatN >::operator()().

Here is the caller graph for this function:

◆ _cxpaypbz() [2/3]

__device__ __host__ void quda::blas::_cxpaypbz ( const float2 &  x,
const float2 &  a,
const float2 &  y,
const float2 &  b,
float2 &  z 
)

Definition at line 310 of file blas_quda.cu.

References a, b, x, y, and z.

◆ _cxpaypbz() [3/3]

__device__ __host__ void quda::blas::_cxpaypbz ( const double2 &  x,
const double2 &  a,
const double2 &  y,
const double2 &  b,
double2 &  z 
)

Definition at line 317 of file blas_quda.cu.

References a, b, x, y, and z.

◆ ax()

void quda::blas::ax ( const double a,
ColorSpinorField x 
)

◆ axCpu()

void quda::blas::axCpu ( const double a,
cpuColorSpinorField x 
)

Definition at line 61 of file blas_cpu.cpp.

References a, axpby(), errorQuda, f, float, QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION, and x.

Referenced by cabxpyAxCpu(), and cabxpyAxNormCpu().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ axpby() [1/2]

template<typename Float >
void quda::blas::axpby ( const Float &  a,
const Float *  x,
const Float &  b,
Float *  y,
const int  N 
)

Definition at line 9 of file blas_cpu.cpp.

References a, b, fused_exterior_ndeg_tm_dslash_cuda_gen::i, x, and y.

◆ axpby() [2/2]

void quda::blas::axpby ( const double a,
ColorSpinorField x,
const double b,
ColorSpinorField y 
)

Definition at line 106 of file blas_quda.cu.

References a, b, x, and y.

Referenced by axCpu(), axpbyCpu(), axpyCpu(), benchmark(), mxpyCpu(), quda::MR::operator()(), quda::MultiShiftCG::operator()(), test(), xpayCpu(), and xpyCpu().

Here is the caller graph for this function:

◆ axpbyCpu()

void quda::blas::axpbyCpu ( const double a,
const cpuColorSpinorField x,
const double b,
cpuColorSpinorField y 
)

Definition at line 13 of file blas_cpu.cpp.

References a, axpby(), b, errorQuda, float, QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION, x, and y.

Referenced by axpyBzpcxCpu().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ axpy()

void quda::blas::axpy ( const double a,
ColorSpinorField x,
ColorSpinorField y 
)

◆ axpyBzpcx() [1/2]

void quda::blas::axpyBzpcx ( const double a,
ColorSpinorField x,
ColorSpinorField y,
const double b,
ColorSpinorField z,
const double c 
)

Definition at line 356 of file blas_quda.cu.

References a, b, c, x, y, and z.

Referenced by quda::ShiftUpdate::apply(), axpyBzpcx(), benchmark(), and test().

Here is the caller graph for this function:

◆ axpyBzpcx() [2/2]

void quda::blas::axpyBzpcx ( const double a,
std::vector< ColorSpinorField *> &  x,
std::vector< ColorSpinorField *> &  y,
const double b,
ColorSpinorField z,
const double c 
)

Compute the vectorized "axpyBzpcx" over the set of ColorSpinorFields, where the third vector, z, is constant over the batch. E.g., it computes.

y = a * x + y; x = b * z + c * x

The dimensions of a, b, c are the same as the size of x and y, with a maximum size of 16.

Parameters
a [in] Array of coefficients
b [in] Array of coefficients
c [in] Array of coefficients
x [in,out] vector of ColorSpinorFields
y [in,out] vector of ColorSpinorFields
z [in] input ColorSpinorField

Definition at line 718 of file multi_blas_quda.cu.

References a, fused_exterior_ndeg_tm_dslash_cuda_gen::a1, axpyBzpcx(), b, c, MAX_MULTI_BLAS_N, w, x, y, y0(), and y1().

Here is the call graph for this function:

◆ axpyBzpcxCpu()

void quda::blas::axpyBzpcxCpu ( const double a,
cpuColorSpinorField x,
cpuColorSpinorField y,
const double b,
const cpuColorSpinorField z,
const double c 
)

Definition at line 130 of file blas_cpu.cpp.

References a, axpbyCpu(), axpyCpu(), b, c, x, y, and z.

Here is the call graph for this function:

◆ axpyCGNorm()

Complex quda::blas::axpyCGNorm ( const double a,
ColorSpinorField x,
ColorSpinorField y 
)

Definition at line 654 of file reduce_quda.cu.

References a, x, and y.

Referenced by quda::CG::operator()(), quda::PreconCG::operator()(), and quda::MultiShiftCG::operator()().

Here is the caller graph for this function:

◆ axpyCpu()

void quda::blas::axpyCpu ( const double a,
const cpuColorSpinorField x,
cpuColorSpinorField y 
)

Definition at line 32 of file blas_cpu.cpp.

References a, axpby(), errorQuda, QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION, x, and y.

Referenced by axpyBzpcxCpu(), axpyNormCpu(), and axpyZpbxCpu().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ axpyNorm()

double quda::blas::axpyNorm ( const double a,
ColorSpinorField x,
ColorSpinorField y 
)

Definition at line 325 of file reduce_quda.cu.

References a, x, and y.

Referenced by benchmark(), quda::IncEigCG::eigCGsolve(), quda::CG::operator()(), and test().

Here is the caller graph for this function:

◆ axpyNormCpu()

double quda::blas::axpyNormCpu ( const double a,
const cpuColorSpinorField x,
cpuColorSpinorField y 
)

Definition at line 179 of file blas_cpu.cpp.

References a, axpyCpu(), normCpu(), x, and y.

Here is the call graph for this function:

◆ axpyReDot()

double quda::blas::axpyReDot ( const double a,
ColorSpinorField x,
ColorSpinorField y 
)

Definition at line 345 of file reduce_quda.cu.

References a, x, and y.

Referenced by benchmark(), quda::MultiShiftCG::operator()(), and test().

Here is the caller graph for this function:

◆ axpyZpbx()

void quda::blas::axpyZpbx ( const double a,
ColorSpinorField x,
ColorSpinorField y,
ColorSpinorField z,
const double b 
)

Definition at line 384 of file blas_quda.cu.

References a, b, x, y, and z.

Referenced by benchmark(), quda::IncEigCG::eigCGsolve(), quda::CG::operator()(), quda::PreconCG::operator()(), quda::MultiShiftCG::operator()(), and test().

Here is the caller graph for this function:

◆ axpyZpbxCpu()

void quda::blas::axpyZpbxCpu ( const double a,
cpuColorSpinorField x,
cpuColorSpinorField y,
const cpuColorSpinorField z,
const double b 
)

Definition at line 137 of file blas_cpu.cpp.

References a, axpyCpu(), b, x, xpayCpu(), y, and z.

Here is the call graph for this function:

◆ cabxpyAx()

void quda::blas::cabxpyAx ( const double a,
const Complex b,
ColorSpinorField x,
ColorSpinorField y 
)

Definition at line 484 of file blas_quda.cu.

References a, b, IMAG, REAL, x, and y.

Referenced by benchmark(), and test().

Here is the caller graph for this function:

◆ cabxpyAxCpu()

void quda::blas::cabxpyAxCpu ( const double a,
const Complex b,
cpuColorSpinorField x,
cpuColorSpinorField y 
)

Definition at line 260 of file blas_cpu.cpp.

References a, axCpu(), b, caxpyCpu(), x, and y.

Here is the call graph for this function:

◆ cabxpyAxNorm()

double quda::blas::cabxpyAxNorm ( const double a,
const Complex b,
ColorSpinorField x,
ColorSpinorField y 
)

Definition at line 449 of file reduce_quda.cu.

References a, b, IMAG, REAL, x, and y.

Referenced by benchmark(), quda::GCR::operator()(), and test().

Here is the caller graph for this function:

◆ cabxpyAxNormCpu()

double quda::blas::cabxpyAxNormCpu ( const double a,
const Complex b,
cpuColorSpinorField x,
cpuColorSpinorField y 
)

Definition at line 284 of file blas_cpu.cpp.

References a, axCpu(), b, caxpyCpu(), norm2(), x, and y.

Here is the call graph for this function:

◆ caxpby() [1/2]

void quda::blas::caxpby ( const Complex a,
ColorSpinorField x,
const Complex b,
ColorSpinorField y 
)

Definition at line 292 of file blas_quda.cu.

References a, b, IMAG, REAL, x, and y.

Referenced by quda::BiCGstabLUpdate::apply(), benchmark(), caxpbyCpu(), caxpyCpu(), quda::BiCGstabL::operator()(), and test().

Here is the caller graph for this function:

◆ caxpby() [2/2]

template<typename Float >
void quda::blas::caxpby ( const std::complex< Float > &  a,
const std::complex< Float > *  x,
const std::complex< Float > &  b,
std::complex< Float > *  y,
int  N 
)

Definition at line 71 of file blas_cpu.cpp.

References a, b, fused_exterior_ndeg_tm_dslash_cuda_gen::i, x, and y.

◆ caxpbyCpu()

void quda::blas::caxpbyCpu ( const Complex a,
const cpuColorSpinorField x,
const Complex b,
cpuColorSpinorField y 
)

Definition at line 93 of file blas_cpu.cpp.

References a, b, caxpby(), errorQuda, QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION, x, and y.

Here is the call graph for this function:

◆ caxpbypcz()

template<typename Float >
void quda::blas::caxpbypcz ( const std::complex< Float > &  a,
const std::complex< Float > *  x,
const std::complex< Float > &  b,
const std::complex< Float > *  y,
const std::complex< Float > &  c,
std::complex< Float > *  z,
int  N 
)

Definition at line 106 of file blas_cpu.cpp.

References a, b, c, fused_exterior_ndeg_tm_dslash_cuda_gen::i, x, y, and z.

Referenced by caxpbypzYmbwCpu(), and cxpaypbzCpu().

Here is the caller graph for this function:

◆ caxpbypczpw()

void quda::blas::caxpbypczpw ( const Complex a,
ColorSpinorField x,
const Complex b,
ColorSpinorField y,
const Complex c,
ColorSpinorField z,
ColorSpinorField w 
)

Definition at line 527 of file blas_quda.cu.

References a, b, c, IMAG, REAL, w, x, y, and z.

Referenced by benchmark(), and test().

Here is the caller graph for this function:

◆ caxpbypczpwCpu()

void quda::blas::caxpbypczpwCpu ( const Complex a,
cpuColorSpinorField x,
const Complex b,
cpuColorSpinorField y,
const Complex c,
cpuColorSpinorField z,
cpuColorSpinorField w 
)

Definition at line 296 of file blas_cpu.cpp.

References a, b, c, caxpyCpu(), w, x, y, and z.

Here is the call graph for this function:

◆ caxpbypz()

void quda::blas::caxpbypz ( const Complex a,
ColorSpinorField x,
const Complex b,
ColorSpinorField y,
ColorSpinorField z 
)

Definition at line 505 of file blas_quda.cu.

References a, b, IMAG, REAL, x, y, and z.

Referenced by benchmark(), and test().

Here is the caller graph for this function:

◆ caxpbypzCpu()

void quda::blas::caxpbypzCpu ( const Complex a,
cpuColorSpinorField x,
const Complex b,
cpuColorSpinorField y,
cpuColorSpinorField z 
)

Definition at line 290 of file blas_cpu.cpp.

References a, b, caxpyCpu(), x, y, and z.

Here is the call graph for this function:

◆ caxpbypzYmbw()

void quda::blas::caxpbypzYmbw ( const Complex a,
ColorSpinorField x,
const Complex b,
ColorSpinorField y,
ColorSpinorField z,
ColorSpinorField w 
)

Definition at line 464 of file blas_quda.cu.

References a, b, IMAG, REAL, w, x, y, and z.

Referenced by benchmark(), quda::BiCGstab::operator()(), and test().

Here is the caller graph for this function:

◆ caxpbypzYmbwcDotProductUYNormY()

double3 quda::blas::caxpbypzYmbwcDotProductUYNormY ( const Complex a,
ColorSpinorField x,
const Complex b,
ColorSpinorField y,
ColorSpinorField z,
ColorSpinorField w,
ColorSpinorField u 
)

Definition at line 619 of file reduce_quda.cu.

References a, b, IMAG, REAL, w, x, y, and z.

Referenced by benchmark(), quda::BiCGstab::operator()(), and test().

Here is the caller graph for this function:

◆ caxpbypzYmbwcDotProductUYNormYCpu()

double3 quda::blas::caxpbypzYmbwcDotProductUYNormYCpu ( const Complex a,
const cpuColorSpinorField x,
const Complex b,
cpuColorSpinorField y,
cpuColorSpinorField z,
const cpuColorSpinorField w,
const cpuColorSpinorField u 
)

Definition at line 251 of file blas_cpu.cpp.

References a, b, caxpbypzYmbwCpu(), cDotProductNormBCpu(), w, x, y, and z.

Here is the call graph for this function:

◆ caxpbypzYmbwCpu()

void quda::blas::caxpbypzYmbwCpu ( const Complex a,
const cpuColorSpinorField x,
const Complex b,
cpuColorSpinorField y,
cpuColorSpinorField z,
const cpuColorSpinorField w 
)

Definition at line 144 of file blas_cpu.cpp.

References a, b, caxpbypcz(), caxpyCpu(), errorQuda, f, QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION, w, x, y, and z.

Referenced by caxpbypzYmbwcDotProductUYNormYCpu().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ caxpy() [1/3]

void quda::blas::caxpy ( const Complex a,
ColorSpinorField x,
ColorSpinorField y 
)

◆ caxpy() [2/3]

void quda::blas::caxpy ( const Complex a,
std::vector< ColorSpinorField *> &  x,
std::vector< ColorSpinorField *> &  y 
)

Compute the block "caxpy" over the set of ColorSpinorFields. E.g., it computes.

y = x * a + y

The dimensions of a can be rectangular, e.g., the width of x and y need not be the same.

Parameters
a[in]Matrix of coefficients
x[in]vector of input ColorSpinorFields
y[in,out]vector of input/output ColorSpinorFields

Definition at line 351 of file multi_blas_quda.cu.

References caxpy_recurse(), x, and y.

Here is the call graph for this function:

◆ caxpy() [3/3]

void quda::blas::caxpy ( const Complex a,
ColorSpinorField x,
ColorSpinorField y 
)

This is a wrapper for calling the block "caxpy" with a composite ColorSpinorField. E.g., it computes.

y = x * a + y

Parameters
a[in]Matrix of coefficients
x[in]Input matrix
y[in,out]Computed output matrix

Definition at line 382 of file multi_blas_quda.cu.

References a, caxpy(), x, and y.

Here is the call graph for this function:

◆ Caxpy_() [1/3]

__device__ __host__ void quda::blas::Caxpy_ ( const double2 &  a,
const double2 &  x,
double2 &  y 
)

◆ Caxpy_() [2/3]

__device__ __host__ void quda::blas::Caxpy_ ( const float2 &  a,
const float2 &  x,
float2 &  y 
)

Definition at line 377 of file reduce_quda.cu.

References a, x, and y.

◆ Caxpy_() [3/3]

__device__ __host__ void quda::blas::Caxpy_ ( const float2 &  a,
const float4 &  x,
float4 &  y 
)

Definition at line 381 of file reduce_quda.cu.

References a, x, and y.

◆ caxpy_L() [1/2]

void quda::blas::caxpy_L ( const Complex a,
std::vector< ColorSpinorField *> &  x,
std::vector< ColorSpinorField *> &  y 
)

Compute the block "caxpy_L" over the set of ColorSpinorFields. E.g., it computes.

y = x * a + y

Where 'a' must be a square, lower triangular matrix.

Parameters
a[in]Matrix of coefficients
x[in]vector of input ColorSpinorFields
y[in,out]vector of input/output ColorSpinorFields

Definition at line 369 of file multi_blas_quda.cu.

References caxpy_recurse(), errorQuda, x, and y.

Referenced by caxpy_L().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ caxpy_L() [2/2]

void quda::blas::caxpy_L ( const Complex a,
ColorSpinorField x,
ColorSpinorField y 
)

This is a wrapper for calling the block "caxpy_L" with a composite ColorSpinorField. E.g., it computes.

y = x * a + y

Parameters
a[in]Matrix of coefficients
x[in]Input matrix
y[in,out]Computed output matrix

Definition at line 386 of file multi_blas_quda.cu.

References a, caxpy_L(), x, and y.

Here is the call graph for this function:

◆ caxpy_recurse()

void quda::blas::caxpy_recurse ( const Complex a_,
std::vector< ColorSpinorField *> &  x,
std::vector< ColorSpinorField *> &  y,
int  i_idx,
int  j_idx,
int  upper 
)

Definition at line 117 of file multi_blas_quda.cu.

References a, fused_exterior_ndeg_tm_dslash_cuda_gen::a1, b, c, quda::count, fused_exterior_ndeg_tm_dslash_cuda_gen::i, MAX_MULTI_BLAS_N, x, y, y0(), and y1().

Referenced by caxpy(), caxpy_L(), and caxpy_U().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ caxpy_U() [1/2]

void quda::blas::caxpy_U ( const Complex a,
std::vector< ColorSpinorField *> &  x,
std::vector< ColorSpinorField *> &  y 
)

Compute the block "caxpy_U" over the set of ColorSpinorFields. E.g., it computes.

y = x * a + y

Where 'a' must be a square, upper triangular matrix.

Parameters
a[in]Matrix of coefficients
x[in]vector of input ColorSpinorFields
y[in,out]vector of input/output ColorSpinorFields

Definition at line 357 of file multi_blas_quda.cu.

References caxpy_recurse(), errorQuda, x, and y.

Referenced by caxpy_U().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ caxpy_U() [2/2]

void quda::blas::caxpy_U ( const Complex a,
ColorSpinorField x,
ColorSpinorField y 
)

This is a wrapper for calling the block "caxpy_U" with a composite ColorSpinorField. E.g., it computes.

y = x * a + y

Parameters
a[in]Matrix of coefficients
x[in]Input matrix
y[in,out]Computed output matrix

Definition at line 384 of file multi_blas_quda.cu.

References a, caxpy_U(), x, and y.

Here is the call graph for this function:

◆ caxpyBxpz() [1/2]

void quda::blas::caxpyBxpz ( const Complex a,
ColorSpinorField x,
ColorSpinorField y,
const Complex b,
ColorSpinorField z 
)

Definition at line 438 of file blas_quda.cu.

References a, b, IMAG, REAL, x, y, and z.

Referenced by benchmark(), caxpyBxpz(), test(), and quda::BiCGstabL::updateXRend().

Here is the caller graph for this function:

◆ caxpyBxpz() [2/2]

void quda::blas::caxpyBxpz ( const Complex a_,
std::vector< ColorSpinorField *> &  x_,
ColorSpinorField y_,
const Complex b_,
ColorSpinorField z_ 
)

Compute the vectorized "caxpyBxpz" over the set of ColorSpinorFields, where the second and third vectors, y and z, are constant over the batch. E.g., it computes.

y = a * x + y, z = b * x + z

The dimensions of a, b are the same as the size of x, with a maximum size of 16.

Parameters
a[in]Array of coefficients
b[in]Array of coefficients
x[in]vector of ColorSpinorFields
y[in,out]input ColorSpinorField
z[in,out]input ColorSpinorField

Definition at line 791 of file multi_blas_quda.cu.

References a, fused_exterior_ndeg_tm_dslash_cuda_gen::a1, b, c, caxpyBxpz(), MAX_MULTI_BLAS_N, w, x, xsize, and y.

Here is the call graph for this function:

◆ caxpyBzpx()

void quda::blas::caxpyBzpx ( const Complex a,
ColorSpinorField x,
ColorSpinorField y,
const Complex b,
ColorSpinorField z 
)

Definition at line 412 of file blas_quda.cu.

References a, b, IMAG, REAL, x, y, and z.

Referenced by benchmark(), and test().

Here is the caller graph for this function:

◆ caxpyCpu()

void quda::blas::caxpyCpu ( const Complex a,
const cpuColorSpinorField x,
cpuColorSpinorField y 
)

Definition at line 80 of file blas_cpu.cpp.

References a, caxpby(), errorQuda, QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION, x, and y.

Referenced by cabxpyAxCpu(), cabxpyAxNormCpu(), caxpbypczpwCpu(), caxpbypzCpu(), caxpbypzYmbwCpu(), caxpyDotzyCpu(), caxpyNormCpu(), caxpyXmazCpu(), and caxpyXmazNormXCpu().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ caxpyDotzy()

Complex quda::blas::caxpyDotzy ( const Complex a,
ColorSpinorField x,
ColorSpinorField y,
ColorSpinorField z 
)

Definition at line 544 of file reduce_quda.cu.

References a, IMAG, REAL, x, y, and z.

Referenced by benchmark(), quda::orthoDir(), quda::BiCGstabL::orthoDir(), and test().

Here is the caller graph for this function:

◆ caxpyDotzyCpu()

Complex quda::blas::caxpyDotzyCpu ( const Complex a,
cpuColorSpinorField x,
cpuColorSpinorField y,
cpuColorSpinorField z 
)

Definition at line 304 of file blas_cpu.cpp.

References a, caxpyCpu(), cDotProductCpu(), x, y, and z.

Here is the call graph for this function:

◆ caxpyNorm()

double quda::blas::caxpyNorm ( const Complex a,
ColorSpinorField x,
ColorSpinorField y 
)

Definition at line 402 of file reduce_quda.cu.

References a, IMAG, REAL, x, and y.

Referenced by benchmark(), and test().

Here is the caller graph for this function:

◆ caxpyNormCpu()

double quda::blas::caxpyNormCpu ( const Complex a,
cpuColorSpinorField x,
cpuColorSpinorField y 
)

Definition at line 265 of file blas_cpu.cpp.

References a, caxpyCpu(), norm2(), x, and y.

Here is the call graph for this function:

◆ caxpyXmaz()

void quda::blas::caxpyXmaz ( const Complex a,
ColorSpinorField x,
ColorSpinorField y,
ColorSpinorField z 
)

Definition at line 549 of file blas_quda.cu.

References a, IMAG, REAL, x, y, and z.

Referenced by benchmark(), quda::MR::operator()(), and test().

Here is the caller graph for this function:

◆ caxpyXmazCpu()

void quda::blas::caxpyXmazCpu ( const Complex a,
cpuColorSpinorField x,
cpuColorSpinorField y,
cpuColorSpinorField z 
)

Definition at line 278 of file blas_cpu.cpp.

References a, caxpyCpu(), x, y, and z.

Here is the call graph for this function:

◆ caxpyXmazMR()

void quda::blas::caxpyXmazMR ( const Complex a,
ColorSpinorField x,
ColorSpinorField y,
ColorSpinorField z 
)

Definition at line 583 of file blas_quda.cu.

References a, commAsyncReduction(), errorQuda, IMAG, QUDA_CPU_FIELD_LOCATION, REAL, x, y, and z.

Referenced by quda::MR::operator()().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ caxpyXmazNormX()

double quda::blas::caxpyXmazNormX ( const Complex a,
ColorSpinorField x,
ColorSpinorField y,
ColorSpinorField z 
)

Definition at line 424 of file reduce_quda.cu.

References a, IMAG, REAL, x, y, and z.

Referenced by benchmark(), and test().

Here is the caller graph for this function:

◆ caxpyXmazNormXCpu()

double quda::blas::caxpyXmazNormXCpu ( const Complex a,
cpuColorSpinorField x,
cpuColorSpinorField y,
cpuColorSpinorField z 
)

Definition at line 271 of file blas_cpu.cpp.

References a, caxpyCpu(), norm2(), x, y, and z.

Here is the call graph for this function:

◆ caxpyz() [1/2]

void quda::blas::caxpyz ( const Complex a,
std::vector< ColorSpinorField *> &  x,
std::vector< ColorSpinorField *> &  y,
std::vector< ColorSpinorField *> &  z 
)

Compute the block "caxpyz" over the set of ColorSpinorFields. E.g., it computes.

z = x * a + y

The dimensions of a can be rectangular, e.g., the width of x and y need not be the same, though the maximum width for both is 16.

Parameters
a[in]Matrix of coefficients
x[in]vector of input ColorSpinorFields
y[in]vector of input ColorSpinorFields
z[out]vector of output ColorSpinorFields

Definition at line 659 of file multi_blas_quda.cu.

References a, caxpyz_recurse(), x, y, and z.

Referenced by caxpyz().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ caxpyz() [2/2]

void quda::blas::caxpyz ( const Complex a,
ColorSpinorField x,
ColorSpinorField y,
ColorSpinorField z 
)

This is a wrapper for calling the block "caxpyz" with a composite ColorSpinorField. E.g., it computes.

z = x * a + y

Parameters
a[in]Matrix of coefficients
x[in]Input matrix
y[in]Input matrix
z[out]Computed output matrix

Definition at line 683 of file multi_blas_quda.cu.

References a, caxpyz(), x, y, and z.

Here is the call graph for this function:

◆ caxpyz_L() [1/2]

void quda::blas::caxpyz_L ( const Complex a,
std::vector< ColorSpinorField *> &  x,
std::vector< ColorSpinorField *> &  y,
std::vector< ColorSpinorField *> &  z 
)

Compute the block "caxpyz" over the set of ColorSpinorFields. E.g., it computes.

z = x * a + y

Where 'a' is assumed to be lower triangular.

Parameters
a[in]Matrix of coefficients
x[in]vector of input ColorSpinorFields
y[in]vector of input ColorSpinorFields
z[out]vector of output ColorSpinorFields

Definition at line 674 of file multi_blas_quda.cu.

References a, caxpyz_recurse(), x, y, and z.

Referenced by caxpyz_L().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ caxpyz_L() [2/2]

void quda::blas::caxpyz_L ( const Complex a,
ColorSpinorField x,
ColorSpinorField y,
ColorSpinorField z 
)

This is a wrapper for calling the block "caxpyz_L" with a composite ColorSpinorField. E.g., it computes.

z = x * a + y

Parameters
a[in]Matrix of coefficients
x[in]Input matrix
y[in]Input matrix
z[out]Computed output matrix

Definition at line 691 of file multi_blas_quda.cu.

References a, caxpyz_L(), x, y, and z.

Here is the call graph for this function:

◆ caxpyz_recurse()

void quda::blas::caxpyz_recurse ( const Complex a_,
std::vector< ColorSpinorField *> &  x,
std::vector< ColorSpinorField *> &  y,
std::vector< ColorSpinorField *> &  z,
int  i,
int  j,
int  pass,
int  upper 
)

Definition at line 416 of file multi_blas_quda.cu.

References a, fused_exterior_ndeg_tm_dslash_cuda_gen::a1, b, c, caxpy(), quda::count, fused_exterior_ndeg_tm_dslash_cuda_gen::i, MAX_MULTI_BLAS_N, x, y, y0(), y1(), and z.

Referenced by caxpyz(), caxpyz_L(), and caxpyz_U().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ caxpyz_U() [1/2]

void quda::blas::caxpyz_U ( const Complex a,
std::vector< ColorSpinorField *> &  x,
std::vector< ColorSpinorField *> &  y,
std::vector< ColorSpinorField *> &  z 
)

Compute the block "caxpyz" over the set of ColorSpinorFields. E.g., it computes.

z = x * a + y

Where 'a' is assumed to be upper triangular.

Parameters
a[in]Matrix of coefficients
x[in]vector of input ColorSpinorFields
y[in]vector of input ColorSpinorFields
z[out]vector of output ColorSpinorFields

Definition at line 666 of file multi_blas_quda.cu.

References a, caxpyz_recurse(), x, y, and z.

Referenced by caxpyz_U().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ caxpyz_U() [2/2]

void quda::blas::caxpyz_U ( const Complex a,
ColorSpinorField x,
ColorSpinorField y,
ColorSpinorField z 
)

This is a wrapper for calling the block "caxpyz_U" with a composite ColorSpinorField. E.g., it computes.

z = x * a + y

Parameters
a[in]Matrix of coefficients
x[in]Input matrix
y[in]Input matrix
z[out]Computed output matrix

Definition at line 687 of file multi_blas_quda.cu.

References a, caxpyz_U(), x, y, and z.

Here is the call graph for this function:

◆ cdot_() [1/3]

template<typename ReduceType >
__device__ __host__ void quda::blas::cdot_ ( ReduceType &  sum,
const double2 &  a,
const double2 &  b 
)

Returns complex-valued dot product of x and y

Definition at line 226 of file multi_reduce_quda.cu.

References a, b, and sum().

Here is the call graph for this function:

◆ cdot_() [2/3]

template<typename ReduceType >
__device__ __host__ void quda::blas::cdot_ ( ReduceType &  sum,
const float2 &  a,
const float2 &  b 
)

Definition at line 235 of file multi_reduce_quda.cu.

References a, b, and sum().

Here is the call graph for this function:

◆ cdot_() [3/3]

template<typename ReduceType >
__device__ __host__ void quda::blas::cdot_ ( ReduceType &  sum,
const float4 &  a,
const float4 &  b 
)

Definition at line 244 of file multi_reduce_quda.cu.

References a, b, and sum().

Here is the call graph for this function:

◆ cdotNormA_()

template<typename ReduceType , typename InputType >
__device__ __host__ void quda::blas::cdotNormA_ ( ReduceType &  sum,
const InputType &  a,
const InputType &  b 
)

First returns the dot product (x,y), then returns the norm of x.

Definition at line 556 of file reduce_quda.cu.

References a, b, and sum().

Here is the call graph for this function:

◆ cdotNormB_()

template<typename ReduceType , typename InputType >
__device__ __host__ void quda::blas::cdotNormB_ ( ReduceType &  sum,
const InputType &  a,
const InputType &  b 
)

First returns the dot product (x,y), then returns the norm of y.

Definition at line 583 of file reduce_quda.cu.

References a, b, and sum().

Here is the call graph for this function:

◆ cDotProduct() [1/3]

Complex quda::blas::cDotProduct ( ColorSpinorField x,
ColorSpinorField y 
)

◆ cDotProduct() [2/3]

template<typename Float >
Complex quda::blas::cDotProduct ( const std::complex< Float > *  a,
const std::complex< Float > *  b,
const int  N 
)

Definition at line 212 of file blas_cpu.cpp.

References a, b, quda::conj(), dot(), and fused_exterior_ndeg_tm_dslash_cuda_gen::i.

Here is the call graph for this function:

◆ cDotProduct() [3/3]

void quda::blas::cDotProduct ( Complex result,
std::vector< ColorSpinorField *> &  a,
std::vector< ColorSpinorField *> &  b 
)

Computes the matrix of inner products between the vector set a and the vector set b.

Parameters
result[out]Matrix of inner product result[i][j] = (a[j],b[i])
a[in]set of input ColorSpinorFields
b[in]set of input ColorSpinorFields

Definition at line 594 of file multi_reduce_quda.cu.

References quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >::apply(), errorQuda, fused_exterior_ndeg_tm_dslash_cuda_gen::i, reduceDoubleArray(), x, and y.

Here is the call graph for this function:

◆ cDotProductCopy()

void quda::blas::cDotProductCopy ( Complex result,
std::vector< ColorSpinorField *> &  a,
std::vector< ColorSpinorField *> &  b,
std::vector< ColorSpinorField *> &  c 
)

Computes the matrix of inner products between the vector set a and the vector set b, and copies b into c.

Parameters
result[out]Matrix of inner product result[i][j] = (a[j],b[i])
a[in]set of input ColorSpinorFields
b[in]set of input ColorSpinorFields
c[out]set of output ColorSpinorFields

Definition at line 673 of file multi_reduce_quda.cu.

References quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >::apply(), errorQuda, fused_exterior_ndeg_tm_dslash_cuda_gen::i, reduceDoubleArray(), x, y, and z.

Here is the call graph for this function:

◆ cDotProductCpu()

Complex quda::blas::cDotProductCpu ( const cpuColorSpinorField a,
const cpuColorSpinorField b 
)

Definition at line 218 of file blas_cpu.cpp.

References a, b, cDotProduct(), dot(), errorQuda, QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION, and reduceDoubleArray().

Referenced by caxpyDotzyCpu(), cDotProductNormACpu(), cDotProductNormBCpu(), and xpaycDotzyCpu().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ cDotProductNormA()

double3 quda::blas::cDotProductNormA ( ColorSpinorField a,
ColorSpinorField b 
)

◆ cDotProductNormACpu()

double3 quda::blas::cDotProductNormACpu ( const cpuColorSpinorField a,
const cpuColorSpinorField b 
)

Definition at line 238 of file blas_cpu.cpp.

References a, b, cDotProductCpu(), dot(), norm(), and normCpu().

Here is the call graph for this function:

◆ cDotProductNormB()

double3 quda::blas::cDotProductNormB ( ColorSpinorField a,
ColorSpinorField b 
)

Definition at line 599 of file reduce_quda.cu.

References x, and y.

Referenced by benchmark(), quda::MR::operator()(), and test().

Here is the caller graph for this function:

◆ cDotProductNormBCpu()

double3 quda::blas::cDotProductNormBCpu ( const cpuColorSpinorField a,
const cpuColorSpinorField b 
)

Definition at line 244 of file blas_cpu.cpp.

References a, b, cDotProductCpu(), dot(), norm(), and normCpu().

Referenced by caxpbypzYmbwcDotProductUYNormYCpu().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ copy()

void quda::blas::copy ( ColorSpinorField dst,
const ColorSpinorField src 
)

◆ cxpaypbz()

void quda::blas::cxpaypbz ( ColorSpinorField x,
const Complex b,
ColorSpinorField y,
const Complex c,
ColorSpinorField z 
)

Definition at line 335 of file blas_quda.cu.

References a, b, IMAG, REAL, x, y, and z.

Referenced by benchmark(), quda::BiCGstab::operator()(), and test().

Here is the caller graph for this function:

◆ cxpaypbzCpu()

void quda::blas::cxpaypbzCpu ( const cpuColorSpinorField x,
const Complex a,
const cpuColorSpinorField y,
const Complex b,
cpuColorSpinorField z 
)

Definition at line 116 of file blas_cpu.cpp.

References a, b, caxpbypcz(), errorQuda, QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION, x, y, and z.

Here is the call graph for this function:

◆ dot_() [1/3]

template<typename ReduceType >
__device__ __host__ void quda::blas::dot_ ( ReduceType &  sum,
const double2 &  a,
const double2 &  b 
)

Return the real dot product of x and y. Broken at the moment—need to update reDotProduct with permuting, etc. of cDotProduct below.

Return the real dot product of x and y

Definition at line 114 of file multi_reduce_quda.cu.

References a, b, and sum().

Here is the call graph for this function:

◆ dot_() [2/3]

template<typename ReduceType >
__device__ __host__ void quda::blas::dot_ ( ReduceType &  sum,
const float2 &  a,
const float2 &  b 
)

Definition at line 119 of file multi_reduce_quda.cu.

References a, b, and sum().

Here is the call graph for this function:

◆ dot_() [3/3]

template<typename ReduceType >
__device__ __host__ void quda::blas::dot_ ( ReduceType &  sum,
const float4 &  a,
const float4 &  b 
)

Definition at line 124 of file multi_reduce_quda.cu.

References a, b, and sum().

Here is the call graph for this function:

◆ dotNormA_()

template<typename ReduceType , typename InputType >
__device__ __host__ ReduceType quda::blas::dotNormA_ ( const InputType &  a,
const InputType &  b 
)

Returns the real component of the dot product of a and b and the norm of a

Definition at line 288 of file reduce_quda.cu.

References a, b, and c.

◆ end()

void quda::blas::end ( void  )

Definition at line 70 of file blas_quda.cu.

References endReduce().

Referenced by endQuda(), and quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >::TileSizeTune().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ endReduce()

void quda::blas::endReduce ( void  )

Definition at line 134 of file reduce_quda.cu.

References d_reduce, device_free, h_reduce, hd_reduce, host_free, and reduceEnd.

Referenced by end().

Here is the caller graph for this function:

◆ getDeviceReduceBuffer()

void * quda::blas::getDeviceReduceBuffer ( )

Definition at line 73 of file reduce_quda.cu.

References d_reduce.

◆ getHostReduceBuffer()

void * quda::blas::getHostReduceBuffer ( )

Definition at line 75 of file reduce_quda.cu.

References h_reduce.

Referenced by multiReduceLaunch().

Here is the caller graph for this function:

◆ getMappedHostReduceBuffer()

void * quda::blas::getMappedHostReduceBuffer ( )

Definition at line 74 of file reduce_quda.cu.

References hd_reduce.

Referenced by multiReduceLaunch().

Here is the caller graph for this function:

◆ getReduceEvent()

cudaEvent_t * quda::blas::getReduceEvent ( )

Definition at line 76 of file reduce_quda.cu.

References reduceEnd.

Referenced by multiReduceLaunch().

Here is the caller graph for this function:

◆ getStream()

cudaStream_t * quda::blas::getStream ( )

Definition at line 75 of file blas_quda.cu.

References blasStream.

Referenced by quda::blas::copy_ns::copy(), multiblasCuda(), multiReduceCuda(), and reduceCuda().

Here is the caller graph for this function:

◆ hDotProduct()

void quda::blas::hDotProduct ( Complex result,
std::vector< ColorSpinorField *> &  a,
std::vector< ColorSpinorField *> &  b 
)

Computes the matrix of inner products between the vector set a and the vector set b. This routine is specifically for the case where the result matrix is guaranteed to be Hermitian. Requires a.size()==b.size().

Parameters
result[out]Matrix of inner product result[i][j] = (a[j],b[i])
a[in]set of input ColorSpinorFields
b[in]set of input ColorSpinorFields

Definition at line 619 of file multi_reduce_quda.cu.

References quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >::apply(), quda::conj(), errorQuda, fused_exterior_ndeg_tm_dslash_cuda_gen::i, reduceDoubleArray(), x, and y.

Here is the call graph for this function:

◆ hDotProduct_Anorm()

void quda::blas::hDotProduct_Anorm ( Complex result,
std::vector< ColorSpinorField *> &  a,
std::vector< ColorSpinorField *> &  b 
)

Computes the matrix of inner products between the vector set a and the vector set b. This routine is specifically for the case where the result matrix is guaranteed to be Hermitian. Uniquely defined for cases like (p, Ap) where the output is Hermitian, but there's an A-norm instead of an L2 norm. Requires a.size()==b.size().

Parameters
result[out]Matrix of inner product result[i][j] = (a[j],b[i])
a[in]set of input ColorSpinorFields
b[in]set of input ColorSpinorFields

Definition at line 646 of file multi_reduce_quda.cu.

References quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >::apply(), quda::conj(), errorQuda, fused_exterior_ndeg_tm_dslash_cuda_gen::i, reduceDoubleArray(), x, and y.

Here is the call graph for this function:

◆ HeavyQuarkResidualNorm() [1/2]

double3 quda::blas::HeavyQuarkResidualNorm ( ColorSpinorField x,
ColorSpinorField r 
)

◆ HeavyQuarkResidualNorm() [2/2]

template<typename Float >
double3 quda::blas::HeavyQuarkResidualNorm ( const Float *  x,
const Float *  r,
const int  volume,
const int  Nint 
)

Definition at line 311 of file blas_cpu.cpp.

References fused_exterior_ndeg_tm_dslash_cuda_gen::i, sum(), and x.

Here is the call graph for this function:

◆ HeavyQuarkResidualNormCpu() [1/2]

double3 quda::blas::HeavyQuarkResidualNormCpu ( cpuColorSpinorField x,
cpuColorSpinorField r 
)

Definition at line 332 of file blas_cpu.cpp.

References comm_size(), errorQuda, QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION, reduceDoubleArray(), quda::ColorSpinorField::V(), and x.

Referenced by HeavyQuarkResidualNormCpu().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ HeavyQuarkResidualNormCpu() [2/2]

double3 quda::blas::HeavyQuarkResidualNormCpu ( cpuColorSpinorField x,
cpuColorSpinorField y,
cpuColorSpinorField r 
)

Definition at line 353 of file blas_cpu.cpp.

References HeavyQuarkResidualNormCpu(), tmp, x, xpyCpu(), and y.

Here is the call graph for this function:

◆ init()

void quda::blas::init ( )

Definition at line 64 of file blas_quda.cu.

References blasStream, initReduce(), quda::Nstream, and streams.

Referenced by comm_peer2peer_enabled_global(), getRankVerbosity(), getTuning(), initQudaMemory(), and quda::traceEnabled().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ initReduce()

void quda::blas::initReduce ( )

Definition at line 78 of file reduce_quda.cu.

References bytes, checkCudaError, d_reduce, device_malloc, deviceProp, h_reduce, hd_reduce, mapped_malloc, MAX_MULTI_BLAS_N, memset(), pinned_malloc, QudaSumFloat, and reduceEnd.

Referenced by init().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ multiReduce_recurse()

template<template< int MXZ, typename ReducerType, typename Float, typename FloatN > class ReducerDiagonal, typename writeDiagonal , template< int MXZ, typename ReducerType, typename Float, typename FloatN > class ReducerOffDiagonal, typename writeOffDiagonal >
void quda::blas::multiReduce_recurse ( Complex result,
std::vector< ColorSpinorField *> &  x,
std::vector< ColorSpinorField *> &  y,
std::vector< ColorSpinorField *> &  z,
std::vector< ColorSpinorField *> &  w,
int  i_idx,
int  j_idx,
bool  hermitian,
unsigned int  tile_size 
)

Definition at line 282 of file multi_reduce_quda.cu.

References a, b, c, quda::count, fused_exterior_ndeg_tm_dslash_cuda_gen::i, w, x, y, y0(), y1(), and z.

Here is the call graph for this function:

◆ mxpy()

void quda::blas::mxpy ( ColorSpinorField x,
ColorSpinorField y 
)

Definition at line 192 of file blas_quda.cu.

References x, and y.

Referenced by benchmark(), invert_test(), main(), and test().

Here is the caller graph for this function:

◆ mxpyCpu()

void quda::blas::mxpyCpu ( const cpuColorSpinorField x,
cpuColorSpinorField y 
)

Definition at line 52 of file blas_cpu.cpp.

References axpby(), errorQuda, f, QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION, x, and y.

Here is the call graph for this function:

◆ norm()

template<typename Float >
double quda::blas::norm ( const Float *  a,
const int  N 
)

Definition at line 161 of file blas_cpu.cpp.

References a, fused_exterior_ndeg_tm_dslash_cuda_gen::i, and norm2().

Referenced by cDotProductNormACpu(), cDotProductNormBCpu(), quda::ComputeHarmonicRitz< libtype::eigen_lib >(), quda::ComputeHarmonicRitz< libtype::magma_lib >(), and normCpu().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ norm1()

double quda::blas::norm1 ( const ColorSpinorField b)

Definition at line 200 of file reduce_quda.cu.

References errorQuda, x, and y.

Referenced by getLambdaMax(), getRealBidiagMatrix(), and quda::norm1().

Here is the caller graph for this function:

◆ norm1_() [1/3]

template<typename ReduceType >
__device__ __host__ ReduceType quda::blas::norm1_ ( const double2 &  a)

Return the L1 norm of x

Definition at line 179 of file reduce_quda.cu.

References a, and fabs().

Here is the call graph for this function:

◆ norm1_() [2/3]

template<typename ReduceType >
__device__ __host__ ReduceType quda::blas::norm1_ ( const float2 &  a)

Definition at line 183 of file reduce_quda.cu.

References a, and fabs().

Here is the call graph for this function:

◆ norm1_() [3/3]

template<typename ReduceType >
__device__ __host__ ReduceType quda::blas::norm1_ ( const float4 &  a)

Definition at line 187 of file reduce_quda.cu.

References a, and fabs().

Here is the call graph for this function:

◆ norm2()

double quda::blas::norm2 ( const ColorSpinorField a)

◆ norm2_() [1/3]

template<typename ReduceType >
__device__ __host__ void quda::blas::norm2_ ( ReduceType &  sum,
const double2 &  a 
)

Return the L2 norm of x

Definition at line 214 of file reduce_quda.cu.

References a, and sum().

Here is the call graph for this function:

◆ norm2_() [2/3]

template<typename ReduceType >
__device__ __host__ void quda::blas::norm2_ ( ReduceType &  sum,
const float2 &  a 
)

Definition at line 219 of file reduce_quda.cu.

References a, and sum().

Here is the call graph for this function:

◆ norm2_() [3/3]

template<typename ReduceType >
__device__ __host__ void quda::blas::norm2_ ( ReduceType &  sum,
const float4 &  a 
)

Definition at line 224 of file reduce_quda.cu.

References a, and sum().

Here is the call graph for this function:

◆ normCpu()

double quda::blas::normCpu ( const cpuColorSpinorField a)

Definition at line 167 of file blas_cpu.cpp.

References a, errorQuda, norm(), norm2(), QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION, and reduceDouble().

Referenced by axpyNormCpu(), cDotProductNormACpu(), cDotProductNormBCpu(), and xmyNormCpu().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ quadrupleCGReduction()

double4 quda::blas::quadrupleCGReduction ( ColorSpinorField x,
ColorSpinorField y,
ColorSpinorField z 
)

Referenced by quda::CG::operator()().

Here is the caller graph for this function:

◆ reDotProduct() [1/3]

double quda::blas::reDotProduct ( ColorSpinorField x,
ColorSpinorField y 
)

Definition at line 277 of file reduce_quda.cu.

References x, and y.

Referenced by benchmark(), quda::IncEigCG::eigCGsolve(), quda::Lanczos::operator()(), quda::CG::operator()(), quda::PreconCG::operator()(), quda::MultiShiftCG::operator()(), reDotProductCpu(), and test().

Here is the caller graph for this function:

◆ reDotProduct() [2/3]

template<typename Float >
double quda::blas::reDotProduct ( const Float *  a,
const Float *  b,
const int  N 
)

Definition at line 186 of file blas_cpu.cpp.

References a, b, dot(), and fused_exterior_ndeg_tm_dslash_cuda_gen::i.

Here is the call graph for this function:

◆ reDotProduct() [3/3]

void quda::blas::reDotProduct ( double result,
std::vector< ColorSpinorField *> &  a,
std::vector< ColorSpinorField *> &  b 
)

Definition at line 142 of file multi_reduce_quda.cu.

References errorQuda, reduceDoubleArray(), x, and y.

Here is the call graph for this function:

◆ reDotProductCpu()

double quda::blas::reDotProductCpu ( const cpuColorSpinorField a,
const cpuColorSpinorField b 
)

Definition at line 192 of file blas_cpu.cpp.

References a, b, dot(), errorQuda, QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION, reDotProduct(), and reduceDouble().

Here is the call graph for this function:

◆ reDotProductNormA()

double2 quda::blas::reDotProductNormA ( ColorSpinorField a,
ColorSpinorField b 
)

Definition at line 305 of file reduce_quda.cu.

References x, and y.

Referenced by quda::SD::operator()().

Here is the caller graph for this function:

◆ setParam()

void quda::blas::setParam ( int  kernel,
int  prec,
int  threads,
int  blocks 
)

◆ tripleCGReduction()

double3 quda::blas::tripleCGReduction ( ColorSpinorField x,
ColorSpinorField y,
ColorSpinorField z 
)

Definition at line 767 of file reduce_quda.cu.

References x, y, and z.

Referenced by benchmark(), quda::CG::operator()(), and test().

Here is the caller graph for this function:

◆ tripleCGUpdate()

void quda::blas::tripleCGUpdate ( const double alpha,
const double beta,
ColorSpinorField q,
ColorSpinorField r,
ColorSpinorField x,
ColorSpinorField p 
)

Definition at line 610 of file blas_quda.cu.

References a, b, w, x, y, and z.

Referenced by benchmark(), quda::CG::operator()(), and test().

Here is the caller graph for this function:

◆ xmyNorm()

double quda::blas::xmyNorm ( ColorSpinorField & x,
ColorSpinorField & y 
)

◆ xmyNormCpu()

double quda::blas::xmyNormCpu ( const cpuColorSpinorField x,
cpuColorSpinorField y 
)

Definition at line 206 of file blas_cpu.cpp.

References normCpu(), x, xpayCpu(), and y.

Here is the call graph for this function:

◆ xpay()

void quda::blas::xpay ( ColorSpinorField x,
const double a,
ColorSpinorField y 
)

◆ xpaycDotzy()

Complex quda::blas::xpaycDotzy ( ColorSpinorField x,
const double a,
ColorSpinorField y,
ColorSpinorField z 
)

Definition at line 521 of file reduce_quda.cu.

References a, x, y, and z.

Referenced by benchmark() and test().

Here is the caller graph for this function:

◆ xpaycDotzyCpu()

Complex quda::blas::xpaycDotzyCpu ( const cpuColorSpinorField x,
const double a,
cpuColorSpinorField y,
const cpuColorSpinorField z 
)

Definition at line 232 of file blas_cpu.cpp.

References a, cDotProductCpu(), x, xpayCpu(), y, and z.

Here is the call graph for this function:

◆ xpayCpu()

void quda::blas::xpayCpu ( const cpuColorSpinorField x,
const double a,
cpuColorSpinorField y 
)

Definition at line 42 of file blas_cpu.cpp.

References a, axpby(), errorQuda, f, float, QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION, x, and y.

Referenced by axpyZpbxCpu(), xmyNormCpu(), and xpaycDotzyCpu().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ xpayz()

void quda::blas::xpayz ( ColorSpinorField x,
const double a,
ColorSpinorField y,
ColorSpinorField z 
)

Definition at line 177 of file blas_quda.cu.

References a, x, y, and z.

Referenced by quda::CG::operator()().

Here is the caller graph for this function:

◆ xpy()

void quda::blas::xpy ( ColorSpinorField x,
ColorSpinorField y 
)

◆ xpyCpu()

void quda::blas::xpyCpu ( const cpuColorSpinorField x,
cpuColorSpinorField y 
)

Definition at line 23 of file blas_cpu.cpp.

References axpby(), errorQuda, f, QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION, x, and y.

Referenced by HeavyQuarkResidualNormCpu().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ xpyHeavyQuarkResidualNorm()

double3 quda::blas::xpyHeavyQuarkResidualNorm ( ColorSpinorField x,
ColorSpinorField y,
ColorSpinorField r 
)

Definition at line 742 of file reduce_quda.cu.

References comm_size(), x, and y.

Referenced by benchmark(), quda::CG::operator()(), quda::BiCGstab::operator()(), quda::BiCGstabL::operator()(), and test().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ zero()

void quda::blas::zero ( ColorSpinorField & a)

Variable Documentation

◆ aux_str

const char* quda::blas::aux_str

Definition at line 57 of file blas_quda.cu.

◆ aux_tmp

char quda::blas::aux_tmp[TuneKey::aux_n]

Definition at line 58 of file blas_quda.cu.

Referenced by quda::ColorSpinorField::setTuningString().

◆ blasStream

static cudaStream_t* quda::blas::blasStream

Definition at line 53 of file blas_quda.cu.

Referenced by blasCuda(), getStream(), and init().

◆ blasStrings [1/2]

struct { ... } quda::blas::blasStrings

◆ blasStrings [2/2]

struct { ... } quda::blas::blasStrings

◆ bytes

unsigned long long quda::blas::bytes

◆ flops

unsigned long long quda::blas::flops

◆ vol_str

const char* quda::blas::vol_str

Definition at line 56 of file blas_quda.cu.