QUDA  1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
Namespaces | Classes | Functions | Variables
quda::blas Namespace Reference

Namespaces

 copy_ns
 
 detail
 

Classes

struct  ax_
 
struct  axpbyz_
 
struct  axpbyzNorm2
 
struct  axpyBzpcx_
 
struct  axpyCGNorm2
 
struct  AxpyReDot
 
struct  axpyZpbx_
 
struct  BlasArg
 
class  BlasCuda
 
struct  BlasFunctor
 
struct  cabxpyAx_
 
struct  cabxpyzaxnorm
 
struct  caxpby_
 
struct  caxpbypczw_
 
struct  caxpbypzYmbw_
 
struct  caxpbypzYmbwcDotProductUYNormY_
 
struct  caxpy_
 
struct  caxpyBxpz_
 
struct  caxpyBzpx_
 
struct  caxpydotzy
 
struct  caxpyNorm2
 
struct  caxpyxmaz_
 
struct  caxpyxmazMR_
 
struct  caxpyxmaznormx
 
struct  Cdot
 
struct  CdotCopy
 
struct  CdotNormA
 
struct  coeff_array
 
struct  Dot
 
struct  doubleCG3Init_
 
struct  doubleCG3InitNorm_
 
struct  doubleCG3Update_
 
struct  doubleCG3UpdateNorm_
 
struct  HeavyQuarkResidualNorm_
 
struct  multi_axpyBzpcx_
 
struct  multi_caxpyBxpz_
 
class  MultiBlas
 
struct  MultiBlasArg
 Parameter struct for generic multi-blas kernel. More...
 
struct  MultiBlasFunctor
 
struct  multicaxpy_
 
struct  multicaxpyz_
 
struct  MultiReduceArg
 Parameter struct for generic multi-reduce kernel. More...
 
class  MultiReduceCuda
 
struct  MultiReduceFunctor
 
struct  Norm1
 
struct  Norm2
 
struct  num_to_string
 
struct  quadrupleCG3InitNorm_
 
struct  quadrupleCG3UpdateNorm_
 
struct  quadrupleCGReduction_
 
class  ReduceCuda
 
struct  ReduceFunctor
 
struct  ReductionArg
 
class  TileSizeTune
 
struct  tripleCGReduction_
 
struct  tripleCGUpdate_
 
struct  write
 
struct  xpyHeavyQuarkResidualNorm_
 

Functions

void init ()
 
void end (void)
 
void * getDeviceReduceBuffer ()
 
void * getMappedHostReduceBuffer ()
 
void * getHostReduceBuffer ()
 
void setParam (int kernel, int prec, int threads, int blocks)
 
void zero (ColorSpinorField &a)
 
void copy (ColorSpinorField &dst, const ColorSpinorField &src)
 
void ax (double a, ColorSpinorField &x)
 
void axpbyz (double a, ColorSpinorField &x, double b, ColorSpinorField &y, ColorSpinorField &z)
 
void xpy (ColorSpinorField &x, ColorSpinorField &y)
 
void mxpy (ColorSpinorField &x, ColorSpinorField &y)
 
void axpy (double a, ColorSpinorField &x, ColorSpinorField &y)
 
void axpby (double a, ColorSpinorField &x, double b, ColorSpinorField &y)
 
void xpay (ColorSpinorField &x, double a, ColorSpinorField &y)
 
void xpayz (ColorSpinorField &x, double a, ColorSpinorField &y, ColorSpinorField &z)
 
void axpyZpbx (double a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, double b)
 
void axpyBzpcx (double a, ColorSpinorField &x, ColorSpinorField &y, double b, ColorSpinorField &z, double c)
 
void caxpby (const Complex &a, ColorSpinorField &x, const Complex &b, ColorSpinorField &y)
 
void caxpy (const Complex &a, ColorSpinorField &x, ColorSpinorField &y)
 
void caxpbypczw (const Complex &a, ColorSpinorField &x, const Complex &b, ColorSpinorField &y, const Complex &c, ColorSpinorField &z, ColorSpinorField &w)
 
void cxpaypbz (ColorSpinorField &, const Complex &b, ColorSpinorField &y, const Complex &c, ColorSpinorField &z)
 
void caxpbypzYmbw (const Complex &, ColorSpinorField &, const Complex &, ColorSpinorField &, ColorSpinorField &, ColorSpinorField &)
 
void caxpyBzpx (const Complex &, ColorSpinorField &, ColorSpinorField &, const Complex &, ColorSpinorField &)
 
void caxpyBxpz (const Complex &, ColorSpinorField &, ColorSpinorField &, const Complex &, ColorSpinorField &)
 
void cabxpyAx (double a, const Complex &b, ColorSpinorField &x, ColorSpinorField &y)
 
void caxpyXmaz (const Complex &a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
 
void caxpyXmazMR (const Complex &a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
 
void tripleCGUpdate (double alpha, double beta, ColorSpinorField &q, ColorSpinorField &r, ColorSpinorField &x, ColorSpinorField &p)
 
void doubleCG3Init (double a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
 
void doubleCG3Update (double a, double b, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
 
double norm1 (const ColorSpinorField &b)
 
double norm2 (const ColorSpinorField &a)
 
double axpyReDot (double a, ColorSpinorField &x, ColorSpinorField &y)
 
double reDotProduct (ColorSpinorField &x, ColorSpinorField &y)
 
double axpbyzNorm (double a, ColorSpinorField &x, double b, ColorSpinorField &y, ColorSpinorField &z)
 
double axpyNorm (double a, ColorSpinorField &x, ColorSpinorField &y)
 
double xmyNorm (ColorSpinorField &x, ColorSpinorField &y)
 
Complex cDotProduct (ColorSpinorField &, ColorSpinorField &)
 
double3 cDotProductNormA (ColorSpinorField &a, ColorSpinorField &b)
 
double3 cDotProductNormB (ColorSpinorField &a, ColorSpinorField &b)
 Return (a,b) and ||b||^2 - implemented using cDotProductNormA. More...
 
double3 caxpbypzYmbwcDotProductUYNormY (const Complex &a, ColorSpinorField &x, const Complex &b, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &u)
 
double caxpyNorm (const Complex &a, ColorSpinorField &x, ColorSpinorField &y)
 
double caxpyXmazNormX (const Complex &a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
 
double cabxpyzAxNorm (double a, const Complex &b, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
 
Complex caxpyDotzy (const Complex &a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
 
Complex axpyCGNorm (double a, ColorSpinorField &x, ColorSpinorField &y)
 
double3 HeavyQuarkResidualNorm (ColorSpinorField &x, ColorSpinorField &r)
 
double3 xpyHeavyQuarkResidualNorm (ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &r)
 
double3 tripleCGReduction (ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
 
double4 quadrupleCGReduction (ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
 
double quadrupleCG3InitNorm (double a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &v)
 
double quadrupleCG3UpdateNorm (double a, double b, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &v)
 
double doubleCG3InitNorm (double a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
 
double doubleCG3UpdateNorm (double a, double b, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
 
void caxpy (const Complex *a, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y)
 Compute the block "caxpy" over the set of ColorSpinorFields. E.g., it computes. More...
 
void caxpy (const Complex *a, ColorSpinorField &x, ColorSpinorField &y)
 This is a wrapper for calling the block "caxpy" with a composite ColorSpinorField. E.g., it computes. More...
 
void caxpy_U (const Complex *a, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y)
 Compute the block "caxpy_U" over the set of ColorSpinorFields. E.g., it computes. More...
 
void caxpy_U (const Complex *a, ColorSpinorField &x, ColorSpinorField &y)
 This is a wrapper for calling the block "caxpy_U" with a composite ColorSpinorField. E.g., it computes. More...
 
void caxpy_L (const Complex *a, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y)
 Compute the block "caxpy_L" over the set of ColorSpinorFields. E.g., it computes. More...
 
void caxpy_L (const Complex *a, ColorSpinorField &x, ColorSpinorField &y)
 This is a wrapper for calling the block "caxpy_L" with a composite ColorSpinorField. E.g., it computes. More...
 
void caxpyz (const Complex *a, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, std::vector< ColorSpinorField *> &z)
 Compute the block "caxpyz" over the set of ColorSpinorFields. E.g., it computes. More...
 
void caxpyz (const Complex *a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
 This is a wrapper for calling the block "caxpyz" with a composite ColorSpinorField. E.g., it computes. More...
 
void caxpyz_U (const Complex *a, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, std::vector< ColorSpinorField *> &z)
 Compute the block "caxpyz_U" over the set of ColorSpinorFields. E.g., it computes. More...
 
void caxpyz_U (const Complex *a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
 This is a wrapper for calling the block "caxpyz_U" with a composite ColorSpinorField. E.g., it computes. More...
 
void caxpyz_L (const Complex *a, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, std::vector< ColorSpinorField *> &z)
 Compute the block "caxpyz_L" over the set of ColorSpinorFields. E.g., it computes. More...
 
void caxpyz_L (const Complex *a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
 This is a wrapper for calling the block "caxpyz_L" with a composite ColorSpinorField. E.g., it computes. More...
 
void axpyBzpcx (const double *a, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, const double *b, ColorSpinorField &z, const double *c)
 Compute the vectorized "axpyBzpcx" over the set of ColorSpinorFields, where the third vector, z, is constant over the batch. E.g., it computes. More...
 
void caxpyBxpz (const Complex *a_, std::vector< ColorSpinorField *> &x_, ColorSpinorField &y_, const Complex *b_, ColorSpinorField &z_)
 Compute the vectorized "caxpyBxpz" over the set of ColorSpinorFields, where the second and third vectors, y and z, are constant over the batch. E.g., it computes. More...
 
void reDotProduct (double *result, std::vector< ColorSpinorField *> &a, std::vector< ColorSpinorField *> &b)
 
void cDotProduct (Complex *result, std::vector< ColorSpinorField *> &a, std::vector< ColorSpinorField *> &b)
 Computes the matrix of inner products between the vector set a and the vector set b. More...
 
void hDotProduct (Complex *result, std::vector< ColorSpinorField *> &a, std::vector< ColorSpinorField *> &b)
 Computes the matrix of inner products between the vector set a and the vector set b. This routine is specifically for the case where the result matrix is guaranteed to be Hermitian. Requires a.size()==b.size(). More...
 
void hDotProduct_Anorm (Complex *result, std::vector< ColorSpinorField *> &a, std::vector< ColorSpinorField *> &b)
 Computes the matrix of inner products between the vector set a and the vector set b. This routine is specifically for the case where the result matrix is guaranteed to be Hermitian. Uniquely defined for cases like (p, Ap) where the output is Hermitian, but there's an A-norm instead of an L2 norm. Requires a.size()==b.size(). More...
 
void cDotProductCopy (Complex *result, std::vector< ColorSpinorField *> &a, std::vector< ColorSpinorField *> &b, std::vector< ColorSpinorField *> &c)
 Computes the matrix of inner products between the vector set a and the vector set b, and copies b into c. More...
 
template<typename FloatN , int M, typename Arg >
__global__ void blasKernel (Arg arg)
 
__device__ __host__ void _caxpy (const float2 &a, const float4 &x, float4 &y)
 
__device__ __host__ void _caxpy (const float2 &a, const float2 &x, float2 &y)
 
__device__ __host__ void _caxpy (const double2 &a, const double2 &x, double2 &y)
 
__device__ __host__ void _caxpby (const float2 &a, const float4 &x, const float2 &b, float4 &y)
 
__device__ __host__ void _caxpby (const float2 &a, const float2 &x, const float2 &b, float2 &y)
 
__device__ __host__ void _caxpby (const double2 &a, const double2 &x, const double2 &b, double2 &y)
 
template<typename FloatN , int M, int NXZ, typename Arg >
__global__ void multiBlasKernel (Arg arg_)
 Generic multi-blas kernel with four loads and up to four stores. More...
 
template<int block_size, typename ReduceType , typename FloatN , int M, int NXZ, typename Arg >
__global__ void multiReduceKernel (Arg arg_)
 
template<typename ReduceType >
__device__ __host__ void dot_ (ReduceType &sum, const double2 &a, const double2 &b)
 
template<typename ReduceType >
__device__ __host__ void dot_ (ReduceType &sum, const float2 &a, const float2 &b)
 
template<typename ReduceType >
__device__ __host__ void dot_ (ReduceType &sum, const float4 &a, const float4 &b)
 
template<typename ReduceType >
__device__ __host__ void cdot_ (ReduceType &sum, const double2 &a, const double2 &b)
 
template<typename ReduceType >
__device__ __host__ void cdot_ (ReduceType &sum, const float2 &a, const float2 &b)
 
template<typename ReduceType >
__device__ __host__ void cdot_ (ReduceType &sum, const float4 &a, const float4 &b)
 
template<int block_size, typename ReduceType , typename FloatN , int M, typename Arg >
__global__ void reduceKernel (Arg arg)
 
template<typename ReduceType >
__device__ __host__ ReduceType norm1_ (const double2 &a)
 
template<typename ReduceType >
__device__ __host__ ReduceType norm1_ (const float2 &a)
 
template<typename ReduceType >
__device__ __host__ ReduceType norm1_ (const float4 &a)
 
template<typename ReduceType >
__device__ __host__ void norm2_ (ReduceType &sum, const double2 &a)
 
template<typename ReduceType >
__device__ __host__ void norm2_ (ReduceType &sum, const float2 &a)
 
template<typename ReduceType >
__device__ __host__ void norm2_ (ReduceType &sum, const float4 &a)
 
__device__ __host__ void Caxpy_ (const double2 &a, const double2 &x, double2 &y)
 
__device__ __host__ void Caxpy_ (const float2 &a, const float2 &x, float2 &y)
 
__device__ __host__ void Caxpy_ (const float2 &a, const float4 &x, float4 &y)
 
template<typename ReduceType , typename InputType >
__device__ __host__ void cdotNormA_ (ReduceType &sum, const InputType &a, const InputType &b)
 
template<typename ReduceType , typename InputType >
__device__ __host__ void cdotNormB_ (ReduceType &sum, const InputType &a, const InputType &b)
 
template<typename RegType , typename StoreType , typename yType , int M, template< typename, typename > class Functor, int writeX, int writeY, int writeZ, int writeW, int writeV>
void nativeBlas (const double2 &a, const double2 &b, const double2 &c, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &v, int length)
 
template<template< typename Float, typename FloatN > class Functor, int writeX = 0, int writeY = 0, int writeZ = 0, int writeW = 0, int writeV = 0>
void uni_blas (const double2 &a, const double2 &b, const double2 &c, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &v)
 
template<template< typename Float, typename FloatN > class Functor, int writeX = 0, int writeY = 0, int writeZ = 0, int writeW = 0, int writeV = 0>
void mixed_blas (const double2 &a, const double2 &b, const double2 &c, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &v)
 
void initReduce ()
 
void endReduce ()
 
cudaStream_t * getStream ()
 
template<int NXZ, typename RegType , typename StoreType , typename yType , int M, template< int, typename, typename > class Functor, typename write , typename T >
void multiBlas (const coeff_array< T > &a, const coeff_array< T > &b, const coeff_array< T > &c, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, std::vector< ColorSpinorField *> &z, std::vector< ColorSpinorField *> &w, int length)
 
template<int NXZ, template< int MXZ, typename Float, typename FloatN > class Functor, typename write , typename T >
void multiBlas (const coeff_array< T > &a, const coeff_array< T > &b, const coeff_array< T > &c, CompositeColorSpinorField &x, CompositeColorSpinorField &y, CompositeColorSpinorField &z, CompositeColorSpinorField &w)
 
template<int NXZ, template< int MXZ, typename Float, typename FloatN > class Functor, typename write , typename T >
void mixedMultiBlas (const coeff_array< T > &a, const coeff_array< T > &b, const coeff_array< T > &c, CompositeColorSpinorField &x, CompositeColorSpinorField &y, CompositeColorSpinorField &z, CompositeColorSpinorField &w)
 
void caxpy_recurse (const Complex *a_, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, int i_idx, int j_idx, int upper)
 
void caxpyz_recurse (const Complex *a_, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, std::vector< ColorSpinorField *> &z, int i, int j, int pass, int upper)
 
cudaEvent_t * getReduceEvent ()
 
bool getFastReduce ()
 
void initFastReduce (int words)
 
void completeFastReduce (int32_t words)
 
template<typename doubleN , typename ReduceType , typename FloatN , int M, int NXZ, typename Arg >
void multiReduceLaunch (doubleN result[], Arg &arg, const TuneParam &tp, const cudaStream_t &stream, Tunable &tunable)
 
template<typename doubleN , typename ReduceType , typename RegType , typename StoreType , typename yType , int M, int NXZ, template< int MXZ, typename ReducerType, typename Float, typename FloatN > class Reducer, typename write , typename T >
void multiReduce (doubleN result[], const coeff_array< T > &a, const coeff_array< T > &b, const coeff_array< T > &c, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, std::vector< ColorSpinorField *> &z, std::vector< ColorSpinorField *> &w, int length)
 
template<int NXZ, typename doubleN , typename ReduceType , template< int MXZ, typename ReducerType, typename Float, typename FloatN > class Reducer, typename write , bool siteUnroll, typename T >
void multiReduce (doubleN result[], const coeff_array< T > &a, const coeff_array< T > &b, const coeff_array< T > &c, CompositeColorSpinorField &x, CompositeColorSpinorField &y, CompositeColorSpinorField &z, CompositeColorSpinorField &w)
 
template<int NXZ, typename doubleN , typename ReduceType , template< int MXZ, typename ReducerType, typename Float, typename FloatN > class Reducer, typename write , bool siteUnroll, typename T >
void mixedMultiReduce (doubleN result[], const coeff_array< T > &a, const coeff_array< T > &b, const coeff_array< T > &c, CompositeColorSpinorField &x, CompositeColorSpinorField &y, CompositeColorSpinorField &z, CompositeColorSpinorField &w)
 
template<int NXZ, typename doubleN , typename ReduceType , template< int MXZ, typename ReducerType, typename Float, typename FloatN > class ReducerDiagonal, typename writeDiagonal , template< int MXZ, typename ReducerType, typename Float, typename FloatN > class ReducerOffDiagonal, typename writeOffDiagonal , bool siteUnroll, typename T >
void multiReduce (doubleN result[], const coeff_array< T > &a, const coeff_array< T > &b, const coeff_array< T > &c, CompositeColorSpinorField &x, CompositeColorSpinorField &y, CompositeColorSpinorField &z, CompositeColorSpinorField &w, int i, int j)
 
template<template< int MXZ, typename ReducerType, typename Float, typename FloatN > class ReducerDiagonal, typename writeDiagonal , template< int MXZ, typename ReducerType, typename Float, typename FloatN > class ReducerOffDiagonal, typename writeOffDiagonal >
void multiReduce_recurse (Complex *result, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, std::vector< ColorSpinorField *> &z, std::vector< ColorSpinorField *> &w, int i_idx, int j_idx, bool hermitian, unsigned int tile_size)
 
void initFastReduce (int32_t words)
 
template<typename doubleN , typename ReduceType , typename FloatN , int M, typename Arg >
doubleN reduceLaunch (Arg &arg, const TuneParam &tp, const cudaStream_t &stream, Tunable &tunable)
 
template<typename doubleN , typename ReduceType , typename RegType , typename StoreType , typename zType , int M, template< typename ReducerType, typename Float, typename FloatN > class Reducer, int writeX, int writeY, int writeZ, int writeW, int writeV>
doubleN nativeReduce (const double2 &a, const double2 &b, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &v, int length)
 
template<typename doubleN , typename ReduceType , template< typename ReducerType, typename Float, typename FloatN > class Reducer, int writeX, int writeY, int writeZ, int writeW, int writeV, bool siteUnroll>
doubleN uni_reduce (const double2 &a, const double2 &b, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &v)
 
template<typename doubleN , typename ReduceType , template< typename ReducerType, typename Float, typename FloatN > class Reducer, int writeX, int writeY, int writeZ, int writeW, int writeV, bool siteUnroll>
doubleN mixed_reduce (const double2 &a, const double2 &b, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &v)
 

Variables

unsigned long long flops
 
unsigned long long bytes
 
static __constant__ signed char Amatrix_d [MAX_MATRIX_SIZE]
 
static __constant__ signed char Bmatrix_d [MAX_MATRIX_SIZE]
 
static __constant__ signed char Cmatrix_d [MAX_MATRIX_SIZE]
 
static signed char * Amatrix_h
 
static signed char * Bmatrix_h
 
static signed char * Cmatrix_h
 
static __constant__ signed char arg_buffer [MAX_MATRIX_SIZE]
 
static __constant__ signed char Amatrix_d [MAX_MATRIX_SIZE]
 
static __constant__ signed char Bmatrix_d [MAX_MATRIX_SIZE]
 
static __constant__ signed char Cmatrix_d [MAX_MATRIX_SIZE]
 
static signed char * Amatrix_h
 
static signed char * Bmatrix_h
 
static signed char * Cmatrix_h
 
static __constant__ signed char arg_buffer [MAX_MATRIX_SIZE]
 
static cudaStream_t * blasStream
 

Function Documentation

◆ _caxpby() [1/3]

__device__ __host__ void quda::blas::_caxpby ( const float2 &  a,
const float4 &  x,
const float2 &  b,
float4 &  y 
)

Functor to perform the operation y = a*x + b*y (complex-valued)

Definition at line 150 of file blas_core.cuh.

Referenced by quda::blas::caxpby_< Float2, FloatN >::operator()(), and quda::blas::caxpbypczw_< Float2, FloatN >::operator()().

Here is the caller graph for this function:

◆ _caxpby() [2/3]

__device__ __host__ void quda::blas::_caxpby ( const float2 &  a,
const float2 &  x,
const float2 &  b,
float2 &  y 
)

Definition at line 172 of file blas_core.cuh.

◆ _caxpby() [3/3]

__device__ __host__ void quda::blas::_caxpby ( const double2 &  a,
const double2 &  x,
const double2 &  b,
double2 &  y 
)

Definition at line 186 of file blas_core.cuh.

◆ _caxpy() [1/3]

__device__ __host__ void quda::blas::_caxpy ( const float2 &  a,
const float4 &  x,
float4 &  y 
)
inline

◆ _caxpy() [2/3]

__device__ __host__ void quda::blas::_caxpy ( const float2 &  a,
const float2 &  x,
float2 &  y 
)
inline

Definition at line 122 of file blas_core.cuh.

◆ _caxpy() [3/3]

__device__ __host__ void quda::blas::_caxpy ( const double2 &  a,
const double2 &  x,
double2 &  y 
)
inline

Definition at line 130 of file blas_core.cuh.

◆ ax()

void quda::blas::ax ( double  a,
ColorSpinorField x 
)

◆ axpby()

void quda::blas::axpby ( double  a,
ColorSpinorField x,
double  b,
ColorSpinorField y 
)
inline

Definition at line 36 of file blas_quda.h.

References axpbyz().

Referenced by benchmark(), quda::CG3::operator()(), quda::CG3NE::operator()(), quda::CGNE::operator()(), quda::CGNR::operator()(), quda::CACGNE::operator()(), quda::CACGNR::operator()(), quda::MultiShiftCG::operator()(), and test().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ axpbyz()

void quda::blas::axpbyz ( double  a,
ColorSpinorField x,
double  b,
ColorSpinorField y,
ColorSpinorField z 
)

◆ axpbyzNorm()

double quda::blas::axpbyzNorm ( double  a,
ColorSpinorField x,
double  b,
ColorSpinorField y,
ColorSpinorField z 
)

Definition at line 734 of file reduce_quda.cu.

Referenced by axpyNorm(), xmyNorm(), and xpayz().

Here is the caller graph for this function:

◆ axpy()

void quda::blas::axpy ( double  a,
ColorSpinorField x,
ColorSpinorField y 
)
inline

◆ axpyBzpcx() [1/2]

void quda::blas::axpyBzpcx ( double  a,
ColorSpinorField x,
ColorSpinorField y,
double  b,
ColorSpinorField z,
double  c 
)

◆ axpyBzpcx() [2/2]

void quda::blas::axpyBzpcx ( const double *  a,
std::vector< ColorSpinorField *> &  x,
std::vector< ColorSpinorField *> &  y,
const double *  b,
ColorSpinorField z,
const double *  c 
)

Compute the vectorized "axpyBzpcx" over the set of ColorSpinorFields, where the third vector, z, is constant over the batch. E.g., it computes.

y = a * x + y
x = b * z + c * x

The dimensions of a, b, c are the same as the size of x and y, with a maximum size of 16.

Parameters
a [in] Array of coefficients
b [in] Array of coefficients
c [in] Array of coefficients
x [in,out] vector of ColorSpinorFields
y [in,out] vector of ColorSpinorFields
z [in] input ColorSpinorField

Definition at line 985 of file multi_blas_quda.cu.

References axpyBzpcx(), and MAX_MULTI_BLAS_N.

Here is the call graph for this function:

◆ axpyCGNorm()

Complex quda::blas::axpyCGNorm ( double  a,
ColorSpinorField x,
ColorSpinorField y 
)

Definition at line 796 of file reduce_quda.cu.

References quda::LatticeField::Precision().

Referenced by cDotProductNormB(), quda::CG::operator()(), quda::PreconCG::operator()(), and quda::MultiShiftCG::operator()().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ axpyNorm()

double quda::blas::axpyNorm ( double  a,
ColorSpinorField x,
ColorSpinorField y 
)
inline

Definition at line 74 of file blas_quda.h.

References axpbyzNorm().

Referenced by benchmark(), quda::IncEigCG::eigCGsolve(), quda::CG::operator()(), quda::CG3::operator()(), quda::CG3NE::operator()(), and test().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ axpyReDot()

double quda::blas::axpyReDot ( double  a,
ColorSpinorField x,
ColorSpinorField y 
)

Definition at line 740 of file reduce_quda.cu.

Referenced by benchmark(), quda::MultiShiftCG::operator()(), test(), and xpayz().

Here is the caller graph for this function:

◆ axpyZpbx()

void quda::blas::axpyZpbx ( double  a,
ColorSpinorField x,
ColorSpinorField y,
ColorSpinorField z,
double  b 
)

◆ blasKernel()

template<typename FloatN , int M, typename Arg >
__global__ void quda::blas::blasKernel ( Arg  arg)

Generic blas kernel with four loads and up to four stores.

Definition at line 43 of file blas_core.cuh.

References parity.

◆ cabxpyAx()

void quda::blas::cabxpyAx ( double  a,
const Complex b,
ColorSpinorField x,
ColorSpinorField y 
)

◆ cabxpyzAxNorm()

double quda::blas::cabxpyzAxNorm ( double  a,
const Complex b,
ColorSpinorField x,
ColorSpinorField y,
ColorSpinorField z 
)

Definition at line 758 of file reduce_quda.cu.

References IMAG, and REAL.

Referenced by benchmark(), cDotProductNormB(), quda::GCR::operator()(), and test().

Here is the caller graph for this function:

◆ caxpby()

void quda::blas::caxpby ( const Complex a,
ColorSpinorField x,
const Complex b,
ColorSpinorField y 
)

◆ caxpbypczw()

void quda::blas::caxpbypczw ( const Complex a,
ColorSpinorField x,
const Complex b,
ColorSpinorField y,
const Complex c,
ColorSpinorField z,
ColorSpinorField w 
)

◆ caxpbypzYmbw()

void quda::blas::caxpbypzYmbw ( const Complex a,
ColorSpinorField x,
const Complex b,
ColorSpinorField y,
ColorSpinorField z,
ColorSpinorField w 
)

◆ caxpbypzYmbwcDotProductUYNormY()

double3 quda::blas::caxpbypzYmbwcDotProductUYNormY ( const Complex a,
ColorSpinorField x,
const Complex b,
ColorSpinorField y,
ColorSpinorField z,
ColorSpinorField w,
ColorSpinorField u 
)

Definition at line 783 of file reduce_quda.cu.

References IMAG, quda::LatticeField::Precision(), and REAL.

Referenced by benchmark(), cDotProductNormB(), quda::BiCGstab::operator()(), and test().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ caxpy() [1/3]

void quda::blas::caxpy ( const Complex a,
ColorSpinorField x,
ColorSpinorField y 
)

◆ caxpy() [2/3]

void quda::blas::caxpy ( const Complex a,
std::vector< ColorSpinorField *> &  x,
std::vector< ColorSpinorField *> &  y 
)

Compute the block "caxpy" over the set of ColorSpinorFields. E.g., it computes.

y = x * a + y

The dimensions of a can be rectangular, e.g., the width of x and y need not be the same.

Parameters
a [in] Matrix of coefficients
x [in] vector of input ColorSpinorFields
y [in,out] vector of input/output ColorSpinorFields

Definition at line 732 of file multi_blas_quda.cu.

References caxpy_recurse().

Here is the call graph for this function:

◆ caxpy() [3/3]

void quda::blas::caxpy ( const Complex a,
ColorSpinorField x,
ColorSpinorField y 
)

This is a wrapper for calling the block "caxpy" with a composite ColorSpinorField. E.g., it computes.

y = x * a + y

Parameters
a [in] Matrix of coefficients
x [in] Input matrix
y [in,out] Computed output matrix

Definition at line 763 of file multi_blas_quda.cu.

References caxpy(), and quda::ColorSpinorField::Components().

Here is the call graph for this function:

◆ Caxpy_() [1/3]

__device__ __host__ void quda::blas::Caxpy_ ( const double2 &  a,
const double2 &  x,
double2 &  y 
)

◆ Caxpy_() [2/3]

__device__ __host__ void quda::blas::Caxpy_ ( const float2 &  a,
const float2 &  x,
float2 &  y 
)

Definition at line 239 of file reduce_core.cuh.

◆ Caxpy_() [3/3]

__device__ __host__ void quda::blas::Caxpy_ ( const float2 &  a,
const float4 &  x,
float4 &  y 
)

Definition at line 246 of file reduce_core.cuh.

◆ caxpy_L() [1/2]

void quda::blas::caxpy_L ( const Complex a,
std::vector< ColorSpinorField *> &  x,
std::vector< ColorSpinorField *> &  y 
)

Compute the block "caxpy_L" over the set of ColorSpinorFields. E.g., it computes.

y = x * a + y

Where 'a' must be a square, lower triangular matrix.

Parameters
a [in] Matrix of coefficients
x [in] vector of input ColorSpinorFields
y [in,out] vector of input/output ColorSpinorFields

Definition at line 750 of file multi_blas_quda.cu.

References caxpy_recurse(), and errorQuda.

Referenced by caxpy_L(), and cDotProductNormB().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ caxpy_L() [2/2]

void quda::blas::caxpy_L ( const Complex a,
ColorSpinorField x,
ColorSpinorField y 
)

This is a wrapper for calling the block "caxpy_L" with a composite ColorSpinorField. E.g., it computes.

y = x * a + y

Parameters
a [in] Matrix of coefficients
x [in] Input matrix
y [in,out] Computed output matrix

Definition at line 767 of file multi_blas_quda.cu.

References caxpy_L(), and quda::ColorSpinorField::Components().

Here is the call graph for this function:

◆ caxpy_recurse()

void quda::blas::caxpy_recurse ( const Complex a_,
std::vector< ColorSpinorField *> &  x,
std::vector< ColorSpinorField *> &  y,
int  i_idx,
int  j_idx,
int  upper 
)

Definition at line 562 of file multi_blas_quda.cu.

References quda::count, and MAX_MULTI_BLAS_N.

Referenced by caxpy(), caxpy_L(), and caxpy_U().

Here is the caller graph for this function:

◆ caxpy_U() [1/2]

void quda::blas::caxpy_U ( const Complex a,
std::vector< ColorSpinorField *> &  x,
std::vector< ColorSpinorField *> &  y 
)

Compute the block "caxpy_U" over the set of ColorSpinorFields. E.g., it computes.

y = x * a + y

Where 'a' must be a square, upper triangular matrix.

Parameters
a [in] Matrix of coefficients
x [in] vector of input ColorSpinorFields
y [in,out] vector of input/output ColorSpinorFields

Definition at line 738 of file multi_blas_quda.cu.

References caxpy_recurse(), and errorQuda.

Referenced by caxpy_U(), and cDotProductNormB().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ caxpy_U() [2/2]

void quda::blas::caxpy_U ( const Complex a,
ColorSpinorField x,
ColorSpinorField y 
)

This is a wrapper for calling the block "caxpy_U" with a composite ColorSpinorField. E.g., it computes.

y = x * a + y

Parameters
a [in] Matrix of coefficients
x [in] Input matrix
y [in,out] Computed output matrix

Definition at line 765 of file multi_blas_quda.cu.

References caxpy_U(), and quda::ColorSpinorField::Components().

Here is the call graph for this function:

◆ caxpyBxpz() [1/2]

void quda::blas::caxpyBxpz ( const Complex a,
ColorSpinorField x,
ColorSpinorField y,
const Complex b,
ColorSpinorField z 
)

◆ caxpyBxpz() [2/2]

void quda::blas::caxpyBxpz ( const Complex a_,
std::vector< ColorSpinorField *> &  x_,
ColorSpinorField y_,
const Complex b_,
ColorSpinorField z_ 
)

Compute the vectorized "caxpyBxpz" over the set of ColorSpinorFields, where the second and third vectors, y and z, are constant over the batch, i.e., it computes:

y = a * x + y, z = b * x + z

The dimensions of a, b are the same as the size of x, with a maximum size of 16.

Parameters
a[in]Array of coefficients
b[in]Array of coefficients
x[in]vector of ColorSpinorFields
y[in,out]input ColorSpinorField
z[in,out]input ColorSpinorField

Definition at line 1029 of file multi_blas_quda.cu.

References caxpyBxpz(), and MAX_MULTI_BLAS_N.

Here is the call graph for this function:

◆ caxpyBzpx()

void quda::blas::caxpyBzpx ( const Complex a,
ColorSpinorField x,
ColorSpinorField y,
const Complex b,
ColorSpinorField z 
)

◆ caxpyDotzy()

Complex quda::blas::caxpyDotzy ( const Complex a,
ColorSpinorField x,
ColorSpinorField y,
ColorSpinorField z 
)

Definition at line 771 of file reduce_quda.cu.

References IMAG, and REAL.

Referenced by benchmark(), cDotProductNormB(), quda::orthoDir(), quda::BiCGstabL::orthoDir(), and test().

Here is the caller graph for this function:

◆ caxpyNorm()

double quda::blas::caxpyNorm ( const Complex a,
ColorSpinorField x,
ColorSpinorField y 
)

Definition at line 746 of file reduce_quda.cu.

References IMAG, and REAL.

Referenced by benchmark(), cDotProductNormB(), quda::CG3::operator()(), quda::CG3NE::operator()(), and test().

Here is the caller graph for this function:

◆ caxpyXmaz()

void quda::blas::caxpyXmaz ( const Complex a,
ColorSpinorField x,
ColorSpinorField y,
ColorSpinorField z 
)

◆ caxpyXmazMR()

void quda::blas::caxpyXmazMR ( const Complex a,
ColorSpinorField x,
ColorSpinorField y,
ColorSpinorField z 
)

◆ caxpyXmazNormX()

double quda::blas::caxpyXmazNormX ( const Complex a,
ColorSpinorField x,
ColorSpinorField y,
ColorSpinorField z 
)

Definition at line 752 of file reduce_quda.cu.

References IMAG, and REAL.

Referenced by benchmark(), cDotProductNormB(), and test().

Here is the caller graph for this function:

◆ caxpyz() [1/2]

void quda::blas::caxpyz ( const Complex a,
std::vector< ColorSpinorField *> &  x,
std::vector< ColorSpinorField *> &  y,
std::vector< ColorSpinorField *> &  z 
)

Compute the block "caxpyz" over the set of ColorSpinorFields, i.e., it computes:

z = x * a + y

The matrix a can be rectangular, i.e., the widths of x and y need not be the same, though the maximum width for both is 16.

Parameters
a[in]Matrix of coefficients
x[in]vector of input ColorSpinorFields
y[in]vector of input ColorSpinorFields
z[out]vector of output ColorSpinorFields

Definition at line 949 of file multi_blas_quda.cu.

References caxpyz_recurse().

Referenced by caxpyz(), cDotProductNormB(), and quda::CACG::operator()().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ caxpyz() [2/2]

void quda::blas::caxpyz ( const Complex a,
ColorSpinorField x,
ColorSpinorField y,
ColorSpinorField z 
)

This is a wrapper for calling the block "caxpyz" with a composite ColorSpinorField. E.g., it computes.

z = x * a + y

Parameters
a[in]Matrix of coefficients
x[in]Input matrix
y[in]Input matrix
z[out]Computed output matrix

Definition at line 973 of file multi_blas_quda.cu.

References caxpyz(), and quda::ColorSpinorField::Components().

Here is the call graph for this function:

◆ caxpyz_L() [1/2]

void quda::blas::caxpyz_L ( const Complex a,
std::vector< ColorSpinorField *> &  x,
std::vector< ColorSpinorField *> &  y,
std::vector< ColorSpinorField *> &  z 
)

Compute the block "caxpyz_L" over the set of ColorSpinorFields, i.e., it computes:

z = x * a + y

Where 'a' is assumed to be lower triangular

Parameters
a[in]Matrix of coefficients
x[in]vector of input ColorSpinorFields
y[in]vector of input ColorSpinorFields
z[out]vector of output ColorSpinorFields

Definition at line 964 of file multi_blas_quda.cu.

References caxpyz_recurse().

Referenced by caxpyz_L(), and cDotProductNormB().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ caxpyz_L() [2/2]

void quda::blas::caxpyz_L ( const Complex a,
ColorSpinorField x,
ColorSpinorField y,
ColorSpinorField z 
)

This is a wrapper for calling the block "caxpyz_L" with a composite ColorSpinorField, i.e., it computes:

z = x * a + y

Parameters
a[in]Matrix of coefficients
x[in]Input matrix
y[in]Input matrix
z[out]Computed output matrix

Definition at line 981 of file multi_blas_quda.cu.

References caxpyz_L(), and quda::ColorSpinorField::Components().

Here is the call graph for this function:

◆ caxpyz_recurse()

void quda::blas::caxpyz_recurse ( const Complex a_,
std::vector< ColorSpinorField *> &  x,
std::vector< ColorSpinorField *> &  y,
std::vector< ColorSpinorField *> &  z,
int  i,
int  j,
int  pass,
int  upper 
)

Definition at line 770 of file multi_blas_quda.cu.

References caxpy(), quda::count, and MAX_MULTI_BLAS_N.

Referenced by caxpyz(), caxpyz_L(), and caxpyz_U().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ caxpyz_U() [1/2]

void quda::blas::caxpyz_U ( const Complex a,
std::vector< ColorSpinorField *> &  x,
std::vector< ColorSpinorField *> &  y,
std::vector< ColorSpinorField *> &  z 
)

Compute the block "caxpyz_U" over the set of ColorSpinorFields, i.e., it computes:

z = x * a + y

Where 'a' is assumed to be upper triangular.

Parameters
a[in]Matrix of coefficients
x[in]vector of input ColorSpinorFields
y[in]vector of input ColorSpinorFields
z[out]vector of output ColorSpinorFields

Definition at line 956 of file multi_blas_quda.cu.

References caxpyz_recurse().

Referenced by caxpyz_U(), and cDotProductNormB().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ caxpyz_U() [2/2]

void quda::blas::caxpyz_U ( const Complex a,
ColorSpinorField x,
ColorSpinorField y,
ColorSpinorField z 
)

This is a wrapper for calling the block "caxpyz_U" with a composite ColorSpinorField, i.e., it computes:

z = x * a + y

Parameters
a[in]Matrix of coefficients
x[in]Input matrix
y[in]Input matrix
z[out]Computed output matrix

Definition at line 977 of file multi_blas_quda.cu.

References caxpyz_U(), and quda::ColorSpinorField::Components().

Here is the call graph for this function:

◆ cdot_() [1/3]

template<typename ReduceType >
__device__ __host__ void quda::blas::cdot_ ( ReduceType &  sum,
const double2 &  a,
const double2 &  b 
)

Returns complex-valued dot product of x and y

Definition at line 199 of file multi_reduce_core.cuh.

Referenced by quda::blas::cabxpyzaxnorm< ReduceType, Float2, FloatN >::flops().

Here is the caller graph for this function:

◆ cdot_() [2/3]

template<typename ReduceType >
__device__ __host__ void quda::blas::cdot_ ( ReduceType &  sum,
const float2 &  a,
const float2 &  b 
)

Definition at line 208 of file multi_reduce_core.cuh.

◆ cdot_() [3/3]

template<typename ReduceType >
__device__ __host__ void quda::blas::cdot_ ( ReduceType &  sum,
const float4 &  a,
const float4 &  b 
)

Definition at line 217 of file multi_reduce_core.cuh.

◆ cdotNormA_()

template<typename ReduceType , typename InputType >
__device__ __host__ void quda::blas::cdotNormA_ ( ReduceType &  sum,
const InputType &  a,
const InputType &  b 
)

Returns the dot product (x,y) first, followed by the norm of x.

Definition at line 385 of file reduce_core.cuh.

References quda::sum().

Here is the call graph for this function:

◆ cdotNormB_()

template<typename ReduceType , typename InputType >
__device__ __host__ void quda::blas::cdotNormB_ ( ReduceType &  sum,
const InputType &  a,
const InputType &  b 
)

Returns the dot product (x,y) first, followed by the norm of y.

Definition at line 398 of file reduce_core.cuh.

References quda::sum().

Here is the call graph for this function:

◆ cDotProduct() [1/2]

Complex quda::blas::cDotProduct ( ColorSpinorField x,
ColorSpinorField y 
)

◆ cDotProduct() [2/2]

void quda::blas::cDotProduct ( Complex result,
std::vector< ColorSpinorField *> &  a,
std::vector< ColorSpinorField *> &  b 
)

Computes the matrix of inner products between the vector set a and the vector set b.

Parameters
result[out]Matrix of inner product result[i][j] = (a[j],b[i])
a[in]set of input ColorSpinorFields
b[in]set of input ColorSpinorFields

Definition at line 1031 of file multi_reduce_quda.cu.

References quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >::apply(), errorQuda, and reduceDoubleArray().

Here is the call graph for this function:

◆ cDotProductCopy()

void quda::blas::cDotProductCopy ( Complex result,
std::vector< ColorSpinorField *> &  a,
std::vector< ColorSpinorField *> &  b,
std::vector< ColorSpinorField *> &  c 
)

Computes the matrix of inner products between the vector set a and the vector set b, and copies b into c.

Parameters
result[out]Matrix of inner product result[i][j] = (a[j],b[i])
a[in]set of input ColorSpinorFields
b[in]set of input ColorSpinorFields
c[out]set of output ColorSpinorFields

Definition at line 1110 of file multi_reduce_quda.cu.

References quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >::apply(), errorQuda, and reduceDoubleArray().

Referenced by cDotProductNormB().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ cDotProductNormA()

double3 quda::blas::cDotProductNormA ( ColorSpinorField a,
ColorSpinorField b 
)

◆ cDotProductNormB()

double3 quda::blas::cDotProductNormB ( ColorSpinorField a,
ColorSpinorField b 
)
inline

◆ completeFastReduce()

void quda::blas::completeFastReduce ( int32_t  words)

Definition at line 43 of file reduce_quda.cu.

References quda::count, and h_reduce.

Referenced by multiReduceLaunch(), and reduceLaunch().

Here is the caller graph for this function:

◆ copy()

void quda::blas::copy ( ColorSpinorField dst,
const ColorSpinorField src 
)

◆ cxpaypbz()

void quda::blas::cxpaypbz ( ColorSpinorField x,
const Complex b,
ColorSpinorField y,
const Complex c,
ColorSpinorField z 
)

◆ dot_() [1/3]

template<typename ReduceType >
__device__ __host__ void quda::blas::dot_ ( ReduceType &  sum,
const double2 &  a,
const double2 &  b 
)

Return the real dot product of x and y. Broken at the moment: reDotProduct needs to be updated with the permuting, etc., of cDotProduct below.

Return the real dot product of x and y

Definition at line 158 of file multi_reduce_core.cuh.

Referenced by quda::__launch_bounds__(), and quda::blas::Norm2< ReduceType, Float2, FloatN >::flops().

Here is the caller graph for this function:

◆ dot_() [2/3]

template<typename ReduceType >
__device__ __host__ void quda::blas::dot_ ( ReduceType &  sum,
const float2 &  a,
const float2 &  b 
)

Definition at line 164 of file multi_reduce_core.cuh.

◆ dot_() [3/3]

template<typename ReduceType >
__device__ __host__ void quda::blas::dot_ ( ReduceType &  sum,
const float4 &  a,
const float4 &  b 
)

Definition at line 170 of file multi_reduce_core.cuh.

◆ doubleCG3Init()

void quda::blas::doubleCG3Init ( double  a,
ColorSpinorField x,
ColorSpinorField y,
ColorSpinorField z 
)

◆ doubleCG3InitNorm()

double quda::blas::doubleCG3InitNorm ( double  a,
ColorSpinorField x,
ColorSpinorField y,
ColorSpinorField z 
)

Definition at line 848 of file reduce_quda.cu.

Referenced by cDotProductNormB(), and quda::CG3NE::operator()().

Here is the caller graph for this function:

◆ doubleCG3Update()

void quda::blas::doubleCG3Update ( double  a,
double  b,
ColorSpinorField x,
ColorSpinorField y,
ColorSpinorField z 
)

◆ doubleCG3UpdateNorm()

double quda::blas::doubleCG3UpdateNorm ( double  a,
double  b,
ColorSpinorField x,
ColorSpinorField y,
ColorSpinorField z 
)

Definition at line 853 of file reduce_quda.cu.

Referenced by cDotProductNormB(), and quda::CG3NE::operator()().

Here is the caller graph for this function:

◆ end()

void quda::blas::end ( void  )

◆ endReduce()

void quda::blas::endReduce ( void  )

Definition at line 120 of file reduce_quda.cu.

References d_reduce, device_free, h_reduce, hd_reduce, host_free, and reduceEnd.

Referenced by end(), and zero().

Here is the caller graph for this function:

◆ getDeviceReduceBuffer()

void * quda::blas::getDeviceReduceBuffer ( )

Definition at line 26 of file reduce_quda.cu.

References d_reduce.

◆ getFastReduce()

bool quda::blas::getFastReduce ( )

◆ getHostReduceBuffer()

void * quda::blas::getHostReduceBuffer ( )

Definition at line 28 of file reduce_quda.cu.

References h_reduce.

Referenced by multiReduceLaunch().

Here is the caller graph for this function:

◆ getMappedHostReduceBuffer()

void * quda::blas::getMappedHostReduceBuffer ( )

Definition at line 27 of file reduce_quda.cu.

References hd_reduce.

Referenced by multiReduceLaunch().

Here is the caller graph for this function:

◆ getReduceEvent()

cudaEvent_t * quda::blas::getReduceEvent ( )

Definition at line 29 of file reduce_quda.cu.

References reduceEnd.

Referenced by multiReduceLaunch().

Here is the caller graph for this function:

◆ getStream()

cudaStream_t * quda::blas::getStream ( )

◆ hDotProduct()

void quda::blas::hDotProduct ( Complex result,
std::vector< ColorSpinorField *> &  a,
std::vector< ColorSpinorField *> &  b 
)

Computes the matrix of inner products between the vector set a and the vector set b. This routine is specifically for the case where the result matrix is guaranteed to be Hermitian. Requires a.size()==b.size().

Parameters
result[out]Matrix of inner product result[i][j] = (a[j],b[i])
a[in]set of input ColorSpinorFields
b[in]set of input ColorSpinorFields

Definition at line 1056 of file multi_reduce_quda.cu.

References quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >::apply(), quda::conj(), errorQuda, and reduceDoubleArray().

Referenced by cDotProductNormB(), and quda::CAGCR::solve().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ hDotProduct_Anorm()

void quda::blas::hDotProduct_Anorm ( Complex result,
std::vector< ColorSpinorField *> &  a,
std::vector< ColorSpinorField *> &  b 
)

Computes the matrix of inner products between the vector set a and the vector set b. This routine is specifically for the case where the result matrix is guaranteed to be Hermitian. Uniquely defined for cases like (p, Ap) where the output is Hermitian, but there's an A-norm instead of an L2 norm. Requires a.size()==b.size().

Parameters
result[out]Matrix of inner product result[i][j] = (a[j],b[i])
a[in]set of input ColorSpinorFields
b[in]set of input ColorSpinorFields

Definition at line 1083 of file multi_reduce_quda.cu.

References quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >::apply(), quda::conj(), errorQuda, and reduceDoubleArray().

Referenced by cDotProductNormB().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ HeavyQuarkResidualNorm()

double3 quda::blas::HeavyQuarkResidualNorm ( ColorSpinorField x,
ColorSpinorField r 
)

◆ init()

void quda::blas::init ( )

Definition at line 483 of file blas_quda.cu.

References initReduce(), quda::Nstream, and streams.

Referenced by initQudaMemory().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ initFastReduce() [1/2]

void quda::blas::initFastReduce ( int  words)

Referenced by multiReduceLaunch(), and reduceLaunch().

Here is the caller graph for this function:

◆ initFastReduce() [2/2]

void quda::blas::initFastReduce ( int32_t  words)

Definition at line 32 of file reduce_quda.cu.

References h_reduce.

◆ initReduce()

void quda::blas::initReduce ( )

Definition at line 64 of file reduce_quda.cu.

References bytes, checkCudaError, d_reduce, device_malloc, deviceProp, fast_reduce_enabled, h_reduce, hd_reduce, mapped_malloc, MAX_MULTI_BLAS_N, memset(), pinned_malloc, QudaSumFloat, reduceEnd, and warningQuda.

Referenced by init(), and zero().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ mixed_blas()

template<template< typename Float, typename FloatN > class Functor, int writeX = 0, int writeY = 0, int writeZ = 0, int writeW = 0, int writeV = 0>
void quda::blas::mixed_blas ( const double2 &  a,
const double2 &  b,
const double2 &  c,
ColorSpinorField x,
ColorSpinorField y,
ColorSpinorField z,
ColorSpinorField w,
ColorSpinorField v 
)

◆ mixed_reduce()

template<typename doubleN , typename ReduceType , template< typename ReducerType, typename Float, typename FloatN > class Reducer, int writeX, int writeY, int writeZ, int writeW, int writeV, bool siteUnroll>
doubleN quda::blas::mixed_reduce ( const double2 &  a,
const double2 &  b,
ColorSpinorField x,
ColorSpinorField y,
ColorSpinorField z,
ColorSpinorField w,
ColorSpinorField v 
)

◆ mixedMultiBlas()

template<int NXZ, template< int MXZ, typename Float, typename FloatN > class Functor, typename write , typename T >
void quda::blas::mixedMultiBlas ( const coeff_array< T > &  a,
const coeff_array< T > &  b,
const coeff_array< T > &  c,
CompositeColorSpinorField x,
CompositeColorSpinorField y,
CompositeColorSpinorField z,
CompositeColorSpinorField w 
)

Driver for generic blas routine with four loads and two stores.

Definition at line 403 of file multi_blas_quda.cu.

References checkLocation, errorQuda, Nspin, QUDA_CUDA_FIELD_LOCATION, QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, and QUDA_SINGLE_PRECISION.

◆ mixedMultiReduce()

template<int NXZ, typename doubleN , typename ReduceType , template< int MXZ, typename ReducerType, typename Float, typename FloatN > class Reducer, typename write , bool siteUnroll, typename T >
void quda::blas::mixedMultiReduce ( doubleN  result[],
const coeff_array< T > &  a,
const coeff_array< T > &  b,
const coeff_array< T > &  c,
CompositeColorSpinorField x,
CompositeColorSpinorField y,
CompositeColorSpinorField z,
CompositeColorSpinorField w 
)

Driver for multi-reduce with up to five vectors

Definition at line 511 of file multi_reduce_quda.cu.

References checkPrecision, errorQuda, Nspin, QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, and QUDA_SINGLE_PRECISION.

◆ multiBlas() [1/2]

template<int NXZ, typename RegType , typename StoreType , typename yType , int M, template< int, typename, typename > class Functor, typename write , typename T >
void quda::blas::multiBlas ( const coeff_array< T > &  a,
const coeff_array< T > &  b,
const coeff_array< T > &  c,
std::vector< ColorSpinorField *> &  x,
std::vector< ColorSpinorField *> &  y,
std::vector< ColorSpinorField *> &  z,
std::vector< ColorSpinorField *> &  w,
int  length 
)

◆ multiBlas() [2/2]

template<int NXZ, template< int MXZ, typename Float, typename FloatN > class Functor, typename write , typename T >
void quda::blas::multiBlas ( const coeff_array< T > &  a,
const coeff_array< T > &  b,
const coeff_array< T > &  c,
CompositeColorSpinorField x,
CompositeColorSpinorField y,
CompositeColorSpinorField z,
CompositeColorSpinorField w 
)

Driver for generic blas routine with four loads and two stores.

Definition at line 294 of file multi_blas_quda.cu.

References checkLocation, errorQuda, Ncolor, Nspin, QUDA_CUDA_FIELD_LOCATION, QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, and QUDA_SINGLE_PRECISION.

◆ multiBlasKernel()

template<typename FloatN , int M, int NXZ, typename Arg >
__global__ void quda::blas::multiBlasKernel ( Arg  arg_)

Generic multi-blas kernel with four loads and up to four stores.

Parameters
[in,out] arg Argument struct with required metadata (input/output fields, functor, etc.)

Definition at line 73 of file multi_blas_core.cuh.

References quda::arg(), and parity.

Here is the call graph for this function:

◆ multiReduce() [1/3]

template<typename doubleN , typename ReduceType , typename RegType , typename StoreType , typename yType , int M, int NXZ, template< int MXZ, typename ReducerType, typename Float, typename FloatN > class Reducer, typename write , typename T >
void quda::blas::multiReduce ( doubleN  result[],
const coeff_array< T > &  a,
const coeff_array< T > &  b,
const coeff_array< T > &  c,
std::vector< ColorSpinorField *> &  x,
std::vector< ColorSpinorField *> &  y,
std::vector< ColorSpinorField *> &  z,
std::vector< ColorSpinorField *> &  w,
int  length 
)

◆ multiReduce() [2/3]

template<int NXZ, typename doubleN , typename ReduceType , template< int MXZ, typename ReducerType, typename Float, typename FloatN > class Reducer, typename write , bool siteUnroll, typename T >
void quda::blas::multiReduce ( doubleN  result[],
const coeff_array< T > &  a,
const coeff_array< T > &  b,
const coeff_array< T > &  c,
CompositeColorSpinorField x,
CompositeColorSpinorField y,
CompositeColorSpinorField z,
CompositeColorSpinorField w 
)

Driver for multi-reduce with up to four vectors

Definition at line 385 of file multi_reduce_quda.cu.

References checkPrecision, errorQuda, Nspin, QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, and QUDA_SINGLE_PRECISION.

◆ multiReduce() [3/3]

template<int NXZ, typename doubleN , typename ReduceType , template< int MXZ, typename ReducerType, typename Float, typename FloatN > class ReducerDiagonal, typename writeDiagonal , template< int MXZ, typename ReducerType, typename Float, typename FloatN > class ReducerOffDiagonal, typename writeOffDiagonal , bool siteUnroll, typename T >
void quda::blas::multiReduce ( doubleN  result[],
const coeff_array< T > &  a,
const coeff_array< T > &  b,
const coeff_array< T > &  c,
CompositeColorSpinorField x,
CompositeColorSpinorField y,
CompositeColorSpinorField z,
CompositeColorSpinorField w,
int  i,
int  j 
)

Definition at line 598 of file multi_reduce_quda.cu.

◆ multiReduce_recurse()

template<template< int MXZ, typename ReducerType, typename Float, typename FloatN > class ReducerDiagonal, typename writeDiagonal , template< int MXZ, typename ReducerType, typename Float, typename FloatN > class ReducerOffDiagonal, typename writeOffDiagonal >
void quda::blas::multiReduce_recurse ( Complex result,
std::vector< ColorSpinorField *> &  x,
std::vector< ColorSpinorField *> &  y,
std::vector< ColorSpinorField *> &  z,
std::vector< ColorSpinorField *> &  w,
int  i_idx,
int  j_idx,
bool  hermitian,
unsigned int  tile_size 
)

Definition at line 706 of file multi_reduce_quda.cu.

References quda::count.

◆ multiReduceKernel()

template<int block_size, typename ReduceType , typename FloatN , int M, int NXZ, typename Arg >
__global__ void quda::blas::multiReduceKernel ( Arg  arg_)

Definition at line 79 of file multi_reduce_core.cuh.

References quda::arg(), parity, and quda::sum().

Referenced by multiReduceLaunch().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ multiReduceLaunch()

template<typename doubleN , typename ReduceType , typename FloatN , int M, int NXZ, typename Arg >
void quda::blas::multiReduceLaunch ( doubleN  result[],
Arg arg,
const TuneParam tp,
const cudaStream_t &  stream,
Tunable tunable 
)

◆ mxpy()

void quda::blas::mxpy ( ColorSpinorField x,
ColorSpinorField y 
)
inline

Definition at line 34 of file blas_quda.h.

References axpbyz().

Referenced by benchmark(), invert_test(), main(), and test().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ nativeBlas()

template<typename RegType , typename StoreType , typename yType , int M, template< typename, typename > class Functor, int writeX, int writeY, int writeZ, int writeW, int writeV>
void quda::blas::nativeBlas ( const double2 &  a,
const double2 &  b,
const double2 &  c,
ColorSpinorField x,
ColorSpinorField y,
ColorSpinorField z,
ColorSpinorField w,
ColorSpinorField v,
int  length 
)

◆ nativeReduce()

template<typename doubleN , typename ReduceType , typename RegType , typename StoreType , typename zType , int M, template< typename ReducerType, typename Float, typename FloatN > class Reducer, int writeX, int writeY, int writeZ, int writeW, int writeV>
doubleN quda::blas::nativeReduce ( const double2 &  a,
const double2 &  b,
ColorSpinorField x,
ColorSpinorField y,
ColorSpinorField z,
ColorSpinorField w,
ColorSpinorField v,
int  length 
)

Definition at line 297 of file reduce_quda.cu.

References bytes, checkCudaError, quda::checkLength(), flops, getStream(), quda::reduce(), V, X, and Z.

Referenced by mixed_reduce(), and uni_reduce().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ norm1()

double quda::blas::norm1 ( const ColorSpinorField b)

Definition at line 714 of file reduce_quda.cu.

Referenced by getLambdaMax(), getRealBidiagMatrix(), quda::norm1(), and xpayz().

Here is the caller graph for this function:

◆ norm1_() [1/3]

template<typename ReduceType >
__device__ __host__ ReduceType quda::blas::norm1_ ( const double2 &  a)

Return the L1 norm of x

Definition at line 100 of file reduce_core.cuh.

References quda::sqrt().

Here is the call graph for this function:

◆ norm1_() [2/3]

template<typename ReduceType >
__device__ __host__ ReduceType quda::blas::norm1_ ( const float2 &  a)

Definition at line 105 of file reduce_core.cuh.

References quda::sqrt().

Here is the call graph for this function:

◆ norm1_() [3/3]

template<typename ReduceType >
__device__ __host__ ReduceType quda::blas::norm1_ ( const float4 &  a)

Definition at line 110 of file reduce_core.cuh.

References quda::sqrt().

Here is the call graph for this function:

◆ norm2()

double quda::blas::norm2 ( const ColorSpinorField a)

◆ norm2_() [1/3]

template<typename ReduceType >
__device__ __host__ void quda::blas::norm2_ ( ReduceType &  sum,
const double2 &  a 
)

Return the L2 norm of x

Definition at line 129 of file reduce_core.cuh.

◆ norm2_() [2/3]

template<typename ReduceType >
__device__ __host__ void quda::blas::norm2_ ( ReduceType &  sum,
const float2 &  a 
)

Definition at line 135 of file reduce_core.cuh.

◆ norm2_() [3/3]

template<typename ReduceType >
__device__ __host__ void quda::blas::norm2_ ( ReduceType &  sum,
const float4 &  a 
)

Definition at line 141 of file reduce_core.cuh.

◆ quadrupleCG3InitNorm()

double quda::blas::quadrupleCG3InitNorm ( double  a,
ColorSpinorField x,
ColorSpinorField y,
ColorSpinorField z,
ColorSpinorField w,
ColorSpinorField v 
)

Definition at line 838 of file reduce_quda.cu.

Referenced by cDotProductNormB(), and quda::CG3::operator()().

Here is the caller graph for this function:

◆ quadrupleCG3UpdateNorm()

double quda::blas::quadrupleCG3UpdateNorm ( double  a,
double  b,
ColorSpinorField x,
ColorSpinorField y,
ColorSpinorField z,
ColorSpinorField w,
ColorSpinorField v 
)

Definition at line 843 of file reduce_quda.cu.

Referenced by cDotProductNormB(), and quda::CG3::operator()().

Here is the caller graph for this function:

◆ quadrupleCGReduction()

double4 quda::blas::quadrupleCGReduction ( ColorSpinorField x,
ColorSpinorField y,
ColorSpinorField z 
)

Definition at line 833 of file reduce_quda.cu.

Referenced by cDotProductNormB(), and quda::CG::operator()().

Here is the caller graph for this function:

◆ reDotProduct() [1/2]

double quda::blas::reDotProduct ( ColorSpinorField x,
ColorSpinorField y 
)

◆ reDotProduct() [2/2]

void quda::blas::reDotProduct ( double *  result,
std::vector< ColorSpinorField *> &  a,
std::vector< ColorSpinorField *> &  b 
)

Definition at line 622 of file multi_reduce_quda.cu.

References errorQuda, and reduceDoubleArray().

Here is the call graph for this function:

◆ reduceKernel()

template<int block_size, typename ReduceType , typename FloatN , int M, typename Arg >
__global__ void quda::blas::reduceKernel ( Arg  arg)

Generic reduction kernel with up to four loads and three saves.

Definition at line 44 of file reduce_core.cuh.

References quda::arg(), parity, quda::sum(), and zero().

Referenced by reduceLaunch().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ reduceLaunch()

template<typename doubleN , typename ReduceType , typename FloatN , int M, typename Arg >
doubleN quda::blas::reduceLaunch ( Arg arg,
const TuneParam tp,
const cudaStream_t &  stream,
Tunable tunable 
)

◆ setParam()

void quda::blas::setParam ( int  kernel,
int  prec,
int  threads,
int  blocks 
)

◆ tripleCGReduction()

double3 quda::blas::tripleCGReduction ( ColorSpinorField x,
ColorSpinorField y,
ColorSpinorField z 
)

Definition at line 828 of file reduce_quda.cu.

Referenced by benchmark(), cDotProductNormB(), quda::CG::operator()(), and test().

Here is the caller graph for this function:

◆ tripleCGUpdate()

void quda::blas::tripleCGUpdate ( double  alpha,
double  beta,
ColorSpinorField q,
ColorSpinorField r,
ColorSpinorField x,
ColorSpinorField p 
)

◆ uni_blas()

template<template< typename Float, typename FloatN > class Functor, int writeX = 0, int writeY = 0, int writeZ = 0, int writeW = 0, int writeV = 0>
void quda::blas::uni_blas ( const double2 &  a,
const double2 &  b,
const double2 &  c,
ColorSpinorField x,
ColorSpinorField y,
ColorSpinorField z,
ColorSpinorField w,
ColorSpinorField v 
)

◆ uni_reduce()

template<typename doubleN , typename ReduceType , template< typename ReducerType, typename Float, typename FloatN > class Reducer, int writeX, int writeY, int writeZ, int writeW, int writeV, bool siteUnroll>
doubleN quda::blas::uni_reduce ( const double2 &  a,
const double2 &  b,
ColorSpinorField x,
ColorSpinorField y,
ColorSpinorField z,
ColorSpinorField w,
ColorSpinorField v 
)

◆ xmyNorm()

double quda::blas::xmyNorm ( ColorSpinorField x,
ColorSpinorField y 
)
inline

◆ xpay()

void quda::blas::xpay ( ColorSpinorField x,
double  a,
ColorSpinorField y 
)
inline

Definition at line 37 of file blas_quda.h.

References axpbyz().

Referenced by quda::ApplyNdegTwistedMassPreconditioned(), quda::ApplyTwistedCloverPreconditioned(), quda::ApplyTwistedMassPreconditioned(), benchmark(), quda::CG::blocksolve(), clover_mat(), clover_matpc(), quda::dslash5(), quda::dslash5inv(), quda::DiracCoarsePC::DslashXpay(), dw_4d_mat(), dw_4d_matpc(), dw_mat(), dw_matpc(), quda::IncEigCG::initCGsolve(), quda::Dslash< Float >::instantiate(), invert_test(), quda::TwistedMassLaunch< Float, nDim, nColor, nParity, dagger, xpay, kernel_type, Arg >::launch(), quda::WilsonCloverPreconditionedLaunch< Float, nDim, nColor, nParity, dagger, xpay, kernel_type, Arg >::launch(), quda::WilsonCloverLaunch< Float, nDim, nColor, nParity, dagger, xpay, kernel_type, Arg >::launch(), quda::NdegTwistedMassLaunch< Float, nDim, nColor, nParity, dagger, xpay, kernel_type, Arg >::launch(), quda::TwistedCloverLaunch< Float, nDim, nColor, nParity, dagger, xpay, kernel_type, Arg >::launch(), quda::TwistedCloverPreconditionedLaunch< Float, nDim, nColor, nParity, dagger, xpay, kernel_type, Arg >::launch(), quda::Dslash< Float >::launch(), quda::DiracDomainWall4D::M(), quda::DiracCoarsePC::M(), quda::PreconCG::operator()(), quda::CACG::operator()(), quda::CAGCR::operator()(), quda::MultiShiftCG::operator()(), quda::DiracCoarsePC::prepare(), quda::DiracCoarsePC::reconstruct(), quda::Deflation::reduce(), quda::IncEigCG::RestartVT(), staggeredDslashRef(), test(), tm_mat(), tm_matpc(), tm_ndeg_mat(), tm_ndeg_matpc(), tmc_mat(), tmc_matpc(), quda::Deflation::verify(), wil_mat(), and wil_matpc().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ xpayz()

void quda::blas::xpayz ( ColorSpinorField x,
double  a,
ColorSpinorField y,
ColorSpinorField z 
)
inline

Definition at line 38 of file blas_quda.h.

References axpbyz(), axpbyzNorm(), axpyBzpcx(), axpyReDot(), axpyZpbx(), cabxpyAx(), caxpby(), caxpbypczw(), caxpbypzYmbw(), caxpy(), caxpyBxpz(), caxpyBzpx(), caxpyXmaz(), caxpyXmazMR(), cxpaypbz(), doubleCG3Init(), doubleCG3Update(), norm1(), norm2(), reDotProduct(), and tripleCGUpdate().

Referenced by quda::CG::operator()().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ xpy()

void quda::blas::xpy ( ColorSpinorField & x,
ColorSpinorField & y 
)
inline

◆ xpyHeavyQuarkResidualNorm()

double3 quda::blas::xpyHeavyQuarkResidualNorm ( ColorSpinorField & x,
ColorSpinorField & y,
ColorSpinorField & r 
)

Definition at line 818 of file reduce_quda.cu.

References comm_size(), and quda::ColorSpinorField::Ncolor().

Referenced by benchmark(), quda::CG::blocksolve(), cDotProductNormB(), quda::CG::operator()(), quda::CG3::operator()(), quda::CG3NE::operator()(), quda::BiCGstab::operator()(), quda::BiCGstabL::operator()(), and test().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ zero()

void quda::blas::zero ( ColorSpinorField & a)

Variable Documentation

◆ Amatrix_d [1/2]

__constant__ signed char quda::blas::Amatrix_d[MAX_MATRIX_SIZE]
static

◆ Amatrix_d [2/2]

__constant__ signed char quda::blas::Amatrix_d[MAX_MATRIX_SIZE]
static

Definition at line 20 of file multi_reduce_core.cuh.

◆ Amatrix_h [1/2]

signed char* quda::blas::Amatrix_h
static

◆ Amatrix_h [2/2]

signed char* quda::blas::Amatrix_h
static

Definition at line 24 of file multi_reduce_core.cuh.

◆ arg_buffer [1/2]

__constant__ signed char quda::blas::arg_buffer[MAX_MATRIX_SIZE]
static

◆ arg_buffer [2/2]

__constant__ signed char quda::blas::arg_buffer[MAX_MATRIX_SIZE]
static

Definition at line 32 of file multi_reduce_core.cuh.

◆ blasStream

cudaStream_t* quda::blas::blasStream
static

Definition at line 25 of file blas_quda.cu.

Referenced by getStream().

◆ Bmatrix_d [1/2]

__constant__ signed char quda::blas::Bmatrix_d[MAX_MATRIX_SIZE]
static

◆ Bmatrix_d [2/2]

__constant__ signed char quda::blas::Bmatrix_d[MAX_MATRIX_SIZE]
static

Definition at line 21 of file multi_reduce_core.cuh.

◆ Bmatrix_h [1/2]

signed char* quda::blas::Bmatrix_h
static

◆ Bmatrix_h [2/2]

signed char* quda::blas::Bmatrix_h
static

Definition at line 25 of file multi_reduce_core.cuh.

◆ bytes

unsigned long long quda::blas::bytes

◆ Cmatrix_d [1/2]

__constant__ signed char quda::blas::Cmatrix_d[MAX_MATRIX_SIZE]
static

◆ Cmatrix_d [2/2]

__constant__ signed char quda::blas::Cmatrix_d[MAX_MATRIX_SIZE]
static

Definition at line 22 of file multi_reduce_core.cuh.

◆ Cmatrix_h [1/2]

signed char* quda::blas::Cmatrix_h
static

◆ Cmatrix_h [2/2]

signed char* quda::blas::Cmatrix_h
static

Definition at line 26 of file multi_reduce_core.cuh.

◆ flops

unsigned long long quda::blas::flops