QUDA
1.0.0
|
Namespaces | |
copy_ns | |
detail | |
Functions | |
void | init () |
void | end (void) |
void * | getDeviceReduceBuffer () |
void * | getMappedHostReduceBuffer () |
void * | getHostReduceBuffer () |
void | setParam (int kernel, int prec, int threads, int blocks) |
void | zero (ColorSpinorField &a) |
void | copy (ColorSpinorField &dst, const ColorSpinorField &src) |
void | ax (double a, ColorSpinorField &x) |
void | axpbyz (double a, ColorSpinorField &x, double b, ColorSpinorField &y, ColorSpinorField &z) |
void | xpy (ColorSpinorField &x, ColorSpinorField &y) |
void | mxpy (ColorSpinorField &x, ColorSpinorField &y) |
void | axpy (double a, ColorSpinorField &x, ColorSpinorField &y) |
void | axpby (double a, ColorSpinorField &x, double b, ColorSpinorField &y) |
void | xpay (ColorSpinorField &x, double a, ColorSpinorField &y) |
void | xpayz (ColorSpinorField &x, double a, ColorSpinorField &y, ColorSpinorField &z) |
void | axpyZpbx (double a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, double b) |
void | axpyBzpcx (double a, ColorSpinorField &x, ColorSpinorField &y, double b, ColorSpinorField &z, double c) |
void | caxpby (const Complex &a, ColorSpinorField &x, const Complex &b, ColorSpinorField &y) |
void | caxpy (const Complex &a, ColorSpinorField &x, ColorSpinorField &y) |
void | caxpbypczw (const Complex &a, ColorSpinorField &x, const Complex &b, ColorSpinorField &y, const Complex &c, ColorSpinorField &z, ColorSpinorField &w) |
void | cxpaypbz (ColorSpinorField &, const Complex &b, ColorSpinorField &y, const Complex &c, ColorSpinorField &z) |
void | caxpbypzYmbw (const Complex &, ColorSpinorField &, const Complex &, ColorSpinorField &, ColorSpinorField &, ColorSpinorField &) |
void | caxpyBzpx (const Complex &, ColorSpinorField &, ColorSpinorField &, const Complex &, ColorSpinorField &) |
void | caxpyBxpz (const Complex &, ColorSpinorField &, ColorSpinorField &, const Complex &, ColorSpinorField &) |
void | cabxpyAx (double a, const Complex &b, ColorSpinorField &x, ColorSpinorField &y) |
void | caxpyXmaz (const Complex &a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z) |
void | caxpyXmazMR (const Complex &a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z) |
void | tripleCGUpdate (double alpha, double beta, ColorSpinorField &q, ColorSpinorField &r, ColorSpinorField &x, ColorSpinorField &p) |
void | doubleCG3Init (double a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z) |
void | doubleCG3Update (double a, double b, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z) |
double | norm1 (const ColorSpinorField &b) |
double | norm2 (const ColorSpinorField &a) |
double | axpyReDot (double a, ColorSpinorField &x, ColorSpinorField &y) |
double | reDotProduct (ColorSpinorField &x, ColorSpinorField &y) |
double | axpbyzNorm (double a, ColorSpinorField &x, double b, ColorSpinorField &y, ColorSpinorField &z) |
double | axpyNorm (double a, ColorSpinorField &x, ColorSpinorField &y) |
double | xmyNorm (ColorSpinorField &x, ColorSpinorField &y) |
Complex | cDotProduct (ColorSpinorField &, ColorSpinorField &) |
double3 | cDotProductNormA (ColorSpinorField &a, ColorSpinorField &b) |
double3 | cDotProductNormB (ColorSpinorField &a, ColorSpinorField &b) |
Return (a,b) and ||b||^2 - implemented using cDotProductNormA. More... | |
double3 | caxpbypzYmbwcDotProductUYNormY (const Complex &a, ColorSpinorField &x, const Complex &b, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &u) |
double | caxpyNorm (const Complex &a, ColorSpinorField &x, ColorSpinorField &y) |
double | caxpyXmazNormX (const Complex &a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z) |
double | cabxpyzAxNorm (double a, const Complex &b, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z) |
Complex | caxpyDotzy (const Complex &a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z) |
Complex | axpyCGNorm (double a, ColorSpinorField &x, ColorSpinorField &y) |
double3 | HeavyQuarkResidualNorm (ColorSpinorField &x, ColorSpinorField &r) |
double3 | xpyHeavyQuarkResidualNorm (ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &r) |
double3 | tripleCGReduction (ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z) |
double4 | quadrupleCGReduction (ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z) |
double | quadrupleCG3InitNorm (double a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &v) |
double | quadrupleCG3UpdateNorm (double a, double b, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &v) |
double | doubleCG3InitNorm (double a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z) |
double | doubleCG3UpdateNorm (double a, double b, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z) |
void | caxpy (const Complex *a, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y) |
Compute the block "caxpy" over the set of ColorSpinorFields. E.g., it computes. More... | |
void | caxpy (const Complex *a, ColorSpinorField &x, ColorSpinorField &y) |
This is a wrapper for calling the block "caxpy" with a composite ColorSpinorField. E.g., it computes. More... | |
void | caxpy_U (const Complex *a, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y) |
Compute the block "caxpy_U" over the set of ColorSpinorFields. E.g., it computes. More... | |
void | caxpy_U (const Complex *a, ColorSpinorField &x, ColorSpinorField &y) |
This is a wrapper for calling the block "caxpy_U" with a composite ColorSpinorField. E.g., it computes. More... | |
void | caxpy_L (const Complex *a, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y) |
Compute the block "caxpy_L" over the set of ColorSpinorFields. E.g., it computes. More... | |
void | caxpy_L (const Complex *a, ColorSpinorField &x, ColorSpinorField &y) |
This is a wrapper for calling the block "caxpy_L" with a composite ColorSpinorField. E.g., it computes. More... | |
void | caxpyz (const Complex *a, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, std::vector< ColorSpinorField *> &z) |
Compute the block "caxpyz" over the set of ColorSpinorFields. E.g., it computes. More... | |
void | caxpyz (const Complex *a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z) |
This is a wrapper for calling the block "caxpyz" with a composite ColorSpinorField. E.g., it computes. More... | |
void | caxpyz_U (const Complex *a, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, std::vector< ColorSpinorField *> &z) |
Compute the block "caxpyz_U" over the set of ColorSpinorFields. E.g., it computes. More... | |
void | caxpyz_U (const Complex *a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z) |
This is a wrapper for calling the block "caxpyz_U" with a composite ColorSpinorField. E.g., it computes. More... | |
void | caxpyz_L (const Complex *a, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, std::vector< ColorSpinorField *> &z) |
Compute the block "caxpyz_L" over the set of ColorSpinorFields. E.g., it computes. More... | |
void | caxpyz_L (const Complex *a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z) |
This is a wrapper for calling the block "caxpyz_L" with a composite ColorSpinorField. E.g., it computes. More... | |
void | axpyBzpcx (const double *a, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, const double *b, ColorSpinorField &z, const double *c) |
Compute the vectorized "axpyBzpcx" over the set of ColorSpinorFields, where the third vector, z, is constant over the batch. E.g., it computes. More... | |
void | caxpyBxpz (const Complex *a_, std::vector< ColorSpinorField *> &x_, ColorSpinorField &y_, const Complex *b_, ColorSpinorField &z_) |
Compute the vectorized "caxpyBxpz" over the set of ColorSpinorFields, where the second and third vectors, y and z, are constant over the batch. E.g., it computes. More... | |
void | reDotProduct (double *result, std::vector< ColorSpinorField *> &a, std::vector< ColorSpinorField *> &b) |
void | cDotProduct (Complex *result, std::vector< ColorSpinorField *> &a, std::vector< ColorSpinorField *> &b) |
Computes the matrix of inner products between the vector set a and the vector set b. More... | |
void | hDotProduct (Complex *result, std::vector< ColorSpinorField *> &a, std::vector< ColorSpinorField *> &b) |
Computes the matrix of inner products between the vector set a and the vector set b. This routine is specifically for the case where the result matrix is guaranteed to be Hermitian. Requires a.size()==b.size(). More... | |
void | hDotProduct_Anorm (Complex *result, std::vector< ColorSpinorField *> &a, std::vector< ColorSpinorField *> &b) |
Computes the matrix of inner products between the vector set a and the vector set b. This routine is specifically for the case where the result matrix is guaranteed to be Hermitian. Uniquely defined for cases like (p, Ap) where the output is Hermitian, but there's an A-norm instead of an L2 norm. Requires a.size()==b.size(). More... | |
void | cDotProductCopy (Complex *result, std::vector< ColorSpinorField *> &a, std::vector< ColorSpinorField *> &b, std::vector< ColorSpinorField *> &c) |
Computes the matrix of inner products between the vector set a and the vector set b, and copies b into c. More... | |
template<typename FloatN , int M, typename Arg > | |
__global__ void | blasKernel (Arg arg) |
__device__ __host__ void | _caxpy (const float2 &a, const float4 &x, float4 &y) |
__device__ __host__ void | _caxpy (const float2 &a, const float2 &x, float2 &y) |
__device__ __host__ void | _caxpy (const double2 &a, const double2 &x, double2 &y) |
__device__ __host__ void | _caxpby (const float2 &a, const float4 &x, const float2 &b, float4 &y) |
__device__ __host__ void | _caxpby (const float2 &a, const float2 &x, const float2 &b, float2 &y) |
__device__ __host__ void | _caxpby (const double2 &a, const double2 &x, const double2 &b, double2 &y) |
template<typename FloatN , int M, int NXZ, typename Arg > | |
__global__ void | multiBlasKernel (Arg arg_) |
Generic multi-blas kernel with four loads and up to four stores. More... | |
template<int block_size, typename ReduceType , typename FloatN , int M, int NXZ, typename Arg > | |
__global__ void | multiReduceKernel (Arg arg_) |
template<typename ReduceType > | |
__device__ __host__ void | dot_ (ReduceType &sum, const double2 &a, const double2 &b) |
template<typename ReduceType > | |
__device__ __host__ void | dot_ (ReduceType &sum, const float2 &a, const float2 &b) |
template<typename ReduceType > | |
__device__ __host__ void | dot_ (ReduceType &sum, const float4 &a, const float4 &b) |
template<typename ReduceType > | |
__device__ __host__ void | cdot_ (ReduceType &sum, const double2 &a, const double2 &b) |
template<typename ReduceType > | |
__device__ __host__ void | cdot_ (ReduceType &sum, const float2 &a, const float2 &b) |
template<typename ReduceType > | |
__device__ __host__ void | cdot_ (ReduceType &sum, const float4 &a, const float4 &b) |
template<int block_size, typename ReduceType , typename FloatN , int M, typename Arg > | |
__global__ void | reduceKernel (Arg arg) |
template<typename ReduceType > | |
__device__ __host__ ReduceType | norm1_ (const double2 &a) |
template<typename ReduceType > | |
__device__ __host__ ReduceType | norm1_ (const float2 &a) |
template<typename ReduceType > | |
__device__ __host__ ReduceType | norm1_ (const float4 &a) |
template<typename ReduceType > | |
__device__ __host__ void | norm2_ (ReduceType &sum, const double2 &a) |
template<typename ReduceType > | |
__device__ __host__ void | norm2_ (ReduceType &sum, const float2 &a) |
template<typename ReduceType > | |
__device__ __host__ void | norm2_ (ReduceType &sum, const float4 &a) |
__device__ __host__ void | Caxpy_ (const double2 &a, const double2 &x, double2 &y) |
__device__ __host__ void | Caxpy_ (const float2 &a, const float2 &x, float2 &y) |
__device__ __host__ void | Caxpy_ (const float2 &a, const float4 &x, float4 &y) |
template<typename ReduceType , typename InputType > | |
__device__ __host__ void | cdotNormA_ (ReduceType &sum, const InputType &a, const InputType &b) |
template<typename ReduceType , typename InputType > | |
__device__ __host__ void | cdotNormB_ (ReduceType &sum, const InputType &a, const InputType &b) |
template<typename RegType , typename StoreType , typename yType , int M, template< typename, typename > class Functor, int writeX, int writeY, int writeZ, int writeW, int writeV> | |
void | nativeBlas (const double2 &a, const double2 &b, const double2 &c, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &v, int length) |
template<template< typename Float, typename FloatN > class Functor, int writeX = 0, int writeY = 0, int writeZ = 0, int writeW = 0, int writeV = 0> | |
void | uni_blas (const double2 &a, const double2 &b, const double2 &c, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &v) |
template<template< typename Float, typename FloatN > class Functor, int writeX = 0, int writeY = 0, int writeZ = 0, int writeW = 0, int writeV = 0> | |
void | mixed_blas (const double2 &a, const double2 &b, const double2 &c, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &v) |
void | initReduce () |
void | endReduce () |
cudaStream_t * | getStream () |
template<int NXZ, typename RegType , typename StoreType , typename yType , int M, template< int, typename, typename > class Functor, typename write , typename T > | |
void | multiBlas (const coeff_array< T > &a, const coeff_array< T > &b, const coeff_array< T > &c, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, std::vector< ColorSpinorField *> &z, std::vector< ColorSpinorField *> &w, int length) |
template<int NXZ, template< int MXZ, typename Float, typename FloatN > class Functor, typename write , typename T > | |
void | multiBlas (const coeff_array< T > &a, const coeff_array< T > &b, const coeff_array< T > &c, CompositeColorSpinorField &x, CompositeColorSpinorField &y, CompositeColorSpinorField &z, CompositeColorSpinorField &w) |
template<int NXZ, template< int MXZ, typename Float, typename FloatN > class Functor, typename write , typename T > | |
void | mixedMultiBlas (const coeff_array< T > &a, const coeff_array< T > &b, const coeff_array< T > &c, CompositeColorSpinorField &x, CompositeColorSpinorField &y, CompositeColorSpinorField &z, CompositeColorSpinorField &w) |
void | caxpy_recurse (const Complex *a_, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, int i_idx, int j_idx, int upper) |
void | caxpyz_recurse (const Complex *a_, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, std::vector< ColorSpinorField *> &z, int i, int j, int pass, int upper) |
cudaEvent_t * | getReduceEvent () |
bool | getFastReduce () |
void | initFastReduce (int words) |
void | completeFastReduce (int32_t words) |
template<typename doubleN , typename ReduceType , typename FloatN , int M, int NXZ, typename Arg > | |
void | multiReduceLaunch (doubleN result[], Arg &arg, const TuneParam &tp, const cudaStream_t &stream, Tunable &tunable) |
template<typename doubleN , typename ReduceType , typename RegType , typename StoreType , typename yType , int M, int NXZ, template< int MXZ, typename ReducerType, typename Float, typename FloatN > class Reducer, typename write , typename T > | |
void | multiReduce (doubleN result[], const coeff_array< T > &a, const coeff_array< T > &b, const coeff_array< T > &c, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, std::vector< ColorSpinorField *> &z, std::vector< ColorSpinorField *> &w, int length) |
template<int NXZ, typename doubleN , typename ReduceType , template< int MXZ, typename ReducerType, typename Float, typename FloatN > class Reducer, typename write , bool siteUnroll, typename T > | |
void | multiReduce (doubleN result[], const coeff_array< T > &a, const coeff_array< T > &b, const coeff_array< T > &c, CompositeColorSpinorField &x, CompositeColorSpinorField &y, CompositeColorSpinorField &z, CompositeColorSpinorField &w) |
template<int NXZ, typename doubleN , typename ReduceType , template< int MXZ, typename ReducerType, typename Float, typename FloatN > class Reducer, typename write , bool siteUnroll, typename T > | |
void | mixedMultiReduce (doubleN result[], const coeff_array< T > &a, const coeff_array< T > &b, const coeff_array< T > &c, CompositeColorSpinorField &x, CompositeColorSpinorField &y, CompositeColorSpinorField &z, CompositeColorSpinorField &w) |
template<int NXZ, typename doubleN , typename ReduceType , template< int MXZ, typename ReducerType, typename Float, typename FloatN > class ReducerDiagonal, typename writeDiagonal , template< int MXZ, typename ReducerType, typename Float, typename FloatN > class ReducerOffDiagonal, typename writeOffDiagonal , bool siteUnroll, typename T > | |
void | multiReduce (doubleN result[], const coeff_array< T > &a, const coeff_array< T > &b, const coeff_array< T > &c, CompositeColorSpinorField &x, CompositeColorSpinorField &y, CompositeColorSpinorField &z, CompositeColorSpinorField &w, int i, int j) |
template<template< int MXZ, typename ReducerType, typename Float, typename FloatN > class ReducerDiagonal, typename writeDiagonal , template< int MXZ, typename ReducerType, typename Float, typename FloatN > class ReducerOffDiagonal, typename writeOffDiagonal > | |
void | multiReduce_recurse (Complex *result, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, std::vector< ColorSpinorField *> &z, std::vector< ColorSpinorField *> &w, int i_idx, int j_idx, bool hermitian, unsigned int tile_size) |
void | initFastReduce (int32_t words) |
template<typename doubleN , typename ReduceType , typename FloatN , int M, typename Arg > | |
doubleN | reduceLaunch (Arg &arg, const TuneParam &tp, const cudaStream_t &stream, Tunable &tunable) |
template<typename doubleN , typename ReduceType , typename RegType , typename StoreType , typename zType , int M, template< typename ReducerType, typename Float, typename FloatN > class Reducer, int writeX, int writeY, int writeZ, int writeW, int writeV> | |
doubleN | nativeReduce (const double2 &a, const double2 &b, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &v, int length) |
template<typename doubleN , typename ReduceType , template< typename ReducerType, typename Float, typename FloatN > class Reducer, int writeX, int writeY, int writeZ, int writeW, int writeV, bool siteUnroll> | |
doubleN | uni_reduce (const double2 &a, const double2 &b, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &v) |
template<typename doubleN , typename ReduceType , template< typename ReducerType, typename Float, typename FloatN > class Reducer, int writeX, int writeY, int writeZ, int writeW, int writeV, bool siteUnroll> | |
doubleN | mixed_reduce (const double2 &a, const double2 &b, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &v) |
Variables | |
unsigned long long | flops |
unsigned long long | bytes |
static __constant__ signed char | Amatrix_d [MAX_MATRIX_SIZE] |
static __constant__ signed char | Bmatrix_d [MAX_MATRIX_SIZE] |
static __constant__ signed char | Cmatrix_d [MAX_MATRIX_SIZE] |
static signed char * | Amatrix_h |
static signed char * | Bmatrix_h |
static signed char * | Cmatrix_h |
static __constant__ signed char | arg_buffer [MAX_MATRIX_SIZE] |
static __constant__ signed char | Amatrix_d [MAX_MATRIX_SIZE] |
static __constant__ signed char | Bmatrix_d [MAX_MATRIX_SIZE] |
static __constant__ signed char | Cmatrix_d [MAX_MATRIX_SIZE] |
static signed char * | Amatrix_h |
static signed char * | Bmatrix_h |
static signed char * | Cmatrix_h |
static __constant__ signed char | arg_buffer [MAX_MATRIX_SIZE] |
static cudaStream_t * | blasStream |
__device__ __host__ void quda::blas::_caxpby | ( | const float2 & | a, |
const float4 & | x, | ||
const float2 & | b, | ||
float4 & | y | ||
) |
Functor to perform the operation y = a*x + b*y (complex-valued)
Definition at line 150 of file blas_core.cuh.
Referenced by quda::blas::caxpby_< Float2, FloatN >::operator()(), and quda::blas::caxpbypczw_< Float2, FloatN >::operator()().
__device__ __host__ void quda::blas::_caxpby | ( | const float2 & | a, |
const float2 & | x, | ||
const float2 & | b, | ||
float2 & | y | ||
) |
Definition at line 172 of file blas_core.cuh.
__device__ __host__ void quda::blas::_caxpby | ( | const double2 & | a, |
const double2 & | x, | ||
const double2 & | b, | ||
double2 & | y | ||
) |
Definition at line 186 of file blas_core.cuh.
|
inline |
Functor to perform the operation y += a * x (complex-valued)
Definition at line 110 of file blas_core.cuh.
Referenced by quda::blas::MultiBlasFunctor< NXZ, Float2, FloatN >::init(), quda::blas::caxpy_< Float2, FloatN >::operator()(), quda::blas::multicaxpy_< NXZ, Float2, FloatN >::operator()(), quda::blas::multicaxpyz_< NXZ, Float2, FloatN >::operator()(), quda::blas::caxpbypczw_< Float2, FloatN >::operator()(), quda::blas::multi_caxpyBxpz_< NXZ, Float2, FloatN >::operator()(), quda::blas::caxpyBzpx_< Float2, FloatN >::operator()(), quda::blas::caxpyBxpz_< Float2, FloatN >::operator()(), quda::blas::caxpbypzYmbw_< Float2, FloatN >::operator()(), quda::blas::cabxpyAx_< Float2, FloatN >::operator()(), quda::blas::caxpyxmaz_< Float2, FloatN >::operator()(), and quda::blas::caxpyxmazMR_< Float2, FloatN >::operator()().
|
inline |
Definition at line 122 of file blas_core.cuh.
|
inline |
Definition at line 130 of file blas_core.cuh.
void quda::blas::ax | ( | double | a, |
ColorSpinorField & | x | ||
) |
Definition at line 508 of file blas_quda.cu.
References quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::x.
Referenced by quda::ax(), benchmark(), quda::CG::blocksolve(), cloverQuda(), quda::TRLM::computeKeptRitz(), quda::EigenSolver::computeSVD(), dslashQuda(), dslashQuda_4dpc(), dslashQuda_mdwf(), quda::GMResDR::FlexArnoldiProcedure(), quda::genericCompare(), quda::Deflation::increment(), invert_test(), invertMultiShiftQuda(), invertMultiSrcQuda(), invertQuda(), main(), quda::massRescale(), MatDagMatQuda(), MatQuda(), quda::TRLM::operator()(), quda::MPCG::operator()(), quda::MR::operator()(), quda::CACG::operator()(), quda::MinResExt::operator()(), quda::DiracStaggeredPC::prepare(), quda::DiracImprovedStaggeredPC::prepare(), quda::DiracStaggeredPC::reconstruct(), quda::DiracImprovedStaggeredPC::reconstruct(), quda::GMResDR::RestartVZH(), test(), and quda::IncEigCG::UpdateVm().
|
inline |
Definition at line 36 of file blas_quda.h.
References axpbyz().
Referenced by benchmark(), quda::CG3::operator()(), quda::CG3NE::operator()(), quda::CGNE::operator()(), quda::CGNR::operator()(), quda::CACGNE::operator()(), quda::CACGNR::operator()(), quda::MultiShiftCG::operator()(), and test().
void quda::blas::axpbyz | ( | double | a, |
ColorSpinorField & | x, | ||
double | b, | ||
ColorSpinorField & | y, | ||
ColorSpinorField & | z | ||
) |
Definition at line 496 of file blas_quda.cu.
References quda::LatticeField::Precision(), quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::x, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::y, and quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::z.
Referenced by axpby(), axpy(), mxpy(), xpay(), xpayz(), and xpy().
double quda::blas::axpbyzNorm | ( | double | a, |
ColorSpinorField & | x, | ||
double | b, | ||
ColorSpinorField & | y, | ||
ColorSpinorField & | z | ||
) |
Definition at line 734 of file reduce_quda.cu.
Referenced by axpyNorm(), xmyNorm(), and xpayz().
|
inline |
Definition at line 35 of file blas_quda.h.
References axpbyz().
Referenced by benchmark(), quda::CG::blocksolve(), invert_test(), quda::TRLM::lanczosStep(), quda::DiracMobius::M(), main(), quda::CG::operator()(), quda::CG3::operator()(), quda::CG3NE::operator()(), quda::MPCG::operator()(), quda::GCR::operator()(), quda::MR::operator()(), quda::CACG::operator()(), quda::CAGCR::operator()(), quda::MultiShiftCG::operator()(), quda::DiracM::operator()(), quda::DiracMdagM::operator()(), quda::DiracMMdag::operator()(), quda::DiracMdag::operator()(), quda::GMResDR::operator()(), and test().
void quda::blas::axpyBzpcx | ( | double | a, |
ColorSpinorField & | x, | ||
ColorSpinorField & | y, | ||
double | b, | ||
ColorSpinorField & | z, | ||
double | c | ||
) |
Definition at line 541 of file blas_quda.cu.
References quda::LatticeField::Precision(), quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::x, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::y, and quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::z.
Referenced by quda::ShiftUpdate::apply(), axpyBzpcx(), benchmark(), cDotProductNormB(), test(), and xpayz().
void quda::blas::axpyBzpcx | ( | const double * | a, |
std::vector< ColorSpinorField *> & | x, | ||
std::vector< ColorSpinorField *> & | y, | ||
const double * | b, | ||
ColorSpinorField & | z, | ||
const double * | c | ||
) |
Compute the vectorized "axpyBzpcx" over the set of ColorSpinorFields, where the third vector, z, is constant over the batch. E.g., it computes.
y = a * x + y, x = b * z + c * x
The dimensions of a, b, c are the same as the size of x and y, with a maximum size of 16.
a[in] | Array of coefficients |
b[in] | Array of coefficients |
c[in] | Array of coefficients |
x[in,out] | vector of ColorSpinorFields |
y[in,out] | vector of ColorSpinorFields |
z[in] | input ColorSpinorField |
Definition at line 985 of file multi_blas_quda.cu.
References axpyBzpcx(), and MAX_MULTI_BLAS_N.
Complex quda::blas::axpyCGNorm | ( | double | a, |
ColorSpinorField & | x, | ||
ColorSpinorField & | y | ||
) |
Definition at line 796 of file reduce_quda.cu.
References quda::LatticeField::Precision().
Referenced by cDotProductNormB(), quda::CG::operator()(), quda::PreconCG::operator()(), and quda::MultiShiftCG::operator()().
|
inline |
Definition at line 74 of file blas_quda.h.
References axpbyzNorm().
Referenced by benchmark(), quda::IncEigCG::eigCGsolve(), quda::CG::operator()(), quda::CG3::operator()(), quda::CG3NE::operator()(), and test().
double quda::blas::axpyReDot | ( | double | a, |
ColorSpinorField & | x, | ||
ColorSpinorField & | y | ||
) |
Definition at line 740 of file reduce_quda.cu.
Referenced by benchmark(), quda::MultiShiftCG::operator()(), test(), and xpayz().
void quda::blas::axpyZpbx | ( | double | a, |
ColorSpinorField & | x, | ||
ColorSpinorField & | y, | ||
ColorSpinorField & | z, | ||
double | b | ||
) |
Definition at line 552 of file blas_quda.cu.
References quda::LatticeField::Precision(), quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::x, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::y, and quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::z.
Referenced by benchmark(), quda::IncEigCG::eigCGsolve(), quda::CG::operator()(), quda::PreconCG::operator()(), quda::MultiShiftCG::operator()(), test(), and xpayz().
__global__ void quda::blas::blasKernel | ( | Arg | arg | ) |
Generic blas kernel with four loads and up to four stores.
Definition at line 43 of file blas_core.cuh.
References parity.
void quda::blas::cabxpyAx | ( | double | a, |
const Complex & | b, | ||
ColorSpinorField & | x, | ||
ColorSpinorField & | y | ||
) |
Definition at line 591 of file blas_quda.cu.
References IMAG, REAL, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::x, and quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::y.
Referenced by benchmark(), test(), and xpayz().
double quda::blas::cabxpyzAxNorm | ( | double | a, |
const Complex & | b, | ||
ColorSpinorField & | x, | ||
ColorSpinorField & | y, | ||
ColorSpinorField & | z | ||
) |
Definition at line 758 of file reduce_quda.cu.
Referenced by benchmark(), cDotProductNormB(), quda::GCR::operator()(), and test().
void quda::blas::caxpby | ( | const Complex & | a, |
ColorSpinorField & | x, | ||
const Complex & | b, | ||
ColorSpinorField & | y | ||
) |
Definition at line 523 of file blas_quda.cu.
References IMAG, REAL, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::x, and quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::y.
Referenced by quda::BiCGstabLUpdate::apply(), benchmark(), quda::EigenSolver::chebyOp(), quda::EigenSolver::computeEvals(), quda::BiCGstabL::operator()(), test(), and xpayz().
void quda::blas::caxpbypczw | ( | const Complex & | a, |
ColorSpinorField & | x, | ||
const Complex & | b, | ||
ColorSpinorField & | y, | ||
const Complex & | c, | ||
ColorSpinorField & | z, | ||
ColorSpinorField & | w | ||
) |
Definition at line 528 of file blas_quda.cu.
References IMAG, REAL, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::w, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::x, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::y, and quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::z.
Referenced by quda::EigenSolver::chebyOp(), and xpayz().
void quda::blas::caxpbypzYmbw | ( | const Complex & | a, |
ColorSpinorField & | x, | ||
const Complex & | b, | ||
ColorSpinorField & | y, | ||
ColorSpinorField & | z, | ||
ColorSpinorField & | w | ||
) |
Definition at line 585 of file blas_quda.cu.
References IMAG, REAL, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::w, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::x, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::y, and quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::z.
Referenced by benchmark(), quda::BiCGstab::operator()(), test(), and xpayz().
double3 quda::blas::caxpbypzYmbwcDotProductUYNormY | ( | const Complex & | a, |
ColorSpinorField & | x, | ||
const Complex & | b, | ||
ColorSpinorField & | y, | ||
ColorSpinorField & | z, | ||
ColorSpinorField & | w, | ||
ColorSpinorField & | u | ||
) |
Definition at line 783 of file reduce_quda.cu.
References IMAG, quda::LatticeField::Precision(), and REAL.
Referenced by benchmark(), cDotProductNormB(), quda::BiCGstab::operator()(), and test().
void quda::blas::caxpy | ( | const Complex & | a, |
ColorSpinorField & | x, | ||
ColorSpinorField & | y | ||
) |
Definition at line 512 of file blas_quda.cu.
References quda::LatticeField::Precision(), quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::x, and quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::y.
Referenced by quda::BiCGstabLUpdate::apply(), benchmark(), quda::EigenSolver::blockOrthogonalize(), quda::CG::blocksolve(), caxpy(), caxpyz_recurse(), cDotProductNormB(), quda::TRLM::computeKeptRitz(), quda::EigenSolver::deflate(), quda::EigenSolver::deflateSVD(), quda::Deflation::increment(), quda::Deflation::operator()(), quda::CG::operator()(), quda::BiCGstab::operator()(), quda::MPBiCGstab::operator()(), quda::BiCGstabL::operator()(), quda::CACG::operator()(), quda::CAGCR::operator()(), quda::MultiShiftCG::operator()(), quda::MinResExt::operator()(), quda::orthoDir(), quda::BiCGstabL::orthoDir(), quda::Deflation::reduce(), quda::IncEigCG::RestartVT(), quda::GMResDR::RestartVZH(), test(), quda::updateAp(), quda::BiCGstabL::updateR(), quda::updateSolution(), quda::GMResDR::UpdateSolution(), quda::BiCGstabL::updateUend(), quda::Deflation::verify(), and xpayz().
void quda::blas::caxpy | ( | const Complex * | a, |
std::vector< ColorSpinorField *> & | x, | ||
std::vector< ColorSpinorField *> & | y | ||
) |
Compute the block "caxpy" over the set of ColorSpinorFields, i.e., it computes:
y = x * a + y
The dimensions of a can be rectangular, i.e., the widths of x and y need not be the same.
a[in] | Matrix of coefficients |
x[in] | vector of input ColorSpinorFields |
y[in,out] | vector of input/output ColorSpinorFields |
Definition at line 732 of file multi_blas_quda.cu.
References caxpy_recurse().
void quda::blas::caxpy | ( | const Complex * | a, |
ColorSpinorField & | x, | ||
ColorSpinorField & | y | ||
) |
This is a wrapper for calling the block "caxpy" with a composite ColorSpinorField. E.g., it computes.
y = x * a + y
a[in] | Matrix of coefficients |
x[in] | Input matrix |
y[in,out] | Computed output matrix |
Definition at line 763 of file multi_blas_quda.cu.
References caxpy(), and quda::ColorSpinorField::Components().
__device__ __host__ void quda::blas::Caxpy_ | ( | const double2 & | a, |
const double2 & | x, | ||
double2 & | y | ||
) |
Functor to perform the operation y += a * x (complex-valued)
Definition at line 232 of file reduce_core.cuh.
Referenced by quda::blas::caxpyNorm2< ReduceType, Float2, FloatN >::operator()(), quda::blas::caxpyxmaznormx< ReduceType, Float2, FloatN >::operator()(), quda::blas::cabxpyzaxnorm< ReduceType, Float2, FloatN >::operator()(), quda::blas::caxpydotzy< ReduceType, Float2, FloatN >::operator()(), and quda::blas::caxpbypzYmbwcDotProductUYNormY_< ReduceType, Float2, FloatN >::operator()().
__device__ __host__ void quda::blas::Caxpy_ | ( | const float2 & | a, |
const float2 & | x, | ||
float2 & | y | ||
) |
Definition at line 239 of file reduce_core.cuh.
__device__ __host__ void quda::blas::Caxpy_ | ( | const float2 & | a, |
const float4 & | x, | ||
float4 & | y | ||
) |
Definition at line 246 of file reduce_core.cuh.
void quda::blas::caxpy_L | ( | const Complex * | a, |
std::vector< ColorSpinorField *> & | x, | ||
std::vector< ColorSpinorField *> & | y | ||
) |
Compute the block "caxpy_L" over the set of ColorSpinorFields, i.e., it computes:
y = x * a + y
Where 'a' must be a square, lower triangular matrix.
a[in] | Matrix of coefficients |
x[in] | vector of input ColorSpinorFields |
y[in,out] | vector of input/output ColorSpinorFields |
Definition at line 750 of file multi_blas_quda.cu.
References caxpy_recurse(), and errorQuda.
Referenced by caxpy_L(), and cDotProductNormB().
void quda::blas::caxpy_L | ( | const Complex * | a, |
ColorSpinorField & | x, | ||
ColorSpinorField & | y | ||
) |
This is a wrapper for calling the block "caxpy_L" with a composite ColorSpinorField, i.e., it computes:
y = x * a + y
a[in] | Matrix of coefficients |
x[in] | Input matrix |
y[in,out] | Computed output matrix |
Definition at line 767 of file multi_blas_quda.cu.
References caxpy_L(), and quda::ColorSpinorField::Components().
void quda::blas::caxpy_recurse | ( | const Complex * | a_, |
std::vector< ColorSpinorField *> & | x, | ||
std::vector< ColorSpinorField *> & | y, | ||
int | i_idx, | ||
int | j_idx, | ||
int | upper | ||
) |
Definition at line 562 of file multi_blas_quda.cu.
References quda::count, and MAX_MULTI_BLAS_N.
Referenced by caxpy(), caxpy_L(), and caxpy_U().
void quda::blas::caxpy_U | ( | const Complex * | a, |
std::vector< ColorSpinorField *> & | x, | ||
std::vector< ColorSpinorField *> & | y | ||
) |
Compute the block "caxpy_U" over the set of ColorSpinorFields, i.e., it computes:
y = x * a + y
Where 'a' must be a square, upper triangular matrix.
a[in] | Matrix of coefficients |
x[in] | vector of input ColorSpinorFields |
y[in,out] | vector of input/output ColorSpinorFields |
Definition at line 738 of file multi_blas_quda.cu.
References caxpy_recurse(), and errorQuda.
Referenced by caxpy_U(), and cDotProductNormB().
void quda::blas::caxpy_U | ( | const Complex * | a, |
ColorSpinorField & | x, | ||
ColorSpinorField & | y | ||
) |
This is a wrapper for calling the block "caxpy_U" with a composite ColorSpinorField. E.g., it computes.
y = x * a + y
a[in] | Matrix of coefficients |
x[in] | Input matrix |
y[in,out] | Computed output matrix |
Definition at line 765 of file multi_blas_quda.cu.
References caxpy_U(), and quda::ColorSpinorField::Components().
void quda::blas::caxpyBxpz | ( | const Complex & | a, |
ColorSpinorField & | x, | ||
ColorSpinorField & | y, | ||
const Complex & | b, | ||
ColorSpinorField & | z | ||
) |
Definition at line 574 of file blas_quda.cu.
References IMAG, quda::LatticeField::Precision(), REAL, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::x, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::y, and quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::z.
Referenced by benchmark(), caxpyBxpz(), cDotProductNormB(), test(), quda::BiCGstabL::updateXRend(), and xpayz().
void quda::blas::caxpyBxpz | ( | const Complex * | a_, |
std::vector< ColorSpinorField *> & | x_, | ||
ColorSpinorField & | y_, | ||
const Complex * | b_, | ||
ColorSpinorField & | z_ | ||
) |
Compute the vectorized "caxpyBxpz" over the set of ColorSpinorFields, where the second and third vectors, y and z, are constant over the batch, i.e., it computes:
y = a * x + y, z = b * x + z
The dimensions of a, b are the same as the size of x, with a maximum size of 16.
a[in] | Array of coefficients |
b[in] | Array of coefficients |
x[in] | vector of ColorSpinorFields |
y[in,out] | input ColorSpinorField |
z[in,out] | input ColorSpinorField |
Definition at line 1029 of file multi_blas_quda.cu.
References caxpyBxpz(), and MAX_MULTI_BLAS_N.
void quda::blas::caxpyBzpx | ( | const Complex & | a, |
ColorSpinorField & | x, | ||
ColorSpinorField & | y, | ||
const Complex & | b, | ||
ColorSpinorField & | z | ||
) |
Definition at line 563 of file blas_quda.cu.
References IMAG, quda::LatticeField::Precision(), REAL, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::x, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::y, and quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::z.
Referenced by benchmark(), test(), and xpayz().
Complex quda::blas::caxpyDotzy | ( | const Complex & | a, |
ColorSpinorField & | x, | ||
ColorSpinorField & | y, | ||
ColorSpinorField & | z | ||
) |
Definition at line 771 of file reduce_quda.cu.
Referenced by benchmark(), cDotProductNormB(), quda::orthoDir(), quda::BiCGstabL::orthoDir(), and test().
double quda::blas::caxpyNorm | ( | const Complex & | a, |
ColorSpinorField & | x, | ||
ColorSpinorField & | y | ||
) |
Definition at line 746 of file reduce_quda.cu.
Referenced by benchmark(), cDotProductNormB(), quda::CG3::operator()(), quda::CG3NE::operator()(), and test().
void quda::blas::caxpyXmaz | ( | const Complex & | a, |
ColorSpinorField & | x, | ||
ColorSpinorField & | y, | ||
ColorSpinorField & | z | ||
) |
Definition at line 597 of file blas_quda.cu.
References IMAG, REAL, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::x, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::y, and quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::z.
Referenced by benchmark(), quda::MR::operator()(), test(), and xpayz().
void quda::blas::caxpyXmazMR | ( | const Complex & | a, |
ColorSpinorField & | x, | ||
ColorSpinorField & | y, | ||
ColorSpinorField & | z | ||
) |
Definition at line 603 of file blas_quda.cu.
References commAsyncReduction(), errorQuda, IMAG, quda::LatticeField::Location(), QUDA_CPU_FIELD_LOCATION, REAL, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::x, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::y, and quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::z.
Referenced by quda::MR::operator()(), and xpayz().
double quda::blas::caxpyXmazNormX | ( | const Complex & | a, |
ColorSpinorField & | x, | ||
ColorSpinorField & | y, | ||
ColorSpinorField & | z | ||
) |
Definition at line 752 of file reduce_quda.cu.
Referenced by benchmark(), cDotProductNormB(), and test().
void quda::blas::caxpyz | ( | const Complex * | a, |
std::vector< ColorSpinorField *> & | x, | ||
std::vector< ColorSpinorField *> & | y, | ||
std::vector< ColorSpinorField *> & | z | ||
) |
Compute the block "caxpyz" over the set of ColorSpinorFields, i.e., it computes:
z = x * a + y
The dimensions of a can be rectangular, i.e., the widths of x and y need not be the same, though the maximum width for both is 16.
a[in] | Matrix of coefficients |
x[in] | vector of input ColorSpinorFields |
y[in] | vector of input ColorSpinorFields |
z[out] | vector of output ColorSpinorFields |
Definition at line 949 of file multi_blas_quda.cu.
References caxpyz_recurse().
Referenced by caxpyz(), cDotProductNormB(), and quda::CACG::operator()().
void quda::blas::caxpyz | ( | const Complex * | a, |
ColorSpinorField & | x, | ||
ColorSpinorField & | y, | ||
ColorSpinorField & | z | ||
) |
This is a wrapper for calling the block "caxpyz" with a composite ColorSpinorField. E.g., it computes.
z = x * a + y
a[in] | Matrix of coefficients |
x[in] | Input matrix |
y[in] | Input matrix added to x * a |
z[out] | Computed output matrix |
Definition at line 973 of file multi_blas_quda.cu.
References caxpyz(), and quda::ColorSpinorField::Components().
void quda::blas::caxpyz_L | ( | const Complex * | a, |
std::vector< ColorSpinorField *> & | x, | ||
std::vector< ColorSpinorField *> & | y, | ||
std::vector< ColorSpinorField *> & | z | ||
) |
Compute the block "caxpyz" over the set of ColorSpinorFields, i.e., it computes:
z = x * a + y
Where 'a' is assumed to be lower triangular
a[in] | Matrix of coefficients |
x[in] | vector of input ColorSpinorFields |
y[in] | vector of input ColorSpinorFields |
z[out] | vector of output ColorSpinorFields |
Definition at line 964 of file multi_blas_quda.cu.
References caxpyz_recurse().
Referenced by caxpyz_L(), and cDotProductNormB().
void quda::blas::caxpyz_L | ( | const Complex * | a, |
ColorSpinorField & | x, | ||
ColorSpinorField & | y, | ||
ColorSpinorField & | z | ||
) |
This is a wrapper for calling the block "caxpyz" with a composite ColorSpinorField. E.g., it computes.
z = x * a + y
a[in] | Matrix of coefficients |
x[in] | Input matrix |
y[in] | Input matrix added to x * a |
z[out] | Computed output matrix |
Definition at line 981 of file multi_blas_quda.cu.
References caxpyz_L(), and quda::ColorSpinorField::Components().
void quda::blas::caxpyz_recurse | ( | const Complex * | a_, |
std::vector< ColorSpinorField *> & | x, | ||
std::vector< ColorSpinorField *> & | y, | ||
std::vector< ColorSpinorField *> & | z, | ||
int | i, | ||
int | j, | ||
int | pass, | ||
int | upper | ||
) |
Definition at line 770 of file multi_blas_quda.cu.
References caxpy(), quda::count, and MAX_MULTI_BLAS_N.
Referenced by caxpyz(), caxpyz_L(), and caxpyz_U().
void quda::blas::caxpyz_U | ( | const Complex * | a, |
std::vector< ColorSpinorField *> & | x, | ||
std::vector< ColorSpinorField *> & | y, | ||
std::vector< ColorSpinorField *> & | z | ||
) |
Compute the block "caxpyz" over the set of ColorSpinorFields, i.e., it computes:
z = x * a + y
Where 'a' is assumed to be upper triangular.
a[in] | Matrix of coefficients |
x[in] | vector of input ColorSpinorFields |
y[in] | vector of input ColorSpinorFields |
z[out] | vector of output ColorSpinorFields |
Definition at line 956 of file multi_blas_quda.cu.
References caxpyz_recurse().
Referenced by caxpyz_U(), and cDotProductNormB().
void quda::blas::caxpyz_U | ( | const Complex * | a, |
ColorSpinorField & | x, | ||
ColorSpinorField & | y, | ||
ColorSpinorField & | z | ||
) |
This is a wrapper for calling the block "caxpyz" with a composite ColorSpinorField. E.g., it computes.
z = x * a + y
a[in] | Matrix of coefficients |
x[in] | Input matrix |
y[in] | Input matrix added to x * a |
z[out] | Computed output matrix |
Definition at line 977 of file multi_blas_quda.cu.
References caxpyz_U(), and quda::ColorSpinorField::Components().
__device__ __host__ void quda::blas::cdot_ | ( | ReduceType & | sum, |
const double2 & | a, | ||
const double2 & | b | ||
) |
Returns complex-valued dot product of x and y
Definition at line 199 of file multi_reduce_core.cuh.
Referenced by quda::blas::cabxpyzaxnorm< ReduceType, Float2, FloatN >::flops().
__device__ __host__ void quda::blas::cdot_ | ( | ReduceType & | sum, |
const float2 & | a, | ||
const float2 & | b | ||
) |
Definition at line 208 of file multi_reduce_core.cuh.
__device__ __host__ void quda::blas::cdot_ | ( | ReduceType & | sum, |
const float4 & | a, | ||
const float4 & | b | ||
) |
Definition at line 217 of file multi_reduce_core.cuh.
__device__ __host__ void quda::blas::cdotNormA_ | ( | ReduceType & | sum, |
const InputType & | a, | ||
const InputType & | b | ||
) |
First returns the dot product (x,y), then returns the norm of x.
Definition at line 385 of file reduce_core.cuh.
References quda::sum().
__device__ __host__ void quda::blas::cdotNormB_ | ( | ReduceType & | sum, |
const InputType & | a, | ||
const InputType & | b | ||
) |
First returns the dot product (x,y), then returns the norm of y.
Definition at line 398 of file reduce_core.cuh.
References quda::sum().
Complex quda::blas::cDotProduct | ( | ColorSpinorField & | x, |
ColorSpinorField & | y | ||
) |
Definition at line 764 of file reduce_quda.cu.
Referenced by benchmark(), quda::EigenSolver::blockOrthogonalize(), quda::CG::blocksolve(), cDotProductNormB(), quda::computeBeta(), quda::EigenSolver::computeEvals(), quda::MPBiCGstab::computeMatrixPowers(), quda::EigenSolver::computeSVD(), quda::BiCGstabL::computeTau(), quda::EigenSolver::deflate(), quda::EigenSolver::deflateSVD(), quda::GMResDR::FlexArnoldiProcedure(), quda::MG::generateNullVectors(), quda::Deflation::increment(), invertMultiShiftQuda(), invertQuda(), quda::Deflation::operator()(), quda::CG::operator()(), quda::CG3::operator()(), quda::CG3NE::operator()(), quda::BiCGstab::operator()(), quda::BiCGstabL::operator()(), quda::CACG::operator()(), quda::MultiShiftCG::operator()(), quda::MinResExt::operator()(), quda::GMResDR::operator()(), quda::orthoDir(), quda::BiCGstabL::orthoDir(), quda::EigCGArgs::RestartLanczos(), quda::GMResDR::RestartVZH(), quda::CAGCR::solve(), quda::MinResExt::solve(), test(), quda::MG::verify(), and xmyNorm().
void quda::blas::cDotProduct | ( | Complex * | result, |
std::vector< ColorSpinorField *> & | a, | ||
std::vector< ColorSpinorField *> & | b | ||
) |
Computes the matrix of inner products between the vector set a and the vector set b.
result[out] | Matrix of inner product result[i][j] = (a[j],b[i]) |
a[in] | set of input ColorSpinorFields |
b[in] | set of input ColorSpinorFields |
Definition at line 1031 of file multi_reduce_quda.cu.
References quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >::apply(), errorQuda, and reduceDoubleArray().
void quda::blas::cDotProductCopy | ( | Complex * | result, |
std::vector< ColorSpinorField *> & | a, | ||
std::vector< ColorSpinorField *> & | b, | ||
std::vector< ColorSpinorField *> & | c | ||
) |
Computes the matrix of inner products between the vector set a and the vector set b, and copies b into c.
result[out] | Matrix of inner product result[i][j] = (a[j],b[i]) |
a[in] | set of input ColorSpinorFields |
b[in] | set of input ColorSpinorFields |
c[out] | set of output ColorSpinorFields |
Definition at line 1110 of file multi_reduce_quda.cu.
References quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >::apply(), errorQuda, and reduceDoubleArray().
Referenced by cDotProductNormB().
double3 quda::blas::cDotProductNormA | ( | ColorSpinorField & | a, |
ColorSpinorField & | b | ||
) |
Definition at line 778 of file reduce_quda.cu.
Referenced by benchmark(), cDotProductNormB(), quda::CG::operator()(), quda::BiCGstab::operator()(), quda::BiCGstabL::operator()(), quda::GCR::operator()(), quda::MR::operator()(), quda::SD::operator()(), quda::Deflation::reduce(), test(), quda::Deflation::verify(), and xmyNorm().
|
inline |
Return (a,b) and ||b||^2 - implemented using cDotProductNormA.
Definition at line 83 of file blas_quda.h.
References axpyBzpcx(), axpyCGNorm(), cabxpyzAxNorm(), caxpbypzYmbwcDotProductUYNormY(), caxpy(), caxpy_L(), caxpy_U(), caxpyBxpz(), caxpyDotzy(), caxpyNorm(), caxpyXmazNormX(), caxpyz(), caxpyz_L(), caxpyz_U(), cDotProduct(), cDotProductCopy(), cDotProductNormA(), doubleCG3InitNorm(), doubleCG3UpdateNorm(), hDotProduct(), hDotProduct_Anorm(), HeavyQuarkResidualNorm(), quadrupleCG3InitNorm(), quadrupleCG3UpdateNorm(), quadrupleCGReduction(), reDotProduct(), tripleCGReduction(), and xpyHeavyQuarkResidualNorm().
Referenced by benchmark(), and test().
void quda::blas::completeFastReduce | ( | int32_t | words | ) |
Definition at line 43 of file reduce_quda.cu.
References quda::count, and h_reduce.
Referenced by multiReduceLaunch(), and reduceLaunch().
void quda::blas::copy | ( | ColorSpinorField & | dst, |
const ColorSpinorField & | src | ||
) |
Definition at line 355 of file copy_quda.cu.
References quda::blas::copy_ns::copy(), quda::LatticeField::Location(), and QUDA_CUDA_FIELD_LOCATION.
Referenced by benchmark(), quda::CG::blocksolve(), quda::EigenSolver::chebyOp(), comm_declare_send_relative_(), comm_declare_strided_send_relative_(), quda::cudaColorSpinorField::copy(), quda::IncEigCG::eigCGsolve(), quda::Deflation::increment(), invertMultiShiftQuda(), invertMultiSrcQuda(), invertQuda(), quda::CG::operator()(), quda::CG3::operator()(), quda::CG3NE::operator()(), quda::BiCGstab::operator()(), quda::BiCGstabL::operator()(), quda::GCR::operator()(), quda::MR::operator()(), quda::CACG::operator()(), quda::CAGCR::operator()(), quda::MultiShiftCG::operator()(), quda::MinResExt::operator()(), quda::Deflation::reduce(), quda::IncEigCG::RestartVT(), quda::GMResDR::RestartVZH(), test(), and quda::IncEigCG::UpdateVm().
void quda::blas::cxpaypbz | ( | ColorSpinorField & | x, |
const Complex & | b, | ||
ColorSpinorField & | y, | ||
const Complex & | c, | ||
ColorSpinorField & | z | ||
) |
Definition at line 535 of file blas_quda.cu.
References IMAG, REAL, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::x, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::y, and quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::z.
Referenced by benchmark(), quda::BiCGstab::operator()(), test(), and xpayz().
__device__ __host__ void quda::blas::dot_ | ( | ReduceType & | sum, |
const double2 & | a, | ||
const double2 & | b | ||
) |
Return the real dot product of x and y. Broken at the moment: reDotProduct needs to be updated with the permuting, etc., of cDotProduct below.
Return the real dot product of x and y
Definition at line 158 of file multi_reduce_core.cuh.
Referenced by quda::__launch_bounds__(), and quda::blas::Norm2< ReduceType, Float2, FloatN >::flops().
__device__ __host__ void quda::blas::dot_ | ( | ReduceType & | sum, |
const float2 & | a, | ||
const float2 & | b | ||
) |
Definition at line 164 of file multi_reduce_core.cuh.
__device__ __host__ void quda::blas::dot_ | ( | ReduceType & | sum, |
const float4 & | a, | ||
const float4 & | b | ||
) |
Definition at line 170 of file multi_reduce_core.cuh.
void quda::blas::doubleCG3Init | ( | double | a, |
ColorSpinorField & | x, | ||
ColorSpinorField & | y, | ||
ColorSpinorField & | z | ||
) |
Definition at line 626 of file blas_quda.cu.
References quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::x, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::y, and quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::z.
Referenced by quda::CG3NE::operator()(), and xpayz().
double quda::blas::doubleCG3InitNorm | ( | double | a, |
ColorSpinorField & | x, | ||
ColorSpinorField & | y, | ||
ColorSpinorField & | z | ||
) |
Definition at line 848 of file reduce_quda.cu.
Referenced by cDotProductNormB(), and quda::CG3NE::operator()().
void quda::blas::doubleCG3Update | ( | double | a, |
double | b, | ||
ColorSpinorField & | x, | ||
ColorSpinorField & | y, | ||
ColorSpinorField & | z | ||
) |
Definition at line 631 of file blas_quda.cu.
References quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::x, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::y, and quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::z.
Referenced by quda::CG3NE::operator()(), and xpayz().
double quda::blas::doubleCG3UpdateNorm | ( | double | a, |
double | b, | ||
ColorSpinorField & | x, | ||
ColorSpinorField & | y, | ||
ColorSpinorField & | z | ||
) |
Definition at line 853 of file reduce_quda.cu.
Referenced by cDotProductNormB(), and quda::CG3NE::operator()().
void quda::blas::end | ( | void | ) |
Definition at line 489 of file blas_quda.cu.
References endReduce().
Referenced by benchmark(), quda::DslashCoarsePolicyTune::DslashCoarsePolicyTune(), quda::dslash::DslashPolicyTune< Dslash >::DslashPolicyTune(), endQuda(), quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >::TileSizeTune(), and quda::tuneLaunch().
void quda::blas::endReduce | ( | void | ) |
Definition at line 120 of file reduce_quda.cu.
References d_reduce, device_free, h_reduce, hd_reduce, host_free, and reduceEnd.
Referenced by end(), and zero().
void * quda::blas::getDeviceReduceBuffer | ( | ) |
Definition at line 26 of file reduce_quda.cu.
References d_reduce.
bool quda::blas::getFastReduce | ( | ) |
Definition at line 30 of file reduce_quda.cu.
References fast_reduce_enabled.
Referenced by quda::blas::MultiReduceCuda< NXZ, doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Reducer >::MultiReduceCuda(), multiReduceLaunch(), quda::blas::ReduceCuda< doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer >::ReduceCuda(), and reduceLaunch().
void * quda::blas::getHostReduceBuffer | ( | ) |
Definition at line 28 of file reduce_quda.cu.
References h_reduce.
Referenced by multiReduceLaunch().
void * quda::blas::getMappedHostReduceBuffer | ( | ) |
Definition at line 27 of file reduce_quda.cu.
References hd_reduce.
Referenced by multiReduceLaunch().
cudaEvent_t * quda::blas::getReduceEvent | ( | ) |
Definition at line 29 of file reduce_quda.cu.
References reduceEnd.
Referenced by multiReduceLaunch().
cudaStream_t * quda::blas::getStream | ( | ) |
Definition at line 494 of file blas_quda.cu.
References blasStream.
Referenced by quda::blas::MultiBlas< NXZ, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Functor, T >::apply(), quda::blas::copy_ns::copy(), multiBlas(), multiReduce(), multiReduceLaunch(), and nativeReduce().
void quda::blas::hDotProduct | ( | Complex * | result, |
std::vector< ColorSpinorField *> & | a, | ||
std::vector< ColorSpinorField *> & | b | ||
) |
Computes the matrix of inner products between the vector set a and the vector set b. This routine is specifically for the case where the result matrix is guaranteed to be Hermitian. Requires a.size()==b.size().
result[out] | Matrix of inner product result[i][j] = (a[j],b[i]) |
a[in] | set of input ColorSpinorFields |
b[in] | set of input ColorSpinorFields |
Definition at line 1056 of file multi_reduce_quda.cu.
References quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >::apply(), quda::conj(), errorQuda, and reduceDoubleArray().
Referenced by cDotProductNormB(), and quda::CAGCR::solve().
void quda::blas::hDotProduct_Anorm | ( | Complex * | result, |
std::vector< ColorSpinorField *> & | a, | ||
std::vector< ColorSpinorField *> & | b | ||
) |
Computes the matrix of inner products between the vector set a and the vector set b. This routine is specifically for the case where the result matrix is guaranteed to be Hermitian. Uniquely defined for cases like (p, Ap) where the output is Hermitian, but there's an A-norm instead of an L2 norm. Requires a.size()==b.size().
result[out] | Matrix of inner product result[i][j] = (a[j],b[i]) |
a[in] | set of input ColorSpinorFields |
b[in] | set of input ColorSpinorFields |
Definition at line 1083 of file multi_reduce_quda.cu.
References quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >::apply(), quda::conj(), errorQuda, and reduceDoubleArray().
Referenced by cDotProductNormB().
double3 quda::blas::HeavyQuarkResidualNorm | ( | ColorSpinorField & | x, |
ColorSpinorField & | r | ||
) |
Definition at line 809 of file reduce_quda.cu.
References comm_size(), and quda::ColorSpinorField::Ncolor().
Referenced by benchmark(), quda::CG::blocksolve(), cDotProductNormB(), quda::IncEigCG::eigCGsolve(), invert_test(), quda::CG::operator()(), quda::CG3::operator()(), quda::CG3NE::operator()(), quda::CGNE::operator()(), quda::CGNR::operator()(), quda::PreconCG::operator()(), quda::BiCGstab::operator()(), quda::BiCGstabL::operator()(), quda::GCR::operator()(), quda::CACG::operator()(), quda::CACGNE::operator()(), quda::CACGNR::operator()(), quda::CAGCR::operator()(), quda::MultiShiftCG::operator()(), quda::IncEigCG::operator()(), quda::GMResDR::operator()(), and test().
void quda::blas::init | ( | ) |
Definition at line 483 of file blas_quda.cu.
References initReduce(), quda::Nstream, and streams.
Referenced by initQudaMemory().
void quda::blas::initFastReduce | ( | int | words | ) |
void quda::blas::initFastReduce | ( | int32_t | words | ) |
Definition at line 32 of file reduce_quda.cu.
References h_reduce.
void quda::blas::initReduce | ( | ) |
Definition at line 64 of file reduce_quda.cu.
References bytes, checkCudaError, d_reduce, device_malloc, deviceProp, fast_reduce_enabled, h_reduce, hd_reduce, mapped_malloc, MAX_MULTI_BLAS_N, memset(), pinned_malloc, QudaSumFloat, reduceEnd, and warningQuda.
Referenced by init(), and zero().
void quda::blas::mixed_blas | ( | const double2 & | a, |
const double2 & | b, | ||
const double2 & | c, | ||
ColorSpinorField & | x, | ||
ColorSpinorField & | y, | ||
ColorSpinorField & | z, | ||
ColorSpinorField & | w, | ||
ColorSpinorField & | v | ||
) |
Driver for generic blas routine with four loads and two stores. This is the mixed-precision driver which supports a different precision for (x,z,w) and (y,v), where the former is the low precision and the latter is the high precision.
Definition at line 326 of file blas_quda.cu.
References checkLocation, checkPrecision, errorQuda, quda::ColorSpinorField::isNative(), quda::ColorSpinorField::Nspin(), quda::LatticeField::Precision(), QUDA_CUDA_FIELD_LOCATION, QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, QUDA_SINGLE_PRECISION, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::v, quda::ColorSpinorField::Volume(), quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::w, warningQuda, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::x, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::y, and quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::z.
doubleN quda::blas::mixed_reduce | ( | const double2 & | a, |
const double2 & | b, | ||
ColorSpinorField & | x, | ||
ColorSpinorField & | y, | ||
ColorSpinorField & | z, | ||
ColorSpinorField & | w, | ||
ColorSpinorField & | v | ||
) |
Driver for generic reduction routine with two loads.
ReduceType | |
siteUnroll | - if this is true, then one site corresponds to exactly one thread |
Definition at line 520 of file reduce_quda.cu.
References bytes, checkLocation, checkPrecision, errorQuda, quda::ColorSpinorField::FieldOrder(), genericReduce(), quda::ColorSpinorField::isNative(), quda::ColorSpinorField::Length(), nativeReduce(), quda::ColorSpinorField::Ncolor(), quda::ColorSpinorField::Nspin(), quda::LatticeField::Precision(), QUDA_CUDA_FIELD_LOCATION, QUDA_DOUBLE_PRECISION, QUDA_FLOAT2_FIELD_ORDER, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, QUDA_SINGLE_PRECISION, quda::ColorSpinorField::RealLength(), reduceDoubleArray(), streams, quda::ColorSpinorField::Volume(), warningQuda, and zero().
void quda::blas::mixedMultiBlas | ( | const coeff_array< T > & | a, |
const coeff_array< T > & | b, | ||
const coeff_array< T > & | c, | ||
CompositeColorSpinorField & | x, | ||
CompositeColorSpinorField & | y, | ||
CompositeColorSpinorField & | z, | ||
CompositeColorSpinorField & | w | ||
) |
Driver for generic blas routine with four loads and two stores.
Definition at line 403 of file multi_blas_quda.cu.
References checkLocation, errorQuda, Nspin, QUDA_CUDA_FIELD_LOCATION, QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, and QUDA_SINGLE_PRECISION.
void quda::blas::mixedMultiReduce | ( | doubleN | result[], |
const coeff_array< T > & | a, | ||
const coeff_array< T > & | b, | ||
const coeff_array< T > & | c, | ||
CompositeColorSpinorField & | x, | ||
CompositeColorSpinorField & | y, | ||
CompositeColorSpinorField & | z, | ||
CompositeColorSpinorField & | w | ||
) |
Driver for multi-reduce with up to five vectors
Definition at line 511 of file multi_reduce_quda.cu.
References checkPrecision, errorQuda, Nspin, QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, and QUDA_SINGLE_PRECISION.
void quda::blas::multiBlas | ( | const coeff_array< T > & | a, |
const coeff_array< T > & | b, | ||
const coeff_array< T > & | c, | ||
std::vector< ColorSpinorField *> & | x, | ||
std::vector< ColorSpinorField *> & | y, | ||
std::vector< ColorSpinorField *> & | z, | ||
std::vector< ColorSpinorField *> & | w, | ||
int | length | ||
) |
Definition at line 245 of file multi_blas_quda.cu.
References quda::blas::MultiBlas< NXZ, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Functor, T >::apply(), bytes, quda::blas::MultiBlas< NXZ, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Functor, T >::bytes(), checkCudaError, errorQuda, flops, quda::blas::MultiBlas< NXZ, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Functor, T >::flops(), getStream(), MAX_MATRIX_SIZE, MAX_MULTI_BLAS_N, SpinorTexture< RegType, StoreType, N >::set(), Spinor< RegType, StoreType, N, write >::set(), quda::blas::write< writeX, writeY, writeZ, writeW >::W, quda::blas::write< writeX, writeY, writeZ, writeW >::X, quda::blas::write< writeX, writeY, writeZ, writeW >::Y, and quda::blas::write< writeX, writeY, writeZ, writeW >::Z.
void quda::blas::multiBlas | ( | const coeff_array< T > & | a, |
const coeff_array< T > & | b, | ||
const coeff_array< T > & | c, | ||
CompositeColorSpinorField & | x, | ||
CompositeColorSpinorField & | y, | ||
CompositeColorSpinorField & | z, | ||
CompositeColorSpinorField & | w | ||
) |
Driver for generic blas routine with four loads and two stores.
Definition at line 294 of file multi_blas_quda.cu.
References checkLocation, errorQuda, Ncolor, Nspin, QUDA_CUDA_FIELD_LOCATION, QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, and QUDA_SINGLE_PRECISION.
__global__ void quda::blas::multiBlasKernel | ( | Arg | arg_ | ) |
Generic multi-blas kernel with four loads and up to four stores.
[in,out] | arg | Argument struct with required meta data (input/output fields, functor, etc.) |
Definition at line 73 of file multi_blas_core.cuh.
References quda::arg(), and parity.
void quda::blas::multiReduce | ( | doubleN | result[], |
const coeff_array< T > & | a, | ||
const coeff_array< T > & | b, | ||
const coeff_array< T > & | c, | ||
std::vector< ColorSpinorField *> & | x, | ||
std::vector< ColorSpinorField *> & | y, | ||
std::vector< ColorSpinorField *> & | z, | ||
std::vector< ColorSpinorField *> & | w, | ||
int | length | ||
) |
Definition at line 275 of file multi_reduce_quda.cu.
References Amatrix_d, Amatrix_h, quda::blas::MultiReduceCuda< NXZ, doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Reducer >::apply(), Bmatrix_d, Bmatrix_h, bytes, quda::blas::MultiReduceCuda< NXZ, doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Reducer >::bytes(), checkCudaError, quda::checkSpinor(), Cmatrix_d, Cmatrix_h, quda::blas::coeff_array< T >::data, errorQuda, flops, quda::blas::MultiReduceCuda< NXZ, doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Reducer >::flops(), getStream(), MAX_MATRIX_SIZE, MAX_MULTI_BLAS_N, memset(), QUDA_MAX_MULTI_REDUCE, quda::reduce(), SpinorTexture< RegType, StoreType, N >::set(), Spinor< RegType, StoreType, N, write >::set(), quda::blas::coeff_array< T >::use_const, quda::blas::write< writeX, writeY, writeZ, writeW >::W, warningQuda, quda::blas::write< writeX, writeY, writeZ, writeW >::X, quda::blas::write< writeX, writeY, writeZ, writeW >::Y, and quda::blas::write< writeX, writeY, writeZ, writeW >::Z.
void quda::blas::multiReduce | ( | doubleN | result[], |
const coeff_array< T > & | a, | ||
const coeff_array< T > & | b, | ||
const coeff_array< T > & | c, | ||
CompositeColorSpinorField & | x, | ||
CompositeColorSpinorField & | y, | ||
CompositeColorSpinorField & | z, | ||
CompositeColorSpinorField & | w | ||
) |
Driver for multi-reduce with up to four vectors
Definition at line 385 of file multi_reduce_quda.cu.
References checkPrecision, errorQuda, Nspin, QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, and QUDA_SINGLE_PRECISION.
void quda::blas::multiReduce | ( | doubleN | result[], |
const coeff_array< T > & | a, | ||
const coeff_array< T > & | b, | ||
const coeff_array< T > & | c, | ||
CompositeColorSpinorField & | x, | ||
CompositeColorSpinorField & | y, | ||
CompositeColorSpinorField & | z, | ||
CompositeColorSpinorField & | w, | ||
int | i, | ||
int | j | ||
) |
Definition at line 598 of file multi_reduce_quda.cu.
void quda::blas::multiReduce_recurse | ( | Complex * | result, |
std::vector< ColorSpinorField *> & | x, | ||
std::vector< ColorSpinorField *> & | y, | ||
std::vector< ColorSpinorField *> & | z, | ||
std::vector< ColorSpinorField *> & | w, | ||
int | i_idx, | ||
int | j_idx, | ||
bool | hermitian, | ||
unsigned int | tile_size | ||
) |
Definition at line 706 of file multi_reduce_quda.cu.
References quda::count.
__global__ void quda::blas::multiReduceKernel | ( | Arg | arg_ | ) |
Definition at line 79 of file multi_reduce_core.cuh.
References quda::arg(), parity, and quda::sum().
Referenced by multiReduceLaunch().
void quda::blas::multiReduceLaunch | ( | doubleN | result[], |
Arg & | arg, | ||
const TuneParam & | tp, | ||
const cudaStream_t & | stream, | ||
Tunable & | tunable | ||
) |
Definition at line 36 of file multi_reduce_quda.cu.
References arg_buffer, quda::TuneParam::block, commAsyncReduction(), completeFastReduce(), deviceProp, errorQuda, getFastReduce(), getHostReduceBuffer(), getMappedHostReduceBuffer(), getReduceEvent(), getStream(), quda::TuneParam::grid, initFastReduce(), quda::Tunable::jitifyError(), LAUNCH_KERNEL_LOCAL_PARITY, multiReduceKernel(), quda::qudaEventQuery(), quda::qudaEventRecord(), qudaMemcpy, quda::TuneParam::shared_bytes, and quda::sum().
void quda::blas::mxpy | ( | ColorSpinorField & | x, |
ColorSpinorField & | y | ||
) |
inline |
Definition at line 34 of file blas_quda.h.
References axpbyz().
Referenced by benchmark(), invert_test(), main(), and test().
void quda::blas::nativeBlas | ( | const double2 & | a, |
const double2 & | b, | ||
const double2 & | c, | ||
ColorSpinorField & | x, | ||
ColorSpinorField & | y, | ||
ColorSpinorField & | z, | ||
ColorSpinorField & | w, | ||
ColorSpinorField & | v, | ||
int | length | ||
) |
Definition at line 149 of file blas_quda.cu.
References quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::apply(), bytes, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::bytes(), checkCudaError, quda::checkLength(), flops, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::flops(), V, X, and Z.
doubleN quda::blas::nativeReduce | ( | const double2 & | a, |
const double2 & | b, | ||
ColorSpinorField & | x, | ||
ColorSpinorField & | y, | ||
ColorSpinorField & | z, | ||
ColorSpinorField & | w, | ||
ColorSpinorField & | v, | ||
int | length | ||
) |
Definition at line 297 of file reduce_quda.cu.
References bytes, checkCudaError, quda::checkLength(), flops, getStream(), quda::reduce(), V, X, and Z.
Referenced by mixed_reduce(), and uni_reduce().
double quda::blas::norm1 | ( | const ColorSpinorField & | b | ) |
Definition at line 714 of file reduce_quda.cu.
Referenced by getLambdaMax(), getRealBidiagMatrix(), quda::norm1(), and xpayz().
__device__ __host__ ReduceType quda::blas::norm1_ | ( | const double2 & | a | ) |
Return the L1 norm of a
Definition at line 100 of file reduce_core.cuh.
References quda::sqrt().
__device__ __host__ ReduceType quda::blas::norm1_ | ( | const float2 & | a | ) |
Definition at line 105 of file reduce_core.cuh.
References quda::sqrt().
__device__ __host__ ReduceType quda::blas::norm1_ | ( | const float4 & | a | ) |
Definition at line 110 of file reduce_core.cuh.
References quda::sqrt().
double quda::blas::norm2 | ( | const ColorSpinorField & | a | ) |
Definition at line 721 of file reduce_quda.cu.
Referenced by benchmark(), quda::CG::blocksolve(), cloverQuda(), quda::EigenSolver::computeEvals(), quda::EigenSolver::computeSVD(), dslashQuda(), dslashQuda_4dpc(), dslashQuda_mdwf(), dslashTest(), quda::IncEigCG::eigCGsolve(), quda::Deflation::increment(), init(), invertMultiShiftQuda(), invertMultiSrcQuda(), invertQuda(), quda::TRLM::lanczosStep(), main(), quda::massRescale(), MatDagMatQuda(), MatQuda(), quda::norm2(), quda::TRLM::operator()(), quda::CG::operator()(), quda::CG3::operator()(), quda::CG3NE::operator()(), quda::CGNE::operator()(), quda::CGNR::operator()(), quda::MPCG::operator()(), quda::BiCGstab::operator()(), quda::MPBiCGstab::operator()(), quda::BiCGstabL::operator()(), quda::GCR::operator()(), quda::MR::operator()(), quda::CACG::operator()(), quda::CACGNE::operator()(), quda::CACGNR::operator()(), quda::CAGCR::operator()(), quda::MultiShiftCG::operator()(), quda::MinResExt::operator()(), packTest(), performWuppertalnStep(), quda::GMResDR::RestartVZH(), test(), TEST_P(), and xpayz().
__device__ __host__ void quda::blas::norm2_ | ( | ReduceType & | sum, |
const double2 & | a | ||
) |
Accumulate the L2 norm contribution of a into sum
Definition at line 129 of file reduce_core.cuh.
__device__ __host__ void quda::blas::norm2_ | ( | ReduceType & | sum, |
const float2 & | a | ||
) |
Definition at line 135 of file reduce_core.cuh.
__device__ __host__ void quda::blas::norm2_ | ( | ReduceType & | sum, |
const float4 & | a | ||
) |
Definition at line 141 of file reduce_core.cuh.
double quda::blas::quadrupleCG3InitNorm | ( | double | a, |
ColorSpinorField & | x, | ||
ColorSpinorField & | y, | ||
ColorSpinorField & | z, | ||
ColorSpinorField & | w, | ||
ColorSpinorField & | v | ||
) |
Definition at line 838 of file reduce_quda.cu.
Referenced by cDotProductNormB(), and quda::CG3::operator()().
double quda::blas::quadrupleCG3UpdateNorm | ( | double | a, |
double | b, | ||
ColorSpinorField & | x, | ||
ColorSpinorField & | y, | ||
ColorSpinorField & | z, | ||
ColorSpinorField & | w, | ||
ColorSpinorField & | v | ||
) |
Definition at line 843 of file reduce_quda.cu.
Referenced by cDotProductNormB(), and quda::CG3::operator()().
double4 quda::blas::quadrupleCGReduction | ( | ColorSpinorField & | x, |
ColorSpinorField & | y, | ||
ColorSpinorField & | z | ||
) |
Definition at line 833 of file reduce_quda.cu.
Referenced by cDotProductNormB(), and quda::CG::operator()().
double quda::blas::reDotProduct | ( | ColorSpinorField & | x, |
ColorSpinorField & | y | ||
) |
Definition at line 728 of file reduce_quda.cu.
Referenced by benchmark(), quda::CG::blocksolve(), cDotProductNormB(), quda::MPCG::computeMatrixPowers(), quda::IncEigCG::eigCGsolve(), quda::TRLM::lanczosStep(), quda::CG::operator()(), quda::CG3::operator()(), quda::PreconCG::operator()(), quda::MultiShiftCG::operator()(), test(), and xpayz().
void quda::blas::reDotProduct | ( | double * | result, |
std::vector< ColorSpinorField *> & | a, | ||
std::vector< ColorSpinorField *> & | b | ||
) |
Definition at line 622 of file multi_reduce_quda.cu.
References errorQuda, and reduceDoubleArray().
__global__ void quda::blas::reduceKernel | ( | Arg | arg | ) |
Generic reduction kernel with up to four loads and three saves.
Definition at line 44 of file reduce_core.cuh.
References quda::arg(), parity, quda::sum(), and zero().
Referenced by reduceLaunch().
doubleN quda::blas::reduceLaunch | ( | Arg & | arg, |
const TuneParam & | tp, | ||
const cudaStream_t & | stream, | ||
Tunable & | tunable | ||
) |
Generic reduction kernel launcher
Definition at line 139 of file reduce_quda.cu.
References quda::TuneParam::block, commAsyncReduction(), completeFastReduce(), deviceProp, errorQuda, getFastReduce(), quda::TuneParam::grid, h_reduce, hd_reduce, initFastReduce(), quda::Tunable::jitifyError(), LAUNCH_KERNEL, quda::qudaEventQuery(), quda::qudaEventRecord(), qudaMemcpy, reduceEnd, reduceKernel(), quda::TuneParam::shared_bytes, and quda::sum().
void quda::blas::setParam | ( | int | kernel, |
int | prec, | ||
int | threads, | ||
int | blocks | ||
) |
double3 quda::blas::tripleCGReduction | ( | ColorSpinorField & | x, |
ColorSpinorField & | y, | ||
ColorSpinorField & | z | ||
) |
Definition at line 828 of file reduce_quda.cu.
Referenced by benchmark(), cDotProductNormB(), quda::CG::operator()(), and test().
void quda::blas::tripleCGUpdate | ( | double | alpha, |
double | beta, | ||
ColorSpinorField & | q, | ||
ColorSpinorField & | r, | ||
ColorSpinorField & | x, | ||
ColorSpinorField & | p | ||
) |
Definition at line 614 of file blas_quda.cu.
References quda::LatticeField::Precision(), quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::w, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::x, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::y, and quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::z.
Referenced by benchmark(), quda::CG::operator()(), test(), and xpayz().
void quda::blas::uni_blas | ( | const double2 & | a, |
const double2 & | b, | ||
const double2 & | c, | ||
ColorSpinorField & | x, | ||
ColorSpinorField & | y, | ||
ColorSpinorField & | z, | ||
ColorSpinorField & | w, | ||
ColorSpinorField & | v | ||
) |
Driver for generic blas routine with four loads and two stores. All fields must have matching precisions.
Definition at line 185 of file blas_quda.cu.
References checkLocation, checkPrecision, errorQuda, quda::ColorSpinorField::FieldOrder(), quda::ColorSpinorField::isNative(), quda::ColorSpinorField::Length(), quda::ColorSpinorField::Ncolor(), quda::ColorSpinorField::Nspin(), quda::LatticeField::Precision(), QUDA_CUDA_FIELD_LOCATION, QUDA_DOUBLE_PRECISION, QUDA_FLOAT2_FIELD_ORDER, QUDA_FLOAT4_FIELD_ORDER, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, QUDA_SINGLE_PRECISION, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::v, quda::ColorSpinorField::Volume(), quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::w, warningQuda, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::x, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::y, and quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::z.
doubleN quda::blas::uni_reduce | ( | const double2 & | a, |
const double2 & | b, | ||
ColorSpinorField & | x, | ||
ColorSpinorField & | y, | ||
ColorSpinorField & | z, | ||
ColorSpinorField & | w, | ||
ColorSpinorField & | v | ||
) |
Driver for generic reduction routine with five loads.
ReduceType | |
siteUnroll | - if this is true, then one site corresponds to exactly one thread |
Definition at line 349 of file reduce_quda.cu.
References checkLocation, checkPrecision, errorQuda, quda::ColorSpinorField::FieldOrder(), genericReduce(), quda::ColorSpinorField::isNative(), quda::ColorSpinorField::Length(), nativeReduce(), quda::ColorSpinorField::Ncolor(), quda::ColorSpinorField::Nspin(), quda::LatticeField::Precision(), QUDA_CUDA_FIELD_LOCATION, QUDA_DOUBLE_PRECISION, QUDA_FLOAT2_FIELD_ORDER, QUDA_FLOAT4_FIELD_ORDER, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, QUDA_SINGLE_PRECISION, quda::ColorSpinorField::RealLength(), reduceDoubleArray(), quda::ColorSpinorField::Volume(), warningQuda, and zero().
|
inline |
Definition at line 75 of file blas_quda.h.
References axpbyzNorm(), cDotProduct(), and cDotProductNormA().
Referenced by benchmark(), quda::CG::blocksolve(), quda::IncEigCG::eigCGsolve(), quda::MG::operator()(), quda::CG::operator()(), quda::CG3::operator()(), quda::CG3NE::operator()(), quda::CGNE::operator()(), quda::MPCG::operator()(), quda::PreconCG::operator()(), quda::BiCGstab::operator()(), quda::MPBiCGstab::operator()(), quda::BiCGstabL::operator()(), quda::GCR::operator()(), quda::MR::operator()(), quda::CACG::operator()(), quda::CACGNE::operator()(), quda::CAGCR::operator()(), quda::SD::operator()(), quda::MultiShiftCG::operator()(), quda::IncEigCG::operator()(), quda::GMResDR::operator()(), test(), and quda::MG::verify().
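void quda::blas::xpay | ( | ColorSpinorField & | x, |
double | a, | ||
ColorSpinorField & | y | ||
) |
inline |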
Definition at line 37 of file blas_quda.h.
References axpbyz().
Referenced by quda::ApplyNdegTwistedMassPreconditioned(), quda::ApplyTwistedCloverPreconditioned(), quda::ApplyTwistedMassPreconditioned(), benchmark(), quda::CG::blocksolve(), clover_mat(), clover_matpc(), quda::dslash5(), quda::dslash5inv(), quda::DiracCoarsePC::DslashXpay(), dw_4d_mat(), dw_4d_matpc(), dw_mat(), dw_matpc(), quda::IncEigCG::initCGsolve(), quda::Dslash< Float >::instantiate(), invert_test(), quda::TwistedMassLaunch< Float, nDim, nColor, nParity, dagger, xpay, kernel_type, Arg >::launch(), quda::WilsonCloverPreconditionedLaunch< Float, nDim, nColor, nParity, dagger, xpay, kernel_type, Arg >::launch(), quda::WilsonCloverLaunch< Float, nDim, nColor, nParity, dagger, xpay, kernel_type, Arg >::launch(), quda::NdegTwistedMassLaunch< Float, nDim, nColor, nParity, dagger, xpay, kernel_type, Arg >::launch(), quda::TwistedCloverLaunch< Float, nDim, nColor, nParity, dagger, xpay, kernel_type, Arg >::launch(), quda::TwistedCloverPreconditionedLaunch< Float, nDim, nColor, nParity, dagger, xpay, kernel_type, Arg >::launch(), quda::Dslash< Float >::launch(), quda::DiracDomainWall4D::M(), quda::DiracCoarsePC::M(), quda::PreconCG::operator()(), quda::CACG::operator()(), quda::CAGCR::operator()(), quda::MultiShiftCG::operator()(), quda::DiracCoarsePC::prepare(), quda::DiracCoarsePC::reconstruct(), quda::Deflation::reduce(), quda::IncEigCG::RestartVT(), staggeredDslashRef(), test(), tm_mat(), tm_matpc(), tm_ndeg_mat(), tm_ndeg_matpc(), tmc_mat(), tmc_matpc(), quda::Deflation::verify(), wil_mat(), and wil_matpc().
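void quda::blas::xpayz | ( | ColorSpinorField & | x, |
double | a, | ||
ColorSpinorField & | y, | ||
ColorSpinorField & | z | ||
) |
inline |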
Definition at line 38 of file blas_quda.h.
References axpbyz(), axpbyzNorm(), axpyBzpcx(), axpyReDot(), axpyZpbx(), cabxpyAx(), caxpby(), caxpbypczw(), caxpbypzYmbw(), caxpy(), caxpyBxpz(), caxpyBzpx(), caxpyXmaz(), caxpyXmazMR(), cxpaypbz(), doubleCG3Init(), doubleCG3Update(), norm1(), norm2(), reDotProduct(), and tripleCGUpdate().
Referenced by quda::CG::operator()().
void quda::blas::xpy | ( | ColorSpinorField & | x, |
ColorSpinorField & | y | ||
) |
inline |
Definition at line 33 of file blas_quda.h.
References axpbyz().
Referenced by benchmark(), quda::CG::blocksolve(), quda::MG::buildFreeVectors(), quda::IncEigCG::eigCGsolve(), quda::MG::operator()(), quda::CG::operator()(), quda::CG3::operator()(), quda::CG3NE::operator()(), quda::CGNE::operator()(), quda::PreconCG::operator()(), quda::BiCGstab::operator()(), quda::BiCGstabL::operator()(), quda::GCR::operator()(), quda::CACGNE::operator()(), quda::MultiShiftCG::operator()(), quda::IncEigCG::operator()(), quda::GMResDR::operator()(), and test().
double3 quda::blas::xpyHeavyQuarkResidualNorm | ( | ColorSpinorField & | x, |
ColorSpinorField & | y, | ||
ColorSpinorField & | r | ||
) |
Definition at line 818 of file reduce_quda.cu.
References comm_size(), and quda::ColorSpinorField::Ncolor().
Referenced by benchmark(), quda::CG::blocksolve(), cDotProductNormB(), quda::CG::operator()(), quda::CG3::operator()(), quda::CG3NE::operator()(), quda::BiCGstab::operator()(), quda::BiCGstabL::operator()(), and test().
void quda::blas::zero | ( | ColorSpinorField & | a | ) |
Definition at line 472 of file blas_quda.cu.
References endReduce(), and initReduce().
Referenced by quda::CG::blocksolve(), quda::EigenSolver::deflate(), quda::EigenSolver::deflateSVD(), quda::IncEigCG::eigCGsolve(), genericReduce(), invertQuda(), quda::TRLM::lanczosStep(), mixed_reduce(), quda::CG::operator()(), quda::CG3::operator()(), quda::CG3NE::operator()(), quda::CGNE::operator()(), quda::CGNR::operator()(), quda::MPCG::operator()(), quda::BiCGstab::operator()(), quda::MPBiCGstab::operator()(), quda::BiCGstabL::operator()(), quda::GCR::operator()(), quda::MR::operator()(), quda::CACG::operator()(), quda::CACGNE::operator()(), quda::CACGNR::operator()(), quda::CAGCR::operator()(), quda::MultiShiftCG::operator()(), quda::MinResExt::operator()(), quda::IncEigCG::operator()(), quda::GMResDR::operator()(), quda::Deflation::reduce(), reduceKernel(), quda::IncEigCG::RestartVT(), quda::GMResDR::RestartVZH(), and uni_reduce().
|
static |
Definition at line 17 of file multi_blas_core.cuh.
Referenced by quda::blas::MultiBlas< NXZ, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Functor, T >::apply(), multiReduce(), quda::blas::multicaxpy_< NXZ, Float2, FloatN >::operator()(), quda::blas::multicaxpyz_< NXZ, Float2, FloatN >::operator()(), and quda::blas::multi_caxpyBxpz_< NXZ, Float2, FloatN >::operator()().
|
static |
Definition at line 20 of file multi_reduce_core.cuh.
|
static |
Definition at line 21 of file multi_blas_core.cuh.
Referenced by quda::blas::MultiBlas< NXZ, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Functor, T >::MultiBlas(), multiReduce(), quda::blas::multicaxpy_< NXZ, Float2, FloatN >::operator()(), quda::blas::multicaxpyz_< NXZ, Float2, FloatN >::operator()(), and quda::blas::multi_caxpyBxpz_< NXZ, Float2, FloatN >::operator()().
|
static |
Definition at line 24 of file multi_reduce_core.cuh.
|
static |
Definition at line 29 of file multi_blas_core.cuh.
Referenced by quda::blas::MultiBlas< NXZ, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Functor, T >::apply(), and multiReduceLaunch().
|
static |
Definition at line 32 of file multi_reduce_core.cuh.
|
static |
Definition at line 25 of file blas_quda.cu.
Referenced by getStream().
|
static |
Definition at line 18 of file multi_blas_core.cuh.
Referenced by quda::blas::MultiBlas< NXZ, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Functor, T >::apply(), multiReduce(), and quda::blas::multi_caxpyBxpz_< NXZ, Float2, FloatN >::operator()().
|
static |
Definition at line 21 of file multi_reduce_core.cuh.
|
static |
Definition at line 22 of file multi_blas_core.cuh.
Referenced by quda::blas::MultiBlas< NXZ, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Functor, T >::MultiBlas(), multiReduce(), and quda::blas::multi_caxpyBxpz_< NXZ, Float2, FloatN >::operator()().
|
static |
Definition at line 25 of file multi_reduce_core.cuh.
unsigned long long quda::blas::bytes |
Definition at line 23 of file blas_quda.cu.
Referenced by Spinor< RegType, StoreType, N, write >::backup(), quda::DomainWall5D< Float, nDim, nColor, Arg >::bytes(), quda::TwistedClover< Float, nDim, nColor, Arg >::bytes(), quda::WilsonClover< Float, nDim, nColor, Arg >::bytes(), quda::TwistedCloverPreconditioned< Float, nDim, nColor, Arg >::bytes(), quda::WilsonCloverPreconditioned< Float, nDim, nColor, Arg >::bytes(), quda::CloverField::Bytes(), quda::GaugeField::Bytes(), quda::clover::FloatNOrder< Float, length, N, add_rho, huge_alloc >::Bytes(), quda::computeMomAction(), quda::blas::copy_ns::copy(), quda::forceRecord(), initReduce(), quda::isUnitary(), mixed_reduce(), multiBlas(), multiReduce(), nativeBlas(), nativeReduce(), quda::Tunable::perfString(), TEST_P(), and quda::updateMomentum().
|
static |
Definition at line 19 of file multi_blas_core.cuh.
Referenced by quda::blas::MultiBlas< NXZ, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Functor, T >::apply(), and multiReduce().
|
static |
Definition at line 22 of file multi_reduce_core.cuh.
|
static |
Definition at line 23 of file multi_blas_core.cuh.
Referenced by quda::blas::MultiBlas< NXZ, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Functor, T >::MultiBlas(), and multiReduce().
|
static |
Definition at line 26 of file multi_reduce_core.cuh.
unsigned long long quda::blas::flops |
Definition at line 22 of file blas_quda.cu.
Referenced by quda::cublas::BatchInvertMatrix(), quda::CG::blocksolve(), quda::completeKSForce(), quda::computeMomAction(), quda::CACG::create(), quda::CAGCR::create(), dslashTest(), quda::IncEigCG::eigCGsolve(), quda::DomainWall5D< Float, nDim, nColor, Arg >::flops(), quda::TwistedMass< Float, nDim, nColor, Arg >::flops(), quda::TwistedClover< Float, nDim, nColor, Arg >::flops(), quda::WilsonClover< Float, nDim, nColor, Arg >::flops(), quda::NdegTwistedMass< Float, nDim, nColor, Arg >::flops(), quda::TwistedCloverPreconditioned< Float, nDim, nColor, Arg >::flops(), quda::WilsonCloverPreconditioned< Float, nDim, nColor, Arg >::flops(), quda::TwistedMassPreconditioned< Float, nDim, nColor, Arg >::flops(), quda::NdegTwistedMassPreconditioned< Float, nDim, nColor, Arg >::flops(), quda::Dirac::Flops(), quda::forceRecord(), gauge_force_test(), hisq_test(), quda::isUnitary(), llfat_test(), main(), multiBlas(), multiReduce(), nativeBlas(), nativeReduce(), quda::CG::operator()(), quda::CG3::operator()(), quda::CG3NE::operator()(), quda::PreconCG::operator()(), quda::BiCGstab::operator()(), quda::BiCGstabL::operator()(), quda::GCR::operator()(), quda::MR::operator()(), quda::CACG::operator()(), quda::CAGCR::operator()(), quda::MultiShiftCG::operator()(), quda::GMResDR::operator()(), quda::Tunable::perfString(), quda::Deflation::size(), TEST_P(), and quda::updateMomentum().