|
cudaStream_t * | quda::blas::getStream () |
|
cudaEvent_t * | quda::blas::getReduceEvent () |
|
bool | quda::blas::getFastReduce () |
|
void | quda::blas::initFastReduce (int words) |
|
void | quda::blas::completeFastReduce (int32_t words) |
|
template<typename doubleN , typename ReduceType , typename FloatN , int M, int NXZ, typename Arg > |
void | quda::blas::multiReduceLaunch (doubleN result[], Arg &arg, const TuneParam &tp, const cudaStream_t &stream, Tunable &tunable) |
|
template<typename doubleN , typename ReduceType , typename RegType , typename StoreType , typename yType , int M, int NXZ, template< int MXZ, typename ReducerType, typename Float, typename FloatN > class Reducer, typename write , typename T > |
void | quda::blas::multiReduce (doubleN result[], const coeff_array< T > &a, const coeff_array< T > &b, const coeff_array< T > &c, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, std::vector< ColorSpinorField *> &z, std::vector< ColorSpinorField *> &w, int length) |
|
template<int NXZ, typename doubleN , typename ReduceType , template< int MXZ, typename ReducerType, typename Float, typename FloatN > class Reducer, typename write , bool siteUnroll, typename T > |
void | quda::blas::multiReduce (doubleN result[], const coeff_array< T > &a, const coeff_array< T > &b, const coeff_array< T > &c, CompositeColorSpinorField &x, CompositeColorSpinorField &y, CompositeColorSpinorField &z, CompositeColorSpinorField &w) |
|
template<int NXZ, typename doubleN , typename ReduceType , template< int MXZ, typename ReducerType, typename Float, typename FloatN > class Reducer, typename write , bool siteUnroll, typename T > |
void | quda::blas::mixedMultiReduce (doubleN result[], const coeff_array< T > &a, const coeff_array< T > &b, const coeff_array< T > &c, CompositeColorSpinorField &x, CompositeColorSpinorField &y, CompositeColorSpinorField &z, CompositeColorSpinorField &w) |
|
template<int NXZ, typename doubleN , typename ReduceType , template< int MXZ, typename ReducerType, typename Float, typename FloatN > class ReducerDiagonal, typename writeDiagonal , template< int MXZ, typename ReducerType, typename Float, typename FloatN > class ReducerOffDiagonal, typename writeOffDiagonal , bool siteUnroll, typename T > |
void | quda::blas::multiReduce (doubleN result[], const coeff_array< T > &a, const coeff_array< T > &b, const coeff_array< T > &c, CompositeColorSpinorField &x, CompositeColorSpinorField &y, CompositeColorSpinorField &z, CompositeColorSpinorField &w, int i, int j) |
|
void | quda::blas::reDotProduct (double *result, std::vector< ColorSpinorField *> &a, std::vector< ColorSpinorField *> &b) |
|
template<template< int MXZ, typename ReducerType, typename Float, typename FloatN > class ReducerDiagonal, typename writeDiagonal , template< int MXZ, typename ReducerType, typename Float, typename FloatN > class ReducerOffDiagonal, typename writeOffDiagonal > |
void | quda::blas::multiReduce_recurse (Complex *result, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, std::vector< ColorSpinorField *> &z, std::vector< ColorSpinorField *> &w, int i_idx, int j_idx, bool hermitian, unsigned int tile_size) |
|
void | quda::blas::cDotProduct (Complex *result, std::vector< ColorSpinorField *> &a, std::vector< ColorSpinorField *> &b) |
| Computes the matrix of inner products between the vector set a and the vector set b. More...
|
|
void | quda::blas::hDotProduct (Complex *result, std::vector< ColorSpinorField *> &a, std::vector< ColorSpinorField *> &b) |
| Computes the matrix of inner products between the vector set a and the vector set b. This routine is specifically for the case where the result matrix is guaranteed to be Hermitian. Requires a.size()==b.size(). More...
|
|
void | quda::blas::hDotProduct_Anorm (Complex *result, std::vector< ColorSpinorField *> &a, std::vector< ColorSpinorField *> &b) |
| Computes the matrix of inner products between the vector set a and the vector set b. This routine is specifically for the case where the result matrix is guaranteed to be Hermitian. Uniquely defined for cases like (p, Ap) where the output is Hermitian, but there's an A-norm instead of an L2 norm. Requires a.size()==b.size(). More...
|
|
void | quda::blas::cDotProductCopy (Complex *result, std::vector< ColorSpinorField *> &a, std::vector< ColorSpinorField *> &b, std::vector< ColorSpinorField *> &c) |
| Computes the matrix of inner products between the vector set a and the vector set b, and copies b into c. More...
|
|