|
| template<int k, int NXZ, typename FloatN , int M, typename Arg > |
| __device__ void | compute (Arg &arg, int idx, int parity) |
| |
| template<typename FloatN , int M, int NXZ, typename SpinorX , typename SpinorY , typename SpinorZ , typename SpinorW , typename Functor > |
| __global__ void | multiblasKernel (MultiBlasArg< NXZ, SpinorX, SpinorY, SpinorZ, SpinorW, Functor > arg) |
| | Generic multi-blas kernel with four loads and up to four stores. More...
|
| |
| template<int NXZ, typename RegType , typename StoreType , typename yType , int M, template< int, typename, typename > class Functor, typename write , typename T > |
| void | multiblasCuda (const coeff_array< T > &a, const coeff_array< T > &b, const coeff_array< T > &c, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, std::vector< ColorSpinorField *> &z, std::vector< ColorSpinorField *> &w, int length) |
| |
| template<typename Float2 , typename write , typename SpinorX , typename SpinorY , typename SpinorZ , typename SpinorW , typename Functor > |
| void | genericMultiBlas (SpinorX &X, SpinorY &Y, SpinorZ &Z, SpinorW &W, Functor f) |
| |
| template<typename Float , typename yFloat , int nSpin, int nColor, QudaFieldOrder order, typename write , typename Functor > |
| void | genericMultiBlas (ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, Functor f) |
| |
| template<typename Float , typename yFloat , int nSpin, QudaFieldOrder order, typename write , typename Functor > |
| void | genericMultiBlas (ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, Functor f) |
| |
| template<typename Float , typename yFloat , QudaFieldOrder order, typename write , typename Functor > |
| void | genericMultiBlas (ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, Functor f) |
| |
| template<typename Float , typename yFloat , typename write , typename Functor > |
| void | genericMultiBlas (ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, Functor f) |
| |
template<typename Float2 , typename write , typename SpinorX , typename SpinorY , typename SpinorZ , typename SpinorW , typename Functor >
| void genericMultiBlas |
( |
SpinorX & |
X, |
|
|
SpinorY & |
Y, |
|
|
SpinorZ & |
Z, |
|
|
SpinorW & |
W, |
|
|
Functor |
f |
|
) |
| |
Generic blas kernel with four loads and up to four stores. FIXME - this is hacky due to the lack of std::complex support in CUDA. The functors are defined in terms of FloatN vectors, whereas the operator() accessor returns std::complex<Float>
Definition at line 351 of file multi_blas_core.cuh.
References c, errorQuda, f, quda::make_Complex(), parity, s, X, x, and Z.
template<typename Float , typename yFloat , int nSpin, int nColor, QudaFieldOrder order, typename write , typename Functor >
| void genericMultiBlas |
( |
ColorSpinorField & |
x, |
|
|
ColorSpinorField & |
y, |
|
|
ColorSpinorField & |
z, |
|
|
ColorSpinorField & |
w, |
|
|
Functor |
f |
|
) |
| |
template<typename Float , typename yFloat , int nSpin, QudaFieldOrder order, typename write , typename Functor >
| void genericMultiBlas |
( |
ColorSpinorField & |
x, |
|
|
ColorSpinorField & |
y, |
|
|
ColorSpinorField & |
z, |
|
|
ColorSpinorField & |
w, |
|
|
Functor |
f |
|
) |
| |
template<typename Float , typename yFloat , QudaFieldOrder order, typename write , typename Functor >
| void genericMultiBlas |
( |
ColorSpinorField & |
x, |
|
|
ColorSpinorField & |
y, |
|
|
ColorSpinorField & |
z, |
|
|
ColorSpinorField & |
w, |
|
|
Functor |
f |
|
) |
| |
template<int NXZ, typename RegType , typename StoreType , typename yType , int M, template< int, typename, typename > class Functor, typename write , typename T >
| void multiblasCuda |
( |
const coeff_array< T > & |
a, |
|
|
const coeff_array< T > & |
b, |
|
|
const coeff_array< T > & |
c, |
|
|
std::vector< ColorSpinorField *> & |
x, |
|
|
std::vector< ColorSpinorField *> & |
y, |
|
|
std::vector< ColorSpinorField *> & |
z, |
|
|
std::vector< ColorSpinorField *> & |
w, |
|
|
int |
length |
|
) |
| |
Definition at line 250 of file multi_blas_core.cuh.
References a, Amatrix_d, Amatrix_h, MultiBlasCuda< NXZ, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Functor >::apply(), b, quda::blas::blasStrings, Bmatrix_d, Bmatrix_h, quda::blas::bytes, MultiBlasCuda< NXZ, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Functor >::bytes(), c, checkCudaError, Cmatrix_d, Cmatrix_h, errorQuda, f, quda::blas::flops, MultiBlasCuda< NXZ, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Functor >::flops(), quda::blas::getStream(), fused_exterior_ndeg_tm_dslash_cuda_gen::i, length, MAX_MATRIX_SIZE, MAX_MULTI_BLAS_N, strcat(), strcpy(), w, X, x, y, Z, and z.
template<typename FloatN , int M, int NXZ, typename SpinorX , typename SpinorY , typename SpinorZ , typename SpinorW , typename Functor >
| __global__ void multiblasKernel |
( |
MultiBlasArg< NXZ, SpinorX, SpinorY, SpinorZ, SpinorW, Functor > |
arg | ) |
|