QUDA  0.9.0
Classes | Namespaces | Macros | Functions | Variables
multi_blas_core.cuh File Reference
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Classes

struct  MultiBlasArg< NXZ, SpinorX, SpinorY, SpinorZ, SpinorW, Functor >
 Parameter struct for generic multi-blas kernel. More...
 
struct  detail::to_chars< digits >
 
struct  detail::explode< rem, digits >
 
struct  detail::explode< 0, digits... >
 
struct  num_to_string< num >
 
class  MultiBlasCuda< NXZ, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Functor >
 
struct  coeff_array< T >
 

Namespaces

 detail
 

Macros

#define MAX_MATRIX_SIZE   4096
 

Functions

template<int k, int NXZ, typename FloatN , int M, typename Arg >
__device__ void compute (Arg &arg, int idx, int parity)
 
template<typename FloatN , int M, int NXZ, typename SpinorX , typename SpinorY , typename SpinorZ , typename SpinorW , typename Functor >
__global__ void multiblasKernel (MultiBlasArg< NXZ, SpinorX, SpinorY, SpinorZ, SpinorW, Functor > arg)
 Generic multi-blas kernel with four loads and up to four stores. More...
 
template<int NXZ, typename RegType , typename StoreType , typename yType , int M, template< int, typename, typename > class Functor, typename write , typename T >
void multiblasCuda (const coeff_array< T > &a, const coeff_array< T > &b, const coeff_array< T > &c, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, std::vector< ColorSpinorField *> &z, std::vector< ColorSpinorField *> &w, int length)
 
template<typename Float2 , typename write , typename SpinorX , typename SpinorY , typename SpinorZ , typename SpinorW , typename Functor >
void genericMultiBlas (SpinorX &X, SpinorY &Y, SpinorZ &Z, SpinorW &W, Functor f)
 
template<typename Float , typename yFloat , int nSpin, int nColor, QudaFieldOrder order, typename write , typename Functor >
void genericMultiBlas (ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, Functor f)
 
template<typename Float , typename yFloat , int nSpin, QudaFieldOrder order, typename write , typename Functor >
void genericMultiBlas (ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, Functor f)
 
template<typename Float , typename yFloat , QudaFieldOrder order, typename write , typename Functor >
void genericMultiBlas (ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, Functor f)
 
template<typename Float , typename yFloat , typename write , typename Functor >
void genericMultiBlas (ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, Functor f)
 

Variables

static __constant__ signed char Amatrix_d [MAX_MATRIX_SIZE]
 
static __constant__ signed char Bmatrix_d [MAX_MATRIX_SIZE]
 
static __constant__ signed char Cmatrix_d [MAX_MATRIX_SIZE]
 
static signed char * Amatrix_h
 
static signed char * Bmatrix_h
 
static signed char * Cmatrix_h
 

Macro Definition Documentation

◆ MAX_MATRIX_SIZE

#define MAX_MATRIX_SIZE   4096

Definition at line 38 of file multi_blas_core.cuh.

Referenced by multiblasCuda().

Function Documentation

◆ compute()

template<int k, int NXZ, typename FloatN , int M, typename Arg >
__device__ void compute ( Arg &  arg,
int  idx,
int  parity 
)
inline

Definition at line 48 of file multi_blas_core.cuh.

References quda::arg(), blockDim, gridDim, idx, parity, w, x, y, and z.

Here is the call graph for this function:

◆ genericMultiBlas() [1/5]

template<typename Float2 , typename write , typename SpinorX , typename SpinorY , typename SpinorZ , typename SpinorW , typename Functor >
void genericMultiBlas ( SpinorX &  X,
SpinorY &  Y,
SpinorZ &  Z,
SpinorW &  W,
Functor  f 
)

Generic blas kernel with four loads and up to four stores. FIXME: this is hacky due to the lack of std::complex support in CUDA. The functors are defined in terms of FloatN vectors, whereas the operator() accessor returns std::complex<Float>.

Definition at line 351 of file multi_blas_core.cuh.

References c, errorQuda, f, quda::make_Complex(), parity, s, X, x, and Z.

Here is the call graph for this function:

◆ genericMultiBlas() [2/5]

template<typename Float , typename yFloat , int nSpin, int nColor, QudaFieldOrder order, typename write , typename Functor >
void genericMultiBlas ( ColorSpinorField &  x,
ColorSpinorField &  y,
ColorSpinorField &  z,
ColorSpinorField &  w,
Functor  f 
)

Definition at line 375 of file multi_blas_core.cuh.

References f, w, X, x, y, Z, and z.

◆ genericMultiBlas() [3/5]

template<typename Float , typename yFloat , int nSpin, QudaFieldOrder order, typename write , typename Functor >
void genericMultiBlas ( ColorSpinorField &  x,
ColorSpinorField &  y,
ColorSpinorField &  z,
ColorSpinorField &  w,
Functor  f 
)

Definition at line 385 of file multi_blas_core.cuh.

References errorQuda, f, w, x, y, and z.

◆ genericMultiBlas() [4/5]

template<typename Float , typename yFloat , QudaFieldOrder order, typename write , typename Functor >
void genericMultiBlas ( ColorSpinorField &  x,
ColorSpinorField &  y,
ColorSpinorField &  z,
ColorSpinorField &  w,
Functor  f 
)

Definition at line 410 of file multi_blas_core.cuh.

References errorQuda, f, w, x, y, and z.

◆ genericMultiBlas() [5/5]

template<typename Float , typename yFloat , typename write , typename Functor >
void genericMultiBlas ( ColorSpinorField &  x,
ColorSpinorField &  y,
ColorSpinorField &  z,
ColorSpinorField &  w,
Functor  f 
)

Definition at line 425 of file multi_blas_core.cuh.

References errorQuda, f, QUDA_SPACE_SPIN_COLOR_FIELD_ORDER, w, x, y, and z.

◆ multiblasCuda()

template<int NXZ, typename RegType , typename StoreType , typename yType , int M, template< int, typename, typename > class Functor, typename write , typename T >
void multiblasCuda ( const coeff_array< T > &  a,
const coeff_array< T > &  b,
const coeff_array< T > &  c,
std::vector< ColorSpinorField *> &  x,
std::vector< ColorSpinorField *> &  y,
std::vector< ColorSpinorField *> &  z,
std::vector< ColorSpinorField *> &  w,
int  length 
)

◆ multiblasKernel()

template<typename FloatN , int M, int NXZ, typename SpinorX , typename SpinorY , typename SpinorZ , typename SpinorW , typename Functor >
__global__ void multiblasKernel ( MultiBlasArg< NXZ, SpinorX, SpinorY, SpinorZ, SpinorW, Functor >  arg)

Generic multi-blas kernel with four loads and up to four stores.

Parameters
[in,out] arg: Argument struct with required metadata (input/output fields, functor, etc.)

Definition at line 78 of file multi_blas_core.cuh.

References quda::arg(), blockDim, fused_exterior_ndeg_tm_dslash_cuda_gen::i, and parity.

Here is the call graph for this function:

Variable Documentation

◆ Amatrix_d

__constant__ signed char Amatrix_d[MAX_MATRIX_SIZE]
static

◆ Amatrix_h

signed char* Amatrix_h
static

◆ Bmatrix_d

__constant__ signed char Bmatrix_d[MAX_MATRIX_SIZE]
static

◆ Bmatrix_h

signed char* Bmatrix_h
static

◆ Cmatrix_d

__constant__ signed char Cmatrix_d[MAX_MATRIX_SIZE]
static

Definition at line 41 of file multi_blas_core.cuh.

Referenced by multiblasCuda().

◆ Cmatrix_h

signed char* Cmatrix_h
static

Definition at line 45 of file multi_blas_core.cuh.

Referenced by multiblasCuda().