QUDA  0.9.0
Classes | Namespaces | Macros | Functions | Variables
multi_reduce_core.cuh File Reference
#include <launch_kernel.cuh>
Include dependency graph for multi_reduce_core.cuh:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Classes

struct  MultiReduceArg< NXZ, ReduceType, SpinorX, SpinorY, SpinorZ, SpinorW, Reducer >
 Parameter struct for generic multi-blas kernel. More...
 
struct  detail::to_chars< digits >
 
struct  detail::explode< rem, digits >
 
struct  detail::explode< 0, digits... >
 
struct  num_to_string< num >
 
class  MultiReduceCuda< NXZ, doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Reducer >
 
struct  coeff_array< T >
 

Namespaces

 detail
 

Macros

#define MAX_MATRIX_SIZE   4096
 

Functions

__host__ __device__ double set (double &x)
 
__host__ __device__ double2 set (double2 &x)
 
__host__ __device__ double3 set (double3 &x)
 
__host__ __device__ void sum (double &a, double &b)
 
__host__ __device__ void sum (double2 &a, double2 &b)
 
__host__ __device__ void sum (double3 &a, double3 &b)
 
template<int k, int NXZ, typename FloatN , int M, typename ReduceType , typename Arg >
__device__ void compute (vector_type< ReduceType, NXZ > &sum, Arg &arg, int idx, int parity)
 
template<int block_size, typename ReduceType , typename FloatN , int M, int NXZ, typename SpinorX , typename SpinorY , typename SpinorZ , typename SpinorW , typename Reducer >
__global__ void multiReduceKernel (MultiReduceArg< NXZ, ReduceType, SpinorX, SpinorY, SpinorZ, SpinorW, Reducer > arg)
 
template<typename doubleN , typename ReduceType , typename FloatN , int M, int NXZ, typename SpinorX , typename SpinorY , typename SpinorZ , typename SpinorW , typename Reducer >
void multiReduceLaunch (doubleN result[], MultiReduceArg< NXZ, ReduceType, SpinorX, SpinorY, SpinorZ, SpinorW, Reducer > &arg, const TuneParam &tp, const cudaStream_t &stream)
 
template<typename doubleN , typename ReduceType , typename RegType , typename StoreType , typename yType , int M, int NXZ, template< int MXZ, typename ReducerType, typename Float, typename FloatN > class Reducer, typename write , typename T >
void multiReduceCuda (doubleN result[], const reduce::coeff_array< T > &a, const reduce::coeff_array< T > &b, const reduce::coeff_array< T > &c, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, std::vector< ColorSpinorField *> &z, std::vector< ColorSpinorField *> &w, int length)
 

Variables

static __device__ unsigned int count = 0
 
static __shared__ bool isLastBlockDone
 
static __constant__ signed char Amatrix_d [MAX_MATRIX_SIZE]
 
static __constant__ signed char Bmatrix_d [MAX_MATRIX_SIZE]
 
static __constant__ signed char Cmatrix_d [MAX_MATRIX_SIZE]
 
static signed char * Amatrix_h
 
static signed char * Bmatrix_h
 
static signed char * Cmatrix_h
 

Macro Definition Documentation

◆ MAX_MATRIX_SIZE

#define MAX_MATRIX_SIZE   4096

Definition at line 65 of file multi_reduce_core.cuh.

Referenced by multiReduceCuda().

Function Documentation

◆ compute()

template<int k, int NXZ, typename FloatN , int M, typename ReduceType , typename Arg >
__device__ void compute ( vector_type< ReduceType, NXZ > &  sum,
Arg &  arg,
int  idx,
int  parity 
)
inline

Definition at line 76 of file multi_reduce_core.cuh.

References quda::arg(), blockDim, gridDim, idx, parity, sum(), w, x, y, and z.

Here is the call graph for this function:

◆ multiReduceCuda()

template<typename doubleN , typename ReduceType , typename RegType , typename StoreType , typename yType , int M, int NXZ, template< int MXZ, typename ReducerType, typename Float, typename FloatN > class Reducer, typename write , typename T >
void multiReduceCuda ( doubleN  result[],
const reduce::coeff_array< T > &  a,
const reduce::coeff_array< T > &  b,
const reduce::coeff_array< T > &  c,
std::vector< ColorSpinorField *> &  x,
std::vector< ColorSpinorField *> &  y,
std::vector< ColorSpinorField *> &  z,
std::vector< ColorSpinorField *> &  w,
int  length 
)

◆ multiReduceKernel()

template<int block_size, typename ReduceType , typename FloatN , int M, int NXZ, typename SpinorX , typename SpinorY , typename SpinorZ , typename SpinorW , typename Reducer >
__global__ void multiReduceKernel ( MultiReduceArg< NXZ, ReduceType, SpinorX, SpinorY, SpinorZ, SpinorW, Reducer >  arg)

Definition at line 121 of file multi_reduce_core.cuh.

References quda::arg(), blockDim, fused_exterior_ndeg_tm_dslash_cuda_gen::i, parity, and sum().

Referenced by multiReduceLaunch().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ multiReduceLaunch()

template<typename doubleN , typename ReduceType , typename FloatN , int M, int NXZ, typename SpinorX , typename SpinorY , typename SpinorZ , typename SpinorW , typename Reducer >
void multiReduceLaunch ( doubleN  result[],
MultiReduceArg< NXZ, ReduceType, SpinorX, SpinorY, SpinorZ, SpinorW, Reducer > &  arg,
const TuneParam &  tp,
const cudaStream_t &  stream 
)

◆ set() [1/3]

__host__ __device__ double set ( double x)
inline

Definition at line 1 of file multi_reduce_core.cuh.

References x.

◆ set() [2/3]

__host__ __device__ double2 set ( double2 &  x)
inline

Definition at line 2 of file multi_reduce_core.cuh.

References x.

◆ set() [3/3]

__host__ __device__ double3 set ( double3 &  x)
inline

Definition at line 3 of file multi_reduce_core.cuh.

References x.

◆ sum() [1/3]

__host__ __device__ void sum ( double a,
double b 
)
inline

Definition at line 4 of file multi_reduce_core.cuh.

References a, and b.

Referenced by quda::blas::cdot_(), quda::blas::cdotNormA_(), quda::blas::cdotNormB_(), compute(), covdevReference(), quda::blas::dot_(), dslashReference(), dslashReference_4d_sgpu(), dslashReference_5th(), quda::getRealTraceUVdagger(), quda::blas::HeavyQuarkResidualNorm(), multiReduceKernel(), multiReduceLaunch(), norm2(), quda::blas::norm2_(), normalize(), quda::blas::Dot< NXZ, ReduceType, Float2, FloatN >::operator()(), quda::blas::Norm1< ReduceType, Float2, FloatN >::operator()(), quda::reduce_vector< T >::operator()(), quda::blas::Norm2< ReduceType, Float2, FloatN >::operator()(), quda::blas::Cdot< NXZ, ReduceType, Float2, FloatN >::operator()(), quda::blas::CdotCopy< NXZ, ReduceType, Float2, FloatN >::operator()(), quda::blas::DotNormA< ReduceType, Float2, FloatN >::operator()(), quda::blas::axpyNorm2< ReduceType, Float2, FloatN >::operator()(), quda::blas::AxpyReDot< ReduceType, Float2, FloatN >::operator()(), quda::blas::xmyNorm2< ReduceType, Float2, FloatN >::operator()(), quda::blas::caxpyNorm2< ReduceType, Float2, FloatN >::operator()(), quda::blas::caxpyxmaznormx< ReduceType, Float2, FloatN >::operator()(), quda::blas::cabxpyaxnorm< ReduceType, Float2, FloatN >::operator()(), quda::blas::xpaycdotzy< ReduceType, Float2, FloatN >::operator()(), quda::blas::caxpydotzy< ReduceType, Float2, FloatN >::operator()(), quda::blas::CdotNormA< ReduceType, Float2, FloatN >::operator()(), quda::blas::CdotNormB< ReduceType, Float2, FloatN >::operator()(), quda::blas::caxpbypzYmbwcDotProductUYNormY_< ReduceType, Float2, FloatN >::operator()(), quda::blas::axpyCGNorm2< ReduceType, Float2, FloatN >::operator()(), quda::blas::tripleCGReduction_< ReduceType, Float2, FloatN >::operator()(), quda::blas::HeavyQuarkResidualNorm_< ReduceType, Float2, FloatN >::post(), quda::blas::xpyHeavyQuarkResidualNorm_< ReduceType, Float2, FloatN >::post(), quda::reduce2d(), reduceDouble(), reduceDoubleArray(), and quda::reduceRow().

Here is the caller graph for this function:

◆ sum() [2/3]

__host__ __device__ void sum ( double2 &  a,
double2 &  b 
)
inline

Definition at line 5 of file multi_reduce_core.cuh.

References a, and b.

◆ sum() [3/3]

__host__ __device__ void sum ( double3 &  a,
double3 &  b 
)
inline

Definition at line 6 of file multi_reduce_core.cuh.

References a, and b.

Variable Documentation

◆ Amatrix_d

__constant__ signed char Amatrix_d[MAX_MATRIX_SIZE]
static

Definition at line 66 of file multi_reduce_core.cuh.

Referenced by multiReduceCuda().

◆ Amatrix_h

signed char* Amatrix_h
static

Definition at line 70 of file multi_reduce_core.cuh.

Referenced by multiReduceCuda().

◆ Bmatrix_d

__constant__ signed char Bmatrix_d[MAX_MATRIX_SIZE]
static

Definition at line 67 of file multi_reduce_core.cuh.

Referenced by multiReduceCuda().

◆ Bmatrix_h

signed char* Bmatrix_h
static

Definition at line 71 of file multi_reduce_core.cuh.

Referenced by multiReduceCuda().

◆ Cmatrix_d

__constant__ signed char Cmatrix_d[MAX_MATRIX_SIZE]
static

Definition at line 68 of file multi_reduce_core.cuh.

Referenced by multiReduceCuda().

◆ Cmatrix_h

signed char* Cmatrix_h
static

Definition at line 72 of file multi_reduce_core.cuh.

Referenced by multiReduceCuda().

◆ count

__device__ unsigned int count = 0
static

Definition at line 19 of file multi_reduce_core.cuh.

◆ isLastBlockDone

__shared__ bool isLastBlockDone
static

Definition at line 20 of file multi_reduce_core.cuh.