QUDA  0.9.0
Classes | Functions | Variables
reduce_core.cuh File Reference
#include <launch_kernel.cuh>
Include dependency graph for reduce_core.cuh:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Classes

struct  ReductionArg< ReduceType, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer >
 
class  ReduceCuda< doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer >
 
struct  vector< typename, N >
 
struct  vector< double, 2 >
 
struct  vector< float, 2 >
 

Functions

__host__ __device__ double set (double &x)
 
__host__ __device__ double2 set (double2 &x)
 
__host__ __device__ double3 set (double3 &x)
 
__host__ __device__ double4 set (double4 &x)
 
__host__ __device__ void sum (double &a, double &b)
 
__host__ __device__ void sum (double2 &a, double2 &b)
 
__host__ __device__ void sum (double3 &a, double3 &b)
 
__host__ __device__ void sum (double4 &a, double4 &b)
 
template<int block_size, typename ReduceType , typename FloatN , int M, typename SpinorX , typename SpinorY , typename SpinorZ , typename SpinorW , typename SpinorV , typename Reducer >
__global__ void reduceKernel (ReductionArg< ReduceType, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer > arg)
 
template<typename doubleN , typename ReduceType , typename FloatN , int M, typename SpinorX , typename SpinorY , typename SpinorZ , typename SpinorW , typename SpinorV , typename Reducer >
doubleN reduceLaunch (ReductionArg< ReduceType, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer > &arg, const TuneParam &tp, const cudaStream_t &stream)
 
template<typename doubleN , typename ReduceType , typename RegType , typename StoreType , typename zType , int M, template< typename ReducerType, typename Float, typename FloatN > class Reducer, int writeX, int writeY, int writeZ, int writeW, int writeV>
doubleN reduceCuda (const double2 &a, const double2 &b, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &v, int length)
 
template<typename ReduceType , typename Float2 , int writeX, int writeY, int writeZ, int writeW, int writeV, typename SpinorX , typename SpinorY , typename SpinorZ , typename SpinorW , typename SpinorV , typename Reducer >
ReduceType genericReduce (SpinorX &X, SpinorY &Y, SpinorZ &Z, SpinorW &W, SpinorV &V, Reducer r)
 
template<typename ReduceType , typename Float , typename zFloat , int nSpin, int nColor, QudaFieldOrder order, int writeX, int writeY, int writeZ, int writeW, int writeV, typename R >
ReduceType genericReduce (ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &v, R r)
 
template<typename ReduceType , typename Float , typename zFloat , int nSpin, QudaFieldOrder order, int writeX, int writeY, int writeZ, int writeW, int writeV, typename R >
ReduceType genericReduce (ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &v, R r)
 
template<typename ReduceType , typename Float , typename zFloat , QudaFieldOrder order, int writeX, int writeY, int writeZ, int writeW, int writeV, typename R >
ReduceType genericReduce (ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &v, R r)
 
template<typename doubleN , typename ReduceType , typename Float , typename zFloat , int writeX, int writeY, int writeZ, int writeW, int writeV, typename R >
doubleN genericReduce (ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &v, R r)
 

Variables

static __device__ unsigned int count = 0
 
static __shared__ bool isLastBlockDone
 

Function Documentation

◆ genericReduce() [1/5]

template<typename ReduceType , typename Float2 , int writeX, int writeY, int writeZ, int writeW, int writeV, typename SpinorX , typename SpinorY , typename SpinorZ , typename SpinorW , typename SpinorV , typename Reducer >
ReduceType genericReduce ( SpinorX &  X,
SpinorY &  Y,
SpinorZ &  Z,
SpinorW &  W,
SpinorV &  V,
Reducer  r 
)

Generic reduce kernel over five fields (X, Y, Z, W, V), with up to five stores controlled by the writeX, writeY, writeZ, writeW and writeV flags. FIXME: this is hacky due to the lack of std::complex support in CUDA. The functors are defined in terms of FloatN vectors, whereas the operator() accessor returns std::complex<Float>.

Definition at line 269 of file reduce_core.cuh.

References c, quda::make_Complex(), parity, s, sum(), V, X, x, Z, and quda::blas::zero().

Here is the call graph for this function:

◆ genericReduce() [2/5]

template<typename ReduceType , typename Float , typename zFloat , int nSpin, int nColor, QudaFieldOrder order, int writeX, int writeY, int writeZ, int writeW, int writeV, typename R >
ReduceType genericReduce ( ColorSpinorField &  x,
ColorSpinorField &  y,
ColorSpinorField &  z,
ColorSpinorField &  w,
ColorSpinorField &  v,
R  r 
)

Definition at line 305 of file reduce_core.cuh.

References V, w, X, x, y, Z, and z.

◆ genericReduce() [3/5]

template<typename ReduceType , typename Float , typename zFloat , int nSpin, QudaFieldOrder order, int writeX, int writeY, int writeZ, int writeW, int writeV, typename R >
ReduceType genericReduce ( ColorSpinorField &  x,
ColorSpinorField &  y,
ColorSpinorField &  z,
ColorSpinorField &  w,
ColorSpinorField &  v,
R  r 
)

Definition at line 315 of file reduce_core.cuh.

References errorQuda, value, w, x, y, z, and quda::blas::zero().

Here is the call graph for this function:

◆ genericReduce() [4/5]

template<typename ReduceType , typename Float , typename zFloat , QudaFieldOrder order, int writeX, int writeY, int writeZ, int writeW, int writeV, typename R >
ReduceType genericReduce ( ColorSpinorField &  x,
ColorSpinorField &  y,
ColorSpinorField &  z,
ColorSpinorField &  w,
ColorSpinorField &  v,
R  r 
)

Definition at line 351 of file reduce_core.cuh.

References errorQuda, value, w, x, y, z, and quda::blas::zero().

Here is the call graph for this function:

◆ genericReduce() [5/5]

template<typename doubleN , typename ReduceType , typename Float , typename zFloat , int writeX, int writeY, int writeZ, int writeW, int writeV, typename R >
doubleN genericReduce ( ColorSpinorField &  x,
ColorSpinorField &  y,
ColorSpinorField &  z,
ColorSpinorField &  w,
ColorSpinorField &  v,
R  r 
)

Definition at line 370 of file reduce_core.cuh.

References QUDA_SPACE_SPIN_COLOR_FIELD_ORDER, value, w, warningQuda, x, y, z, and quda::blas::zero().

Here is the call graph for this function:

◆ reduceCuda()

template<typename doubleN , typename ReduceType , typename RegType , typename StoreType , typename zType , int M, template< typename ReducerType, typename Float, typename FloatN > class Reducer, int writeX, int writeY, int writeZ, int writeW, int writeV>
doubleN reduceCuda ( const double2 &  a,
const double2 &  b,
ColorSpinorField &  x,
ColorSpinorField &  y,
ColorSpinorField &  z,
ColorSpinorField &  w,
ColorSpinorField &  v,
int  length 
)

◆ reduceKernel()

template<int block_size, typename ReduceType , typename FloatN , int M, typename SpinorX , typename SpinorY , typename SpinorZ , typename SpinorW , typename SpinorV , typename Reducer >
__global__ void reduceKernel ( ReductionArg< ReduceType, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer >  arg)

Generic reduction kernel with up to four loads and three saves.

Definition at line 43 of file reduce_core.cuh.

References quda::arg(), blockDim, gridDim, fused_exterior_ndeg_tm_dslash_cuda_gen::i, parity, sum(), w, x, y, z, and quda::blas::zero().

Referenced by reduceLaunch().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ reduceLaunch()

template<typename doubleN , typename ReduceType , typename FloatN , int M, typename SpinorX , typename SpinorY , typename SpinorZ , typename SpinorW , typename SpinorV , typename Reducer >
doubleN reduceLaunch ( ReductionArg< ReduceType, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer > &  arg,
const TuneParam &  tp,
const cudaStream_t &  stream 
)

Generic reduction kernel launcher.

Definition at line 85 of file reduce_core.cuh.

References quda::arg(), commAsyncReduction(), deviceProp, errorQuda, h_reduce, hd_reduce, LAUNCH_KERNEL, quda::qudaEventQuery(), quda::qudaEventRecord(), qudaMemcpy, reduceEnd, reduceKernel(), stream, and sum().

Here is the call graph for this function:

◆ set() [1/4]

__host__ __device__ double set ( double &  x)
inline

Definition at line 1 of file reduce_core.cuh.

References x.

◆ set() [2/4]

__host__ __device__ double2 set ( double2 &  x)
inline

Definition at line 2 of file reduce_core.cuh.

References x.

◆ set() [3/4]

__host__ __device__ double3 set ( double3 &  x)
inline

Definition at line 3 of file reduce_core.cuh.

References x.

◆ set() [4/4]

__host__ __device__ double4 set ( double4 &  x)
inline

Definition at line 4 of file reduce_core.cuh.

References x.

◆ sum() [1/4]

__host__ __device__ void sum ( double &  a,
double &  b 
)
inline

Definition at line 5 of file reduce_core.cuh.

References a, and b.

Referenced by genericReduce(), reduceKernel(), and reduceLaunch().

Here is the caller graph for this function:

◆ sum() [2/4]

__host__ __device__ void sum ( double2 &  a,
double2 &  b 
)
inline

Definition at line 6 of file reduce_core.cuh.

References a, and b.

◆ sum() [3/4]

__host__ __device__ void sum ( double3 &  a,
double3 &  b 
)
inline

Definition at line 7 of file reduce_core.cuh.

References a, and b.

◆ sum() [4/4]

__host__ __device__ void sum ( double4 &  a,
double4 &  b 
)
inline

Definition at line 8 of file reduce_core.cuh.

References a, and b.

Variable Documentation

◆ count

__device__ unsigned int count = 0
static

Definition at line 19 of file reduce_core.cuh.

◆ isLastBlockDone

__shared__ bool isLastBlockDone
static

Definition at line 20 of file reduce_core.cuh.