#include <launch_kernel.cuh>

Include dependency graph for reduce_core.cuh:

This graph shows which files directly or indirectly include this file:

Classes
struct	ReductionArg< ReduceType, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer >

class	ReduceCuda< doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer >

struct	vector< typename, N >

struct	vector< double, 2 >

struct	vector< float, 2 >

Functions
__host__ __device__ double	set (double &x)

__host__ __device__ double2	set (double2 &x)

__host__ __device__ double3	set (double3 &x)

__host__ __device__ double4	set (double4 &x)

__host__ __device__ void	sum (double &a, double &b)

__host__ __device__ void	sum (double2 &a, double2 &b)

__host__ __device__ void	sum (double3 &a, double3 &b)

__host__ __device__ void	sum (double4 &a, double4 &b)

template<int block_size, typename ReduceType , typename FloatN , int M, typename SpinorX , typename SpinorY , typename SpinorZ , typename SpinorW , typename SpinorV , typename Reducer >
__global__ void	reduceKernel (ReductionArg< ReduceType, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer > arg)

template<typename doubleN , typename ReduceType , typename FloatN , int M, typename SpinorX , typename SpinorY , typename SpinorZ , typename SpinorW , typename SpinorV , typename Reducer >
doubleN	reduceLaunch (ReductionArg< ReduceType, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer > &arg, const TuneParam &tp, const cudaStream_t &stream)

template<typename doubleN , typename ReduceType , typename RegType , typename StoreType , typename zType , int M, template< typename ReducerType, typename Float, typename FloatN > class Reducer, int writeX, int writeY, int writeZ, int writeW, int writeV>
doubleN	reduceCuda (const double2 &a, const double2 &b, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &v, int length)

template<typename ReduceType , typename Float2 , int writeX, int writeY, int writeZ, int writeW, int writeV, typename SpinorX , typename SpinorY , typename SpinorZ , typename SpinorW , typename SpinorV , typename Reducer >
ReduceType	genericReduce (SpinorX &X, SpinorY &Y, SpinorZ &Z, SpinorW &W, SpinorV &V, Reducer r)

template<typename ReduceType , typename Float , typename zFloat , int nSpin, int nColor, QudaFieldOrder order, int writeX, int writeY, int writeZ, int writeW, int writeV, typename R >
ReduceType	genericReduce (ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &v, R r)

template<typename ReduceType , typename Float , typename zFloat , int nSpin, QudaFieldOrder order, int writeX, int writeY, int writeZ, int writeW, int writeV, typename R >
ReduceType	genericReduce (ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &v, R r)

template<typename ReduceType , typename Float , typename zFloat , QudaFieldOrder order, int writeX, int writeY, int writeZ, int writeW, int writeV, typename R >
ReduceType	genericReduce (ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &v, R r)

template<typename doubleN , typename ReduceType , typename Float , typename zFloat , int writeX, int writeY, int writeZ, int writeW, int writeV, typename R >
doubleN	genericReduce (ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &v, R r)

Variables
static __device__ unsigned int	count = 0

static __shared__ bool	isLastBlockDone

Function Documentation

◆ genericReduce() [1/5]

template<typename ReduceType , typename Float2 , int writeX, int writeY, int writeZ, int writeW, int writeV, typename SpinorX , typename SpinorY , typename SpinorZ , typename SpinorW , typename SpinorV , typename Reducer >

ReduceType genericReduce	(	SpinorX &	X,
		SpinorY &	Y,
		SpinorZ &	Z,
		SpinorW &	W,
		SpinorV &	V,
		Reducer	r
	)

Generic reduce kernel with four loads and up to four stores. FIXME: this is hacky due to the lack of std::complex support in CUDA. The functors are defined in terms of FloatN vectors, whereas the operator() accessor returns std::complex<Float>.

Definition at line 269 of file reduce_core.cuh.

References c, quda::make_Complex(), parity, s, sum(), V, X, x, Z, and quda::blas::zero().

Here is the call graph for this function:

◆ genericReduce() [2/5]

template<typename ReduceType , typename Float , typename zFloat , int nSpin, int nColor, QudaFieldOrder order, int writeX, int writeY, int writeZ, int writeW, int writeV, typename R >

ReduceType genericReduce	(	ColorSpinorField &	x,
		ColorSpinorField &	y,
		ColorSpinorField &	z,
		ColorSpinorField &	w,
		ColorSpinorField &	v,
		R	r
	)

Definition at line 305 of file reduce_core.cuh.

References V, w, X, x, y, Z, and z.

◆ genericReduce() [3/5]

template<typename ReduceType , typename Float , typename zFloat , int nSpin, QudaFieldOrder order, int writeX, int writeY, int writeZ, int writeW, int writeV, typename R >

ReduceType genericReduce	(	ColorSpinorField &	x,
		ColorSpinorField &	y,
		ColorSpinorField &	z,
		ColorSpinorField &	w,
		ColorSpinorField &	v,
		R	r
	)

Definition at line 315 of file reduce_core.cuh.

References errorQuda, value, w, x, y, z, and quda::blas::zero().

Here is the call graph for this function:

◆ genericReduce() [4/5]

template<typename ReduceType , typename Float , typename zFloat , QudaFieldOrder order, int writeX, int writeY, int writeZ, int writeW, int writeV, typename R >

ReduceType genericReduce	(	ColorSpinorField &	x,
		ColorSpinorField &	y,
		ColorSpinorField &	z,
		ColorSpinorField &	w,
		ColorSpinorField &	v,
		R	r
	)

Definition at line 351 of file reduce_core.cuh.

References errorQuda, value, w, x, y, z, and quda::blas::zero().

Here is the call graph for this function:

◆ genericReduce() [5/5]

template<typename doubleN , typename ReduceType , typename Float , typename zFloat , int writeX, int writeY, int writeZ, int writeW, int writeV, typename R >

doubleN genericReduce	(	ColorSpinorField &	x,
		ColorSpinorField &	y,
		ColorSpinorField &	z,
		ColorSpinorField &	w,
		ColorSpinorField &	v,
		R	r
	)

Definition at line 370 of file reduce_core.cuh.

References QUDA_SPACE_SPIN_COLOR_FIELD_ORDER, value, w, warningQuda, x, y, z, and quda::blas::zero().

Here is the call graph for this function:

◆ reduceCuda()

template<typename doubleN , typename ReduceType , typename RegType , typename StoreType , typename zType , int M, template< typename ReducerType, typename Float, typename FloatN > class Reducer, int writeX, int writeY, int writeZ, int writeW, int writeV>

doubleN reduceCuda	(	const double2 &	a,
		const double2 &	b,
		ColorSpinorField &	x,
		ColorSpinorField &	y,
		ColorSpinorField &	z,
		ColorSpinorField &	w,
		ColorSpinorField &	v,
		int	length
	)

Definition at line 205 of file reduce_core.cuh.

References a, b, quda::blas::blasStrings, quda::blas::bytes, checkCudaError, checkLength(), quda::blas::flops, quda::blas::getStream(), length, quda::reduce(), strcat(), strcpy(), V, value, w, warningQuda, X, x, y, Z, z, and quda::blas::zero().

Here is the call graph for this function:

◆ reduceKernel()

template<int block_size, typename ReduceType , typename FloatN , int M, typename SpinorX , typename SpinorY , typename SpinorZ , typename SpinorW , typename SpinorV , typename Reducer >

__global__ void reduceKernel ( ReductionArg< ReduceType, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer > arg )

Generic reduction kernel with up to four loads and three stores.

Definition at line 43 of file reduce_core.cuh.

References quda::arg(), blockDim, gridDim, fused_exterior_ndeg_tm_dslash_cuda_gen::i, parity, sum(), w, x, y, z, and quda::blas::zero().

Referenced by reduceLaunch().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ reduceLaunch()

template<typename doubleN , typename ReduceType , typename FloatN , int M, typename SpinorX , typename SpinorY , typename SpinorZ , typename SpinorW , typename SpinorV , typename Reducer >

doubleN reduceLaunch	(	ReductionArg< ReduceType, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer > &	arg,
		const TuneParam &	tp,
		const cudaStream_t &	stream
	)

Generic reduction kernel launcher.

Definition at line 85 of file reduce_core.cuh.

References quda::arg(), commAsyncReduction(), deviceProp, errorQuda, h_reduce, hd_reduce, LAUNCH_KERNEL, quda::qudaEventQuery(), quda::qudaEventRecord(), qudaMemcpy, reduceEnd, reduceKernel(), stream, and sum().

Here is the call graph for this function:

◆ set() [1/4]

__host__ __device__ double set ( double & x )

inline

Definition at line 1 of file reduce_core.cuh.

References x.

◆ set() [2/4]

__host__ __device__ double2 set ( double2 & x )

inline

Definition at line 2 of file reduce_core.cuh.

References x.

◆ set() [3/4]

__host__ __device__ double3 set ( double3 & x )

inline

Definition at line 3 of file reduce_core.cuh.

References x.

◆ set() [4/4]

__host__ __device__ double4 set ( double4 & x )

inline

Definition at line 4 of file reduce_core.cuh.

References x.

◆ sum() [1/4]

__host__ __device__ void sum	(	double &	a,
		double &	b
	)

inline

Definition at line 5 of file reduce_core.cuh.

References a, and b.

Referenced by genericReduce(), reduceKernel(), and reduceLaunch().

Here is the caller graph for this function:

◆ sum() [2/4]

__host__ __device__ void sum	(	double2 &	a,
		double2 &	b
	)

inline

Definition at line 6 of file reduce_core.cuh.

References a, and b.

◆ sum() [3/4]

__host__ __device__ void sum	(	double3 &	a,
		double3 &	b
	)

inline

Definition at line 7 of file reduce_core.cuh.

References a, and b.

◆ sum() [4/4]

__host__ __device__ void sum	(	double4 &	a,
		double4 &	b
	)

inline

Definition at line 8 of file reduce_core.cuh.

References a, and b.

Variable Documentation

◆ count

__device__ unsigned int count = 0

static

Definition at line 19 of file reduce_core.cuh.

◆ isLastBlockDone

__shared__ bool isLastBlockDone

static

Definition at line 20 of file reduce_core.cuh.

Classes

Functions

Variables

Function Documentation

◆ genericReduce() [1/5]

◆ genericReduce() [2/5]

◆ genericReduce() [3/5]

◆ genericReduce() [4/5]

◆ genericReduce() [5/5]

◆ reduceCuda()

◆ reduceKernel()

◆ reduceLaunch()

◆ set() [1/4]

◆ set() [2/4]

◆ set() [3/4]

◆ set() [4/4]

◆ sum() [1/4]

◆ sum() [2/4]

◆ sum() [3/4]

◆ sum() [4/4]

Variable Documentation

◆ count

◆ isLastBlockDone