#include <reduce_helper.h>

Public Member Functions
	ReduceArg (int n_reduce=1)

template<typename host_t , typename device_t = host_t>
void	complete (std::vector< host_t > &result, const qudaStream_t stream=0, bool reset=false)
	Finalize the reduction, returning the computed reduction into result. With heterogeneous atomics this means we poll the atomics until their value differs from the init_value. The alternate legacy path posts an event after the kernel and then polls on completion of the event. More...

template<typename host_t , typename device_t = host_t>
void	complete (host_t &result, const qudaStream_t stream=0, bool reset=false)
	Overload providing a simple reference interface. More...

template<int block_size_x, int block_size_y, bool do_sum = true, typename Reducer = cub::Sum>
__device__ void	reduce2d (const T &in, const int idx=0)
	Generic reduction function that reduces block-distributed data "in" per thread to a single value. This is the legacy variant which require explicit host-device synchronization to signal the completion of the reduction to the host. More...

template<int block_size, bool do_sum = true, typename Reducer = cub::Sum>
__device__ void	reduce (const T &in, const int idx=0)

Public Attributes
qudaError_t	launch_error

Detailed Description

template<typename T>
struct quda::ReduceArg< T >

Definition at line 58 of file reduce_helper.h.

Constructor & Destructor Documentation

◆ ReduceArg()

template<typename T >

quda::ReduceArg< T >::ReduceArg ( int n_reduce = 1 )

inline

Definition at line 80 of file reduce_helper.h.

Member Function Documentation

◆ complete() [1/2]

template<typename T >

template<typename host_t , typename device_t = host_t>

void quda::ReduceArg< T >::complete	(	host_t &	result,
		const qudaStream_t	stream = `0`,
		bool	reset = `false`
	)

inline

Overload providing a simple reference interface.

Parameters

[out]	result	The reduction result is copied here
[in]	stream	The stream on which we the reduction is being done
[in]	reset	Whether to reset the atomics after the reduction has completed; required if the same ReduceArg instance will be used for multiple reductions.

Definition at line 169 of file reduce_helper.h.

◆ complete() [2/2]

template<typename T >

template<typename host_t , typename device_t = host_t>

void quda::ReduceArg< T >::complete	(	std::vector< host_t > &	result,
		const qudaStream_t	stream = `0`,
		bool	reset = `false`
	)

inline

Finalize the reduction, returning the computed reduction into result. With heterogeneous atomics this means we poll the atomics until their value differs from the init_value. The alternate legacy path posts an event after the kernel and then polls on completion of the event.

Parameters

[out]	result	The reduction result is copied here
[in]	stream	The stream on which we the reduction is being done
[in]	reset	Whether to reset the atomics after the reduction has completed; required if the same aReducearg instance will be used for multiple reductions.

Definition at line 125 of file reduce_helper.h.

◆ reduce()

template<typename T >

template<int block_size, bool do_sum = true, typename Reducer = cub::Sum>

__device__ void quda::ReduceArg< T >::reduce	(	const T &	in,
		const int	idx = `0`
	)

inline

Definition at line 301 of file reduce_helper.h.

◆ reduce2d()

template<typename T >

template<int block_size_x, int block_size_y, bool do_sum = true, typename Reducer = cub::Sum>

__device__ void quda::ReduceArg< T >::reduce2d	(	const T &	in,
		const int	idx = `0`
	)

inline

Generic reduction function that reduces block-distributed data "in" per thread to a single value. This is the legacy variant which require explicit host-device synchronization to signal the completion of the reduction to the host.

Parameters

in	The input per-thread data to be reduced
idx	In the case of multiple reductions, idx identifies which reduction this thread block corresponds to. Typically idx will be constant along constant blockIdx.y and blockIdx.z.

Definition at line 257 of file reduce_helper.h.

Member Data Documentation

◆ launch_error

template<typename T >

qudaError_t quda::ReduceArg< T >::launch_error

Definition at line 60 of file reduce_helper.h.

The documentation for this struct was generated from the following file:

quda/include/reduce_helper.h

Public Member Functions

Public Attributes

Detailed Description

template<typename T> struct quda::ReduceArg< T >

Constructor & Destructor Documentation

◆ ReduceArg()

Member Function Documentation

◆ complete() [1/2]

◆ complete() [2/2]

◆ reduce()

◆ reduce2d()

Member Data Documentation

◆ launch_error

template<typename T>
struct quda::ReduceArg< T >