|
QUDA
0.9.0
|
#include <launch_kernel.cuh>

Go to the source code of this file.
Functions | |
| __host__ __device__ double | set (double &x) |
| __host__ __device__ double2 | set (double2 &x) |
| __host__ __device__ double3 | set (double3 &x) |
| __host__ __device__ double4 | set (double4 &x) |
| __host__ __device__ void | sum (double &a, double &b) |
| __host__ __device__ void | sum (double2 &a, double2 &b) |
| __host__ __device__ void | sum (double3 &a, double3 &b) |
| __host__ __device__ void | sum (double4 &a, double4 &b) |
| template<int block_size, typename ReduceType , typename FloatN , int M, typename SpinorX , typename SpinorY , typename SpinorZ , typename SpinorW , typename SpinorV , typename Reducer > | |
| __global__ void | reduceKernel (ReductionArg< ReduceType, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer > arg) |
| template<typename doubleN , typename ReduceType , typename FloatN , int M, typename SpinorX , typename SpinorY , typename SpinorZ , typename SpinorW , typename SpinorV , typename Reducer > | |
| doubleN | reduceLaunch (ReductionArg< ReduceType, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer > &arg, const TuneParam &tp, const cudaStream_t &stream) |
| template<typename doubleN , typename ReduceType , typename RegType , typename StoreType , typename zType , int M, template< typename ReducerType, typename Float, typename FloatN > class Reducer, int writeX, int writeY, int writeZ, int writeW, int writeV> | |
| doubleN | reduceCuda (const double2 &a, const double2 &b, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &v, int length) |
| template<typename ReduceType , typename Float2 , int writeX, int writeY, int writeZ, int writeW, int writeV, typename SpinorX , typename SpinorY , typename SpinorZ , typename SpinorW , typename SpinorV , typename Reducer > | |
| ReduceType | genericReduce (SpinorX &X, SpinorY &Y, SpinorZ &Z, SpinorW &W, SpinorV &V, Reducer r) |
| template<typename ReduceType , typename Float , typename zFloat , int nSpin, int nColor, QudaFieldOrder order, int writeX, int writeY, int writeZ, int writeW, int writeV, typename R > | |
| ReduceType | genericReduce (ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &v, R r) |
| template<typename ReduceType , typename Float , typename zFloat , int nSpin, QudaFieldOrder order, int writeX, int writeY, int writeZ, int writeW, int writeV, typename R > | |
| ReduceType | genericReduce (ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &v, R r) |
| template<typename ReduceType , typename Float , typename zFloat , QudaFieldOrder order, int writeX, int writeY, int writeZ, int writeW, int writeV, typename R > | |
| ReduceType | genericReduce (ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &v, R r) |
| template<typename doubleN , typename ReduceType , typename Float , typename zFloat , int writeX, int writeY, int writeZ, int writeW, int writeV, typename R > | |
| doubleN | genericReduce (ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &v, R r) |
Variables | |
| static __device__ unsigned int | count = 0 |
| static __shared__ bool | isLastBlockDone |
| ReduceType genericReduce | ( | SpinorX & | X, |
| SpinorY & | Y, | ||
| SpinorZ & | Z, | ||
| SpinorW & | W, | ||
| SpinorV & | V, | ||
| Reducer | r | ||
| ) |
Generic reduce kernel with four loads and up to four stores. FIXME: this is hacky due to the lack of std::complex support in CUDA. The functors are defined in terms of FloatN vectors, whereas the operator() accessor returns std::complex<Float>.
Definition at line 269 of file reduce_core.cuh.
References c, quda::make_Complex(), parity, s, sum(), V, X, x, Z, and quda::blas::zero().

| ReduceType genericReduce | ( | ColorSpinorField & | x, |
| ColorSpinorField & | y, | ||
| ColorSpinorField & | z, | ||
| ColorSpinorField & | w, | ||
| ColorSpinorField & | v, | ||
| R | r | ||
| ) |
| ReduceType genericReduce | ( | ColorSpinorField & | x, |
| ColorSpinorField & | y, | ||
| ColorSpinorField & | z, | ||
| ColorSpinorField & | w, | ||
| ColorSpinorField & | v, | ||
| R | r | ||
| ) |
Definition at line 315 of file reduce_core.cuh.
References errorQuda, value, w, x, y, z, and quda::blas::zero().

| ReduceType genericReduce | ( | ColorSpinorField & | x, |
| ColorSpinorField & | y, | ||
| ColorSpinorField & | z, | ||
| ColorSpinorField & | w, | ||
| ColorSpinorField & | v, | ||
| R | r | ||
| ) |
Definition at line 351 of file reduce_core.cuh.
References errorQuda, value, w, x, y, z, and quda::blas::zero().

| doubleN genericReduce | ( | ColorSpinorField & | x, |
| ColorSpinorField & | y, | ||
| ColorSpinorField & | z, | ||
| ColorSpinorField & | w, | ||
| ColorSpinorField & | v, | ||
| R | r | ||
| ) |
Definition at line 370 of file reduce_core.cuh.
References QUDA_SPACE_SPIN_COLOR_FIELD_ORDER, value, w, warningQuda, x, y, z, and quda::blas::zero().

| doubleN reduceCuda | ( | const double2 & | a, |
| const double2 & | b, | ||
| ColorSpinorField & | x, | ||
| ColorSpinorField & | y, | ||
| ColorSpinorField & | z, | ||
| ColorSpinorField & | w, | ||
| ColorSpinorField & | v, | ||
| int | length | ||
| ) |
Definition at line 205 of file reduce_core.cuh.
References a, b, quda::blas::blasStrings, quda::blas::bytes, checkCudaError, checkLength(), quda::blas::flops, quda::blas::getStream(), length, quda::reduce(), strcat(), strcpy(), V, value, w, warningQuda, X, x, y, Z, z, and quda::blas::zero().

| __global__ void reduceKernel | ( | ReductionArg< ReduceType, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer > | arg | ) |
Generic reduction kernel with up to four loads and three saves.
Definition at line 43 of file reduce_core.cuh.
References quda::arg(), blockDim, gridDim, fused_exterior_ndeg_tm_dslash_cuda_gen::i, parity, sum(), w, x, y, z, and quda::blas::zero().
Referenced by reduceLaunch().


| doubleN reduceLaunch | ( | ReductionArg< ReduceType, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer > & | arg, |
| const TuneParam & | tp, | ||
| const cudaStream_t & | stream | ||
| ) |
Generic reduction kernel launcher.
Definition at line 85 of file reduce_core.cuh.
References quda::arg(), commAsyncReduction(), deviceProp, errorQuda, h_reduce, hd_reduce, LAUNCH_KERNEL, quda::qudaEventQuery(), quda::qudaEventRecord(), qudaMemcpy, reduceEnd, reduceKernel(), stream, and sum().

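| __host__ __device__ double set | ( | double & | x | ) |
Definition at line 1 of file reduce_core.cuh.
References x.

| __host__ __device__ double2 set | ( | double2 & | x | ) | inline |
Definition at line 2 of file reduce_core.cuh.
References x.

| __host__ __device__ double3 set | ( | double3 & | x | ) | inline |
Definition at line 3 of file reduce_core.cuh.
References x.

| __host__ __device__ double4 set | ( | double4 & | x | ) | inline |
Definition at line 4 of file reduce_core.cuh.
References x.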
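| __host__ __device__ void sum | ( | double & | a, |
| double & | b | ||
| ) |
Definition at line 5 of file reduce_core.cuh.
Referenced by genericReduce(), reduceKernel(), and reduceLaunch().

| __host__ __device__ void sum | ( | double2 & | a, |
| double2 & | b | ||
| ) | inline |
Definition at line 6 of file reduce_core.cuh.

| __host__ __device__ void sum | ( | double3 & | a, |
| double3 & | b | ||
| ) | inline |
Definition at line 7 of file reduce_core.cuh.

| __host__ __device__ void sum | ( | double4 & | a, |
| double4 & | b | ||
| ) | inline |
Definition at line 8 of file reduce_core.cuh.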
| __device__ unsigned int count = 0 | static |
Definition at line 19 of file reduce_core.cuh.

| __shared__ bool isLastBlockDone | static |
Definition at line 20 of file reduce_core.cuh.
1.8.14