|
QUDA
0.9.0
|
#include <launch_kernel.cuh>

Go to the source code of this file.
Classes | |
| struct | MultiReduceArg< NXZ, ReduceType, SpinorX, SpinorY, SpinorZ, SpinorW, Reducer > |
| Parameter struct for generic multi-blas kernel. More... | |
| struct | detail::to_chars< digits > |
| struct | detail::explode< rem, digits > |
| struct | detail::explode< 0, digits... > |
| struct | num_to_string< num > |
| class | MultiReduceCuda< NXZ, doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Reducer > |
| struct | coeff_array< T > |
Namespaces | |
| detail | |
Macros | |
| #define | MAX_MATRIX_SIZE 4096 |
Functions | |
| __host__ __device__ double | set (double &x) |
| __host__ __device__ double2 | set (double2 &x) |
| __host__ __device__ double3 | set (double3 &x) |
| __host__ __device__ void | sum (double &a, double &b) |
| __host__ __device__ void | sum (double2 &a, double2 &b) |
| __host__ __device__ void | sum (double3 &a, double3 &b) |
| template<int k, int NXZ, typename FloatN , int M, typename ReduceType , typename Arg > | |
| __device__ void | compute (vector_type< ReduceType, NXZ > &sum, Arg &arg, int idx, int parity) |
| template<int block_size, typename ReduceType , typename FloatN , int M, int NXZ, typename SpinorX , typename SpinorY , typename SpinorZ , typename SpinorW , typename Reducer > | |
| __global__ void | multiReduceKernel (MultiReduceArg< NXZ, ReduceType, SpinorX, SpinorY, SpinorZ, SpinorW, Reducer > arg) |
| template<typename doubleN , typename ReduceType , typename FloatN , int M, int NXZ, typename SpinorX , typename SpinorY , typename SpinorZ , typename SpinorW , typename Reducer > | |
| void | multiReduceLaunch (doubleN result[], MultiReduceArg< NXZ, ReduceType, SpinorX, SpinorY, SpinorZ, SpinorW, Reducer > &arg, const TuneParam &tp, const cudaStream_t &stream) |
| template<typename doubleN , typename ReduceType , typename RegType , typename StoreType , typename yType , int M, int NXZ, template< int MXZ, typename ReducerType, typename Float, typename FloatN > class Reducer, typename write , typename T > | |
| void | multiReduceCuda (doubleN result[], const reduce::coeff_array< T > &a, const reduce::coeff_array< T > &b, const reduce::coeff_array< T > &c, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, std::vector< ColorSpinorField *> &z, std::vector< ColorSpinorField *> &w, int length) |
Variables | |
| static __device__ unsigned int | count = 0 |
| static __shared__ bool | isLastBlockDone |
| static __constant__ signed char | Amatrix_d [MAX_MATRIX_SIZE] |
| static __constant__ signed char | Bmatrix_d [MAX_MATRIX_SIZE] |
| static __constant__ signed char | Cmatrix_d [MAX_MATRIX_SIZE] |
| static signed char * | Amatrix_h |
| static signed char * | Bmatrix_h |
| static signed char * | Cmatrix_h |
| #define MAX_MATRIX_SIZE 4096 |
Definition at line 65 of file multi_reduce_core.cuh.
Referenced by multiReduceCuda().
| void multiReduceCuda | ( | doubleN | result[], |
| const reduce::coeff_array< T > & | a, | ||
| const reduce::coeff_array< T > & | b, | ||
| const reduce::coeff_array< T > & | c, | ||
| std::vector< ColorSpinorField *> & | x, | ||
| std::vector< ColorSpinorField *> & | y, | ||
| std::vector< ColorSpinorField *> & | z, | ||
| std::vector< ColorSpinorField *> & | w, | ||
| int | length | ||
| ) |
Definition at line 370 of file multi_reduce_core.cuh.
References a, Amatrix_d, Amatrix_h, b, quda::blas::blasStrings, Bmatrix_d, Bmatrix_h, quda::blas::bytes, c, checkCudaError, checkSpinor(), Cmatrix_d, Cmatrix_h, errorQuda, quda::blas::flops, quda::blas::getStream(), fused_exterior_ndeg_tm_dslash_cuda_gen::i, length, MAX_MATRIX_SIZE, MAX_MULTI_BLAS_N, memset(), QUDA_MAX_MULTI_REDUCE, quda::reduce(), strcat(), strcpy(), V, w, warningQuda, X, x, y, Z, and z.

| __global__ void multiReduceKernel | ( | MultiReduceArg< NXZ, ReduceType, SpinorX, SpinorY, SpinorZ, SpinorW, Reducer > | arg | ) |
Definition at line 121 of file multi_reduce_core.cuh.
References quda::arg(), blockDim, fused_exterior_ndeg_tm_dslash_cuda_gen::i, parity, and sum().
Referenced by multiReduceLaunch().


| void multiReduceLaunch | ( | doubleN | result[], |
| MultiReduceArg< NXZ, ReduceType, SpinorX, SpinorY, SpinorZ, SpinorW, Reducer > & | arg, | ||
| const TuneParam & | tp, | ||
| const cudaStream_t & | stream | ||
| ) |
Definition at line 190 of file multi_reduce_core.cuh.
References quda::arg(), deviceProp, errorQuda, quda::blas::getHostReduceBuffer(), quda::blas::getMappedHostReduceBuffer(), quda::blas::getReduceEvent(), fused_exterior_ndeg_tm_dslash_cuda_gen::i, LAUNCH_KERNEL_LOCAL_PARITY, multiReduceKernel(), quda::qudaEventQuery(), quda::qudaEventRecord(), qudaMemcpy, stream, and sum().

Definition at line 1 of file multi_reduce_core.cuh.
References x.
|
inline |
Definition at line 2 of file multi_reduce_core.cuh.
References x.
|
inline |
Definition at line 3 of file multi_reduce_core.cuh.
References x.
Definition at line 4 of file multi_reduce_core.cuh.
Referenced by quda::blas::cdot_(), quda::blas::cdotNormA_(), quda::blas::cdotNormB_(), compute(), covdevReference(), quda::blas::dot_(), dslashReference(), dslashReference_4d_sgpu(), dslashReference_5th(), quda::getRealTraceUVdagger(), quda::blas::HeavyQuarkResidualNorm(), multiReduceKernel(), multiReduceLaunch(), norm2(), quda::blas::norm2_(), normalize(), quda::blas::Dot< NXZ, ReduceType, Float2, FloatN >::operator()(), quda::blas::Norm1< ReduceType, Float2, FloatN >::operator()(), quda::reduce_vector< T >::operator()(), quda::blas::Norm2< ReduceType, Float2, FloatN >::operator()(), quda::blas::Cdot< NXZ, ReduceType, Float2, FloatN >::operator()(), quda::blas::CdotCopy< NXZ, ReduceType, Float2, FloatN >::operator()(), quda::blas::DotNormA< ReduceType, Float2, FloatN >::operator()(), quda::blas::axpyNorm2< ReduceType, Float2, FloatN >::operator()(), quda::blas::AxpyReDot< ReduceType, Float2, FloatN >::operator()(), quda::blas::xmyNorm2< ReduceType, Float2, FloatN >::operator()(), quda::blas::caxpyNorm2< ReduceType, Float2, FloatN >::operator()(), quda::blas::caxpyxmaznormx< ReduceType, Float2, FloatN >::operator()(), quda::blas::cabxpyaxnorm< ReduceType, Float2, FloatN >::operator()(), quda::blas::xpaycdotzy< ReduceType, Float2, FloatN >::operator()(), quda::blas::caxpydotzy< ReduceType, Float2, FloatN >::operator()(), quda::blas::CdotNormA< ReduceType, Float2, FloatN >::operator()(), quda::blas::CdotNormB< ReduceType, Float2, FloatN >::operator()(), quda::blas::caxpbypzYmbwcDotProductUYNormY_< ReduceType, Float2, FloatN >::operator()(), quda::blas::axpyCGNorm2< ReduceType, Float2, FloatN >::operator()(), quda::blas::tripleCGReduction_< ReduceType, Float2, FloatN >::operator()(), quda::blas::HeavyQuarkResidualNorm_< ReduceType, Float2, FloatN >::post(), quda::blas::xpyHeavyQuarkResidualNorm_< ReduceType, Float2, FloatN >::post(), quda::reduce2d(), reduceDouble(), reduceDoubleArray(), and quda::reduceRow().

|
inline |
Definition at line 5 of file multi_reduce_core.cuh.
|
inline |
Definition at line 6 of file multi_reduce_core.cuh.
|
static |
Definition at line 66 of file multi_reduce_core.cuh.
Referenced by multiReduceCuda().
|
static |
Definition at line 70 of file multi_reduce_core.cuh.
Referenced by multiReduceCuda().
|
static |
Definition at line 67 of file multi_reduce_core.cuh.
Referenced by multiReduceCuda().
|
static |
Definition at line 71 of file multi_reduce_core.cuh.
Referenced by multiReduceCuda().
|
static |
Definition at line 68 of file multi_reduce_core.cuh.
Referenced by multiReduceCuda().
|
static |
Definition at line 72 of file multi_reduce_core.cuh.
Referenced by multiReduceCuda().
|
static |
Definition at line 19 of file multi_reduce_core.cuh.
|
static |
Definition at line 20 of file multi_reduce_core.cuh.
1.8.14