|
template<int block_size, typename ReduceType, typename FloatN, int M, typename Arg > |
__global__ void | quda::blas::reduceKernel (Arg arg) |
|
template<typename ReduceType > |
__device__ __host__ ReduceType | quda::blas::norm1_ (const double2 &a) |
|
template<typename ReduceType > |
__device__ __host__ ReduceType | quda::blas::norm1_ (const float2 &a) |
|
template<typename ReduceType > |
__device__ __host__ ReduceType | quda::blas::norm1_ (const float4 &a) |
|
template<typename ReduceType > |
__device__ __host__ void | quda::blas::norm2_ (ReduceType &sum, const double2 &a) |
|
template<typename ReduceType > |
__device__ __host__ void | quda::blas::norm2_ (ReduceType &sum, const float2 &a) |
|
template<typename ReduceType > |
__device__ __host__ void | quda::blas::norm2_ (ReduceType &sum, const float4 &a) |
|
template<typename ReduceType > |
__device__ __host__ void | quda::blas::dot_ (ReduceType &sum, const double2 &a, const double2 &b) |
|
template<typename ReduceType > |
__device__ __host__ void | quda::blas::dot_ (ReduceType &sum, const float2 &a, const float2 &b) |
|
template<typename ReduceType > |
__device__ __host__ void | quda::blas::dot_ (ReduceType &sum, const float4 &a, const float4 &b) |
|
__device__ __host__ void | quda::blas::Caxpy_ (const double2 &a, const double2 &x, double2 &y) |
|
__device__ __host__ void | quda::blas::Caxpy_ (const float2 &a, const float2 &x, float2 &y) |
|
__device__ __host__ void | quda::blas::Caxpy_ (const float2 &a, const float4 &x, float4 &y) |
|
template<typename ReduceType > |
__device__ __host__ void | quda::blas::cdot_ (ReduceType &sum, const double2 &a, const double2 &b) |
|
template<typename ReduceType > |
__device__ __host__ void | quda::blas::cdot_ (ReduceType &sum, const float2 &a, const float2 &b) |
|
template<typename ReduceType > |
__device__ __host__ void | quda::blas::cdot_ (ReduceType &sum, const float4 &a, const float4 &b) |
|
template<typename ReduceType, typename InputType > |
__device__ __host__ void | quda::blas::cdotNormA_ (ReduceType &sum, const InputType &a, const InputType &b) |
|
template<typename ReduceType, typename InputType > |
__device__ __host__ void | quda::blas::cdotNormB_ (ReduceType &sum, const InputType &a, const InputType &b) |
|