QUDA v0.4.0
A library for QCD on GPUs
Classes | Functions | Variables
quda/lib/reduce_core.h File Reference

Go to the source code of this file.

Classes

class  ReduceCuda< doubleN, ReduceType, ReduceSimpleType, FloatN, M, writeX, writeY, writeZ, InputX, InputY, InputZ, InputW, InputV, Reducer, OutputX, OutputY, OutputZ >

Functions

__host__ __device__ void zero (double &x)
__host__ __device__ void zero (double2 &x)
__host__ __device__ void zero (double3 &x)
__device__ void copytoshared (double *s, const int i, const double x, const int block)
__device__ void copytoshared (double *s, const int i, const double2 x, const int block)
__device__ void copytoshared (double *s, const int i, const double3 x, const int block)
__device__ void copytoshared (volatile double *s, const int i, const double x, const int block)
__device__ void copytoshared (volatile double *s, const int i, const double2 x, const int block)
__device__ void copytoshared (volatile double *s, const int i, const double3 x, const int block)
__device__ void copyfromshared (double &x, const double *s, const int i, const int block)
__device__ void copyfromshared (double2 &x, const double *s, const int i, const int block)
__device__ void copyfromshared (double3 &x, const double *s, const int i, const int block)
template<typename ReduceType , typename ReduceSimpleType >
__device__ void add (ReduceType &sum, ReduceSimpleType *s, const int i, const int block)
template<>
__device__ void add< double, double > (double &sum, double *s, const int i, const int block)
template<>
__device__ void add< double2, double > (double2 &sum, double *s, const int i, const int block)
template<>
__device__ void add< double3, double > (double3 &sum, double *s, const int i, const int block)
template<typename ReduceType , typename ReduceSimpleType >
__device__ void add (ReduceSimpleType *s, const int i, const int j, const int block)
template<typename ReduceType , typename ReduceSimpleType >
__device__ void add (volatile ReduceSimpleType *s, const int i, const int j, const int block)
template<>
__device__ void add< double, double > (double *s, const int i, const int j, const int block)
template<>
__device__ void add< double, double > (volatile double *s, const int i, const int j, const int block)
template<>
__device__ void add< double2, double > (double *s, const int i, const int j, const int block)
template<>
__device__ void add< double2, double > (volatile double *s, const int i, const int j, const int block)
template<>
__device__ void add< double3, double > (double *s, const int i, const int j, const int block)
template<>
__device__ void add< double3, double > (volatile double *s, const int i, const int j, const int block)
__host__ __device__ void zero (doublesingle &x)
__host__ __device__ void zero (doublesingle2 &x)
__host__ __device__ void zero (doublesingle3 &x)
__device__ void copytoshared (doublesingle *s, const int i, const doublesingle x, const int block)
__device__ void copytoshared (doublesingle *s, const int i, const doublesingle2 x, const int block)
__device__ void copytoshared (doublesingle *s, const int i, const doublesingle3 x, const int block)
__device__ void copytoshared (volatile doublesingle *s, const int i, const doublesingle x, const int block)
__device__ void copytoshared (volatile doublesingle *s, const int i, const doublesingle2 x, const int block)
__device__ void copytoshared (volatile doublesingle *s, const int i, const doublesingle3 x, const int block)
__device__ void copyfromshared (doublesingle &x, const doublesingle *s, const int i, const int block)
__device__ void copyfromshared (doublesingle2 &x, const doublesingle *s, const int i, const int block)
__device__ void copyfromshared (doublesingle3 &x, const doublesingle *s, const int i, const int block)
template<>
__device__ void add< doublesingle, doublesingle > (doublesingle &sum, doublesingle *s, const int i, const int block)
template<>
__device__ void add< doublesingle2, doublesingle > (doublesingle2 &sum, doublesingle *s, const int i, const int block)
template<>
__device__ void add< doublesingle3, doublesingle > (doublesingle3 &sum, doublesingle *s, const int i, const int block)
template<>
__device__ void add< doublesingle, doublesingle > (doublesingle *s, const int i, const int j, const int block)
template<>
__device__ void add< doublesingle, doublesingle > (volatile doublesingle *s, const int i, const int j, const int block)
template<>
__device__ void add< doublesingle2, doublesingle > (doublesingle *s, const int i, const int j, const int block)
template<>
__device__ void add< doublesingle2, doublesingle > (volatile doublesingle *s, const int i, const int j, const int block)
template<>
__device__ void add< doublesingle3, doublesingle > (doublesingle *s, const int i, const int j, const int block)
template<>
__device__ void add< doublesingle3, doublesingle > (volatile doublesingle *s, const int i, const int j, const int block)
template<int block_size, typename ReduceType , typename ReduceSimpleType , typename FloatN , int M, int writeX, int writeY, int writeZ, typename InputX , typename InputY , typename InputZ , typename InputW , typename InputV , typename OutputX , typename OutputY , typename OutputZ , typename Reducer >
__global__ void reduceKernel (InputX X, InputY Y, InputZ Z, InputW W, InputV V, Reducer r, ReduceType *partial, ReduceType *complete, OutputX XX, OutputY YY, OutputZ ZZ, int length)
template<typename doubleN , typename ReduceType , typename ReduceSimpleType , typename FloatN , int M, int writeX, int writeY, int writeZ, typename InputX , typename InputY , typename InputZ , typename InputW , typename InputV , typename Reducer , typename OutputX , typename OutputY , typename OutputZ >
doubleN reduceLaunch (InputX X, InputY Y, InputZ Z, InputW W, InputV V, Reducer r, OutputX XX, OutputY YY, OutputZ ZZ, int length, const TuneParam &tp, const cudaStream_t &stream)
template<typename doubleN , typename ReduceType , typename ReduceSimpleType , template< typename ReducerType, typename Float, typename FloatN > class Reducer, int writeX, int writeY, int writeZ>
doubleN reduceCuda (const int kernel, const double2 &a, const double2 &b, cudaColorSpinorField &x, cudaColorSpinorField &y, cudaColorSpinorField &z, cudaColorSpinorField &w, cudaColorSpinorField &v)

Variables

__device__ unsigned int count = 0
__shared__ bool isLastBlockDone

Function Documentation

template<typename ReduceType , typename ReduceSimpleType >
__device__ void add ( ReduceType &  sum,
ReduceSimpleType *  s,
const int  i,
const int  block 
)

Definition at line 21 of file reduce_core.h.

template<typename ReduceType , typename ReduceSimpleType >
__device__ void add ( ReduceSimpleType *  s,
const int  i,
const int  j,
const int  block 
)

Definition at line 30 of file reduce_core.h.

template<typename ReduceType , typename ReduceSimpleType >
__device__ void add ( volatile ReduceSimpleType *  s,
const int  i,
const int  j,
const int  block 
)

Definition at line 32 of file reduce_core.h.

template<>
__device__ void add< double, double > ( double *  s,
const int  i,
const int  j,
const int  block 
)

Definition at line 34 of file reduce_core.h.

template<>
__device__ void add< double, double > ( volatile double *  s,
const int  i,
const int  j,
const int  block 
)

Definition at line 36 of file reduce_core.h.

template<>
__device__ void add< double, double > ( double &  sum,
double *  s,
const int  i,
const int  block 
)

Definition at line 22 of file reduce_core.h.

template<>
__device__ void add< double2, double > ( double *  s,
const int  i,
const int  j,
const int  block 
)

Definition at line 39 of file reduce_core.h.

template<>
__device__ void add< double2, double > ( volatile double *  s,
const int  i,
const int  j,
const int  block 
)

Definition at line 41 of file reduce_core.h.

template<>
__device__ void add< double2, double > ( double2 &  sum,
double *  s,
const int  i,
const int  block 
)

Definition at line 24 of file reduce_core.h.

template<>
__device__ void add< double3, double > ( double3 &  sum,
double *  s,
const int  i,
const int  block 
)

Definition at line 26 of file reduce_core.h.

template<>
__device__ void add< double3, double > ( double *  s,
const int  i,
const int  j,
const int  block 
)

Definition at line 44 of file reduce_core.h.

template<>
__device__ void add< double3, double > ( volatile double *  s,
const int  i,
const int  j,
const int  block 
)

Definition at line 46 of file reduce_core.h.

template<>
__device__ void add< doublesingle, doublesingle > ( doublesingle &  sum,
doublesingle *  s,
const int  i,
const int  block 
)

Definition at line 70 of file reduce_core.h.

template<>
__device__ void add< doublesingle, doublesingle > ( doublesingle *  s,
const int  i,
const int  j,
const int  block 
)

Definition at line 77 of file reduce_core.h.

template<>
__device__ void add< doublesingle, doublesingle > ( volatile doublesingle *  s,
const int  i,
const int  j,
const int  block 
)

Definition at line 79 of file reduce_core.h.

template<>
__device__ void add< doublesingle2, doublesingle > ( doublesingle2 &  sum,
doublesingle *  s,
const int  i,
const int  block 
)

Definition at line 72 of file reduce_core.h.

template<>
__device__ void add< doublesingle2, doublesingle > ( doublesingle *  s,
const int  i,
const int  j,
const int  block 
)

Definition at line 82 of file reduce_core.h.

template<>
__device__ void add< doublesingle2, doublesingle > ( volatile doublesingle *  s,
const int  i,
const int  j,
const int  block 
)

Definition at line 84 of file reduce_core.h.

template<>
__device__ void add< doublesingle3, doublesingle > ( doublesingle3 &  sum,
doublesingle *  s,
const int  i,
const int  block 
)

Definition at line 74 of file reduce_core.h.

template<>
__device__ void add< doublesingle3, doublesingle > ( doublesingle *  s,
const int  i,
const int  j,
const int  block 
)

Definition at line 87 of file reduce_core.h.

template<>
__device__ void add< doublesingle3, doublesingle > ( volatile doublesingle *  s,
const int  i,
const int  j,
const int  block 
)

Definition at line 89 of file reduce_core.h.

__device__ void copyfromshared ( double2 &  x,
const double *  s,
const int  i,
const int  block 
)

Definition at line 15 of file reduce_core.h.

__device__ void copyfromshared ( doublesingle &  x,
const doublesingle *  s,
const int  i,
const int  block 
)

Definition at line 64 of file reduce_core.h.

__device__ void copyfromshared ( doublesingle2 &  x,
const doublesingle *  s,
const int  i,
const int  block 
)

Definition at line 65 of file reduce_core.h.

__device__ void copyfromshared ( doublesingle3 &  x,
const doublesingle *  s,
const int  i,
const int  block 
)

Definition at line 67 of file reduce_core.h.

__device__ void copyfromshared ( double3 &  x,
const double *  s,
const int  i,
const int  block 
)

Definition at line 17 of file reduce_core.h.

__device__ void copyfromshared ( double &  x,
const double *  s,
const int  i,
const int  block 
)

Definition at line 14 of file reduce_core.h.

__device__ void copytoshared ( volatile double *  s,
const int  i,
const double2  x,
const int  block 
)

Definition at line 10 of file reduce_core.h.

__device__ void copytoshared ( doublesingle *  s,
const int  i,
const doublesingle2  x,
const int  block 
)

Definition at line 54 of file reduce_core.h.

__device__ void copytoshared ( volatile doublesingle *  s,
const int  i,
const doublesingle2  x,
const int  block 
)

Definition at line 59 of file reduce_core.h.

__device__ void copytoshared ( doublesingle *  s,
const int  i,
const doublesingle  x,
const int  block 
)

Definition at line 53 of file reduce_core.h.

__device__ void copytoshared ( double *  s,
const int  i,
const double  x,
const int  block 
)

Definition at line 4 of file reduce_core.h.

__device__ void copytoshared ( doublesingle *  s,
const int  i,
const doublesingle3  x,
const int  block 
)

Definition at line 56 of file reduce_core.h.

__device__ void copytoshared ( volatile double *  s,
const int  i,
const double3  x,
const int  block 
)

Definition at line 12 of file reduce_core.h.

__device__ void copytoshared ( double *  s,
const int  i,
const double2  x,
const int  block 
)

Definition at line 5 of file reduce_core.h.

__device__ void copytoshared ( volatile doublesingle *  s,
const int  i,
const doublesingle  x,
const int  block 
)

Definition at line 58 of file reduce_core.h.

__device__ void copytoshared ( volatile doublesingle *  s,
const int  i,
const doublesingle3  x,
const int  block 
)

Definition at line 61 of file reduce_core.h.

__device__ void copytoshared ( double *  s,
const int  i,
const double3  x,
const int  block 
)

Definition at line 7 of file reduce_core.h.

__device__ void copytoshared ( volatile double *  s,
const int  i,
const double  x,
const int  block 
)

Definition at line 9 of file reduce_core.h.

template<typename doubleN , typename ReduceType , typename ReduceSimpleType , template< typename ReducerType, typename Float, typename FloatN > class Reducer, int writeX, int writeY, int writeZ>
doubleN reduceCuda ( const int  kernel,
const double2 &  a,
const double2 &  b,
cudaColorSpinorField &  x,
cudaColorSpinorField &  y,
cudaColorSpinorField &  z,
cudaColorSpinorField &  w,
cudaColorSpinorField &  v 
)

Driver for generic reduction routine with two loads.

Parameters:
ReduceType

Definition at line 450 of file reduce_core.h.

template<int block_size, typename ReduceType , typename ReduceSimpleType , typename FloatN , int M, int writeX, int writeY, int writeZ, typename InputX , typename InputY , typename InputZ , typename InputW , typename InputV , typename OutputX , typename OutputY , typename OutputZ , typename Reducer >
__global__ void reduceKernel ( InputX  X,
InputY  Y,
InputZ  Z,
InputW  W,
InputV  V,
Reducer  r,
ReduceType *  partial,
ReduceType *  complete,
OutputX  XX,
OutputY  YY,
OutputZ  ZZ,
int  length 
)

Generic reduction kernel with up to four loads and three saves.

Definition at line 103 of file reduce_core.h.

template<typename doubleN , typename ReduceType , typename ReduceSimpleType , typename FloatN , int M, int writeX, int writeY, int writeZ, typename InputX , typename InputY , typename InputZ , typename InputW , typename InputV , typename Reducer , typename OutputX , typename OutputY , typename OutputZ >
doubleN reduceLaunch ( InputX  X,
InputY  Y,
InputZ  Z,
InputW  W,
InputV  V,
Reducer  r,
OutputX  XX,
OutputY  YY,
OutputZ  ZZ,
int  length,
const TuneParam &  tp,
const cudaStream_t &  stream 
)

Generic reduction kernel launcher.

Definition at line 225 of file reduce_core.h.

__host__ __device__ void zero ( double3 &  x)

Definition at line 3 of file reduce_core.h.

__host__ __device__ void zero ( double2 &  x)

Definition at line 2 of file reduce_core.h.

__host__ __device__ void zero ( doublesingle2 &  x)

Definition at line 51 of file reduce_core.h.

__host__ __device__ void zero ( double &  x)

Definition at line 1 of file reduce_core.h.

__host__ __device__ void zero ( doublesingle3 &  x)

Definition at line 52 of file reduce_core.h.

__host__ __device__ void zero ( doublesingle &  x)

Definition at line 50 of file reduce_core.h.


Variable Documentation

__device__ unsigned int count = 0

Definition at line 93 of file reduce_core.h.

__shared__ bool isLastBlockDone

Definition at line 94 of file reduce_core.h.

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines