QUDA
v0.7.0
A library for QCD on GPUs
|
Classes | |
struct | BlasArg |
class | BlasCuda |
struct | ReduceArg |
class | ReduceCuda |
Functions | |
template<typename FloatN , int M, typename SpinorX , typename SpinorY , typename SpinorZ , typename SpinorW , typename Functor > | |
__global__ void | blasKernel (BlasArg< SpinorX, SpinorY, SpinorZ, SpinorW, Functor > arg) |
template<template< typename Float, typename FloatN > class Functor, int writeX, int writeY, int writeZ, int writeW> | |
void | blasCuda (const double2 &a, const double2 &b, const double2 &c, cudaColorSpinorField &x, cudaColorSpinorField &y, cudaColorSpinorField &z, cudaColorSpinorField &w) |
__host__ __device__ void | zero (double &x) |
__host__ __device__ void | zero (double2 &x) |
__host__ __device__ void | zero (double3 &x) |
__device__ void | copytoshared (double *s, const int i, const double x, const int block) |
__device__ void | copytoshared (double *s, const int i, const double2 x, const int block) |
__device__ void | copytoshared (double *s, const int i, const double3 x, const int block) |
__device__ void | copytoshared (volatile double *s, const int i, const double x, const int block) |
__device__ void | copytoshared (volatile double *s, const int i, const double2 x, const int block) |
__device__ void | copytoshared (volatile double *s, const int i, const double3 x, const int block) |
__device__ void | copyfromshared (double &x, const double *s, const int i, const int block) |
__device__ void | copyfromshared (double2 &x, const double *s, const int i, const int block) |
__device__ void | copyfromshared (double3 &x, const double *s, const int i, const int block) |
template<typename ReduceType , typename ReduceSimpleType > | |
__device__ void | add (ReduceType &sum, ReduceSimpleType *s, const int i, const int block) |
template<> | |
__device__ void | add< double, double > (double &sum, double *s, const int i, const int block) |
template<> | |
__device__ void | add< double2, double > (double2 &sum, double *s, const int i, const int block) |
template<> | |
__device__ void | add< double3, double > (double3 &sum, double *s, const int i, const int block) |
template<typename ReduceType , typename ReduceSimpleType > | |
__device__ void | add (ReduceSimpleType *s, const int i, const int j, const int block) |
template<typename ReduceType , typename ReduceSimpleType > | |
__device__ void | add (volatile ReduceSimpleType *s, const int i, const int j, const int block) |
template<> | |
__device__ void | add< double, double > (double *s, const int i, const int j, const int block) |
template<> | |
__device__ void | add< double, double > (volatile double *s, const int i, const int j, const int block) |
template<> | |
__device__ void | add< double2, double > (double *s, const int i, const int j, const int block) |
template<> | |
__device__ void | add< double2, double > (volatile double *s, const int i, const int j, const int block) |
template<> | |
__device__ void | add< double3, double > (double *s, const int i, const int j, const int block) |
template<> | |
__device__ void | add< double3, double > (volatile double *s, const int i, const int j, const int block) |
__host__ __device__ void | zero (doublesingle &x) |
__host__ __device__ void | zero (doublesingle2 &x) |
__host__ __device__ void | zero (doublesingle3 &x) |
__device__ void | copytoshared (doublesingle *s, const int i, const doublesingle x, const int block) |
__device__ void | copytoshared (doublesingle *s, const int i, const doublesingle2 x, const int block) |
__device__ void | copytoshared (doublesingle *s, const int i, const doublesingle3 x, const int block) |
__device__ void | copytoshared (volatile doublesingle *s, const int i, const doublesingle x, const int block) |
__device__ void | copytoshared (volatile doublesingle *s, const int i, const doublesingle2 x, const int block) |
__device__ void | copytoshared (volatile doublesingle *s, const int i, const doublesingle3 x, const int block) |
__device__ void | copyfromshared (doublesingle &x, const doublesingle *s, const int i, const int block) |
__device__ void | copyfromshared (doublesingle2 &x, const doublesingle *s, const int i, const int block) |
__device__ void | copyfromshared (doublesingle3 &x, const doublesingle *s, const int i, const int block) |
template<> | |
__device__ void | add< doublesingle, doublesingle > (doublesingle &sum, doublesingle *s, const int i, const int block) |
template<> | |
__device__ void | add< doublesingle2, doublesingle > (doublesingle2 &sum, doublesingle *s, const int i, const int block) |
template<> | |
__device__ void | add< doublesingle3, doublesingle > (doublesingle3 &sum, doublesingle *s, const int i, const int block) |
template<> | |
__device__ void | add< doublesingle, doublesingle > (doublesingle *s, const int i, const int j, const int block) |
template<> | |
__device__ void | add< doublesingle, doublesingle > (volatile doublesingle *s, const int i, const int j, const int block) |
template<> | |
__device__ void | add< doublesingle2, doublesingle > (doublesingle *s, const int i, const int j, const int block) |
template<> | |
__device__ void | add< doublesingle2, doublesingle > (volatile doublesingle *s, const int i, const int j, const int block) |
template<> | |
__device__ void | add< doublesingle3, doublesingle > (doublesingle *s, const int i, const int j, const int block) |
template<> | |
__device__ void | add< doublesingle3, doublesingle > (volatile doublesingle *s, const int i, const int j, const int block) |
template<int block_size, typename ReduceType , typename ReduceSimpleType , typename FloatN , int M, typename SpinorX , typename SpinorY , typename SpinorZ , typename SpinorW , typename SpinorV , typename Reducer > | |
__global__ void | reduceKernel (ReduceArg< ReduceType, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer > arg) |
template<typename doubleN , typename ReduceType , typename ReduceSimpleType , typename FloatN , int M, typename SpinorX , typename SpinorY , typename SpinorZ , typename SpinorW , typename SpinorV , typename Reducer > | |
doubleN | reduceLaunch (ReduceArg< ReduceType, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer > &arg, const TuneParam &tp, const cudaStream_t &stream) |
template<typename doubleN , typename ReduceType , typename ReduceSimpleType , template< typename ReducerType, typename Float, typename FloatN > class Reducer, int writeX, int writeY, int writeZ, int writeW, int writeV, bool siteUnroll> | |
doubleN | reduceCuda (const double2 &a, const double2 &b, cudaColorSpinorField &x, cudaColorSpinorField &y, cudaColorSpinorField &z, cudaColorSpinorField &w, cudaColorSpinorField &v) |
Variables | |
__device__ unsigned int | count = 0 |
__shared__ bool | isLastBlockDone |
__device__ void mixed::add | ( | ReduceType & | sum, |
ReduceSimpleType * | s, | ||
const int | i, | ||
const int | block | ||
) |
Definition at line 23 of file reduce_mixed_core.h.
__device__ void mixed::add | ( | ReduceSimpleType * | s, |
const int | i, | ||
const int | j, | ||
const int | block | ||
) |
Definition at line 32 of file reduce_mixed_core.h.
__device__ void mixed::add | ( | volatile ReduceSimpleType * | s, |
const int | i, | ||
const int | j, | ||
const int | block | ||
) |
Definition at line 34 of file reduce_mixed_core.h.
__device__ void mixed::add< double, double > | ( | double & | sum, |
double * | s, | ||
const int | i, | ||
const int | block | ||
) |
Definition at line 24 of file reduce_mixed_core.h.
__device__ void mixed::add< double, double > | ( | double * | s, |
const int | i, | ||
const int | j, | ||
const int | block | ||
) |
Definition at line 36 of file reduce_mixed_core.h.
__device__ void mixed::add< double, double > | ( | volatile double * | s, |
const int | i, | ||
const int | j, | ||
const int | block | ||
) |
Definition at line 38 of file reduce_mixed_core.h.
__device__ void mixed::add< double2, double > | ( | double2 & | sum, |
double * | s, | ||
const int | i, | ||
const int | block | ||
) |
Definition at line 26 of file reduce_mixed_core.h.
__device__ void mixed::add< double2, double > | ( | double * | s, |
const int | i, | ||
const int | j, | ||
const int | block | ||
) |
Definition at line 41 of file reduce_mixed_core.h.
__device__ void mixed::add< double2, double > | ( | volatile double * | s, |
const int | i, | ||
const int | j, | ||
const int | block | ||
) |
Definition at line 43 of file reduce_mixed_core.h.
__device__ void mixed::add< double3, double > | ( | double3 & | sum, |
double * | s, | ||
const int | i, | ||
const int | block | ||
) |
Definition at line 28 of file reduce_mixed_core.h.
__device__ void mixed::add< double3, double > | ( | double * | s, |
const int | i, | ||
const int | j, | ||
const int | block | ||
) |
Definition at line 46 of file reduce_mixed_core.h.
__device__ void mixed::add< double3, double > | ( | volatile double * | s, |
const int | i, | ||
const int | j, | ||
const int | block | ||
) |
Definition at line 48 of file reduce_mixed_core.h.
__device__ void mixed::add< doublesingle, doublesingle > | ( | doublesingle & | sum, |
doublesingle * | s, | ||
const int | i, | ||
const int | block | ||
) |
Definition at line 72 of file reduce_mixed_core.h.
__device__ void mixed::add< doublesingle, doublesingle > | ( | doublesingle * | s, |
const int | i, | ||
const int | j, | ||
const int | block | ||
) |
Definition at line 79 of file reduce_mixed_core.h.
__device__ void mixed::add< doublesingle, doublesingle > | ( | volatile doublesingle * | s, |
const int | i, | ||
const int | j, | ||
const int | block | ||
) |
Definition at line 81 of file reduce_mixed_core.h.
__device__ void mixed::add< doublesingle2, doublesingle > | ( | doublesingle2 & | sum, |
doublesingle * | s, | ||
const int | i, | ||
const int | block | ||
) |
Definition at line 74 of file reduce_mixed_core.h.
__device__ void mixed::add< doublesingle2, doublesingle > | ( | doublesingle * | s, |
const int | i, | ||
const int | j, | ||
const int | block | ||
) |
Definition at line 84 of file reduce_mixed_core.h.
__device__ void mixed::add< doublesingle2, doublesingle > | ( | volatile doublesingle * | s, |
const int | i, | ||
const int | j, | ||
const int | block | ||
) |
Definition at line 86 of file reduce_mixed_core.h.
__device__ void mixed::add< doublesingle3, doublesingle > | ( | doublesingle3 & | sum, |
doublesingle * | s, | ||
const int | i, | ||
const int | block | ||
) |
Definition at line 76 of file reduce_mixed_core.h.
__device__ void mixed::add< doublesingle3, doublesingle > | ( | doublesingle * | s, |
const int | i, | ||
const int | j, | ||
const int | block | ||
) |
Definition at line 89 of file reduce_mixed_core.h.
__device__ void mixed::add< doublesingle3, doublesingle > | ( | volatile doublesingle * | s, |
const int | i, | ||
const int | j, | ||
const int | block | ||
) |
Definition at line 91 of file reduce_mixed_core.h.
void mixed::blasCuda | ( | const double2 & | a, |
const double2 & | b, | ||
const double2 & | c, | ||
cudaColorSpinorField & | x, | ||
cudaColorSpinorField & | y, | ||
cudaColorSpinorField & | z, | ||
cudaColorSpinorField & | w | ||
) |
Driver for generic blas routine with four loads and up to four stores.
Definition at line 117 of file blas_mixed_core.h.
__global__ void mixed::blasKernel | ( | BlasArg< SpinorX, SpinorY, SpinorZ, SpinorW, Functor > | arg | ) |
Generic blas kernel with four loads and up to four stores.
Definition at line 24 of file blas_mixed_core.h.
__device__ void mixed::copyfromshared | ( | double & | x, |
const double * | s, | ||
const int | i, | ||
const int | block | ||
) |
Definition at line 16 of file reduce_mixed_core.h.
__device__ void mixed::copyfromshared | ( | double2 & | x, |
const double * | s, | ||
const int | i, | ||
const int | block | ||
) |
Definition at line 17 of file reduce_mixed_core.h.
__device__ void mixed::copyfromshared | ( | double3 & | x, |
const double * | s, | ||
const int | i, | ||
const int | block | ||
) |
Definition at line 19 of file reduce_mixed_core.h.
__device__ void mixed::copyfromshared | ( | doublesingle & | x, |
const doublesingle * | s, | ||
const int | i, | ||
const int | block | ||
) |
Definition at line 66 of file reduce_mixed_core.h.
__device__ void mixed::copyfromshared | ( | doublesingle2 & | x, |
const doublesingle * | s, | ||
const int | i, | ||
const int | block | ||
) |
Definition at line 67 of file reduce_mixed_core.h.
__device__ void mixed::copyfromshared | ( | doublesingle3 & | x, |
const doublesingle * | s, | ||
const int | i, | ||
const int | block | ||
) |
Definition at line 69 of file reduce_mixed_core.h.
__device__ void mixed::copytoshared | ( | double * | s, |
const int | i, | ||
const double | x, | ||
const int | block | ||
) |
Definition at line 6 of file reduce_mixed_core.h.
__device__ void mixed::copytoshared | ( | double * | s, |
const int | i, | ||
const double2 | x, | ||
const int | block | ||
) |
Definition at line 7 of file reduce_mixed_core.h.
__device__ void mixed::copytoshared | ( | double * | s, |
const int | i, | ||
const double3 | x, | ||
const int | block | ||
) |
Definition at line 9 of file reduce_mixed_core.h.
__device__ void mixed::copytoshared | ( | volatile double * | s, |
const int | i, | ||
const double | x, | ||
const int | block | ||
) |
Definition at line 11 of file reduce_mixed_core.h.
__device__ void mixed::copytoshared | ( | volatile double * | s, |
const int | i, | ||
const double2 | x, | ||
const int | block | ||
) |
Definition at line 12 of file reduce_mixed_core.h.
__device__ void mixed::copytoshared | ( | volatile double * | s, |
const int | i, | ||
const double3 | x, | ||
const int | block | ||
) |
Definition at line 14 of file reduce_mixed_core.h.
__device__ void mixed::copytoshared | ( | doublesingle * | s, |
const int | i, | ||
const doublesingle | x, | ||
const int | block | ||
) |
Definition at line 55 of file reduce_mixed_core.h.
__device__ void mixed::copytoshared | ( | doublesingle * | s, |
const int | i, | ||
const doublesingle2 | x, | ||
const int | block | ||
) |
Definition at line 56 of file reduce_mixed_core.h.
__device__ void mixed::copytoshared | ( | doublesingle * | s, |
const int | i, | ||
const doublesingle3 | x, | ||
const int | block | ||
) |
Definition at line 58 of file reduce_mixed_core.h.
__device__ void mixed::copytoshared | ( | volatile doublesingle * | s, |
const int | i, | ||
const doublesingle | x, | ||
const int | block | ||
) |
Definition at line 60 of file reduce_mixed_core.h.
__device__ void mixed::copytoshared | ( | volatile doublesingle * | s, |
const int | i, | ||
const doublesingle2 | x, | ||
const int | block | ||
) |
Definition at line 61 of file reduce_mixed_core.h.
__device__ void mixed::copytoshared | ( | volatile doublesingle * | s, |
const int | i, | ||
const doublesingle3 | x, | ||
const int | block | ||
) |
Definition at line 63 of file reduce_mixed_core.h.
doubleN mixed::reduceCuda | ( | const double2 & | a, |
const double2 & | b, | ||
cudaColorSpinorField & | x, | ||
cudaColorSpinorField & | y, | ||
cudaColorSpinorField & | z, | ||
cudaColorSpinorField & | w, | ||
cudaColorSpinorField & | v | ||
) |
Driver for generic reduction routine with two loads.
Template Parameters
ReduceType | |
siteUnroll | If true, one site corresponds to exactly one thread. |
Definition at line 378 of file reduce_mixed_core.h.
__global__ void mixed::reduceKernel | ( | ReduceArg< ReduceType, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer > | arg | ) |
Generic reduction kernel with up to four loads and three saves.
Definition at line 123 of file reduce_mixed_core.h.
doubleN mixed::reduceLaunch | ( | ReduceArg< ReduceType, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer > & | arg, |
const TuneParam & | tp, | ||
const cudaStream_t & | stream | ||
) |
Generic reduction kernel launcher.
Definition at line 253 of file reduce_mixed_core.h.
__host__ __device__ void mixed::zero | ( | double & | x | ) |
Definition at line 3 of file reduce_mixed_core.h.
__host__ __device__ void mixed::zero | ( | double2 & | x | ) |
Definition at line 4 of file reduce_mixed_core.h.
__host__ __device__ void mixed::zero | ( | double3 & | x | ) |
Definition at line 5 of file reduce_mixed_core.h.
__host__ __device__ void mixed::zero | ( | doublesingle & | x | ) |
Definition at line 52 of file reduce_mixed_core.h.
__host__ __device__ void mixed::zero | ( | doublesingle2 & | x | ) |
Definition at line 53 of file reduce_mixed_core.h.
__host__ __device__ void mixed::zero | ( | doublesingle3 & | x | ) |
Definition at line 54 of file reduce_mixed_core.h.
__device__ unsigned int mixed::count = 0 |
Definition at line 97 of file reduce_mixed_core.h.
__shared__ bool mixed::isLastBlockDone |
Definition at line 98 of file reduce_mixed_core.h.