|
QUDA
v0.7.0
A library for QCD on GPUs
|
Classes | |
| struct | BlasArg |
| class | BlasCuda |
| struct | ReduceArg |
| class | ReduceCuda |
Functions | |
| template<typename FloatN , int M, typename SpinorX , typename SpinorY , typename SpinorZ , typename SpinorW , typename Functor > | |
| __global__ void | blasKernel (BlasArg< SpinorX, SpinorY, SpinorZ, SpinorW, Functor > arg) |
| template<template< typename Float, typename FloatN > class Functor, int writeX, int writeY, int writeZ, int writeW> | |
| void | blasCuda (const double2 &a, const double2 &b, const double2 &c, cudaColorSpinorField &x, cudaColorSpinorField &y, cudaColorSpinorField &z, cudaColorSpinorField &w) |
| __host__ __device__ void | zero (double &x) |
| __host__ __device__ void | zero (double2 &x) |
| __host__ __device__ void | zero (double3 &x) |
| __device__ void | copytoshared (double *s, const int i, const double x, const int block) |
| __device__ void | copytoshared (double *s, const int i, const double2 x, const int block) |
| __device__ void | copytoshared (double *s, const int i, const double3 x, const int block) |
| __device__ void | copytoshared (volatile double *s, const int i, const double x, const int block) |
| __device__ void | copytoshared (volatile double *s, const int i, const double2 x, const int block) |
| __device__ void | copytoshared (volatile double *s, const int i, const double3 x, const int block) |
| __device__ void | copyfromshared (double &x, const double *s, const int i, const int block) |
| __device__ void | copyfromshared (double2 &x, const double *s, const int i, const int block) |
| __device__ void | copyfromshared (double3 &x, const double *s, const int i, const int block) |
| template<typename ReduceType , typename ReduceSimpleType > | |
| __device__ void | add (ReduceType &sum, ReduceSimpleType *s, const int i, const int block) |
| template<> | |
| __device__ void | add< double, double > (double &sum, double *s, const int i, const int block) |
| template<> | |
| __device__ void | add< double2, double > (double2 &sum, double *s, const int i, const int block) |
| template<> | |
| __device__ void | add< double3, double > (double3 &sum, double *s, const int i, const int block) |
| template<typename ReduceType , typename ReduceSimpleType > | |
| __device__ void | add (ReduceSimpleType *s, const int i, const int j, const int block) |
| template<typename ReduceType , typename ReduceSimpleType > | |
| __device__ void | add (volatile ReduceSimpleType *s, const int i, const int j, const int block) |
| template<> | |
| __device__ void | add< double, double > (double *s, const int i, const int j, const int block) |
| template<> | |
| __device__ void | add< double, double > (volatile double *s, const int i, const int j, const int block) |
| template<> | |
| __device__ void | add< double2, double > (double *s, const int i, const int j, const int block) |
| template<> | |
| __device__ void | add< double2, double > (volatile double *s, const int i, const int j, const int block) |
| template<> | |
| __device__ void | add< double3, double > (double *s, const int i, const int j, const int block) |
| template<> | |
| __device__ void | add< double3, double > (volatile double *s, const int i, const int j, const int block) |
| __host__ __device__ void | zero (doublesingle &x) |
| __host__ __device__ void | zero (doublesingle2 &x) |
| __host__ __device__ void | zero (doublesingle3 &x) |
| __device__ void | copytoshared (doublesingle *s, const int i, const doublesingle x, const int block) |
| __device__ void | copytoshared (doublesingle *s, const int i, const doublesingle2 x, const int block) |
| __device__ void | copytoshared (doublesingle *s, const int i, const doublesingle3 x, const int block) |
| __device__ void | copytoshared (volatile doublesingle *s, const int i, const doublesingle x, const int block) |
| __device__ void | copytoshared (volatile doublesingle *s, const int i, const doublesingle2 x, const int block) |
| __device__ void | copytoshared (volatile doublesingle *s, const int i, const doublesingle3 x, const int block) |
| __device__ void | copyfromshared (doublesingle &x, const doublesingle *s, const int i, const int block) |
| __device__ void | copyfromshared (doublesingle2 &x, const doublesingle *s, const int i, const int block) |
| __device__ void | copyfromshared (doublesingle3 &x, const doublesingle *s, const int i, const int block) |
| template<> | |
| __device__ void | add< doublesingle, doublesingle > (doublesingle &sum, doublesingle *s, const int i, const int block) |
| template<> | |
| __device__ void | add< doublesingle2, doublesingle > (doublesingle2 &sum, doublesingle *s, const int i, const int block) |
| template<> | |
| __device__ void | add< doublesingle3, doublesingle > (doublesingle3 &sum, doublesingle *s, const int i, const int block) |
| template<> | |
| __device__ void | add< doublesingle, doublesingle > (doublesingle *s, const int i, const int j, const int block) |
| template<> | |
| __device__ void | add< doublesingle, doublesingle > (volatile doublesingle *s, const int i, const int j, const int block) |
| template<> | |
| __device__ void | add< doublesingle2, doublesingle > (doublesingle *s, const int i, const int j, const int block) |
| template<> | |
| __device__ void | add< doublesingle2, doublesingle > (volatile doublesingle *s, const int i, const int j, const int block) |
| template<> | |
| __device__ void | add< doublesingle3, doublesingle > (doublesingle *s, const int i, const int j, const int block) |
| template<> | |
| __device__ void | add< doublesingle3, doublesingle > (volatile doublesingle *s, const int i, const int j, const int block) |
| template<int block_size, typename ReduceType , typename ReduceSimpleType , typename FloatN , int M, typename SpinorX , typename SpinorY , typename SpinorZ , typename SpinorW , typename SpinorV , typename Reducer > | |
| __global__ void | reduceKernel (ReduceArg< ReduceType, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer > arg) |
| template<typename doubleN , typename ReduceType , typename ReduceSimpleType , typename FloatN , int M, typename SpinorX , typename SpinorY , typename SpinorZ , typename SpinorW , typename SpinorV , typename Reducer > | |
| doubleN | reduceLaunch (ReduceArg< ReduceType, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer > &arg, const TuneParam &tp, const cudaStream_t &stream) |
| template<typename doubleN , typename ReduceType , typename ReduceSimpleType , template< typename ReducerType, typename Float, typename FloatN > class Reducer, int writeX, int writeY, int writeZ, int writeW, int writeV, bool siteUnroll> | |
| doubleN | reduceCuda (const double2 &a, const double2 &b, cudaColorSpinorField &x, cudaColorSpinorField &y, cudaColorSpinorField &z, cudaColorSpinorField &w, cudaColorSpinorField &v) |
Variables | |
| __device__ unsigned int | count = 0 |
| __shared__ bool | isLastBlockDone |
| __device__ void mixed::add | ( | ReduceType & | sum, |
| ReduceSimpleType * | s, | ||
| const int | i, | ||
| const int | block | ||
| ) |
Definition at line 23 of file reduce_mixed_core.h.
| __device__ void mixed::add | ( | ReduceSimpleType * | s, |
| const int | i, | ||
| const int | j, | ||
| const int | block | ||
| ) |
Definition at line 32 of file reduce_mixed_core.h.
| __device__ void mixed::add | ( | volatile ReduceSimpleType * | s, |
| const int | i, | ||
| const int | j, | ||
| const int | block | ||
| ) |
Definition at line 34 of file reduce_mixed_core.h.
| __device__ void mixed::add< double, double > | ( | double & | sum, |
| double * | s, | ||
| const int | i, | ||
| const int | block | ||
| ) |
Definition at line 24 of file reduce_mixed_core.h.
| __device__ void mixed::add< double, double > | ( | double * | s, |
| const int | i, | ||
| const int | j, | ||
| const int | block | ||
| ) |
Definition at line 36 of file reduce_mixed_core.h.
| __device__ void mixed::add< double, double > | ( | volatile double * | s, |
| const int | i, | ||
| const int | j, | ||
| const int | block | ||
| ) |
Definition at line 38 of file reduce_mixed_core.h.
| __device__ void mixed::add< double2, double > | ( | double2 & | sum, |
| double * | s, | ||
| const int | i, | ||
| const int | block | ||
| ) |
Definition at line 26 of file reduce_mixed_core.h.
| __device__ void mixed::add< double2, double > | ( | double * | s, |
| const int | i, | ||
| const int | j, | ||
| const int | block | ||
| ) |
Definition at line 41 of file reduce_mixed_core.h.
| __device__ void mixed::add< double2, double > | ( | volatile double * | s, |
| const int | i, | ||
| const int | j, | ||
| const int | block | ||
| ) |
Definition at line 43 of file reduce_mixed_core.h.
| __device__ void mixed::add< double3, double > | ( | double3 & | sum, |
| double * | s, | ||
| const int | i, | ||
| const int | block | ||
| ) |
Definition at line 28 of file reduce_mixed_core.h.
| __device__ void mixed::add< double3, double > | ( | double * | s, |
| const int | i, | ||
| const int | j, | ||
| const int | block | ||
| ) |
Definition at line 46 of file reduce_mixed_core.h.
| __device__ void mixed::add< double3, double > | ( | volatile double * | s, |
| const int | i, | ||
| const int | j, | ||
| const int | block | ||
| ) |
Definition at line 48 of file reduce_mixed_core.h.
| __device__ void mixed::add< doublesingle, doublesingle > | ( | doublesingle & | sum, |
| doublesingle * | s, | ||
| const int | i, | ||
| const int | block | ||
| ) |
Definition at line 72 of file reduce_mixed_core.h.
| __device__ void mixed::add< doublesingle, doublesingle > | ( | doublesingle * | s, |
| const int | i, | ||
| const int | j, | ||
| const int | block | ||
| ) |
Definition at line 79 of file reduce_mixed_core.h.
| __device__ void mixed::add< doublesingle, doublesingle > | ( | volatile doublesingle * | s, |
| const int | i, | ||
| const int | j, | ||
| const int | block | ||
| ) |
Definition at line 81 of file reduce_mixed_core.h.
| __device__ void mixed::add< doublesingle2, doublesingle > | ( | doublesingle2 & | sum, |
| doublesingle * | s, | ||
| const int | i, | ||
| const int | block | ||
| ) |
Definition at line 74 of file reduce_mixed_core.h.
| __device__ void mixed::add< doublesingle2, doublesingle > | ( | doublesingle * | s, |
| const int | i, | ||
| const int | j, | ||
| const int | block | ||
| ) |
Definition at line 84 of file reduce_mixed_core.h.
| __device__ void mixed::add< doublesingle2, doublesingle > | ( | volatile doublesingle * | s, |
| const int | i, | ||
| const int | j, | ||
| const int | block | ||
| ) |
Definition at line 86 of file reduce_mixed_core.h.
| __device__ void mixed::add< doublesingle3, doublesingle > | ( | doublesingle3 & | sum, |
| doublesingle * | s, | ||
| const int | i, | ||
| const int | block | ||
| ) |
Definition at line 76 of file reduce_mixed_core.h.
| __device__ void mixed::add< doublesingle3, doublesingle > | ( | doublesingle * | s, |
| const int | i, | ||
| const int | j, | ||
| const int | block | ||
| ) |
Definition at line 89 of file reduce_mixed_core.h.
| __device__ void mixed::add< doublesingle3, doublesingle > | ( | volatile doublesingle * | s, |
| const int | i, | ||
| const int | j, | ||
| const int | block | ||
| ) |
Definition at line 91 of file reduce_mixed_core.h.
| void mixed::blasCuda | ( | const double2 & | a, |
| const double2 & | b, | ||
| const double2 & | c, | ||
| cudaColorSpinorField & | x, | ||
| cudaColorSpinorField & | y, | ||
| cudaColorSpinorField & | z, | ||
| cudaColorSpinorField & | w | ||
| ) |
Driver for generic blas routine with four loads and up to four stores.
Definition at line 117 of file blas_mixed_core.h.
| __global__ void mixed::blasKernel | ( | BlasArg< SpinorX, SpinorY, SpinorZ, SpinorW, Functor > | arg | ) |
Generic blas kernel with four loads and up to four stores.
Definition at line 24 of file blas_mixed_core.h.
| __device__ void mixed::copyfromshared | ( | double & | x, |
| const double * | s, | ||
| const int | i, | ||
| const int | block | ||
| ) |
Definition at line 16 of file reduce_mixed_core.h.
| __device__ void mixed::copyfromshared | ( | double2 & | x, |
| const double * | s, | ||
| const int | i, | ||
| const int | block | ||
| ) |
Definition at line 17 of file reduce_mixed_core.h.
| __device__ void mixed::copyfromshared | ( | double3 & | x, |
| const double * | s, | ||
| const int | i, | ||
| const int | block | ||
| ) |
Definition at line 19 of file reduce_mixed_core.h.
| __device__ void mixed::copyfromshared | ( | doublesingle & | x, |
| const doublesingle * | s, | ||
| const int | i, | ||
| const int | block | ||
| ) |
Definition at line 66 of file reduce_mixed_core.h.
| __device__ void mixed::copyfromshared | ( | doublesingle2 & | x, |
| const doublesingle * | s, | ||
| const int | i, | ||
| const int | block | ||
| ) |
Definition at line 67 of file reduce_mixed_core.h.
| __device__ void mixed::copyfromshared | ( | doublesingle3 & | x, |
| const doublesingle * | s, | ||
| const int | i, | ||
| const int | block | ||
| ) |
Definition at line 69 of file reduce_mixed_core.h.
| __device__ void mixed::copytoshared | ( | double * | s, |
| const int | i, | ||
| const double | x, | ||
| const int | block | ||
| ) |
Definition at line 6 of file reduce_mixed_core.h.
| __device__ void mixed::copytoshared | ( | double * | s, |
| const int | i, | ||
| const double2 | x, | ||
| const int | block | ||
| ) |
Definition at line 7 of file reduce_mixed_core.h.
| __device__ void mixed::copytoshared | ( | double * | s, |
| const int | i, | ||
| const double3 | x, | ||
| const int | block | ||
| ) |
Definition at line 9 of file reduce_mixed_core.h.
| __device__ void mixed::copytoshared | ( | volatile double * | s, |
| const int | i, | ||
| const double | x, | ||
| const int | block | ||
| ) |
Definition at line 11 of file reduce_mixed_core.h.
| __device__ void mixed::copytoshared | ( | volatile double * | s, |
| const int | i, | ||
| const double2 | x, | ||
| const int | block | ||
| ) |
Definition at line 12 of file reduce_mixed_core.h.
| __device__ void mixed::copytoshared | ( | volatile double * | s, |
| const int | i, | ||
| const double3 | x, | ||
| const int | block | ||
| ) |
Definition at line 14 of file reduce_mixed_core.h.
| __device__ void mixed::copytoshared | ( | doublesingle * | s, |
| const int | i, | ||
| const doublesingle | x, | ||
| const int | block | ||
| ) |
Definition at line 55 of file reduce_mixed_core.h.
| __device__ void mixed::copytoshared | ( | doublesingle * | s, |
| const int | i, | ||
| const doublesingle2 | x, | ||
| const int | block | ||
| ) |
Definition at line 56 of file reduce_mixed_core.h.
| __device__ void mixed::copytoshared | ( | doublesingle * | s, |
| const int | i, | ||
| const doublesingle3 | x, | ||
| const int | block | ||
| ) |
Definition at line 58 of file reduce_mixed_core.h.
| __device__ void mixed::copytoshared | ( | volatile doublesingle * | s, |
| const int | i, | ||
| const doublesingle | x, | ||
| const int | block | ||
| ) |
Definition at line 60 of file reduce_mixed_core.h.
| __device__ void mixed::copytoshared | ( | volatile doublesingle * | s, |
| const int | i, | ||
| const doublesingle2 | x, | ||
| const int | block | ||
| ) |
Definition at line 61 of file reduce_mixed_core.h.
| __device__ void mixed::copytoshared | ( | volatile doublesingle * | s, |
| const int | i, | ||
| const doublesingle3 | x, | ||
| const int | block | ||
| ) |
Definition at line 63 of file reduce_mixed_core.h.
| doubleN mixed::reduceCuda | ( | const double2 & | a, |
| const double2 & | b, | ||
| cudaColorSpinorField & | x, | ||
| cudaColorSpinorField & | y, | ||
| cudaColorSpinorField & | z, | ||
| cudaColorSpinorField & | w, | ||
| cudaColorSpinorField & | v | ||
| ) |
Driver for generic reduction routine operating on up to five fields (x, y, z, w, v).
| ReduceType | |
| siteUnroll | If true, one site corresponds to exactly one thread. |
Definition at line 378 of file reduce_mixed_core.h.
| __global__ void mixed::reduceKernel | ( | ReduceArg< ReduceType, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer > | arg | ) |
Generic reduction kernel with up to four loads and three saves.
Definition at line 123 of file reduce_mixed_core.h.
| doubleN mixed::reduceLaunch | ( | ReduceArg< ReduceType, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer > & | arg, |
| const TuneParam & | tp, | ||
| const cudaStream_t & | stream | ||
| ) |
Generic reduction kernel launcher.
Definition at line 253 of file reduce_mixed_core.h.
| __host__ __device__ void mixed::zero | ( | double & | x | ) |
Definition at line 3 of file reduce_mixed_core.h.
| __host__ __device__ void mixed::zero | ( | double2 & | x | ) |
Definition at line 4 of file reduce_mixed_core.h.
| __host__ __device__ void mixed::zero | ( | double3 & | x | ) |
Definition at line 5 of file reduce_mixed_core.h.
| __host__ __device__ void mixed::zero | ( | doublesingle & | x | ) |
Definition at line 52 of file reduce_mixed_core.h.
| __host__ __device__ void mixed::zero | ( | doublesingle2 & | x | ) |
Definition at line 53 of file reduce_mixed_core.h.
| __host__ __device__ void mixed::zero | ( | doublesingle3 & | x | ) |
Definition at line 54 of file reduce_mixed_core.h.
| __device__ unsigned int mixed::count = 0 |
Definition at line 97 of file reduce_mixed_core.h.
| __shared__ bool mixed::isLastBlockDone |
Definition at line 98 of file reduce_mixed_core.h.
1.8.6