QUDA  v0.7.0
A library for QCD on GPUs
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Classes | Functions | Variables
mixed Namespace Reference

Classes

struct  BlasArg
 
class  BlasCuda
 
struct  ReduceArg
 
class  ReduceCuda
 

Functions

template<typename FloatN , int M, typename SpinorX , typename SpinorY , typename SpinorZ , typename SpinorW , typename Functor >
__global__ void blasKernel (BlasArg< SpinorX, SpinorY, SpinorZ, SpinorW, Functor > arg)
 
template<template< typename Float, typename FloatN > class Functor, int writeX, int writeY, int writeZ, int writeW>
void blasCuda (const double2 &a, const double2 &b, const double2 &c, cudaColorSpinorField &x, cudaColorSpinorField &y, cudaColorSpinorField &z, cudaColorSpinorField &w)
 
__host__ __device__ void zero (double &x)
 
__host__ __device__ void zero (double2 &x)
 
__host__ __device__ void zero (double3 &x)
 
__device__ void copytoshared (double *s, const int i, const double x, const int block)
 
__device__ void copytoshared (double *s, const int i, const double2 x, const int block)
 
__device__ void copytoshared (double *s, const int i, const double3 x, const int block)
 
__device__ void copytoshared (volatile double *s, const int i, const double x, const int block)
 
__device__ void copytoshared (volatile double *s, const int i, const double2 x, const int block)
 
__device__ void copytoshared (volatile double *s, const int i, const double3 x, const int block)
 
__device__ void copyfromshared (double &x, const double *s, const int i, const int block)
 
__device__ void copyfromshared (double2 &x, const double *s, const int i, const int block)
 
__device__ void copyfromshared (double3 &x, const double *s, const int i, const int block)
 
template<typename ReduceType , typename ReduceSimpleType >
__device__ void add (ReduceType &sum, ReduceSimpleType *s, const int i, const int block)
 
template<>
__device__ void add< double, double > (double &sum, double *s, const int i, const int block)
 
template<>
__device__ void add< double2, double > (double2 &sum, double *s, const int i, const int block)
 
template<>
__device__ void add< double3, double > (double3 &sum, double *s, const int i, const int block)
 
template<typename ReduceType , typename ReduceSimpleType >
__device__ void add (ReduceSimpleType *s, const int i, const int j, const int block)
 
template<typename ReduceType , typename ReduceSimpleType >
__device__ void add (volatile ReduceSimpleType *s, const int i, const int j, const int block)
 
template<>
__device__ void add< double, double > (double *s, const int i, const int j, const int block)
 
template<>
__device__ void add< double, double > (volatile double *s, const int i, const int j, const int block)
 
template<>
__device__ void add< double2, double > (double *s, const int i, const int j, const int block)
 
template<>
__device__ void add< double2, double > (volatile double *s, const int i, const int j, const int block)
 
template<>
__device__ void add< double3, double > (double *s, const int i, const int j, const int block)
 
template<>
__device__ void add< double3, double > (volatile double *s, const int i, const int j, const int block)
 
__host__ __device__ void zero (doublesingle &x)
 
__host__ __device__ void zero (doublesingle2 &x)
 
__host__ __device__ void zero (doublesingle3 &x)
 
__device__ void copytoshared (doublesingle *s, const int i, const doublesingle x, const int block)
 
__device__ void copytoshared (doublesingle *s, const int i, const doublesingle2 x, const int block)
 
__device__ void copytoshared (doublesingle *s, const int i, const doublesingle3 x, const int block)
 
__device__ void copytoshared (volatile doublesingle *s, const int i, const doublesingle x, const int block)
 
__device__ void copytoshared (volatile doublesingle *s, const int i, const doublesingle2 x, const int block)
 
__device__ void copytoshared (volatile doublesingle *s, const int i, const doublesingle3 x, const int block)
 
__device__ void copyfromshared (doublesingle &x, const doublesingle *s, const int i, const int block)
 
__device__ void copyfromshared (doublesingle2 &x, const doublesingle *s, const int i, const int block)
 
__device__ void copyfromshared (doublesingle3 &x, const doublesingle *s, const int i, const int block)
 
template<>
__device__ void add< doublesingle, doublesingle > (doublesingle &sum, doublesingle *s, const int i, const int block)
 
template<>
__device__ void add< doublesingle2, doublesingle > (doublesingle2 &sum, doublesingle *s, const int i, const int block)
 
template<>
__device__ void add< doublesingle3, doublesingle > (doublesingle3 &sum, doublesingle *s, const int i, const int block)
 
template<>
__device__ void add< doublesingle, doublesingle > (doublesingle *s, const int i, const int j, const int block)
 
template<>
__device__ void add< doublesingle, doublesingle > (volatile doublesingle *s, const int i, const int j, const int block)
 
template<>
__device__ void add< doublesingle2, doublesingle > (doublesingle *s, const int i, const int j, const int block)
 
template<>
__device__ void add< doublesingle2, doublesingle > (volatile doublesingle *s, const int i, const int j, const int block)
 
template<>
__device__ void add< doublesingle3, doublesingle > (doublesingle *s, const int i, const int j, const int block)
 
template<>
__device__ void add< doublesingle3, doublesingle > (volatile doublesingle *s, const int i, const int j, const int block)
 
template<int block_size, typename ReduceType , typename ReduceSimpleType , typename FloatN , int M, typename SpinorX , typename SpinorY , typename SpinorZ , typename SpinorW , typename SpinorV , typename Reducer >
__global__ void reduceKernel (ReduceArg< ReduceType, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer > arg)
 
template<typename doubleN , typename ReduceType , typename ReduceSimpleType , typename FloatN , int M, typename SpinorX , typename SpinorY , typename SpinorZ , typename SpinorW , typename SpinorV , typename Reducer >
doubleN reduceLaunch (ReduceArg< ReduceType, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer > &arg, const TuneParam &tp, const cudaStream_t &stream)
 
template<typename doubleN , typename ReduceType , typename ReduceSimpleType , template< typename ReducerType, typename Float, typename FloatN > class Reducer, int writeX, int writeY, int writeZ, int writeW, int writeV, bool siteUnroll>
doubleN reduceCuda (const double2 &a, const double2 &b, cudaColorSpinorField &x, cudaColorSpinorField &y, cudaColorSpinorField &z, cudaColorSpinorField &w, cudaColorSpinorField &v)
 

Variables

__device__ unsigned int count = 0
 
__shared__ bool isLastBlockDone
 

Function Documentation

template<typename ReduceType , typename ReduceSimpleType >
__device__ void mixed::add ( ReduceType &  sum,
ReduceSimpleType *  s,
const int  i,
const int  block 
)

Definition at line 23 of file reduce_mixed_core.h.

template<typename ReduceType , typename ReduceSimpleType >
__device__ void mixed::add ( ReduceSimpleType *  s,
const int  i,
const int  j,
const int  block 
)

Definition at line 32 of file reduce_mixed_core.h.

template<typename ReduceType , typename ReduceSimpleType >
__device__ void mixed::add ( volatile ReduceSimpleType *  s,
const int  i,
const int  j,
const int  block 
)

Definition at line 34 of file reduce_mixed_core.h.

template<>
__device__ void mixed::add< double, double > ( double &  sum,
double *  s,
const int  i,
const int  block 
)

Definition at line 24 of file reduce_mixed_core.h.

template<>
__device__ void mixed::add< double, double > ( double *  s,
const int  i,
const int  j,
const int  block 
)

Definition at line 36 of file reduce_mixed_core.h.

template<>
__device__ void mixed::add< double, double > ( volatile double *  s,
const int  i,
const int  j,
const int  block 
)

Definition at line 38 of file reduce_mixed_core.h.

template<>
__device__ void mixed::add< double2, double > ( double2 &  sum,
double *  s,
const int  i,
const int  block 
)

Definition at line 26 of file reduce_mixed_core.h.

template<>
__device__ void mixed::add< double2, double > ( double *  s,
const int  i,
const int  j,
const int  block 
)

Definition at line 41 of file reduce_mixed_core.h.

template<>
__device__ void mixed::add< double2, double > ( volatile double *  s,
const int  i,
const int  j,
const int  block 
)

Definition at line 43 of file reduce_mixed_core.h.

template<>
__device__ void mixed::add< double3, double > ( double3 &  sum,
double *  s,
const int  i,
const int  block 
)

Definition at line 28 of file reduce_mixed_core.h.

template<>
__device__ void mixed::add< double3, double > ( double *  s,
const int  i,
const int  j,
const int  block 
)

Definition at line 46 of file reduce_mixed_core.h.

template<>
__device__ void mixed::add< double3, double > ( volatile double *  s,
const int  i,
const int  j,
const int  block 
)

Definition at line 48 of file reduce_mixed_core.h.

template<>
__device__ void mixed::add< doublesingle, doublesingle > ( doublesingle &  sum,
doublesingle *  s,
const int  i,
const int  block 
)

Definition at line 72 of file reduce_mixed_core.h.

template<>
__device__ void mixed::add< doublesingle, doublesingle > ( doublesingle *  s,
const int  i,
const int  j,
const int  block 
)

Definition at line 79 of file reduce_mixed_core.h.

template<>
__device__ void mixed::add< doublesingle, doublesingle > ( volatile doublesingle *  s,
const int  i,
const int  j,
const int  block 
)

Definition at line 81 of file reduce_mixed_core.h.

template<>
__device__ void mixed::add< doublesingle2, doublesingle > ( doublesingle2 &  sum,
doublesingle *  s,
const int  i,
const int  block 
)

Definition at line 74 of file reduce_mixed_core.h.

template<>
__device__ void mixed::add< doublesingle2, doublesingle > ( doublesingle *  s,
const int  i,
const int  j,
const int  block 
)

Definition at line 84 of file reduce_mixed_core.h.

template<>
__device__ void mixed::add< doublesingle2, doublesingle > ( volatile doublesingle *  s,
const int  i,
const int  j,
const int  block 
)

Definition at line 86 of file reduce_mixed_core.h.

template<>
__device__ void mixed::add< doublesingle3, doublesingle > ( doublesingle3 &  sum,
doublesingle *  s,
const int  i,
const int  block 
)

Definition at line 76 of file reduce_mixed_core.h.

template<>
__device__ void mixed::add< doublesingle3, doublesingle > ( doublesingle *  s,
const int  i,
const int  j,
const int  block 
)

Definition at line 89 of file reduce_mixed_core.h.

template<>
__device__ void mixed::add< doublesingle3, doublesingle > ( volatile doublesingle *  s,
const int  i,
const int  j,
const int  block 
)

Definition at line 91 of file reduce_mixed_core.h.

template<template< typename Float, typename FloatN > class Functor, int writeX, int writeY, int writeZ, int writeW>
void mixed::blasCuda ( const double2 &  a,
const double2 &  b,
const double2 &  c,
cudaColorSpinorField &  x,
cudaColorSpinorField &  y,
cudaColorSpinorField &  z,
cudaColorSpinorField &  w 
)

Driver for generic blas routine with four loads and two stores.

Definition at line 117 of file blas_mixed_core.h.

template<typename FloatN , int M, typename SpinorX , typename SpinorY , typename SpinorZ , typename SpinorW , typename Functor >
__global__ void mixed::blasKernel ( BlasArg< SpinorX, SpinorY, SpinorZ, SpinorW, Functor >  arg)

Generic blas kernel with four loads and up to four stores.

Definition at line 24 of file blas_mixed_core.h.

__device__ void mixed::copyfromshared ( double &  x,
const double *  s,
const int  i,
const int  block 
)

Definition at line 16 of file reduce_mixed_core.h.

__device__ void mixed::copyfromshared ( double2 &  x,
const double *  s,
const int  i,
const int  block 
)

Definition at line 17 of file reduce_mixed_core.h.

__device__ void mixed::copyfromshared ( double3 &  x,
const double *  s,
const int  i,
const int  block 
)

Definition at line 19 of file reduce_mixed_core.h.

__device__ void mixed::copyfromshared ( doublesingle &  x,
const doublesingle *  s,
const int  i,
const int  block 
)

Definition at line 66 of file reduce_mixed_core.h.

__device__ void mixed::copyfromshared ( doublesingle2 &  x,
const doublesingle *  s,
const int  i,
const int  block 
)

Definition at line 67 of file reduce_mixed_core.h.

__device__ void mixed::copyfromshared ( doublesingle3 &  x,
const doublesingle *  s,
const int  i,
const int  block 
)

Definition at line 69 of file reduce_mixed_core.h.

__device__ void mixed::copytoshared ( double *  s,
const int  i,
const double  x,
const int  block 
)

Definition at line 6 of file reduce_mixed_core.h.

__device__ void mixed::copytoshared ( double *  s,
const int  i,
const double2  x,
const int  block 
)

Definition at line 7 of file reduce_mixed_core.h.

__device__ void mixed::copytoshared ( double *  s,
const int  i,
const double3  x,
const int  block 
)

Definition at line 9 of file reduce_mixed_core.h.

__device__ void mixed::copytoshared ( volatile double *  s,
const int  i,
const double  x,
const int  block 
)

Definition at line 11 of file reduce_mixed_core.h.

__device__ void mixed::copytoshared ( volatile double *  s,
const int  i,
const double2  x,
const int  block 
)

Definition at line 12 of file reduce_mixed_core.h.

__device__ void mixed::copytoshared ( volatile double *  s,
const int  i,
const double3  x,
const int  block 
)

Definition at line 14 of file reduce_mixed_core.h.

__device__ void mixed::copytoshared ( doublesingle *  s,
const int  i,
const doublesingle  x,
const int  block 
)

Definition at line 55 of file reduce_mixed_core.h.

__device__ void mixed::copytoshared ( doublesingle *  s,
const int  i,
const doublesingle2  x,
const int  block 
)

Definition at line 56 of file reduce_mixed_core.h.

__device__ void mixed::copytoshared ( doublesingle *  s,
const int  i,
const doublesingle3  x,
const int  block 
)

Definition at line 58 of file reduce_mixed_core.h.

__device__ void mixed::copytoshared ( volatile doublesingle *  s,
const int  i,
const doublesingle  x,
const int  block 
)

Definition at line 60 of file reduce_mixed_core.h.

__device__ void mixed::copytoshared ( volatile doublesingle *  s,
const int  i,
const doublesingle2  x,
const int  block 
)

Definition at line 61 of file reduce_mixed_core.h.

__device__ void mixed::copytoshared ( volatile doublesingle *  s,
const int  i,
const doublesingle3  x,
const int  block 
)

Definition at line 63 of file reduce_mixed_core.h.

template<typename doubleN , typename ReduceType , typename ReduceSimpleType , template< typename ReducerType, typename Float, typename FloatN > class Reducer, int writeX, int writeY, int writeZ, int writeW, int writeV, bool siteUnroll>
doubleN mixed::reduceCuda ( const double2 &  a,
const double2 &  b,
cudaColorSpinorField &  x,
cudaColorSpinorField &  y,
cudaColorSpinorField &  z,
cudaColorSpinorField &  w,
cudaColorSpinorField &  v 
)

Driver for generic reduction routine with two loads.

Parameters
ReduceType
siteUnroll - if this is true, then one site corresponds to exactly one thread.

Definition at line 378 of file reduce_mixed_core.h.

template<int block_size, typename ReduceType , typename ReduceSimpleType , typename FloatN , int M, typename SpinorX , typename SpinorY , typename SpinorZ , typename SpinorW , typename SpinorV , typename Reducer >
__global__ void mixed::reduceKernel ( ReduceArg< ReduceType, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer >  arg)

Generic reduction kernel with up to four loads and three saves.

Definition at line 123 of file reduce_mixed_core.h.

template<typename doubleN , typename ReduceType , typename ReduceSimpleType , typename FloatN , int M, typename SpinorX , typename SpinorY , typename SpinorZ , typename SpinorW , typename SpinorV , typename Reducer >
doubleN mixed::reduceLaunch ( ReduceArg< ReduceType, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer > &  arg,
const TuneParam &  tp,
const cudaStream_t &  stream 
)

Generic reduction kernel launcher.

Definition at line 253 of file reduce_mixed_core.h.

__host__ __device__ void mixed::zero ( double &  x)

Definition at line 3 of file reduce_mixed_core.h.

__host__ __device__ void mixed::zero ( double2 &  x)

Definition at line 4 of file reduce_mixed_core.h.

__host__ __device__ void mixed::zero ( double3 &  x)

Definition at line 5 of file reduce_mixed_core.h.

__host__ __device__ void mixed::zero ( doublesingle &  x)

Definition at line 52 of file reduce_mixed_core.h.

__host__ __device__ void mixed::zero ( doublesingle2 &  x)

Definition at line 53 of file reduce_mixed_core.h.

__host__ __device__ void mixed::zero ( doublesingle3 &  x)

Definition at line 54 of file reduce_mixed_core.h.

Variable Documentation

__device__ unsigned int mixed::count = 0

Definition at line 97 of file reduce_mixed_core.h.

__shared__ bool mixed::isLastBlockDone

Definition at line 98 of file reduce_mixed_core.h.