QUDA v0.4.0
A library for QCD on GPUs
|
Public Member Functions | |
caxpbypzYmbwcDotProductUYNormY (const Float2 &a, const Float2 &b) | |
__device__ void | operator() (ReduceType &sum, const FloatN &x, FloatN &y, FloatN &z, const FloatN &w, const FloatN &v) |
Static Public Member Functions | |
static int | streams () |
static int | flops () |
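total number of input and output streams (note: this brief appears to describe streams(), not flops(); the documentation generator seems to have attached the source comment to the wrong member)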
Public Attributes | |
Float2 | a |
Float2 | b |
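This convoluted kernel does the following: z += a*x + b*y, y -= b*w, norm = (y,y), dot = (u,y). Here norm and dot are evaluated with the updated y, and the vector written as u in this description presumably corresponds to the parameter v of operator(), which has no parameter named u.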
Definition at line 1008 of file blas_quda.cu.
caxpbypzYmbwcDotProductUYNormY< ReduceType, Float2, FloatN >::caxpbypzYmbwcDotProductUYNormY (const Float2 &a, const Float2 &b) [inline]
Definition at line 1011 of file blas_quda.cu.
static int caxpbypzYmbwcDotProductUYNormY< ReduceType, Float2, FloatN >::flops () [inline, static]
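total number of input and output streams (note: this description appears to belong to streams() rather than flops(); it seems to have been attached to the wrong member by the documentation generator)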
Definition at line 1014 of file blas_quda.cu.
__device__ void caxpbypzYmbwcDotProductUYNormY< ReduceType, Float2, FloatN >::operator() (ReduceType &sum, const FloatN &x, FloatN &y, FloatN &z, const FloatN &w, const FloatN &v) [inline]
Definition at line 1012 of file blas_quda.cu.
static int caxpbypzYmbwcDotProductUYNormY< ReduceType, Float2, FloatN >::streams () [inline, static]
Definition at line 1013 of file blas_quda.cu.
Float2 caxpbypzYmbwcDotProductUYNormY< ReduceType, Float2, FloatN >::a |
Definition at line 1009 of file blas_quda.cu.
Float2 caxpbypzYmbwcDotProductUYNormY< ReduceType, Float2, FloatN >::b |
Definition at line 1010 of file blas_quda.cu.