|
QUDA v0.3.2
A library for QCD on GPUs
|
#include <stdlib.h>
#include <stdio.h>
#include <quda_internal.h>
#include <blas_quda.h>
#include <color_spinor_field.h>
#include <cuComplex.h>
#include <blas_param.h>
#include "reduce_core.h"
#include "reduce_complex_core.h"
#include "reduce_triple_core.h"

Go to the source code of this file.
Defines | |
| #define | REDUCE_MAX_BLOCKS 65536 |
| #define | REDUCE_DOUBLE 64 |
| #define | REDUCE_KAHAN 32 |
| #define | REDUCE_TYPE REDUCE_KAHAN |
| #define | QudaSumFloat float |
| #define | QudaSumComplex cuComplex |
| #define | QudaSumFloat3 float3 |
| #define | EMUSYNC |
| #define | READ_DOUBLE2_TEXTURE(x, i) fetch_double2(x##TexDouble2, i) |
| #define | READ_FLOAT2_TEXTURE(x, i) tex1Dfetch(x##TexSingle2, i) |
| #define | RECONSTRUCT_HALF_SPINOR(a, texHalf, texNorm, length) |
| #define | RECONSTRUCT_HALF_SPINOR_ST(a, texHalf, texNorm, length) |
| #define | READ_HALF_SPINOR_TEX(a, tex, texNorm, length) |
| #define | READ_HALF_SPINOR(a, tex, length) |
| #define | READ_HALF_SPINOR_ST(a, tex, length) |
| #define | SHORT_LENGTH 65536 |
| #define | SCALE_FLOAT ((SHORT_LENGTH-1) * 0.5) |
| #define | SHIFT_FLOAT (-1.f / (SHORT_LENGTH-1)) |
| #define | FAST_ABS_MAX(a, b) fmaxf(fabsf(a), fabsf(b)); |
| #define | FAST_MAX(a, b) fmaxf(a, b); |
| #define | CONSTRUCT_HALF_SPINOR_FROM_SINGLE(h, n, a, length) |
| #define | CONSTRUCT_HALF_SPINOR_FROM_DOUBLE(h, n, a, length) |
| #define | CONSTRUCT_HALF_SPINOR_FROM_SINGLE_ST(h, n, a, length) |
| #define | CONSTRUCT_HALF_SPINOR_FROM_DOUBLE_ST(h, n, a, length) |
| #define | SUM_FLOAT4(sum, a) float sum = a.x + a.y + a.z + a.w; |
| #define | SUM_FLOAT2(sum, a) float sum = a.x + a.y; |
| #define | REAL_DOT_FLOAT4(dot, a, b) float dot = a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w; |
| #define | REAL_DOT_FLOAT2(dot, a, b) float dot = a.x*b.x + a.y*b.y; |
| #define | IMAG_DOT_FLOAT4(dot, a, b) float dot = a.x*b.y - a.y*b.x + a.z*b.w - a.w*b.z; |
| #define | IMAG_DOT_FLOAT2(dot, a, b) float dot = a.x*b.y - a.y*b.x; |
| #define | AX_FLOAT4(a, X) X.x *= a; X.y *= a; X.z *= a; X.w *= a; |
| #define | AX_FLOAT2(a, X) X.x *= a; X.y *= a; |
| #define | XPY_FLOAT4(X, Y) Y.x += X.x; Y.y += X.y; Y.z += X.z; Y.w += X.w; |
| #define | XPY_FLOAT2(X, Y) Y.x += X.x; Y.y += X.y; |
| #define | XMY_FLOAT4(X, Y) Y.x = X.x - Y.x; Y.y = X.y - Y.y; Y.z = X.z - Y.z; Y.w = X.w - Y.w; |
| #define | XMY_FLOAT2(X, Y) Y.x = X.x - Y.x; Y.y = X.y - Y.y; |
| #define | MXPY_FLOAT4(X, Y) Y.x -= X.x; Y.y -= X.y; Y.z -= X.z; Y.w -= X.w; |
| #define | MXPY_FLOAT2(X, Y) Y.x -= X.x; Y.y -= X.y; |
| #define | AXPY_FLOAT4(a, X, Y) |
| #define | AXPY_FLOAT2(a, X, Y) Y.x += a*X.x; Y.y += a*X.y; |
| #define | AXPBY_FLOAT4(a, X, b, Y) |
| #define | AXPBY_FLOAT2(a, X, b, Y) Y.x = b*Y.x; Y.x += a*X.x; Y.y = b*Y.y; Y.y += a*X.y; \ |
| #define | XPAY_FLOAT4(X, a, Y) |
| #define | XPAY_FLOAT2(X, a, Y) Y.x = X.x + a*Y.x; Y.y = X.y + a*Y.y; |
| #define | CAXPY_FLOAT4(a, X, Y) |
| #define | CAXPY_FLOAT2(a, X, Y) |
| #define | CMAXPY_FLOAT4(a, X, Y) |
| #define | CAXPBY_FLOAT4(a, X, b, Y) |
| #define | CAXPBY_FLOAT2(a, X, b, Y) |
| #define | CXPAYPBZ_FLOAT4(X, a, Y, b, Z) |
| #define | CXPAYPBZ_FLOAT2(X, a, Y, b, Z) |
| #define | CAXPBYPZ_FLOAT4(a, X, b, Y, Z) |
| #define | CAXPBYPZ_FLOAT2(a, X, b, Y, Z) |
| #define | checkSpinor(a, b) |
| #define | checkSpinorLength(a, b) |
| #define | REDUCE_FUNC_NAME(suffix) sumD##suffix |
| #define | REDUCE_TYPES Float *a |
| #define | REDUCE_PARAMS a |
| #define | REDUCE_AUXILIARY(i) |
| #define | REDUCE_OPERATION(i) a[i] |
| #define | REDUCE_FUNC_NAME(suffix) sumS##suffix |
| #define | REDUCE_TYPES Float *a |
| #define | REDUCE_PARAMS a |
| #define | REDUCE_AUXILIARY(i) |
| #define | REDUCE_OPERATION(i) a[i].x + a[i].y |
| #define | REDUCE_FUNC_NAME(suffix) sumH##suffix |
| #define | REDUCE_TYPES Float *aN, int stride |
| #define | REDUCE_PARAMS aN, stride |
| #define | REDUCE_AUXILIARY(i) |
| #define | REDUCE_OPERATION(i) (ac*s0) |
| #define | REDUCE_FUNC_NAME(suffix) sumHSt##suffix |
| #define | REDUCE_TYPES Float *aN, int stride |
| #define | REDUCE_PARAMS aN, stride |
| #define | REDUCE_AUXILIARY(i) |
| #define | REDUCE_OPERATION(i) (ac*s0) |
| #define | REDUCE_FUNC_NAME(suffix) normD##suffix |
| #define | REDUCE_TYPES Float *a |
| #define | REDUCE_PARAMS a |
| #define | REDUCE_AUXILIARY(i) |
| #define | REDUCE_OPERATION(i) (a[i]*a[i]) |
| #define | REDUCE_FUNC_NAME(suffix) normS##suffix |
| #define | REDUCE_TYPES Float *a |
| #define | REDUCE_PARAMS a |
| #define | REDUCE_AUXILIARY(i) |
| #define | REDUCE_OPERATION(i) (a[i].x*a[i].x + a[i].y*a[i].y) |
| #define | REDUCE_FUNC_NAME(suffix) normH##suffix |
| #define | REDUCE_TYPES Float *aN, int stride |
| #define | REDUCE_PARAMS aN, stride |
| #define | REDUCE_AUXILIARY(i) |
| #define | REDUCE_OPERATION(i) (ac*ac*norm0) |
| #define | REDUCE_FUNC_NAME(suffix) normHSt##suffix |
| #define | REDUCE_TYPES Float *aN, int stride |
| #define | REDUCE_PARAMS aN, stride |
| #define | REDUCE_AUXILIARY(i) |
| #define | REDUCE_OPERATION(i) (ac*ac*norm0) |
| #define | REDUCE_FUNC_NAME(suffix) reDotProductD##suffix |
| #define | REDUCE_TYPES Float *a, Float *b |
| #define | REDUCE_PARAMS a, b |
| #define | REDUCE_AUXILIARY(i) |
| #define | REDUCE_OPERATION(i) (a[i]*b[i]) |
| #define | REDUCE_FUNC_NAME(suffix) reDotProductS##suffix |
| #define | REDUCE_TYPES Float *a, Float *b |
| #define | REDUCE_PARAMS a, b |
| #define | REDUCE_AUXILIARY(i) |
| #define | REDUCE_OPERATION(i) (a[i].x*b[i].x + a[i].y*b[i].y) |
| #define | REDUCE_FUNC_NAME(suffix) reDotProductH##suffix |
| #define | REDUCE_TYPES Float *aN, Float *bN, int stride |
| #define | REDUCE_PARAMS aN, bN, stride |
| #define | REDUCE_AUXILIARY(i) |
| #define | REDUCE_OPERATION(i) (ac*bc*rdot0) |
| #define | REDUCE_FUNC_NAME(suffix) reDotProductHSt##suffix |
| #define | REDUCE_TYPES Float *aN, Float *bN, int stride |
| #define | REDUCE_PARAMS aN, bN, stride |
| #define | REDUCE_AUXILIARY(i) |
| #define | REDUCE_OPERATION(i) (ac*bc*rdot0) |
| #define | REDUCE_FUNC_NAME(suffix) axpyNormF##suffix |
| #define | REDUCE_TYPES Float a, Float *x, Float *y |
| #define | REDUCE_PARAMS a, x, y |
| #define | REDUCE_AUXILIARY(i) y[i] = a*x[i] + y[i] |
| #define | REDUCE_OPERATION(i) (y[i]*y[i]) |
| #define | REDUCE_FUNC_NAME(suffix) axpyNormH##suffix |
| #define | REDUCE_TYPES Float a, short4 *yH, float *yN, int stride |
| #define | REDUCE_PARAMS a, yH, yN, stride |
| #define | REDUCE_AUXILIARY(i) |
| #define | REDUCE_OPERATION(i) (norm0) |
| #define | REDUCE_FUNC_NAME(suffix) axpyNormH##suffix |
| #define | REDUCE_TYPES Float a, short2 *yH, float *yN, int stride |
| #define | REDUCE_PARAMS a, yH, yN, stride |
| #define | REDUCE_AUXILIARY(i) |
| #define | REDUCE_OPERATION(i) (norm0) |
| #define | REDUCE_FUNC_NAME(suffix) xmyNormF##suffix |
| #define | REDUCE_TYPES Float *x, Float *y |
| #define | REDUCE_PARAMS x, y |
| #define | REDUCE_AUXILIARY(i) y[i] = x[i] - y[i] |
| #define | REDUCE_OPERATION(i) (y[i]*y[i]) |
| #define | REDUCE_FUNC_NAME(suffix) xmyNormH##suffix |
| #define | REDUCE_TYPES Float *d1, Float *d2, short4 *yH, float *yN, int stride |
| #define | REDUCE_PARAMS d1, d2, yH, yN, stride |
| #define | REDUCE_AUXILIARY(i) |
| #define | REDUCE_OPERATION(i) (norm0) |
| #define | REDUCE_FUNC_NAME(suffix) xmyNormH##suffix |
| #define | REDUCE_TYPES Float *d1, Float *d2, short2 *yH, float *yN, int stride |
| #define | REDUCE_PARAMS d1, d2, yH, yN, stride |
| #define | REDUCE_AUXILIARY(i) |
| #define | REDUCE_OPERATION(i) (norm0) |
| #define | REDUCE_FUNC_NAME(suffix) cDotProductD##suffix |
| #define | REDUCE_TYPES Float2 *x, Float2 *y, Float c |
| #define | REDUCE_PARAMS x, y, c |
| #define | REDUCE_REAL_AUXILIARY(i) Float2 a = READ_DOUBLE2_TEXTURE(x, i); |
| #define | REDUCE_IMAG_AUXILIARY(i) Float2 b = READ_DOUBLE2_TEXTURE(y, i); |
| #define | REDUCE_REAL_OPERATION(i) (a.x*b.x + a.y*b.y) |
| #define | REDUCE_IMAG_OPERATION(i) (a.x*b.y - a.y*b.x) |
| #define | REDUCE_FUNC_NAME(suffix) cDotProductS##suffix |
| #define | REDUCE_TYPES Float2 *x, Float2 *y, Float c |
| #define | REDUCE_PARAMS x, y, c |
| #define | REDUCE_REAL_AUXILIARY(i) Float2 a = read_Float2(x, i); |
| #define | REDUCE_IMAG_AUXILIARY(i) Float2 b = read_Float2(y, i); |
| #define | REDUCE_REAL_OPERATION(i) (a.x*b.x + a.y*b.y) |
| #define | REDUCE_IMAG_OPERATION(i) (a.x*b.y - a.y*b.x) |
| #define | REDUCE_FUNC_NAME(suffix) cDotProductH##suffix |
| #define | REDUCE_TYPES Float *aN, Float2 *bN, int stride |
| #define | REDUCE_PARAMS aN, bN, stride |
| #define | REDUCE_REAL_AUXILIARY(i) |
| #define | REDUCE_IMAG_AUXILIARY(i) |
| #define | REDUCE_REAL_OPERATION(i) (ac*bc*rdot0) |
| #define | REDUCE_IMAG_OPERATION(i) (ac*bc*idot0) |
| #define | REDUCE_FUNC_NAME(suffix) cDotProductHSt##suffix |
| #define | REDUCE_TYPES Float *aN, Float2 *bN, int stride |
| #define | REDUCE_PARAMS aN, bN, stride |
| #define | REDUCE_REAL_AUXILIARY(i) |
| #define | REDUCE_IMAG_AUXILIARY(i) |
| #define | REDUCE_REAL_OPERATION(i) (ac*bc*rdot0) |
| #define | REDUCE_IMAG_OPERATION(i) (ac*bc*idot0) |
| #define | REDUCE_FUNC_NAME(suffix) xpaycDotzyD##suffix |
| #define | REDUCE_TYPES Float2 *x, Float a, Float2 *y, Float2 *z |
| #define | REDUCE_PARAMS x, a, y, z |
| #define | REDUCE_REAL_AUXILIARY(i) |
| #define | REDUCE_IMAG_AUXILIARY(i) y[i].x = X.x + a*Y.x; y[i].y = X.y + a*Y.y |
| #define | REDUCE_REAL_OPERATION(i) (Z.x*y[i].x + Z.y*y[i].y) |
| #define | REDUCE_IMAG_OPERATION(i) (Z.x*y[i].y - Z.y*y[i].x) |
| #define | REDUCE_FUNC_NAME(suffix) xpaycDotzyS##suffix |
| #define | REDUCE_TYPES Float2 *x, Float a, Float2 *y, Float2 *z |
| #define | REDUCE_PARAMS x, a, y, z |
| #define | REDUCE_REAL_AUXILIARY(i) y[i].x = x[i].x + a*y[i].x |
| #define | REDUCE_IMAG_AUXILIARY(i) y[i].y = x[i].y + a*y[i].y |
| #define | REDUCE_REAL_OPERATION(i) (z[i].x*y[i].x + z[i].y*y[i].y) |
| #define | REDUCE_IMAG_OPERATION(i) (z[i].x*y[i].y - z[i].y*y[i].x) |
| #define | REDUCE_FUNC_NAME(suffix) xpaycDotzyH##suffix |
| #define | REDUCE_TYPES Float a, short4 *yH, Float2 *yN, int stride |
| #define | REDUCE_PARAMS a, yH, yN, stride |
| #define | REDUCE_REAL_AUXILIARY(i) |
| #define | REDUCE_IMAG_AUXILIARY(i) |
| #define | REDUCE_REAL_OPERATION(i) (rdot0) |
| #define | REDUCE_IMAG_OPERATION(i) (idot0) |
| #define | REDUCE_FUNC_NAME(suffix) xpaycDotzyH##suffix |
| #define | REDUCE_TYPES Float a, short2 *yH, Float2 *yN, int stride |
| #define | REDUCE_PARAMS a, yH, yN, stride |
| #define | REDUCE_REAL_AUXILIARY(i) |
| #define | REDUCE_IMAG_AUXILIARY(i) |
| #define | REDUCE_REAL_OPERATION(i) (rdot0) |
| #define | REDUCE_IMAG_OPERATION(i) (idot0) |
| #define | REDUCE_FUNC_NAME(suffix) cDotProductNormAD##suffix |
| #define | REDUCE_TYPES Float2 *x, Float2 *y |
| #define | REDUCE_PARAMS x, y |
| #define | REDUCE_X_AUXILIARY(i) Float2 a = READ_DOUBLE2_TEXTURE(x, i); |
| #define | REDUCE_Y_AUXILIARY(i) Float2 b = READ_DOUBLE2_TEXTURE(y, i); |
| #define | REDUCE_Z_AUXILIARY(i) |
| #define | REDUCE_X_OPERATION(i) (a.x*b.x + a.y*b.y) |
| #define | REDUCE_Y_OPERATION(i) (a.x*b.y - a.y*b.x) |
| #define | REDUCE_Z_OPERATION(i) (a.x*a.x + a.y*a.y) |
| #define | REDUCE_FUNC_NAME(suffix) cDotProductNormAS##suffix |
| #define | REDUCE_TYPES Float2 *a, Float2 *b |
| #define | REDUCE_PARAMS a, b |
| #define | REDUCE_X_AUXILIARY(i) |
| #define | REDUCE_Y_AUXILIARY(i) |
| #define | REDUCE_Z_AUXILIARY(i) |
| #define | REDUCE_X_OPERATION(i) (a[i].x*b[i].x + a[i].y*b[i].y) |
| #define | REDUCE_Y_OPERATION(i) (a[i].x*b[i].y - a[i].y*b[i].x) |
| #define | REDUCE_Z_OPERATION(i) (a[i].x*a[i].x + a[i].y*a[i].y) |
| #define | REDUCE_FUNC_NAME(suffix) cDotProductNormAH##suffix |
| #define | REDUCE_TYPES Float2 *xN, Float2 *yN, int stride |
| #define | REDUCE_PARAMS xN, yN, stride |
| #define | REDUCE_X_AUXILIARY(i) |
| #define | REDUCE_Y_AUXILIARY(i) |
| #define | REDUCE_Z_AUXILIARY(i) |
| #define | REDUCE_X_OPERATION(i) (xc*yc*rdot0) |
| #define | REDUCE_Y_OPERATION(i) (xc*yc*idot0) |
| #define | REDUCE_Z_OPERATION(i) (xc*xc*norm0) |
| #define | REDUCE_FUNC_NAME(suffix) cDotProductNormAHSt##suffix |
| #define | REDUCE_TYPES Float2 *xN, Float2 *yN, int stride |
| #define | REDUCE_PARAMS xN, yN, stride |
| #define | REDUCE_X_AUXILIARY(i) |
| #define | REDUCE_Y_AUXILIARY(i) |
| #define | REDUCE_Z_AUXILIARY(i) |
| #define | REDUCE_X_OPERATION(i) (xc*yc*rdot0) |
| #define | REDUCE_Y_OPERATION(i) (xc*yc*idot0) |
| #define | REDUCE_Z_OPERATION(i) (xc*xc*norm0) |
| #define | REDUCE_FUNC_NAME(suffix) cDotProductNormBD##suffix |
| #define | REDUCE_TYPES Float2 *x, Float2 *y |
| #define | REDUCE_PARAMS x, y |
| #define | REDUCE_X_AUXILIARY(i) Float2 a = READ_DOUBLE2_TEXTURE(x, i); |
| #define | REDUCE_Y_AUXILIARY(i) Float2 b = READ_DOUBLE2_TEXTURE(y, i); |
| #define | REDUCE_Z_AUXILIARY(i) |
| #define | REDUCE_X_OPERATION(i) (a.x*b.x + a.y*b.y) |
| #define | REDUCE_Y_OPERATION(i) (a.x*b.y - a.y*b.x) |
| #define | REDUCE_Z_OPERATION(i) (b.x*b.x + b.y*b.y) |
| #define | REDUCE_FUNC_NAME(suffix) cDotProductNormBS##suffix |
| #define | REDUCE_TYPES Float2 *a, Float2 *b |
| #define | REDUCE_PARAMS a, b |
| #define | REDUCE_X_AUXILIARY(i) |
| #define | REDUCE_Y_AUXILIARY(i) |
| #define | REDUCE_Z_AUXILIARY(i) |
| #define | REDUCE_X_OPERATION(i) (a[i].x*b[i].x + a[i].y*b[i].y) |
| #define | REDUCE_Y_OPERATION(i) (a[i].x*b[i].y - a[i].y*b[i].x) |
| #define | REDUCE_Z_OPERATION(i) (b[i].x*b[i].x + b[i].y*b[i].y) |
| #define | REDUCE_FUNC_NAME(suffix) cDotProductNormBH##suffix |
| #define | REDUCE_TYPES Float2 *xN, Float2 *yN, int stride |
| #define | REDUCE_PARAMS xN, yN, stride |
| #define | REDUCE_X_AUXILIARY(i) |
| #define | REDUCE_Y_AUXILIARY(i) |
| #define | REDUCE_Z_AUXILIARY(i) |
| #define | REDUCE_X_OPERATION(i) (xc*yc*rdot0) |
| #define | REDUCE_Y_OPERATION(i) (xc*yc*idot0) |
| #define | REDUCE_Z_OPERATION(i) (yc*yc*norm0) |
| #define | REDUCE_FUNC_NAME(suffix) cDotProductNormBHSt##suffix |
| #define | REDUCE_TYPES Float2 *xN, Float2 *yN, int stride |
| #define | REDUCE_PARAMS xN, yN, stride |
| #define | REDUCE_X_AUXILIARY(i) |
| #define | REDUCE_Y_AUXILIARY(i) |
| #define | REDUCE_Z_AUXILIARY(i) |
| #define | REDUCE_X_OPERATION(i) (xc*yc*rdot0) |
| #define | REDUCE_Y_OPERATION(i) (xc*yc*idot0) |
| #define | REDUCE_Z_OPERATION(i) (yc*yc*norm0) |
| #define | REDUCE_FUNC_NAME(suffix) caxpbypzYmbwcDotProductWYNormYD##suffix |
| #define | REDUCE_TYPES Float2 a, Float2 *x, Float2 b, Float2 *y, Float2 *z, Float2 *w, Float2 *u |
| #define | REDUCE_PARAMS a, x, b, y, z, w, u |
| #define | REDUCE_X_AUXILIARY(i) |
| #define | REDUCE_Y_AUXILIARY(i) |
| #define | REDUCE_Z_AUXILIARY(i) |
| #define | REDUCE_X_OPERATION(i) (u[i].x*y[i].x + u[i].y*y[i].y) |
| #define | REDUCE_Y_OPERATION(i) (u[i].x*y[i].y - u[i].y*y[i].x) |
| #define | REDUCE_Z_OPERATION(i) (y[i].x*y[i].x + y[i].y*y[i].y) |
| #define | REDUCE_FUNC_NAME(suffix) caxpbypzYmbwcDotProductWYNormYS##suffix |
| #define | REDUCE_TYPES Float2 a, Float2 *x, Float2 b, Float2 *y, Float2 *z, Float2 *w, Float2 *u |
| #define | REDUCE_PARAMS a, x, b, y, z, w, u |
| #define | REDUCE_X_AUXILIARY(i) |
| #define | REDUCE_Y_AUXILIARY(i) |
| #define | REDUCE_Z_AUXILIARY(i) |
| #define | REDUCE_X_OPERATION(i) (u[i].x*y[i].x + u[i].y*y[i].y) |
| #define | REDUCE_Y_OPERATION(i) (u[i].x*y[i].y - u[i].y*y[i].x) |
| #define | REDUCE_Z_OPERATION(i) (y[i].x*y[i].x + y[i].y*y[i].y) |
| #define | REDUCE_FUNC_NAME(suffix) caxpbypzYmbwcDotProductWYNormYH##suffix |
| #define | REDUCE_TYPES Float2 a, Float2 b, short4 *yH, float *yN, short4 *zH, float *zN, float *wN, float *uN, int stride |
| #define | REDUCE_PARAMS a, b, yH, yN, zH, zN, wN, uN, stride |
| #define | REDUCE_X_AUXILIARY(i) |
| #define | REDUCE_Y_AUXILIARY(i) |
| #define | REDUCE_Z_AUXILIARY(i) |
| #define | REDUCE_X_OPERATION(i) (uc*rdot0) |
| #define | REDUCE_Y_OPERATION(i) (uc*idot0) |
| #define | REDUCE_Z_OPERATION(i) (norm0) |
| #define | REDUCE_FUNC_NAME(suffix) caxpbypzYmbwcDotProductWYNormYH##suffix |
| #define | REDUCE_TYPES Float2 a, Float2 b, short2 *yH, float *yN, short2 *zH, float *zN, float *wN, float *uN, int stride |
| #define | REDUCE_PARAMS a, b, yH, yN, zH, zN, wN, uN, stride |
| #define | REDUCE_X_AUXILIARY(i) |
| #define | REDUCE_Y_AUXILIARY(i) |
| #define | REDUCE_Z_AUXILIARY(i) |
| #define | REDUCE_X_OPERATION(i) (uc*rdot0) |
| #define | REDUCE_Y_OPERATION(i) (uc*idot0) |
| #define | REDUCE_Z_OPERATION(i) (norm0) |
Functions | |
| double2 | operator+ (const double2 &x, const double2 &y) |
| double3 | operator+ (const double3 &x, const double3 &y) |
| __device__ float2 | operator* (const float a, const float2 x) |
| template<typename Float2 > | |
| __device__ Float2 | operator+ (const Float2 x, const Float2 y) |
| template<typename Float2 > | |
| __device__ Float2 | operator+= (Float2 &x, const Float2 y) |
| template<typename Float2 > | |
| __device__ Float2 | operator-= (Float2 &x, const Float2 y) |
| template<typename Float , typename Float2 > | |
| __device__ Float2 | operator*= (Float2 &x, const Float a) |
| template<typename Float > | |
| __device__ float4 | operator*= (float4 &a, const Float &b) |
| void | zeroCuda (cudaColorSpinorField &a) |
| void | initBlas (void) |
| void | endBlas (void) |
| void | setBlasTuning (int tuning) |
| void | setBlasParam (int kernel, int prec, int threads, int blocks) |
| void | setBlock (int kernel, int length, QudaPrecision precision) |
| float2 __device__ | read_Float2 (float2 *x, int i) |
| double2 __device__ | read_Float2 (double2 *x, int i) |
| float2 __device__ | make_Float2 (float2 x) |
| double2 __device__ | make_Float2 (double2 x) |
| __device__ short | float2short (float c, float a) |
| __device__ float | short2float (short a) |
| __device__ short4 | float42short4 (float c, float4 a) |
| __device__ float | fast_abs_max (float4 a) |
| __global__ void | convertDSKernel (double2 *dst, float4 *src, int length) |
| __global__ void | convertDSKernel (double2 *dst, float2 *src, int length) |
| __global__ void | convertSDKernel (float4 *dst, double2 *src, int length) |
| __global__ void | convertSDKernel (float2 *dst, double2 *src, int length) |
| __global__ void | convertHSKernel (short4 *h, float *norm, int length, int real_length) |
| __global__ void | convertHSKernel (short2 *h, float *norm, int length, int real_length) |
| __global__ void | convertSHKernel (float4 *res, int length, int real_length) |
| __global__ void | convertSHKernel (float2 *res, int length, int real_length) |
| __global__ void | convertHDKernel (short4 *h, float *norm, int length, int real_length) |
| __global__ void | convertHDKernel (short2 *h, float *norm, int length, int real_length) |
| __global__ void | convertDHKernel (double2 *res, int length, int real_length) |
| __global__ void | convertDHKernelSt (double2 *res, int length, int real_length) |
| void | copyCuda (cudaColorSpinorField &dst, const cudaColorSpinorField &src) |
| template<typename Float , typename Float2 > | |
| __global__ void | axpbyKernel (Float a, Float2 *x, Float b, Float2 *y, int length) |
| __global__ void | axpbyHKernel (float a, float b, short4 *yH, float *yN, int stride, int length) |
| __global__ void | axpbyHKernel (float a, float b, short2 *yH, float *yN, int stride, int length) |
| void | axpbyCuda (const double &a, cudaColorSpinorField &x, const double &b, cudaColorSpinorField &y) |
| template<typename Float > | |
| __global__ void | xpyKernel (Float *x, Float *y, int len) |
| __global__ void | xpyHKernel (short4 *yH, float *yN, int stride, int length) |
| __global__ void | xpyHKernel (short2 *yH, float *yN, int stride, int length) |
| void | xpyCuda (cudaColorSpinorField &x, cudaColorSpinorField &y) |
| template<typename Float , typename Float2 > | |
| __global__ void | axpyKernel (Float a, Float2 *x, Float2 *y, int len) |
| __global__ void | axpyHKernel (float a, short4 *yH, float *yN, int stride, int length) |
| __global__ void | axpyHKernel (float a, short2 *yH, float *yN, int stride, int length) |
| void | axpyCuda (const double &a, cudaColorSpinorField &x, cudaColorSpinorField &y) |
| template<typename Float , typename Float2 > | |
| __global__ void | xpayKernel (const Float2 *x, Float a, Float2 *y, int len) |
| __global__ void | xpayHKernel (float a, short4 *yH, float *yN, int stride, int length) |
| __global__ void | xpayHKernel (float a, short2 *yH, float *yN, int stride, int length) |
| void | xpayCuda (const cudaColorSpinorField &x, const double &a, cudaColorSpinorField &y) |
| template<typename Float > | |
| __global__ void | mxpyKernel (Float *x, Float *y, int len) |
| __global__ void | mxpyHKernel (short4 *yH, float *yN, int stride, int length) |
| __global__ void | mxpyHKernel (short2 *yH, float *yN, int stride, int length) |
| void | mxpyCuda (cudaColorSpinorField &x, cudaColorSpinorField &y) |
| template<typename Float , typename Float2 > | |
| __global__ void | axKernel (Float a, Float2 *x, int len) |
| __global__ void | axHKernel (float a, short4 *xH, float *xN, int stride, int length) |
| __global__ void | axHKernel (float a, short2 *xH, float *xN, int stride, int length) |
| void | axCuda (const double &a, cudaColorSpinorField &x) |
| template<typename Float2 > | |
| __global__ void | caxpyDKernel (Float2 a, Float2 *x, Float2 *y, int len) |
| template<typename Float2 > | |
| __global__ void | caxpySKernel (Float2 a, Float2 *x, Float2 *y, int len) |
| __global__ void | caxpyHKernel (float2 a, short4 *yH, float *yN, int stride, int length) |
| __global__ void | caxpyHKernel (float2 a, short2 *yH, float *yN, int stride, int length) |
| void | caxpyCuda (const Complex &a, cudaColorSpinorField &x, cudaColorSpinorField &y) |
| template<typename Float2 > | |
| __global__ void | caxpbyDKernel (Float2 a, Float2 *x, Float2 b, Float2 *y, int len) |
| template<typename Float2 > | |
| __global__ void | caxpbySKernel (Float2 a, Float2 *x, Float2 b, Float2 *y, int len) |
| __global__ void | caxpbyHKernel (float2 a, float2 b, short4 *yH, float *yN, int stride, int length) |
| __global__ void | caxpbyHKernel (float2 a, float2 b, short2 *yH, float *yN, int stride, int length) |
| void | caxpbyCuda (const Complex &a, cudaColorSpinorField &x, const Complex &b, cudaColorSpinorField &y) |
| template<typename Float2 > | |
| __global__ void | cxpaypbzDKernel (Float2 *x, Float2 a, Float2 *y, Float2 b, Float2 *z, int len) |
| template<typename Float2 > | |
| __global__ void | cxpaypbzSKernel (Float2 *x, Float2 a, Float2 *y, Float2 b, Float2 *z, int len) |
| __global__ void | cxpaypbzHKernel (float2 a, float2 b, short4 *zH, float *zN, int stride, int length) |
| __global__ void | cxpaypbzHKernel (float2 a, float2 b, short2 *zH, float *zN, int stride, int length) |
| void | cxpaypbzCuda (cudaColorSpinorField &x, const Complex &a, cudaColorSpinorField &y, const Complex &b, cudaColorSpinorField &z) |
| template<typename Float , typename Float2 > | |
| __global__ void | axpyBzpcxDKernel (Float a, Float2 *x, Float2 *y, Float b, Float2 *z, Float c, int len) |
| template<typename Float , typename Float2 > | |
| __global__ void | axpyBzpcxSKernel (Float a, Float2 *x, Float2 *y, Float b, Float2 *z, Float c, int len) |
| __global__ void | axpyBzpcxHKernel (float a, float b, float c, short4 *xH, float *xN, short4 *yH, float *yN, int stride, int length) |
| __global__ void | axpyBzpcxHKernel (float a, float b, float c, short2 *xH, float *xN, short2 *yH, float *yN, int stride, int length) |
| void | axpyBzpcxCuda (const double &a, cudaColorSpinorField &x, cudaColorSpinorField &y, const double &b, cudaColorSpinorField &z, const double &c) |
| template<typename Float , typename Float2 > | |
| __global__ void | axpyZpbxDKernel (Float a, Float2 *x, Float2 *y, Float2 *z, Float b, int len) |
| template<typename Float , typename Float2 > | |
| __global__ void | axpyZpbxSKernel (Float a, Float2 *x, Float2 *y, Float2 *z, Float b, int len) |
| __global__ void | axpyZpbxHKernel (float a, float b, short4 *xH, float *xN, short4 *yH, float *yN, int stride, int length) |
| __global__ void | axpyZpbxHKernel (float a, float b, short2 *xH, float *xN, short2 *yH, float *yN, int stride, int length) |
| void | axpyZpbxCuda (const double &a, cudaColorSpinorField &x, cudaColorSpinorField &y, cudaColorSpinorField &z, const double &b) |
| template<typename Float2 > | |
| __global__ void | caxpbypzYmbwDKernel (Float2 a, Float2 *x, Float2 b, Float2 *y, Float2 *z, Float2 *w, int len) |
| template<typename Float2 > | |
| __global__ void | caxpbypzYmbwSKernel (Float2 a, Float2 *x, Float2 b, Float2 *y, Float2 *z, Float2 *w, int len) |
| __global__ void | caxpbypzYmbwHKernel (float2 a, float2 b, float *xN, short4 *yH, float *yN, short4 *zH, float *zN, float *wN, int stride, int length) |
| __global__ void | caxpbypzYmbwHKernel (float2 a, float2 b, float *xN, short2 *yH, float *yN, short2 *zH, float *zN, float *wN, int stride, int length) |
| void | caxpbypzYmbwCuda (const Complex &a, cudaColorSpinorField &x, const Complex &b, cudaColorSpinorField &y, cudaColorSpinorField &z, cudaColorSpinorField &w) |
| __device__ void | dsadd (volatile QudaSumFloat &c0, volatile QudaSumFloat &c1, const volatile QudaSumFloat &a0, const volatile QudaSumFloat &a1, const float b0, const float b1) |
| __device__ void | zcadd (volatile QudaSumComplex &c0, volatile QudaSumComplex &c1, const volatile QudaSumComplex &a0, const volatile QudaSumComplex &a1, const volatile QudaSumComplex &b0, const volatile QudaSumComplex &b1) |
| __device__ void | dsadd3 (volatile QudaSumFloat3 &c0, volatile QudaSumFloat3 &c1, const volatile QudaSumFloat3 &a0, const volatile QudaSumFloat3 &a1, const volatile QudaSumFloat3 &b0, const volatile QudaSumFloat3 &b1) |
| double | sumCuda (cudaColorSpinorField &a) |
| double | normCuda (const cudaColorSpinorField &a) |
| double | reDotProductCuda (cudaColorSpinorField &a, cudaColorSpinorField &b) |
| double | axpyNormCuda (const double &a, cudaColorSpinorField &x, cudaColorSpinorField &y) |
| double | xmyNormCuda (cudaColorSpinorField &x, cudaColorSpinorField &y) |
| Complex | cDotProductCuda (cudaColorSpinorField &x, cudaColorSpinorField &y) |
| Complex | xpaycDotzyCuda (cudaColorSpinorField &x, const double &a, cudaColorSpinorField &y, cudaColorSpinorField &z) |
| double3 | cDotProductNormACuda (cudaColorSpinorField &x, cudaColorSpinorField &y) |
| double3 | cDotProductNormBCuda (cudaColorSpinorField &x, cudaColorSpinorField &y) |
| double3 | caxpbypzYmbwcDotProductWYNormYCuda (const Complex &a, cudaColorSpinorField &x, const Complex &b, cudaColorSpinorField &y, cudaColorSpinorField &z, cudaColorSpinorField &w, cudaColorSpinorField &u) |
Variables | |
| unsigned long long | blas_quda_flops |
| unsigned long long | blas_quda_bytes |
| texture< int4, 1 > | xTexDouble2 |
| texture< int4, 1 > | yTexDouble2 |
| texture< int4, 1 > | zTexDouble2 |
| texture< int4, 1 > | wTexDouble2 |
| texture< int4, 1 > | uTexDouble2 |
| texture< float2, 1 > | xTexSingle2 |
| texture< float2, 1 > | yTexSingle2 |
| texture< float4, 1 > | xTexSingle4 |
| texture< short4, 1, cudaReadModeNormalizedFloat > | texHalf1 |
| texture< short2, 1, cudaReadModeNormalizedFloat > | texHalfSt1 |
| texture< float, 1, cudaReadModeElementType > | texNorm1 |
| texture< short4, 1, cudaReadModeNormalizedFloat > | texHalf2 |
| texture< short2, 1, cudaReadModeNormalizedFloat > | texHalfSt2 |
| texture< float, 1, cudaReadModeElementType > | texNorm2 |
| texture< short4, 1, cudaReadModeNormalizedFloat > | texHalf3 |
| texture< short2, 1, cudaReadModeNormalizedFloat > | texHalfSt3 |
| texture< float, 1, cudaReadModeElementType > | texNorm3 |
| texture< short4, 1, cudaReadModeNormalizedFloat > | texHalf4 |
| texture< short2, 1, cudaReadModeNormalizedFloat > | texHalfSt4 |
| texture< float, 1, cudaReadModeElementType > | texNorm4 |
| texture< short4, 1, cudaReadModeNormalizedFloat > | texHalf5 |
| texture< short2, 1, cudaReadModeNormalizedFloat > | texHalfSt5 |
| texture< float, 1, cudaReadModeElementType > | texNorm5 |
Definition at line 438 of file blas_quda.cu.
Definition at line 435 of file blas_quda.cu.
Definition at line 476 of file blas_quda.cu.
| #define AXPBY_FLOAT4 | ( | a, | |
| X, | |||
| b, | |||
| Y | |||
| ) |
Definition at line 469 of file blas_quda.cu.
| #define AXPY_FLOAT4 | ( | a, | |
| X, | |||
| Y | |||
| ) |
| #define CAXPBY_FLOAT2 | ( | a, | |
| X, | |||
| b, | |||
| Y | |||
| ) |
| #define CAXPBY_FLOAT4 | ( | a, | |
| X, | |||
| b, | |||
| Y | |||
| ) |
{ /* Complex Y = a*X + b*Y; (x,y) and (z,w) are each treated as one (real, imaginary) pair. */ float2 y; \
y.x = a.x*X.x; y.x -= a.y*X.y; y.x += b.x*Y.x; y.x -= b.y*Y.y; /* real part, (x,y) pair */ \
y.y = a.y*X.x; y.y += a.x*X.y; y.y += b.y*Y.x; y.y += b.x*Y.y; /* imaginary part, (x,y) pair */ \
Y.x = y.x; Y.y = y.y; /* written back only now: both lines above read the old Y.x and Y.y */ \
y.x = a.x*X.z; y.x -= a.y*X.w; y.x += b.x*Y.z; y.x -= b.y*Y.w; /* real part, (z,w) pair */ \
y.y = a.y*X.z; y.y += a.x*X.w; y.y += b.y*Y.z; y.y += b.x*Y.w; /* imaginary part, (z,w) pair */ \
Y.z = y.x; Y.w = y.y;} /* NOTE(review): the scratch variable is named y, so an argument spelled y would be captured by it -- confirm no caller passes one */
Definition at line 522 of file blas_quda.cu.
| #define CAXPY_FLOAT2 | ( | a, | |
| X, | |||
| Y | |||
| ) |
| #define CAXPY_FLOAT4 | ( | a, | |
| X, | |||
| Y | |||
| ) |
| #define checkSpinor | ( | a, | |
| b | |||
| ) |
{ /* Reports through errorQuda when a and b differ in Precision(), Length() or Stride(). */ \
if (a.Precision() != b.Precision()) /* each of the three tests is a separate if; none is nested in another */ \
errorQuda("precisions do not match: %d %d", a.Precision(), b.Precision()); \
if (a.Length() != b.Length()) \
errorQuda("lengths do not match: %d %d", a.Length(), b.Length()); \
if (a.Stride() != b.Stride()) \
errorQuda("strides do not match: %d %d", a.Stride(), b.Stride()); \
} /* NOTE(review): %d assumes the three accessors return int-compatible values -- confirm against the field class */
Definition at line 615 of file blas_quda.cu.
| #define checkSpinorLength | ( | a, | |
| b | |||
| ) |
{ /* Reports through errorQuda when a and b differ in Length(); no other property is compared. */ \
if (a.Length() != b.Length()) { \
errorQuda("lengths do not match: %d %d", a.Length(), b.Length()); \
} \
} /* closes the outer block so every expansion is brace-balanced */
Definition at line 626 of file blas_quda.cu.
| #define CMAXPY_FLOAT4 | ( | a, | |
| X, | |||
| Y | |||
| ) |
| #define CONSTRUCT_HALF_SPINOR_FROM_DOUBLE | ( | h, | |
| n, | |||
| a, | |||
| length | |||
| ) |
{ /* Packs the values a##0 .. a##11 (names formed by token pasting, .x and .y of each) into six short4 entries of h and one scale in n. */ float c0 = fmaxf(fabsf((a##0).x), fabsf((a##0).y)); \
float c1 = fmaxf(fabsf((a##1).x), fabsf((a##1).y)); /* c0..c11: the larger magnitude of each value's .x and .y */ \
float c2 = fmaxf(fabsf((a##2).x), fabsf((a##2).y)); \
float c3 = fmaxf(fabsf((a##3).x), fabsf((a##3).y)); \
float c4 = fmaxf(fabsf((a##4).x), fabsf((a##4).y)); \
float c5 = fmaxf(fabsf((a##5).x), fabsf((a##5).y)); \
float c6 = fmaxf(fabsf((a##6).x), fabsf((a##6).y)); \
float c7 = fmaxf(fabsf((a##7).x), fabsf((a##7).y)); \
float c8 = fmaxf(fabsf((a##8).x), fabsf((a##8).y)); \
float c9 = fmaxf(fabsf((a##9).x), fabsf((a##9).y)); \
float c10 = fmaxf(fabsf((a##10).x), fabsf((a##10).y)); \
float c11 = fmaxf(fabsf((a##11).x), fabsf((a##11).y)); \
c0 = fmaxf(c0, c1); c1 = fmaxf(c2, c3); c2 = fmaxf(c4, c5); c3 = fmaxf(c6, c7); \
c4 = fmaxf(c8, c9); c5 = fmaxf(c10, c11); c0 = fmaxf(c0, c1); c1 = fmaxf(c2, c3); \
c2 = fmaxf(c4, c5); c0 = fmaxf(c0, c1); c0 = fmaxf(c0, c2); /* pairwise reduction: c0 now holds the maximum of all twelve */ \
n[i] = c0; /* i is not a macro parameter; it is taken from the scope where the macro is expanded */ \
float C = __fdividef(MAX_SHORT, c0); /* NOTE(review): nothing guards c0 == 0 before this division; MAX_SHORT is defined outside this excerpt */ \
h[i+0*length] = make_short4((short)(C*(float)(a##0).x), (short)(C*(float)(a##0).y), \
(short)(C*(float)(a##1).x), (short)(C*(float)(a##1).y)); /* entry k is stored at h[i + k*length] and holds values 2k and 2k+1 */ \
h[i+1*length] = make_short4((short)(C*(float)(a##2).x), (short)(C*(float)(a##2).y), \
(short)(C*(float)(a##3).x), (short)(C*(float)(a##3).y)); \
h[i+2*length] = make_short4((short)(C*(float)(a##4).x), (short)(C*(float)(a##4).y), \
(short)(C*(float)(a##5).x), (short)(C*(float)(a##5).y)); \
h[i+3*length] = make_short4((short)(C*(float)(a##6).x), (short)(C*(float)(a##6).y), \
(short)(C*(float)(a##7).x), (short)(C*(float)(a##7).y)); \
h[i+4*length] = make_short4((short)(C*(float)(a##8).x), (short)(C*(float)(a##8).y), \
(short)(C*(float)(a##9).x), (short)(C*(float)(a##9).y)); \
h[i+5*length] = make_short4((short)(C*(float)(a##10).x), (short)(C*(float)(a##10).y), \
(short)(C*(float)(a##11).x), (short)(C*(float)(a##11).y));}
Definition at line 347 of file blas_quda.cu.
| #define CONSTRUCT_HALF_SPINOR_FROM_DOUBLE_ST | ( | h, | |
| n, | |||
| a, | |||
| length | |||
| ) |
{ /* Packs the values a##0 .. a##2 (names formed by token pasting, .x and .y of each) into three short2 entries of h and one scale in n. */ float c0 = fmaxf(fabsf((a##0).x), fabsf((a##0).y)); \
float c1 = fmaxf(fabsf((a##1).x), fabsf((a##1).y)); /* c0..c2: the larger magnitude of each value's .x and .y */ \
float c2 = fmaxf(fabsf((a##2).x), fabsf((a##2).y)); \
c0 = fmaxf(c0, c1); c0 = fmaxf(c0, c2); /* c0 now holds the maximum of all three */ \
n[i] = c0; /* i is not a macro parameter; it is taken from the scope where the macro is expanded */ \
float C = __fdividef(MAX_SHORT, c0); /* NOTE(review): nothing guards c0 == 0 before this division; MAX_SHORT is defined outside this excerpt */ \
h[i+0*length] = make_short2((short)(C*(float)(a##0).x), (short)(C*(float)(a##0).y)); /* value k is stored at h[i + k*length] */ \
h[i+1*length] = make_short2((short)(C*(float)(a##1).x), (short)(C*(float)(a##1).y)); \
h[i+2*length] = make_short2((short)(C*(float)(a##2).x), (short)(C*(float)(a##2).y));}
Definition at line 389 of file blas_quda.cu.
| #define CONSTRUCT_HALF_SPINOR_FROM_SINGLE | ( | h, | |
| n, | |||
| a, | |||
| length | |||
| ) |
{ \
float c0 = fast_abs_max(a##0); \
float c1 = fast_abs_max(a##1); \
c0 = FAST_MAX(c0, c1); \
float c2 = fast_abs_max(a##2); \
float c3 = fast_abs_max(a##3); \
c1 = FAST_MAX(c2, c3); \
c0 = FAST_MAX(c0, c1); \
c2 = fast_abs_max(a##4); \
c3 = fast_abs_max(a##5); \
c1 = FAST_MAX(c2, c3); \
c0 = FAST_MAX(c0, c1); \
n[i] = c0; \
float C = __fdividef(MAX_SHORT, c0); \
h[i+0*length] = make_short4((short)(C*(float)(a##0).x), (short)(C*(float)(a##0).y), \
(short)(C*(float)(a##0).z), (short)(C*(float)(a##0).w)); \
h[i+1*length] = make_short4((short)(C*(float)(a##1).x), (short)(C*(float)(a##1).y), \
(short)(C*(float)(a##1).z), (short)(C*(float)(a##1).w)); \
h[i+2*length] = make_short4((short)(C*(float)(a##2).x), (short)(C*(float)(a##2).y), \
(short)(C*(float)(a##2).z), (short)(C*(float)(a##2).w)); \
h[i+3*length] = make_short4((short)(C*(float)(a##3).x), (short)(C*(float)(a##3).y), \
(short)(C*(float)(a##3).z), (short)(C*(float)(a##3).w)); \
h[i+4*length] = make_short4((short)(C*(float)(a##4).x), (short)(C*(float)(a##4).y), \
(short)(C*(float)(a##4).z), (short)(C*(float)(a##4).w)); \
h[i+5*length] = make_short4((short)(C*(float)(a##5).x), (short)(C*(float)(a##5).y), \
(short)(C*(float)(a##5).z), (short)(C*(float)(a##5).w));}
Definition at line 320 of file blas_quda.cu.
| #define CONSTRUCT_HALF_SPINOR_FROM_SINGLE_ST | ( | h, | |
| n, | |||
| a, | |||
| length | |||
| ) |
{float c0 = fmaxf(fabsf((a##0).x), fabsf((a##0).y)); \
float c1 = fmaxf(fabsf((a##1).x), fabsf((a##1).y)); \
float c2 = fmaxf(fabsf((a##2).x), fabsf((a##2).y)); \
c0 = fmaxf(c0, c1); c0 = fmaxf(c0, c2); \
n[i] = c0; \
float C = __fdividef(MAX_SHORT, c0); \
h[i+0*length] = make_short2((short)(C*(float)(a##0).x), (short)(C*(float)(a##0).y)); \
h[i+1*length] = make_short2((short)(C*(float)(a##1).x), (short)(C*(float)(a##1).y)); \
h[i+2*length] = make_short2((short)(C*(float)(a##2).x), (short)(C*(float)(a##2).y));}
Definition at line 378 of file blas_quda.cu.
| #define CXPAYPBZ_FLOAT4 | ( | X, | |
| a, | |||
| Y, | |||
| b, | |||
| Z | |||
| ) |
{float2 z; \
z.x = X.x + a.x*Y.x; z.x -= a.y*Y.y; z.x += b.x*Z.x; z.x -= b.y*Z.y; \
z.y = X.y + a.y*Y.x; z.y += a.x*Y.y; z.y += b.y*Z.x; z.y += b.x*Z.y; \
Z.x = z.x; Z.y = z.y; \
z.x = X.z + a.x*Y.z; z.x -= a.y*Y.w; z.x += b.x*Z.z; z.x -= b.y*Z.w; \
z.y = X.w + a.y*Y.z; z.y += a.x*Y.w; z.y += b.y*Z.z; z.y += b.x*Z.w; \
Z.z = z.x; Z.w = z.y;}
Definition at line 538 of file blas_quda.cu.
| #define EMUSYNC |
Definition at line 31 of file blas_quda.cu.
| #define FAST_ABS_MAX | ( | a, | |
| b | |||
| ) | fmaxf(fabsf(a), fabsf(b)); |
Definition at line 311 of file blas_quda.cu.
| #define FAST_MAX | ( | a, | |
| b | |||
| ) | fmaxf(a, b); |
Definition at line 312 of file blas_quda.cu.
Definition at line 432 of file blas_quda.cu.
Definition at line 422 of file blas_quda.cu.
Definition at line 453 of file blas_quda.cu.
| #define QudaSumComplex cuComplex |
Definition at line 23 of file blas_quda.cu.
| #define QudaSumFloat float |
Definition at line 22 of file blas_quda.cu.
| #define QudaSumFloat3 float3 |
Definition at line 24 of file blas_quda.cu.
Definition at line 221 of file blas_quda.cu.
Definition at line 224 of file blas_quda.cu.
| #define READ_HALF_SPINOR | ( | a, | |
| tex, | |||
| length | |||
| ) |
float4 a##0 = tex1Dfetch(tex, i + 0*length); \
float4 a##1 = tex1Dfetch(tex, i + 1*length); \
float4 a##2 = tex1Dfetch(tex, i + 2*length); \
float4 a##3 = tex1Dfetch(tex, i + 3*length); \
float4 a##4 = tex1Dfetch(tex, i + 4*length); \
float4 a##5 = tex1Dfetch(tex, i + 5*length); \
float a##c = a##N[i];
Definition at line 278 of file blas_quda.cu.
| #define READ_HALF_SPINOR_ST | ( | a, | |
| tex, | |||
| length | |||
| ) |
float2 a##0 = tex1Dfetch(tex, i + 0*length); \
float2 a##1 = tex1Dfetch(tex, i + 1*length); \
float2 a##2 = tex1Dfetch(tex, i + 2*length); \
float a##c = a##N[i];
Definition at line 287 of file blas_quda.cu.
| #define READ_HALF_SPINOR_TEX | ( | a, | |
| tex, | |||
| texNorm, | |||
| length | |||
| ) |
float a##c = tex1Dfetch(texNorm, i); \
float4 a##0 = tex1Dfetch(tex, i + 0*length); \
float4 a##1 = tex1Dfetch(tex, i + 1*length); \
float4 a##2 = tex1Dfetch(tex, i + 2*length); \
float4 a##3 = tex1Dfetch(tex, i + 3*length); \
float4 a##4 = tex1Dfetch(tex, i + 4*length); \
float4 a##5 = tex1Dfetch(tex, i + 5*length); \
Definition at line 269 of file blas_quda.cu.
Definition at line 418 of file blas_quda.cu.
Definition at line 408 of file blas_quda.cu.
| #define RECONSTRUCT_HALF_SPINOR | ( | a, | |
| texHalf, | |||
| texNorm, | |||
| length | |||
| ) |
float a##c = tex1Dfetch(texNorm, i); \
float4 a##0 = tex1Dfetch(texHalf, i + 0*length); \
float4 a##1 = tex1Dfetch(texHalf, i + 1*length); \
float4 a##2 = tex1Dfetch(texHalf, i + 2*length); \
float4 a##3 = tex1Dfetch(texHalf, i + 3*length); \
float4 a##4 = tex1Dfetch(texHalf, i + 4*length); \
float4 a##5 = tex1Dfetch(texHalf, i + 5*length); \
a##0 *= a##c; \
a##1 *= a##c; \
a##2 *= a##c; \
a##3 *= a##c; \
a##4 *= a##c; \
a##5 *= a##c;
Definition at line 235 of file blas_quda.cu.
| #define RECONSTRUCT_HALF_SPINOR_ST | ( | a, | |
| texHalf, | |||
| texNorm, | |||
| length | |||
| ) |
float a##c = tex1Dfetch(texNorm, i); \
float2 a##0 = tex1Dfetch(texHalf, i + 0*length); \
float2 a##1 = tex1Dfetch(texHalf, i + 1*length); \
float2 a##2 = tex1Dfetch(texHalf, i + 2*length); \
(a##0) *= a##c; \
(a##1) *= a##c; \
(a##2) *= a##c;
Definition at line 250 of file blas_quda.cu.
| #define REDUCE_AUXILIARY | ( | i | ) |
Definition at line 2729 of file blas_quda.cu.
| #define REDUCE_AUXILIARY | ( | i | ) |
RECONSTRUCT_HALF_SPINOR_ST(x, texHalfSt1, texNorm1, stride); \
RECONSTRUCT_HALF_SPINOR_ST(y, texHalfSt2, texNorm2, stride); \
AXPY_FLOAT2(a, x0, y0); \
REAL_DOT_FLOAT2(norm0, y0, y0); \
AXPY_FLOAT2(a, x1, y1); \
REAL_DOT_FLOAT2(norm1, y1, y1); \
AXPY_FLOAT2(a, x2, y2); \
REAL_DOT_FLOAT2(norm2, y2, y2); \
norm0 += norm1; norm0 += norm2; \
CONSTRUCT_HALF_SPINOR_FROM_SINGLE_ST(yH, yN, y, stride);
Definition at line 2729 of file blas_quda.cu.
Definition at line 2729 of file blas_quda.cu.
| #define REDUCE_AUXILIARY | ( | i | ) |
READ_HALF_SPINOR(a, texHalf1, stride); \
SUM_FLOAT4(s0, a0); \
SUM_FLOAT4(s1, a1); \
SUM_FLOAT4(s2, a2); \
SUM_FLOAT4(s3, a3); \
SUM_FLOAT4(s4, a4); \
SUM_FLOAT4(s5, a5); \
s0 += s1; s2 += s3; s4 += s5; s0 += s2; s0 += s4;
Definition at line 2729 of file blas_quda.cu.
| #define REDUCE_AUXILIARY | ( | i | ) |
RECONSTRUCT_HALF_SPINOR(x, texHalf1, texNorm1, stride); \
RECONSTRUCT_HALF_SPINOR(y, texHalf2, texNorm2, stride); \
XMY_FLOAT4(x0, y0); \
REAL_DOT_FLOAT4(norm0, y0, y0); \
XMY_FLOAT4(x1, y1); \
REAL_DOT_FLOAT4(norm1, y1, y1); \
XMY_FLOAT4(x2, y2); \
REAL_DOT_FLOAT4(norm2, y2, y2); \
XMY_FLOAT4(x3, y3); \
REAL_DOT_FLOAT4(norm3, y3, y3); \
XMY_FLOAT4(x4, y4); \
REAL_DOT_FLOAT4(norm4, y4, y4); \
XMY_FLOAT4(x5, y5); \
REAL_DOT_FLOAT4(norm5, y5, y5); \
norm0 += norm1; norm2 += norm3; norm4 += norm5; norm0 += norm2; norm0 += norm4; \
CONSTRUCT_HALF_SPINOR_FROM_SINGLE(yH, yN, y, stride);
Definition at line 2729 of file blas_quda.cu.
| #define REDUCE_AUXILIARY | ( | i | ) |
RECONSTRUCT_HALF_SPINOR_ST(x, texHalfSt1, texNorm1, stride); \
RECONSTRUCT_HALF_SPINOR_ST(y, texHalfSt2, texNorm2, stride); \
XMY_FLOAT2(x0, y0); \
REAL_DOT_FLOAT2(norm0, y0, y0); \
XMY_FLOAT2(x1, y1); \
REAL_DOT_FLOAT2(norm1, y1, y1); \
XMY_FLOAT2(x2, y2); \
REAL_DOT_FLOAT2(norm2, y2, y2); \
norm0 += norm1; norm0 += norm2; \
CONSTRUCT_HALF_SPINOR_FROM_SINGLE_ST(yH, yN, y, stride);
Definition at line 2729 of file blas_quda.cu.
| #define REDUCE_AUXILIARY | ( | i | ) |
READ_HALF_SPINOR_ST(a, texHalfSt1, stride); \
SUM_FLOAT2(s0, a0); \
SUM_FLOAT2(s1, a1); \
SUM_FLOAT2(s2, a2); \
s0 += s1; s0 += s2;
Definition at line 2729 of file blas_quda.cu.
| #define REDUCE_AUXILIARY | ( | i | ) |
Definition at line 2729 of file blas_quda.cu.
| #define REDUCE_AUXILIARY | ( | i | ) |
Definition at line 2729 of file blas_quda.cu.
| #define REDUCE_AUXILIARY | ( | i | ) |
READ_HALF_SPINOR(a, texHalf1, stride); \
REAL_DOT_FLOAT4(norm0, a0, a0); \
REAL_DOT_FLOAT4(norm1, a1, a1); \
REAL_DOT_FLOAT4(norm2, a2, a2); \
REAL_DOT_FLOAT4(norm3, a3, a3); \
REAL_DOT_FLOAT4(norm4, a4, a4); \
REAL_DOT_FLOAT4(norm5, a5, a5); \
norm0 += norm1; norm2 += norm3; norm4 += norm5; norm0 += norm2, norm0 += norm4;
Definition at line 2729 of file blas_quda.cu.
| #define REDUCE_AUXILIARY | ( | i | ) |
READ_HALF_SPINOR_ST(a, texHalfSt1, stride); \
REAL_DOT_FLOAT2(norm0, a0, a0); \
REAL_DOT_FLOAT2(norm1, a1, a1); \
REAL_DOT_FLOAT2(norm2, a2, a2); \
norm0 += norm1; norm0 += norm2;
Definition at line 2729 of file blas_quda.cu.
| #define REDUCE_AUXILIARY | ( | i | ) |
Definition at line 2729 of file blas_quda.cu.
| #define REDUCE_AUXILIARY | ( | i | ) |
Definition at line 2729 of file blas_quda.cu.
| #define REDUCE_AUXILIARY | ( | i | ) |
READ_HALF_SPINOR(a, texHalf1, stride); \
READ_HALF_SPINOR(b, texHalf2, stride); \
REAL_DOT_FLOAT4(rdot0, a0, b0); \
REAL_DOT_FLOAT4(rdot1, a1, b1); \
REAL_DOT_FLOAT4(rdot2, a2, b2); \
REAL_DOT_FLOAT4(rdot3, a3, b3); \
REAL_DOT_FLOAT4(rdot4, a4, b4); \
REAL_DOT_FLOAT4(rdot5, a5, b5); \
rdot0 += rdot1; rdot2 += rdot3; rdot4 += rdot5; rdot0 += rdot2; rdot0 += rdot4;
Definition at line 2729 of file blas_quda.cu.
| #define REDUCE_AUXILIARY | ( | i | ) |
READ_HALF_SPINOR_ST(a, texHalfSt1, stride); \
READ_HALF_SPINOR_ST(b, texHalfSt2, stride); \
REAL_DOT_FLOAT2(rdot0, a0, b0); \
REAL_DOT_FLOAT2(rdot1, a1, b1); \
REAL_DOT_FLOAT2(rdot2, a2, b2); \
rdot0 += rdot1; rdot0 += rdot2;
Definition at line 2729 of file blas_quda.cu.
| #define REDUCE_AUXILIARY | ( | i | ) |
Definition at line 2729 of file blas_quda.cu.
Definition at line 2729 of file blas_quda.cu.
| #define REDUCE_AUXILIARY | ( | i | ) |
RECONSTRUCT_HALF_SPINOR(x, texHalf1, texNorm1, stride); \
RECONSTRUCT_HALF_SPINOR(y, texHalf2, texNorm2, stride); \
AXPY_FLOAT4(a, x0, y0); \
REAL_DOT_FLOAT4(norm0, y0, y0); \
AXPY_FLOAT4(a, x1, y1); \
REAL_DOT_FLOAT4(norm1, y1, y1); \
AXPY_FLOAT4(a, x2, y2); \
REAL_DOT_FLOAT4(norm2, y2, y2); \
AXPY_FLOAT4(a, x3, y3); \
REAL_DOT_FLOAT4(norm3, y3, y3); \
AXPY_FLOAT4(a, x4, y4); \
REAL_DOT_FLOAT4(norm4, y4, y4); \
AXPY_FLOAT4(a, x5, y5); \
REAL_DOT_FLOAT4(norm5, y5, y5); \
norm0 += norm1; norm2 += norm3; norm4 += norm5; norm0 += norm2; norm0 += norm4; \
CONSTRUCT_HALF_SPINOR_FROM_SINGLE(yH, yN, y, stride);
Definition at line 2729 of file blas_quda.cu.
| #define REDUCE_DOUBLE 64 |
Definition at line 12 of file blas_quda.cu.
| #define REDUCE_FUNC_NAME | ( | suffix | ) | cDotProductNormBH##suffix |
Definition at line 3548 of file blas_quda.cu.
| #define REDUCE_FUNC_NAME | ( | suffix | ) | xmyNormF##suffix |
Definition at line 3548 of file blas_quda.cu.
| #define REDUCE_FUNC_NAME | ( | suffix | ) | cDotProductNormBHSt##suffix |
Definition at line 3548 of file blas_quda.cu.
| #define REDUCE_FUNC_NAME | ( | suffix | ) | sumH##suffix |
Definition at line 3548 of file blas_quda.cu.
| #define REDUCE_FUNC_NAME | ( | suffix | ) | caxpbypzYmbwcDotProductWYNormYD##suffix |
Definition at line 3548 of file blas_quda.cu.
| #define REDUCE_FUNC_NAME | ( | suffix | ) | xmyNormH##suffix |
Definition at line 3548 of file blas_quda.cu.
| #define REDUCE_FUNC_NAME | ( | suffix | ) | sumHSt##suffix |
Definition at line 3548 of file blas_quda.cu.
| #define REDUCE_FUNC_NAME | ( | suffix | ) | xmyNormH##suffix |
Definition at line 3548 of file blas_quda.cu.
| #define REDUCE_FUNC_NAME | ( | suffix | ) | caxpbypzYmbwcDotProductWYNormYS##suffix |
Definition at line 3548 of file blas_quda.cu.
| #define REDUCE_FUNC_NAME | ( | suffix | ) | cDotProductD##suffix |
Definition at line 3548 of file blas_quda.cu.
| #define REDUCE_FUNC_NAME | ( | suffix | ) | caxpbypzYmbwcDotProductWYNormYH##suffix |
Definition at line 3548 of file blas_quda.cu.
| #define REDUCE_FUNC_NAME | ( | suffix | ) | caxpbypzYmbwcDotProductWYNormYH##suffix |
Definition at line 3548 of file blas_quda.cu.
| #define REDUCE_FUNC_NAME | ( | suffix | ) | normD##suffix |
Definition at line 3548 of file blas_quda.cu.
| #define REDUCE_FUNC_NAME | ( | suffix | ) | cDotProductS##suffix |
Definition at line 3548 of file blas_quda.cu.
| #define REDUCE_FUNC_NAME | ( | suffix | ) | normS##suffix |
Definition at line 3548 of file blas_quda.cu.
| #define REDUCE_FUNC_NAME | ( | suffix | ) | cDotProductH##suffix |
Definition at line 3548 of file blas_quda.cu.
| #define REDUCE_FUNC_NAME | ( | suffix | ) | cDotProductHSt##suffix |
Definition at line 3548 of file blas_quda.cu.
| #define REDUCE_FUNC_NAME | ( | suffix | ) | normH##suffix |
Definition at line 3548 of file blas_quda.cu.
| #define REDUCE_FUNC_NAME | ( | suffix | ) | xpaycDotzyD##suffix |
Definition at line 3548 of file blas_quda.cu.
| #define REDUCE_FUNC_NAME | ( | suffix | ) | xpaycDotzyS##suffix |
Definition at line 3548 of file blas_quda.cu.
| #define REDUCE_FUNC_NAME | ( | suffix | ) | normHSt##suffix |
Definition at line 3548 of file blas_quda.cu.
| #define REDUCE_FUNC_NAME | ( | suffix | ) | xpaycDotzyH##suffix |
Definition at line 3548 of file blas_quda.cu.
| #define REDUCE_FUNC_NAME | ( | suffix | ) | reDotProductD##suffix |
Definition at line 3548 of file blas_quda.cu.
| #define REDUCE_FUNC_NAME | ( | suffix | ) | xpaycDotzyH##suffix |
Definition at line 3548 of file blas_quda.cu.
| #define REDUCE_FUNC_NAME | ( | suffix | ) | reDotProductS##suffix |
Definition at line 3548 of file blas_quda.cu.
| #define REDUCE_FUNC_NAME | ( | suffix | ) | cDotProductNormAD##suffix |
Definition at line 3548 of file blas_quda.cu.
| #define REDUCE_FUNC_NAME | ( | suffix | ) | cDotProductNormAS##suffix |
Definition at line 3548 of file blas_quda.cu.
| #define REDUCE_FUNC_NAME | ( | suffix | ) | reDotProductH##suffix |
Definition at line 3548 of file blas_quda.cu.
| #define REDUCE_FUNC_NAME | ( | suffix | ) | cDotProductNormAH##suffix |
Definition at line 3548 of file blas_quda.cu.
| #define REDUCE_FUNC_NAME | ( | suffix | ) | reDotProductHSt##suffix |
Definition at line 3548 of file blas_quda.cu.
| #define REDUCE_FUNC_NAME | ( | suffix | ) | sumD##suffix |
Definition at line 3548 of file blas_quda.cu.
| #define REDUCE_FUNC_NAME | ( | suffix | ) | cDotProductNormAHSt##suffix |
Definition at line 3548 of file blas_quda.cu.
| #define REDUCE_FUNC_NAME | ( | suffix | ) | axpyNormF##suffix |
Definition at line 3548 of file blas_quda.cu.
| #define REDUCE_FUNC_NAME | ( | suffix | ) | cDotProductNormBD##suffix |
Definition at line 3548 of file blas_quda.cu.
| #define REDUCE_FUNC_NAME | ( | suffix | ) | axpyNormH##suffix |
Definition at line 3548 of file blas_quda.cu.
| #define REDUCE_FUNC_NAME | ( | suffix | ) | sumS##suffix |
Definition at line 3548 of file blas_quda.cu.
| #define REDUCE_FUNC_NAME | ( | suffix | ) | cDotProductNormBS##suffix |
Definition at line 3548 of file blas_quda.cu.
| #define REDUCE_FUNC_NAME | ( | suffix | ) | axpyNormH##suffix |
Definition at line 3548 of file blas_quda.cu.
| #define REDUCE_IMAG_AUXILIARY | ( | i | ) | Float2 b = READ_DOUBLE2_TEXTURE(y, i); |
Definition at line 3023 of file blas_quda.cu.
| #define REDUCE_IMAG_AUXILIARY | ( | i | ) | Float2 b = read_Float2(y, i); |
Definition at line 3023 of file blas_quda.cu.
| #define REDUCE_IMAG_AUXILIARY | ( | i | ) |
IMAG_DOT_FLOAT4(idot0, a0, b0); \
IMAG_DOT_FLOAT4(idot1, a1, b1); \
IMAG_DOT_FLOAT4(idot2, a2, b2); \
IMAG_DOT_FLOAT4(idot3, a3, b3); \
IMAG_DOT_FLOAT4(idot4, a4, b4); \
IMAG_DOT_FLOAT4(idot5, a5, b5); \
idot0 += idot1; idot2 += idot3; idot4 += idot5; idot0 += idot2; idot0 += idot4;
Definition at line 3023 of file blas_quda.cu.
| #define REDUCE_IMAG_AUXILIARY | ( | i | ) |
IMAG_DOT_FLOAT2(idot0, a0, b0); \
IMAG_DOT_FLOAT2(idot1, a1, b1); \
IMAG_DOT_FLOAT2(idot2, a2, b2); \
idot0 += idot1; idot0 += idot2;
Definition at line 3023 of file blas_quda.cu.
Definition at line 3023 of file blas_quda.cu.
Definition at line 3023 of file blas_quda.cu.
| #define REDUCE_IMAG_AUXILIARY | ( | i | ) |
IMAG_DOT_FLOAT4(idot0, z0, y0); \
IMAG_DOT_FLOAT4(idot1, z1, y1); \
IMAG_DOT_FLOAT4(idot2, z2, y2); \
IMAG_DOT_FLOAT4(idot3, z3, y3); \
IMAG_DOT_FLOAT4(idot4, z4, y4); \
IMAG_DOT_FLOAT4(idot5, z5, y5); \
idot0 += idot1; idot2 += idot3; idot4 += idot5; idot0 += idot2; idot0 += idot4; \
CONSTRUCT_HALF_SPINOR_FROM_SINGLE(yH, yN, y, stride);
Definition at line 3023 of file blas_quda.cu.
| #define REDUCE_IMAG_AUXILIARY | ( | i | ) |
IMAG_DOT_FLOAT2(idot0, z0, y0); \
IMAG_DOT_FLOAT2(idot1, z1, y1); \
IMAG_DOT_FLOAT2(idot2, z2, y2); \
idot0 += idot1; idot0 += idot2; \
CONSTRUCT_HALF_SPINOR_FROM_SINGLE_ST(yH, yN, y, stride);
Definition at line 3023 of file blas_quda.cu.
Definition at line 3030 of file blas_quda.cu.
Definition at line 3030 of file blas_quda.cu.
| #define REDUCE_IMAG_OPERATION | ( | i | ) | (ac*bc*idot0) |
Definition at line 3030 of file blas_quda.cu.
| #define REDUCE_IMAG_OPERATION | ( | i | ) | (ac*bc*idot0) |
Definition at line 3030 of file blas_quda.cu.
Definition at line 3030 of file blas_quda.cu.
Definition at line 3030 of file blas_quda.cu.
| #define REDUCE_IMAG_OPERATION | ( | i | ) | (idot0) |
Definition at line 3030 of file blas_quda.cu.
| #define REDUCE_IMAG_OPERATION | ( | i | ) | (idot0) |
Definition at line 3030 of file blas_quda.cu.
| #define REDUCE_KAHAN 32 |
Definition at line 13 of file blas_quda.cu.
| #define REDUCE_MAX_BLOCKS 65536 |
Definition at line 10 of file blas_quda.cu.
Definition at line 2740 of file blas_quda.cu.
| #define REDUCE_OPERATION | ( | i | ) | (norm0) |
Definition at line 2740 of file blas_quda.cu.
Definition at line 2740 of file blas_quda.cu.
| #define REDUCE_OPERATION | ( | i | ) | (ac*s0) |
Definition at line 2740 of file blas_quda.cu.
| #define REDUCE_OPERATION | ( | i | ) | (norm0) |
Definition at line 2740 of file blas_quda.cu.
| #define REDUCE_OPERATION | ( | i | ) | (norm0) |
Definition at line 2740 of file blas_quda.cu.
| #define REDUCE_OPERATION | ( | i | ) | (a[i]*a[i]) |
Definition at line 2740 of file blas_quda.cu.
Definition at line 2740 of file blas_quda.cu.
| #define REDUCE_OPERATION | ( | i | ) | (ac*ac*norm0) |
Definition at line 2740 of file blas_quda.cu.
| #define REDUCE_OPERATION | ( | i | ) | (ac*ac*norm0) |
Definition at line 2740 of file blas_quda.cu.
| #define REDUCE_OPERATION | ( | i | ) | (a[i]*b[i]) |
Definition at line 2740 of file blas_quda.cu.
| #define REDUCE_OPERATION | ( | i | ) | (ac*s0) |
Definition at line 2740 of file blas_quda.cu.
Definition at line 2740 of file blas_quda.cu.
| #define REDUCE_OPERATION | ( | i | ) | (ac*bc*rdot0) |
Definition at line 2740 of file blas_quda.cu.
| #define REDUCE_OPERATION | ( | i | ) | (ac*bc*rdot0) |
Definition at line 2740 of file blas_quda.cu.
Definition at line 2740 of file blas_quda.cu.
| #define REDUCE_OPERATION | ( | i | ) | a[i] |
Definition at line 2740 of file blas_quda.cu.
| #define REDUCE_OPERATION | ( | i | ) | (norm0) |
Definition at line 2740 of file blas_quda.cu.
| #define REDUCE_PARAMS a, yH, yN, stride |
Definition at line 3550 of file blas_quda.cu.
| #define REDUCE_PARAMS xN, yN, stride |
Definition at line 3550 of file blas_quda.cu.
Definition at line 3550 of file blas_quda.cu.
| #define REDUCE_PARAMS xN, yN, stride |
Definition at line 3550 of file blas_quda.cu.
| #define REDUCE_PARAMS aN, stride |
Definition at line 3550 of file blas_quda.cu.
| #define REDUCE_PARAMS d1, d2, yH, yN, stride |
Definition at line 3550 of file blas_quda.cu.
| #define REDUCE_PARAMS d1, d2, yH, yN, stride |
Definition at line 3550 of file blas_quda.cu.
| #define REDUCE_PARAMS aN, stride |
Definition at line 3550 of file blas_quda.cu.
| #define REDUCE_PARAMS a, b, yH, yN, zH, zN, wN, uN, stride |
Definition at line 3550 of file blas_quda.cu.
Definition at line 3550 of file blas_quda.cu.
| #define REDUCE_PARAMS a, b, yH, yN, zH, zN, wN, uN, stride |
Definition at line 3550 of file blas_quda.cu.
Definition at line 3550 of file blas_quda.cu.
| #define REDUCE_PARAMS a |
Definition at line 3550 of file blas_quda.cu.
| #define REDUCE_PARAMS aN, bN, stride |
Definition at line 3550 of file blas_quda.cu.
| #define REDUCE_PARAMS a |
Definition at line 3550 of file blas_quda.cu.
| #define REDUCE_PARAMS aN, bN, stride |
Definition at line 3550 of file blas_quda.cu.
Definition at line 3550 of file blas_quda.cu.
| #define REDUCE_PARAMS aN, stride |
Definition at line 3550 of file blas_quda.cu.
Definition at line 3550 of file blas_quda.cu.
| #define REDUCE_PARAMS aN, stride |
Definition at line 3550 of file blas_quda.cu.
| #define REDUCE_PARAMS a, yH, yN, stride |
Definition at line 3550 of file blas_quda.cu.
| #define REDUCE_PARAMS a, b |
Definition at line 3550 of file blas_quda.cu.
| #define REDUCE_PARAMS a, yH, yN, stride |
Definition at line 3550 of file blas_quda.cu.
Definition at line 3550 of file blas_quda.cu.
| #define REDUCE_PARAMS a, b |
Definition at line 3550 of file blas_quda.cu.
| #define REDUCE_PARAMS a, b |
Definition at line 3550 of file blas_quda.cu.
| #define REDUCE_PARAMS aN, bN, stride |
Definition at line 3550 of file blas_quda.cu.
| #define REDUCE_PARAMS xN, yN, stride |
Definition at line 3550 of file blas_quda.cu.
| #define REDUCE_PARAMS aN, bN, stride |
Definition at line 3550 of file blas_quda.cu.
| #define REDUCE_PARAMS xN, yN, stride |
Definition at line 3550 of file blas_quda.cu.
| #define REDUCE_PARAMS a |
Definition at line 3550 of file blas_quda.cu.
Definition at line 3550 of file blas_quda.cu.
Definition at line 3550 of file blas_quda.cu.
| #define REDUCE_PARAMS a, yH, yN, stride |
Definition at line 3550 of file blas_quda.cu.
| #define REDUCE_PARAMS a, b |
Definition at line 3550 of file blas_quda.cu.
| #define REDUCE_PARAMS a |
Definition at line 3550 of file blas_quda.cu.
| #define REDUCE_REAL_AUXILIARY | ( | i | ) | Float2 a = READ_DOUBLE2_TEXTURE(x, i); |
Definition at line 3012 of file blas_quda.cu.
| #define REDUCE_REAL_AUXILIARY | ( | i | ) | Float2 a = read_Float2(x, i); |
Definition at line 3012 of file blas_quda.cu.
| #define REDUCE_REAL_AUXILIARY | ( | i | ) |
READ_HALF_SPINOR(a, texHalf1, stride); \
READ_HALF_SPINOR(b, texHalf2, stride); \
REAL_DOT_FLOAT4(rdot0, a0, b0); \
REAL_DOT_FLOAT4(rdot1, a1, b1); \
REAL_DOT_FLOAT4(rdot2, a2, b2); \
REAL_DOT_FLOAT4(rdot3, a3, b3); \
REAL_DOT_FLOAT4(rdot4, a4, b4); \
REAL_DOT_FLOAT4(rdot5, a5, b5); \
rdot0 += rdot1; rdot2 += rdot3; rdot4 += rdot5; rdot0 += rdot2; rdot0 += rdot4;
Definition at line 3012 of file blas_quda.cu.
| #define REDUCE_REAL_AUXILIARY | ( | i | ) |
READ_HALF_SPINOR_ST(a, texHalfSt1, stride); \
READ_HALF_SPINOR_ST(b, texHalfSt2, stride); \
REAL_DOT_FLOAT2(rdot0, a0, b0); \
REAL_DOT_FLOAT2(rdot1, a1, b1); \
REAL_DOT_FLOAT2(rdot2, a2, b2); \
rdot0 += rdot1; rdot0 += rdot2;
Definition at line 3012 of file blas_quda.cu.
| #define REDUCE_REAL_AUXILIARY | ( | i | ) |
Float2 X = READ_DOUBLE2_TEXTURE(x, i); \
Float2 Y = READ_DOUBLE2_TEXTURE(y, i); \
Float2 Z = READ_DOUBLE2_TEXTURE(z, i);
Definition at line 3012 of file blas_quda.cu.
Definition at line 3012 of file blas_quda.cu.
| #define REDUCE_REAL_AUXILIARY | ( | i | ) |
RECONSTRUCT_HALF_SPINOR(x, texHalf1, texNorm1, stride); \
RECONSTRUCT_HALF_SPINOR(y, texHalf2, texNorm2, stride); \
RECONSTRUCT_HALF_SPINOR(z, texHalf3, texNorm3, stride); \
XPAY_FLOAT4(x0, a, y0); \
XPAY_FLOAT4(x1, a, y1); \
XPAY_FLOAT4(x2, a, y2); \
XPAY_FLOAT4(x3, a, y3); \
XPAY_FLOAT4(x4, a, y4); \
XPAY_FLOAT4(x5, a, y5); \
REAL_DOT_FLOAT4(rdot0, z0, y0); \
REAL_DOT_FLOAT4(rdot1, z1, y1); \
REAL_DOT_FLOAT4(rdot2, z2, y2); \
REAL_DOT_FLOAT4(rdot3, z3, y3); \
REAL_DOT_FLOAT4(rdot4, z4, y4); \
REAL_DOT_FLOAT4(rdot5, z5, y5); \
rdot0 += rdot1; rdot2 += rdot3; rdot4 += rdot5; rdot0 += rdot2; rdot0 += rdot4;
Definition at line 3012 of file blas_quda.cu.
| #define REDUCE_REAL_AUXILIARY | ( | i | ) |
RECONSTRUCT_HALF_SPINOR_ST(x, texHalfSt1, texNorm1, stride); \
RECONSTRUCT_HALF_SPINOR_ST(y, texHalfSt2, texNorm2, stride); \
RECONSTRUCT_HALF_SPINOR_ST(z, texHalfSt3, texNorm3, stride); \
XPAY_FLOAT2(x0, a, y0); \
XPAY_FLOAT2(x1, a, y1); \
XPAY_FLOAT2(x2, a, y2); \
REAL_DOT_FLOAT2(rdot0, z0, y0); \
REAL_DOT_FLOAT2(rdot1, z1, y1); \
REAL_DOT_FLOAT2(rdot2, z2, y2); \
rdot0 += rdot1; rdot0 += rdot2;
Definition at line 3012 of file blas_quda.cu.
Definition at line 3029 of file blas_quda.cu.
| #define REDUCE_REAL_OPERATION | ( | i | ) | (ac*bc*rdot0) |
Definition at line 3029 of file blas_quda.cu.
| #define REDUCE_REAL_OPERATION | ( | i | ) | (ac*bc*rdot0) |
Definition at line 3029 of file blas_quda.cu.
Definition at line 3029 of file blas_quda.cu.
Definition at line 3029 of file blas_quda.cu.
| #define REDUCE_REAL_OPERATION | ( | i | ) | (rdot0) |
Definition at line 3029 of file blas_quda.cu.
| #define REDUCE_REAL_OPERATION | ( | i | ) | (rdot0) |
Definition at line 3029 of file blas_quda.cu.
Definition at line 3029 of file blas_quda.cu.
| #define REDUCE_TYPE REDUCE_KAHAN |
Definition at line 21 of file blas_quda.cu.
Definition at line 3549 of file blas_quda.cu.
| #define REDUCE_TYPES Float *aN, int stride |
Definition at line 3549 of file blas_quda.cu.
Definition at line 3549 of file blas_quda.cu.
| #define REDUCE_TYPES Float *d1, Float *d2, short2 *yH, float *yN, int stride |
Definition at line 3549 of file blas_quda.cu.
| #define REDUCE_TYPES Float2 *a, Float2 *b |
Definition at line 3549 of file blas_quda.cu.
Definition at line 3549 of file blas_quda.cu.
| #define REDUCE_TYPES Float *a |
Definition at line 3549 of file blas_quda.cu.
Definition at line 3549 of file blas_quda.cu.
| #define REDUCE_TYPES Float *a |
Definition at line 3549 of file blas_quda.cu.
| #define REDUCE_TYPES Float2 *xN, Float2 *yN, int stride |
Definition at line 3549 of file blas_quda.cu.
Definition at line 3549 of file blas_quda.cu.
| #define REDUCE_TYPES Float *aN, Float2 *bN, int stride |
Definition at line 3549 of file blas_quda.cu.
| #define REDUCE_TYPES Float *a |
Definition at line 3549 of file blas_quda.cu.
| #define REDUCE_TYPES Float *aN, Float2 *bN, int stride |
Definition at line 3549 of file blas_quda.cu.
| #define REDUCE_TYPES Float *aN, int stride |
Definition at line 3549 of file blas_quda.cu.
| #define REDUCE_TYPES Float *aN, Float *bN, int stride |
Definition at line 3549 of file blas_quda.cu.
Definition at line 3549 of file blas_quda.cu.
| #define REDUCE_TYPES Float a, short4 *yH, Float2 *yN, int stride |
Definition at line 3549 of file blas_quda.cu.
| #define REDUCE_TYPES Float2 a, Float2 b, short4 *yH, float *yN, short4 *zH, float *zN, float *wN, float *uN, int stride |
Definition at line 3549 of file blas_quda.cu.
Definition at line 3549 of file blas_quda.cu.
| #define REDUCE_TYPES Float *a, Float *b |
Definition at line 3549 of file blas_quda.cu.
| #define REDUCE_TYPES Float *a, Float *b |
Definition at line 3549 of file blas_quda.cu.
| #define REDUCE_TYPES Float a, short2 *yH, Float2 *yN, int stride |
Definition at line 3549 of file blas_quda.cu.
Definition at line 3549 of file blas_quda.cu.
Definition at line 3549 of file blas_quda.cu.
| #define REDUCE_TYPES Float *aN, int stride |
Definition at line 3549 of file blas_quda.cu.
| #define REDUCE_TYPES Float *aN, int stride |
Definition at line 3549 of file blas_quda.cu.
| #define REDUCE_TYPES Float *d1, Float *d2, short4 *yH, float *yN, int stride |
Definition at line 3549 of file blas_quda.cu.
| #define REDUCE_TYPES Float2 *a, Float2 *b |
Definition at line 3549 of file blas_quda.cu.
| #define REDUCE_TYPES Float2 a, Float2 b, short2 *yH, float *yN, short2 *zH, float *zN, float *wN, float *uN, int stride |
Definition at line 3549 of file blas_quda.cu.
| #define REDUCE_TYPES Float *aN, Float *bN, int stride |
Definition at line 3549 of file blas_quda.cu.
| #define REDUCE_TYPES Float *a |
Definition at line 3549 of file blas_quda.cu.
| #define REDUCE_TYPES Float2 *xN, Float2 *yN, int stride |
Definition at line 3549 of file blas_quda.cu.
| #define REDUCE_TYPES Float2 *xN, Float2 *yN, int stride |
Definition at line 3549 of file blas_quda.cu.
| #define REDUCE_TYPES Float2 *xN, Float2 *yN, int stride |
Definition at line 3549 of file blas_quda.cu.
Definition at line 3549 of file blas_quda.cu.
| #define REDUCE_TYPES Float a, short4 *yH, float *yN, int stride |
Definition at line 3549 of file blas_quda.cu.
| #define REDUCE_TYPES Float a, short2 *yH, float *yN, int stride |
Definition at line 3549 of file blas_quda.cu.
| #define REDUCE_X_AUXILIARY | ( | i | ) |
READ_HALF_SPINOR_ST(x, texHalfSt1, stride); \
READ_HALF_SPINOR_ST(y, texHalfSt2, stride); \
REAL_DOT_FLOAT2(norm0, y0, y0); \
REAL_DOT_FLOAT2(norm1, y1, y1); \
REAL_DOT_FLOAT2(norm2, y2, y2); \
norm0 += norm1; norm0 += norm2;
Definition at line 3551 of file blas_quda.cu.
| #define REDUCE_X_AUXILIARY | ( | i | ) | Float2 a = READ_DOUBLE2_TEXTURE(x, i); |
Definition at line 3551 of file blas_quda.cu.
| #define REDUCE_X_AUXILIARY | ( | i | ) |
Definition at line 3551 of file blas_quda.cu.
| #define REDUCE_X_AUXILIARY | ( | i | ) |
RECONSTRUCT_HALF_SPINOR_ST(x, texHalfSt1, texNorm1, stride); \
RECONSTRUCT_HALF_SPINOR_ST(y, texHalfSt2, texNorm2, stride); \
RECONSTRUCT_HALF_SPINOR_ST(z, texHalfSt3, texNorm3, stride); \
CAXPBYPZ_FLOAT2(a, x0, b, y0, z0); \
CAXPBYPZ_FLOAT2(a, x1, b, y1, z1); \
CAXPBYPZ_FLOAT2(a, x2, b, y2, z2); \
CONSTRUCT_HALF_SPINOR_FROM_SINGLE_ST(zH, zN, z, stride); \
READ_HALF_SPINOR_ST(w, texHalfSt4, stride); \
float2 bwc = -wc*b; \
CAXPY_FLOAT2(bwc, w0, y0); \
CAXPY_FLOAT2(bwc, w1, y1); \
CAXPY_FLOAT2(bwc, w2, y2); \
REAL_DOT_FLOAT2(norm0, y0, y0); \
REAL_DOT_FLOAT2(norm1, y1, y1); \
REAL_DOT_FLOAT2(norm2, y2, y2); \
CONSTRUCT_HALF_SPINOR_FROM_SINGLE_ST(yH, yN, y, stride);
Definition at line 3551 of file blas_quda.cu.
| #define REDUCE_X_AUXILIARY | ( | i | ) | Float2 a = READ_DOUBLE2_TEXTURE(x, i); |
Definition at line 3551 of file blas_quda.cu.
| #define REDUCE_X_AUXILIARY | ( | i | ) |
READ_HALF_SPINOR(x, texHalf1, stride); \
READ_HALF_SPINOR(y, texHalf2, stride); \
REAL_DOT_FLOAT4(norm0, y0, y0); \
REAL_DOT_FLOAT4(norm1, y1, y1); \
REAL_DOT_FLOAT4(norm2, y2, y2); \
REAL_DOT_FLOAT4(norm3, y3, y3); \
REAL_DOT_FLOAT4(norm4, y4, y4); \
REAL_DOT_FLOAT4(norm5, y5, y5); \
norm0 += norm1; norm2 += norm3; norm4 += norm5; norm0 += norm2, norm0 += norm4;
Definition at line 3551 of file blas_quda.cu.
| #define REDUCE_X_AUXILIARY | ( | i | ) |
Float2 X = read_Float2(x, i); \
Float2 Y = read_Float2(y, i); \
Float2 W = read_Float2(w, i);
Definition at line 3551 of file blas_quda.cu.
| #define REDUCE_X_AUXILIARY | ( | i | ) |
READ_HALF_SPINOR_ST(x, texHalfSt1, stride); \
READ_HALF_SPINOR_ST(y, texHalfSt2, stride); \
REAL_DOT_FLOAT2(norm0, x0, x0); \
REAL_DOT_FLOAT2(norm1, x1, x1); \
REAL_DOT_FLOAT2(norm2, x2, x2); \
norm0 += norm1; norm0 += norm2;
Definition at line 3551 of file blas_quda.cu.
| #define REDUCE_X_AUXILIARY | ( | i | ) |
Definition at line 3551 of file blas_quda.cu.
| #define REDUCE_X_AUXILIARY | ( | i | ) |
READ_HALF_SPINOR(x, texHalf1, stride); \
READ_HALF_SPINOR(y, texHalf2, stride); \
REAL_DOT_FLOAT4(norm0, x0, x0); \
REAL_DOT_FLOAT4(norm1, x1, x1); \
REAL_DOT_FLOAT4(norm2, x2, x2); \
REAL_DOT_FLOAT4(norm3, x3, x3); \
REAL_DOT_FLOAT4(norm4, x4, x4); \
REAL_DOT_FLOAT4(norm5, x5, x5); \
norm0 += norm1; norm2 += norm3; norm4 += norm5; norm0 += norm2, norm0 += norm4;
Definition at line 3551 of file blas_quda.cu.
| #define REDUCE_X_AUXILIARY | ( | i | ) |
Float2 X = READ_DOUBLE2_TEXTURE(x, i); \
Float2 Y = READ_DOUBLE2_TEXTURE(y, i); \
Float2 W = READ_DOUBLE2_TEXTURE(w, i);
Definition at line 3551 of file blas_quda.cu.
| #define REDUCE_X_AUXILIARY | ( | i | ) |
RECONSTRUCT_HALF_SPINOR(x, texHalf1, texNorm1, stride); \
RECONSTRUCT_HALF_SPINOR(y, texHalf2, texNorm2, stride); \
RECONSTRUCT_HALF_SPINOR(z, texHalf3, texNorm3, stride); \
CAXPBYPZ_FLOAT4(a, x0, b, y0, z0); \
CAXPBYPZ_FLOAT4(a, x1, b, y1, z1); \
CAXPBYPZ_FLOAT4(a, x2, b, y2, z2); \
CAXPBYPZ_FLOAT4(a, x3, b, y3, z3); \
CAXPBYPZ_FLOAT4(a, x4, b, y4, z4); \
CAXPBYPZ_FLOAT4(a, x5, b, y5, z5); \
CONSTRUCT_HALF_SPINOR_FROM_SINGLE(zH, zN, z, stride); \
READ_HALF_SPINOR(w, texHalf4, stride); \
float2 bwc = -wc*b; \
CAXPY_FLOAT4(bwc, w0, y0); \
CAXPY_FLOAT4(bwc, w1, y1); \
CAXPY_FLOAT4(bwc, w2, y2); \
CAXPY_FLOAT4(bwc, w3, y3); \
CAXPY_FLOAT4(bwc, w4, y4); \
CAXPY_FLOAT4(bwc, w5, y5); \
REAL_DOT_FLOAT4(norm0, y0, y0); \
REAL_DOT_FLOAT4(norm1, y1, y1); \
REAL_DOT_FLOAT4(norm2, y2, y2); \
REAL_DOT_FLOAT4(norm3, y3, y3); \
REAL_DOT_FLOAT4(norm4, y4, y4); \
REAL_DOT_FLOAT4(norm5, y5, y5); \
CONSTRUCT_HALF_SPINOR_FROM_SINGLE(yH, yN, y, stride);
Definition at line 3551 of file blas_quda.cu.
| #define REDUCE_X_OPERATION | ( | i | ) | (xc*yc*rdot0) |
Definition at line 3581 of file blas_quda.cu.
Definition at line 3581 of file blas_quda.cu.
| #define REDUCE_X_OPERATION | ( | i | ) | (xc*yc*rdot0) |
Definition at line 3581 of file blas_quda.cu.
| #define REDUCE_X_OPERATION | ( | i | ) | (xc*yc*rdot0) |
Definition at line 3581 of file blas_quda.cu.
| #define REDUCE_X_OPERATION | ( | i | ) | (uc*rdot0) |
Definition at line 3581 of file blas_quda.cu.
| #define REDUCE_X_OPERATION | ( | i | ) | (xc*yc*rdot0) |
Definition at line 3581 of file blas_quda.cu.
| #define REDUCE_X_OPERATION | ( | i | ) | (uc*rdot0) |
Definition at line 3581 of file blas_quda.cu.
Definition at line 3581 of file blas_quda.cu.
Definition at line 3581 of file blas_quda.cu.
Definition at line 3581 of file blas_quda.cu.
| #define REDUCE_Y_AUXILIARY | ( | i | ) | Float2 b = READ_DOUBLE2_TEXTURE(y, i); |
Definition at line 3568 of file blas_quda.cu.
| #define REDUCE_Y_AUXILIARY | ( | i | ) |
REAL_DOT_FLOAT4(rdot0, x0, y0); \ REAL_DOT_FLOAT4(rdot1, x1, y1); \ REAL_DOT_FLOAT4(rdot2, x2, y2); \ REAL_DOT_FLOAT4(rdot3, x3, y3); \ REAL_DOT_FLOAT4(rdot4, x4, y4); \ REAL_DOT_FLOAT4(rdot5, x5, y5); \ rdot0 += rdot1; rdot2 += rdot3; rdot4 += rdot5; rdot0 += rdot2; rdot0 += rdot4;
Definition at line 3568 of file blas_quda.cu.
| #define REDUCE_Y_AUXILIARY | ( | i | ) |
REAL_DOT_FLOAT2(rdot0, x0, y0); \
REAL_DOT_FLOAT2(rdot1, x1, y1); \
REAL_DOT_FLOAT2(rdot2, x2, y2); \
rdot0 += rdot1; rdot0 += rdot2;
Definition at line 3568 of file blas_quda.cu.
| #define REDUCE_Y_AUXILIARY | ( | i | ) |
| #define REDUCE_Y_AUXILIARY | ( | i | ) |
READ_HALF_SPINOR(u, texHalf5, stride); \ REAL_DOT_FLOAT4(rdot0, u0, y0); \ REAL_DOT_FLOAT4(rdot1, u1, y1); \ REAL_DOT_FLOAT4(rdot2, u2, y2); \ REAL_DOT_FLOAT4(rdot3, u3, y3); \ REAL_DOT_FLOAT4(rdot4, u4, y4); \ REAL_DOT_FLOAT4(rdot5, u5, y5); \ IMAG_DOT_FLOAT4(idot0, u0, y0); \ IMAG_DOT_FLOAT4(idot1, u1, y1); \ IMAG_DOT_FLOAT4(idot2, u2, y2); \ IMAG_DOT_FLOAT4(idot3, u3, y3); \ IMAG_DOT_FLOAT4(idot4, u4, y4); \ IMAG_DOT_FLOAT4(idot5, u5, y5);
Definition at line 3568 of file blas_quda.cu.
| #define REDUCE_Y_AUXILIARY | ( | i | ) |
REAL_DOT_FLOAT4(rdot0, x0, y0); \ REAL_DOT_FLOAT4(rdot1, x1, y1); \ REAL_DOT_FLOAT4(rdot2, x2, y2); \ REAL_DOT_FLOAT4(rdot3, x3, y3); \ REAL_DOT_FLOAT4(rdot4, x4, y4); \ REAL_DOT_FLOAT4(rdot5, x5, y5); \ rdot0 += rdot1; rdot2 += rdot3; rdot4 += rdot5; rdot0 += rdot2; rdot0 += rdot4;
Definition at line 3568 of file blas_quda.cu.
| #define REDUCE_Y_AUXILIARY | ( | i | ) |
| #define REDUCE_Y_AUXILIARY | ( | i | ) |
REAL_DOT_FLOAT2(rdot0, x0, y0); \ REAL_DOT_FLOAT2(rdot1, x1, y1); \ REAL_DOT_FLOAT2(rdot2, x2, y2); \ rdot0 += rdot1; rdot0 += rdot2;
Definition at line 3568 of file blas_quda.cu.
| #define REDUCE_Y_AUXILIARY | ( | i | ) |
Definition at line 3568 of file blas_quda.cu.
| #define REDUCE_Y_AUXILIARY | ( | i | ) |
READ_HALF_SPINOR_ST(u, texHalfSt5, stride); \
REAL_DOT_FLOAT2(rdot0, u0, y0); \
REAL_DOT_FLOAT2(rdot1, u1, y1); \
REAL_DOT_FLOAT2(rdot2, u2, y2); \
IMAG_DOT_FLOAT2(idot0, u0, y0); \
IMAG_DOT_FLOAT2(idot1, u1, y1); \
IMAG_DOT_FLOAT2(idot2, u2, y2);
Definition at line 3568 of file blas_quda.cu.
| #define REDUCE_Y_AUXILIARY | ( | i | ) | Float2 b = READ_DOUBLE2_TEXTURE(y, i); |
Definition at line 3568 of file blas_quda.cu.
| #define REDUCE_Y_AUXILIARY | ( | i | ) |
Definition at line 3568 of file blas_quda.cu.
Definition at line 3582 of file blas_quda.cu.
Definition at line 3582 of file blas_quda.cu.
| #define REDUCE_Y_OPERATION | ( | i | ) | (uc*idot0) |
Definition at line 3582 of file blas_quda.cu.
| #define REDUCE_Y_OPERATION | ( | i | ) | (xc*yc*idot0) |
Definition at line 3582 of file blas_quda.cu.
Definition at line 3582 of file blas_quda.cu.
| #define REDUCE_Y_OPERATION | ( | i | ) | (uc*idot0) |
Definition at line 3582 of file blas_quda.cu.
| #define REDUCE_Y_OPERATION | ( | i | ) | (xc*yc*idot0) |
Definition at line 3582 of file blas_quda.cu.
| #define REDUCE_Y_OPERATION | ( | i | ) | (xc*yc*idot0) |
Definition at line 3582 of file blas_quda.cu.
| #define REDUCE_Y_OPERATION | ( | i | ) | (xc*yc*idot0) |
Definition at line 3582 of file blas_quda.cu.
Definition at line 3582 of file blas_quda.cu.
| #define REDUCE_Z_AUXILIARY | ( | i | ) |
norm0 += norm1; norm2 += norm3; norm4 += norm5; norm0 += norm2, norm0 += norm4; \
rdot0 += rdot1; rdot2 += rdot3; rdot4 += rdot5; rdot0 += rdot2; rdot0 += rdot4; \
idot0 += idot1; idot2 += idot3; idot4 += idot5; idot0 += idot2; idot0 += idot4;
Definition at line 3576 of file blas_quda.cu.
| #define REDUCE_Z_AUXILIARY | ( | i | ) |
IMAG_DOT_FLOAT2(idot0, x0, y0); \ IMAG_DOT_FLOAT2(idot1, x1, y1); \ IMAG_DOT_FLOAT2(idot2, x2, y2); \ idot0 += idot1; idot0 += idot2;
Definition at line 3576 of file blas_quda.cu.
| #define REDUCE_Z_AUXILIARY | ( | i | ) |
IMAG_DOT_FLOAT4(idot0, x0, y0); \ IMAG_DOT_FLOAT4(idot1, x1, y1); \ IMAG_DOT_FLOAT4(idot2, x2, y2); \ IMAG_DOT_FLOAT4(idot3, x3, y3); \ IMAG_DOT_FLOAT4(idot4, x4, y4); \ IMAG_DOT_FLOAT4(idot5, x5, y5); \ idot0 += idot1; idot2 += idot3; idot4 += idot5; idot0 += idot2; idot0 += idot4;
Definition at line 3576 of file blas_quda.cu.
| #define REDUCE_Z_AUXILIARY | ( | i | ) |
z[i] = make_Float2(Z); \
y[i] = make_Float2(Y);
Definition at line 3576 of file blas_quda.cu.
| #define REDUCE_Z_AUXILIARY | ( | i | ) |
Definition at line 3576 of file blas_quda.cu.
| #define REDUCE_Z_AUXILIARY | ( | i | ) |
IMAG_DOT_FLOAT4(idot0, x0, y0); \ IMAG_DOT_FLOAT4(idot1, x1, y1); \ IMAG_DOT_FLOAT4(idot2, x2, y2); \ IMAG_DOT_FLOAT4(idot3, x3, y3); \ IMAG_DOT_FLOAT4(idot4, x4, y4); \ IMAG_DOT_FLOAT4(idot5, x5, y5); \ idot0 += idot1; idot2 += idot3; idot4 += idot5; idot0 += idot2; idot0 += idot4;
Definition at line 3576 of file blas_quda.cu.
| #define REDUCE_Z_AUXILIARY | ( | i | ) |
z[i] = make_Float2(Z); \ y[i] = make_Float2(Y);
Definition at line 3576 of file blas_quda.cu.
| #define REDUCE_Z_AUXILIARY | ( | i | ) |
IMAG_DOT_FLOAT2(idot0, x0, y0); \ IMAG_DOT_FLOAT2(idot1, x1, y1); \ IMAG_DOT_FLOAT2(idot2, x2, y2); \ idot0 += idot1; idot0 += idot2;
Definition at line 3576 of file blas_quda.cu.
| #define REDUCE_Z_AUXILIARY | ( | i | ) |
Definition at line 3576 of file blas_quda.cu.
| #define REDUCE_Z_AUXILIARY | ( | i | ) |
norm0 += norm1; norm0 += norm2; \
rdot0 += rdot1; rdot0 += rdot2; \
idot0 += idot1; idot0 += idot2;
Definition at line 3576 of file blas_quda.cu.
| #define REDUCE_Z_AUXILIARY | ( | i | ) |
Definition at line 3576 of file blas_quda.cu.
| #define REDUCE_Z_AUXILIARY | ( | i | ) |
Definition at line 3576 of file blas_quda.cu.
| #define REDUCE_Z_OPERATION | ( | i | ) | (yc*yc*norm0) |
Definition at line 3583 of file blas_quda.cu.
| #define REDUCE_Z_OPERATION | ( | i | ) | (xc*xc*norm0) |
Definition at line 3583 of file blas_quda.cu.
Definition at line 3583 of file blas_quda.cu.
Definition at line 3583 of file blas_quda.cu.
| #define REDUCE_Z_OPERATION | ( | i | ) | (norm0) |
Definition at line 3583 of file blas_quda.cu.
| #define REDUCE_Z_OPERATION | ( | i | ) | (xc*xc*norm0) |
Definition at line 3583 of file blas_quda.cu.
Definition at line 3583 of file blas_quda.cu.
| #define REDUCE_Z_OPERATION | ( | i | ) | (yc*yc*norm0) |
Definition at line 3583 of file blas_quda.cu.
| #define REDUCE_Z_OPERATION | ( | i | ) | (norm0) |
Definition at line 3583 of file blas_quda.cu.
Definition at line 3583 of file blas_quda.cu.
| #define SCALE_FLOAT ((SHORT_LENGTH-1) * 0.5) |
Definition at line 294 of file blas_quda.cu.
| #define SHIFT_FLOAT (-1.f / (SHORT_LENGTH-1)) |
Definition at line 295 of file blas_quda.cu.
| #define SHORT_LENGTH 65536 |
Definition at line 293 of file blas_quda.cu.
Definition at line 404 of file blas_quda.cu.
Definition at line 401 of file blas_quda.cu.
Definition at line 450 of file blas_quda.cu.
Definition at line 447 of file blas_quda.cu.
Definition at line 489 of file blas_quda.cu.
| #define XPAY_FLOAT4(X, a, Y) |
Definition at line 441 of file blas_quda.cu.
| void axCuda(const double &a, cudaColorSpinorField &x) |
Definition at line 1378 of file blas_quda.cu.
| __global__ void axHKernel | ( | float | a, |
| short4 * | xH, | ||
| float * | xN, | ||
| int | stride, | ||
| int | length | ||
| ) |
Definition at line 1352 of file blas_quda.cu.
| __global__ void axHKernel | ( | float | a, |
| short2 * | xH, | ||
| float * | xN, | ||
| int | stride, | ||
| int | length | ||
| ) |
Definition at line 1365 of file blas_quda.cu.
| __global__ void axKernel(Float a, Float2 *x, int len) |
Definition at line 1343 of file blas_quda.cu.
| void axpbyCuda | ( | const double & | a, |
| cudaColorSpinorField & | x, | ||
| const double & | b, | ||
| cudaColorSpinorField & | y | ||
| ) |
Definition at line 978 of file blas_quda.cu.
| __global__ void axpbyHKernel | ( | float | a, |
| float | b, | ||
| short4 * | yH, | ||
| float * | yN, | ||
| int | stride, | ||
| int | length | ||
| ) |
Definition at line 944 of file blas_quda.cu.
| __global__ void axpbyHKernel | ( | float | a, |
| float | b, | ||
| short2 * | yH, | ||
| float * | yN, | ||
| int | stride, | ||
| int | length | ||
| ) |
Definition at line 961 of file blas_quda.cu.
| __global__ void axpbyKernel | ( | Float | a, |
| Float2 * | x, | ||
| Float | b, | ||
| Float2 * | y, | ||
| int | length | ||
| ) |
Definition at line 935 of file blas_quda.cu.
| void axpyBzpcxCuda | ( | const double & | a, |
| cudaColorSpinorField & | x, | ||
| cudaColorSpinorField & | y, | ||
| const double & | b, | ||
| cudaColorSpinorField & | z, | ||
| const double & | c | ||
| ) |
Definition at line 1856 of file blas_quda.cu.
| __global__ void axpyBzpcxDKernel | ( | Float | a, |
| Float2 * | x, | ||
| Float2 * | y, | ||
| Float | b, | ||
| Float2 * | z, | ||
| Float | c, | ||
| int | len | ||
| ) |
Definition at line 1768 of file blas_quda.cu.
| __global__ void axpyBzpcxHKernel | ( | float | a, |
| float | b, | ||
| float | c, | ||
| short4 * | xH, | ||
| float * | xN, | ||
| short4 * | yH, | ||
| float * | yN, | ||
| int | stride, | ||
| int | length | ||
| ) |
Definition at line 1806 of file blas_quda.cu.
| __global__ void axpyBzpcxHKernel | ( | float | a, |
| float | b, | ||
| float | c, | ||
| short2 * | xH, | ||
| float * | xN, | ||
| short2 * | yH, | ||
| float * | yN, | ||
| int | stride, | ||
| int | length | ||
| ) |
Definition at line 1834 of file blas_quda.cu.
| __global__ void axpyBzpcxSKernel | ( | Float | a, |
| Float2 * | x, | ||
| Float2 * | y, | ||
| Float | b, | ||
| Float2 * | z, | ||
| Float | c, | ||
| int | len | ||
| ) |
Definition at line 1788 of file blas_quda.cu.
| void axpyCuda | ( | const double & | a, |
| cudaColorSpinorField & | x, | ||
| cudaColorSpinorField & | y | ||
| ) |
Definition at line 1143 of file blas_quda.cu.
| __global__ void axpyHKernel | ( | float | a, |
| short4 * | yH, | ||
| float * | yN, | ||
| int | stride, | ||
| int | length | ||
| ) |
Definition at line 1108 of file blas_quda.cu.
| __global__ void axpyHKernel | ( | float | a, |
| short2 * | yH, | ||
| float * | yN, | ||
| int | stride, | ||
| int | length | ||
| ) |
Definition at line 1127 of file blas_quda.cu.
| __global__ void axpyKernel | ( | Float | a, |
| Float2 * | x, | ||
| Float2 * | y, | ||
| int | len | ||
| ) |
Definition at line 1099 of file blas_quda.cu.
| double axpyNormCuda | ( | const double & | a, |
| cudaColorSpinorField & | x, | ||
| cudaColorSpinorField & | y | ||
| ) |
Definition at line 2643 of file blas_quda.cu.
| void axpyZpbxCuda | ( | const double & | a, |
| cudaColorSpinorField & | x, | ||
| cudaColorSpinorField & | y, | ||
| cudaColorSpinorField & | z, | ||
| const double & | b | ||
| ) |
Definition at line 1986 of file blas_quda.cu.
| __global__ void axpyZpbxDKernel | ( | Float | a, |
| Float2 * | x, | ||
| Float2 * | y, | ||
| Float2 * | z, | ||
| Float | b, | ||
| int | len | ||
| ) |
Definition at line 1909 of file blas_quda.cu.
| __global__ void axpyZpbxHKernel | ( | float | a, |
| float | b, | ||
| short4 * | xH, | ||
| float * | xN, | ||
| short4 * | yH, | ||
| float * | yN, | ||
| int | stride, | ||
| int | length | ||
| ) |
Definition at line 1938 of file blas_quda.cu.
| __global__ void axpyZpbxHKernel | ( | float | a, |
| float | b, | ||
| short2 * | xH, | ||
| float * | xN, | ||
| short2 * | yH, | ||
| float * | yN, | ||
| int | stride, | ||
| int | length | ||
| ) |
Definition at line 1964 of file blas_quda.cu.
| __global__ void axpyZpbxSKernel | ( | Float | a, |
| Float2 * | x, | ||
| Float2 * | y, | ||
| Float2 * | z, | ||
| Float | b, | ||
| int | len | ||
| ) |
Definition at line 1924 of file blas_quda.cu.
| void caxpbyCuda | ( | const Complex & | a, |
| cudaColorSpinorField & | x, | ||
| const Complex & | b, | ||
| cudaColorSpinorField & | y | ||
| ) |
Definition at line 1582 of file blas_quda.cu.
| __global__ void caxpbyDKernel | ( | Float2 | a, |
| Float2 * | x, | ||
| Float2 | b, | ||
| Float2 * | y, | ||
| int | len | ||
| ) |
Definition at line 1520 of file blas_quda.cu.
| __global__ void caxpbyHKernel | ( | float2 | a, |
| float2 | b, | ||
| short2 * | yH, | ||
| float * | yN, | ||
| int | stride, | ||
| int | length | ||
| ) |
Definition at line 1565 of file blas_quda.cu.
| __global__ void caxpbyHKernel | ( | float2 | a, |
| float2 | b, | ||
| short4 * | yH, | ||
| float * | yN, | ||
| int | stride, | ||
| int | length | ||
| ) |
Definition at line 1547 of file blas_quda.cu.
| double3 caxpbypzYmbwcDotProductWYNormYCuda | ( | const Complex & | a, |
| cudaColorSpinorField & | x, | ||
| const Complex & | b, | ||
| cudaColorSpinorField & | y, | ||
| cudaColorSpinorField & | z, | ||
| cudaColorSpinorField & | w, | ||
| cudaColorSpinorField & | u | ||
| ) |
Definition at line 3597 of file blas_quda.cu.
| void caxpbypzYmbwCuda | ( | const Complex & | a, |
| cudaColorSpinorField & | x, | ||
| const Complex & | b, | ||
| cudaColorSpinorField & | y, | ||
| cudaColorSpinorField & | z, | ||
| cudaColorSpinorField & | w | ||
| ) |
Definition at line 2143 of file blas_quda.cu.
| __global__ void caxpbypzYmbwDKernel | ( | Float2 | a, |
| Float2 * | x, | ||
| Float2 | b, | ||
| Float2 * | y, | ||
| Float2 * | z, | ||
| Float2 * | w, | ||
| int | len | ||
| ) |
Definition at line 2037 of file blas_quda.cu.
| __global__ void caxpbypzYmbwHKernel | ( | float2 | a, |
| float2 | b, | ||
| float * | xN, | ||
| short2 * | yH, | ||
| float * | yN, | ||
| short2 * | zH, | ||
| float * | zN, | ||
| float * | wN, | ||
| int | stride, | ||
| int | length | ||
| ) |
Definition at line 2119 of file blas_quda.cu.
| __global__ void caxpbypzYmbwHKernel | ( | float2 | a, |
| float2 | b, | ||
| float * | xN, | ||
| short4 * | yH, | ||
| float * | yN, | ||
| short4 * | zH, | ||
| float * | zN, | ||
| float * | wN, | ||
| int | stride, | ||
| int | length | ||
| ) |
Definition at line 2090 of file blas_quda.cu.
| __global__ void caxpbypzYmbwSKernel | ( | Float2 | a, |
| Float2 * | x, | ||
| Float2 | b, | ||
| Float2 * | y, | ||
| Float2 * | z, | ||
| Float2 * | w, | ||
| int | len | ||
| ) |
Definition at line 2064 of file blas_quda.cu.
| __global__ void caxpbySKernel | ( | Float2 | a, |
| Float2 * | x, | ||
| Float2 | b, | ||
| Float2 * | y, | ||
| int | len | ||
| ) |
Definition at line 1534 of file blas_quda.cu.
| void caxpyCuda | ( | const Complex & | a, |
| cudaColorSpinorField & | x, | ||
| cudaColorSpinorField & | y | ||
| ) |
Definition at line 1474 of file blas_quda.cu.
| __global__ void caxpyDKernel | ( | Float2 | a, |
| Float2 * | x, | ||
| Float2 * | y, | ||
| int | len | ||
| ) |
Definition at line 1411 of file blas_quda.cu.
| __global__ void caxpyHKernel | ( | float2 | a, |
| short2 * | yH, | ||
| float * | yN, | ||
| int | stride, | ||
| int | length | ||
| ) |
Definition at line 1457 of file blas_quda.cu.
| __global__ void caxpyHKernel | ( | float2 | a, |
| short4 * | yH, | ||
| float * | yN, | ||
| int | stride, | ||
| int | length | ||
| ) |
Definition at line 1438 of file blas_quda.cu.
| __global__ void caxpySKernel | ( | Float2 | a, |
| Float2 * | x, | ||
| Float2 * | y, | ||
| int | len | ||
| ) |
Definition at line 1425 of file blas_quda.cu.
| Complex cDotProductCuda | ( | cudaColorSpinorField & | x, |
| cudaColorSpinorField & | y | ||
| ) |
Definition at line 2879 of file blas_quda.cu.
| double3 cDotProductNormACuda | ( | cudaColorSpinorField & | x, |
| cudaColorSpinorField & | y | ||
| ) |
Definition at line 3209 of file blas_quda.cu.
| double3 cDotProductNormBCuda | ( | cudaColorSpinorField & | x, |
| cudaColorSpinorField & | y | ||
| ) |
Definition at line 3370 of file blas_quda.cu.
| __global__ void convertDHKernel | ( | double2 * | res, |
| int | length, | ||
| int | real_length | ||
| ) |
Definition at line 786 of file blas_quda.cu.
| __global__ void convertDHKernelSt | ( | double2 * | res, |
| int | length, | ||
| int | real_length | ||
| ) |
Definition at line 810 of file blas_quda.cu.
| __global__ void convertDSKernel | ( | double2 * | dst, |
| float4 * | src, | ||
| int | length | ||
| ) |
Definition at line 632 of file blas_quda.cu.
| __global__ void convertDSKernel | ( | double2 * | dst, |
| float2 * | src, | ||
| int | length | ||
| ) |
Definition at line 646 of file blas_quda.cu.
| __global__ void convertHDKernel | ( | short4 * | h, |
| float * | norm, | ||
| int | length, | ||
| int | real_length | ||
| ) |
Definition at line 749 of file blas_quda.cu.
| __global__ void convertHDKernel | ( | short2 * | h, |
| float * | norm, | ||
| int | length, | ||
| int | real_length | ||
| ) |
Definition at line 772 of file blas_quda.cu.
| __global__ void convertHSKernel | ( | short2 * | h, |
| float * | norm, | ||
| int | length, | ||
| int | real_length | ||
| ) |
Definition at line 702 of file blas_quda.cu.
| __global__ void convertHSKernel | ( | short4 * | h, |
| float * | norm, | ||
| int | length, | ||
| int | real_length | ||
| ) |
Definition at line 684 of file blas_quda.cu.
| __global__ void convertSDKernel | ( | float4 * | dst, |
| double2 * | src, | ||
| int | length | ||
| ) |
Definition at line 658 of file blas_quda.cu.
| __global__ void convertSDKernel | ( | float2 * | dst, |
| double2 * | src, | ||
| int | length | ||
| ) |
Definition at line 672 of file blas_quda.cu.
| __global__ void convertSHKernel | ( | float4 * | res, |
| int | length, | ||
| int | real_length | ||
| ) |
Definition at line 718 of file blas_quda.cu.
| __global__ void convertSHKernel | ( | float2 * | res, |
| int | length, | ||
| int | real_length | ||
| ) |
Definition at line 735 of file blas_quda.cu.
| void copyCuda | ( | cudaColorSpinorField & | dst, |
| const cudaColorSpinorField & | src | ||
| ) |
Definition at line 827 of file blas_quda.cu.
| void cxpaypbzCuda | ( | cudaColorSpinorField & | x, |
| const Complex & | a, | ||
| cudaColorSpinorField & | y, | ||
| const Complex & | b, | ||
| cudaColorSpinorField & | z | ||
| ) |
Definition at line 1711 of file blas_quda.cu.
| __global__ void cxpaypbzDKernel | ( | Float2 * | x, |
| Float2 | a, | ||
| Float2 * | y, | ||
| Float2 | b, | ||
| Float2 * | z, | ||
| int | len | ||
| ) |
Definition at line 1632 of file blas_quda.cu.
| __global__ void cxpaypbzHKernel | ( | float2 | a, |
| float2 | b, | ||
| short4 * | zH, | ||
| float * | zN, | ||
| int | stride, | ||
| int | length | ||
| ) |
Definition at line 1673 of file blas_quda.cu.
| __global__ void cxpaypbzHKernel | ( | float2 | a, |
| float2 | b, | ||
| short2 * | zH, | ||
| float * | zN, | ||
| int | stride, | ||
| int | length | ||
| ) |
Definition at line 1693 of file blas_quda.cu.
| __global__ void cxpaypbzSKernel | ( | Float2 * | x, |
| Float2 | a, | ||
| Float2 * | y, | ||
| Float2 | b, | ||
| Float2 * | z, | ||
| int | len | ||
| ) |
Definition at line 1653 of file blas_quda.cu.
| __device__ void dsadd(volatile QudaSumFloat &c0, volatile QudaSumFloat &c1, const volatile QudaSumFloat &a0, const volatile QudaSumFloat &a1, const float b0, const float b1) |
Definition at line 2210 of file blas_quda.cu.
| __device__ void dsadd3 | ( | volatile QudaSumFloat3 & | c0, |
| volatile QudaSumFloat3 & | c1, | ||
| const volatile QudaSumFloat3 & | a0, | ||
| const volatile QudaSumFloat3 & | a1, | ||
| const volatile QudaSumFloat3 & | b0, | ||
| const volatile QudaSumFloat3 & | b1 | ||
| ) |
Definition at line 2242 of file blas_quda.cu.
| void endBlas | ( | void | ) |
Definition at line 150 of file blas_quda.cu.
| __device__ float fast_abs_max | ( | float4 | a | ) |
Definition at line 314 of file blas_quda.cu.
| __device__ short float2short | ( | float | c, |
| float | a | ||
| ) |
Definition at line 297 of file blas_quda.cu.
| __device__ short4 float42short4 | ( | float | c, |
| float4 | a | ||
| ) |
Definition at line 307 of file blas_quda.cu.
| void initBlas(void) |
Definition at line 111 of file blas_quda.cu.
| float2 __device__ make_Float2 | ( | float2 | x | ) |
Definition at line 227 of file blas_quda.cu.
| double2 __device__ make_Float2 | ( | double2 | x | ) |
Definition at line 231 of file blas_quda.cu.
| void mxpyCuda | ( | cudaColorSpinorField & | x, |
| cudaColorSpinorField & | y | ||
| ) |
Definition at line 1305 of file blas_quda.cu.
| __global__ void mxpyHKernel | ( | short4 * | yH, |
| float * | yN, | ||
| int | stride, | ||
| int | length | ||
| ) |
Definition at line 1270 of file blas_quda.cu.
| __global__ void mxpyHKernel | ( | short2 * | yH, |
| float * | yN, | ||
| int | stride, | ||
| int | length | ||
| ) |
Definition at line 1288 of file blas_quda.cu.
| __global__ void mxpyKernel | ( | Float * | x, |
| Float * | y, | ||
| int | len | ||
| ) |
Definition at line 1261 of file blas_quda.cu.
| double normCuda | ( | const cudaColorSpinorField & | a | ) |
Definition at line 2435 of file blas_quda.cu.
| __device__ float2 operator* | ( | const float | a, |
| const float2 | x | ||
| ) |
Definition at line 62 of file blas_quda.cu.
| __device__ float4 operator*= | ( | float4 & | a, |
| const Float & | b | ||
| ) |
Definition at line 99 of file blas_quda.cu.
| __device__ Float2 operator*= | ( | Float2 & | x, |
| const Float | a | ||
| ) |
Definition at line 92 of file blas_quda.cu.
| __device__ Float2 operator+ | ( | const Float2 | x, |
| const Float2 | y | ||
| ) |
Definition at line 70 of file blas_quda.cu.
| double3 operator+ | ( | const double3 & | x, |
| const double3 & | y | ||
| ) |
Definition at line 56 of file blas_quda.cu.
| double2 operator+ | ( | const double2 & | x, |
| const double2 & | y | ||
| ) |
Definition at line 52 of file blas_quda.cu.
| __device__ Float2 operator+= | ( | Float2 & | x, |
| const Float2 | y | ||
| ) |
Definition at line 78 of file blas_quda.cu.
| __device__ Float2 operator-= | ( | Float2 & | x, |
| const Float2 | y | ||
| ) |
Definition at line 85 of file blas_quda.cu.
| float2 __device__ read_Float2 | ( | float2 * | x, |
| int | i | ||
| ) |
Definition at line 213 of file blas_quda.cu.
| double2 __device__ read_Float2 | ( | double2 * | x, |
| int | i | ||
| ) |
Definition at line 217 of file blas_quda.cu.
| double reDotProductCuda | ( | cudaColorSpinorField & | a, |
| cudaColorSpinorField & | b | ||
| ) |
Definition at line 2536 of file blas_quda.cu.
| void setBlasParam(int kernel, int prec, int threads, int blocks) |
Definition at line 168 of file blas_quda.cu.
| void setBlasTuning | ( | int | tuning | ) |
Definition at line 163 of file blas_quda.cu.
| void setBlock | ( | int | kernel, |
| int | length, | ||
| QudaPrecision | precision | ||
| ) |
Definition at line 174 of file blas_quda.cu.
| __device__ float short2float | ( | short | a | ) |
Definition at line 303 of file blas_quda.cu.
| double sumCuda | ( | cudaColorSpinorField & | a | ) |
Definition at line 2337 of file blas_quda.cu.
| double xmyNormCuda | ( | cudaColorSpinorField & | x, |
| cudaColorSpinorField & | y | ||
| ) |
Definition at line 2749 of file blas_quda.cu.
| Complex xpaycDotzyCuda | ( | cudaColorSpinorField & | x, |
| const double & | a, | ||
| cudaColorSpinorField & | y, | ||
| cudaColorSpinorField & | z | ||
| ) |
Definition at line 3040 of file blas_quda.cu.
| void xpayCuda | ( | const cudaColorSpinorField & | x, |
| const double & | a, | ||
| cudaColorSpinorField & | y | ||
| ) |
Definition at line 1223 of file blas_quda.cu.
| __global__ void xpayHKernel | ( | float | a, |
| short4 * | yH, | ||
| float * | yN, | ||
| int | stride, | ||
| int | length | ||
| ) |
Definition at line 1190 of file blas_quda.cu.
| __global__ void xpayHKernel | ( | float | a, |
| short2 * | yH, | ||
| float * | yN, | ||
| int | stride, | ||
| int | length | ||
| ) |
Definition at line 1207 of file blas_quda.cu.
| __global__ void xpayKernel | ( | const Float2 * | x, |
| Float | a, | ||
| Float2 * | y, | ||
| int | len | ||
| ) |
Definition at line 1181 of file blas_quda.cu.
| void xpyCuda | ( | cudaColorSpinorField & | x, |
| cudaColorSpinorField & | y | ||
| ) |
Definition at line 1061 of file blas_quda.cu.
| __global__ void xpyHKernel | ( | short2 * | yH, |
| float * | yN, | ||
| int | stride, | ||
| int | length | ||
| ) |
Definition at line 1045 of file blas_quda.cu.
| __global__ void xpyHKernel | ( | short4 * | yH, |
| float * | yN, | ||
| int | stride, | ||
| int | length | ||
| ) |
Definition at line 1027 of file blas_quda.cu.
| __global__ void xpyKernel | ( | Float * | x, |
| Float * | y, | ||
| int | len | ||
| ) |
Definition at line 1018 of file blas_quda.cu.
| __device__ void zcadd | ( | volatile QudaSumComplex & | c0, |
| volatile QudaSumComplex & | c1, | ||
| const volatile QudaSumComplex & | a0, | ||
| const volatile QudaSumComplex & | a1, | ||
| const volatile QudaSumComplex & | b0, | ||
| const volatile QudaSumComplex & | b1 | ||
| ) |
Definition at line 2222 of file blas_quda.cu.
| void zeroCuda(cudaColorSpinorField &a) |
Definition at line 109 of file blas_quda.cu.
| unsigned long long blas_quda_bytes |
Definition at line 44 of file blas_quda.cu.
| unsigned long long blas_quda_flops |
Definition at line 43 of file blas_quda.cu.
| texture<short4, 1, cudaReadModeNormalizedFloat> texHalf1 |
Definition at line 591 of file blas_quda.cu.
| texture<short4, 1, cudaReadModeNormalizedFloat> texHalf2 |
Definition at line 596 of file blas_quda.cu.
| texture<short4, 1, cudaReadModeNormalizedFloat> texHalf3 |
Definition at line 601 of file blas_quda.cu.
| texture<short4, 1, cudaReadModeNormalizedFloat> texHalf4 |
Definition at line 606 of file blas_quda.cu.
| texture<short4, 1, cudaReadModeNormalizedFloat> texHalf5 |
Definition at line 611 of file blas_quda.cu.
| texture<short2, 1, cudaReadModeNormalizedFloat> texHalfSt1 |
Definition at line 592 of file blas_quda.cu.
| texture<short2, 1, cudaReadModeNormalizedFloat> texHalfSt2 |
Definition at line 597 of file blas_quda.cu.
| texture<short2, 1, cudaReadModeNormalizedFloat> texHalfSt3 |
Definition at line 602 of file blas_quda.cu.
| texture<short2, 1, cudaReadModeNormalizedFloat> texHalfSt4 |
Definition at line 607 of file blas_quda.cu.
| texture<short2, 1, cudaReadModeNormalizedFloat> texHalfSt5 |
Definition at line 612 of file blas_quda.cu.
| texture<float, 1, cudaReadModeElementType> texNorm1 |
Definition at line 593 of file blas_quda.cu.
| texture<float, 1, cudaReadModeElementType> texNorm2 |
Definition at line 598 of file blas_quda.cu.
| texture<float, 1, cudaReadModeElementType> texNorm3 |
Definition at line 603 of file blas_quda.cu.
| texture<float, 1, cudaReadModeElementType> texNorm4 |
Definition at line 608 of file blas_quda.cu.
| texture<float, 1, cudaReadModeElementType> texNorm5 |
Definition at line 613 of file blas_quda.cu.
| texture<int4, 1> uTexDouble2 |
Definition at line 582 of file blas_quda.cu.
| texture<int4, 1> wTexDouble2 |
Definition at line 581 of file blas_quda.cu.
| texture<int4, 1> xTexDouble2 |
Definition at line 578 of file blas_quda.cu.
| texture<float2, 1> xTexSingle2 |
Definition at line 585 of file blas_quda.cu.
| texture<float4, 1> xTexSingle4 |
Definition at line 588 of file blas_quda.cu.
| texture<int4, 1> yTexDouble2 |
Definition at line 579 of file blas_quda.cu.
| texture<float2, 1> yTexSingle2 |
Definition at line 586 of file blas_quda.cu.
| texture<int4, 1> zTexDouble2 |
Definition at line 580 of file blas_quda.cu.
1.7.3