QUDA v0.3.2
A library for QCD on GPUs
Defines | Functions | Variables

quda/lib/blas_quda.cu File Reference

#include <stdlib.h>
#include <stdio.h>
#include <quda_internal.h>
#include <blas_quda.h>
#include <color_spinor_field.h>
#include <cuComplex.h>
#include <blas_param.h>
#include "reduce_core.h"
#include "reduce_complex_core.h"
#include "reduce_triple_core.h"

Go to the source code of this file.

Defines

#define REDUCE_MAX_BLOCKS   65536
#define REDUCE_DOUBLE   64
#define REDUCE_KAHAN   32
#define REDUCE_TYPE   REDUCE_KAHAN
#define QudaSumFloat   float
#define QudaSumComplex   cuComplex
#define QudaSumFloat3   float3
#define EMUSYNC
#define READ_DOUBLE2_TEXTURE(x, i)   fetch_double2(x##TexDouble2, i)
#define READ_FLOAT2_TEXTURE(x, i)   tex1Dfetch(x##TexSingle2, i)
#define RECONSTRUCT_HALF_SPINOR(a, texHalf, texNorm, length)
#define RECONSTRUCT_HALF_SPINOR_ST(a, texHalf, texNorm, length)
#define READ_HALF_SPINOR_TEX(a, tex, texNorm, length)
#define READ_HALF_SPINOR(a, tex, length)
#define READ_HALF_SPINOR_ST(a, tex, length)
#define SHORT_LENGTH   65536
#define SCALE_FLOAT   ((SHORT_LENGTH-1) * 0.5)
#define SHIFT_FLOAT   (-1.f / (SHORT_LENGTH-1))
#define FAST_ABS_MAX(a, b)   fmaxf(fabsf(a), fabsf(b));
#define FAST_MAX(a, b)   fmaxf(a, b);
#define CONSTRUCT_HALF_SPINOR_FROM_SINGLE(h, n, a, length)
#define CONSTRUCT_HALF_SPINOR_FROM_DOUBLE(h, n, a, length)
#define CONSTRUCT_HALF_SPINOR_FROM_SINGLE_ST(h, n, a, length)
#define CONSTRUCT_HALF_SPINOR_FROM_DOUBLE_ST(h, n, a, length)
#define SUM_FLOAT4(sum, a)   float sum = a.x + a.y + a.z + a.w;
#define SUM_FLOAT2(sum, a)   float sum = a.x + a.y;
#define REAL_DOT_FLOAT4(dot, a, b)   float dot = a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w;
#define REAL_DOT_FLOAT2(dot, a, b)   float dot = a.x*b.x + a.y*b.y;
#define IMAG_DOT_FLOAT4(dot, a, b)   float dot = a.x*b.y - a.y*b.x + a.z*b.w - a.w*b.z;
#define IMAG_DOT_FLOAT2(dot, a, b)   float dot = a.x*b.y - a.y*b.x;
#define AX_FLOAT4(a, X)   X.x *= a; X.y *= a; X.z *= a; X.w *= a;
#define AX_FLOAT2(a, X)   X.x *= a; X.y *= a;
#define XPY_FLOAT4(X, Y)   Y.x += X.x; Y.y += X.y; Y.z += X.z; Y.w += X.w;
#define XPY_FLOAT2(X, Y)   Y.x += X.x; Y.y += X.y;
#define XMY_FLOAT4(X, Y)   Y.x = X.x - Y.x; Y.y = X.y - Y.y; Y.z = X.z - Y.z; Y.w = X.w - Y.w;
#define XMY_FLOAT2(X, Y)   Y.x = X.x - Y.x; Y.y = X.y - Y.y;
#define MXPY_FLOAT4(X, Y)   Y.x -= X.x; Y.y -= X.y; Y.z -= X.z; Y.w -= X.w;
#define MXPY_FLOAT2(X, Y)   Y.x -= X.x; Y.y -= X.y;
#define AXPY_FLOAT4(a, X, Y)
#define AXPY_FLOAT2(a, X, Y)   Y.x += a*X.x; Y.y += a*X.y;
#define AXPBY_FLOAT4(a, X, b, Y)
#define AXPBY_FLOAT2(a, X, b, Y)   Y.x = b*Y.x; Y.x += a*X.x; Y.y = b*Y.y; Y.y += a*X.y; \
#define XPAY_FLOAT4(X, a, Y)
#define XPAY_FLOAT2(X, a, Y)   Y.x = X.x + a*Y.x; Y.y = X.y + a*Y.y;
#define CAXPY_FLOAT4(a, X, Y)
#define CAXPY_FLOAT2(a, X, Y)
#define CMAXPY_FLOAT4(a, X, Y)
#define CAXPBY_FLOAT4(a, X, b, Y)
#define CAXPBY_FLOAT2(a, X, b, Y)
#define CXPAYPBZ_FLOAT4(X, a, Y, b, Z)
#define CXPAYPBZ_FLOAT2(X, a, Y, b, Z)
#define CAXPBYPZ_FLOAT4(a, X, b, Y, Z)
#define CAXPBYPZ_FLOAT2(a, X, b, Y, Z)
#define checkSpinor(a, b)
#define checkSpinorLength(a, b)
#define REDUCE_FUNC_NAME(suffix)   sumD##suffix
#define REDUCE_TYPES   Float *a
#define REDUCE_PARAMS   a
#define REDUCE_AUXILIARY(i)
#define REDUCE_OPERATION(i)   a[i]
#define REDUCE_FUNC_NAME(suffix)   sumS##suffix
#define REDUCE_TYPES   Float *a
#define REDUCE_PARAMS   a
#define REDUCE_AUXILIARY(i)
#define REDUCE_OPERATION(i)   a[i].x + a[i].y
#define REDUCE_FUNC_NAME(suffix)   sumH##suffix
#define REDUCE_TYPES   Float *aN, int stride
#define REDUCE_PARAMS   aN, stride
#define REDUCE_AUXILIARY(i)
#define REDUCE_OPERATION(i)   (ac*s0)
#define REDUCE_FUNC_NAME(suffix)   sumHSt##suffix
#define REDUCE_TYPES   Float *aN, int stride
#define REDUCE_PARAMS   aN, stride
#define REDUCE_AUXILIARY(i)
#define REDUCE_OPERATION(i)   (ac*s0)
#define REDUCE_FUNC_NAME(suffix)   normD##suffix
#define REDUCE_TYPES   Float *a
#define REDUCE_PARAMS   a
#define REDUCE_AUXILIARY(i)
#define REDUCE_OPERATION(i)   (a[i]*a[i])
#define REDUCE_FUNC_NAME(suffix)   normS##suffix
#define REDUCE_TYPES   Float *a
#define REDUCE_PARAMS   a
#define REDUCE_AUXILIARY(i)
#define REDUCE_OPERATION(i)   (a[i].x*a[i].x + a[i].y*a[i].y)
#define REDUCE_FUNC_NAME(suffix)   normH##suffix
#define REDUCE_TYPES   Float *aN, int stride
#define REDUCE_PARAMS   aN, stride
#define REDUCE_AUXILIARY(i)
#define REDUCE_OPERATION(i)   (ac*ac*norm0)
#define REDUCE_FUNC_NAME(suffix)   normHSt##suffix
#define REDUCE_TYPES   Float *aN, int stride
#define REDUCE_PARAMS   aN, stride
#define REDUCE_AUXILIARY(i)
#define REDUCE_OPERATION(i)   (ac*ac*norm0)
#define REDUCE_FUNC_NAME(suffix)   reDotProductD##suffix
#define REDUCE_TYPES   Float *a, Float *b
#define REDUCE_PARAMS   a, b
#define REDUCE_AUXILIARY(i)
#define REDUCE_OPERATION(i)   (a[i]*b[i])
#define REDUCE_FUNC_NAME(suffix)   reDotProductS##suffix
#define REDUCE_TYPES   Float *a, Float *b
#define REDUCE_PARAMS   a, b
#define REDUCE_AUXILIARY(i)
#define REDUCE_OPERATION(i)   (a[i].x*b[i].x + a[i].y*b[i].y)
#define REDUCE_FUNC_NAME(suffix)   reDotProductH##suffix
#define REDUCE_TYPES   Float *aN, Float *bN, int stride
#define REDUCE_PARAMS   aN, bN, stride
#define REDUCE_AUXILIARY(i)
#define REDUCE_OPERATION(i)   (ac*bc*rdot0)
#define REDUCE_FUNC_NAME(suffix)   reDotProductHSt##suffix
#define REDUCE_TYPES   Float *aN, Float *bN, int stride
#define REDUCE_PARAMS   aN, bN, stride
#define REDUCE_AUXILIARY(i)
#define REDUCE_OPERATION(i)   (ac*bc*rdot0)
#define REDUCE_FUNC_NAME(suffix)   axpyNormF##suffix
#define REDUCE_TYPES   Float a, Float *x, Float *y
#define REDUCE_PARAMS   a, x, y
#define REDUCE_AUXILIARY(i)   y[i] = a*x[i] + y[i]
#define REDUCE_OPERATION(i)   (y[i]*y[i])
#define REDUCE_FUNC_NAME(suffix)   axpyNormH##suffix
#define REDUCE_TYPES   Float a, short4 *yH, float *yN, int stride
#define REDUCE_PARAMS   a, yH, yN, stride
#define REDUCE_AUXILIARY(i)
#define REDUCE_OPERATION(i)   (norm0)
#define REDUCE_FUNC_NAME(suffix)   axpyNormH##suffix
#define REDUCE_TYPES   Float a, short2 *yH, float *yN, int stride
#define REDUCE_PARAMS   a, yH, yN, stride
#define REDUCE_AUXILIARY(i)
#define REDUCE_OPERATION(i)   (norm0)
#define REDUCE_FUNC_NAME(suffix)   xmyNormF##suffix
#define REDUCE_TYPES   Float *x, Float *y
#define REDUCE_PARAMS   x, y
#define REDUCE_AUXILIARY(i)   y[i] = x[i] - y[i]
#define REDUCE_OPERATION(i)   (y[i]*y[i])
#define REDUCE_FUNC_NAME(suffix)   xmyNormH##suffix
#define REDUCE_TYPES   Float *d1, Float *d2, short4 *yH, float *yN, int stride
#define REDUCE_PARAMS   d1, d2, yH, yN, stride
#define REDUCE_AUXILIARY(i)
#define REDUCE_OPERATION(i)   (norm0)
#define REDUCE_FUNC_NAME(suffix)   xmyNormH##suffix
#define REDUCE_TYPES   Float *d1, Float *d2, short2 *yH, float *yN, int stride
#define REDUCE_PARAMS   d1, d2, yH, yN, stride
#define REDUCE_AUXILIARY(i)
#define REDUCE_OPERATION(i)   (norm0)
#define REDUCE_FUNC_NAME(suffix)   cDotProductD##suffix
#define REDUCE_TYPES   Float2 *x, Float2 *y, Float c
#define REDUCE_PARAMS   x, y, c
#define REDUCE_REAL_AUXILIARY(i)   Float2 a = READ_DOUBLE2_TEXTURE(x, i);
#define REDUCE_IMAG_AUXILIARY(i)   Float2 b = READ_DOUBLE2_TEXTURE(y, i);
#define REDUCE_REAL_OPERATION(i)   (a.x*b.x + a.y*b.y)
#define REDUCE_IMAG_OPERATION(i)   (a.x*b.y - a.y*b.x)
#define REDUCE_FUNC_NAME(suffix)   cDotProductS##suffix
#define REDUCE_TYPES   Float2 *x, Float2 *y, Float c
#define REDUCE_PARAMS   x, y, c
#define REDUCE_REAL_AUXILIARY(i)   Float2 a = read_Float2(x, i);
#define REDUCE_IMAG_AUXILIARY(i)   Float2 b = read_Float2(y, i);
#define REDUCE_REAL_OPERATION(i)   (a.x*b.x + a.y*b.y)
#define REDUCE_IMAG_OPERATION(i)   (a.x*b.y - a.y*b.x)
#define REDUCE_FUNC_NAME(suffix)   cDotProductH##suffix
#define REDUCE_TYPES   Float *aN, Float2 *bN, int stride
#define REDUCE_PARAMS   aN, bN, stride
#define REDUCE_REAL_AUXILIARY(i)
#define REDUCE_IMAG_AUXILIARY(i)
#define REDUCE_REAL_OPERATION(i)   (ac*bc*rdot0)
#define REDUCE_IMAG_OPERATION(i)   (ac*bc*idot0)
#define REDUCE_FUNC_NAME(suffix)   cDotProductHSt##suffix
#define REDUCE_TYPES   Float *aN, Float2 *bN, int stride
#define REDUCE_PARAMS   aN, bN, stride
#define REDUCE_REAL_AUXILIARY(i)
#define REDUCE_IMAG_AUXILIARY(i)
#define REDUCE_REAL_OPERATION(i)   (ac*bc*rdot0)
#define REDUCE_IMAG_OPERATION(i)   (ac*bc*idot0)
#define REDUCE_FUNC_NAME(suffix)   xpaycDotzyD##suffix
#define REDUCE_TYPES   Float2 *x, Float a, Float2 *y, Float2 *z
#define REDUCE_PARAMS   x, a, y, z
#define REDUCE_REAL_AUXILIARY(i)
#define REDUCE_IMAG_AUXILIARY(i)   y[i].x = X.x + a*Y.x; y[i].y = X.y + a*Y.y
#define REDUCE_REAL_OPERATION(i)   (Z.x*y[i].x + Z.y*y[i].y)
#define REDUCE_IMAG_OPERATION(i)   (Z.x*y[i].y - Z.y*y[i].x)
#define REDUCE_FUNC_NAME(suffix)   xpaycDotzyS##suffix
#define REDUCE_TYPES   Float2 *x, Float a, Float2 *y, Float2 *z
#define REDUCE_PARAMS   x, a, y, z
#define REDUCE_REAL_AUXILIARY(i)   y[i].x = x[i].x + a*y[i].x
#define REDUCE_IMAG_AUXILIARY(i)   y[i].y = x[i].y + a*y[i].y
#define REDUCE_REAL_OPERATION(i)   (z[i].x*y[i].x + z[i].y*y[i].y)
#define REDUCE_IMAG_OPERATION(i)   (z[i].x*y[i].y - z[i].y*y[i].x)
#define REDUCE_FUNC_NAME(suffix)   xpaycDotzyH##suffix
#define REDUCE_TYPES   Float a, short4 *yH, Float2 *yN, int stride
#define REDUCE_PARAMS   a, yH, yN, stride
#define REDUCE_REAL_AUXILIARY(i)
#define REDUCE_IMAG_AUXILIARY(i)
#define REDUCE_REAL_OPERATION(i)   (rdot0)
#define REDUCE_IMAG_OPERATION(i)   (idot0)
#define REDUCE_FUNC_NAME(suffix)   xpaycDotzyH##suffix
#define REDUCE_TYPES   Float a, short2 *yH, Float2 *yN, int stride
#define REDUCE_PARAMS   a, yH, yN, stride
#define REDUCE_REAL_AUXILIARY(i)
#define REDUCE_IMAG_AUXILIARY(i)
#define REDUCE_REAL_OPERATION(i)   (rdot0)
#define REDUCE_IMAG_OPERATION(i)   (idot0)
#define REDUCE_FUNC_NAME(suffix)   cDotProductNormAD##suffix
#define REDUCE_TYPES   Float2 *x, Float2 *y
#define REDUCE_PARAMS   x, y
#define REDUCE_X_AUXILIARY(i)   Float2 a = READ_DOUBLE2_TEXTURE(x, i);
#define REDUCE_Y_AUXILIARY(i)   Float2 b = READ_DOUBLE2_TEXTURE(y, i);
#define REDUCE_Z_AUXILIARY(i)
#define REDUCE_X_OPERATION(i)   (a.x*b.x + a.y*b.y)
#define REDUCE_Y_OPERATION(i)   (a.x*b.y - a.y*b.x)
#define REDUCE_Z_OPERATION(i)   (a.x*a.x + a.y*a.y)
#define REDUCE_FUNC_NAME(suffix)   cDotProductNormAS##suffix
#define REDUCE_TYPES   Float2 *a, Float2 *b
#define REDUCE_PARAMS   a, b
#define REDUCE_X_AUXILIARY(i)
#define REDUCE_Y_AUXILIARY(i)
#define REDUCE_Z_AUXILIARY(i)
#define REDUCE_X_OPERATION(i)   (a[i].x*b[i].x + a[i].y*b[i].y)
#define REDUCE_Y_OPERATION(i)   (a[i].x*b[i].y - a[i].y*b[i].x)
#define REDUCE_Z_OPERATION(i)   (a[i].x*a[i].x + a[i].y*a[i].y)
#define REDUCE_FUNC_NAME(suffix)   cDotProductNormAH##suffix
#define REDUCE_TYPES   Float2 *xN, Float2 *yN, int stride
#define REDUCE_PARAMS   xN, yN, stride
#define REDUCE_X_AUXILIARY(i)
#define REDUCE_Y_AUXILIARY(i)
#define REDUCE_Z_AUXILIARY(i)
#define REDUCE_X_OPERATION(i)   (xc*yc*rdot0)
#define REDUCE_Y_OPERATION(i)   (xc*yc*idot0)
#define REDUCE_Z_OPERATION(i)   (xc*xc*norm0)
#define REDUCE_FUNC_NAME(suffix)   cDotProductNormAHSt##suffix
#define REDUCE_TYPES   Float2 *xN, Float2 *yN, int stride
#define REDUCE_PARAMS   xN, yN, stride
#define REDUCE_X_AUXILIARY(i)
#define REDUCE_Y_AUXILIARY(i)
#define REDUCE_Z_AUXILIARY(i)
#define REDUCE_X_OPERATION(i)   (xc*yc*rdot0)
#define REDUCE_Y_OPERATION(i)   (xc*yc*idot0)
#define REDUCE_Z_OPERATION(i)   (xc*xc*norm0)
#define REDUCE_FUNC_NAME(suffix)   cDotProductNormBD##suffix
#define REDUCE_TYPES   Float2 *x, Float2 *y
#define REDUCE_PARAMS   x, y
#define REDUCE_X_AUXILIARY(i)   Float2 a = READ_DOUBLE2_TEXTURE(x, i);
#define REDUCE_Y_AUXILIARY(i)   Float2 b = READ_DOUBLE2_TEXTURE(y, i);
#define REDUCE_Z_AUXILIARY(i)
#define REDUCE_X_OPERATION(i)   (a.x*b.x + a.y*b.y)
#define REDUCE_Y_OPERATION(i)   (a.x*b.y - a.y*b.x)
#define REDUCE_Z_OPERATION(i)   (b.x*b.x + b.y*b.y)
#define REDUCE_FUNC_NAME(suffix)   cDotProductNormBS##suffix
#define REDUCE_TYPES   Float2 *a, Float2 *b
#define REDUCE_PARAMS   a, b
#define REDUCE_X_AUXILIARY(i)
#define REDUCE_Y_AUXILIARY(i)
#define REDUCE_Z_AUXILIARY(i)
#define REDUCE_X_OPERATION(i)   (a[i].x*b[i].x + a[i].y*b[i].y)
#define REDUCE_Y_OPERATION(i)   (a[i].x*b[i].y - a[i].y*b[i].x)
#define REDUCE_Z_OPERATION(i)   (b[i].x*b[i].x + b[i].y*b[i].y)
#define REDUCE_FUNC_NAME(suffix)   cDotProductNormBH##suffix
#define REDUCE_TYPES   Float2 *xN, Float2 *yN, int stride
#define REDUCE_PARAMS   xN, yN, stride
#define REDUCE_X_AUXILIARY(i)
#define REDUCE_Y_AUXILIARY(i)
#define REDUCE_Z_AUXILIARY(i)
#define REDUCE_X_OPERATION(i)   (xc*yc*rdot0)
#define REDUCE_Y_OPERATION(i)   (xc*yc*idot0)
#define REDUCE_Z_OPERATION(i)   (yc*yc*norm0)
#define REDUCE_FUNC_NAME(suffix)   cDotProductNormBHSt##suffix
#define REDUCE_TYPES   Float2 *xN, Float2 *yN, int stride
#define REDUCE_PARAMS   xN, yN, stride
#define REDUCE_X_AUXILIARY(i)
#define REDUCE_Y_AUXILIARY(i)
#define REDUCE_Z_AUXILIARY(i)
#define REDUCE_X_OPERATION(i)   (xc*yc*rdot0)
#define REDUCE_Y_OPERATION(i)   (xc*yc*idot0)
#define REDUCE_Z_OPERATION(i)   (yc*yc*norm0)
#define REDUCE_FUNC_NAME(suffix)   caxpbypzYmbwcDotProductWYNormYD##suffix
#define REDUCE_TYPES   Float2 a, Float2 *x, Float2 b, Float2 *y, Float2 *z, Float2 *w, Float2 *u
#define REDUCE_PARAMS   a, x, b, y, z, w, u
#define REDUCE_X_AUXILIARY(i)
#define REDUCE_Y_AUXILIARY(i)
#define REDUCE_Z_AUXILIARY(i)
#define REDUCE_X_OPERATION(i)   (u[i].x*y[i].x + u[i].y*y[i].y)
#define REDUCE_Y_OPERATION(i)   (u[i].x*y[i].y - u[i].y*y[i].x)
#define REDUCE_Z_OPERATION(i)   (y[i].x*y[i].x + y[i].y*y[i].y)
#define REDUCE_FUNC_NAME(suffix)   caxpbypzYmbwcDotProductWYNormYS##suffix
#define REDUCE_TYPES   Float2 a, Float2 *x, Float2 b, Float2 *y, Float2 *z, Float2 *w, Float2 *u
#define REDUCE_PARAMS   a, x, b, y, z, w, u
#define REDUCE_X_AUXILIARY(i)
#define REDUCE_Y_AUXILIARY(i)
#define REDUCE_Z_AUXILIARY(i)
#define REDUCE_X_OPERATION(i)   (u[i].x*y[i].x + u[i].y*y[i].y)
#define REDUCE_Y_OPERATION(i)   (u[i].x*y[i].y - u[i].y*y[i].x)
#define REDUCE_Z_OPERATION(i)   (y[i].x*y[i].x + y[i].y*y[i].y)
#define REDUCE_FUNC_NAME(suffix)   caxpbypzYmbwcDotProductWYNormYH##suffix
#define REDUCE_TYPES   Float2 a, Float2 b, short4 *yH, float *yN, short4 *zH, float *zN, float *wN, float *uN, int stride
#define REDUCE_PARAMS   a, b, yH, yN, zH, zN, wN, uN, stride
#define REDUCE_X_AUXILIARY(i)
#define REDUCE_Y_AUXILIARY(i)
#define REDUCE_Z_AUXILIARY(i)
#define REDUCE_X_OPERATION(i)   (uc*rdot0)
#define REDUCE_Y_OPERATION(i)   (uc*idot0)
#define REDUCE_Z_OPERATION(i)   (norm0)
#define REDUCE_FUNC_NAME(suffix)   caxpbypzYmbwcDotProductWYNormYH##suffix
#define REDUCE_TYPES   Float2 a, Float2 b, short2 *yH, float *yN, short2 *zH, float *zN, float *wN, float *uN, int stride
#define REDUCE_PARAMS   a, b, yH, yN, zH, zN, wN, uN, stride
#define REDUCE_X_AUXILIARY(i)
#define REDUCE_Y_AUXILIARY(i)
#define REDUCE_Z_AUXILIARY(i)
#define REDUCE_X_OPERATION(i)   (uc*rdot0)
#define REDUCE_Y_OPERATION(i)   (uc*idot0)
#define REDUCE_Z_OPERATION(i)   (norm0)

Functions

double2 operator+ (const double2 &x, const double2 &y)
double3 operator+ (const double3 &x, const double3 &y)
__device__ float2 operator* (const float a, const float2 x)
template<typename Float2 >
__device__ Float2 operator+ (const Float2 x, const Float2 y)
template<typename Float2 >
__device__ Float2 operator+= (Float2 &x, const Float2 y)
template<typename Float2 >
__device__ Float2 operator-= (Float2 &x, const Float2 y)
template<typename Float , typename Float2 >
__device__ Float2 operator*= (Float2 &x, const Float a)
template<typename Float >
__device__ float4 operator*= (float4 &a, const Float &b)
void zeroCuda (cudaColorSpinorField &a)
void initBlas (void)
void endBlas (void)
void setBlasTuning (int tuning)
void setBlasParam (int kernel, int prec, int threads, int blocks)
void setBlock (int kernel, int length, QudaPrecision precision)
float2 __device__ read_Float2 (float2 *x, int i)
double2 __device__ read_Float2 (double2 *x, int i)
float2 __device__ make_Float2 (float2 x)
double2 __device__ make_Float2 (double2 x)
__device__ short float2short (float c, float a)
__device__ float short2float (short a)
__device__ short4 float42short4 (float c, float4 a)
__device__ float fast_abs_max (float4 a)
__global__ void convertDSKernel (double2 *dst, float4 *src, int length)
__global__ void convertDSKernel (double2 *dst, float2 *src, int length)
__global__ void convertSDKernel (float4 *dst, double2 *src, int length)
__global__ void convertSDKernel (float2 *dst, double2 *src, int length)
__global__ void convertHSKernel (short4 *h, float *norm, int length, int real_length)
__global__ void convertHSKernel (short2 *h, float *norm, int length, int real_length)
__global__ void convertSHKernel (float4 *res, int length, int real_length)
__global__ void convertSHKernel (float2 *res, int length, int real_length)
__global__ void convertHDKernel (short4 *h, float *norm, int length, int real_length)
__global__ void convertHDKernel (short2 *h, float *norm, int length, int real_length)
__global__ void convertDHKernel (double2 *res, int length, int real_length)
__global__ void convertDHKernelSt (double2 *res, int length, int real_length)
void copyCuda (cudaColorSpinorField &dst, const cudaColorSpinorField &src)
template<typename Float , typename Float2 >
__global__ void axpbyKernel (Float a, Float2 *x, Float b, Float2 *y, int length)
__global__ void axpbyHKernel (float a, float b, short4 *yH, float *yN, int stride, int length)
__global__ void axpbyHKernel (float a, float b, short2 *yH, float *yN, int stride, int length)
void axpbyCuda (const double &a, cudaColorSpinorField &x, const double &b, cudaColorSpinorField &y)
template<typename Float >
__global__ void xpyKernel (Float *x, Float *y, int len)
__global__ void xpyHKernel (short4 *yH, float *yN, int stride, int length)
__global__ void xpyHKernel (short2 *yH, float *yN, int stride, int length)
void xpyCuda (cudaColorSpinorField &x, cudaColorSpinorField &y)
template<typename Float , typename Float2 >
__global__ void axpyKernel (Float a, Float2 *x, Float2 *y, int len)
__global__ void axpyHKernel (float a, short4 *yH, float *yN, int stride, int length)
__global__ void axpyHKernel (float a, short2 *yH, float *yN, int stride, int length)
void axpyCuda (const double &a, cudaColorSpinorField &x, cudaColorSpinorField &y)
template<typename Float , typename Float2 >
__global__ void xpayKernel (const Float2 *x, Float a, Float2 *y, int len)
__global__ void xpayHKernel (float a, short4 *yH, float *yN, int stride, int length)
__global__ void xpayHKernel (float a, short2 *yH, float *yN, int stride, int length)
void xpayCuda (const cudaColorSpinorField &x, const double &a, cudaColorSpinorField &y)
template<typename Float >
__global__ void mxpyKernel (Float *x, Float *y, int len)
__global__ void mxpyHKernel (short4 *yH, float *yN, int stride, int length)
__global__ void mxpyHKernel (short2 *yH, float *yN, int stride, int length)
void mxpyCuda (cudaColorSpinorField &x, cudaColorSpinorField &y)
template<typename Float , typename Float2 >
__global__ void axKernel (Float a, Float2 *x, int len)
__global__ void axHKernel (float a, short4 *xH, float *xN, int stride, int length)
__global__ void axHKernel (float a, short2 *xH, float *xN, int stride, int length)
void axCuda (const double &a, cudaColorSpinorField &x)
template<typename Float2 >
__global__ void caxpyDKernel (Float2 a, Float2 *x, Float2 *y, int len)
template<typename Float2 >
__global__ void caxpySKernel (Float2 a, Float2 *x, Float2 *y, int len)
__global__ void caxpyHKernel (float2 a, short4 *yH, float *yN, int stride, int length)
__global__ void caxpyHKernel (float2 a, short2 *yH, float *yN, int stride, int length)
void caxpyCuda (const Complex &a, cudaColorSpinorField &x, cudaColorSpinorField &y)
template<typename Float2 >
__global__ void caxpbyDKernel (Float2 a, Float2 *x, Float2 b, Float2 *y, int len)
template<typename Float2 >
__global__ void caxpbySKernel (Float2 a, Float2 *x, Float2 b, Float2 *y, int len)
__global__ void caxpbyHKernel (float2 a, float2 b, short4 *yH, float *yN, int stride, int length)
__global__ void caxpbyHKernel (float2 a, float2 b, short2 *yH, float *yN, int stride, int length)
void caxpbyCuda (const Complex &a, cudaColorSpinorField &x, const Complex &b, cudaColorSpinorField &y)
template<typename Float2 >
__global__ void cxpaypbzDKernel (Float2 *x, Float2 a, Float2 *y, Float2 b, Float2 *z, int len)
template<typename Float2 >
__global__ void cxpaypbzSKernel (Float2 *x, Float2 a, Float2 *y, Float2 b, Float2 *z, int len)
__global__ void cxpaypbzHKernel (float2 a, float2 b, short4 *zH, float *zN, int stride, int length)
__global__ void cxpaypbzHKernel (float2 a, float2 b, short2 *zH, float *zN, int stride, int length)
void cxpaypbzCuda (cudaColorSpinorField &x, const Complex &a, cudaColorSpinorField &y, const Complex &b, cudaColorSpinorField &z)
template<typename Float , typename Float2 >
__global__ void axpyBzpcxDKernel (Float a, Float2 *x, Float2 *y, Float b, Float2 *z, Float c, int len)
template<typename Float , typename Float2 >
__global__ void axpyBzpcxSKernel (Float a, Float2 *x, Float2 *y, Float b, Float2 *z, Float c, int len)
__global__ void axpyBzpcxHKernel (float a, float b, float c, short4 *xH, float *xN, short4 *yH, float *yN, int stride, int length)
__global__ void axpyBzpcxHKernel (float a, float b, float c, short2 *xH, float *xN, short2 *yH, float *yN, int stride, int length)
void axpyBzpcxCuda (const double &a, cudaColorSpinorField &x, cudaColorSpinorField &y, const double &b, cudaColorSpinorField &z, const double &c)
template<typename Float , typename Float2 >
__global__ void axpyZpbxDKernel (Float a, Float2 *x, Float2 *y, Float2 *z, Float b, int len)
template<typename Float , typename Float2 >
__global__ void axpyZpbxSKernel (Float a, Float2 *x, Float2 *y, Float2 *z, Float b, int len)
__global__ void axpyZpbxHKernel (float a, float b, short4 *xH, float *xN, short4 *yH, float *yN, int stride, int length)
__global__ void axpyZpbxHKernel (float a, float b, short2 *xH, float *xN, short2 *yH, float *yN, int stride, int length)
void axpyZpbxCuda (const double &a, cudaColorSpinorField &x, cudaColorSpinorField &y, cudaColorSpinorField &z, const double &b)
template<typename Float2 >
__global__ void caxpbypzYmbwDKernel (Float2 a, Float2 *x, Float2 b, Float2 *y, Float2 *z, Float2 *w, int len)
template<typename Float2 >
__global__ void caxpbypzYmbwSKernel (Float2 a, Float2 *x, Float2 b, Float2 *y, Float2 *z, Float2 *w, int len)
__global__ void caxpbypzYmbwHKernel (float2 a, float2 b, float *xN, short4 *yH, float *yN, short4 *zH, float *zN, float *wN, int stride, int length)
__global__ void caxpbypzYmbwHKernel (float2 a, float2 b, float *xN, short2 *yH, float *yN, short2 *zH, float *zN, float *wN, int stride, int length)
void caxpbypzYmbwCuda (const Complex &a, cudaColorSpinorField &x, const Complex &b, cudaColorSpinorField &y, cudaColorSpinorField &z, cudaColorSpinorField &w)
__device__ void dsadd (volatile QudaSumFloat &c0, volatile QudaSumFloat &c1, const volatile QudaSumFloat &a0, const volatile QudaSumFloat &a1, const float b0, const float b1)
__device__ void zcadd (volatile QudaSumComplex &c0, volatile QudaSumComplex &c1, const volatile QudaSumComplex &a0, const volatile QudaSumComplex &a1, const volatile QudaSumComplex &b0, const volatile QudaSumComplex &b1)
__device__ void dsadd3 (volatile QudaSumFloat3 &c0, volatile QudaSumFloat3 &c1, const volatile QudaSumFloat3 &a0, const volatile QudaSumFloat3 &a1, const volatile QudaSumFloat3 &b0, const volatile QudaSumFloat3 &b1)
double sumCuda (cudaColorSpinorField &a)
double normCuda (const cudaColorSpinorField &a)
double reDotProductCuda (cudaColorSpinorField &a, cudaColorSpinorField &b)
double axpyNormCuda (const double &a, cudaColorSpinorField &x, cudaColorSpinorField &y)
double xmyNormCuda (cudaColorSpinorField &x, cudaColorSpinorField &y)
Complex cDotProductCuda (cudaColorSpinorField &x, cudaColorSpinorField &y)
Complex xpaycDotzyCuda (cudaColorSpinorField &x, const double &a, cudaColorSpinorField &y, cudaColorSpinorField &z)
double3 cDotProductNormACuda (cudaColorSpinorField &x, cudaColorSpinorField &y)
double3 cDotProductNormBCuda (cudaColorSpinorField &x, cudaColorSpinorField &y)
double3 caxpbypzYmbwcDotProductWYNormYCuda (const Complex &a, cudaColorSpinorField &x, const Complex &b, cudaColorSpinorField &y, cudaColorSpinorField &z, cudaColorSpinorField &w, cudaColorSpinorField &u)

Variables

unsigned long long blas_quda_flops
unsigned long long blas_quda_bytes
texture< int4, 1 > xTexDouble2
texture< int4, 1 > yTexDouble2
texture< int4, 1 > zTexDouble2
texture< int4, 1 > wTexDouble2
texture< int4, 1 > uTexDouble2
texture< float2, 1 > xTexSingle2
texture< float2, 1 > yTexSingle2
texture< float4, 1 > xTexSingle4
texture< short4, 1, cudaReadModeNormalizedFloat > texHalf1
texture< short2, 1, cudaReadModeNormalizedFloat > texHalfSt1
texture< float, 1, cudaReadModeElementType > texNorm1
texture< short4, 1, cudaReadModeNormalizedFloat > texHalf2
texture< short2, 1, cudaReadModeNormalizedFloat > texHalfSt2
texture< float, 1, cudaReadModeElementType > texNorm2
texture< short4, 1, cudaReadModeNormalizedFloat > texHalf3
texture< short2, 1, cudaReadModeNormalizedFloat > texHalfSt3
texture< float, 1, cudaReadModeElementType > texNorm3
texture< short4, 1, cudaReadModeNormalizedFloat > texHalf4
texture< short2, 1, cudaReadModeNormalizedFloat > texHalfSt4
texture< float, 1, cudaReadModeElementType > texNorm4
texture< short4, 1, cudaReadModeNormalizedFloat > texHalf5
texture< short2, 1, cudaReadModeNormalizedFloat > texHalfSt5
texture< float, 1, cudaReadModeElementType > texNorm5

Define Documentation

#define AX_FLOAT2 (   a,
  X 
)    X.x *= a; X.y *= a;

Definition at line 438 of file blas_quda.cu.

#define AX_FLOAT4 (   a,
  X 
)    X.x *= a; X.y *= a; X.z *= a; X.w *= a;

Definition at line 435 of file blas_quda.cu.

#define AXPBY_FLOAT2 (   a,
  X,
  b,
  Y 
)    Y.x = b*Y.x; Y.x += a*X.x; Y.y = b*Y.y; Y.y += a*X.y; \

Definition at line 476 of file blas_quda.cu.

#define AXPBY_FLOAT4 (   a,
  X,
  b,
  Y 
)
Value:
Y.x = b*Y.x; Y.x += a*X.x; Y.y = b*Y.y; Y.y += a*X.y;           \
  Y.z = b*Y.z; Y.z += a*X.z; Y.w = b*Y.w; Y.w += a*X.w;

Definition at line 472 of file blas_quda.cu.

#define AXPY_FLOAT2 (   a,
  X,
  Y 
)    Y.x += a*X.x; Y.y += a*X.y;

Definition at line 469 of file blas_quda.cu.

#define AXPY_FLOAT4 (   a,
  X,
  Y 
)
Value:
Y.x += a*X.x;   Y.y += a*X.y;                \
  Y.z += a*X.z; Y.w += a*X.w;

Definition at line 460 of file blas_quda.cu.

#define CAXPBY_FLOAT2 (   a,
  X,
  b,
  Y 
)
Value:
{ float2 y;                                                             \
  y.x = a.x*X.x; y.x -= a.y*X.y; y.x += b.x*Y.x; y.x -= b.y*Y.y;        \
  y.y = a.y*X.x; y.y += a.x*X.y; y.y += b.y*Y.x; y.y += b.x*Y.y;        \
  Y.x = y.x; Y.y = y.y;}

Definition at line 531 of file blas_quda.cu.

#define CAXPBY_FLOAT4 (   a,
  X,
  b,
  Y 
)
Value:
{ float2 y;                                                             \
  y.x = a.x*X.x; y.x -= a.y*X.y; y.x += b.x*Y.x; y.x -= b.y*Y.y;        \
  y.y = a.y*X.x; y.y += a.x*X.y; y.y += b.y*Y.x; y.y += b.x*Y.y;        \
  Y.x = y.x; Y.y = y.y;                                                 \
  y.x = a.x*X.z; y.x -= a.y*X.w; y.x += b.x*Y.z; y.x -= b.y*Y.w;        \
  y.y = a.y*X.z; y.y += a.x*X.w; y.y += b.y*Y.z; y.y += b.x*Y.w;        \
  Y.z = y.x; Y.w = y.y;}

Definition at line 522 of file blas_quda.cu.

#define CAXPBYPZ_FLOAT2 (   a,
  X,
  b,
  Y,
  Z 
)
Value:
Z.x += a.x*X.x - a.y*X.y + b.x*Y.x - b.y*Y.y;   \
  Z.y += a.y*X.x + a.x*X.y + b.y*Y.x + b.x*Y.y;

Definition at line 568 of file blas_quda.cu.

#define CAXPBYPZ_FLOAT4 (   a,
  X,
  b,
  Y,
  Z 
)
Value:
Z.x += a.x*X.x - a.y*X.y + b.x*Y.x - b.y*Y.y;   \
  Z.y += a.y*X.x + a.x*X.y + b.y*Y.x + b.x*Y.y;   \
  Z.z += a.x*X.z - a.y*X.w + b.x*Y.z - b.y*Y.w;   \
  Z.w += a.y*X.z + a.x*X.w + b.y*Y.z + b.x*Y.w;

Definition at line 554 of file blas_quda.cu.

#define CAXPY_FLOAT2 (   a,
  X,
  Y 
)
Value:
Y.x += a.x*X.x; Y.x -= a.y*X.y; \
  Y.y += a.y*X.x; Y.y += a.x*X.y;

Definition at line 507 of file blas_quda.cu.

#define CAXPY_FLOAT4 (   a,
  X,
  Y 
)
Value:
Y.x += a.x*X.x; Y.x -= a.y*X.y; \
  Y.y += a.y*X.x; Y.y += a.x*X.y;       \
  Y.z += a.x*X.z; Y.z -= a.y*X.w;       \
  Y.w += a.y*X.z; Y.w += a.x*X.w;

Definition at line 493 of file blas_quda.cu.

#define checkSpinor (   a,
  b 
)
Value:
{                                                                       \
    if (a.Precision() != b.Precision())                                 \
      errorQuda("precisions do not match: %d %d", a.Precision(), b.Precision()); \
    if (a.Length() != b.Length())                                       \
      errorQuda("lengths do not match: %d %d", a.Length(), b.Length()); \
    if (a.Stride() != b.Stride())                                       \
      errorQuda("strides do not match: %d %d", a.Stride(), b.Stride()); \
  }

Definition at line 615 of file blas_quda.cu.

#define checkSpinorLength (   a,
  b 
)
Value:
{                                                                       \
    if (a.Length() != b.Length()) {                                     \
      errorQuda("lengths do not match: %d %d", a.Length(), b.Length()); \
    }                                                                   \
  }

Definition at line 626 of file blas_quda.cu.

#define CMAXPY_FLOAT4 (   a,
  X,
  Y 
)
Value:
Y.x -= a.x*X.x; Y.x += a.y*X.y;         \
  Y.y -= a.y*X.x; Y.y -= a.x*X.y;               \
  Y.z -= a.x*X.z; Y.z += a.y*X.w;               \
  Y.w -= a.y*X.z; Y.w -= a.x*X.w;

Definition at line 516 of file blas_quda.cu.

#define CONSTRUCT_HALF_SPINOR_FROM_DOUBLE (   h,
  n,
  a,
  length 
)
Value:
{float c0 = fmaxf(fabsf((a##0).x), fabsf((a##0).y));                    \
  float c1 = fmaxf(fabsf((a##1).x), fabsf((a##1).y));                   \
  float c2 = fmaxf(fabsf((a##2).x), fabsf((a##2).y));                   \
  float c3 = fmaxf(fabsf((a##3).x), fabsf((a##3).y));                   \
  float c4 = fmaxf(fabsf((a##4).x), fabsf((a##4).y));                   \
  float c5 = fmaxf(fabsf((a##5).x), fabsf((a##5).y));                   \
  float c6 = fmaxf(fabsf((a##6).x), fabsf((a##6).y));                   \
  float c7 = fmaxf(fabsf((a##7).x), fabsf((a##7).y));                   \
  float c8 = fmaxf(fabsf((a##8).x), fabsf((a##8).y));                   \
  float c9 = fmaxf(fabsf((a##9).x), fabsf((a##9).y));                   \
  float c10 = fmaxf(fabsf((a##10).x), fabsf((a##10).y));                \
  float c11 = fmaxf(fabsf((a##11).x), fabsf((a##11).y));                \
  c0 = fmaxf(c0, c1); c1 = fmaxf(c2, c3);  c2 = fmaxf(c4, c5); c3 = fmaxf(c6, c7); \
  c4 = fmaxf(c8, c9); c5 = fmaxf(c10, c11); c0 = fmaxf(c0, c1); c1 = fmaxf(c2, c3); \
  c2 = fmaxf(c4, c5); c0 = fmaxf(c0, c1); c0 = fmaxf(c0, c2);           \
  n[i] = c0;                                                            \
  float C = __fdividef(MAX_SHORT, c0);                                  \
  h[i+0*length] = make_short4((short)(C*(float)(a##0).x), (short)(C*(float)(a##0).y), \
                              (short)(C*(float)(a##1).x), (short)(C*(float)(a##1).y)); \
  h[i+1*length] = make_short4((short)(C*(float)(a##2).x), (short)(C*(float)(a##2).y), \
                              (short)(C*(float)(a##3).x), (short)(C*(float)(a##3).y)); \
  h[i+2*length] = make_short4((short)(C*(float)(a##4).x), (short)(C*(float)(a##4).y), \
                              (short)(C*(float)(a##5).x), (short)(C*(float)(a##5).y)); \
  h[i+3*length] = make_short4((short)(C*(float)(a##6).x), (short)(C*(float)(a##6).y), \
                              (short)(C*(float)(a##7).x), (short)(C*(float)(a##7).y)); \
  h[i+4*length] = make_short4((short)(C*(float)(a##8).x), (short)(C*(float)(a##8).y), \
                              (short)(C*(float)(a##9).x), (short)(C*(float)(a##9).y)); \
  h[i+5*length] = make_short4((short)(C*(float)(a##10).x), (short)(C*(float)(a##10).y), \
                              (short)(C*(float)(a##11).x), (short)(C*(float)(a##11).y));}

Definition at line 347 of file blas_quda.cu.

#define CONSTRUCT_HALF_SPINOR_FROM_DOUBLE_ST (   h,
  n,
  a,
  length 
)
Value:
{float c0 = fmaxf(fabsf((a##0).x), fabsf((a##0).y));            \
        float c1 = fmaxf(fabsf((a##1).x), fabsf((a##1).y));             \
        float c2 = fmaxf(fabsf((a##2).x), fabsf((a##2).y));             \
        c0 = fmaxf(c0, c1); c0 = fmaxf(c0, c2);                         \
        n[i] = c0;                                                      \
        float C = __fdividef(MAX_SHORT, c0);                            \
        h[i+0*length] = make_short2((short)(C*(float)(a##0).x), (short)(C*(float)(a##0).y)); \
        h[i+1*length] = make_short2((short)(C*(float)(a##1).x), (short)(C*(float)(a##1).y)); \
        h[i+2*length] = make_short2((short)(C*(float)(a##2).x), (short)(C*(float)(a##2).y));}

Definition at line 389 of file blas_quda.cu.

#define CONSTRUCT_HALF_SPINOR_FROM_SINGLE (   h,
  n,
  a,
  length 
)
Value:
{               \
    float c0 = fast_abs_max(a##0);                                      \
    float c1 = fast_abs_max(a##1);                                      \
    c0 = FAST_MAX(c0, c1);                                              \
    float c2 = fast_abs_max(a##2);                                      \
    float c3 = fast_abs_max(a##3);                                      \
    c1 = FAST_MAX(c2, c3);                                              \
    c0 = FAST_MAX(c0, c1);                                              \
    c2 = fast_abs_max(a##4);                                            \
    c3 = fast_abs_max(a##5);                                            \
    c1 = FAST_MAX(c2, c3);                                              \
    c0 = FAST_MAX(c0, c1);                                              \
    n[i] = c0;                                                          \
    float C = __fdividef(MAX_SHORT, c0);                                \
    h[i+0*length] = make_short4((short)(C*(float)(a##0).x), (short)(C*(float)(a##0).y), \
                                (short)(C*(float)(a##0).z), (short)(C*(float)(a##0).w)); \
    h[i+1*length] = make_short4((short)(C*(float)(a##1).x), (short)(C*(float)(a##1).y), \
                                (short)(C*(float)(a##1).z), (short)(C*(float)(a##1).w)); \
    h[i+2*length] = make_short4((short)(C*(float)(a##2).x), (short)(C*(float)(a##2).y), \
                                (short)(C*(float)(a##2).z), (short)(C*(float)(a##2).w)); \
    h[i+3*length] = make_short4((short)(C*(float)(a##3).x), (short)(C*(float)(a##3).y), \
                                (short)(C*(float)(a##3).z), (short)(C*(float)(a##3).w)); \
    h[i+4*length] = make_short4((short)(C*(float)(a##4).x), (short)(C*(float)(a##4).y), \
                                (short)(C*(float)(a##4).z), (short)(C*(float)(a##4).w)); \
    h[i+5*length] = make_short4((short)(C*(float)(a##5).x), (short)(C*(float)(a##5).y), \
                                (short)(C*(float)(a##5).z), (short)(C*(float)(a##5).w));}

Definition at line 320 of file blas_quda.cu.

#define CONSTRUCT_HALF_SPINOR_FROM_SINGLE_ST (   h,
  n,
  a,
  length 
)
Value:
{float c0 = fmaxf(fabsf((a##0).x), fabsf((a##0).y));            \
        float c1 = fmaxf(fabsf((a##1).x), fabsf((a##1).y));             \
        float c2 = fmaxf(fabsf((a##2).x), fabsf((a##2).y));             \
        c0 = fmaxf(c0, c1); c0 = fmaxf(c0, c2);                         \
        n[i] = c0;                                                      \
        float C = __fdividef(MAX_SHORT, c0);                            \
        h[i+0*length] = make_short2((short)(C*(float)(a##0).x), (short)(C*(float)(a##0).y)); \
        h[i+1*length] = make_short2((short)(C*(float)(a##1).x), (short)(C*(float)(a##1).y)); \
        h[i+2*length] = make_short2((short)(C*(float)(a##2).x), (short)(C*(float)(a##2).y));}

Definition at line 378 of file blas_quda.cu.

#define CXPAYPBZ_FLOAT2 (   X,
  a,
  Y,
  b,
  Z 
)
Value:
{float2 z;                                                             \
  z.x = X.x + a.x*Y.x; z.x -= a.y*Y.y; z.x += b.x*Z.x; z.x -= b.y*Z.y; \
  z.y = X.y + a.y*Y.x; z.y += a.x*Y.y; z.y += b.y*Z.x; z.y += b.x*Z.y; \
  Z.x = z.x; Z.y = z.y;}

Definition at line 547 of file blas_quda.cu.

#define CXPAYPBZ_FLOAT4 (   X,
  a,
  Y,
  b,
  Z 
)
Value:
{float2 z;                                                             \
  z.x = X.x + a.x*Y.x; z.x -= a.y*Y.y; z.x += b.x*Z.x; z.x -= b.y*Z.y; \
  z.y = X.y + a.y*Y.x; z.y += a.x*Y.y; z.y += b.y*Z.x; z.y += b.x*Z.y; \
  Z.x = z.x; Z.y = z.y;                                                \
  z.x = X.z + a.x*Y.z; z.x -= a.y*Y.w; z.x += b.x*Z.z; z.x -= b.y*Z.w; \
  z.y = X.w + a.y*Y.z; z.y += a.x*Y.w; z.y += b.y*Z.z; z.y += b.x*Z.w; \
  Z.z = z.x; Z.w = z.y;}

Definition at line 538 of file blas_quda.cu.

#define EMUSYNC

Definition at line 31 of file blas_quda.cu.

#define FAST_ABS_MAX (   a,
  b 
)    fmaxf(fabsf(a), fabsf(b));

Definition at line 311 of file blas_quda.cu.

#define FAST_MAX (   a,
  b 
)    fmaxf(a, b);

Definition at line 312 of file blas_quda.cu.

#define IMAG_DOT_FLOAT2 (   dot,
  a,
  b 
)    float dot = a.x*b.y - a.y*b.x;

Definition at line 432 of file blas_quda.cu.

#define IMAG_DOT_FLOAT4 (   dot,
  a,
  b 
)    float dot = a.x*b.y - a.y*b.x + a.z*b.w - a.w*b.z;

Definition at line 422 of file blas_quda.cu.

#define MXPY_FLOAT2 (   X,
  Y 
)    Y.x -= X.x; Y.y -= X.y;

Definition at line 456 of file blas_quda.cu.

#define MXPY_FLOAT4 (   X,
  Y 
)    Y.x -= X.x; Y.y -= X.y; Y.z -= X.z; Y.w -= X.w;

Definition at line 453 of file blas_quda.cu.

#define QudaSumComplex   cuComplex

Definition at line 23 of file blas_quda.cu.

#define QudaSumFloat   float

Definition at line 22 of file blas_quda.cu.

#define QudaSumFloat3   float3

Definition at line 24 of file blas_quda.cu.

#define READ_DOUBLE2_TEXTURE (   x,
  i 
)    fetch_double2(x##TexDouble2, i)

Definition at line 221 of file blas_quda.cu.

#define READ_FLOAT2_TEXTURE (   x,
  i 
)    tex1Dfetch(x##TexSingle2, i)

Definition at line 224 of file blas_quda.cu.

#define READ_HALF_SPINOR (   a,
  tex,
  length 
)
Value:
float4 a##0 = tex1Dfetch(tex, i + 0*length);                            \
  float4 a##1 = tex1Dfetch(tex, i + 1*length);                          \
  float4 a##2 = tex1Dfetch(tex, i + 2*length);                          \
  float4 a##3 = tex1Dfetch(tex, i + 3*length);                          \
  float4 a##4 = tex1Dfetch(tex, i + 4*length);                          \
  float4 a##5 = tex1Dfetch(tex, i + 5*length);                          \
  float a##c = a##N[i];

Definition at line 278 of file blas_quda.cu.

#define READ_HALF_SPINOR_ST (   a,
  tex,
  length 
)
Value:
float2 a##0 = tex1Dfetch(tex, i + 0*length);                            \
  float2 a##1 = tex1Dfetch(tex, i + 1*length);                          \
  float2 a##2 = tex1Dfetch(tex, i + 2*length);                          \
  float a##c = a##N[i];

Definition at line 287 of file blas_quda.cu.

#define READ_HALF_SPINOR_TEX (   a,
  tex,
  texNorm,
  length 
)
Value:
float a##c = tex1Dfetch(texNorm, i);                                    \
  float4 a##0 = tex1Dfetch(tex, i + 0*length);                          \
  float4 a##1 = tex1Dfetch(tex, i + 1*length);                          \
  float4 a##2 = tex1Dfetch(tex, i + 2*length);                          \
  float4 a##3 = tex1Dfetch(tex, i + 3*length);                          \
  float4 a##4 = tex1Dfetch(tex, i + 4*length);                          \
  float4 a##5 = tex1Dfetch(tex, i + 5*length);                          \

Definition at line 269 of file blas_quda.cu.

#define REAL_DOT_FLOAT2 (   dot,
  a,
  b 
)    float dot = a.x*b.x + a.y*b.y;

Definition at line 418 of file blas_quda.cu.

#define REAL_DOT_FLOAT4 (   dot,
  a,
  b 
)    float dot = a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w;

Definition at line 408 of file blas_quda.cu.

#define RECONSTRUCT_HALF_SPINOR (   a,
  texHalf,
  texNorm,
  length 
)
Value:
float a##c = tex1Dfetch(texNorm, i);                                    \
  float4 a##0 = tex1Dfetch(texHalf, i + 0*length);                      \
  float4 a##1 = tex1Dfetch(texHalf, i + 1*length);                      \
  float4 a##2 = tex1Dfetch(texHalf, i + 2*length);                      \
  float4 a##3 = tex1Dfetch(texHalf, i + 3*length);                      \
  float4 a##4 = tex1Dfetch(texHalf, i + 4*length);                      \
  float4 a##5 = tex1Dfetch(texHalf, i + 5*length);                      \
  a##0 *= a##c;                                                         \
  a##1 *= a##c;                                                         \
  a##2 *= a##c;                                                         \
  a##3 *= a##c;                                                         \
  a##4 *= a##c;                                                         \
  a##5 *= a##c;

Definition at line 235 of file blas_quda.cu.

#define RECONSTRUCT_HALF_SPINOR_ST (   a,
  texHalf,
  texNorm,
  length 
)
Value:
float a##c = tex1Dfetch(texNorm, i);                                    \
  float2 a##0 = tex1Dfetch(texHalf, i + 0*length);                      \
  float2 a##1 = tex1Dfetch(texHalf, i + 1*length);                      \
  float2 a##2 = tex1Dfetch(texHalf, i + 2*length);                      \
  (a##0) *= a##c;                                                               \
  (a##1) *= a##c;                                                               \
  (a##2) *= a##c;

Definition at line 250 of file blas_quda.cu.

#define REDUCE_AUXILIARY (   i)

Definition at line 2729 of file blas_quda.cu.

#define REDUCE_AUXILIARY (   i)
Value:
RECONSTRUCT_HALF_SPINOR_ST(x, texHalfSt1, texNorm1, stride);            \
  RECONSTRUCT_HALF_SPINOR_ST(y, texHalfSt2, texNorm2, stride);          \
  AXPY_FLOAT2(a, x0, y0);                                               \
  REAL_DOT_FLOAT2(norm0, y0, y0);                                       \
  AXPY_FLOAT2(a, x1, y1);                                               \
  REAL_DOT_FLOAT2(norm1, y1, y1);                                       \
  AXPY_FLOAT2(a, x2, y2);                                               \
  REAL_DOT_FLOAT2(norm2, y2, y2);                                       \
  norm0 += norm1; norm0 += norm2;                                       \
  CONSTRUCT_HALF_SPINOR_FROM_SINGLE_ST(yH, yN, y, stride);

Definition at line 2729 of file blas_quda.cu.

#define REDUCE_AUXILIARY (   i)    y[i] = x[i] - y[i]

Definition at line 2729 of file blas_quda.cu.

#define REDUCE_AUXILIARY (   i)
Value:
READ_HALF_SPINOR(a, texHalf1, stride);                          \
  SUM_FLOAT4(s0, a0);                                                   \
  SUM_FLOAT4(s1, a1);                                                   \
  SUM_FLOAT4(s2, a2);                                                   \
  SUM_FLOAT4(s3, a3);                                                   \
  SUM_FLOAT4(s4, a4);                                                   \
  SUM_FLOAT4(s5, a5);                                                   \
  s0 += s1; s2 += s3; s4 += s5; s0 += s2; s0 += s4;

Definition at line 2729 of file blas_quda.cu.

#define REDUCE_AUXILIARY (   i)
Value:
RECONSTRUCT_HALF_SPINOR(x, texHalf1, texNorm1, stride);         \
  RECONSTRUCT_HALF_SPINOR(y, texHalf2, texNorm2, stride);               \
  XMY_FLOAT4(x0, y0);                                                   \
  REAL_DOT_FLOAT4(norm0, y0, y0);                                       \
  XMY_FLOAT4(x1, y1);                                                   \
  REAL_DOT_FLOAT4(norm1, y1, y1);                                       \
  XMY_FLOAT4(x2, y2);                                                   \
  REAL_DOT_FLOAT4(norm2, y2, y2);                                       \
  XMY_FLOAT4(x3, y3);                                                   \
  REAL_DOT_FLOAT4(norm3, y3, y3);                                       \
  XMY_FLOAT4(x4, y4);                                                   \
  REAL_DOT_FLOAT4(norm4, y4, y4);                                       \
  XMY_FLOAT4(x5, y5);                                                   \
  REAL_DOT_FLOAT4(norm5, y5, y5);                                       \
  norm0 += norm1; norm2 += norm3; norm4 += norm5; norm0 += norm2; norm0 += norm4; \
  CONSTRUCT_HALF_SPINOR_FROM_SINGLE(yH, yN, y, stride);

Definition at line 2729 of file blas_quda.cu.

#define REDUCE_AUXILIARY (   i)
Value:
RECONSTRUCT_HALF_SPINOR_ST(x, texHalfSt1, texNorm1, stride);            \
  RECONSTRUCT_HALF_SPINOR_ST(y, texHalfSt2, texNorm2, stride);          \
  XMY_FLOAT2(x0, y0);                                                   \
  REAL_DOT_FLOAT2(norm0, y0, y0);                                       \
  XMY_FLOAT2(x1, y1);                                                   \
  REAL_DOT_FLOAT2(norm1, y1, y1);                                       \
  XMY_FLOAT2(x2, y2);                                                   \
  REAL_DOT_FLOAT2(norm2, y2, y2);                                       \
  norm0 += norm1; norm0 += norm2;                                       \
  CONSTRUCT_HALF_SPINOR_FROM_SINGLE_ST(yH, yN, y, stride);

Definition at line 2729 of file blas_quda.cu.

#define REDUCE_AUXILIARY (   i)
Value:
READ_HALF_SPINOR_ST(a, texHalfSt1, stride);                             \
  SUM_FLOAT2(s0, a0);                                                   \
  SUM_FLOAT2(s1, a1);                                                   \
  SUM_FLOAT2(s2, a2);                                                   \
  s0 += s1; s0 += s2;

Definition at line 2729 of file blas_quda.cu.

#define REDUCE_AUXILIARY (   i)

Definition at line 2729 of file blas_quda.cu.

#define REDUCE_AUXILIARY (   i)

Definition at line 2729 of file blas_quda.cu.

#define REDUCE_AUXILIARY (   i)
Value:
READ_HALF_SPINOR(a, texHalf1, stride);                          \
  REAL_DOT_FLOAT4(norm0, a0, a0);                                       \
  REAL_DOT_FLOAT4(norm1, a1, a1);                                       \
  REAL_DOT_FLOAT4(norm2, a2, a2);                                       \
  REAL_DOT_FLOAT4(norm3, a3, a3);                                       \
  REAL_DOT_FLOAT4(norm4, a4, a4);                                       \
  REAL_DOT_FLOAT4(norm5, a5, a5);                                       \
  norm0 += norm1; norm2 += norm3; norm4 += norm5; norm0 += norm2, norm0 += norm4;

Definition at line 2729 of file blas_quda.cu.

#define REDUCE_AUXILIARY (   i)
Value:
READ_HALF_SPINOR_ST(a, texHalfSt1, stride);                             \
    REAL_DOT_FLOAT2(norm0, a0, a0);                                     \
    REAL_DOT_FLOAT2(norm1, a1, a1);                                     \
    REAL_DOT_FLOAT2(norm2, a2, a2);                                     \
    norm0 += norm1; norm0 += norm2;

Definition at line 2729 of file blas_quda.cu.

#define REDUCE_AUXILIARY (   i)

Definition at line 2729 of file blas_quda.cu.

#define REDUCE_AUXILIARY (   i)

Definition at line 2729 of file blas_quda.cu.

#define REDUCE_AUXILIARY (   i)
Value:
READ_HALF_SPINOR(a, texHalf1, stride);                          \
  READ_HALF_SPINOR(b, texHalf2, stride);                                \
  REAL_DOT_FLOAT4(rdot0, a0, b0);                                       \
  REAL_DOT_FLOAT4(rdot1, a1, b1);                                       \
  REAL_DOT_FLOAT4(rdot2, a2, b2);                                       \
  REAL_DOT_FLOAT4(rdot3, a3, b3);                                       \
  REAL_DOT_FLOAT4(rdot4, a4, b4);                                       \
  REAL_DOT_FLOAT4(rdot5, a5, b5);                                       \
  rdot0 += rdot1; rdot2 += rdot3; rdot4 += rdot5; rdot0 += rdot2; rdot0 += rdot4;

Definition at line 2729 of file blas_quda.cu.

#define REDUCE_AUXILIARY (   i)
Value:
READ_HALF_SPINOR_ST(a, texHalfSt1, stride);                             \
  READ_HALF_SPINOR_ST(b, texHalfSt2, stride);                           \
  REAL_DOT_FLOAT2(rdot0, a0, b0);                                       \
  REAL_DOT_FLOAT2(rdot1, a1, b1);                                       \
  REAL_DOT_FLOAT2(rdot2, a2, b2);                                       \
  rdot0 += rdot1; rdot0 += rdot2;

Definition at line 2729 of file blas_quda.cu.

#define REDUCE_AUXILIARY (   i)

Definition at line 2729 of file blas_quda.cu.

#define REDUCE_AUXILIARY (   i)    y[i] = a*x[i] + y[i]

Definition at line 2729 of file blas_quda.cu.

#define REDUCE_AUXILIARY (   i)
Value:
RECONSTRUCT_HALF_SPINOR(x, texHalf1, texNorm1, stride);         \
  RECONSTRUCT_HALF_SPINOR(y, texHalf2, texNorm2, stride);               \
  AXPY_FLOAT4(a, x0, y0);                                               \
  REAL_DOT_FLOAT4(norm0, y0, y0);                                       \
  AXPY_FLOAT4(a, x1, y1);                                               \
  REAL_DOT_FLOAT4(norm1, y1, y1);                                       \
  AXPY_FLOAT4(a, x2, y2);                                               \
  REAL_DOT_FLOAT4(norm2, y2, y2);                                       \
  AXPY_FLOAT4(a, x3, y3);                                               \
  REAL_DOT_FLOAT4(norm3, y3, y3);                                       \
  AXPY_FLOAT4(a, x4, y4);                                               \
  REAL_DOT_FLOAT4(norm4, y4, y4);                                       \
  AXPY_FLOAT4(a, x5, y5);                                               \
  REAL_DOT_FLOAT4(norm5, y5, y5);                                       \
  norm0 += norm1; norm2 += norm3; norm4 += norm5; norm0 += norm2; norm0 += norm4; \
  CONSTRUCT_HALF_SPINOR_FROM_SINGLE(yH, yN, y, stride);

Definition at line 2729 of file blas_quda.cu.

#define REDUCE_DOUBLE   64

Definition at line 12 of file blas_quda.cu.

#define REDUCE_FUNC_NAME (   suffix)    cDotProductNormBH##suffix

Definition at line 3548 of file blas_quda.cu.

#define REDUCE_FUNC_NAME (   suffix)    xmyNormF##suffix

Definition at line 3548 of file blas_quda.cu.

#define REDUCE_FUNC_NAME (   suffix)    cDotProductNormBHSt##suffix

Definition at line 3548 of file blas_quda.cu.

#define REDUCE_FUNC_NAME (   suffix)    sumH##suffix

Definition at line 3548 of file blas_quda.cu.

#define REDUCE_FUNC_NAME (   suffix)    caxpbypzYmbwcDotProductWYNormYD##suffix

Definition at line 3548 of file blas_quda.cu.

#define REDUCE_FUNC_NAME (   suffix)    xmyNormH##suffix

Definition at line 3548 of file blas_quda.cu.

#define REDUCE_FUNC_NAME (   suffix)    sumHSt##suffix

Definition at line 3548 of file blas_quda.cu.

#define REDUCE_FUNC_NAME (   suffix)    xmyNormH##suffix

Definition at line 3548 of file blas_quda.cu.

#define REDUCE_FUNC_NAME (   suffix)    caxpbypzYmbwcDotProductWYNormYS##suffix

Definition at line 3548 of file blas_quda.cu.

#define REDUCE_FUNC_NAME (   suffix)    cDotProductD##suffix

Definition at line 3548 of file blas_quda.cu.

#define REDUCE_FUNC_NAME (   suffix)    caxpbypzYmbwcDotProductWYNormYH##suffix

Definition at line 3548 of file blas_quda.cu.

#define REDUCE_FUNC_NAME (   suffix)    caxpbypzYmbwcDotProductWYNormYH##suffix

Definition at line 3548 of file blas_quda.cu.

#define REDUCE_FUNC_NAME (   suffix)    normD##suffix

Definition at line 3548 of file blas_quda.cu.

#define REDUCE_FUNC_NAME (   suffix)    cDotProductS##suffix

Definition at line 3548 of file blas_quda.cu.

#define REDUCE_FUNC_NAME (   suffix)    normS##suffix

Definition at line 3548 of file blas_quda.cu.

#define REDUCE_FUNC_NAME (   suffix)    cDotProductH##suffix

Definition at line 3548 of file blas_quda.cu.

#define REDUCE_FUNC_NAME (   suffix)    cDotProductHSt##suffix

Definition at line 3548 of file blas_quda.cu.

#define REDUCE_FUNC_NAME (   suffix)    normH##suffix

Definition at line 3548 of file blas_quda.cu.

#define REDUCE_FUNC_NAME (   suffix)    xpaycDotzyD##suffix

Definition at line 3548 of file blas_quda.cu.

#define REDUCE_FUNC_NAME (   suffix)    xpaycDotzyS##suffix

Definition at line 3548 of file blas_quda.cu.

#define REDUCE_FUNC_NAME (   suffix)    normHSt##suffix

Definition at line 3548 of file blas_quda.cu.

#define REDUCE_FUNC_NAME (   suffix)    xpaycDotzyH##suffix

Definition at line 3548 of file blas_quda.cu.

#define REDUCE_FUNC_NAME (   suffix)    reDotProductD##suffix

Definition at line 3548 of file blas_quda.cu.

#define REDUCE_FUNC_NAME (   suffix)    xpaycDotzyH##suffix

Definition at line 3548 of file blas_quda.cu.

#define REDUCE_FUNC_NAME (   suffix)    reDotProductS##suffix

Definition at line 3548 of file blas_quda.cu.

#define REDUCE_FUNC_NAME (   suffix)    cDotProductNormAD##suffix

Definition at line 3548 of file blas_quda.cu.

#define REDUCE_FUNC_NAME (   suffix)    cDotProductNormAS##suffix

Definition at line 3548 of file blas_quda.cu.

#define REDUCE_FUNC_NAME (   suffix)    reDotProductH##suffix

Definition at line 3548 of file blas_quda.cu.

#define REDUCE_FUNC_NAME (   suffix)    cDotProductNormAH##suffix

Definition at line 3548 of file blas_quda.cu.

#define REDUCE_FUNC_NAME (   suffix)    reDotProductHSt##suffix

Definition at line 3548 of file blas_quda.cu.

#define REDUCE_FUNC_NAME (   suffix)    sumD##suffix

Definition at line 3548 of file blas_quda.cu.

#define REDUCE_FUNC_NAME (   suffix)    cDotProductNormAHSt##suffix

Definition at line 3548 of file blas_quda.cu.

#define REDUCE_FUNC_NAME (   suffix)    axpyNormF##suffix

Definition at line 3548 of file blas_quda.cu.

#define REDUCE_FUNC_NAME (   suffix)    cDotProductNormBD##suffix

Definition at line 3548 of file blas_quda.cu.

#define REDUCE_FUNC_NAME (   suffix)    axpyNormH##suffix

Definition at line 3548 of file blas_quda.cu.

#define REDUCE_FUNC_NAME (   suffix)    sumS##suffix

Definition at line 3548 of file blas_quda.cu.

#define REDUCE_FUNC_NAME (   suffix)    cDotProductNormBS##suffix

Definition at line 3548 of file blas_quda.cu.

#define REDUCE_FUNC_NAME (   suffix)    axpyNormH##suffix

Definition at line 3548 of file blas_quda.cu.

#define REDUCE_IMAG_AUXILIARY (   i)    Float2 b = READ_DOUBLE2_TEXTURE(y, i);

Definition at line 3023 of file blas_quda.cu.

#define REDUCE_IMAG_AUXILIARY (   i)    Float2 b = read_Float2(y, i);

Definition at line 3023 of file blas_quda.cu.

#define REDUCE_IMAG_AUXILIARY (   i)
Value:
IMAG_DOT_FLOAT4(idot0, a0, b0);                                 \
  IMAG_DOT_FLOAT4(idot1, a1, b1);                                       \
  IMAG_DOT_FLOAT4(idot2, a2, b2);                                       \
  IMAG_DOT_FLOAT4(idot3, a3, b3);                                       \
  IMAG_DOT_FLOAT4(idot4, a4, b4);                                       \
  IMAG_DOT_FLOAT4(idot5, a5, b5);                                       \
  idot0 += idot1; idot2 += idot3; idot4 += idot5; idot0 += idot2; idot0 += idot4;

Definition at line 3023 of file blas_quda.cu.

#define REDUCE_IMAG_AUXILIARY (   i)
Value:
IMAG_DOT_FLOAT2(idot0, a0, b0);                                 \
  IMAG_DOT_FLOAT2(idot1, a1, b1);                                       \
  IMAG_DOT_FLOAT2(idot2, a2, b2);                                       \
  idot0 += idot1; idot0 += idot2;

Definition at line 3023 of file blas_quda.cu.

#define REDUCE_IMAG_AUXILIARY (   i)    y[i].x = X.x + a*Y.x; y[i].y = X.y + a*Y.y

Definition at line 3023 of file blas_quda.cu.

#define REDUCE_IMAG_AUXILIARY (   i)    y[i].y = x[i].y + a*y[i].y

Definition at line 3023 of file blas_quda.cu.

#define REDUCE_IMAG_AUXILIARY (   i)
Value:
IMAG_DOT_FLOAT4(idot0, z0, y0);                                 \
  IMAG_DOT_FLOAT4(idot1, z1, y1);                                       \
  IMAG_DOT_FLOAT4(idot2, z2, y2);                                       \
  IMAG_DOT_FLOAT4(idot3, z3, y3);                                       \
  IMAG_DOT_FLOAT4(idot4, z4, y4);                                       \
  IMAG_DOT_FLOAT4(idot5, z5, y5);                                       \
  idot0 += idot1; idot2 += idot3; idot4 += idot5; idot0 += idot2; idot0 += idot4; \
  CONSTRUCT_HALF_SPINOR_FROM_SINGLE(yH, yN, y, stride);

Definition at line 3023 of file blas_quda.cu.

#define REDUCE_IMAG_AUXILIARY (   i)
Value:
IMAG_DOT_FLOAT2(idot0, z0, y0);                                 \
  IMAG_DOT_FLOAT2(idot1, z1, y1);                                       \
  IMAG_DOT_FLOAT2(idot2, z2, y2);                                       \
  idot0 += idot1; idot0 += idot2;                                       \
  CONSTRUCT_HALF_SPINOR_FROM_SINGLE_ST(yH, yN, y, stride);

Definition at line 3023 of file blas_quda.cu.

#define REDUCE_IMAG_OPERATION (   i)    (a.x*b.y - a.y*b.x)

Definition at line 3030 of file blas_quda.cu.

#define REDUCE_IMAG_OPERATION (   i)    (a.x*b.y - a.y*b.x)

Definition at line 3030 of file blas_quda.cu.

#define REDUCE_IMAG_OPERATION (   i)    (ac*bc*idot0)

Definition at line 3030 of file blas_quda.cu.

#define REDUCE_IMAG_OPERATION (   i)    (ac*bc*idot0)

Definition at line 3030 of file blas_quda.cu.

#define REDUCE_IMAG_OPERATION (   i)    (Z.x*y[i].y - Z.y*y[i].x)

Definition at line 3030 of file blas_quda.cu.

#define REDUCE_IMAG_OPERATION (   i)    (z[i].x*y[i].y - z[i].y*y[i].x)

Definition at line 3030 of file blas_quda.cu.

#define REDUCE_IMAG_OPERATION (   i)    (idot0)

Definition at line 3030 of file blas_quda.cu.

#define REDUCE_IMAG_OPERATION (   i)    (idot0)

Definition at line 3030 of file blas_quda.cu.

#define REDUCE_KAHAN   32

Definition at line 13 of file blas_quda.cu.

#define REDUCE_MAX_BLOCKS   65536

Definition at line 10 of file blas_quda.cu.

#define REDUCE_OPERATION (   i)    a[i].x + a[i].y

Definition at line 2740 of file blas_quda.cu.

#define REDUCE_OPERATION (   i)    (norm0)

Definition at line 2740 of file blas_quda.cu.

#define REDUCE_OPERATION (   i)    (y[i]*y[i])

Definition at line 2740 of file blas_quda.cu.

#define REDUCE_OPERATION (   i)    (ac*s0)

Definition at line 2740 of file blas_quda.cu.

#define REDUCE_OPERATION (   i)    (norm0)

Definition at line 2740 of file blas_quda.cu.

#define REDUCE_OPERATION (   i)    (norm0)

Definition at line 2740 of file blas_quda.cu.

#define REDUCE_OPERATION (   i)    (a[i]*a[i])

Definition at line 2740 of file blas_quda.cu.

#define REDUCE_OPERATION (   i)    (a[i].x*a[i].x + a[i].y*a[i].y)

Definition at line 2740 of file blas_quda.cu.

#define REDUCE_OPERATION (   i)    (ac*ac*norm0)

Definition at line 2740 of file blas_quda.cu.

#define REDUCE_OPERATION (   i)    (ac*ac*norm0)

Definition at line 2740 of file blas_quda.cu.

#define REDUCE_OPERATION (   i)    (a[i]*b[i])

Definition at line 2740 of file blas_quda.cu.

#define REDUCE_OPERATION (   i)    (ac*s0)

Definition at line 2740 of file blas_quda.cu.

#define REDUCE_OPERATION (   i)    (a[i].x*b[i].x + a[i].y*b[i].y)

Definition at line 2740 of file blas_quda.cu.

#define REDUCE_OPERATION (   i)    (ac*bc*rdot0)

Definition at line 2740 of file blas_quda.cu.

#define REDUCE_OPERATION (   i)    (ac*bc*rdot0)

Definition at line 2740 of file blas_quda.cu.

#define REDUCE_OPERATION (   i)    (y[i]*y[i])

Definition at line 2740 of file blas_quda.cu.

#define REDUCE_OPERATION (   i)    a[i]

Definition at line 2740 of file blas_quda.cu.

#define REDUCE_OPERATION (   i)    (norm0)

Definition at line 2740 of file blas_quda.cu.

#define REDUCE_PARAMS   a, yH, yN, stride

Definition at line 3550 of file blas_quda.cu.

#define REDUCE_PARAMS   xN, yN, stride

Definition at line 3550 of file blas_quda.cu.

#define REDUCE_PARAMS   x, y

Definition at line 3550 of file blas_quda.cu.

#define REDUCE_PARAMS   xN, yN, stride

Definition at line 3550 of file blas_quda.cu.

#define REDUCE_PARAMS   aN, stride

Definition at line 3550 of file blas_quda.cu.

#define REDUCE_PARAMS   a, x, b, y, z, w, u

Definition at line 3550 of file blas_quda.cu.

#define REDUCE_PARAMS   d1, d2, yH, yN, stride

Definition at line 3550 of file blas_quda.cu.

#define REDUCE_PARAMS   a, x, b, y, z, w, u

Definition at line 3550 of file blas_quda.cu.

#define REDUCE_PARAMS   d1, d2, yH, yN, stride

Definition at line 3550 of file blas_quda.cu.

#define REDUCE_PARAMS   aN, stride

Definition at line 3550 of file blas_quda.cu.

#define REDUCE_PARAMS   a, b, yH, yN, zH, zN, wN, uN, stride

Definition at line 3550 of file blas_quda.cu.

#define REDUCE_PARAMS   x, y, c

Definition at line 3550 of file blas_quda.cu.

#define REDUCE_PARAMS   a, b, yH, yN, zH, zN, wN, uN, stride

Definition at line 3550 of file blas_quda.cu.

#define REDUCE_PARAMS   x, y, c

Definition at line 3550 of file blas_quda.cu.

#define REDUCE_PARAMS   a

Definition at line 3550 of file blas_quda.cu.

#define REDUCE_PARAMS   aN, bN, stride

Definition at line 3550 of file blas_quda.cu.

#define REDUCE_PARAMS   a

Definition at line 3550 of file blas_quda.cu.

#define REDUCE_PARAMS   aN, bN, stride

Definition at line 3550 of file blas_quda.cu.

#define REDUCE_PARAMS   x, a, y, z

Definition at line 3550 of file blas_quda.cu.

#define REDUCE_PARAMS   aN, stride

Definition at line 3550 of file blas_quda.cu.

#define REDUCE_PARAMS   x, a, y, z

Definition at line 3550 of file blas_quda.cu.

#define REDUCE_PARAMS   aN, stride

Definition at line 3550 of file blas_quda.cu.

#define REDUCE_PARAMS   a, yH, yN, stride

Definition at line 3550 of file blas_quda.cu.

#define REDUCE_PARAMS   a, b

Definition at line 3550 of file blas_quda.cu.

#define REDUCE_PARAMS   a, yH, yN, stride

Definition at line 3550 of file blas_quda.cu.

#define REDUCE_PARAMS   x, y

Definition at line 3550 of file blas_quda.cu.

#define REDUCE_PARAMS   a, b

Definition at line 3550 of file blas_quda.cu.

#define REDUCE_PARAMS   a, b

Definition at line 3550 of file blas_quda.cu.

#define REDUCE_PARAMS   aN, bN, stride

Definition at line 3550 of file blas_quda.cu.

#define REDUCE_PARAMS   xN, yN, stride

Definition at line 3550 of file blas_quda.cu.

#define REDUCE_PARAMS   aN, bN, stride

Definition at line 3550 of file blas_quda.cu.

#define REDUCE_PARAMS   xN, yN, stride

Definition at line 3550 of file blas_quda.cu.

#define REDUCE_PARAMS   a

Definition at line 3550 of file blas_quda.cu.

#define REDUCE_PARAMS   a, x, y

Definition at line 3550 of file blas_quda.cu.

#define REDUCE_PARAMS   x, y

Definition at line 3550 of file blas_quda.cu.

#define REDUCE_PARAMS   a, yH, yN, stride

Definition at line 3550 of file blas_quda.cu.

#define REDUCE_PARAMS   a, b

Definition at line 3550 of file blas_quda.cu.

#define REDUCE_PARAMS   a

Definition at line 3550 of file blas_quda.cu.

#define REDUCE_REAL_AUXILIARY (   i)    Float2 a = READ_DOUBLE2_TEXTURE(x, i);

Definition at line 3012 of file blas_quda.cu.

#define REDUCE_REAL_AUXILIARY (   i)    Float2 a = read_Float2(x, i);

Definition at line 3012 of file blas_quda.cu.

#define REDUCE_REAL_AUXILIARY (   i)
Value:
READ_HALF_SPINOR(a, texHalf1, stride);                          \
  READ_HALF_SPINOR(b, texHalf2, stride);                                \
  REAL_DOT_FLOAT4(rdot0, a0, b0);                                       \
  REAL_DOT_FLOAT4(rdot1, a1, b1);                                       \
  REAL_DOT_FLOAT4(rdot2, a2, b2);                                       \
  REAL_DOT_FLOAT4(rdot3, a3, b3);                                       \
  REAL_DOT_FLOAT4(rdot4, a4, b4);                                       \
  REAL_DOT_FLOAT4(rdot5, a5, b5);                                       \
  rdot0 += rdot1; rdot2 += rdot3; rdot4 += rdot5; rdot0 += rdot2; rdot0 += rdot4;

Definition at line 3012 of file blas_quda.cu.

#define REDUCE_REAL_AUXILIARY (   i)
Value:
READ_HALF_SPINOR_ST(a, texHalfSt1, stride);                             \
  READ_HALF_SPINOR_ST(b, texHalfSt2, stride);                           \
  REAL_DOT_FLOAT2(rdot0, a0, b0);                                       \
  REAL_DOT_FLOAT2(rdot1, a1, b1);                                       \
  REAL_DOT_FLOAT2(rdot2, a2, b2);                                       \
  rdot0 += rdot1; rdot0 += rdot2;

Definition at line 3012 of file blas_quda.cu.

#define REDUCE_REAL_AUXILIARY (   i)
Value:
Float2 X = READ_DOUBLE2_TEXTURE(x, i);  \
  Float2 Y = READ_DOUBLE2_TEXTURE(y, i);        \
  Float2 Z = READ_DOUBLE2_TEXTURE(z, i);

Definition at line 3012 of file blas_quda.cu.

#define REDUCE_REAL_AUXILIARY (   i)    y[i].x = x[i].x + a*y[i].x

Definition at line 3012 of file blas_quda.cu.

#define REDUCE_REAL_AUXILIARY (   i)
Value:
RECONSTRUCT_HALF_SPINOR(x, texHalf1, texNorm1, stride);         \
  RECONSTRUCT_HALF_SPINOR(y, texHalf2, texNorm2, stride);               \
  RECONSTRUCT_HALF_SPINOR(z, texHalf3, texNorm3, stride);               \
  XPAY_FLOAT4(x0, a, y0);                                               \
  XPAY_FLOAT4(x1, a, y1);                                               \
  XPAY_FLOAT4(x2, a, y2);                                               \
  XPAY_FLOAT4(x3, a, y3);                                               \
  XPAY_FLOAT4(x4, a, y4);                                               \
  XPAY_FLOAT4(x5, a, y5);                                               \
  REAL_DOT_FLOAT4(rdot0, z0, y0);                                       \
  REAL_DOT_FLOAT4(rdot1, z1, y1);                                       \
  REAL_DOT_FLOAT4(rdot2, z2, y2);                                       \
  REAL_DOT_FLOAT4(rdot3, z3, y3);                                       \
  REAL_DOT_FLOAT4(rdot4, z4, y4);                                       \
  REAL_DOT_FLOAT4(rdot5, z5, y5);                                       \
  rdot0 += rdot1; rdot2 += rdot3; rdot4 += rdot5; rdot0 += rdot2; rdot0 += rdot4;

Definition at line 3012 of file blas_quda.cu.

#define REDUCE_REAL_AUXILIARY (   i)
Value:
RECONSTRUCT_HALF_SPINOR_ST(x, texHalfSt1, texNorm1, stride);            \
  RECONSTRUCT_HALF_SPINOR_ST(y, texHalfSt2, texNorm2, stride);          \
  RECONSTRUCT_HALF_SPINOR_ST(z, texHalfSt3, texNorm3, stride);          \
  XPAY_FLOAT2(x0, a, y0);                                               \
  XPAY_FLOAT2(x1, a, y1);                                               \
  XPAY_FLOAT2(x2, a, y2);                                               \
  REAL_DOT_FLOAT2(rdot0, z0, y0);                                       \
  REAL_DOT_FLOAT2(rdot1, z1, y1);                                       \
  REAL_DOT_FLOAT2(rdot2, z2, y2);                                       \
  rdot0 += rdot1; rdot0 += rdot2;

Definition at line 3012 of file blas_quda.cu.

#define REDUCE_REAL_OPERATION (   i)    (a.x*b.x + a.y*b.y)

Definition at line 3029 of file blas_quda.cu.

#define REDUCE_REAL_OPERATION (   i)    (ac*bc*rdot0)

Definition at line 3029 of file blas_quda.cu.

#define REDUCE_REAL_OPERATION (   i)    (ac*bc*rdot0)

Definition at line 3029 of file blas_quda.cu.

#define REDUCE_REAL_OPERATION (   i)    (Z.x*y[i].x + Z.y*y[i].y)

Definition at line 3029 of file blas_quda.cu.

#define REDUCE_REAL_OPERATION (   i)    (z[i].x*y[i].x + z[i].y*y[i].y)

Definition at line 3029 of file blas_quda.cu.

#define REDUCE_REAL_OPERATION (   i)    (rdot0)

Definition at line 3029 of file blas_quda.cu.

#define REDUCE_REAL_OPERATION (   i)    (rdot0)

Definition at line 3029 of file blas_quda.cu.

#define REDUCE_REAL_OPERATION (   i)    (a.x*b.x + a.y*b.y)

Definition at line 3029 of file blas_quda.cu.

#define REDUCE_TYPE   REDUCE_KAHAN

Definition at line 21 of file blas_quda.cu.

#define REDUCE_TYPES   Float2 a, Float2 *x, Float2 b, Float2 *y, Float2 *z, Float2 *w, Float2 *u

Definition at line 3549 of file blas_quda.cu.

#define REDUCE_TYPES   Float *aN, int stride

Definition at line 3549 of file blas_quda.cu.

#define REDUCE_TYPES   Float2 *x, Float a, Float2 *y, Float2 *z

Definition at line 3549 of file blas_quda.cu.

#define REDUCE_TYPES   Float *d1, Float *d2, short2 *yH, float *yN, int stride

Definition at line 3549 of file blas_quda.cu.

#define REDUCE_TYPES   Float2 *a, Float2 *b

Definition at line 3549 of file blas_quda.cu.

#define REDUCE_TYPES   Float a, Float *x, Float *y

Definition at line 3549 of file blas_quda.cu.

#define REDUCE_TYPES   Float *a

Definition at line 3549 of file blas_quda.cu.

#define REDUCE_TYPES   Float *x, Float *y

Definition at line 3549 of file blas_quda.cu.

#define REDUCE_TYPES   Float *a

Definition at line 3549 of file blas_quda.cu.

#define REDUCE_TYPES   Float2 *xN, Float2 *yN, int stride

Definition at line 3549 of file blas_quda.cu.

#define REDUCE_TYPES   Float2 *x, Float2 *y

Definition at line 3549 of file blas_quda.cu.

#define REDUCE_TYPES   Float *aN, Float2 *bN, int stride

Definition at line 3549 of file blas_quda.cu.

#define REDUCE_TYPES   Float *a

Definition at line 3549 of file blas_quda.cu.

#define REDUCE_TYPES   Float *aN, Float2 *bN, int stride

Definition at line 3549 of file blas_quda.cu.

#define REDUCE_TYPES   Float *aN, int stride

Definition at line 3549 of file blas_quda.cu.

#define REDUCE_TYPES   Float *aN, Float *bN, int stride

Definition at line 3549 of file blas_quda.cu.

#define REDUCE_TYPES   Float2 *x, Float a, Float2 *y, Float2 *z

Definition at line 3549 of file blas_quda.cu.

#define REDUCE_TYPES   Float a, short4 *yH, Float2 *yN, int stride

Definition at line 3549 of file blas_quda.cu.

#define REDUCE_TYPES   Float2 a, Float2 b, short4 *yH, float *yN, short4 *zH, float *zN, float *wN, float *uN, int stride

Definition at line 3549 of file blas_quda.cu.

#define REDUCE_TYPES   Float2 *x, Float2 *y, Float c

Definition at line 3549 of file blas_quda.cu.

#define REDUCE_TYPES   Float *a, Float *b

Definition at line 3549 of file blas_quda.cu.

#define REDUCE_TYPES   Float *a, Float *b

Definition at line 3549 of file blas_quda.cu.

#define REDUCE_TYPES   Float a, short2 *yH, Float2 *yN, int stride

Definition at line 3549 of file blas_quda.cu.

#define REDUCE_TYPES   Float2 *x, Float2 *y, Float c

Definition at line 3549 of file blas_quda.cu.

#define REDUCE_TYPES   Float2 *x, Float2 *y

Definition at line 3549 of file blas_quda.cu.

#define REDUCE_TYPES   Float *aN, int stride

Definition at line 3549 of file blas_quda.cu.

#define REDUCE_TYPES   Float *aN, int stride

Definition at line 3549 of file blas_quda.cu.

#define REDUCE_TYPES   Float *d1, Float *d2, short4 *yH, float *yN, int stride

Definition at line 3549 of file blas_quda.cu.

#define REDUCE_TYPES   Float2 *a, Float2 *b

Definition at line 3549 of file blas_quda.cu.

#define REDUCE_TYPES   Float2 a, Float2 b, short2 *yH, float *yN, short2 *zH, float *zN, float *wN, float *uN, int stride

Definition at line 3549 of file blas_quda.cu.

#define REDUCE_TYPES   Float *aN, Float *bN, int stride

Definition at line 3549 of file blas_quda.cu.

#define REDUCE_TYPES   Float *a

Definition at line 3549 of file blas_quda.cu.

#define REDUCE_TYPES   Float2 *xN, Float2 *yN, int stride

Definition at line 3549 of file blas_quda.cu.

#define REDUCE_TYPES   Float2 *xN, Float2 *yN, int stride

Definition at line 3549 of file blas_quda.cu.

#define REDUCE_TYPES   Float2 *xN, Float2 *yN, int stride

Definition at line 3549 of file blas_quda.cu.

#define REDUCE_TYPES   Float2 a, Float2 *x, Float2 b, Float2 *y, Float2 *z, Float2 *w, Float2 *u

Definition at line 3549 of file blas_quda.cu.

#define REDUCE_TYPES   Float a, short4 *yH, float *yN, int stride

Definition at line 3549 of file blas_quda.cu.

#define REDUCE_TYPES   Float a, short2 *yH, float *yN, int stride

Definition at line 3549 of file blas_quda.cu.

#define REDUCE_X_AUXILIARY (   i)
Value:
READ_HALF_SPINOR_ST(x, texHalfSt1, stride);                             \
  READ_HALF_SPINOR_ST(y, texHalfSt2, stride);                           \
  REAL_DOT_FLOAT2(norm0, y0, y0);                                       \
  REAL_DOT_FLOAT2(norm1, y1, y1);                                       \
  REAL_DOT_FLOAT2(norm2, y2, y2);                                       \
  norm0 += norm1; norm0 += norm2;

Definition at line 3551 of file blas_quda.cu.

#define REDUCE_X_AUXILIARY (   i)    Float2 a = READ_DOUBLE2_TEXTURE(x, i);

Definition at line 3551 of file blas_quda.cu.

#define REDUCE_X_AUXILIARY (   i)

Definition at line 3551 of file blas_quda.cu.

#define REDUCE_X_AUXILIARY (   i)
Value:
RECONSTRUCT_HALF_SPINOR_ST(x, texHalfSt1, texNorm1, stride);            \
  RECONSTRUCT_HALF_SPINOR_ST(y, texHalfSt2, texNorm2, stride);          \
  RECONSTRUCT_HALF_SPINOR_ST(z, texHalfSt3, texNorm3, stride);          \
  CAXPBYPZ_FLOAT2(a, x0, b, y0, z0);                                    \
  CAXPBYPZ_FLOAT2(a, x1, b, y1, z1);                                    \
  CAXPBYPZ_FLOAT2(a, x2, b, y2, z2);                                    \
  CONSTRUCT_HALF_SPINOR_FROM_SINGLE_ST(zH, zN, z, stride);                      \
  READ_HALF_SPINOR_ST(w, texHalfSt4, stride);                           \
  float2 bwc = -wc*b;                                                   \
  CAXPY_FLOAT2(bwc, w0, y0);                                            \
  CAXPY_FLOAT2(bwc, w1, y1);                                            \
  CAXPY_FLOAT2(bwc, w2, y2);                                            \
  REAL_DOT_FLOAT2(norm0, y0, y0);                                       \
  REAL_DOT_FLOAT2(norm1, y1, y1);                                       \
  REAL_DOT_FLOAT2(norm2, y2, y2);                                       \
  CONSTRUCT_HALF_SPINOR_FROM_SINGLE_ST(yH, yN, y, stride);

Definition at line 3551 of file blas_quda.cu.

#define REDUCE_X_AUXILIARY (   i)    Float2 a = READ_DOUBLE2_TEXTURE(x, i);

Definition at line 3551 of file blas_quda.cu.

#define REDUCE_X_AUXILIARY (   i)
Value:
READ_HALF_SPINOR(x, texHalf1, stride);                          \
  READ_HALF_SPINOR(y, texHalf2, stride);                                \
  REAL_DOT_FLOAT4(norm0, y0, y0);                                       \
  REAL_DOT_FLOAT4(norm1, y1, y1);                                       \
  REAL_DOT_FLOAT4(norm2, y2, y2);                                       \
  REAL_DOT_FLOAT4(norm3, y3, y3);                                       \
  REAL_DOT_FLOAT4(norm4, y4, y4);                                       \
  REAL_DOT_FLOAT4(norm5, y5, y5);                                       \
  norm0 += norm1; norm2 += norm3; norm4 += norm5; norm0 += norm2, norm0 += norm4;

Definition at line 3551 of file blas_quda.cu.

#define REDUCE_X_AUXILIARY (   i)
Value:
Float2 X = read_Float2(x, i);           \
  Float2 Y = read_Float2(y, i);         \
  Float2 W = read_Float2(w, i);

Definition at line 3551 of file blas_quda.cu.

#define REDUCE_X_AUXILIARY (   i)
Value:
READ_HALF_SPINOR_ST(x, texHalfSt1, stride);                             \
  READ_HALF_SPINOR_ST(y, texHalfSt2, stride);                           \
  REAL_DOT_FLOAT2(norm0, x0, x0);                                       \
  REAL_DOT_FLOAT2(norm1, x1, x1);                                       \
  REAL_DOT_FLOAT2(norm2, x2, x2);                                       \
  norm0 += norm1; norm0 += norm2;

Definition at line 3551 of file blas_quda.cu.

#define REDUCE_X_AUXILIARY (   i)

Definition at line 3551 of file blas_quda.cu.

#define REDUCE_X_AUXILIARY (   i)
Value:
READ_HALF_SPINOR(x, texHalf1, stride);                          \
  READ_HALF_SPINOR(y, texHalf2, stride);                                \
  REAL_DOT_FLOAT4(norm0, x0, x0);                                       \
  REAL_DOT_FLOAT4(norm1, x1, x1);                                       \
  REAL_DOT_FLOAT4(norm2, x2, x2);                                       \
  REAL_DOT_FLOAT4(norm3, x3, x3);                                       \
  REAL_DOT_FLOAT4(norm4, x4, x4);                                       \
  REAL_DOT_FLOAT4(norm5, x5, x5);                                       \
  norm0 += norm1; norm2 += norm3; norm4 += norm5; norm0 += norm2, norm0 += norm4;

Definition at line 3551 of file blas_quda.cu.

#define REDUCE_X_AUXILIARY (   i)
Value:
Float2 X = READ_DOUBLE2_TEXTURE(x, i);          \
  Float2 Y = READ_DOUBLE2_TEXTURE(y, i);                \
  Float2 W = READ_DOUBLE2_TEXTURE(w, i);

Definition at line 3551 of file blas_quda.cu.

#define REDUCE_X_AUXILIARY (   i)
Value:
RECONSTRUCT_HALF_SPINOR(x, texHalf1, texNorm1, stride);         \
  RECONSTRUCT_HALF_SPINOR(y, texHalf2, texNorm2, stride);               \
  RECONSTRUCT_HALF_SPINOR(z, texHalf3, texNorm3, stride);               \
  CAXPBYPZ_FLOAT4(a, x0, b, y0, z0);                                    \
  CAXPBYPZ_FLOAT4(a, x1, b, y1, z1);                                    \
  CAXPBYPZ_FLOAT4(a, x2, b, y2, z2);                                    \
  CAXPBYPZ_FLOAT4(a, x3, b, y3, z3);                                    \
  CAXPBYPZ_FLOAT4(a, x4, b, y4, z4);                                    \
  CAXPBYPZ_FLOAT4(a, x5, b, y5, z5);                                    \
  CONSTRUCT_HALF_SPINOR_FROM_SINGLE(zH, zN, z, stride);                 \
  READ_HALF_SPINOR(w, texHalf4, stride);                                \
  float2 bwc = -wc*b;                                                   \
  CAXPY_FLOAT4(bwc, w0, y0);                                            \
  CAXPY_FLOAT4(bwc, w1, y1);                                            \
  CAXPY_FLOAT4(bwc, w2, y2);                                            \
  CAXPY_FLOAT4(bwc, w3, y3);                                            \
  CAXPY_FLOAT4(bwc, w4, y4);                                            \
  CAXPY_FLOAT4(bwc, w5, y5);                                            \
  REAL_DOT_FLOAT4(norm0, y0, y0);                                       \
  REAL_DOT_FLOAT4(norm1, y1, y1);                                       \
  REAL_DOT_FLOAT4(norm2, y2, y2);                                       \
  REAL_DOT_FLOAT4(norm3, y3, y3);                                       \
  REAL_DOT_FLOAT4(norm4, y4, y4);                                       \
  REAL_DOT_FLOAT4(norm5, y5, y5);                                       \
  CONSTRUCT_HALF_SPINOR_FROM_SINGLE(yH, yN, y, stride);

Definition at line 3551 of file blas_quda.cu.

#define REDUCE_X_OPERATION (   i)    (xc*yc*rdot0)

Definition at line 3581 of file blas_quda.cu.

#define REDUCE_X_OPERATION (   i)    (u[i].x*y[i].x + u[i].y*y[i].y)

Definition at line 3581 of file blas_quda.cu.

#define REDUCE_X_OPERATION (   i)    (a.x*b.x + a.y*b.y)

Definition at line 3581 of file blas_quda.cu.

#define REDUCE_X_OPERATION (   i)    (xc*yc*rdot0)

Definition at line 3581 of file blas_quda.cu.

#define REDUCE_X_OPERATION (   i)    (xc*yc*rdot0)

Definition at line 3581 of file blas_quda.cu.

#define REDUCE_X_OPERATION (   i)    (uc*rdot0)

Definition at line 3581 of file blas_quda.cu.

#define REDUCE_X_OPERATION (   i)    (xc*yc*rdot0)

Definition at line 3581 of file blas_quda.cu.

#define REDUCE_X_OPERATION (   i)    (uc*rdot0)

Definition at line 3581 of file blas_quda.cu.

#define REDUCE_X_OPERATION (   i)    (u[i].x*y[i].x + u[i].y*y[i].y)

Definition at line 3581 of file blas_quda.cu.

#define REDUCE_X_OPERATION (   i)    (a.x*b.x + a.y*b.y)

Definition at line 3581 of file blas_quda.cu.

#define REDUCE_X_OPERATION (   i)    (a[i].x*b[i].x + a[i].y*b[i].y)

Definition at line 3581 of file blas_quda.cu.

#define REDUCE_X_OPERATION (   i)    (a[i].x*b[i].x + a[i].y*b[i].y)

Definition at line 3581 of file blas_quda.cu.

#define REDUCE_Y_AUXILIARY (   i)    Float2 b = READ_DOUBLE2_TEXTURE(y, i);

Definition at line 3568 of file blas_quda.cu.

#define REDUCE_Y_AUXILIARY (   i)
Value:
REAL_DOT_FLOAT4(rdot0, x0, y0);                                 \
  REAL_DOT_FLOAT4(rdot1, x1, y1);                                       \
  REAL_DOT_FLOAT4(rdot2, x2, y2);                                       \
  REAL_DOT_FLOAT4(rdot3, x3, y3);                                       \
  REAL_DOT_FLOAT4(rdot4, x4, y4);                                       \
  REAL_DOT_FLOAT4(rdot5, x5, y5);                                       \
  rdot0 += rdot1; rdot2 += rdot3; rdot4 += rdot5; rdot0 += rdot2; rdot0 += rdot4;

Definition at line 3568 of file blas_quda.cu.

#define REDUCE_Y_AUXILIARY (   i)
Value:
REAL_DOT_FLOAT2(rdot0, x0, y0);                                 \
  REAL_DOT_FLOAT2(rdot1, x1, y1);                                       \
  REAL_DOT_FLOAT2(rdot2, x2, y2);                                       \
  rdot0 += rdot1; rdot0 += rdot2;

Definition at line 3568 of file blas_quda.cu.

#define REDUCE_Y_AUXILIARY (   i)
Value:
Float2 Z = read_Float2(z, i);   \
  Z.x += a.x*X.x - a.y*X.y;                     \
  Z.y += a.y*X.x + a.x*X.y;                     \
  Z.x += b.x*Y.x - b.y*Y.y;                     \
  Z.y += b.y*Y.x + b.x*Y.y;                     \
  Y.x -= b.x*W.x - b.y*W.y;                     \
  Y.y -= b.y*W.x + b.x*W.y;

Definition at line 3568 of file blas_quda.cu.

#define REDUCE_Y_AUXILIARY (   i)
Value:
READ_HALF_SPINOR(u, texHalf5, stride);                          \
  REAL_DOT_FLOAT4(rdot0, u0, y0);                                       \
  REAL_DOT_FLOAT4(rdot1, u1, y1);                                       \
  REAL_DOT_FLOAT4(rdot2, u2, y2);                                       \
  REAL_DOT_FLOAT4(rdot3, u3, y3);                                       \
  REAL_DOT_FLOAT4(rdot4, u4, y4);                                       \
  REAL_DOT_FLOAT4(rdot5, u5, y5);                                       \
  IMAG_DOT_FLOAT4(idot0, u0, y0);                                       \
  IMAG_DOT_FLOAT4(idot1, u1, y1);                                       \
  IMAG_DOT_FLOAT4(idot2, u2, y2);                                       \
  IMAG_DOT_FLOAT4(idot3, u3, y3);                                       \
  IMAG_DOT_FLOAT4(idot4, u4, y4);                                       \
  IMAG_DOT_FLOAT4(idot5, u5, y5);

Definition at line 3568 of file blas_quda.cu.

#define REDUCE_Y_AUXILIARY (   i)
Value:
REAL_DOT_FLOAT4(rdot0, x0, y0);                                 \
  REAL_DOT_FLOAT4(rdot1, x1, y1);                                       \
  REAL_DOT_FLOAT4(rdot2, x2, y2);                                       \
  REAL_DOT_FLOAT4(rdot3, x3, y3);                                       \
  REAL_DOT_FLOAT4(rdot4, x4, y4);                                       \
  REAL_DOT_FLOAT4(rdot5, x5, y5);                                       \
  rdot0 += rdot1; rdot2 += rdot3; rdot4 += rdot5; rdot0 += rdot2; rdot0 += rdot4;

Definition at line 3568 of file blas_quda.cu.

#define REDUCE_Y_AUXILIARY (   i)
Value:
Float2 Z = read_Float2(z, i);                   \
  Z.x += a.x*X.x - a.y*X.y;                     \
  Z.y += a.y*X.x + a.x*X.y;                     \
  Z.x += b.x*Y.x - b.y*Y.y;                     \
  Z.y += b.y*Y.x + b.x*Y.y;                     \
  Y.x -= b.x*W.x - b.y*W.y;                     \
  Y.y -= b.y*W.x + b.x*W.y;

Definition at line 3568 of file blas_quda.cu.

#define REDUCE_Y_AUXILIARY (   i)
Value:
REAL_DOT_FLOAT2(rdot0, x0, y0);                                 \
  REAL_DOT_FLOAT2(rdot1, x1, y1);                                       \
  REAL_DOT_FLOAT2(rdot2, x2, y2);                                       \
  rdot0 += rdot1; rdot0 += rdot2;

Definition at line 3568 of file blas_quda.cu.

#define REDUCE_Y_AUXILIARY (   i)

Definition at line 3568 of file blas_quda.cu.

#define REDUCE_Y_AUXILIARY (   i)
Value:
READ_HALF_SPINOR_ST(u, texHalfSt5, stride);                             \
  REAL_DOT_FLOAT2(rdot0, u0, y0);                                       \
  REAL_DOT_FLOAT2(rdot1, u1, y1);                                       \
  REAL_DOT_FLOAT2(rdot2, u2, y2);                                       \
  IMAG_DOT_FLOAT2(idot0, u0, y0);                                       \
  IMAG_DOT_FLOAT2(idot1, u1, y1);                                       \
  IMAG_DOT_FLOAT2(idot2, u2, y2);

Definition at line 3568 of file blas_quda.cu.

#define REDUCE_Y_AUXILIARY (   i)    Float2 b = READ_DOUBLE2_TEXTURE(y, i);

Definition at line 3568 of file blas_quda.cu.

#define REDUCE_Y_AUXILIARY (   i)

Definition at line 3568 of file blas_quda.cu.

#define REDUCE_Y_OPERATION (   i)    (u[i].x*y[i].y - u[i].y*y[i].x)

Definition at line 3582 of file blas_quda.cu.

#define REDUCE_Y_OPERATION (   i)    (a.x*b.y - a.y*b.x)

Definition at line 3582 of file blas_quda.cu.

#define REDUCE_Y_OPERATION (   i)    (u[i].x*y[i].y - u[i].y*y[i].x)

Definition at line 3582 of file blas_quda.cu.

#define REDUCE_Y_OPERATION (   i)    (uc*idot0)

Definition at line 3582 of file blas_quda.cu.

#define REDUCE_Y_OPERATION (   i)    (xc*yc*idot0)

Definition at line 3582 of file blas_quda.cu.

#define REDUCE_Y_OPERATION (   i)    (a[i].x*b[i].y - a[i].y*b[i].x)

Definition at line 3582 of file blas_quda.cu.

#define REDUCE_Y_OPERATION (   i)    (uc*idot0)

Definition at line 3582 of file blas_quda.cu.

#define REDUCE_Y_OPERATION (   i)    (xc*yc*idot0)

Definition at line 3582 of file blas_quda.cu.

#define REDUCE_Y_OPERATION (   i)    (xc*yc*idot0)

Definition at line 3582 of file blas_quda.cu.

#define REDUCE_Y_OPERATION (   i)    (xc*yc*idot0)

Definition at line 3582 of file blas_quda.cu.

#define REDUCE_Y_OPERATION (   i)    (a[i].x*b[i].y - a[i].y*b[i].x)

Definition at line 3582 of file blas_quda.cu.

#define REDUCE_Y_OPERATION (   i)    (a.x*b.y - a.y*b.x)

Definition at line 3582 of file blas_quda.cu.

#define REDUCE_Z_AUXILIARY (   i)
Value:
norm0 += norm1; norm2 += norm3; norm4 += norm5; norm0 += norm2, norm0 += norm4; \
  rdot0 += rdot1; rdot2 += rdot3; rdot4 += rdot5; rdot0 += rdot2; rdot0 += rdot4; \
  idot0 += idot1; idot2 += idot3; idot4 += idot5; idot0 += idot2; idot0 += idot4;

Definition at line 3576 of file blas_quda.cu.

#define REDUCE_Z_AUXILIARY (   i)
Value:
IMAG_DOT_FLOAT2(idot0, x0, y0);                                 \
  IMAG_DOT_FLOAT2(idot1, x1, y1);                                       \
  IMAG_DOT_FLOAT2(idot2, x2, y2);                                       \
  idot0 += idot1; idot0 += idot2;

Definition at line 3576 of file blas_quda.cu.

#define REDUCE_Z_AUXILIARY (   i)
Value:
IMAG_DOT_FLOAT4(idot0, x0, y0);                                 \
  IMAG_DOT_FLOAT4(idot1, x1, y1);                                       \
  IMAG_DOT_FLOAT4(idot2, x2, y2);                                       \
  IMAG_DOT_FLOAT4(idot3, x3, y3);                                       \
  IMAG_DOT_FLOAT4(idot4, x4, y4);                                       \
  IMAG_DOT_FLOAT4(idot5, x5, y5);                                       \
  idot0 += idot1; idot2 += idot3; idot4 += idot5; idot0 += idot2; idot0 += idot4;

Definition at line 3576 of file blas_quda.cu.

#define REDUCE_Z_AUXILIARY (   i)
Value:
z[i] = make_Float2(Z);        \
  y[i] = make_Float2(Y);

Definition at line 3576 of file blas_quda.cu.

#define REDUCE_Z_AUXILIARY (   i)

Definition at line 3576 of file blas_quda.cu.

#define REDUCE_Z_AUXILIARY (   i)
Value:
IMAG_DOT_FLOAT4(idot0, x0, y0);                                 \
  IMAG_DOT_FLOAT4(idot1, x1, y1);                                       \
  IMAG_DOT_FLOAT4(idot2, x2, y2);                                       \
  IMAG_DOT_FLOAT4(idot3, x3, y3);                                       \
  IMAG_DOT_FLOAT4(idot4, x4, y4);                                       \
  IMAG_DOT_FLOAT4(idot5, x5, y5);                                       \
  idot0 += idot1; idot2 += idot3; idot4 += idot5; idot0 += idot2; idot0 += idot4;

Definition at line 3576 of file blas_quda.cu.

#define REDUCE_Z_AUXILIARY (   i)
Value:
z[i] = make_Float2(Z);        \
  y[i] = make_Float2(Y);

Definition at line 3576 of file blas_quda.cu.

#define REDUCE_Z_AUXILIARY (   i)
Value:
IMAG_DOT_FLOAT2(idot0, x0, y0);                                 \
  IMAG_DOT_FLOAT2(idot1, x1, y1);                                       \
  IMAG_DOT_FLOAT2(idot2, x2, y2);                                       \
  idot0 += idot1; idot0 += idot2;

Definition at line 3576 of file blas_quda.cu.

#define REDUCE_Z_AUXILIARY (   i)

Definition at line 3576 of file blas_quda.cu.

#define REDUCE_Z_AUXILIARY (   i)
Value:
norm0 += norm1; norm0 += norm2; \
  rdot0 += rdot1; rdot0 += rdot2; \
  idot0 += idot1; idot0 += idot2;

Definition at line 3576 of file blas_quda.cu.

#define REDUCE_Z_AUXILIARY (   i)

Definition at line 3576 of file blas_quda.cu.

#define REDUCE_Z_AUXILIARY (   i)

Definition at line 3576 of file blas_quda.cu.

#define REDUCE_Z_OPERATION (   i)    (yc*yc*norm0)

Definition at line 3583 of file blas_quda.cu.

#define REDUCE_Z_OPERATION (   i)    (xc*xc*norm0)

Definition at line 3583 of file blas_quda.cu.

#define REDUCE_Z_OPERATION (   i)    (b.x*b.x + b.y*b.y)

Definition at line 3583 of file blas_quda.cu.

#define REDUCE_Z_OPERATION (   i)    (a.x*a.x + a.y*a.y)

Definition at line 3583 of file blas_quda.cu.

#define REDUCE_Z_OPERATION (   i)    (y[i].x*y[i].x + y[i].y*y[i].y)

Definition at line 3583 of file blas_quda.cu.

#define REDUCE_Z_OPERATION (   i)    (y[i].x*y[i].x + y[i].y*y[i].y)

Definition at line 3583 of file blas_quda.cu.

#define REDUCE_Z_OPERATION (   i)    (norm0)

Definition at line 3583 of file blas_quda.cu.

#define REDUCE_Z_OPERATION (   i)    (xc*xc*norm0)

Definition at line 3583 of file blas_quda.cu.

#define REDUCE_Z_OPERATION (   i)    (a[i].x*a[i].x + a[i].y*a[i].y)

Definition at line 3583 of file blas_quda.cu.

#define REDUCE_Z_OPERATION (   i)    (yc*yc*norm0)

Definition at line 3583 of file blas_quda.cu.

#define REDUCE_Z_OPERATION (   i)    (norm0)

Definition at line 3583 of file blas_quda.cu.

#define REDUCE_Z_OPERATION (   i)    (b[i].x*b[i].x + b[i].y*b[i].y)

Definition at line 3583 of file blas_quda.cu.

#define SCALE_FLOAT   ((SHORT_LENGTH-1) * 0.5)

Definition at line 294 of file blas_quda.cu.

#define SHIFT_FLOAT   (-1.f / (SHORT_LENGTH-1))

Definition at line 295 of file blas_quda.cu.

#define SHORT_LENGTH   65536

Definition at line 293 of file blas_quda.cu.

#define SUM_FLOAT2 (   sum,
  a
)    float sum = a.x + a.y;

Definition at line 404 of file blas_quda.cu.

#define SUM_FLOAT4 (   sum,
  a
)    float sum = a.x + a.y + a.z + a.w;

Definition at line 401 of file blas_quda.cu.

#define XMY_FLOAT2 (   X,
  Y
)    Y.x = X.x - Y.x; Y.y = X.y - Y.y;

Definition at line 450 of file blas_quda.cu.

#define XMY_FLOAT4 (   X,
  Y
)    Y.x = X.x - Y.x; Y.y = X.y - Y.y; Y.z = X.z - Y.z; Y.w = X.w - Y.w;

Definition at line 447 of file blas_quda.cu.

#define XPAY_FLOAT2 (   X,
  a,
  Y
)    Y.x = X.x + a*Y.x; Y.y = X.y + a*Y.y;

Definition at line 489 of file blas_quda.cu.

#define XPAY_FLOAT4 (   X,
  a,
  Y
)
Value:
Y.x = X.x + a*Y.x; Y.y = X.y + a*Y.y;                \
  Y.z = X.z + a*Y.z; Y.w = X.w + a*Y.w;

Definition at line 480 of file blas_quda.cu.

#define XPY_FLOAT2 (   X,
  Y
)    Y.x += X.x; Y.y += X.y;

Definition at line 444 of file blas_quda.cu.

#define XPY_FLOAT4 (   X,
  Y
)    Y.x += X.x; Y.y += X.y; Y.z += X.z; Y.w += X.w;

Definition at line 441 of file blas_quda.cu.


Function Documentation

void axCuda ( const double &  a,
cudaColorSpinorField x 
)

Definition at line 1378 of file blas_quda.cu.

__global__ void axHKernel ( float  a,
short4 *  xH,
float *  xN,
int  stride,
int  length 
)

Definition at line 1352 of file blas_quda.cu.

__global__ void axHKernel ( float  a,
short2 *  xH,
float *  xN,
int  stride,
int  length 
)

Definition at line 1365 of file blas_quda.cu.

template<typename Float , typename Float2 >
__global__ void axKernel ( Float  a,
Float2 *  x,
int  len 
)

Definition at line 1343 of file blas_quda.cu.

void axpbyCuda ( const double &  a,
cudaColorSpinorField x,
const double &  b,
cudaColorSpinorField y 
)

Definition at line 978 of file blas_quda.cu.

__global__ void axpbyHKernel ( float  a,
float  b,
short4 *  yH,
float *  yN,
int  stride,
int  length 
)

Definition at line 944 of file blas_quda.cu.

__global__ void axpbyHKernel ( float  a,
float  b,
short2 *  yH,
float *  yN,
int  stride,
int  length 
)

Definition at line 961 of file blas_quda.cu.

template<typename Float , typename Float2 >
__global__ void axpbyKernel ( Float  a,
Float2 *  x,
Float  b,
Float2 *  y,
int  length 
)

Definition at line 935 of file blas_quda.cu.

void axpyBzpcxCuda ( const double &  a,
cudaColorSpinorField x,
cudaColorSpinorField y,
const double &  b,
cudaColorSpinorField z,
const double &  c 
)

Definition at line 1856 of file blas_quda.cu.

template<typename Float , typename Float2 >
__global__ void axpyBzpcxDKernel ( Float  a,
Float2 *  x,
Float2 *  y,
Float  b,
Float2 *  z,
Float  c,
int  len 
)

Definition at line 1768 of file blas_quda.cu.

__global__ void axpyBzpcxHKernel ( float  a,
float  b,
float  c,
short4 *  xH,
float *  xN,
short4 *  yH,
float *  yN,
int  stride,
int  length 
)

Definition at line 1806 of file blas_quda.cu.

__global__ void axpyBzpcxHKernel ( float  a,
float  b,
float  c,
short2 *  xH,
float *  xN,
short2 *  yH,
float *  yN,
int  stride,
int  length 
)

Definition at line 1834 of file blas_quda.cu.

template<typename Float , typename Float2 >
__global__ void axpyBzpcxSKernel ( Float  a,
Float2 *  x,
Float2 *  y,
Float  b,
Float2 *  z,
Float  c,
int  len 
)

Definition at line 1788 of file blas_quda.cu.

void axpyCuda ( const double &  a,
cudaColorSpinorField x,
cudaColorSpinorField y 
)

Definition at line 1143 of file blas_quda.cu.

__global__ void axpyHKernel ( float  a,
short4 *  yH,
float *  yN,
int  stride,
int  length 
)

Definition at line 1108 of file blas_quda.cu.

__global__ void axpyHKernel ( float  a,
short2 *  yH,
float *  yN,
int  stride,
int  length 
)

Definition at line 1127 of file blas_quda.cu.

template<typename Float , typename Float2 >
__global__ void axpyKernel ( Float  a,
Float2 *  x,
Float2 *  y,
int  len 
)

Definition at line 1099 of file blas_quda.cu.

double axpyNormCuda ( const double &  a,
cudaColorSpinorField x,
cudaColorSpinorField y 
)

Definition at line 2643 of file blas_quda.cu.

void axpyZpbxCuda ( const double &  a,
cudaColorSpinorField x,
cudaColorSpinorField y,
cudaColorSpinorField z,
const double &  b 
)

Definition at line 1986 of file blas_quda.cu.

template<typename Float , typename Float2 >
__global__ void axpyZpbxDKernel ( Float  a,
Float2 *  x,
Float2 *  y,
Float2 *  z,
Float  b,
int  len 
)

Definition at line 1909 of file blas_quda.cu.

__global__ void axpyZpbxHKernel ( float  a,
float  b,
short4 *  xH,
float *  xN,
short4 *  yH,
float *  yN,
int  stride,
int  length 
)

Definition at line 1938 of file blas_quda.cu.

__global__ void axpyZpbxHKernel ( float  a,
float  b,
short2 *  xH,
float *  xN,
short2 *  yH,
float *  yN,
int  stride,
int  length 
)

Definition at line 1964 of file blas_quda.cu.

template<typename Float , typename Float2 >
__global__ void axpyZpbxSKernel ( Float  a,
Float2 *  x,
Float2 *  y,
Float2 *  z,
Float  b,
int  len 
)

Definition at line 1924 of file blas_quda.cu.

void caxpbyCuda ( const Complex a,
cudaColorSpinorField x,
const Complex b,
cudaColorSpinorField y 
)

Definition at line 1582 of file blas_quda.cu.

template<typename Float2 >
__global__ void caxpbyDKernel ( Float2  a,
Float2 *  x,
Float2  b,
Float2 *  y,
int  len 
)

Definition at line 1520 of file blas_quda.cu.

__global__ void caxpbyHKernel ( float2  a,
float2  b,
short2 *  yH,
float *  yN,
int  stride,
int  length 
)

Definition at line 1565 of file blas_quda.cu.

__global__ void caxpbyHKernel ( float2  a,
float2  b,
short4 *  yH,
float *  yN,
int  stride,
int  length 
)

Definition at line 1547 of file blas_quda.cu.

double3 caxpbypzYmbwcDotProductWYNormYCuda ( const Complex a,
cudaColorSpinorField x,
const Complex b,
cudaColorSpinorField y,
cudaColorSpinorField z,
cudaColorSpinorField w,
cudaColorSpinorField u 
)

Definition at line 3597 of file blas_quda.cu.

void caxpbypzYmbwCuda ( const Complex a,
cudaColorSpinorField x,
const Complex b,
cudaColorSpinorField y,
cudaColorSpinorField z,
cudaColorSpinorField w 
)

Definition at line 2143 of file blas_quda.cu.

template<typename Float2 >
__global__ void caxpbypzYmbwDKernel ( Float2  a,
Float2 *  x,
Float2  b,
Float2 *  y,
Float2 *  z,
Float2 *  w,
int  len 
)

Definition at line 2037 of file blas_quda.cu.

__global__ void caxpbypzYmbwHKernel ( float2  a,
float2  b,
float *  xN,
short2 *  yH,
float *  yN,
short2 *  zH,
float *  zN,
float *  wN,
int  stride,
int  length 
)

Definition at line 2119 of file blas_quda.cu.

__global__ void caxpbypzYmbwHKernel ( float2  a,
float2  b,
float *  xN,
short4 *  yH,
float *  yN,
short4 *  zH,
float *  zN,
float *  wN,
int  stride,
int  length 
)

Definition at line 2090 of file blas_quda.cu.

template<typename Float2 >
__global__ void caxpbypzYmbwSKernel ( Float2  a,
Float2 *  x,
Float2  b,
Float2 *  y,
Float2 *  z,
Float2 *  w,
int  len 
)

Definition at line 2064 of file blas_quda.cu.

template<typename Float2 >
__global__ void caxpbySKernel ( Float2  a,
Float2 *  x,
Float2  b,
Float2 *  y,
int  len 
)

Definition at line 1534 of file blas_quda.cu.

void caxpyCuda ( const Complex a,
cudaColorSpinorField x,
cudaColorSpinorField y 
)

Definition at line 1474 of file blas_quda.cu.

template<typename Float2 >
__global__ void caxpyDKernel ( Float2  a,
Float2 *  x,
Float2 *  y,
int  len 
)

Definition at line 1411 of file blas_quda.cu.

__global__ void caxpyHKernel ( float2  a,
short2 *  yH,
float *  yN,
int  stride,
int  length 
)

Definition at line 1457 of file blas_quda.cu.

__global__ void caxpyHKernel ( float2  a,
short4 *  yH,
float *  yN,
int  stride,
int  length 
)

Definition at line 1438 of file blas_quda.cu.

template<typename Float2 >
__global__ void caxpySKernel ( Float2  a,
Float2 *  x,
Float2 *  y,
int  len 
)

Definition at line 1425 of file blas_quda.cu.

Complex cDotProductCuda ( cudaColorSpinorField x,
cudaColorSpinorField y 
)

Definition at line 2879 of file blas_quda.cu.

double3 cDotProductNormACuda ( cudaColorSpinorField x,
cudaColorSpinorField y 
)

Definition at line 3209 of file blas_quda.cu.

double3 cDotProductNormBCuda ( cudaColorSpinorField x,
cudaColorSpinorField y 
)

Definition at line 3370 of file blas_quda.cu.

__global__ void convertDHKernel ( double2 *  res,
int  length,
int  real_length 
)

Definition at line 786 of file blas_quda.cu.

__global__ void convertDHKernelSt ( double2 *  res,
int  length,
int  real_length 
)

Definition at line 810 of file blas_quda.cu.

__global__ void convertDSKernel ( double2 *  dst,
float4 *  src,
int  length 
)

Definition at line 632 of file blas_quda.cu.

__global__ void convertDSKernel ( double2 *  dst,
float2 *  src,
int  length 
)

Definition at line 646 of file blas_quda.cu.

__global__ void convertHDKernel ( short4 *  h,
float *  norm,
int  length,
int  real_length 
)

Definition at line 749 of file blas_quda.cu.

__global__ void convertHDKernel ( short2 *  h,
float *  norm,
int  length,
int  real_length 
)

Definition at line 772 of file blas_quda.cu.

__global__ void convertHSKernel ( short2 *  h,
float *  norm,
int  length,
int  real_length 
)

Definition at line 702 of file blas_quda.cu.

__global__ void convertHSKernel ( short4 *  h,
float *  norm,
int  length,
int  real_length 
)

Definition at line 684 of file blas_quda.cu.

__global__ void convertSDKernel ( float4 *  dst,
double2 *  src,
int  length 
)

Definition at line 658 of file blas_quda.cu.

__global__ void convertSDKernel ( float2 *  dst,
double2 *  src,
int  length 
)

Definition at line 672 of file blas_quda.cu.

__global__ void convertSHKernel ( float4 *  res,
int  length,
int  real_length 
)

Definition at line 718 of file blas_quda.cu.

__global__ void convertSHKernel ( float2 *  res,
int  length,
int  real_length 
)

Definition at line 735 of file blas_quda.cu.

void copyCuda ( cudaColorSpinorField dst,
const cudaColorSpinorField src 
)

Definition at line 827 of file blas_quda.cu.

void cxpaypbzCuda ( cudaColorSpinorField x,
const Complex a,
cudaColorSpinorField y,
const Complex b,
cudaColorSpinorField z 
)

Definition at line 1711 of file blas_quda.cu.

template<typename Float2 >
__global__ void cxpaypbzDKernel ( Float2 *  x,
Float2  a,
Float2 *  y,
Float2  b,
Float2 *  z,
int  len 
)

Definition at line 1632 of file blas_quda.cu.

__global__ void cxpaypbzHKernel ( float2  a,
float2  b,
short4 *  zH,
float *  zN,
int  stride,
int  length 
)

Definition at line 1673 of file blas_quda.cu.

__global__ void cxpaypbzHKernel ( float2  a,
float2  b,
short2 *  zH,
float *  zN,
int  stride,
int  length 
)

Definition at line 1693 of file blas_quda.cu.

template<typename Float2 >
__global__ void cxpaypbzSKernel ( Float2 *  x,
Float2  a,
Float2 *  y,
Float2  b,
Float2 *  z,
int  len 
)

Definition at line 1653 of file blas_quda.cu.

__device__ void dsadd ( volatile QudaSumFloat &  c0,
volatile QudaSumFloat &  c1,
const volatile QudaSumFloat &  a0,
const volatile QudaSumFloat &  a1,
const float  b0,
const float  b1 
)

Definition at line 2210 of file blas_quda.cu.

__device__ void dsadd3 ( volatile QudaSumFloat3 &  c0,
volatile QudaSumFloat3 &  c1,
const volatile QudaSumFloat3 &  a0,
const volatile QudaSumFloat3 &  a1,
const volatile QudaSumFloat3 &  b0,
const volatile QudaSumFloat3 &  b1 
)

Definition at line 2242 of file blas_quda.cu.

void endBlas ( void  )

Definition at line 150 of file blas_quda.cu.

__device__ float fast_abs_max ( float4  a)

Definition at line 314 of file blas_quda.cu.

__device__ short float2short ( float  c,
float  a 
)

Definition at line 297 of file blas_quda.cu.

__device__ short4 float42short4 ( float  c,
float4  a 
)

Definition at line 307 of file blas_quda.cu.

void initBlas ( void  )

Definition at line 111 of file blas_quda.cu.

float2 __device__ make_Float2 ( float2  x)

Definition at line 227 of file blas_quda.cu.

double2 __device__ make_Float2 ( double2  x)

Definition at line 231 of file blas_quda.cu.

void mxpyCuda ( cudaColorSpinorField x,
cudaColorSpinorField y 
)

Definition at line 1305 of file blas_quda.cu.

__global__ void mxpyHKernel ( short4 *  yH,
float *  yN,
int  stride,
int  length 
)

Definition at line 1270 of file blas_quda.cu.

__global__ void mxpyHKernel ( short2 *  yH,
float *  yN,
int  stride,
int  length 
)

Definition at line 1288 of file blas_quda.cu.

template<typename Float >
__global__ void mxpyKernel ( Float *  x,
Float *  y,
int  len 
)

Definition at line 1261 of file blas_quda.cu.

double normCuda ( const cudaColorSpinorField a)

Definition at line 2435 of file blas_quda.cu.

__device__ float2 operator* ( const float  a,
const float2  x 
)

Definition at line 62 of file blas_quda.cu.

template<typename Float >
__device__ float4 operator*= ( float4 &  a,
const Float &  b 
)

Definition at line 99 of file blas_quda.cu.

template<typename Float , typename Float2 >
__device__ Float2 operator*= ( Float2 &  x,
const Float  a 
)

Definition at line 92 of file blas_quda.cu.

template<typename Float2 >
__device__ Float2 operator+ ( const Float2  x,
const Float2  y 
)

Definition at line 70 of file blas_quda.cu.

double3 operator+ ( const double3 &  x,
const double3 &  y 
)

Definition at line 56 of file blas_quda.cu.

double2 operator+ ( const double2 &  x,
const double2 &  y 
)

Definition at line 52 of file blas_quda.cu.

template<typename Float2 >
__device__ Float2 operator+= ( Float2 &  x,
const Float2  y 
)

Definition at line 78 of file blas_quda.cu.

template<typename Float2 >
__device__ Float2 operator-= ( Float2 &  x,
const Float2  y 
)

Definition at line 85 of file blas_quda.cu.

float2 __device__ read_Float2 ( float2 *  x,
int  i 
)

Definition at line 213 of file blas_quda.cu.

double2 __device__ read_Float2 ( double2 *  x,
int  i 
)

Definition at line 217 of file blas_quda.cu.

double reDotProductCuda ( cudaColorSpinorField a,
cudaColorSpinorField b 
)

Definition at line 2536 of file blas_quda.cu.

void setBlasParam ( int  kernel,
int  prec,
int  threads,
int  blocks 
)

Definition at line 168 of file blas_quda.cu.

void setBlasTuning ( int  tuning)

Definition at line 163 of file blas_quda.cu.

void setBlock ( int  kernel,
int  length,
QudaPrecision  precision 
)

Definition at line 174 of file blas_quda.cu.

__device__ float short2float ( short  a)

Definition at line 303 of file blas_quda.cu.

double sumCuda ( cudaColorSpinorField a)

Definition at line 2337 of file blas_quda.cu.

double xmyNormCuda ( cudaColorSpinorField x,
cudaColorSpinorField y 
)

Definition at line 2749 of file blas_quda.cu.

Complex xpaycDotzyCuda ( cudaColorSpinorField x,
const double &  a,
cudaColorSpinorField y,
cudaColorSpinorField z 
)

Definition at line 3040 of file blas_quda.cu.

void xpayCuda ( const cudaColorSpinorField x,
const double &  a,
cudaColorSpinorField y 
)

Definition at line 1223 of file blas_quda.cu.

__global__ void xpayHKernel ( float  a,
short4 *  yH,
float *  yN,
int  stride,
int  length 
)

Definition at line 1190 of file blas_quda.cu.

__global__ void xpayHKernel ( float  a,
short2 *  yH,
float *  yN,
int  stride,
int  length 
)

Definition at line 1207 of file blas_quda.cu.

template<typename Float , typename Float2 >
__global__ void xpayKernel ( const Float2 *  x,
Float  a,
Float2 *  y,
int  len 
)

Definition at line 1181 of file blas_quda.cu.

void xpyCuda ( cudaColorSpinorField x,
cudaColorSpinorField y 
)

Definition at line 1061 of file blas_quda.cu.

__global__ void xpyHKernel ( short2 *  yH,
float *  yN,
int  stride,
int  length 
)

Definition at line 1045 of file blas_quda.cu.

__global__ void xpyHKernel ( short4 *  yH,
float *  yN,
int  stride,
int  length 
)

Definition at line 1027 of file blas_quda.cu.

template<typename Float >
__global__ void xpyKernel ( Float *  x,
Float *  y,
int  len 
)

Definition at line 1018 of file blas_quda.cu.

__device__ void zcadd ( volatile QudaSumComplex &  c0,
volatile QudaSumComplex &  c1,
const volatile QudaSumComplex &  a0,
const volatile QudaSumComplex &  a1,
const volatile QudaSumComplex &  b0,
const volatile QudaSumComplex &  b1 
)

Definition at line 2222 of file blas_quda.cu.

void zeroCuda ( cudaColorSpinorField a)

Definition at line 109 of file blas_quda.cu.


Variable Documentation

unsigned long long blas_quda_bytes

Definition at line 44 of file blas_quda.cu.

unsigned long long blas_quda_flops

Definition at line 43 of file blas_quda.cu.

texture<short4, 1, cudaReadModeNormalizedFloat> texHalf1

Definition at line 591 of file blas_quda.cu.

texture<short4, 1, cudaReadModeNormalizedFloat> texHalf2

Definition at line 596 of file blas_quda.cu.

texture<short4, 1, cudaReadModeNormalizedFloat> texHalf3

Definition at line 601 of file blas_quda.cu.

texture<short4, 1, cudaReadModeNormalizedFloat> texHalf4

Definition at line 606 of file blas_quda.cu.

texture<short4, 1, cudaReadModeNormalizedFloat> texHalf5

Definition at line 611 of file blas_quda.cu.

texture<short2, 1, cudaReadModeNormalizedFloat> texHalfSt1

Definition at line 592 of file blas_quda.cu.

texture<short2, 1, cudaReadModeNormalizedFloat> texHalfSt2

Definition at line 597 of file blas_quda.cu.

texture<short2, 1, cudaReadModeNormalizedFloat> texHalfSt3

Definition at line 602 of file blas_quda.cu.

texture<short2, 1, cudaReadModeNormalizedFloat> texHalfSt4

Definition at line 607 of file blas_quda.cu.

texture<short2, 1, cudaReadModeNormalizedFloat> texHalfSt5

Definition at line 612 of file blas_quda.cu.

texture<float, 1, cudaReadModeElementType> texNorm1

Definition at line 593 of file blas_quda.cu.

texture<float, 1, cudaReadModeElementType> texNorm2

Definition at line 598 of file blas_quda.cu.

texture<float, 1, cudaReadModeElementType> texNorm3

Definition at line 603 of file blas_quda.cu.

texture<float, 1, cudaReadModeElementType> texNorm4

Definition at line 608 of file blas_quda.cu.

texture<float, 1, cudaReadModeElementType> texNorm5

Definition at line 613 of file blas_quda.cu.

texture<int4, 1> uTexDouble2

Definition at line 582 of file blas_quda.cu.

texture<int4, 1> wTexDouble2

Definition at line 581 of file blas_quda.cu.

texture<int4, 1> xTexDouble2

Definition at line 578 of file blas_quda.cu.

texture<float2, 1> xTexSingle2

Definition at line 585 of file blas_quda.cu.

texture<float4, 1> xTexSingle4

Definition at line 588 of file blas_quda.cu.

texture<int4, 1> yTexDouble2

Definition at line 579 of file blas_quda.cu.

texture<float2, 1> yTexSingle2

Definition at line 586 of file blas_quda.cu.

texture<int4, 1> zTexDouble2

Definition at line 580 of file blas_quda.cu.

All | Classes | Files | Functions | Variables | Typedefs | Enumerations | Enumerator | Friends | Defines