Namespaces
	copy_ns

	detail

Classes
struct	ax_

struct	axpbyz_

struct	axpbyzNorm2

struct	axpyBzpcx_

struct	axpyCGNorm2

struct	AxpyReDot

struct	axpyZpbx_

struct	BlasArg

class	BlasCuda

struct	BlasFunctor

struct	cabxpyAx_

struct	cabxpyzaxnorm

struct	caxpby_

struct	caxpbypczw_

struct	caxpbypzYmbw_

struct	caxpbypzYmbwcDotProductUYNormY_

struct	caxpy_

struct	caxpyBxpz_

struct	caxpyBzpx_

struct	caxpydotzy

struct	caxpyNorm2

struct	caxpyxmaz_

struct	caxpyxmazMR_

struct	caxpyxmaznormx

struct	Cdot

struct	CdotCopy

struct	CdotNormA

struct	coeff_array

struct	Dot

struct	doubleCG3Init_

struct	doubleCG3InitNorm_

struct	doubleCG3Update_

struct	doubleCG3UpdateNorm_

struct	HeavyQuarkResidualNorm_

struct	multi_axpyBzpcx_

struct	multi_caxpyBxpz_

class	MultiBlas

struct	MultiBlasArg
	Parameter struct for generic multi-blas kernel. More...

struct	MultiBlasFunctor

struct	multicaxpy_

struct	multicaxpyz_

struct	MultiReduceArg
	Parameter struct for generic multi-reduce kernel. More...

class	MultiReduceCuda

struct	MultiReduceFunctor

struct	Norm1

struct	Norm2

struct	num_to_string

struct	quadrupleCG3InitNorm_

struct	quadrupleCG3UpdateNorm_

struct	quadrupleCGReduction_

class	ReduceCuda

struct	ReduceFunctor

struct	ReductionArg

class	TileSizeTune

struct	tripleCGReduction_

struct	tripleCGUpdate_

struct	write

struct	xpyHeavyQuarkResidualNorm_

Functions
void	init ()

void	end (void)

void *	getDeviceReduceBuffer ()

void *	getMappedHostReduceBuffer ()

void *	getHostReduceBuffer ()

void	setParam (int kernel, int prec, int threads, int blocks)

void	zero (ColorSpinorField &a)

void	copy (ColorSpinorField &dst, const ColorSpinorField &src)

void	ax (double a, ColorSpinorField &x)

void	axpbyz (double a, ColorSpinorField &x, double b, ColorSpinorField &y, ColorSpinorField &z)

void	xpy (ColorSpinorField &x, ColorSpinorField &y)

void	mxpy (ColorSpinorField &x, ColorSpinorField &y)

void	axpy (double a, ColorSpinorField &x, ColorSpinorField &y)

void	axpby (double a, ColorSpinorField &x, double b, ColorSpinorField &y)

void	xpay (ColorSpinorField &x, double a, ColorSpinorField &y)

void	xpayz (ColorSpinorField &x, double a, ColorSpinorField &y, ColorSpinorField &z)

void	axpyZpbx (double a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, double b)

void	axpyBzpcx (double a, ColorSpinorField &x, ColorSpinorField &y, double b, ColorSpinorField &z, double c)

void	caxpby (const Complex &a, ColorSpinorField &x, const Complex &b, ColorSpinorField &y)

void	caxpy (const Complex &a, ColorSpinorField &x, ColorSpinorField &y)

void	caxpbypczw (const Complex &a, ColorSpinorField &x, const Complex &b, ColorSpinorField &y, const Complex &c, ColorSpinorField &z, ColorSpinorField &w)

void	cxpaypbz (ColorSpinorField &, const Complex &b, ColorSpinorField &y, const Complex &c, ColorSpinorField &z)

void	caxpbypzYmbw (const Complex &, ColorSpinorField &, const Complex &, ColorSpinorField &, ColorSpinorField &, ColorSpinorField &)

void	caxpyBzpx (const Complex &, ColorSpinorField &, ColorSpinorField &, const Complex &, ColorSpinorField &)

void	caxpyBxpz (const Complex &, ColorSpinorField &, ColorSpinorField &, const Complex &, ColorSpinorField &)

void	cabxpyAx (double a, const Complex &b, ColorSpinorField &x, ColorSpinorField &y)

void	caxpyXmaz (const Complex &a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)

void	caxpyXmazMR (const Complex &a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)

void	tripleCGUpdate (double alpha, double beta, ColorSpinorField &q, ColorSpinorField &r, ColorSpinorField &x, ColorSpinorField &p)

void	doubleCG3Init (double a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)

void	doubleCG3Update (double a, double b, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)

double	norm1 (const ColorSpinorField &b)

double	norm2 (const ColorSpinorField &a)

double	axpyReDot (double a, ColorSpinorField &x, ColorSpinorField &y)

double	reDotProduct (ColorSpinorField &x, ColorSpinorField &y)

double	axpbyzNorm (double a, ColorSpinorField &x, double b, ColorSpinorField &y, ColorSpinorField &z)

double	axpyNorm (double a, ColorSpinorField &x, ColorSpinorField &y)

double	xmyNorm (ColorSpinorField &x, ColorSpinorField &y)

Complex	cDotProduct (ColorSpinorField &, ColorSpinorField &)

double3	cDotProductNormA (ColorSpinorField &a, ColorSpinorField &b)

double3	cDotProductNormB (ColorSpinorField &a, ColorSpinorField &b)
	Return (a,b) and ||b||^2 - implemented using cDotProductNormA. More...

double3	caxpbypzYmbwcDotProductUYNormY (const Complex &a, ColorSpinorField &x, const Complex &b, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &u)

double	caxpyNorm (const Complex &a, ColorSpinorField &x, ColorSpinorField &y)

double	caxpyXmazNormX (const Complex &a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)

double	cabxpyzAxNorm (double a, const Complex &b, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)

Complex	caxpyDotzy (const Complex &a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)

Complex	axpyCGNorm (double a, ColorSpinorField &x, ColorSpinorField &y)

double3	HeavyQuarkResidualNorm (ColorSpinorField &x, ColorSpinorField &r)

double3	xpyHeavyQuarkResidualNorm (ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &r)

double3	tripleCGReduction (ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)

double4	quadrupleCGReduction (ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)

double	quadrupleCG3InitNorm (double a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &v)

double	quadrupleCG3UpdateNorm (double a, double b, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &v)

double	doubleCG3InitNorm (double a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)

double	doubleCG3UpdateNorm (double a, double b, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)

void	caxpy (const Complex *a, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y)
	Compute the block "caxpy" over the set of ColorSpinorFields. E.g., it computes. More...

void	caxpy (const Complex *a, ColorSpinorField &x, ColorSpinorField &y)
	This is a wrapper for calling the block "caxpy" with a composite ColorSpinorField. E.g., it computes. More...

void	caxpy_U (const Complex *a, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y)
	Compute the block "caxpy_U" over the set of ColorSpinorFields. E.g., it computes. More...

void	caxpy_U (const Complex *a, ColorSpinorField &x, ColorSpinorField &y)
	This is a wrapper for calling the block "caxpy_U" with a composite ColorSpinorField. E.g., it computes. More...

void	caxpy_L (const Complex *a, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y)
	Compute the block "caxpy_L" over the set of ColorSpinorFields. E.g., it computes. More...

void	caxpy_L (const Complex *a, ColorSpinorField &x, ColorSpinorField &y)
	This is a wrapper for calling the block "caxpy_L" with a composite ColorSpinorField. E.g., it computes. More...

void	caxpyz (const Complex *a, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, std::vector< ColorSpinorField *> &z)
	Compute the block "caxpyz" over the set of ColorSpinorFields. E.g., it computes. More...

void	caxpyz (const Complex *a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
	This is a wrapper for calling the block "caxpyz" with a composite ColorSpinorField. E.g., it computes. More...

void	caxpyz_U (const Complex *a, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, std::vector< ColorSpinorField *> &z)
	Compute the block "caxpyz_U" over the set of ColorSpinorFields. E.g., it computes. More...

void	caxpyz_U (const Complex *a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
	This is a wrapper for calling the block "caxpyz_U" with a composite ColorSpinorField. E.g., it computes. More...

void	caxpyz_L (const Complex *a, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, std::vector< ColorSpinorField *> &z)
	Compute the block "caxpyz_L" over the set of ColorSpinorFields. E.g., it computes. More...

void	caxpyz_L (const Complex *a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
	This is a wrapper for calling the block "caxpyz_L" with a composite ColorSpinorField. E.g., it computes. More...

void	axpyBzpcx (const double *a, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, const double *b, ColorSpinorField &z, const double *c)
	Compute the vectorized "axpyBzpcx" over the set of ColorSpinorFields, where the third vector, z, is constant over the batch. E.g., it computes. More...

void	caxpyBxpz (const Complex *a_, std::vector< ColorSpinorField *> &x_, ColorSpinorField &y_, const Complex *b_, ColorSpinorField &z_)
	Compute the vectorized "caxpyBxpz" over the set of ColorSpinorFields, where the second and third vectors, y and z, are constant over the batch. E.g., it computes. More...

void	reDotProduct (double *result, std::vector< ColorSpinorField *> &a, std::vector< ColorSpinorField *> &b)

void	cDotProduct (Complex *result, std::vector< ColorSpinorField *> &a, std::vector< ColorSpinorField *> &b)
	Computes the matrix of inner products between the vector set a and the vector set b. More...

void	hDotProduct (Complex *result, std::vector< ColorSpinorField *> &a, std::vector< ColorSpinorField *> &b)
	Computes the matrix of inner products between the vector set a and the vector set b. This routine is specifically for the case where the result matrix is guaranteed to be Hermitian. Requires a.size()==b.size(). More...

void	hDotProduct_Anorm (Complex *result, std::vector< ColorSpinorField *> &a, std::vector< ColorSpinorField *> &b)
	Computes the matrix of inner products between the vector set a and the vector set b. This routine is specifically for the case where the result matrix is guaranteed to be Hermitian. Uniquely defined for cases like (p, Ap) where the output is Hermitian, but there's an A-norm instead of an L2 norm. Requires a.size()==b.size(). More...

void	cDotProductCopy (Complex *result, std::vector< ColorSpinorField *> &a, std::vector< ColorSpinorField *> &b, std::vector< ColorSpinorField *> &c)
	Computes the matrix of inner products between the vector set a and the vector set b, and copies b into c. More...

template<typename FloatN , int M, typename Arg >
__global__ void	blasKernel (Arg arg)

__device__ __host__ void	_caxpy (const float2 &a, const float4 &x, float4 &y)

__device__ __host__ void	_caxpy (const float2 &a, const float2 &x, float2 &y)

__device__ __host__ void	_caxpy (const double2 &a, const double2 &x, double2 &y)

__device__ __host__ void	_caxpby (const float2 &a, const float4 &x, const float2 &b, float4 &y)

__device__ __host__ void	_caxpby (const float2 &a, const float2 &x, const float2 &b, float2 &y)

__device__ __host__ void	_caxpby (const double2 &a, const double2 &x, const double2 &b, double2 &y)

template<typename FloatN , int M, int NXZ, typename Arg >
__global__ void	multiBlasKernel (Arg arg_)
	Generic multi-blas kernel with four loads and up to four stores. More...

template<int block_size, typename ReduceType , typename FloatN , int M, int NXZ, typename Arg >
__global__ void	multiReduceKernel (Arg arg_)

template<typename ReduceType >
__device__ __host__ void	dot_ (ReduceType &sum, const double2 &a, const double2 &b)

template<typename ReduceType >
__device__ __host__ void	dot_ (ReduceType &sum, const float2 &a, const float2 &b)

template<typename ReduceType >
__device__ __host__ void	dot_ (ReduceType &sum, const float4 &a, const float4 &b)

template<typename ReduceType >
__device__ __host__ void	cdot_ (ReduceType &sum, const double2 &a, const double2 &b)

template<typename ReduceType >
__device__ __host__ void	cdot_ (ReduceType &sum, const float2 &a, const float2 &b)

template<typename ReduceType >
__device__ __host__ void	cdot_ (ReduceType &sum, const float4 &a, const float4 &b)

template<int block_size, typename ReduceType , typename FloatN , int M, typename Arg >
__global__ void	reduceKernel (Arg arg)

template<typename ReduceType >
__device__ __host__ ReduceType	norm1_ (const double2 &a)

template<typename ReduceType >
__device__ __host__ ReduceType	norm1_ (const float2 &a)

template<typename ReduceType >
__device__ __host__ ReduceType	norm1_ (const float4 &a)

template<typename ReduceType >
__device__ __host__ void	norm2_ (ReduceType &sum, const double2 &a)

template<typename ReduceType >
__device__ __host__ void	norm2_ (ReduceType &sum, const float2 &a)

template<typename ReduceType >
__device__ __host__ void	norm2_ (ReduceType &sum, const float4 &a)

__device__ __host__ void	Caxpy_ (const double2 &a, const double2 &x, double2 &y)

__device__ __host__ void	Caxpy_ (const float2 &a, const float2 &x, float2 &y)

__device__ __host__ void	Caxpy_ (const float2 &a, const float4 &x, float4 &y)

template<typename ReduceType , typename InputType >
__device__ __host__ void	cdotNormA_ (ReduceType &sum, const InputType &a, const InputType &b)

template<typename ReduceType , typename InputType >
__device__ __host__ void	cdotNormB_ (ReduceType &sum, const InputType &a, const InputType &b)

template<typename RegType , typename StoreType , typename yType , int M, template< typename, typename > class Functor, int writeX, int writeY, int writeZ, int writeW, int writeV>
void	nativeBlas (const double2 &a, const double2 &b, const double2 &c, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &v, int length)

template<template< typename Float, typename FloatN > class Functor, int writeX = 0, int writeY = 0, int writeZ = 0, int writeW = 0, int writeV = 0>
void	uni_blas (const double2 &a, const double2 &b, const double2 &c, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &v)

template<template< typename Float, typename FloatN > class Functor, int writeX = 0, int writeY = 0, int writeZ = 0, int writeW = 0, int writeV = 0>
void	mixed_blas (const double2 &a, const double2 &b, const double2 &c, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &v)

void	initReduce ()

void	endReduce ()

cudaStream_t *	getStream ()

template<int NXZ, typename RegType , typename StoreType , typename yType , int M, template< int, typename, typename > class Functor, typename write , typename T >
void	multiBlas (const coeff_array< T > &a, const coeff_array< T > &b, const coeff_array< T > &c, std::vector< ColorSpinorField > &x, std::vector< ColorSpinorField > &y, std::vector< ColorSpinorField > &z, std::vector< ColorSpinorField > &w, int length)

template<int NXZ, template< int MXZ, typename Float, typename FloatN > class Functor, typename write , typename T >
void	multiBlas (const coeff_array< T > &a, const coeff_array< T > &b, const coeff_array< T > &c, CompositeColorSpinorField &x, CompositeColorSpinorField &y, CompositeColorSpinorField &z, CompositeColorSpinorField &w)

template<int NXZ, template< int MXZ, typename Float, typename FloatN > class Functor, typename write , typename T >
void	mixedMultiBlas (const coeff_array< T > &a, const coeff_array< T > &b, const coeff_array< T > &c, CompositeColorSpinorField &x, CompositeColorSpinorField &y, CompositeColorSpinorField &z, CompositeColorSpinorField &w)

void	caxpy_recurse (const Complex *a_, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, int i_idx, int j_idx, int upper)

void	caxpyz_recurse (const Complex *a_, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, std::vector< ColorSpinorField *> &z, int i, int j, int pass, int upper)

cudaEvent_t *	getReduceEvent ()

bool	getFastReduce ()

void	initFastReduce (int words)

void	completeFastReduce (int32_t words)

template<typename doubleN , typename ReduceType , typename FloatN , int M, int NXZ, typename Arg >
void	multiReduceLaunch (doubleN result[], Arg &arg, const TuneParam &tp, const cudaStream_t &stream, Tunable &tunable)

template<typename doubleN , typename ReduceType , typename RegType , typename StoreType , typename yType , int M, int NXZ, template< int MXZ, typename ReducerType, typename Float, typename FloatN > class Reducer, typename write , typename T >
void	multiReduce (doubleN result[], const coeff_array< T > &a, const coeff_array< T > &b, const coeff_array< T > &c, std::vector< ColorSpinorField > &x, std::vector< ColorSpinorField > &y, std::vector< ColorSpinorField > &z, std::vector< ColorSpinorField > &w, int length)

template<int NXZ, typename doubleN , typename ReduceType , template< int MXZ, typename ReducerType, typename Float, typename FloatN > class Reducer, typename write , bool siteUnroll, typename T >
void	multiReduce (doubleN result[], const coeff_array< T > &a, const coeff_array< T > &b, const coeff_array< T > &c, CompositeColorSpinorField &x, CompositeColorSpinorField &y, CompositeColorSpinorField &z, CompositeColorSpinorField &w)

template<int NXZ, typename doubleN , typename ReduceType , template< int MXZ, typename ReducerType, typename Float, typename FloatN > class Reducer, typename write , bool siteUnroll, typename T >
void	mixedMultiReduce (doubleN result[], const coeff_array< T > &a, const coeff_array< T > &b, const coeff_array< T > &c, CompositeColorSpinorField &x, CompositeColorSpinorField &y, CompositeColorSpinorField &z, CompositeColorSpinorField &w)

template<int NXZ, typename doubleN , typename ReduceType , template< int MXZ, typename ReducerType, typename Float, typename FloatN > class ReducerDiagonal, typename writeDiagonal , template< int MXZ, typename ReducerType, typename Float, typename FloatN > class ReducerOffDiagonal, typename writeOffDiagonal , bool siteUnroll, typename T >
void	multiReduce (doubleN result[], const coeff_array< T > &a, const coeff_array< T > &b, const coeff_array< T > &c, CompositeColorSpinorField &x, CompositeColorSpinorField &y, CompositeColorSpinorField &z, CompositeColorSpinorField &w, int i, int j)

template<template< int MXZ, typename ReducerType, typename Float, typename FloatN > class ReducerDiagonal, typename writeDiagonal , template< int MXZ, typename ReducerType, typename Float, typename FloatN > class ReducerOffDiagonal, typename writeOffDiagonal >
void	multiReduce_recurse (Complex *result, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, std::vector< ColorSpinorField *> &z, std::vector< ColorSpinorField *> &w, int i_idx, int j_idx, bool hermitian, unsigned int tile_size)

void	initFastReduce (int32_t words)

template<typename doubleN , typename ReduceType , typename FloatN , int M, typename Arg >
doubleN	reduceLaunch (Arg &arg, const TuneParam &tp, const cudaStream_t &stream, Tunable &tunable)

template<typename doubleN , typename ReduceType , typename RegType , typename StoreType , typename zType , int M, template< typename ReducerType, typename Float, typename FloatN > class Reducer, int writeX, int writeY, int writeZ, int writeW, int writeV>
doubleN	nativeReduce (const double2 &a, const double2 &b, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &v, int length)

template<typename doubleN , typename ReduceType , template< typename ReducerType, typename Float, typename FloatN > class Reducer, int writeX, int writeY, int writeZ, int writeW, int writeV, bool siteUnroll>
doubleN	uni_reduce (const double2 &a, const double2 &b, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &v)

template<typename doubleN , typename ReduceType , template< typename ReducerType, typename Float, typename FloatN > class Reducer, int writeX, int writeY, int writeZ, int writeW, int writeV, bool siteUnroll>
doubleN	mixed_reduce (const double2 &a, const double2 &b, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &v)

Variables
unsigned long long	flops

unsigned long long	bytes

static __constant__ signed char	Amatrix_d [MAX_MATRIX_SIZE]

static __constant__ signed char	Bmatrix_d [MAX_MATRIX_SIZE]

static __constant__ signed char	Cmatrix_d [MAX_MATRIX_SIZE]

static signed char *	Amatrix_h

static signed char *	Bmatrix_h

static signed char *	Cmatrix_h

static __constant__ signed char	arg_buffer [MAX_MATRIX_SIZE]

static __constant__ signed char	Amatrix_d [MAX_MATRIX_SIZE]

static __constant__ signed char	Bmatrix_d [MAX_MATRIX_SIZE]

static __constant__ signed char	Cmatrix_d [MAX_MATRIX_SIZE]

static signed char *	Amatrix_h

static signed char *	Bmatrix_h

static signed char *	Cmatrix_h

static __constant__ signed char	arg_buffer [MAX_MATRIX_SIZE]

static cudaStream_t *	blasStream

Function Documentation

◆ _caxpby() [1/3]

__device__ __host__ void quda::blas::_caxpby	(	const float2 &	a,
		const float4 &	x,
		const float2 &	b,
		float4 &	y
	)

Functor to perform the operation y = a*x + b*y (complex-valued)

Definition at line 150 of file blas_core.cuh.

Referenced by quda::blas::caxpby_< Float2, FloatN >::operator()(), and quda::blas::caxpbypczw_< Float2, FloatN >::operator()().

Here is the caller graph for this function:

◆ _caxpby() [2/3]

__device__ __host__ void quda::blas::_caxpby	(	const float2 &	a,
		const float2 &	x,
		const float2 &	b,
		float2 &	y
	)

Definition at line 172 of file blas_core.cuh.

◆ _caxpby() [3/3]

__device__ __host__ void quda::blas::_caxpby	(	const double2 &	a,
		const double2 &	x,
		const double2 &	b,
		double2 &	y
	)

Definition at line 186 of file blas_core.cuh.

◆ _caxpy() [1/3]

__device__ __host__ void quda::blas::_caxpy	(	const float2 &	a,
		const float4 &	x,
		float4 &	y
	)

inline

Functor to perform the operation y += a * x (complex-valued)

Definition at line 110 of file blas_core.cuh.

Here is the caller graph for this function:

◆ _caxpy() [2/3]

__device__ __host__ void quda::blas::_caxpy	(	const float2 &	a,
		const float2 &	x,
		float2 &	y
	)

inline

Definition at line 122 of file blas_core.cuh.

◆ _caxpy() [3/3]

__device__ __host__ void quda::blas::_caxpy	(	const double2 &	a,
		const double2 &	x,
		double2 &	y
	)

inline

Definition at line 130 of file blas_core.cuh.

◆ ax()

void quda::blas::ax	(	double	a,
		ColorSpinorField &	x
	)

Definition at line 508 of file blas_quda.cu.

References quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::x.

Referenced by quda::ax(), benchmark(), quda::CG::blocksolve(), cloverQuda(), quda::TRLM::computeKeptRitz(), quda::EigenSolver::computeSVD(), dslashQuda(), dslashQuda_4dpc(), dslashQuda_mdwf(), quda::GMResDR::FlexArnoldiProcedure(), quda::genericCompare(), quda::Deflation::increment(), invert_test(), invertMultiShiftQuda(), invertMultiSrcQuda(), invertQuda(), main(), quda::massRescale(), MatDagMatQuda(), MatQuda(), quda::TRLM::operator()(), quda::MPCG::operator()(), quda::MR::operator()(), quda::CACG::operator()(), quda::MinResExt::operator()(), quda::DiracStaggeredPC::prepare(), quda::DiracImprovedStaggeredPC::prepare(), quda::DiracStaggeredPC::reconstruct(), quda::DiracImprovedStaggeredPC::reconstruct(), quda::GMResDR::RestartVZH(), test(), and quda::IncEigCG::UpdateVm().

Here is the caller graph for this function:

◆ axpby()

void quda::blas::axpby	(	double	a,
		ColorSpinorField &	x,
		double	b,
		ColorSpinorField &	y
	)

inline

Definition at line 36 of file blas_quda.h.

References axpbyz().

Referenced by benchmark(), quda::CG3::operator()(), quda::CG3NE::operator()(), quda::CGNE::operator()(), quda::CGNR::operator()(), quda::CACGNE::operator()(), quda::CACGNR::operator()(), quda::MultiShiftCG::operator()(), and test().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ axpbyz()

void quda::blas::axpbyz	(	double	a,
		ColorSpinorField &	x,
		double	b,
		ColorSpinorField &	y,
		ColorSpinorField &	z
	)

Definition at line 496 of file blas_quda.cu.

References quda::LatticeField::Precision(), quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::x, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::y, and quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::z.

Referenced by axpby(), axpy(), mxpy(), xpay(), xpayz(), and xpy().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ axpbyzNorm()

double quda::blas::axpbyzNorm	(	double	a,
		ColorSpinorField &	x,
		double	b,
		ColorSpinorField &	y,
		ColorSpinorField &	z
	)

Definition at line 734 of file reduce_quda.cu.

Referenced by axpyNorm(), xmyNorm(), and xpayz().

Here is the caller graph for this function:

◆ axpy()

void quda::blas::axpy	(	double	a,
		ColorSpinorField &	x,
		ColorSpinorField &	y
	)

inline

Definition at line 35 of file blas_quda.h.

References axpbyz().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ axpyBzpcx() [1/2]

void quda::blas::axpyBzpcx	(	double	a,
		ColorSpinorField &	x,
		ColorSpinorField &	y,
		double	b,
		ColorSpinorField &	z,
		double	c
	)

Definition at line 541 of file blas_quda.cu.

References quda::LatticeField::Precision(), quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::x, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::y, and quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::z.

Referenced by quda::ShiftUpdate::apply(), axpyBzpcx(), benchmark(), cDotProductNormB(), test(), and xpayz().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ axpyBzpcx() [2/2]

void quda::blas::axpyBzpcx	(	const double *	a,
		std::vector< ColorSpinorField *> &	x,
		std::vector< ColorSpinorField *> &	y,
		const double *	b,
		ColorSpinorField &	z,
		const double *	c
	)

Compute the vectorized "axpyBzpcx" over the set of ColorSpinorFields, where the third vector, z, is constant over the batch. E.g., it computes.

y = a * x + y, x = b * z + c * x

The dimensions of a, b, c are the same as the size of x and y, with a maximum size of 16.

Parameters

a[in]	Array of coefficients
b[in]	Array of coefficients
c[in]	Array of coefficients
x[in,out]	vector of ColorSpinorFields
y[in,out]	vector of ColorSpinorFields
z[in]	input ColorSpinorField

Definition at line 985 of file multi_blas_quda.cu.

References axpyBzpcx(), and MAX_MULTI_BLAS_N.

Here is the call graph for this function:

◆ axpyCGNorm()

Complex quda::blas::axpyCGNorm	(	double	a,
		ColorSpinorField &	x,
		ColorSpinorField &	y
	)

Definition at line 796 of file reduce_quda.cu.

References quda::LatticeField::Precision().

Referenced by cDotProductNormB(), quda::CG::operator()(), quda::PreconCG::operator()(), and quda::MultiShiftCG::operator()().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ axpyNorm()

double quda::blas::axpyNorm	(	double	a,
		ColorSpinorField &	x,
		ColorSpinorField &	y
	)

inline

Definition at line 74 of file blas_quda.h.

References axpbyzNorm().

Referenced by benchmark(), quda::IncEigCG::eigCGsolve(), quda::CG::operator()(), quda::CG3::operator()(), quda::CG3NE::operator()(), and test().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ axpyReDot()

double quda::blas::axpyReDot	(	double	a,
		ColorSpinorField &	x,
		ColorSpinorField &	y
	)

Definition at line 740 of file reduce_quda.cu.

Referenced by benchmark(), quda::MultiShiftCG::operator()(), test(), and xpayz().

Here is the caller graph for this function:

◆ axpyZpbx()

void quda::blas::axpyZpbx	(	double	a,
		ColorSpinorField &	x,
		ColorSpinorField &	y,
		ColorSpinorField &	z,
		double	b
	)

Definition at line 552 of file blas_quda.cu.

References quda::LatticeField::Precision(), quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::x, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::y, and quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::z.

Referenced by benchmark(), quda::IncEigCG::eigCGsolve(), quda::CG::operator()(), quda::PreconCG::operator()(), quda::MultiShiftCG::operator()(), test(), and xpayz().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ blasKernel()

template<typename FloatN , int M, typename Arg >

__global__ void quda::blas::blasKernel ( Arg arg )

Generic blas kernel with four loads and up to four stores.

Definition at line 43 of file blas_core.cuh.

References parity.

◆ cabxpyAx()

void quda::blas::cabxpyAx	(	double	a,
		const Complex &	b,
		ColorSpinorField &	x,
		ColorSpinorField &	y
	)

Definition at line 591 of file blas_quda.cu.

References IMAG, REAL, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::x, and quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::y.

Referenced by benchmark(), test(), and xpayz().

Here is the caller graph for this function:

◆ cabxpyzAxNorm()

double quda::blas::cabxpyzAxNorm	(	double	a,
		const Complex &	b,
		ColorSpinorField &	x,
		ColorSpinorField &	y,
		ColorSpinorField &	z
	)

Definition at line 758 of file reduce_quda.cu.

References IMAG, and REAL.

Referenced by benchmark(), cDotProductNormB(), quda::GCR::operator()(), and test().

Here is the caller graph for this function:

◆ caxpby()

void quda::blas::caxpby	(	const Complex &	a,
		ColorSpinorField &	x,
		const Complex &	b,
		ColorSpinorField &	y
	)

Definition at line 523 of file blas_quda.cu.

References IMAG, REAL, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::x, and quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::y.

Referenced by quda::BiCGstabLUpdate::apply(), benchmark(), quda::EigenSolver::chebyOp(), quda::EigenSolver::computeEvals(), quda::BiCGstabL::operator()(), test(), and xpayz().

Here is the caller graph for this function:

◆ caxpbypczw()

void quda::blas::caxpbypczw	(	const Complex &	a,
		ColorSpinorField &	x,
		const Complex &	b,
		ColorSpinorField &	y,
		const Complex &	c,
		ColorSpinorField &	z,
		ColorSpinorField &	w
	)

Definition at line 528 of file blas_quda.cu.

References IMAG, REAL, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::w, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::x, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::y, and quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::z.

Referenced by quda::EigenSolver::chebyOp(), and xpayz().

Here is the caller graph for this function:

◆ caxpbypzYmbw()

void quda::blas::caxpbypzYmbw	(	const Complex &	a,
		ColorSpinorField &	x,
		const Complex &	b,
		ColorSpinorField &	y,
		ColorSpinorField &	z,
		ColorSpinorField &	w
	)

Definition at line 585 of file blas_quda.cu.

References IMAG, REAL, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::w, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::x, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::y, and quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::z.

Referenced by benchmark(), quda::BiCGstab::operator()(), test(), and xpayz().

Here is the caller graph for this function:

◆ caxpbypzYmbwcDotProductUYNormY()

double3 quda::blas::caxpbypzYmbwcDotProductUYNormY	(	const Complex &	a,
		ColorSpinorField &	x,
		const Complex &	b,
		ColorSpinorField &	y,
		ColorSpinorField &	z,
		ColorSpinorField &	w,
		ColorSpinorField &	u
	)

Definition at line 783 of file reduce_quda.cu.

References IMAG, quda::LatticeField::Precision(), and REAL.

Referenced by benchmark(), cDotProductNormB(), quda::BiCGstab::operator()(), and test().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ caxpy() [1/3]

void quda::blas::caxpy	(	const Complex &	a,
		ColorSpinorField &	x,
		ColorSpinorField &	y
	)

Definition at line 512 of file blas_quda.cu.

References quda::LatticeField::Precision(), quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::x, and quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::y.

Referenced by quda::BiCGstabLUpdate::apply(), benchmark(), quda::EigenSolver::blockOrthogonalize(), quda::CG::blocksolve(), caxpy(), caxpyz_recurse(), cDotProductNormB(), quda::TRLM::computeKeptRitz(), quda::EigenSolver::deflate(), quda::EigenSolver::deflateSVD(), quda::Deflation::increment(), quda::Deflation::operator()(), quda::CG::operator()(), quda::BiCGstab::operator()(), quda::MPBiCGstab::operator()(), quda::BiCGstabL::operator()(), quda::CACG::operator()(), quda::CAGCR::operator()(), quda::MultiShiftCG::operator()(), quda::MinResExt::operator()(), quda::orthoDir(), quda::BiCGstabL::orthoDir(), quda::Deflation::reduce(), quda::IncEigCG::RestartVT(), quda::GMResDR::RestartVZH(), test(), quda::updateAp(), quda::BiCGstabL::updateR(), quda::updateSolution(), quda::GMResDR::UpdateSolution(), quda::BiCGstabL::updateUend(), quda::Deflation::verify(), and xpayz().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ caxpy() [2/3]

void quda::blas::caxpy	(	const Complex *	a,
		std::vector< ColorSpinorField *> &	x,
		std::vector< ColorSpinorField *> &	y
	)

Compute the block "caxpy" over the set of ColorSpinorFields. E.g., it computes:

y = x * a + y

The dimensions of a can be rectangular, e.g., the widths of x and y need not be the same.

Parameters

a[in]	Matrix of coefficients
x[in]	vector of input ColorSpinorFields
y[in,out]	vector of input/output ColorSpinorFields

Definition at line 732 of file multi_blas_quda.cu.

References caxpy_recurse().

Here is the call graph for this function:

◆ caxpy() [3/3]

void quda::blas::caxpy	(	const Complex *	a,
		ColorSpinorField &	x,
		ColorSpinorField &	y
	)

This is a wrapper for calling the block "caxpy" with a composite ColorSpinorField. E.g., it computes.

y = x * a + y

Parameters

a[in]	Matrix of coefficients
x[in]	Input matrix
y[in,out]	Computed output matrix

Definition at line 763 of file multi_blas_quda.cu.

References caxpy(), and quda::ColorSpinorField::Components().

Here is the call graph for this function:

◆ Caxpy_() [1/3]

__device__ __host__ void quda::blas::Caxpy_	(	const double2 &	a,
		const double2 &	x,
		double2 &	y
	)

Functor to perform the operation y += a * x (complex-valued)

Definition at line 232 of file reduce_core.cuh.

Referenced by quda::blas::caxpyNorm2< ReduceType, Float2, FloatN >::operator()(), quda::blas::caxpyxmaznormx< ReduceType, Float2, FloatN >::operator()(), quda::blas::cabxpyzaxnorm< ReduceType, Float2, FloatN >::operator()(), quda::blas::caxpydotzy< ReduceType, Float2, FloatN >::operator()(), and quda::blas::caxpbypzYmbwcDotProductUYNormY_< ReduceType, Float2, FloatN >::operator()().

Here is the caller graph for this function:

◆ Caxpy_() [2/3]

__device__ __host__ void quda::blas::Caxpy_	(	const float2 &	a,
		const float2 &	x,
		float2 &	y
	)

Definition at line 239 of file reduce_core.cuh.

◆ Caxpy_() [3/3]

__device__ __host__ void quda::blas::Caxpy_	(	const float2 &	a,
		const float4 &	x,
		float4 &	y
	)

Definition at line 246 of file reduce_core.cuh.

◆ caxpy_L() [1/2]

void quda::blas::caxpy_L	(	const Complex *	a,
		std::vector< ColorSpinorField *> &	x,
		std::vector< ColorSpinorField *> &	y
	)

Compute the block "caxpy_L" over the set of ColorSpinorFields. E.g., it computes:

y = x * a + y

Where 'a' must be a square, lower triangular matrix.

Parameters

a[in]	Matrix of coefficients
x[in]	vector of input ColorSpinorFields
y[in,out]	vector of input/output ColorSpinorFields

Definition at line 750 of file multi_blas_quda.cu.

References caxpy_recurse(), and errorQuda.

Referenced by caxpy_L(), and cDotProductNormB().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ caxpy_L() [2/2]

void quda::blas::caxpy_L	(	const Complex *	a,
		ColorSpinorField &	x,
		ColorSpinorField &	y
	)

This is a wrapper for calling the block "caxpy_L" with a composite ColorSpinorField. E.g., it computes:

y = x * a + y

Parameters

a[in]	Matrix of coefficients
x[in]	Input matrix
y[in,out]	Computed output matrix

Definition at line 767 of file multi_blas_quda.cu.

References caxpy_L(), and quda::ColorSpinorField::Components().

Here is the call graph for this function:

◆ caxpy_recurse()

void quda::blas::caxpy_recurse	(	const Complex *	a_,
		std::vector< ColorSpinorField *> &	x,
		std::vector< ColorSpinorField *> &	y,
		int	i_idx,
		int	j_idx,
		int	upper
	)

Definition at line 562 of file multi_blas_quda.cu.

References quda::count, and MAX_MULTI_BLAS_N.

Referenced by caxpy(), caxpy_L(), and caxpy_U().

Here is the caller graph for this function:

◆ caxpy_U() [1/2]

void quda::blas::caxpy_U	(	const Complex *	a,
		std::vector< ColorSpinorField *> &	x,
		std::vector< ColorSpinorField *> &	y
	)

Compute the block "caxpy_U" over the set of ColorSpinorFields. E.g., it computes:

y = x * a + y

Where 'a' must be a square, upper triangular matrix.

Parameters

a[in]	Matrix of coefficients
x[in]	vector of input ColorSpinorFields
y[in,out]	vector of input/output ColorSpinorFields

Definition at line 738 of file multi_blas_quda.cu.

References caxpy_recurse(), and errorQuda.

Referenced by caxpy_U(), and cDotProductNormB().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ caxpy_U() [2/2]

void quda::blas::caxpy_U	(	const Complex *	a,
		ColorSpinorField &	x,
		ColorSpinorField &	y
	)

This is a wrapper for calling the block "caxpy_U" with a composite ColorSpinorField. E.g., it computes.

y = x * a + y

Parameters

a[in]	Matrix of coefficients
x[in]	Input matrix
y[in,out]	Computed output matrix

Definition at line 765 of file multi_blas_quda.cu.

References caxpy_U(), and quda::ColorSpinorField::Components().

Here is the call graph for this function:

◆ caxpyBxpz() [1/2]

void quda::blas::caxpyBxpz	(	const Complex &	a,
		ColorSpinorField &	x,
		ColorSpinorField &	y,
		const Complex &	b,
		ColorSpinorField &	z
	)

Definition at line 574 of file blas_quda.cu.

References IMAG, quda::LatticeField::Precision(), REAL, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::x, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::y, and quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::z.

Referenced by benchmark(), caxpyBxpz(), cDotProductNormB(), test(), quda::BiCGstabL::updateXRend(), and xpayz().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ caxpyBxpz() [2/2]

void quda::blas::caxpyBxpz	(	const Complex *	a_,
		std::vector< ColorSpinorField *> &	x_,
		ColorSpinorField &	y_,
		const Complex *	b_,
		ColorSpinorField &	z_
	)

Compute the vectorized "caxpyBxpz" over the set of ColorSpinorFields, where the second and third vectors, y and z, are constant over the batch. E.g., it computes:

y = a * x + y, z = b * x + z

The dimensions of a, b are the same as the size of x, with a maximum size of 16.

Parameters

a[in]	Array of coefficients
b[in]	Array of coefficients
x[in]	vector of ColorSpinorFields
y[in,out]	input ColorSpinorField
z[in,out]	input ColorSpinorField

Definition at line 1029 of file multi_blas_quda.cu.

References caxpyBxpz(), and MAX_MULTI_BLAS_N.

Here is the call graph for this function:

◆ caxpyBzpx()

void quda::blas::caxpyBzpx	(	const Complex &	a,
		ColorSpinorField &	x,
		ColorSpinorField &	y,
		const Complex &	b,
		ColorSpinorField &	z
	)

Definition at line 563 of file blas_quda.cu.

References IMAG, quda::LatticeField::Precision(), REAL, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::x, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::y, and quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::z.

Referenced by benchmark(), test(), and xpayz().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ caxpyDotzy()

Complex quda::blas::caxpyDotzy	(	const Complex &	a,
		ColorSpinorField &	x,
		ColorSpinorField &	y,
		ColorSpinorField &	z
	)

Definition at line 771 of file reduce_quda.cu.

References IMAG, and REAL.

Referenced by benchmark(), cDotProductNormB(), quda::orthoDir(), quda::BiCGstabL::orthoDir(), and test().

Here is the caller graph for this function:

◆ caxpyNorm()

double quda::blas::caxpyNorm	(	const Complex &	a,
		ColorSpinorField &	x,
		ColorSpinorField &	y
	)

Definition at line 746 of file reduce_quda.cu.

References IMAG, and REAL.

Referenced by benchmark(), cDotProductNormB(), quda::CG3::operator()(), quda::CG3NE::operator()(), and test().

Here is the caller graph for this function:

◆ caxpyXmaz()

void quda::blas::caxpyXmaz	(	const Complex &	a,
		ColorSpinorField &	x,
		ColorSpinorField &	y,
		ColorSpinorField &	z
	)

Definition at line 597 of file blas_quda.cu.

References IMAG, REAL, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::x, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::y, and quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::z.

Referenced by benchmark(), quda::MR::operator()(), test(), and xpayz().

Here is the caller graph for this function:

◆ caxpyXmazMR()

void quda::blas::caxpyXmazMR	(	const Complex &	a,
		ColorSpinorField &	x,
		ColorSpinorField &	y,
		ColorSpinorField &	z
	)

Definition at line 603 of file blas_quda.cu.

References commAsyncReduction(), errorQuda, IMAG, quda::LatticeField::Location(), QUDA_CPU_FIELD_LOCATION, REAL, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::x, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::y, and quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::z.

Referenced by quda::MR::operator()(), and xpayz().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ caxpyXmazNormX()

double quda::blas::caxpyXmazNormX	(	const Complex &	a,
		ColorSpinorField &	x,
		ColorSpinorField &	y,
		ColorSpinorField &	z
	)

Definition at line 752 of file reduce_quda.cu.

References IMAG, and REAL.

Referenced by benchmark(), cDotProductNormB(), and test().

Here is the caller graph for this function:

◆ caxpyz() [1/2]

void quda::blas::caxpyz	(	const Complex *	a,
		std::vector< ColorSpinorField *> &	x,
		std::vector< ColorSpinorField *> &	y,
		std::vector< ColorSpinorField *> &	z
	)

Compute the block "caxpyz" over the set of ColorSpinorFields. E.g., it computes:

z = x * a + y

The dimensions of a can be rectangular, e.g., the widths of x and y need not be the same, though the maximum width for both is 16.

Parameters

a[in]	Matrix of coefficients
x[in]	vector of input ColorSpinorFields
y[in]	vector of input ColorSpinorFields
z[out]	vector of output ColorSpinorFields

Definition at line 949 of file multi_blas_quda.cu.

References caxpyz_recurse().

Referenced by caxpyz(), cDotProductNormB(), and quda::CACG::operator()().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ caxpyz() [2/2]

void quda::blas::caxpyz	(	const Complex *	a,
		ColorSpinorField &	x,
		ColorSpinorField &	y,
		ColorSpinorField &	z
	)

This is a wrapper for calling the block "caxpyz" with a composite ColorSpinorField. E.g., it computes.

z = x * a + y

Parameters

a[in]	Matrix of coefficients
x[in]	Input matrix
y[in]	Input matrix
z[out]	Computed output matrix

Definition at line 973 of file multi_blas_quda.cu.

References caxpyz(), and quda::ColorSpinorField::Components().

Here is the call graph for this function:

◆ caxpyz_L() [1/2]

void quda::blas::caxpyz_L	(	const Complex *	a,
		std::vector< ColorSpinorField *> &	x,
		std::vector< ColorSpinorField *> &	y,
		std::vector< ColorSpinorField *> &	z
	)

Compute the block "caxpyz_L" over the set of ColorSpinorFields. E.g., it computes:

z = x * a + y

Where 'a' is assumed to be lower triangular

Parameters

a[in]	Matrix of coefficients
x[in]	vector of input ColorSpinorFields
y[in]	vector of input ColorSpinorFields
z[out]	vector of output ColorSpinorFields

Definition at line 964 of file multi_blas_quda.cu.

References caxpyz_recurse().

Referenced by caxpyz_L(), and cDotProductNormB().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ caxpyz_L() [2/2]

void quda::blas::caxpyz_L	(	const Complex *	a,
		ColorSpinorField &	x,
		ColorSpinorField &	y,
		ColorSpinorField &	z
	)

This is a wrapper for calling the block "caxpyz_L" with a composite ColorSpinorField. E.g., it computes:

z = x * a + y

Parameters

a[in]	Matrix of coefficients
x[in]	Input matrix
y[in]	Input matrix
z[out]	Computed output matrix

Definition at line 981 of file multi_blas_quda.cu.

References caxpyz_L(), and quda::ColorSpinorField::Components().

Here is the call graph for this function:

◆ caxpyz_recurse()

void quda::blas::caxpyz_recurse	(	const Complex *	a_,
		std::vector< ColorSpinorField *> &	x,
		std::vector< ColorSpinorField *> &	y,
		std::vector< ColorSpinorField *> &	z,
		int	i,
		int	j,
		int	pass,
		int	upper
	)

Definition at line 770 of file multi_blas_quda.cu.

References caxpy(), quda::count, and MAX_MULTI_BLAS_N.

Referenced by caxpyz(), caxpyz_L(), and caxpyz_U().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ caxpyz_U() [1/2]

void quda::blas::caxpyz_U	(	const Complex *	a,
		std::vector< ColorSpinorField *> &	x,
		std::vector< ColorSpinorField *> &	y,
		std::vector< ColorSpinorField *> &	z
	)

Compute the block "caxpyz_U" over the set of ColorSpinorFields. E.g., it computes:

z = x * a + y

Where 'a' is assumed to be upper triangular.

Parameters

a[in]	Matrix of coefficients
x[in]	vector of input ColorSpinorFields
y[in]	vector of input ColorSpinorFields
z[out]	vector of output ColorSpinorFields

Definition at line 956 of file multi_blas_quda.cu.

References caxpyz_recurse().

Referenced by caxpyz_U(), and cDotProductNormB().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ caxpyz_U() [2/2]

void quda::blas::caxpyz_U	(	const Complex *	a,
		ColorSpinorField &	x,
		ColorSpinorField &	y,
		ColorSpinorField &	z
	)

This is a wrapper for calling the block "caxpyz_U" with a composite ColorSpinorField. E.g., it computes:

z = x * a + y

Parameters

a[in]	Matrix of coefficients
x[in]	Input matrix
y[in]	Input matrix
z[out]	Computed output matrix

Definition at line 977 of file multi_blas_quda.cu.

References caxpyz_U(), and quda::ColorSpinorField::Components().

Here is the call graph for this function:

◆ cdot_() [1/3]

template<typename ReduceType >

__device__ __host__ void quda::blas::cdot_	(	ReduceType &	sum,
		const double2 &	a,
		const double2 &	b
	)

Returns complex-valued dot product of x and y

Definition at line 199 of file multi_reduce_core.cuh.

Referenced by quda::blas::cabxpyzaxnorm< ReduceType, Float2, FloatN >::flops().

Here is the caller graph for this function:

◆ cdot_() [2/3]

template<typename ReduceType >

__device__ __host__ void quda::blas::cdot_	(	ReduceType &	sum,
		const float2 &	a,
		const float2 &	b
	)

Definition at line 208 of file multi_reduce_core.cuh.

◆ cdot_() [3/3]

template<typename ReduceType >

__device__ __host__ void quda::blas::cdot_	(	ReduceType &	sum,
		const float4 &	a,
		const float4 &	b
	)

Definition at line 217 of file multi_reduce_core.cuh.

◆ cdotNormA_()

template<typename ReduceType , typename InputType >

__device__ __host__ void quda::blas::cdotNormA_	(	ReduceType &	sum,
		const InputType &	a,
		const InputType &	b
	)

Returns the dot product (x,y) first, followed by the norm of x.

Definition at line 385 of file reduce_core.cuh.

References quda::sum().

Here is the call graph for this function:

◆ cdotNormB_()

template<typename ReduceType , typename InputType >

__device__ __host__ void quda::blas::cdotNormB_	(	ReduceType &	sum,
		const InputType &	a,
		const InputType &	b
	)

Returns the dot product (x,y) first, followed by the norm of y.

Definition at line 398 of file reduce_core.cuh.

References quda::sum().

Here is the call graph for this function:

◆ cDotProduct() [1/2]

Complex quda::blas::cDotProduct	(	ColorSpinorField &	x,
		ColorSpinorField &	y
	)

Definition at line 764 of file reduce_quda.cu.

Referenced by benchmark(), quda::EigenSolver::blockOrthogonalize(), quda::CG::blocksolve(), cDotProductNormB(), quda::computeBeta(), quda::EigenSolver::computeEvals(), quda::MPBiCGstab::computeMatrixPowers(), quda::EigenSolver::computeSVD(), quda::BiCGstabL::computeTau(), quda::EigenSolver::deflate(), quda::EigenSolver::deflateSVD(), quda::GMResDR::FlexArnoldiProcedure(), quda::MG::generateNullVectors(), quda::Deflation::increment(), invertMultiShiftQuda(), invertQuda(), quda::Deflation::operator()(), quda::CG::operator()(), quda::CG3::operator()(), quda::CG3NE::operator()(), quda::BiCGstab::operator()(), quda::BiCGstabL::operator()(), quda::CACG::operator()(), quda::MultiShiftCG::operator()(), quda::MinResExt::operator()(), quda::GMResDR::operator()(), quda::orthoDir(), quda::BiCGstabL::orthoDir(), quda::EigCGArgs::RestartLanczos(), quda::GMResDR::RestartVZH(), quda::CAGCR::solve(), quda::MinResExt::solve(), test(), quda::MG::verify(), and xmyNorm().

Here is the caller graph for this function:

◆ cDotProduct() [2/2]

void quda::blas::cDotProduct	(	Complex *	result,
		std::vector< ColorSpinorField *> &	a,
		std::vector< ColorSpinorField *> &	b
	)

Computes the matrix of inner products between the vector set a and the vector set b.

Parameters

result[out]	Matrix of inner product result[i][j] = (a[j],b[i])
a[in]	set of input ColorSpinorFields
b[in]	set of input ColorSpinorFields

Definition at line 1031 of file multi_reduce_quda.cu.

References quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >::apply(), errorQuda, and reduceDoubleArray().

Here is the call graph for this function:

◆ cDotProductCopy()

void quda::blas::cDotProductCopy	(	Complex *	result,
		std::vector< ColorSpinorField *> &	a,
		std::vector< ColorSpinorField *> &	b,
		std::vector< ColorSpinorField *> &	c
	)

Computes the matrix of inner products between the vector set a and the vector set b, and copies b into c.

Parameters

result[out]	Matrix of inner product result[i][j] = (a[j],b[i])
a[in]	set of input ColorSpinorFields
b[in]	set of input ColorSpinorFields
c[out]	set of output ColorSpinorFields

Definition at line 1110 of file multi_reduce_quda.cu.

References quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >::apply(), errorQuda, and reduceDoubleArray().

Referenced by cDotProductNormB().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ cDotProductNormA()

double3 quda::blas::cDotProductNormA	(	ColorSpinorField &	a,
		ColorSpinorField &	b
	)

Definition at line 778 of file reduce_quda.cu.

Referenced by benchmark(), cDotProductNormB(), quda::CG::operator()(), quda::BiCGstab::operator()(), quda::BiCGstabL::operator()(), quda::GCR::operator()(), quda::MR::operator()(), quda::SD::operator()(), quda::Deflation::reduce(), test(), quda::Deflation::verify(), and xmyNorm().

Here is the caller graph for this function:

◆ cDotProductNormB()

double3 quda::blas::cDotProductNormB	(	ColorSpinorField &	a,
		ColorSpinorField &	b
	)

inline

Return (a,b) and ||b||^2 - implemented using cDotProductNormA.

Definition at line 83 of file blas_quda.h.

References axpyBzpcx(), axpyCGNorm(), cabxpyzAxNorm(), caxpbypzYmbwcDotProductUYNormY(), caxpy(), caxpy_L(), caxpy_U(), caxpyBxpz(), caxpyDotzy(), caxpyNorm(), caxpyXmazNormX(), caxpyz(), caxpyz_L(), caxpyz_U(), cDotProduct(), cDotProductCopy(), cDotProductNormA(), doubleCG3InitNorm(), doubleCG3UpdateNorm(), hDotProduct(), hDotProduct_Anorm(), HeavyQuarkResidualNorm(), quadrupleCG3InitNorm(), quadrupleCG3UpdateNorm(), quadrupleCGReduction(), reDotProduct(), tripleCGReduction(), and xpyHeavyQuarkResidualNorm().

Referenced by benchmark(), and test().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ completeFastReduce()

void quda::blas::completeFastReduce ( int32_t words )

Definition at line 43 of file reduce_quda.cu.

References quda::count, and h_reduce.

Referenced by multiReduceLaunch(), and reduceLaunch().

Here is the caller graph for this function:

◆ copy()

void quda::blas::copy	(	ColorSpinorField &	dst,
		const ColorSpinorField &	src
	)

Definition at line 355 of file copy_quda.cu.

References quda::blas::copy_ns::copy(), quda::LatticeField::Location(), and QUDA_CUDA_FIELD_LOCATION.

Here is the call graph for this function:

Here is the caller graph for this function:

◆ cxpaypbz()

void quda::blas::cxpaypbz	(	ColorSpinorField &	x,
		const Complex &	b,
		ColorSpinorField &	y,
		const Complex &	c,
		ColorSpinorField &	z
	)

Definition at line 535 of file blas_quda.cu.

References IMAG, REAL, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::x, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::y, and quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::z.

Referenced by benchmark(), quda::BiCGstab::operator()(), test(), and xpayz().

Here is the caller graph for this function:

◆ dot_() [1/3]

template<typename ReduceType >

__device__ __host__ void quda::blas::dot_	(	ReduceType &	sum,
		const double2 &	a,
		const double2 &	b
	)

Return the real dot product of x and y. Broken at the moment — need to update reDotProduct with permuting, etc. of cDotProduct below.

Return the real dot product of x and y

Definition at line 158 of file multi_reduce_core.cuh.

Referenced by quda::__launch_bounds__(), and quda::blas::Norm2< ReduceType, Float2, FloatN >::flops().

Here is the caller graph for this function:

◆ dot_() [2/3]

template<typename ReduceType >

__device__ __host__ void quda::blas::dot_	(	ReduceType &	sum,
		const float2 &	a,
		const float2 &	b
	)

Definition at line 164 of file multi_reduce_core.cuh.

◆ dot_() [3/3]

template<typename ReduceType >

__device__ __host__ void quda::blas::dot_	(	ReduceType &	sum,
		const float4 &	a,
		const float4 &	b
	)

Definition at line 170 of file multi_reduce_core.cuh.

◆ doubleCG3Init()

void quda::blas::doubleCG3Init	(	double	a,
		ColorSpinorField &	x,
		ColorSpinorField &	y,
		ColorSpinorField &	z
	)

Definition at line 626 of file blas_quda.cu.

References quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::x, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::y, and quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::z.

Referenced by quda::CG3NE::operator()(), and xpayz().

Here is the caller graph for this function:

◆ doubleCG3InitNorm()

double quda::blas::doubleCG3InitNorm	(	double	a,
		ColorSpinorField &	x,
		ColorSpinorField &	y,
		ColorSpinorField &	z
	)

Definition at line 848 of file reduce_quda.cu.

Referenced by cDotProductNormB(), and quda::CG3NE::operator()().

Here is the caller graph for this function:

◆ doubleCG3Update()

void quda::blas::doubleCG3Update	(	double	a,
		double	b,
		ColorSpinorField &	x,
		ColorSpinorField &	y,
		ColorSpinorField &	z
	)

Definition at line 631 of file blas_quda.cu.

References quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::x, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::y, and quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::z.

Referenced by quda::CG3NE::operator()(), and xpayz().

Here is the caller graph for this function:

◆ doubleCG3UpdateNorm()

double quda::blas::doubleCG3UpdateNorm	(	double	a,
		double	b,
		ColorSpinorField &	x,
		ColorSpinorField &	y,
		ColorSpinorField &	z
	)

Definition at line 853 of file reduce_quda.cu.

Referenced by cDotProductNormB(), and quda::CG3NE::operator()().

Here is the caller graph for this function:

◆ end()

void quda::blas::end ( void )

Definition at line 489 of file blas_quda.cu.

References endReduce().

Referenced by benchmark(), quda::DslashCoarsePolicyTune::DslashCoarsePolicyTune(), quda::dslash::DslashPolicyTune< Dslash >::DslashPolicyTune(), endQuda(), quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >::TileSizeTune(), and quda::tuneLaunch().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ endReduce()

void quda::blas::endReduce ( void )

Definition at line 120 of file reduce_quda.cu.

References d_reduce, device_free, h_reduce, hd_reduce, host_free, and reduceEnd.

Referenced by end(), and zero().

Here is the caller graph for this function:

◆ getDeviceReduceBuffer()

void * quda::blas::getDeviceReduceBuffer ( )

Definition at line 26 of file reduce_quda.cu.

References d_reduce.

◆ getFastReduce()

bool quda::blas::getFastReduce ( )

Definition at line 30 of file reduce_quda.cu.

References fast_reduce_enabled.

Referenced by quda::blas::MultiReduceCuda< NXZ, doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Reducer >::MultiReduceCuda(), multiReduceLaunch(), quda::blas::ReduceCuda< doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer >::ReduceCuda(), and reduceLaunch().

Here is the caller graph for this function:

◆ getHostReduceBuffer()

void * quda::blas::getHostReduceBuffer ( )

Definition at line 28 of file reduce_quda.cu.

References h_reduce.

Referenced by multiReduceLaunch().

Here is the caller graph for this function:

◆ getMappedHostReduceBuffer()

void * quda::blas::getMappedHostReduceBuffer ( )

Definition at line 27 of file reduce_quda.cu.

References hd_reduce.

Referenced by multiReduceLaunch().

Here is the caller graph for this function:

◆ getReduceEvent()

cudaEvent_t * quda::blas::getReduceEvent ( )

Definition at line 29 of file reduce_quda.cu.

References reduceEnd.

Referenced by multiReduceLaunch().

Here is the caller graph for this function:

◆ getStream()

cudaStream_t * quda::blas::getStream ( )

Definition at line 494 of file blas_quda.cu.

References blasStream.

Referenced by quda::blas::MultiBlas< NXZ, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Functor, T >::apply(), quda::blas::copy_ns::copy(), multiBlas(), multiReduce(), multiReduceLaunch(), and nativeReduce().

Here is the caller graph for this function:

◆ hDotProduct()

void quda::blas::hDotProduct	(	Complex *	result,
		std::vector< ColorSpinorField *> &	a,
		std::vector< ColorSpinorField *> &	b
	)

Computes the matrix of inner products between the vector set a and the vector set b. This routine is specifically for the case where the result matrix is guaranteed to be Hermitian. Requires a.size()==b.size().

Parameters

result[out]	Matrix of inner product result[i][j] = (a[j],b[i])
a[in]	set of input ColorSpinorFields
b[in]	set of input ColorSpinorFields

Definition at line 1056 of file multi_reduce_quda.cu.

References quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >::apply(), quda::conj(), errorQuda, and reduceDoubleArray().

Referenced by cDotProductNormB(), and quda::CAGCR::solve().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ hDotProduct_Anorm()

void quda::blas::hDotProduct_Anorm	(	Complex *	result,
		std::vector< ColorSpinorField *> &	a,
		std::vector< ColorSpinorField *> &	b
	)

Computes the matrix of inner products between the vector set a and the vector set b. This routine is specifically for the case where the result matrix is guaranteed to be Hermitian. Uniquely defined for cases like (p, Ap) where the output is Hermitian, but there's an A-norm instead of an L2 norm. Requires a.size()==b.size().

Parameters

result[out]	Matrix of inner product result[i][j] = (a[j],b[i])
a[in]	set of input ColorSpinorFields
b[in]	set of input ColorSpinorFields

Definition at line 1083 of file multi_reduce_quda.cu.

References quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >::apply(), quda::conj(), errorQuda, and reduceDoubleArray().

Referenced by cDotProductNormB().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ HeavyQuarkResidualNorm()

double3 quda::blas::HeavyQuarkResidualNorm	(	ColorSpinorField &	x,
		ColorSpinorField &	r
	)

Definition at line 809 of file reduce_quda.cu.

References comm_size(), and quda::ColorSpinorField::Ncolor().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ init()

void quda::blas::init ( )

Definition at line 483 of file blas_quda.cu.

References initReduce(), quda::Nstream, and streams.

Referenced by initQudaMemory().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ initFastReduce() [1/2]

void quda::blas::initFastReduce ( int words )

Referenced by multiReduceLaunch(), and reduceLaunch().

Here is the caller graph for this function:

◆ initFastReduce() [2/2]

void quda::blas::initFastReduce ( int32_t words )

Definition at line 32 of file reduce_quda.cu.

References h_reduce.

◆ initReduce()

void quda::blas::initReduce ( )

Definition at line 64 of file reduce_quda.cu.

References bytes, checkCudaError, d_reduce, device_malloc, deviceProp, fast_reduce_enabled, h_reduce, hd_reduce, mapped_malloc, MAX_MULTI_BLAS_N, memset(), pinned_malloc, QudaSumFloat, reduceEnd, and warningQuda.

Referenced by init(), and zero().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ mixed_blas()

template<template< typename Float, typename FloatN > class Functor, int writeX = 0, int writeY = 0, int writeZ = 0, int writeW = 0, int writeV = 0>

void quda::blas::mixed_blas	(	const double2 &	a,
		const double2 &	b,
		const double2 &	c,
		ColorSpinorField &	x,
		ColorSpinorField &	y,
		ColorSpinorField &	z,
		ColorSpinorField &	w,
		ColorSpinorField &	v
	)

Driver for generic blas routine with four loads and two stores. This is the mixed-precision driver which supports a different precision for (x,z,w) and (y,v), where the former is the low precision and the latter is the high precision.

Definition at line 326 of file blas_quda.cu.

References checkLocation, checkPrecision, errorQuda, quda::ColorSpinorField::isNative(), quda::ColorSpinorField::Nspin(), quda::LatticeField::Precision(), QUDA_CUDA_FIELD_LOCATION, QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, QUDA_SINGLE_PRECISION, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::v, quda::ColorSpinorField::Volume(), quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::w, warningQuda, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::x, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::y, and quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::z.

Here is the call graph for this function:

◆ mixed_reduce()

template<typename doubleN , typename ReduceType , template< typename ReducerType, typename Float, typename FloatN > class Reducer, int writeX, int writeY, int writeZ, int writeW, int writeV, bool siteUnroll>

doubleN quda::blas::mixed_reduce	(	const double2 &	a,
		const double2 &	b,
		ColorSpinorField &	x,
		ColorSpinorField &	y,
		ColorSpinorField &	z,
		ColorSpinorField &	w,
		ColorSpinorField &	v
	)

Driver for generic reduction routine with two loads.

Parameters

ReduceType
siteUnroll	- if this is true, then one site corresponds to exactly one thread

Definition at line 520 of file reduce_quda.cu.

References bytes, checkLocation, checkPrecision, errorQuda, quda::ColorSpinorField::FieldOrder(), genericReduce(), quda::ColorSpinorField::isNative(), quda::ColorSpinorField::Length(), nativeReduce(), quda::ColorSpinorField::Ncolor(), quda::ColorSpinorField::Nspin(), quda::LatticeField::Precision(), QUDA_CUDA_FIELD_LOCATION, QUDA_DOUBLE_PRECISION, QUDA_FLOAT2_FIELD_ORDER, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, QUDA_SINGLE_PRECISION, quda::ColorSpinorField::RealLength(), reduceDoubleArray(), streams, quda::ColorSpinorField::Volume(), warningQuda, and zero().

Here is the call graph for this function:

◆ mixedMultiBlas()

template<int NXZ, template< int MXZ, typename Float, typename FloatN > class Functor, typename write , typename T >

void quda::blas::mixedMultiBlas	(	const coeff_array< T > &	a,
		const coeff_array< T > &	b,
		const coeff_array< T > &	c,
		CompositeColorSpinorField &	x,
		CompositeColorSpinorField &	y,
		CompositeColorSpinorField &	z,
		CompositeColorSpinorField &	w
	)

Driver for generic blas routine with four loads and two stores.

Definition at line 403 of file multi_blas_quda.cu.

References checkLocation, errorQuda, Nspin, QUDA_CUDA_FIELD_LOCATION, QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, and QUDA_SINGLE_PRECISION.

◆ mixedMultiReduce()

template<int NXZ, typename doubleN , typename ReduceType , template< int MXZ, typename ReducerType, typename Float, typename FloatN > class Reducer, typename write , bool siteUnroll, typename T >

void quda::blas::mixedMultiReduce	(	doubleN	result[],
		const coeff_array< T > &	a,
		const coeff_array< T > &	b,
		const coeff_array< T > &	c,
		CompositeColorSpinorField &	x,
		CompositeColorSpinorField &	y,
		CompositeColorSpinorField &	z,
		CompositeColorSpinorField &	w
	)

Driver for multi-reduce with up to five vectors

Definition at line 511 of file multi_reduce_quda.cu.

References checkPrecision, errorQuda, Nspin, QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, and QUDA_SINGLE_PRECISION.

◆ multiBlas() [1/2]

template<int NXZ, typename RegType , typename StoreType , typename yType , int M, template< int, typename, typename > class Functor, typename write , typename T >

void quda::blas::multiBlas	(	const coeff_array< T > &	a,
		const coeff_array< T > &	b,
		const coeff_array< T > &	c,
		std::vector< ColorSpinorField *> &	x,
		std::vector< ColorSpinorField *> &	y,
		std::vector< ColorSpinorField *> &	z,
		std::vector< ColorSpinorField *> &	w,
		int	length
	)

Definition at line 245 of file multi_blas_quda.cu.

References quda::blas::MultiBlas< NXZ, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Functor, T >::apply(), bytes, quda::blas::MultiBlas< NXZ, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Functor, T >::bytes(), checkCudaError, errorQuda, flops, quda::blas::MultiBlas< NXZ, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Functor, T >::flops(), getStream(), MAX_MATRIX_SIZE, MAX_MULTI_BLAS_N, SpinorTexture< RegType, StoreType, N >::set(), Spinor< RegType, StoreType, N, write >::set(), quda::blas::write< writeX, writeY, writeZ, writeW >::W, quda::blas::write< writeX, writeY, writeZ, writeW >::X, quda::blas::write< writeX, writeY, writeZ, writeW >::Y, and quda::blas::write< writeX, writeY, writeZ, writeW >::Z.

Here is the call graph for this function:

◆ multiBlas() [2/2]

template<int NXZ, template< int MXZ, typename Float, typename FloatN > class Functor, typename write , typename T >

void quda::blas::multiBlas	(	const coeff_array< T > &	a,
		const coeff_array< T > &	b,
		const coeff_array< T > &	c,
		CompositeColorSpinorField &	x,
		CompositeColorSpinorField &	y,
		CompositeColorSpinorField &	z,
		CompositeColorSpinorField &	w
	)

Driver for generic blas routine with four loads and two stores.

Definition at line 294 of file multi_blas_quda.cu.

References checkLocation, errorQuda, Ncolor, Nspin, QUDA_CUDA_FIELD_LOCATION, QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, and QUDA_SINGLE_PRECISION.

◆ multiBlasKernel()

template<typename FloatN , int M, int NXZ, typename Arg >

__global__ void quda::blas::multiBlasKernel ( Arg arg_ )

Generic multi-blas kernel with four loads and up to four stores.

Parameters

[in,out] arg_ Argument struct with required metadata (input/output fields, functor, etc.)

Definition at line 73 of file multi_blas_core.cuh.

References quda::arg(), and parity.

Here is the call graph for this function:

◆ multiReduce() [1/3]

template<typename doubleN , typename ReduceType , typename RegType , typename StoreType , typename yType , int M, int NXZ, template< int MXZ, typename ReducerType, typename Float, typename FloatN > class Reducer, typename write , typename T >

void quda::blas::multiReduce	(	doubleN	result[],
		const coeff_array< T > &	a,
		const coeff_array< T > &	b,
		const coeff_array< T > &	c,
		std::vector< ColorSpinorField *> &	x,
		std::vector< ColorSpinorField *> &	y,
		std::vector< ColorSpinorField *> &	z,
		std::vector< ColorSpinorField *> &	w,
		int	length
	)

Definition at line 275 of file multi_reduce_quda.cu.

References Amatrix_d, Amatrix_h, quda::blas::MultiReduceCuda< NXZ, doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Reducer >::apply(), Bmatrix_d, Bmatrix_h, bytes, quda::blas::MultiReduceCuda< NXZ, doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Reducer >::bytes(), checkCudaError, quda::checkSpinor(), Cmatrix_d, Cmatrix_h, quda::blas::coeff_array< T >::data, errorQuda, flops, quda::blas::MultiReduceCuda< NXZ, doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Reducer >::flops(), getStream(), MAX_MATRIX_SIZE, MAX_MULTI_BLAS_N, memset(), QUDA_MAX_MULTI_REDUCE, quda::reduce(), SpinorTexture< RegType, StoreType, N >::set(), Spinor< RegType, StoreType, N, write >::set(), quda::blas::coeff_array< T >::use_const, quda::blas::write< writeX, writeY, writeZ, writeW >::W, warningQuda, quda::blas::write< writeX, writeY, writeZ, writeW >::X, quda::blas::write< writeX, writeY, writeZ, writeW >::Y, and quda::blas::write< writeX, writeY, writeZ, writeW >::Z.

Here is the call graph for this function:

◆ multiReduce() [2/3]

template<int NXZ, typename doubleN , typename ReduceType , template< int MXZ, typename ReducerType, typename Float, typename FloatN > class Reducer, typename write , bool siteUnroll, typename T >

void quda::blas::multiReduce	(	doubleN	result[],
		const coeff_array< T > &	a,
		const coeff_array< T > &	b,
		const coeff_array< T > &	c,
		CompositeColorSpinorField &	x,
		CompositeColorSpinorField &	y,
		CompositeColorSpinorField &	z,
		CompositeColorSpinorField &	w
	)

Driver for multi-reduce with up to four vectors

Definition at line 385 of file multi_reduce_quda.cu.

References checkPrecision, errorQuda, Nspin, QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, and QUDA_SINGLE_PRECISION.

◆ multiReduce() [3/3]

template<int NXZ, typename doubleN , typename ReduceType , template< int MXZ, typename ReducerType, typename Float, typename FloatN > class ReducerDiagonal, typename writeDiagonal , template< int MXZ, typename ReducerType, typename Float, typename FloatN > class ReducerOffDiagonal, typename writeOffDiagonal , bool siteUnroll, typename T >

void quda::blas::multiReduce	(	doubleN	result[],
		const coeff_array< T > &	a,
		const coeff_array< T > &	b,
		const coeff_array< T > &	c,
		CompositeColorSpinorField &	x,
		CompositeColorSpinorField &	y,
		CompositeColorSpinorField &	z,
		CompositeColorSpinorField &	w,
		int	i,
		int	j
	)

Definition at line 598 of file multi_reduce_quda.cu.

◆ multiReduce_recurse()

template<template< int MXZ, typename ReducerType, typename Float, typename FloatN > class ReducerDiagonal, typename writeDiagonal , template< int MXZ, typename ReducerType, typename Float, typename FloatN > class ReducerOffDiagonal, typename writeOffDiagonal >

void quda::blas::multiReduce_recurse	(	Complex *	result,
		std::vector< ColorSpinorField *> &	x,
		std::vector< ColorSpinorField *> &	y,
		std::vector< ColorSpinorField *> &	z,
		std::vector< ColorSpinorField *> &	w,
		int	i_idx,
		int	j_idx,
		bool	hermitian,
		unsigned int	tile_size
	)

Definition at line 706 of file multi_reduce_quda.cu.

References quda::count.

◆ multiReduceKernel()

template<int block_size, typename ReduceType , typename FloatN , int M, int NXZ, typename Arg >

__global__ void quda::blas::multiReduceKernel ( Arg arg_ )

Definition at line 79 of file multi_reduce_core.cuh.

References quda::arg(), parity, and quda::sum().

Referenced by multiReduceLaunch().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ multiReduceLaunch()

template<typename doubleN , typename ReduceType , typename FloatN , int M, int NXZ, typename Arg >

void quda::blas::multiReduceLaunch	(	doubleN	result[],
		Arg &	arg,
		const TuneParam &	tp,
		const cudaStream_t &	stream,
		Tunable &	tunable
	)

Definition at line 36 of file multi_reduce_quda.cu.

References arg_buffer, quda::TuneParam::block, commAsyncReduction(), completeFastReduce(), deviceProp, errorQuda, getFastReduce(), getHostReduceBuffer(), getMappedHostReduceBuffer(), getReduceEvent(), getStream(), quda::TuneParam::grid, initFastReduce(), quda::Tunable::jitifyError(), LAUNCH_KERNEL_LOCAL_PARITY, multiReduceKernel(), quda::qudaEventQuery(), quda::qudaEventRecord(), qudaMemcpy, quda::TuneParam::shared_bytes, and quda::sum().

Here is the call graph for this function:

◆ mxpy()

void quda::blas::mxpy	(	ColorSpinorField &	x,
		ColorSpinorField &	y
	)

inline

Definition at line 34 of file blas_quda.h.

References axpbyz().

Referenced by benchmark(), invert_test(), main(), and test().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ nativeBlas()

template<typename RegType , typename StoreType , typename yType , int M, template< typename, typename > class Functor, int writeX, int writeY, int writeZ, int writeW, int writeV>

void quda::blas::nativeBlas	(	const double2 &	a,
		const double2 &	b,
		const double2 &	c,
		ColorSpinorField &	x,
		ColorSpinorField &	y,
		ColorSpinorField &	z,
		ColorSpinorField &	w,
		ColorSpinorField &	v,
		int	length
	)

Definition at line 149 of file blas_quda.cu.

References quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::apply(), bytes, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::bytes(), checkCudaError, quda::checkLength(), flops, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::flops(), V, X, and Z.

Here is the call graph for this function:

◆ nativeReduce()

template<typename doubleN , typename ReduceType , typename RegType , typename StoreType , typename zType , int M, template< typename ReducerType, typename Float, typename FloatN > class Reducer, int writeX, int writeY, int writeZ, int writeW, int writeV>

doubleN quda::blas::nativeReduce	(	const double2 &	a,
		const double2 &	b,
		ColorSpinorField &	x,
		ColorSpinorField &	y,
		ColorSpinorField &	z,
		ColorSpinorField &	w,
		ColorSpinorField &	v,
		int	length
	)

Definition at line 297 of file reduce_quda.cu.

References bytes, checkCudaError, quda::checkLength(), flops, getStream(), quda::reduce(), V, X, and Z.

Referenced by mixed_reduce(), and uni_reduce().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ norm1()

double quda::blas::norm1 ( const ColorSpinorField & b )

Definition at line 714 of file reduce_quda.cu.

Referenced by getLambdaMax(), getRealBidiagMatrix(), quda::norm1(), and xpayz().

Here is the caller graph for this function:

◆ norm1_() [1/3]

template<typename ReduceType >

__device__ __host__ ReduceType quda::blas::norm1_ ( const double2 & a )

Return the L1 norm of the input a.

Definition at line 100 of file reduce_core.cuh.

References quda::sqrt().

Here is the call graph for this function:

◆ norm1_() [2/3]

template<typename ReduceType >

__device__ __host__ ReduceType quda::blas::norm1_ ( const float2 & a )

Definition at line 105 of file reduce_core.cuh.

References quda::sqrt().

Here is the call graph for this function:

◆ norm1_() [3/3]

template<typename ReduceType >

__device__ __host__ ReduceType quda::blas::norm1_ ( const float4 & a )

Definition at line 110 of file reduce_core.cuh.

References quda::sqrt().

Here is the call graph for this function:

◆ norm2()

double quda::blas::norm2 ( const ColorSpinorField & a )

Definition at line 721 of file reduce_quda.cu.

Here is the caller graph for this function:

◆ norm2_() [1/3]

template<typename ReduceType >

__device__ __host__ void quda::blas::norm2_	(	ReduceType &	sum,
		const double2 &	a
	)

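Accumulate the L2 norm contribution of the input a into sum (the function itself returns void; the result is delivered through the sum reference).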

Definition at line 129 of file reduce_core.cuh.

◆ norm2_() [2/3]

template<typename ReduceType >

__device__ __host__ void quda::blas::norm2_	(	ReduceType &	sum,
		const float2 &	a
	)

Definition at line 135 of file reduce_core.cuh.

◆ norm2_() [3/3]

template<typename ReduceType >

__device__ __host__ void quda::blas::norm2_	(	ReduceType &	sum,
		const float4 &	a
	)

Definition at line 141 of file reduce_core.cuh.

◆ quadrupleCG3InitNorm()

double quda::blas::quadrupleCG3InitNorm	(	double	a,
		ColorSpinorField &	x,
		ColorSpinorField &	y,
		ColorSpinorField &	z,
		ColorSpinorField &	w,
		ColorSpinorField &	v
	)

Definition at line 838 of file reduce_quda.cu.

Referenced by cDotProductNormB(), and quda::CG3::operator()().

Here is the caller graph for this function:

◆ quadrupleCG3UpdateNorm()

double quda::blas::quadrupleCG3UpdateNorm	(	double	a,
		double	b,
		ColorSpinorField &	x,
		ColorSpinorField &	y,
		ColorSpinorField &	z,
		ColorSpinorField &	w,
		ColorSpinorField &	v
	)

Definition at line 843 of file reduce_quda.cu.

Referenced by cDotProductNormB(), and quda::CG3::operator()().

Here is the caller graph for this function:

◆ quadrupleCGReduction()

double4 quda::blas::quadrupleCGReduction	(	ColorSpinorField &	x,
		ColorSpinorField &	y,
		ColorSpinorField &	z
	)

Definition at line 833 of file reduce_quda.cu.

Referenced by cDotProductNormB(), and quda::CG::operator()().

Here is the caller graph for this function:

◆ reDotProduct() [1/2]

double quda::blas::reDotProduct	(	ColorSpinorField &	x,
		ColorSpinorField &	y
	)

Definition at line 728 of file reduce_quda.cu.

Referenced by benchmark(), quda::CG::blocksolve(), cDotProductNormB(), quda::MPCG::computeMatrixPowers(), quda::IncEigCG::eigCGsolve(), quda::TRLM::lanczosStep(), quda::CG::operator()(), quda::CG3::operator()(), quda::PreconCG::operator()(), quda::MultiShiftCG::operator()(), test(), and xpayz().

Here is the caller graph for this function:

◆ reDotProduct() [2/2]

void quda::blas::reDotProduct	(	double *	result,
		std::vector< ColorSpinorField *> &	a,
		std::vector< ColorSpinorField *> &	b
	)

Definition at line 622 of file multi_reduce_quda.cu.

References errorQuda, and reduceDoubleArray().

Here is the call graph for this function:

◆ reduceKernel()

template<int block_size, typename ReduceType , typename FloatN , int M, typename Arg >

__global__ void quda::blas::reduceKernel ( Arg arg )

Generic reduction kernel with up to four loads and three saves.

Definition at line 44 of file reduce_core.cuh.

References quda::arg(), parity, quda::sum(), and zero().

Referenced by reduceLaunch().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ reduceLaunch()

template<typename doubleN , typename ReduceType , typename FloatN , int M, typename Arg >

doubleN quda::blas::reduceLaunch	(	Arg &	arg,
		const TuneParam &	tp,
		const cudaStream_t &	stream,
		Tunable &	tunable
	)

Generic reduction kernel launcher

Definition at line 139 of file reduce_quda.cu.

References quda::TuneParam::block, commAsyncReduction(), completeFastReduce(), deviceProp, errorQuda, getFastReduce(), quda::TuneParam::grid, h_reduce, hd_reduce, initFastReduce(), quda::Tunable::jitifyError(), LAUNCH_KERNEL, quda::qudaEventQuery(), quda::qudaEventRecord(), qudaMemcpy, reduceEnd, reduceKernel(), quda::TuneParam::shared_bytes, and quda::sum().

Here is the call graph for this function:

◆ setParam()

void quda::blas::setParam	(	int	kernel,
		int	prec,
		int	threads,
		int	blocks
	)

◆ tripleCGReduction()

double3 quda::blas::tripleCGReduction	(	ColorSpinorField &	x,
		ColorSpinorField &	y,
		ColorSpinorField &	z
	)

Definition at line 828 of file reduce_quda.cu.

Referenced by benchmark(), cDotProductNormB(), quda::CG::operator()(), and test().

Here is the caller graph for this function:

◆ tripleCGUpdate()

void quda::blas::tripleCGUpdate	(	double	alpha,
		double	beta,
		ColorSpinorField &	q,
		ColorSpinorField &	r,
		ColorSpinorField &	x,
		ColorSpinorField &	p
	)

Definition at line 614 of file blas_quda.cu.

References quda::LatticeField::Precision(), quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::w, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::x, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::y, and quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::z.

Referenced by benchmark(), quda::CG::operator()(), test(), and xpayz().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ uni_blas()

template<template< typename Float, typename FloatN > class Functor, int writeX = 0, int writeY = 0, int writeZ = 0, int writeW = 0, int writeV = 0>

void quda::blas::uni_blas	(	const double2 &	a,
		const double2 &	b,
		const double2 &	c,
		ColorSpinorField &	x,
		ColorSpinorField &	y,
		ColorSpinorField &	z,
		ColorSpinorField &	w,
		ColorSpinorField &	v
	)

Driver for generic blas routine with four loads and two stores. All fields must have matching precisions.

Definition at line 185 of file blas_quda.cu.

References checkLocation, checkPrecision, errorQuda, quda::ColorSpinorField::FieldOrder(), quda::ColorSpinorField::isNative(), quda::ColorSpinorField::Length(), quda::ColorSpinorField::Ncolor(), quda::ColorSpinorField::Nspin(), quda::LatticeField::Precision(), QUDA_CUDA_FIELD_LOCATION, QUDA_DOUBLE_PRECISION, QUDA_FLOAT2_FIELD_ORDER, QUDA_FLOAT4_FIELD_ORDER, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, QUDA_SINGLE_PRECISION, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::v, quda::ColorSpinorField::Volume(), quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::w, warningQuda, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::x, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::y, and quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::z.

Here is the call graph for this function:

◆ uni_reduce()

template<typename doubleN , typename ReduceType , template< typename ReducerType, typename Float, typename FloatN > class Reducer, int writeX, int writeY, int writeZ, int writeW, int writeV, bool siteUnroll>

doubleN quda::blas::uni_reduce	(	const double2 &	a,
		const double2 &	b,
		ColorSpinorField &	x,
		ColorSpinorField &	y,
		ColorSpinorField &	z,
		ColorSpinorField &	w,
		ColorSpinorField &	v
	)

Driver for generic reduction routine with five loads.

Parameters

ReduceType
siteUnroll	- if this is true, then one site corresponds to exactly one thread

Definition at line 349 of file reduce_quda.cu.

References checkLocation, checkPrecision, errorQuda, quda::ColorSpinorField::FieldOrder(), genericReduce(), quda::ColorSpinorField::isNative(), quda::ColorSpinorField::Length(), nativeReduce(), quda::ColorSpinorField::Ncolor(), quda::ColorSpinorField::Nspin(), quda::LatticeField::Precision(), QUDA_CUDA_FIELD_LOCATION, QUDA_DOUBLE_PRECISION, QUDA_FLOAT2_FIELD_ORDER, QUDA_FLOAT4_FIELD_ORDER, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, QUDA_SINGLE_PRECISION, quda::ColorSpinorField::RealLength(), reduceDoubleArray(), quda::ColorSpinorField::Volume(), warningQuda, and zero().

Here is the call graph for this function:

◆ xmyNorm()

double quda::blas::xmyNorm	(	ColorSpinorField &	x,
		ColorSpinorField &	y
	)

inline

Definition at line 75 of file blas_quda.h.

References axpbyzNorm(), cDotProduct(), and cDotProductNormA().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ xpay()

void quda::blas::xpay	(	ColorSpinorField &	x,
		double	a,
		ColorSpinorField &	y
	)

inline

Definition at line 37 of file blas_quda.h.

References axpbyz().

Referenced by quda::ApplyNdegTwistedMassPreconditioned(), quda::ApplyTwistedCloverPreconditioned(), quda::ApplyTwistedMassPreconditioned(), benchmark(), quda::CG::blocksolve(), clover_mat(), clover_matpc(), quda::dslash5(), quda::dslash5inv(), quda::DiracCoarsePC::DslashXpay(), dw_4d_mat(), dw_4d_matpc(), dw_mat(), dw_matpc(), quda::IncEigCG::initCGsolve(), quda::Dslash< Float >::instantiate(), invert_test(), quda::TwistedMassLaunch< Float, nDim, nColor, nParity, dagger, xpay, kernel_type, Arg >::launch(), quda::WilsonCloverPreconditionedLaunch< Float, nDim, nColor, nParity, dagger, xpay, kernel_type, Arg >::launch(), quda::WilsonCloverLaunch< Float, nDim, nColor, nParity, dagger, xpay, kernel_type, Arg >::launch(), quda::NdegTwistedMassLaunch< Float, nDim, nColor, nParity, dagger, xpay, kernel_type, Arg >::launch(), quda::TwistedCloverLaunch< Float, nDim, nColor, nParity, dagger, xpay, kernel_type, Arg >::launch(), quda::TwistedCloverPreconditionedLaunch< Float, nDim, nColor, nParity, dagger, xpay, kernel_type, Arg >::launch(), quda::Dslash< Float >::launch(), quda::DiracDomainWall4D::M(), quda::DiracCoarsePC::M(), quda::PreconCG::operator()(), quda::CACG::operator()(), quda::CAGCR::operator()(), quda::MultiShiftCG::operator()(), quda::DiracCoarsePC::prepare(), quda::DiracCoarsePC::reconstruct(), quda::Deflation::reduce(), quda::IncEigCG::RestartVT(), staggeredDslashRef(), test(), tm_mat(), tm_matpc(), tm_ndeg_mat(), tm_ndeg_matpc(), tmc_mat(), tmc_matpc(), quda::Deflation::verify(), wil_mat(), and wil_matpc().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ xpayz()

void quda::blas::xpayz	(	ColorSpinorField &	x,
		double	a,
		ColorSpinorField &	y,
		ColorSpinorField &	z
	)

inline

Definition at line 38 of file blas_quda.h.

References axpbyz(), axpbyzNorm(), axpyBzpcx(), axpyReDot(), axpyZpbx(), cabxpyAx(), caxpby(), caxpbypczw(), caxpbypzYmbw(), caxpy(), caxpyBxpz(), caxpyBzpx(), caxpyXmaz(), caxpyXmazMR(), cxpaypbz(), doubleCG3Init(), doubleCG3Update(), norm1(), norm2(), reDotProduct(), and tripleCGUpdate().

Referenced by quda::CG::operator()().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ xpy()

void quda::blas::xpy	(	ColorSpinorField &	x,
		ColorSpinorField &	y
	)

inline

Definition at line 33 of file blas_quda.h.

References axpbyz().

Referenced by benchmark(), quda::CG::blocksolve(), quda::MG::buildFreeVectors(), quda::IncEigCG::eigCGsolve(), quda::MG::operator()(), quda::CG::operator()(), quda::CG3::operator()(), quda::CG3NE::operator()(), quda::CGNE::operator()(), quda::PreconCG::operator()(), quda::BiCGstab::operator()(), quda::BiCGstabL::operator()(), quda::GCR::operator()(), quda::CACGNE::operator()(), quda::MultiShiftCG::operator()(), quda::IncEigCG::operator()(), quda::GMResDR::operator()(), and test().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ xpyHeavyQuarkResidualNorm()

double3 quda::blas::xpyHeavyQuarkResidualNorm	(	ColorSpinorField &	x,
		ColorSpinorField &	y,
		ColorSpinorField &	r
	)

Definition at line 818 of file reduce_quda.cu.

References comm_size(), and quda::ColorSpinorField::Ncolor().

Referenced by benchmark(), quda::CG::blocksolve(), cDotProductNormB(), quda::CG::operator()(), quda::CG3::operator()(), quda::CG3NE::operator()(), quda::BiCGstab::operator()(), quda::BiCGstabL::operator()(), and test().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ zero()

void quda::blas::zero ( ColorSpinorField & a )

Definition at line 472 of file blas_quda.cu.

References endReduce(), and initReduce().

Here is the call graph for this function:

Here is the caller graph for this function:

Variable Documentation

◆ Amatrix_d [1/2]

__constant__ signed char quda::blas::Amatrix_d[MAX_MATRIX_SIZE]

static

Definition at line 17 of file multi_blas_core.cuh.

Referenced by quda::blas::MultiBlas< NXZ, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Functor, T >::apply(), multiReduce(), quda::blas::multicaxpy_< NXZ, Float2, FloatN >::operator()(), quda::blas::multicaxpyz_< NXZ, Float2, FloatN >::operator()(), and quda::blas::multi_caxpyBxpz_< NXZ, Float2, FloatN >::operator()().

◆ Amatrix_d [2/2]

__constant__ signed char quda::blas::Amatrix_d[MAX_MATRIX_SIZE]

static

Definition at line 20 of file multi_reduce_core.cuh.

◆ Amatrix_h [1/2]

signed char* quda::blas::Amatrix_h

static

Definition at line 21 of file multi_blas_core.cuh.

Referenced by quda::blas::MultiBlas< NXZ, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Functor, T >::MultiBlas(), multiReduce(), quda::blas::multicaxpy_< NXZ, Float2, FloatN >::operator()(), quda::blas::multicaxpyz_< NXZ, Float2, FloatN >::operator()(), and quda::blas::multi_caxpyBxpz_< NXZ, Float2, FloatN >::operator()().

◆ Amatrix_h [2/2]

signed char* quda::blas::Amatrix_h

static

Definition at line 24 of file multi_reduce_core.cuh.

◆ arg_buffer [1/2]

__constant__ signed char quda::blas::arg_buffer[MAX_MATRIX_SIZE]

static

Definition at line 29 of file multi_blas_core.cuh.

Referenced by quda::blas::MultiBlas< NXZ, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Functor, T >::apply(), and multiReduceLaunch().

◆ arg_buffer [2/2]

__constant__ signed char quda::blas::arg_buffer[MAX_MATRIX_SIZE]

static

Definition at line 32 of file multi_reduce_core.cuh.

◆ blasStream

cudaStream_t* quda::blas::blasStream

static

Definition at line 25 of file blas_quda.cu.

Referenced by getStream().

◆ Bmatrix_d [1/2]

__constant__ signed char quda::blas::Bmatrix_d[MAX_MATRIX_SIZE]

static

Definition at line 18 of file multi_blas_core.cuh.

Referenced by quda::blas::MultiBlas< NXZ, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Functor, T >::apply(), multiReduce(), and quda::blas::multi_caxpyBxpz_< NXZ, Float2, FloatN >::operator()().

◆ Bmatrix_d [2/2]

__constant__ signed char quda::blas::Bmatrix_d[MAX_MATRIX_SIZE]

static

Definition at line 21 of file multi_reduce_core.cuh.

◆ Bmatrix_h [1/2]

signed char* quda::blas::Bmatrix_h

static

Definition at line 22 of file multi_blas_core.cuh.

Referenced by quda::blas::MultiBlas< NXZ, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Functor, T >::MultiBlas(), multiReduce(), and quda::blas::multi_caxpyBxpz_< NXZ, Float2, FloatN >::operator()().

◆ Bmatrix_h [2/2]

signed char* quda::blas::Bmatrix_h

static

Definition at line 25 of file multi_reduce_core.cuh.

◆ bytes

unsigned long long quda::blas::bytes

◆ Cmatrix_d [1/2]

__constant__ signed char quda::blas::Cmatrix_d[MAX_MATRIX_SIZE]

static

Definition at line 19 of file multi_blas_core.cuh.

Referenced by quda::blas::MultiBlas< NXZ, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Functor, T >::apply(), and multiReduce().

◆ Cmatrix_d [2/2]

__constant__ signed char quda::blas::Cmatrix_d[MAX_MATRIX_SIZE]

static

Definition at line 22 of file multi_reduce_core.cuh.

◆ Cmatrix_h [1/2]

signed char* quda::blas::Cmatrix_h

static

Definition at line 23 of file multi_blas_core.cuh.

Referenced by quda::blas::MultiBlas< NXZ, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Functor, T >::MultiBlas(), and multiReduce().

◆ Cmatrix_h [2/2]

signed char* quda::blas::Cmatrix_h

static

Definition at line 26 of file multi_reduce_core.cuh.

◆ flops

unsigned long long quda::blas::flops

Definition at line 22 of file blas_quda.cu.

Namespaces

Classes

Functions

Variables

Function Documentation

◆ _caxpby() [1/3]

◆ _caxpby() [2/3]

◆ _caxpby() [3/3]

◆ _caxpy() [1/3]

◆ _caxpy() [2/3]

◆ _caxpy() [3/3]

◆ ax()

◆ axpby()

◆ axpbyz()

◆ axpbyzNorm()

◆ axpy()

◆ axpyBzpcx() [1/2]

◆ axpyBzpcx() [2/2]

◆ axpyCGNorm()

◆ axpyNorm()

◆ axpyReDot()

◆ axpyZpbx()

◆ blasKernel()

◆ cabxpyAx()

◆ cabxpyzAxNorm()

◆ caxpby()

◆ caxpbypczw()

◆ caxpbypzYmbw()

◆ caxpbypzYmbwcDotProductUYNormY()

◆ caxpy() [1/3]

◆ caxpy() [2/3]

◆ caxpy() [3/3]

◆ Caxpy_() [1/3]

◆ Caxpy_() [2/3]

◆ Caxpy_() [3/3]

◆ caxpy_L() [1/2]

◆ caxpy_L() [2/2]

◆ caxpy_recurse()

◆ caxpy_U() [1/2]

◆ caxpy_U() [2/2]

◆ caxpyBxpz() [1/2]

◆ caxpyBxpz() [2/2]

◆ caxpyBzpx()

◆ caxpyDotzy()

◆ caxpyNorm()

◆ caxpyXmaz()

◆ caxpyXmazMR()

◆ caxpyXmazNormX()

◆ caxpyz() [1/2]

◆ caxpyz() [2/2]

◆ caxpyz_L() [1/2]

◆ caxpyz_L() [2/2]

◆ caxpyz_recurse()

◆ caxpyz_U() [1/2]

◆ caxpyz_U() [2/2]

◆ cdot_() [1/3]

◆ cdot_() [2/3]

◆ cdot_() [3/3]

◆ cdotNormA_()

◆ cdotNormB_()

◆ cDotProduct() [1/2]

◆ cDotProduct() [2/2]

◆ cDotProductCopy()

◆ cDotProductNormA()

◆ cDotProductNormB()

◆ completeFastReduce()

◆ copy()

◆ cxpaypbz()

◆ dot_() [1/3]

◆ dot_() [2/3]

◆ dot_() [3/3]

◆ doubleCG3Init()

◆ doubleCG3InitNorm()

◆ doubleCG3Update()

◆ doubleCG3UpdateNorm()

◆ end()

◆ endReduce()

◆ getDeviceReduceBuffer()

◆ getFastReduce()

◆ getHostReduceBuffer()