Namespaces
	asym_clover

	clover

	copy

	domainwall

	domainwall4d

	dslash

	dslash_aux

	fatlink

	fermion_force

	fermionforce

	gaugeforce

	improvedstaggered

	mobius

	ndegtwisted

	pack

	reduce

	staggered

	twisted

	twistedclover

	wilson

Classes
struct	CloverFieldParam

class	CloverField

class	cudaCloverField

class	cpuCloverField

struct	FullClover

struct	FloatNOrder

struct	QDPOrder

struct	QDPJITOrder

struct	BQCDOrder

class	ColorSpinorParam

class	ColorSpinorField

class	cudaColorSpinorField

class	ColorSpinorFieldOrder

class	SpaceColorSpinOrder

class	SpaceSpinColorOrder

class	QOPDomainWallOrder

class	cpuColorSpinorField

struct	SpaceColorSpinorOrder

struct	SpaceSpinorColorOrder

struct	QDPJITDiracOrder

struct	complex

struct	norm_type

struct	norm_type< complex< T > >

struct	complex< float >

struct	complex< double >

class	DiracParam

class	Dirac

class	DiracWilson

class	DiracWilsonPC

class	DiracClover

class	DiracCloverPC

class	DiracDomainWall

class	DiracDomainWallPC

class	DiracDomainWall4DPC

class	DiracMobiusDomainWallPC

class	DiracTwistedMass

class	DiracTwistedMassPC

class	DiracTwistedClover

class	DiracTwistedCloverPC

class	DiracStaggered

class	DiracStaggeredPC

class	DiracImprovedStaggered

class	DiracImprovedStaggeredPC

class	DiracMatrix

class	DiracM

class	DiracMdagM

class	DiracMMdag

class	DiracMdag

class	EigParam

class	FaceBuffer

struct	GaugeFieldParam

class	GaugeField

class	cudaGaugeField

class	cpuGaugeField

struct	Reconstruct

struct	Reconstruct< 19, Float >

struct	Reconstruct< 12, Float >

struct	Reconstruct< 11, Float >

struct	Reconstruct< 13, Float >

struct	Reconstruct< 8, Float >

struct	Reconstruct< 9, Float >

struct	LegacyOrder

struct	MILCOrder

struct	CPSOrder

struct	TIFROrder

struct	kernel_param_s

struct	SolverParam

class	Solver

class	CG

class	MPCG

class	PreconCG

class	BiCGstab

class	SimpleBiCGstab

class	MPBiCGstab

class	GCR

class	MR

class	SD

class	XSD

class	alphaSA

class	MultiShiftSolver

class	MultiShiftCG

class	MinResExt

class	DeflatedSolver

class	IncEigCG

class	Eig_Solver

class	Lanczos

class	ImpRstLanczos

struct	LatticeFieldParam

class	LatticeField

struct	llfat_kernel_param_s

struct	Timer

struct	TimeProfile

struct	mapper

struct	mapper< double >

struct	mapper< float >

struct	mapper< short >

struct	mapper< double2 >

struct	mapper< float2 >

struct	mapper< short2 >

struct	mapper< double4 >

struct	mapper< float4 >

struct	mapper< short4 >

struct	isHalf

struct	isHalf< short >

struct	Trig

struct	Trig< true >

class	RitzMat

struct	TuneKey

class	TuneParam

class	Tunable

struct	axpby

struct	xpy

struct	axpy

struct	xpay

struct	mxpy

struct	ax

struct	caxpy

struct	caxpby

struct	cxpaypbz

struct	axpyBzpcx

struct	axpyZpbx

struct	caxpbypzYmbw

struct	cabxpyAx

struct	caxpbypz

struct	caxpbypczpw

struct	caxpyxmaz

struct	tripleCGUpdate

class	Gamma5Cuda

class	ContractCuda

class	PreserveBasis

struct	NonRelBasis

struct	RelBasis

struct	ChiralToNonRelBasis

struct	NonRelToChiralBasis

class	PackSpinor

struct	CopyGaugeExArg

class	CopyGaugeEx

struct	CopyGaugeArg

class	CopyGauge

struct	RealType

struct	RealType< double2 >

struct	RealType< float2 >

struct	RealType< float4 >

struct	RealType< short2 >

struct	RealType< short4 >

class	CloverCuda

class	TwistGamma5Cuda

class	TwistCloverGamma5Cuda

struct	CopySpinorExArg

class	CopySpinorEx

struct	ExtractGhostArg

class	ExtractGhost

struct	ExtractGhostExArg

class	ExtractGhostEx

class	GaugeForceCuda

struct	DeflationParam

class	EigCGArgs

struct	KSForceArg

class	KSForceComplete

struct	KSLongLinkArg

class	KSLongLinkForce

class	MemAlloc

struct	GhostStapleParam

struct	ComplexTypeId

struct	ComplexTypeId< float >

struct	ComplexTypeId< double >

struct	RealTypeId

struct	RealTypeId< float >

struct	RealTypeId< double >

struct	RealTypeId< float2 >

struct	RealTypeId< double2 >

struct	PromoteTypeId

struct	PromoteTypeId< float2, float >

struct	PromoteTypeId< float, float2 >

struct	PromoteTypeId< double2, double >

struct	PromoteTypeId< double, double2 >

struct	PromoteTypeId< double, int >

struct	PromoteTypeId< int, double >

struct	PromoteTypeId< float, int >

struct	PromoteTypeId< int, float >

struct	Zero

struct	Identity

class	Matrix

class	Array

struct	ReduceFunctor

struct	Norm2

struct	Dot

struct	DotNormA

struct	axpyNorm2

struct	xmyNorm2

struct	caxpyNorm2

struct	caxpyxmaznormx

struct	cabxpyaxnorm

struct	Cdot

struct	xpaycdotzy

struct	caxpydotzy

struct	CdotNormA

struct	CdotNormB

struct	caxpbypzYmbwcDotProductUYNormY

struct	axpyCGNorm2

struct	tripleCGReduction

struct	ShiftColorSpinorFieldArg

class	ShiftColorSpinorField

Typedefs
typedef std::complex< double >	Complex

typedef struct quda::kernel_param_s	kernel_param_t

typedef struct quda::llfat_kernel_param_s	llfat_kernel_param_t

typedef std::map< TuneKey, TuneParam >	map

Functions
void	initBlas ()

void	endBlas (void)

void	setBlasParam (int kernel, int prec, int threads, int blocks)

double	norm2 (const ColorSpinorField &)

void	zeroCuda (cudaColorSpinorField &a)

void	copyCuda (cudaColorSpinorField &dst, const cudaColorSpinorField &src)

double	axpyNormCuda (const double &a, cudaColorSpinorField &x, cudaColorSpinorField &y)

double	normCuda (const cudaColorSpinorField &b)

double	reDotProductCuda (cudaColorSpinorField &a, cudaColorSpinorField &b)

void	reDotProductCuda (double *result, std::vector< cudaColorSpinorField * > &a, std::vector< cudaColorSpinorField * > &b)

double	xmyNormCuda (cudaColorSpinorField &a, cudaColorSpinorField &b)

double2	reDotProductNormACuda (cudaColorSpinorField &a, cudaColorSpinorField &b)

void	axpbyCuda (const double &a, cudaColorSpinorField &x, const double &b, cudaColorSpinorField &y)

void	axpyCuda (const double &a, cudaColorSpinorField &x, cudaColorSpinorField &y)

void	axCuda (const double &a, cudaColorSpinorField &x)

void	xpyCuda (cudaColorSpinorField &x, cudaColorSpinorField &y)

void	xpayCuda (cudaColorSpinorField &x, const double &a, cudaColorSpinorField &y)

void	mxpyCuda (cudaColorSpinorField &x, cudaColorSpinorField &y)

void	axpyZpbxCuda (const double &a, cudaColorSpinorField &x, cudaColorSpinorField &y, cudaColorSpinorField &z, const double &b)

void	axpyBzpcxCuda (const double &a, cudaColorSpinorField &x, cudaColorSpinorField &y, const double &b, cudaColorSpinorField &z, const double &c)

void	caxpbyCuda (const Complex &a, cudaColorSpinorField &x, const Complex &b, cudaColorSpinorField &y)

void	caxpyCuda (const Complex &a, cudaColorSpinorField &x, cudaColorSpinorField &y)

void	cxpaypbzCuda (cudaColorSpinorField &, const Complex &b, cudaColorSpinorField &y, const Complex &c, cudaColorSpinorField &z)

void	caxpbypzYmbwCuda (const Complex &, cudaColorSpinorField &, const Complex &, cudaColorSpinorField &, cudaColorSpinorField &, cudaColorSpinorField &)

Complex	cDotProductCuda (cudaColorSpinorField &, cudaColorSpinorField &)

void	cDotProductCuda (Complex *result, std::vector< cudaColorSpinorField * > &a, std::vector< cudaColorSpinorField * > &b)

Complex	xpaycDotzyCuda (cudaColorSpinorField &x, const double &a, cudaColorSpinorField &y, cudaColorSpinorField &z)

double3	cDotProductNormACuda (cudaColorSpinorField &a, cudaColorSpinorField &b)

double3	cDotProductNormBCuda (cudaColorSpinorField &a, cudaColorSpinorField &b)

double3	caxpbypzYmbwcDotProductUYNormYCuda (const Complex &a, cudaColorSpinorField &x, const Complex &b, cudaColorSpinorField &y, cudaColorSpinorField &z, cudaColorSpinorField &w, cudaColorSpinorField &u)

void	cabxpyAxCuda (const double &a, const Complex &b, cudaColorSpinorField &x, cudaColorSpinorField &y)

double	caxpyNormCuda (const Complex &a, cudaColorSpinorField &x, cudaColorSpinorField &y)

void	caxpyXmazCuda (const Complex &a, cudaColorSpinorField &x, cudaColorSpinorField &y, cudaColorSpinorField &z)

double	caxpyXmazNormXCuda (const Complex &a, cudaColorSpinorField &x, cudaColorSpinorField &y, cudaColorSpinorField &z)

double	cabxpyAxNormCuda (const double &a, const Complex &b, cudaColorSpinorField &x, cudaColorSpinorField &y)

void	caxpbypzCuda (const Complex &, cudaColorSpinorField &, const Complex &, cudaColorSpinorField &, cudaColorSpinorField &)

void	caxpbypczpwCuda (const Complex &, cudaColorSpinorField &, const Complex &, cudaColorSpinorField &, const Complex &, cudaColorSpinorField &, cudaColorSpinorField &)

Complex	caxpyDotzyCuda (const Complex &a, cudaColorSpinorField &x, cudaColorSpinorField &y, cudaColorSpinorField &z)

Complex	axpyCGNormCuda (const double &a, cudaColorSpinorField &x, cudaColorSpinorField &y)

double3	HeavyQuarkResidualNormCuda (cudaColorSpinorField &x, cudaColorSpinorField &r)

double3	xpyHeavyQuarkResidualNormCuda (cudaColorSpinorField &x, cudaColorSpinorField &y, cudaColorSpinorField &r)

void	tripleCGUpdateCuda (const double &alpha, const double &beta, cudaColorSpinorField &q, cudaColorSpinorField &r, cudaColorSpinorField &x, cudaColorSpinorField &p)

double3	tripleCGReductionCuda (cudaColorSpinorField &x, cudaColorSpinorField &y, cudaColorSpinorField &z)

double	axpyNormCpu (const double &a, const cpuColorSpinorField &x, cpuColorSpinorField &y)

double	normCpu (const cpuColorSpinorField &b)

double	reDotProductCpu (const cpuColorSpinorField &a, const cpuColorSpinorField &b)

double	xmyNormCpu (const cpuColorSpinorField &a, cpuColorSpinorField &b)

void	axpbyCpu (const double &a, const cpuColorSpinorField &x, const double &b, cpuColorSpinorField &y)

void	axpyCpu (const double &a, const cpuColorSpinorField &x, cpuColorSpinorField &y)

void	axCpu (const double &a, cpuColorSpinorField &x)

void	xpyCpu (const cpuColorSpinorField &x, cpuColorSpinorField &y)

void	xpayCpu (const cpuColorSpinorField &x, const double &a, cpuColorSpinorField &y)

void	mxpyCpu (const cpuColorSpinorField &x, cpuColorSpinorField &y)

void	axpyZpbxCpu (const double &a, cpuColorSpinorField &x, cpuColorSpinorField &y, const cpuColorSpinorField &z, const double &b)

void	axpyBzpcxCpu (const double &a, cpuColorSpinorField &x, cpuColorSpinorField &y, const double &b, const cpuColorSpinorField &z, const double &c)

void	caxpbyCpu (const Complex &a, const cpuColorSpinorField &x, const Complex &b, cpuColorSpinorField &y)

void	caxpyCpu (const Complex &a, const cpuColorSpinorField &x, cpuColorSpinorField &y)

void	cxpaypbzCpu (const cpuColorSpinorField &x, const Complex &b, const cpuColorSpinorField &y, const Complex &c, cpuColorSpinorField &z)

void	caxpbypzYmbwCpu (const Complex &, const cpuColorSpinorField &, const Complex &, cpuColorSpinorField &, cpuColorSpinorField &, const cpuColorSpinorField &)

Complex	cDotProductCpu (const cpuColorSpinorField &, const cpuColorSpinorField &)

Complex	xpaycDotzyCpu (const cpuColorSpinorField &x, const double &a, cpuColorSpinorField &y, const cpuColorSpinorField &z)

double3	cDotProductNormACpu (const cpuColorSpinorField &a, const cpuColorSpinorField &b)

double3	cDotProductNormBCpu (const cpuColorSpinorField &a, const cpuColorSpinorField &b)

double3	caxpbypzYmbwcDotProductUYNormYCpu (const Complex &a, const cpuColorSpinorField &x, const Complex &b, cpuColorSpinorField &y, cpuColorSpinorField &z, const cpuColorSpinorField &w, const cpuColorSpinorField &u)

void	cabxpyAxCpu (const double &a, const Complex &b, cpuColorSpinorField &x, cpuColorSpinorField &y)

double	caxpyNormCpu (const Complex &a, cpuColorSpinorField &x, cpuColorSpinorField &y)

void	caxpyXmazCpu (const Complex &a, cpuColorSpinorField &x, cpuColorSpinorField &y, cpuColorSpinorField &z)

double	caxpyXmazNormXCpu (const Complex &a, cpuColorSpinorField &x, cpuColorSpinorField &y, cpuColorSpinorField &z)

double	cabxpyAxNormCpu (const double &a, const Complex &b, cpuColorSpinorField &x, cpuColorSpinorField &y)

void	caxpbypzCpu (const Complex &, cpuColorSpinorField &, const Complex &, cpuColorSpinorField &, cpuColorSpinorField &)

void	caxpbypczpwCpu (const Complex &, cpuColorSpinorField &, const Complex &, cpuColorSpinorField &, const Complex &, cpuColorSpinorField &, cpuColorSpinorField &)

Complex	caxpyDotzyCpu (const Complex &a, cpuColorSpinorField &x, cpuColorSpinorField &y, cpuColorSpinorField &z)

double3	HeavyQuarkResidualNormCpu (cpuColorSpinorField &x, cpuColorSpinorField &r)

double3	xpyHeavyQuarkResidualNormCpu (cpuColorSpinorField &x, cpuColorSpinorField &y, cpuColorSpinorField &r)

std::ostream &	operator<< (std::ostream &output, const CloverFieldParam &param)

void	computeClover (CloverField &clover, const GaugeField &gauge, double coeff, QudaFieldLocation location)

void	computeCloverSigmaTrace (GaugeField &gauge, const CloverField &clover, int dir1, int dir2, QudaFieldLocation location)

void	copyGenericClover (CloverField &out, const CloverField &in, bool inverse, QudaFieldLocation location, void *Out=0, void *In=0, void *outNorm=0, void *inNorm=0)

void	cloverDerivative (cudaGaugeField &out, cudaGaugeField &gauge, cudaGaugeField &oprod, int mu, int nu, double coeff, QudaParity parity, int conjugate)

void	cloverInvert (CloverField &clover, bool computeTraceLog, QudaFieldLocation location)

void	copyGenericColorSpinor (ColorSpinorField &dst, const ColorSpinorField &src, QudaFieldLocation location, void *Dst=0, void *Src=0, void *dstNorm=0, void *srcNorm=0)

void	genericSource (cpuColorSpinorField &a, QudaSourceType sourceType, int x, int s, int c)

int	genericCompare (const cpuColorSpinorField &a, const cpuColorSpinorField &b, int tol)

void	genericPrintVector (cpuColorSpinorField &a, unsigned int x)

void	exchangeExtendedGhost (cudaColorSpinorField *spinor, int R[], int parity, cudaStream_t *stream_p)

void	copyExtendedColorSpinor (ColorSpinorField &dst, const ColorSpinorField &src, QudaFieldLocation location, const int parity, void *Dst, void *Src, void *dstNorm, void *srcNorm)

template<typename Float , int Ns, int Nc>
__device__ void	load_shared (typename mapper< Float >::type v[Ns *Nc *2], Float *field, int x, int volume)

template<typename Float , int Ns, int Nc>
__device__ void	save_shared (Float *field, const typename mapper< Float >::type v[Ns *Nc *2], int x, int volumeCB)

template<typename ValueType >
__host__ __device__ ValueType	cos (ValueType x)

template<typename ValueType >
__host__ __device__ ValueType	sin (ValueType x)

template<typename ValueType >
__host__ __device__ ValueType	tan (ValueType x)

template<typename ValueType >
__host__ __device__ ValueType	acos (ValueType x)

template<typename ValueType >
__host__ __device__ ValueType	asin (ValueType x)

template<typename ValueType >
__host__ __device__ ValueType	atan (ValueType x)

template<typename ValueType >
__host__ __device__ ValueType	atan2 (ValueType x, ValueType y)

template<typename ValueType >
__host__ __device__ ValueType	cosh (ValueType x)

template<typename ValueType >
__host__ __device__ ValueType	sinh (ValueType x)

template<typename ValueType >
__host__ __device__ ValueType	tanh (ValueType x)

template<typename ValueType >
__host__ __device__ ValueType	exp (ValueType x)

template<typename ValueType >
__host__ __device__ ValueType	log (ValueType x)

template<typename ValueType >
__host__ __device__ ValueType	log10 (ValueType x)

template<typename ValueType , typename ExponentType >
__host__ __device__ ValueType	pow (ValueType x, ExponentType e)

template<typename ValueType >
__host__ __device__ ValueType	sqrt (ValueType x)

template<typename ValueType >
__host__ __device__ ValueType	abs (ValueType x)

template<typename ValueType >
__host__ __device__ ValueType	conj (ValueType x)

template<typename ValueType >
__host__ __device__ ValueType	abs (const complex< ValueType > &z)
	Returns the magnitude of z. More...

template<typename ValueType >
__host__ __device__ ValueType	arg (const complex< ValueType > &z)
	Returns the phase angle of z. More...

template<typename ValueType >
__host__ __device__ ValueType	norm (const complex< ValueType > &z)
	Returns the magnitude of z squared. More...

template<typename ValueType >
__host__ __device__ complex < ValueType >	conj (const complex< ValueType > &z)
	Returns the complex conjugate of z. More...

template<typename ValueType >
__host__ __device__ complex < ValueType >	polar (const ValueType &m, const ValueType &theta=0)
	Returns the complex with magnitude m and angle theta in radians. More...

template<typename ValueType >
__host__ __device__ complex < ValueType >	operator* (const complex< ValueType > &lhs, const complex< ValueType > &rhs)

template<typename ValueType >
__host__ __device__ complex < ValueType >	operator* (const complex< ValueType > &lhs, const ValueType &rhs)

template<typename ValueType >
__host__ __device__ complex < ValueType >	operator* (const ValueType &lhs, const complex< ValueType > &rhs)

template<typename ValueType >
__host__ __device__ complex < ValueType >	operator/ (const complex< ValueType > &lhs, const complex< ValueType > &rhs)

template<>
__host__ __device__ complex < float >	operator/ (const complex< float > &lhs, const complex< float > &rhs)

template<>
__host__ __device__ complex < double >	operator/ (const complex< double > &lhs, const complex< double > &rhs)

template<typename ValueType >
__host__ __device__ complex < ValueType >	operator+ (const complex< ValueType > &lhs, const complex< ValueType > &rhs)

template<typename ValueType >
__host__ __device__ complex < ValueType >	operator+ (const complex< ValueType > &lhs, const ValueType &rhs)

template<typename ValueType >
__host__ __device__ complex < ValueType >	operator+ (const ValueType &lhs, const complex< ValueType > &rhs)

template<typename ValueType >
__host__ __device__ complex < ValueType >	operator- (const complex< ValueType > &lhs, const complex< ValueType > &rhs)

template<typename ValueType >
__host__ __device__ complex < ValueType >	operator- (const complex< ValueType > &lhs, const ValueType &rhs)

template<typename ValueType >
__host__ __device__ complex < ValueType >	operator- (const ValueType &lhs, const complex< ValueType > &rhs)

template<typename ValueType >
__host__ __device__ complex < ValueType >	operator+ (const complex< ValueType > &rhs)

template<typename ValueType >
__host__ __device__ complex < ValueType >	operator- (const complex< ValueType > &rhs)

template<typename ValueType >
__host__ __device__ complex < ValueType >	cos (const complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex < ValueType >	cosh (const complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex < ValueType >	exp (const complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex < ValueType >	log (const complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex < ValueType >	log10 (const complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex < ValueType >	pow (const complex< ValueType > &z, const int &n)

template<typename ValueType >
__host__ __device__ complex < ValueType >	pow (const complex< ValueType > &z, const ValueType &x)

template<typename ValueType >
__host__ __device__ complex < ValueType >	pow (const complex< ValueType > &z, const complex< ValueType > &z2)

template<typename ValueType >
__host__ __device__ complex < ValueType >	pow (const ValueType &x, const complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex < ValueType >	sin (const complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex < ValueType >	sinh (const complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex < ValueType >	sqrt (const complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex < ValueType >	tan (const complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex < ValueType >	tanh (const complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex < ValueType >	acos (const complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex < ValueType >	asin (const complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex < ValueType >	atan (const complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex < ValueType >	acosh (const complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex < ValueType >	asinh (const complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex < ValueType >	atanh (const complex< ValueType > &z)

template<typename ValueType , class charT , class traits >
std::basic_ostream< charT, traits > &	operator<< (std::basic_ostream< charT, traits > &os, const complex< ValueType > &z)

template<typename ValueType , typename charT , class traits >
std::basic_istream< charT, traits > &	operator>> (std::basic_istream< charT, traits > &is, complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex < ValueType >	operator+ (const volatile complex< ValueType > &lhs, const volatile complex< ValueType > &rhs)

template<typename ValueType >
__host__ __device__ complex < ValueType >	operator/ (const complex< ValueType > &lhs, const ValueType &rhs)

template<typename ValueType >
__host__ __device__ complex < ValueType >	operator/ (const ValueType &lhs, const complex< ValueType > &rhs)

template<>
__host__ __device__ complex < float >	operator/ (const float &lhs, const complex< float > &rhs)

template<>
__host__ __device__ complex < double >	operator/ (const double &lhs, const complex< double > &rhs)

template<typename ValueType >
__host__ __device__ bool	operator== (const complex< ValueType > &lhs, const complex< ValueType > &rhs)

template<typename ValueType >
__host__ __device__ bool	operator== (const ValueType &lhs, const complex< ValueType > &rhs)

template<typename ValueType >
__host__ __device__ bool	operator== (const complex< ValueType > &lhs, const ValueType &rhs)

template<typename ValueType >
__host__ __device__ bool	operator!= (const complex< ValueType > &lhs, const complex< ValueType > &rhs)

template<typename ValueType >
__host__ __device__ bool	operator!= (const ValueType &lhs, const complex< ValueType > &rhs)

template<typename ValueType >
__host__ __device__ bool	operator!= (const complex< ValueType > &lhs, const ValueType &rhs)

template<>
__host__ __device__ float	abs (const complex< float > &z)

template<>
__host__ __device__ double	abs (const complex< double > &z)

template<>
__host__ __device__ float	arg (const complex< float > &z)

template<>
__host__ __device__ double	arg (const complex< double > &z)

template<>
__host__ __device__ complex < float >	polar (const float &magnitude, const float &angle)

template<>
__host__ __device__ complex < double >	polar (const double &magnitude, const double &angle)

template<>
__host__ __device__ complex < float >	cos (const complex< float > &z)

template<>
__host__ __device__ complex < float >	cosh (const complex< float > &z)

template<>
__host__ __device__ complex < float >	exp (const complex< float > &z)

template<>
__host__ __device__ complex < float >	log (const complex< float > &z)

template<>
__host__ __device__ complex < float >	pow (const float &x, const complex< float > &exponent)

template<>
__host__ __device__ complex < float >	sin (const complex< float > &z)

template<>
__host__ __device__ complex < float >	sinh (const complex< float > &z)

template<typename ValueType >
__host__ __device__ complex < float >	sqrt (const complex< float > &z)

template<typename ValueType >
__host__ __device__ complex < float >	atanh (const complex< float > &z)

void	contractCuda (const cudaColorSpinorField &x, const cudaColorSpinorField &y, void *result, const QudaContractType contract_type, const QudaParity parity)

void	contractCuda (const cudaColorSpinorField &x, const cudaColorSpinorField &y, void *result, const QudaContractType contract_type, const int tSlice, const QudaParity parity)

void	gamma5Cuda (cudaColorSpinorField *out, const cudaColorSpinorField *in)

void	covDev (cudaColorSpinorField *out, cudaGaugeField &gauge, const cudaColorSpinorField *in, const int parity, const int mu, TimeProfile &profile)

void	setDiracParam (DiracParam &diracParam, QudaInvertParam *inv_param, bool pc)

void	setDiracSloppyParam (DiracParam &diracParam, QudaInvertParam *inv_param, bool pc)

void	setKernelPackT (bool pack)

bool	getKernelPackT ()

void	setTwistPack (bool pack)

bool	getTwistPack ()

void	setPackComms (const int *commDim)

bool	getDslashLaunch ()

void	createDslashEvents ()

void	destroyDslashEvents ()

void	wilsonDslashCuda (cudaColorSpinorField *out, const cudaGaugeField &gauge, const cudaColorSpinorField *in, const int oddBit, const int daggerBit, const cudaColorSpinorField *x, const double &k, const int *commDim, TimeProfile &profile, const QudaDslashPolicy &dslashPolicy=QUDA_DSLASH2)

void	cloverDslashCuda (cudaColorSpinorField *out, const cudaGaugeField &gauge, const FullClover cloverInv, const cudaColorSpinorField *in, const int oddBit, const int daggerBit, const cudaColorSpinorField *x, const double &k, const int *commDim, TimeProfile &profile, const QudaDslashPolicy &dslashPolicy=QUDA_DSLASH2)

void	asymCloverDslashCuda (cudaColorSpinorField *out, const cudaGaugeField &gauge, const FullClover cloverInv, const cudaColorSpinorField *in, const int oddBit, const int daggerBit, const cudaColorSpinorField *x, const double &k, const int *commDim, TimeProfile &profile, const QudaDslashPolicy &dslashPolicy=QUDA_DSLASH2)

void	cloverCuda (cudaColorSpinorField *out, const cudaGaugeField &gauge, const FullClover clover, const cudaColorSpinorField *in, const int oddBit)

void	domainWallDslashCuda (cudaColorSpinorField *out, const cudaGaugeField &gauge, const cudaColorSpinorField *in, const int parity, const int dagger, const cudaColorSpinorField *x, const double &m_f, const double &k, const int *commDim, TimeProfile &profile, const QudaDslashPolicy &dslashPolicy=QUDA_DSLASH)

void	domainWallDslashCuda (cudaColorSpinorField *out, const cudaGaugeField &gauge, const cudaColorSpinorField *in, const int parity, const int dagger, const cudaColorSpinorField *x, const double &m_f, const double &k, const int *commDim, const int DS_type, TimeProfile &profile, const QudaDslashPolicy &dslashPolicy=QUDA_DSLASH2)

void	MDWFDslashCuda (cudaColorSpinorField *out, const cudaGaugeField &gauge, const cudaColorSpinorField *in, const int parity, const int dagger, const cudaColorSpinorField *x, const double &m_f, const double &k, const int *commDim, const int DS_type, TimeProfile &profile, const QudaDslashPolicy &dslashPolicy=QUDA_DSLASH2)

void	staggeredDslashCuda (cudaColorSpinorField *out, const cudaGaugeField &gauge, const cudaColorSpinorField *in, const int parity, const int dagger, const cudaColorSpinorField *x, const double &k, const int *commDim, TimeProfile &profile, const QudaDslashPolicy &dslashPolicy=QUDA_DSLASH2)

void	improvedStaggeredDslashCuda (cudaColorSpinorField *out, const cudaGaugeField &fatGauge, const cudaGaugeField &longGauge, const cudaColorSpinorField *in, const int parity, const int dagger, const cudaColorSpinorField *x, const double &k, const int *commDim, TimeProfile &profile, const QudaDslashPolicy &dslashPolicy=QUDA_DSLASH2)

void	twistedMassDslashCuda (cudaColorSpinorField *out, const cudaGaugeField &gauge, const cudaColorSpinorField *in, const int parity, const int dagger, const cudaColorSpinorField *x, const QudaTwistDslashType type, const double &kappa, const double &mu, const double &epsilon, const double &k, const int *commDim, TimeProfile &profile, const QudaDslashPolicy &dslashPolicy=QUDA_DSLASH2)

void	ndegTwistedMassDslashCuda (cudaColorSpinorField *out, const cudaGaugeField &gauge, const cudaColorSpinorField *in, const int parity, const int dagger, const cudaColorSpinorField *x, const QudaTwistDslashType type, const double &kappa, const double &mu, const double &epsilon, const double &k, const int *commDim, TimeProfile &profile, const QudaDslashPolicy &dslashPolicy=QUDA_DSLASH)

void	twistedCloverDslashCuda (cudaColorSpinorField *out, const cudaGaugeField &gauge, const FullClover clover, const FullClover cloverInv, const cudaColorSpinorField *in, const int parity, const int dagger, const cudaColorSpinorField *x, const QudaTwistCloverDslashType type, const double &kappa, const double &mu, const double &epsilon, const double &k, const int *commDim, TimeProfile &profile, const QudaDslashPolicy &dslashPolicy=QUDA_DSLASH2)

void	twistGamma5Cuda (cudaColorSpinorField *out, const cudaColorSpinorField *in, const int dagger, const double &kappa, const double &mu, const double &epsilon, const QudaTwistGamma5Type twist)
	ndeg tm: More...

void	twistCloverGamma5Cuda (cudaColorSpinorField *out, const cudaColorSpinorField *in, const int dagger, const double &kappa, const double &mu, const double &epsilon, const QudaTwistGamma5Type twist, const FullClover clov, const FullClover clovInv, const int parity)

void	packFace (void *ghost_buf, cudaColorSpinorField &in, const int nFace, const int dagger, const int parity, const int dim, const int face_num, const cudaStream_t &stream, const double a=0.0, const double b=0.0)

void	packFaceExtended (void *ghost_buf, cudaColorSpinorField &field, const int nFace, const int R[], const int dagger, const int parity, const int dim, const int face_num, const cudaStream_t &stream, const bool unpack=false)

void	packFace (void *ghost_buf, cudaColorSpinorField &in, FullClover &clov, FullClover &clovInv, const int nFace, const int dagger, const int parity, const int dim, const int face_num, const cudaStream_t &stream, const double a=0.0)

void	loadLinkToGPU (cudaGaugeField *cudaGauge, cpuGaugeField *cpuGauge, QudaGaugeParam *param)

void	loadLinkToGPU_ex (cudaGaugeField *cudaGauge, cpuGaugeField *cpuGauge)

void	loadLinkToGPU_gf (cudaGaugeField *cudaGauge, cpuGaugeField *cpuGauge, QudaGaugeParam *param)

void	storeLinkToCPU (cpuGaugeField *cpuGauge, cudaGaugeField *cudaGauge, QudaGaugeParam *param)

void	packGhostStaple (int X, void even, void odd, int volume, QudaPrecision prec, int stride, int dir, int whichway, void fwd_nbr_buf_gpu, void back_nbr_buf_gpu, void fwd_nbr_buf, void back_nbr_buf, cudaStream_t stream)

void	unpackGhostStaple (int X, void _even, void _odd, int volume, QudaPrecision prec, int stride, int dir, int whichway, void fwd_nbr_buf, void back_nbr_buf, cudaStream_t stream)

void	pack_ghost_all_staples_cpu (void staple, void cpuGhostStapleBack, void cpuGhostStapleFwd, int nFace, QudaPrecision precision, int X)

void	pack_ghost_all_links (void cpuLink, void cpuGhostBack, void *cpuGhostFwd, int dir, int nFace, QudaPrecision precision, int X)

void	pack_gauge_diag (void *buf, int *X, void **sitelink, int nu, int mu, int dir1, int dir2, QudaPrecision prec)

void	fermion_force_init_cuda (QudaGaugeParam *param)

void	fermion_force_cuda (double eps, double weight1, double weight2, void *act_path_coeff, FullHw cudaHw, cudaGaugeField &cudaSiteLink, cudaGaugeField &cudaMom, QudaGaugeParam *param)

__host__ __device__ double2	operator+ (const double2 &x, const double2 &y)

__host__ __device__ double2	operator- (const double2 &x, const double2 &y)

__host__ __device__ float2	operator- (const float2 &x, const float2 &y)

__host__ __device__ float4	operator- (const float4 &x, const float4 &y)

__host__ __device__ double3	operator+ (const double3 &x, const double3 &y)

__host__ __device__ float4	operator* (const float a, const float4 x)

__host__ __device__ float2	operator* (const float a, const float2 x)

__host__ __device__ double2	operator* (const double a, const double2 x)

__host__ __device__ double4	operator* (const double a, const double4 x)

__host__ __device__ float2	operator+ (const float2 x, const float2 y)

__host__ __device__ float4	operator+ (const float4 x, const float4 y)

__host__ __device__ float4	operator+= (float4 &x, const float4 y)

__host__ __device__ float2	operator+= (float2 &x, const float2 y)

__host__ __device__ double2	operator+= (double2 &x, const double2 y)

__host__ __device__ double3	operator+= (double3 &x, const double3 y)

__host__ __device__ float4	operator-= (float4 &x, const float4 y)

__host__ __device__ float2	operator-= (float2 &x, const float2 y)

__host__ __device__ double2	operator-= (double2 &x, const double2 y)

__host__ __device__ float2	operator*= (float2 &x, const float a)

__host__ __device__ double2	operator*= (double2 &x, const float a)

__host__ __device__ float4	operator*= (float4 &a, const float &b)

__host__ __device__ double2	operator*= (double2 &a, const double &b)

__host__ __device__ double4	operator*= (double4 &a, const double &b)

__host__ __device__ float2	operator- (const float2 &x)

__host__ __device__ double2	operator- (const double2 &x)

__forceinline__ __host__ __device__ float	max_fabs (const float4 &c)

__forceinline__ __host__ __device__ float	max_fabs (const float2 &b)

__forceinline__ __host__ __device__ double	max_fabs (const double4 &c)

__forceinline__ __host__ __device__ double	max_fabs (const double2 &b)

__forceinline__ __host__ __device__ float2	make_FloatN (const double2 &a)

__forceinline__ __host__ __device__ float4	make_FloatN (const double4 &a)

__forceinline__ __host__ __device__ double2	make_FloatN (const float2 &a)

__forceinline__ __host__ __device__ double4	make_FloatN (const float4 &a)

__forceinline__ __host__ __device__ short4	make_shortN (const float4 &a)

__forceinline__ __host__ __device__ short2	make_shortN (const float2 &a)

__forceinline__ __host__ __device__ short4	make_shortN (const double4 &a)

__forceinline__ __host__ __device__ short2	make_shortN (const double2 &a)

std::ostream &	operator<< (std::ostream &output, const GaugeFieldParam &param)

double	norm2 (const cudaGaugeField &u)

void	copyGenericGauge (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out=0, void *In=0, void **ghostOut=0, void **ghostIn=0, int type=0)

void	copyExtendedGauge (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out=0, void *In=0)

void	extractGaugeGhost (const GaugeField &u, void **ghost)

void	extractExtendedGaugeGhost (const GaugeField &u, int dim, const int *R, void **ghost, bool extract)

double	maxGauge (const GaugeField &u)

void	applyGaugePhase (GaugeField &u)

template<typename Float >
__device__ __host__ void	accumulateComplexProduct (Float *a, const Float *b, const Float *c, Float sign)

template<typename Float >
__device__ __host__ void	complexProduct (Float *a, const Float *b, const Float *c)

template<typename Float >
__device__ __host__ void	complexDotProduct (Float *a, const Float *b, const Float *c)

template<typename Float >
__device__ __host__ void	complexQuotient (Float *a, const Float *b, const Float *c)

template<typename Float >
__device__ __host__ void	accumulateConjugateProduct (Float *a, const Float *b, const Float *c, int sign)

template<typename Float >
__device__ __host__ void	complexConjugateProduct (Float *a, const Float *b, const Float *c)

template<typename Float >
__device__ __host__ Float	timeBoundary (int idx, const int X[QUDA_MAX_DIM], QudaTboundary tBoundary, bool isFirstTimeSlice, bool isLastTimeSlice)

template<typename Float >
__device__ __host__ Float	timeBoundary (int idx, const int X[QUDA_MAX_DIM], const int R[QUDA_MAX_DIM], QudaTboundary tBoundary, bool isFirstTimeSlice, bool isLastTimeSlice, QudaGhostExchange ghostExchange)

void	gauge_force_init_cuda (QudaGaugeParam *param, int max_length)

void	gauge_force_cuda (cudaGaugeField &cudaMom, double eb3, cudaGaugeField &cudaSiteLink, QudaGaugeParam param, int *input_path, int length, double *path_coeff, int num_paths, int max_length)

double	plaquette (const GaugeField &data, QudaFieldLocation location)

void	APEStep (GaugeField &dataDs, const GaugeField &dataOr, double alpha, QudaFieldLocation location)

void	updateGaugeField (GaugeField &out, double dt, const GaugeField &in, const GaugeField &mom, bool conj_mom, bool exact)

void	setUnitarizeLinksPadding (int input_padding, int output_padding)

void	setUnitarizeLinksConstants (double unitarize_eps, double max_error, bool allow_svd, bool svd_only, double svd_rel_error, double svd_abs_error, bool check_unitarization=true)

void	unitarizeLinksCuda (const QudaGaugeParam &param, cudaGaugeField &infield, cudaGaugeField *outfield, int *num_failures)

void	unitarizeLinksCPU (const QudaGaugeParam &param, cpuGaugeField &infield, cpuGaugeField *outfield)

bool	isUnitary (const QudaGaugeParam &param, cpuGaugeField &field, double max_error)

void	completeKSForce (GaugeField &mom, const GaugeField &oprod, const GaugeField &gauge, QudaFieldLocation location, long long *flops=NULL)

std::ostream &	operator<< (std::ostream &output, const LatticeFieldParam &param)

void	llfat_cuda (cudaGaugeField cudaFatLink, cudaGaugeField cudaLongLink, cudaGaugeField &cudaSiteLink, cudaGaugeField &cudaStaple, cudaGaugeField &cudaStaple1, QudaGaugeParam param, double act_path_coeff)

void	llfat_cuda_ex (cudaGaugeField cudaFatLink, cudaGaugeField cudaLongLink, cudaGaugeField &cudaSiteLink, cudaGaugeField &cudaStaple, cudaGaugeField &cudaStaple1, QudaGaugeParam param, double act_path_coeff)

void	llfat_init_cuda (QudaGaugeParam *param)

void	llfat_init_cuda_ex (QudaGaugeParam *param_ex)

void	computeLongLinkCuda (void *outEven, void *outOdd, const void *const inEven, const void *const inOdd, double coeff, QudaReconstructType recon, QudaPrecision prec, dim3 halfGridDim, llfat_kernel_param_t kparam)

void	computeGenStapleFieldParityKernel (void *staple_even, void *staple_odd, const void *sitelink_even, const void *sitelink_odd, void *fatlink_even, void *fatlink_odd, const void *mulink_even, const void *mulink_odd, int mu, int nu, int save_staple, double mycoeff, QudaReconstructType recon, QudaPrecision prec, dim3 halfGridDim, llfat_kernel_param_t kparam, cudaStream_t *stream)

void	computeGenStapleFieldParityKernel_ex (void *staple_even, void *staple_odd, const void *sitelink_even, const void *sitelink_odd, void *fatlink_even, void *fatlink_odd, const void *mulink_even, const void *mulink_odd, int mu, int nu, int save_staple, double mycoeff, QudaReconstructType recon, QudaPrecision prec, llfat_kernel_param_t kparam)

void	siteComputeGenStapleParityKernel (void *staple_even, void *staple_odd, const void *sitelink_even, const void *sitelink_odd, void *fatlink_even, void *fatlink_odd, int mu, int nu, double mycoeff, QudaReconstructType recon, QudaPrecision prec, dim3 halfGridDim, llfat_kernel_param_t kparam, cudaStream_t *stream)

void	siteComputeGenStapleParityKernel_ex (void *staple_even, void *staple_odd, const void *sitelink_even, const void *sitelink_odd, void *fatlink_even, void *fatlink_odd, int mu, int nu, double mycoeff, QudaReconstructType recon, QudaPrecision prec, llfat_kernel_param_t kparam)

void	llfatOneLinkKernel (cudaGaugeField &cudaFatLink, cudaGaugeField &cudaSiteLink, cudaGaugeField &cudaStaple, cudaGaugeField &cudaStaple1, QudaGaugeParam param, double act_path_coeff)

void	llfatOneLinkKernel_ex (cudaGaugeField &cudaFatLink, cudaGaugeField &cudaSiteLink, cudaGaugeField &cudaStaple, cudaGaugeField &cudaStaple1, QudaGaugeParam param, double act_path_coeff, llfat_kernel_param_t kparam)

void	computeFatLinkCore (cudaGaugeField cudaSiteLink, double act_path_coeff, QudaGaugeParam qudaGaugeParam, QudaComputeFatMethod method, cudaGaugeField cudaFatLink, cudaGaugeField *cudaLongLink, TimeProfile &profile)

void	printPeakMemUsage ()

void	assertAllMemFree ()

void *	device_malloc_ (const char *func, const char *file, int line, size_t size)

void *	safe_malloc_ (const char *func, const char *file, int line, size_t size)

void *	pinned_malloc_ (const char *func, const char *file, int line, size_t size)

void *	mapped_malloc_ (const char *func, const char *file, int line, size_t size)

void	device_free_ (const char *func, const char *file, int line, void *ptr)

void	host_free_ (const char *func, const char *file, int line, void *ptr)

void	link_format_cpu_to_gpu (void *dst, void *src, int reconstruct, int Vh, int pad, int ghostV, QudaPrecision prec, QudaGaugeFieldOrder cpu_order, cudaStream_t stream)

void	link_format_gpu_to_cpu (void *dst, void *src, int Vh, int stride, QudaPrecision prec, cudaStream_t stream)

void	collectGhostStaple (int *X, void *even, void *odd, int volumeCB, int stride, QudaPrecision precision, void *ghost_staple_gpu, int dir, int whichway, cudaStream_t *stream)

template<typename T1 , typename T2 >
__host__ __device__ void	copy (T1 &a, const T2 &b)

template<>
__host__ __device__ void	copy (float &a, const short &b)

template<>
__host__ __device__ void	copy (short &a, const float &b)

void	createStaggeredOprodEvents ()

void	destroyStaggeredOprodEvents ()

void	computeStaggeredOprod (cudaGaugeField &out, cudaColorSpinorField &in, FaceBuffer &facebuffer, const unsigned int parity, const double coeff, const unsigned int displacement)

void	computeStaggeredOprod (cudaGaugeField &outA, cudaGaugeField &outB, cudaColorSpinorField &inEven, cudaColorSpinorField &inOdd, FaceBuffer &faceBuffer, const unsigned int parity, const double coeff[2])

void	loadTuneCache (QudaVerbosity verbosity)

void	saveTuneCache (QudaVerbosity verbosity)

TuneParam &	tuneLaunch (Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity)

template<typename Float >
void	axpby (const Float &a, const Float *x, const Float &b, Float *y, const int N)

template<typename Float >
void	caxpby (const std::complex< Float > &a, const std::complex< Float > *x, const std::complex< Float > &b, std::complex< Float > *y, int N)

template<typename Float >
void	caxpbypcz (const std::complex< Float > &a, const std::complex< Float > *x, const std::complex< Float > &b, const std::complex< Float > *y, const std::complex< Float > &c, std::complex< Float > *z, int N)

template<typename Float >
double	norm (const Float *a, const int N)

template<typename Float >
double	reDotProduct (const Float *a, const Float *b, const int N)

template<typename Float >
Complex	cDotProduct (const std::complex< Float > *a, const std::complex< Float > *b, const int N)

template<typename Float >
double3	HeavyQuarkResidualNorm (const Float *x, const Float *r, const int volume, const int Nint)

double3	HeavyQuarkResidualNormCpu (cpuColorSpinorField &x, cpuColorSpinorField &y, cpuColorSpinorField &r)

void	initReduce ()

void	endReduce ()

cudaStream_t *	getBlasStream ()

__device__ void	caxpy_ (const float2 &a, const float4 &x, float4 &y)

__device__ void	caxpy_ (const float2 &a, const float2 &x, float2 &y)

__device__ void	caxpy_ (const double2 &a, const double2 &x, double2 &y)

__device__ void	caxpby_ (const float2 &a, const float4 &x, const float2 &b, float4 &y)

__device__ void	caxpby_ (const float2 &a, const float2 &x, const float2 &b, float2 &y)

__device__ void	caxpby_ (const double2 &a, const double2 &x, const double2 &b, double2 &y)

__device__ void	cxpaypbz_ (const float4 &x, const float2 &a, const float4 &y, const float2 &b, float4 &z)

__device__ void	cxpaypbz_ (const float2 &x, const float2 &a, const float2 &y, const float2 &b, float2 &z)

__device__ void	cxpaypbz_ (const double2 &x, const double2 &a, const double2 &y, const double2 &b, double2 &z)

void	setGhostSpinor (bool value)

std::ostream &	operator<< (std::ostream &out, const ColorSpinorField &a)

template<typename Float >
ColorSpinorFieldOrder< Float > *	createOrder (const cpuColorSpinorField &a)

template<class T >
void	random (T &t)

template<class T >
void	point (T &t, int x, int s, int c)

template<class U , class V >
int	compareSpinor (const U &u, const V &v, const int tol)

template<class Order >
void	print_vector (const Order &o, unsigned int x)

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis >
void	packSpinor (OutOrder &outOrder, const InOrder &inOrder, Basis basis, int volume)

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis >
__global__ void	packSpinorKernel (OutOrder outOrder, const InOrder inOrder, Basis basis, int volume)

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder >
void	genericCopyColorSpinor (OutOrder &outOrder, const InOrder &inOrder, QudaGammaBasis dstBasis, QudaGammaBasis srcBasis, const ColorSpinorField &out, QudaFieldLocation location)

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename InOrder >
void	genericCopyColorSpinor (InOrder &inOrder, ColorSpinorField &out, QudaGammaBasis inBasis, QudaFieldLocation location, FloatOut Out, float outNorm)

template<typename FloatOut , typename FloatIn , int Ns, int Nc>
void	genericCopyColorSpinor (ColorSpinorField &out, const ColorSpinorField &in, QudaFieldLocation location, FloatOut Out, FloatIn In, float outNorm, float inNorm)

template<int Ns, typename dstFloat , typename srcFloat >
void	copyGenericColorSpinor (ColorSpinorField &dst, const ColorSpinorField &src, QudaFieldLocation location, dstFloat Dst, srcFloat Src, float dstNorm, float srcNorm)

template<typename dstFloat , typename srcFloat >
void	CopyGenericColorSpinor (ColorSpinorField &dst, const ColorSpinorField &src, QudaFieldLocation location, dstFloat Dst, srcFloat Src, float dstNorm=0, float srcNorm=0)

void	copyGenericGaugeDoubleOut (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out, void *In, void **ghostOut, void **ghostIn, int type)

void	copyGenericGaugeHalfOut (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out, void *In, void **ghostOut, void **ghostIn, int type)

void	copyGenericGaugeSingleOut (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out, void *In, void **ghostOut, void **ghostIn, int type)

void	checkMomOrder (const GaugeField &u)

template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder >
__device__ __host__ void	copyGaugeEx (CopyGaugeExArg< OutOrder, InOrder > &arg, int X, int parity)

template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder >
void	copyGaugeEx (CopyGaugeExArg< OutOrder, InOrder > arg)

template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder >
__global__ void	copyGaugeExKernel (CopyGaugeExArg< OutOrder, InOrder > arg)

template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder >
void	copyGaugeEx (OutOrder outOrder, const InOrder inOrder, const int E, const int X, const int *faceVolumeCB, const GaugeField &meta, QudaFieldLocation location)

template<typename FloatOut , typename FloatIn , int length, typename InOrder >
void	copyGaugeEx (const InOrder &inOrder, const int X, GaugeField &out, QudaFieldLocation location, FloatOut Out)

template<typename FloatOut , typename FloatIn , int length>
void	copyGaugeEx (GaugeField &out, const GaugeField &in, QudaFieldLocation location, FloatOut Out, FloatIn In)

template<typename FloatOut , typename FloatIn >
void	copyGaugeEx (GaugeField &out, const GaugeField &in, QudaFieldLocation location, FloatOut Out, FloatIn In)

template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder >
void	copyGauge (CopyGaugeArg< OutOrder, InOrder > arg)

template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder >
__global__ void	copyGaugeKernel (CopyGaugeArg< OutOrder, InOrder > arg)

template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder >
void	copyGhost (CopyGaugeArg< OutOrder, InOrder > arg)

template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder >
__global__ void	copyGhostKernel (CopyGaugeArg< OutOrder, InOrder > arg)

template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder >
void	copyGauge (OutOrder outOrder, const InOrder inOrder, int volume, const int *faceVolumeCB, int nDim, int geometry, const GaugeField &out, QudaFieldLocation location, int type)

template<typename FloatOut , typename FloatIn , int length, typename InOrder >
void	copyGauge (const InOrder &inOrder, GaugeField &out, QudaFieldLocation location, FloatOut Out, FloatOut *outGhost, int type)

template<typename FloatOut , typename FloatIn , int length>
void	copyGauge (GaugeField &out, const GaugeField &in, QudaFieldLocation location, FloatOut Out, FloatIn In, FloatOut outGhost, FloatIn inGhost, int type)

template<typename FloatOut , typename FloatIn >
void	copyGauge (GaugeField &out, const GaugeField &in, QudaFieldLocation location, FloatOut Out, FloatIn In, FloatOut outGhost, FloatIn inGhost, int type)

std::ostream &	operator<< (std::ostream &out, const cudaColorSpinorField &a)

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis , bool extend>
__device__ __host__ void	copyInterior (CopySpinorExArg< OutOrder, InOrder, Basis > &arg, int X)

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis , bool extend>
__global__ void	copyInteriorKernel (CopySpinorExArg< OutOrder, InOrder, Basis > arg)

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis , bool extend>
void	copyInterior (CopySpinorExArg< OutOrder, InOrder, Basis > &arg)

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis >
void	copySpinorEx (OutOrder outOrder, const InOrder inOrder, const Basis basis, const int E, const int X, const int parity, const bool extend, const ColorSpinorField &meta, QudaFieldLocation location)

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder >
void	copySpinorEx (OutOrder outOrder, InOrder inOrder, const QudaGammaBasis outBasis, const QudaGammaBasis inBasis, const int E, const int X, const int parity, const bool extend, const ColorSpinorField &meta, QudaFieldLocation location)

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename InOrder >
void	extendedCopyColorSpinor (InOrder &inOrder, ColorSpinorField &out, QudaGammaBasis inBasis, const int E, const int X, const int parity, const bool extend, QudaFieldLocation location, FloatOut Out, float outNorm)

template<typename FloatOut , typename FloatIn , int Ns, int Nc>
void	extendedCopyColorSpinor (ColorSpinorField &out, const ColorSpinorField &in, const int parity, const QudaFieldLocation location, FloatOut Out, FloatIn In, float outNorm, float inNorm)

template<int Ns, typename dstFloat , typename srcFloat >
void	copyExtendedColorSpinor (ColorSpinorField &dst, const ColorSpinorField &src, const int parity, const QudaFieldLocation location, dstFloat Dst, srcFloat Src, float dstNorm, float srcNorm)

template<typename dstFloat , typename srcFloat >
void	CopyExtendedColorSpinor (ColorSpinorField &dst, const ColorSpinorField &src, const int parity, const QudaFieldLocation location, dstFloat Dst, srcFloat Src, float dstNorm=0, float srcNorm=0)

template<typename Float , int length, int nDim, typename Order >
void	extractGhost (ExtractGhostArg< Order, nDim > arg)

template<typename Float , int length, int nDim, typename Order >
__global__ void	extractGhostKernel (ExtractGhostArg< Order, nDim > arg)

template<typename Float , int length, typename Order >
void	extractGhost (Order order, const GaugeField &u, QudaFieldLocation location)

template<typename Float >
void	extractGhost (const GaugeField &u, Float **Ghost)

template<typename Float , int length, typename Arg >
__device__ __host__ void	extractor (Arg &arg, int dir, int a, int b, int c, int d, int g, int parity)

template<typename Float , int length, typename Arg >
__device__ __host__ void	injector (Arg &arg, int dir, int a, int b, int c, int d, int g, int parity)

template<typename Float , int length, int nDim, typename Order , bool extract>
void	extractGhostEx (ExtractGhostExArg< Order, nDim > arg)

template<typename Float , int length, int nDim, typename Order , bool extract>
__global__ void	extractGhostExKernel (ExtractGhostExArg< Order, nDim > arg)

template<typename Float , int length, typename Order >
void	extractGhostEx (Order order, const int dim, const int surfaceCB, const int E, const int *R, bool extract, const GaugeField &u, QudaFieldLocation location)

template<typename Float >
void	extractGhostEx (const GaugeField &u, int dim, const int R, Float *Ghost, bool extract)

template<int oddBit, typename Float , typename Float2 , typename FloatN >
__global__ void	GAUGE_FORCE_KERN_NAME (Float2 momEven, Float2 momOdd, const int dir, const double eb3, const FloatN linkEven, const FloatN linkOdd, const int input_path, const int length, const double *path_coeff, const int num_paths, const kernel_param_t kparam)

void	gauge_force_cuda_dir (cudaGaugeField &cudaMom, const int dir, const double eb3, const cudaGaugeField &cudaSiteLink, const QudaGaugeParam param, int input_path, const int length, const double *path_coeff, const int num_paths, const int max_length)

void	printLaunchTimer ()

void	setDiracPreParam (DiracParam &diracParam, QudaInvertParam *inv_param, const bool pc)

void	createDirac (Dirac &d, Dirac &dSloppy, Dirac *&dPre, QudaInvertParam &param, const bool pc_solve)

void	massRescale (cudaColorSpinorField &b, QudaInvertParam &param)

void	fillInnerSolveParam (SolverParam &inner, const SolverParam &outer)

double	resNorm (const DiracMatrix &mat, cudaColorSpinorField &b, cudaColorSpinorField &x)

int	reliable (double &rNorm, double &maxrx, double &maxrr, const double &r2, const double &delta)

void	fillInitCGSolveParam (SolverParam &initCGparam)

double	timeInterval (struct timeval start, struct timeval end)

void	orthoDir (Complex *beta, cudaColorSpinorField Ap[], int k)

void	backSubs (const Complex alpha, Complex const beta, const double gamma, Complex *delta, int n)

void	updateSolution (cudaColorSpinorField &x, const Complex alpha, Complex const beta, double gamma, int k, cudaColorSpinorField *p[])

void	print (const double d[], int n)

void	updateAlphaZeta (double alpha, double zeta, double zeta_old, const double r2, const double beta, const double pAp, const double offset, const int nShift, const int j_low)

__device__ __host__ int	linkIndex (int x[], int dx[], const int X[4])

__device__ __host__ void	getCoords (int x[4], int cb_index, const int X[4], int parity)

template<typename Float , typename Oprod , typename Gauge , typename Mom >
__host__ __device__ void	completeKSForceCore (KSForceArg< Oprod, Gauge, Mom > &arg, int idx)

template<typename Float , typename Oprod , typename Gauge , typename Mom >
__global__ void	completeKSForceKernel (KSForceArg< Oprod, Gauge, Mom > arg)

template<typename Float , typename Oprod , typename Gauge , typename Mom >
void	completeKSForceCPU (KSForceArg< Oprod, Gauge, Mom > &arg)

template<typename Float , typename Oprod , typename Gauge , typename Mom >
void	completeKSForce (Oprod oprod, Gauge gauge, Mom mom, int dim[4], const GaugeField &meta, QudaFieldLocation location, long long *flops)

template<typename Float , typename Result , typename Oprod , typename Gauge >
__host__ __device__ void	computeKSLongLinkForceCore (KSLongLinkArg< Result, Oprod, Gauge > &arg, int idx)

template<typename Float , typename Result , typename Oprod , typename Gauge >
__global__ void	computeKSLongLinkForceKernel (KSLongLinkArg< Result, Oprod, Gauge > arg)

template<typename Float , typename Result , typename Oprod , typename Gauge >
void	computeKSLongLinkForceCPU (KSLongLinkArg< Result, Oprod, Gauge > &arg)

template<typename Float , typename Result , typename Oprod , typename Gauge >
void	computeKSLongLinkForce (Result res, Oprod oprod, Gauge gauge, int dim[4], const GaugeField &meta, QudaFieldLocation location)

template<typename Float >
void	computeKSLongLinkForce (GaugeField &result, const GaugeField &oprod, const GaugeField &gauge, QudaFieldLocation location)

template<typename Float , int Nc, typename Order >
double	maxGauge (const Order order, int volume, int nDim)

template<int N, typename FloatN , typename Float2 >
__global__ void	do_link_format_cpu_to_gpu (FloatN dst, Float2 src, int reconstruct, int Vh, int pad, int ghostV, size_t threads)

template<int N, typename FloatN , typename Float2 >
__global__ void	do_link_format_cpu_to_gpu_milc (FloatN dst, Float2 src, int reconstruct, int Vh, int pad, int ghostV, size_t threads)

template<typename FloatN >
__global__ void	do_link_format_gpu_to_cpu (FloatN dst, FloatN src, int Vh, int stride)

template<int dir, int whichway, typename Float2 >
__global__ void	collectGhostStapleKernel (Float2 out, Float2 in, int parity, GhostStapleParam param)

template<class Cmplx >
__device__ __host__ Cmplx	makeComplex (const typename RealTypeId< Cmplx >::Type &a, const typename RealTypeId< Cmplx >::Type &b)

__device__ __host__ double2	makeComplex (const double &a, const double &b)

__device__ __host__ float2	makeComplex (const float &a, const float &b)

template<class Cmplx >
__device__ __host__ Cmplx	operator- (const Cmplx &a)

template<class Cmplx >
__device__ __host__ Cmplx &	operator+= (Cmplx &a, const Cmplx &b)

template<class Cmplx >
__device__ __host__ Cmplx &	operator-= (Cmplx &a, const Cmplx &b)

template<class Cmplx >
__device__ __host__ Cmplx	operator+ (const Cmplx &a, const Cmplx &b)

template<class Cmplx >
__device__ __host__ Cmplx	operator- (const Cmplx &a, const Cmplx &b)

__device__ __host__ double2	operator* (const double2 &a, const double &scalar)

__device__ __host__ float2	operator* (const float2 &a, const float &scalar)

template<class Cmplx , class Float >
__device__ __host__ Cmplx	operator+ (const Cmplx &a, const Float &scalar)

template<class Cmplx >
__device__ __host__ Cmplx	operator/ (const Cmplx &a, const typename RealTypeId< Cmplx >::Type &scalar)

template<class Cmplx >
__device__ __host__ Cmplx	operator+ (const typename RealTypeId< Cmplx >::Type &scalar, const Cmplx &a)

template<class Cmplx >
__device__ __host__ Cmplx	operator- (const Cmplx &a, const typename RealTypeId< Cmplx >::Type &scalar)

template<class Cmplx >
__device__ __host__ Cmplx	operator- (const typename RealTypeId< Cmplx >::Type &scalar, const Cmplx &a)

template<class Cmplx >
__device__ __host__ Cmplx	operator* (const Cmplx &a, const Cmplx &b)

template<class Cmplx >
__device__ __host__ Cmplx	conj (const Cmplx &a)

__device__ __host__ double	conj (const double &a)

__device__ __host__ float	conj (const float &a)

template<typename Cmplx >
__device__ __host__ Cmplx	Conj (const Cmplx &a)

template<class Cmplx >
__device__ __host__ Cmplx	getPreciseInverse (const Cmplx &z)

std::ostream &	operator<< (std::ostream &os, const float2 &z)

std::ostream &	operator<< (std::ostream &os, const double2 &z)

template<int N>
__device__ __host__ int	index (int i, int j)

template<class T >
__device__ __host__ T	getTrace (const Matrix< T, 3 > &a)

template<class T >
__device__ __host__ T	getDeterminant (const Matrix< T, 3 > &a)

template<class T , int N>
__device__ __host__ Matrix< T, N >	operator+ (const Matrix< T, N > &a, const Matrix< T, N > &b)

template<class T , int N>
__device__ __host__ Matrix< T, N >	operator+= (Matrix< T, N > &a, const Matrix< T, N > &b)

template<class T , int N>
__device__ __host__ Matrix< T, N >	operator-= (Matrix< T, N > &a, const Matrix< T, N > &b)

template<class T , int N>
__device__ __host__ Matrix< T, N >	operator- (const Matrix< T, N > &a, const Matrix< T, N > &b)

template<class T , int N, class S >
__device__ __host__ Matrix< T, N >	operator* (const S &scalar, const Matrix< T, N > &a)

template<class T , int N, class S >
__device__ __host__ Matrix< T, N >	operator* (const Matrix< T, N > &a, const S &scalar)

template<class T , int N, class S >
__device__ __host__ Matrix< T, N >	operator*= (Matrix< T, N > &a, const S &scalar)

template<class T , int N>
__device__ __host__ Matrix< T, N >	operator- (const Matrix< T, N > &a)

template<class T >
__device__ __host__ Matrix< T, 3 >	operator* (const Matrix< T, 3 > &a, const Matrix< T, 3 > &b)

template<class T , int N>
__device__ __host__ Matrix< T, N >	operator*= (Matrix< T, N > &a, const Matrix< T, N > &b)

template<class T , class U >
__device__ __host__ Matrix < typename PromoteTypeId< T, U > ::Type, 3 >	operator* (const Matrix< T, 3 > &a, const Matrix< U, 3 > &b)

template<class T >
__device__ __host__ Matrix< T, 2 >	operator* (const Matrix< T, 2 > &a, const Matrix< T, 2 > &b)

template<class T , int N>
__device__ __host__ Matrix< T, N >	conj (const Matrix< T, N > &other)

template<class T >
__device__ __host__ void	computeMatrixInverse (const Matrix< T, 3 > &u, Matrix< T, 3 > *uinv)

template<class T , int N>
__device__ __host__ void	setIdentity (Matrix< T, N > *m)

template<int N>
__device__ __host__ void	setIdentity (Matrix< float2, N > *m)

template<int N>
__device__ __host__ void	setIdentity (Matrix< double2, N > *m)

template<class T , int N>
__device__ __host__ void	setZero (Matrix< T, N > *m)

template<int N>
__device__ __host__ void	setZero (Matrix< float2, N > *m)

template<int N>
__device__ __host__ void	setZero (Matrix< double2, N > *m)

template<class T , int N>
__device__ __host__ void	copyColumn (const Matrix< T, N > &m, int c, Array< T, N > *a)

template<class T , int N>
__device__ __host__ void	outerProd (const Array< T, N > &a, const Array< T, N > &b, Matrix< T, N > *m)

template<class T , int N>
__device__ __host__ void	outerProd (const T(&a)[N], const T(&b)[N], Matrix< T, N > *m)

template<class T , int N>
std::ostream &	operator<< (std::ostream &os, const Matrix< T, N > &m)

template<class T , int N>
std::ostream &	operator<< (std::ostream &os, const Array< T, N > &a)

template<class T >
__device__ void	loadLinkVariableFromArray (const T *const array, const int dir, const int idx, const int stride, Matrix< T, 3 > *link)

template<class T , int N>
__device__ void	loadMatrixFromArray (const T *const array, const int idx, const int stride, Matrix< T, N > *mat)

__device__ void	loadLinkVariableFromArray (const float2 *const array, const int dir, const int idx, const int stride, Matrix< double2, 3 > *link)

template<class T , int N>
__device__ void	writeMatrixToArray (const Matrix< T, N > &mat, const int idx, const int stride, T *const array)

__device__ void	appendMatrixToArray (const Matrix< double2, 3 > &mat, const int idx, const int stride, double2 *const array)

__device__ void	appendMatrixToArray (const Matrix< float2, 3 > &mat, const int idx, const int stride, float2 *const array)

template<class T >
__device__ void	writeLinkVariableToArray (const Matrix< T, 3 > &link, const int dir, const int idx, const int stride, T *const array)

__device__ void	writeLinkVariableToArray (const Matrix< double2, 3 > &link, const int dir, const int idx, const int stride, float2 *const array)

template<class T >
__device__ void	loadMomentumFromArray (const T *const array, const int dir, const int idx, const int stride, Matrix< T, 3 > *mom)

template<class T , class U >
__device__ void	writeMomentumToArray (const Matrix< T, 3 > &mom, const int dir, const int idx, const U coeff, const int stride, T *const array)

template<class Cmplx >
__device__ __host__ void	computeLinkInverse (Matrix< Cmplx, 3 > *uinv, const Matrix< Cmplx, 3 > &u)

void	copyArrayToLink (Matrix< float2, 3 > *link, float *array)

template<class Cmplx , class Real >
void	copyArrayToLink (Matrix< Cmplx, 3 > *link, Real *array)

void	copyLinkToArray (float *array, const Matrix< float2, 3 > &link)

template<class Cmplx , class Real >
void	copyLinkToArray (Real *array, const Matrix< Cmplx, 3 > &link)

template<class Cmplx >
__host__ __device__ void	printLink (const Matrix< Cmplx, 3 > &link)

__device__ double	norm2_ (const double2 &a)

__device__ float	norm2_ (const float2 &a)

__device__ float	norm2_ (const float4 &a)

__device__ double	dot_ (const double2 &a, const double2 &b)

__device__ float	dot_ (const float2 &a, const float2 &b)

__device__ float	dot_ (const float4 &a, const float4 &b)

__device__ double2	dotNormA_ (const double2 &a, const double2 &b)

__device__ double2	dotNormA_ (const float2 &a, const float2 &b)

__device__ double2	dotNormA_ (const float4 &a, const float4 &b)

__device__ void	Caxpy_ (const float2 &a, const float4 &x, float4 &y)

__device__ void	Caxpy_ (const float2 &a, const float2 &x, float2 &y)

__device__ void	Caxpy_ (const double2 &a, const double2 &x, double2 &y)

__device__ double2	cdot_ (const double2 &a, const double2 &b)

__device__ double2	cdot_ (const float2 &a, const float2 &b)

__device__ double2	cdot_ (const float4 &a, const float4 &b)

__device__ double3	cdotNormA_ (const double2 &a, const double2 &b)

__device__ double3	cdotNormA_ (const float2 &a, const float2 &b)

__device__ double3	cdotNormA_ (const float4 &a, const float4 &b)

__device__ double3	cdotNormB_ (const double2 &a, const double2 &b)

__device__ double3	cdotNormB_ (const float2 &a, const float2 &b)

__device__ double3	cdotNormB_ (const float4 &a, const float4 &b)

template<IndexType idxType, typename Int >
__device__ __forceinline__ int	neighborIndex (const unsigned int &cb_idx, const int(&shift)[4], const bool(&partitioned)[4], const unsigned int &parity)

template<typename FloatN , int N, typename Output , typename Input >
__global__ void	shiftColorSpinorFieldKernel (ShiftQuarkArg< Output, Input > arg)

template<typename FloatN , int N, typename Output , typename Input >
__global__ void	shiftColorSpinorFieldExternalKernel (ShiftQuarkArg< Output, Input > arg)

void	shiftColorSpinorField (cudaColorSpinorField &dst, const cudaColorSpinorField &src, const unsigned int parity, const unsigned int dim, const int shift)

Variables
unsigned long long	blas_flops

unsigned long long	blas_bytes

const int	maxNface = 3

const int	Nstream = 1

cudaStream_t *	stream

Detailed Description

This code has not been checked. In particular, I suspect it is erroneous in multi-GPU since it looks like the halo ghost region isn't being treated here.

Generic Multi Shift Solver

For staggered, the mass is folded into the Dirac operator. Otherwise the matrix mass is 'unmodified'.

The lowest offset is in offsets[0].

Typedef Documentation

typedef std::complex< double > quda::Complex

Definition at line 13 of file eig_variables.h.

typedef struct quda::kernel_param_s quda::kernel_param_t

typedef struct quda::llfat_kernel_param_s quda::llfat_kernel_param_t

typedef std::map<TuneKey, TuneParam> quda::map

Definition at line 24 of file tune.cpp.

Enumeration Type Documentation

enum quda::AllocType

Enumerator
DEVICE
HOST
PINNED
MAPPED
N_ALLOC_TYPE

Definition at line 14 of file malloc.cpp.

enum quda::QudaProfileType

Enumerator
QUDA_PROFILE_H2D	host -> device transfers
QUDA_PROFILE_D2H	The time in seconds for device -> host transfers
QUDA_PROFILE_INIT	The time in seconds taken for initialization
QUDA_PROFILE_PREAMBLE	The time in seconds taken for any preamble
QUDA_PROFILE_COMPUTE	The time in seconds taken for the actual computation
QUDA_PROFILE_EPILOGUE	The time in seconds taken for any epilogue
QUDA_PROFILE_FREE	The time in seconds for freeing resources
QUDA_PROFILE_PACK_KERNEL	face packing kernel
QUDA_PROFILE_DSLASH_KERNEL	dslash kernel
QUDA_PROFILE_GATHER	gather (device -> host)
QUDA_PROFILE_SCATTER	scatter (host -> device)
QUDA_PROFILE_EVENT_RECORD	cuda event record
QUDA_PROFILE_EVENT_QUERY	cuda event querying
QUDA_PROFILE_STREAM_WAIT_EVENT	stream waiting for event completion
QUDA_PROFILE_COMMS	synchronous communication
QUDA_PROFILE_COMMS_START	initiating communication
QUDA_PROFILE_COMMS_QUERY	querying communication
QUDA_PROFILE_CONSTANT	time spent setting CUDA constant parameters
QUDA_PROFILE_TOTAL	The total time in seconds for the algorithm. Must be the penultimate type.
QUDA_PROFILE_COUNT	The total number of timers we have. Must be last enum type.

Definition at line 143 of file quda_internal.h.

Function Documentation

template<typename ValueType >

__host__ __device__ ValueType quda::abs ( ValueType x )

inline

Definition at line 110 of file complex_quda.h.

template<typename ValueType >

__host__ __device__ ValueType quda::abs ( const complex< ValueType > & z )

inline

Returns the magnitude of z.

Definition at line 827 of file complex_quda.h.

template<>

__host__ __device__ float quda::abs ( const complex< float > & z )

inline

Definition at line 832 of file complex_quda.h.

template<>

__host__ __device__ double quda::abs ( const complex< double > & z )

inline

Definition at line 837 of file complex_quda.h.

template<typename Float >

__device__ __host__ void quda::accumulateComplexProduct	(	Float *	a,
		const Float *	b,
		const Float *	c,
		Float	sign
	)

inline

Definition at line 9 of file gauge_field_order.h.

template<typename Float >

__device__ __host__ void quda::accumulateConjugateProduct	(	Float *	a,
		const Float *	b,
		const Float *	c,
		int	sign
	)

inline

Definition at line 40 of file gauge_field_order.h.

template<typename ValueType >

__host__ __device__ ValueType quda::acos ( ValueType x )

inline

Definition at line 50 of file complex_quda.h.

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::acos ( const complex< ValueType > & z )

inline

Definition at line 1041 of file complex_quda.h.

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::acosh ( const complex< ValueType > & z )

inline

Definition at line 1062 of file complex_quda.h.

void quda::APEStep	(	GaugeField &	dataDs,
		const GaugeField &	dataOr,
		double	alpha,
		QudaFieldLocation	location
	)

Definition at line 497 of file gauge_ape.cu.

__device__ void quda::appendMatrixToArray	(	const Matrix< double2, 3 > &	mat,
		const int	idx,
		const int	stride,
		double2 *const	array
	)

inline

Definition at line 810 of file quda_matrix.h.

__device__ void quda::appendMatrixToArray	(	const Matrix< float2, 3 > &	mat,
		const int	idx,
		const int	stride,
		float2 *const	array
	)

inline

Definition at line 819 of file quda_matrix.h.

void quda::applyGaugePhase ( GaugeField & u )

Apply the staggered phase factor to the gauge field.

Parameters

u	The gauge field to which we apply the staggered phase factors

Definition at line 261 of file gauge_phase.cu.

template<typename ValueType >

__host__ __device__ ValueType quda::arg ( const complex< ValueType > & z )

inline

Returns the phase angle of z.

Definition at line 843 of file complex_quda.h.

template<>

__host__ __device__ float quda::arg ( const complex< float > & z )

inline

Definition at line 848 of file complex_quda.h.

template<>

__host__ __device__ double quda::arg ( const complex< double > & z )

inline

Definition at line 853 of file complex_quda.h.

template<typename ValueType >

__host__ __device__ ValueType quda::asin ( ValueType x )

inline

Definition at line 55 of file complex_quda.h.

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::asin ( const complex< ValueType > & z )

inline

Definition at line 1048 of file complex_quda.h.

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::asinh ( const complex< ValueType > & z )

inline

Definition at line 1087 of file complex_quda.h.

void quda::assertAllMemFree ( )

Definition at line 294 of file malloc.cpp.

void quda::asymCloverDslashCuda	(	cudaColorSpinorField *	out,
		const cudaGaugeField &	gauge,
		const FullClover	cloverInv,
		const cudaColorSpinorField *	in,
		const int	oddBit,
		const int	daggerBit,
		const cudaColorSpinorField *	x,
		const double &	k,
		const int *	commDim,
		TimeProfile &	profile,
		const QudaDslashPolicy &	dslashPolicy = `QUDA_DSLASH2`
	)

Definition at line 118 of file dslash_clover_asym.cu.

template<typename ValueType >

__host__ __device__ ValueType quda::atan ( ValueType x )

inline

Definition at line 60 of file complex_quda.h.

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::atan ( const complex< ValueType > & z )

inline

Definition at line 1055 of file complex_quda.h.

template<typename ValueType >

__host__ __device__ ValueType quda::atan2	(	ValueType	x,
		ValueType	y
	)

inline

Definition at line 65 of file complex_quda.h.

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::atanh ( const complex< ValueType > & z )

inline

Definition at line 1093 of file complex_quda.h.

template<typename ValueType >

__host__ __device__ complex<float> quda::atanh ( const complex< float > & z )

inline

Definition at line 1111 of file complex_quda.h.

void quda::axCpu	(	const double &	a,
		cpuColorSpinorField &	x
	)

Definition at line 60 of file blas_cpu.cpp.

void quda::axCuda	(	const double &	a,
		cudaColorSpinorField &	x
	)

Definition at line 171 of file blas_quda.cu.

template<typename Float >

void quda::axpby	(	const Float &	a,
		const Float *	x,
		const Float &	b,
		Float *	y,
		const int	N
	)

Definition at line 8 of file blas_cpu.cpp.

void quda::axpbyCpu	(	const double &	a,
		const cpuColorSpinorField &	x,
		const double &	b,
		cpuColorSpinorField &	y
	)

Definition at line 12 of file blas_cpu.cpp.

void quda::axpbyCuda	(	const double &	a,
		cudaColorSpinorField &	x,
		const double &	b,
		cudaColorSpinorField &	y
	)

Definition at line 82 of file blas_quda.cu.

void quda::axpyBzpcxCpu	(	const double &	a,
		cpuColorSpinorField &	x,
		cpuColorSpinorField &	y,
		const double &	b,
		const cpuColorSpinorField &	z,
		const double &	c
	)

Definition at line 129 of file blas_cpu.cpp.

void quda::axpyBzpcxCuda	(	const double &	a,
		cudaColorSpinorField &	x,
		cudaColorSpinorField &	y,
		const double &	b,
		cudaColorSpinorField &	z,
		const double &	c
	)

Definition at line 311 of file blas_quda.cu.

Complex quda::axpyCGNormCuda	(	const double &	a,
		cudaColorSpinorField &	x,
		cudaColorSpinorField &	y
	)

Definition at line 682 of file reduce_quda.cu.

void quda::axpyCpu	(	const double &	a,
		const cpuColorSpinorField &	x,
		cpuColorSpinorField &	y
	)

Definition at line 31 of file blas_cpu.cpp.

void quda::axpyCuda	(	const double &	a,
		cudaColorSpinorField &	x,
		cudaColorSpinorField &	y
	)

Definition at line 115 of file blas_quda.cu.

double quda::axpyNormCpu	(	const double &	a,
		const cpuColorSpinorField &	x,
		cpuColorSpinorField &	y
	)

Definition at line 178 of file blas_cpu.cpp.

double quda::axpyNormCuda	(	const double &	a,
		cudaColorSpinorField &	x,
		cudaColorSpinorField &	y
	)

Definition at line 321 of file reduce_quda.cu.

void quda::axpyZpbxCpu	(	const double &	a,
		cpuColorSpinorField &	x,
		cpuColorSpinorField &	y,
		const cpuColorSpinorField &	z,
		const double &	b
	)

Definition at line 136 of file blas_cpu.cpp.

void quda::axpyZpbxCuda	(	const double &	a,
		cudaColorSpinorField &	x,
		cudaColorSpinorField &	y,
		cudaColorSpinorField &	z,
		const double &	b
	)

Definition at line 338 of file blas_quda.cu.

void quda::backSubs	(	const Complex *	alpha,
		Complex **const	beta,
		const double *	gamma,
		Complex *	delta,
		int	n
	)

Definition at line 101 of file inv_gcr_quda.cpp.

void quda::cabxpyAxCpu	(	const double &	a,
		const Complex &	b,
		cpuColorSpinorField &	x,
		cpuColorSpinorField &	y
	)

Definition at line 259 of file blas_cpu.cpp.

void quda::cabxpyAxCuda	(	const double &	a,
		const Complex &	b,
		cudaColorSpinorField &	x,
		cudaColorSpinorField &	y
	)

Definition at line 386 of file blas_quda.cu.

double quda::cabxpyAxNormCpu	(	const double &	a,
		const Complex &	b,
		cpuColorSpinorField &	x,
		cpuColorSpinorField &	y
	)

Definition at line 283 of file blas_cpu.cpp.

double quda::cabxpyAxNormCuda	(	const double &	a,
		const Complex &	b,
		cudaColorSpinorField &	x,
		cudaColorSpinorField &	y
	)

Definition at line 440 of file reduce_quda.cu.

template<typename Float >

void quda::caxpby	(	const std::complex< Float > &	a,
		const std::complex< Float > *	x,
		const std::complex< Float > &	b,
		std::complex< Float > *	y,
		int	N
	)

Definition at line 70 of file blas_cpu.cpp.

__device__ void quda::caxpby_	(	const float2 &	a,
		const float4 &	x,
		const float2 &	b,
		float4 &	y
	)

Functor to perform the operation y = a*x + b*y (complex-valued)

Definition at line 217 of file blas_quda.cu.

__device__ void quda::caxpby_	(	const float2 &	a,
		const float2 &	x,
		const float2 &	b,
		float2 &	y
	)

Definition at line 225 of file blas_quda.cu.

__device__ void quda::caxpby_	(	const double2 &	a,
		const double2 &	x,
		const double2 &	b,
		double2 &	y
	)

Definition at line 231 of file blas_quda.cu.

void quda::caxpbyCpu	(	const Complex &	a,
		const cpuColorSpinorField &	x,
		const Complex &	b,
		cpuColorSpinorField &	y
	)

Definition at line 92 of file blas_cpu.cpp.

void quda::caxpbyCuda	(	const Complex &	a,
		cudaColorSpinorField &	x,
		const Complex &	b,
		cudaColorSpinorField &	y
	)

Definition at line 247 of file blas_quda.cu.

template<typename Float >

void quda::caxpbypcz	(	const std::complex< Float > &	a,
		const std::complex< Float > *	x,
		const std::complex< Float > &	b,
		const std::complex< Float > *	y,
		const std::complex< Float > &	c,
		std::complex< Float > *	z,
		int	N
	)

Definition at line 105 of file blas_cpu.cpp.

void quda::caxpbypczpwCpu	(	const Complex &	a,
		cpuColorSpinorField &	x,
		const Complex &	b,
		cpuColorSpinorField &	y,
		const Complex &	c,
		cpuColorSpinorField &	z,
		cpuColorSpinorField &	w
	)

Definition at line 295 of file blas_cpu.cpp.

void quda::caxpbypczpwCuda	(	const Complex &	a,
		cudaColorSpinorField &	x,
		const Complex &	b,
		cudaColorSpinorField &	y,
		const Complex &	c,
		cudaColorSpinorField &	z,
		cudaColorSpinorField &	w
	)

Definition at line 429 of file blas_quda.cu.

void quda::caxpbypzCpu	(	const Complex &	a,
		cpuColorSpinorField &	x,
		const Complex &	b,
		cpuColorSpinorField &	y,
		cpuColorSpinorField &	z
	)

Definition at line 289 of file blas_cpu.cpp.

void quda::caxpbypzCuda	(	const Complex &	a,
		cudaColorSpinorField &	x,
		const Complex &	b,
		cudaColorSpinorField &	y,
		cudaColorSpinorField &	z
	)

Definition at line 407 of file blas_quda.cu.

double3 quda::caxpbypzYmbwcDotProductUYNormYCpu	(	const Complex &	a,
		const cpuColorSpinorField &	x,
		const Complex &	b,
		cpuColorSpinorField &	y,
		cpuColorSpinorField &	z,
		const cpuColorSpinorField &	w,
		const cpuColorSpinorField &	u
	)

Definition at line 250 of file blas_cpu.cpp.

double3 quda::caxpbypzYmbwcDotProductUYNormYCuda	(	const Complex &	a,
		cudaColorSpinorField &	x,
		const Complex &	b,
		cudaColorSpinorField &	y,
		cudaColorSpinorField &	z,
		cudaColorSpinorField &	w,
		cudaColorSpinorField &	u
	)

Definition at line 643 of file reduce_quda.cu.

void quda::caxpbypzYmbwCpu	(	const Complex &	a,
		const cpuColorSpinorField &	x,
		const Complex &	b,
		cpuColorSpinorField &	y,
		cpuColorSpinorField &	z,
		const cpuColorSpinorField &	w
	)

Definition at line 143 of file blas_cpu.cpp.

void quda::caxpbypzYmbwCuda	(	const Complex &	a,
		cudaColorSpinorField &	x,
		const Complex &	b,
		cudaColorSpinorField &	y,
		cudaColorSpinorField &	z,
		cudaColorSpinorField &	w
	)

Definition at line 366 of file blas_quda.cu.

__device__ void quda::caxpy_	(	const float2 &	a,
		const float4 &	x,
		float4 &	y
	)

Functor to perform the operation y += a * x (complex-valued)

Definition at line 180 of file blas_quda.cu.

__device__ void quda::caxpy_	(	const float2 &	a,
		const float2 &	x,
		float2 &	y
	)

Definition at line 187 of file blas_quda.cu.

__device__ void quda::caxpy_	(	const double2 &	a,
		const double2 &	x,
		double2 &	y
	)

Definition at line 192 of file blas_quda.cu.

__device__ void quda::Caxpy_	(	const float2 &	a,
		const float4 &	x,
		float4 &	y
	)

Functor to perform the operation y += a * x (complex-valued)

Definition at line 353 of file reduce_quda.cu.

__device__ void quda::Caxpy_	(	const float2 &	a,
		const float2 &	x,
		float2 &	y
	)

Definition at line 360 of file reduce_quda.cu.

__device__ void quda::Caxpy_	(	const double2 &	a,
		const double2 &	x,
		double2 &	y
	)

Definition at line 365 of file reduce_quda.cu.

void quda::caxpyCpu	(	const Complex &	a,
		const cpuColorSpinorField &	x,
		cpuColorSpinorField &	y
	)

Definition at line 79 of file blas_cpu.cpp.

void quda::caxpyCuda	(	const Complex &	a,
		cudaColorSpinorField &	x,
		cudaColorSpinorField &	y
	)

Definition at line 207 of file blas_quda.cu.

Complex quda::caxpyDotzyCpu	(	const Complex &	a,
		cpuColorSpinorField &	x,
		cpuColorSpinorField &	y,
		cpuColorSpinorField &	z
	)

Definition at line 303 of file blas_cpu.cpp.

Complex quda::caxpyDotzyCuda	(	const Complex &	a,
		cudaColorSpinorField &	x,
		cudaColorSpinorField &	y,
		cudaColorSpinorField &	z
	)

Definition at line 559 of file reduce_quda.cu.

double quda::caxpyNormCpu	(	const Complex &	a,
		cpuColorSpinorField &	x,
		cpuColorSpinorField &	y
	)

Definition at line 264 of file blas_cpu.cpp.

double quda::caxpyNormCuda	(	const Complex &	a,
		cudaColorSpinorField &	x,
		cudaColorSpinorField &	y
	)

Definition at line 388 of file reduce_quda.cu.

void quda::caxpyXmazCpu	(	const Complex &	a,
		cpuColorSpinorField &	x,
		cpuColorSpinorField &	y,
		cpuColorSpinorField &	z
	)

Definition at line 277 of file blas_cpu.cpp.

void quda::caxpyXmazCuda	(	const Complex &	a,
		cudaColorSpinorField &	x,
		cudaColorSpinorField &	y,
		cudaColorSpinorField &	z
	)

Definition at line 452 of file blas_quda.cu.

double quda::caxpyXmazNormXCpu	(	const Complex &	a,
		cpuColorSpinorField &	x,
		cpuColorSpinorField &	y,
		cpuColorSpinorField &	z
	)

Definition at line 270 of file blas_cpu.cpp.

double quda::caxpyXmazNormXCuda	(	const Complex &	a,
		cudaColorSpinorField &	x,
		cudaColorSpinorField &	y,
		cudaColorSpinorField &	z
	)

Definition at line 413 of file reduce_quda.cu.

__device__ double2 quda::cdot_	(	const double2 &	a,
		const double2 &	b
	)

Returns complex-valued dot product of x and y

Definition at line 449 of file reduce_quda.cu.

__device__ double2 quda::cdot_	(	const float2 &	a,
		const float2 &	b
	)

Definition at line 451 of file reduce_quda.cu.

__device__ double2 quda::cdot_	(	const float4 &	a,
		const float4 &	b
	)

Definition at line 453 of file reduce_quda.cu.

__device__ double3 quda::cdotNormA_	(	const double2 &	a,
		const double2 &	b
	)

Returns the dot product (x,y) first, followed by the norm of x.

Definition at line 570 of file reduce_quda.cu.

__device__ double3 quda::cdotNormA_	(	const float2 &	a,
		const float2 &	b
	)

Definition at line 572 of file reduce_quda.cu.

__device__ double3 quda::cdotNormA_	(	const float4 &	a,
		const float4 &	b
	)

Definition at line 574 of file reduce_quda.cu.

__device__ double3 quda::cdotNormB_	(	const double2 &	a,
		const double2 &	b
	)

Returns the dot product (x,y) first, followed by the norm of y.

Definition at line 600 of file reduce_quda.cu.

__device__ double3 quda::cdotNormB_	(	const float2 &	a,
		const float2 &	b
	)

Definition at line 602 of file reduce_quda.cu.

__device__ double3 quda::cdotNormB_	(	const float4 &	a,
		const float4 &	b
	)

Definition at line 604 of file reduce_quda.cu.

template<typename Float >

Complex quda::cDotProduct	(	const std::complex< Float > *	a,
		const std::complex< Float > *	b,
		const int	N
	)

Definition at line 211 of file blas_cpu.cpp.

Complex quda::cDotProductCpu	(	const cpuColorSpinorField &	a,
		const cpuColorSpinorField &	b
	)

Definition at line 217 of file blas_cpu.cpp.

Complex quda::cDotProductCuda	(	cudaColorSpinorField &	x,
		cudaColorSpinorField &	y
	)

Definition at line 468 of file reduce_quda.cu.

void quda::cDotProductCuda	(	Complex *	result,
		std::vector< cudaColorSpinorField * > &	a,
		std::vector< cudaColorSpinorField * > &	b
	)

Definition at line 474 of file reduce_quda.cu.

double3 quda::cDotProductNormACpu	(	const cpuColorSpinorField &	a,
		const cpuColorSpinorField &	b
	)

Definition at line 237 of file blas_cpu.cpp.

double3 quda::cDotProductNormACuda	(	cudaColorSpinorField &	a,
		cudaColorSpinorField &	b
	)

Definition at line 591 of file reduce_quda.cu.

double3 quda::cDotProductNormBCpu	(	const cpuColorSpinorField &	a,
		const cpuColorSpinorField &	b
	)

Definition at line 243 of file blas_cpu.cpp.

double3 quda::cDotProductNormBCuda	(	cudaColorSpinorField &	a,
		cudaColorSpinorField &	b
	)

Definition at line 620 of file reduce_quda.cu.

void quda::checkMomOrder ( const GaugeField & u )

Definition at line 14 of file copy_gauge.cu.

void quda::cloverCuda	(	cudaColorSpinorField *	out,
		const cudaGaugeField &	gauge,
		const FullClover	clover,
		const cudaColorSpinorField *	in,
		const int	oddBit
	)

Definition at line 229 of file dslash_quda.cu.

void quda::cloverDerivative	(	cudaGaugeField &	out,
		cudaGaugeField &	gauge,
		cudaGaugeField &	oprod,
		int	mu,
		int	nu,
		double	coeff,
		QudaParity	parity,
		int	conjugate
	)

Definition at line 369 of file clover_deriv_quda.cu.

void quda::cloverDslashCuda	(	cudaColorSpinorField *	out,
		const cudaGaugeField &	gauge,
		const FullClover	cloverInv,
		const cudaColorSpinorField *	in,
		const int	oddBit,
		const int	daggerBit,
		const cudaColorSpinorField *	x,
		const double &	k,
		const int *	commDim,
		TimeProfile &	profile,
		const QudaDslashPolicy &	dslashPolicy = `QUDA_DSLASH2`
	)

Definition at line 117 of file dslash_clover.cu.

void quda::cloverInvert	(	CloverField &	clover,
		bool	computeTraceLog,
		QudaFieldLocation	location
	)

This function computes the Cholesky decomposition of each clover matrix and stores the clover inverse field.

Parameters

clover	The clover field (contains both the field itself and its inverse)
computeTraceLog	Whether to compute the trace logarithm of the clover term
location	The location of the field

Definition at line 298 of file clover_invert.cu.

void quda::collectGhostStaple	(	int *	X,
		void *	even,
		void *	odd,
		int	volumeCB,
		int	stride,
		QudaPrecision	precision,
		void *	ghost_staple_gpu,
		int	dir,
		int	whichway,
		cudaStream_t *	stream
	)

Definition at line 481 of file misc_helpers.cu.

template<int dir, int whichway, typename Float2 >

__global__ void quda::collectGhostStapleKernel	(	Float2 *	out,
		Float2 *	in,
		int	parity,
		GhostStapleParam	param
	)

Definition at line 403 of file misc_helpers.cu.

template<class U , class V >

int quda::compareSpinor	(	const U &	u,
		const V &	v,
		const int	tol
	)

Definition at line 60 of file color_spinor_util.cu.

void quda::completeKSForce	(	GaugeField &	mom,
		const GaugeField &	oprod,
		const GaugeField &	gauge,
		QudaFieldLocation	location,
		long long *	flops = `NULL`
	)

Definition at line 206 of file ks_force_quda.cu.

template<typename Float , typename Oprod , typename Gauge , typename Mom >

void quda::completeKSForce	(	Oprod	oprod,
		Gauge	gauge,
		Mom	mom,
		int	dim[4],
		const GaugeField &	meta,
		QudaFieldLocation	location,
		long long *	flops
	)

Definition at line 195 of file ks_force_quda.cu.

template<typename Float , typename Oprod , typename Gauge , typename Mom >

__host__ __device__ void quda::completeKSForceCore	(	KSForceArg< Oprod, Gauge, Mom > &	arg,
		int	idx
	)

Definition at line 59 of file ks_force_quda.cu.

template<typename Float , typename Oprod , typename Gauge , typename Mom >

void quda::completeKSForceCPU ( KSForceArg< Oprod, Gauge, Mom > & arg )

Definition at line 133 of file ks_force_quda.cu.

template<typename Float , typename Oprod , typename Gauge , typename Mom >

__global__ void quda::completeKSForceKernel ( KSForceArg< Oprod, Gauge, Mom > arg )

Definition at line 121 of file ks_force_quda.cu.

template<typename Float >

__device__ __host__ void quda::complexConjugateProduct	(	Float *	a,
		const Float *	b,
		const Float *	c
	)

inline

Definition at line 47 of file gauge_field_order.h.

template<typename Float >

__device__ __host__ void quda::complexDotProduct	(	Float *	a,
		const Float *	b,
		const Float *	c
	)

inline

Definition at line 23 of file gauge_field_order.h.

template<typename Float >

__device__ __host__ void quda::complexProduct	(	Float *	a,
		const Float *	b,
		const Float *	c
	)

inline

Definition at line 16 of file gauge_field_order.h.

template<typename Float >

__device__ __host__ void quda::complexQuotient	(	Float *	a,
		const Float *	b,
		const Float *	c
	)

inline

Definition at line 31 of file gauge_field_order.h.

void quda::computeClover	(	CloverField &	clover,
		const GaugeField &	gauge,
		double	coeff,
		QudaFieldLocation	location
	)

Definition at line 602 of file clover_quda.cu.

void quda::computeCloverSigmaTrace	(	GaugeField &	gauge,
		const CloverField &	clover,
		int	dir1,
		int	dir2,
		QudaFieldLocation	location
	)

Definition at line 310 of file clover_trace_quda.cu.

void quda::computeFatLinkCore	(	cudaGaugeField *	cudaSiteLink,
		double *	act_path_coeff,
		QudaGaugeParam *	qudaGaugeParam,
		QudaComputeFatMethod	method,
		cudaGaugeField *	cudaFatLink,
		cudaGaugeField *	cudaLongLink,
		TimeProfile &	profile
	)

void quda::computeGenStapleFieldParityKernel	(	void *	staple_even,
		void *	staple_odd,
		const void *	sitelink_even,
		const void *	sitelink_odd,
		void *	fatlink_even,
		void *	fatlink_odd,
		const void *	mulink_even,
		const void *	mulink_odd,
		int	mu,
		int	nu,
		int	save_staple,
		double	mycoeff,
		QudaReconstructType	recon,
		QudaPrecision	prec,
		dim3	halfGridDim,
		llfat_kernel_param_t	kparam,
		cudaStream_t *	stream
	)

void quda::computeGenStapleFieldParityKernel_ex	(	void *	staple_even,
		void *	staple_odd,
		const void *	sitelink_even,
		const void *	sitelink_odd,
		void *	fatlink_even,
		void *	fatlink_odd,
		const void *	mulink_even,
		const void *	mulink_odd,
		int	mu,
		int	nu,
		int	save_staple,
		double	mycoeff,
		QudaReconstructType	recon,
		QudaPrecision	prec,
		llfat_kernel_param_t	kparam
	)

template<typename Float , typename Result , typename Oprod , typename Gauge >

void quda::computeKSLongLinkForce	(	Result	res,
		Oprod	oprod,
		Gauge	gauge,
		int	dim[4],
		const GaugeField &	meta,
		QudaFieldLocation	location
	)

Definition at line 421 of file ks_force_quda.cu.

template<typename Float >

void quda::computeKSLongLinkForce	(	GaugeField &	result,
		const GaugeField &	oprod,
		const GaugeField &	gauge,
		QudaFieldLocation	location
	)

Definition at line 430 of file ks_force_quda.cu.

template<typename Float , typename Result , typename Oprod , typename Gauge >

__host__ __device__ void quda::computeKSLongLinkForceCore	(	KSLongLinkArg< Result, Oprod, Gauge > &	arg,
		int	idx
	)

Definition at line 276 of file ks_force_quda.cu.

template<typename Float , typename Result , typename Oprod , typename Gauge >

void quda::computeKSLongLinkForceCPU ( KSLongLinkArg< Result, Oprod, Gauge > & arg )

Definition at line 352 of file ks_force_quda.cu.

template<typename Float , typename Result , typename Oprod , typename Gauge >

__global__ void quda::computeKSLongLinkForceKernel ( KSLongLinkArg< Result, Oprod, Gauge > arg )

Definition at line 340 of file ks_force_quda.cu.

template<class Cmplx >

__device__ __host__ void quda::computeLinkInverse	(	Matrix< Cmplx, 3 > *	uinv,
		const Matrix< Cmplx, 3 > &	u
	)

inline

Definition at line 924 of file quda_matrix.h.

void quda::computeLongLinkCuda	(	void *	outEven,
		void *	outOdd,
		const void *const	inEven,
		const void *const	inOdd,
		double	coeff,
		QudaReconstructType	recon,
		QudaPrecision	prec,
		dim3	halfGridDim,
		llfat_kernel_param_t	kparam
	)

template<class T >

__device__ __host__ void quda::computeMatrixInverse	(	const Matrix< T, 3 > &	u,
		Matrix< T, 3 > *	uinv
	)

inline

Definition at line 555 of file quda_matrix.h.

void quda::computeStaggeredOprod	(	cudaGaugeField &	out,
		cudaColorSpinorField &	in,
		FaceBuffer &	facebuffer,
		const unsigned int	parity,
		const double	coeff,
		const unsigned int	displacement
	)

void quda::computeStaggeredOprod	(	cudaGaugeField &	outA,
		cudaGaugeField &	outB,
		cudaColorSpinorField &	inEven,
		cudaColorSpinorField &	inOdd,
		FaceBuffer &	faceBuffer,
		const unsigned int	parity,
		const double	coeff[2]
	)

Definition at line 635 of file staggered_oprod.cu.

template<typename ValueType >

__host__ __device__ ValueType quda::conj ( ValueType x )

inline

Definition at line 115 of file complex_quda.h.

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::conj ( const complex< ValueType > & z )

inline

Returns the complex conjugate of z.

Definition at line 821 of file complex_quda.h.

template<class Cmplx >

__device__ __host__ Cmplx quda::conj ( const Cmplx & a )

inline

Definition at line 251 of file quda_matrix.h.

__device__ __host__ double quda::conj ( const double & a )

inline

Definition at line 256 of file quda_matrix.h.

__device__ __host__ float quda::conj ( const float & a )

inline

Definition at line 261 of file quda_matrix.h.

template<typename Cmplx >

__device__ __host__ Cmplx quda::Conj ( const Cmplx & a )

inline

Definition at line 267 of file quda_matrix.h.

template<class T , int N>

__device__ __host__ Matrix<T,N> quda::conj ( const Matrix< T, N > & other )

inline

Definition at line 540 of file quda_matrix.h.

void quda::contractCuda	(	const cudaColorSpinorField &	x,
		const cudaColorSpinorField &	y,
		void *	result,
		const QudaContractType	contract_type,
		const QudaParity	parity
	)

Contracts the x and y spinors (x is daggered) and stores the result in the array result. One must specify the contract type (time-sliced or volumed contract, and whether we should include a gamma5 in the middle), as well as the time-slice (see overloaded version of the same function) in case we don't want a volume contraction. The function works only with parity spinors, and the parity must be specified.

Definition at line 290 of file contract.cu.

void quda::contractCuda	(	const cudaColorSpinorField &	x,
		const cudaColorSpinorField &	y,
		void *	result,
		const QudaContractType	contract_type,
		const int	nTSlice,
		const QudaParity	parity
	)

Contracts the x and y spinors (x is daggered) and stores the result in the array result. One must specify the contract type (time-sliced or volumed contract, and whether we should include a gamma5 in the middle), as well as the time-slice in case we don't want a volume contraction. The function works only with parity spinors, and the parity must be specified.

Definition at line 325 of file contract.cu.

template<typename T1 , typename T2 >

__host__ __device__ void quda::copy	(	T1 &	a,
		const T2 &	b
	)

inline

Definition at line 33 of file register_traits.h.

template<>

__host__ __device__ void quda::copy	(	float &	a,
		const short &	b
	)

inline

Definition at line 34 of file register_traits.h.

template<>

__host__ __device__ void quda::copy	(	short &	a,
		const float &	b
	)

inline

Definition at line 35 of file register_traits.h.

void quda::copyArrayToLink	(	Matrix< float2, 3 > *	link,
		float *	array
	)

inline

Definition at line 962 of file quda_matrix.h.

template<class Cmplx , class Real >

void quda::copyArrayToLink	(	Matrix< Cmplx, 3 > *	link,
		Real *	array
	)

inline

Definition at line 973 of file quda_matrix.h.

template<class T , int N>

__device__ __host__ void quda::copyColumn	(	const Matrix< T, N > &	m,
		int	c,
		Array< T, N > *	a
	)

inline

Definition at line 709 of file quda_matrix.h.

void quda::copyCuda	(	cudaColorSpinorField &	dst,
		const cudaColorSpinorField &	src
	)

Definition at line 235 of file copy_quda.cu.

template<int Ns, typename dstFloat , typename srcFloat >

void quda::copyExtendedColorSpinor	(	ColorSpinorField &	dst,
		const ColorSpinorField &	src,
		const int	parity,
		const QudaFieldLocation	location,
		dstFloat *	Dst,
		srcFloat *	Src,
		float *	dstNorm,
		float *	srcNorm
	)

Definition at line 413 of file extended_color_spinor_utilities.cu.

template<typename dstFloat , typename srcFloat >

void quda::CopyExtendedColorSpinor	(	ColorSpinorField &	dst,
		const ColorSpinorField &	src,
		const int	parity,
		const QudaFieldLocation	location,
		dstFloat *	Dst,
		srcFloat *	Src,
		float *	dstNorm = `0`,
		float *	srcNorm = `0`
	)

Definition at line 481 of file extended_color_spinor_utilities.cu.

void quda::copyExtendedColorSpinor	(	ColorSpinorField &	dst,
		const ColorSpinorField &	src,
		QudaFieldLocation	location,
		const int	parity,
		void *	Dst,
		void *	Src,
		void *	dstNorm,
		void *	srcNorm
	)

Definition at line 507 of file extended_color_spinor_utilities.cu.

void quda::copyExtendedGauge	(	GaugeField &	out,
		const GaugeField &	in,
		QudaFieldLocation	location,
		void *	Out = `0`,
		void *	In = `0`
	)

This function is used for copying the gauge field into an extended gauge field. Defined in copy_gauge_extended.cu.

Parameters

out	The extended output field to which we are copying
in	The input field from which we are copying
location	The location of where we are doing the copying (CPU or CUDA)
Out	The output buffer (optional)
In	The input buffer (optional)

Definition at line 337 of file copy_gauge_extended.cu.

template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder >

void quda::copyGauge ( CopyGaugeArg< OutOrder, InOrder > arg )

Generic CPU gauge reordering and packing

Definition at line 27 of file copy_gauge_inc.cu.

template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder >

void quda::copyGauge	(	OutOrder	outOrder,
		const InOrder	inOrder,
		int	volume,
		const int *	faceVolumeCB,
		int	nDim,
		int	geometry,
		const GaugeField &	out,
		QudaFieldLocation	location,
		int	type
	)

Definition at line 185 of file copy_gauge_inc.cu.

template<typename FloatOut , typename FloatIn , int length, typename InOrder >

void quda::copyGauge	(	const InOrder &	inOrder,
		GaugeField &	out,
		QudaFieldLocation	location,
		FloatOut *	Out,
		FloatOut **	outGhost,
		int	type
	)

Definition at line 224 of file copy_gauge_inc.cu.

template<typename FloatOut , typename FloatIn , int length>

void quda::copyGauge	(	GaugeField &	out,
		const GaugeField &	in,
		QudaFieldLocation	location,
		FloatOut *	Out,
		FloatIn *	In,
		FloatOut **	outGhost,
		FloatIn **	inGhost,
		int	type
	)

Definition at line 349 of file copy_gauge_inc.cu.

template<typename FloatOut , typename FloatIn >

void quda::copyGauge	(	GaugeField &	out,
		const GaugeField &	in,
		QudaFieldLocation	location,
		FloatOut *	Out,
		FloatIn *	In,
		FloatOut **	outGhost,
		FloatIn **	inGhost,
		int	type
	)

Definition at line 460 of file copy_gauge_inc.cu.

template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder >

__device__ __host__ void quda::copyGaugeEx	(	CopyGaugeExArg< OutOrder, InOrder > &	arg,
		int	X,
		int	parity
	)

Copy a regular gauge field into an extended gauge field

Definition at line 35 of file copy_gauge_extended.cu.

template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder >

void quda::copyGaugeEx ( CopyGaugeExArg< OutOrder, InOrder > arg )

Definition at line 64 of file copy_gauge_extended.cu.

template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder >

void quda::copyGaugeEx	(	OutOrder	outOrder,
		const InOrder	inOrder,
		const int *	E,
		const int *	X,
		const int *	faceVolumeCB,
		const GaugeField &	meta,
		QudaFieldLocation	location
	)

Definition at line 141 of file copy_gauge_extended.cu.

template<typename FloatOut , typename FloatIn , int length, typename InOrder >

void quda::copyGaugeEx	(	const InOrder &	inOrder,
		const int *	X,
		GaugeField &	out,
		QudaFieldLocation	location,
		FloatOut *	Out
	)

Definition at line 152 of file copy_gauge_extended.cu.

template<typename FloatOut , typename FloatIn , int length>

void quda::copyGaugeEx	(	GaugeField &	out,
		const GaugeField &	in,
		QudaFieldLocation	location,
		FloatOut *	Out,
		FloatIn *	In
	)

Definition at line 236 of file copy_gauge_extended.cu.

template<typename FloatOut , typename FloatIn >

void quda::copyGaugeEx	(	GaugeField &	out,
		const GaugeField &	in,
		QudaFieldLocation	location,
		FloatOut *	Out,
		FloatIn *	In
	)

Definition at line 318 of file copy_gauge_extended.cu.

template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder >

__global__ void quda::copyGaugeExKernel ( CopyGaugeExArg< OutOrder, InOrder > arg )

Definition at line 73 of file copy_gauge_extended.cu.

template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder >

__global__ void quda::copyGaugeKernel ( CopyGaugeArg< OutOrder, InOrder > arg )

Generic CUDA gauge reordering and packing. Adopts a similar form to the CPU version, using the same inlined functions.

Definition at line 51 of file copy_gauge_inc.cu.

void quda::copyGenericClover	(	CloverField &	out,
		const CloverField &	in,
		bool	inverse,
		QudaFieldLocation	location,
		void *	Out = `0`,
		void *	In = `0`,
		void *	outNorm = `0`,
		void *	inNorm = `0`
	)

This generic function is used for copying the clover field, where the input and output can be in any order and location.

Parameters

out	The output field to which we are copying
in	The input field from which we are copying
inverse	Whether we are copying the inverse term or not
location	The location of where we are doing the copying (CPU or CUDA)
Out	The output buffer (optional)
In	The input buffer (optional)
outNorm	The output norm buffer (optional)
inNorm	The input norm buffer (optional)

Definition at line 182 of file copy_clover.cu.

template<int Ns, typename dstFloat , typename srcFloat >

void quda::copyGenericColorSpinor	(	ColorSpinorField &	dst,
		const ColorSpinorField &	src,
		QudaFieldLocation	location,
		dstFloat *	Dst,
		srcFloat *	Src,
		float *	dstNorm,
		float *	srcNorm
	)

Definition at line 337 of file copy_color_spinor.cu.

template<typename dstFloat , typename srcFloat >

void quda::CopyGenericColorSpinor	(	ColorSpinorField &	dst,
		const ColorSpinorField &	src,
		QudaFieldLocation	location,
		dstFloat *	Dst,
		srcFloat *	Src,
		float *	dstNorm = `0`,
		float *	srcNorm = `0`
	)

Definition at line 405 of file copy_color_spinor.cu.

void quda::copyGenericColorSpinor	(	ColorSpinorField &	dst,
		const ColorSpinorField &	src,
		QudaFieldLocation	location,
		void *	Dst = `0`,
		void *	Src = `0`,
		void *	dstNorm = `0`,
		void *	srcNorm = `0`
	)

Definition at line 422 of file copy_color_spinor.cu.

void quda::copyGenericGauge	(	GaugeField &	out,
		const GaugeField &	in,
		QudaFieldLocation	location,
		void *	Out = `0`,
		void *	In = `0`,
		void **	ghostOut = `0`,
		void **	ghostIn = `0`,
		int	type = `0`
	)

This function is used for copying a gauge field (body and/or ghost zone) from an input field to an output field. Defined in copy_gauge.cu.

Parameters

out	The output field to which we are copying
in	The input field from which we are copying
location	The location of where we are doing the copying (CPU or CUDA)
Out	The output buffer (optional)
In	The input buffer (optional)
ghostOut	The output ghost buffer (optional)
ghostIn	The input ghost buffer (optional)
type	The type of copy we are doing (0: body and ghost; otherwise: ghost only)

Definition at line 30 of file copy_gauge.cu.

void quda::copyGenericGaugeDoubleOut	(	GaugeField &	out,
		const GaugeField &	in,
		QudaFieldLocation	location,
		void *	Out,
		void *	In,
		void **	ghostOut,
		void **	ghostIn,
		int	type
	)

Definition at line 5 of file copy_gauge_double.cu.

void quda::copyGenericGaugeHalfOut	(	GaugeField &	out,
		const GaugeField &	in,
		QudaFieldLocation	location,
		void *	Out,
		void *	In,
		void **	ghostOut,
		void **	ghostIn,
		int	type
	)

Definition at line 5 of file copy_gauge_half.cu.

void quda::copyGenericGaugeSingleOut	(	GaugeField &	out,
		const GaugeField &	in,
		QudaFieldLocation	location,
		void *	Out,
		void *	In,
		void **	ghostOut,
		void **	ghostIn,
		int	type
	)

Definition at line 5 of file copy_gauge_single.cu.

template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder >

void quda::copyGhost ( CopyGaugeArg< OutOrder, InOrder > arg )

Generic CPU gauge ghost reordering and packing

Definition at line 74 of file copy_gauge_inc.cu.

template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder >

__global__ void quda::copyGhostKernel ( CopyGaugeArg< OutOrder, InOrder > arg )

Generic CUDA kernel for copying the ghost zone. Adopts a similar form to the CPU version, using the same inlined functions.

Definition at line 98 of file copy_gauge_inc.cu.

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis , bool extend>

__device__ __host__ void quda::copyInterior	(	CopySpinorExArg< OutOrder, InOrder, Basis > &	arg,
		int	X
	)

Definition at line 170 of file extended_color_spinor_utilities.cu.

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis , bool extend>

void quda::copyInterior ( CopySpinorExArg< OutOrder, InOrder, Basis > & arg )

Definition at line 220 of file extended_color_spinor_utilities.cu.

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis , bool extend>

__global__ void quda::copyInteriorKernel ( CopySpinorExArg< OutOrder, InOrder, Basis > arg )

Definition at line 206 of file extended_color_spinor_utilities.cu.

void quda::copyLinkToArray	(	float *	array,
		const Matrix< float2, 3 > &	link
	)

inline

Definition at line 985 of file quda_matrix.h.

template<class Cmplx , class Real >

void quda::copyLinkToArray	(	Real *	array,
		const Matrix< Cmplx, 3 > &	link
	)

inline

Definition at line 997 of file quda_matrix.h.

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis >

void quda::copySpinorEx	(	OutOrder	outOrder,
		const InOrder	inOrder,
		const Basis	basis,
		const int *	E,
		const int *	X,
		const int	parity,
		const bool	extend,
		const ColorSpinorField &	meta,
		QudaFieldLocation	location
	)

Definition at line 281 of file extended_color_spinor_utilities.cu.

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder >

void quda::copySpinorEx	(	OutOrder	outOrder,
		InOrder	inOrder,
		const QudaGammaBasis	outBasis,
		const QudaGammaBasis	inBasis,
		const int *	E,
		const int *	X,
		const int	parity,
		const bool	extend,
		const ColorSpinorField &	meta,
		QudaFieldLocation	location
	)

Definition at line 296 of file extended_color_spinor_utilities.cu.

template<typename ValueType >

__host__ __device__ ValueType quda::cos ( ValueType x )

inline

Definition at line 35 of file complex_quda.h.

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::cos ( const complex< ValueType > & z )

inline

Definition at line 884 of file complex_quda.h.

template<>

__host__ __device__ complex<float> quda::cos ( const complex< float > & z )

inline

Definition at line 892 of file complex_quda.h.

template<typename ValueType >

__host__ __device__ ValueType quda::cosh ( ValueType x )

inline

Definition at line 70 of file complex_quda.h.

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::cosh ( const complex< ValueType > & z )

inline

Definition at line 900 of file complex_quda.h.

template<>

__host__ __device__ complex<float> quda::cosh ( const complex< float > & z )

inline

Definition at line 908 of file complex_quda.h.

void quda::covDev	(	cudaColorSpinorField *	out,
		cudaGaugeField &	gauge,
		const cudaColorSpinorField *	in,
		const int	parity,
		const int	mu,
		TimeProfile &	profile
	)

void quda::createDirac	(	Dirac *&	d,
		Dirac *&	dSloppy,
		Dirac *&	dPre,
		QudaInvertParam &	param,
		const bool	pc_solve
	)

Definition at line 1228 of file interface_quda.cpp.

void quda::createDslashEvents ( )

Definition at line 108 of file dslash_quda.cu.

template<typename Float >

ColorSpinorFieldOrder<Float>* quda::createOrder ( const cpuColorSpinorField & a )

Definition at line 7 of file color_spinor_util.cu.

void quda::createStaggeredOprodEvents ( )

__device__ void quda::cxpaypbz_	(	const float4 &	x,
		const float2 &	a,
		const float4 &	y,
		const float2 &	b,
		float4 &	z
	)

Functor to perform the operation z[i] = x[i] + a*y[i] + b*z[i]

Definition at line 256 of file blas_quda.cu.

__device__ void quda::cxpaypbz_	(	const float2 &	x,
		const float2 &	a,
		const float2 &	y,
		const float2 &	b,
		float2 &	z
	)

Definition at line 265 of file blas_quda.cu.

__device__ void quda::cxpaypbz_	(	const double2 &	x,
		const double2 &	a,
		const double2 &	y,
		const double2 &	b,
		double2 &	z
	)

Definition at line 272 of file blas_quda.cu.

void quda::cxpaypbzCpu	(	const cpuColorSpinorField &	x,
		const Complex &	b,
		const cpuColorSpinorField &	y,
		const Complex &	c,
		cpuColorSpinorField &	z
	)

Definition at line 115 of file blas_cpu.cpp.

void quda::cxpaypbzCuda	(	cudaColorSpinorField &	x,
		const Complex &	b,
		cudaColorSpinorField &	y,
		const Complex &	c,
		cudaColorSpinorField &	z
	)

Definition at line 290 of file blas_quda.cu.

void quda::destroyDslashEvents ( )

Definition at line 129 of file dslash_quda.cu.

void quda::destroyStaggeredOprodEvents ( )

void quda::device_free_	(	const char *	func,
		const char *	file,
		int	line,
		void *	ptr
	)

Free device memory allocated with device_malloc(). This function should only be called via the device_free() macro, defined in malloc_quda.h

Definition at line 232 of file malloc.cpp.

void * quda::device_malloc_	(	const char *	func,
		const char *	file,
		int	line,
		size_t	size
	)

Perform a standard cudaMalloc() with error-checking. This function should only be called via the device_malloc() macro, defined in malloc_quda.h

Definition at line 146 of file malloc.cpp.

template<int N, typename FloatN , typename Float2 >

__global__ void quda::do_link_format_cpu_to_gpu	(	FloatN *	dst,
		Float2 *	src,
		int	reconstruct,
		int	Vh,
		int	pad,
		int	ghostV,
		size_t	threads
	)

Definition at line 43 of file misc_helpers.cu.

template<int N, typename FloatN , typename Float2 >

__global__ void quda::do_link_format_cpu_to_gpu_milc	(	FloatN *	dst,
		Float2 *	src,
		int	reconstruct,
		int	Vh,
		int	pad,
		int	ghostV,
		size_t	threads
	)

Definition at line 103 of file misc_helpers.cu.

template<typename FloatN >

__global__ void quda::do_link_format_gpu_to_cpu	(	FloatN *	dst,
		FloatN *	src,
		int	Vh,
		int	stride
	)

Definition at line 322 of file misc_helpers.cu.

void quda::domainWallDslashCuda	(	cudaColorSpinorField *	out,
		const cudaGaugeField &	gauge,
		const cudaColorSpinorField *	in,
		const int	parity,
		const int	dagger,
		const cudaColorSpinorField *	x,
		const double &	m_f,
		const double &	k,
		const int *	commDim,
		TimeProfile &	profile,
		const QudaDslashPolicy &	dslashPolicy = `QUDA_DSLASH`
	)

Definition at line 172 of file dslash_domain_wall.cu.

void quda::domainWallDslashCuda	(	cudaColorSpinorField *	out,
		const cudaGaugeField &	gauge,
		const cudaColorSpinorField *	in,
		const int	parity,
		const int	dagger,
		const cudaColorSpinorField *	x,
		const double &	m_f,
		const double &	k,
		const int *	commDim,
		const int	DS_type,
		TimeProfile &	profile,
		const QudaDslashPolicy &	dslashPolicy = `QUDA_DSLASH2`
	)

Definition at line 234 of file dslash_domain_wall_4d.cu.

__device__ double quda::dot_	(	const double2 &	a,
		const double2 &	b
	)

Return the real dot product of a and b

Definition at line 154 of file reduce_quda.cu.

__device__ float quda::dot_	(	const float2 &	a,
		const float2 &	b
	)

Definition at line 155 of file reduce_quda.cu.

__device__ float quda::dot_	(	const float4 &	a,
		const float4 &	b
	)

Definition at line 156 of file reduce_quda.cu.

__device__ double2 quda::dotNormA_	(	const double2 &	a,
		const double2 &	b
	)

Definition at line 273 of file reduce_quda.cu.

__device__ double2 quda::dotNormA_	(	const float2 &	a,
		const float2 &	b
	)

Definition at line 276 of file reduce_quda.cu.

__device__ double2 quda::dotNormA_	(	const float4 &	a,
		const float4 &	b
	)

Definition at line 280 of file reduce_quda.cu.

void quda::endBlas ( void )

Definition at line 59 of file blas_quda.cu.

void quda::endReduce ( void )

Definition at line 85 of file reduce_quda.cu.

void quda::exchangeExtendedGhost	(	cudaColorSpinorField *	spinor,
		int	R[],
		int	parity,
		cudaStream_t *	stream_p
	)

Definition at line 24 of file extended_color_spinor_utilities.cu.

template<typename ValueType >

__host__ __device__ ValueType quda::exp ( ValueType x )

inline

Definition at line 85 of file complex_quda.h.

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::exp ( const complex< ValueType > & z )

inline

Definition at line 917 of file complex_quda.h.

template<>

__host__ __device__ complex<float> quda::exp ( const complex< float > & z )

inline

Definition at line 923 of file complex_quda.h.

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename InOrder >

void quda::extendedCopyColorSpinor	(	InOrder &	inOrder,
		ColorSpinorField &	out,
		QudaGammaBasis	inBasis,
		const int *	E,
		const int *	X,
		const int	parity,
		const bool	extend,
		QudaFieldLocation	location,
		FloatOut *	Out,
		float *	outNorm
	)

Definition at line 323 of file extended_color_spinor_utilities.cu.

template<typename FloatOut , typename FloatIn , int Ns, int Nc>

void quda::extendedCopyColorSpinor	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const int	parity,
		const QudaFieldLocation	location,
		FloatOut *	Out,
		FloatIn *	In,
		float *	outNorm,
		float *	inNorm
	)

Definition at line 359 of file extended_color_spinor_utilities.cu.

void quda::extractExtendedGaugeGhost	(	const GaugeField &	u,
		int	dim,
		const int *	R,
		void **	ghost,
		bool	extract
	)

This function is used for extracting the gauge ghost zone from, or injecting it into, an extended gauge field array. Defined in extract_gauge_ghost_extended.cu.

Parameters

u	The gauge field from which we want to extract/pack the ghost zone
dim	The dimension in which we are packing/unpacking
ghost	The array where we want to pack/unpack the ghost zone into/from
extract	Whether we are extracting into ghost or injecting from ghost

Definition at line 440 of file extract_gauge_ghost_extended.cu.

void quda::extractGaugeGhost	(	const GaugeField &	u,
		void **	ghost
	)

This function is used for extracting the gauge ghost zone from a gauge field array. Defined in extract_gauge_ghost.cu.

Parameters

u	The gauge field from which we want to extract the ghost zone
ghost	The array where we want to pack the ghost zone into

Definition at line 307 of file extract_gauge_ghost.cu.

template<typename Float , int length, int nDim, typename Order >

void quda::extractGhost ( ExtractGhostArg< Order, nDim > arg )

Generic CPU gauge ghost extraction and packing. NB: This routine is specialized to four dimensions.

Definition at line 33 of file extract_gauge_ghost.cu.

template<typename Float , int length, typename Order >

void quda::extractGhost	(	Order	order,
		const GaugeField &	u,
		QudaFieldLocation	location
	)

Generic CPU gauge ghost extraction and packing. NB: This routine is specialized to four dimensions.

Definition at line 172 of file extract_gauge_ghost.cu.

template<typename Float >

void quda::extractGhost	(	const GaugeField &	u,
		Float **	Ghost
	)

This is the template driver for extractGhost

Definition at line 214 of file extract_gauge_ghost.cu.

template<typename Float , int length, int nDim, typename Order , bool extract>

void quda::extractGhostEx ( ExtractGhostExArg< Order, nDim > arg )

Generic CPU gauge ghost extraction and packing. NB: This routine is specialized to four dimensions.

Definition at line 93 of file extract_gauge_ghost_extended.cu.

template<typename Float , int length, typename Order >

void quda::extractGhostEx	(	Order	order,
		const int	dim,
		const int *	surfaceCB,
		const int *	E,
		const int *	R,
		bool	extract,
		const GaugeField &	u,
		QudaFieldLocation	location
	)

Generic CPU gauge ghost extraction and packing. NB: This routine is specialized to four dimensions.

Parameters

E	the extended gauge dimensions
R	array holding the radius of the extended region
extract	Whether we are extracting or injecting the ghost zone

Definition at line 274 of file extract_gauge_ghost_extended.cu.

template<typename Float >

void quda::extractGhostEx	(	const GaugeField &	u,
		int	dim,
		const int *	R,
		Float **	Ghost,
		bool	extract
	)

This is the template driver for extractGhostEx

Definition at line 329 of file extract_gauge_ghost_extended.cu.

template<typename Float , int length, int nDim, typename Order , bool extract>

__global__ void quda::extractGhostExKernel ( ExtractGhostExArg< Order, nDim > arg )

Generic GPU gauge ghost extraction and packing. NB: This routine is specialized to four dimensions. FIXME: this implementation will have two-way warp divergence.

Definition at line 140 of file extract_gauge_ghost_extended.cu.

template<typename Float , int length, int nDim, typename Order >

__global__ void quda::extractGhostKernel ( ExtractGhostArg< Order, nDim > arg )

Generic GPU gauge ghost extraction and packing. NB: This routine is specialized to four dimensions. FIXME: this implementation will have two-way warp divergence.

Definition at line 78 of file extract_gauge_ghost.cu.

template<typename Float , int length, typename Arg >

__device__ __host__ void quda::extractor	(	Arg &	arg,
		int	dir,
		int	a,
		int	b,
		int	c,
		int	d,
		int	g,
		int	parity
	)

Definition at line 49 of file extract_gauge_ghost_extended.cu.

void quda::fermion_force_cuda	(	double	eps,
		double	weight1,
		double	weight2,
		void *	act_path_coeff,
		FullHw	cudaHw,
		cudaGaugeField &	cudaSiteLink,
		cudaGaugeField &	cudaMom,
		QudaGaugeParam *	param
	)

void quda::fermion_force_init_cuda ( QudaGaugeParam * param )

void quda::fillInitCGSolveParam ( SolverParam & initCGparam )

Definition at line 394 of file inv_eigcg_quda.cpp.

void quda::fillInnerSolveParam	(	SolverParam &	inner,
		const SolverParam &	outer
	)

Definition at line 28 of file inv_gcr_quda.cpp.

void quda::gamma5Cuda	(	cudaColorSpinorField *	out,
		const cudaColorSpinorField *	in
	)

Applies a gamma5 matrix to a spinor. This is the function to be called in interfaces; it requires only pointers to the output spinor (out) and the input spinor (in), in that order.

Definition at line 85 of file contract.cu.

void quda::gauge_force_cuda	(	cudaGaugeField &	cudaMom,
		double	eb3,
		cudaGaugeField &	cudaSiteLink,
		QudaGaugeParam *	param,
		int ***	input_path,
		int *	length,
		double *	path_coeff,
		int	num_paths,
		int	max_length
	)

Definition at line 328 of file gauge_force_quda.cu.

void quda::gauge_force_cuda_dir	(	cudaGaugeField &	cudaMom,
		const int	dir,
		const double	eb3,
		const cudaGaugeField &	cudaSiteLink,
		const QudaGaugeParam *	param,
		int **	input_path,
		const int *	length,
		const double *	path_coeff,
		const int	num_paths,
		const int	max_length
	)

Definition at line 274 of file gauge_force_quda.cu.

void quda::gauge_force_init_cuda	(	QudaGaugeParam *	param,
		int	max_length
	)

Definition at line 112 of file gauge_force_quda.cu.

template<int oddBit, typename Float , typename Float2 , typename FloatN >

__global__ void quda::GAUGE_FORCE_KERN_NAME	(	Float2 *	momEven,
		Float2 *	momOdd,
		const int	dir,
		const double	eb3,
		const FloatN *	linkEven,
		const FloatN *	linkOdd,
		const int *	input_path,
		const int *	length,
		const double *	path_coeff,
		const int	num_paths,
		const kernel_param_t	kparam
	)

Definition at line 477 of file gauge_force_quda.cu.

int quda::genericCompare	(	const cpuColorSpinorField &	a,
		const cpuColorSpinorField &	b,
		int	tol
	)

Definition at line 118 of file color_spinor_util.cu.

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder >

void quda::genericCopyColorSpinor	(	OutOrder &	outOrder,
		const InOrder &	inOrder,
		QudaGammaBasis	dstBasis,
		QudaGammaBasis	srcBasis,
		const ColorSpinorField &	out,
		QudaFieldLocation	location
	)

Decide whether we are changing basis or not

Definition at line 209 of file copy_color_spinor.cu.

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename InOrder >

void quda::genericCopyColorSpinor	(	InOrder &	inOrder,
		ColorSpinorField &	out,
		QudaGammaBasis	inBasis,
		QudaFieldLocation	location,
		FloatOut *	Out,
		float *	outNorm
	)

Decide on the output order

Definition at line 268 of file copy_color_spinor.cu.

template<typename FloatOut , typename FloatIn , int Ns, int Nc>

void quda::genericCopyColorSpinor	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		QudaFieldLocation	location,
		FloatOut *	Out,
		FloatIn *	In,
		float *	outNorm,
		float *	inNorm
	)

Decide on the input order

Definition at line 305 of file copy_color_spinor.cu.

void quda::genericPrintVector	(	cpuColorSpinorField &	a,
		unsigned int	x
	)

Definition at line 165 of file color_spinor_util.cu.

void quda::genericSource	(	cpuColorSpinorField &	a,
		QudaSourceType	sourceType,
		int	x,
		int	s,
		int	c
	)

Definition at line 38 of file color_spinor_util.cu.

cudaStream_t * quda::getBlasStream ( )

Definition at line 64 of file blas_quda.cu.

__device__ __host__ void quda::getCoords	(	int	x[4],
		int	cb_index,
		const int	X[4],
		int	parity
	)

inline

Definition at line 48 of file ks_force_quda.cu.

template<class T >

__device__ __host__ T quda::getDeterminant ( const Matrix< T, 3 > & a )

inline

Definition at line 385 of file quda_matrix.h.

bool quda::getDslashLaunch ( )

bool quda::getKernelPackT ( )

Returns: Whether the T dimension is kernel packed or not

Definition at line 84 of file dslash_quda.cu.

template<class Cmplx >

__device__ __host__ Cmplx quda::getPreciseInverse ( const Cmplx & z )

inline

Definition at line 276 of file quda_matrix.h.

template<class T >

__device__ __host__ T quda::getTrace ( const Matrix< T, 3 > & a )

inline

Definition at line 378 of file quda_matrix.h.

bool quda::getTwistPack ( )

Returns: Whether a kernel requires twisted pack or not

Definition at line 91 of file dslash_quda.cu.

template<typename Float >

double3 quda::HeavyQuarkResidualNorm	(	const Float *	x,
		const Float *	r,
		const int	volume,
		const int	Nint
	)

Definition at line 310 of file blas_cpu.cpp.

double3 quda::HeavyQuarkResidualNormCpu	(	cpuColorSpinorField &	x,
		cpuColorSpinorField &	r
	)

Definition at line 331 of file blas_cpu.cpp.

double3 quda::HeavyQuarkResidualNormCpu	(	cpuColorSpinorField &	x,
		cpuColorSpinorField &	y,
		cpuColorSpinorField &	r
	)

Definition at line 352 of file blas_cpu.cpp.

double3 quda::HeavyQuarkResidualNormCuda	(	cudaColorSpinorField &	x,
		cudaColorSpinorField &	r
	)

Definition at line 777 of file reduce_quda.cu.

void quda::host_free_	(	const char *	func,
		const char *	file,
		int	line,
		void *	ptr
	)

Free host memory allocated with safe_malloc(), pinned_malloc(), or mapped_malloc(). This function should only be called via the host_free() macro, defined in malloc_quda.h

Definition at line 256 of file malloc.cpp.

void quda::improvedStaggeredDslashCuda	(	cudaColorSpinorField *	out,
		const cudaGaugeField &	fatGauge,
		const cudaGaugeField &	longGauge,
		const cudaColorSpinorField *	in,
		const int	parity,
		const int	dagger,
		const cudaColorSpinorField *	x,
		const double &	k,
		const int *	commDim,
		TimeProfile &	profile,
		const QudaDslashPolicy &	dslashPolicy = `QUDA_DSLASH2`
	)

Definition at line 135 of file dslash_improved_staggered.cu.

template<int N>

__device__ __host__ int quda::index	(	int	i,
		int	j
	)

inline

Definition at line 342 of file quda_matrix.h.

void quda::initBlas ( )

Definition at line 53 of file blas_quda.cu.

void quda::initReduce ( )

Definition at line 52 of file reduce_quda.cu.

template<typename Float , int length, typename Arg >

__device__ __host__ void quda::injector	(	Arg &	arg,
		int	dir,
		int	a,
		int	b,
		int	c,
		int	d,
		int	g,
		int	parity
	)

Definition at line 70 of file extract_gauge_ghost_extended.cu.

bool quda::isUnitary	(	const QudaGaugeParam &	param,
		cpuGaugeField &	field,
		double	max_error
	)

void quda::link_format_cpu_to_gpu	(	void *	dst,
		void *	src,
		int	reconstruct,
		int	Vh,
		int	pad,
		int	ghostV,
		QudaPrecision	prec,
		QudaGaugeFieldOrder	cpu_order,
		cudaStream_t	stream
	)

Definition at line 144 of file misc_helpers.cu.

void quda::link_format_gpu_to_cpu	(	void *	dst,
		void *	src,
		int	Vh,
		int	stride,
		QudaPrecision	prec,
		cudaStream_t	stream
	)

Definition at line 347 of file misc_helpers.cu.

__device__ __host__ int quda::linkIndex	(	int	x[],
		int	dx[],
		const int	X[4]
	)

inline

Definition at line 40 of file ks_force_quda.cu.

void quda::llfat_cuda	(	cudaGaugeField *	cudaFatLink,
		cudaGaugeField *	cudaLongLink,
		cudaGaugeField &	cudaSiteLink,
		cudaGaugeField &	cudaStaple,
		cudaGaugeField &	cudaStaple1,
		QudaGaugeParam *	param,
		double *	act_path_coeff
	)

Definition at line 23 of file llfat_quda_itf.cpp.

void quda::llfat_cuda_ex	(	cudaGaugeField *	cudaFatLink,
		cudaGaugeField *	cudaLongLink,
		cudaGaugeField &	cudaSiteLink,
		cudaGaugeField &	cudaStaple,
		cudaGaugeField &	cudaStaple1,
		QudaGaugeParam *	param,
		double *	act_path_coeff
	)

Definition at line 276 of file llfat_quda_itf.cpp.

void quda::llfat_init_cuda ( QudaGaugeParam * param )

void quda::llfat_init_cuda_ex ( QudaGaugeParam * param_ex )

void quda::llfatOneLinkKernel	(	cudaGaugeField &	cudaFatLink,
		cudaGaugeField &	cudaSiteLink,
		cudaGaugeField &	cudaStaple,
		cudaGaugeField &	cudaStaple1,
		QudaGaugeParam *	param,
		double *	act_path_coeff
	)

Definition at line 1187 of file llfat_quda.cu.

void quda::llfatOneLinkKernel_ex	(	cudaGaugeField &	cudaFatLink,
		cudaGaugeField &	cudaSiteLink,
		cudaGaugeField &	cudaStaple,
		cudaGaugeField &	cudaStaple1,
		QudaGaugeParam *	param,
		double *	act_path_coeff,
		llfat_kernel_param_t	kparam
	)

Definition at line 1232 of file llfat_quda.cu.

template<typename Float , int Ns, int Nc>

__device__ void quda::load_shared	(	typename mapper< Float >::type	v[Ns*Nc*2],
		Float *	field,
		int	x,
		int	volume
	)

inline

Definition at line 236 of file color_spinor_field_order.h.

void quda::loadLinkToGPU	(	cudaGaugeField *	cudaGauge,
		cpuGaugeField *	cpuGauge,
		QudaGaugeParam *	param
	)

void quda::loadLinkToGPU_ex	(	cudaGaugeField *	cudaGauge,
		cpuGaugeField *	cpuGauge
	)

void quda::loadLinkToGPU_gf	(	cudaGaugeField *	cudaGauge,
		cpuGaugeField *	cpuGauge,
		QudaGaugeParam *	param
	)

template<class T >

__device__ void quda::loadLinkVariableFromArray	(	const T *const	array,
		const int	dir,
		const int	idx,
		const int	stride,
		Matrix< T, 3 > *	link
	)

inline

Definition at line 767 of file quda_matrix.h.

__device__ void quda::loadLinkVariableFromArray	(	const float2 *const	array,
		const int	dir,
		const int	idx,
		const int	stride,
		Matrix< double2, 3 > *	link
	)

inline

Definition at line 787 of file quda_matrix.h.

template<class T , int N>

__device__ void quda::loadMatrixFromArray	(	const T *const	array,
		const int	idx,
		const int	stride,
		Matrix< T, N > *	mat
	)

inline

Definition at line 778 of file quda_matrix.h.

template<class T >

__device__ void quda::loadMomentumFromArray	(	const T *const	array,
		const int	dir,
		const int	idx,
		const int	stride,
		Matrix< T, 3 > *	mom
	)

inline

Definition at line 857 of file quda_matrix.h.

void quda::loadTuneCache ( QudaVerbosity verbosity )

Definition at line 131 of file tune.cpp.

template<typename ValueType >

__host__ __device__ ValueType quda::log ( ValueType x )

inline

Definition at line 90 of file complex_quda.h.

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::log ( const complex< ValueType > & z )

inline

Definition at line 929 of file complex_quda.h.

template<>

__host__ __device__ complex<float> quda::log ( const complex< float > & z )

inline

Definition at line 935 of file complex_quda.h.

template<typename ValueType >

__host__ __device__ ValueType quda::log10 ( ValueType x )

inline

Definition at line 95 of file complex_quda.h.

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::log10 ( const complex< ValueType > & z )

inline

Definition at line 942 of file complex_quda.h.

__forceinline__ __host__ __device__ float2 quda::make_FloatN ( const double2 & a )

Definition at line 201 of file float_vector.h.

__forceinline__ __host__ __device__ float4 quda::make_FloatN ( const double4 & a )

Definition at line 205 of file float_vector.h.

__forceinline__ __host__ __device__ double2 quda::make_FloatN ( const float2 & a )

Definition at line 209 of file float_vector.h.

__forceinline__ __host__ __device__ double4 quda::make_FloatN ( const float4 & a )

Definition at line 213 of file float_vector.h.

__forceinline__ __host__ __device__ short4 quda::make_shortN ( const float4 & a )

Definition at line 217 of file float_vector.h.

__forceinline__ __host__ __device__ short2 quda::make_shortN ( const float2 & a )

Definition at line 221 of file float_vector.h.

__forceinline__ __host__ __device__ short4 quda::make_shortN ( const double4 & a )

Definition at line 225 of file float_vector.h.

__forceinline__ __host__ __device__ short2 quda::make_shortN ( const double2 & a )

Definition at line 229 of file float_vector.h.

template<class Cmplx >

__device__ __host__ Cmplx quda::makeComplex	(	const typename RealTypeId< Cmplx >::Type &	a,
		const typename RealTypeId< Cmplx >::Type &	b
	)

inline

Definition at line 125 of file quda_matrix.h.

__device__ __host__ double2 quda::makeComplex	(	const double &	a,
		const double &	b
	)

inline

Definition at line 134 of file quda_matrix.h.

__device__ __host__ float2 quda::makeComplex	(	const float &	a,
		const float &	b
	)

inline

Definition at line 139 of file quda_matrix.h.

void * quda::mapped_malloc_	(	const char *	func,
		const char *	file,
		int	line,
		size_t	size
	)

Allocate page-locked ("pinned") host memory, and map it into the GPU address space. This function should only be called via the mapped_malloc() macro, defined in malloc_quda.h

Definition at line 212 of file malloc.cpp.

void quda::massRescale	(	cudaColorSpinorField &	b,
		QudaInvertParam &	param
	)

Definition at line 1245 of file interface_quda.cpp.

__forceinline__ __host__ __device__ float quda::max_fabs ( const float4 & c )

Definition at line 177 of file float_vector.h.

__forceinline__ __host__ __device__ float quda::max_fabs ( const float2 & b )

Definition at line 183 of file float_vector.h.

__forceinline__ __host__ __device__ double quda::max_fabs ( const double4 & c )

Definition at line 187 of file float_vector.h.

__forceinline__ __host__ __device__ double quda::max_fabs ( const double2 & b )

Definition at line 193 of file float_vector.h.

template<typename Float , int Nc, typename Order >

double quda::maxGauge	(	const Order	order,
		int	volume,
		int	nDim
	)

Generic CPU function to find the gauge maximum.

Definition at line 9 of file max_gauge.cu.

double quda::maxGauge ( const GaugeField & u )

This function is used to calculate the maximum absolute value of a gauge field array. Defined in max_gauge.cu.

Parameters

u	The gauge field from which we want to compute the max

Definition at line 29 of file max_gauge.cu.

void quda::MDWFDslashCuda	(	cudaColorSpinorField *	out,
		const cudaGaugeField &	gauge,
		const cudaColorSpinorField *	in,
		const int	parity,
		const int	dagger,
		const cudaColorSpinorField *	x,
		const double &	m_f,
		const double &	k,
		const int *	commDim,
		const int	DS_type,
		TimeProfile &	profile,
		const QudaDslashPolicy &	dslashPolicy = `QUDA_DSLASH2`
	)

Definition at line 247 of file dslash_mobius.cu.

void quda::mxpyCpu	(	const cpuColorSpinorField &	x,
		cpuColorSpinorField &	y
	)

Definition at line 51 of file blas_cpu.cpp.

void quda::mxpyCuda	(	cudaColorSpinorField &	x,
		cudaColorSpinorField &	y
	)

Definition at line 154 of file blas_quda.cu.

void quda::ndegTwistedMassDslashCuda	(	cudaColorSpinorField *	out,
		const cudaGaugeField &	gauge,
		const cudaColorSpinorField *	in,
		const int	parity,
		const int	dagger,
		const cudaColorSpinorField *	x,
		const QudaTwistDslashType	type,
		const double &	kappa,
		const double &	mu,
		const double &	epsilon,
		const double &	k,
		const int *	commDim,
		TimeProfile &	profile,
		const QudaDslashPolicy &	dslashPolicy = `QUDA_DSLASH`
	)

Definition at line 127 of file dslash_ndeg_twisted_mass.cu.

template<IndexType idxType, typename Int >

__device__ __forceinline__ int quda::neighborIndex	(	const unsigned int &	cb_idx,
		const int(&)	shift[4],
		const bool(&)	partitioned[4],
		const unsigned int &	parity
	)

Definition at line 41 of file shift_quark_field.cu.

template<typename ValueType >

__host__ __device__ ValueType quda::norm ( const complex< ValueType > & z )

inline

Returns the squared magnitude of z.

Definition at line 859 of file complex_quda.h.

template<typename Float >

double quda::norm	(	const Float *	a,
		const int	N
	)

Definition at line 160 of file blas_cpu.cpp.

double quda::norm2 ( const ColorSpinorField & a )

Definition at line 486 of file color_spinor_field.cpp.

double quda::norm2 ( const cudaGaugeField & u )

This is a debugging function, where we cast a gauge field into a spinor field so we can compute its L2 norm.

Parameters

u	The gauge field that we want the norm of

Returns: The L2 norm squared of the gauge field

Definition at line 494 of file cuda_gauge_field.cu.

__device__ double quda::norm2_ ( const double2 & a )

Return the L2 norm of a

Definition at line 129 of file reduce_quda.cu.

__device__ float quda::norm2_ ( const float2 & a )

Definition at line 130 of file reduce_quda.cu.

__device__ float quda::norm2_ ( const float4 & a )

Definition at line 131 of file reduce_quda.cu.

double quda::normCpu ( const cpuColorSpinorField & b )

Definition at line 166 of file blas_cpu.cpp.

double quda::normCuda ( const cudaColorSpinorField & b )

Definition at line 145 of file reduce_quda.cu.

template<typename ValueType >

__host__ __device__ bool quda::operator!=	(	const complex< ValueType > &	lhs,
		const complex< ValueType > &	rhs
	)

inline

Definition at line 802 of file complex_quda.h.

template<typename ValueType >

__host__ __device__ bool quda::operator!=	(	const ValueType &	lhs,
		const complex< ValueType > &	rhs
	)

inline

Definition at line 808 of file complex_quda.h.

template<typename ValueType >

__host__ __device__ bool quda::operator!=	(	const complex< ValueType > &	lhs,
		const ValueType &	rhs
	)

inline

Definition at line 814 of file complex_quda.h.

__host__ __device__ float4 quda::operator*	(	const float	a,
		const float4	x
	)

inline

Definition at line 35 of file float_vector.h.

__host__ __device__ float2 quda::operator*	(	const float	a,
		const float2	x
	)

inline

Definition at line 44 of file float_vector.h.

__host__ __device__ double2 quda::operator*	(	const double	a,
		const double2	x
	)

inline

Definition at line 51 of file float_vector.h.

__host__ __device__ double4 quda::operator*	(	const double	a,
		const double4	x
	)

inline

Definition at line 58 of file float_vector.h.

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::operator*	(	const complex< ValueType > &	lhs,
		const complex< ValueType > &	rhs
	)

inline

Definition at line 692 of file complex_quda.h.

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::operator*	(	const complex< ValueType > &	lhs,
		const ValueType &	rhs
	)

inline

Definition at line 701 of file complex_quda.h.

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::operator*	(	const ValueType &	lhs,
		const complex< ValueType > &	rhs
	)

inline

Definition at line 708 of file complex_quda.h.

__device__ __host__ double2 quda::operator*	(	const double2 &	a,
		const double &	scalar
	)

inline

Definition at line 193 of file quda_matrix.h.

__device__ __host__ float2 quda::operator*	(	const float2 &	a,
		const float &	scalar
	)

inline

Definition at line 198 of file quda_matrix.h.

template<class Cmplx >

__device__ __host__ Cmplx quda::operator*	(	const Cmplx &	a,
		const Cmplx &	b
	)

inline

Definition at line 245 of file quda_matrix.h.

template<class T , int N, class S >

__device__ __host__ Matrix<T,N> quda::operator*	(	const S &	scalar,
		const Matrix< T, N > &	a
	)

inline

Definition at line 439 of file quda_matrix.h.

template<class T , int N, class S >

__device__ __host__ Matrix<T,N> quda::operator*	(	const Matrix< T, N > &	a,
		const S &	scalar
	)

inline

Definition at line 449 of file quda_matrix.h.

template<class T >

__device__ __host__ Matrix<T,3> quda::operator*	(	const Matrix< T, 3 > &	a,
		const Matrix< T, 3 > &	b
	)

inline

Definition at line 472 of file quda_matrix.h.

template<class T , class U >

__device__ __host__ Matrix<typename PromoteTypeId<T,U>::Type,3> quda::operator*	(	const Matrix< T, 3 > &	a,
		const Matrix< U, 3 > &	b
	)

inline

Definition at line 508 of file quda_matrix.h.

template<class T >

__device__ __host__ Matrix<T,2> quda::operator*	(	const Matrix< T, 2 > &	a,
		const Matrix< T, 2 > &	b
	)

inline

Definition at line 527 of file quda_matrix.h.

__host__ __device__ float2 quda::operator*=	(	float2 &	x,
		const float	a
	)

inline

Definition at line 130 of file float_vector.h.

__host__ __device__ double2 quda::operator*=	(	double2 &	x,
		const float	a
	)

inline

Definition at line 136 of file float_vector.h.

__host__ __device__ float4 quda::operator*=	(	float4 &	a,
		const float &	b
	)

inline

Definition at line 142 of file float_vector.h.

__host__ __device__ double2 quda::operator*=	(	double2 &	a,
		const double &	b
	)

inline

Definition at line 150 of file float_vector.h.

__host__ __device__ double4 quda::operator*=	(	double4 &	a,
		const double &	b
	)

inline

Definition at line 156 of file float_vector.h.

template<class T , int N, class S >

__device__ __host__ Matrix<T,N> quda::operator*=	(	Matrix< T, N > &	a,
		const S &	scalar
	)

inline

Definition at line 454 of file quda_matrix.h.

template<class T , int N>

__device__ __host__ Matrix<T,N> quda::operator*=	(	Matrix< T, N > &	a,
		const Matrix< T, N > &	b
	)

inline

Definition at line 491 of file quda_matrix.h.

__host__ __device__ double2 quda::operator+	(	const double2 &	x,
		const double2 &	y
	)

inline

Definition at line 13 of file float_vector.h.

__host__ __device__ double3 quda::operator+	(	const double3 &	x,
		const double3 &	y
	)

inline

Definition at line 29 of file float_vector.h.

__host__ __device__ float2 quda::operator+	(	const float2	x,
		const float2	y
	)

inline

Definition at line 67 of file float_vector.h.

__host__ __device__ float4 quda::operator+	(	const float4	x,
		const float4	y
	)

inline

Definition at line 74 of file float_vector.h.

template<class Cmplx >

__device__ __host__ Cmplx quda::operator+	(	const Cmplx &	a,
		const Cmplx &	b
	)

inline

Definition at line 164 of file quda_matrix.h.

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::operator+	(	const complex< ValueType > &	lhs,
		const complex< ValueType > &	rhs
	)

inline

Definition at line 644 of file complex_quda.h.

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::operator+	(	const complex< ValueType > &	lhs,
		const ValueType &	rhs
	)

inline

Definition at line 660 of file complex_quda.h.

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::operator+	(	const ValueType &	lhs,
		const complex< ValueType > &	rhs
	)

inline

Definition at line 666 of file complex_quda.h.

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::operator+ ( const complex< ValueType > & rhs )

inline

Definition at line 765 of file complex_quda.h.

template<class Cmplx , class Float >

__device__ __host__ Cmplx quda::operator+	(	const Cmplx &	a,
		const Float &	scalar
	)

inline

Definition at line 204 of file quda_matrix.h.

template<class Cmplx >

__device__ __host__ Cmplx quda::operator+	(	const typename RealTypeId< Cmplx >::Type &	scalar,
		const Cmplx &	a
	)

inline

Definition at line 227 of file quda_matrix.h.

template<class T , int N>

__device__ __host__ Matrix<T,N> quda::operator+	(	const Matrix< T, N > &	a,
		const Matrix< T, N > &	b
	)

inline

Definition at line 396 of file quda_matrix.h.

template<typename ValueType >

__host__ __device__ complex<ValueType> quda::operator+	(	const volatile complex< ValueType > &	lhs,
		const volatile complex< ValueType > &	rhs
	)

inline

Definition at line 652 of file complex_quda.h.

__host__ __device__ float4 quda::operator+=	(	float4 &	x,
		const float4	y
	)

inline

Definition at line 83 of file float_vector.h.

__host__ __device__ float2 quda::operator+=	(	float2 &	x,
		const float2	y
	)

inline

Definition at line 91 of file float_vector.h.

__host__ __device__ double2 quda::operator+=	(	double2 &	x,
		const double2	y
	)

inline

Definition at line 97 of file float_vector.h.

__host__ __device__ double3 quda::operator+=	(	double3 &	x,
		const double3	y
	)

inline

Definition at line 103 of file float_vector.h.

template<class Cmplx >

__device__ __host__ Cmplx& quda::operator+=	(	Cmplx &	a,
		const Cmplx &	b
	)

inline

Definition at line 149 of file quda_matrix.h.

template<class T , int N>

__device__ __host__ Matrix<T,N> quda::operator+=	(	Matrix< T, N > &	a,
		const Matrix< T, N > &	b
	)

inline

Definition at line 407 of file quda_matrix.h.

__host__ __device__ double2 quda::operator-	(	const double2 &	x,
		const double2 &	y
	)

inline

Definition at line 17 of file float_vector.h.

__host__ __device__ float2 quda::operator-	(	const float2 &	x,
		const float2 &	y
	)

inline

Definition at line 21 of file float_vector.h.

__host__ __device__ float4 quda::operator-	(	const float4 &	x,
		const float4 &	y
	)

inline

Definition at line 25 of file float_vector.h.

template<class Cmplx >

__device__ __host__ Cmplx quda::operator- ( const Cmplx & a )

inline

Definition at line 144 of file quda_matrix.h.

__host__ __device__ float2 quda::operator- ( const float2 & x )

inline

Definition at line 164 of file float_vector.h.

__host__ __device__ double2 quda::operator- ( const double2 & x )

inline

Definition at line 168 of file float_vector.h.

template<class Cmplx >

__device__ __host__ Cmplx quda::operator-	(	const Cmplx &	a,
		const Cmplx &	b
	)

inline

Definition at line 169 of file quda_matrix.h.

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::operator-	(	const complex< ValueType > &	lhs,
		const complex< ValueType > &	rhs
	)

inline

Definition at line 673 of file complex_quda.h.

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::operator-	(	const complex< ValueType > &	lhs,
		const ValueType &	rhs
	)

inline

Definition at line 679 of file complex_quda.h.

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::operator-	(	const ValueType &	lhs,
		const complex< ValueType > &	rhs
	)

inline

Definition at line 685 of file complex_quda.h.

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::operator- ( const complex< ValueType > & rhs )

inline

Definition at line 770 of file complex_quda.h.

template<class Cmplx >

__device__ __host__ Cmplx quda::operator-	(	const Cmplx &	a,
		const typename RealTypeId< Cmplx >::Type &	scalar
	)

inline

Definition at line 233 of file quda_matrix.h.

template<class Cmplx >

__device__ __host__ Cmplx quda::operator-	(	const typename RealTypeId< Cmplx >::Type &	scalar,
		const Cmplx &	a
	)

inline

Definition at line 239 of file quda_matrix.h.

template<class T , int N>

__device__ __host__ Matrix<T,N> quda::operator-	(	const Matrix< T, N > &	a,
		const Matrix< T, N > &	b
	)

inline

Definition at line 427 of file quda_matrix.h.

template<class T , int N>

__device__ __host__ Matrix<T,N> quda::operator- ( const Matrix< T, N > & a )

inline

Definition at line 460 of file quda_matrix.h.

__host__ __device__ float4 quda::operator-=	(	float4 &	x,
		const float4	y
	)

inline

Definition at line 110 of file float_vector.h.

__host__ __device__ float2 quda::operator-=	(	float2 &	x,
		const float2	y
	)

inline

Definition at line 118 of file float_vector.h.

__host__ __device__ double2 quda::operator-=	(	double2 &	x,
		const double2	y
	)

inline

Definition at line 124 of file float_vector.h.

template<class Cmplx >

__device__ __host__ Cmplx& quda::operator-=	(	Cmplx &	a,
		const Cmplx &	b
	)

inline

Definition at line 156 of file quda_matrix.h.

template<class T , int N>

__device__ __host__ Matrix<T,N> quda::operator-=	(	Matrix< T, N > &	a,
		const Matrix< T, N > &	b
	)

inline

Definition at line 417 of file quda_matrix.h.

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::operator/	(	const complex< ValueType > &	lhs,
		const complex< ValueType > &	rhs
	)

inline

Definition at line 716 of file complex_quda.h.

template<>

__host__ __device__ complex< float > quda::operator/	(	const complex< float > &	lhs,
		const complex< float > &	rhs
	)

inline

Definition at line 725 of file complex_quda.h.

template<>

__host__ __device__ complex< double > quda::operator/	(	const complex< double > &	lhs,
		const complex< double > &	rhs
	)

inline

Definition at line 731 of file complex_quda.h.

template<class Cmplx >

__device__ __host__ Cmplx quda::operator/	(	const Cmplx &	a,
		const typename RealTypeId< Cmplx >::Type &	scalar
	)

inline

Definition at line 221 of file quda_matrix.h.

template<typename ValueType >

__host__ __device__ complex<ValueType> quda::operator/	(	const complex< ValueType > &	lhs,
		const ValueType &	rhs
	)

inline

Definition at line 737 of file complex_quda.h.

template<typename ValueType >

__host__ __device__ complex<ValueType> quda::operator/	(	const ValueType &	lhs,
		const complex< ValueType > &	rhs
	)

inline

Definition at line 744 of file complex_quda.h.

template<>

__host__ __device__ complex<float> quda::operator/	(	const float &	lhs,
		const complex< float > &	rhs
	)

inline

Definition at line 752 of file complex_quda.h.

template<>

__host__ __device__ complex<double> quda::operator/	(	const double &	lhs,
		const complex< double > &	rhs
	)

inline

Definition at line 757 of file complex_quda.h.

std::ostream & quda::operator<<	(	std::ostream &	output,
		const CloverFieldParam &	param
	)

Definition at line 273 of file clover_field.cpp.

std::ostream & quda::operator<<	(	std::ostream &	output,
		const LatticeFieldParam &	param
	)

Definition at line 145 of file lattice_field.cpp.

std::ostream & quda::operator<<	(	std::ostream &	output,
		const GaugeFieldParam &	param
	)

Definition at line 122 of file gauge_field.cpp.

template<typename ValueType , class charT , class traits >

std::basic_ostream< charT, traits > & quda::operator<<	(	std::basic_ostream< charT, traits > &	os,
		const complex< ValueType > &	z
	)

Definition at line 295 of file complex_quda.h.

std::ostream& quda::operator<<	(	std::ostream &	os,
		const float2 &	z
	)

inline

Definition at line 285 of file quda_matrix.h.

std::ostream& quda::operator<<	(	std::ostream &	os,
		const double2 &	z
	)

inline

Definition at line 290 of file quda_matrix.h.

std::ostream& quda::operator<<	(	std::ostream &	out,
		const ColorSpinorField &	a
	)

Definition at line 500 of file color_spinor_field.cpp.

template<class T , int N>

std::ostream& quda::operator<<	(	std::ostream &	os,
		const Matrix< T, N > &	m
	)

Definition at line 745 of file quda_matrix.h.

template<class T , int N>

std::ostream& quda::operator<<	(	std::ostream &	os,
		const Array< T, N > &	a
	)

Definition at line 757 of file quda_matrix.h.

std::ostream& quda::operator<<	(	std::ostream &	out,
		const cudaColorSpinorField &	a
	)

Definition at line 1368 of file cuda_color_spinor_field.cu.

template<typename ValueType >

__host__ __device__ bool quda::operator==	(	const complex< ValueType > &	lhs,
		const complex< ValueType > &	rhs
	)

inline

Definition at line 777 of file complex_quda.h.

template<typename ValueType >

__host__ __device__ bool quda::operator==	(	const ValueType &	lhs,
		const complex< ValueType > &	rhs
	)

inline

Definition at line 785 of file complex_quda.h.

template<typename ValueType >

__host__ __device__ bool quda::operator==	(	const complex< ValueType > &	lhs,
		const ValueType &	rhs
	)

inline

Definition at line 793 of file complex_quda.h.

template<typename ValueType , typename charT , class traits >

std::basic_istream< charT, traits > & quda::operator>>	(	std::basic_istream< charT, traits > &	is,
		complex< ValueType > &	z
	)

Definition at line 303 of file complex_quda.h.

void quda::orthoDir	(	Complex **	beta,
		cudaColorSpinorField *	Ap[],
		int	k
	)

Definition at line 48 of file inv_gcr_quda.cpp.

template<class T , int N>

__device__ __host__ void quda::outerProd	(	const Array< T, N > &	a,
		const Array< T, N > &	b,
		Matrix< T, N > *	m
	)

inline

Definition at line 720 of file quda_matrix.h.

template<class T , int N>

__device__ __host__ void quda::outerProd	(	const T(&)	a[N],
		const T(&)	b[N],
		Matrix< T, N > *	m
	)

inline

Definition at line 732 of file quda_matrix.h.

void quda::pack_gauge_diag	(	void *	buf,
		int *	X,
		void **	sitelink,
		int	nu,
		int	mu,
		int	dir1,
		int	dir2,
		QudaPrecision	prec
	)

void quda::pack_ghost_all_links	(	void **	cpuLink,
		void **	cpuGhostBack,
		void **	cpuGhostFwd,
		int	dir,
		int	nFace,
		QudaPrecision	precision,
		int *	X
	)

void quda::pack_ghost_all_staples_cpu	(	void *	staple,
		void **	cpuGhostStapleBack,
		void **	cpuGhostStapleFwd,
		int	nFace,
		QudaPrecision	precision,
		int *	X
	)

void quda::packFace	(	void *	ghost_buf,
		cudaColorSpinorField &	in,
		const int	nFace,
		const int	dagger,
		const int	parity,
		const int	dim,
		const int	face_num,
		const cudaStream_t &	stream,
		const double	a = `0.0`,
		const double	b = `0.0`
	)

void quda::packFace	(	void *	ghost_buf,
		cudaColorSpinorField &	in,
		FullClover &	clov,
		FullClover &	clovInv,
		const int	nFace,
		const int	dagger,
		const int	parity,
		const int	dim,
		const int	face_num,
		const cudaStream_t &	stream,
		const double	a = `0.0`
	)

void quda::packFaceExtended	(	void *	ghost_buf,
		cudaColorSpinorField &	field,
		const int	nFace,
		const int	R[],
		const int	dagger,
		const int	parity,
		const int	dim,
		const int	face_num,
		const cudaStream_t &	stream,
		const bool	unpack = `false`
	)

void quda::packGhostStaple	(	int *	X,
		void *	even,
		void *	odd,
		int	volume,
		QudaPrecision	prec,
		int	stride,
		int	dir,
		int	whichway,
		void **	fwd_nbr_buf_gpu,
		void **	back_nbr_buf_gpu,
		void **	fwd_nbr_buf,
		void **	back_nbr_buf,
		cudaStream_t *	stream
	)

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis >

void quda::packSpinor	(	OutOrder &	outOrder,
		const InOrder &	inOrder,
		Basis	basis,
		int	volume
	)

CPU function to reorder spinor fields.

Definition at line 127 of file copy_color_spinor.cu.

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis >

__global__ void quda::packSpinorKernel	(	OutOrder	outOrder,
		const InOrder	inOrder,
		Basis	basis,
		int	volume
	)

CUDA kernel to reorder spinor fields. Adopts a similar form as the CPU version, using the same inlined functions.

Definition at line 141 of file copy_color_spinor.cu.

void * quda::pinned_malloc_	(	const char *	func,
		const char *	file,
		int	line,
		size_t	size
	)

Allocate page-locked ("pinned") host memory. This function should only be called via the pinned_malloc() macro, defined in malloc_quda.h.

Note that we do not rely on cudaHostAlloc(), since buffers allocated in this way have been observed to cause problems when shared with MPI via GPU Direct on some systems.

Definition at line 192 of file malloc.cpp.

double quda::plaquette	(	const GaugeField &	data,
		QudaFieldLocation	location
	)

Definition at line 242 of file gauge_plaq.cu.

template<class T >

void quda::point	(	T &	t,
		int	x,
		int	s,
		int	c
	)

Definition at line 36 of file color_spinor_util.cu.

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::polar	(	const ValueType &	m,
		const ValueType &	theta = `0`
	)

inline

Returns the complex with magnitude m and angle theta in radians.

Definition at line 865 of file complex_quda.h.

template<>

__host__ __device__ complex<float> quda::polar	(	const float &	magnitude,
		const float &	angle
	)

inline

Definition at line 871 of file complex_quda.h.

template<>

__host__ __device__ complex<double> quda::polar	(	const double &	magnitude,
		const double &	angle
	)

inline

Definition at line 877 of file complex_quda.h.

template<typename ValueType , typename ExponentType >

__host__ __device__ ValueType quda::pow	(	ValueType	x,
		ExponentType	e
	)

inline

Definition at line 100 of file complex_quda.h.

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::pow	(	const complex< ValueType > &	z,
		const int &	n
	)

inline

Definition at line 975 of file complex_quda.h.

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::pow	(	const complex< ValueType > &	z,
		const ValueType &	x
	)

inline

Definition at line 951 of file complex_quda.h.

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::pow	(	const complex< ValueType > &	z,
		const complex< ValueType > &	z2
	)

inline

Definition at line 957 of file complex_quda.h.

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::pow	(	const ValueType &	x,
		const complex< ValueType > &	z
	)

inline

Definition at line 963 of file complex_quda.h.

template<>

__host__ __device__ complex<float> quda::pow	(	const float &	x,
		const complex< float > &	exponent
	)

inline

Definition at line 969 of file complex_quda.h.

void quda::print	(	const double	d[],
		int	n
	)

Definition at line 47 of file inv_mpcg_quda.cpp.

template<class Order >

void quda::print_vector	(	const Order &	o,
		unsigned int	x
	)

Definition at line 150 of file color_spinor_util.cu.

void quda::printLaunchTimer ( )

Definition at line 437 of file tune.cpp.

template<class Cmplx >

__host__ __device__ void quda::printLink ( const Matrix< Cmplx, 3 > & link )

inline

Definition at line 1012 of file quda_matrix.h.

void quda::printPeakMemUsage ( )

Definition at line 286 of file malloc.cpp.

template<class T >

void quda::random ( T & t )

Definition at line 22 of file color_spinor_util.cu.

template<typename Float >

double quda::reDotProduct	(	const Float *	a,
		const Float *	b,
		const int	N
	)

Definition at line 185 of file blas_cpu.cpp.

double quda::reDotProductCpu	(	const cpuColorSpinorField &	a,
		const cpuColorSpinorField &	b
	)

Definition at line 191 of file blas_cpu.cpp.

double quda::reDotProductCuda	(	cudaColorSpinorField &	a,
		cudaColorSpinorField &	b
	)

Definition at line 170 of file reduce_quda.cu.

void quda::reDotProductCuda	(	double *	result,
		std::vector< cudaColorSpinorField * > &	a,
		std::vector< cudaColorSpinorField * > &	b
	)

Definition at line 176 of file reduce_quda.cu.

double2 quda::reDotProductNormACuda	(	cudaColorSpinorField &	a,
		cudaColorSpinorField &	b
	)

Definition at line 297 of file reduce_quda.cu.

int quda::reliable	(	double &	rNorm,
		double &	maxrx,
		double &	maxrr,
		const double &	r2,
		const double &	delta
	)

Definition at line 47 of file inv_bicgstab_quda.cpp.

double quda::resNorm	(	const DiracMatrix &	mat,
		cudaColorSpinorField &	b,
		cudaColorSpinorField &	x
	)

Definition at line 20 of file inv_bicgstab_quda.cpp.

void * quda::safe_malloc_	(	const char *	func,
		const char *	file,
		int	line,
		size_t	size
	)

Perform a standard malloc() with error-checking. This function should only be called via the safe_malloc() macro, defined in malloc_quda.h.

Definition at line 168 of file malloc.cpp.

template<typename Float , int Ns, int Nc>

__device__ void quda::save_shared	(	Float *	field,
		const typename mapper< Float >::type	v[Ns*Nc*2],
		int	x,
		int	volumeCB
	)

inline

Definition at line 271 of file color_spinor_field_order.h.

void quda::saveTuneCache ( QudaVerbosity verbosity )

Write tunecache to disk.

Definition at line 205 of file tune.cpp.

void quda::setBlasParam	(	int	kernel,
		int	prec,
		int	threads,
		int	blocks
	)

void quda::setDiracParam	(	DiracParam &	diracParam,
		QudaInvertParam *	inv_param,
		bool	pc
	)

Definition at line 1102 of file interface_quda.cpp.

void quda::setDiracPreParam	(	DiracParam &	diracParam,
		QudaInvertParam *	inv_param,
		const bool	pc
	)

Definition at line 1199 of file interface_quda.cpp.

void quda::setDiracSloppyParam	(	DiracParam &	diracParam,
		QudaInvertParam *	inv_param,
		bool	pc
	)

Definition at line 1182 of file interface_quda.cpp.

void quda::setGhostSpinor ( bool value )

Definition at line 42 of file color_spinor_field.cpp.

template<class T , int N>

__device__ __host__ void quda::setIdentity ( Matrix< T, N > * m )

inline

Definition at line 597 of file quda_matrix.h.

template<int N>

__device__ __host__ void quda::setIdentity ( Matrix< float2, N > * m )

inline

Definition at line 611 of file quda_matrix.h.

template<int N>

__device__ __host__ void quda::setIdentity ( Matrix< double2, N > * m )

inline

Definition at line 625 of file quda_matrix.h.

void quda::setKernelPackT ( bool pack )

Parameters

pack	Sets whether to use a kernel to pack the T dimension

Definition at line 82 of file dslash_quda.cu.

void quda::setPackComms ( const int * commDim )

Sets commDim array used in dslash_pack.cu

Definition at line 39 of file dslash_pack.cu.

void quda::setTwistPack ( bool pack )

Parameters

pack	Sets whether to use a kernel to pack twisted spinor

Definition at line 90 of file dslash_quda.cu.

void quda::setUnitarizeLinksConstants	(	double	unitarize_eps,
		double	max_error,
		bool	allow_svd,
		bool	svd_only,
		double	svd_rel_error,
		double	svd_abs_error,
		bool	check_unitarization = `true`
	)

void quda::setUnitarizeLinksPadding	(	int	input_padding,
		int	output_padding
	)

template<class T , int N>

__device__ __host__ void quda::setZero ( Matrix< T, N > * m )

inline

Definition at line 640 of file quda_matrix.h.

template<int N>

__device__ __host__ void quda::setZero ( Matrix< float2, N > * m )

inline

Definition at line 653 of file quda_matrix.h.

template<int N>

__device__ __host__ void quda::setZero ( Matrix< double2, N > * m )

inline

Definition at line 666 of file quda_matrix.h.

void quda::shiftColorSpinorField	(	cudaColorSpinorField &	dst,
		const cudaColorSpinorField &	src,
		const unsigned int	parity,
		const unsigned int	dim,
		const int	shift
	)

Definition at line 210 of file shift_quark_field.cu.

template<typename FloatN , int N, typename Output , typename Input >

__global__ void quda::shiftColorSpinorFieldExternalKernel ( ShiftQuarkArg< Output, Input > arg )

Definition at line 93 of file shift_quark_field.cu.

template<typename FloatN , int N, typename Output , typename Input >

__global__ void quda::shiftColorSpinorFieldKernel ( ShiftQuarkArg< Output, Input > arg )

Definition at line 68 of file shift_quark_field.cu.

template<typename ValueType >

__host__ __device__ ValueType quda::sin ( ValueType x )

inline

Definition at line 40 of file complex_quda.h.

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::sin ( const complex< ValueType > & z )

inline

Definition at line 981 of file complex_quda.h.

template<>

__host__ __device__ complex<float> quda::sin ( const complex< float > & z )

inline

Definition at line 989 of file complex_quda.h.

template<typename ValueType >

__host__ __device__ ValueType quda::sinh ( ValueType x )

inline

Definition at line 75 of file complex_quda.h.

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::sinh ( const complex< ValueType > & z )

inline

Definition at line 997 of file complex_quda.h.

template<>

__host__ __device__ complex<float> quda::sinh ( const complex< float > & z )

inline

Definition at line 1005 of file complex_quda.h.

void quda::siteComputeGenStapleParityKernel	(	void *	staple_even,
		void *	staple_odd,
		const void *	sitelink_even,
		const void *	sitelink_odd,
		void *	fatlink_even,
		void *	fatlink_odd,
		int	mu,
		int	nu,
		double	mycoeff,
		QudaReconstructType	recon,
		QudaPrecision	prec,
		dim3	halfGridDim,
		llfat_kernel_param_t	kparam,
		cudaStream_t *	stream
	)

void quda::siteComputeGenStapleParityKernel_ex	(	void *	staple_even,
		void *	staple_odd,
		const void *	sitelink_even,
		const void *	sitelink_odd,
		void *	fatlink_even,
		void *	fatlink_odd,
		int	mu,
		int	nu,
		double	mycoeff,
		QudaReconstructType	recon,
		QudaPrecision	prec,
		llfat_kernel_param_t	kparam
	)

template<typename ValueType >

__host__ __device__ ValueType quda::sqrt ( ValueType x )

inline

Definition at line 105 of file complex_quda.h.

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::sqrt ( const complex< ValueType > & z )

inline

Definition at line 1013 of file complex_quda.h.

template<typename ValueType >

__host__ __device__ complex<float> quda::sqrt ( const complex< float > & z )

inline

Definition at line 1019 of file complex_quda.h.

void quda::staggeredDslashCuda	(	cudaColorSpinorField *	out,
		const cudaGaugeField &	gauge,
		const cudaColorSpinorField *	in,
		const int	parity,
		const int	dagger,
		const cudaColorSpinorField *	x,
		const double &	k,
		const int *	commDim,
		TimeProfile &	profile,
		const QudaDslashPolicy &	dslashPolicy = `QUDA_DSLASH2`
	)

Definition at line 119 of file dslash_staggered.cu.

void quda::storeLinkToCPU	(	cpuGaugeField *	cpuGauge,
		cudaGaugeField *	cudaGauge,
		QudaGaugeParam *	param
	)

template<typename ValueType >

__host__ __device__ ValueType quda::tan ( ValueType x )

inline

Definition at line 45 of file complex_quda.h.

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::tan ( const complex< ValueType > & z )

inline

Definition at line 1025 of file complex_quda.h.

template<typename ValueType >

__host__ __device__ ValueType quda::tanh ( ValueType x )

inline

Definition at line 80 of file complex_quda.h.

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::tanh ( const complex< ValueType > & z )

inline

Definition at line 1031 of file complex_quda.h.

template<typename Float >

__device__ __host__ Float quda::timeBoundary	(	int	idx,
		const int	X[QUDA_MAX_DIM],
		QudaTboundary	tBoundary,
		bool	isFirstTimeSlice,
		bool	isLastTimeSlice
	)

inline

Definition at line 92 of file gauge_field_order.h.

template<typename Float >

__device__ __host__ Float quda::timeBoundary	(	int	idx,
		const int	X[QUDA_MAX_DIM],
		const int	R[QUDA_MAX_DIM],
		QudaTboundary	tBoundary,
		bool	isFirstTimeSlice,
		bool	isLastTimeSlice,
		QudaGhostExchange	ghostExchange
	)

inline

timeBoundary variant for extended gauge field

Parameters

idx	extended field linear index
X	the gauge field dimensions
R	the radius of the extended region in each dimension
tBoundary	the boundary condition
isFirstTimeSlice	if we're on the first time slice of nodes
isLastTimeSlice	if we're on the last time slice of nodes
ghostExchange	if the field is extended or not (determines indexing type)

Definition at line 107 of file gauge_field_order.h.

double quda::timeInterval	(	struct timeval	start,
		struct timeval	end
	)

Definition at line 21 of file inv_gcr_quda.cpp.

double3 quda::tripleCGReductionCuda	(	cudaColorSpinorField &	x,
		cudaColorSpinorField &	y,
		cudaColorSpinorField &	z
	)

Definition at line 811 of file reduce_quda.cu.

void quda::tripleCGUpdateCuda	(	const double &	alpha,
		const double &	beta,
		cudaColorSpinorField &	q,
		cudaColorSpinorField &	r,
		cudaColorSpinorField &	x,
		cudaColorSpinorField &	p
	)

Definition at line 480 of file blas_quda.cu.

TuneParam & quda::tuneLaunch	(	Tunable &	tunable,
		QudaTune	enabled,
		QudaVerbosity	verbosity
	)

Return the optimal launch parameters for a given kernel, either by retrieving them from tunecache or autotuning on the spot.

Definition at line 271 of file tune.cpp.

void quda::twistCloverGamma5Cuda	(	cudaColorSpinorField *	out,
		const cudaColorSpinorField *	in,
		const int	dagger,
		const double &	kappa,
		const double &	mu,
		const double &	epsilon,
		const QudaTwistGamma5Type	twist,
		const FullClover *	clov,
		const FullClover *	clovInv,
		const int	parity
	)

Definition at line 495 of file dslash_quda.cu.

void quda::twistedCloverDslashCuda	(	cudaColorSpinorField *	out,
		const cudaGaugeField &	gauge,
		const FullClover *	clover,
		const FullClover *	cloverInv,
		const cudaColorSpinorField *	in,
		const int	parity,
		const int	dagger,
		const cudaColorSpinorField *	x,
		const QudaTwistCloverDslashType	type,
		const double &	kappa,
		const double &	mu,
		const double &	epsilon,
		const double &	k,
		const int *	commDim,
		TimeProfile &	profile,
		const QudaDslashPolicy &	dslashPolicy = `QUDA_DSLASH2`
	)

Definition at line 155 of file dslash_twisted_clover.cu.

void quda::twistedMassDslashCuda	(	cudaColorSpinorField *	out,
		const cudaGaugeField &	gauge,
		const cudaColorSpinorField *	in,
		const int	parity,
		const int	dagger,
		const cudaColorSpinorField *	x,
		const QudaTwistDslashType	type,
		const double &	kappa,
		const double &	mu,
		const double &	epsilon,
		const double &	k,
		const int *	commDim,
		TimeProfile &	profile,
		const QudaDslashPolicy &	dslashPolicy = `QUDA_DSLASH2`
	)

Definition at line 151 of file dslash_twisted_mass.cu.

void quda::twistGamma5Cuda	(	cudaColorSpinorField *	out,
		const cudaColorSpinorField *	in,
		const int	dagger,
		const double &	kappa,
		const double &	mu,
		const double &	epsilon,
		const QudaTwistGamma5Type	twist
	)

Non-degenerate twisted mass (ndeg tm):

Definition at line 356 of file dslash_quda.cu.

void quda::unitarizeLinksCPU	(	const QudaGaugeParam &	param,
		cpuGaugeField &	infield,
		cpuGaugeField *	outfield
	)

void quda::unitarizeLinksCuda	(	const QudaGaugeParam &	param,
		cudaGaugeField &	infield,
		cudaGaugeField *	outfield,
		int *	num_failures
	)

void quda::unpackGhostStaple	(	int *	X,
		void *	_even,
		void *	_odd,
		int	volume,
		QudaPrecision	prec,
		int	stride,
		int	dir,
		int	whichway,
		void **	fwd_nbr_buf,
		void **	back_nbr_buf,
		cudaStream_t *	stream
	)

void quda::updateAlphaZeta	(	double *	alpha,
		double *	zeta,
		double *	zeta_old,
		const double *	r2,
		const double *	beta,
		const double	pAp,
		const double *	offset,
		const int	nShift,
		const int	j_low
	)

Compute the new values of alpha and zeta

Definition at line 38 of file inv_multi_cg_quda.cpp.

void quda::updateGaugeField	(	GaugeField &	out,
		double	dt,
		const GaugeField &	in,
		const GaugeField &	mom,
		bool	conj_mom,
		bool	exact
	)

Evolve the gauge field by step size dt using the momentum field

Parameters

out	Updated gauge field
dt	Step size
in	Input gauge field
mom	Momentum field
conj_mom	Whether we conjugate the momentum in the exponential
exact	Calculate exact exponential or use an expansion

Definition at line 348 of file gauge_update_quda.cu.

void quda::updateSolution	(	cudaColorSpinorField &	x,
		const Complex *	alpha,
		Complex **const	beta,
		double *	gamma,
		int	k,
		cudaColorSpinorField *	p[]
	)

Definition at line 111 of file inv_gcr_quda.cpp.

void quda::wilsonDslashCuda	(	cudaColorSpinorField *	out,
		const cudaGaugeField &	gauge,
		const cudaColorSpinorField *	in,
		const int	oddBit,
		const int	daggerBit,
		const cudaColorSpinorField *	x,
		const double &	k,
		const int *	commDim,
		TimeProfile &	profile,
		const QudaDslashPolicy &	dslashPolicy = `QUDA_DSLASH2`
	)

Definition at line 113 of file dslash_wilson.cu.

template<class T >

__device__ void quda::writeLinkVariableToArray	(	const Matrix< T, 3 > &	link,
		const int	dir,
		const int	idx,
		const int	stride,
		T *const	array
	)

inline

Definition at line 830 of file quda_matrix.h.

__device__ void quda::writeLinkVariableToArray	(	const Matrix< double2, 3 > &	link,
		const int	dir,
		const int	idx,
		const int	stride,
		float2 *const	array
	)

inline

Definition at line 842 of file quda_matrix.h.

template<class T , int N>

__device__ void quda::writeMatrixToArray	(	const Matrix< T, N > &	mat,
		const int	idx,
		const int	stride,
		T *const	array
	)

inline

Definition at line 802 of file quda_matrix.h.

template<class T , class U >

__device__ void quda::writeMomentumToArray	(	const Matrix< T, 3 > &	mom,
		const int	dir,
		const int	idx,
		const U	coeff,
		const int	stride,
		T *const	array
	)

inline

Definition at line 893 of file quda_matrix.h.

double quda::xmyNormCpu	(	const cpuColorSpinorField &	a,
		cpuColorSpinorField &	b
	)

Definition at line 205 of file blas_cpu.cpp.

double quda::xmyNormCuda	(	cudaColorSpinorField &	a,
		cudaColorSpinorField &	b
	)

Definition at line 343 of file reduce_quda.cu.

Complex quda::xpaycDotzyCpu	(	const cpuColorSpinorField &	x,
		const double &	a,
		cpuColorSpinorField &	y,
		const cpuColorSpinorField &	z
	)

Definition at line 231 of file blas_cpu.cpp.

Complex quda::xpaycDotzyCuda	(	cudaColorSpinorField &	x,
		const double &	a,
		cudaColorSpinorField &	y,
		cudaColorSpinorField &	z
	)

Definition at line 534 of file reduce_quda.cu.

void quda::xpayCpu	(	const cpuColorSpinorField &	x,
		const double &	a,
		cpuColorSpinorField &	y
	)

Definition at line 41 of file blas_cpu.cpp.

void quda::xpayCuda	(	cudaColorSpinorField &	x,
		const double &	a,
		cudaColorSpinorField &	y
	)

Definition at line 138 of file blas_quda.cu.

void quda::xpyCpu	(	const cpuColorSpinorField &	x,
		cpuColorSpinorField &	y
	)

Definition at line 22 of file blas_cpu.cpp.

void quda::xpyCuda	(	cudaColorSpinorField &	x,
		cudaColorSpinorField &	y
	)

Definition at line 98 of file blas_quda.cu.

double3 quda::xpyHeavyQuarkResidualNormCpu	(	cpuColorSpinorField &	x,
		cpuColorSpinorField &	y,
		cpuColorSpinorField &	r
	)

double3 quda::xpyHeavyQuarkResidualNormCuda	(	cudaColorSpinorField &	x,
		cudaColorSpinorField &	y,
		cudaColorSpinorField &	r
	)

Definition at line 782 of file reduce_quda.cu.

void quda::zeroCuda ( cudaColorSpinorField & a )

Definition at line 40 of file blas_quda.cu.

Variable Documentation

const char* quda::aux_str

Definition at line 46 of file blas_quda.cu.

char quda::aux_tmp[TuneKey::aux_n]

Definition at line 47 of file blas_quda.cu.

unsigned long long quda::blas_bytes

Definition at line 38 of file blas_quda.cu.

unsigned long long quda::blas_flops

Definition at line 37 of file blas_quda.cu.

const int quda::maxNface = 3

The maximum number of faces that can be exchanged

Definition at line 11 of file lattice_field.h.

const int quda::Nstream = 1

Definition at line 217 of file quda_internal.h.

cudaStream_t* quda::stream

Definition at line 816 of file cuda_color_spinor_field.cu.

const char* quda::vol_str

Definition at line 45 of file blas_quda.cu.

Enumerations
enum	QudaProfileType { QUDA_PROFILE_H2D, QUDA_PROFILE_D2H, QUDA_PROFILE_INIT, QUDA_PROFILE_PREAMBLE, QUDA_PROFILE_COMPUTE, QUDA_PROFILE_EPILOGUE, QUDA_PROFILE_FREE, QUDA_PROFILE_PACK_KERNEL, QUDA_PROFILE_DSLASH_KERNEL, QUDA_PROFILE_GATHER, QUDA_PROFILE_SCATTER, QUDA_PROFILE_EVENT_RECORD, QUDA_PROFILE_EVENT_QUERY, QUDA_PROFILE_STREAM_WAIT_EVENT, QUDA_PROFILE_COMMS, QUDA_PROFILE_COMMS_START, QUDA_PROFILE_COMMS_QUERY, QUDA_PROFILE_CONSTANT, QUDA_PROFILE_TOTAL, QUDA_PROFILE_COUNT }

enum	AllocType { DEVICE, HOST, PINNED, MAPPED, N_ALLOC_TYPE }

Namespaces

Classes

Typedefs

Enumerations

Functions

Variables

Detailed Description

Typedef Documentation

Enumeration Type Documentation

Function Documentation

Variable Documentation