Namespaces
	blas

	clover

	colorspinor

	cublas

	dslash

	fermion_force

	gauge

	linalg

	pool

Classes
struct	AllocType

struct	AllocType< false >

struct	AllocType< true >

struct	Arg

class	Array

class	BiCGstab

class	BiCGstabL

class	BiCGstabLUpdate

struct	BlockOrthoArg

struct	bridge_mapper

struct	bridge_mapper< double2, char2 >

struct	bridge_mapper< double2, char4 >

struct	bridge_mapper< double2, double2 >

struct	bridge_mapper< double2, float2 >

struct	bridge_mapper< double2, float4 >

struct	bridge_mapper< double2, short2 >

struct	bridge_mapper< double2, short4 >

struct	bridge_mapper< float2, char2 >

struct	bridge_mapper< float2, double2 >

struct	bridge_mapper< float2, float2 >

struct	bridge_mapper< float2, short2 >

struct	bridge_mapper< float4, char4 >

struct	bridge_mapper< float4, double2 >

struct	bridge_mapper< float4, float4 >

struct	bridge_mapper< float4, short4 >

class	CACG
	Communication-avoiding CG solver. This solver does un-preconditioned CG, running in steps of nKrylov, building up a polynomial in the linear operator of length nKrylov, and then performing a steepest descent minimization on the resulting basis vectors. For now only implemented using the power basis so is only useful as a preconditioner. More...

class	CACGNE

class	CACGNR

class	CAGCR
	Communication-avoiding GCR solver. This solver does un-preconditioned GCR, first building up a polynomial in the linear operator of length nKrylov, and then performs a minimum residual extrapolation on the resulting basis vectors. For use as a multigrid smoother with minimum global synchronization. More...

class	CalculateY

struct	CalculateYArg

struct	CalculateYhatArg

class	CG
	Conjugate-Gradient Solver. More...

class	CG3

class	CG3NE

class	CGNE

class	CGNR

struct	ChecksumArg

struct	ChiralToNonRelBasis

class	Clover

struct	clover_mapper

struct	clover_mapper< char, N, add_rho >

struct	clover_mapper< double, N, add_rho >

struct	clover_mapper< float, N, add_rho >

struct	clover_mapper< short, N, add_rho >

struct	clover_wrapper
	clover_wrapper is an internal class that is used to wrap instances of colorspinor accessors, currying in a specific location and chirality on the field. The operator() accessors in clover-field accessors return instances to this class, allowing us to then use operator overloading upon this class to interact with the HMatrix class. As a result we can include clover-field accessors directly in HMatrix expressions in kernels without having to declare temporaries with explicit calls to the load/save methods in the clover-field accessors. More...

struct	CloverArg
	Parameter structure for driving the clover and twist-clover application kernels. More...

struct	CloverDerivArg

class	CloverField

struct	CloverFieldParam

struct	CloverInvertArg

struct	CloverSigmaOprodArg

struct	coeff_5
	Structure containing zMobius / Zolotarev coefficients. More...

class	coeff_type
	Helper class for grabbing the constant struct, whether we are on the GPU or CPU. More...

class	coeff_type< real, true, Arg >
	Specialization for variable complex coefficients. More...

struct	ColorSpinor

struct	ColorSpinor< Float, Nc, 2 >

struct	ColorSpinor< Float, Nc, 4 >

struct	colorspinor_ghost_wrapper
	colorspinor_ghost_wrapper is an internal class that is used to wrap instances of colorspinor accessors, currying in a specific location on the field. The Ghost() accessors in colorspinor-field accessors return instances to this class, allowing us to then use operator overloading upon this class to interact with the ColorSpinor class. As a result we can include colorspinor-field accessors directly in ColorSpinor expressions in kernels without having to declare temporaries with explicit calls to the loadGhost/saveGhost methods in the colorspinor-field accessors. More...

struct	colorspinor_mapper

struct	colorspinor_mapper< char, 1, Nc, false, huge_alloc >

struct	colorspinor_mapper< char, 2, Nc, false, huge_alloc >

struct	colorspinor_mapper< char, 4, Nc, false, huge_alloc >

struct	colorspinor_mapper< char, 4, Nc, true, huge_alloc >

struct	colorspinor_mapper< double, 1, Nc, false, huge_alloc >

struct	colorspinor_mapper< double, 2, Nc, false, huge_alloc >

struct	colorspinor_mapper< double, 4, Nc, false, huge_alloc >

struct	colorspinor_mapper< double, 4, Nc, true, huge_alloc >

struct	colorspinor_mapper< float, 1, Nc, false, huge_alloc >

struct	colorspinor_mapper< float, 2, Nc, false, huge_alloc >

struct	colorspinor_mapper< float, 4, Nc, false, huge_alloc >

struct	colorspinor_mapper< float, 4, Nc, true, huge_alloc >

struct	colorspinor_mapper< short, 1, Nc, false, huge_alloc >

struct	colorspinor_mapper< short, 2, Nc, false, huge_alloc >

struct	colorspinor_mapper< short, 4, Nc, false, huge_alloc >

struct	colorspinor_mapper< short, 4, Nc, true, huge_alloc >

struct	colorspinor_order_mapper

struct	colorspinor_order_mapper< T, QUDA_FLOAT2_FIELD_ORDER, Ns, Nc >

struct	colorspinor_order_mapper< T, QUDA_SPACE_COLOR_SPIN_FIELD_ORDER, Ns, Nc >

struct	colorspinor_order_mapper< T, QUDA_SPACE_SPIN_COLOR_FIELD_ORDER, Ns, Nc >

struct	colorspinor_wrapper
	colorspinor_wrapper is an internal class that is used to wrap instances of colorspinor accessors, currying in a specific location on the field. The operator() accessors in colorspinor-field accessors return instances to this class, allowing us to then use operator overloading upon this class to interact with the ColorSpinor class. As a result we can include colorspinor-field accessors directly in ColorSpinor expressions in kernels without having to declare temporaries with explicit calls to the load/save methods in the colorspinor-field accessors. More...

class	ColorSpinorField

class	ColorSpinorParam

struct	complex

struct	complex< char >

struct	complex< double >

struct	complex< float >

struct	complex< int >

struct	complex< short >

struct	CompositeColorSpinorFieldDescriptor

struct	ContractionArg

class	CopyColorSpinor

class	CopyColorSpinor< 4, Arg >

struct	CopyColorSpinorArg

class	CopyGauge

struct	CopyGaugeArg

class	CopyGaugeEx

struct	CopyGaugeExArg

class	CopySpinor

class	CopySpinorEx

struct	CopySpinorExArg

struct	CovDevArg
	Parameter structure for driving the covariant derivative operator. More...

class	cpuCloverField

class	cpuColorSpinorField

class	cpuGaugeField

class	cudaCloverField

class	cudaColorSpinorField

class	cudaGaugeField

struct	deflated_solver

class	Deflation

struct	DeflationParam

class	Dirac

class	DiracClover

class	DiracCloverPC

class	DiracCoarse

class	DiracCoarsePC

class	DiracDagger

class	DiracDomainWall

class	DiracDomainWall4D

class	DiracDomainWall4DPC

class	DiracDomainWallPC

class	DiracImprovedStaggered

class	DiracImprovedStaggeredPC

class	DiracM

class	DiracMatrix

class	DiracMdag

class	DiracMdagM

class	DiracMMdag

class	DiracMobius

class	DiracMobiusPC

class	DiracParam

class	DiracStaggered

class	DiracStaggeredPC

class	DiracTwistedClover

class	DiracTwistedCloverPC

class	DiracTwistedMass

class	DiracTwistedMassPC

class	DiracWilson

class	DiracWilsonPC

class	DomainWall4D

struct	DomainWall4DApply

struct	DomainWall4DArg

struct	DomainWall4DLaunch
	This is a helper class that is used to instantiate the correct templated kernel for the dslash. More...

class	DomainWall5D

struct	DomainWall5DApply

struct	DomainWall5DArg

struct	DomainWall5DLaunch
	This is a helper class that is used to instantiate the correct templated kernel for the dslash. More...

class	Dslash

class	Dslash5

struct	Dslash5Arg
	Parameter structure for applying the Dslash. More...

struct	DslashArg

struct	DslashCoarseArg

struct	DslashCoarseLaunch

class	DslashCoarsePolicyTune

struct	DslashConstant
	Constants used by dslash and packing kernels. More...

class	EigCGArgs

class	EigenSolver

class	ExtractGhost

struct	ExtractGhostArg

class	ExtractGhostEx

struct	ExtractGhostExArg

struct	fixedInvMaxValue

struct	fixedInvMaxValue< char >

struct	fixedInvMaxValue< char2 >

struct	fixedInvMaxValue< char4 >

struct	fixedInvMaxValue< short >

struct	fixedInvMaxValue< short2 >

struct	fixedInvMaxValue< short4 >

struct	fixedMaxValue

struct	fixedMaxValue< char >

struct	fixedMaxValue< char2 >

struct	fixedMaxValue< char4 >

struct	fixedMaxValue< short >

struct	fixedMaxValue< short2 >

struct	fixedMaxValue< short4 >

struct	float4_precision_mapper

struct	float4_precision_mapper< char >

struct	float4_precision_mapper< double >

struct	float4_precision_mapper< short >

struct	FmunuArg

struct	FullClover

class	Gamma

struct	GammaArg
	Parameter structure for driving the Gamma operator. More...

struct	gauge_ghost_wrapper
	gauge_ghost_wrapper is an internal class that is used to wrap instances of gauge ghost accessors, currying in a specific location and dimension on the field. The Ghost() accessors in gauge-field accessors return instances to this class, allowing us to then use operator overloading upon this class to interact with the Matrix class. As a result we can include gauge-field ghost accessors directly in Matrix expressions in kernels without having to declare temporaries with explicit calls to the load/save methods in the gauge-field accessors. More...

struct	gauge_mapper

struct	gauge_mapper< char, QUDA_RECONSTRUCT_10, N, stag, huge_alloc, ghostExchange, use_inphase >

struct	gauge_mapper< char, QUDA_RECONSTRUCT_12, N, stag, huge_alloc, ghostExchange, use_inphase >

struct	gauge_mapper< char, QUDA_RECONSTRUCT_13, N, stag, huge_alloc, ghostExchange, use_inphase >

struct	gauge_mapper< char, QUDA_RECONSTRUCT_8, N, stag, huge_alloc, ghostExchange, use_inphase >

struct	gauge_mapper< char, QUDA_RECONSTRUCT_9, N, stag, huge_alloc, ghostExchange, use_inphase >

struct	gauge_mapper< char, QUDA_RECONSTRUCT_NO, N, stag, huge_alloc, ghostExchange, use_inphase >

struct	gauge_mapper< double, QUDA_RECONSTRUCT_10, N, stag, huge_alloc, ghostExchange, use_inphase >

struct	gauge_mapper< double, QUDA_RECONSTRUCT_12, N, stag, huge_alloc, ghostExchange, use_inphase >

struct	gauge_mapper< double, QUDA_RECONSTRUCT_13, N, stag, huge_alloc, ghostExchange, use_inphase >

struct	gauge_mapper< double, QUDA_RECONSTRUCT_8, N, stag, huge_alloc, ghostExchange, use_inphase >

struct	gauge_mapper< double, QUDA_RECONSTRUCT_9, N, stag, huge_alloc, ghostExchange, use_inphase >

struct	gauge_mapper< double, QUDA_RECONSTRUCT_NO, N, stag, huge_alloc, ghostExchange, use_inphase >

struct	gauge_mapper< float, QUDA_RECONSTRUCT_10, N, stag, huge_alloc, ghostExchange, use_inphase >

struct	gauge_mapper< float, QUDA_RECONSTRUCT_12, N, stag, huge_alloc, ghostExchange, use_inphase >

struct	gauge_mapper< float, QUDA_RECONSTRUCT_13, N, stag, huge_alloc, ghostExchange, use_inphase >

struct	gauge_mapper< float, QUDA_RECONSTRUCT_8, N, stag, huge_alloc, ghostExchange, use_inphase >

struct	gauge_mapper< float, QUDA_RECONSTRUCT_9, N, stag, huge_alloc, ghostExchange, use_inphase >

struct	gauge_mapper< float, QUDA_RECONSTRUCT_NO, N, stag, huge_alloc, ghostExchange, use_inphase >

struct	gauge_mapper< short, QUDA_RECONSTRUCT_10, N, stag, huge_alloc, ghostExchange, use_inphase >

struct	gauge_mapper< short, QUDA_RECONSTRUCT_12, N, stag, huge_alloc, ghostExchange, use_inphase >

struct	gauge_mapper< short, QUDA_RECONSTRUCT_13, N, stag, huge_alloc, ghostExchange, use_inphase >

struct	gauge_mapper< short, QUDA_RECONSTRUCT_8, N, stag, huge_alloc, ghostExchange, use_inphase >

struct	gauge_mapper< short, QUDA_RECONSTRUCT_9, N, stag, huge_alloc, ghostExchange, use_inphase >

struct	gauge_mapper< short, QUDA_RECONSTRUCT_NO, N, stag, huge_alloc, ghostExchange, use_inphase >

struct	gauge_order_mapper

struct	gauge_order_mapper< T, QUDA_BQCD_GAUGE_ORDER, Nc >

struct	gauge_order_mapper< T, QUDA_FLOAT2_GAUGE_ORDER, Nc >

struct	gauge_order_mapper< T, QUDA_MILC_GAUGE_ORDER, Nc >

struct	gauge_order_mapper< T, QUDA_QDP_GAUGE_ORDER, Nc >

struct	gauge_order_mapper< T, QUDA_QDPJIT_GAUGE_ORDER, Nc >

struct	gauge_order_mapper< T, QUDA_TIFR_GAUGE_ORDER, Nc >

struct	gauge_order_mapper< T, QUDA_TIFR_PADDED_GAUGE_ORDER, Nc >

struct	gauge_wrapper
	gauge_wrapper is an internal class that is used to wrap instances of gauge accessors, currying in a specific location on the field. The operator() accessors in gauge-field accessors return instances to this class, allowing us to then use operator overloading upon this class to interact with the Matrix class. As a result we can include gauge-field accessors directly in Matrix expressions in kernels without having to declare temporaries with explicit calls to the load/save methods in the gauge-field accessors. More...

struct	GaugeAPEArg

class	GaugeCovDev
	Full Covariant Derivative operator. Although not a Dirac operator per se, it's a linear operator so it's convenient to put in the Dirac operator abstraction. More...

class	GaugeField

struct	GaugeFieldParam

class	GaugeGauss

struct	GaugeGaussArg

class	GaugeLaplace
	Full Gauge Laplace operator. Although not a Dirac operator per se, it's a linear operator so it's convenient to put in the Dirac operator abstraction. More...

class	GaugeLaplacePC
	Even-odd preconditioned Gauge Laplace operator. More...

class	GaugeOvrImpSTOUT

struct	GaugeOvrImpSTOUTArg

class	GaugePlaq

struct	GaugePlaqArg

struct	GaugeSTOUTArg

class	GCR

class	GenericPackGhostLauncher

class	GMResDR

class	GMResDRArgs

class	HMatrix
	Specialized container for Hermitian matrices (e.g., used for wrapping clover matrices) More...

struct	HMatrix_wrapper
	Wrapper class that enables us to write to HMatrices in packed format. More...

struct	Identity

struct	ImprovedStaggeredApply

class	IncEigCG

struct	Int2

struct	is_variable

struct	is_variable< DSLASH5_MOBIUS >

struct	is_variable< DSLASH5_MOBIUS_PRE >

struct	is_variable< M5_INV_ZMOBIUS >

struct	isFixed

struct	isFixed< char >

struct	isFixed< char2 >

struct	isFixed< char4 >

struct	isFixed< short >

struct	isFixed< short2 >

struct	isFixed< short4 >

struct	isHalf

struct	isHalf< short >

struct	isHalf< short2 >

struct	isHalf< short4 >

struct	isQuarter

struct	isQuarter< char >

struct	isQuarter< char2 >

struct	isQuarter< char4 >

struct	KSForceArg

class	KSForceComplete

struct	KSLongLinkArg

class	KSLongLinkForce

class	Laplace

struct	LaplaceApply

struct	LaplaceArg
	Parameter structure for driving the covariant derivative operator. More...

struct	LaplaceLaunch
	This is a helper class that is used to instantiate the correct templated kernel for the dslash. More...

class	LatticeField

struct	LatticeFieldParam

struct	less_significant

struct	mapper

struct	mapper< char >

struct	mapper< char2 >

struct	mapper< char4 >

struct	mapper< double >

struct	mapper< double2 >

struct	mapper< double4 >

struct	mapper< float >

struct	mapper< float2 >

struct	mapper< float4 >

struct	mapper< short >

struct	mapper< short2 >

struct	mapper< short4 >

class	Matrix

struct	matrix_field

class	MemAlloc

class	MG

struct	MGParam

class	MinResExt
	This computes the optimum guess for the system Ax=b in the L2 residual norm. For use in the HMD force calculations using a minimal residual chronological method. This computes the guess solution as a linear combination of a given number of previous solutions. Following Brower et al, only the orthogonalised vector basis is stored to conserve memory. More...

class	MPBiCGstab

class	MPCG

class	MR

struct	multigrid_solver

class	MultiShiftCG
	Multi-Shift Conjugate Gradient Solver. More...

class	MultiShiftSolver

class	NdegTwistedMass

struct	NdegTwistedMassApply

struct	NdegTwistedMassArg

struct	NdegTwistedMassLaunch
	This is a helper class that is used to instantiate the correct templated kernel for the dslash. More...

class	NdegTwistedMassPreconditioned

struct	NdegTwistedMassPreconditionedApply

struct	NdegTwistedMassPreconditionedLaunch
	This is a helper class that is used to instantiate the correct templated kernel for the dslash. More...

struct	non_native_precision_mapper

struct	non_native_precision_mapper< char >

struct	non_native_precision_mapper< double >

struct	non_native_precision_mapper< float >

struct	non_native_precision_mapper< short >

struct	NonRelBasis

struct	NonRelToChiralBasis

struct	norm_type

struct	norm_type< complex< T > >

struct	normal

struct	normal< double >

struct	normal< float >

class	Object

class	Pack

struct	PackArg

struct	PackGhostArg

struct	precision_spin_color_mapper

struct	precision_spin_color_mapper< double, double, 1, nColor_ >

struct	precision_spin_color_mapper< double, double, 2, nColor_ >

struct	precision_spin_color_mapper< double, double, 4, nColor_ >

struct	precision_spin_color_mapper< float, char, 4, nColor_ >

struct	precision_spin_color_mapper< float, short, 4, nColor_ >

struct	precision_spin_color_mapper< T, G, 1, nColor_ >

class	PreconCG

class	PreconditionedSolver

class	PreserveBasis

class	ProjectSU3

struct	ProjectSU3Arg

struct	PromoteTypeId

struct	PromoteTypeId< complex< double >, double >

struct	PromoteTypeId< complex< float >, float >

struct	PromoteTypeId< double, complex< double > >

struct	PromoteTypeId< double, float >

struct	PromoteTypeId< double, int >

struct	PromoteTypeId< float, complex< float > >

struct	PromoteTypeId< float, double >

struct	PromoteTypeId< float, int >

struct	PromoteTypeId< int, double >

struct	PromoteTypeId< int, float >

struct	QChargeArg

class	QudaMemCopy

struct	RealType

struct	RealType< char >

struct	RealType< char2 >

struct	RealType< char4 >

struct	RealType< complex< char > >

struct	RealType< complex< double > >

struct	RealType< complex< float > >

struct	RealType< complex< short > >

struct	RealType< double >

struct	RealType< double2 >

struct	RealType< float >

struct	RealType< float2 >

struct	RealType< float4 >

struct	RealType< short >

struct	RealType< short2 >

struct	RealType< short4 >

struct	reduce_vector

struct	ReduceArg

struct	RelBasis

struct	RestrictArg

class	RNG
	Class declaration to initialize and hold CURAND RNG states. More...

struct	rngArg

struct	scalar

struct	scalar< char >

struct	scalar< char2 >

struct	scalar< char3 >

struct	scalar< char4 >

struct	scalar< double >

struct	scalar< double2 >

struct	scalar< double3 >

struct	scalar< double4 >

struct	scalar< float >

struct	scalar< float2 >

struct	scalar< float3 >

struct	scalar< float4 >

struct	scalar< short >

struct	scalar< short2 >

struct	scalar< short3 >

struct	scalar< short4 >

class	SD

struct	SharedMemory

class	ShiftColorSpinorField

struct	ShiftColorSpinorFieldArg

class	ShiftUpdate

class	SimpleBiCGstab

class	Solver

struct	SolverParam

struct	SortedEvals

struct	spin_mapper

struct	spin_order_mapper

struct	spin_order_mapper< 1, QUDA_FLOAT4_FIELD_ORDER >

struct	spin_order_mapper< 2, QUDA_FLOAT4_FIELD_ORDER >

class	SpinorNoise

class	Staggered

struct	StaggeredApply

struct	StaggeredArg
	Parameter structure for driving the Staggered Dslash operator. More...

struct	StaggeredLaunch

struct	StaggeredReconstruct

struct	TexVectorType

struct	TexVectorType< char, 1 >

struct	TexVectorType< char, 2 >

struct	TexVectorType< char, 4 >

struct	TexVectorType< double, 1 >

struct	TexVectorType< double, 2 >

struct	TexVectorType< float, 1 >

struct	TexVectorType< float, 2 >

struct	TexVectorType< float, 4 >

struct	TexVectorType< short, 1 >

struct	TexVectorType< short, 2 >

struct	TexVectorType< short, 4 >

class	TimeProfile

struct	Timer

struct	TraceKey

class	Transfer

struct	Trig

struct	Trig< false, float >

struct	Trig< true, float >

class	TRLM
	Thick Restarted Lanczos Method. More...

class	Tunable

class	TunableLocalParity

class	TunableVectorY

class	TunableVectorYZ

struct	TuneKey

class	TuneParam

class	TwistClover

class	TwistedClover

struct	TwistedCloverApply

struct	TwistedCloverArg

struct	TwistedCloverLaunch
	This is a helper class that is used to instantiate the correct templated kernel for the dslash. More...

class	TwistedCloverPreconditioned

struct	TwistedCloverPreconditionedApply

struct	TwistedCloverPreconditionedLaunch
	This is a helper class that is used to instantiate the correct templated kernel for the dslash. More...

class	TwistedMass

struct	TwistedMassApply

struct	TwistedMassArg

struct	TwistedMassLaunch
	This is a helper class that is used to instantiate the correct templated kernel for the dslash. More...

class	TwistedMassPreconditioned

struct	TwistedMassPreconditionedApply

struct	TwistedMassPreconditionedLaunch
	This is a helper class that is used to instantiate the correct templated kernel for the dslash. More...

class	TwistGamma

struct	uniform

struct	uniform< double >

struct	uniform< float >

struct	vec_length

struct	vec_length< char >

struct	vec_length< char2 >

struct	vec_length< char4 >

struct	vec_length< double >

struct	vec_length< double2 >

struct	vec_length< double4 >

struct	vec_length< float >

struct	vec_length< float2 >

struct	vec_length< float4 >

struct	vec_length< short >

struct	vec_length< short2 >

struct	vec_length< short4 >

struct	vector

struct	vector< double, 2 >

struct	vector< float, 2 >

struct	vector< int, 2 >

struct	vector_type

class	VectorCache
	Class which wraps around a shared memory cache for a Vector type, where each thread in the thread block stores a unique Vector in the cache which any other thread can access. Presently, the expectation is that Vector is synonymous with the ColorSpinor class, but we could extend this to apply to the Matrix class as well. More...

struct	VectorType

struct	VectorType< char, 1 >

struct	VectorType< char, 2 >

struct	VectorType< char, 4 >

struct	VectorType< double, 1 >

struct	VectorType< double, 2 >

struct	VectorType< double, 4 >

struct	VectorType< float, 1 >

struct	VectorType< float, 2 >

struct	VectorType< float, 4 >

struct	VectorType< short, 1 >

struct	VectorType< short, 2 >

struct	VectorType< short, 4 >

class	Wilson

struct	WilsonApply

struct	WilsonArg
	Parameter structure for driving the Wilson operator. More...

class	WilsonClover

struct	WilsonCloverApply

struct	WilsonCloverArg

struct	WilsonCloverLaunch
	This is a helper class that is used to instantiate the correct templated kernel for the dslash. More...

class	WilsonCloverPreconditioned

struct	WilsonCloverPreconditionedApply

struct	WilsonCloverPreconditionedLaunch
	This is a helper class that is used to instantiate the correct templated kernel for the dslash. More...

struct	WilsonLaunch
	This is a helper class that is used to instantiate the correct templated kernel for the dslash. More...

struct	WilsonReconstruct

class	Worker

class	WuppertalSmearing

struct	WuppertalSmearingArg

class	XSD

struct	Zero

Typedefs
typedef std::vector< ColorSpinorField * >	CompositeColorSpinorField

using	ColorSpinorFieldSet = ColorSpinorField

typedef int	storeType

typedef std::complex< double >	Complex

typedef struct curandStateMRG32k3a	cuRNGState

using	DynamicStride = Stride< Dynamic, Dynamic >

using	DenseMatrix = MatrixXcd

using	VectorSet = MatrixXcd

using	Vector = VectorXcd

using	RealVector = VectorXd

using	RowMajorDenseMatrix = Matrix< Complex, Dynamic, Dynamic, RowMajor >

typedef std::map< TuneKey, TuneParam >	map

Enumerations
enum	MemoryLocation { Device = 1, Host = 2, Remote = 4 }

enum	Dslash5Type { DSLASH5_DWF, DSLASH5_MOBIUS_PRE, DSLASH5_MOBIUS, M5_INV_DWF, M5_INV_MOBIUS, M5_INV_ZMOBIUS }

enum	KernelType { INTERIOR_KERNEL = 5, EXTERIOR_KERNEL_ALL = 6, EXTERIOR_KERNEL_X = 0, EXTERIOR_KERNEL_Y = 1, EXTERIOR_KERNEL_Z = 2, EXTERIOR_KERNEL_T = 3, KERNEL_POLICY = 7 }

enum	DslashType { DSLASH_INTERIOR, DSLASH_EXTERIOR, DSLASH_FULL }

enum	QudaProfileType { QUDA_PROFILE_H2D, QUDA_PROFILE_D2H, QUDA_PROFILE_INIT, QUDA_PROFILE_PREAMBLE, QUDA_PROFILE_COMPUTE, QUDA_PROFILE_COMMS, QUDA_PROFILE_EPILOGUE, QUDA_PROFILE_FREE, QUDA_PROFILE_IO, QUDA_PROFILE_CHRONO, QUDA_PROFILE_EIGEN, QUDA_PROFILE_ARPACK, QUDA_PROFILE_LOWER_LEVEL, QUDA_PROFILE_PACK_KERNEL, QUDA_PROFILE_DSLASH_KERNEL, QUDA_PROFILE_GATHER, QUDA_PROFILE_SCATTER, QUDA_PROFILE_LAUNCH_KERNEL, QUDA_PROFILE_EVENT_RECORD, QUDA_PROFILE_EVENT_QUERY, QUDA_PROFILE_STREAM_WAIT_EVENT, QUDA_PROFILE_FUNC_SET_ATTRIBUTE, QUDA_PROFILE_EVENT_SYNCHRONIZE, QUDA_PROFILE_STREAM_SYNCHRONIZE, QUDA_PROFILE_DEVICE_SYNCHRONIZE, QUDA_PROFILE_MEMCPY_D2D_ASYNC, QUDA_PROFILE_MEMCPY_D2H_ASYNC, QUDA_PROFILE_MEMCPY2D_D2H_ASYNC, QUDA_PROFILE_MEMCPY_H2D_ASYNC, QUDA_PROFILE_COMMS_START, QUDA_PROFILE_COMMS_QUERY, QUDA_PROFILE_CONSTANT, QUDA_PROFILE_TOTAL, QUDA_PROFILE_COUNT }

enum	ComputeType { COMPUTE_UV, COMPUTE_AV, COMPUTE_TMAV, COMPUTE_TMCAV, COMPUTE_CLOVER_INV_MAX, COMPUTE_TWISTED_CLOVER_INV_MAX, COMPUTE_VUV, COMPUTE_COARSE_CLOVER, COMPUTE_REVERSE_Y, COMPUTE_DIAGONAL, COMPUTE_TMDIAGONAL, COMPUTE_CONVERT, COMPUTE_RESCALE, COMPUTE_INVALID }

enum	DslashCoarsePolicy { DslashCoarsePolicy::DSLASH_COARSE_BASIC, DslashCoarsePolicy::DSLASH_COARSE_ZERO_COPY_PACK, DslashCoarsePolicy::DSLASH_COARSE_ZERO_COPY_READ, DslashCoarsePolicy::DSLASH_COARSE_ZERO_COPY, DslashCoarsePolicy::DSLASH_COARSE_GDR_SEND, DslashCoarsePolicy::DSLASH_COARSE_GDR_RECV, DslashCoarsePolicy::DSLASH_COARSE_GDR, DslashCoarsePolicy::DSLASH_COARSE_ZERO_COPY_PACK_GDR_RECV, DslashCoarsePolicy::DSLASH_COARSE_GDR_SEND_ZERO_COPY_READ, DslashCoarsePolicy::DSLASH_COARSE_POLICY_DISABLED }

enum	BiCGstabLUpdateType { BICGSTABL_UPDATE_U = 0, BICGSTABL_UPDATE_R = 1 }

enum	libtype { libtype::eigen_lib, libtype::magma_lib, libtype::lapack_lib, libtype::mkl_lib, libtype::eigen_lib, libtype::magma_lib, libtype::lapack_lib, libtype::mkl_lib }

enum	libtype { libtype::eigen_lib, libtype::magma_lib, libtype::lapack_lib, libtype::mkl_lib, libtype::eigen_lib, libtype::magma_lib, libtype::lapack_lib, libtype::mkl_lib }

enum	AllocType { DEVICE, DEVICE_PINNED, HOST, PINNED, MAPPED, N_ALLOC_TYPE }

enum	norm_type_ { NORM1, NORM2, ABS_MAX, ABS_MIN, NORM1, NORM2, ABS_MAX, ABS_MIN }

enum	norm_type_ { NORM1, NORM2, ABS_MAX, ABS_MIN, NORM1, NORM2, ABS_MAX, ABS_MIN }

Functions
void	checkSpinor (const ColorSpinorField &a, const ColorSpinorField &b)

void	checkLength (const ColorSpinorField &a, const ColorSpinorField &b)

__host__ __device__ double	set (double &x)

__host__ __device__ double2	set (double2 &x)

__host__ __device__ double3	set (double3 &x)

__host__ __device__ double4	set (double4 &x)

__host__ __device__ void	sum (double &a, double &b)

__host__ __device__ void	sum (double2 &a, double2 &b)

__host__ __device__ void	sum (double3 &a, double3 &b)

__host__ __device__ void	sum (double4 &a, double4 &b)

std::ostream &	operator<< (std::ostream &output, const CloverFieldParam &param)

double	norm1 (const CloverField &u, bool inverse=false)

double	norm2 (const CloverField &a, bool inverse=false)

void	computeClover (CloverField &clover, const GaugeField &gauge, double coeff, QudaFieldLocation location)

void	copyGenericClover (CloverField &out, const CloverField &in, bool inverse, QudaFieldLocation location, void *Out=0, void *In=0, void *outNorm=0, void *inNorm=0)
	This generic function is used for copying the clover field where in the input and output can be in any order and location. More...

void	cloverInvert (CloverField &clover, bool computeTraceLog)
	This function compute the Cholesky decomposition of each clover matrix and stores the clover inverse field. More...

void	cloverRho (CloverField &clover, double rho)
	This function adds a real scalar onto the clover diagonal (only to the direct field not the inverse) More...

void	computeCloverForce (GaugeField &force, const GaugeField &U, std::vector< ColorSpinorField > &x, std::vector< ColorSpinorField > &p, std::vector< double > &coeff)
	Compute the force contribution from the solver solution fields. More...

void	computeCloverSigmaOprod (GaugeField &oprod, std::vector< ColorSpinorField > &x, std::vector< ColorSpinorField > &p, std::vector< std::vector< double > > &coeff)
	Compute the outer product from the solver solution fields arising from the diagonal term of the fermion bilinear in direction mu,nu and sum to outer product field. More...

void	computeCloverSigmaTrace (GaugeField &output, const CloverField &clover, double coeff)
	Compute the matrix tensor field necessary for the force calculation from the clover trace action. This computes a tensor field [mu,nu]. More...

void	cloverDerivative (cudaGaugeField &force, cudaGaugeField &gauge, cudaGaugeField &oprod, double coeff, QudaParity parity)
	Compute the derivative of the clover matrix in the direction mu,nu and compute the resulting force given the outer-product field. More...

template<typename Float , int Nc, int Ns>
__device__ __host__ complex< Float >	innerProduct (const ColorSpinor< Float, Nc, Ns > &a, const ColorSpinor< Float, Nc, Ns > &b)
	Compute the inner product over color and spin: dot = \sum_{s,c} conj(a(s,c)) * b(s,c) More...

template<typename Float , int Nc, int Ns>
__device__ __host__ complex< Float >	innerProduct (const ColorSpinor< Float, Nc, Ns > &a, const ColorSpinor< Float, Nc, Ns > &b, int s)

template<typename Float , int Nc, int Ns>
__device__ __host__ complex< Float >	innerProduct (const ColorSpinor< Float, Nc, Ns > &a, const ColorSpinor< Float, Nc, Ns > &b, int sa, int sb)

template<typename Float , int Nc, int Ns>
__device__ __host__ complex< Float >	innerProduct (const ColorSpinor< Float, Nc, 1 > &a, const ColorSpinor< Float, Nc, Ns > &b, int s)
	Compute the inner product over color at spin s between a color vector and a color spinor: dot = \sum_c conj(a(c)) * b(s,c) More...

template<typename Float , int Nc, int Ns>
__device__ __host__ Matrix< complex< Float >, Nc >	outerProdSpinTrace (const ColorSpinor< Float, Nc, Ns > &a, const ColorSpinor< Float, Nc, Ns > &b)

template<typename Float , int Nc, int Ns>
__device__ __host__ ColorSpinor< Float, Nc, Ns >	operator+ (const ColorSpinor< Float, Nc, Ns > &x, const ColorSpinor< Float, Nc, Ns > &y)
	ColorSpinor addition operator. More...

template<typename Float , int Nc, int Ns>
__device__ __host__ ColorSpinor< Float, Nc, Ns >	operator- (const ColorSpinor< Float, Nc, Ns > &x, const ColorSpinor< Float, Nc, Ns > &y)
	ColorSpinor subtraction operator. More...

template<typename Float , int Nc, int Ns, typename S >
__device__ __host__ ColorSpinor< Float, Nc, Ns >	operator* (const S &a, const ColorSpinor< Float, Nc, Ns > &x)
	Compute the scalar-vector product y = a * x. More...

template<typename Float , int Nc, int Ns>
__device__ __host__ ColorSpinor< Float, Nc, Ns >	operator* (const Matrix< complex< Float >, Nc > &A, const ColorSpinor< Float, Nc, Ns > &x)
	Compute the matrix-vector product y = A * x. More...

template<typename Float , int Nc, int Ns>
__device__ __host__ ColorSpinor< Float, Nc, Ns >	operator* (const HMatrix< Float, Nc *Ns > &A, const ColorSpinor< Float, Nc, Ns > &x)
	Compute the matrix-vector product y = A * x. More...

void	copyGenericColorSpinor (ColorSpinorField &dst, const ColorSpinorField &src, QudaFieldLocation location, void *Dst=0, void *Src=0, void *dstNorm=0, void *srcNorm=0)

void	genericSource (cpuColorSpinorField &a, QudaSourceType sourceType, int x, int s, int c)

int	genericCompare (const cpuColorSpinorField &a, const cpuColorSpinorField &b, int tol)

void	genericPrintVector (const cpuColorSpinorField &a, unsigned int x)

void	genericCudaPrintVector (const cudaColorSpinorField &a, unsigned x)

void	wuppertalStep (ColorSpinorField &out, const ColorSpinorField &in, int parity, const GaugeField &U, double A, double B)

void	wuppertalStep (ColorSpinorField &out, const ColorSpinorField &in, int parity, const GaugeField &U, double alpha)

void	exchangeExtendedGhost (cudaColorSpinorField *spinor, int R[], int parity, cudaStream_t *stream_p)

void	copyExtendedColorSpinor (ColorSpinorField &dst, const ColorSpinorField &src, QudaFieldLocation location, const int parity, void *Dst, void *Src, void *dstNorm, void *srcNorm)

void	genericPackGhost (void **ghost, const ColorSpinorField &a, QudaParity parity, int nFace, int dagger, MemoryLocation *destination=nullptr)
	Generic ghost packing routine. More...

void	spinorNoise (ColorSpinorField &src, RNG &randstates, QudaNoiseType type)
	Generate a random noise spinor. This variant allows the user to manage the RNG state. More...

void	spinorNoise (ColorSpinorField &src, unsigned long long seed, QudaNoiseType type)
	Generate a random noise spinor. This variant just requires a seed and will create and destroy the random number state. More...

QudaPCType	PCType_ (const char *func, const char *file, int line, const ColorSpinorField &a, const ColorSpinorField &b)
	Helper function for determining if the preconditioning type of the fields is the same. More...

template<typename... Args>
QudaPCType	PCType_ (const char *func, const char *file, int line, const ColorSpinorField &a, const ColorSpinorField &b, const Args &... args)
	Helper function for determining if the preconditioning type of the fields is the same. More...

template<typename ValueType >
__host__ __device__ ValueType	cos (ValueType x)

template<typename ValueType >
__host__ __device__ ValueType	sin (ValueType x)

template<typename ValueType >
__host__ __device__ ValueType	tan (ValueType x)

template<typename ValueType >
__host__ __device__ ValueType	acos (ValueType x)

template<typename ValueType >
__host__ __device__ ValueType	asin (ValueType x)

template<typename ValueType >
__host__ __device__ ValueType	atan (ValueType x)

template<typename ValueType >
__host__ __device__ ValueType	atan2 (ValueType x, ValueType y)

template<typename ValueType >
__host__ __device__ ValueType	cosh (ValueType x)

template<typename ValueType >
__host__ __device__ ValueType	sinh (ValueType x)

template<typename ValueType >
__host__ __device__ ValueType	tanh (ValueType x)

template<typename ValueType >
__host__ __device__ ValueType	exp (ValueType x)

template<typename ValueType >
__host__ __device__ ValueType	log (ValueType x)

template<typename ValueType >
__host__ __device__ ValueType	log10 (ValueType x)

template<typename ValueType , typename ExponentType >
__host__ __device__ ValueType	pow (ValueType x, ExponentType e)

template<typename ValueType >
__host__ __device__ ValueType	sqrt (ValueType x)

template<typename ValueType >
__host__ __device__ ValueType	abs (ValueType x)

template<typename ValueType >
__host__ __device__ ValueType	conj (ValueType x)

template<typename ValueType >
__host__ __device__ ValueType	abs (const complex< ValueType > &z)
	Returns the magnitude of z. More...

template<typename ValueType >
__host__ __device__ ValueType	arg (const complex< ValueType > &z)
	Returns the phase angle of z. More...

template<typename ValueType >
__host__ __device__ ValueType	norm (const complex< ValueType > &z)
	Returns the magnitude of z squared. More...

template<typename ValueType >
__host__ __device__ complex< ValueType >	conj (const complex< ValueType > &z)
	Returns the complex conjugate of z. More...

template<typename ValueType >
__host__ __device__ complex< ValueType >	polar (const ValueType &m, const ValueType &theta=0)
	Returns the complex with magnitude m and angle theta in radians. More...

template<typename ValueType >
__host__ __device__ complex< ValueType >	operator* (const complex< ValueType > &lhs, const complex< ValueType > &rhs)

template<typename ValueType >
__host__ __device__ complex< ValueType >	operator* (const complex< ValueType > &lhs, const ValueType &rhs)

template<typename ValueType >
__host__ __device__ complex< ValueType >	operator* (const ValueType &lhs, const complex< ValueType > &rhs)

template<typename ValueType >
__host__ __device__ complex< ValueType >	operator/ (const complex< ValueType > &lhs, const complex< ValueType > &rhs)

template<>
__host__ __device__ complex< float >	operator/ (const complex< float > &lhs, const complex< float > &rhs)

template<>
__host__ __device__ complex< double >	operator/ (const complex< double > &lhs, const complex< double > &rhs)

template<typename ValueType >
__host__ __device__ complex< ValueType >	operator+ (const complex< ValueType > &lhs, const complex< ValueType > &rhs)

template<typename ValueType >
__host__ __device__ complex< ValueType >	operator+ (const complex< ValueType > &lhs, const ValueType &rhs)

template<typename ValueType >
__host__ __device__ complex< ValueType >	operator+ (const ValueType &lhs, const complex< ValueType > &rhs)

template<typename ValueType >
__host__ __device__ complex< ValueType >	operator- (const complex< ValueType > &lhs, const complex< ValueType > &rhs)

template<typename ValueType >
__host__ __device__ complex< ValueType >	operator- (const complex< ValueType > &lhs, const ValueType &rhs)

template<typename ValueType >
__host__ __device__ complex< ValueType >	operator- (const ValueType &lhs, const complex< ValueType > &rhs)

template<typename ValueType >
__host__ __device__ complex< ValueType >	operator+ (const complex< ValueType > &rhs)

template<typename ValueType >
__host__ __device__ complex< ValueType >	operator- (const complex< ValueType > &rhs)

template<typename ValueType >
__host__ __device__ complex< ValueType >	cos (const complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex< ValueType >	cosh (const complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex< ValueType >	exp (const complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex< ValueType >	log (const complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex< ValueType >	log10 (const complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex< ValueType >	pow (const complex< ValueType > &z, const int &n)

template<typename ValueType >
__host__ __device__ complex< ValueType >	pow (const complex< ValueType > &z, const ValueType &x)

template<typename ValueType >
__host__ __device__ complex< ValueType >	pow (const complex< ValueType > &z, const complex< ValueType > &z2)

template<typename ValueType >
__host__ __device__ complex< ValueType >	pow (const ValueType &x, const complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex< ValueType >	sin (const complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex< ValueType >	sinh (const complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex< ValueType >	sqrt (const complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex< ValueType >	tan (const complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex< ValueType >	tanh (const complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex< ValueType >	acos (const complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex< ValueType >	asin (const complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex< ValueType >	atan (const complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex< ValueType >	acosh (const complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex< ValueType >	asinh (const complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex< ValueType >	atanh (const complex< ValueType > &z)

template<typename ValueType , class charT , class traits >
std::basic_ostream< charT, traits > &	operator<< (std::basic_ostream< charT, traits > &os, const complex< ValueType > &z)

template<typename ValueType , typename charT , class traits >
std::basic_istream< charT, traits > &	operator>> (std::basic_istream< charT, traits > &is, complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex< ValueType >	operator+ (const volatile complex< ValueType > &lhs, const volatile complex< ValueType > &rhs)

template<typename ValueType >
__host__ __device__ complex< ValueType >	operator/ (const complex< ValueType > &lhs, const ValueType &rhs)

template<typename ValueType >
__host__ __device__ complex< ValueType >	operator/ (const ValueType &lhs, const complex< ValueType > &rhs)

template<>
__host__ __device__ complex< float >	operator/ (const float &lhs, const complex< float > &rhs)

template<>
__host__ __device__ complex< double >	operator/ (const double &lhs, const complex< double > &rhs)

template<typename ValueType >
__host__ __device__ bool	operator== (const complex< ValueType > &lhs, const complex< ValueType > &rhs)

template<typename ValueType >
__host__ __device__ bool	operator== (const ValueType &lhs, const complex< ValueType > &rhs)

template<typename ValueType >
__host__ __device__ bool	operator== (const complex< ValueType > &lhs, const ValueType &rhs)

template<typename ValueType >
__host__ __device__ bool	operator!= (const complex< ValueType > &lhs, const complex< ValueType > &rhs)

template<typename ValueType >
__host__ __device__ bool	operator!= (const ValueType &lhs, const complex< ValueType > &rhs)

template<typename ValueType >
__host__ __device__ bool	operator!= (const complex< ValueType > &lhs, const ValueType &rhs)

template<>
__host__ __device__ float	abs (const complex< float > &z)

template<>
__host__ __device__ double	abs (const complex< double > &z)

template<>
__host__ __device__ float	arg (const complex< float > &z)

template<>
__host__ __device__ double	arg (const complex< double > &z)

template<>
__host__ __device__ complex< float >	polar (const float &magnitude, const float &angle)

template<>
__host__ __device__ complex< double >	polar (const double &magnitude, const double &angle)

template<>
__host__ __device__ complex< float >	cos (const complex< float > &z)

template<>
__host__ __device__ complex< float >	cosh (const complex< float > &z)

template<>
__host__ __device__ complex< float >	exp (const complex< float > &z)

template<>
__host__ __device__ complex< float >	log (const complex< float > &z)

template<>
__host__ __device__ complex< float >	pow (const float &x, const complex< float > &exponent)

template<>
__host__ __device__ complex< float >	sin (const complex< float > &z)

template<>
__host__ __device__ complex< float >	sinh (const complex< float > &z)

template<>
__host__ __device__ complex< float >	sqrt (const complex< float > &z)

template<typename ValueType >
__host__ __device__ complex< float >	atanh (const complex< float > &z)

template<typename real >
__host__ __device__ complex< real >	cmul (const complex< real > &x, const complex< real > &y)

template<typename real >
__host__ __device__ complex< real >	cmac (const complex< real > &x, const complex< real > &y, const complex< real > &z)

void	contractQuda (const ColorSpinorField &x, const ColorSpinorField &y, void *result, QudaContractType cType)

template<typename type >
int	vecLength ()

template<>
int	vecLength< char > ()

template<>
int	vecLength< short > ()

template<>
int	vecLength< float > ()

template<>
int	vecLength< double > ()

template<>
int	vecLength< char2 > ()

template<>
int	vecLength< short2 > ()

template<>
int	vecLength< float2 > ()

template<>
int	vecLength< double2 > ()

template<>
int	vecLength< char4 > ()

template<>
int	vecLength< short4 > ()

template<>
int	vecLength< float4 > ()

template<>
int	vecLength< double4 > ()

__host__ __device__ float	s2f (short a)

__host__ __device__ double	s2d (short a)

__host__ __device__ float	c2f (char a)

__host__ __device__ double	c2d (char a)

__host__ __device__ float	s2f (short a, float c)

__host__ __device__ double	s2d (short a, double c)

__host__ __device__ float	c2f (char a, float c)

__host__ __device__ double	c2d (char a, double c)

template<typename FloatN >
__device__ void	copyFloatN (FloatN &a, const FloatN &b)

__device__ void	copyFloatN (float2 &a, const char2 &b)

__device__ void	copyFloatN (float4 &a, const char4 &b)

__device__ void	copyFloatN (double2 &a, const char2 &b)

__device__ void	copyFloatN (double4 &a, const char4 &b)

__device__ void	copyFloatN (float2 &a, const short2 &b)

__device__ void	copyFloatN (float4 &a, const short4 &b)

__device__ void	copyFloatN (double2 &a, const short2 &b)

__device__ void	copyFloatN (double4 &a, const short4 &b)

__device__ void	copyFloatN (float2 &a, const double2 &b)

__device__ void	copyFloatN (double2 &a, const float2 &b)

__device__ void	copyFloatN (float4 &a, const double4 &b)

__device__ void	copyFloatN (double4 &a, const float4 &b)

__device__ __host__ int	f2i (float f)

__device__ __host__ int	d2i (double d)

__device__ void	copyFloatN (short2 &a, const float2 &b)

__device__ void	copyFloatN (short4 &a, const float4 &b)

__device__ void	copyFloatN (short2 &a, const double2 &b)

__device__ void	copyFloatN (short4 &a, const double4 &b)

__device__ void	copyFloatN (char2 &a, const float2 &b)

__device__ void	copyFloatN (char4 &a, const float4 &b)

__device__ void	copyFloatN (char2 &a, const double2 &b)

__device__ void	copyFloatN (char4 &a, const double4 &b)

template<typename OutputType , typename InputType >
__device__ void	convert (OutputType x[], InputType y[], const int N)

template<>
__device__ void	convert< float2, short2 > (float2 x[], short2 y[], const int N)

template<>
__device__ void	convert< float4, short4 > (float4 x[], short4 y[], const int N)

template<>
__device__ void	convert< double4, double2 > (double4 x[], double2 y[], const int N)

template<>
__device__ void	convert< double2, double4 > (double2 x[], double4 y[], const int N)

template<>
__device__ void	convert< float4, float2 > (float4 x[], float2 y[], const int N)

template<>
__device__ void	convert< float2, float4 > (float2 x[], float4 y[], const int N)

template<>
__device__ void	convert< short4, float2 > (short4 x[], float2 y[], const int N)

template<>
__device__ void	convert< float2, short4 > (float2 x[], short4 y[], const int N)

template<>
__device__ void	convert< float4, short2 > (float4 x[], short2 y[], const int N)

template<>
__device__ void	convert< short2, float4 > (short2 x[], float4 y[], const int N)

template<>
__device__ void	convert< short4, double2 > (short4 x[], double2 y[], const int N)

template<>
__device__ void	convert< double2, short4 > (double2 x[], short4 y[], const int N)

template<>
__device__ void	convert< double4, short2 > (double4 x[], short2 y[], const int N)

template<>
__device__ void	convert< short2, double4 > (short2 x[], double4 y[], const int N)

template<>
__device__ void	convert< float4, double2 > (float4 x[], double2 y[], const int N)

template<>
__device__ void	convert< double2, float4 > (double2 x[], float4 y[], const int N)

template<>
__device__ void	convert< double4, float2 > (double4 x[], float2 y[], const int N)

template<>
__device__ void	convert< float2, double4 > (float2 x[], double4 y[], const int N)

template<typename scalar , int n>
__device__ __host__ void	zero (vector_type< scalar, n > &v)

template<typename scalar , int n>
__device__ __host__ vector_type< scalar, n >	operator+ (const vector_type< scalar, n > &a, const vector_type< scalar, n > &b)

template<int block_size_x, int block_size_y, typename T , bool do_sum = true, typename Reducer = cub::Sum>
__device__ void	reduce2d (ReduceArg< T > arg, const T &in, const int idx=0)

template<int block_size, typename T , bool do_sum = true, typename Reducer = cub::Sum>
__device__ void	reduce (ReduceArg< T > arg, const T &in, const int idx=0)

template<int block_size_x, int block_size_y, typename T >
__device__ void	reduceRow (ReduceArg< T > arg, const T &in)

void	setDiracParam (DiracParam &diracParam, QudaInvertParam *inv_param, bool pc)

void	setDiracSloppyParam (DiracParam &diracParam, QudaInvertParam *inv_param, bool pc)

template<template< typename, int, QudaReconstructType > class Apply, typename Recon , typename Float , int nColor, typename... Args>
void	instantiate (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, Args &&... args)
	This instantiate function is used to instantiate the reconstruct types used. More...

template<template< typename, int, QudaReconstructType > class Apply, typename Recon , typename Float , typename... Args>
void	instantiate (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, Args &&... args)
	This instantiate function is used to instantiate the colors. More...

template<template< typename, int, QudaReconstructType > class Apply, typename Recon = WilsonReconstruct, typename... Args>
void	instantiate (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, Args &&... args)
	This instantiate function is used to instantiate the precisions. More...

template<KernelType type>
__host__ __device__ bool	doHalo (int dim=-1)
	Helper function to determine if we should do halo computation. More...

template<KernelType type>
__host__ __device__ bool	doBulk ()
	Helper function to determine if we should do interior computation. More...

template<KernelType type, typename Arg >
__host__ __device__ bool	isComplete (const Arg &arg, int coord[])
	Helper function to determine if the application of the derivative in the dslash is complete. More...

template<int nDim, QudaPCType pc_type, KernelType kernel_type, typename Arg , int nface_ = 1>
__host__ __device__ int	getCoords (int coord[], const Arg &arg, int &idx, int parity, int &dim)
	Compute the space-time coordinates we are at. More...

template<int dim, typename Arg >
__host__ __device__ bool	inBoundary (const int coord[], const Arg &arg)
	Compute whether the provided coordinate is within the halo region boundary of a given dimension. More...

template<KernelType kernel_type, typename Arg >
__device__ bool	isActive (bool &active, int threadDim, int offsetDim, const int coord[], const Arg &arg)
	Compute whether this thread should be active for updating a given offsetDim halo. For non-fused halo update kernels this is a trivial kernel that just checks if the given dimension is partitioned and if so, returns true. More...

template<typename Float >
std::ostream &	operator<< (std::ostream &out, const DslashArg< Float > &arg)

void	setKernelPackT (bool pack)

bool	getKernelPackT ()

void	pushKernelPackT (bool pack)

void	popKernelPackT ()

void	setPackComms (const int *dim_pack)
	Helper function that sets which dimensions the packing kernel should be packing for. More...

bool	getDslashLaunch ()

void	createDslashEvents ()

void	destroyDslashEvents ()

void	ApplyWilson (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double kappa, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
	Driver for applying the Wilson stencil. More...

void	ApplyWilsonClover (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, const CloverField &A, double kappa, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
	Driver for applying the Wilson-clover stencil. More...

void	ApplyWilsonCloverPreconditioned (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, const CloverField &A, double kappa, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
	Driver for applying the preconditioned Wilson-clover stencil. More...

void	ApplyTwistedMass (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double b, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
	Driver for applying the twisted-mass stencil. More...

void	ApplyTwistedMassPreconditioned (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double b, bool xpay, const ColorSpinorField &x, int parity, bool dagger, bool asymmetric, const int *comm_override, TimeProfile &profile)
	Driver for applying the preconditioned twisted-mass stencil. More...

void	ApplyNdegTwistedMass (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double b, double c, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
	Driver for applying the non-degenerate twisted-mass stencil. More...

void	ApplyNdegTwistedMassPreconditioned (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double b, double c, bool xpay, const ColorSpinorField &x, int parity, bool dagger, bool asymmetric, const int *comm_override, TimeProfile &profile)
	Driver for applying the preconditioned non-degenerate twisted-mass stencil. More...

void	ApplyTwistedClover (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, const CloverField &C, double a, double b, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
	Driver for applying the twisted-clover stencil. More...

void	ApplyTwistedCloverPreconditioned (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, const CloverField &C, double a, double b, bool xpay, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
	Driver for applying the preconditioned twisted-clover stencil. More...

void	ApplyDomainWall5D (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double m_f, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
	Driver for applying the Domain-wall 5-d stencil to a 5-d vector with 5-d preconditioned data order. More...

void	ApplyDomainWall4D (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double m_5, const Complex b_5, const Complex c_5, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
	Driver for applying the batched Wilson 4-d stencil to a 5-d vector with 4-d preconditioned data order. More...

void	ApplyDslash5 (ColorSpinorField &out, const ColorSpinorField &in, const ColorSpinorField &x, double m_f, double m_5, const Complex b_5, const Complex c_5, double a, bool dagger, Dslash5Type type)
	Apply either the domain-wall / mobius Dslash5 operator or the M5 inverse operator. In the current implementation, it is expected that the color-spinor fields are 4-d preconditioned. More...

void	ApplyLaplace (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, int dir, double kappa, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
	Driver for applying the Laplace stencil. More...

void	ApplyCovDev (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, int mu, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
	Driver for applying the covariant derivative. More...

void	ApplyClover (ColorSpinorField &out, const ColorSpinorField &in, const CloverField &clover, bool inverse, int parity)
	Apply clover-matrix field to a color-spinor field. More...

void	ApplyStaggered (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
	Apply the staggered dslash operator to a color-spinor field. More...

void	ApplyImprovedStaggered (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, const GaugeField &L, double a, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
	Apply the improved staggered dslash operator to a color-spinor field. More...

void	ApplyTwistGamma (ColorSpinorField &out, const ColorSpinorField &in, int d, double kappa, double mu, double epsilon, int dagger, QudaTwistGamma5Type type)
	Apply the twisted-mass gamma operator to a color-spinor field. More...

void	ApplyTwistClover (ColorSpinorField &out, const ColorSpinorField &in, const CloverField &clover, double kappa, double mu, double epsilon, int parity, int dagger, QudaTwistGamma5Type twist)
	Apply twisted clover-matrix field to a color-spinor field. More...

void	PackGhost (void *ghost[2 *QUDA_MAX_DIM], const ColorSpinorField &field, MemoryLocation location, int nFace, bool dagger, int parity, bool spin_project, double a, double b, double c, const cudaStream_t &stream)
	Dslash face packing routine. More...

void	gamma5 (ColorSpinorField &out, const ColorSpinorField &in)
	Applies a gamma5 matrix to a spinor (wrapper to ApplyGamma) More...

void	arpack_solve (std::vector< ColorSpinorField > &h_evecs, std::vector< Complex > &h_evals, const DiracMatrix &mat, QudaEigParam eig_param, TimeProfile &profile)
	The QUDA interface function. One passes two allocated arrays to hold the eigenmode data, the problem matrix, the arpack parameters defining what problem is to be solved, and a container for QUDA data structure types. More...

__device__ __host__ void	zero (double &a)

__device__ __host__ void	zero (double2 &a)

__device__ __host__ void	zero (double3 &a)

__device__ __host__ void	zero (double4 &a)

__device__ __host__ void	zero (float &a)

__device__ __host__ void	zero (float2 &a)

__device__ __host__ void	zero (float3 &a)

__device__ __host__ void	zero (float4 &a)

__host__ __device__ double2	operator+ (const double2 &x, const double2 &y)

__host__ __device__ double2	operator- (const double2 &x, const double2 &y)

__host__ __device__ float2	operator- (const float2 &x, const float2 &y)

__host__ __device__ float4	operator- (const float4 &x, const float4 &y)

__host__ __device__ double3	operator+ (const double3 &x, const double3 &y)

__host__ __device__ double4	operator+ (const double4 &x, const double4 &y)

__host__ __device__ float4	operator* (const float a, const float4 x)

__host__ __device__ float2	operator* (const float a, const float2 x)

__host__ __device__ double2	operator* (const double a, const double2 x)

__host__ __device__ double4	operator* (const double a, const double4 x)

__host__ __device__ float2	operator+ (const float2 x, const float2 y)

__host__ __device__ float4	operator+ (const float4 x, const float4 y)

__host__ __device__ float4	operator+= (float4 &x, const float4 y)

__host__ __device__ float2	operator+= (float2 &x, const float2 y)

__host__ __device__ double2	operator+= (double2 &x, const double2 y)

__host__ __device__ double3	operator+= (double3 &x, const double3 y)

__host__ __device__ double4	operator+= (double4 &x, const double4 y)

__host__ __device__ float4	operator-= (float4 &x, const float4 y)

__host__ __device__ float2	operator-= (float2 &x, const float2 y)

__host__ __device__ double2	operator-= (double2 &x, const double2 y)

__host__ __device__ float2	operator*= (float2 &x, const float a)

__host__ __device__ double2	operator*= (double2 &x, const float a)

__host__ __device__ float4	operator*= (float4 &a, const float &b)

__host__ __device__ double2	operator*= (double2 &a, const double &b)

__host__ __device__ double4	operator*= (double4 &a, const double &b)

__host__ __device__ float2	operator- (const float2 &x)

__host__ __device__ double2	operator- (const double2 &x)

__forceinline__ __host__ __device__ float	max_fabs (const float4 &c)

__forceinline__ __host__ __device__ float	max_fabs (const float2 &b)

__forceinline__ __host__ __device__ double	max_fabs (const double4 &c)

__forceinline__ __host__ __device__ double	max_fabs (const double2 &b)

__forceinline__ __host__ __device__ float2	make_FloatN (const double2 &a)

__forceinline__ __host__ __device__ float4	make_FloatN (const double4 &a)

__forceinline__ __host__ __device__ double2	make_FloatN (const float2 &a)

__forceinline__ __host__ __device__ double4	make_FloatN (const float4 &a)

__forceinline__ __host__ __device__ short4	make_shortN (const char4 &a)

__forceinline__ __host__ __device__ short2	make_shortN (const char2 &a)

__forceinline__ __host__ __device__ short4	make_shortN (const float4 &a)

__forceinline__ __host__ __device__ short2	make_shortN (const float2 &a)

__forceinline__ __host__ __device__ short4	make_shortN (const double4 &a)

__forceinline__ __host__ __device__ short2	make_shortN (const double2 &a)

__forceinline__ __host__ __device__ char4	make_charN (const short4 &a)

__forceinline__ __host__ __device__ char2	make_charN (const short2 &a)

__forceinline__ __host__ __device__ char4	make_charN (const float4 &a)

__forceinline__ __host__ __device__ char2	make_charN (const float2 &a)

__forceinline__ __host__ __device__ char4	make_charN (const double4 &a)

__forceinline__ __host__ __device__ char2	make_charN (const double2 &a)

template<typename Float2 , typename Complex >
Float2	make_Float2 (const Complex &a)

template<>
double2	make_Float2 (const complex< double > &a)

template<>
double2	make_Float2 (const complex< float > &a)

template<>
float2	make_Float2 (const complex< double > &a)

template<>
float2	make_Float2 (const complex< float > &a)

template<>
double2	make_Float2 (const std::complex< double > &a)

template<>
double2	make_Float2 (const std::complex< float > &a)

template<>
float2	make_Float2 (const std::complex< double > &a)

template<>
float2	make_Float2 (const std::complex< float > &a)

complex< double >	make_Complex (const double2 &a)

complex< float >	make_Complex (const float2 &a)

std::ostream &	operator<< (std::ostream &output, const GaugeFieldParam &param)

double	norm1 (const GaugeField &u)
	This is a debugging function, where we cast a gauge field into a spinor field so we can compute its L1 norm. More...

double	norm2 (const GaugeField &u)
	This is a debugging function, where we cast a gauge field into a spinor field so we can compute its L2 norm. More...

void	ax (const double &a, GaugeField &u)
	Scale the gauge field by the scalar a. More...

void	copyGenericGauge (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out=0, void *In=0, void **ghostOut=0, void **ghostIn=0, int type=0)

void	copyExtendedGauge (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out=0, void *In=0)

void	extractGaugeGhost (const GaugeField &u, void **ghost, bool extract=true, int offset=0)

void	extractExtendedGaugeGhost (const GaugeField &u, int dim, const int *R, void **ghost, bool extract)

void	applyGaugePhase (GaugeField &u)

uint64_t	Checksum (const GaugeField &u, bool mini=false)

void	gaugeForce (GaugeField &mom, const GaugeField &u, double coeff, int ***input_path, int *length, double *path_coeff, int num_paths, int max_length)
	Compute the gauge-force contribution to the momentum. More...

double3	plaquette (const GaugeField &U)
	Compute the plaquette of the gauge field. More...

void	gaugeGauss (GaugeField &U, RNG &rngstate, double epsilon)
	Generate Gaussian distributed su(N) or SU(N) fields. If U is a momentum field, then we generate random Gaussian distributed field in the Lie algebra using the anti-Hermitian convention. If U is in the group then we create a Gaussian distributed su(n) field and exponentiate it, e.g., U = exp(sigma * H), where H is the distributed su(n) field and sigma is the width of the distribution (sigma = 0 results in a free field, and sigma = 1 has maximum disorder). More...

void	gaugeGauss (GaugeField &U, unsigned long long seed, double epsilon)
	Generate Gaussian distributed su(N) or SU(N) fields. If U is a momentum field, then we generate random Gaussian distributed field in the Lie algebra using the anti-Hermitian convention. If U is in the group then we create a Gaussian distributed su(n) field and exponentiate it, e.g., U = exp(sigma * H), where H is the distributed su(n) field and sigma is the width of the distribution (sigma = 0 results in a free field, and sigma = 1 has maximum disorder). More...

void	APEStep (GaugeField &dataDs, const GaugeField &dataOr, double alpha)
	Apply APE smearing to the gauge field. More...

void	STOUTStep (GaugeField &dataDs, const GaugeField &dataOr, double rho)
	Apply STOUT smearing to the gauge field. More...

void	OvrImpSTOUTStep (GaugeField &dataDs, const GaugeField &dataOr, double rho, double epsilon)
	Apply Over Improved STOUT smearing to the gauge field. More...

void	gaugefixingOVR (cudaGaugeField &data, const int gauge_dir, const int Nsteps, const int verbose_interval, const double relax_boost, const double tolerance, const int reunit_interval, const int stopWtheta)
	Gauge fixing with overrelaxation with support for single and multi GPU. More...

void	gaugefixingFFT (cudaGaugeField &data, const int gauge_dir, const int Nsteps, const int verbose_interval, const double alpha, const int autotune, const double tolerance, const int stopWtheta)
	Gauge fixing with Steepest descent method with FFTs with support for single GPU only. More...

void	computeFmunu (GaugeField &Fmunu, const GaugeField &gauge)
	Compute the Fmunu tensor. More...

double	computeQCharge (const GaugeField &Fmunu)
	Compute the topological charge. More...

double	computeQChargeDensity (const GaugeField &Fmunu, void *result)
	Compute the topological charge density per lattice site. More...

void	updateGaugeField (GaugeField &out, double dt, const GaugeField &in, const GaugeField &mom, bool conj_mom, bool exact)

template<typename I , typename J , typename K >
static __device__ __host__ int	linkIndexShift (const I x[], const J dx[], const K X[4])

template<typename I , typename J , typename K >
static __device__ __host__ int	linkIndexShift (I y[], const I x[], const J dx[], const K X[4])

template<typename I >
static __device__ __host__ int	linkIndex (const int x[], const I X[4])

template<typename I >
static __device__ __host__ int	linkIndex (int y[], const int x[], const I X[4])

template<typename I , int n>
static __device__ __host__ int	linkIndexDn (const int x[], const I X[4], const int mu)

template<typename I >
static __device__ __host__ int	linkIndexM1 (const int x[], const I X[4], const int mu)

template<typename I >
static __device__ __host__ int	linkIndexM3 (const int x[], const I X[4], const int mu)

template<typename I >
static __device__ __host__ int	linkNormalIndexP1 (const int x[], const I X[4], const int mu)

template<typename I >
static __device__ __host__ int	linkIndexP1 (const int x[], const I X[4], const int mu)

template<typename I >
static __device__ __host__ int	linkIndexP3 (const int x[], const I X[4], const int mu)

template<int nDim = 4, typename Arg >
static __device__ __host__ int	getNeighborIndexCB (const int x[], int mu, int dir, const Arg &arg)
	Compute the checkerboard 1-d index for the nearest neighbor. More...

template<typename I , typename J >
static __device__ __host__ void	getCoordsCB (int x[], int cb_index, const I X[], J X0h, int parity)

template<typename I >
static __device__ __host__ void	getCoords (int x[], int cb_index, const I X[], int parity)

template<typename I , typename J >
static __device__ __host__ void	getCoordsExtended (I x[], int cb_index, const J X[], int parity, const int R[])

template<typename I , typename J >
static __device__ __host__ void	getCoords5CB (int x[5], int cb_index, const I X[5], J X0h, int parity, QudaPCType pc_type)

template<typename I >
static __device__ __host__ void	getCoords5 (int x[5], int cb_index, const I X[5], int parity, QudaPCType pc_type)

template<typename I >
static __device__ __host__ int	getIndexFull (int cb_index, const I X[4], int parity)

template<int dir, int nDim = 4, typename I >
__device__ __host__ int	ghostFaceIndex (const int x_[], const I X_[], int dim, int nFace)

template<int dir, int nDim = 4, typename I >
__device__ __host__ int	ghostFaceIndexStaggered (const int x_[], const I X_[], int dim, int nFace)

template<int nDim, QudaPCType type, int dim_, int nLayers, typename Int , typename Arg >
__device__ __host__ void	coordsFromFaceIndex (int &idx, int &cb_idx, Int *const x, int face_idx, const int &face_num, int parity, const Arg &arg)
	Compute the full-lattice coordinates from the input face index. This is used by the Wilson-like halo update kernels, and can deal with 4-d or 5-d field and 4-d or 5-d preconditioning. More...

template<int nDim, QudaPCType type, int dim_, int nLayers, typename Int , typename Arg >
__device__ __host__ void	coordsFromFaceIndex (int &idx, int &cb_idx, Int *const x, int face_idx, const int &face_num, const Arg &arg)
	Overloaded variant of coordsFromFaceIndex where we use the parity declared in arg. More...

template<int nDim, QudaPCType type, int dim, int nLayers, int face_num, typename Arg >
__device__ __host__ int	indexFromFaceIndex (int face_idx, int parity, const Arg &arg)
	Compute the checkerboard lattice index from the input face index. This is used by the Wilson-like halo packing kernels, and can deal with 4-d or 5-d field and 4-d or 5-d preconditioning. More...

template<int nDim, QudaPCType type, int dim, int nLayers, int face_num, typename Arg >
__device__ __host__ int	indexFromFaceIndex (int face_idx, const Arg &arg)
	Overloaded variant of indexFromFaceIndex where we use the parity declared in arg. More...

template<int nDim, QudaPCType type, int dim, int nLayers, int face_num, typename Arg >
static __device__ int	indexFromFaceIndexStaggered (int face_idx_in, int parity, const Arg &arg)
	Compute global checkerboard index from face index. The following indexing routines work for arbitrary lattice dimensions (though perhaps not odd like the Wilson variant?) Specifically, we compute an index into the local volume from an index into the face. This is used by the staggered-like face packing routines, and is different from the Wilson variant since here the halo depth is traversed in a different order - here the halo depth is the faster running dimension. More...

template<int nDim = 4, typename Arg >
__host__ __device__ int	dimFromFaceIndex (int &face_idx, int tid, const Arg &arg)
	Determines which face a given thread is computing. Also rescale face_idx so that it is relative to a given dimension. If the 5-d variant is called, then it is assumed that arg.threads contains only the 3-d surface of threads but face_idx is a 4-d index (surface * fifth dimension). At present multi-src staggered uses the 4-d variant since the face_idx that is passed in is the 3-d surface not the 4-d one. More...

template<int nDim = 4, typename Arg >
__host__ __device__ int	dimFromFaceIndex (int &face_idx, const Arg &arg)

template<typename T >
__device__ int	block_idx (const T &swizzle)
	Swizzler for reordering the (x) thread block indices - use in conjunction with swizzle-factor autotuning to find the optimum swizzle factor. Specifically, the thread block id is remapped by transposing its coordinates: if the original order can be parametrized by. More...

template<typename Arg >
__device__ __host__ auto	StaggeredPhase (const int coords[], int dim, int dir, const Arg &arg) -> typename Arg::real
	Compute the staggered phase factor at unit shift from the current lattice coordinates. The routine below optimizes out the shift where possible, hence is only visible where we need to consider the boundary condition. More...

__device__ void	load_streaming_double2 (double2 &a, const double2 *addr)

__device__ void	load_streaming_float4 (float4 &a, const float4 *addr)

__device__ void	load_cached_short4 (short4 &a, const short4 *addr)

__device__ void	load_cached_short2 (short2 &a, const short2 *addr)

__device__ void	load_global_short4 (short4 &a, const short4 *addr)

__device__ void	load_global_short2 (short2 &a, const short2 *addr)

__device__ void	load_global_float4 (float4 &a, const float4 *addr)

__device__ void	store_streaming_float4 (float4 *addr, float x, float y, float z, float w)

__device__ void	store_streaming_short4 (short4 *addr, short x, short y, short z, short w)

__device__ void	store_streaming_double2 (double2 *addr, double x, double y)

__device__ void	store_streaming_float2 (float2 *addr, float x, float y)

__device__ void	store_streaming_short2 (short2 *addr, short x, short y)

template<int nColor, typename sumType , typename real >
__device__ __host__ void	colorInnerProduct (complex< sumType > &dot, int i, complex< real > v[nColor], complex< real > w[nColor])

template<int nColor, typename sumType , typename real >
__device__ __host__ void	colorNorm (sumType &nrm, complex< real > v[nColor])

template<typename real , int nColor>
__device__ __host__ void	colorScaleSubtract (complex< real > v[nColor], complex< real > a, complex< real > w[nColor])

template<typename real , int nColor>
__device__ __host__ void	colorScale (complex< real > v[nColor], real a)

template<typename sumFloat , typename Float , int nSpin, int spinBlockSize, int nColor, int coarseSpin, int nVec, typename Arg >
void	blockOrthoCPU (Arg &arg)

template<int block_size, typename sumFloat , typename Float , int nSpin, int spinBlockSize, int nColor, int coarseSpin, int nVec, typename Arg >
	__launch_bounds__ (2 *block_size) __global__ void blockOrthoGPU(Arg arg)

template<typename real , typename Link >
__device__ void	axpy (real a, const real *x, Link &y)

template<typename real , typename Link >
__device__ void	operator+= (real *y, const Link &x)

template<typename real , typename Link >
__device__ void	operator-= (real *y, const Link &x)

template<typename real , typename Arg , typename Link >
__device__ void	computeForce (LINK force, Arg &arg, int xIndex, int yIndex, int mu, int nu)

template<typename real , typename Arg >
__global__ void	cloverDerivativeKernel (Arg arg)

template<typename Float , typename Arg , bool computeTrLog, bool twist>
__device__ __host__ double	cloverInvertCompute (Arg &arg, int x_cb, int parity)

template<typename Float , typename Arg , bool computeTrLog, bool twist>
void	cloverInvert (Arg &arg)

template<int blockSize, typename Float , typename Arg , bool computeTrLog, bool twist>
__global__ void	cloverInvertKernel (Arg arg)

template<typename real , int nvector, int mu, int nu, int parity, typename Arg >
__device__ void	sigmaOprod (Arg &arg, int idx)

template<int nvector, typename real , typename Arg >
__global__ void	sigmaOprodKernel (Arg arg)

template<typename Float >
__device__ __host__ void	caxpy (const complex< Float > &a, const complex< Float > &x, complex< Float > &y)

template<bool from_coarse, typename Float , int dim, QudaDirection dir, int fineSpin, int fineColor, int coarseSpin, int coarseColor, typename Wtype , typename Arg >
__device__ __host__ void	computeUV (Arg &arg, const Wtype &W, int parity, int x_cb, int ic_c)

template<bool from_coarse, typename Float , int dim, QudaDirection dir, int fineSpin, int fineColor, int coarseSpin, int coarseColor, typename Arg >
void	ComputeUVCPU (Arg &arg)

template<bool from_coarse, typename Float , int dim, QudaDirection dir, int fineSpin, int fineColor, int coarseSpin, int coarseColor, typename Arg >
__global__ void	ComputeUVGPU (Arg arg)

template<typename Float , int fineSpin, int fineColor, int coarseColor, typename Arg >
__device__ __host__ void	computeAV (Arg &arg, int parity, int x_cb, int ch, int ic_c)

template<typename Float , int fineSpin, int fineColor, int coarseColor, typename Arg >
void	ComputeAVCPU (Arg &arg)

template<typename Float , int fineSpin, int fineColor, int coarseColor, typename Arg >
__global__ void	ComputeAVGPU (Arg arg)

template<typename Float , int fineSpin, int fineColor, int coarseColor, typename Arg >
__device__ __host__ void	computeTMAV (Arg &arg, int parity, int x_cb, int v)

template<typename Float , int fineSpin, int fineColor, int coarseColor, typename Arg >
void	ComputeTMAVCPU (Arg &arg)

template<typename Float , int fineSpin, int fineColor, int coarseColor, typename Arg >
__global__ void	ComputeTMAVGPU (Arg arg)

template<typename Float , int fineSpin, int fineColor, int coarseColor, typename Arg >
__device__ __host__ void	computeTMCAV (Arg &arg, int parity, int x_cb, int ch, int ic_c)

template<typename Float , int fineSpin, int fineColor, int coarseColor, typename Arg >
void	ComputeTMCAVCPU (Arg &arg)

template<typename Float , int fineSpin, int fineColor, int coarseColor, typename Arg >
__global__ void	ComputeTMCAVGPU (Arg arg)

template<bool from_coarse, typename Float , int dim, QudaDirection dir, int fineSpin, int fineColor, int coarseSpin, int coarseColor, typename Arg , typename Gamma >
__device__ __host__ void	multiplyVUV (complex< Float > vuv[], const Arg &arg, const Gamma &gamma, int parity, int x_cb, int ic_c, int jc_c)
	Do a single (AV)^\dagger * UV product, where for preconditioned clover, AV correspond to the clover inverse multiplied by the packed null space vectors, else AV is simply the packed null space vectors. More...

template<typename Arg >
__device__ __host__ int	virtualThreadIdx (const Arg &arg)

template<typename Arg >
__device__ __host__ int	virtualBlockDim (const Arg &arg)

template<typename Arg >
__device__ __host__ int	coarseIndex (const Arg &arg)

template<bool shared_atomic, bool parity_flip, bool from_coarse, typename Float , int dim, QudaDirection dir, int fineSpin, int fineColor, int coarseSpin, int coarseColor, typename Arg , typename Gamma >
__device__ __host__ void	computeVUV (Arg &arg, const Gamma &gamma, int parity, int x_cb, int c_row, int c_col, int parity_coarse_, int coarse_x_cb_)

template<bool from_coarse, typename Float , int dim, QudaDirection dir, int fineSpin, int fineColor, int coarseSpin, int coarseColor, typename Arg >
void	ComputeVUVCPU (Arg arg)

template<bool parity_flip, typename Arg >
__device__ void	getIndicesShared (const Arg &arg, int &parity, int &x_cb, int &parity_coarse, int &x_coarse_cb, int &c_col, int &c_row)

template<bool parity_flip, typename Arg >
__device__ void	getIndicesGlobal (const Arg &arg, int &parity, int &x_cb, int &parity_coarse, int &x_coarse_cb, int &c_col, int &c_row)

template<bool shared_atomic, bool parity_flip, bool from_coarse, typename Float , int dim, QudaDirection dir, int fineSpin, int fineColor, int coarseSpin, int coarseColor, typename Arg >
__global__ void	ComputeVUVGPU (Arg arg)

template<typename Float , int nSpin, int nColor, typename Arg >
__device__ __host__ void	computeYreverse (Arg &arg, int parity, int x_cb, int ic_c, int jc_c)

template<typename Float , int nSpin, int nColor, typename Arg >
void	ComputeYReverseCPU (Arg &arg)

template<typename Float , int nSpin, int nColor, typename Arg >
__global__ void	ComputeYReverseGPU (Arg arg)

template<bool from_coarse, typename Float , int fineSpin, int coarseSpin, int fineColor, int coarseColor, typename Arg >
__device__ __host__ void	computeCoarseClover (Arg &arg, int parity, int x_cb, int ic_c, int jc_c)

template<bool from_coarse, typename Float , int fineSpin, int coarseSpin, int fineColor, int coarseColor, typename Arg >
void	ComputeCoarseCloverCPU (Arg &arg)

template<bool from_coarse, typename Float , int fineSpin, int coarseSpin, int fineColor, int coarseColor, typename Arg >
__global__ void	ComputeCoarseCloverGPU (Arg arg)

template<typename Float , int nSpin, int nColor, typename Arg >
void	AddCoarseDiagonalCPU (Arg &arg)

template<typename Float , int nSpin, int nColor, typename Arg >
__global__ void	AddCoarseDiagonalGPU (Arg arg)

template<typename Float , int nSpin, int nColor, typename Arg >
void	AddCoarseTmDiagonalCPU (Arg &arg)

template<typename Float , int nSpin, int nColor, typename Arg >
__global__ void	AddCoarseTmDiagonalGPU (Arg arg)

template<typename Float , int nSpin, int nColor, typename Arg >
__device__ __host__ void	convert (Arg &arg, int parity, int x_cb, int c_row, int c_col)

template<typename Float , int nSpin, int nColor, typename Arg >
void	ConvertCPU (Arg &arg)

template<typename Float , int nSpin, int nColor, typename Arg >
__global__ void	ConvertGPU (Arg arg)

template<typename Float , int nSpin, int nColor, typename Arg >
__device__ __host__ void	rescaleY (Arg &arg, int parity, int x_cb, int c_row, int c_col)

template<typename Float , int nSpin, int nColor, typename Arg >
void	RescaleYCPU (Arg &arg)

template<typename Float , int nSpin, int nColor, typename Arg >
__global__ void	RescaleYGPU (Arg arg)

template<typename Float , int n, bool compute_max_only, typename Arg >
__device__ __host__ Float	computeYhat (Arg &arg, int d, int x_cb, int parity, int i, int j)

template<typename Float , int n, bool compute_max_only, typename Arg >
void	CalculateYhatCPU (Arg &arg)

template<typename Float , int n, bool compute_max_only, typename Arg >
__global__ void	CalculateYhatGPU (Arg arg)

template<typename Float , int Ns, int Ms, int Nc, int Mc, typename Arg >
__device__ __host__ __forceinline__ Float	compute_site_max (Arg &arg, int x_cb, int parity, int spinor_parity, int spin_block, int color_block, bool active)

template<typename Float , bool block_float, int Ns, int Ms, int Nc, int Mc, int nDim, int dim, int dir, typename Arg >
__device__ __host__ __forceinline__ void	packGhost (Arg &arg, int x_cb, int parity, int spinor_parity, int spin_block, int color_block)

template<typename Float , bool block_float, int Ns, int Ms, int Nc, int Mc, int nDim, typename Arg >
void	GenericPackGhost (Arg &arg)

template<typename Float , bool block_float, int Ns, int Ms, int Nc, int Mc, int nDim, int dim_threads, typename Arg >
__global__ void	GenericPackGhostKernel (Arg arg)

template<typename real , typename Arg >
__global__ void	computeColorContraction (Arg arg)

template<typename real , typename Arg >
__global__ void	computeDegrandRossiContraction (Arg arg)

template<typename FloatOut , typename FloatIn , int length, typename Arg >
void	copyGauge (Arg &arg)

template<typename Float , int length, typename Arg >
void	checkNan (Arg &arg)

template<typename FloatOut , typename FloatIn , int length, typename Arg >
__global__ void	copyGaugeKernel (Arg arg)

template<typename FloatOut , typename FloatIn , int length, typename Arg >
void	copyGhost (Arg &arg)

template<typename FloatOut , typename FloatIn , int length, typename Arg >
__global__ void	copyGhostKernel (Arg arg)

template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, int mu, typename Arg , typename Vector >
__device__ __host__ void	applyCovDev (Vector &out, Arg &arg, int coord[nDim], int x_cb, int parity, int idx, int thread_dim, bool &active)

template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, typename Arg >
__device__ __host__ void	covDev (Arg &arg, int idx, int parity)

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__global__ void	covDevGPU (Arg arg)

template<DslashType type>
static __host__ __device__ bool	doHalo ()
	Helper function to determine if we should do halo computation. More...

template<DslashType type>
static __host__ __device__ bool	doBulk ()
	Helper function to determine if we should do interior computation. More...

template<typename Float , int nDim, int Ns, int Nc, int Mc, int color_stride, int dim_stride, int thread_dir, int thread_dim, bool dagger, DslashType type, typename Arg >
__device__ __host__ void	applyDslash (complex< Float > out[], Arg &arg, int x_cb, int src_idx, int parity, int s_row, int color_block, int color_offset)

template<typename Float , int Ns, int Nc, int Mc, int color_stride, bool dagger, typename Arg >
__device__ __host__ void	applyClover (complex< Float > out[], Arg &arg, int x_cb, int src_idx, int parity, int s, int color_block, int color_offset)

template<typename Float , int nDim, int Ns, int Nc, int Mc, int color_stride, int dim_thread_split, bool dslash, bool clover, bool dagger, DslashType type, int dir, int dim, typename Arg >
__device__ __host__ void	coarseDslash (Arg &arg, int x_cb, int src_idx, int parity, int s, int color_block, int color_offset)

template<typename Float , int nDim, int Ns, int Nc, int Mc, bool dslash, bool clover, bool dagger, DslashType type, typename Arg >
void	coarseDslash (Arg arg)

template<typename Float , int nDim, int Ns, int Nc, int Mc, int color_stride, int dim_thread_split, bool dslash, bool clover, bool dagger, DslashType type, typename Arg >
__global__ void	coarseDslashKernel (Arg arg)

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__device__ __host__ void	domainWall4D (Arg &arg, int idx, int s, int parity)

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
void	domainWall4DCPU (Arg &arg)

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__global__ void	domainWall4DGPU (Arg arg)

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__device__ __host__ void	domainWall5D (Arg &arg, int idx, int parity)

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
void	domainWall5DCPU (Arg &arg)

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__global__ void	domainWall5DGPU (Arg arg)

template<typename Float , int nColor, bool dagger, bool xpay, Dslash5Type type, typename Arg >
__device__ __host__ void	dslash5 (Arg &arg, int parity, int x_cb, int s)
	Apply the D5 operator at given site. More...

template<typename Float , int nColor, bool dagger, bool xpay, Dslash5Type type, typename Arg >
void	dslash5CPU (Arg &arg)
	CPU kernel for applying the D5 operator. More...

template<typename Float , int nColor, bool dagger, bool xpay, Dslash5Type type, typename Arg >
__global__ void	dslash5GPU (Arg arg)
	GPU kernel for applying the D5 operator. More...

template<typename real , int nColor, bool dagger, Dslash5Type type, bool shared, typename Vector , typename Arg >
__device__ __host__ Vector	constantInv (Arg &arg, int parity, int x_cb, int s_)
	Apply the M5 inverse operator at a given site on the lattice. This is the original algorithm as described in Kim and Izubuchi (LATTICE 2013_033), where the b and c coefficients are constant along the Ls dimension, so is suitable for Shamir and Mobius domain-wall fermions. More...

template<typename real , int nColor, bool dagger, Dslash5Type type, bool shared, typename Vector , typename Arg >
__device__ __host__ Vector	variableInv (Arg &arg, int parity, int x_cb, int s_)
	Apply the M5 inverse operator at a given site on the lattice. This is an alternative algorithm that is applicable to variable b and c coefficients: here each thread in the s dimension starts computing at s = s_, and computes the left- and right-handed contributions in two separate passes. For the left-handed contribution we sweep through increasing s, e.g., s=s_, s_+1, s_+2, and for the right-handed one we do the transpose, s=s_, s_-1, s_-2. This allows us to progressively build up the scalar coefficients needed in a SIMD-friendly fashion. More...

template<typename Float , int nColor, bool dagger, bool xpay, Dslash5Type type, bool shared, bool var_inverse, typename Arg >
__device__ __host__ void	dslash5inv (Arg &arg, int parity, int x_cb, int s)
	Apply the M5 inverse operator at a given site on the lattice. More...

template<typename Float , int nColor, bool dagger, bool xpay, Dslash5Type type, bool shared, bool var_inverse, typename Arg >
__global__ void	dslash5invGPU (Arg arg)
	GPU kernel for applying the M5 inverse operator. More...

template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, typename Arg >
__device__ __host__ void	ndegTwistedMass (Arg &arg, int idx, int flavor, int parity)
	Apply the twisted-mass dslash out(x) = M*in = a*D*in + (1 + i*b*gamma_5*tau_3 + c*tau_1)*x. Note this routine only exists in xpay form. More...

template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, typename Arg >
void	ndegTwistedMassCPU (Arg arg)

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__global__ void	ndegTwistedMassGPU (Arg arg)

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool asymmetric, bool xpay, KernelType kernel_type, typename Arg >
__device__ __host__ void	ndegTwistedMass (Arg &arg, int idx, int flavor, int parity)
	Apply the twisted-mass dslash out(x) = M*in = a*D*in + (1 + i*b*gamma_5*tau_3 + c*tau_1)*x. Note this routine only exists in xpay form. More...

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
void	ndegTwistedMassPreconditionedCPU (Arg arg)

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__global__ void	ndegTwistedMassPreconditionedGPU (Arg arg)

template<bool dagger, int twist, int dim, QudaPCType pc, typename Arg >
__device__ __host__ void	pack (Arg &arg, int ghost_idx, int s, int parity)

template<int dim, int nFace = 1, typename Arg >
__device__ __host__ void	packStaggered (Arg &arg, int ghost_idx, int s, int parity)

template<bool dagger, int twist, QudaPCType pc, typename Arg >
__global__ void	packKernel (Arg arg)

template<bool dagger, int twist, QudaPCType pc, typename Arg >
__global__ void	packShmemKernel (Arg arg)

template<typename Arg >
__global__ void	packStaggeredKernel (Arg arg)

template<typename Arg >
__global__ void	packStaggeredShmemKernel (Arg arg)

template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, typename Arg , typename Vector >
__device__ __host__ void	applyStaggered (Vector &out, Arg &arg, int coord[nDim], int x_cb, int parity, int idx, int thread_dim, bool &active)
	Applies the off-diagonal part of the Staggered / Asqtad operator. More...

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__device__ __host__ void	staggered (Arg &arg, int idx, int parity)

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__global__ void	staggeredGPU (Arg arg)

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__device__ __host__ void	twistedClover (Arg &arg, int idx, int parity)
	Apply the preconditioned twisted-clover dslash. More...

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
void	twistedCloverPreconditionedCPU (Arg arg)

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__global__ void	twistedCloverPreconditionedGPU (Arg arg)

template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, typename Arg >
__device__ __host__ void	twistedMass (Arg &arg, int idx, int parity)
	Apply the twisted-mass dslash out(x) = M*in = a*D*in + (1 + i*b*gamma_5)*x. Note this routine only exists in xpay form. More...

template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, typename Arg >
void	twistedMassCPU (Arg arg)

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__global__ void	twistedMassGPU (Arg arg)

template<typename Float , int nDim, int nColor, int nParity, bool dagger, int twist, KernelType kernel_type, typename Arg , typename Vector >
__device__ __host__ void	applyWilsonTM (Vector &out, Arg &arg, int coord[nDim], int x_cb, int s, int parity, int idx, int thread_dim, bool &active)
	Applies the off-diagonal part of the Wilson operator premultiplied by twist rotation - this is required for applying the symmetric preconditioned twisted-mass dagger operator. More...

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool asymmetric, bool xpay, KernelType kernel_type, typename Arg >
__device__ __host__ void	twistedMass (Arg &arg, int idx, int parity)
	Apply the preconditioned twisted-mass dslash. More...

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
void	twistedMassPreconditionedCPU (Arg arg)

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__global__ void	twistedMassPreconditionedGPU (Arg arg)

template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, typename Arg , typename Vector >
__device__ __host__ void	applyWilson (Vector &out, Arg &arg, int coord[nDim], int x_cb, int s, int parity, int idx, int thread_dim, bool &active)
	Applies the off-diagonal part of the Wilson operator. More...

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__device__ __host__ void	wilson (Arg &arg, int idx, int s, int parity)

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
void	wilsonCPU (Arg arg)

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__global__ void	wilsonGPU (Arg arg)

template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, typename Arg >
__device__ __host__ void	wilsonClover (Arg &arg, int idx, int parity)
	Apply the Wilson-clover dslash out(x) = M*in = A(x)*x(x) + D * in(x-mu). Note this routine only exists in xpay form. More...

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
void	wilsonCloverCPU (Arg arg)

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__global__ void	wilsonCloverGPU (Arg arg)

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__device__ __host__ void	wilsonClover (Arg &arg, int idx, int parity)
	Apply the clover preconditioned Wilson dslash. More...

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
void	wilsonCloverPreconditionedCPU (Arg arg)

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__global__ void	wilsonCloverPreconditionedGPU (Arg arg)

template<int mu, int nu, typename Float , typename Arg >
__device__ __host__ __forceinline__ void	computeFmunuCore (Arg &arg, int idx, int parity)

template<typename Float , typename Arg >
__global__ void	computeFmunuKernel (Arg arg)

template<typename Float , typename Arg >
void	computeFmunuCPU (Arg &arg)

template<typename Float , typename Arg , typename Link >
__host__ __device__ void	computeStaple (Arg &arg, int idx, int parity, int dir, Link &staple)

template<typename Float , typename Arg >
__global__ void	computeAPEStep (Arg arg)

template<typename Float , typename Arg >
__device__ double	plaquette (Arg &arg, int x[], int parity, int mu, int nu)

template<int blockSize, typename Float , typename Gauge >
__global__ void	computePlaq (GaugePlaqArg< Gauge > arg)

template<int blockSize, typename Float , typename Arg >
__global__ void	qChargeComputeKernel (Arg arg)

template<typename Float , typename Arg >
__global__ void	computeSTOUTStep (Arg arg)

template<typename Float , typename Arg , typename Link >
__host__ __device__ void	computeStapleRectangle (Arg &arg, int idx, int parity, int dir, Link &staple, Link &rectangle)

template<typename Float , typename Arg >
__global__ void	computeOvrImpSTOUTStep (Arg arg)

template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, int dir, typename Arg , typename Vector >
__device__ __host__ void	applyLaplace (Vector &out, Arg &arg, int coord[nDim], int x_cb, int parity, int idx, int thread_dim, bool &active)

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__device__ __host__ void	laplace (Arg &arg, int idx, int parity)

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__global__ void	laplaceGPU (Arg arg)

template<typename Float , int fineSpin, int fineColor, int coarseColor, int coarse_colors_per_thread, class FineColor , class Rotator >
__device__ __host__ void	rotateCoarseColor (complex< Float > out[fineSpin *coarse_colors_per_thread], const FineColor &in, const Rotator &V, int parity, int nParity, int x_cb, int coarse_color_block)

template<typename Float , int fineSpin, int fineColor, int coarseSpin, int coarseColor, int coarse_colors_per_thread, typename Arg >
void	Restrict (Arg arg)

template<int block_size, typename Float , int fineSpin, int fineColor, int coarseSpin, int coarseColor, int coarse_colors_per_thread, typename Arg >
__global__ void	RestrictKernel (Arg arg)

void	completeKSForce (GaugeField &mom, const GaugeField &oprod, const GaugeField &gauge, QudaFieldLocation location, long long *flops=NULL)

std::ostream &	operator<< (std::ostream &output, const LatticeFieldParam &param)

QudaFieldLocation	Location_ (const char *func, const char *file, int line, const LatticeField &a, const LatticeField &b)
	Helper function for determining if the location of the fields is the same. More...

template<typename... Args>
QudaFieldLocation	Location_ (const char *func, const char *file, int line, const LatticeField &a, const LatticeField &b, const Args &... args)
	Helper function for determining if the location of the fields is the same. More...

QudaPrecision	Precision_ (const char *func, const char *file, int line, const LatticeField &a, const LatticeField &b)
	Helper function for determining if the precision of the fields is the same. More...

template<typename... Args>
QudaPrecision	Precision_ (const char *func, const char *file, int line, const LatticeField &a, const LatticeField &b, const Args &... args)
	Helper function for determining if the precision of the fields is the same. More...

QudaFieldLocation	reorder_location ()
	Return whether data is reordered on the CPU or GPU. This can be set at QUDA initialization using the environment variable QUDA_REORDER_LOCATION. More...

void	reorder_location_set (QudaFieldLocation reorder_location_)
	Set whether data is reordered on the CPU or GPU. This can be set at QUDA initialization using the environment variable QUDA_REORDER_LOCATION. More...

const char *	compile_type_str (const LatticeField &meta, QudaFieldLocation location_=QUDA_INVALID_FIELD_LOCATION)
	Helper function for setting the auxiliary string. More...

void	fatLongKSLink (cudaGaugeField *fat, cudaGaugeField *lng, const cudaGaugeField &gauge, const double *coeff)
	Compute the fat and long links for improved staggered (Kogut-Susskind) fermions. More...

void	printPeakMemUsage ()

void	assertAllMemFree ()

long	device_allocated_peak ()

long	pinned_allocated_peak ()

long	mapped_allocated_peak ()

long	host_allocated_peak ()

void *	device_malloc_ (const char *func, const char *file, int line, size_t size)

void *	device_pinned_malloc_ (const char *func, const char *file, int line, size_t size)

void *	safe_malloc_ (const char *func, const char *file, int line, size_t size)

void *	pinned_malloc_ (const char *func, const char *file, int line, size_t size)

void *	mapped_malloc_ (const char *func, const char *file, int line, size_t size)

void	device_free_ (const char *func, const char *file, int line, void *ptr)

void	device_pinned_free_ (const char *func, const char *file, int line, void *ptr)

void	host_free_ (const char *func, const char *file, int line, void *ptr)

constexpr const char *	str_end (const char *str)

constexpr bool	str_slant (const char *str)

constexpr const char *	r_slant (const char *str)

constexpr const char *	file_name (const char *str)

QudaFieldLocation	get_pointer_location (const void *ptr)

bool	is_aligned (const void *ptr, size_t alignment)

template<typename real >
__device__ __host__ real	__fast_pow (real a, int b)

double	computeMomAction (const GaugeField &mom)
	Compute and return the global momentum action 1/2 mom^2. More...

void	updateMomentum (GaugeField &mom, double coeff, GaugeField &force, const char *fname)

void	applyU (GaugeField &force, GaugeField &U)

bool	forceMonitor ()
	Whether we are monitoring the force or not. More...

void	flushForceMonitor ()
	Flush any outstanding force monitoring information. More...

void	ApplyCoarse (ColorSpinorField &out, const ColorSpinorField &inA, const ColorSpinorField &inB, const GaugeField &Y, const GaugeField &X, double kappa, int parity=QUDA_INVALID_PARITY, bool dslash=true, bool clover=true, bool dagger=false, const int *commDim=0, QudaPrecision halo_precision=QUDA_INVALID_PRECISION)
	Apply the coarse dslash stencil. This single driver accounts for all variations with and without the clover field, with and without dslash, and both single and full parity fields. More...

void	CoarseOp (GaugeField &Y, GaugeField &X, const Transfer &T, const cudaGaugeField &gauge, const cudaCloverField *clover, double kappa, double mu, double mu_factor, QudaDiracType dirac, QudaMatPCType matpc)
	Coarse operator construction from a fine-grid operator (Wilson / Clover). More...

void	CoarseCoarseOp (GaugeField &Y, GaugeField &X, const Transfer &T, const GaugeField &gauge, const GaugeField &clover, const GaugeField &cloverInv, double kappa, double mu, double mu_factor, QudaDiracType dirac, QudaMatPCType matpc, bool need_bidirectional)
	Coarse operator construction from an intermediate-grid operator (Coarse). More...

void	calculateYhat (GaugeField &Yhat, GaugeField &Xinv, const GaugeField &Y, const GaugeField &X)
	Calculate preconditioned coarse links and coarse clover inverse field. More...

void	Monte (cudaGaugeField &data, RNG &rngstate, double Beta, int nhb, int nover)
	Perform heatbath and overrelaxation. Performs nhb heatbath steps followed by nover overrelaxation steps. More...

void	InitGaugeField (cudaGaugeField &data)
	Perform a cold start to the gauge field, identity SU(3) matrix, also fills the ghost links in multi-GPU case (no need to exchange data) More...

void	InitGaugeField (cudaGaugeField &data, RNG &rngstate)
	Perform a hot start to the gauge field, random SU(3) matrix, followed by reunitarization; also exchanges border links in the multi-GPU case. More...

void	PGaugeExchange (cudaGaugeField &data, const int dir, const int parity)
	Exchange gauge-field data between nodes for the given direction and parity. More...

void	PGaugeExchangeFree ()
	Release all allocated memory used to exchange data between nodes. More...

double2	getLinkDeterminant (cudaGaugeField &data)
	Calculate the Determinant. More...

double2	getLinkTrace (cudaGaugeField &data)
	Calculate the Trace. More...

void	qudaMemcpy_ (void *dst, const void *src, size_t count, cudaMemcpyKind kind, const char *func, const char *file, const char *line)
	Wrapper around cudaMemcpy used for auto-profiling. Do not call directly, rather call macro below which will grab the location of the call. More...

void	qudaMemcpyAsync_ (void *dst, const void *src, size_t count, cudaMemcpyKind kind, const cudaStream_t &stream, const char *func, const char *file, const char *line)
	Wrapper around cudaMemcpyAsync or driver API equivalent. Potentially add auto-profiling support. More...

void	qudaMemcpy2DAsync_ (void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t hieght, cudaMemcpyKind kind, const cudaStream_t &stream, const char *func, const char *file, const char *line)
	Wrapper around cudaMemcpy2DAsync or driver API equivalent. Potentially add auto-profiling support. More...

cudaError_t	qudaLaunchKernel (const void *func, dim3 gridDim, dim3 blockDim, void **args, size_t sharedMem, cudaStream_t stream)
	Wrapper around cudaLaunchKernel. More...

cudaError_t	qudaEventQuery (cudaEvent_t &event)
	Wrapper around cudaEventQuery or cuEventQuery. More...

cudaError_t	qudaEventRecord (cudaEvent_t &event, cudaStream_t stream=0)
	Wrapper around cudaEventRecord or cuEventRecord. More...

cudaError_t	qudaStreamWaitEvent (cudaStream_t stream, cudaEvent_t event, unsigned int flags)
	Wrapper around cudaStreamWaitEvent or cuStreamWaitEvent. More...

cudaError_t	qudaStreamSynchronize (cudaStream_t &stream)
	Wrapper around cudaStreamSynchronize or cuStreamSynchronize. More...

cudaError_t	qudaEventSynchronize (cudaEvent_t &event)
	Wrapper around cudaEventSynchronize or cuEventSynchronize. More...

cudaError_t	qudaDeviceSynchronize_ (const char *func, const char *file, const char *line)
	Wrapper around cudaDeviceSynchronize or cuDeviceSynchronize. More...

void	printAPIProfile ()
	Print out the timer profile for CUDA API calls. More...

bool	canReuseResidentGauge (QudaInvertParam *inv_param)

template<class T >
__device__ __host__ T	getTrace (const Matrix< T, 3 > &a)

template<template< typename, int > class Mat, class T >
__device__ __host__ T	getDeterminant (const Mat< T, 3 > &a)

template<template< typename, int > class Mat, class T , int N>
__device__ __host__ Mat< T, N >	operator+ (const Mat< T, N > &a, const Mat< T, N > &b)

template<template< typename, int > class Mat, class T , int N>
__device__ __host__ Mat< T, N >	operator+= (Mat< T, N > &a, const Mat< T, N > &b)

template<template< typename, int > class Mat, class T , int N>
__device__ __host__ Mat< T, N >	operator+= (Mat< T, N > &a, const T &b)

template<template< typename, int > class Mat, class T , int N>
__device__ __host__ Mat< T, N >	operator-= (Mat< T, N > &a, const Mat< T, N > &b)

template<template< typename, int > class Mat, class T , int N>
__device__ __host__ Mat< T, N >	operator- (const Mat< T, N > &a, const Mat< T, N > &b)

template<template< typename, int > class Mat, class T , int N, class S >
__device__ __host__ Mat< T, N >	operator* (const S &scalar, const Mat< T, N > &a)

template<template< typename, int > class Mat, class T , int N, class S >
__device__ __host__ Mat< T, N >	operator* (const Mat< T, N > &a, const S &scalar)

template<template< typename, int > class Mat, class T , int N, class S >
__device__ __host__ Mat< T, N >	operator*= (Mat< T, N > &a, const S &scalar)

template<template< typename, int > class Mat, class T , int N>
__device__ __host__ Mat< T, N >	operator- (const Mat< T, N > &a)

template<template< typename, int > class Mat, class T , int N>
__device__ __host__ Mat< T, N >	operator* (const Mat< T, N > &a, const Mat< T, N > &b)
	Generic implementation of matrix multiplication. More...

template<template< typename > class complex, typename T , int N>
__device__ __host__ Matrix< complex< T >, N >	operator* (const Matrix< complex< T >, N > &a, const Matrix< complex< T >, N > &b)
	Specialization of complex matrix multiplication that will issue optimal fma instructions. More...

template<class T , int N>
__device__ __host__ Matrix< T, N >	operator*= (Matrix< T, N > &a, const Matrix< T, N > &b)

template<class T , class U , int N>
__device__ __host__ Matrix< typename PromoteTypeId< T, U >::Type, N >	operator* (const Matrix< T, N > &a, const Matrix< U, N > &b)

template<class T >
__device__ __host__ Matrix< T, 2 >	operator* (const Matrix< T, 2 > &a, const Matrix< T, 2 > &b)

template<class T , int N>
__device__ __host__ Matrix< T, N >	conj (const Matrix< T, N > &other)

template<class T >
__device__ __host__ Matrix< T, 3 >	inverse (const Matrix< T, 3 > &u)

template<class T , int N>
__device__ __host__ void	setIdentity (Matrix< T, N > *m)

template<int N>
__device__ __host__ void	setIdentity (Matrix< float2, N > *m)

template<int N>
__device__ __host__ void	setIdentity (Matrix< double2, N > *m)

template<class T , int N>
__device__ __host__ void	setZero (Matrix< T, N > *m)

template<int N>
__device__ __host__ void	setZero (Matrix< float2, N > *m)

template<int N>
__device__ __host__ void	setZero (Matrix< double2, N > *m)

template<typename Complex , int N>
__device__ __host__ void	makeAntiHerm (Matrix< Complex, N > &m)

template<class T , int N>
__device__ __host__ void	copyColumn (const Matrix< T, N > &m, int c, Array< T, N > *a)

template<class T , int N>
__device__ __host__ void	outerProd (const Array< T, N > &a, const Array< T, N > &b, Matrix< T, N > *m)

template<class T , int N>
__device__ __host__ void	outerProd (const T(&a)[N], const T(&b)[N], Matrix< T, N > *m)

template<class T , int N>
std::ostream &	operator<< (std::ostream &os, const Matrix< T, N > &m)

template<class T , int N>
std::ostream &	operator<< (std::ostream &os, const Array< T, N > &a)

template<class T , class U >
__device__ void	loadLinkVariableFromArray (const T *const array, const int dir, const int idx, const int stride, Matrix< U, 3 > *link)

template<class T , class U , int N>
__device__ void	loadMatrixFromArray (const T *const array, const int idx, const int stride, Matrix< U, N > *mat)

__device__ void	loadLinkVariableFromArray (const float2 *const array, const int dir, const int idx, const int stride, Matrix< complex< double >, 3 > *link)

template<class T , int N, class U >
__device__ void	writeMatrixToArray (const Matrix< T, N > &mat, const int idx, const int stride, U *const array)

__device__ void	appendMatrixToArray (const Matrix< complex< double >, 3 > &mat, const int idx, const int stride, double2 *const array)

__device__ void	appendMatrixToArray (const Matrix< complex< float >, 3 > &mat, const int idx, const int stride, float2 *const array)

template<class T , class U >
__device__ void	writeLinkVariableToArray (const Matrix< T, 3 > &link, const int dir, const int idx, const int stride, U *const array)

__device__ void	writeLinkVariableToArray (const Matrix< complex< double >, 3 > &link, const int dir, const int idx, const int stride, float2 *const array)

template<class T >
__device__ void	loadMomentumFromArray (const T *const array, const int dir, const int idx, const int stride, Matrix< T, 3 > *mom)

template<class T , class U >
__device__ void	writeMomentumToArray (const Matrix< T, 3 > &mom, const int dir, const int idx, const U coeff, const int stride, T *const array)

template<class Cmplx >
__device__ __host__ void	computeLinkInverse (Matrix< Cmplx, 3 > *uinv, const Matrix< Cmplx, 3 > &u)

void	copyArrayToLink (Matrix< float2, 3 > *link, float *array)

template<class Cmplx , class Real >
void	copyArrayToLink (Matrix< Cmplx, 3 > *link, Real *array)

void	copyLinkToArray (float *array, const Matrix< float2, 3 > &link)

template<class Cmplx , class Real >
void	copyLinkToArray (Real *array, const Matrix< Cmplx, 3 > &link)

template<class T >
__device__ __host__ Matrix< T, 3 >	getSubTraceUnit (const Matrix< T, 3 > &a)

template<class T >
__device__ __host__ void	SubTraceUnit (Matrix< T, 3 > &a)

template<class T >
__device__ __host__ double	getRealTraceUVdagger (const Matrix< T, 3 > &a, const Matrix< T, 3 > &b)

template<class Cmplx >
__host__ __device__ void	printLink (const Matrix< Cmplx, 3 > &link)

template<class Cmplx >
__device__ __host__ double	ErrorSU3 (const Matrix< Cmplx, 3 > &matrix)

template<class T >
__device__ __host__ void	exponentiate_iQ (const Matrix< T, 3 > &Q, Matrix< T, 3 > *exp_iQ)

template<typename Float >
__device__ __host__ void	expsu3 (Matrix< complex< Float >, 3 > &q)

template<class Real >
__device__ Real	Random (cuRNGState &state, Real a, Real b)
	Return a random number between a and b. More...

template<>
__device__ float	Random< float > (cuRNGState &state, float a, float b)

template<>
__device__ double	Random< double > (cuRNGState &state, double a, double b)

template<class Real >
__device__ Real	Random (cuRNGState &state)
	Return a random number between 0 and 1. More...

template<>
__device__ float	Random< float > (cuRNGState &state)

template<>
__device__ double	Random< double > (cuRNGState &state)

template<typename T1 , typename T2 >
__host__ __device__ void	copy (T1 &a, const T2 &b)

template<>
__host__ __device__ void	copy (double &a, const int2 &b)

template<>
__host__ __device__ void	copy (double2 &a, const int4 &b)

template<>
__host__ __device__ void	copy (float &a, const short &b)

template<>
__host__ __device__ void	copy (short &a, const float &b)

template<>
__host__ __device__ void	copy (float2 &a, const short2 &b)

template<>
__host__ __device__ void	copy (short2 &a, const float2 &b)

template<>
__host__ __device__ void	copy (float4 &a, const short4 &b)

template<>
__host__ __device__ void	copy (short4 &a, const float4 &b)

template<>
__host__ __device__ void	copy (float &a, const char &b)

template<>
__host__ __device__ void	copy (char &a, const float &b)

template<>
__host__ __device__ void	copy (float2 &a, const char2 &b)

template<>
__host__ __device__ void	copy (char2 &a, const float2 &b)

template<>
__host__ __device__ void	copy (float4 &a, const char4 &b)

template<>
__host__ __device__ void	copy (char4 &a, const float4 &b)

template<typename T1 , typename T2 >
__host__ __device__ void	copy_scaled (T1 &a, const T2 &b)

template<>
__host__ __device__ void	copy_scaled (short4 &a, const float4 &b)

template<>
__host__ __device__ void	copy_scaled (char4 &a, const float4 &b)

template<>
__host__ __device__ void	copy_scaled (short2 &a, const float2 &b)

template<>
__host__ __device__ void	copy_scaled (char2 &a, const float2 &b)

template<>
__host__ __device__ void	copy_scaled (short &a, const float &b)

template<>
__host__ __device__ void	copy_scaled (char &a, const float &b)

template<typename T1 , typename T2 , typename T3 >
__host__ __device__ void	copy_and_scale (T1 &a, const T2 &b, const T3 &c)
	Specialized variants of the copy function that include an additional scale factor. Note the scale factor is ignored unless the input type (b) is either a short or char vector. More...

template<>
__host__ __device__ void	copy_and_scale (float4 &a, const short4 &b, const float &c)

template<>
__host__ __device__ void	copy_and_scale (float4 &a, const char4 &b, const float &c)

template<>
__host__ __device__ void	copy_and_scale (float2 &a, const short2 &b, const float &c)

template<>
__host__ __device__ void	copy_and_scale (float2 &a, const char2 &b, const float &c)

template<>
__host__ __device__ void	copy_and_scale (float &a, const short &b, const float &c)

template<>
__host__ __device__ void	copy_and_scale (float &a, const char &b, const float &c)

template<typename VectorType >
__device__ __host__ VectorType	vector_load (void *ptr, int idx)

template<typename VectorType >
__device__ __host__ void	vector_store (void *ptr, int idx, const VectorType &value)

template<>
__device__ __host__ void	vector_store (void *ptr, int idx, const double2 &value)

template<>
__device__ __host__ void	vector_store (void *ptr, int idx, const float4 &value)

template<>
__device__ __host__ void	vector_store (void *ptr, int idx, const float2 &value)

template<>
__device__ __host__ void	vector_store (void *ptr, int idx, const short4 &value)

template<>
__device__ __host__ void	vector_store (void *ptr, int idx, const short2 &value)

template<>
__device__ __host__ void	vector_store (void *ptr, int idx, const char4 &value)

template<>
__device__ __host__ void	vector_store (void *ptr, int idx, const char2 &value)

void	computeStaggeredOprod (GaugeField *out[], ColorSpinorField &in, const double coeff[], int nFace)
	Compute the outer-product field between the staggered quark field's one-hop and (for HISQ and ASQTAD) three-hop sites. More...

template<typename Matrix , typename Float >
__host__ __device__ bool	checkUnitary (const Matrix &inv, const Matrix &in, const Float tol)
	Check the unitarity of the input matrix to a given tolerance. More...

template<typename Matrix >
__host__ __device__ void	checkUnitaryPrint (const Matrix &inv, const Matrix &in)
	Print out deviation for each component (used for debugging only). More...

template<typename Float >
__host__ __device__ void	polarSu3 (Matrix< complex< Float >, 3 > &in, Float tol)
	Project the input matrix on the SU(3) group. First unitarize the matrix and then project onto the special unitary group. More...

void	BlockOrthogonalize (ColorSpinorField &V, const std::vector< ColorSpinorField > &B, const int fine_to_coarse, const int coarse_to_fine, const int geo_bs, const int spin_bs, const int n_block_ortho)
	Block orthogonalize the matrix field, where the blocks are defined by lookup tables that map the fine grid points to the coarse grid points, and similarly for the spin degrees of freedom. More...

void	Prolongate (ColorSpinorField &out, const ColorSpinorField &in, const ColorSpinorField &v, int Nvec, const int *fine_to_coarse, const int *const *spin_map, int parity=QUDA_INVALID_PARITY)
	Apply the prolongation operator. More...

void	Restrict (ColorSpinorField &out, const ColorSpinorField &in, const ColorSpinorField &v, int Nvec, const int *fine_to_coarse, const int *coarse_to_fine, const int *const *spin_map, int parity=QUDA_INVALID_PARITY)
	Apply the restriction operator. More...

bool	activeTuning ()
	Query whether tuning is in progress. More...

void	loadTuneCache ()

void	saveTuneCache (bool error=false)

void	saveProfile (const std::string label="")
	Save profile to disk. More...

void	flushProfile ()
	Flush profile contents, setting all counts to zero. More...

TuneParam &	tuneLaunch (Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity)

void	postTrace_ (const char *func, const char *file, int line)
	Post an event in the trace, recording where it was posted. More...

const std::map< TuneKey, TuneParam > &	getTuneCache ()
	Returns a reference to the tunecache map. More...

void	enableProfileCount ()
	Enable the profile kernel counting. More...

void	disableProfileCount ()
	Disable the profile kernel counting. More...

void	setPolicyTuning (bool)
	Enable / disable whether we are tuning a policy. More...

void	u32toa (char *buffer, uint32_t value)

void	i32toa (char *buffer, int32_t value)

void	u64toa (char *buffer, uint64_t value)

void	i64toa (char *buffer, int64_t value)

void	setUnitarizeLinksConstants (double unitarize_eps, double max_error, bool allow_svd, bool svd_only, double svd_rel_error, double svd_abs_error)

void	unitarizeLinksCPU (cpuGaugeField &outfield, const cpuGaugeField &infield)

void	unitarizeLinks (cudaGaugeField &outfield, const cudaGaugeField &infield, int *fails)

void	unitarizeLinks (cudaGaugeField &outfield, int *fails)

bool	isUnitary (const cpuGaugeField &field, double max_error)

void	projectSU3 (cudaGaugeField &U, double tol, int *fails)
	Project the input gauge field onto the SU(3) group. This is a destructive operation. The number of link failures is reported so appropriate action can be taken. More...

template<typename Arg >
__device__ __host__ uint64_t	siteChecksum (const Arg &arg, int d, int parity, int x_cb)

template<typename Arg >
uint64_t	ChecksumCPU (const Arg &arg)

ColorSpinorParam	colorSpinorParam (const CloverField &a, bool inverse)

template<bool from_coarse, typename Float , int fineSpin, int fineColor, int coarseSpin, int coarseColor, typename F , typename Ftmp , typename Vt , typename coarseGauge , typename coarseGaugeAtomic , typename fineGauge , typename fineClover >
void	calculateY (coarseGauge &Y, coarseGauge &X, coarseGaugeAtomic &Y_atomic, coarseGaugeAtomic &X_atomic, Ftmp &UV, F &AV, Vt &V, fineGauge &G, fineClover &C, fineClover &Cinv, GaugeField &Y_, GaugeField &X_, GaugeField &Y_atomic_, GaugeField &X_atomic_, ColorSpinorField &uv, ColorSpinorField &av, const ColorSpinorField &v, double kappa, double mu, double mu_factor, QudaDiracType dirac, QudaMatPCType matpc, bool need_bidirectional, const int fine_to_coarse, const int coarse_to_fine)
	Calculate the coarse-link field, including the coarse clover field. More...

std::ostream &	operator<< (std::ostream &out, const ColorSpinorField &a)

template<class T >
void	random (T &t)

template<class T >
void	point (T &t, int x, int s, int c)

template<class T >
void	constant (T &t, int k, int s, int c)

template<class P >
void	sin (P &p, int d, int n, int offset)

template<class T >
void	corner (T &p, int v, int s, int c)

template<class U , class V >
int	compareSpinor (const U &u, const V &v, const int tol)

template<class Order >
void	print_vector (const Order &o, unsigned int x)

template<typename StoreType , int Ns, int Nc, QudaFieldOrder FieldOrder>
void	genericCudaPrintVector (const cudaColorSpinorField &field, unsigned int i)

template<typename Float , int Ns, int Nc>
void	genericCudaPrintVector (const cudaColorSpinorField &field, unsigned int i)

template<typename Float >
void	genericCudaPrintVector (const cudaColorSpinorField &field, unsigned int i)

template<typename Float , int Nc, typename Vector , typename Arg >
__device__ __host__ void	computeNeighborSum (Vector &out, Arg &arg, int x_cb, int parity)

template<typename Float , int Ns, int Nc, typename Arg >
__device__ __host__ void	computeWupperalStep (Arg &arg, int x_cb, int parity)

template<typename Float , int Ns, int Nc, typename Arg >
void	wuppertalStepCPU (Arg arg)

template<typename Float , int Ns, int Nc, typename Arg >
__global__ void	wuppertalStepGPU (Arg arg)

void	copyGenericColorSpinorDD (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorDS (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorDH (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorDQ (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorSD (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorSS (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorSH (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorSQ (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorHD (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorHS (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorHH (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorHQ (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorQD (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorQS (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorQH (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorQQ (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorMGDD (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorMGDS (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorMGSD (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorMGSS (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorMGSH (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorMGSQ (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorMGHS (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorMGHH (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorMGHQ (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorMGQS (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorMGQH (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorMGQQ (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

template<typename Arg , typename Basis >
void	copyColorSpinor (Arg &arg, const Basis &basis)

template<typename Arg , typename Basis >
__global__ void	copyColorSpinorKernel (Arg arg, Basis basis)

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename Out , typename In >
void	genericCopyColorSpinor (Out &outOrder, const In &inOrder, const ColorSpinorField &out, const ColorSpinorField &in, QudaFieldLocation location)

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename InOrder >
void	genericCopyColorSpinor (InOrder &inOrder, ColorSpinorField &out, const ColorSpinorField &in, QudaFieldLocation location, FloatOut Out, float outNorm)

template<typename FloatOut , typename FloatIn , int Ns, int Nc>
void	genericCopyColorSpinor (ColorSpinorField &out, const ColorSpinorField &in, QudaFieldLocation location, FloatOut Out, FloatIn In, float outNorm, float inNorm)

template<int Ns, int Nc, typename dstFloat , typename srcFloat >
void	copyGenericColorSpinor (ColorSpinorField &dst, const ColorSpinorField &src, QudaFieldLocation location, dstFloat Dst, srcFloat Src, float dstNorm, float srcNorm)

template<int Nc, typename dstFloat , typename srcFloat >
void	CopyGenericColorSpinor (ColorSpinorField &dst, const ColorSpinorField &src, QudaFieldLocation location, dstFloat Dst, srcFloat Src, float dstNorm=0, float srcNorm=0)

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder >
void	packSpinor (OutOrder &outOrder, const InOrder &inOrder, int volume)

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder >
__global__ void	packSpinorKernel (OutOrder outOrder, const InOrder inOrder, int volume)

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder >
void	genericCopyColorSpinor (OutOrder &outOrder, const InOrder &inOrder, const ColorSpinorField &out, QudaFieldLocation location)

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename InOrder >
void	genericCopyColorSpinor (InOrder &inOrder, ColorSpinorField &out, QudaFieldLocation location, FloatOut *Out)

template<typename FloatOut , typename FloatIn , int Ns, int Nc>
void	genericCopyColorSpinor (ColorSpinorField &out, const ColorSpinorField &in, QudaFieldLocation location, FloatOut Out, FloatIn In)

template<int Ns, int Nc, typename dstFloat , typename srcFloat >
void	copyGenericColorSpinor (ColorSpinorField &dst, const ColorSpinorField &src, QudaFieldLocation location, dstFloat Dst, srcFloat Src)

template<int Nc, typename dstFloat , typename srcFloat >
void	CopyGenericColorSpinor (ColorSpinorField &dst, const ColorSpinorField &src, QudaFieldLocation location, dstFloat Dst, srcFloat Src)

void	copyGenericGaugeDoubleOut (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void Out, void In, void ghostOut, void ghostIn, int type)

void	copyGenericGaugeSingleOut (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void Out, void In, void ghostOut, void ghostIn, int type)

void	copyGenericGaugeHalfOut (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void Out, void In, void ghostOut, void ghostIn, int type)

void	copyGenericGaugeQuarterOut (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void Out, void In, void ghostOut, void ghostIn, int type)

void	copyGenericGaugeMG (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void Out, void In, void ghostOut, void ghostIn, int type)

void	checkMomOrder (const GaugeField &u)

template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder , bool regularToextended>
__device__ __host__ void	copyGaugeEx (CopyGaugeExArg< OutOrder, InOrder > &arg, int X, int parity)

template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder , bool regularToextended>
void	copyGaugeEx (CopyGaugeExArg< OutOrder, InOrder > arg)

template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder , bool regularToextended>
__global__ void	copyGaugeExKernel (CopyGaugeExArg< OutOrder, InOrder > arg)

template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder >
void	copyGaugeEx (OutOrder outOrder, const InOrder inOrder, const int E, const int X, const int *faceVolumeCB, const GaugeField &meta, QudaFieldLocation location)

template<typename FloatOut , typename FloatIn , int length, typename InOrder >
void	copyGaugeEx (const InOrder &inOrder, const int X, GaugeField &out, QudaFieldLocation location, FloatOut Out)

template<typename FloatOut , typename FloatIn , int length>
void	copyGaugeEx (GaugeField &out, const GaugeField &in, QudaFieldLocation location, FloatOut Out, FloatIn In)

template<typename FloatOut , typename FloatIn >
void	copyGaugeEx (GaugeField &out, const GaugeField &in, QudaFieldLocation location, FloatOut Out, FloatIn In)

template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder >
void	copyGauge (OutOrder &&outOrder, const InOrder &inOrder, const GaugeField &out, const GaugeField &in, QudaFieldLocation location, int type)

template<typename FloatOut , typename FloatIn , int length, typename InOrder >
void	copyGauge (const InOrder &inOrder, const GaugeField &out, const GaugeField &in, QudaFieldLocation location, FloatOut Out, FloatOut *outGhost, int type)

template<typename FloatOut , typename FloatIn , int length>
void	copyGauge (GaugeField &out, const GaugeField &in, QudaFieldLocation location, FloatOut Out, FloatIn In, FloatOut outGhost, FloatIn inGhost, int type)

template<typename FloatOut , typename FloatIn , int length, typename Out , typename In , typename Arg >
void	copyMom (Arg &arg, const GaugeField &out, const GaugeField &in, QudaFieldLocation location)

template<typename FloatOut , typename FloatIn >
void	copyGauge (GaugeField &out, const GaugeField &in, QudaFieldLocation location, FloatOut Out, FloatIn In, FloatOut outGhost, FloatIn inGhost, int type)

template<typename sFloatOut , typename sFloatIn , int Nc, typename InOrder >
void	copyGaugeMG (const InOrder &inOrder, GaugeField &out, const GaugeField &in, QudaFieldLocation location, sFloatOut Out, sFloatOut *outGhost, int type)

template<typename sFloatOut , typename sFloatIn , int Nc>
void	copyGaugeMG (GaugeField &out, const GaugeField &in, QudaFieldLocation location, sFloatOut Out, sFloatIn In, sFloatOut outGhost, sFloatIn inGhost, int type)

template<typename FloatOut , typename FloatIn >
void	copyGaugeMG (GaugeField &out, const GaugeField &in, QudaFieldLocation location, FloatOut Out, FloatIn In, FloatOut outGhost, FloatIn inGhost, int type)

void *	create_gauge_buffer (size_t bytes, QudaGaugeFieldOrder order, QudaFieldGeometry geometry)

void **	create_ghost_buffer (size_t bytes[], QudaGaugeFieldOrder order, QudaFieldGeometry geometry)

void	free_gauge_buffer (void *buffer, QudaGaugeFieldOrder order, QudaFieldGeometry geometry)

void	free_ghost_buffer (void **buffer, QudaGaugeFieldOrder order, QudaFieldGeometry geometry)

std::ostream &	operator<< (std::ostream &out, const cudaColorSpinorField &a)

static std::vector< DslashCoarsePolicy >	policies (static_cast< int >(DslashCoarsePolicy::DSLASH_COARSE_POLICY_DISABLED), DslashCoarsePolicy::DSLASH_COARSE_POLICY_DISABLED)

void	enable_policy (DslashCoarsePolicy p)

void	disable_policy (DslashCoarsePolicy p)

template<typename Float , int nSpin, int nColor, bool spin_project>
std::ostream &	operator<< (std::ostream &out, const PackArg< Float, nSpin, nColor, spin_project > &arg)

template<typename Float , int nColor>
void	PackGhost (void *ghost[], const ColorSpinorField &in, MemoryLocation location, int nFace, bool dagger, int parity, bool spin_project, double a, double b, double c, const cudaStream_t &stream)

template<typename Float >
void	PackGhost (void *ghost[], const ColorSpinorField &in, MemoryLocation location, int nFace, bool dagger, int parity, bool spin_project, double a, double b, double c, const cudaStream_t &stream)

template<typename Float , int nColor, typename Arg >
void	gammaCPU (Arg arg)

template<typename Float , int nColor, int d, typename Arg >
__global__ void	gammaGPU (Arg arg)

template<typename Float , int nColor>
void	ApplyGamma (ColorSpinorField &out, const ColorSpinorField &in, int d)

template<typename Float >
void	ApplyGamma (ColorSpinorField &out, const ColorSpinorField &in, int d)

template<bool doublet, typename Float , int nColor, typename Arg >
void	twistGammaCPU (Arg arg)

template<bool doublet, typename Float , int nColor, int d, typename Arg >
__global__ void	twistGammaGPU (Arg arg)

template<typename Float , int nSpin, int nColor, typename Arg >
__device__ __host__ void	cloverApply (Arg &arg, int x_cb, int parity)

template<typename Float , int nSpin, int nColor, typename Arg >
void	cloverCPU (Arg &arg)

template<typename Float , int nSpin, int nColor, typename Arg >
__global__ void	cloverGPU (Arg arg)

template<bool inverse, typename Float , int nSpin, int nColor, typename Arg >
__device__ __host__ void	twistCloverApply (Arg &arg, int x_cb, int parity)

template<bool inverse, typename Float , int nSpin, int nColor, typename Arg >
void	twistCloverCPU (Arg &arg)

template<bool inverse, typename Float , int nSpin, int nColor, typename Arg >
__global__ void	twistCloverGPU (Arg arg)

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis , bool extend>
__device__ __host__ void	copyInterior (CopySpinorExArg< OutOrder, InOrder, Basis > &arg, int X)

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis , bool extend>
__global__ void	copyInteriorKernel (CopySpinorExArg< OutOrder, InOrder, Basis > arg)

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis , bool extend>
void	copyInterior (CopySpinorExArg< OutOrder, InOrder, Basis > &arg)

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis >
void	copySpinorEx (OutOrder outOrder, const InOrder inOrder, const Basis basis, const int E, const int X, const int parity, const bool extend, const ColorSpinorField &meta, QudaFieldLocation location)

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder >
void	copySpinorEx (OutOrder outOrder, InOrder inOrder, const QudaGammaBasis outBasis, const QudaGammaBasis inBasis, const int E, const int X, const int parity, const bool extend, const ColorSpinorField &meta, QudaFieldLocation location)

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename InOrder >
void	extendedCopyColorSpinor (InOrder &inOrder, ColorSpinorField &out, QudaGammaBasis inBasis, const int E, const int X, const int parity, const bool extend, QudaFieldLocation location, FloatOut Out, float outNorm)

template<typename FloatOut , typename FloatIn , int Ns, int Nc>
void	extendedCopyColorSpinor (ColorSpinorField &out, const ColorSpinorField &in, const int parity, const QudaFieldLocation location, FloatOut Out, FloatIn In, float outNorm, float inNorm)

template<int Ns, typename dstFloat , typename srcFloat >
void	copyExtendedColorSpinor (ColorSpinorField &dst, const ColorSpinorField &src, const int parity, const QudaFieldLocation location, dstFloat Dst, srcFloat Src, float dstNorm, float srcNorm)

template<typename dstFloat , typename srcFloat >
void	CopyExtendedColorSpinor (ColorSpinorField &dst, const ColorSpinorField &src, const int parity, const QudaFieldLocation location, dstFloat Dst, srcFloat Src, float dstNorm=0, float srcNorm=0)

template<typename Float >
void	extractGhost (const GaugeField &u, Float **Ghost, bool extract, int offset)

void	extractGaugeGhostMG (const GaugeField &u, void **ghost, bool extract, int offset)

template<typename Float , int length, int dim, typename Arg >
__device__ __host__ void	extractor (Arg &arg, int dir, int a, int b, int c, int d, int g, int parity)

template<typename Float , int length, int dim, typename Arg >
__device__ __host__ void	injector (Arg &arg, int dir, int a, int b, int c, int d, int g, int parity)

template<typename Float , int length, int nDim, int dim, typename Order , bool extract>
void	extractGhostEx (ExtractGhostExArg< Order, nDim, dim > arg)

template<typename Float , int length, int nDim, int dim, typename Order , bool extract>
__global__ void	extractGhostExKernel (ExtractGhostExArg< Order, nDim, dim > arg)

template<typename Float , int length, typename Order >
void	extractGhostEx (Order order, const int dim, const int surfaceCB, const int E, const int *R, bool extract, const GaugeField &u, QudaFieldLocation location)

template<typename Float >
void	extractGhostEx (const GaugeField &u, int dim, const int R, Float *Ghost, bool extract)

template<int nDim, bool extract, typename Arg >
void	extractGhost (Arg &arg)

template<int nDim, bool extract, typename Arg >
__global__ void	extractGhostKernel (Arg arg)

template<typename Float , int length, typename Order >
void	extractGhost (Order order, const GaugeField &u, QudaFieldLocation location, bool extract, int offset)

template<typename storeFloat , int Nc>
void	extractGhostMG (const GaugeField &u, storeFloat **Ghost, bool extract, int offset)

template<typename Float >
void	extractGhostMG (const GaugeField &u, Float **Ghost, bool extract, int offset)

ColorSpinorParam	colorSpinorParam (const GaugeField &a)

template<int NCOLORS>
static __host__ __device__ void	IndexBlock (int block, int &p, int &q)

template<int blockSize, typename Float , int gauge_dir, int NCOLORS>
__forceinline__ __device__ void	GaugeFixHit_AtomicAdd (Matrix< complex< Float >, NCOLORS > &link, const Float relax_boost, const int tid)

template<int blockSize, typename Float , int gauge_dir, int NCOLORS>
__forceinline__ __device__ void	GaugeFixHit_NoAtomicAdd (Matrix< complex< Float >, NCOLORS > &link, const Float relax_boost, const int tid)

template<int blockSize, typename Float , int gauge_dir, int NCOLORS>
__forceinline__ __device__ void	GaugeFixHit_NoAtomicAdd_LessSM (Matrix< complex< Float >, NCOLORS > &link, const Float relax_boost, const int tid)

template<int blockSize, typename Float , int gauge_dir, int NCOLORS>
__forceinline__ __device__ void	GaugeFixHit_AtomicAdd (Matrix< complex< Float >, NCOLORS > &link, Matrix< complex< Float >, NCOLORS > &link1, const Float relax_boost, const int tid)

template<int blockSize, typename Float , int gauge_dir, int NCOLORS>
__forceinline__ __device__ void	GaugeFixHit_NoAtomicAdd (Matrix< complex< Float >, NCOLORS > &link, Matrix< complex< Float >, NCOLORS > &link1, const Float relax_boost, const int tid)

template<int blockSize, typename Float , int gauge_dir, int NCOLORS>
__forceinline__ __device__ void	GaugeFixHit_NoAtomicAdd_LessSM (Matrix< complex< Float >, NCOLORS > &link, Matrix< complex< Float >, NCOLORS > &link1, const Float relax_boost, const int tid)

template<typename Float , typename Gauge >
void	plaquette (const Gauge dataOr, const GaugeField &data, double2 &plq, QudaFieldLocation location)

template<typename Float >
void	plaquette (const GaugeField &data, double2 &plq, QudaFieldLocation location)

template<typename real , typename Link >
__device__ __host__ Link	gauss_su3 (cuRNGState &localState)

template<typename Float , typename Arg >
__global__ void	computeGenGauss (Arg arg)

template<typename Float , QudaReconstructType recon, bool group>
void	genGauss (GaugeField &U, RNG &rngstate, double sigma)

template<typename Float , typename GaugeOr , typename GaugeDs >
void	OvrImpSTOUTStep (GaugeOr origin, GaugeDs dest, const GaugeField &dataOr, Float rho, Float epsilon)

template<typename Float >
void	OvrImpSTOUTStep (GaugeField &dataDs, const GaugeField &dataOr, Float rho, Float epsilon)

void	printLaunchTimer ()

void	setDiracRefineParam (DiracParam &diracParam, QudaInvertParam *inv_param, const bool pc)

void	setDiracPreParam (DiracParam &diracParam, QudaInvertParam *inv_param, const bool pc, bool comms)

void	createDirac (Dirac &d, Dirac &dSloppy, Dirac *&dPre, QudaInvertParam &param, const bool pc_solve)

void	createDirac (Dirac &d, Dirac &dSloppy, Dirac &dPre, Dirac &dRef, QudaInvertParam &param, const bool pc_solve)

void	massRescale (cudaColorSpinorField &b, QudaInvertParam &param)

void	fillInnerSolveParam (SolverParam &inner, const SolverParam &outer)

int	reliable (double &rNorm, double &maxrx, double &maxrr, const double &r2, const double &delta)

template<int N>
void	compute_alpha_N (Complex Q_AQandg, Complex alpha)

template<int N>
void	compute_beta_N (Complex Q_AQandg, Complex Q_AS, Complex *beta)

template<libtype which_lib>
void	ComputeRitz (EigCGArgs &args)

template<>
void	ComputeRitz< libtype::eigen_lib > (EigCGArgs &args)

template<>
void	ComputeRitz< libtype::magma_lib > (EigCGArgs &args)

static void	fillEigCGInnerSolverParam (SolverParam &inner, const SolverParam &outer, bool use_sloppy_partial_accumulator=true)

static void	fillInitCGSolverParam (SolverParam &inner, const SolverParam &outer)

double	timeInterval (struct timeval start, struct timeval end)

void	computeBeta (Complex *beta, std::vector< ColorSpinorField > Ap, int i, int N, int k)

void	updateAp (Complex *beta, std::vector< ColorSpinorField > Ap, int begin, int size, int k)

void	orthoDir (Complex *beta, std::vector< ColorSpinorField > Ap, int k, int pipeline)

void	backSubs (const Complex alpha, Complex const beta, const double gamma, Complex *delta, int n)

void	updateSolution (ColorSpinorField &x, const Complex alpha, Complex const beta, double gamma, int k, std::vector< ColorSpinorField *> p)

template<libtype which_lib>
void	ComputeHarmonicRitz (GMResDRArgs &args)

template<>
void	ComputeHarmonicRitz< libtype::magma_lib > (GMResDRArgs &args)

template<>
void	ComputeHarmonicRitz< libtype::eigen_lib > (GMResDRArgs &args)

template<libtype which_lib>
void	ComputeEta (GMResDRArgs &args)

template<>
void	ComputeEta< libtype::magma_lib > (GMResDRArgs &args)

template<>
void	ComputeEta< libtype::eigen_lib > (GMResDRArgs &args)

void	fillFGMResDRInnerSolveParam (SolverParam &inner, const SolverParam &outer)

template<typename T >
static void	applyT (T d_out[], const T d_in[], const T gamma[], const T rho[], int N)

template<typename T >
static void	applyB (T d_out[], const T d_in[], int N)

void	print (const double d[], int n)

template<typename T >
static void	zero (T d[], int N)

template<typename T >
static void	applyThirdTerm (T d_out[], const T d_in[], int k, int j, int s, const T gamma[], const T rho[], const T gamma_kprev[], const T rho_kprev[])

template<typename T >
static void	computeCoeffs (T d_out[], const T d_p1[], const T d_p2[], int k, int j, int s, const T gamma[], const T rho[], const T gamma_kprev[], const T rho_kprev[])

void	updateAlphaZeta (double alpha, double zeta, double zeta_old, const double r2, const double beta, const double pAp, const double offset, const int nShift, const int j_low)

static void	fillInnerSolverParam (SolverParam &inner, const SolverParam &outer)

template<typename Float , typename Oprod , typename Gauge , typename Mom >
__host__ __device__ void	completeKSForceCore (KSForceArg< Oprod, Gauge, Mom > &arg, int idx)

template<typename Float , typename Oprod , typename Gauge , typename Mom >
__global__ void	completeKSForceKernel (KSForceArg< Oprod, Gauge, Mom > arg)

template<typename Float , typename Oprod , typename Gauge , typename Mom >
void	completeKSForceCPU (KSForceArg< Oprod, Gauge, Mom > &arg)

template<typename Float , typename Oprod , typename Gauge , typename Mom >
void	completeKSForce (Oprod oprod, Gauge gauge, Mom mom, int dim[4], const GaugeField &meta, QudaFieldLocation location, long long *flops)

template<typename Float , typename Result , typename Oprod , typename Gauge >
__host__ __device__ void	computeKSLongLinkForceCore (KSLongLinkArg< Result, Oprod, Gauge > &arg, int idx)

template<typename Float , typename Result , typename Oprod , typename Gauge >
__global__ void	computeKSLongLinkForceKernel (KSLongLinkArg< Result, Oprod, Gauge > arg)

template<typename Float , typename Result , typename Oprod , typename Gauge >
void	computeKSLongLinkForceCPU (KSLongLinkArg< Result, Oprod, Gauge > &arg)

template<typename Float , typename Result , typename Oprod , typename Gauge >
void	computeKSLongLinkForce (Result res, Oprod oprod, Gauge gauge, int dim[4], const GaugeField &meta, QudaFieldLocation location)

template<typename Float >
void	computeKSLongLinkForce (GaugeField &result, const GaugeField &oprod, const GaugeField &gauge, QudaFieldLocation location)

static void	print_trace (void)

static void	print_alloc_header ()

static void	print_alloc (AllocType type)

static void	track_malloc (const AllocType &type, const MemAlloc &a, void *ptr)

static void	track_free (const AllocType &type, void *ptr)

static void *	aligned_malloc (MemAlloc &a, size_t size)

template<typename real , int Nc, QudaCloverFieldOrder order>
double	norm (const CloverField &u, norm_type_ type)

template<typename real , int Nc>
double	norm (const CloverField &u, norm_type_ type)

template<typename real >
double	_norm (const CloverField &u, norm_type_ type)

template<typename real , int Nc, QudaGaugeFieldOrder order>
double	norm (const GaugeField &u, int d, norm_type_ type)

template<typename real , int Nc>
double	norm (const GaugeField &u, int d, norm_type_ type)

template<typename real >
double	norm (const GaugeField &u, int d, norm_type_ type)

void	forceRecord (double2 &force, double dt, const char *fname)

dim3	GetBlockDim (size_t threads, size_t size)

__global__ void	kernel_random (cuRNGState *state, unsigned long long seed, int size_cb, rngArg arg)
	CUDA kernel to initialize CURAND RNG states. More...

void	launch_kernel_random (cuRNGState *state, unsigned long long seed, int size_cb, int n_parity, int X[4])
	Call CUDA kernel to initialize CURAND RNG states. More...

template<IndexType idxType, typename Int >
__device__ __forceinline__ int	neighborIndex (const unsigned int &cb_idx, const int(&shift)[4], const bool(&partitioned)[4], const unsigned int &parity)

template<typename FloatN , int N, typename Output , typename Input >
__global__ void	shiftColorSpinorFieldKernel (ShiftQuarkArg< Output, Input > arg)

template<typename FloatN , int N, typename Output , typename Input >
__global__ void	shiftColorSpinorFieldExternalKernel (ShiftQuarkArg< Output, Input > arg)

void	shiftColorSpinorField (cudaColorSpinorField &dst, const cudaColorSpinorField &src, const unsigned int parity, const unsigned int dim, const int shift)

static void	report (const char *type)

template<typename real , typename Arg >
__device__ __host__ void	genGauss (Arg &arg, cuRNGState &localState, int parity, int x_cb, int s, int c)

template<typename real , typename Arg >
__device__ __host__ void	genUniform (Arg &arg, cuRNGState &localState, int parity, int x_cb, int s, int c)

template<typename real , int Ns, int Nc, QudaNoiseType type, typename Arg >
void	SpinorNoiseCPU (Arg &arg)

template<typename real , int Ns, int Nc, QudaNoiseType type, typename Arg >
__global__ void	SpinorNoiseGPU (Arg arg)

void	computeStaggeredOprod (GaugeField &outA, GaugeField &outB, ColorSpinorField &inEven, ColorSpinorField &inOdd, int parity, const double coeff[2], int nFace)

int	traceEnabled ()

static void	deserializeTuneCache (std::istream &in)

static void	serializeTuneCache (std::ostream &out)

static void	serializeProfile (std::ostream &out, std::ostream &async_out)

static void	serializeTrace (std::ostream &out)

static void	broadcastTuneCache ()

bool	policyTuning ()

template<typename Float , typename G >
__global__ void	ProjectSU3kernel (ProjectSU3Arg< Float, G > arg)

void	setTransferGPU (bool)

Variables
__device__ unsigned int	count [QUDA_MAX_MULTI_REDUCE] = { }

__shared__ bool	isLastBlockDone

__shared__ volatile bool	isLastWarpDone [16]

static __constant__ signed char	B_array_d [MAX_MATRIX_SIZE]

static signed char	B_array_h [MAX_MATRIX_SIZE]

__shared__ float	s []

constexpr int	size = 4096

static __constant__ char	mobius_d [size]

static __constant__ char	mobius_d [size]

static int	commDim [QUDA_MAX_DIM]

const int	Nstream = 9

static const char	gDigitsLut [200]

static bool	bidirectional_debug = false

cudaStream_t *	stream

static bool	complete_recv_fwd [QUDA_MAX_DIM] = { }

static bool	complete_recv_back [QUDA_MAX_DIM] = { }

static bool	complete_send_fwd [QUDA_MAX_DIM] = { }

static bool	complete_send_back [QUDA_MAX_DIM] = { }

static auto	pinned_allocator = [] (size_t bytes ) { return static_cast<Complex*>(pool_pinned_malloc(bytes)); }

static auto	pinned_deleter = [] (Complex *hptr) { pool_pinned_free(hptr); }

static bool	dslash_init = false

static int	first_active_policy =static_cast<int>(DslashCoarsePolicy::DSLASH_COARSE_POLICY_DISABLED)

static char	policy_string [TuneKey::aux_n]

static bool	kernelPackT = false

static std::stack< bool >	kptstack

static double	unscaled_shifts [QUDA_MAX_MULTI_SHIFT]

static int	max_eigcg_cycles = 4

static QudaFieldLocation	reorder_location_ = QUDA_CUDA_FIELD_LOCATION

static std::map< void *, MemAlloc >	alloc [N_ALLOC_TYPE]

static long	total_bytes [N_ALLOC_TYPE] = {0}

static long	max_total_bytes [N_ALLOC_TYPE] = {0}

static long	total_host_bytes

static long	max_total_host_bytes

static long	total_pinned_bytes

static long	max_total_pinned_bytes

static std::stringstream	force_stream

static long long	force_count = 0

static long long	force_flush = 1000

static bool	debug = false

static TimeProfile	apiTimer ("CUDA API calls (driver)")

static TuneKey	last_key

static std::list< TraceKey >	trace_list

static int	enable_trace = 0

static const std::string	quda_hash = QUDA_HASH

static std::string	resource_path

static map	tunecache

static map::iterator	it

static size_t	initial_cache_size = 0

static const std::string	quda_version = STR(QUDA_VERSION_MAJOR) "." STR(QUDA_VERSION_MINOR) "." STR(QUDA_VERSION_SUBMINOR)

static bool	tuning = false

static bool	profile_count = true

static bool	policy_tuning = false

static TimeProfile	launchTimer ("tuneLaunch")

Detailed Description

This is the covariant derivative based on the basic gauged Laplace operator

This is the gauged domain-wall 4-d preconditioned operator.

Note, for now, this just applies a batched 4-d dslash across the fifth dimension.

This is the gauged domain-wall 5-d preconditioned operator.

This is a staggered Dirac operator

This is the gauged twisted-mass operator acting on a non-degenerate quark doublet.

This is the preconditioned twisted-mass operator acting on a non-degenerate quark doublet.

This is the basic gauged twisted-clover operator

This is the preconditioned gauged twisted-mass operator

This is the basic gauged twisted-mass operator

This is the basic gauged Wilson operator

TODO

gauge fix support
ghost texture support in accessors
CPU support

This is the Wilson-clover linear operator

This is the Wilson-clover preconditioned linear operator

This code has not been checked. In particular, I suspect it is erroneous in multi-GPU since it looks like the halo ghost region isn't being treated here.

Generic Multi Shift Solver

For staggered, the mass is folded into the Dirac operator. Otherwise the matrix mass is 'unmodified'.

The lowest offset is in offsets[0]

This is the laplacian derivative based on the basic gauged differential operator

Typedef Documentation

◆ ColorSpinorFieldSet

using quda::ColorSpinorFieldSet = typedef ColorSpinorField

Definition at line 1220 of file invert_quda.h.

◆ Complex

typedef std::complex<double> quda::Complex

Definition at line 46 of file quda_internal.h.

◆ CompositeColorSpinorField

typedef std::vector<ColorSpinorField*> quda::CompositeColorSpinorField

Typedef for a set of spinors. Can be further divided into subsets, e.g., with different precisions (not implemented currently)

Definition at line 17 of file color_spinor_field.h.

◆ cuRNGState

typedef struct curandStateMRG32k3a quda::cuRNGState

Definition at line 17 of file random_quda.h.

◆ DenseMatrix

typedef MatrixXcd quda::DenseMatrix

Definition at line 36 of file inv_eigcg_quda.cpp.

◆ DynamicStride

typedef Stride< Dynamic, Dynamic > quda::DynamicStride

Definition at line 18 of file deflation.cpp.

◆ map

typedef std::map<TuneKey, TuneParam> quda::map

Definition at line 28 of file tune.cpp.

◆ RealVector

using quda::RealVector = typedef VectorXd

Definition at line 39 of file inv_eigcg_quda.cpp.

◆ RowMajorDenseMatrix

typedef Matrix< Complex, Dynamic, Dynamic, RowMajor > quda::RowMajorDenseMatrix

Definition at line 42 of file inv_eigcg_quda.cpp.

◆ storeType

typedef int quda::storeType

Definition at line 15 of file coarse_op_kernel.cuh.

◆ Vector

typedef VectorXcd quda::Vector

Definition at line 38 of file inv_eigcg_quda.cpp.

◆ VectorSet

typedef MatrixXcd quda::VectorSet

Definition at line 37 of file inv_eigcg_quda.cpp.

Enumeration Type Documentation

◆ AllocType

enum quda::AllocType

Enumerator
DEVICE
DEVICE_PINNED
HOST
PINNED
MAPPED
N_ALLOC_TYPE

Definition at line 16 of file malloc.cpp.

◆ BiCGstabLUpdateType

enum quda::BiCGstabLUpdateType

The following code is based on Kate's worker class in Multi-CG.

This worker class is used to update most of the u and r vectors. On BiCG iteration j, r[0] through r[j] and u[0] through u[j] all get updated, but the subsequent mat-vec operation only gets applied to r[j] and u[j]. Thus, we can hide updating r[0] through r[j-1] and u[0] through u[j-1], respectively, in the comms for the matvec on r[j] and u[j]. This results in improved strong scaling for BiCGstab-L.

See paragraphs 2 and 3 in the comments on the Worker class in Multi-CG for more remarks.

Enumerator
BICGSTABL_UPDATE_U
BICGSTABL_UPDATE_R

Definition at line 173 of file inv_bicgstabl_quda.cpp.

◆ ComputeType

enum quda::ComputeType

Enumerator
COMPUTE_UV
COMPUTE_AV
COMPUTE_TMAV
COMPUTE_TMCAV
COMPUTE_CLOVER_INV_MAX
COMPUTE_TWISTED_CLOVER_INV_MAX
COMPUTE_VUV
COMPUTE_COARSE_CLOVER
COMPUTE_REVERSE_Y
COMPUTE_DIAGONAL
COMPUTE_TMDIAGONAL
COMPUTE_CONVERT
COMPUTE_RESCALE
COMPUTE_INVALID

Definition at line 13 of file coarse_op.cuh.

◆ Dslash5Type

enum quda::Dslash5Type

Enumerator
DSLASH5_DWF
DSLASH5_MOBIUS_PRE
DSLASH5_MOBIUS
M5_INV_DWF
M5_INV_MOBIUS
M5_INV_ZMOBIUS

Definition at line 396 of file dslash_quda.h.

◆ DslashCoarsePolicy

enum quda::DslashCoarsePolicy

strong

Enumerator
DSLASH_COARSE_BASIC
DSLASH_COARSE_ZERO_COPY_PACK
DSLASH_COARSE_ZERO_COPY_READ
DSLASH_COARSE_ZERO_COPY
DSLASH_COARSE_GDR_SEND
DSLASH_COARSE_GDR_RECV
DSLASH_COARSE_GDR
DSLASH_COARSE_ZERO_COPY_PACK_GDR_RECV
DSLASH_COARSE_GDR_SEND_ZERO_COPY_READ
DSLASH_COARSE_POLICY_DISABLED

Definition at line 458 of file dslash_coarse.cu.

◆ DslashType

enum quda::DslashType

Enumerator
DSLASH_INTERIOR
DSLASH_EXTERIOR
DSLASH_FULL

Definition at line 16 of file dslash_coarse.cuh.

◆ KernelType

enum quda::KernelType

Enumerator
INTERIOR_KERNEL
EXTERIOR_KERNEL_ALL
EXTERIOR_KERNEL_X
EXTERIOR_KERNEL_Y
EXTERIOR_KERNEL_Z
EXTERIOR_KERNEL_T
KERNEL_POLICY

Definition at line 464 of file index_helper.cuh.

◆ libtype [1/2]

enum quda::libtype

strong

Enumerator
eigen_lib
magma_lib
lapack_lib
mkl_lib
eigen_lib
magma_lib
lapack_lib
mkl_lib

Definition at line 47 of file inv_eigcg_quda.cpp.

◆ libtype [2/2]

enum quda::libtype

strong

Enumerator
eigen_lib
magma_lib
lapack_lib
mkl_lib
eigen_lib
magma_lib
lapack_lib
mkl_lib

Definition at line 57 of file inv_gmresdr_quda.cpp.

◆ MemoryLocation

enum quda::MemoryLocation

Enumerator
Device
Host
Remote

Definition at line 15 of file color_spinor_field.h.

◆ norm_type_ [1/2]

enum quda::norm_type_

Enumerator
NORM1
NORM2
ABS_MAX
ABS_MIN
NORM1
NORM2
ABS_MAX
ABS_MIN

Definition at line 7 of file max_gauge.cu.

◆ norm_type_ [2/2]

enum quda::norm_type_

Enumerator
NORM1
NORM2
ABS_MAX
ABS_MIN
NORM1
NORM2
ABS_MAX
ABS_MIN

Definition at line 7 of file max_clover.cu.

◆ QudaProfileType

enum quda::QudaProfileType

Enumerator
QUDA_PROFILE_H2D	host -> device transfers
QUDA_PROFILE_D2H	The time in seconds for device -> host transfers
QUDA_PROFILE_INIT	The time in seconds taken for initiation
QUDA_PROFILE_PREAMBLE	The time in seconds taken for any preamble
QUDA_PROFILE_COMPUTE	The time in seconds taken for the actual computation
QUDA_PROFILE_COMMS	synchronous communication
QUDA_PROFILE_EPILOGUE	The time in seconds taken for any epilogue
QUDA_PROFILE_FREE	The time in seconds for freeing resources
QUDA_PROFILE_IO	time spent on file i/o
QUDA_PROFILE_CHRONO	time spent on chronology
QUDA_PROFILE_EIGEN	time spent on host-side Eigen
QUDA_PROFILE_ARPACK	time spent on host-side ARPACK
QUDA_PROFILE_LOWER_LEVEL	dummy timer to mark beginning of lower-level timers which do not count towards global time
QUDA_PROFILE_PACK_KERNEL	face packing kernel
QUDA_PROFILE_DSLASH_KERNEL	dslash kernel
QUDA_PROFILE_GATHER	gather (device -> host)
QUDA_PROFILE_SCATTER	scatter (host -> device)
QUDA_PROFILE_LAUNCH_KERNEL	cudaLaunchKernel
QUDA_PROFILE_EVENT_RECORD	cuda event record
QUDA_PROFILE_EVENT_QUERY	cuda event querying
QUDA_PROFILE_STREAM_WAIT_EVENT	stream waiting for event completion
QUDA_PROFILE_FUNC_SET_ATTRIBUTE	set function attribute
QUDA_PROFILE_EVENT_SYNCHRONIZE	event synchronization
QUDA_PROFILE_STREAM_SYNCHRONIZE	stream synchronization
QUDA_PROFILE_DEVICE_SYNCHRONIZE	device synchronization
QUDA_PROFILE_MEMCPY_D2D_ASYNC	device to device async copy
QUDA_PROFILE_MEMCPY_D2H_ASYNC	device to host async copy
QUDA_PROFILE_MEMCPY2D_D2H_ASYNC	device to host 2-d memcpy async copy
QUDA_PROFILE_MEMCPY_H2D_ASYNC	host to device async copy
QUDA_PROFILE_COMMS_START	initiating communication
QUDA_PROFILE_COMMS_QUERY	querying communication
QUDA_PROFILE_CONSTANT	time spent setting CUDA constant parameters
QUDA_PROFILE_TOTAL	The total time in seconds for the algorithm. Must be the penultimate type.
QUDA_PROFILE_COUNT	The total number of timers we have. Must be last enum type.

Definition at line 103 of file timer.h.

Function Documentation

◆ __fast_pow()

template<typename real >

__device__ __host__ real quda::__fast_pow	(	real	a,
		int	b
	)

inline

Definition at line 15 of file math_helper.cuh.

References pow().

Referenced by constantInv().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ __launch_bounds__()

template<int block_size, typename sumFloat , typename Float , int nSpin, int spinBlockSize, int nColor, int coarseSpin, int nVec, typename Arg >

quda::__launch_bounds__ ( 2 * block_size )

Definition at line 233 of file block_orthogonalize.cuh.

References quda::BlockOrthoArg< Rotator, Vector, fineSpin, spinBlockSize, coarseSpin, nVec >::B, B_array_d, dot(), quda::blas::dot_(), nColor, quda::Arg< real, Ns, Nc, order >::nParity, quda::BlockOrthoArg< Rotator, Vector, fineSpin, spinBlockSize, coarseSpin, nVec >::parity, and s.

Here is the call graph for this function:

◆ _norm()

template<typename real >

double quda::_norm	(	const CloverField &	u,
		norm_type_	type
	)

Definition at line 40 of file max_clover.cu.

References errorQuda, and quda::CloverField::Ncolor().

Here is the call graph for this function:

◆ abs() [1/4]

template<typename ValueType >

__host__ __device__ ValueType quda::abs ( ValueType x )

inline

Definition at line 125 of file complex_quda.h.

References abs().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ abs() [2/4]

template<typename ValueType >

__host__ __device__ ValueType quda::abs ( const complex< ValueType > & z )

inline

Returns the magnitude of z.

Definition at line 1060 of file complex_quda.h.

◆ abs() [3/4]

template<>

__host__ __device__ float quda::abs ( const complex< float > & z )

inline

Definition at line 1065 of file complex_quda.h.

References quda::complex< float >::imag(), and quda::complex< float >::real().

Here is the call graph for this function:

◆ abs() [4/4]

template<>

__host__ __device__ double quda::abs ( const complex< double > & z )

inline

Definition at line 1070 of file complex_quda.h.

References quda::complex< double >::imag(), and quda::complex< double >::real().

Referenced by abs().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ acos() [1/2]

template<typename ValueType >

__host__ __device__ ValueType quda::acos ( ValueType x )

inline

Definition at line 61 of file complex_quda.h.

References acos().

Referenced by exponentiate_iQ(), and setUnitarizeLinksConstants().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ acos() [2/2]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::acos ( const complex< ValueType > & z )

inline

Definition at line 1274 of file complex_quda.h.

References asin().

Referenced by acos().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ acosh()

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::acosh ( const complex< ValueType > & z )

inline

Definition at line 1295 of file complex_quda.h.

References log(), and sqrt().

Here is the call graph for this function:

◆ activeTuning()

bool quda::activeTuning ( )

query if tuning is in progress

Returns: tuning in progress?

Definition at line 121 of file tune.cpp.

References tuning.

Referenced by quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >::apply(), qudaLaunchKernel(), and quda::TunableVectorYZ::resizeStep().

Here is the caller graph for this function:

◆ AddCoarseDiagonalCPU()

template<typename Float , int nSpin, int nColor, typename Arg >

void quda::AddCoarseDiagonalCPU ( Arg & arg )

Definition at line 1020 of file coarse_op_kernel.cuh.

References nColor, and s.

◆ AddCoarseDiagonalGPU()

template<typename Float , int nSpin, int nColor, typename Arg >

__global__ void quda::AddCoarseDiagonalGPU ( Arg arg )

Definition at line 1036 of file coarse_op_kernel.cuh.

References nColor, and s.

◆ AddCoarseTmDiagonalCPU()

template<typename Float , int nSpin, int nColor, typename Arg >

void quda::AddCoarseTmDiagonalCPU ( Arg & arg )

Definition at line 1050 of file coarse_op_kernel.cuh.

References quda::CalculateYArg< Float, fineSpin, coarseSpin, fineColor, coarseColor, coarseGauge, coarseGaugeAtomic, fineGauge, fineSpinor, fineSpinorTmp, fineSpinorV, fineClover >::mu, nColor, and s.

◆ AddCoarseTmDiagonalGPU()

template<typename Float , int nSpin, int nColor, typename Arg >

__global__ void quda::AddCoarseTmDiagonalGPU ( Arg arg )

Definition at line 1073 of file coarse_op_kernel.cuh.

References quda::CalculateYArg< Float, fineSpin, coarseSpin, fineColor, coarseColor, coarseGauge, coarseGaugeAtomic, fineGauge, fineSpinor, fineSpinorTmp, fineSpinorV, fineClover >::mu, nColor, and s.

◆ aligned_malloc()

static void* quda::aligned_malloc	(	MemAlloc &	a,
		size_t	size
	)

static

Under CUDA 4.0, cudaHostRegister seems to require that both the beginning and end of the buffer be aligned on page boundaries. This local function takes care of the alignment and gets called by pinned_malloc_() and mapped_malloc_().

Definition at line 141 of file malloc.cpp.

References quda::MemAlloc::base_size, errorQuda, quda::MemAlloc::file, quda::MemAlloc::func, quda::MemAlloc::line, and quda::MemAlloc::size.

Referenced by mapped_malloc_(), and pinned_malloc_().

Here is the caller graph for this function:

◆ APEStep()

void quda::APEStep	(	GaugeField &	dataDs,
		const GaugeField &	dataOr,
		double	alpha
	)

Apply APE smearing to the gauge field.

Parameters

[out]	dataDs	Output smeared field
[in]	dataOr	Input gauge field
[in]	alpha	smearing parameter

Definition at line 128 of file gauge_ape.cu.

References errorQuda, quda::GaugeField::isNative(), quda::GaugeField::Order(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_SINGLE_PRECISION, and quda::GaugeField::Reconstruct().

Referenced by performAPEnStep().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ appendMatrixToArray() [1/2]

__device__ void quda::appendMatrixToArray	(	const Matrix< complex< double >, 3 > &	mat,
		const int	idx,
		const int	stride,
		double2 *const	array
	)

inline

Definition at line 904 of file quda_matrix.h.

References mat().

Here is the call graph for this function:

◆ appendMatrixToArray() [2/2]

__device__ void quda::appendMatrixToArray	(	const Matrix< complex< float >, 3 > &	mat,
		const int	idx,
		const int	stride,
		float2 *const	array
	)

inline

Definition at line 914 of file quda_matrix.h.

References mat().

Here is the call graph for this function:

◆ applyB()

template<typename T >

static void quda::applyB	(	T	d_out[],
		const T	d_in[],
		int	N
	)

static

Definition at line 37 of file inv_mpcg_quda.cpp.

Referenced by applyThirdTerm().

Here is the caller graph for this function:

◆ applyClover()

template<typename Float , int Ns, int Nc, int Mc, int color_stride, bool dagger, typename Arg >

__device__ __host__ void quda::applyClover	(	complex< Float >	out[],
		Arg &	arg,
		int	x_cb,
		int	src_idx,
		int	parity,
		int	s,
		int	color_block,
		int	color_offset
	)

inline

Applies the coarse clover matrix on a given parity and checkerboard site index

Parameters

out	The result out += X * in
X	The coarse clover field
in	The input field
parity	The site parity
x_cb	The checkerboarded site index

Definition at line 280 of file dslash_coarse.cuh.

References conj(), dagger, quda::DslashCoarseArg< Float, yFloat, ghostFloat, coarseSpin, coarseColor, csOrder, gOrder >::dim, quda::Arg< real, Ns, Nc, order >::nParity, and quda::Arg< real, Ns, Nc, order >::volumeCB.

Here is the call graph for this function:

◆ ApplyClover()

void quda::ApplyClover	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const CloverField &	clover,
		bool	inverse,
		int	parity
	)

Apply clover-matrix field to a color-spinor field.

Parameters

[out]	out	Result color-spinor field
[in]	in	Input color-spinor field
[in]	clover	Clover-matrix field
[in]	inverse	Whether we are applying the inverse or not
[in]	parity	Field parity (if color-spinor field is single parity)

Definition at line 604 of file dslash_quda.cu.

References quda::Clover< Float, nSpin, nColor, Arg >::apply(), arg(), checkCudaError, checkLocation, checkPrecision, errorQuda, in, inverse(), quda::ColorSpinorField::Ncolor(), quda::ColorSpinorField::Nspin(), Nstream, out, parity, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, QUDA_SINGLE_PRECISION, and streams.

Referenced by quda::DiracClover::Clover(), and quda::DiracCloverPC::CloverInv().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ ApplyCoarse()

void quda::ApplyCoarse	(	ColorSpinorField &	out,
		const ColorSpinorField &	inA,
		const ColorSpinorField &	inB,
		const GaugeField &	Y,
		const GaugeField &	X,
		double	kappa,
		int	parity = `QUDA_INVALID_PARITY`,
		bool	dslash = `true`,
		bool	clover = `true`,
		bool	dagger = `false`,
		const int *	commDim = `0`,
		QudaPrecision	halo_precision = `QUDA_INVALID_PRECISION`
	)

Apply the coarse dslash stencil. This single driver accounts for all variations with and without the clover field, with and without dslash, and both single and full parity fields.

Parameters

[out]	out	The result vector
[in]	inA	The first input vector
[in]	inB	The second input vector
[in]	Y	Coarse link field
[in]	X	Coarse clover field
[in]	kappa	Scaling parameter
[in]	parity	Parity of the field (if single parity)
[in]	dslash	Are we applying dslash?
[in]	clover	Are we applying clover?
[in]	dagger	Apply dagger operator?
[in]	commDim	Which dimensions are partitioned?
[in]	halo_precision	What precision to use for the halos (if QUDA_INVALID_PRECISION, use field precision)

Definition at line 772 of file dslash_coarse.cu.

References quda::DslashCoarsePolicyTune::apply().

Referenced by quda::DiracCoarse::Clover(), quda::DiracCoarse::CloverInv(), quda::DiracCoarse::Dslash(), quda::DiracCoarsePC::Dslash(), quda::DiracCoarse::DslashXpay(), and quda::DiracCoarse::M().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ applyCovDev()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, int mu, typename Arg , typename Vector >

__device__ __host__ void quda::applyCovDev	(	Vector &	out,
		Arg &	arg,
		int	coord[nDim],
		int	x_cb,
		int	parity,
		int	idx,
		int	thread_dim,
		bool &	active
	)

inline

Applies the off-diagonal part of the covariant derivative operator

Parameters

[out]	out	The out result field
[in,out]	arg	Parameter struct
[in]	U	The gauge field
[in]	coord	Site coordinate
[in]	x_cb	The checker-boarded site index. This is a 4-d index only
[in]	parity	The site parity
[in]	idx	Thread index (equal to face index for exterior kernels)
[in]	thread_dim	Which dimension this thread corresponds to (fused exterior only)

Definition at line 63 of file covDev.cuh.

References conj(), quda::CovDevArg< Float, nColor, reconstruct_ >::ghost, quda::CovDevArg< Float, nColor, reconstruct_ >::in, quda::Arg< real, Ns, Nc, order >::nParity, and quda::CovDevArg< Float, nColor, reconstruct_ >::U.

Here is the call graph for this function:

◆ ApplyCovDev()

void quda::ApplyCovDev	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const GaugeField &	U,
		int	mu,
		int	parity,
		bool	dagger,
		const int *	comm_override,
		TimeProfile &	profile
	)

Driver for applying the covariant derivative.

out = U * in

where U is the gauge field in a particular direction.

This operator can be applied to both single parity (checker-boarded) fields, or to full fields.

Parameters

[out]	out	The output result field
[in]	in	The input field
[in]	U	The gauge field used for the covariant derivative
[in]	mu	Direction of the derivative. For mu > 3 it goes backwards
[in]	parity	Destination parity
[in]	dagger	Whether this is for the dagger operator
[in]	comm_override	Override for which dimensions are partitioned
[in]	profile	The TimeProfile used for profiling the dslash

Definition at line 185 of file covDev.cu.

References checkLocation, checkPrecision, dagger, errorQuda, quda::ColorSpinorField::FieldOrder(), in, mu, out, parity, popKernelPackT(), pushKernelPackT(), and quda::ColorSpinorField::V().

Referenced by quda::GaugeCovDev::DslashCD().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ ApplyDomainWall4D()

void quda::ApplyDomainWall4D	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const GaugeField &	U,
		double	a,
		double	m_5,
		const Complex *	b_5,
		const Complex *	c_5,
		const ColorSpinorField &	x,
		int	parity,
		bool	dagger,
		const int *	comm_override,
		TimeProfile &	profile
	)

Driver for applying the batched Wilson 4-d stencil to a 5-d vector with 4-d preconditioned data order.

out = D * in

where D is the gauged Wilson linear operator.

If a is non-zero, the operation is given by out = x + a * D in. This operator can be applied to both single parity (checker-boarded) fields, or to full fields.

Parameters

[out]	out	The output result field
[in]	in	The input field
[in]	U	The gauge field used for the operator
[in]	a	Scale factor applied
[in]	m_5	Wilson mass shift
[in]	b_5	Mobius coefficient array (length Ls)
[in]	c_5	Mobius coefficient array (length Ls)
[in]	x	Vector field we accumulate onto
[in]	parity	Destination parity
[in]	dagger	Whether this is for the dagger operator
[in]	comm_override	Override for which dimensions are partitioned
[in]	profile	The TimeProfile used for profiling the dslash

Definition at line 99 of file dslash_domain_wall_4d.cu.

References checkLocation, checkPrecision, dagger, errorQuda, quda::ColorSpinorField::FieldOrder(), in, out, parity, and quda::ColorSpinorField::V().

Referenced by quda::DiracDomainWall4D::Dslash4(), quda::DiracMobius::Dslash4(), quda::DiracDomainWall4D::Dslash4Xpay(), quda::DiracMobius::Dslash4Xpay(), quda::DiracDomainWall4D::M(), and quda::DiracMobius::M().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ ApplyDomainWall5D()

void quda::ApplyDomainWall5D	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const GaugeField &	U,
		double	a,
		double	m_f,
		const ColorSpinorField &	x,
		int	parity,
		bool	dagger,
		const int *	comm_override,
		TimeProfile &	profile
	)

Driver for applying the Domain-wall 5-d stencil to a 5-d vector with 5-d preconditioned data order.

out = D_5 * in

where D_5 is the 5-d Wilson linear operator with fifth dimension boundary condition set by the fermion mass.

If a is non-zero, the operation is given by out = x + a * D_5 in. This operator can be applied to both single parity (checker-boarded) fields, or to full fields.

Parameters

[out]	out	The output result field
[in]	in	The input field
[in]	U	The gauge field used for the operator
[in]	a	Scale factor applied (typically -kappa_5)
[in]	m_f	Fermion mass parameter
[in]	x	Vector field we accumulate onto
[in]	parity	Destination parity
[in]	dagger	Whether this is for the dagger operator
[in]	comm_override	Override for which dimensions are partitioned
[in]	profile	The TimeProfile used for profiling the dslash

Definition at line 118 of file dslash_domain_wall_5d.cu.

References checkLocation, checkPrecision, dagger, errorQuda, quda::ColorSpinorField::FieldOrder(), in, out, parity, popKernelPackT(), pushKernelPackT(), and quda::ColorSpinorField::V().

Referenced by quda::DiracDomainWall::Dslash(), quda::DiracDomainWall::DslashXpay(), and quda::DiracDomainWall::M().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ applyDslash()

template<typename Float , int nDim, int Ns, int Nc, int Mc, int color_stride, int dim_stride, int thread_dir, int thread_dim, bool dagger, DslashType type, typename Arg >

__device__ __host__ void quda::applyDslash	(	complex< Float >	out[],
		Arg &	arg,
		int	x_cb,
		int	src_idx,
		int	parity,
		int	s_row,
		int	color_block,
		int	color_offset
	)

inline

Definition at line 94 of file dslash_coarse.cuh.

References conj(), dagger, getCoordsCB(), linkIndexM1(), linkIndexP1(), quda::Arg< real, Ns, Nc, order >::nParity, and quda::Arg< real, Ns, Nc, order >::volumeCB.

Here is the call graph for this function:

◆ ApplyDslash5()

void quda::ApplyDslash5	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const ColorSpinorField &	x,
		double	m_f,
		double	m_5,
		const Complex *	b_5,
		const Complex *	c_5,
		double	a,
		bool	dagger,
		Dslash5Type	type
	)

Apply either the domain-wall / mobius Dslash5 operator or the M5 inverse operator. In the current implementation, it is expected that the color-spinor fields are 4-d preconditioned.

Parameters

[out]	out	Result color-spinor field
[in]	in	Input color-spinor field
[in]	x	Auxiliary input color-spinor field
[in]	m_f	Fermion mass parameter
[in]	m_5	Wilson mass shift
[in]	b_5	Mobius coefficient array (length Ls)
[in]	c_5	Mobius coefficient array (length Ls)
[in]	a	Scale factor used in xpay operator
[in]	dagger	Whether this is for the dagger operator
[in]	type	Type of dslash we are applying

Definition at line 216 of file dslash5_domain_wall.cu.

References quda::Dslash5< Float, nColor, Arg >::apply(), quda::Dslash5< Float, nColor, Arg >::arg, checkLocation, checkPrecision, dagger, errorQuda, in, quda::ColorSpinorField::Ncolor(), Nstream, out, quda::ColorSpinorField::PCType(), QUDA_4D_PC, QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, QUDA_SINGLE_PRECISION, and streams.

Referenced by quda::DiracMobius::Dslash4pre(), quda::DiracMobius::Dslash4preXpay(), quda::DiracDomainWall4D::Dslash5(), quda::DiracMobius::Dslash5(), quda::DiracDomainWall4DPC::Dslash5inv(), quda::DiracMobiusPC::Dslash5inv(), quda::DiracDomainWall4DPC::Dslash5invXpay(), quda::DiracMobiusPC::Dslash5invXpay(), quda::DiracDomainWall4D::Dslash5Xpay(), quda::DiracMobius::Dslash5Xpay(), quda::DiracDomainWall4D::M(), and quda::DiracMobius::M().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ ApplyGamma() [1/2]

template<typename Float , int nColor>

void quda::ApplyGamma	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		int	d
	)

Definition at line 292 of file dslash_quda.cu.

References quda::Gamma< ValueType, basis, dir >::apply(), arg(), Nstream, and streams.

Referenced by ApplyGamma(), and gamma5().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ ApplyGamma() [2/2]

template<typename Float >

void quda::ApplyGamma	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		int	d
	)

Definition at line 301 of file dslash_quda.cu.

References ApplyGamma(), checkLocation, checkPrecision, errorQuda, in, quda::ColorSpinorField::Ncolor(), out, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, and QUDA_SINGLE_PRECISION.

Here is the call graph for this function:

◆ applyGaugePhase()

void quda::applyGaugePhase ( GaugeField & u )

Apply the staggered phase factor to the gauge field.

Parameters

[in] u The gauge field to which we apply the staggered phase factors

Definition at line 223 of file gauge_phase.cu.

References errorQuda, quda::GaugeField::exchangeGhost(), quda::LatticeField::GhostExchange(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_GHOST_EXCHANGE_PAD, and QUDA_SINGLE_PRECISION.

Referenced by quda::GaugeField::applyStaggeredPhase(), quda::cpuGaugeField::Gauge_p(), and quda::GaugeField::removeStaggeredPhase().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ ApplyImprovedStaggered()

void quda::ApplyImprovedStaggered	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const GaugeField &	U,
		const GaugeField &	L,
		double	a,
		const ColorSpinorField &	x,
		int	parity,
		bool	dagger,
		const int *	comm_override,
		TimeProfile &	profile
	)

Apply the improved staggered dslash operator to a color-spinor field.

Parameters

[out]	out	Result color-spinor field
[in]	in	Input color-spinor field
[in]	U	Gauge-Link (1-link or fat-link)
[in]	L	Long-Links for asqtad
[in]	a	xpay parameter (set to 0.0 for non-xpay version)
[in]	x	Vector field we accumulate onto
[in]	parity	parity parameter
[in]	dagger	Whether we are applying the dagger or not
[in]	improved	whether to apply the standard-staggered (false) or asqtad (true) operator

Definition at line 181 of file dslash_improved_staggered.cu.

References checkLocation, checkPrecision, comm_dim_partitioned(), dagger, errorQuda, quda::ColorSpinorField::FieldOrder(), in, out, parity, quda::ColorSpinorField::V(), and quda::LatticeField::X().

Referenced by quda::DiracImprovedStaggered::Dslash(), quda::DiracImprovedStaggered::DslashXpay(), and quda::DiracImprovedStaggered::M().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ applyLaplace()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, int dir, typename Arg , typename Vector >

__device__ __host__ void quda::applyLaplace	(	Vector &	out,
		Arg &	arg,
		int	coord[nDim],
		int	x_cb,
		int	parity,
		int	idx,
		int	thread_dim,
		bool &	active
	)

inline

Applies the off-diagonal part of the Laplace operator

Parameters

[out]	out	The out result field
[in,out]	arg	Parameter struct
[in]	U	The gauge field
[in]	coord	Site coordinate
[in]	x_cb	The checker-boarded site index. This is a 4-d index only
[in]	parity	The site parity
[in]	idx	Thread index (equal to face index for exterior kernels)
[in]	thread_dim	Which dimension this thread corresponds to (fused exterior only)

Definition at line 69 of file laplace.cuh.

References conj(), quda::LaplaceArg< Float, nColor, reconstruct_ >::ghost, quda::LaplaceArg< Float, nColor, reconstruct_ >::in, linkIndexM1(), linkIndexP1(), quda::Arg< real, Ns, Nc, order >::nParity, and quda::LaplaceArg< Float, nColor, reconstruct_ >::U.

Referenced by laplace().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ ApplyLaplace()

void quda::ApplyLaplace	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const GaugeField &	U,
		int	dir,
		double	kappa,
		const ColorSpinorField &	x,
		int	parity,
		bool	dagger,
		const int *	comm_override,
		TimeProfile &	profile
	)

Driver for applying the Laplace stencil.

out = - kappa * A * in

where A is the gauge laplace linear operator.

If x is defined, the operation is given by out = x - kappa * A in. This operator can be applied to both single parity (checker-boarded) fields, or to full fields.

Parameters

[out]	out	The output result field
[in]	in	The input field
[in]	U	The gauge field used for the gauge Laplace
[in]	dir	Direction of the derivative 0,1,2,3 to omit (-1 is full 4D)
[in]	kappa	Scale factor applied
[in]	x	Vector field we accumulate onto

Definition at line 188 of file laplace.cu.

References checkLocation, checkPrecision, dagger, errorQuda, quda::ColorSpinorField::FieldOrder(), in, kappa, out, parity, and quda::ColorSpinorField::V().

Referenced by quda::GaugeLaplace::Dslash(), and quda::GaugeLaplace::DslashXpay().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ ApplyNdegTwistedMass()

void quda::ApplyNdegTwistedMass	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const GaugeField &	U,
		double	a,
		double	b,
		double	c,
		const ColorSpinorField &	x,
		int	parity,
		bool	dagger,
		const int *	comm_override,
		TimeProfile &	profile
	)

Driver for applying the non-degenerate twisted-mass stencil.

out = a * D * in + (1 + i*b*gamma_5*tau_3 + c*tau_1) * x

where D is the gauged Wilson linear operator. The quark fields out, in and x are five dimensional, with the fifth dimension corresponding to the flavor dimension. The convention is that the first 4-d slice (s=0) corresponds to the positive twist and the second slice (s=1) corresponds to the negative twist.

This operator can be applied to both single parity (4d checker-boarded) fields, or to full fields.

Parameters

[out]	out	The output result field
[in]	in	The input field
[in]	U	The gauge field used for the operator
[in]	a	Scale factor applied to Wilson term (typically -kappa)
[in]	b	Chiral twist factor applied (typically 2*mu*kappa)
[in]	c	Flavor twist factor applied (typically -2*epsilon*kappa)
[in]	x	Vector field we accumulate onto
[in]	parity	Destination parity
[in]	dagger	Whether this is for the dagger operator
[in]	comm_override	Override for which dimensions are partitioned
[in]	profile	The TimeProfile used for profiling the dslash

Definition at line 102 of file dslash_ndeg_twisted_mass.cu.

References checkLocation, checkPrecision, dagger, errorQuda, quda::ColorSpinorField::FieldOrder(), in, out, parity, and quda::ColorSpinorField::V().

Referenced by quda::DiracTwistedMass::Dslash(), quda::DiracTwistedMass::DslashXpay(), and quda::DiracTwistedMass::M().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ ApplyNdegTwistedMassPreconditioned()

void quda::ApplyNdegTwistedMassPreconditioned	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const GaugeField &	U,
		double	a,
		double	b,
		double	c,
		bool	xpay,
		const ColorSpinorField &	x,
		int	parity,
		bool	dagger,
		bool	asymmetric,
		const int *	comm_override,
		TimeProfile &	profile
	)

Driver for applying the preconditioned non-degenerate twisted-mass stencil.

out = a * (1 + i*b*gamma_5*tau_3 + c*tau_1) * D * in + x

where D is the gauged Wilson linear operator. The quark fields out, in and x are five dimensional, with the fifth dimension corresponding to the flavor dimension. The convention is that the first 4-d slice (s=0) corresponds to the positive twist and the second slice (s=1) corresponds to the negative twist.

This operator can (at present) be applied to only single parity (checker-boarded) fields.

For the dagger operator, we generally apply the conjugate transpose operator

out = x + D^\dagger A^{-\dagger}

with the additional asymmetric special case, where we do not transpose the order of operations

out = A^{-\dagger} D^\dagger (no xpay term)

This variant is required when we have the asymmetric preconditioned operator and require the preconditioned twist term to remain in between the applications of D. This would be combined with a subsequent non-preconditioned dagger operator, A*x - kappa^2 D, to form the full operator.

Parameters

[out]	out	The output result field
[in]	in	The input field
[in]	U	The gauge field used for the operator
[in]	a	Scale factor applied to Wilson term (typically -kappa^2/(1 + b*b - c*c) )
[in]	b	Chiral twist factor applied (typically -2*mu*kappa)
[in]	c	Flavor twist factor applied (typically 2*epsilon*kappa)
[in]	xpay	Whether to do xpay or not
[in]	x	Vector field we accumulate onto
[in]	parity	Destination parity
[in]	dagger	Whether this is for the dagger operator
[in]	asymmetric	Whether this is for the asymmetric preconditioned dagger operator (a*(1 - i*b*gamma_5) * D^dagger * in)
[in]	comm_override	Override for which dimensions are partitioned
[in]	profile	The TimeProfile used for profiling the dslash

Definition at line 146 of file dslash_ndeg_twisted_mass_preconditioned.cu.

References checkLocation, checkPrecision, dagger, errorQuda, quda::ColorSpinorField::FieldOrder(), in, out, parity, popKernelPackT(), pushKernelPackT(), quda::ColorSpinorField::V(), and quda::blas::xpay().

Referenced by quda::DiracTwistedMassPC::Dslash(), and quda::DiracTwistedMassPC::DslashXpay().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ applyStaggered()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, typename Arg , typename Vector >

__device__ __host__ void quda::applyStaggered	(	Vector &	out,
		Arg &	arg,
		int	coord[nDim],
		int	x_cb,
		int	parity,
		int	idx,
		int	thread_dim,
		bool &	active
	)

inline

Applies the off-diagonal part of the Staggered / Asqtad operator.

Parameters

[out]	out	The out result field
[in]	U	The gauge field
[in]	in	The input field
[in]	parity	The site parity
[in]	x_cb	The checkerboarded site index

Definition at line 76 of file dslash_staggered.cuh.

References conj(), quda::StaggeredArg< Float, nColor, reconstruct_u_, reconstruct_l_, improved_, phase_ >::ghost, quda::StaggeredArg< Float, nColor, reconstruct_u_, reconstruct_l_, improved_, phase_ >::in, quda::StaggeredArg< Float, nColor, reconstruct_u_, reconstruct_l_, improved_, phase_ >::L, linkIndexM1(), linkIndexM3(), linkIndexP1(), linkIndexP3(), quda::Arg< real, Ns, Nc, order >::nParity, printLink(), StaggeredPhase(), and quda::StaggeredArg< Float, nColor, reconstruct_u_, reconstruct_l_, improved_, phase_ >::U.

Here is the call graph for this function:

◆ ApplyStaggered()

void quda::ApplyStaggered	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const GaugeField &	U,
		double	a,
		const ColorSpinorField &	x,
		int	parity,
		bool	dagger,
		const int *	comm_override,
		TimeProfile &	profile
	)

Apply the staggered dslash operator to a color-spinor field.

Parameters

[out]	out	Result color-spinor field
[in]	in	Input color-spinor field
[in]	U	Gauge-Link (1-link or fat-link)
[in]	a	xpay parameter (set to 0.0 for non-xpay version)
[in]	x	Vector field we accumulate onto
[in]	parity	parity parameter
[in]	dagger	Whether we are applying the dagger or not
[in]	improved	whether to apply the standard-staggered (false) or asqtad (true) operator

Definition at line 112 of file dslash_staggered.cu.

References checkLocation, checkPrecision, dagger, errorQuda, quda::ColorSpinorField::FieldOrder(), in, out, parity, and quda::ColorSpinorField::V().

Referenced by quda::DiracStaggered::Dslash(), quda::DiracStaggered::DslashXpay(), and quda::DiracStaggered::M().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ applyT()

template<typename T >

static void quda::applyT	(	T	d_out[],
		const T	d_in[],
		const T	gamma[],
		const T	rho[],
		int	N
	)

static

Definition at line 18 of file inv_mpcg_quda.cpp.

Referenced by applyThirdTerm().

Here is the caller graph for this function:

◆ applyThirdTerm()

template<typename T >

static void quda::applyThirdTerm	(	T	d_out[],
		const T	d_in[],
		int	k,
		int	j,
		int	s,
		const T	gamma[],
		const T	rho[],
		const T	gamma_kprev[],
		const T	rho_kprev[]
	)

static

Definition at line 57 of file inv_mpcg_quda.cpp.

References applyB(), applyT(), s, and zero().

Referenced by computeCoeffs().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ ApplyTwistClover()

void quda::ApplyTwistClover	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const CloverField &	clover,
		double	kappa,
		double	mu,
		double	epsilon,
		int	parity,
		int	dagger,
		QudaTwistGamma5Type	twist
	)

Apply twisted clover-matrix field to a color-spinor field.

Parameters

[out]	out	Result color-spinor field
[in]	in	Input color-spinor field
[in]	clover	Clover-matrix field
[in]	kappa	kappa parameter
[in]	mu	mu parameter
[in]	epsilon	epsilon parameter
[in]	parity	Field parity (if color-spinor field is single parity)
[in]	dagger	Whether we are applying the dagger or not
[in]	twist	The type of kernel we are doing: if (twist == QUDA_TWIST_GAMMA5_DIRECT) apply (Clover + i*a*gamma_5) to the input spinor; else if (twist == QUDA_TWIST_GAMMA5_INVERSE) apply (Clover + i*a*gamma_5)/(Clover^2 + a^2) to the input spinor

Definition at line 769 of file dslash_quda.cu.

References quda::TwistClover< Float, nSpin, nColor, Arg >::apply(), arg(), checkCudaError, checkLocation, checkPrecision, dagger, epsilon, errorQuda, in, kappa, mu, quda::ColorSpinorField::Ncolor(), quda::ColorSpinorField::Nspin(), Nstream, out, parity, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, QUDA_SINGLE_PRECISION, QUDA_TWIST_GAMMA5_DIRECT, and streams.

Referenced by quda::DiracTwistedClover::twistedCloverApply().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ ApplyTwistedClover()

void quda::ApplyTwistedClover	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const GaugeField &	U,
		const CloverField &	C,
		double	a,
		double	b,
		const ColorSpinorField &	x,
		int	parity,
		bool	dagger,
		const int *	comm_override,
		TimeProfile &	profile
	)

Driver for applying the twisted-clover stencil.

out = a * D * in + (C + i*b*gamma_5) * x

where D is the gauged Wilson linear operator, and C is the clover field.

This operator can be applied to both single parity (4d checker-boarded) fields, or to full fields.

Parameters

[out]	out	The output result field
[in]	in	The input field
[in]	U	The gauge field used for the operator
[in]	C	The clover field used for the operator
[in]	a	Scale factor applied to Wilson term (typically -kappa)
[in]	b	Chiral twist factor applied (typically 2*mu*kappa)
[in]	x	Vector field we accumulate onto
[in]	parity	Destination parity
[in]	dagger	Whether this is for the dagger operator
[in]	comm_override	Override for which dimensions are partitioned
[in]	profile	The TimeProfile used for profiling the dslash

Definition at line 122 of file dslash_twisted_clover.cu.

References checkLocation, checkPrecision, dagger, errorQuda, quda::ColorSpinorField::FieldOrder(), in, out, parity, and quda::ColorSpinorField::V().

Referenced by quda::DiracTwistedClover::DslashXpay(), and quda::DiracTwistedClover::M().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ ApplyTwistedCloverPreconditioned()

void quda::ApplyTwistedCloverPreconditioned	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const GaugeField &	U,
		const CloverField &	C,
		double	a,
		double	b,
		bool	xpay,
		const ColorSpinorField &	x,
		int	parity,
		bool	dagger,
		const int *	comm_override,
		TimeProfile &	profile
	)

Driver for applying the preconditioned twisted-clover stencil.

out = a * (C + i*b*gamma_5)^{-1} * D * in + x = a * C^{-2} (C - i*b*gamma_5) * D * in + x = A^{-1} * D * in + x

where D is the gauged Wilson linear operator and C is the clover field. This operator can (at present) be applied to only single parity (checker-boarded) fields. When the dagger operator is requested, we do not transpose the order of operations, e.g.

out = A^{-\dagger} D^{\dagger} (no xpay term)

Although not a conjugate transpose of the regular operator, this variant is used to enable kernel fusion between the application of D and the subsequent application of A, e.g., in the symmetric dagger operator we need to apply

M = (1 - kappa^2 D^{\dagger} A^{-\dagger} D^{\dagger} A^{-\dagger})

and since we cannot fuse D^{\dagger} A^{-\dagger}, we instead fuse A^{-\dagger} D^{\dagger}.

Parameters

[out]	out	The output result field
[in]	in	The input field
[in]	U	The gauge field used for the operator
[in]	C	The clover field used for the operator
[in]	a	Scale factor applied to Wilson term ( typically 1 / (1 + b*b) or kappa^2 / (1 + b*b) )
[in]	b	Twist factor applied (typically -2*kappa*mu)
[in]	xpay	Whether to do xpay or not
[in]	x	Vector field we accumulate onto when xpay is true
[in]	parity	Destination parity
[in]	dagger	Whether this is for the dagger operator
[in]	comm_override	Override for which dimensions are partitioned
[in]	profile	The TimeProfile used for profiling the dslash

Definition at line 161 of file dslash_twisted_clover_preconditioned.cu.

References checkLocation, checkPrecision, dagger, errorQuda, quda::ColorSpinorField::FieldOrder(), in, out, parity, quda::ColorSpinorField::V(), and quda::blas::xpay().

Referenced by quda::DiracTwistedCloverPC::Dslash(), and quda::DiracTwistedCloverPC::DslashXpay().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ ApplyTwistedMass()

void quda::ApplyTwistedMass	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const GaugeField &	U,
		double	a,
		double	b,
		const ColorSpinorField &	x,
		int	parity,
		bool	dagger,
		const int *	comm_override,
		TimeProfile &	profile
	)

Driver for applying the twisted-mass stencil.

out = a * D * in + (1 + i*b*gamma_5) * x

where D is the gauged Wilson linear operator.

This operator can be applied to both single parity (checker-boarded) fields, or to full fields.

Parameters

[out]	out	The output result field
[in]	in	The input field
[in]	U	The gauge field used for the operator
[in]	a	Scale factor applied to Wilson term (typically -kappa)
[in]	b	Twist factor applied (typically 2*mu*kappa)
[in]	x	Vector field we accumulate onto
[in]	parity	Destination parity
[in]	dagger	Whether this is for the dagger operator
[in]	comm_override	Override for which dimensions are partitioned
[in]	profile	The TimeProfile used for profiling the dslash

Definition at line 103 of file dslash_twisted_mass.cu.

References checkLocation, checkPrecision, dagger, errorQuda, quda::ColorSpinorField::FieldOrder(), in, out, parity, and quda::ColorSpinorField::V().

Referenced by quda::DiracTwistedMass::Dslash(), quda::DiracTwistedMass::DslashXpay(), and quda::DiracTwistedMass::M().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ ApplyTwistedMassPreconditioned()

void quda::ApplyTwistedMassPreconditioned	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const GaugeField &	U,
		double	a,
		double	b,
		bool	xpay,
		const ColorSpinorField &	x,
		int	parity,
		bool	dagger,
		bool	asymmetric,
		const int *	comm_override,
		TimeProfile &	profile
	)

Driver for applying the preconditioned twisted-mass stencil.

out = a*(1 + i*b*gamma_5) * D * in + x

where D is the gauged Wilson linear operator. This operator can (at present) be applied to only single parity (checker-boarded) fields. For the dagger operator, we generally apply the conjugate transpose operator

out = x + D^{\dagger} A^{-\dagger}

with the additional asymmetric special case, where we do not transpose the order of operations

out = A^{-\dagger} D^{\dagger} (no xpay term)

This variant is required when we have the asymmetric preconditioned operator and require the preconditioned twist term to remain in between the applications of D. This would be combined with a subsequent non-preconditioned dagger operator, A*x - kappa^2 D, to form the full operator.

Parameters

[out]	out	The output result field
[in]	in	The input field
[in]	U	The gauge field used for the operator
[in]	a	Scale factor applied to Wilson term ( typically kappa^2 / (1 + b*b) )
[in]	b	Twist factor applied (typically -2*kappa*mu)
[in]	xpay	Whether to do xpay or not
[in]	x	Vector field we accumulate onto when xpay is true
[in]	parity	Destination parity
[in]	dagger	Whether this is for the dagger operator
[in]	asymmetric	Whether this is for the asymmetric preconditioned dagger operator (a*(1 - i*b*gamma_5) * D^dagger * in)
[in]	comm_override	Override for which dimensions are partitioned
[in]	profile	The TimeProfile used for profiling the dslash

Definition at line 116 of file dslash_twisted_mass_preconditioned.cu.

References checkLocation, checkPrecision, dagger, errorQuda, quda::ColorSpinorField::FieldOrder(), in, out, parity, popKernelPackT(), pushKernelPackT(), quda::ColorSpinorField::V(), and quda::blas::xpay().

Referenced by quda::DiracTwistedMassPC::Dslash(), and quda::DiracTwistedMassPC::DslashXpay().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ ApplyTwistGamma()

void quda::ApplyTwistGamma	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		int	d,
		double	kappa,
		double	mu,
		double	epsilon,
		int	dagger,
		QudaTwistGamma5Type	type
	)

Apply the twisted-mass gamma operator to a color-spinor field.

Parameters

[out]	out	Result color-spinor field
[in]	in	Input color-spinor field
[in]	d	Which gamma matrix we are applying (C counting, so gamma_5 has d=4)
[in]	kappa	kappa parameter
[in]	mu	mu parameter
[in]	epsilon	epsilon parameter
[in]	dagger	Whether we are applying the dagger or not
[in]	type	The type of kernel we are doing

Definition at line 416 of file dslash_quda.cu.

References quda::TwistGamma< Float, nColor, Arg >::apply(), arg(), checkCudaError, checkLocation, checkPrecision, dagger, epsilon, errorQuda, in, kappa, mu, quda::ColorSpinorField::Ncolor(), Nstream, out, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, QUDA_SINGLE_PRECISION, and streams.

Referenced by quda::DiracTwistedMass::twistedApply().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ applyU()

void quda::applyU	(	GaugeField &	force,
		GaugeField &	U
	)

Left multiply the force field by the gauge field

force = U * force

Parameters

force	Force field
U	Gauge field

Definition at line 446 of file momentum.cu.

References checkCudaError, errorQuda, quda::GaugeField::Order(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, and QUDA_FLOAT2_GAUGE_ORDER.

Referenced by computeStaggeredForceQuda(), and updateMomentum().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ ApplyWilson()

void quda::ApplyWilson	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const GaugeField &	U,
		double	kappa,
		const ColorSpinorField &	x,
		int	parity,
		bool	dagger,
		const int *	comm_override,
		TimeProfile &	profile
	)

Driver for applying the Wilson stencil.

out = D * in

where D is the gauged Wilson linear operator.

If kappa is non-zero, the operation is given by out = x + kappa * D in. This operator can be applied to both single parity (checker-boarded) fields, or to full fields.

Parameters

[out]	out	The output result field
[in]	in	The input field
[in]	U	The gauge field used for the operator
[in]	kappa	Scale factor applied
[in]	x	Vector field we accumulate onto
[in]	parity	Destination parity
[in]	dagger	Whether this is for the dagger operator
[in]	comm_override	Override for which dimensions are partitioned
[in]	profile	The TimeProfile used for profiling the dslash

Definition at line 86 of file dslash_wilson.cu.

References checkLocation, checkPrecision, dagger, errorQuda, quda::ColorSpinorField::FieldOrder(), in, out, parity, and quda::ColorSpinorField::V().

Referenced by quda::DiracWilson::Dslash(), quda::DiracWilson::DslashXpay(), and quda::DiracWilson::M().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ applyWilson()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, typename Arg , typename Vector >

__device__ __host__ void quda::applyWilson	(	Vector &	out,
		Arg &	arg,
		int	coord[nDim],
		int	x_cb,
		int	s,
		int	parity,
		int	idx,
		int	thread_dim,
		bool &	active
	)

inline

Applies the off-diagonal part of the Wilson operator.

Parameters

[out]	out	The out result field
[in,out]	arg	Parameter struct
[in]	coord	Site coordinate
[in]	x_cb	The checker-boarded site index (at present this is a 4-d index only)
[in]	s	The fifth-dimension index
[in]	parity	Site parity
[in]	idx	Thread index (equal to face index for exterior kernels)
[in]	thread_dim	Which dimension this thread corresponds to (fused exterior only)

Definition at line 62 of file dslash_wilson.cuh.

References conj(), quda::DslashArg< Float >::dagger, EXTERIOR_KERNEL_ALL, quda::WilsonArg< Float, nColor, reconstruct_ >::ghost, quda::WilsonArg< Float, nColor, reconstruct_ >::in, quda::DslashArg< Float >::kernel_type, nColor, quda::DslashArg< Float >::nParity, quda::DslashArg< Float >::parity, quda::WilsonArg< Float, nColor, reconstruct_ >::reconstruct, and quda::WilsonArg< Float, nColor, reconstruct_ >::U.

Here is the call graph for this function:

◆ ApplyWilsonClover()

void quda::ApplyWilsonClover	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const GaugeField &	U,
		const CloverField &	A,
		double	kappa,
		const ColorSpinorField &	x,
		int	parity,
		bool	dagger,
		const int *	comm_override,
		TimeProfile &	profile
	)

Driver for applying the Wilson-clover stencil.

out = A * x + kappa * D * in

where D is the gauged Wilson linear operator.

This operator can be applied to both single parity (checker-boarded) fields, or to full fields.

Parameters

[out]	out	The output result field
[in]	in	Input field that D is applied to
[in]	x	Input field that A is applied to
[in]	U	The gauge field used for the operator
[in]	A	The clover field used for the operator
[in]	kappa	Scale factor applied
[in]	x	Vector field we accumulate onto
[in]	parity	Destination parity
[in]	dagger	Whether this is for the dagger operator
[in]	comm_override	Override for which dimensions are partitioned
[in]	profile	The TimeProfile used for profiling the dslash

Definition at line 121 of file dslash_wilson_clover.cu.

References checkLocation, checkPrecision, dagger, errorQuda, quda::ColorSpinorField::FieldOrder(), in, out, parity, and quda::ColorSpinorField::V().

Referenced by quda::DiracClover::DslashXpay(), and quda::DiracClover::M().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ ApplyWilsonCloverPreconditioned()

void quda::ApplyWilsonCloverPreconditioned	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const GaugeField &	U,
		const CloverField &	A,
		double	kappa,
		const ColorSpinorField &	x,
		int	parity,
		bool	dagger,
		const int *	comm_override,
		TimeProfile &	profile
	)

Driver for applying the preconditioned Wilson-clover stencil.

out = A^{-1} * D * in + x

where D is the gauged Wilson linear operator and A is the clover field. This operator can (at present) be applied to only single parity (checker-boarded) fields. When the dagger operator is requested, we do not transpose the order of operations, e.g.

out = A^{-\dagger} D^{\dagger} (no xpay term)

Although not a conjugate transpose of the regular operator, this variant is used to enable kernel fusion between the application of D and the subsequent application of A, e.g., in the symmetric dagger operator we need to apply

M = (1 - kappa^2 D^{\dagger} A^{-1} D^{\dagger} A^{-1})

and since we cannot fuse D^{\dagger} A^{-\dagger}, we instead fuse A^{-\dagger} D^{\dagger}.

If kappa is non-zero, the operation is given by out = x + kappa * A^{-1} D in. This operator can (at present) be applied to only single parity (checker-boarded) fields.

Parameters

[out]	out	The output result field
[in]	in	The input field
[in]	U	The gauge field used for the operator
[in]	A	The clover field used for the operator
[in]	kappa	Scale factor applied
[in]	x	Vector field we accumulate onto
[in]	parity	Destination parity
[in]	dagger	Whether this is for the dagger operator
[in]	comm_override	Override for which dimensions are partitioned
[in]	profile	The TimeProfile used for profiling the dslash

Definition at line 158 of file dslash_wilson_clover_preconditioned.cu.

References checkLocation, checkPrecision, dagger, errorQuda, quda::ColorSpinorField::FieldOrder(), in, out, parity, and quda::ColorSpinorField::V().

Referenced by quda::DiracCloverPC::Dslash(), and quda::DiracCloverPC::DslashXpay().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ applyWilsonTM()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, int twist, KernelType kernel_type, typename Arg , typename Vector >

__device__ __host__ void quda::applyWilsonTM	(	Vector &	out,
		Arg &	arg,
		int	coord[nDim],
		int	x_cb,
		int	s,
		int	parity,
		int	idx,
		int	thread_dim,
		bool &	active
	)

inline

Applies the off-diagonal part of the Wilson operator premultiplied by twist rotation - this is required for applying the symmetric preconditioned twisted-mass dagger operator.

Parameters

[out]	out	The out result field
[in,out]	arg	Parameter struct
[in]	coord	Site coordinate
[in]	x_cb	The checker-boarded site index
[in]	s	Fifth-dimension index
[in]	parity	Site parity
[in]	idx	Thread index (equal to face index for exterior kernels)
[in]	thread_dim	Which dimension this thread corresponds to (fused exterior only)

Definition at line 52 of file dslash_twisted_mass_preconditioned.cuh.

References quda::TwistedMassArg< Float, nColor, reconstruct_ >::asymmetric, conj(), quda::DslashArg< Float >::dagger, EXTERIOR_KERNEL_ALL, getNeighborIndexCB(), quda::WilsonArg< Float, nColor, reconstruct_ >::ghost, quda::WilsonArg< Float, nColor, reconstruct_ >::in, quda::DslashArg< Float >::kernel_type, nColor, quda::DslashArg< Float >::nParity, quda::DslashArg< Float >::parity, quda::WilsonArg< Float, nColor, reconstruct_ >::reconstruct, quda::WilsonArg< Float, nColor, reconstruct_ >::U, and quda::DslashArg< Float >::xpay.

Here is the call graph for this function:

◆ arg() [1/3]

template<typename ValueType >

__host__ __device__ ValueType quda::arg ( const complex< ValueType > & z )

inline

Returns the phase angle of z.

Definition at line 1076 of file complex_quda.h.

References atan2().

Here is the call graph for this function:

◆ arg() [2/3]

template<>

__host__ __device__ float quda::arg ( const complex< float > & z )

inline

Definition at line 1081 of file complex_quda.h.

References quda::complex< float >::imag(), and quda::complex< float >::real().

Here is the call graph for this function:

◆ arg() [3/3]

template<>

__host__ __device__ double quda::arg ( const complex< double > & z )

inline

Definition at line 1086 of file complex_quda.h.

References atan2(), quda::complex< double >::imag(), and quda::complex< double >::real().

Here is the call graph for this function:

◆ arpack_solve()

void quda::arpack_solve	(	std::vector< ColorSpinorField *> &	h_evecs,
		std::vector< Complex > &	h_evals,
		const DiracMatrix &	mat,
		QudaEigParam *	eig_param,
		TimeProfile &	profile
	)

The QUDA interface function. One passes two allocated arrays to hold the eigenmode data, the problem matrix, the arpack parameters defining what problem is to be solved, and a container for QUDA data structure types.

arpack_solve()

Parameters

[out]	h_evecs	Host fields where the e-vectors will be copied to
[out]	h_evals	Where the e-values will be copied to
[in]	mat	An explicit construction of the problem matrix.
[in]	param	Parameter container defining how the matrix is to be solved.
[in]	eig_param	Parameter structure for all QUDA eigensolvers
[in,out]	profile	TimeProfile instance used for profiling

Definition at line 507 of file quda_arpack_interface.cpp.

References errorQuda.

Referenced by eigensolveQuda().

Here is the caller graph for this function:

◆ asin() [1/2]

template<typename ValueType >

__host__ __device__ ValueType quda::asin ( ValueType x )

inline

Definition at line 66 of file complex_quda.h.

References asin().

Referenced by acos().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ asin() [2/2]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::asin ( const complex< ValueType > & z )

inline

Definition at line 1281 of file complex_quda.h.

References asinh().

Referenced by asin().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ asinh()

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::asinh ( const complex< ValueType > & z )

inline

Definition at line 1320 of file complex_quda.h.

References log(), and sqrt().

Referenced by asin().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ assertAllMemFree()

void quda::assertAllMemFree ( )

Definition at line 384 of file malloc.cpp.

References DEVICE, DEVICE_PINNED, HOST, MAPPED, PINNED, print_alloc(), print_alloc_header(), printfQuda, and warningQuda.

Referenced by endQuda().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ atan() [1/2]

template<typename ValueType >

__host__ __device__ ValueType quda::atan ( ValueType x )

inline

Definition at line 71 of file complex_quda.h.

References atan().

Here is the call graph for this function:

◆ atan() [2/2]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::atan ( const complex< ValueType > & z )

inline

Definition at line 1288 of file complex_quda.h.

References atanh().

Referenced by atan().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ atan2()

template<typename ValueType >

__host__ __device__ ValueType quda::atan2	(	ValueType	x,
		ValueType	y
	)

inline

Definition at line 76 of file complex_quda.h.

Referenced by arg(), quda::Trig< isFixed, T >::Atan2(), atanh(), link_sanity_check_internal_8(), new_save_half(), and su3Construct8().

Here is the caller graph for this function:

◆ atanh() [1/2]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::atanh ( const complex< ValueType > & z )

inline

Definition at line 1326 of file complex_quda.h.

References atan2(), and log().

Referenced by atan().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ atanh() [2/2]

template<typename ValueType >

__host__ __device__ complex<float> quda::atanh ( const complex< float > & z )

inline

Definition at line 1344 of file complex_quda.h.

References quda::complex< float >::imag(), and quda::complex< float >::real().

Here is the call graph for this function:

◆ ax()

void quda::ax	(	const double &	a,
		GaugeField &	u
	)

Scale the gauge field by the scalar a.

Parameters

[in]	a	scalar multiplier
[in]	u	The gauge field we want to multiply

Definition at line 349 of file gauge_field.cpp.

References quda::blas::ax(), colorSpinorParam(), and quda::ColorSpinorField::Create().

Referenced by quda::MG::buildFreeVectors(), computeHISQForceQuda(), dslashReference_5th(), dslashReference_5th_inv(), quda::cpuGaugeField::Gauge_p(), quda::MG::generateNullVectors(), main(), and mdslashReference_5th_inv().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ axpy()

template<typename real , typename Link >

__device__ void quda::axpy	(	real	a,
		const real *	x,
		Link &	y
	)

inline

Definition at line 37 of file clover_deriv.cuh.

Referenced by cloverDerivativeKernel(), dslashReference_5th_inv(), mdslashReference_5th_inv(), quda::PreconCG::operator()(), quda::SD::operator()(), operator-=(), and staggeredDslashRef().

Here is the caller graph for this function:

◆ backSubs()

void quda::backSubs	(	const Complex *	alpha,
		Complex **const	beta,
		const double *	gamma,
		Complex *	delta,
		int	n
	)

Definition at line 135 of file inv_gcr_quda.cpp.

Referenced by updateSolution().

Here is the caller graph for this function:

◆ block_idx()

template<typename T >

__device__ int quda::block_idx ( const T & swizzle )

inline

Swizzler for reordering the (x) thread block indices - use in conjunction with swizzle-factor autotuning to find the optimum swizzle factor. Specifically, the thread block id is remapped by transposing its coordinates: if the original order can be parametrized by

blockIdx.x = j * swizzle + i,

then the new order is

block_idx = i * (gridDim.x / swizzle) + j

We need to factor out any remainder and leave this in original ordering.

Parameters

[in] swizzle Swizzle factor to be applied

Returns: Swizzled block index

Definition at line 834 of file index_helper.cuh.

◆ blockOrthoCPU()

template<typename sumFloat , typename Float , int nSpin, int spinBlockSize, int nColor, int coarseSpin, int nVec, typename Arg >

void quda::blockOrthoCPU ( Arg & arg )

Definition at line 105 of file block_orthogonalize.cuh.

References dot(), nColor, quda::Arg< real, Ns, Nc, order >::nParity, quda::BlockOrthoArg< Rotator, Vector, fineSpin, spinBlockSize, coarseSpin, nVec >::parity, and s.

Here is the call graph for this function:

◆ BlockOrthogonalize()

void quda::BlockOrthogonalize	(	ColorSpinorField &	V,
		const std::vector< ColorSpinorField *> &	B,
		const int *	fine_to_coarse,
		const int *	coarse_to_fine,
		const int *	geo_bs,
		const int	spin_bs,
		const int	n_block_ortho
	)

Block orthogonalize the matrix field, where the blocks are defined by lookup tables that map the fine grid points to the coarse grid points, and similarly for the spin degrees of freedom.

Parameters

[in,out]	V	Matrix field to be orthogonalized
[in]	B	input vectors
[in]	geo_bs	Geometric block size
[in]	fine_to_coarse	Fine-to-coarse lookup table (linear indices)
[in]	coarse_to_fine	Coarse-to-fine lookup table (linear indices)
[in]	spin_bs	Spin block size
[in]	n_block_ortho	Number of times to Gram-Schmidt

Definition at line 317 of file block_orthogonalize.cu.

References errorQuda, n_block_ortho, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_SINGLE_PRECISION, and V.

Referenced by quda::Transfer::reset(), and quda::Transfer::setTransferGPU().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ broadcastTuneCache()

static void quda::broadcastTuneCache ( )

static

Distribute the tunecache from node 0 to all other nodes.

Definition at line 290 of file tune.cpp.

References comm_broadcast(), comm_rank(), deserializeTuneCache(), serializeTuneCache(), and size.

Referenced by loadTuneCache(), and tuneLaunch().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ c2d() [1/2]

__host__ __device__ double quda::c2d ( char a )

inline

Definition at line 39 of file convert.h.

Referenced by copyFloatN().

Here is the caller graph for this function:

◆ c2d() [2/2]

__host__ __device__ double quda::c2d	(	char	a,
		double	c
	)

inline

Definition at line 56 of file convert.h.

◆ c2f() [1/2]

__host__ __device__ float quda::c2f ( char a )

inline

Definition at line 38 of file convert.h.

Referenced by copy(), copy_and_scale(), and copyFloatN().

Here is the caller graph for this function:

◆ c2f() [2/2]

__host__ __device__ float quda::c2f	(	char	a,
		float	c
	)

inline

Definition at line 52 of file convert.h.

◆ calculateY()

template<bool from_coarse, typename Float , int fineSpin, int fineColor, int coarseSpin, int coarseColor, typename F , typename Ftmp , typename Vt , typename coarseGauge , typename coarseGaugeAtomic , typename fineGauge , typename fineClover >

void quda::calculateY	(	coarseGauge &	Y,
		coarseGauge &	X,
		coarseGaugeAtomic &	Y_atomic,
		coarseGaugeAtomic &	X_atomic,
		Ftmp &	UV,
		F &	AV,
		Vt &	V,
		fineGauge &	G,
		fineClover &	C,
		fineClover &	Cinv,
		GaugeField &	Y_,
		GaugeField &	X_,
		GaugeField &	Y_atomic_,
		GaugeField &	X_atomic_,
		ColorSpinorField &	uv,
		ColorSpinorField &	av,
		const ColorSpinorField &	v,
		double	kappa,
		double	mu,
		double	mu_factor,
		QudaDiracType	dirac,
		QudaMatPCType	matpc,
		bool	need_bidirectional,
		const int *	fine_to_coarse,
		const int *	coarse_to_fine
	)

Calculate the coarse-link field, including the coarse clover field.

Parameters

Y[out]	Coarse link field accessor
X[out]	Coarse clover field accessor
UV[out]	Temporary accessor used to store fine link field * null space vectors
AV[out]	Temporary accessor used to store fine clover inverse * null space vectors (only applicable when fine-grid operator is the preconditioned clover operator else in general this just aliases V)
V[in]	Packed null-space vector accessor
G[in]	Fine grid link / gauge field accessor
C[in]	Fine grid clover field accessor
Cinv[in]	Fine grid clover inverse field accessor
Y_[out]	Coarse link field
X_[out]	Coarse clover field
X_[out]	Coarse clover inverse field (used as temporary here)
v[in]	Packed null-space vectors
kappa[in]	Kappa parameter
mu[in]	Twisted-mass parameter
matpc[in]	The type of preconditioning of the source fine-grid operator
need_bidirectional[in]	If we need to force bi-directional build or not. Required if some previous level was preconditioned, even if this one isn't

Definition at line 869 of file coarse_op.cuh.

References abs(), quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >::apply(), quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >::arg, quda::LatticeField::bufferIndex, checkLocation, COMPUTE_AV, COMPUTE_CLOVER_INV_MAX, COMPUTE_COARSE_CLOVER, COMPUTE_CONVERT, COMPUTE_DIAGONAL, COMPUTE_RESCALE, COMPUTE_REVERSE_Y, COMPUTE_TMAV, COMPUTE_TMCAV, COMPUTE_TMDIAGONAL, COMPUTE_TWISTED_CLOVER_INV_MAX, COMPUTE_UV, COMPUTE_VUV, errorQuda, quda::ColorSpinorField::exchangeGhost(), getVerbosity(), quda::ColorSpinorField::Ghost(), quda::colorspinor::FieldOrderCB< Float, nSpin, nColor, nVec, order, storeFloat, ghostFloat, disable_ghost, block_float, use_tex >::norm2(), quda::LatticeField::Precision(), printfQuda, QUDA_BACKWARDS, QUDA_CLOVER_DIRAC, QUDA_CLOVERPC_DIRAC, QUDA_COARSE_DIRAC, QUDA_COARSEPC_DIRAC, QUDA_CUDA_FIELD_LOCATION, QUDA_DEBUG_VERBOSE, QUDA_FORWARDS, QUDA_HALF_PRECISION, QUDA_INVALID_PARITY, QUDA_MATPC_EVEN_EVEN, QUDA_MATPC_EVEN_EVEN_ASYMMETRIC, QUDA_MATPC_ODD_ODD, QUDA_MATPC_ODD_ODD_ASYMMETRIC, QUDA_MAX_DIM, QUDA_TWISTED_CLOVER_DIRAC, QUDA_TWISTED_CLOVERPC_DIRAC, QUDA_TWISTED_MASS_DIRAC, QUDA_TWISTED_MASSPC_DIRAC, QUDA_VERBOSE, quda::colorspinor::FieldOrderCB< Float, nSpin, nColor, nVec, order, storeFloat, ghostFloat, disable_ghost, block_float, use_tex >::resetGhost(), quda::LatticeField::Scale(), quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >::setComputeType(), quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >::setDimension(), quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >::setDirection(), sqrt(), quda::ColorSpinorField::X(), quda::LatticeField::X(), and quda::GaugeField::zero().

Referenced by CoarseOp().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ calculateYhat()

void quda::calculateYhat	(	GaugeField &	Yhat,
		GaugeField &	Xinv,
		const GaugeField &	Y,
		const GaugeField &	X
	)

Calculate preconditioned coarse links and coarse clover inverse field.

Parameters

Yhat[out]	Preconditioned coarse link field
Xinv[out]	Coarse clover inverse field
Y[in]	Coarse link field
X[in]	Coarse clover field

Definition at line 245 of file coarse_op_preconditioned.cu.

References checkPrecision, errorQuda, getVerbosity(), quda::LatticeField::Precision(), printfQuda, QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_SINGLE_PRECISION, QUDA_SUMMARIZE, and X.

Referenced by quda::DiracCoarse::createPreconditionedCoarseOp().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ CalculateYhatCPU()

template<typename Float , int n, bool compute_max_only, typename Arg >

void quda::CalculateYhatCPU ( Arg & arg )

Definition at line 100 of file coarse_op_preconditioned.cuh.

References arg(), and parity.

Here is the call graph for this function:

◆ CalculateYhatGPU()

template<typename Float , int n, bool compute_max_only, typename Arg >

__global__ void quda::CalculateYhatGPU ( Arg arg )

Definition at line 118 of file coarse_op_preconditioned.cuh.

References arg(), atomicMax(), and parity.

Here is the call graph for this function:

◆ canReuseResidentGauge()

bool quda::canReuseResidentGauge ( QudaInvertParam * inv_param )

Check that the resident gauge field is compatible with the requested inv_param

Parameters

inv_param Contains all metadata regarding host and device storage

Definition at line 2232 of file interface_quda.cpp.

References QudaInvertParam_s::cuda_prec, QudaInvertParam_s::dslash_type, quda::LatticeField::Precision(), and QUDA_ASQTAD_DSLASH.

Here is the call graph for this function:

◆ caxpy()

template<typename Float >

__device__ __host__ void quda::caxpy	(	const complex< Float > &	a,
		const complex< Float > &	x,
		complex< Float > &	y
	)

inline

Definition at line 117 of file coarse_op_kernel.cuh.

Referenced by quda::CalculateYhatArg< Float, PreconditionedGauge, Gauge, n >::CalculateYhatArg(), computeUV(), computeYhat(), quda::GMResDR::FlexArnoldiProcedure(), quda::MG::generateNullVectors(), multiplyVUV(), and quda::GMResDR::RestartVZH().

Here is the caller graph for this function:

◆ checkLength()

void quda::checkLength	(	const ColorSpinorField &	a,
		const ColorSpinorField &	b
	)

inline

Definition at line 26 of file blas_helper.cuh.

References errorQuda, quda::ColorSpinorField::Length(), and quda::ColorSpinorField::Stride().

Referenced by quda::blas::nativeBlas(), and quda::blas::nativeReduce().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ checkMomOrder()

void quda::checkMomOrder ( const GaugeField & u )

Definition at line 22 of file copy_gauge.cu.

References errorQuda, quda::GaugeField::Order(), QUDA_FLOAT2_GAUGE_ORDER, QUDA_MILC_GAUGE_ORDER, QUDA_MILC_SITE_GAUGE_ORDER, QUDA_RECONSTRUCT_10, QUDA_RECONSTRUCT_NO, QUDA_TIFR_GAUGE_ORDER, QUDA_TIFR_PADDED_GAUGE_ORDER, and quda::GaugeField::Reconstruct().

Referenced by copyGauge().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ checkNan()

template<typename Float , int length, typename Arg >

void quda::checkNan ( Arg & arg )

Check whether the field contains NaNs

Definition at line 63 of file copy_gauge.cuh.

References errorQuda, length, nColor, quda::gauge::Ncolor(), and parity.

Here is the call graph for this function:

◆ checkSpinor()

void quda::checkSpinor	(	const ColorSpinorField &	a,
		const ColorSpinorField &	b
	)

inline

Definition at line 20 of file blas_helper.cuh.

References errorQuda, quda::ColorSpinorField::Length(), and quda::ColorSpinorField::Stride().

Referenced by quda::blas::multiReduce().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ Checksum()

uint64_t quda::Checksum	(	const GaugeField &	u,
		bool	mini = `false`
	)

Compute an XOR-based checksum of this gauge field: each gauge field entry is converted to type uint64_t, and the cumulative XOR of these values is computed.

Parameters

[in] mini Whether to compute a mini checksum or global checksum. A mini checksum only computes over a subset of the lattice sites and is to be used for online comparisons, e.g., checking a field has changed with a global update algorithm.

Returns: checksum value

Definition at line 34 of file checksum.cu.

References arg(), ChecksumCPU(), comm_allreduce_xor(), errorQuda, quda::GaugeField::Ncolor(), quda::GaugeField::Order(), quda::LatticeField::Precision(), QUDA_BQCD_GAUGE_ORDER, QUDA_DOUBLE_PRECISION, QUDA_MILC_GAUGE_ORDER, QUDA_QDP_GAUGE_ORDER, QUDA_QDPJIT_GAUGE_ORDER, QUDA_SINGLE_PRECISION, QUDA_TIFR_GAUGE_ORDER, and QUDA_TIFR_PADDED_GAUGE_ORDER.

Referenced by quda::GaugeField::checksum(), and quda::cpuGaugeField::Gauge_p().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ ChecksumCPU()

template<typename Arg >

uint64_t quda::ChecksumCPU ( const Arg & arg )

Definition at line 23 of file checksum.cu.

References parity, siteChecksum(), and quda::Arg< real, Ns, Nc, order >::volumeCB.

Referenced by Checksum().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ checkUnitary()

template<typename Matrix , typename Float >

__host__ __device__ bool quda::checkUnitary	(	const Matrix &	inv,
		const Matrix &	in,
		const Float	tol
	)

inline

Check the unitarity of the input matrix to a given tolerance.

Parameters

inv	The inverse of the input matrix
in	The input matrix whose unitarity is being checked
tol	Tolerance to which this check is applied

Definition at line 24 of file su3_project.cuh.

References conj(), in, quda::Matrix< T, N >::size(), and tol.

Referenced by polarSu3().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ checkUnitaryPrint()

template<typename Matrix >

__host__ __device__ void quda::checkUnitaryPrint	(	const Matrix &	inv,
		const Matrix &	in
	)

Print out deviation for each component (used for debugging only).

Parameters

inv	The inverse of the input matrix
in	The input matrix whose unitarity is being reported

Definition at line 66 of file su3_project.cuh.

References in, and quda::Matrix< T, N >::size().

Here is the call graph for this function:

◆ cloverApply()

template<typename Float , int nSpin, int nColor, typename Arg >

__device__ __host__ void quda::cloverApply	(	Arg &	arg,
		int	x_cb,
		int	parity
	)

inline

Definition at line 519 of file dslash_quda.cu.

References nColor, quda::Arg< real, Ns, Nc, order >::nParity, out, and parity.

◆ cloverCPU()

template<typename Float , int nSpin, int nColor, typename Arg >

void quda::cloverCPU ( Arg & arg )

Definition at line 552 of file dslash_quda.cu.

References quda::Arg< real, Ns, Nc, order >::nParity, parity, and quda::Arg< real, Ns, Nc, order >::volumeCB.

◆ cloverDerivative()

void quda::cloverDerivative	(	cudaGaugeField &	force,
		cudaGaugeField &	gauge,
		cudaGaugeField &	oprod,
		double	coeff,
		QudaParity	parity
	)

Compute the derivative of the clover matrix in the direction mu,nu and compute the resulting force given the outer-product field.

Parameters

force	The computed force field (read/write update)
gauge	The input gauge field
oprod	The input outer-product field (tensor matrix field)
coeff	Multiplicative coefficient (e.g., clover coefficient)
parity	The field parity we are working on

Definition at line 174 of file clover_deriv_quda.cu.

References errorQuda, quda::GaugeField::Geometry(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_EVEN_PARITY, QUDA_SINGLE_PRECISION, QUDA_TENSOR_GEOMETRY, QUDA_VECTOR_GEOMETRY, and quda::LatticeField::X().

Referenced by computeCloverForceQuda(), and quda::FullClover::FullClover().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ cloverDerivativeKernel()

template<typename real , typename Arg >

__global__ void quda::cloverDerivativeKernel ( Arg arg )

Definition at line 320 of file clover_deriv.cuh.

References arg(), axpy(), DECLARE_LINK, quda::CloverDerivArg< Float, Force, Gauge, Oprod >::force, index(), mu, and quda::Arg< real, Ns, Nc, order >::volumeCB.

Here is the call graph for this function:

◆ cloverGPU()

template<typename Float , int nSpin, int nColor, typename Arg >

__global__ void quda::cloverGPU ( Arg arg )

Definition at line 560 of file dslash_quda.cu.

References arg(), quda::Arg< real, Ns, Nc, order >::nParity, parity, and quda::Arg< real, Ns, Nc, order >::volumeCB.

Here is the call graph for this function:

◆ cloverInvert() [1/2]

template<typename Float , typename Arg , bool computeTrLog, bool twist>

void quda::cloverInvert ( Arg & arg )

Definition at line 65 of file clover_invert.cuh.

References arg(), parity, and quda::Arg< real, Ns, Nc, order >::volumeCB.

Here is the call graph for this function:

◆ cloverInvert() [2/2]

void quda::cloverInvert	(	CloverField &	clover,
		bool	computeTraceLog
	)

This function computes the Cholesky decomposition of each clover matrix and stores the clover inverse field.

Parameters

clover	The clover field (contains both the field itself and its inverse)
computeTraceLog	Whether to compute the trace logarithm of the clover term

Definition at line 106 of file clover_invert.cu.

References errorQuda, quda::CloverField::Order(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, and QUDA_SINGLE_PRECISION.

Referenced by quda::FullClover::FullClover(), and loadCloverQuda().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ cloverInvertCompute()

template<typename Float , typename Arg , bool computeTrLog, bool twist>

__device__ __host__ double quda::cloverInvertCompute	(	Arg &	arg,
		int	x_cb,
		int	parity
	)

inline

Use a Cholesky decomposition and invert the clover matrix

Definition at line 33 of file clover_invert.cuh.

References quda::linalg::Cholesky< Mat, T, N, fast >::D(), quda::linalg::Cholesky< Mat, T, N, fast >::invert(), log(), Mat(), nColor, and quda::CloverInvertArg< Float >::twist.

Here is the call graph for this function:

◆ cloverInvertKernel()

template<int blockSize, typename Float , typename Arg , bool computeTrLog, bool twist>

__global__ void quda::cloverInvertKernel ( Arg arg )

Definition at line 82 of file clover_invert.cuh.

References arg(), parity, and quda::Arg< real, Ns, Nc, order >::volumeCB.

Here is the call graph for this function:

◆ cloverRho()

void quda::cloverRho	(	CloverField &	clover,
		double	rho
	)

This function adds a real scalar onto the clover diagonal (only to the direct field, not the inverse).

Parameters

clover	The clover field
rho	Real scalar to be added on

Referenced by quda::FullClover::FullClover().

Here is the caller graph for this function:

◆ cmac()

template<typename real >

__host__ __device__ complex<real> quda::cmac	(	const complex< real > &	x,
		const complex< real > &	y,
		const complex< real > &	z
	)

inline

Definition at line 1372 of file complex_quda.h.

Referenced by quda::gauge::Reconstruct< 12, Float, ghostExchange_ >::Unpack(), quda::gauge::Reconstruct< 13, Float, ghostExchange_, stag_phase >::Unpack(), and quda::gauge::Reconstruct< 8, Float, ghostExchange_ >::Unpack().

Here is the caller graph for this function:

◆ cmul()

template<typename real >

__host__ __device__ complex<real> quda::cmul	(	const complex< real > &	x,
		const complex< real > &	y
	)

inline

Definition at line 1361 of file complex_quda.h.

Referenced by quda::gauge::Reconstruct< 9, Float, ghostExchange_, stag_phase >::Pack(), quda::gauge::Reconstruct< 12, Float, ghostExchange_ >::Unpack(), quda::gauge::Reconstruct< 13, Float, ghostExchange_, stag_phase >::Unpack(), quda::gauge::Reconstruct< 8, Float, ghostExchange_ >::Unpack(), and quda::gauge::Reconstruct< 9, Float, ghostExchange_, stag_phase >::Unpack().

Here is the caller graph for this function:

◆ CoarseCoarseOp()

void quda::CoarseCoarseOp	(	GaugeField &	Y,
		GaugeField &	X,
		const Transfer &	T,
		const GaugeField &	gauge,
		const GaugeField &	clover,
		const GaugeField &	cloverInv,
		double	kappa,
		double	mu,
		double	mu_factor,
		QudaDiracType	dirac,
		QudaMatPCType	matpc,
		bool	need_bidirectional
	)

Coarse operator construction from an intermediate-grid operator (Coarse)

Parameters

Y[out]	Coarse link field
X[out]	Coarse clover field
T[in]	Transfer operator that defines the new coarse space
gauge[in]	Link field from fine grid
clover[in]	Clover field on fine grid
cloverInv[in]	Clover inverse field on fine grid
kappa[in]	Kappa parameter
mu[in]	Mu parameter (set to non-zero for twisted-mass/twisted-clover)
mu_factor[in]	Multiplicative factor for the mu parameter
matpc[in]	The type of even-odd preconditioned fine-grid operator we are constructing the coarse grid operator from. If matpc==QUDA_MATPC_INVALID then we assume the operator is not even-odd preconditioned and we coarsen the full operator.
need_bidirectional[in]	Whether or not we need to force a bi-directional build, even if the given level isn't preconditioned—if any previous level is preconditioned, we've violated that symmetry.

Definition at line 192 of file coarsecoarse_op.cu.

References checkLocation, quda::ColorSpinorParam::create, quda::GaugeField::Create(), quda::ColorSpinorField::Create(), errorQuda, quda::GaugeFieldParam::location, quda::LatticeField::MemType(), param, quda::LatticeField::Precision(), QUDA_CUDA_FIELD_LOCATION, QUDA_SINGLE_PRECISION, QUDA_ZERO_FIELD_CREATE, quda::GaugeFieldParam::setPrecision(), quda::Transfer::Vectors(), and X.

Referenced by quda::DiracCoarse::createCoarseOp(), and quda::DiracCoarsePC::createCoarseOp().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ coarseDslash() [1/2]

template<typename Float , int nDim, int Ns, int Nc, int Mc, int color_stride, int dim_thread_split, bool dslash, bool clover, bool dagger, DslashType type, int dir, int dim, typename Arg >

__device__ __host__ void quda::coarseDslash	(	Arg &	arg,
		int	x_cb,
		int	src_idx,
		int	parity,
		int	s,
		int	color_block,
		int	color_offset
	)

inline

Definition at line 309 of file dslash_coarse.cuh.

References arg(), quda::vector_type< scalar, n >::data, quda::Arg< real, Ns, Nc, order >::nParity, quda::DslashCoarseArg< Float, yFloat, ghostFloat, coarseSpin, coarseColor, csOrder, gOrder >::out, quda::DslashCoarseArg< Float, yFloat, ghostFloat, coarseSpin, coarseColor, csOrder, gOrder >::parity, s, and quda::Arg< real, Ns, Nc, order >::volumeCB.

Here is the call graph for this function:

◆ coarseDslash() [2/2]

template<typename Float , int nDim, int Ns, int Nc, int Mc, bool dslash, bool clover, bool dagger, DslashType type, typename Arg >

void quda::coarseDslash ( Arg arg )

Definition at line 350 of file dslash_coarse.cuh.

References arg(), quda::Arg< real, Ns, Nc, order >::nParity, quda::DslashCoarseArg< Float, yFloat, ghostFloat, coarseSpin, coarseColor, csOrder, gOrder >::parity, s, and quda::Arg< real, Ns, Nc, order >::volumeCB.

Here is the call graph for this function:

◆ coarseDslashKernel()

template<typename Float , int nDim, int Ns, int Nc, int Mc, int color_stride, int dim_thread_split, bool dslash, bool clover, bool dagger, DslashType type, typename Arg >

__global__ void quda::coarseDslashKernel ( Arg arg )

Definition at line 379 of file dslash_coarse.cuh.

References arg(), quda::Arg< real, Ns, Nc, order >::nParity, quda::DslashCoarseArg< Float, yFloat, ghostFloat, coarseSpin, coarseColor, csOrder, gOrder >::parity, s, and quda::Arg< real, Ns, Nc, order >::volumeCB.

Here is the call graph for this function:

◆ coarseIndex()

template<typename Arg >

__device__ __host__ int quda::coarseIndex ( const Arg & arg )

inline

Definition at line 619 of file coarse_op_kernel.cuh.

Referenced by getIndicesShared().

Here is the caller graph for this function:

◆ CoarseOp()

void quda::CoarseOp	(	GaugeField &	Y,
		GaugeField &	X,
		const Transfer &	T,
		const cudaGaugeField &	gauge,
		const cudaCloverField *	clover,
		double	kappa,
		double	mu,
		double	mu_factor,
		QudaDiracType	dirac,
		QudaMatPCType	matpc
	)

Coarse operator construction from a fine-grid operator (Wilson / Clover)

Parameters

Y[out]	Coarse link field
X[out]	Coarse clover field
T[in]	Transfer operator that defines the coarse space
gauge[in]	Gauge field from fine grid
clover[in]	Clover field on fine grid (optional)
kappa[in]	Kappa parameter
mu[in]	Mu parameter (set to non-zero for twisted-mass/twisted-clover)
mu_factor[in]	Multiplicative factor for the mu parameter
matpc[in]	The type of even-odd preconditioned fine-grid operator we are constructing the coarse grid operator from. If matpc==QUDA_MATPC_INVALID then we assume the operator is not even-odd preconditioned and we coarsen the full operator.

Definition at line 201 of file coarse_op.cu.

Referenced by quda::DiracWilson::createCoarseOp(), quda::DiracClover::createCoarseOp(), quda::DiracCloverPC::createCoarseOp(), quda::DiracTwistedMass::createCoarseOp(), quda::DiracTwistedMassPC::createCoarseOp(), quda::DiracTwistedClover::createCoarseOp(), and quda::DiracTwistedCloverPC::createCoarseOp().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ colorInnerProduct()

template<int nColor, typename sumType , typename real >

__device__ __host__ void quda::colorInnerProduct	(	complex< sumType > &	dot,
		int	i,
		complex< real >	v[nColor],
		complex< real >	w[nColor]
	)

inline

Definition at line 63 of file block_orthogonalize.cuh.

References nColor.

◆ colorNorm()

template<int nColor, typename sumType , typename real >

__device__ __host__ void quda::colorNorm	(	sumType &	nrm,
		complex< real >	v[nColor]
	)

inline

Definition at line 76 of file block_orthogonalize.cuh.

References nColor.

◆ colorScale()

template<typename real , int nColor>

__device__ __host__ void quda::colorScale	(	complex< real >	v[nColor],
		real	a
	)

inline

Definition at line 97 of file block_orthogonalize.cuh.

References nColor.

◆ colorScaleSubtract()

template<typename real , int nColor>

__device__ __host__ void quda::colorScaleSubtract	(	complex< real >	v[nColor],
		complex< real >	a,
		complex< real >	w[nColor]
	)

inline

Definition at line 86 of file block_orthogonalize.cuh.

References nColor.

◆ colorSpinorParam() [1/2]

ColorSpinorParam quda::colorSpinorParam ( const GaugeField & a )

Definition at line 304 of file gauge_field.cpp.

Here is the call graph for this function:

◆ colorSpinorParam() [2/2]

ColorSpinorParam quda::colorSpinorParam	(	const CloverField &	a,
		bool	inverse
	)

Definition at line 445 of file clover_field.cpp.

Referenced by ax(), norm1(), and norm2().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ compareSpinor()

template<class U , class V >

int quda::compareSpinor	(	const U &	u,
		const V &	v,
		const int	tol
	)

Definition at line 184 of file color_spinor_util.cu.

References comm_allreduce_int(), comm_size(), parity, pow(), printfQuda, and tol.

Referenced by genericCompare().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ compile_type_str()

const char* quda::compile_type_str	(	const LatticeField &	meta,
		QudaFieldLocation	location_ = `QUDA_INVALID_FIELD_LOCATION`
	)

inline

Helper function for setting auxiliary string.

Parameters

[in] meta LatticeField used for querying field location

Returns: String containing location and compilation type

Definition at line 718 of file lattice_field.h.

References quda::LatticeField::Location(), QUDA_CUDA_FIELD_LOCATION, and QUDA_INVALID_FIELD_LOCATION.

Referenced by quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >::CalculateY(), quda::CopyGauge< FloatOut, FloatIn, length, Arg >::CopyGauge(), quda::GaugePlaq< Float, Gauge >::GaugePlaq(), and quda::GenericPackGhostLauncher< Float, block_float, Ns, Ms, Nc, Mc, Arg >::GenericPackGhostLauncher().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ completeKSForce() [1/2]

void quda::completeKSForce	(	GaugeField &	mom,
		const GaugeField &	oprod,
		const GaugeField &	gauge,
		QudaFieldLocation	location,
		long long *	flops = `NULL`
	)

Definition at line 152 of file ks_force_quda.cu.

References errorQuda, quda::blas::flops, quda::LatticeField::Precision(), QUDA_CUDA_FIELD_LOCATION, QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_RECONSTRUCT_10, QUDA_RECONSTRUCT_NO, QUDA_SINGLE_PRECISION, quda::GaugeField::Reconstruct(), and quda::LatticeField::X().

Here is the call graph for this function:

◆ completeKSForce() [2/2]

template<typename Float , typename Oprod , typename Gauge , typename Mom >

void quda::completeKSForce	(	Oprod	oprod,
		Gauge	gauge,
		Mom	mom,
		int	dim[4],
		const GaugeField &	meta,
		QudaFieldLocation	location,
		long long *	flops
	)

Definition at line 141 of file ks_force_quda.cu.

References quda::KSForceComplete< Float, Oprod, Gauge, Mom >::apply(), arg(), quda::KSForceComplete< Float, Oprod, Gauge, Mom >::flops(), and qudaDeviceSynchronize.

Here is the call graph for this function:

◆ completeKSForceCore()

template<typename Float , typename Oprod , typename Gauge , typename Mom >

__host__ __device__ void quda::completeKSForceCore	(	KSForceArg< Oprod, Gauge, Mom > &	arg,
		int	idx
	)

Definition at line 43 of file ks_force_quda.cu.

References quda::KSForceArg< Oprod, Gauge, Mom >::gauge, getCoords(), linkIndexShift(), makeAntiHerm(), quda::KSForceArg< Oprod, Gauge, Mom >::mom, quda::KSForceArg< Oprod, Gauge, Mom >::oprod, parity, quda::KSForceArg< Oprod, Gauge, Mom >::threads, quda::KSForceArg< Oprod, Gauge, Mom >::X, and X.

Here is the call graph for this function:

◆ completeKSForceCPU()

template<typename Float , typename Oprod , typename Gauge , typename Mom >

void quda::completeKSForceCPU ( KSForceArg< Oprod, Gauge, Mom > & arg )

Definition at line 93 of file ks_force_quda.cu.

References arg(), and quda::KSForceArg< Oprod, Gauge, Mom >::threads.

Here is the call graph for this function:

◆ completeKSForceKernel()

template<typename Float , typename Oprod , typename Gauge , typename Mom >

__global__ void quda::completeKSForceKernel ( KSForceArg< Oprod, Gauge, Mom > arg )

Definition at line 84 of file ks_force_quda.cu.

References arg(), and quda::KSForceArg< Oprod, Gauge, Mom >::threads.

Here is the call graph for this function:

◆ compute_alpha_N()

template<int N>

void quda::compute_alpha_N	(	Complex *	Q_AQandg,
		Complex *	alpha
	)

Definition at line 280 of file inv_ca_cg.cpp.

◆ compute_beta_N()

template<int N>

void quda::compute_beta_N	(	Complex *	Q_AQandg,
		Complex *	Q_AS,
		Complex *	beta
	)

Definition at line 356 of file inv_ca_cg.cpp.

◆ compute_site_max()

template<typename Float , int Ns, int Ms, int Nc, int Mc, typename Arg >

__device__ __host__ __forceinline__ Float quda::compute_site_max	(	Arg &	arg,
		int	x_cb,
		int	parity,
		int	spinor_parity,
		int	spin_block,
		int	color_block,
		bool	active
	)

Compute the max element over the spin-color components of a given site.

Definition at line 48 of file color_spinor_pack.cuh.

References errorQuda, MAX_BLOCK_FLOAT_NC, and s.

◆ computeAPEStep()

template<typename Float , typename Arg >

__global__ void quda::computeAPEStep ( Arg arg )

Definition at line 96 of file gauge_ape.cuh.

References arg(), conj(), getCoords(), linkIndexShift(), parity, setIdentity(), and quda::GaugeAPEArg< Float, GaugeOr, GaugeDs >::X.

Here is the call graph for this function:

◆ computeAV()

template<typename Float , int fineSpin, int fineColor, int coarseColor, typename Arg >

__device__ __host__ void quda::computeAV	(	Arg &	arg,
		int	parity,
		int	x_cb,
		int	ch,
		int	ic_c
	)

inline

Calculates the matrix A V^{s,c'}(x) = A^{c}(x) * V^{s,c}(x), where s = fine spin, c' = coarse color, and c = fine color.

Definition at line 230 of file coarse_op_kernel.cuh.

References quda::CalculateYArg< Float, fineSpin, coarseSpin, fineColor, coarseColor, coarseGauge, coarseGaugeAtomic, fineGauge, fineSpinor, fineSpinorTmp, fineSpinorV, fineClover >::AV, quda::linalg::Cholesky< Mat, T, N, fast >::backward(), quda::linalg::Cholesky< Mat, T, N, fast >::forward(), s, and quda::CalculateYArg< Float, fineSpin, coarseSpin, fineColor, coarseColor, coarseGauge, coarseGaugeAtomic, fineGauge, fineSpinor, fineSpinorTmp, fineSpinorV, fineClover >::V.

Here is the call graph for this function:

◆ ComputeAVCPU()

template<typename Float , int fineSpin, int fineColor, int coarseColor, typename Arg >

void quda::ComputeAVCPU ( Arg & arg )

Definition at line 272 of file coarse_op_kernel.cuh.

References arg(), and parity.

Here is the call graph for this function:

◆ ComputeAVGPU()

template<typename Float , int fineSpin, int fineColor, int coarseColor, typename Arg >

__global__ void quda::ComputeAVGPU ( Arg arg )

Definition at line 288 of file coarse_op_kernel.cuh.

References arg(), and parity.

Here is the call graph for this function:

◆ computeBeta()

void quda::computeBeta	(	Complex **	beta,
		std::vector< ColorSpinorField *>	Ap,
		int	i,
		int	N,
		int	k
	)

Definition at line 62 of file inv_gcr_quda.cpp.

References quda::blas::cDotProduct(), and printfQuda.

Referenced by orthoDir().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ computeClover()

void quda::computeClover	(	CloverField &	clover,
		const GaugeField &	gauge,
		double	coeff,
		QudaFieldLocation	location
	)

Definition at line 204 of file clover_quda.cu.

References quda::CloverArg< Float, nSpin, nColor, dynamic_clover_ >::clover, errorQuda, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, and QUDA_SINGLE_PRECISION.

Referenced by quda::cudaCloverField::compute(), createCloverQuda(), and quda::FullClover::FullClover().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ computeCloverForce()

void quda::computeCloverForce	(	GaugeField &	force,
		const GaugeField &	U,
		std::vector< ColorSpinorField *> &	x,
		std::vector< ColorSpinorField *> &	p,
		std::vector< double > &	coeff
	)

Compute the force contribution from the solver solution fields.

Force(x, mu) = U(x, mu) * sum_{i=1}^{nvec} ( P_mu^+ x(x+mu) p(x)^\dagger + P_mu^- p(x+mu) x(x)^\dagger )

M = A_even - kappa^2 * Dslash * A_odd^{-1} * Dslash
x(even) = M^{-1} b(even)
x(odd) = A_odd^{-1} * Dslash * x(even)
p(even) = M * x(even)
p(odd) = A_odd^{-1} * Dslash^\dagger * M * x(even)

Parameters

force[out,in]	The resulting force field
U	The input gauge field
x	Solution field (both parities)
p	Intermediate vectors (both parities)
coeff	Multiplicative coefficient (e.g., dt * residue)

Definition at line 465 of file clover_outer_product.cu.

References checkCudaError, errorQuda, quda::ColorSpinorField::GhostFace(), quda::GaugeField::Order(), parity, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_FLOAT2_GAUGE_ORDER, QUDA_RECONSTRUCT_12, QUDA_RECONSTRUCT_NO, QUDA_SINGLE_PRECISION, and quda::GaugeField::Reconstruct().

Referenced by computeCloverForceQuda(), and quda::FullClover::FullClover().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ computeCloverSigmaOprod()

void quda::computeCloverSigmaOprod	(	GaugeField &	oprod,
		std::vector< ColorSpinorField *> &	x,
		std::vector< ColorSpinorField *> &	p,
		std::vector< std::vector< double > > &	coeff
	)

Compute the outer product from the solver solution fields arising from the diagonal term of the fermion bilinear in direction mu,nu and sum to outer product field.

Parameters

oprod[out,in]	Computed outer product field (tensor matrix field)
x[in]	Solution field (both parities)
p[in]	Intermediate vectors (both parities)
coeff[in]	Multiplicative coefficient (e.g., dt * residue), one for each parity

Definition at line 98 of file clover_sigma_outer_product.cu.

References checkCudaError, errorQuda, MAX_NVECTOR, quda::GaugeField::Order(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_FLOAT2_GAUGE_ORDER, and Spinor< RegType, StoreType, N, write >::set().

Referenced by computeCloverForceQuda(), and quda::FullClover::FullClover().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ computeCloverSigmaTrace()

void quda::computeCloverSigmaTrace	(	GaugeField &	output,
		const CloverField &	clover,
		double	coeff
	)

Compute the matrix tensor field necessary for the force calculation from the clover trace action. This computes a tensor field [mu,nu].

Parameters

output	The computed matrix field (tensor matrix field)
clover	The input clover field
coeff	Scalar coefficient multiplying the result (e.g., stepsize)

Definition at line 242 of file clover_trace_quda.cu.

References errorQuda, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, and QUDA_SINGLE_PRECISION.

Referenced by computeCloverForceQuda(), and quda::FullClover::FullClover().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ computeCoarseClover()

template<bool from_coarse, typename Float , int fineSpin, int coarseSpin, int fineColor, int coarseColor, typename Arg >

__device__ __host__ void quda::computeCoarseClover	(	Arg &	arg,
		int	parity,
		int	x_cb,
		int	ic_c,
		int	jc_c
	)

Definition at line 928 of file coarse_op_kernel.cuh.

References conj(), getCoords(), QUDA_MAX_DIM, s, and quda::CalculateYArg< Float, fineSpin, coarseSpin, fineColor, coarseColor, coarseGauge, coarseGaugeAtomic, fineGauge, fineSpinor, fineSpinorTmp, fineSpinorV, fineClover >::X.

Here is the call graph for this function:

◆ ComputeCoarseCloverCPU()

template<bool from_coarse, typename Float , int fineSpin, int coarseSpin, int fineColor, int coarseColor, typename Arg >

void quda::ComputeCoarseCloverCPU ( Arg & arg )

Definition at line 988 of file coarse_op_kernel.cuh.

References arg(), and parity.

Here is the call graph for this function:

◆ ComputeCoarseCloverGPU()

template<bool from_coarse, typename Float , int fineSpin, int coarseSpin, int fineColor, int coarseColor, typename Arg >

__global__ void quda::ComputeCoarseCloverGPU ( Arg arg )

Definition at line 1002 of file coarse_op_kernel.cuh.

References arg(), and parity.

Here is the call graph for this function:

◆ computeCoeffs()

template<typename T >

static void quda::computeCoeffs	(	T	d_out[],
		const T	d_p1[],
		const T	d_p2[],
		int	k,
		int	j,
		int	s,
		const T	gamma[],
		const T	rho[],
		const T	gamma_kprev[],
		const T	rho_kprev[]
	)

static

Definition at line 79 of file inv_mpcg_quda.cpp.

References applyThirdTerm().

Referenced by quda::MPCG::operator()().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ computeColorContraction()

template<typename real , typename Arg >

__global__ void quda::computeColorContraction ( Arg arg )

Definition at line 38 of file contraction.cuh.

References innerProduct(), mu, quda::ContractionArg< real >::nColor, nColor, quda::ContractionArg< real >::nSpin, parity, quda::ContractionArg< real >::x, and quda::ContractionArg< real >::y.

Here is the call graph for this function:

◆ computeDegrandRossiContraction()

template<typename real , typename Arg >

__global__ void quda::computeDegrandRossiContraction ( Arg arg )

Definition at line 65 of file contraction.cuh.

References innerProduct(), mu, quda::ContractionArg< real >::nColor, quda::ContractionArg< real >::nSpin, parity, quda::ContractionArg< real >::x, and quda::ContractionArg< real >::y.

Here is the call graph for this function:

◆ ComputeEta()

template<libtype which_lib>

void quda::ComputeEta ( GMResDRArgs & args )

Definition at line 157 of file inv_gmresdr_quda.cpp.

References errorQuda.

◆ ComputeEta< libtype::eigen_lib >()

template<>

void quda::ComputeEta< libtype::eigen_lib > ( GMResDRArgs & args )

Definition at line 179 of file inv_gmresdr_quda.cpp.

◆ ComputeEta< libtype::magma_lib >()

template<>

void quda::ComputeEta< libtype::magma_lib > ( GMResDRArgs & args )

Definition at line 159 of file inv_gmresdr_quda.cpp.

References errorQuda, magma_Xgels(), and memset().

Here is the call graph for this function:

◆ computeFmunu()

void quda::computeFmunu	(	GaugeField &	Fmunu,
		const GaugeField &	gauge
	)

Compute the Fmunu tensor.

Parameters

[out]	Fmunu	The Fmunu tensor
[in]	gauge	The gauge field upon which to compute the Fmunu tensor

Definition at line 99 of file gauge_field_strength_tensor.cu.

References errorQuda, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, and QUDA_SINGLE_PRECISION.

Referenced by createCloverQuda(), qChargeDensityQuda(), and qChargeQuda().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ computeFmunuCore()

template<int mu, int nu, typename Float , typename Arg >

__device__ __host__ __forceinline__ void quda::computeFmunuCore	(	Arg &	arg,
		int	idx,
		int	parity
	)

Definition at line 28 of file field_strength_tensor.cuh.

References conj(), getCoords(), linkIndexShift(), mu, and quda::FmunuArg< Float, Fmunu, Gauge >::X.

Here is the call graph for this function:

◆ computeFmunuCPU()

template<typename Float , typename Arg >

void quda::computeFmunuCPU ( Arg & arg )

Definition at line 184 of file field_strength_tensor.cuh.

References arg(), mu, and parity.

Here is the call graph for this function:

◆ computeFmunuKernel()

template<typename Float , typename Arg >

__global__ void quda::computeFmunuKernel ( Arg arg )

Definition at line 166 of file field_strength_tensor.cuh.

References arg(), and parity.

Here is the call graph for this function:

◆ computeForce()

template<typename real , typename Arg , typename Link >

__device__ void quda::computeForce	(	LINK	force,
		Arg &	arg,
		int	xIndex,
		int	yIndex,
		int	mu,
		int	nu
	)

Definition at line 128 of file clover_deriv.cuh.

References conj(), DECLARE_ARRAY, getCoordsExtended(), LINK, linkIndexShift(), and mu.

Here is the call graph for this function:

◆ computeGenGauss()

template<typename Float , typename Arg >

__global__ void quda::computeGenGauss ( Arg arg )

Definition at line 71 of file gauge_random.cu.

References getCoords(), linkIndex(), mu, parity, and setIdentity().

Here is the call graph for this function:

◆ ComputeHarmonicRitz()

template<libtype which_lib>

void quda::ComputeHarmonicRitz ( GMResDRArgs & args )

Definition at line 88 of file inv_gmresdr_quda.cpp.

References errorQuda.

◆ ComputeHarmonicRitz< libtype::eigen_lib >()

template<>

void quda::ComputeHarmonicRitz< libtype::eigen_lib > ( GMResDRArgs & args )

Definition at line 127 of file inv_gmresdr_quda.cpp.

References abs(), norm(), and quda::SortedEvals::SelectSmall().

Here is the call graph for this function:

◆ ComputeHarmonicRitz< libtype::magma_lib >()

template<>

void quda::ComputeHarmonicRitz< libtype::magma_lib > ( GMResDRArgs & args )

Definition at line 90 of file inv_gmresdr_quda.cpp.

References abs(), errorQuda, magma_Xgeev(), magma_Xgesv(), norm(), and quda::SortedEvals::SelectSmall().

Here is the call graph for this function:

◆ computeKSLongLinkForce() [1/2]

template<typename Float , typename Result , typename Oprod , typename Gauge >

void quda::computeKSLongLinkForce	(	Result	res,
		Oprod	oprod,
		Gauge	gauge,
		int	dim[4],
		const GaugeField &	meta,
		QudaFieldLocation	location
	)

Definition at line 353 of file ks_force_quda.cu.

References quda::KSLongLinkForce< Float, Result, Oprod, Gauge >::apply(), arg(), and qudaDeviceSynchronize.

Referenced by computeKSLongLinkForce().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ computeKSLongLinkForce() [2/2]

template<typename Float >

void quda::computeKSLongLinkForce	(	GaugeField &	result,
		const GaugeField &	oprod,
		const GaugeField &	gauge,
		QudaFieldLocation	location
	)

Definition at line 362 of file ks_force_quda.cu.

References computeKSLongLinkForce(), errorQuda, quda::LatticeField::Precision(), QUDA_CUDA_FIELD_LOCATION, QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_RECONSTRUCT_10, QUDA_RECONSTRUCT_NO, QUDA_SINGLE_PRECISION, quda::GaugeField::Reconstruct(), and quda::LatticeField::X().

Here is the call graph for this function:

◆ computeKSLongLinkForceCore()

template<typename Float , typename Result , typename Oprod , typename Gauge >

__host__ __device__ void quda::computeKSLongLinkForceCore	(	KSLongLinkArg< Result, Oprod, Gauge > &	arg,
		int	idx
	)

Definition at line 222 of file ks_force_quda.cu.

◆ computeKSLongLinkForceCPU()

template<typename Float , typename Result , typename Oprod , typename Gauge >

void quda::computeKSLongLinkForceCPU ( KSLongLinkArg< Result, Oprod, Gauge > & arg )

Definition at line 298 of file ks_force_quda.cu.

References arg(), and quda::KSLongLinkArg< Result, Oprod, Gauge >::threads.

Here is the call graph for this function:

◆ computeKSLongLinkForceKernel()

template<typename Float , typename Result , typename Oprod , typename Gauge >

__global__ void quda::computeKSLongLinkForceKernel ( KSLongLinkArg< Result, Oprod, Gauge > arg )

Definition at line 286 of file ks_force_quda.cu.

References arg(), and quda::KSLongLinkArg< Result, Oprod, Gauge >::threads.

Here is the call graph for this function:

◆ computeLinkInverse()

template<class Cmplx >

__device__ __host__ void quda::computeLinkInverse	(	Matrix< Cmplx, 3 > *	uinv,
		const Matrix< Cmplx, 3 > &	u
	)

inline

Definition at line 1023 of file quda_matrix.h.

References getDeterminant().

Here is the call graph for this function:

◆ computeMomAction()

double quda::computeMomAction ( const GaugeField & mom )

Compute and return the global momentum action 1/2 mom^2.

Parameters

mom	Momentum field

Returns: Momentum action contribution

Definition at line 178 of file momentum.cu.

References arg(), quda::blas::bytes, E, errorQuda, quda::blas::flops, forceMonitor(), forceRecord(), getCoords(), getTuning(), getVerbosity(), quda::Matrix< T, N >::L1(), quda::Matrix< T, N >::L2(), LAUNCH_KERNEL_LOCAL_PARITY, linkIndex(), quda::LatticeField::Location(), makeAntiHerm(), norm2(), quda::GaugeField::Order(), parity, quda::LatticeField::Precision(), QUDA_CUDA_FIELD_LOCATION, QUDA_DOUBLE_PRECISION, QUDA_FLOAT2_GAUGE_ORDER, QUDA_RECONSTRUCT_10, QUDA_RECONSTRUCT_NO, QUDA_SINGLE_PRECISION, quda::LatticeField::R(), quda::GaugeField::Reconstruct(), stream, tuneLaunch(), updateMomentum(), quda::LatticeField::VolString(), quda::LatticeField::VolumeCB(), X, and quda::LatticeField::X().

Referenced by forceRecord(), and momActionQuda().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ computeNeighborSum()

template<typename Float , int Nc, typename Vector , typename Arg >

__device__ __host__ void quda::computeNeighborSum	(	Vector &	out,
		Arg &	arg,
		int	x_cb,
		int	parity
	)

inline

Computes out = sum_mu U_mu(x)in(x+d) + U^\dagger_mu(x-d)in(x-d)

Parameters

[out]	out	The out result field
[in]	U	The gauge field
[in]	in	The input field
[in]	x_cb	The checkerboarded site index
[in]	parity	The site parity

Definition at line 51 of file color_spinor_wuppertal.cu.

References conj(), getCoords(), quda::WuppertalSmearingArg< Float, Ns, Nc, gRecon >::in, linkIndexM1(), linkIndexP1(), quda::Arg< real, Ns, Nc, order >::nParity, and quda::WuppertalSmearingArg< Float, Ns, Nc, gRecon >::U.

Here is the call graph for this function:

◆ computeOvrImpSTOUTStep()

template<typename Float , typename Arg >

__global__ void quda::computeOvrImpSTOUTStep ( Arg arg )

Definition at line 463 of file gauge_stout.cuh.

References arg(), conj(), ErrorSU3(), exponentiate_iQ(), getCoords(), getTrace(), inverse(), linkIndexShift(), parity, setIdentity(), and quda::GaugeSTOUTArg< Float, GaugeOr, GaugeDs >::X.

Here is the call graph for this function:

◆ computePlaq()

template<int blockSize, typename Float , typename Gauge >

__global__ void quda::computePlaq ( GaugePlaqArg< Gauge > arg )

Definition at line 49 of file gauge_plaq.cuh.

References arg(), quda::GaugePlaqArg< Gauge >::border, getCoords(), mu, parity, quda::GaugePlaqArg< Gauge >::threads, and quda::GaugePlaqArg< Gauge >::X.

Referenced by quda::GaugePlaq< Float, Gauge >::apply().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ computeQCharge()

double quda::computeQCharge ( const GaugeField & Fmunu )

Compute the topological charge.

Parameters

[in] Fmunu The Fmunu tensor, usually calculated from a smeared configuration

Returns: double The total topological charge

Definition at line 97 of file gauge_qcharge.cu.

References errorQuda, quda::GaugeField::isNative(), quda::GaugeField::Order(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION, and quda::GaugeField::Reconstruct().

Referenced by qChargeQuda().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ computeQChargeDensity()

double quda::computeQChargeDensity	(	const GaugeField &	Fmunu,
		void *	result
	)

Compute the topological charge density per lattice site.

Parameters

[in]	Fmunu	The Fmunu tensor, usually calculated from a smeared configuration
[out]	result	The topological charge at each lattice site

Returns: double The total topological charge

Definition at line 116 of file gauge_qcharge.cu.

References errorQuda, quda::GaugeField::isNative(), quda::GaugeField::Order(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION, and quda::GaugeField::Reconstruct().

Referenced by qChargeDensityQuda().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ ComputeRitz()

template<libtype which_lib>

void quda::ComputeRitz ( EigCGArgs & args )

Definition at line 133 of file inv_eigcg_quda.cpp.

References errorQuda.

◆ ComputeRitz< libtype::eigen_lib >()

template<>

void quda::ComputeRitz< libtype::eigen_lib > ( EigCGArgs & args )

Definition at line 136 of file inv_eigcg_quda.cpp.

◆ ComputeRitz< libtype::magma_lib >()

template<>

void quda::ComputeRitz< libtype::magma_lib > ( EigCGArgs & args )

Definition at line 164 of file inv_eigcg_quda.cpp.

References errorQuda, and magma_Xheev().

Here is the call graph for this function:

◆ computeStaggeredOprod() [1/2]

void quda::computeStaggeredOprod	(	GaugeField *	out[],
		ColorSpinorField &	in,
		const double	coeff[],
		int	nFace
	)

Compute the outer-product field between the staggered quark field's one and (for HISQ and ASQTAD) three hop sites. For example:

out[0][d](x) = (in(x+1_d) x conj(in(x)))
out[1][d](x) = (in(x+3_d) x conj(in(x)))

where 1_d and 3_d represent a relative shift of magnitude 1 and 3 in dimension d, respectively

Note out[1] is only computed if nFace=3

Parameters

[out]	out	Array of nFace outer-product matrix fields
[in]	in	Input quark field
[in]	coeff	Coefficient
[in]	nFace	Number of faces (1 or 3)

Definition at line 447 of file staggered_oprod.cu.

References errorQuda, quda::ColorSpinorField::Even(), and quda::ColorSpinorField::Odd().

Referenced by computeHISQForceQuda(), and computeStaggeredForceQuda().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ computeStaggeredOprod() [2/2]

void quda::computeStaggeredOprod	(	GaugeField &	outA,
		GaugeField &	outB,
		ColorSpinorField &	inEven,
		ColorSpinorField &	inOdd,
		int	parity,
		const double	coeff[2],
		int	nFace
	)

Definition at line 404 of file staggered_oprod.cu.

References quda::cudaColorSpinorField::allocateGhostBuffer(), errorQuda, quda::GaugeField::Order(), parity, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_FLOAT2_GAUGE_ORDER, and QUDA_SINGLE_PRECISION.

Here is the call graph for this function:

◆ computeStaple()

template<typename Float , typename Arg , typename Link >

__host__ __device__ void quda::computeStaple	(	Arg &	arg,
		int	idx,
		int	parity,
		int	dir,
		Link &	staple
	)

Definition at line 36 of file gauge_ape.cuh.

References conj(), getCoords(), linkIndexShift(), mu, setZero(), and quda::GaugeAPEArg< Float, GaugeOr, GaugeDs >::X.

Referenced by fatLongKSLink(), and quda::GaugeSTOUTArg< Float, GaugeOr, GaugeDs >::GaugeSTOUTArg().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ computeStapleRectangle()

template<typename Float , typename Arg , typename Link >

__host__ __device__ void quda::computeStapleRectangle	(	Arg &	arg,
		int	idx,
		int	parity,
		int	dir,
		Link &	staple,
		Link &	rectangle
	)

Definition at line 232 of file gauge_stout.cuh.

References conj(), getCoords(), linkIndexShift(), mu, setZero(), and quda::GaugeSTOUTArg< Float, GaugeOr, GaugeDs >::X.

Here is the call graph for this function:

◆ computeSTOUTStep()

template<typename Float , typename Arg >

__global__ void quda::computeSTOUTStep ( Arg arg )

Definition at line 96 of file gauge_stout.cuh.

References arg(), conj(), ErrorSU3(), exponentiate_iQ(), getCoords(), getTrace(), inverse(), linkIndexShift(), parity, setIdentity(), and quda::GaugeSTOUTArg< Float, GaugeOr, GaugeDs >::X.

Here is the call graph for this function:

◆ computeTMAV()

template<typename Float , int fineSpin, int fineColor, int coarseColor, typename Arg >

__device__ __host__ void quda::computeTMAV	(	Arg &	arg,
		int	parity,
		int	x_cb,
		int	v
	)

inline

Calculates the matrix A V^{s,c'}(x) = A^{c}(x) * V^{s,c}(x) for twisted-mass fermions, where s = fine spin, c' = coarse color, c = fine color.

Definition at line 312 of file coarse_op_kernel.cuh.

References s.

◆ ComputeTMAVCPU()

template<typename Float , int fineSpin, int fineColor, int coarseColor, typename Arg >

void quda::ComputeTMAVCPU ( Arg & arg )

Definition at line 332 of file coarse_op_kernel.cuh.

References parity.

◆ ComputeTMAVGPU()

template<typename Float , int fineSpin, int fineColor, int coarseColor, typename Arg >

__global__ void quda::ComputeTMAVGPU ( Arg arg )

Definition at line 343 of file coarse_op_kernel.cuh.

References arg(), quda::linalg::Cholesky< Mat, T, N, fast >::invert(), Mat(), nColor, and parity.

Here is the call graph for this function:

◆ computeTMCAV()

template<typename Float , int fineSpin, int fineColor, int coarseColor, typename Arg >

__device__ __host__ void quda::computeTMCAV	(	Arg &	arg,
		int	parity,
		int	x_cb,
		int	ch,
		int	ic_c
	)

inline

Calculates the matrix A V^{s,c'}(x) = A^{c}(x) * V^{s,c}(x) for twisted-clover fermions, where s = fine spin, c' = coarse color, c = fine color.

Definition at line 430 of file coarse_op_kernel.cuh.

References quda::CalculateYArg< Float, fineSpin, coarseSpin, fineColor, coarseColor, coarseGauge, coarseGaugeAtomic, fineGauge, fineSpinor, fineSpinorTmp, fineSpinorV, fineClover >::AV, quda::linalg::Cholesky< Mat, T, N, fast >::backward(), quda::linalg::Cholesky< Mat, T, N, fast >::forward(), quda::CalculateYArg< Float, fineSpin, coarseSpin, fineColor, coarseColor, coarseGauge, coarseGaugeAtomic, fineGauge, fineSpinor, fineSpinorTmp, fineSpinorV, fineClover >::mu, s, quda::HMatrix< T, N >::square(), quda::CalculateYArg< Float, fineSpin, coarseSpin, fineColor, coarseColor, coarseGauge, coarseGaugeAtomic, fineGauge, fineSpinor, fineSpinorTmp, fineSpinorV, fineClover >::UV, and quda::CalculateYArg< Float, fineSpin, coarseSpin, fineColor, coarseColor, coarseGauge, coarseGaugeAtomic, fineGauge, fineSpinor, fineSpinorTmp, fineSpinorV, fineClover >::V.

Here is the call graph for this function:

◆ ComputeTMCAVCPU()

template<typename Float , int fineSpin, int fineColor, int coarseColor, typename Arg >

void quda::ComputeTMCAVCPU ( Arg & arg )

Definition at line 491 of file coarse_op_kernel.cuh.

References arg(), and parity.

Here is the call graph for this function:

◆ ComputeTMCAVGPU()

template<typename Float , int fineSpin, int fineColor, int coarseColor, typename Arg >

__global__ void quda::ComputeTMCAVGPU ( Arg arg )

Definition at line 506 of file coarse_op_kernel.cuh.

References arg(), and parity.

Here is the call graph for this function:

◆ computeUV()

template<bool from_coarse, typename Float , int dim, QudaDirection dir, int fineSpin, int fineColor, int coarseSpin, int coarseColor, typename Wtype , typename Arg >

__device__ __host__ void quda::computeUV	(	Arg &	arg,
		const Wtype &	W,
		int	parity,
		int	x_cb,
		int	ic_c
	)

inline

Calculates the matrix UV^{s,c'}_mu(x) = U^{c}_mu(x) * V^{s,c}_mu(x+mu), where mu = dir, s = fine spin, c' = coarse color, c = fine color.

Definition at line 130 of file coarse_op_kernel.cuh.

References caxpy(), getCoords(), linkIndexP1(), QUDA_FORWARDS, s, and quda::CalculateYArg< Float, fineSpin, coarseSpin, fineColor, coarseColor, coarseGauge, coarseGaugeAtomic, fineGauge, fineSpinor, fineSpinorTmp, fineSpinorV, fineClover >::UV.

Here is the call graph for this function:

◆ ComputeUVCPU()

template<bool from_coarse, typename Float , int dim, QudaDirection dir, int fineSpin, int fineColor, int coarseSpin, int coarseColor, typename Arg >

void quda::ComputeUVCPU ( Arg & arg )

Definition at line 197 of file coarse_op_kernel.cuh.

References arg(), parity, and QUDA_FORWARDS.

Here is the call graph for this function:

◆ ComputeUVGPU()

template<bool from_coarse, typename Float , int dim, QudaDirection dir, int fineSpin, int fineColor, int coarseSpin, int coarseColor, typename Arg >

__global__ void quda::ComputeUVGPU ( Arg arg )

Definition at line 212 of file coarse_op_kernel.cuh.

References arg(), parity, and QUDA_FORWARDS.

Here is the call graph for this function:

◆ computeVUV()

template<bool shared_atomic, bool parity_flip, bool from_coarse, typename Float , int dim, QudaDirection dir, int fineSpin, int fineColor, int coarseSpin, int coarseColor, typename Arg , typename Gamma >

__device__ __host__ void quda::computeVUV	(	Arg &	arg,
		const Gamma &	gamma,
		int	parity,
		int	x_cb,
		int	c_row,
		int	c_col,
		int	parity_coarse_,
		int	coarse_x_cb_
	)

Definition at line 628 of file coarse_op_kernel.cuh.

References arg(), atomicAdd(), conj(), quda::CalculateYArg< Float, fineSpin, coarseSpin, fineColor, coarseColor, coarseGauge, coarseGaugeAtomic, fineGauge, fineSpinor, fineSpinorTmp, fineSpinorV, fineClover >::dim_index, errorQuda, getCoords(), max_color_per_block, parity, QUDA_BACKWARDS, QUDA_MAX_DIM, virtualThreadIdx(), quda::CalculateYArg< Float, fineSpin, coarseSpin, fineColor, coarseColor, coarseGauge, coarseGaugeAtomic, fineGauge, fineSpinor, fineSpinorTmp, fineSpinorV, fineClover >::X, and quda::CalculateYArg< Float, fineSpin, coarseSpin, fineColor, coarseColor, coarseGauge, coarseGaugeAtomic, fineGauge, fineSpinor, fineSpinorTmp, fineSpinorV, fineClover >::Y.

Here is the call graph for this function:

◆ ComputeVUVCPU()

template<bool from_coarse, typename Float , int dim, QudaDirection dir, int fineSpin, int fineColor, int coarseSpin, int coarseColor, typename Arg >

void quda::ComputeVUVCPU ( Arg arg )

Definition at line 779 of file coarse_op_kernel.cuh.

◆ ComputeVUVGPU()

template<bool shared_atomic, bool parity_flip, bool from_coarse, typename Float , int dim, QudaDirection dir, int fineSpin, int fineColor, int coarseSpin, int coarseColor, typename Arg >

__global__ void quda::ComputeVUVGPU ( Arg arg )

Definition at line 857 of file coarse_op_kernel.cuh.

References arg(), and parity.

Here is the call graph for this function:

◆ computeWupperalStep()

template<typename Float , int Ns, int Nc, typename Arg >

__device__ __host__ void quda::computeWupperalStep	(	Arg &	arg,
		int	x_cb,
		int	parity
	)

inline

Definition at line 102 of file color_spinor_wuppertal.cu.

References arg(), quda::WuppertalSmearingArg< Float, Ns, Nc, gRecon >::in, quda::WuppertalSmearingArg< Float, Ns, Nc, gRecon >::out, and quda::WuppertalSmearingArg< Float, Ns, Nc, gRecon >::parity.

Here is the call graph for this function:

◆ computeYhat()

template<typename Float , int n, bool compute_max_only, typename Arg >

__device__ __host__ Float quda::computeYhat	(	Arg &	arg,
		int	d,
		int	x_cb,
		int	parity,
		int	i,
		int	j
	)

inline

Definition at line 45 of file coarse_op_preconditioned.cuh.

References caxpy(), conj(), getCoords(), and linkIndexM1().

Here is the call graph for this function:

◆ computeYreverse()

template<typename Float , int nSpin, int nColor, typename Arg >

__device__ __host__ void quda::computeYreverse	(	Arg &	arg,
		int	parity,
		int	x_cb,
		int	ic_c,
		int	jc_c
	)

Compute the forward links from backwards links by flipping the sign of the spin projector

Definition at line 877 of file coarse_op_kernel.cuh.

References quda::CalculateYArg< Float, fineSpin, coarseSpin, fineColor, coarseColor, coarseGauge, coarseGaugeAtomic, fineGauge, fineSpinor, fineSpinorTmp, fineSpinorV, fineClover >::Y.

◆ ComputeYReverseCPU()

template<typename Float , int nSpin, int nColor, typename Arg >

void quda::ComputeYReverseCPU ( Arg & arg )

Definition at line 898 of file coarse_op_kernel.cuh.

References arg(), nColor, and parity.

Here is the call graph for this function:

◆ ComputeYReverseGPU()

template<typename Float , int nSpin, int nColor, typename Arg >

__global__ void quda::ComputeYReverseGPU ( Arg arg )

Definition at line 912 of file coarse_op_kernel.cuh.

References arg(), nColor, and parity.

Here is the call graph for this function:

◆ conj() [1/3]

template<typename ValueType >

__host__ __device__ ValueType quda::conj ( ValueType x )

inline

Definition at line 130 of file complex_quda.h.

Referenced by applyClover(), applyCovDev(), applyDslash(), applyLaplace(), applyStaggered(), applyWilson(), applyWilsonTM(), quda::CG::blocksolve(), checkUnitary(), computeAPEStep(), computeCoarseClover(), computeFmunuCore(), computeForce(), quda::MPBiCGstab::computeMatrixPowers(), computeNeighborSum(), computeOvrImpSTOUTStep(), computeStaple(), computeStapleRectangle(), computeSTOUTStep(), computeVUV(), computeYhat(), conj(), ErrorSU3(), expsu3(), quda::GMResDR::FlexArnoldiProcedure(), quda::GaugeSTOUTArg< Float, GaugeOr, GaugeDs >::GaugeSTOUTArg(), quda::gauge::Reconstruct< 13, Float, ghostExchange_, stag_phase >::getPhase(), quda::gauge::Reconstruct< 9, Float, ghostExchange_, stag_phase >::getPhase(), quda::blas::hDotProduct(), quda::blas::hDotProduct_Anorm(), quda::Deflation::increment(), isUnitary(), quda::Matrix< T, N >::isUnitary(), llfat_mult_su3_an(), llfat_mult_su3_na(), makeAntiHerm(), multiplyVUV(), quda::clover::Accessor< Float, nColor, nSpin, QUDA_FLOAT2_CLOVER_ORDER >::operator()(), quda::BiCGstab::operator()(), quda::SimpleBiCGstab::operator()(), quda::MPBiCGstab::operator()(), outerProd(), plaquette(), polarSu3(), rotateCoarseColor(), setUnitarizeLinksConstants(), sigmaOprod(), quda::gauge::Reconstruct< 12, Float, ghostExchange_ >::Unpack(), quda::gauge::Reconstruct< 13, Float, ghostExchange_, stag_phase >::Unpack(), and quda::gauge::Reconstruct< 8, Float, ghostExchange_ >::Unpack().

◆ conj() [2/3]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::conj ( const complex< ValueType > & z )

inline

Returns the complex conjugate of z.

Definition at line 1054 of file complex_quda.h.

◆ conj() [3/3]

template<class T , int N>

__device__ __host__ Matrix<T,N> quda::conj ( const Matrix< T, N > & other )

inline

Definition at line 596 of file quda_matrix.h.

References conj().

Here is the call graph for this function:

◆ constant()

template<class T >

void quda::constant	(	T &	t,
		int	k,
		int	s,
		int	c
	)

Set all space-time real elements at spin s and color c of the field equal to k

Definition at line 38 of file color_spinor_util.cu.

References parity.

Referenced by genericSource().

Here is the caller graph for this function:

◆ constantInv()

template<typename real , int nColor, bool dagger, Dslash5Type type, bool shared, typename Vector , typename Arg >

__device__ __host__ Vector quda::constantInv	(	Arg &	arg,
		int	parity,
		int	x_cb,
		int	s_
	)

inline

Apply the M5 inverse operator at a given site on the lattice. This is the original algorithm as described in Kim and Izubuchi (LATTICE 2013_033), where the b and c coefficients are constant along the Ls dimension, so it is suitable for Shamir and Mobius domain-wall fermions.

Template Parameters

shared Whether to use a shared memory scratch pad to store the input field across the Ls dimension to minimize global memory reads.

Parameters

[in]	arg	Argument struct containing any meta data and accessors
[in]	parity	Parity we are on
[in]	x_cb	Checkerboarded 4-d space-time index
[in]	s_	Ls dimension coordinate

Definition at line 295 of file dslash_domain_wall_m5.cuh.

References __fast_pow(), dagger, exp(), in, quda::VectorCache< real, Vector >::load(), out, s, quda::VectorCache< real, Vector >::save(), and quda::VectorCache< real, Vector >::sync().

Here is the call graph for this function:

◆ contractQuda()

void quda::contractQuda	(	const ColorSpinorField &	x,
		const ColorSpinorField &	y,
		void *	result,
		QudaContractType	cType
	)

Definition at line 107 of file contract.cu.

References checkPrecision, errorQuda, quda::ColorSpinorField::GammaBasis(), quda::ColorSpinorField::Ncolor(), quda::ColorSpinorField::Nspin(), quda::LatticeField::Precision(), QUDA_DEGRAND_ROSSI_GAMMA_BASIS, QUDA_DOUBLE_PRECISION, and QUDA_SINGLE_PRECISION.

Referenced by contractQuda(), and test().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ convert() [1/2]

template<typename OutputType , typename InputType >

__device__ void quda::convert	(	OutputType	x[],
		InputType	y[],
		const int	N
	)

inline

Convert a vector of type InputType to type OutputType.

The main current limitation is that there is an implicit assumption that N * sizeof(OutputType) / sizeof(InputType) is an integer. E.g., you cannot convert a vector of 9 float2s into a vector of 5 float4s.

Parameters

x	Output vector.
y	Input vector.
N	Length of output vector.

Definition at line 149 of file convert.h.

References copyFloatN().

Here is the call graph for this function:

◆ convert() [2/2]

template<typename Float , int nSpin, int nColor, typename Arg >

__device__ __host__ void quda::convert	(	Arg &	arg,
		int	parity,
		int	x_cb,
		int	c_row,
		int	c_col
	)

Convert the field from the atomic format to the required computation format, e.g. fixed point to floating point

Definition at line 1096 of file coarse_op_kernel.cuh.

References in.

◆ convert< double2, double4 >()

template<>

__device__ void quda::convert< double2, double4 >	(	double2	x[],
		double4	y[],
		const int	N
	)

inline

Definition at line 176 of file convert.h.

◆ convert< double2, float4 >()

template<>

__device__ void quda::convert< double2, float4 >	(	double2	x[],
		float4	y[],
		const int	N
	)

inline

Definition at line 268 of file convert.h.

◆ convert< double2, short4 >()

template<>

__device__ void quda::convert< double2, short4 >	(	double2	x[],
		short4	y[],
		const int	N
	)

inline

Definition at line 238 of file convert.h.

◆ convert< double4, double2 >()

template<>

__device__ void quda::convert< double4, double2 >	(	double4	x[],
		double2	y[],
		const int	N
	)

inline

Definition at line 170 of file convert.h.

◆ convert< double4, float2 >()

template<>

__device__ void quda::convert< double4, float2 >	(	double4	x[],
		float2	y[],
		const int	N
	)

inline

Definition at line 277 of file convert.h.

◆ convert< double4, short2 >()

template<>

__device__ void quda::convert< double4, short2 >	(	double4	x[],
		short2	y[],
		const int	N
	)

inline

Definition at line 247 of file convert.h.

◆ convert< float2, double4 >()

template<>

__device__ void quda::convert< float2, double4 >	(	float2	x[],
		double4	y[],
		const int	N
	)

inline

Definition at line 283 of file convert.h.

◆ convert< float2, float4 >()

template<>

__device__ void quda::convert< float2, float4 >	(	float2	x[],
		float4	y[],
		const int	N
	)

inline

Definition at line 191 of file convert.h.

◆ convert< float2, short2 >()

template<>

__device__ void quda::convert< float2, short2 >	(	float2	x[],
		short2	y[],
		const int	N
	)

inline

Definition at line 156 of file convert.h.

◆ convert< float2, short4 >()

template<>

__device__ void quda::convert< float2, short4 >	(	float2	x[],
		short4	y[],
		const int	N
	)

inline

Definition at line 207 of file convert.h.

◆ convert< float4, double2 >()

template<>

__device__ void quda::convert< float4, double2 >	(	float4	x[],
		double2	y[],
		const int	N
	)

inline

Definition at line 262 of file convert.h.

◆ convert< float4, float2 >()

template<>

__device__ void quda::convert< float4, float2 >	(	float4	x[],
		float2	y[],
		const int	N
	)

inline

Definition at line 185 of file convert.h.

◆ convert< float4, short2 >()

template<>

__device__ void quda::convert< float4, short2 >	(	float4	x[],
		short2	y[],
		const int	N
	)

inline

Definition at line 216 of file convert.h.

◆ convert< float4, short4 >()

template<>

__device__ void quda::convert< float4, short4 >	(	float4	x[],
		short4	y[],
		const int	N
	)

inline

Definition at line 162 of file convert.h.

◆ convert< short2, double4 >()

template<>

__device__ void quda::convert< short2, double4 >	(	short2	x[],
		double4	y[],
		const int	N
	)

inline

Definition at line 253 of file convert.h.

References d2i().

Here is the call graph for this function:

◆ convert< short2, float4 >()

template<>

__device__ void quda::convert< short2, float4 >	(	short2	x[],
		float4	y[],
		const int	N
	)

inline

Definition at line 222 of file convert.h.

References f2i().

Here is the call graph for this function:

◆ convert< short4, double2 >()

template<>

__device__ void quda::convert< short4, double2 >	(	short4	x[],
		double2	y[],
		const int	N
	)

inline

Definition at line 231 of file convert.h.

References d2i().

Here is the call graph for this function:

◆ convert< short4, float2 >()

template<>

__device__ void quda::convert< short4, float2 >	(	short4	x[],
		float2	y[],
		const int	N
	)

inline

Definition at line 200 of file convert.h.

References f2i().

Here is the call graph for this function:

◆ ConvertCPU()

template<typename Float , int nSpin, int nColor, typename Arg >

void quda::ConvertCPU ( Arg & arg )

Definition at line 1133 of file coarse_op_kernel.cuh.

References arg(), nColor, and parity.

Here is the call graph for this function:

◆ ConvertGPU()

template<typename Float , int nSpin, int nColor, typename Arg >

__global__ void quda::ConvertGPU ( Arg arg )

Definition at line 1147 of file coarse_op_kernel.cuh.

References arg(), nColor, and parity.

Here is the call graph for this function:

◆ coordsFromFaceIndex() [1/2]

template<int nDim, QudaPCType type, int dim_, int nLayers, typename Int , typename Arg >

__device__ __host__ void quda::coordsFromFaceIndex	(	int &	idx,
		int &	cb_idx,
		Int *const	x,
		int	face_idx,
		const int &	face_num,
		int	parity,
		const Arg &	arg
	)

inline

Compute the full-lattice coordinates from the input face index. This is used by the Wilson-like halo update kernels, and can deal with 4-d or 5-d field and 4-d or 5-d preconditioning.

Parameters

[out]	idx	The full lattice coordinate
[out]	cb_idx	The checkerboarded lattice coordinate
[out]	x	Coordinates we are computing
[in]	face_idx	Input checkerboarded face index
[in]	face_num	Face number
[in]	parity	Parity index
[in]	arg	Argument struct with required meta data

Definition at line 488 of file index_helper.cuh.

References EXTERIOR_KERNEL_ALL, INTERIOR_KERNEL, QUDA_4D_PC, QUDA_5D_PC, and X.

◆ coordsFromFaceIndex() [2/2]

template<int nDim, QudaPCType type, int dim_, int nLayers, typename Int , typename Arg >

__device__ __host__ void quda::coordsFromFaceIndex	(	int &	idx,
		int &	cb_idx,
		Int *const	x,
		int	face_idx,
		const int &	face_num,
		const Arg &	arg
	)

inline

Overloaded variant of coordsFromFaceIndex where we use the parity declared in arg.

Definition at line 585 of file index_helper.cuh.

References arg().

Here is the call graph for this function:

◆ copy() [1/15]

template<typename T1 , typename T2 >

__host__ __device__ void quda::copy	(	T1 &	a,
		const T2 &	b
	)

inline

Definition at line 152 of file register_traits.h.

Referenced by computeCloverForceQuda(), copy_and_scale(), copy_scaled(), genericCopyColorSpinor(), quda::clover::FloatNOrder< Float, length, N, add_rho, huge_alloc >::load(), quda::gauge::FloatNOrder< Float, length, N, reconLenParam, stag_phase, huge_alloc, ghostExchange_, use_inphase >::load(), quda::gauge::FloatNOrder< Float, length, N, reconLenParam, stag_phase, huge_alloc, ghostExchange_, use_inphase >::loadGhost(), quda::gauge::FloatNOrder< Float, length, N, reconLenParam, stag_phase, huge_alloc, ghostExchange_, use_inphase >::loadGhostEx(), new_load_half(), new_save_half(), old_load_half(), old_save_half(), quda::PreconCG::operator()(), qudaMemcpy_(), qudaMemcpyAsync_(), quda::gauge::FloatNOrder< Float, length, N, reconLenParam, stag_phase, huge_alloc, ghostExchange_, use_inphase >::save(), quda::gauge::FloatNOrder< Float, length, N, reconLenParam, stag_phase, huge_alloc, ghostExchange_, use_inphase >::saveGhost(), quda::gauge::FloatNOrder< Float, length, N, reconLenParam, stag_phase, huge_alloc, ghostExchange_, use_inphase >::saveGhostEx(), and quda::GaugeField::SiteSize().

Here is the caller graph for this function:

◆ copy() [2/15]

template<>

__host__ __device__ void quda::copy	(	double &	a,
		const int2 &	b
	)

inline

Definition at line 154 of file register_traits.h.

References errorQuda.

◆ copy() [3/15]

template<>

__host__ __device__ void quda::copy	(	double2 &	a,
		const int4 &	b
	)

inline

Definition at line 162 of file register_traits.h.

References errorQuda.

◆ copy() [4/15]

template<>

__host__ __device__ void quda::copy	(	float &	a,
		const short &	b
	)

inline

Definition at line 170 of file register_traits.h.

References s2f().

Here is the call graph for this function:

◆ copy() [5/15]

template<>

__host__ __device__ void quda::copy	(	short &	a,
		const float &	b
	)

inline

Definition at line 171 of file register_traits.h.

References f2i().

Here is the call graph for this function:

◆ copy() [6/15]

template<>

__host__ __device__ void quda::copy	(	float2 &	a,
		const short2 &	b
	)

inline

Definition at line 173 of file register_traits.h.

References s2f().

Here is the call graph for this function:

◆ copy() [7/15]

template<>

__host__ __device__ void quda::copy	(	short2 &	a,
		const float2 &	b
	)

inline

Definition at line 177 of file register_traits.h.

References f2i().

Here is the call graph for this function:

◆ copy() [8/15]

template<>

__host__ __device__ void quda::copy	(	float4 &	a,
		const short4 &	b
	)

inline

Definition at line 181 of file register_traits.h.

References s2f().

Here is the call graph for this function:

◆ copy() [9/15]

template<>

__host__ __device__ void quda::copy	(	short4 &	a,
		const float4 &	b
	)

inline

Definition at line 185 of file register_traits.h.

References f2i().

Here is the call graph for this function:

◆ copy() [10/15]

template<>

__host__ __device__ void quda::copy	(	float &	a,
		const char &	b
	)

inline

Definition at line 189 of file register_traits.h.

References c2f().

Here is the call graph for this function:

◆ copy() [11/15]

template<>

__host__ __device__ void quda::copy	(	char &	a,
		const float &	b
	)

inline

Definition at line 190 of file register_traits.h.

References f2i().

Here is the call graph for this function:

◆ copy() [12/15]

template<>

__host__ __device__ void quda::copy	(	float2 &	a,
		const char2 &	b
	)

inline

Definition at line 192 of file register_traits.h.

References c2f().

Here is the call graph for this function:

◆ copy() [13/15]

template<>

__host__ __device__ void quda::copy	(	char2 &	a,
		const float2 &	b
	)

inline

Definition at line 196 of file register_traits.h.

References f2i().

Here is the call graph for this function:

◆ copy() [14/15]

template<>

__host__ __device__ void quda::copy	(	float4 &	a,
		const char4 &	b
	)

inline

Definition at line 200 of file register_traits.h.

References c2f().

Here is the call graph for this function:

◆ copy() [15/15]

template<>

__host__ __device__ void quda::copy	(	char4 &	a,
		const float4 &	b
	)

inline

Definition at line 204 of file register_traits.h.

References f2i().

Here is the call graph for this function:

◆ copy_and_scale() [1/7]

template<typename T1 , typename T2 , typename T3 >

__host__ __device__ void quda::copy_and_scale	(	T1 &	a,
		const T2 &	b,
		const T3 &	c
	)

inline

Specialized variants of the copy function that include an additional scale factor. Note the scale factor is ignored unless the input type (b) is either a short or char vector.

Definition at line 249 of file register_traits.h.

References copy().

Referenced by quda::clover::FloatNOrder< Float, length, N, add_rho, huge_alloc >::load().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ copy_and_scale() [2/7]

template<>

__host__ __device__ void quda::copy_and_scale	(	float4 &	a,
		const short4 &	b,
		const float &	c
	)

inline

Definition at line 254 of file register_traits.h.

References s2f().

Here is the call graph for this function:

◆ copy_and_scale() [3/7]

template<>

__host__ __device__ void quda::copy_and_scale	(	float4 &	a,
		const char4 &	b,
		const float &	c
	)

inline

Definition at line 262 of file register_traits.h.

References c2f().

Here is the call graph for this function:

◆ copy_and_scale() [4/7]

template<>

__host__ __device__ void quda::copy_and_scale	(	float2 &	a,
		const short2 &	b,
		const float &	c
	)

inline

Definition at line 270 of file register_traits.h.

References s2f().

Here is the call graph for this function:

◆ copy_and_scale() [5/7]

template<>

__host__ __device__ void quda::copy_and_scale	(	float2 &	a,
		const char2 &	b,
		const float &	c
	)

inline

Definition at line 276 of file register_traits.h.

References c2f().

Here is the call graph for this function:

◆ copy_and_scale() [6/7]

template<>

__host__ __device__ void quda::copy_and_scale	(	float &	a,
		const short &	b,
		const float &	c
	)

inline

Definition at line 282 of file register_traits.h.

References s2f().

Here is the call graph for this function:

◆ copy_and_scale() [7/7]

template<>

__host__ __device__ void quda::copy_and_scale	(	float &	a,
		const char &	b,
		const float &	c
	)

inline

Definition at line 287 of file register_traits.h.

References c2f().

Here is the call graph for this function:

◆ copy_scaled() [1/7]

template<typename T1 , typename T2 >

__host__ __device__ void quda::copy_scaled	(	T1 &	a,
		const T2 &	b
	)

inline

Definition at line 209 of file register_traits.h.

References copy().

Here is the call graph for this function:

◆ copy_scaled() [2/7]

template<>

__host__ __device__ void quda::copy_scaled	(	short4 &	a,
		const float4 &	b
	)

inline

Definition at line 211 of file register_traits.h.

References f2i().

Here is the call graph for this function:

◆ copy_scaled() [3/7]

template<>

__host__ __device__ void quda::copy_scaled	(	char4 &	a,
		const float4 &	b
	)

inline

Definition at line 219 of file register_traits.h.

References f2i().

Here is the call graph for this function:

◆ copy_scaled() [4/7]

template<>

__host__ __device__ void quda::copy_scaled	(	short2 &	a,
		const float2 &	b
	)

inline

Definition at line 227 of file register_traits.h.

References f2i().

Here is the call graph for this function:

◆ copy_scaled() [5/7]

template<>

__host__ __device__ void quda::copy_scaled	(	char2 &	a,
		const float2 &	b
	)

inline

Definition at line 233 of file register_traits.h.

References f2i().

Here is the call graph for this function:

◆ copy_scaled() [6/7]

template<>

__host__ __device__ void quda::copy_scaled	(	short &	a,
		const float &	b
	)

inline

Definition at line 239 of file register_traits.h.

References f2i().

Here is the call graph for this function:

◆ copy_scaled() [7/7]

template<>

__host__ __device__ void quda::copy_scaled	(	char &	a,
		const float &	b
	)

inline

Definition at line 241 of file register_traits.h.

References f2i().

Here is the call graph for this function:

◆ copyArrayToLink() [1/2]

void quda::copyArrayToLink	(	Matrix< float2, 3 > *	link,
		float *	array
	)

inline

Definition at line 1061 of file quda_matrix.h.

Referenced by isUnitary(), and unitarizeLinksCPU().

Here is the caller graph for this function:

◆ copyArrayToLink() [2/2]

template<class Cmplx , class Real >

void quda::copyArrayToLink	(	Matrix< Cmplx, 3 > *	link,
		Real *	array
	)

inline

Definition at line 1074 of file quda_matrix.h.

◆ copyColorSpinor()

template<typename Arg , typename Basis >

void quda::copyColorSpinor	(	Arg &	arg,
		const Basis &	basis
	)

CPU function to reorder spinor fields.

Definition at line 136 of file copy_color_spinor.cuh.

References quda::ColorSpinor< Float, Nc, Ns >::data, in, quda::Arg< real, Ns, Nc, order >::nParity, out, parity, and quda::Arg< real, Ns, Nc, order >::volumeCB.

Referenced by quda::CopyColorSpinor< Ns, Arg >::apply(), and quda::CopyColorSpinor< 4, Arg >::apply().

Here is the caller graph for this function:

◆ copyColorSpinorKernel()

template<typename Arg , typename Basis >

__global__ void quda::copyColorSpinorKernel	(	Arg	arg,
		Basis	basis
	)

CUDA kernel to reorder spinor fields. Adopts a similar form to the CPU version, using the same inlined functions.

Definition at line 149 of file copy_color_spinor.cuh.

References quda::ColorSpinor< Float, Nc, Ns >::data, in, out, parity, and quda::Arg< real, Ns, Nc, order >::volumeCB.

Referenced by quda::CopyColorSpinor< Ns, Arg >::apply(), and quda::CopyColorSpinor< 4, Arg >::apply().

Here is the caller graph for this function:

◆ copyColumn()

template<class T , int N>

__device__ __host__ void quda::copyColumn	(	const Matrix< T, N > &	m,
		int	c,
		Array< T, N > *	a
	)

inline

Definition at line 793 of file quda_matrix.h.

Referenced by getRealBidiagMatrix().

Here is the caller graph for this function:

◆ copyExtendedColorSpinor() [1/2]

template<int Ns, typename dstFloat , typename srcFloat >

void quda::copyExtendedColorSpinor	(	ColorSpinorField &	dst,
		const ColorSpinorField &	src,
		const int	parity,
		const QudaFieldLocation	location,
		dstFloat *	Dst,
		srcFloat *	Src,
		float *	dstNorm,
		float *	srcNorm
	)

Definition at line 360 of file extended_color_spinor_utilities.cu.

References quda::ColorSpinorField::Bytes(), errorQuda, quda::ColorSpinorField::FieldOrder(), quda::ColorSpinorField::Ncolor(), quda::ColorSpinorField::Ndim(), quda::ColorSpinorField::Norm(), quda::ColorSpinorField::NormBytes(), parity, QUDA_EVEN_ODD_SITE_ORDER, QUDA_FULL_SITE_SUBSET, QUDA_LEXICOGRAPHIC_SITE_ORDER, QUDA_ODD_EVEN_SITE_ORDER, QUDA_QDPJIT_FIELD_ORDER, quda::ColorSpinorField::SiteOrder(), quda::ColorSpinorField::SiteSubset(), and quda::ColorSpinorField::V().

Here is the call graph for this function:

◆ CopyExtendedColorSpinor()

template<typename dstFloat , typename srcFloat >

void quda::CopyExtendedColorSpinor	(	ColorSpinorField &	dst,
		const ColorSpinorField &	src,
		const int	parity,
		const QudaFieldLocation	location,
		dstFloat *	Dst,
		srcFloat *	Src,
		float *	dstNorm = `0`,
		float *	srcNorm = `0`
	)

Definition at line 428 of file extended_color_spinor_utilities.cu.

References errorQuda, quda::ColorSpinorField::Nspin(), and parity.

Referenced by copyExtendedColorSpinor().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ copyExtendedColorSpinor() [2/2]

void quda::copyExtendedColorSpinor	(	ColorSpinorField &	dst,
		const ColorSpinorField &	src,
		QudaFieldLocation	location,
		const int	parity,
		void *	Dst,
		void *	Src,
		void *	dstNorm,
		void *	srcNorm
	)

Definition at line 454 of file extended_color_spinor_utilities.cu.

References CopyExtendedColorSpinor(), errorQuda, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, and QUDA_SINGLE_PRECISION.

Referenced by quda::XSD::operator()().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ copyExtendedGauge()

void quda::copyExtendedGauge	(	GaugeField &	out,
		const GaugeField &	in,
		QudaFieldLocation	location,
		void *	Out = `0`,
		void *	In = `0`
	)

This function is used for copying the gauge field into an extended gauge field. Defined in copy_gauge_extended.cu.

Parameters

out	The extended output field to which we are copying
in	The input field from which we are copying
location	The location where the copy is performed (CPU or CUDA)
Out	The output buffer (optional)
In	The input buffer (optional)

Definition at line 343 of file copy_gauge_extended.cu.

References copyGaugeEx(), errorQuda, quda::LatticeField::Ndim(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION, and quda::LatticeField::X().

Referenced by computeGaugeFixingOVRQuda(), computeHISQForceQuda(), quda::cudaGaugeField::copy(), quda::cpuGaugeField::copy(), copyExtendedResidentGaugeQuda(), createExtendedGauge(), quda::cpuGaugeField::Gauge_p(), hisq_force_init(), main(), performWuppertalnStep(), quda::cudaGaugeField::saveCPUField(), and saveGaugeQuda().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ copyFloatN() [1/21]

template<typename FloatN >

__device__ void quda::copyFloatN	(	FloatN &	a,
		const FloatN &	b
	)

inline

Definition at line 61 of file convert.h.

Referenced by convert(), SpinorTexture< RegType, StoreType, N >::load(), SpinorTexture< RegType, StoreType, N >::loadGhost(), Texture< InterType, StoreType >::operator[](), and Spinor< RegType, StoreType, N, write >::save().

Here is the caller graph for this function:

◆ copyFloatN() [2/21]

__device__ void quda::copyFloatN	(	float2 &	a,
		const char2 &	b
	)

inline

Definition at line 64 of file convert.h.

References c2f().

Here is the call graph for this function:

◆ copyFloatN() [3/21]

__device__ void quda::copyFloatN	(	float4 &	a,
		const char4 &	b
	)

inline

Definition at line 65 of file convert.h.

References c2f().

Here is the call graph for this function:

◆ copyFloatN() [4/21]

__device__ void quda::copyFloatN	(	double2 &	a,
		const char2 &	b
	)

inline

Definition at line 69 of file convert.h.

References c2d().

Here is the call graph for this function:

◆ copyFloatN() [5/21]

__device__ void quda::copyFloatN	(	double4 &	a,
		const char4 &	b
	)

inline

Definition at line 70 of file convert.h.

References c2d().

Here is the call graph for this function:

◆ copyFloatN() [6/21]

__device__ void quda::copyFloatN	(	float2 &	a,
		const short2 &	b
	)

inline

Definition at line 76 of file convert.h.

References s2f().

Here is the call graph for this function:

◆ copyFloatN() [7/21]

__device__ void quda::copyFloatN	(	float4 &	a,
		const short4 &	b
	)

inline

Definition at line 77 of file convert.h.

References s2f().

Here is the call graph for this function:

◆ copyFloatN() [8/21]

__device__ void quda::copyFloatN	(	double2 &	a,
		const short2 &	b
	)

inline

Definition at line 81 of file convert.h.

References s2d().

Here is the call graph for this function:

◆ copyFloatN() [9/21]

__device__ void quda::copyFloatN	(	double4 &	a,
		const short4 &	b
	)

inline

Definition at line 82 of file convert.h.

References s2d().

Here is the call graph for this function:

◆ copyFloatN() [10/21]

__device__ void quda::copyFloatN	(	float2 &	a,
		const double2 &	b
	)

inline

Definition at line 87 of file convert.h.

◆ copyFloatN() [11/21]

__device__ void quda::copyFloatN	(	double2 &	a,
		const float2 &	b
	)

inline

Definition at line 88 of file convert.h.

◆ copyFloatN() [12/21]

__device__ void quda::copyFloatN	(	float4 &	a,
		const double4 &	b
	)

inline

Definition at line 89 of file convert.h.

◆ copyFloatN() [13/21]

__device__ void quda::copyFloatN	(	double4 &	a,
		const float4 &	b
	)

inline

Definition at line 90 of file convert.h.

◆ copyFloatN() [14/21]

__device__ void quda::copyFloatN	(	short2 &	a,
		const float2 &	b
	)

inline

Definition at line 115 of file convert.h.

References f2i().

Here is the call graph for this function:

◆ copyFloatN() [15/21]

__device__ void quda::copyFloatN	(	short4 &	a,
		const float4 &	b
	)

inline

Definition at line 116 of file convert.h.

References f2i().

Here is the call graph for this function:

◆ copyFloatN() [16/21]

__device__ void quda::copyFloatN	(	short2 &	a,
		const double2 &	b
	)

inline

Definition at line 120 of file convert.h.

References d2i().

Here is the call graph for this function:

◆ copyFloatN() [17/21]

__device__ void quda::copyFloatN	(	short4 &	a,
		const double4 &	b
	)

inline

Definition at line 121 of file convert.h.

References d2i().

Here is the call graph for this function:

◆ copyFloatN() [18/21]

__device__ void quda::copyFloatN	(	char2 &	a,
		const float2 &	b
	)

inline

Definition at line 126 of file convert.h.

References f2i().

Here is the call graph for this function:

◆ copyFloatN() [19/21]

__device__ void quda::copyFloatN	(	char4 &	a,
		const float4 &	b
	)

inline

Definition at line 127 of file convert.h.

References f2i().

Here is the call graph for this function:

◆ copyFloatN() [20/21]

__device__ void quda::copyFloatN	(	char2 &	a,
		const double2 &	b
	)

inline

Definition at line 131 of file convert.h.

References d2i().

Here is the call graph for this function:

◆ copyFloatN() [21/21]

__device__ void quda::copyFloatN	(	char4 &	a,
		const double4 &	b
	)

inline

Definition at line 132 of file convert.h.

References d2i().

Here is the call graph for this function:

◆ copyGauge() [1/5]

template<typename FloatOut , typename FloatIn , int length, typename InOrder >

void quda::copyGauge	(	const InOrder &	inOrder,
		const GaugeField &	out,
		const GaugeField &	in,
		QudaFieldLocation	location,
		FloatOut *	Out,
		FloatOut **	outGhost,
		int	type
	)

Definition at line 7 of file copy_gauge_inc.cu.

References errorQuda, in, quda::GaugeField::isNative(), quda::GaugeField::Order(), out, QUDA_BQCD_GAUGE_ORDER, QUDA_CPS_WILSON_GAUGE_ORDER, QUDA_MILC_GAUGE_ORDER, QUDA_MILC_SITE_GAUGE_ORDER, QUDA_QDP_GAUGE_ORDER, QUDA_QDPJIT_GAUGE_ORDER, QUDA_RECONSTRUCT_12, QUDA_RECONSTRUCT_13, QUDA_RECONSTRUCT_8, QUDA_RECONSTRUCT_9, QUDA_RECONSTRUCT_NO, QUDA_STAGGERED_PHASE_MILC, QUDA_STAGGERED_PHASE_NO, QUDA_STAGGERED_PHASE_TIFR, QUDA_TIFR_GAUGE_ORDER, QUDA_TIFR_PADDED_GAUGE_ORDER, quda::GaugeField::Reconstruct(), and quda::GaugeField::StaggeredPhase().

Here is the call graph for this function:

◆ copyGauge() [2/5]

template<typename FloatOut , typename FloatIn , int length, typename Arg >

void quda::copyGauge ( Arg & arg )

Generic CPU gauge reordering and packing

Definition at line 32 of file copy_gauge.cuh.

References in, length, nColor, quda::gauge::Ncolor(), out, and parity.

Referenced by copyGauge().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ copyGauge() [3/5]

template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder >

void quda::copyGauge	(	OutOrder &&	outOrder,
		const InOrder &	inOrder,
		const GaugeField &	out,
		const GaugeField &	in,
		QudaFieldLocation	location,
		int	type
	)

Definition at line 135 of file copy_gauge_helper.cuh.

References quda::CopyGauge< FloatOut, FloatIn, length, Arg >::apply(), arg(), errorQuda, quda::GaugeField::Geometry(), QUDA_COARSE_GEOMETRY, QUDA_CPU_FIELD_LOCATION, QUDA_VECTOR_GEOMETRY, quda::CopyGauge< FloatOut, FloatIn, length, Arg >::set_ghost(), and warningQuda.

Here is the call graph for this function:

◆ copyGauge() [4/5]

template<typename FloatOut , typename FloatIn , int length>

void quda::copyGauge	(	GaugeField &	out,
		const GaugeField &	in,
		QudaFieldLocation	location,
		FloatOut *	Out,
		FloatIn *	In,
		FloatOut **	outGhost,
		FloatIn **	inGhost,
		int	type
	)

Definition at line 144 of file copy_gauge_inc.cu.

References checkMomOrder(), errorQuda, in, quda::GaugeField::isNative(), quda::GaugeField::Order(), out, QUDA_BQCD_GAUGE_ORDER, QUDA_CPS_WILSON_GAUGE_ORDER, QUDA_MILC_GAUGE_ORDER, QUDA_MILC_SITE_GAUGE_ORDER, QUDA_QDP_GAUGE_ORDER, QUDA_QDPJIT_GAUGE_ORDER, QUDA_RECONSTRUCT_12, QUDA_RECONSTRUCT_13, QUDA_RECONSTRUCT_8, QUDA_RECONSTRUCT_9, QUDA_RECONSTRUCT_NO, QUDA_STAGGERED_PHASE_MILC, QUDA_STAGGERED_PHASE_NO, QUDA_TIFR_GAUGE_ORDER, QUDA_TIFR_PADDED_GAUGE_ORDER, quda::GaugeField::Reconstruct(), and quda::GaugeField::StaggeredPhase().

Here is the call graph for this function:

◆ copyGauge() [5/5]

template<typename FloatOut , typename FloatIn >

void quda::copyGauge	(	GaugeField &	out,
		const GaugeField &	in,
		QudaFieldLocation	location,
		FloatOut *	Out,
		FloatIn *	In,
		FloatOut **	outGhost,
		FloatIn **	inGhost,
		int	type
	)

Definition at line 284 of file copy_gauge_inc.cu.

References arg(), checkMomOrder(), copyGauge(), copyGenericGauge(), errorQuda, quda::GaugeField::Geometry(), in, quda::GaugeField::LinkType(), quda::GaugeField::Ncolor(), quda::GaugeField::Order(), out, quda::LatticeField::Precision(), QUDA_ASQTAD_MOM_LINKS, QUDA_DOUBLE_PRECISION, QUDA_FLOAT2_GAUGE_ORDER, QUDA_HALF_PRECISION, QUDA_MILC_GAUGE_ORDER, QUDA_MILC_SITE_GAUGE_ORDER, QUDA_QUARTER_PRECISION, QUDA_SINGLE_PRECISION, QUDA_TIFR_GAUGE_ORDER, QUDA_TIFR_PADDED_GAUGE_ORDER, and QUDA_VECTOR_GEOMETRY.

Here is the call graph for this function:

◆ copyGaugeEx() [1/6]

template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder , bool regularToextended>

__device__ __host__ void quda::copyGaugeEx	(	CopyGaugeExArg< OutOrder, InOrder > &	arg,
		int	X,
		int	parity
	)

Copy a regular/extended gauge field into an extended/regular gauge field

Definition at line 50 of file copy_gauge_extended.cu.

References quda::CopyGaugeExArg< OutOrder, InOrder >::geometry, quda::CopyGaugeExArg< OutOrder, InOrder >::in, in, length, nColor, quda::gauge::Ncolor(), quda::CopyGaugeExArg< OutOrder, InOrder >::out, out, parity, R, X, quda::CopyGaugeExArg< OutOrder, InOrder >::Xin, and quda::CopyGaugeExArg< OutOrder, InOrder >::Xout.

Referenced by copyExtendedGauge().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ copyGaugeEx() [2/6]

template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder , bool regularToextended>

void quda::copyGaugeEx ( CopyGaugeExArg< OutOrder, InOrder > arg )

Definition at line 93 of file copy_gauge_extended.cu.

References arg(), parity, quda::CopyGaugeExArg< OutOrder, InOrder >::volume, and X.

Here is the call graph for this function:

◆ copyGaugeEx() [3/6]

template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder >

void quda::copyGaugeEx	(	OutOrder	outOrder,
		const InOrder	inOrder,
		const int *	E,
		const int *	X,
		const int *	faceVolumeCB,
		const GaugeField &	meta,
		QudaFieldLocation	location
	)

Definition at line 158 of file copy_gauge_extended.cu.

References arg(), checkCudaError, quda::GaugeField::Geometry(), quda::LatticeField::Ndim(), and QUDA_CUDA_FIELD_LOCATION.

Here is the call graph for this function:

◆ copyGaugeEx() [4/6]

template<typename FloatOut , typename FloatIn , int length, typename InOrder >

void quda::copyGaugeEx	(	const InOrder &	inOrder,
		const int *	X,
		GaugeField &	out,
		QudaFieldLocation	location,
		FloatOut *	Out
	)

Definition at line 169 of file copy_gauge_extended.cu.

References errorQuda, quda::GaugeField::isNative(), quda::GaugeField::Nface(), quda::GaugeField::Order(), out, QUDA_MAX_DIM, QUDA_MILC_GAUGE_ORDER, QUDA_QDP_GAUGE_ORDER, QUDA_RECONSTRUCT_12, QUDA_RECONSTRUCT_13, QUDA_RECONSTRUCT_8, QUDA_RECONSTRUCT_9, QUDA_RECONSTRUCT_NO, QUDA_TIFR_GAUGE_ORDER, quda::GaugeField::Reconstruct(), quda::LatticeField::SurfaceCB(), X, and quda::LatticeField::X().

Here is the call graph for this function:

◆ copyGaugeEx() [5/6]

template<typename FloatOut , typename FloatIn , int length>

void quda::copyGaugeEx	(	GaugeField &	out,
		const GaugeField &	in,
		QudaFieldLocation	location,
		FloatOut *	Out,
		FloatIn *	In
	)

Definition at line 250 of file copy_gauge_extended.cu.

References errorQuda, in, quda::GaugeField::isNative(), quda::GaugeField::Order(), out, QUDA_MILC_GAUGE_ORDER, QUDA_QDP_GAUGE_ORDER, QUDA_RECONSTRUCT_12, QUDA_RECONSTRUCT_13, QUDA_RECONSTRUCT_8, QUDA_RECONSTRUCT_9, QUDA_RECONSTRUCT_NO, QUDA_TIFR_GAUGE_ORDER, quda::GaugeField::Reconstruct(), and quda::LatticeField::X().

Here is the call graph for this function:

◆ copyGaugeEx() [6/6]

template<typename FloatOut , typename FloatIn >

void quda::copyGaugeEx	(	GaugeField &	out,
		const GaugeField &	in,
		QudaFieldLocation	location,
		FloatOut *	Out,
		FloatIn *	In
	)

Definition at line 324 of file copy_gauge_extended.cu.

References errorQuda, quda::GaugeField::Geometry(), in, quda::GaugeField::LinkType(), quda::GaugeField::Ncolor(), out, and QUDA_ASQTAD_MOM_LINKS.

Here is the call graph for this function:

◆ copyGaugeExKernel()

template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder , bool regularToextended>

__global__ void quda::copyGaugeExKernel ( CopyGaugeExArg< OutOrder, InOrder > arg )

Definition at line 102 of file copy_gauge_extended.cu.

References arg(), parity, quda::CopyGaugeExArg< OutOrder, InOrder >::volume, and X.

Here is the call graph for this function:

◆ copyGaugeKernel()

template<typename FloatOut , typename FloatIn , int length, typename Arg >

__global__ void quda::copyGaugeKernel ( Arg arg )

Generic CUDA gauge reordering and packing. Adopts a similar form to the CPU version, using the same inlined functions.

Definition at line 96 of file copy_gauge.cuh.

References in, length, nColor, quda::gauge::Ncolor(), out, and parity.

Here is the call graph for this function:

◆ copyGaugeMG() [1/3]

template<typename sFloatOut , typename sFloatIn , int Nc, typename InOrder >

void quda::copyGaugeMG	(	const InOrder &	inOrder,
		GaugeField &	out,
		const GaugeField &	in,
		QudaFieldLocation	location,
		sFloatOut *	Out,
		sFloatOut **	outGhost,
		int	type
	)

Definition at line 10 of file copy_gauge_mg.cu.

References quda::GaugeField::abs_max(), errorQuda, in, quda::GaugeField::isNative(), length, quda::GaugeField::Order(), out, quda::LatticeField::Precision(), QUDA_HALF_PRECISION, QUDA_MILC_GAUGE_ORDER, QUDA_QDP_GAUGE_ORDER, QUDA_RECONSTRUCT_NO, quda::GaugeField::Reconstruct(), and quda::LatticeField::Scale().

Referenced by copyGenericGaugeMG().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ copyGaugeMG() [2/3]

template<typename sFloatOut , typename sFloatIn , int Nc>

void quda::copyGaugeMG	(	GaugeField &	out,
		const GaugeField &	in,
		QudaFieldLocation	location,
		sFloatOut *	Out,
		sFloatIn *	In,
		sFloatOut **	outGhost,
		sFloatIn **	inGhost,
		int	type
	)

Definition at line 74 of file copy_gauge_mg.cu.

References errorQuda, in, quda::GaugeField::isNative(), length, quda::GaugeField::Order(), out, QUDA_MILC_GAUGE_ORDER, QUDA_QDP_GAUGE_ORDER, QUDA_RECONSTRUCT_NO, and quda::GaugeField::Reconstruct().

Here is the call graph for this function:

◆ copyGaugeMG() [3/3]

template<typename FloatOut , typename FloatIn >

void quda::copyGaugeMG	(	GaugeField &	out,
		const GaugeField &	in,
		QudaFieldLocation	location,
		FloatOut *	Out,
		FloatIn *	In,
		FloatOut **	outGhost,
		FloatIn **	inGhost,
		int	type
	)

Definition at line 126 of file copy_gauge_mg.cu.

References errorQuda, in, quda::GaugeField::Ncolor(), and out.

Here is the call graph for this function:

◆ copyGenericClover()

void quda::copyGenericClover	(	CloverField &	out,
		const CloverField &	in,
		bool	inverse,
		QudaFieldLocation	location,
		void *	Out = `0`,
		void *	In = `0`,
		void *	outNorm = `0`,
		void *	inNorm = `0`
	)

This generic function is used for copying the clover field, where the input and output can be in any order and location.

Parameters

out	The output field to which we are copying
in	The input field from which we are copying
inverse	Whether we are copying the inverse term or not
location	The location where the copy is performed (CPU or CUDA)
Out	The output buffer (optional)
In	The input buffer (optional)
outNorm	The output norm buffer (optional)
inNorm	The input norm buffer (optional)

Definition at line 175 of file copy_clover.cu.

References errorQuda, in, inverse(), quda::CloverField::Order(), out, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, and QUDA_SINGLE_PRECISION.

Referenced by quda::cudaCloverField::copy(), quda::FullClover::FullClover(), and quda::cudaCloverField::saveCPUField().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ copyGenericColorSpinor() [1/3]

template<int Ns, int Nc, typename dstFloat , typename srcFloat >

void quda::copyGenericColorSpinor	(	ColorSpinorField &	dst,
		const ColorSpinorField &	src,
		QudaFieldLocation	location,
		dstFloat *	Dst,
		srcFloat *	Src
	)

Definition at line 130 of file copy_color_spinor_mg.cuh.

References quda::ColorSpinorField::Bytes(), errorQuda, quda::ColorSpinorField::FieldOrder(), quda::ColorSpinorField::Ndim(), QUDA_EVEN_ODD_SITE_ORDER, QUDA_FULL_SITE_SUBSET, QUDA_LEXICOGRAPHIC_SITE_ORDER, QUDA_ODD_EVEN_SITE_ORDER, QUDA_QDPJIT_FIELD_ORDER, quda::ColorSpinorField::SiteOrder(), quda::ColorSpinorField::SiteSubset(), quda::ColorSpinorField::V(), and quda::ColorSpinorField::Volume().

Here is the call graph for this function:

◆ CopyGenericColorSpinor() [1/2]

template<int Nc, typename dstFloat , typename srcFloat >

void quda::CopyGenericColorSpinor	(	ColorSpinorField &	dst,
		const ColorSpinorField &	src,
		QudaFieldLocation	location,
		dstFloat *	Dst,
		srcFloat *	Src
	)

Definition at line 184 of file copy_color_spinor_mg.cuh.

References errorQuda, and quda::ColorSpinorField::Nspin().

Here is the call graph for this function:

◆ copyGenericColorSpinor() [2/3]

template<int Ns, int Nc, typename dstFloat , typename srcFloat >

void quda::copyGenericColorSpinor	(	ColorSpinorField &	dst,
		const ColorSpinorField &	src,
		QudaFieldLocation	location,
		dstFloat *	Dst,
		srcFloat *	Src,
		float *	dstNorm,
		float *	srcNorm
	)

Definition at line 374 of file copy_color_spinor.cuh.

References errorQuda, quda::ColorSpinorField::FieldOrder(), quda::ColorSpinorField::Ndim(), QUDA_EVEN_ODD_SITE_ORDER, QUDA_FULL_SITE_SUBSET, QUDA_LEXICOGRAPHIC_SITE_ORDER, QUDA_ODD_EVEN_SITE_ORDER, QUDA_QDPJIT_FIELD_ORDER, quda::ColorSpinorField::SiteOrder(), quda::ColorSpinorField::SiteSubset(), and quda::ColorSpinorField::Volume().

Here is the call graph for this function:

◆ CopyGenericColorSpinor() [2/2]

template<int Nc, typename dstFloat , typename srcFloat >

void quda::CopyGenericColorSpinor	(	ColorSpinorField &	dst,
		const ColorSpinorField &	src,
		QudaFieldLocation	location,
		dstFloat *	Dst,
		srcFloat *	Src,
		float *	dstNorm = `0`,
		float *	srcNorm = `0`
	)

Definition at line 409 of file copy_color_spinor.cuh.

References errorQuda, and quda::ColorSpinorField::Nspin().

Here is the call graph for this function:

◆ copyGenericColorSpinor() [3/3]

void quda::copyGenericColorSpinor	(	ColorSpinorField &	dst,
		const ColorSpinorField &	src,
		QudaFieldLocation	location,
		void *	Dst = `0`,
		void *	Src = `0`,
		void *	dstNorm = `0`,
		void *	srcNorm = `0`
	)

Definition at line 40 of file copy_color_spinor.cu.

Referenced by quda::cpuColorSpinorField::copy(), quda::cudaColorSpinorField::copySpinorField(), quda::cudaColorSpinorField::loadSpinorField(), and quda::cudaColorSpinorField::saveSpinorField().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ copyGenericColorSpinorDD()

void quda::copyGenericColorSpinorDD	(	ColorSpinorField &	dst,
		const ColorSpinorField &	src,
		QudaFieldLocation	location,
		void *	Dst,
		void *	Src,
		void *	a = `0`,
		void *	b = `0`
	)

Definition at line 5 of file copy_color_spinor_dd.cu.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorDH()

void quda::copyGenericColorSpinorDH	(	ColorSpinorField &	dst,
		const ColorSpinorField &	src,
		QudaFieldLocation	location,
		void *	Dst,
		void *	Src,
		void *	a = `0`,
		void *	b = `0`
	)

Definition at line 5 of file copy_color_spinor_dh.cu.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorDQ()

void quda::copyGenericColorSpinorDQ	(	ColorSpinorField &	dst,
		const ColorSpinorField &	src,
		QudaFieldLocation	location,
		void *	Dst,
		void *	Src,
		void *	a = `0`,
		void *	b = `0`
	)

Definition at line 5 of file copy_color_spinor_dq.cu.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorDS()

void quda::copyGenericColorSpinorDS	(	ColorSpinorField &	dst,
		const ColorSpinorField &	src,
		QudaFieldLocation	location,
		void *	Dst,
		void *	Src,
		void *	a = `0`,
		void *	b = `0`
	)

Definition at line 5 of file copy_color_spinor_ds.cu.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorHD()

void quda::copyGenericColorSpinorHD	(	ColorSpinorField &	dst,
		const ColorSpinorField &	src,
		QudaFieldLocation	location,
		void *	Dst,
		void *	Src,
		void *	a = `0`,
		void *	b = `0`
	)

Definition at line 5 of file copy_color_spinor_hd.cu.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorHH()

void quda::copyGenericColorSpinorHH	(	ColorSpinorField &	dst,
		const ColorSpinorField &	src,
		QudaFieldLocation	location,
		void *	Dst,
		void *	Src,
		void *	a = `0`,
		void *	b = `0`
	)

Definition at line 5 of file copy_color_spinor_hh.cu.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorHQ()

void quda::copyGenericColorSpinorHQ	(	ColorSpinorField &	dst,
		const ColorSpinorField &	src,
		QudaFieldLocation	location,
		void *	Dst,
		void *	Src,
		void *	a = `0`,
		void *	b = `0`
	)

Definition at line 5 of file copy_color_spinor_hq.cu.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorHS()

void quda::copyGenericColorSpinorHS	(	ColorSpinorField &	dst,
		const ColorSpinorField &	src,
		QudaFieldLocation	location,
		void *	Dst,
		void *	Src,
		void *	a = `0`,
		void *	b = `0`
	)

Definition at line 5 of file copy_color_spinor_hs.cu.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorMGDD()

void quda::copyGenericColorSpinorMGDD	(	ColorSpinorField &	dst,
		const ColorSpinorField &	src,
		QudaFieldLocation	location,
		void *	Dst,
		void *	Src,
		void *	a = `0`,
		void *	b = `0`
	)

Definition at line 5 of file copy_color_spinor_mg_dd.cu.

References errorQuda, and INSTANTIATE_COLOR.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorMGDS()

void quda::copyGenericColorSpinorMGDS	(	ColorSpinorField &	dst,
		const ColorSpinorField &	src,
		QudaFieldLocation	location,
		void *	Dst,
		void *	Src,
		void *	a = `0`,
		void *	b = `0`
	)

Definition at line 5 of file copy_color_spinor_mg_ds.cu.

References errorQuda, and INSTANTIATE_COLOR.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorMGHH()

void quda::copyGenericColorSpinorMGHH	(	ColorSpinorField &	dst,
		const ColorSpinorField &	src,
		QudaFieldLocation	location,
		void *	Dst,
		void *	Src,
		void *	a = `0`,
		void *	b = `0`
	)

Definition at line 5 of file copy_color_spinor_mg_hh.cu.

References errorQuda, and INSTANTIATE_COLOR.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorMGHQ()

void quda::copyGenericColorSpinorMGHQ	(	ColorSpinorField &	dst,
		const ColorSpinorField &	src,
		QudaFieldLocation	location,
		void *	Dst,
		void *	Src,
		void *	a = `0`,
		void *	b = `0`
	)

Definition at line 5 of file copy_color_spinor_mg_hq.cu.

References errorQuda, and INSTANTIATE_COLOR.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorMGHS()

void quda::copyGenericColorSpinorMGHS	(	ColorSpinorField &	dst,
		const ColorSpinorField &	src,
		QudaFieldLocation	location,
		void *	Dst,
		void *	Src,
		void *	a = `0`,
		void *	b = `0`
	)

Definition at line 5 of file copy_color_spinor_mg_hs.cu.

References errorQuda, and INSTANTIATE_COLOR.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorMGQH()

void quda::copyGenericColorSpinorMGQH	(	ColorSpinorField &	dst,
		const ColorSpinorField &	src,
		QudaFieldLocation	location,
		void *	Dst,
		void *	Src,
		void *	a = `0`,
		void *	b = `0`
	)

Definition at line 5 of file copy_color_spinor_mg_qh.cu.

References errorQuda, and INSTANTIATE_COLOR.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorMGQQ()

void quda::copyGenericColorSpinorMGQQ	(	ColorSpinorField &	dst,
		const ColorSpinorField &	src,
		QudaFieldLocation	location,
		void *	Dst,
		void *	Src,
		void *	a = `0`,
		void *	b = `0`
	)

Definition at line 5 of file copy_color_spinor_mg_qq.cu.

References errorQuda, and INSTANTIATE_COLOR.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorMGQS()

void quda::copyGenericColorSpinorMGQS	(	ColorSpinorField &	dst,
		const ColorSpinorField &	src,
		QudaFieldLocation	location,
		void *	Dst,
		void *	Src,
		void *	a = `0`,
		void *	b = `0`
	)

Definition at line 5 of file copy_color_spinor_mg_qs.cu.

References errorQuda, and INSTANTIATE_COLOR.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorMGSD()

void quda::copyGenericColorSpinorMGSD	(	ColorSpinorField &	dst,
		const ColorSpinorField &	src,
		QudaFieldLocation	location,
		void *	Dst,
		void *	Src,
		void *	a = `0`,
		void *	b = `0`
	)

Definition at line 5 of file copy_color_spinor_mg_sd.cu.

References errorQuda, and INSTANTIATE_COLOR.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorMGSH()

void quda::copyGenericColorSpinorMGSH	(	ColorSpinorField &	dst,
		const ColorSpinorField &	src,
		QudaFieldLocation	location,
		void *	Dst,
		void *	Src,
		void *	a = `0`,
		void *	b = `0`
	)

Definition at line 5 of file copy_color_spinor_mg_sh.cu.

References errorQuda, and INSTANTIATE_COLOR.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorMGSQ()

void quda::copyGenericColorSpinorMGSQ	(	ColorSpinorField &	dst,
		const ColorSpinorField &	src,
		QudaFieldLocation	location,
		void *	Dst,
		void *	Src,
		void *	a = `0`,
		void *	b = `0`
	)

Definition at line 5 of file copy_color_spinor_mg_sq.cu.

References errorQuda, and INSTANTIATE_COLOR.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorMGSS()

void quda::copyGenericColorSpinorMGSS	(	ColorSpinorField &	dst,
		const ColorSpinorField &	src,
		QudaFieldLocation	location,
		void *	Dst,
		void *	Src,
		void *	a = `0`,
		void *	b = `0`
	)

Definition at line 5 of file copy_color_spinor_mg_ss.cu.

References errorQuda, and INSTANTIATE_COLOR.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorQD()

void quda::copyGenericColorSpinorQD	(	ColorSpinorField &	dst,
		const ColorSpinorField &	src,
		QudaFieldLocation	location,
		void *	Dst,
		void *	Src,
		void *	a = `0`,
		void *	b = `0`
	)

Definition at line 5 of file copy_color_spinor_qd.cu.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorQH()

void quda::copyGenericColorSpinorQH	(	ColorSpinorField &	dst,
		const ColorSpinorField &	src,
		QudaFieldLocation	location,
		void *	Dst,
		void *	Src,
		void *	a = `0`,
		void *	b = `0`
	)

Definition at line 5 of file copy_color_spinor_qh.cu.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorQQ()

void quda::copyGenericColorSpinorQQ	(	ColorSpinorField &	dst,
		const ColorSpinorField &	src,
		QudaFieldLocation	location,
		void *	Dst,
		void *	Src,
		void *	a = `0`,
		void *	b = `0`
	)

Definition at line 5 of file copy_color_spinor_qq.cu.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorQS()

void quda::copyGenericColorSpinorQS	(	ColorSpinorField &	dst,
		const ColorSpinorField &	src,
		QudaFieldLocation	location,
		void *	Dst,
		void *	Src,
		void *	a = `0`,
		void *	b = `0`
	)

Definition at line 5 of file copy_color_spinor_qs.cu.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorSD()

void quda::copyGenericColorSpinorSD	(	ColorSpinorField &	dst,
		const ColorSpinorField &	src,
		QudaFieldLocation	location,
		void *	Dst,
		void *	Src,
		void *	a = `0`,
		void *	b = `0`
	)

Definition at line 5 of file copy_color_spinor_sd.cu.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorSH()

void quda::copyGenericColorSpinorSH	(	ColorSpinorField &	dst,
		const ColorSpinorField &	src,
		QudaFieldLocation	location,
		void *	Dst,
		void *	Src,
		void *	a = `0`,
		void *	b = `0`
	)

Definition at line 5 of file copy_color_spinor_sh.cu.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorSQ()

void quda::copyGenericColorSpinorSQ	(	ColorSpinorField &	dst,
		const ColorSpinorField &	src,
		QudaFieldLocation	location,
		void *	Dst,
		void *	Src,
		void *	a = `0`,
		void *	b = `0`
	)

Definition at line 5 of file copy_color_spinor_sq.cu.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorSS()

void quda::copyGenericColorSpinorSS	(	ColorSpinorField &	dst,
		const ColorSpinorField &	src,
		QudaFieldLocation	location,
		void *	Dst,
		void *	Src,
		void *	a = `0`,
		void *	b = `0`
	)

Definition at line 5 of file copy_color_spinor_ss.cu.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericGauge()

void quda::copyGenericGauge	(	GaugeField &	out,
		const GaugeField &	in,
		QudaFieldLocation	location,
		void *	Out = `0`,
		void *	In = `0`,
		void **	ghostOut = `0`,
		void **	ghostIn = `0`,
		int	type = `0`
	)

This function is used for extracting the gauge ghost zone from a gauge field array. Defined in copy_gauge.cu.

Parameters

out	The output field to which we are copying
in	The input field from which we are copying
location	The location of where we are doing the copying (CPU or CUDA)
Out	The output buffer (optional)
In	The input buffer (optional)
ghostOut	The output ghost buffer (optional)
ghostIn	The input ghost buffer (optional)
type	The type of copy we are doing (0: body and ghost, otherwise ghost only)

Definition at line 41 of file copy_gauge.cu.

References copyGenericGaugeDoubleOut(), copyGenericGaugeHalfOut(), copyGenericGaugeMG(), copyGenericGaugeQuarterOut(), copyGenericGaugeSingleOut(), errorQuda, quda::GaugeField::Geometry(), quda::LatticeField::GhostExchange(), quda::GaugeField::Ncolor(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_GHOST_EXCHANGE_PAD, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, and QUDA_SINGLE_PRECISION.

Referenced by quda::cudaGaugeField::copy(), quda::cpuGaugeField::copy(), copyGauge(), quda::cudaGaugeField::exchangeGhost(), quda::cpuGaugeField::Gauge_p(), quda::cudaGaugeField::injectGhost(), and quda::cudaGaugeField::saveCPUField().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ copyGenericGaugeDoubleOut()

void quda::copyGenericGaugeDoubleOut	(	GaugeField &	out,
		const GaugeField &	in,
		QudaFieldLocation	location,
		void *	Out,
		void *	In,
		void **	ghostOut,
		void **	ghostIn,
		int	type
	)

Definition at line 5 of file copy_gauge_double.cu.

References in, and out.

Referenced by copyGenericGauge().

Here is the caller graph for this function:

◆ copyGenericGaugeHalfOut()

void quda::copyGenericGaugeHalfOut	(	GaugeField &	out,
		const GaugeField &	in,
		QudaFieldLocation	location,
		void *	Out,
		void *	In,
		void **	ghostOut,
		void **	ghostIn,
		int	type
	)

Definition at line 5 of file copy_gauge_half.cu.

References errorQuda, in, and out.

Referenced by copyGenericGauge().

Here is the caller graph for this function:

◆ copyGenericGaugeMG()

void quda::copyGenericGaugeMG	(	GaugeField &	out,
		const GaugeField &	in,
		QudaFieldLocation	location,
		void *	Out,
		void *	In,
		void **	ghostOut,
		void **	ghostIn,
		int	type
	)

Definition at line 146 of file copy_gauge_mg.cu.

References copyGaugeMG(), errorQuda, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, and QUDA_SINGLE_PRECISION.

Referenced by copyGenericGauge().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ copyGenericGaugeQuarterOut()

void quda::copyGenericGaugeQuarterOut	(	GaugeField &	out,
		const GaugeField &	in,
		QudaFieldLocation	location,
		void *	Out,
		void *	In,
		void **	ghostOut,
		void **	ghostIn,
		int	type
	)

Definition at line 6 of file copy_gauge_quarter.cu.

References errorQuda, in, and out.

Referenced by copyGenericGauge().

Here is the caller graph for this function:

◆ copyGenericGaugeSingleOut()

void quda::copyGenericGaugeSingleOut	(	GaugeField &	out,
		const GaugeField &	in,
		QudaFieldLocation	location,
		void *	Out,
		void *	In,
		void **	ghostOut,
		void **	ghostIn,
		int	type
	)

Definition at line 5 of file copy_gauge_single.cu.

References errorQuda, in, and out.

Referenced by copyGenericGauge().

Here is the caller graph for this function:

◆ copyGhost()

template<typename FloatOut , typename FloatIn , int length, typename Arg >

void quda::copyGhost ( Arg & arg )

Generic CPU gauge ghost reordering and packing

Definition at line 126 of file copy_gauge.cuh.

References in, length, nColor, quda::gauge::Ncolor(), out, and parity.

Here is the call graph for this function:

◆ copyGhostKernel()

template<typename FloatOut , typename FloatIn , int length, typename Arg >

__global__ void quda::copyGhostKernel ( Arg arg )

Generic CUDA kernel for copying the ghost zone. Adopts a similar form as the CPU version, using the same inlined functions.

Definition at line 157 of file copy_gauge.cuh.

References in, length, nColor, quda::gauge::Ncolor(), out, and parity.

Here is the call graph for this function:

◆ copyInterior() [1/2]

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis , bool extend>

__device__ __host__ void quda::copyInterior	(	CopySpinorExArg< OutOrder, InOrder, Basis > &	arg,
		int	X
	)

Definition at line 166 of file extended_color_spinor_utilities.cu.

References quda::CopySpinorExArg< OutOrder, InOrder, Basis >::basis, quda::CopySpinorExArg< OutOrder, InOrder, Basis >::E, in, quda::CopySpinorExArg< OutOrder, InOrder, Basis >::in, out, quda::CopySpinorExArg< OutOrder, InOrder, Basis >::out, parity, quda::CopySpinorExArg< OutOrder, InOrder, Basis >::parity, R, and quda::CopySpinorExArg< OutOrder, InOrder, Basis >::X.

◆ copyInterior() [2/2]

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis , bool extend>

void quda::copyInterior ( CopySpinorExArg< OutOrder, InOrder, Basis > & arg )

Definition at line 217 of file extended_color_spinor_utilities.cu.

References arg(), and quda::CopySpinorExArg< OutOrder, InOrder, Basis >::length.

Here is the call graph for this function:

◆ copyInteriorKernel()

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis , bool extend>

__global__ void quda::copyInteriorKernel ( CopySpinorExArg< OutOrder, InOrder, Basis > arg )

Definition at line 203 of file extended_color_spinor_utilities.cu.

References arg(), and quda::CopySpinorExArg< OutOrder, InOrder, Basis >::length.

Here is the call graph for this function:

◆ copyLinkToArray() [1/2]

void quda::copyLinkToArray	(	float *	array,
		const Matrix< float2, 3 > &	link
	)

inline

Definition at line 1088 of file quda_matrix.h.

Referenced by unitarizeLinksCPU().

Here is the caller graph for this function:

◆ copyLinkToArray() [2/2]

template<class Cmplx , class Real >

void quda::copyLinkToArray	(	Real *	array,
		const Matrix< Cmplx, 3 > &	link
	)

inline

Definition at line 1102 of file quda_matrix.h.

◆ copyMom()

template<typename FloatOut , typename FloatIn , int length, typename Out , typename In , typename Arg >

void quda::copyMom	(	Arg &	arg,
		const GaugeField &	out,
		const GaugeField &	in,
		QudaFieldLocation	location
	)

Definition at line 278 of file copy_gauge_inc.cu.

References quda::CopyGauge< FloatOut, FloatIn, length, Arg >::apply().

Here is the call graph for this function:

◆ copySpinorEx() [1/2]

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis >

void quda::copySpinorEx	(	OutOrder	outOrder,
		const InOrder	inOrder,
		const Basis	basis,
		const int *	E,
		const int *	X,
		const int	parity,
		const bool	extend,
		const ColorSpinorField &	meta,
		QudaFieldLocation	location
	)

Definition at line 271 of file extended_color_spinor_utilities.cu.

References quda::CopySpinorEx< FloatOut, FloatIn, Ns, Nc, OutOrder, InOrder, Basis, extend >::apply(), arg(), checkCudaError, and QUDA_CUDA_FIELD_LOCATION.

Here is the call graph for this function:

◆ copySpinorEx() [2/2]

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder >

void quda::copySpinorEx	(	OutOrder	outOrder,
		InOrder	inOrder,
		const QudaGammaBasis	outBasis,
		const QudaGammaBasis	inBasis,
		const int *	E,
		const int *	X,
		const int	parity,
		const bool	extend,
		const ColorSpinorField &	meta,
		QudaFieldLocation	location
	)

Definition at line 286 of file extended_color_spinor_utilities.cu.

References E, errorQuda, parity, QUDA_DEGRAND_ROSSI_GAMMA_BASIS, QUDA_UKQCD_GAMMA_BASIS, and X.

◆ corner()

template<class T >

void quda::corner	(	T &	p,
		int	v,
		int	s,
		int	c
	)

Create a corner source with value "v" on color "c" on a single corner overloaded into "s". "s" is encoded via a bitmap; for example, 1010 -> x = 0, y = 1, z = 0, t = 1 corner.

Definition at line 82 of file color_spinor_util.cu.

References errorQuda, getCoords(), parity, and X.

Referenced by genericSource().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ cos() [1/3]

template<typename ValueType >

__host__ __device__ ValueType quda::cos ( ValueType x )

inline

Definition at line 46 of file complex_quda.h.

References cos().

Referenced by cos(), quda::Trig< isFixed, T >::Cos(), cosh(), exponentiate_iQ(), genGauss(), link_sanity_check_internal_8(), new_load_half(), polar(), setUnitarizeLinksConstants(), sin(), sinh(), su3Reconstruct8(), and tan().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ cos() [2/3]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::cos ( const complex< ValueType > & z )

inline

Definition at line 1117 of file complex_quda.h.

References cos(), cosh(), sin(), and sinh().

Here is the call graph for this function:

◆ cos() [3/3]

template<>

__host__ __device__ complex<float> quda::cos ( const complex< float > & z )

inline

Definition at line 1125 of file complex_quda.h.

References quda::complex< float >::imag(), and quda::complex< float >::real().

Referenced by cos().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ cosh() [1/3]

template<typename ValueType >

__host__ __device__ ValueType quda::cosh ( ValueType x )

inline

Definition at line 81 of file complex_quda.h.

References cosh().

Referenced by cos(), cosh(), sin(), and sinh().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ cosh() [2/3]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::cosh ( const complex< ValueType > & z )

inline

Definition at line 1133 of file complex_quda.h.

References cos(), cosh(), sin(), and sinh().

Here is the call graph for this function:

◆ cosh() [3/3]

template<>

__host__ __device__ complex<float> quda::cosh ( const complex< float > & z )

inline

Definition at line 1141 of file complex_quda.h.

References quda::complex< float >::imag(), and quda::complex< float >::real().

Referenced by cosh().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ covDev()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, typename Arg >

__device__ __host__ void quda::covDev	(	Arg &	arg,
		int	idx,
		int	parity
	)

inline

Definition at line 119 of file covDev.cuh.

References arg(), EXTERIOR_KERNEL_ALL, INTERIOR_KERNEL, quda::CovDevArg< Float, nColor, reconstruct_ >::out, and quda::DslashArg< Float >::parity.

Here is the call graph for this function:

◆ covDevGPU()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >

__global__ void quda::covDevGPU ( Arg arg )

Definition at line 182 of file covDev.cuh.

References arg(), and quda::DslashArg< Float >::parity.

Here is the call graph for this function:

◆ create_gauge_buffer()

void * quda::create_gauge_buffer	(	size_t	bytes,
		QudaGaugeFieldOrder	order,
		QudaFieldGeometry	geometry
	)

Definition at line 591 of file cuda_gauge_field.cpp.

References quda::GaugeField::geometry, pool_device_malloc, and QUDA_QDP_GAUGE_ORDER.

Referenced by quda::cudaGaugeField::copy(), quda::cpuGaugeField::copy(), quda::cpuGaugeField::exchangeExtendedGhost(), and quda::cudaGaugeField::saveCPUField().

Here is the caller graph for this function:

◆ create_ghost_buffer()

void ** quda::create_ghost_buffer	(	size_t	bytes[],
		QudaGaugeFieldOrder	order,
		QudaFieldGeometry	geometry
	)

Definition at line 602 of file cuda_gauge_field.cpp.

References quda::GaugeField::geometry, and pool_device_malloc.

Referenced by quda::cudaGaugeField::copy(), quda::cpuGaugeField::copy(), quda::cpuGaugeField::exchangeExtendedGhost(), and quda::cudaGaugeField::saveCPUField().

Here is the caller graph for this function:

◆ createDirac() [1/2]

void quda::createDirac	(	Dirac *&	d,
		Dirac *&	dSloppy,
		Dirac *&	dPre,
		QudaInvertParam &	param,
		const bool	pc_solve
	)

Definition at line 1730 of file interface_quda.cpp.

References quda::Dirac::create(), QudaInvertParam_s::inv_type, QUDA_INC_EIGCG_INVERTER, setDiracParam(), setDiracPreParam(), and setDiracSloppyParam().

Referenced by eigensolveQuda(), invertMultiShiftQuda(), invertMultiSrcQuda(), and invertQuda().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ createDirac() [2/2]

void quda::createDirac	(	Dirac *&	d,
		Dirac *&	dSloppy,
		Dirac *&	dPre,
		Dirac *&	dRef,
		QudaInvertParam &	param,
		const bool	pc_solve
	)

Definition at line 1747 of file interface_quda.cpp.

References quda::Dirac::create(), QudaInvertParam_s::inv_type, QUDA_INC_EIGCG_INVERTER, setDiracParam(), setDiracPreParam(), setDiracRefineParam(), and setDiracSloppyParam().

Here is the call graph for this function:

◆ createDslashEvents()

void quda::createDslashEvents ( )

◆ d2i()

__device__ __host__ int quda::d2i ( double d )

inline

Definition at line 104 of file convert.h.

Referenced by convert< short2, double4 >(), convert< short4, double2 >(), and copyFloatN().

Here is the caller graph for this function:

◆ deserializeTuneCache()

static void quda::deserializeTuneCache ( std::istream & in )

static

Deserialize tunecache from an istream, useful for reading a file or receiving from other nodes.

Definition at line 134 of file tune.cpp.

References quda::TuneKey::aux, quda::TuneParam::aux, quda::TuneKey::aux_n, quda::TuneParam::block, quda::TuneParam::comment, errorQuda, quda::TuneParam::grid, quda::TraceKey::key, quda::TuneKey::name, quda::TuneKey::name_n, param, quda::TuneParam::shared_bytes, quda::TuneParam::time, quda::TuneKey::volume, and quda::TuneKey::volume_n.

Referenced by broadcastTuneCache(), and loadTuneCache().

Here is the caller graph for this function:

◆ destroyDslashEvents()

void quda::destroyDslashEvents ( )

Definition at line 144 of file dslash_quda.cu.

References checkCudaError, quda::dslash::dslashStart, quda::dslash::gatherEnd, quda::dslash::gatherStart, host_free, Nstream, quda::dslash::packEnd, quda::dslash::scatterEnd, and quda::dslash::scatterStart.

Referenced by endQuda().

Here is the caller graph for this function:

◆ device_allocated_peak()

long quda::device_allocated_peak ( )

Returns: peak device memory allocated

Definition at line 59 of file malloc.cpp.

References DEVICE.

◆ device_free_()

void quda::device_free_	(	const char *	func,
		const char *	file,
		int	line,
		void *	ptr
	)

Free device memory allocated with device_malloc(). This function should only be called via the device_free() macro, defined in malloc_quda.h

Definition at line 301 of file malloc.cpp.

References count, DEVICE, device_pinned_free_(), errorQuda, and track_free().

Referenced by quda::pool::device_free_(), quda::pool::device_malloc_(), and device_pinned_free_().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ device_malloc_()

void * quda::device_malloc_	(	const char *	func,
		const char *	file,
		int	line,
		size_t	size
	)

Perform a standard cudaMalloc() with error-checking. This function should only be called via the device_malloc() macro, defined in malloc_quda.h

Definition at line 169 of file malloc.cpp.

References quda::MemAlloc::base_size, DEVICE, device_pinned_malloc_(), errorQuda, quda::MemAlloc::size, and track_malloc().

Referenced by quda::pool::device_malloc_(), and device_pinned_malloc_().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ device_pinned_free_()

void quda::device_pinned_free_	(	const char *	func,
		const char *	file,
		int	line,
		void *	ptr
	)

Free device memory allocated with device_pinned_malloc(). This function should only be called via the device_pinned_free() macro, defined in malloc_quda.h

Definition at line 322 of file malloc.cpp.

References comm_peer2peer_present(), count, device_free_(), DEVICE_PINNED, errorQuda, printfQuda, and track_free().

Referenced by device_free_().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ device_pinned_malloc_()

void * quda::device_pinned_malloc_	(	const char *	func,
		const char *	file,
		int	line,
		size_t	size
	)

Perform a cuMemAlloc with error-checking. This function is to guarantee a unique memory allocation on the device, since cudaMalloc can be redirected (as is the case with QDPJIT). This should only be called via the device_pinned_malloc() macro, defined in malloc_quda.h.

Definition at line 200 of file malloc.cpp.

References quda::MemAlloc::base_size, comm_peer2peer_present(), device_malloc_(), DEVICE_PINNED, errorQuda, quda::MemAlloc::size, and track_malloc().

Referenced by device_malloc_().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ dimFromFaceIndex() [1/2]

template<int nDim = 4, typename Arg >

__host__ __device__ int quda::dimFromFaceIndex	(	int &	face_idx,
		int	tid,
		const Arg &	arg
	)

inline

Determines which face a given thread is computing. Also rescales face_idx so that it is relative to a given dimension. If the 5-d variant is called, then it is assumed that arg.threads contains only the 3-d surface of threads but face_idx is a 4-d index (surface * fifth dimension). At present multi-src staggered uses the 4-d variant since the face_idx that is passed in is the 3-d surface, not the 4-d one.

Parameters

[out]	face_idx	Face index
[in]	tid	Checkerboard volume index
[in]	arg	Input parameters

Returns: dimension this face_idx corresponds to

Definition at line 783 of file index_helper.cuh.

References s.

Referenced by packKernel(), and packStaggeredKernel().

Here is the caller graph for this function:

◆ dimFromFaceIndex() [2/2]

template<int nDim = 4, typename Arg >

__host__ __device__ int quda::dimFromFaceIndex	(	int &	face_idx,
		const Arg &	arg
	)

inline

Definition at line 809 of file index_helper.cuh.

References arg().

Here is the call graph for this function:

◆ disable_policy()

void quda::disable_policy ( DslashCoarsePolicy p )

Definition at line 606 of file dslash_coarse.cu.

References DSLASH_COARSE_POLICY_DISABLED, and policies().

Here is the call graph for this function:

◆ disableProfileCount()

void quda::disableProfileCount ( )

Disable the profile kernel counting.

Definition at line 125 of file tune.cpp.

Referenced by quda::DslashCoarsePolicyTune::DslashCoarsePolicyTune(), quda::dslash::DslashPolicyTune< Dslash >::DslashPolicyTune(), quda::TunableVectorYZ::resizeStep(), and quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >::TileSizeTune().

Here is the caller graph for this function:

◆ doBulk() [1/2]

template<KernelType type>

__host__ __device__ bool quda::doBulk ( )

inline

Helper function to determine if we should do interior computation.

Parameters

[in] dim Dimension we are working on

Definition at line 35 of file dslash_helper.cuh.

References EXTERIOR_KERNEL_ALL, EXTERIOR_KERNEL_T, EXTERIOR_KERNEL_X, EXTERIOR_KERNEL_Y, EXTERIOR_KERNEL_Z, and INTERIOR_KERNEL.

◆ doBulk() [2/2]

template<DslashType type>

static __host__ __device__ bool quda::doBulk ( )

static

Helper function to determine if we should do interior computation.

Definition at line 72 of file dslash_coarse.cuh.

References DSLASH_FULL, DSLASH_INTERIOR, and s.

◆ doHalo() [1/2]

template<KernelType type>

__host__ __device__ bool quda::doHalo ( int dim = -1 )

inline

Helper function to determine if we should do halo computation.

Parameters

[in] dim Dimension we are working on. If dim=-1 (default argument) then we return true if type is any halo kernel.

Definition at line 17 of file dslash_helper.cuh.

References EXTERIOR_KERNEL_ALL, EXTERIOR_KERNEL_T, EXTERIOR_KERNEL_X, EXTERIOR_KERNEL_Y, EXTERIOR_KERNEL_Z, and INTERIOR_KERNEL.

◆ doHalo() [2/2]

template<DslashType type>

static __host__ __device__ bool quda::doHalo ( )

static

Helper function to determine if we should do halo computation.

Definition at line 58 of file dslash_coarse.cuh.

References DSLASH_EXTERIOR, and DSLASH_FULL.

◆ domainWall4D()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >

__device__ __host__ void quda::domainWall4D	(	Arg &	arg,
		int	idx,
		int	s,
		int	parity
	)

inline

Definition at line 44 of file dslash_domain_wall_4d.cuh.

References arg(), EXTERIOR_KERNEL_ALL, INTERIOR_KERNEL, quda::DslashArg< Float >::kernel_type, quda::DslashArg< Float >::nParity, quda::WilsonArg< Float, nColor, reconstruct_ >::out, quda::DslashArg< Float >::parity, s, quda::WilsonArg< Float, nColor, reconstruct_ >::x, and quda::DslashArg< Float >::xpay.

Here is the call graph for this function:

◆ domainWall4DCPU()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >

void quda::domainWall4DCPU ( Arg & arg )

Definition at line 74 of file dslash_domain_wall_4d.cuh.

References arg(), quda::DslashArg< Float >::nParity, quda::DslashArg< Float >::parity, and s.

Here is the call graph for this function:

◆ domainWall4DGPU()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >

__global__ void quda::domainWall4DGPU ( Arg arg )

Definition at line 90 of file dslash_domain_wall_4d.cuh.

References arg(), quda::DslashArg< Float >::nParity, quda::DslashArg< Float >::parity, and s.

Here is the call graph for this function:

◆ domainWall5D()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >

__device__ __host__ void quda::domainWall5D	(	Arg &	arg,
		int	idx,
		int	parity
	)

inline

Definition at line 28 of file dslash_domain_wall_5d.cuh.

References arg(), quda::DslashArg< Float >::dagger, EXTERIOR_KERNEL_ALL, quda::WilsonArg< Float, nColor, reconstruct_ >::in, INTERIOR_KERNEL, quda::DslashArg< Float >::kernel_type, quda::DslashArg< Float >::nParity, quda::WilsonArg< Float, nColor, reconstruct_ >::out, quda::DslashArg< Float >::parity, quda::WilsonArg< Float, nColor, reconstruct_ >::reconstruct, s, quda::WilsonArg< Float, nColor, reconstruct_ >::x, and quda::DslashArg< Float >::xpay.

Here is the call graph for this function:

◆ domainWall5DCPU()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >

void quda::domainWall5DCPU ( Arg & arg )

Definition at line 86 of file dslash_domain_wall_5d.cuh.

References arg(), quda::DslashArg< Float >::nParity, and quda::DslashArg< Float >::parity.

Here is the call graph for this function:

◆ domainWall5DGPU()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >

__global__ void quda::domainWall5DGPU ( Arg arg )

Definition at line 100 of file dslash_domain_wall_5d.cuh.

References arg(), quda::DslashArg< Float >::nParity, quda::DslashArg< Float >::parity, and s.

Here is the call graph for this function:

◆ dslash5()

template<typename Float , int nColor, bool dagger, bool xpay, Dslash5Type type, typename Arg >

__device__ __host__ void quda::dslash5	(	Arg &	arg,
		int	parity,
		int	x_cb,
		int	s
	)

inline

Apply the D5 operator at given site.

Parameters

[in]	arg	Argument struct containing any meta data and accessors
[in]	parity	Parity we are on
[in]	x_cb	Checkerboarded 4-d space-time index
[in]	s	Ls dimension coordinate

Definition at line 191 of file dslash_domain_wall_m5.cuh.

References quda::coeff_type< real, is_variable, Arg >::a(), quda::coeff_type< real, is_variable, Arg >::b(), quda::coeff_type< real, is_variable, Arg >::c(), dagger, DSLASH5_DWF, DSLASH5_MOBIUS, DSLASH5_MOBIUS_PRE, in, out, and quda::blas::xpay().

Here is the call graph for this function:

◆ dslash5CPU()

template<typename Float , int nColor, bool dagger, bool xpay, Dslash5Type type, typename Arg >

void quda::dslash5CPU ( Arg & arg )

CPU kernel for applying the D5 operator.

Parameters

[in] arg Argument struct containing any meta data and accessors

Definition at line 250 of file dslash_domain_wall_m5.cuh.

References arg(), quda::Arg< real, Ns, Nc, order >::nParity, parity, and s.

Here is the call graph for this function:

◆ dslash5GPU()

template<typename Float , int nColor, bool dagger, bool xpay, Dslash5Type type, typename Arg >

__global__ void quda::dslash5GPU ( Arg arg )

GPU kernel for applying the D5 operator.

Parameters

[in] arg Argument struct containing any meta data and accessors

Definition at line 266 of file dslash_domain_wall_m5.cuh.

References arg(), quda::Arg< real, Ns, Nc, order >::nParity, parity, and s.

Here is the call graph for this function:

◆ dslash5inv()

template<typename Float , int nColor, bool dagger, bool xpay, Dslash5Type type, bool shared, bool var_inverse, typename Arg >

__device__ __host__ void quda::dslash5inv	(	Arg &	arg,
		int	parity,
		int	x_cb,
		int	s
	)

inline

Apply the M5 inverse operator at a given site on the lattice.

Template Parameters

shared Whether to use a shared memory scratch pad to store the input field across the Ls dimension to minimize global memory reads.

Parameters

[in]	arg	Argument struct containing any meta data and accessors
[in]	parity	Parity we are on
[in]	x_cb	Checkerboarded 4-d space-time index
[in]	s	Ls dimension coordinate

Definition at line 433 of file dslash_domain_wall_m5.cuh.

References quda::coeff_type< real, is_variable, Arg >::a(), arg(), out, parity, s, and quda::blas::xpay().

Here is the call graph for this function:

◆ dslash5invGPU()

template<typename Float , int nColor, bool dagger, bool xpay, Dslash5Type type, bool shared, bool var_inverse, typename Arg >

__global__ void quda::dslash5invGPU ( Arg arg )

GPU kernel for applying the M5 inverse operator.

Template Parameters

shared Whether to use a shared memory scratch pad to store the input field across the Ls dimension to minimize global memory reads.

Parameters

[in] arg Argument struct containing any meta data and accessors

Definition at line 463 of file dslash_domain_wall_m5.cuh.

References arg(), quda::Arg< real, Ns, Nc, order >::nParity, parity, and s.

Here is the call graph for this function:

◆ enable_policy()

void quda::enable_policy ( DslashCoarsePolicy p )

Definition at line 602 of file dslash_coarse.cu.

References policies().

Referenced by quda::DslashCoarsePolicyTune::DslashCoarsePolicyTune().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ enableProfileCount()

void quda::enableProfileCount ( )

Enable the profile kernel counting.

Definition at line 126 of file tune.cpp.

Referenced by quda::DslashCoarsePolicyTune::DslashCoarsePolicyTune(), quda::dslash::DslashPolicyTune< Dslash >::DslashPolicyTune(), quda::TunableVectorYZ::resizeStep(), and quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >::TileSizeTune().

Here is the caller graph for this function:

◆ ErrorSU3()

template<class Cmplx >

__device__ __host__ double quda::ErrorSU3 ( const Matrix< Cmplx, 3 > & matrix )

Definition at line 1164 of file quda_matrix.h.

References conj(), and norm().

Referenced by computeOvrImpSTOUTStep(), and computeSTOUTStep().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ exchangeExtendedGhost()

void quda::exchangeExtendedGhost	(	cudaColorSpinorField *	spinor,
		int	R[],
		int	parity,
		cudaStream_t *	stream_p
	)

Definition at line 25 of file extended_color_spinor_utilities.cu.

References commDim, quda::cudaColorSpinorField::commsQuery(), quda::cudaColorSpinorField::commsStart(), dagger, quda::cudaColorSpinorField::gather(), quda::dslash::gatherEnd, quda::cudaColorSpinorField::packExtended(), qudaDeviceSynchronize, qudaEventRecord(), quda::cudaColorSpinorField::scatterExtended(), and streams.

Referenced by quda::GaugeField::Nface(), and quda::XSD::operator()().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ exp() [1/3]

template<typename ValueType >

__host__ __device__ ValueType quda::exp ( ValueType x )

inline

Definition at line 96 of file complex_quda.h.

References exp().

Referenced by constantInv(), exp(), expsu3(), pow(), smallSVD(), tanh(), and test().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ exp() [2/3]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::exp ( const complex< ValueType > & z )

inline

Definition at line 1150 of file complex_quda.h.

References exp(), and polar().

Here is the call graph for this function:

◆ exp() [3/3]

template<>

__host__ __device__ complex<float> quda::exp ( const complex< float > & z )

inline

Definition at line 1156 of file complex_quda.h.

References quda::complex< float >::imag(), polar(), and quda::complex< float >::real().

Referenced by exp().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ exponentiate_iQ()

template<class T >

__device__ __host__ void quda::exponentiate_iQ	(	const Matrix< T, 3 > &	Q,
		Matrix< T, 3 > *	exp_iQ
	)

inline

Definition at line 1191 of file quda_matrix.h.

References acos(), cos(), getDeterminant(), getTrace(), parity, pow(), setIdentity(), setZero(), sin(), and sqrt().

Referenced by computeOvrImpSTOUTStep(), and computeSTOUTStep().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ expsu3()

template<typename Float >

__device__ __host__ void quda::expsu3 ( Matrix< complex< Float >, 3 > & q )

Direct port of the TIFR expsu3 algorithm

Definition at line 1325 of file quda_matrix.h.

References conj(), exp(), log(), and sqrt().

Here is the call graph for this function:

◆ extendedCopyColorSpinor() [1/2]

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename InOrder >

void quda::extendedCopyColorSpinor	(	InOrder &	inOrder,
		ColorSpinorField &	out,
		QudaGammaBasis	inBasis,
		const int *	E,
		const int *	X,
		const int	parity,
		const bool	extend,
		QudaFieldLocation	location,
		FloatOut *	Out,
		float *	outNorm
	)

Definition at line 313 of file extended_color_spinor_utilities.cu.

References E, errorQuda, quda::ColorSpinorField::GammaBasis(), quda::ColorSpinorField::isNative(), out, parity, and X.

Here is the call graph for this function:

◆ extendedCopyColorSpinor() [2/2]

template<typename FloatOut , typename FloatIn , int Ns, int Nc>

void quda::extendedCopyColorSpinor	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const int	parity,
		const QudaFieldLocation	location,
		FloatOut *	Out,
		FloatIn *	In,
		float *	outNorm,
		float *	inNorm
	)

Definition at line 329 of file extended_color_spinor_utilities.cu.

References E, errorQuda, quda::ColorSpinorField::GammaBasis(), quda::ColorSpinorField::isNative(), out, parity, quda::ColorSpinorField::Volume(), X, and quda::ColorSpinorField::X().

Here is the call graph for this function:

◆ extractExtendedGaugeGhost()

void quda::extractExtendedGaugeGhost	(	const GaugeField &	u,
		int	dim,
		const int *	R,
		void **	ghost,
		bool	extract
	)

This function is used for extracting the extended gauge ghost zone from a gauge field array. Defined in extract_gauge_ghost_extended.cu.

Parameters

u	The gauge field from which we want to extract/pack the ghost zone
dim	The dimension in which we are packing/unpacking
ghost	The array where we want to pack/unpack the ghost zone into/from
extract	Whether we are extracting into ghost or injecting from ghost

Definition at line 418 of file extract_gauge_ghost_extended.cu.

References errorQuda, extractGhostEx(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, and QUDA_SINGLE_PRECISION.

Referenced by quda::cudaGaugeField::exchangeExtendedGhost(), quda::cpuGaugeField::exchangeExtendedGhost(), and quda::cpuGaugeField::Gauge_p().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ extractGaugeGhost()

void quda::extractGaugeGhost	(	const GaugeField &	u,
		void **	ghost,
		bool	extract = `true`,
		int	offset = `0`
	)

This function is used for extracting the gauge ghost zone from a gauge field array. Defined in extract_gauge_ghost.cu.

Parameters

u	The gauge field from which we want to extract the ghost zone
ghost	The array where we want to pack the ghost zone into
extract	Whether we are extracting into ghost or injecting from ghost
offset	By default we exchange the nDim site-vector of links in the first nDim dimensions; offset allows us to instead exchange the links in nDim+offset dimensions. This is used to facilitate sending bi-directional links, which is needed for the coarse links.

Definition at line 105 of file extract_gauge_ghost.cu.

References errorQuda, extractGaugeGhostMG(), extractGhost(), quda::GaugeField::Ncolor(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, and QUDA_SINGLE_PRECISION.

Referenced by quda::cudaGaugeField::exchangeGhost(), quda::cpuGaugeField::exchangeGhost(), quda::cpuGaugeField::Gauge_p(), quda::cudaGaugeField::injectGhost(), and quda::cpuGaugeField::injectGhost().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ extractGaugeGhostMG()

void quda::extractGaugeGhostMG	(	const GaugeField &	u,
		void **	ghost,
		bool	extract,
		int	offset
	)

Definition at line 75 of file extract_gauge_ghost_mg.cu.

References errorQuda, extractGhostMG(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, and QUDA_SINGLE_PRECISION.

Referenced by extractGaugeGhost(), and extractGhost().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ extractGhost() [1/3]

template<typename Float >

void quda::extractGhost	(	const GaugeField &	u,
		Float **	Ghost,
		bool	extract,
		int	offset
	)

This is the template driver for extractGhost

Definition at line 10 of file extract_gauge_ghost.cu.

References errorQuda, extractGaugeGhostMG(), quda::GaugeField::isNative(), length, quda::GaugeField::Order(), QUDA_BQCD_GAUGE_ORDER, QUDA_CPS_WILSON_GAUGE_ORDER, QUDA_CPU_FIELD_LOCATION, QUDA_CUDA_FIELD_LOCATION, QUDA_MILC_GAUGE_ORDER, QUDA_QDP_GAUGE_ORDER, QUDA_QDPJIT_GAUGE_ORDER, QUDA_RECONSTRUCT_12, QUDA_RECONSTRUCT_13, QUDA_RECONSTRUCT_8, QUDA_RECONSTRUCT_9, QUDA_RECONSTRUCT_NO, QUDA_STAGGERED_PHASE_MILC, QUDA_STAGGERED_PHASE_NO, QUDA_TIFR_GAUGE_ORDER, QUDA_TIFR_PADDED_GAUGE_ORDER, quda::GaugeField::Reconstruct(), and quda::GaugeField::StaggeredPhase().

Referenced by extractGaugeGhost().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ extractGhost() [2/3]

template<int nDim, bool extract, typename Arg >

void quda::extractGhost ( Arg & arg )

Generic CPU gauge ghost extraction and packing. NB: This routine is specialized to four dimensions.

Definition at line 46 of file extract_gauge_ghost_helper.cuh.

References nColor, and parity.

◆ extractGhost() [3/3]

template<typename Float , int length, typename Order >

void quda::extractGhost	(	Order	order,
		const GaugeField &	u,
		QudaFieldLocation	location,
		bool	extract,
		int	offset
	)

Generic gauge ghost extraction and packing (or the converse). NB: This routine is specialized to four dimensions.

Definition at line 236 of file extract_gauge_ghost_helper.cuh.

References quda::ExtractGhost< nDim, Arg >::apply(), arg(), commDim, extractor(), X, and quda::LatticeField::X().

Here is the call graph for this function:

◆ extractGhostEx() [1/3]

template<typename Float , int length, int nDim, int dim, typename Order , bool extract>

void quda::extractGhostEx ( ExtractGhostExArg< Order, nDim, dim > arg )

Generic CPU gauge ghost extraction and packing. NB: This routine is specialized to four dimensions.

Definition at line 100 of file extract_gauge_ghost_extended.cu.

References quda::ExtractGhostExArg< Order, nDim, dim >::A0, quda::ExtractGhostExArg< Order, nDim, dim >::A1, arg(), quda::ExtractGhostExArg< Order, nDim, dim >::B0, quda::ExtractGhostExArg< Order, nDim, dim >::B1, quda::ExtractGhostExArg< Order, nDim, dim >::C0, quda::ExtractGhostExArg< Order, nDim, dim >::C1, quda::ExtractGhostExArg< Order, nDim, dim >::order, parity, quda::ExtractGhostExArg< Order, nDim, dim >::R, and quda::ExtractGhostExArg< Order, nDim, dim >::X.

Referenced by extractExtendedGaugeGhost().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ extractGhostEx() [2/3]

template<typename Float , int length, typename Order >

void quda::extractGhostEx	(	Order	order,
		const int	dim,
		const int *	surfaceCB,
		const int *	E,
		const int *	R,
		bool	extract,
		const GaugeField &	u,
		QudaFieldLocation	location
	)

Generic CPU gauge ghost extraction and packing. NB: This routine is specialized to four dimensions.

Parameters

E	the extended gauge dimensions
R	array holding the radius of the extended region
extract	Whether we are extracting or injecting the ghost zone

Definition at line 258 of file extract_gauge_ghost_extended.cu.

References quda::ExtractGhostEx< Float, length, nDim, dim, Order >::apply(), arg(), checkCudaError, commDim, errorQuda, extractor(), and X.

Here is the call graph for this function:

◆ extractGhostEx() [3/3]

template<typename Float >

void quda::extractGhostEx	(	const GaugeField &	u,
		int	dim,
		const int *	R,
		Float **	Ghost,
		bool	extract
	)

This is the template driver for extractGhost

Definition at line 330 of file extract_gauge_ghost_extended.cu.

References errorQuda, quda::GaugeField::isNative(), length, quda::GaugeField::Order(), QUDA_BQCD_GAUGE_ORDER, QUDA_CPS_WILSON_GAUGE_ORDER, QUDA_CPU_FIELD_LOCATION, QUDA_CUDA_FIELD_LOCATION, QUDA_MILC_GAUGE_ORDER, QUDA_QDP_GAUGE_ORDER, QUDA_QDPJIT_GAUGE_ORDER, QUDA_RECONSTRUCT_12, QUDA_RECONSTRUCT_13, QUDA_RECONSTRUCT_8, QUDA_RECONSTRUCT_9, QUDA_RECONSTRUCT_NO, QUDA_TIFR_GAUGE_ORDER, R, quda::GaugeField::Reconstruct(), quda::LatticeField::SurfaceCB(), and quda::LatticeField::X().

Here is the call graph for this function:

◆ extractGhostExKernel()

template<typename Float , int length, int nDim, int dim, typename Order , bool extract>

__global__ void quda::extractGhostExKernel ( ExtractGhostExArg< Order, nDim, dim > arg )

Generic GPU gauge ghost extraction and packing. NB: This routine is specialized to four dimensions. FIXME: this implementation will have two-way warp divergence. Generic CPU gauge ghost extraction and packing. NB: This routine is specialized to four dimensions.

Definition at line 144 of file extract_gauge_ghost_extended.cu.

References quda::ExtractGhostExArg< Order, nDim, dim >::A0, quda::ExtractGhostExArg< Order, nDim, dim >::A1, arg(), quda::ExtractGhostExArg< Order, nDim, dim >::B0, quda::ExtractGhostExArg< Order, nDim, dim >::B1, quda::ExtractGhostExArg< Order, nDim, dim >::C0, quda::ExtractGhostExArg< Order, nDim, dim >::C1, parity, quda::ExtractGhostExArg< Order, nDim, dim >::R, quda::ExtractGhostExArg< Order, nDim, dim >::threads, quda::ExtractGhostExArg< Order, nDim, dim >::X, and X.

Here is the call graph for this function:

◆ extractGhostKernel()

template<int nDim, bool extract, typename Arg >

__global__ void quda::extractGhostKernel ( Arg arg )

Generic GPU gauge ghost extraction and packing. NB: This routine is specialized to four dimensions. FIXME: this implementation will have two-way warp divergence.

Definition at line 114 of file extract_gauge_ghost_helper.cuh.

References nColor, parity, and X.

◆ extractGhostMG() [1/2]

template<typename storeFloat , int Nc>

void quda::extractGhostMG	(	const GaugeField &	u,
		storeFloat **	Ghost,
		bool	extract,
		int	offset
	)

This is the template driver for extractGhost

Definition at line 15 of file extract_gauge_ghost_mg.cu.

References errorQuda, quda::GaugeField::isNative(), length, quda::GaugeField::Order(), QUDA_CPU_FIELD_LOCATION, QUDA_CUDA_FIELD_LOCATION, QUDA_QDP_GAUGE_ORDER, QUDA_RECONSTRUCT_NO, and quda::GaugeField::Reconstruct().

Referenced by extractGaugeGhostMG().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ extractGhostMG() [2/2]

template<typename Float >

void quda::extractGhostMG	(	const GaugeField &	u,
		Float **	Ghost,
		bool	extract,
		int	offset
	)

This is the template driver for extractGhost

Definition at line 54 of file extract_gauge_ghost_mg.cu.

References errorQuda, quda::GaugeField::LinkType(), quda::GaugeField::Ncolor(), QUDA_COARSE_LINKS, QUDA_RECONSTRUCT_NO, and quda::GaugeField::Reconstruct().

Here is the call graph for this function:

◆ extractor()

template<typename Float , int length, int dim, typename Arg >

__device__ __host__ void quda::extractor	(	Arg &	arg,
		int	dir,
		int	a,
		int	b,
		int	c,
		int	d,
		int	g,
		int	parity
	)

Definition at line 56 of file extract_gauge_ghost_extended.cu.

References quda::Matrix< T, N >::data, length, and quda::gauge::Ncolor().

Referenced by extractGhost(), and extractGhostEx().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ f2i()

__device__ __host__ int quda::f2i ( float f )

inline

Definition at line 93 of file convert.h.

Referenced by convert< short2, float4 >(), convert< short4, float2 >(), copy(), copy_scaled(), and copyFloatN().

Here is the caller graph for this function:

◆ fatLongKSLink()

void quda::fatLongKSLink	(	cudaGaugeField *	fat,
		cudaGaugeField *	lng,
		const cudaGaugeField &	gauge,
		const double *	coeff
	)

Compute the fat and long links for improved staggered (Kogut-Susskind) fermions.

Parameters

fat[out]	The computed fat link
lng[out]	The computed long link (only computed if lng!=0)
gauge[in]	The input gauge field
coeff[in]	Array of path coefficients

Definition at line 532 of file llfat_quda.cu.

References checkCudaError, computeStaple(), quda::GaugeFieldParam::create, errorQuda, gParam, MIN_COEFF, quda::LatticeFieldParam::Precision(), QUDA_NULL_FIELD_CREATE, QUDA_RECONSTRUCT_NO, qudaDeviceSynchronize, quda::GaugeFieldParam::reconstruct, quda::GaugeField::Reconstruct(), quda::GaugeFieldParam::setPrecision(), and quda::LatticeField::X().

Referenced by computeKSLinkQuda().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ file_name()

constexpr const char* quda::file_name ( const char * str )

inline

Definition at line 50 of file malloc_quda.h.

References get_pointer_location(), r_slant(), str_end(), and str_slant().

Here is the call graph for this function:

◆ fillEigCGInnerSolverParam()

static void quda::fillEigCGInnerSolverParam	(	SolverParam &	inner,
		const SolverParam &	outer,
		bool	use_sloppy_partial_accumulator = `true`
	)

static

◆ fillFGMResDRInnerSolveParam()

void quda::fillFGMResDRInnerSolveParam	(	SolverParam &	inner,
		const SolverParam &	outer
	)

◆ fillInitCGSolverParam()

static void quda::fillInitCGSolverParam	(	SolverParam &	inner,
		const SolverParam &	outer
	)

static

Definition at line 233 of file inv_eigcg_quda.cpp.

References quda::SolverParam::delta, quda::SolverParam::gflops, quda::SolverParam::inv_type, quda::SolverParam::iter, quda::SolverParam::maxiter, quda::SolverParam::precision, quda::SolverParam::precision_precondition, quda::SolverParam::precision_sloppy, QUDA_CG_INVERTER, QUDA_USE_INIT_GUESS_YES, quda::SolverParam::secs, quda::SolverParam::tol, quda::SolverParam::tol_restart, quda::SolverParam::use_init_guess, and quda::SolverParam::use_sloppy_partial_accumulator.

Referenced by quda::IncEigCG::IncEigCG().

Here is the caller graph for this function:

◆ fillInnerSolveParam()

void quda::fillInnerSolveParam	(	SolverParam &	inner,
		const SolverParam &	outer
	)

Definition at line 25 of file inv_gcr_quda.cpp.

Referenced by quda::GCR::GCR(), and quda::BiCGstab::operator()().

Here is the caller graph for this function:

◆ fillInnerSolverParam()

static void quda::fillInnerSolverParam	(	SolverParam &	inner,
		const SolverParam &	outer
	)

static

◆ flushForceMonitor()

void quda::flushForceMonitor ( )

Flush any outstanding force monitoring information.

Definition at line 29 of file momentum.cu.

References comm_rank(), count, forceMonitor(), getVerbosity(), printfQuda, and QUDA_VERBOSE.

Referenced by endQuda(), and forceRecord().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ flushProfile()

void quda::flushProfile ( )

Flush profile contents, setting all counts to zero.

Definition at line 504 of file tune.cpp.

References quda::TuneParam::n_calls, and param.

Referenced by newDeflationQuda(), and quda::TunableVectorYZ::resizeStep().

Here is the caller graph for this function:

◆ forceMonitor()

bool quda::forceMonitor ( )

Whether we are monitoring the force or not.

Returns: Boolean whether we are monitoring the force

Definition at line 13 of file momentum.cu.

References quda::cublas::init().

Referenced by computeGaugeForceQuda(), computeMomAction(), and flushForceMonitor().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ forceRecord()

void quda::forceRecord	(	double2 &	force,
		double	dt,
		const char *	fname
	)

Definition at line 57 of file momentum.cu.

References arg(), quda::blas::bytes, comm_allreduce(), comm_allreduce_max_array(), comm_rank(), computeMomAction(), errorQuda, quda::blas::flops, flushForceMonitor(), getTuning(), getVerbosity(), LAUNCH_KERNEL_LOCAL_PARITY, quda::LatticeField::Location(), mu, quda::GaugeField::Order(), parity, QUDA_CUDA_FIELD_LOCATION, QUDA_FLOAT2_GAUGE_ORDER, QUDA_RECONSTRUCT_10, qudaDeviceSynchronize, quda::GaugeField::Reconstruct(), stream, tuneLaunch(), quda::LatticeField::VolString(), quda::LatticeField::VolumeCB(), X, and quda::LatticeField::X().

Referenced by computeMomAction().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ free_gauge_buffer()

void quda::free_gauge_buffer	(	void *	buffer,
		QudaGaugeFieldOrder	order,
		QudaFieldGeometry	geometry
	)

Definition at line 614 of file cuda_gauge_field.cpp.

References quda::GaugeField::geometry, pool_device_free, and QUDA_QDP_GAUGE_ORDER.

Referenced by quda::cudaGaugeField::copy(), quda::cpuGaugeField::copy(), quda::cpuGaugeField::exchangeExtendedGhost(), and quda::cudaGaugeField::saveCPUField().

Here is the caller graph for this function:

◆ free_ghost_buffer()

void quda::free_ghost_buffer	(	void **	buffer,
		QudaGaugeFieldOrder	order,
		QudaFieldGeometry	geometry
	)

Definition at line 623 of file cuda_gauge_field.cpp.

References quda::GaugeField::geometry, and pool_device_free.

Referenced by quda::cudaGaugeField::copy(), quda::cpuGaugeField::copy(), quda::cpuGaugeField::exchangeExtendedGhost(), and quda::cudaGaugeField::saveCPUField().

Here is the caller graph for this function:

◆ gamma5()

void quda::gamma5	(	ColorSpinorField &	out,
		const ColorSpinorField &	in
	)

Applies a gamma5 matrix to a spinor (wrapper to ApplyGamma)

Parameters

[out]	out	Output field
[in]	in	Input field

Definition at line 461 of file dslash_quda.cu.

References ApplyGamma().

Referenced by computeCloverForceQuda().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ gammaCPU()

template<typename Float , int nColor, typename Arg >

void quda::gammaCPU ( Arg arg )

Definition at line 225 of file dslash_quda.cu.

References in, quda::Arg< real, Ns, Nc, order >::nParity, parity, and quda::Arg< real, Ns, Nc, order >::volumeCB.

◆ gammaGPU()

template<typename Float , int nColor, int d, typename Arg >

__global__ void quda::gammaGPU ( Arg arg )

Definition at line 240 of file dslash_quda.cu.

References in, quda::Arg< real, Ns, Nc, order >::nParity, parity, and quda::Arg< real, Ns, Nc, order >::volumeCB.

◆ GaugeFixHit_AtomicAdd() [1/2]

template<int blockSize, typename Float , int gauge_dir, int NCOLORS>

__forceinline__ __device__ void quda::GaugeFixHit_AtomicAdd	(	Matrix< complex< Float >, NCOLORS > &	link,
		const Float	relax_boost,
		const int	tid
	)

Device function to perform gauge fixing with overrelaxation. Uses 8 threads per lattice site, the reduction is performed by shared memory without using atomicadd. This implementation needs 8x more shared memory than the implementation using atomicadd.

Definition at line 69 of file gauge_fix_ovr_hit_devf.cuh.

References atomicAdd().

Here is the call graph for this function:

◆ GaugeFixHit_AtomicAdd() [2/2]

template<int blockSize, typename Float , int gauge_dir, int NCOLORS>

__forceinline__ __device__ void quda::GaugeFixHit_AtomicAdd	(	Matrix< complex< Float >, NCOLORS > &	link,
		Matrix< complex< Float >, NCOLORS > &	link1,
		const Float	relax_boost,
		const int	tid
	)

Device function to perform gauge fixing with overrelaxation. Uses 8 threads per lattice site, the reduction is performed by shared memory without using atomicadd. This implementation needs 8x more shared memory than the implementation using atomicadd.

Definition at line 392 of file gauge_fix_ovr_hit_devf.cuh.

References atomicAdd().

Here is the call graph for this function:

◆ GaugeFixHit_NoAtomicAdd() [1/2]

template<int blockSize, typename Float , int gauge_dir, int NCOLORS>

__forceinline__ __device__ void quda::GaugeFixHit_NoAtomicAdd	(	Matrix< complex< Float >, NCOLORS > &	link,
		const Float	relax_boost,
		const int	tid
	)

Device function to perform gauge fixing with overrelaxation. Uses 4 threads per lattice site, the reduction is performed by shared memory using atomicadd.

Definition at line 159 of file gauge_fix_ovr_hit_devf.cuh.

◆ GaugeFixHit_NoAtomicAdd() [2/2]

template<int blockSize, typename Float , int gauge_dir, int NCOLORS>

__forceinline__ __device__ void quda::GaugeFixHit_NoAtomicAdd	(	Matrix< complex< Float >, NCOLORS > &	link,
		Matrix< complex< Float >, NCOLORS > &	link1,
		const Float	relax_boost,
		const int	tid
	)

Device function to perform gauge fixing with overrelaxation. Uses 4 threads per lattice site, the reduction is performed by shared memory using atomicadd.

Definition at line 486 of file gauge_fix_ovr_hit_devf.cuh.

◆ GaugeFixHit_NoAtomicAdd_LessSM() [1/2]

template<int blockSize, typename Float , int gauge_dir, int NCOLORS>

__forceinline__ __device__ void quda::GaugeFixHit_NoAtomicAdd_LessSM	(	Matrix< complex< Float >, NCOLORS > &	link,
		const Float	relax_boost,
		const int	tid
	)

Device function to perform gauge fixing with overrelaxation. Uses 8 threads per lattice site, the reduction is performed by shared memory without using atomicadd. This implementation uses the same amount of shared memory as the atomicadd implementation with more thread block synchronization.

Definition at line 254 of file gauge_fix_ovr_hit_devf.cuh.

◆ GaugeFixHit_NoAtomicAdd_LessSM() [2/2]

template<int blockSize, typename Float , int gauge_dir, int NCOLORS>

__forceinline__ __device__ void quda::GaugeFixHit_NoAtomicAdd_LessSM	(	Matrix< complex< Float >, NCOLORS > &	link,
		Matrix< complex< Float >, NCOLORS > &	link1,
		const Float	relax_boost,
		const int	tid
	)

Device function to perform gauge fixing with overrelaxation. Uses 4 threads per lattice site, the reduction is performed by shared memory without using atomicadd. This implementation uses the same amount of shared memory as the atomicadd implementation with more thread block synchronization.

Definition at line 563 of file gauge_fix_ovr_hit_devf.cuh.

◆ gaugefixingFFT()

void quda::gaugefixingFFT	(	cudaGaugeField &	data,
		const int	gauge_dir,
		const int	Nsteps,
		const int	verbose_interval,
		const double	alpha,
		const int	autotune,
		const double	tolerance,
		const int	stopWtheta
	)

Gauge fixing with Steepest descent method with FFTs with support for single GPU only.

Parameters

[in,out]	data	quda gauge field
[in]	gauge_dir	3 for Coulomb gauge fixing, other for Landau gauge fixing
[in]	Nsteps	maximum number of steps to perform gauge fixing
[in]	verbose_interval	print gauge fixing info when iteration count is a multiple of this
[in]	alpha	gauge fixing parameter of the method, most common value is 0.08
[in]	autotune	1 to autotune the method, i.e., if the Fg inverts its tendency we decrease the alpha value
[in]	tolerance	tolerance value to stop the method, if this value is zero then the method stops when iteration reaches the maximum number of steps defined by Nsteps
[in]	stopWtheta	0 for MILC criterion and 1 to use the theta value

Definition at line 1083 of file gauge_fix_fft.cu.

References comm_dim_partitioned(), errorQuda, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, and QUDA_SINGLE_PRECISION.

Referenced by computeGaugeFixingFFTQuda(), and TEST_F().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ gaugefixingOVR()

void quda::gaugefixingOVR	(	cudaGaugeField &	data,
		const int	gauge_dir,
		const int	Nsteps,
		const int	verbose_interval,
		const double	relax_boost,
		const double	tolerance,
		const int	reunit_interval,
		const int	stopWtheta
	)

Gauge fixing with overrelaxation with support for single and multi GPU.

Parameters

[in,out]	data	quda gauge field
[in]	gauge_dir	3 for Coulomb gauge fixing, other for Landau gauge fixing
[in]	Nsteps	maximum number of steps to perform gauge fixing
[in]	verbose_interval	print gauge fixing info when iteration count is a multiple of this
[in]	relax_boost	gauge fixing parameter of the overrelaxation method, most common value is 1.5 or 1.7.
[in]	tolerance	tolerance value to stop the method, if this value is zero then the method stops when iteration reaches the maximum number of steps defined by Nsteps
[in]	reunit_interval	reunitarize gauge field when iteration count is a multiple of this
[in]	stopWtheta	0 for MILC criterion and 1 to use the theta value

Definition at line 1606 of file gauge_fix_ovr.cu.

References errorQuda, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, and QUDA_SINGLE_PRECISION.

Referenced by computeGaugeFixingOVRQuda(), and TEST_F().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ gaugeForce()

void quda::gaugeForce	(	GaugeField &	mom,
		const GaugeField &	u,
		double	coeff,
		int ***	input_path,
		int *	length,
		double *	path_coeff,
		int	num_paths,
		int	max_length
	)

Compute the gauge-force contribution to the momentum.

Parameters

[out]	mom	Momentum field
[in]	u	Gauge field (extended when running on multiple GPUs)
[in]	coeff	Step-size coefficient
[in]	input_path	Host-array holding all path contributions for the gauge action
[in]	length	Host array holding the length of all paths
[in]	path_coeff	Coefficient of each path
[in]	num_paths	Number of paths
[in]	max_length	Maximum length of each path

Definition at line 340 of file gauge_force.cu.

References errorQuda, length, quda::LatticeField::Location(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, and QUDA_SINGLE_PRECISION.

Referenced by computeGaugeForceQuda().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ gaugeGauss() [1/2]

void quda::gaugeGauss	(	GaugeField &	U,
		RNG &	rngstate,
		double	epsilon
	)

Generate Gaussian distributed su(N) or SU(N) fields. If U is a momentum field, then we generate random Gaussian distributed field in the Lie algebra using the anti-Hermitian convention. If U is in the group then we create a Gaussian distributed su(n) field and exponentiate it, e.g., U = exp(sigma * H), where H is the distributed su(n) field and sigma is the width of the distribution (sigma = 0 results in a free field, and sigma = 1 has maximum disorder).

Parameters

[out]	U	The output gauge field
[in]	rngstate	random states
[in]	sigma	Width of Gaussian distribution

Definition at line 145 of file gauge_random.cu.

References errorQuda, quda::GaugeField::exchangeExtendedGhost(), quda::GaugeField::exchangeGhost(), getVerbosity(), quda::LatticeField::GhostExchange(), quda::GaugeField::isNative(), quda::GaugeField::LinkType(), quda::GaugeField::Ncolor(), quda::GaugeField::Order(), quda::LatticeField::Precision(), printfQuda, QUDA_DOUBLE_PRECISION, QUDA_GHOST_EXCHANGE_EXTENDED, QUDA_GHOST_EXCHANGE_PAD, QUDA_MOMENTUM_LINKS, QUDA_RECONSTRUCT_10, QUDA_RECONSTRUCT_12, QUDA_RECONSTRUCT_13, QUDA_RECONSTRUCT_8, QUDA_RECONSTRUCT_9, QUDA_RECONSTRUCT_NO, QUDA_SINGLE_PRECISION, QUDA_SU3_LINKS, QUDA_SUMMARIZE, quda::LatticeField::R(), quda::GaugeField::Reconstruct(), quda::GaugeGaussArg< Float, recon, group_ >::rngstate, quda::GaugeGaussArg< Float, recon, group_ >::sigma, and quda::GaugeGaussArg< Float, recon, group_ >::U.

Referenced by gaugeGauss(), gaussGaugeQuda(), and genGauss().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ gaugeGauss() [2/2]

void quda::gaugeGauss	(	GaugeField &	U,
		unsigned long long	seed,
		double	epsilon
	)

Generate Gaussian distributed su(N) or SU(N) fields. If U is a momentum field, then we generate random Gaussian distributed field in the Lie algebra using the anti-Hermitian convention. If U is in the group then we create a Gaussian distributed su(n) field and exponentiate it, e.g., U = exp(sigma * H), where H is the distributed su(n) field and sigma is the width of the distribution (sigma = 0 results in a free field, and sigma = 1 has maximum disorder).

Parameters

[out]	U	The GaugeField
[in]	seed	The seed used for the RNG
[in]	sigma	Width of the Gaussian distribution

Definition at line 187 of file gauge_random.cu.

References gaugeGauss(), quda::RNG::Init(), and quda::RNG::Release().

Here is the call graph for this function:

◆ gauss_su3()

template<typename real , typename Link >

__device__ __host__ Link quda::gauss_su3 ( cuRNGState & localState )

Definition at line 39 of file gauge_random.cu.

References log(), and sqrt().

Here is the call graph for this function:

◆ genericCompare()

int quda::genericCompare	(	const cpuColorSpinorField &	a,
		const cpuColorSpinorField &	b,
		int	tol
	)

Definition at line 241 of file color_spinor_util.cu.

References quda::colorspinor::FieldOrderCB< Float, nSpin, nColor, nVec, order, storeFloat, ghostFloat, disable_ghost, block_float, use_tex >::abs_max(), quda::blas::ax(), compareSpinor(), errorQuda, quda::ColorSpinorField::FieldOrder(), quda::ColorSpinorField::Ncolor(), quda::ColorSpinorField::Nspin(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION, QUDA_SPACE_SPIN_COLOR_FIELD_ORDER, and tol.

Referenced by quda::cpuColorSpinorField::Compare().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ genericCopyColorSpinor() [1/6]

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder >

void quda::genericCopyColorSpinor	(	OutOrder &	outOrder,
		const InOrder &	inOrder,
		const ColorSpinorField &	out,
		QudaFieldLocation	location
	)

Definition at line 84 of file copy_color_spinor_mg.cuh.

References quda::CopySpinor< FloatOut, FloatIn, Ns, Nc, OutOrder, InOrder >::apply(), and copy().

Here is the call graph for this function:

◆ genericCopyColorSpinor() [2/6]

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename InOrder >

void quda::genericCopyColorSpinor	(	InOrder &	inOrder,
		ColorSpinorField &	out,
		QudaFieldLocation	location,
		FloatOut *	Out
	)

Decide on the output order

Definition at line 92 of file copy_color_spinor_mg.cuh.

References errorQuda, quda::ColorSpinorField::FieldOrder(), out, QUDA_FLOAT2_FIELD_ORDER, and QUDA_SPACE_SPIN_COLOR_FIELD_ORDER.

Here is the call graph for this function:

◆ genericCopyColorSpinor() [3/6]

template<typename FloatOut , typename FloatIn , int Ns, int Nc>

void quda::genericCopyColorSpinor	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		QudaFieldLocation	location,
		FloatOut *	Out,
		FloatIn *	In
	)

Decide on the input order

Definition at line 111 of file copy_color_spinor_mg.cuh.

References errorQuda, quda::ColorSpinorField::FieldOrder(), out, QUDA_FLOAT2_FIELD_ORDER, and QUDA_SPACE_SPIN_COLOR_FIELD_ORDER.

Here is the call graph for this function:

◆ genericCopyColorSpinor() [4/6]

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename Out , typename In >

void quda::genericCopyColorSpinor	(	Out &	outOrder,
		const In &	inOrder,
		const ColorSpinorField &	out,
		const ColorSpinorField &	in,
		QudaFieldLocation	location
	)

Decide whether we are changing basis or not

Definition at line 270 of file copy_color_spinor.cuh.

References quda::CopyColorSpinor< Ns, Arg >::apply(), arg(), and copy().

Here is the call graph for this function:

◆ genericCopyColorSpinor() [5/6]

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename InOrder >

void quda::genericCopyColorSpinor	(	InOrder &	inOrder,
		ColorSpinorField &	out,
		const ColorSpinorField &	in,
		QudaFieldLocation	location,
		FloatOut *	Out,
		float *	outNorm
	)

Decide on the output order

Definition at line 280 of file copy_color_spinor.cuh.

References errorQuda, quda::ColorSpinorField::FieldOrder(), in, quda::ColorSpinorField::isNative(), out, QUDA_FLOAT2_FIELD_ORDER, QUDA_PADDED_SPACE_SPIN_COLOR_FIELD_ORDER, QUDA_QDPJIT_FIELD_ORDER, QUDA_SPACE_COLOR_SPIN_FIELD_ORDER, and QUDA_SPACE_SPIN_COLOR_FIELD_ORDER.

Here is the call graph for this function:

◆ genericCopyColorSpinor() [6/6]

template<typename FloatOut , typename FloatIn , int Ns, int Nc>

void quda::genericCopyColorSpinor	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		QudaFieldLocation	location,
		FloatOut *	Out,
		FloatIn *	In,
		float *	outNorm,
		float *	inNorm
	)

Decide on the input order

Definition at line 330 of file copy_color_spinor.cuh.

References errorQuda, quda::ColorSpinorField::FieldOrder(), in, quda::ColorSpinorField::isNative(), out, QUDA_FLOAT2_FIELD_ORDER, QUDA_PADDED_SPACE_SPIN_COLOR_FIELD_ORDER, QUDA_QDPJIT_FIELD_ORDER, QUDA_SPACE_COLOR_SPIN_FIELD_ORDER, and QUDA_SPACE_SPIN_COLOR_FIELD_ORDER.

Here is the call graph for this function:

◆ genericCudaPrintVector() [1/4]

template<typename StoreType , int Ns, int Nc, QudaFieldOrder FieldOrder>

void quda::genericCudaPrintVector	(	const cudaColorSpinorField &	field,
		unsigned int	i
	)

Definition at line 397 of file color_spinor_util.cu.

References quda::ColorSpinorField::Norm(), printfQuda, s, and quda::ColorSpinorField::V().

Here is the call graph for this function:

◆ genericCudaPrintVector() [2/4]

template<typename Float , int Ns, int Nc>

void quda::genericCudaPrintVector	(	const cudaColorSpinorField &	field,
		unsigned int	i
	)

Definition at line 445 of file color_spinor_util.cu.

References errorQuda, quda::ColorSpinorField::FieldOrder(), QUDA_FLOAT2_FIELD_ORDER, QUDA_FLOAT4_FIELD_ORDER, QUDA_FLOAT_FIELD_ORDER, QUDA_SPACE_COLOR_SPIN_FIELD_ORDER, and QUDA_SPACE_SPIN_COLOR_FIELD_ORDER.

Here is the call graph for this function:

◆ genericCudaPrintVector() [3/4]

template<typename Float >

void quda::genericCudaPrintVector	(	const cudaColorSpinorField &	field,
		unsigned int	i
	)

Definition at line 461 of file color_spinor_util.cu.

References errorQuda, genericCudaPrintVector(), quda::ColorSpinorField::Ncolor(), quda::ColorSpinorField::Nspin(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, and QUDA_SINGLE_PRECISION.

Here is the call graph for this function:

◆ genericCudaPrintVector() [4/4]

void quda::genericCudaPrintVector	(	const cudaColorSpinorField &	a,
		unsigned	x
	)

Referenced by genericCudaPrintVector(), and quda::cudaColorSpinorField::PrintVector().

Here is the caller graph for this function:

◆ GenericPackGhost()

template<typename Float , bool block_float, int Ns, int Ms, int Nc, int Mc, int nDim, typename Arg >

void quda::GenericPackGhost ( Arg & arg )

Definition at line 135 of file color_spinor_pack.cuh.

References arg(), quda::Arg< real, Ns, Nc, order >::nParity, quda::PackGhostArg< Field >::parity, and quda::Arg< real, Ns, Nc, order >::volumeCB.

Here is the call graph for this function:

◆ genericPackGhost()

void quda::genericPackGhost	(	void **	ghost,
		const ColorSpinorField &	a,
		QudaParity	parity,
		int	nFace,
		int	dagger,
		MemoryLocation *	destination = `nullptr`
	)

inline

Generic ghost packing routine.

Parameters

[out]	ghost	Array of packed ghosts with array ordering [2*dim+dir]
[in]	a	Input field that is being packed
[in]	parity	Which parity are we packing
[in]	dagger	Is for a dagger operator (presently ignored)

Definition at line 180 of file color_spinor_pack.cu.

References quda::GenericPackGhostLauncher< Float, block_float, Ns, Ms, Nc, Mc, Arg >::apply(), quda::GenericPackGhostLauncher< Float, block_float, Ns, Ms, Nc, Mc, Arg >::arg, errorQuda, MAX_BLOCK_FLOAT_NC, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, and QUDA_SINGLE_PRECISION.

Referenced by quda::cudaColorSpinorField::exchangeGhost(), and quda::cpuColorSpinorField::packGhost().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ GenericPackGhostKernel()

template<typename Float , bool block_float, int Ns, int Ms, int Nc, int Mc, int nDim, int dim_threads, typename Arg >

__global__ void quda::GenericPackGhostKernel ( Arg arg )

Definition at line 165 of file color_spinor_pack.cuh.

References arg(), quda::Arg< real, Ns, Nc, order >::nParity, quda::PackGhostArg< Field >::parity, and quda::Arg< real, Ns, Nc, order >::volumeCB.

Here is the call graph for this function:

◆ genericPrintVector()

void quda::genericPrintVector	(	const cpuColorSpinorField &	a,
		unsigned int	x
	)

Definition at line 337 of file color_spinor_util.cu.

References errorQuda, quda::ColorSpinorField::FieldOrder(), quda::ColorSpinorField::Ncolor(), quda::ColorSpinorField::Nspin(), quda::LatticeField::Precision(), print_vector(), QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION, and QUDA_SPACE_SPIN_COLOR_FIELD_ORDER.

Referenced by quda::cpuColorSpinorField::PrintVector().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ genericSource()

void quda::genericSource	(	cpuColorSpinorField &	a,
		QudaSourceType	sourceType,
		int	x,
		int	s,
		int	c
	)

Definition at line 112 of file color_spinor_util.cu.

References constant(), corner(), errorQuda, quda::ColorSpinorField::FieldOrder(), quda::ColorSpinorField::Ncolor(), quda::ColorSpinorField::Nspin(), point(), quda::LatticeField::Precision(), QUDA_CONSTANT_SOURCE, QUDA_CORNER_SOURCE, QUDA_DOUBLE_PRECISION, QUDA_POINT_SOURCE, QUDA_RANDOM_SOURCE, QUDA_SINGLE_PRECISION, QUDA_SINUSOIDAL_SOURCE, QUDA_SPACE_SPIN_COLOR_FIELD_ORDER, random(), s, and sin().

Referenced by quda::cpuColorSpinorField::Source().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ genGauss() [1/2]

template<typename real , typename Arg >

__device__ __host__ void quda::genGauss	(	Arg &	arg,
		cuRNGState &	localState,
		int	parity,
		int	x_cb,
		int	s,
		int	c
	)

inline

Definition at line 32 of file spinor_noise.cu.

References cos(), log(), sin(), sqrt(), and quda::Arg< real, Ns, Nc, order >::v.

Here is the call graph for this function:

◆ genGauss() [2/2]

template<typename Float , QudaReconstructType recon, bool group>

void quda::genGauss	(	GaugeField &	U,
		RNG &	rngstate,
		double	sigma
	)

Definition at line 138 of file gauge_random.cu.

References quda::GaugeGauss< Float, Arg >::apply(), arg(), and gaugeGauss().

Here is the call graph for this function:

◆ genUniform()

template<typename real , typename Arg >

__device__ __host__ void quda::genUniform	(	Arg &	arg,
		cuRNGState &	localState,
		int	parity,
		int	x_cb,
		int	s,
		int	c
	)

inline

Definition at line 40 of file spinor_noise.cu.

References quda::Arg< real, Ns, Nc, order >::v.

◆ get_pointer_location()

QudaFieldLocation quda::get_pointer_location ( const void * ptr )

Definition at line 399 of file malloc.cpp.

References errorQuda, QUDA_CPU_FIELD_LOCATION, QUDA_CUDA_FIELD_LOCATION, and QUDA_INVALID_FIELD_LOCATION.

Referenced by file_name(), and printQudaInvertParam().

Here is the caller graph for this function:

◆ GetBlockDim()

dim3 quda::GetBlockDim	(	size_t	threads,
		size_t	size
	)

Definition at line 25 of file random.cu.

References BLOCKSDIVUP.

Referenced by launch_kernel_random().

Here is the caller graph for this function:

◆ getCoords() [1/2]

template<int nDim, QudaPCType pc_type, KernelType kernel_type, typename Arg , int nface_ = 1>

__host__ __device__ int quda::getCoords	(	int	coord[],
		const Arg &	arg,
		int &	idx,
		int	parity,
		int &	dim
	)

inline

Compute the space-time coordinates we are at.

Parameters

[out]	coord	The computed space-time coordinates
[in]	arg	DslashArg struct
[in,out]	idx	Space-time index (usually equal to global x-thread index). When doing EXTERIOR kernels we overwrite this with the index into our face (ghost index).
[in]	parity	Field parity
[out]	dim	The dimension we are working on (fused kernel only)

Returns: checkerboard space-time index

Definition at line 88 of file dslash_helper.cuh.

References arg(), EXTERIOR_KERNEL_ALL, getCoords5CB(), getCoordsCB(), INTERIOR_KERNEL, Ls, parity, QUDA_5D_PC, and X.

Referenced by completeKSForceCore(), computeAPEStep(), computeCoarseClover(), computeFmunuCore(), computeGenGauss(), computeMomAction(), computeNeighborSum(), computeOvrImpSTOUTStep(), computePlaq(), computeStaple(), computeStapleRectangle(), computeSTOUTStep(), computeUV(), computeVUV(), computeYhat(), corner(), quda::GaugeSTOUTArg< Float, GaugeOr, GaugeDs >::GaugeSTOUTArg(), quda::colorspinor::PaddedSpaceSpinorColorOrder< Float, Ns, Nc >::getPaddedIndex(), quda::gauge::TIFRPaddedOrder< Float, length >::getPaddedIndex(), kernel_random(), packGhost(), and sin().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ getCoords() [2/2]

template<typename I >

static __device__ __host__ void quda::getCoords	(	int	x[],
		int	cb_index,
		const I	X[],
		int	parity
	)

inline static

Compute the 4-d spatial index from the checkerboarded 1-d index at parity parity. Wrapper around getCoordsCB.

Parameters

[out]	x	Computed spatial index
[in]	cb_index	1-d checkerboarded index
[in]	X	Full lattice dimensions
[in]	X0h	Half of x-dim lattice dimension
[in]	parity	Site parity

Definition at line 228 of file index_helper.cuh.

References getCoordsCB().

Here is the call graph for this function:

◆ getCoords5()

template<typename I >

static __device__ __host__ void quda::getCoords5	(	int	x[5],
		int	cb_index,
		const I	X[5],
		int	parity,
		QudaPCType	pc_type
	)

inline static

Compute the 5-d spatial index from the checkerboarded 1-d index at parity parity. Wrapper around getCoords5CB.

Parameters

[out]	x	Computed spatial index
[in]	cb_index	1-d checkerboarded index
[in]	X	Full lattice dimensions
[in]	parity	Site parity

Definition at line 301 of file index_helper.cuh.

References getCoords5CB().

Referenced by packGhost().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ getCoords5CB()

template<typename I , typename J >

static __device__ __host__ void quda::getCoords5CB	(	int	x[5],
		int	cb_index,
		const I	X[5],
		J	X0h,
		int	parity,
		QudaPCType	pc_type
	)

inline static

Compute the 5-d spatial index from the checkerboarded 1-d index at parity parity

Parameters

[out]	x	Computed spatial index
[in]	cb_index	1-d checkerboarded index
[in]	X	Full lattice dimensions
[in]	X0h	Half of x-dim lattice dimension
[in]	parity	Site parity

Definition at line 270 of file index_helper.cuh.

References QUDA_5D_PC.

Referenced by getCoords(), and getCoords5().

Here is the caller graph for this function:

◆ getCoordsCB()

template<typename I , typename J >

static __device__ __host__ void quda::getCoordsCB	(	int	x[],
		int	cb_index,
		const I	X[],
		J	X0h,
		int	parity
	)

inline static

Compute the 4-d spatial index from the checkerboarded 1-d index at parity parity

Parameters

[out]	x	Computed spatial index
[in]	cb_index	1-d checkerboarded index
[in]	X	Full lattice dimensions
[in]	X0h	Half of x-dim lattice dimension
[in]	parity	Site parity

Definition at line 201 of file index_helper.cuh.

References parity.

Referenced by applyDslash(), and getCoords().

Here is the caller graph for this function:

◆ getCoordsExtended()

template<typename I , typename J >

static __device__ __host__ void quda::getCoordsExtended	(	I	x[],
		int	cb_index,
		const J	X[],
		int	parity,
		const int	R[]
	)

inline static

Compute the 4-d spatial index from the checkerboarded 1-d index at parity parity

Parameters

x	Computed spatial index
cb_index	1-d checkerboarded index
X	Full lattice dimensions
parity	Site parity

Definition at line 242 of file index_helper.cuh.

References parity.

Referenced by computeForce().

Here is the caller graph for this function:

◆ getDeterminant()

template<template< typename, int > class Mat, class T >

__device__ __host__ T quda::getDeterminant ( const Mat< T, 3 > & a )

inline

Definition at line 422 of file quda_matrix.h.

References Mat().

Referenced by computeLinkInverse(), exponentiate_iQ(), quda::gauge::Reconstruct< 13, Float, ghostExchange_, stag_phase >::getPhase(), quda::gauge::Reconstruct< 9, Float, ghostExchange_, stag_phase >::getPhase(), inverse(), polarSu3(), and setUnitarizeLinksConstants().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ getDslashLaunch()

bool quda::getDslashLaunch ( )

◆ getIndexFull()

template<typename I >

static __device__ __host__ int quda::getIndexFull	(	int	cb_index,
		const I	X[4],
		int	parity
	)

inline static

Compute the 1-d global index from 1-d checkerboard index and parity. This should never be used to index into QUDA fields due to the potential of padding between even and odd regions.

Parameters

cb_index	1-d checkerboard index
X	lattice dimensions
parity	Site parity

Definition at line 316 of file index_helper.cuh.

References parity.

◆ getIndicesGlobal()

template<bool parity_flip, typename Arg >

__device__ void quda::getIndicesGlobal	(	const Arg &	arg,
		int &	parity,
		int &	x_cb,
		int &	parity_coarse,
		int &	x_coarse_cb,
		int &	c_col,
		int &	c_row
	)

inline

Definition at line 834 of file coarse_op_kernel.cuh.

References quda::CalculateYArg< Float, fineSpin, coarseSpin, fineColor, coarseColor, coarseGauge, coarseGaugeAtomic, fineGauge, fineSpinor, fineSpinorTmp, fineSpinorV, fineClover >::parity_flip, and quda::CalculateYArg< Float, fineSpin, coarseSpin, fineColor, coarseColor, coarseGauge, coarseGaugeAtomic, fineGauge, fineSpinor, fineSpinorTmp, fineSpinorV, fineClover >::shared_atomic.

◆ getIndicesShared()

template<bool parity_flip, typename Arg >

__device__ void quda::getIndicesShared	(	const Arg &	arg,
		int &	parity,
		int &	x_cb,
		int &	parity_coarse,
		int &	x_coarse_cb,
		int &	c_col,
		int &	c_row
	)

inline

Definition at line 797 of file coarse_op_kernel.cuh.

References coarseIndex(), parity, virtualBlockDim(), and virtualThreadIdx().

Here is the call graph for this function:

◆ getKernelPackT()

bool quda::getKernelPackT ( )

Returns: Whether the T dimension is kernel packed or not

Definition at line 26 of file dslash_quda.cu.

References kernelPackT.

Referenced by quda::dslash::DslashPolicyTune< Dslash >::apply(), quda::dslash::DslashPolicyTune< Dslash >::DslashPolicyTune(), quda::Pack< Float, nColor, spin_project >::fillAux(), quda::dslash::issueGather(), quda::dslash::issuePack(), quda::dslash::DslashBasic< Dslash >::operator()(), quda::dslash::DslashFusedExterior< Dslash >::operator()(), quda::dslash::DslashGDRRecv< Dslash >::operator()(), quda::dslash::DslashFusedGDRRecv< Dslash >::operator()(), quda::Pack< Float, nColor, spin_project >::Pack(), PackGhost(), pushKernelPackT(), quda::cudaColorSpinorField::sendGhost(), quda::cudaColorSpinorField::sendStart(), quda::Dslash< Float >::setParam(), and DslashCuda::setParam().

Here is the caller graph for this function:

◆ getLinkDeterminant()

double2 quda::getLinkDeterminant ( cudaGaugeField & data )

Calculate the Determinant.

Parameters

[in] data Gauge field

Returns: double2 complex Determinant value

Definition at line 194 of file pgauge_det_trace.cu.

References errorQuda, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, and QUDA_SINGLE_PRECISION.

Referenced by GaugeAlgTest::TearDown(), and TEST_F().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ getLinkTrace()

double2 quda::getLinkTrace ( cudaGaugeField & data )

Calculate the Trace.

Parameters

[in] data Gauge field

Returns: double2 complex trace value

Definition at line 215 of file pgauge_det_trace.cu.

References errorQuda, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, and QUDA_SINGLE_PRECISION.

Referenced by GaugeAlgTest::TearDown().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ getNeighborIndexCB()

template<int nDim = 4, typename Arg >

static __device__ __host__ int quda::getNeighborIndexCB	(	const int	x[],
		int	mu,
		int	dir,
		const Arg &	arg
	)

inline static

Compute the checkerboard 1-d index for the nearest neighbor.

Parameters

[in]	x	nDim lattice coordinates
[in]	mu	dimension in which to add 1
[in]	dir	direction (+1 or -1)
[in]	arg	parameter struct

Returns: 1-d checkerboard index

Definition at line 166 of file index_helper.cuh.

Referenced by applyWilsonTM().

Here is the caller graph for this function:

◆ getRealTraceUVdagger()

template<class T >

__device__ __host__ double quda::getRealTraceUVdagger	(	const Matrix< T, 3 > &	a,
		const Matrix< T, 3 > &	b
	)

inline

Definition at line 1131 of file quda_matrix.h.

References sum().

Here is the call graph for this function:

◆ getSubTraceUnit()

template<class T >

__device__ __host__ Matrix<T,3> quda::getSubTraceUnit ( const Matrix< T, 3 > & a )

inline

Definition at line 1115 of file quda_matrix.h.

◆ getTrace()

template<class T >

__device__ __host__ T quda::getTrace ( const Matrix< T, 3 > & a )

inline

Definition at line 415 of file quda_matrix.h.

References Mat().

Referenced by computeOvrImpSTOUTStep(), computeSTOUTStep(), exponentiate_iQ(), plaquette(), qChargeComputeKernel(), and setUnitarizeLinksConstants().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ getTuneCache()

const map & quda::getTuneCache ( )

Returns a reference to the tunecache map.

Returns: tunecache reference

Definition at line 128 of file tune.cpp.

References tunecache.

Referenced by quda::DslashCoarsePolicyTune::DslashCoarsePolicyTune(), quda::dslash::DslashPolicyTune< Dslash >::DslashPolicyTune(), quda::TunableVectorYZ::resizeStep(), and quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >::TileSizeTune().

Here is the caller graph for this function:

◆ ghostFaceIndex()

template<int dir, int nDim = 4, typename I >

__device__ __host__ int quda::ghostFaceIndex	(	const int	x_[],
		const I	X_[],
		int	dim,
		int	nFace
	)

inline

Compute the checkerboarded index into the ghost field corresponding to full (local) site index x[]

Parameters

x_	local site
X_	local lattice dimensions
dim	dimension
nFace	depth of ghost

Definition at line 335 of file index_helper.cuh.

References index(), and X.

Here is the call graph for this function:

◆ ghostFaceIndexStaggered()

template<int dir, int nDim = 4, typename I >

__device__ __host__ int quda::ghostFaceIndexStaggered	(	const int	x_[],
		const I	X_[],
		int	dim,
		int	nFace
	)

inline

Compute the checkerboarded index into the ghost field corresponding to full (local) site index x[] for staggered

Parameters

x_	local site
X_	local lattice dimensions
dim	dimension
nFace	depth of ghost

Definition at line 396 of file index_helper.cuh.

References index(), and X.

Here is the call graph for this function:

◆ host_allocated_peak()

long quda::host_allocated_peak ( )

Returns: peak host memory allocated

Definition at line 65 of file malloc.cpp.

References HOST.

◆ host_free_()

void quda::host_free_	(	const char *	func,
		const char *	file,
		int	line,
		void *	ptr
	)

Free host memory allocated with safe_malloc(), pinned_malloc(), or mapped_malloc(). This function should only be called via the host_free() macro, defined in malloc_quda.h

Definition at line 344 of file malloc.cpp.

References count, errorQuda, HOST, MAPPED, PINNED, print_trace(), printfQuda, and track_free().

Referenced by quda::pool::pinned_free_().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ i32toa()

void quda::i32toa	(	char *	buffer,
		int32_t	value
	)

inline

Definition at line 117 of file uint_to_char.h.

References u32toa().

Referenced by quda::DslashCoarsePolicyTune::DslashCoarsePolicyTune(), and postTrace_().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ i64toa()

void quda::i64toa	(	char *	buffer,
		int64_t	value
	)

inline

Definition at line 284 of file uint_to_char.h.

References u64toa().

Here is the call graph for this function:

◆ inBoundary()

template<int dim, typename Arg >

__host__ __device__ bool quda::inBoundary	(	const int	coord[],
		const Arg &	arg
	)

inline

Compute whether the provided coordinate is within the halo region boundary of a given dimension.

Parameters

[in]	coord	Coordinates
[in]	arg	Dslash argument struct

Returns: True if in boundary, else false

Definition at line 155 of file dslash_helper.cuh.

◆ IndexBlock()

template<int NCOLORS>

static __host__ __device__ void quda::IndexBlock	(	int	block,
		int &	p,
		int &	q
	)

inline static

Retrieve the SU(N) indices for the current block number

Parameters

[in]	block	Current block number, from 0 to (NCOLORS * (NCOLORS - 1) / 2)
[out]	p	Row index pointing to the SU(N) matrix
[out]	q	Column index pointing to the SU(N) matrix

Definition at line 36 of file gauge_fix_ovr_hit_devf.cuh.

References index().

Here is the call graph for this function:

◆ indexFromFaceIndex() [1/2]

template<int nDim, QudaPCType type, int dim, int nLayers, int face_num, typename Arg >

__device__ __host__ int quda::indexFromFaceIndex	(	int	face_idx,
		int	parity,
		const Arg &	arg
	)

inline

Compute the checkerboard lattice index from the input face index. This is used by the Wilson-like halo packing kernels, and can deal with 4-d or 5-d field and 4-d or 5-d preconditioning.

Parameters

[in]	face_idx	Checkerboard halo index
[in]	parity	Parity index
[in]	arg	Argument struct with required meta data

Returns: Checkerboard lattice index

Definition at line 601 of file index_helper.cuh.

References QUDA_4D_PC, QUDA_5D_PC, and s.

◆ indexFromFaceIndex() [2/2]

template<int nDim, QudaPCType type, int dim, int nLayers, int face_num, typename Arg >

__device__ __host__ int quda::indexFromFaceIndex	(	int	face_idx,
		const Arg &	arg
	)

inline

Overloaded variant of indexFromFaceIndex where we use the parity declared in arg.

Definition at line 694 of file index_helper.cuh.

References arg().

Here is the call graph for this function:

◆ indexFromFaceIndexStaggered()

template<int nDim, QudaPCType type, int dim, int nLayers, int face_num, typename Arg >

static __device__ int quda::indexFromFaceIndexStaggered	(	int	face_idx_in,
		int	parity,
		const Arg &	arg
	)

inline static

Compute global checkerboard index from face index. The following indexing routines work for arbitrary lattice dimensions (though perhaps not odd like the Wilson variant?) Specifically, we compute an index into the local volume from an index into the face. This is used by the staggered-like face packing routines, and is different from the Wilson variant since here the halo depth is traversed in a different order - here the halo depth is the faster running dimension.

Parameters

[in]	face_idx_in	Checkerboarded face index
[in]	arg	Parameter struct with required meta data

Returns: Global checkerboard coordinate

Definition at line 717 of file index_helper.cuh.

References dims, s, and X.

◆ InitGaugeField() [1/2]

void quda::InitGaugeField ( cudaGaugeField & data )

Perform a cold start to the gauge field, identity SU(3) matrix, also fills the ghost links in multi-GPU case (no need to exchange data)

Parameters

[in,out] data Gauge field

Referenced by main(), and GaugeAlgTest::SetUp().

Here is the caller graph for this function:

◆ InitGaugeField() [2/2]

void quda::InitGaugeField	(	cudaGaugeField &	data,
		RNG &	rngstate
	)

Perform a hot start to the gauge field, random SU(3) matrix, followed by reunitarization; also exchanges border links in multi-GPU case.

Parameters

[in,out]	data	Gauge field
[in,out]	rngstate	state of the CURAND random number generator

Definition at line 450 of file pgauge_init.cu.

References errorQuda, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, and QUDA_SINGLE_PRECISION.

Here is the call graph for this function:

◆ injector()

template<typename Float , int length, int dim, typename Arg >

__device__ __host__ void quda::injector	(	Arg &	arg,
		int	dir,
		int	a,
		int	b,
		int	c,
		int	d,
		int	g,
		int	parity
	)

Definition at line 76 of file extract_gauge_ghost_extended.cu.

References quda::Matrix< T, N >::data, length, and quda::gauge::Ncolor().

Here is the call graph for this function:

◆ innerProduct() [1/4]

template<typename Float , int Nc, int Ns>

__device__ __host__ complex<Float> quda::innerProduct	(	const ColorSpinor< Float, Nc, Ns > &	a,
		const ColorSpinor< Float, Nc, Ns > &	b
	)

inline

Compute the inner product over color and spin: dot = \sum_{s,c} conj(a(s,c)) * b(s,c)

Parameters

a	Left-hand side ColorSpinor
b	Right-hand side ColorSpinor

Returns: The inner product

Definition at line 914 of file color_spinor.h.

References dot(), and s.

Referenced by computeColorContraction(), computeDegrandRossiContraction(), and innerProduct().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ innerProduct() [2/4]

template<typename Float , int Nc, int Ns>

__device__ __host__ complex<Float> quda::innerProduct	(	const ColorSpinor< Float, Nc, Ns > &	a,
		const ColorSpinor< Float, Nc, Ns > &	b,
		int	s
	)

inline

Compute the inner product over color at spin s between two ColorSpinor fields: dot = \sum_c conj(a(s,c)) * b(s,c)

Parameters

a	Left-hand side ColorSpinor
b	Right-hand side ColorSpinor
s	diagonal spin index

Returns: The inner product

Definition at line 932 of file color_spinor.h.

References innerProduct().

Here is the call graph for this function:

◆ innerProduct() [3/4]

template<typename Float , int Nc, int Ns>

__device__ __host__ complex<Float> quda::innerProduct	(	const ColorSpinor< Float, Nc, Ns > &	a,
		const ColorSpinor< Float, Nc, Ns > &	b,
		int	sa,
		int	sb
	)

inline

Compute the inner product over color at spin sa and sb between two ColorSpinor fields: dot = \sum_c conj(a(sa,c)) * b(sb,c)

Parameters

a	Left-hand side ColorSpinor
b	Right-hand side ColorSpinor
sa	Left-hand side spin index
sb	Right-hand side spin index

Returns: The inner product

Definition at line 948 of file color_spinor.h.

References dot().

Here is the call graph for this function:

◆ innerProduct() [4/4]

template<typename Float , int Nc, int Ns>

__device__ __host__ complex<Float> quda::innerProduct	(	const ColorSpinor< Float, Nc, 1 > &	a,
		const ColorSpinor< Float, Nc, Ns > &	b,
		int	s
	)

inline

Compute the inner product over color at spin s between a color vector and a color spinor: dot = \sum_c conj(a(c)) * b(s,c)

Parameters

a	Left-hand side ColorVector
b	Right-hand side ColorSpinor

Returns: The inner product

Definition at line 971 of file color_spinor.h.

References innerProduct().

Here is the call graph for this function:

◆ instantiate() [1/3]

template<template< typename, int, QudaReconstructType > class Apply, typename Recon , typename Float , int nColor, typename... Args>

void quda::instantiate	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const GaugeField &	U,
		Args &&...	args
	)

inline

This instantiate function is used to instantiate the reconstruct types used.

Parameters

[out]	out	Output result field
[in]	in	Input field
[in]	U	Gauge field
[in]	args	Additional arguments for different dslash kernels

Definition at line 426 of file dslash.h.

References errorQuda, quda::Dslash< Float >::in, quda::Dslash< Float >::out, and quda::GaugeField::Reconstruct().

Here is the call graph for this function:

◆ instantiate() [2/3]

template<template< typename, int, QudaReconstructType > class Apply, typename Recon , typename Float , typename... Args>

void quda::instantiate	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const GaugeField &	U,
		Args &&...	args
	)

inline

This instantiate function is used to instantiate the colors.

Parameters

[out]	out	Output result field
[in]	in	Input field
[in]	U	Gauge field
[in]	args	Additional arguments for different dslash kernels

Definition at line 459 of file dslash.h.

References errorQuda, quda::Dslash< Float >::in, quda::GaugeField::Ncolor(), quda::ColorSpinorField::Ncolor(), and quda::Dslash< Float >::out.

Here is the call graph for this function:

◆ instantiate() [3/3]

template<template< typename, int, QudaReconstructType > class Apply, typename Recon = WilsonReconstruct, typename... Args>

void quda::instantiate	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const GaugeField &	U,
		Args &&...	args
	)

inline

This instantiate function is used to instantiate the precisions.

Parameters

[out]	out	Output result field
[in]	in	Input field
[in]	U	Gauge field
[in]	args	Additional arguments for different dslash kernels

Definition at line 476 of file dslash.h.

References errorQuda, quda::Dslash< Float >::in, quda::Dslash< Float >::out, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, and QUDA_SINGLE_PRECISION.

Here is the call graph for this function:

◆ inverse()

template<class T >

__device__ __host__ Matrix<T,3> quda::inverse ( const Matrix< T, 3 > & u )

inline

Definition at line 611 of file quda_matrix.h.

References getDeterminant().

Referenced by quda::clover::Accessor< Float, nColor, nSpin, QUDA_PACKED_CLOVER_ORDER >::Accessor(), ApplyClover(), computeOvrImpSTOUTStep(), computeSTOUTStep(), quda::cudaCloverField::copy(), copyGenericClover(), loadCloverQuda(), polarSu3(), and setUnitarizeLinksConstants().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ is_aligned()

bool quda::is_aligned	(	const void *	ptr,
		size_t	alignment
	)

inline

Returns: whether the pointer is aligned

Definition at line 57 of file malloc_quda.h.

Referenced by quda::cudaColorSpinorField::create(), quda::cudaCloverField::cudaCloverField(), and quda::cudaGaugeField::zeroPad().

Here is the caller graph for this function:

◆ isActive()

template<KernelType kernel_type, typename Arg >

__device__ bool quda::isActive	(	bool &	active,
		int	threadDim,
		int	offsetDim,
		const int	coord[],
		const Arg &	arg
	)

inline

Compute whether this thread should be active for updating a given offsetDim halo. For non-fused halo update kernels this is a trivial kernel that just checks if the given dimension is partitioned and, if so, returns true.

For fused halo region update kernels: here every thread has a prescribed dimension it is tasked with updating, but for the edges and vertices, the thread responsible for the entire update is the "greatest" one. Hence some threads may be labelled as a given dimension, but they have to update other dimensions too. Conversely, a given thread may be labelled for a given dimension, but if that thread lies at an edge or vertex, and we have partitioned a higher dimension, then that thread will cede to the higher thread.

Parameters

[in,out]	active	Whether this thread is "cumulatively" active (cumulative over all dimensions)
[in]	threadDim	Prescribed dimension of this thread
[in]	offsetDim	The dimension we are querying whether this thread should be responsible
[in]	offset	The size of the hop
[in]	y	Site coordinate
[in]	partitioned	Array of which dimensions have been partitioned
[in]	X	Lattice dimensions

Returns: true if this thread is active

Definition at line 188 of file dslash_helper.cuh.

References EXTERIOR_KERNEL_ALL.

◆ isComplete()

template<KernelType type, typename Arg >

__host__ __device__ bool quda::isComplete	(	const Arg &	arg,
		int	coord[]
	)

inline

Helper function to determine if the application of the derivative in the dslash is complete.

Parameters

[in]	arg	Argument parameter struct
[in]	Checkerboard	space-time index
[in]	Parity	we are acting on

Definition at line 55 of file dslash_helper.cuh.

References EXTERIOR_KERNEL_ALL, EXTERIOR_KERNEL_T, EXTERIOR_KERNEL_X, EXTERIOR_KERNEL_Y, EXTERIOR_KERNEL_Z, and INTERIOR_KERNEL.

◆ isUnitary()

bool quda::isUnitary	(	const cpuGaugeField &	field,
		double	max_error
	)

◆ kernel_random()

__global__ void quda::kernel_random	(	cuRNGState *	state,
		unsigned long long	seed,
		int	size_cb,
		rngArg	arg
	)

CUDA kernel to initialize CURAND RNG states.

Parameters

state	CURAND RNG state array
seed	initial seed for RNG
size_cb	size of the CURAND RNG state array
arg	Metadata needed for computing multi-gpu offsets

Definition at line 51 of file random.cu.

References quda::rngArg::commCoord, quda::rngArg::commDim, getCoords(), parity, and quda::rngArg::X.

Here is the call graph for this function:

◆ laplace()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >

__device__ __host__ void quda::laplace	(	Arg &	arg,
		int	idx,
		int	parity
	)

inline

Definition at line 132 of file laplace.cuh.

References applyLaplace(), arg(), quda::DslashArg< Float >::dagger, EXTERIOR_KERNEL_ALL, INTERIOR_KERNEL, quda::DslashArg< Float >::kernel_type, nColor, quda::DslashArg< Float >::nParity, quda::LaplaceArg< Float, nColor, reconstruct_ >::out, quda::DslashArg< Float >::parity, quda::LaplaceArg< Float, nColor, reconstruct_ >::x, and quda::DslashArg< Float >::xpay.

Referenced by quda::LaplaceApply< Float, nColor, recon >::LaplaceApply(), and quda::Laplace< Float, nDim, nColor, Arg >::tuneKey().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ laplaceGPU()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >

__global__ void quda::laplaceGPU ( Arg arg )

Definition at line 178 of file laplace.cuh.

References arg(), and quda::DslashArg< Float >::parity.

Here is the call graph for this function:

◆ launch_kernel_random()

void quda::launch_kernel_random	(	cuRNGState *	state,
		unsigned long long	seed,
		int	size_cb,
		int	n_parity,
		int	X[4]
	)

Call CUDA kernel to initialize CURAND RNG states.

Parameters

state	CURAND RNG state array
seed	initial seed for RNG
size_cb	Checkerboarded size of the CURAND RNG state array
n_parity	Number of parities (1 or 2)
X	array of lattice dimensions

Definition at line 75 of file random.cu.

References arg(), GetBlockDim(), and qudaDeviceSynchronize.

Referenced by quda::RNG::Init().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ linkIndex() [1/2]

template<typename I >

static __device__ __host__ int quda::linkIndex	(	const int	x[],
		const I	X[4]
	)

inlinestatic

Compute the checkerboard 1-d index from the 4-d coordinate x[]

Returns: 1-d checkerboard index

Parameters

x	4-d lattice index
X	Full lattice dimensions

Definition at line 46 of file index_helper.cuh.

Referenced by computeGenGauss(), computeMomAction(), quda::colorspinor::PaddedSpaceSpinorColorOrder< Float, Ns, Nc >::getPaddedIndex(), and quda::gauge::TIFRPaddedOrder< Float, length >::getPaddedIndex().

Here is the caller graph for this function:

◆ linkIndex() [2/2]

template<typename I >

static __device__ __host__ int quda::linkIndex	(	int	y[],
		const int	x[],
		const I	X[4]
	)

inlinestatic

Compute the checkerboard 1-d index from the 4-d coordinate x[]

Returns: 1-d checkerboard index

Parameters

y	copy of 4-d lattice index
x	4-d lattice index
X	Full lattice dimensions

Definition at line 60 of file index_helper.cuh.

◆ linkIndexDn()

template<typename I , int n>

static __device__ __host__ int quda::linkIndexDn	(	const int	x[],
		const I	X[4],
		const int	mu
	)

inlinestatic

Compute the checkerboard 1-d index from the 4-d coordinate x[] +n in the mu direction

Returns: 1-d checkerboard index

Template Parameters

n	number of hops (+/-) in the mu direction

Parameters

x	4-d lattice index
X	Full lattice dimensions
mu	direction in which to add n hops

Definition at line 76 of file index_helper.cuh.

References mu.

Referenced by linkIndexM1(), and linkIndexM3().

Here is the caller graph for this function:

◆ linkIndexM1()

template<typename I >

static __device__ __host__ int quda::linkIndexM1	(	const int	x[],
		const I	X[4],
		const int	mu
	)

inlinestatic

Compute the checkerboard 1-d index from the 4-d coordinate x[] -1 in the mu direction

Returns: 1-d checkerboard index

Parameters

x	4-d lattice index
X	Full lattice dimensions
mu	direction in which to subtract 1

Definition at line 94 of file index_helper.cuh.

References linkIndexDn(), mu, and X.

Referenced by applyDslash(), applyLaplace(), applyStaggered(), computeNeighborSum(), and computeYhat().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ linkIndexM3()

template<typename I >

static __device__ __host__ int quda::linkIndexM3	(	const int	x[],
		const I	X[4],
		const int	mu
	)

inlinestatic

Compute the checkerboard 1-d index from the 4-d coordinate x[] -3 in the mu direction

Returns: 1-d checkerboard index

Parameters

x	4-d lattice index
X	Full lattice dimensions
mu	direction in which to subtract 3

Definition at line 107 of file index_helper.cuh.

References linkIndexDn(), mu, and X.

Referenced by applyStaggered().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ linkIndexP1()

template<typename I >

static __device__ __host__ int quda::linkIndexP1	(	const int	x[],
		const I	X[4],
		const int	mu
	)

inlinestatic

Compute the checkerboard 1-d index from the 4-d coordinate x[] +1 in the mu direction

Returns: 1-d checkerboard index

Parameters

x	4-d lattice index
X	Full lattice dimensions
mu	direction in which to add 1

Definition at line 139 of file index_helper.cuh.

References mu, and X.

Referenced by applyDslash(), applyLaplace(), applyStaggered(), computeNeighborSum(), and computeUV().

Here is the caller graph for this function:

◆ linkIndexP3()

template<typename I >

static __device__ __host__ int quda::linkIndexP3	(	const int	x[],
		const I	X[4],
		const int	mu
	)

inlinestatic

Compute the checkerboard 1-d index from the 4-d coordinate x[] +3 in the mu direction

Returns: 1-d checkerboard index

Parameters

x	4-d lattice index
X	Full lattice dimensions
mu	direction in which to add 3

Definition at line 151 of file index_helper.cuh.

References mu, and X.

Referenced by applyStaggered().

Here is the caller graph for this function:

◆ linkIndexShift() [1/2]

template<typename I , typename J , typename K >

static __device__ __host__ int quda::linkIndexShift	(	const I	x[],
		const J	dx[],
		const K	X[4]
	)

inlinestatic

Compute the checkerboard 1-d index from the 4-d coordinate x[] + dx[]

Returns: 1-d checkerboard index

Parameters

x	4-d lattice index
dx	4-d shift index
X	Full lattice dimensions

Definition at line 13 of file index_helper.cuh.

Referenced by completeKSForceCore(), computeAPEStep(), computeFmunuCore(), computeForce(), computeOvrImpSTOUTStep(), computeStaple(), computeStapleRectangle(), computeSTOUTStep(), quda::GaugeSTOUTArg< Float, GaugeOr, GaugeDs >::GaugeSTOUTArg(), and plaquette().

Here is the caller graph for this function:

◆ linkIndexShift() [2/2]

template<typename I , typename J , typename K >

static __device__ __host__ int quda::linkIndexShift	(	I	y[],
		const I	x[],
		const J	dx[],
		const K	X[4]
	)

inlinestatic

Compute the checkerboard 1-d index from the 4-d coordinate x[] + dx[]

Returns: 1-d checkerboard index

Parameters

y	new 4-d lattice index
x	original 4-d lattice index
dx	4-d shift index
X	Full lattice dimensions

Definition at line 31 of file index_helper.cuh.

◆ linkNormalIndexP1()

template<typename I >

static __device__ __host__ int quda::linkNormalIndexP1	(	const int	x[],
		const I	X[4],
		const int	mu
	)

inlinestatic

Compute the full 1-d index from the 4-d coordinate x[] +1 in the mu direction

Returns: 1-d checkerboard index

Parameters

x	4-d lattice index
X	Full lattice dimensions
mu	direction in which to add 1

Definition at line 121 of file index_helper.cuh.

References mu.

◆ load_cached_short2()

__device__ void quda::load_cached_short2	(	short2 &	a,
		const short2 *	addr
	)

inline

Definition at line 45 of file inline_ptx.h.

References __PTR.

◆ load_cached_short4()

__device__ void quda::load_cached_short4	(	short4 &	a,
		const short4 *	addr
	)

inline

Definition at line 35 of file inline_ptx.h.

References __PTR.

◆ load_global_float4()

__device__ void quda::load_global_float4	(	float4 &	a,
		const float4 *	addr
	)

inline

Definition at line 71 of file inline_ptx.h.

References __PTR.

◆ load_global_short2()

__device__ void quda::load_global_short2	(	short2 &	a,
		const short2 *	addr
	)

inline

Definition at line 63 of file inline_ptx.h.

References __PTR.

◆ load_global_short4()

__device__ void quda::load_global_short4	(	short4 &	a,
		const short4 *	addr
	)

inline

Definition at line 53 of file inline_ptx.h.

References __PTR.

◆ load_streaming_double2()

__device__ void quda::load_streaming_double2	(	double2 &	a,
		const double2 *	addr
	)

inline

Definition at line 21 of file inline_ptx.h.

References __PTR.

◆ load_streaming_float4()

__device__ void quda::load_streaming_float4	(	float4 &	a,
		const float4 *	addr
	)

inline

Definition at line 28 of file inline_ptx.h.

References __PTR.

◆ loadLinkVariableFromArray() [1/2]

template<class T , class U >

__device__ void quda::loadLinkVariableFromArray	(	const T *const	array,
		const int	dir,
		const int	idx,
		const int	stride,
		Matrix< U, 3 > *	link
	)

inline

Definition at line 857 of file quda_matrix.h.

References quda::Matrix< T, N >::data.

◆ loadLinkVariableFromArray() [2/2]

__device__ void quda::loadLinkVariableFromArray	(	const float2 *const	array,
		const int	dir,
		const int	idx,
		const int	stride,
		Matrix< complex< double >, 3 > *	link
	)

inline

Definition at line 879 of file quda_matrix.h.

◆ loadMatrixFromArray()

template<class T , class U , int N>

__device__ void quda::loadMatrixFromArray	(	const T *const	array,
		const int	idx,
		const int	stride,
		Matrix< U, N > *	mat
	)

inline

Definition at line 869 of file quda_matrix.h.

References quda::Matrix< T, N >::data.

◆ loadMomentumFromArray()

template<class T >

__device__ void quda::loadMomentumFromArray	(	const T *const	array,
		const int	dir,
		const int	idx,
		const int	stride,
		Matrix< T, 3 > *	mom
	)

inline

Definition at line 955 of file quda_matrix.h.

References quda::Matrix< T, N >::data.

◆ loadTuneCache()

void quda::loadTuneCache ( )

Definition at line 322 of file tune.cpp.

References broadcastTuneCache(), comm_rank(), deserializeTuneCache(), errorQuda, getTuning(), getVerbosity(), gitversion, printfQuda, QUDA_SUMMARIZE, QUDA_TUNE_NO, resource_path, and warningQuda.

Referenced by initQudaMemory(), and quda::TunableVectorYZ::resizeStep().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ Location_() [1/2]

QudaFieldLocation quda::Location_	(	const char *	func,
		const char *	file,
		int	line,
		const LatticeField &	a,
		const LatticeField &	b
	)

inline

Helper function for determining if the location of the fields is the same.

Parameters

[in]	a	Input field
[in]	b	Input field

Returns: If location is unique return the location

Definition at line 642 of file lattice_field.h.

References errorQuda, quda::LatticeField::Location(), and QUDA_INVALID_FIELD_LOCATION.

Referenced by Location_().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ Location_() [2/2]

template<typename... Args>

QudaFieldLocation quda::Location_	(	const char *	func,
		const char *	file,
		int	line,
		const LatticeField &	a,
		const LatticeField &	b,
		const Args &...	args
	)

inline

Helper function for determining if the location of the fields is the same.

Parameters

[in]	a	Input field
[in]	b	Input field
[in]	args	List of additional fields to check location on

Returns: If location is unique return the location

Definition at line 659 of file lattice_field.h.

References Location_().

Here is the call graph for this function:

◆ log() [1/3]

template<typename ValueType >

__host__ __device__ ValueType quda::log ( ValueType x )

inline

Definition at line 101 of file complex_quda.h.

References log().

Referenced by acosh(), asinh(), atanh(), cloverInvertCompute(), expsu3(), gauss_su3(), genGauss(), log(), log10(), pow(), and smallSVD().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ log() [2/3]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::log ( const complex< ValueType > & z )

inline

Definition at line 1162 of file complex_quda.h.

References abs(), arg(), and log().

Here is the call graph for this function:

◆ log() [3/3]

template<>

__host__ __device__ complex<float> quda::log ( const complex< float > & z )

inline

Definition at line 1168 of file complex_quda.h.

References abs(), and arg().

Referenced by log().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ log10() [1/2]

template<typename ValueType >

__host__ __device__ ValueType quda::log10 ( ValueType x )

inline

Definition at line 106 of file complex_quda.h.

References log10().

Here is the call graph for this function:

◆ log10() [2/2]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::log10 ( const complex< ValueType > & z )

inline

Definition at line 1175 of file complex_quda.h.

References log().

Referenced by log10().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ make_charN() [1/6]

__forceinline__ __host__ __device__ char4 quda::make_charN ( const short4 & a )

Definition at line 263 of file float_vector.h.

◆ make_charN() [2/6]

__forceinline__ __host__ __device__ char2 quda::make_charN ( const short2 & a )

Definition at line 267 of file float_vector.h.

◆ make_charN() [3/6]

__forceinline__ __host__ __device__ char4 quda::make_charN ( const float4 & a )

Definition at line 271 of file float_vector.h.

◆ make_charN() [4/6]

__forceinline__ __host__ __device__ char2 quda::make_charN ( const float2 & a )

Definition at line 275 of file float_vector.h.

◆ make_charN() [5/6]

__forceinline__ __host__ __device__ char4 quda::make_charN ( const double4 & a )

Definition at line 279 of file float_vector.h.

◆ make_charN() [6/6]

__forceinline__ __host__ __device__ char2 quda::make_charN ( const double2 & a )

Definition at line 283 of file float_vector.h.

◆ make_Complex() [1/2]

complex<double> quda::make_Complex ( const double2 & a )

inline

Definition at line 309 of file float_vector.h.

◆ make_Complex() [2/2]

complex<float> quda::make_Complex ( const float2 & a )

inline

Definition at line 310 of file float_vector.h.

◆ make_Float2() [1/9]

template<typename Float2 , typename Complex >

Float2 quda::make_Float2 ( const Complex & a )

inline

Definition at line 288 of file float_vector.h.

◆ make_Float2() [2/9]

template<>

double2 quda::make_Float2 ( const complex< double > & a )

inline

Definition at line 291 of file float_vector.h.

References quda::complex< double >::imag(), and quda::complex< double >::real().

Here is the call graph for this function:

◆ make_Float2() [3/9]

template<>

double2 quda::make_Float2 ( const complex< float > & a )

inline

Definition at line 293 of file float_vector.h.

References quda::complex< float >::imag(), and quda::complex< float >::real().

Here is the call graph for this function:

◆ make_Float2() [4/9]

template<>

float2 quda::make_Float2 ( const complex< double > & a )

inline

Definition at line 295 of file float_vector.h.

References quda::complex< double >::imag(), and quda::complex< double >::real().

Here is the call graph for this function:

◆ make_Float2() [5/9]

template<>

float2 quda::make_Float2 ( const complex< float > & a )

inline

Definition at line 297 of file float_vector.h.

References quda::complex< float >::imag(), and quda::complex< float >::real().

Here is the call graph for this function:

◆ make_Float2() [6/9]

template<>

double2 quda::make_Float2 ( const std::complex< double > & a )

inline

Definition at line 300 of file float_vector.h.

◆ make_Float2() [7/9]

template<>

double2 quda::make_Float2 ( const std::complex< float > & a )

inline

Definition at line 302 of file float_vector.h.

◆ make_Float2() [8/9]

template<>

float2 quda::make_Float2 ( const std::complex< double > & a )

inline

Definition at line 304 of file float_vector.h.

◆ make_Float2() [9/9]

template<>

float2 quda::make_Float2 ( const std::complex< float > & a )

inline

Definition at line 306 of file float_vector.h.

◆ make_FloatN() [1/4]

__forceinline__ __host__ __device__ float2 quda::make_FloatN ( const double2 & a )

Definition at line 223 of file float_vector.h.

◆ make_FloatN() [2/4]

__forceinline__ __host__ __device__ float4 quda::make_FloatN ( const double4 & a )

Definition at line 227 of file float_vector.h.

◆ make_FloatN() [3/4]

__forceinline__ __host__ __device__ double2 quda::make_FloatN ( const float2 & a )

Definition at line 231 of file float_vector.h.

◆ make_FloatN() [4/4]

__forceinline__ __host__ __device__ double4 quda::make_FloatN ( const float4 & a )

Definition at line 235 of file float_vector.h.

◆ make_shortN() [1/6]

__forceinline__ __host__ __device__ short4 quda::make_shortN ( const char4 & a )

Definition at line 239 of file float_vector.h.

◆ make_shortN() [2/6]

__forceinline__ __host__ __device__ short2 quda::make_shortN ( const char2 & a )

Definition at line 243 of file float_vector.h.

◆ make_shortN() [3/6]

__forceinline__ __host__ __device__ short4 quda::make_shortN ( const float4 & a )

Definition at line 247 of file float_vector.h.

◆ make_shortN() [4/6]

__forceinline__ __host__ __device__ short2 quda::make_shortN ( const float2 & a )

Definition at line 251 of file float_vector.h.

◆ make_shortN() [5/6]

__forceinline__ __host__ __device__ short4 quda::make_shortN ( const double4 & a )

Definition at line 255 of file float_vector.h.

◆ make_shortN() [6/6]

__forceinline__ __host__ __device__ short2 quda::make_shortN ( const double2 & a )

Definition at line 259 of file float_vector.h.

◆ makeAntiHerm()

template<typename Complex , int N>

__device__ __host__ void quda::makeAntiHerm ( Matrix< Complex, N > & m )

inline

Definition at line 746 of file quda_matrix.h.

References conj().

Referenced by completeKSForceCore(), and computeMomAction().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ mapped_allocated_peak()

long quda::mapped_allocated_peak ( )

Returns: peak mapped memory allocated

Definition at line 63 of file malloc.cpp.

References MAPPED.

◆ mapped_malloc_()

void * quda::mapped_malloc_	(	const char *	func,
		const char *	file,
		int	line,
		size_t	size
	)

Allocate page-locked ("pinned") host memory, and map it into the GPU address space. This function should only be called via the mapped_malloc() macro, defined in malloc_quda.h.

Definition at line 273 of file malloc.cpp.

References aligned_malloc(), quda::MemAlloc::base_size, errorQuda, MAPPED, memset(), and track_malloc().

Here is the call graph for this function:

◆ massRescale()

void quda::massRescale	(	cudaColorSpinorField &	b,
		QudaInvertParam &	param
	)

Definition at line 1769 of file interface_quda.cpp.

References quda::blas::ax(), QudaInvertParam_s::dslash_type, errorQuda, getVerbosity(), kappa, QudaInvertParam_s::kappa, kappa5, QudaInvertParam_s::m5, QudaInvertParam_s::mass, QudaInvertParam_s::mass_normalization, quda::blas::norm2(), QudaInvertParam_s::num_offset, QudaInvertParam_s::offset, pow(), printfQuda, QUDA_ASQTAD_DSLASH, QUDA_ASYMMETRIC_MASS_NORMALIZATION, QUDA_DEBUG_VERBOSE, QUDA_DOMAIN_WALL_4D_DSLASH, QUDA_DOMAIN_WALL_DSLASH, QUDA_KAPPA_NORMALIZATION, QUDA_MASS_NORMALIZATION, QUDA_MAT_SOLUTION, QUDA_MATDAG_MAT_SOLUTION, QUDA_MATPC_SOLUTION, QUDA_MATPCDAG_MATPC_SOLUTION, QUDA_MOBIUS_DWF_DSLASH, QUDA_STAGGERED_DSLASH, and QudaInvertParam_s::solution_type.

Referenced by invertMultiShiftQuda(), invertMultiSrcQuda(), and invertQuda().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ max_fabs() [1/4]

__forceinline__ __host__ __device__ float quda::max_fabs ( const float4 & c )

Definition at line 198 of file float_vector.h.

Referenced by store_norm().

Here is the caller graph for this function:

◆ max_fabs() [2/4]

__forceinline__ __host__ __device__ float quda::max_fabs ( const float2 & b )

Definition at line 204 of file float_vector.h.

◆ max_fabs() [3/4]

__forceinline__ __host__ __device__ double quda::max_fabs ( const double4 & c )

Definition at line 208 of file float_vector.h.

◆ max_fabs() [4/4]

__forceinline__ __host__ __device__ double quda::max_fabs ( const double2 & b )

Definition at line 214 of file float_vector.h.

◆ Monte()

void quda::Monte	(	cudaGaugeField &	data,
		RNG &	rngstate,
		double	Beta,
		int	nhb,
		int	nover
	)

Perform heatbath and overrelaxation. Performs nhb heatbath steps followed by nover overrelaxation steps.

Parameters

[in,out]	data	Gauge field
[in,out]	rngstate	state of the CURAND random number generator
[in]	Beta	inverse of the gauge coupling, beta = 2 Nc / g_0^2
[in]	nhb	number of heatbath steps
[in]	nover	number of overrelaxation steps

Definition at line 856 of file pgauge_heatbath.cu.

References errorQuda, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, and QUDA_SINGLE_PRECISION.

Referenced by main(), and GaugeAlgTest::SetUp().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ multiplyVUV()

template<bool from_coarse, typename Float , int dim, QudaDirection dir, int fineSpin, int fineColor, int coarseSpin, int coarseColor, typename Arg , typename Gamma >

__device__ __host__ void quda::multiplyVUV	(	complex< Float >	vuv[],
		const Arg &	arg,
		const Gamma &	gamma,
		int	parity,
		int	x_cb,
		int	ic_c,
		int	jc_c
	)

inline

Do a single (AV)^dagger * UV product, where for preconditioned clover, AV corresponds to the clover inverse multiplied by the packed null space vectors, else AV is simply the packed null space vectors.

Parameters

[out]	vuv	Result array
[in,out]	arg	Arg storing the fields and parameters
[in]	parity	Fine grid parity we're working on
[in]	x_cb	Checkerboarded x dimension

Definition at line 537 of file coarse_op_kernel.cuh.

References quda::Gamma< ValueType, basis, dir >::apply(), quda::CalculateYArg< Float, fineSpin, coarseSpin, fineColor, coarseColor, coarseGauge, coarseGaugeAtomic, fineGauge, fineSpinor, fineSpinorTmp, fineSpinorV, fineClover >::AV, caxpy(), conj(), quda::Gamma< ValueType, basis, dir >::getcol(), QUDA_BACKWARDS, s, quda::CalculateYArg< Float, fineSpin, coarseSpin, fineColor, coarseColor, coarseGauge, coarseGaugeAtomic, fineGauge, fineSpinor, fineSpinorTmp, fineSpinorV, fineClover >::UV, and quda::CalculateYArg< Float, fineSpin, coarseSpin, fineColor, coarseColor, coarseGauge, coarseGaugeAtomic, fineGauge, fineSpinor, fineSpinorTmp, fineSpinorV, fineClover >::V.

Here is the call graph for this function:

◆ ndegTwistedMass() [1/2]

template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, typename Arg >

__device__ __host__ void quda::ndegTwistedMass	(	Arg &	arg,
		int	idx,
		int	flavor,
		int	parity
	)

inline

Apply the twisted-mass dslash: out(x) = M*in = a * D * in + (1 + i*b*gamma_5*tau_3 + c*tau_1)*x. Note this routine only exists in xpay form.

Definition at line 31 of file dslash_ndeg_twisted_mass.cuh.

References arg(), EXTERIOR_KERNEL_ALL, INTERIOR_KERNEL, quda::DslashArg< Float >::kernel_type, quda::DslashArg< Float >::nParity, quda::WilsonArg< Float, nColor, reconstruct_ >::out, quda::DslashArg< Float >::parity, and quda::WilsonArg< Float, nColor, reconstruct_ >::x.

Here is the call graph for this function:

◆ ndegTwistedMass() [2/2]

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool asymmetric, bool xpay, KernelType kernel_type, typename Arg >

__device__ __host__ void quda::ndegTwistedMass	(	Arg &	arg,
		int	idx,
		int	flavor,
		int	parity
	)

inline

Apply the twisted-mass dslash: out(x) = M*in = a * D * in + (1 + i*b*gamma_5*tau_3 + c*tau_1)*x. Note this routine only exists in xpay form.

Definition at line 49 of file dslash_ndeg_twisted_mass_preconditioned.cuh.

References arg(), EXTERIOR_KERNEL_ALL, INTERIOR_KERNEL, quda::VectorCache< real, Vector >::load(), quda::WilsonArg< Float, nColor, reconstruct_ >::out, quda::DslashArg< Float >::parity, quda::VectorCache< real, Vector >::save(), quda::VectorCache< real, Vector >::sync(), and quda::WilsonArg< Float, nColor, reconstruct_ >::x.

Here is the call graph for this function:

◆ ndegTwistedMassCPU()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, typename Arg >

void quda::ndegTwistedMassCPU ( Arg arg )

Definition at line 78 of file dslash_ndeg_twisted_mass.cuh.

References arg(), quda::DslashArg< Float >::nParity, and quda::DslashArg< Float >::parity.

Here is the call graph for this function:

◆ ndegTwistedMassGPU()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >

__global__ void quda::ndegTwistedMassGPU ( Arg arg )

Definition at line 94 of file dslash_ndeg_twisted_mass.cuh.

References arg(), quda::DslashArg< Float >::nParity, and quda::DslashArg< Float >::parity.

Here is the call graph for this function:

◆ ndegTwistedMassPreconditionedCPU()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >

void quda::ndegTwistedMassPreconditionedCPU ( Arg arg )

Definition at line 113 of file dslash_ndeg_twisted_mass_preconditioned.cuh.

References arg(), quda::DslashArg< Float >::nParity, and quda::DslashArg< Float >::parity.

Here is the call graph for this function:

◆ ndegTwistedMassPreconditionedGPU()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >

__global__ void quda::ndegTwistedMassPreconditionedGPU ( Arg arg )

Definition at line 142 of file dslash_ndeg_twisted_mass_preconditioned.cuh.

References arg(), and quda::DslashArg< Float >::parity.

Here is the call graph for this function:

◆ neighborIndex()

template<IndexType idxType, typename Int >

__device__ __forceinline__ int quda::neighborIndex	(	const unsigned int &	cb_idx,
		const int(&)	shift[4],
		const bool(&)	partitioned[4],
		const unsigned int &	parity
	)

Definition at line 41 of file shift_quark_field.cu.

References coordsFromIndex(), quda::ShiftColorSpinorFieldArg< Output, Input >::partitioned, quda::ShiftColorSpinorFieldArg< Output, Input >::shift, X1, X2, X3, and X4.

Referenced by gaugeLink(), shiftColorSpinorFieldKernel(), and spinorNeighbor().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ norm() [1/6]

template<typename real , int Nc, QudaGaugeFieldOrder order>

double quda::norm	(	const GaugeField &	u,
		int	d,
		norm_type_	type
	)

Definition at line 15 of file max_gauge.cu.

References ABS_MAX, quda::clover::FieldOrder< Float, nColor, nSpin, order >::abs_max(), ABS_MIN, quda::clover::FieldOrder< Float, nColor, nSpin, order >::abs_min(), NORM1, quda::clover::FieldOrder< Float, nColor, nSpin, order >::norm1(), NORM2, and quda::clover::FieldOrder< Float, nColor, nSpin, order >::norm2().

Here is the call graph for this function:

◆ norm() [2/6]

template<typename real , int Nc, QudaCloverFieldOrder order>

double quda::norm	(	const CloverField &	u,
		norm_type_	type
	)

Definition at line 15 of file max_clover.cu.

References ABS_MAX, quda::clover::FieldOrder< Float, nColor, nSpin, order >::abs_max(), ABS_MIN, quda::clover::FieldOrder< Float, nColor, nSpin, order >::abs_min(), NORM1, quda::clover::FieldOrder< Float, nColor, nSpin, order >::norm1(), NORM2, and quda::clover::FieldOrder< Float, nColor, nSpin, order >::norm2().

Here is the call graph for this function:

◆ norm() [3/6]

template<typename real , int Nc>

double quda::norm	(	const GaugeField &	u,
		int	d,
		norm_type_	type
	)

Definition at line 28 of file max_gauge.cu.

References errorQuda, quda::GaugeField::FieldOrder(), QUDA_FLOAT2_GAUGE_ORDER, QUDA_MILC_GAUGE_ORDER, and QUDA_QDP_GAUGE_ORDER.

Here is the call graph for this function:

◆ norm() [4/6]

template<typename real , int Nc>

double quda::norm	(	const CloverField &	u,
		norm_type_	type
	)

Definition at line 29 of file max_clover.cu.

References errorQuda, quda::CloverField::Order(), QUDA_FLOAT2_CLOVER_ORDER, and QUDA_FLOAT4_CLOVER_ORDER.

Here is the call graph for this function:

◆ norm() [5/6]

template<typename real >

double quda::norm	(	const GaugeField &	u,
		int	d,
		norm_type_	type
	)

Definition at line 40 of file max_gauge.cu.

References errorQuda, and quda::GaugeField::Ncolor().

Here is the call graph for this function:

◆ norm() [6/6]

template<typename ValueType >

__host__ __device__ ValueType quda::norm ( const complex< ValueType > & z )

inline

Returns the squared magnitude of z.

Definition at line 1092 of file complex_quda.h.

Here is the caller graph for this function:

◆ norm1() [1/2]

double quda::norm1	(	const CloverField &	u,
		bool	inverse = `false`
	)

This is a debugging function, where we cast a clover field into a spinor field so we can compute its L1 norm.

Parameters

u	The clover field that we want the norm of

Returns: The L1 norm of the clover field

Definition at line 478 of file clover_field.cpp.

References colorSpinorParam(), quda::ColorSpinorField::Create(), and quda::blas::norm1().

Referenced by quda::cpuGaugeField::Gauge_p(), quda::CloverField::Rho(), and quda::GaugeField::SiteSize().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ norm1() [2/2]

double quda::norm1 ( const GaugeField & u )

This is a debugging function, where we cast a gauge field into a spinor field so we can compute its L1 norm.

Parameters

u	The gauge field that we want the norm of

Returns: The L1 norm of the gauge field

Definition at line 341 of file gauge_field.cpp.

References colorSpinorParam(), quda::ColorSpinorField::Create(), and quda::blas::norm1().

Here is the call graph for this function:

◆ norm2() [1/2]

double quda::norm2	(	const CloverField &	a,
		bool	inverse = `false`
	)

This is a debugging function, where we cast a clover field into a spinor field so we can compute its L2 norm.

Parameters

a	The clover field that we want the norm of

Returns: The L2 norm squared of the clover field

Definition at line 470 of file clover_field.cpp.

References colorSpinorParam(), quda::ColorSpinorField::Create(), and quda::blas::norm2().

Referenced by quda::MG::buildFreeVectors(), computeMomAction(), quda::DiracMobiusPC::Dslash5inv(), quda::GMResDR::FlexArnoldiProcedure(), quda::cpuGaugeField::Gauge_p(), quda::MG::generateNullVectors(), quda::Deflation::operator()(), quda::MG::operator()(), quda::PreconCG::operator()(), quda::SimpleBiCGstab::operator()(), quda::SD::operator()(), quda::IncEigCG::operator()(), quda::GMResDR::operator()(), quda::Deflation::reduce(), quda::CloverField::Rho(), quda::GaugeField::SiteSize(), quda::Deflation::verify(), and quda::MG::verify().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ norm2() [2/2]

double quda::norm2 ( const GaugeField & u )

This is a debugging function, where we cast a gauge field into a spinor field so we can compute its L2 norm.

Parameters

u	The gauge field that we want the norm of

Returns: The L2 norm squared of the gauge field

Definition at line 333 of file gauge_field.cpp.

References colorSpinorParam(), quda::ColorSpinorField::Create(), and quda::blas::norm2().

Here is the call graph for this function:

◆ operator!=() [1/3]

template<typename ValueType >

__host__ __device__ bool quda::operator!=	(	const complex< ValueType > &	lhs,
		const complex< ValueType > &	rhs
	)

inline

Definition at line 1035 of file complex_quda.h.

◆ operator!=() [2/3]

template<typename ValueType >

__host__ __device__ bool quda::operator!=	(	const ValueType &	lhs,
		const complex< ValueType > &	rhs
	)

inline

Definition at line 1041 of file complex_quda.h.

◆ operator!=() [3/3]

template<typename ValueType >

__host__ __device__ bool quda::operator!=	(	const complex< ValueType > &	lhs,
		const ValueType &	rhs
	)

inline

Definition at line 1047 of file complex_quda.h.

◆ operator*() [1/16]

__host__ __device__ float4 quda::operator*	(	const float	a,
		const float4	x
	)

inline

Definition at line 48 of file float_vector.h.

◆ operator*() [2/16]

__host__ __device__ float2 quda::operator*	(	const float	a,
		const float2	x
	)

inline

Definition at line 57 of file float_vector.h.

◆ operator*() [3/16]

__host__ __device__ double2 quda::operator*	(	const double	a,
		const double2	x
	)

inline

Definition at line 64 of file float_vector.h.

◆ operator*() [4/16]

__host__ __device__ double4 quda::operator*	(	const double	a,
		const double4	x
	)

inline

Definition at line 71 of file float_vector.h.

◆ operator*() [5/16]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::operator*	(	const complex< ValueType > &	lhs,
		const complex< ValueType > &	rhs
	)

inline

Definition at line 902 of file complex_quda.h.

◆ operator*() [6/16]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::operator*	(	const complex< ValueType > &	lhs,
		const ValueType &	rhs
	)

inline

Definition at line 911 of file complex_quda.h.

◆ operator*() [7/16]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::operator*	(	const ValueType &	lhs,
		const complex< ValueType > &	rhs
	)

inline

Definition at line 918 of file complex_quda.h.

◆ operator*() [8/16]

template<template< typename, int > class Mat, class T , int N, class S >

__device__ __host__ Mat<T,N> quda::operator*	(	const S &	scalar,
		const Mat< T, N > &	a
	)

inline

Definition at line 476 of file quda_matrix.h.

◆ operator*() [9/16]

template<template< typename, int > class Mat, class T , int N, class S >

__device__ __host__ Mat<T,N> quda::operator*	(	const Mat< T, N > &	a,
		const S &	scalar
	)

inline

Definition at line 484 of file quda_matrix.h.

References Mat().

Here is the call graph for this function:

◆ operator*() [10/16]

template<template< typename, int > class Mat, class T , int N>

__device__ __host__ Mat<T,N> quda::operator*	(	const Mat< T, N > &	a,
		const Mat< T, N > &	b
	)

inline

Generic implementation of matrix multiplication.

Definition at line 507 of file quda_matrix.h.

◆ operator*() [11/16]

template<template< typename > class complex, typename T , int N>

__device__ __host__ Matrix<complex<T>,N> quda::operator*	(	const Matrix< complex< T >, N > &	a,
		const Matrix< complex< T >, N > &	b
	)

inline

Specialization of complex matrix multiplication that will issue optimal fma instructions.

Definition at line 528 of file quda_matrix.h.

◆ operator*() [12/16]

template<class T , class U , int N>

__device__ __host__ Matrix<typename PromoteTypeId<T,U>::Type,N> quda::operator*	(	const Matrix< T, N > &	a,
		const Matrix< U, N > &	b
	)

inline

Definition at line 563 of file quda_matrix.h.

◆ operator*() [13/16]

template<class T >

__device__ __host__ Matrix<T,2> quda::operator*	(	const Matrix< T, 2 > &	a,
		const Matrix< T, 2 > &	b
	)

inline

Definition at line 583 of file quda_matrix.h.

◆ operator*() [14/16]

template<typename Float , int Nc, int Ns, typename S >

__device__ __host__ ColorSpinor<Float,Nc,Ns> quda::operator*	(	const S &	a,
		const ColorSpinor< Float, Nc, Ns > &	x
	)

inline

Compute the scalar-vector product y = a * x.

Parameters

[in]	a	Input scalar
[in]	x	Input vector

Returns: The vector a * x

Definition at line 1067 of file color_spinor.h.

References quda::ColorSpinor< Float, Nc, Ns >::data, and s.

◆ operator*() [15/16]

template<typename Float , int Nc, int Ns>

__device__ __host__ ColorSpinor<Float,Nc,Ns> quda::operator*	(	const Matrix< complex< Float >, Nc > &	A,
		const ColorSpinor< Float, Nc, Ns > &	x
	)

inline

Compute the matrix-vector product y = A * x.

Parameters

[in]	A	Input matrix
[in]	x	Input vector

Returns: The vector A * x

Definition at line 1089 of file color_spinor.h.

References quda::ColorSpinor< Float, Nc, Ns >::data, and s.

◆ operator*() [16/16]

template<typename Float , int Nc, int Ns>

__device__ __host__ ColorSpinor<Float,Nc,Ns> quda::operator*	(	const HMatrix< Float, Nc *Ns > &	A,
		const ColorSpinor< Float, Nc, Ns > &	x
	)

inline

Compute the matrix-vector product y = A * x.

Parameters

[in]	A	Input Hermitian matrix with dimensions (Nc*Ns) x (Nc*Ns)
[in]	x	Input vector

Returns: The vector A * x

Definition at line 1124 of file color_spinor.h.

References quda::ColorSpinor< Float, Nc, Ns >::data.

◆ operator*=() [1/7]

__host__ __device__ float2 quda::operator*=	(	float2 &	x,
		const float	a
	)

inline

Definition at line 151 of file float_vector.h.

◆ operator*=() [2/7]

__host__ __device__ double2 quda::operator*=	(	double2 &	x,
		const float	a
	)

inline

Definition at line 157 of file float_vector.h.

◆ operator*=() [3/7]

__host__ __device__ float4 quda::operator*=	(	float4 &	a,
		const float &	b
	)

inline

Definition at line 163 of file float_vector.h.

◆ operator*=() [4/7]

__host__ __device__ double2 quda::operator*=	(	double2 &	a,
		const double &	b
	)

inline

Definition at line 171 of file float_vector.h.

◆ operator*=() [5/7]

__host__ __device__ double4 quda::operator*=	(	double4 &	a,
		const double &	b
	)

inline

Definition at line 177 of file float_vector.h.

◆ operator*=() [6/7]

template<template< typename, int > class Mat, class T , int N, class S >

__device__ __host__ Mat<T,N> quda::operator*=	(	Mat< T, N > &	a,
		const S &	scalar
	)

inline

Definition at line 489 of file quda_matrix.h.

References Mat().

Here is the call graph for this function:

◆ operator*=() [7/7]

template<class T , int N>

__device__ __host__ Matrix<T,N> quda::operator*=	(	Matrix< T, N > &	a,
		const Matrix< T, N > &	b
	)

inline

Definition at line 552 of file quda_matrix.h.

◆ operator+() [1/13]

__host__ __device__ double2 quda::operator+	(	const double2 &	x,
		const double2 &	y
	)

inline

Definition at line 24 of file float_vector.h.

◆ operator+() [2/13]

__host__ __device__ double3 quda::operator+	(	const double3 &	x,
		const double3 &	y
	)

inline

Definition at line 40 of file float_vector.h.

◆ operator+() [3/13]

__host__ __device__ double4 quda::operator+	(	const double4 &	x,
		const double4 &	y
	)

inline

Definition at line 44 of file float_vector.h.

◆ operator+() [4/13]

template<typename scalar , int n>

__device__ __host__ vector_type<scalar,n> quda::operator+	(	const vector_type< scalar, n > &	a,
		const vector_type< scalar, n > &	b
	)

inline

Definition at line 60 of file cub_helper.cuh.

◆ operator+() [5/13]

__host__ __device__ float2 quda::operator+	(	const float2	x,
		const float2	y
	)

inline

Definition at line 80 of file float_vector.h.

◆ operator+() [6/13]

__host__ __device__ float4 quda::operator+	(	const float4	x,
		const float4	y
	)

inline

Definition at line 87 of file float_vector.h.

◆ operator+() [7/13]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::operator+	(	const complex< ValueType > &	lhs,
		const complex< ValueType > &	rhs
	)

inline

Definition at line 854 of file complex_quda.h.

◆ operator+() [8/13]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::operator+	(	const complex< ValueType > &	lhs,
		const ValueType &	rhs
	)

inline

Definition at line 870 of file complex_quda.h.

◆ operator+() [9/13]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::operator+	(	const ValueType &	lhs,
		const complex< ValueType > &	rhs
	)

inline

Definition at line 876 of file complex_quda.h.

◆ operator+() [10/13]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::operator+ ( const complex< ValueType > & rhs )

inline

Definition at line 996 of file complex_quda.h.

◆ operator+() [11/13]

template<template< typename, int > class Mat, class T , int N>

__device__ __host__ Mat<T,N> quda::operator+	(	const Mat< T, N > &	a,
		const Mat< T, N > &	b
	)

inline

Definition at line 433 of file quda_matrix.h.

◆ operator+() [12/13]

template<typename ValueType >

__host__ __device__ complex<ValueType> quda::operator+	(	const volatile complex< ValueType > &	lhs,
		const volatile complex< ValueType > &	rhs
	)

inline

Definition at line 862 of file complex_quda.h.

◆ operator+() [13/13]

template<typename Float , int Nc, int Ns>

__device__ __host__ ColorSpinor<Float,Nc,Ns> quda::operator+	(	const ColorSpinor< Float, Nc, Ns > &	x,
		const ColorSpinor< Float, Nc, Ns > &	y
	)

inline

ColorSpinor addition operator.

Parameters

[in]	x	Input vector
[in]	y	Input vector

Returns: The vector x + y

Definition at line 1023 of file color_spinor.h.

References quda::ColorSpinor< Float, Nc, Ns >::data, and s.

◆ operator+=() [1/8]

template<typename real , typename Link >

__device__ void quda::operator+=	(	real *	y,
		const Link &	x
	)

inline

Definition at line 47 of file clover_deriv.cuh.

◆ operator+=() [2/8]

__host__ __device__ float4 quda::operator+=	(	float4 &	x,
		const float4	y
	)

inline

Definition at line 96 of file float_vector.h.

◆ operator+=() [3/8]

__host__ __device__ float2 quda::operator+=	(	float2 &	x,
		const float2	y
	)

inline

Definition at line 104 of file float_vector.h.

◆ operator+=() [4/8]

__host__ __device__ double2 quda::operator+=	(	double2 &	x,
		const double2	y
	)

inline

Definition at line 110 of file float_vector.h.

◆ operator+=() [5/8]

__host__ __device__ double3 quda::operator+=	(	double3 &	x,
		const double3	y
	)

inline

Definition at line 116 of file float_vector.h.

◆ operator+=() [6/8]

__host__ __device__ double4 quda::operator+=	(	double4 &	x,
		const double4	y
	)

inline

Definition at line 123 of file float_vector.h.

◆ operator+=() [7/8]

template<template< typename, int > class Mat, class T , int N>

__device__ __host__ Mat<T,N> quda::operator+=	(	Mat< T, N > &	a,
		const Mat< T, N > &	b
	)

inline

Definition at line 443 of file quda_matrix.h.

◆ operator+=() [8/8]

template<template< typename, int > class Mat, class T , int N>

__device__ __host__ Mat<T,N> quda::operator+=	(	Mat< T, N > &	a,
		const T &	b
	)

inline

Definition at line 451 of file quda_matrix.h.

References Mat().

Here is the call graph for this function:

◆ operator-() [1/12]

__host__ __device__ double2 quda::operator-	(	const double2 &	x,
		const double2 &	y
	)

inline

Definition at line 28 of file float_vector.h.

◆ operator-() [2/12]

__host__ __device__ float2 quda::operator-	(	const float2 &	x,
		const float2 &	y
	)

inline

Definition at line 32 of file float_vector.h.

◆ operator-() [3/12]

__host__ __device__ float4 quda::operator-	(	const float4 &	x,
		const float4 &	y
	)

inline

Definition at line 36 of file float_vector.h.

◆ operator-() [4/12]

__host__ __device__ float2 quda::operator- ( const float2 & x )

inline

Definition at line 185 of file float_vector.h.

◆ operator-() [5/12]

__host__ __device__ double2 quda::operator- ( const double2 & x )

inline

Definition at line 189 of file float_vector.h.

◆ operator-() [6/12]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::operator-	(	const complex< ValueType > &	lhs,
		const complex< ValueType > &	rhs
	)

inline

Definition at line 883 of file complex_quda.h.

◆ operator-() [7/12]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::operator-	(	const complex< ValueType > &	lhs,
		const ValueType &	rhs
	)

inline

Definition at line 889 of file complex_quda.h.

◆ operator-() [8/12]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::operator-	(	const ValueType &	lhs,
		const complex< ValueType > &	rhs
	)

inline

Definition at line 895 of file complex_quda.h.

◆ operator-() [9/12]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::operator- ( const complex< ValueType > & rhs )

inline

Definition at line 1001 of file complex_quda.h.

◆ operator-() [10/12]

template<template< typename, int > class Mat, class T , int N>

__device__ __host__ Mat<T,N> quda::operator-	(	const Mat< T, N > &	a,
		const Mat< T, N > &	b
	)

inline

Definition at line 467 of file quda_matrix.h.

◆ operator-() [11/12]

template<template< typename, int > class Mat, class T , int N>

__device__ __host__ Mat<T,N> quda::operator- ( const Mat< T, N > & a )

inline

Definition at line 495 of file quda_matrix.h.

◆ operator-() [12/12]

template<typename Float , int Nc, int Ns>

__device__ __host__ ColorSpinor<Float,Nc,Ns> quda::operator-	(	const ColorSpinor< Float, Nc, Ns > &	x,
		const ColorSpinor< Float, Nc, Ns > &	y
	)

inline

ColorSpinor subtraction operator.

Parameters

[in]	x	Input vector
[in]	y	Input vector

Returns: The vector x - y

Definition at line 1045 of file color_spinor.h.

References quda::ColorSpinor< Float, Nc, Ns >::data, and s.

◆ operator-=() [1/5]

template<typename real , typename Link >

__device__ void quda::operator-=	(	real *	y,
		const Link &	x
	)

inline

Definition at line 58 of file clover_deriv.cuh.

References axpy().

Here is the call graph for this function:

◆ operator-=() [2/5]

__host__ __device__ float4 quda::operator-=	(	float4 &	x,
		const float4	y
	)

inline

Definition at line 131 of file float_vector.h.

◆ operator-=() [3/5]

__host__ __device__ float2 quda::operator-=	(	float2 &	x,
		const float2	y
	)

inline

Definition at line 139 of file float_vector.h.

◆ operator-=() [4/5]

__host__ __device__ double2 quda::operator-=	(	double2 &	x,
		const double2	y
	)

inline

Definition at line 145 of file float_vector.h.

◆ operator-=() [5/5]

template<template< typename, int > class Mat, class T , int N>

__device__ __host__ Mat<T,N> quda::operator-=	(	Mat< T, N > &	a,
		const Mat< T, N > &	b
	)

inline

Definition at line 459 of file quda_matrix.h.

◆ operator/() [1/7]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::operator/	(	const complex< ValueType > &	lhs,
		const complex< ValueType > &	rhs
	)

inline

Definition at line 926 of file complex_quda.h.

References norm().

Here is the call graph for this function:

◆ operator/() [2/7]

template<>

__host__ __device__ complex< float > quda::operator/	(	const complex< float > &	lhs,
		const complex< float > &	rhs
	)

inline

Definition at line 935 of file complex_quda.h.

References quda::complex< float >::imag(), quda::complex< float >::real(), and s.

Here is the call graph for this function:

◆ operator/() [3/7]

template<>

__host__ __device__ complex< double > quda::operator/	(	const complex< double > &	lhs,
		const complex< double > &	rhs
	)

inline

Definition at line 952 of file complex_quda.h.

References quda::complex< double >::imag(), quda::complex< double >::real(), and s.

Here is the call graph for this function:

◆ operator/() [4/7]

template<typename ValueType >

__host__ __device__ complex<ValueType> quda::operator/	(	const complex< ValueType > &	lhs,
		const ValueType &	rhs
	)

inline

Definition at line 969 of file complex_quda.h.

◆ operator/() [5/7]

template<typename ValueType >

__host__ __device__ complex<ValueType> quda::operator/	(	const ValueType &	lhs,
		const complex< ValueType > &	rhs
	)

inline

Definition at line 976 of file complex_quda.h.

References norm().

Here is the call graph for this function:

◆ operator/() [6/7]

template<>

__host__ __device__ complex<float> quda::operator/	(	const float &	lhs,
		const complex< float > &	rhs
	)

inline

Definition at line 984 of file complex_quda.h.

◆ operator/() [7/7]

template<>

__host__ __device__ complex<double> quda::operator/	(	const double &	lhs,
		const complex< double > &	rhs
	)

inline

Definition at line 989 of file complex_quda.h.

◆ operator<<() [1/10]

template<typename Float , int nSpin, int nColor, bool spin_project>

std::ostream& quda::operator<<	(	std::ostream &	out,
		const PackArg< Float, nSpin, nColor, spin_project > &	arg
	)

Definition at line 21 of file dslash_pack2.cu.

References arg(), and out.

Here is the call graph for this function:

◆ operator<<() [2/10]

std::ostream & quda::operator<<	(	std::ostream &	output,
		const CloverFieldParam &	param
	)

Definition at line 427 of file clover_field.cpp.

References quda::CloverFieldParam::clover, quda::CloverFieldParam::cloverInv, quda::CloverFieldParam::create, quda::CloverFieldParam::csw, quda::CloverFieldParam::direct, quda::CloverFieldParam::inverse, quda::CloverFieldParam::invNorm, quda::CloverFieldParam::mu2, quda::CloverFieldParam::norm, quda::CloverFieldParam::order, param, quda::CloverFieldParam::rho, and quda::CloverFieldParam::twisted.

Referenced by quda::CloverFieldParam::CloverFieldParam(), quda::ColorSpinorField::Components(), quda::cudaColorSpinorField::Ghost2(), quda::LatticeFieldParam::LatticeFieldParam(), and quda::GaugeFieldParam::setPrecision().

Here is the caller graph for this function:

◆ operator<<() [3/10]

std::ostream & quda::operator<<	(	std::ostream &	output,
		const LatticeFieldParam &	param
	)

Definition at line 704 of file lattice_field.cpp.

References quda::LatticeFieldParam::ghostExchange, quda::LatticeFieldParam::GhostPrecision(), quda::LatticeFieldParam::nDim, quda::LatticeFieldParam::pad, quda::LatticeFieldParam::Precision(), quda::LatticeFieldParam::r, quda::LatticeFieldParam::scale, and quda::LatticeFieldParam::x.

Here is the call graph for this function:

◆ operator<<() [4/10]

std::ostream & quda::operator<<	(	std::ostream &	output,
		const GaugeFieldParam &	param
	)

Definition at line 282 of file gauge_field.cpp.

◆ operator<<() [5/10]

template<typename ValueType , class charT , class traits >

std::basic_ostream< charT, traits > & quda::operator<<	(	std::basic_ostream< charT, traits > &	os,
		const complex< ValueType > &	z
	)

Definition at line 310 of file complex_quda.h.

◆ operator<<() [6/10]

template<typename Float >

std::ostream& quda::operator<<	(	std::ostream &	out,
		const DslashArg< Float > &	arg
	)

Definition at line 300 of file dslash_helper.cuh.

References arg(), and out.

Here is the call graph for this function:

◆ operator<<() [7/10]

template<class T , int N>

std::ostream& quda::operator<<	(	std::ostream &	os,
		const Matrix< T, N > &	m
	)

Definition at line 833 of file quda_matrix.h.

◆ operator<<() [8/10]

template<class T , int N>

std::ostream& quda::operator<<	(	std::ostream &	os,
		const Array< T, N > &	a
	)

Definition at line 847 of file quda_matrix.h.

◆ operator<<() [9/10]

std::ostream& quda::operator<<	(	std::ostream &	out,
		const ColorSpinorField &	a
	)

Definition at line 860 of file color_spinor_field.cpp.

◆ operator<<() [10/10]

std::ostream& quda::operator<<	(	std::ostream &	out,
		const cudaColorSpinorField &	a
	)

Definition at line 1435 of file cuda_color_spinor_field.cpp.

References quda::cudaColorSpinorField::alloc, quda::cudaColorSpinorField::init, quda::ColorSpinorField::norm, out, and quda::ColorSpinorField::v.

◆ operator==() [1/3]

template<typename ValueType >

__host__ __device__ bool quda::operator==	(	const complex< ValueType > &	lhs,
		const complex< ValueType > &	rhs
	)

inline

Definition at line 1008 of file complex_quda.h.

◆ operator==() [2/3]

template<typename ValueType >

__host__ __device__ bool quda::operator==	(	const ValueType &	lhs,
		const complex< ValueType > &	rhs
	)

inline

Definition at line 1017 of file complex_quda.h.

◆ operator==() [3/3]

template<typename ValueType >

__host__ __device__ bool quda::operator==	(	const complex< ValueType > &	lhs,
		const ValueType &	rhs
	)

inline

Definition at line 1025 of file complex_quda.h.

◆ operator>>()

template<typename ValueType , typename charT , class traits >

std::basic_istream< charT, traits > & quda::operator>>	(	std::basic_istream< charT, traits > &	is,
		complex< ValueType > &	z
	)

Definition at line 318 of file complex_quda.h.

◆ orthoDir()

void quda::orthoDir	(	Complex **	beta,
		std::vector< ColorSpinorField *>	Ap,
		int	k,
		int	pipeline
	)

Definition at line 95 of file inv_gcr_quda.cpp.

References quda::blas::caxpy(), quda::blas::caxpyDotzy(), quda::blas::cDotProduct(), computeBeta(), pipeline, and updateAp().

Referenced by quda::GCR::operator()().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ outerProd() [1/2]

template<class T , int N>

__device__ __host__ void quda::outerProd	(	const Array< T, N > &	a,
		const Array< T, N > &	b,
		Matrix< T, N > *	m
	)

inline

Definition at line 805 of file quda_matrix.h.

References conj().

Referenced by constructHHMat().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ outerProd() [2/2]

template<class T , int N>

__device__ __host__ void quda::outerProd	(	const T(&)	a[N],
		const T(&)	b[N],
		Matrix< T, N > *	m
	)

inline

Definition at line 818 of file quda_matrix.h.

References conj().

Here is the call graph for this function:

◆ outerProdSpinTrace()

template<typename Float , int Nc, int Ns>

__device__ __host__ Matrix<complex<Float>, Nc> quda::outerProdSpinTrace	(	const ColorSpinor< Float, Nc, Ns > &	a,
		const ColorSpinor< Float, Nc, Ns > &	b
	)

inline

Compute the outer product over color and take the spin trace: out(j,i) = a(s,j) * conj(b(s,i))

Parameters

a	Left-hand side ColorSpinor
b	Right-hand side ColorSpinor

Returns: The spin traced matrix

Definition at line 985 of file color_spinor.h.

References out, and s.

Referenced by sigmaOprod().

Here is the caller graph for this function:

◆ OvrImpSTOUTStep() [1/3]

void quda::OvrImpSTOUTStep	(	GaugeField &	dataDs,
		const GaugeField &	dataOr,
		double	rho,
		double	epsilon
	)

Apply Over Improved STOUT smearing to the gauge field.

Parameters

[out]	dataDs	Output smeared field
[in]	dataOr	Input gauge field
[in]	rho	smearing parameter
[in]	epsilon	smearing parameter

Definition at line 269 of file gauge_stout.cu.

References epsilon, errorQuda, quda::GaugeField::isNative(), quda::GaugeField::Order(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_SINGLE_PRECISION, and quda::GaugeField::Reconstruct().

Referenced by OvrImpSTOUTStep(), and performOvrImpSTOUTnStep().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ OvrImpSTOUTStep() [2/3]

template<typename Float , typename GaugeOr , typename GaugeDs >

void quda::OvrImpSTOUTStep	(	GaugeOr	origin,
		GaugeDs	dest,
		const GaugeField &	dataOr,
		Float	rho,
		Float	epsilon
	)

Definition at line 208 of file gauge_stout.cu.

References arg(), DOUBLE_TOL, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, qudaDeviceSynchronize, and SINGLE_TOL.

Here is the call graph for this function:

◆ OvrImpSTOUTStep() [3/3]

template<typename Float >

void quda::OvrImpSTOUTStep	(	GaugeField &	dataDs,
		const GaugeField &	dataOr,
		Float	rho,
		Float	epsilon
	)

Definition at line 217 of file gauge_stout.cu.

References errorQuda, OvrImpSTOUTStep(), QUDA_RECONSTRUCT_12, QUDA_RECONSTRUCT_8, QUDA_RECONSTRUCT_NO, and quda::GaugeField::Reconstruct().

Here is the call graph for this function:

◆ pack()

template<bool dagger, int twist, int dim, QudaPCType pc, typename Arg >

__device__ __host__ void quda::pack	(	Arg &	arg,
		int	ghost_idx,
		int	s,
		int	parity
	)

inline

Definition at line 83 of file dslash_pack.cuh.

References arg(), quda::PackArg< Float_, nColor_, nSpin_, spin_project_ >::dagger, quda::PackArg< Float_, nColor_, nSpin_, spin_project_ >::in, quda::PackArg< Float_, nColor_, nSpin_, spin_project_ >::nFace, quda::Arg< real, Ns, Nc, order >::nParity, quda::PackArg< Float_, nColor_, nSpin_, spin_project_ >::parity, QUDA_5D_PC, and quda::PackArg< Float_, nColor_, nSpin_, spin_project_ >::twist.

Referenced by quda::dslash::issuePack(), and PackGhost().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ packGhost()

template<typename Float , bool block_float, int Ns, int Ms, int Nc, int Mc, int nDim, int dim, int dir, typename Arg >

__device__ __host__ __forceinline__ void quda::packGhost	(	Arg &	arg,
		int	x_cb,
		int	parity,
		int	spinor_parity,
		int	spin_block,
		int	color_block
	)

Definition at line 95 of file color_spinor_pack.cuh.

References arg(), getCoords(), getCoords5(), quda::PackGhostArg< Field >::nDim, quda::PackGhostArg< Field >::parity, and s.

Here is the call graph for this function:

◆ PackGhost() [1/3]

template<typename Float , int nColor>

void quda::PackGhost	(	void *	ghost[],
		const ColorSpinorField &	in,
		MemoryLocation	location,
		int	nFace,
		bool	dagger,
		int	parity,
		bool	spin_project,
		double	a,
		double	b,
		double	c,
		const cudaStream_t &	stream
	)

Definition at line 342 of file dslash_pack2.cu.

References quda::Pack< Float, nColor, spin_project >::apply(), and pack().

Here is the call graph for this function:

◆ PackGhost() [2/3]

template<typename Float >

void quda::PackGhost	(	void *	ghost[],
		const ColorSpinorField &	in,
		MemoryLocation	location,
		int	nFace,
		bool	dagger,
		int	parity,
		bool	spin_project,
		double	a,
		double	b,
		double	c,
		const cudaStream_t &	stream
	)

Definition at line 356 of file dslash_pack2.cu.

Here is the call graph for this function:

◆ PackGhost() [3/3]

void quda::PackGhost	(	void *	ghost[2 *QUDA_MAX_DIM],
		const ColorSpinorField &	field,
		MemoryLocation	location,
		int	nFace,
		bool	dagger,
		int	parity,
		bool	spin_project,
		double	a,
		double	b,
		double	c,
		const cudaStream_t &	stream
	)

Dslash face packing routine.

Parameters

[out]	ghost	Array of packed halos, order is [2*dim+dir]
[in]	field	ColorSpinorField to be packed
[in]	location	Locations where the packed fields are (Device, Host and/or Remote)
[in]	nFace	Depth of halo
[in]	dagger	Whether this is for the dagger operator
[in]	parity	Field parity
[in]	spin_project	Whether to spin_project when packing
[in]	a	Twisted mass scale factor (for preconditioned twisted-mass dagger operator)
[in]	b	Twisted mass chiral twist factor (for preconditioned twisted-mass dagger operator)
[in]	c	Twisted mass flavor twist factor (for preconditioned non degenerate twisted-mass dagger operator)
[in]	stream	Which stream are we executing in

Definition at line 367 of file dslash_pack2.cu.

Referenced by quda::cudaColorSpinorField::packGhost().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ packKernel()

template<bool dagger, int twist, QudaPCType pc, typename Arg >

__global__ void quda::packKernel ( Arg arg )

Definition at line 184 of file dslash_pack.cuh.

References arg(), dimFromFaceIndex(), quda::Arg< real, Ns, Nc, order >::nParity, quda::PackArg< Float_, nColor_, nSpin_, spin_project_ >::parity, QUDA_5D_PC, s, and quda::PackArg< Float_, nColor_, nSpin_, spin_project_ >::sites_per_block.

Here is the call graph for this function:

◆ packShmemKernel()

template<bool dagger, int twist, QudaPCType pc, typename Arg >

__global__ void quda::packShmemKernel ( Arg arg )

Definition at line 222 of file dslash_pack.cuh.

References arg(), quda::Arg< real, Ns, Nc, order >::nParity, quda::PackArg< Float_, nColor_, nSpin_, spin_project_ >::parity, QUDA_5D_PC, and s.

Here is the call graph for this function:

◆ packSpinor()

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder >

void quda::packSpinor	(	OutOrder &	outOrder,
		const InOrder &	inOrder,
		int	volume
	)

CPU function to reorder spinor fields.

Definition at line 22 of file copy_color_spinor_mg.cuh.

References s.

◆ packSpinorKernel()

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder >

__global__ void quda::packSpinorKernel	(	OutOrder	outOrder,
		const InOrder	inOrder,
		int	volume
	)

CUDA kernel to reorder spinor fields. Adopts a form similar to the CPU version, using the same inlined functions.

Definition at line 34 of file copy_color_spinor_mg.cuh.

References s.

◆ packStaggered()

template<int dim, int nFace = 1, typename Arg >

__device__ __host__ void quda::packStaggered	(	Arg &	arg,
		int	ghost_idx,
		int	s,
		int	parity
	)

inline

Definition at line 154 of file dslash_pack.cuh.

References arg(), quda::PackArg< Float_, nColor_, nSpin_, spin_project_ >::in, quda::PackArg< Float_, nColor_, nSpin_, spin_project_ >::nFace, quda::Arg< real, Ns, Nc, order >::nParity, and quda::PackArg< Float_, nColor_, nSpin_, spin_project_ >::parity.

Here is the call graph for this function:

◆ packStaggeredKernel()

template<typename Arg >

__global__ void quda::packStaggeredKernel ( Arg arg )

Definition at line 288 of file dslash_pack.cuh.

References arg(), dimFromFaceIndex(), quda::Arg< real, Ns, Nc, order >::nParity, quda::PackArg< Float_, nColor_, nSpin_, spin_project_ >::parity, s, and quda::PackArg< Float_, nColor_, nSpin_, spin_project_ >::sites_per_block.

Here is the call graph for this function:

◆ packStaggeredShmemKernel()

template<typename Arg >

__global__ void quda::packStaggeredShmemKernel ( Arg arg )

Definition at line 325 of file dslash_pack.cuh.

References arg(), quda::Arg< real, Ns, Nc, order >::nParity, quda::PackArg< Float_, nColor_, nSpin_, spin_project_ >::parity, and s.

Here is the call graph for this function:

◆ PCType_() [1/2]

QudaPCType quda::PCType_	(	const char *	func,
		const char *	file,
		int	line,
		const ColorSpinorField &	a,
		const ColorSpinorField &	b
	)

inline

Helper function for determining if the preconditioning type of the fields is the same.

Parameters

[in]	a	Input field
[in]	b	Input field

Returns: If PCType is unique return this

Definition at line 1011 of file color_spinor_field.h.

References errorQuda, quda::ColorSpinorField::PCType(), and QUDA_PC_INVALID.

Referenced by PCType_().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ PCType_() [2/2]

template<typename... Args>

QudaPCType quda::PCType_	(	const char *	func,
		const char *	file,
		int	line,
		const ColorSpinorField &	a,
		const ColorSpinorField &	b,
		const Args &...	args
	)

inline

Helper function for determining if the preconditioning type of the fields is the same.

Parameters

[in]	a	Input field
[in]	b	Input field
[in]	args	List of additional fields to check the preconditioning type on

Returns: If PCType is unique return this

Definition at line 1030 of file color_spinor_field.h.

References PCType_().

Here is the call graph for this function:

◆ PGaugeExchange()

void quda::PGaugeExchange	(	cudaGaugeField &	data,
		const int	dir,
		const int	parity
	)

Perform heatbath and overrelaxation. Performs nhb heatbath steps followed by nover overrelaxation steps.

Parameters

[in,out]	data	Gauge field
[in,out]	rngstate	state of the CURAND random number generator
[in]	Beta	inverse of the gauge coupling, beta = 2 Nc / g_0^2
[in]	nhb	number of heatbath steps
[in]	nover	number of overrelaxation steps

Definition at line 342 of file pgauge_exchange.cu.

References comm_dim_partitioned(), errorQuda, parity, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, and QUDA_SINGLE_PRECISION.

Here is the call graph for this function:

◆ PGaugeExchangeFree()

void quda::PGaugeExchangeFree ( )

Release all allocated memory used to exchange data between nodes.

Referenced by main(), and GaugeAlgTest::TearDown().

Here is the caller graph for this function:

◆ pinned_allocated_peak()

long quda::pinned_allocated_peak ( )

Returns: peak pinned memory allocated

Definition at line 61 of file malloc.cpp.

References PINNED.

◆ pinned_malloc_()

void * quda::pinned_malloc_	(	const char *	func,
		const char *	file,
		int	line,
		size_t	size
	)

Allocate page-locked ("pinned") host memory. This function should only be called via the pinned_malloc() macro, defined in malloc_quda.h

Note that we do not rely on cudaHostAlloc(), since buffers allocated in this way have been observed to cause problems when shared with MPI via GPU Direct on some systems.

Definition at line 250 of file malloc.cpp.

References aligned_malloc(), quda::MemAlloc::base_size, errorQuda, memset(), PINNED, and track_malloc().

Referenced by quda::pool::pinned_malloc_().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ plaquette() [1/4]

double3 quda::plaquette ( const GaugeField & U )

Compute the plaquette of the gauge field.

Parameters

[in] U The gauge field upon which to compute the plaquette

Returns: double3 variable returning (plaquette, spatial plaquette, temporal plaquette) site averages normalized such that each plaquette is in the range [0,1]

Definition at line 65 of file gauge_plaq.cu.

References INSTANTIATE_PRECISION, and quda::LatticeField::Location().

Referenced by main(), performAPEnStep(), performOvrImpSTOUTnStep(), performSTOUTnStep(), plaqQuda(), GaugeAlgTest::SetUp(), and TEST_F().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ plaquette() [2/4]

template<typename Float , typename Arg >

__device__ double quda::plaquette	(	Arg &	arg,
		int	x[],
		int	parity,
		int	mu,
		int	nu
	)

inline

Definition at line 32 of file gauge_plaq.cuh.

References conj(), getTrace(), linkIndexShift(), and mu.

Here is the call graph for this function:

◆ plaquette() [3/4]

template<typename Float , typename Gauge >

void quda::plaquette	(	const Gauge	dataOr,
		const GaugeField &	data,
		double2 &	plq,
		QudaFieldLocation	location
	)

Definition at line 51 of file gauge_plaq.cu.

References quda::GaugePlaq< Float, Gauge >::apply(), quda::GaugePlaq< Float, Gauge >::arg, comm_allreduce_array(), comm_size(), qudaDeviceSynchronize, quda::ReduceArg< double2 >::result_h, and quda::GaugePlaqArg< Gauge >::threads.

Here is the call graph for this function:

◆ plaquette() [4/4]

template<typename Float >

void quda::plaquette	(	const GaugeField &	data,
		double2 &	plq,
		QudaFieldLocation	location
	)

Definition at line 61 of file gauge_plaq.cu.

References INSTANTIATE_RECONSTRUCT.

◆ point()

template<class T >

void quda::point	(	T &	t,
		int	x,
		int	s,
		int	c
	)

Create a point source at spacetime point x, spin s and colour c

Definition at line 31 of file color_spinor_util.cu.

Referenced by genericSource().

Here is the caller graph for this function:

◆ polar() [1/3]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::polar	(	const ValueType &	m,
		const ValueType &	theta = `0`
	)

inline

Returns the complex with magnitude m and angle theta in radians.

Definition at line 1098 of file complex_quda.h.

References cos(), and sin().

Referenced by construct_fat_long_gauge_field(), exp(), and sqrt().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ polar() [2/3]

template<>

__host__ __device__ complex<float> quda::polar	(	const float &	magnitude,
		const float &	angle
	)

inline

Definition at line 1104 of file complex_quda.h.

◆ polar() [3/3]

template<>

__host__ __device__ complex<double> quda::polar	(	const double &	magnitude,
		const double &	angle
	)

inline

Definition at line 1110 of file complex_quda.h.

References cos(), and sin().

Here is the call graph for this function:

◆ polarSu3()

template<typename Float >

__host__ __device__ void quda::polarSu3	(	Matrix< complex< Float >, 3 > &	in,
		Float	tol
	)

inline

Project the input matrix on the SU(3) group. First unitarize the matrix and then project onto the special unitary group.

Parameters

in	The input matrix to which we're projecting
tol	Tolerance to which this check is applied

Definition at line 87 of file su3_project.cuh.

References arg(), checkUnitary(), conj(), getDeterminant(), in, inverse(), mod(), norm(), out, and pow().

Here is the call graph for this function:

◆ policies()

static std::vector<DslashCoarsePolicy> quda::policies	(	static_cast< int >	DslashCoarsePolicy::DSLASH_COARSE_POLICY_DISABLED,
		DslashCoarsePolicy::DSLASH_COARSE_POLICY_DISABLED
	)

static

Referenced by quda::DslashCoarsePolicyTune::advanceAux(), quda::DslashCoarsePolicyTune::apply(), disable_policy(), quda::DslashCoarsePolicyTune::DslashCoarsePolicyTune(), and enable_policy().

Here is the caller graph for this function:

◆ policyTuning()

bool quda::policyTuning ( )

Definition at line 495 of file tune.cpp.

References policy_tuning.

Referenced by tuneLaunch().

Here is the caller graph for this function:

◆ popKernelPackT()

void quda::popKernelPackT ( )

Definition at line 42 of file dslash_quda.cu.

References errorQuda, and setKernelPackT().

Referenced by quda::dslash::DslashPolicyTune< Dslash >::apply(), ApplyCovDev(), ApplyDomainWall5D(), ApplyNdegTwistedMassPreconditioned(), ApplyTwistedMassPreconditioned(), quda::dslash::DslashPolicyTune< Dslash >::DslashPolicyTune(), quda::cudaColorSpinorField::exchangeGhost(), quda::cudaColorSpinorField::sendGhost(), and quda::cudaColorSpinorField::sendStart().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ postTrace_()

void quda::postTrace_	(	const char *	func,
		const char *	file,
		int	line
	)

Post an event in the trace, recording where it was posted.

Definition at line 92 of file tune.cpp.

References quda::TuneKey::aux_n, i32toa(), quda::TraceKey::key, tmp, and traceEnabled().

Referenced by quda::TunableVectorYZ::resizeStep().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ pow() [1/6]

template<typename ValueType , typename ExponentType >

__host__ __device__ ValueType quda::pow	(	ValueType	x,
		ExponentType	e
	)

inline

Definition at line 111 of file complex_quda.h.

References pow().

Referenced by __fast_pow(), checkGauge(), comm_declare_receive_displaced(), comm_declare_send_displaced(), comm_declare_strided_receive_displaced(), comm_declare_strided_send_displaced(), compare_mom(), compareLink(), compareSpinor(), quda::Dslash5Arg< Float, nColor >::Dslash5Arg(), dslashReference_5th_inv(), exponentiate_iQ(), insertNoise(), invertMultiShiftQuda(), massRescale(), MatDagMatQuda(), quda::CG::operator()(), quda::MultiShiftCG::operator()(), polarSu3(), TEST(), and TEST_P().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ pow() [2/6]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::pow	(	const complex< ValueType > &	z,
		const int &	n
	)

inline

Definition at line 1208 of file complex_quda.h.

References exp(), and log().

Here is the call graph for this function:

◆ pow() [3/6]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::pow	(	const complex< ValueType > &	z,
		const ValueType &	x
	)

inline

Definition at line 1184 of file complex_quda.h.

References exp(), and log().

Here is the call graph for this function:

◆ pow() [4/6]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::pow	(	const complex< ValueType > &	z,
		const complex< ValueType > &	z2
	)

inline

Definition at line 1190 of file complex_quda.h.

References exp(), and log().

Here is the call graph for this function:

◆ pow() [5/6]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::pow	(	const ValueType &	x,
		const complex< ValueType > &	z
	)

inline

Definition at line 1196 of file complex_quda.h.

References exp(), and log().

Here is the call graph for this function:

◆ pow() [6/6]

template<>

__host__ __device__ complex<float> quda::pow	(	const float &	x,
		const complex< float > &	exponent
	)

inline

Definition at line 1202 of file complex_quda.h.

References exp().

Referenced by pow().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ Precision_() [1/2]

QudaPrecision quda::Precision_	(	const char *	func,
		const char *	file,
		int	line,
		const LatticeField &	a,
		const LatticeField &	b
	)

inline

Helper function for determining if the precision of the fields is the same.

Parameters

[in]	a	Input field
[in]	b	Input field

Returns: If precision is unique return the precision

Definition at line 672 of file lattice_field.h.

References errorQuda, quda::LatticeFieldParam::precision, quda::LatticeField::Precision(), and QUDA_INVALID_PRECISION.

Referenced by Precision_().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ Precision_() [2/2]

template<typename... Args>

QudaPrecision quda::Precision_	(	const char *	func,
		const char *	file,
		int	line,
		const LatticeField &	a,
		const LatticeField &	b,
		const Args &...	args
	)

inline

Helper function for determining if the precision of the fields is the same.

Parameters

[in]	a	Input field
[in]	b	Input field
[in]	args	List of additional fields to check precision on

Returns: If precision is unique return the precision

Definition at line 689 of file lattice_field.h.

References Precision_().

Here is the call graph for this function:

◆ print()

void quda::print	(	const double	d[],
		int	n
	)

Definition at line 44 of file inv_mpcg_quda.cpp.

Referenced by quda::MPBiCGstab::computeMatrixPowers().

Here is the caller graph for this function:

◆ print_alloc()

static void quda::print_alloc ( AllocType type )

static

Definition at line 85 of file malloc.cpp.

References quda::MemAlloc::base_size, quda::MemAlloc::file, quda::MemAlloc::func, quda::MemAlloc::line, and printfQuda.

Referenced by assertAllMemFree().

Here is the caller graph for this function:

◆ print_alloc_header()

static void quda::print_alloc_header ( )

static

Definition at line 78 of file malloc.cpp.

References printfQuda.

Referenced by assertAllMemFree().

Here is the caller graph for this function:

◆ print_trace()

static void quda::print_trace ( void )

static

Definition at line 67 of file malloc.cpp.

References printfQuda, and quda::MemAlloc::size.

Referenced by host_free_().

Here is the caller graph for this function:

◆ print_vector()

template<class Order >

void quda::print_vector	(	const Order &	o,
		unsigned int	x
	)

Definition at line 321 of file color_spinor_util.cu.

References parity, and printfQuda.

Referenced by genericPrintVector().

Here is the caller graph for this function:

◆ printAPIProfile()

void quda::printAPIProfile ( )

Print out the timer profile for CUDA API calls.

Definition at line 336 of file quda_cuda_api.cpp.

Referenced by endQuda().

Here is the caller graph for this function:

◆ printLaunchTimer()

void quda::printLaunchTimer ( )

Definition at line 843 of file tune.cpp.

References quda::TimeProfile::Print().

Referenced by endQuda(), and profilerStop().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ printLink()

template<class Cmplx >

__host__ __device__ void quda::printLink ( const Matrix< Cmplx, 3 > & link )

inline

Definition at line 1149 of file quda_matrix.h.

Referenced by applyStaggered(), and isUnitary().

Here is the caller graph for this function:

◆ printPeakMemUsage()

void quda::printPeakMemUsage ( )

Definition at line 375 of file malloc.cpp.

References DEVICE, DEVICE_PINNED, and printfQuda.

Referenced by endQuda().

Here is the caller graph for this function:

◆ projectSU3()

void quda::projectSU3	(	cudaGaugeField &	U,
		double	tol,
		int *	fails
	)

Project the input gauge field onto the SU(3) group. This is a destructive operation. The number of link failures is reported so appropriate action can be taken.

Parameters

U	Gauge field that we are projecting onto SU(3)
tol	Tolerance to which the iterative algorithm works
fails	Number of link failures (device pointer)

Definition at line 590 of file unitarize_links_quda.cu.

References quda::ProjectSU3< Float, G >::apply(), arg(), checkCudaError, errorQuda, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_RECONSTRUCT_NO, QUDA_SINGLE_PRECISION, qudaDeviceSynchronize, quda::GaugeField::Reconstruct(), quda::GaugeField::StaggeredPhaseApplied(), and tol.

Referenced by projectSU3Quda().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ ProjectSU3kernel()

template<typename Float , typename G >

__global__ void quda::ProjectSU3kernel ( ProjectSU3Arg< Float, G > arg )

Definition at line 533 of file unitarize_links_quda.cu.

References atomicAdd(), quda::ProjectSU3Arg< Float, G >::fails, mu, parity, quda::ProjectSU3Arg< Float, G >::threads, quda::ProjectSU3Arg< Float, G >::tol, and quda::ProjectSU3Arg< Float, G >::u.

Here is the call graph for this function:

◆ Prolongate()

void quda::Prolongate	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const ColorSpinorField &	v,
		int	Nvec,
		const int *	fine_to_coarse,
		const int const	spin_map,
		int	parity = `QUDA_INVALID_PARITY`
	)

Apply the prolongation operator.

Parameters

[out]	out	Resulting fine grid field
[in]	in	Input field on coarse grid
[in]	v	Matrix field containing the null-space components
[in]	Nvec	Number of null-space components
[in]	fine_to_coarse	Fine-to-coarse lookup table (linear indices)
[in]	spin_map	Spin blocking lookup table
[in]	parity	of the output fine field (if single parity output field)

Definition at line 296 of file prolongator.cu.

References checkCudaError, checkLocation, checkPrecision, errorQuda, quda::ColorSpinorField::FieldOrder(), in, out, parity, quda::LatticeField::Precision(), QUDA_CUDA_FIELD_LOCATION, QUDA_DOUBLE_PRECISION, and QUDA_SINGLE_PRECISION.

Referenced by quda::Transfer::P(), and quda::Transfer::setTransferGPU().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ pushKernelPackT()

void quda::pushKernelPackT ( bool pack )

Definition at line 30 of file dslash_quda.cu.

References getKernelPackT(), setKernelPackT(), and warningQuda.

Referenced by quda::dslash::DslashPolicyTune< Dslash >::apply(), ApplyCovDev(), ApplyDomainWall5D(), ApplyNdegTwistedMassPreconditioned(), ApplyTwistedMassPreconditioned(), quda::dslash::DslashPolicyTune< Dslash >::DslashPolicyTune(), quda::cudaColorSpinorField::exchangeGhost(), quda::cudaColorSpinorField::sendGhost(), and quda::cudaColorSpinorField::sendStart().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ qChargeComputeKernel()

template<int blockSize, typename Float , typename Arg >

__global__ void quda::qChargeComputeKernel ( Arg arg )

Definition at line 28 of file gauge_qcharge.cuh.

References arg(), getTrace(), parity, and Pi2.

Here is the call graph for this function:

◆ qudaDeviceSynchronize_()

cudaError_t quda::qudaDeviceSynchronize_	(	const char *	func,
		const char *	file,
		const char *	line
	)

Wrapper around cudaDeviceSynchronize or cuDeviceSynchronize.

Definition at line 306 of file quda_cuda_api.cpp.

References errorQuda, PROFILE, QUDA_PROFILE_DEVICE_SYNCHRONIZE, and QUDA_PROFILE_FUNC_SET_ATTRIBUTE.

◆ qudaEventQuery()

cudaError_t quda::qudaEventQuery ( cudaEvent_t & event )

Wrapper around cudaEventQuery or cuEventQuery.

Parameters

[in] event Event we are querying

Returns: Status of event query

Definition at line 209 of file quda_cuda_api.cpp.

References errorQuda, PROFILE, and QUDA_PROFILE_EVENT_QUERY.

Referenced by quda::blas::multiReduceLaunch(), quda::dslash::DslashBasic< Dslash >::operator()(), quda::dslash::DslashFusedExterior< Dslash >::operator()(), quda::dslash::DslashGDRRecv< Dslash >::operator()(), quda::dslash::DslashFusedGDRRecv< Dslash >::operator()(), and quda::blas::reduceLaunch().

Here is the caller graph for this function:

◆ qudaEventRecord()

cudaError_t quda::qudaEventRecord	(	cudaEvent_t &	event,
		cudaStream_t	stream = `0`
	)

Wrapper around cudaEventRecord or cuEventRecord.

Parameters

[in,out]	event	Event we are recording
[in,out]	stream	Stream where to record the event

Definition at line 230 of file quda_cuda_api.cpp.

References errorQuda, PROFILE, and QUDA_PROFILE_EVENT_RECORD.

Here is the caller graph for this function:

◆ qudaEventSynchronize()

cudaError_t quda::qudaEventSynchronize ( cudaEvent_t & event )

Wrapper around cudaEventSynchronize or cuEventSynchronize.

Parameters

[in] event Event which we are synchronizing with respect to

Definition at line 287 of file quda_cuda_api.cpp.

References errorQuda, PROFILE, and QUDA_PROFILE_EVENT_SYNCHRONIZE.

Referenced by quda::cudaGaugeField::commsComplete().

Here is the caller graph for this function:

◆ qudaLaunchKernel()

cudaError_t quda::qudaLaunchKernel	(	const void *	func,
		dim3	gridDim,
		dim3	blockDim,
		void **	args,
		size_t	sharedMem,
		cudaStream_t	stream
	)

Wrapper around cudaLaunchKernel.

Parameters

[in]	func	Device function symbol
[in]	gridDim	Grid dimensions
[in]	blockDim	Block dimensions
[in]	args	Arguments
[in]	sharedMem	Shared memory requested per thread block
[in]	stream	Stream identifier

Definition at line 201 of file quda_cuda_api.cpp.

References activeTuning(), errorQuda, PROFILE, and QUDA_PROFILE_LAUNCH_KERNEL.

Referenced by quda::Dslash< Float >::launch(), quda::Dslash5< Float, nColor, Arg >::launch(), and quda::Pack< Float, nColor, spin_project >::launch().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ qudaMemcpy2DAsync_()

void quda::qudaMemcpy2DAsync_	(	void *	dst,
		size_t	dpitch,
		const void *	src,
		size_t	spitch,
		size_t	width,
		size_t	hieght,
		cudaMemcpyKind	kind,
		const cudaStream_t &	stream,
		const char *	func,
		const char *	file,
		const char *	line
	)

Wrapper around cudaMemcpy2DAsync or driver API equivalent. Potentially add auto-profiling support.

Parameters

[out]	dst	Destination pointer
[in]	dpitch	Destination pitch
[in]	src	Source pointer
[in]	spitch	Source pitch
[in]	width	Width in bytes
[in]	height	Number of rows
[in]	kind	Type of memory copy
[in]	stream	Stream to issue copy

Definition at line 170 of file quda_cuda_api.cpp.

References quda::QudaMemCopy::dst, errorQuda, param, PROFILE, and QUDA_PROFILE_MEMCPY2D_D2H_ASYNC.

◆ qudaMemcpy_()

void quda::qudaMemcpy_	(	void *	dst,
		const void *	src,
		size_t	count,
		cudaMemcpyKind	kind,
		const char *	func,
		const char *	file,
		const char *	line
	)

Wrapper around cudaMemcpy used for auto-profiling. Do not call directly, rather call macro below which will grab the location of the call.

Parameters

[out]	dst	Destination pointer
[in]	src	Source pointer
[in]	count	Size of transfer
[in]	kind	Type of memory copy

Definition at line 126 of file quda_cuda_api.cpp.

References quda::QudaMemCopy::apply(), copy(), and errorQuda.

Here is the call graph for this function:

◆ qudaMemcpyAsync_()

void quda::qudaMemcpyAsync_	(	void *	dst,
		const void *	src,
		size_t	count,
		cudaMemcpyKind	kind,
		const cudaStream_t &	stream,
		const char *	func,
		const char *	file,
		const char *	line
	)

Wrapper around cudaMemcpyAsync or driver API equivalent. Potentially add auto-profiling support.

Parameters

[out]	dst	Destination pointer
[in]	src	Source pointer
[in]	count	Size of transfer
[in]	kind	Type of memory copy
[in]	stream	Stream to issue copy

Definition at line 140 of file quda_cuda_api.cpp.

References quda::QudaMemCopy::apply(), copy(), errorQuda, PROFILE, QUDA_PROFILE_MEMCPY_D2D_ASYNC, QUDA_PROFILE_MEMCPY_D2H_ASYNC, and QUDA_PROFILE_MEMCPY_H2D_ASYNC.

Here is the call graph for this function:

◆ qudaStreamSynchronize()

cudaError_t quda::qudaStreamSynchronize ( cudaStream_t & stream )

Wrapper around cudaStreamSynchronize or cuStreamSynchronize.

Parameters

[in] stream Stream which we are synchronizing with respect to

Definition at line 268 of file quda_cuda_api.cpp.

References errorQuda, PROFILE, and QUDA_PROFILE_STREAM_SYNCHRONIZE.

Referenced by quda::cudaGaugeField::exchangeGhost(), quda::cudaGaugeField::injectGhost(), quda::dslash::DslashZeroCopyPack< Dslash >::operator()(), quda::dslash::DslashFusedZeroCopyPack< Dslash >::operator()(), quda::dslash::DslashZeroCopyPackGDRRecv< Dslash >::operator()(), quda::dslash::DslashFusedZeroCopyPackGDRRecv< Dslash >::operator()(), quda::dslash::DslashZeroCopy< Dslash >::operator()(), and quda::dslash::DslashFusedZeroCopy< Dslash >::operator()().

Here is the caller graph for this function:

◆ qudaStreamWaitEvent()

cudaError_t quda::qudaStreamWaitEvent	(	cudaStream_t	stream,
		cudaEvent_t	event,
		unsigned int	flags
	)

Wrapper around cudaStreamWaitEvent or cuStreamWaitEvent.

Parameters

[in,out]	stream	Stream which we are instructing to wait
[in]	event	Event we are waiting on
[in]	flags	Flags to pass to function

Definition at line 249 of file quda_cuda_api.cpp.

References errorQuda, PROFILE, and QUDA_PROFILE_STREAM_WAIT_EVENT.

Here is the caller graph for this function:

◆ r_slant()

constexpr const char* quda::r_slant ( const char * str )

inline

Definition at line 49 of file malloc_quda.h.

Referenced by file_name().

Here is the caller graph for this function:

◆ random()

template<class T >

void quda::random ( T & t )

Random number insertion over all field elements

Definition at line 14 of file color_spinor_util.cu.

References comm_drand(), parity, and s.

Referenced by genericSource().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ Random() [1/2]

template<class Real >

__device__ Real quda::Random	(	cuRNGState &	state,
		Real	a,
		Real	b
	)

inline

Return a random number between a and b.

Parameters

state	curand rng state
a	lower range
b	upper range

Returns: random number in range a,b

Definition at line 75 of file random_quda.h.

◆ Random() [2/2]

template<class Real >

__device__ Real quda::Random ( cuRNGState & state )

inline

Return a random number between 0 and 1.

Parameters

state curand rng state

Returns: random number in range 0,1

Definition at line 96 of file random_quda.h.

◆ Random< double >() [1/2]

template<>

__device__ double quda::Random< double >	(	cuRNGState &	state,
		double	a,
		double	b
	)

inline

Definition at line 86 of file random_quda.h.

◆ Random< double >() [2/2]

template<>

__device__ double quda::Random< double > ( cuRNGState & state )

inline

Definition at line 107 of file random_quda.h.

◆ Random< float >() [1/2]

template<>

__device__ float quda::Random< float >	(	cuRNGState &	state,
		float	a,
		float	b
	)

inline

Definition at line 81 of file random_quda.h.

◆ Random< float >() [2/2]

template<>

__device__ float quda::Random< float > ( cuRNGState & state )

inline

Definition at line 102 of file random_quda.h.

◆ reduce()

template<int block_size, typename T , bool do_sum = true, typename Reducer = cub::Sum>

__device__ void quda::reduce	(	ReduceArg< T >	arg,
		const T &	in,
		const int	idx = `0`
	)

inline

Definition at line 137 of file cub_helper.cuh.

References arg(), and in.

Referenced by quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >::apply(), quda::blas::multiReduce(), and quda::blas::nativeReduce().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ reduce2d()

template<int block_size_x, int block_size_y, typename T , bool do_sum = true, typename Reducer = cub::Sum>

__device__ void quda::reduce2d	(	ReduceArg< T >	arg,
		const T &	in,
		const int	idx = `0`
	)

inline

Definition at line 94 of file cub_helper.cuh.

References quda::ReduceArg< T >::partial, quda::ReduceArg< T >::result_d, sum(), and zero().

Here is the call graph for this function:

◆ reduceRow()

template<int block_size_x, int block_size_y, typename T >

__device__ void quda::reduceRow	(	ReduceArg< T >	arg,
		const T &	in
	)

inline

Definition at line 207 of file cub_helper.cuh.

References quda::vector_type< scalar, n >::data, quda::ColorSpinorField::exchange(), in, quda::ReduceArg< T >::partial, quda::ReduceArg< T >::result_d, and sum().

Here is the call graph for this function:

◆ reliable()

int quda::reliable	(	double &	rNorm,
		double &	maxrx,
		double &	maxrr,
		const double &	r2,
		const double &	delta
	)

Definition at line 37 of file inv_bicgstab_quda.cpp.

References sqrt(), and updateR().

Referenced by quda::BiCGstab::operator()(), and quda::MultiShiftCG::operator()().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ reorder_location()

QudaFieldLocation quda::reorder_location ( )

Return whether data is reordered on the CPU or GPU. This can be set at QUDA initialization using the environment variable QUDA_REORDER_LOCATION.

Returns: Reorder location

Definition at line 725 of file lattice_field.cpp.

References reorder_location_.

Referenced by quda::cudaCloverField::copy(), quda::cudaGaugeField::copy(), quda::cpuGaugeField::copy(), quda::cudaColorSpinorField::loadSpinorField(), quda::cudaGaugeField::saveCPUField(), and quda::cudaColorSpinorField::saveSpinorField().

Here is the caller graph for this function:

◆ reorder_location_set()

void quda::reorder_location_set ( QudaFieldLocation reorder_location_ )

Set whether data is reordered on the CPU or GPU. This can be set at QUDA initialization using the environment variable QUDA_REORDER_LOCATION.

Parameters

reorder_location_ The location to set where data will be reordered

Definition at line 726 of file lattice_field.cpp.

Referenced by initQudaDevice().

Here is the caller graph for this function:

◆ report()

static void quda::report ( const char * type )

static

Definition at line 9 of file solver.cpp.

References getVerbosity(), printfQuda, and QUDA_VERBOSE.

Referenced by quda::Solver::create().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ rescaleY()

template<typename Float , int nSpin, int nColor, typename Arg >

__device__ __host__ void quda::rescaleY	(	Arg &	arg,
		int	parity,
		int	x_cb,
		int	c_row,
		int	c_col
	)

Rescale the matrix elements by arg.rescale

Definition at line 1167 of file coarse_op_kernel.cuh.

◆ RescaleYCPU()

template<typename Float , int nSpin, int nColor, typename Arg >

void quda::RescaleYCPU ( Arg & arg )

Definition at line 1181 of file coarse_op_kernel.cuh.

References arg(), nColor, and parity.

Here is the call graph for this function:

◆ RescaleYGPU()

template<typename Float , int nSpin, int nColor, typename Arg >

__global__ void quda::RescaleYGPU ( Arg arg )

Definition at line 1195 of file coarse_op_kernel.cuh.

References arg(), nColor, and parity.

Here is the call graph for this function:

◆ Restrict() [1/2]

template<typename Float , int fineSpin, int fineColor, int coarseSpin, int coarseColor, int coarse_colors_per_thread, typename Arg >

void quda::Restrict ( Arg arg )

Definition at line 90 of file restrictor.cuh.

References quda::Arg< real, Ns, Nc, order >::nParity, parity, s, and tmp.

Referenced by quda::Transfer::R(), and quda::Transfer::setTransferGPU().

Here is the caller graph for this function:

◆ Restrict() [2/2]

void quda::Restrict	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const ColorSpinorField &	v,
		int	Nvec,
		const int *	fine_to_coarse,
		const int *	coarse_to_fine,
		const int const	spin_map,
		int	parity = `QUDA_INVALID_PARITY`
	)

Apply the restriction operator.

Parameters

[out]	out	Resulting coarsened field
[in]	in	Input field on fine grid
[in]	v	Matrix field containing the null-space components
[in]	Nvec	Number of null-space components
[in]	fine_to_coarse	Fine-to-coarse lookup table (linear indices)
[in]	spin_map	Spin blocking lookup table
[in]	parity	of the input fine field (if single parity input field)

Definition at line 263 of file restrictor.cu.

References checkPrecision, errorQuda, quda::ColorSpinorField::FieldOrder(), in, out, parity, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, and QUDA_SINGLE_PRECISION.

Here is the call graph for this function:

◆ RestrictKernel()

template<int block_size, typename Float , int fineSpin, int fineColor, int coarseSpin, int coarseColor, int coarse_colors_per_thread, typename Arg >

__global__ void quda::RestrictKernel ( Arg arg )

Here, we ensure that each thread block maps exactly to a geometric block. Each thread block corresponds to one geometric block, with number of threads equal to the number of fine grid points per aggregate, so each thread represents a fine-grid point. The look up table coarse_to_fine is the mapping to each fine grid point.

Definition at line 136 of file restrictor.cuh.

References quda::Arg< real, Ns, Nc, order >::nParity, parity, s, and tmp.

◆ rotateCoarseColor()

template<typename Float , int fineSpin, int fineColor, int coarseColor, int coarse_colors_per_thread, class FineColor , class Rotator >

__device__ __host__ void quda::rotateCoarseColor	(	complex< Float >	out[fineSpin *coarse_colors_per_thread],
		const FineColor &	in,
		const Rotator &	V,
		int	parity,
		int	nParity,
		int	x_cb,
		int	coarse_color_block
	)

inline

Rotates from the fine-color basis into the coarse-color basis.

Definition at line 50 of file restrictor.cuh.

References conj(), in, out, s, and V.

Here is the call graph for this function:

◆ s2d() [1/2]

__host__ __device__ double quda::s2d ( short a )

inline

Definition at line 35 of file convert.h.

Referenced by copyFloatN().

Here is the caller graph for this function:

◆ s2d() [2/2]

__host__ __device__ double quda::s2d	(	short	a,
		double	c
	)

inline

Definition at line 46 of file convert.h.

◆ s2f() [1/2]

__host__ __device__ float quda::s2f ( short a )

inline

Definition at line 34 of file convert.h.

Referenced by copy(), copy_and_scale(), and copyFloatN().

Here is the caller graph for this function:

◆ s2f() [2/2]

__host__ __device__ float quda::s2f	(	short	a,
		float	c
	)

inline

Definition at line 42 of file convert.h.

◆ safe_malloc_()

void * quda::safe_malloc_	(	const char *	func,
		const char *	file,
		int	line,
		size_t	size
	)

Perform a standard malloc() with error-checking. This function should only be called via the safe_malloc() macro, defined in malloc_quda.h

Definition at line 226 of file malloc.cpp.

References quda::MemAlloc::base_size, errorQuda, HOST, memset(), quda::MemAlloc::size, and track_malloc().

Here is the call graph for this function:

◆ saveProfile()

void quda::saveProfile ( const std::string label = "" )

Save profile to disk.

Definition at line 514 of file tune.cpp.

References quda::TuneKey::aux_n, comm_rank(), count, getVerbosity(), gitversion, launchTimer, quda::TuneParam::n_calls, param, printfQuda, QUDA_SUMMARIZE, quda_version, serializeProfile(), serializeTrace(), quda::TraceKey::time, tmp, traceEnabled(), and warningQuda.

Referenced by endQuda(), newDeflationQuda(), and quda::TunableVectorYZ::resizeStep().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ saveTuneCache()

void quda::saveTuneCache ( bool error )

Write tunecache to disk.

Definition at line 426 of file tune.cpp.

References comm_rank(), getVerbosity(), gitversion, printfQuda, QUDA_SUMMARIZE, quda_version, serializeTuneCache(), quda::TraceKey::time, and warningQuda.

Referenced by eigensolveQuda(), endQuda(), invertMultiShiftQuda(), invertMultiSrcQuda(), invertQuda(), quda::multigrid_solver::multigrid_solver(), newMultigridQuda(), quda::TunableVectorYZ::resizeStep(), and updateMultigridQuda().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ serializeProfile()

static void quda::serializeProfile	(	std::ostream &	out,
		std::ostream &	async_out
	)

static

Serialize profile to an ostream, useful for writing to a file or sending to other nodes.

Definition at line 199 of file tune.cpp.

References quda::TuneKey::aux, quda::TuneKey::aux_n, quda::TuneParam::comment, quda::TraceKey::key, quda::TuneParam::n_calls, quda::TuneKey::name, param, quda::TuneParam::time, quda::TraceKey::time, tmp, and quda::TuneKey::volume.

Referenced by saveProfile().

Here is the caller graph for this function:

◆ serializeTrace()

static void quda::serializeTrace ( std::ostream & out )

static

Serialize trace to an ostream, useful for writing to a file or sending to other nodes.

Definition at line 261 of file tune.cpp.

References quda::TuneKey::aux, quda::TuneKey::aux_n, quda::TraceKey::key, quda::TuneKey::name, tmp, and quda::TuneKey::volume.

Referenced by saveProfile().

Here is the caller graph for this function:

◆ serializeTuneCache()

static void quda::serializeTuneCache ( std::ostream & out )

static

Serialize tunecache to an ostream, useful for writing to a file or sending to other nodes.

Definition at line 172 of file tune.cpp.

References quda::TuneKey::aux, quda::TuneParam::aux, quda::TuneParam::block, quda::TuneParam::comment, quda::TuneParam::grid, quda::TraceKey::key, quda::TuneKey::name, param, quda::TuneParam::shared_bytes, quda::TuneParam::time, and quda::TuneKey::volume.

Referenced by broadcastTuneCache(), and saveTuneCache().

Here is the caller graph for this function:

◆ set() [1/4]

__host__ __device__ double quda::set ( double & x )

inline

Definition at line 58 of file blas_helper.cuh.

Referenced by Spinor< RegType, StoreType, N, write >::set().

Here is the caller graph for this function:

◆ set() [2/4]

__host__ __device__ double2 quda::set ( double2 & x )

inline

Definition at line 59 of file blas_helper.cuh.

◆ set() [3/4]

__host__ __device__ double3 quda::set ( double3 & x )

inline

Definition at line 60 of file blas_helper.cuh.

◆ set() [4/4]

__host__ __device__ double4 quda::set ( double4 & x )

inline

Definition at line 61 of file blas_helper.cuh.

◆ setDiracParam()

void quda::setDiracParam	(	DiracParam &	diracParam,
		QudaInvertParam *	inv_param,
		bool	pc
	)

Definition at line 1562 of file interface_quda.cpp.

Referenced by cloverQuda(), computeCloverForceQuda(), computeStaggeredForceQuda(), createDirac(), quda::deflated_solver::deflated_solver(), dslashQuda(), dslashQuda_4dpc(), dslashQuda_mdwf(), init(), MatDagMatQuda(), MatQuda(), quda::DiracParam::print(), setDiracPreParam(), setDiracRefineParam(), and setDiracSloppyParam().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ setDiracPreParam()

void quda::setDiracPreParam	(	DiracParam &	diracParam,
		QudaInvertParam *	inv_param,
		const bool	pc,
		bool	comms
	)

Definition at line 1698 of file interface_quda.cpp.

References quda::DiracParam::clover, cloverPrecondition, quda::DiracParam::commDim, QudaInvertParam_s::cuda_prec_precondition, QudaInvertParam_s::dslash_type, QudaInvertParam_s::dslash_type_precondition, errorQuda, quda::DiracParam::fatGauge, quda::DiracParam::gauge, gaugeExtended, gaugeFatExtended, gaugeFatPrecondition, gaugeLongExtended, gaugeLongPrecondition, gaugePrecondition, QudaInvertParam_s::inv_type, quda::DiracParam::longGauge, QudaInvertParam_s::overlap, quda::LatticeField::Precision(), QUDA_ASQTAD_DSLASH, QUDA_PCG_INVERTER, QUDA_STAGGERED_DIRAC, QUDA_STAGGERED_DSLASH, QUDA_STAGGEREDPC_DIRAC, setDiracParam(), and quda::DiracParam::type.

Referenced by createDirac(), quda::multigrid_solver::multigrid_solver(), and updateMultigridQuda().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ setDiracRefineParam()

void quda::setDiracRefineParam	(	DiracParam &	diracParam,
		QudaInvertParam *	inv_param,
		const bool	pc
	)

Definition at line 1679 of file interface_quda.cpp.

References quda::DiracParam::clover, cloverRefinement, quda::DiracParam::commDim, QudaInvertParam_s::cuda_prec_refinement_sloppy, QudaInvertParam_s::dslash_type, errorQuda, quda::DiracParam::fatGauge, quda::DiracParam::gauge, gaugeFatRefinement, gaugeLongRefinement, gaugeRefinement, quda::DiracParam::longGauge, quda::LatticeField::Precision(), QUDA_ASQTAD_DSLASH, and setDiracParam().

Referenced by createDirac().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ setDiracSloppyParam()

void quda::setDiracSloppyParam	(	DiracParam &	diracParam,
		QudaInvertParam *	inv_param,
		bool	pc
	)

Definition at line 1661 of file interface_quda.cpp.

References quda::DiracParam::clover, cloverSloppy, quda::DiracParam::commDim, QudaInvertParam_s::cuda_prec_sloppy, QudaInvertParam_s::dslash_type, errorQuda, quda::DiracParam::fatGauge, quda::DiracParam::gauge, gaugeFatSloppy, gaugeLongSloppy, gaugeSloppy, quda::DiracParam::longGauge, quda::LatticeField::Precision(), QUDA_ASQTAD_DSLASH, and setDiracParam().

Referenced by createDirac(), quda::deflated_solver::deflated_solver(), quda::multigrid_solver::multigrid_solver(), quda::DiracParam::print(), and updateMultigridQuda().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ setIdentity() [1/3]

template<class T , int N>

__device__ __host__ void quda::setIdentity ( Matrix< T, N > * m )

inline

Definition at line 653 of file quda_matrix.h.

Referenced by bdSVD(), computeAPEStep(), computeGenGauss(), computeOvrImpSTOUTStep(), computeSTOUTStep(), constructHHMat(), exponentiate_iQ(), getRealBidiagMatrix(), and smallSVD().

Here is the caller graph for this function:

◆ setIdentity() [2/3]

template<int N>

__device__ __host__ void quda::setIdentity ( Matrix< float2, N > * m )

inline

Definition at line 669 of file quda_matrix.h.

◆ setIdentity() [3/3]

template<int N>

__device__ __host__ void quda::setIdentity ( Matrix< double2, N > * m )

inline

Definition at line 685 of file quda_matrix.h.

◆ setKernelPackT()

void quda::setKernelPackT ( bool pack )

Parameters

pack	Sets whether to use a kernel to pack the T dimension

Definition at line 24 of file dslash_quda.cu.

Referenced by quda::dslash::DslashPolicyTune< Dslash >::apply(), quda::dslash::DslashPolicyTune< Dslash >::DslashPolicyTune(), eigensolveQuda(), popKernelPackT(), pushKernelPackT(), and set_kernel_pack_t_().

Here is the caller graph for this function:

◆ setPackComms()

void quda::setPackComms ( const int * dim_pack )

Helper function that sets which dimensions the packing kernel should be packing for.

Parameters

[in] dim_pack Array that specifies which dimensions need to be packed.

Definition at line 14 of file dslash_pack2.cu.

References commDim, and QUDA_MAX_DIM.

Referenced by quda::Dslash< Float >::Dslash(), and DslashCuda::DslashCuda().

Here is the caller graph for this function:

◆ setPolicyTuning()

void quda::setPolicyTuning ( bool policy_tuning_ )

Enable / disable whether we are tuning a policy.

Definition at line 499 of file tune.cpp.

Referenced by quda::DslashCoarsePolicyTune::DslashCoarsePolicyTune(), quda::dslash::DslashPolicyTune< Dslash >::DslashPolicyTune(), quda::TunableVectorYZ::resizeStep(), quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >::TileSizeTune(), quda::DslashCoarsePolicyTune::~DslashCoarsePolicyTune(), quda::dslash::DslashPolicyTune< Dslash >::~DslashPolicyTune(), and quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >::~TileSizeTune().

Here is the caller graph for this function:

◆ setTransferGPU()

void quda::setTransferGPU ( bool )

◆ setUnitarizeLinksConstants()

void quda::setUnitarizeLinksConstants	(	double	unitarize_eps,
		double	max_error,
		bool	allow_svd,
		bool	svd_only,
		double	svd_rel_error,
		double	svd_abs_error
	)

Definition at line 72 of file unitarize_links_quda.cu.

References acos(), arg(), conj(), cos(), epsilon, errorQuda, getDeterminant(), getTrace(), in, inverse(), s, size, and sqrt().

Referenced by computeKSLinkQuda(), hisq_test(), setActionPaths(), GaugeAlgTest::SetReunitarizationConsts(), setReunitarizationConsts(), and unitarize_link_test().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ setZero() [1/3]

template<class T , int N>

__device__ __host__ void quda::setZero ( Matrix< T, N > * m )

inline

Definition at line 702 of file quda_matrix.h.

Referenced by computeStaple(), computeStapleRectangle(), exponentiate_iQ(), and quda::GaugeSTOUTArg< Float, GaugeOr, GaugeDs >::GaugeSTOUTArg().

Here is the caller graph for this function:

◆ setZero() [2/3]

template<int N>

__device__ __host__ void quda::setZero ( Matrix< float2, N > * m )

inline

Definition at line 717 of file quda_matrix.h.

◆ setZero() [3/3]

template<int N>

__device__ __host__ void quda::setZero ( Matrix< double2, N > * m )

inline

Definition at line 732 of file quda_matrix.h.

◆ shiftColorSpinorField()

void quda::shiftColorSpinorField	(	cudaColorSpinorField &	dst,
		const cudaColorSpinorField &	src,
		const unsigned int	parity,
		const unsigned int	dim,
		const int	shift
	)

◆ shiftColorSpinorFieldExternalKernel()

template<typename FloatN , int N, typename Output , typename Input >

__global__ void quda::shiftColorSpinorFieldExternalKernel ( ShiftQuarkArg< Output, Input > arg )

Definition at line 93 of file shift_quark_field.cu.

◆ shiftColorSpinorFieldKernel()

template<typename FloatN , int N, typename Output , typename Input >

__global__ void quda::shiftColorSpinorFieldKernel ( ShiftQuarkArg< Output, Input > arg )

Definition at line 68 of file shift_quark_field.cu.

References neighborIndex(), and quda::ShiftColorSpinorFieldArg< Output, Input >::shift.

Here is the call graph for this function:

◆ sigmaOprod()

template<typename real , int nvector, int mu, int nu, int parity, typename Arg >

__device__ void quda::sigmaOprod	(	Arg &	arg,
		int	idx
	)

inline

Definition at line 42 of file clover_sigma_outer_product.cuh.

References conj(), quda::ColorSpinor< Float, Nc, Ns >::data, mu, quda::CloverSigmaOprodArg< Float, Output, InputA, InputB >::nvector, outerProdSpinTrace(), and parity.

Here is the call graph for this function:

◆ sigmaOprodKernel()

template<int nvector, typename real , typename Arg >

__global__ void quda::sigmaOprodKernel ( Arg arg )

Definition at line 66 of file clover_sigma_outer_product.cuh.

References arg(), and parity.

Here is the call graph for this function:

◆ sin() [1/4]

template<typename ValueType >

__host__ __device__ ValueType quda::sin ( ValueType x )

inline

Definition at line 51 of file complex_quda.h.

References sin().

Referenced by cos(), cosh(), exponentiate_iQ(), genericSource(), genGauss(), link_sanity_check_internal_8(), new_load_half(), polar(), sin(), quda::Trig< isFixed, T >::Sin(), sinh(), su3Reconstruct8(), and tan().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ sin() [2/4]

template<class P >

void quda::sin	(	P &	p,
		int	d,
		int	n,
		int	offset
	)

Insert a sinusoidal wave sin ( n * (x[d] / X[d]) * pi ) in dimension d

Definition at line 56 of file color_spinor_util.cu.

References getCoords(), parity, s, sin(), and X.

Here is the call graph for this function:

◆ sin() [3/4]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::sin ( const complex< ValueType > & z )

inline

Definition at line 1214 of file complex_quda.h.

References cos(), cosh(), sin(), and sinh().

Here is the call graph for this function:

◆ sin() [4/4]

template<>

__host__ __device__ complex<float> quda::sin ( const complex< float > & z )

inline

Definition at line 1222 of file complex_quda.h.

References quda::complex< float >::imag(), and quda::complex< float >::real().

Referenced by sin().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ sinh() [1/3]

template<typename ValueType >

__host__ __device__ ValueType quda::sinh ( ValueType x )

inline

Definition at line 86 of file complex_quda.h.

References sinh().

Referenced by cos(), cosh(), sin(), and sinh().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ sinh() [2/3]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::sinh ( const complex< ValueType > & z )

inline

Definition at line 1230 of file complex_quda.h.

References cos(), cosh(), sin(), and sinh().

Here is the call graph for this function:

◆ sinh() [3/3]

template<>

__host__ __device__ complex<float> quda::sinh ( const complex< float > & z )

inline

Definition at line 1238 of file complex_quda.h.

References quda::complex< float >::imag(), and quda::complex< float >::real().

Referenced by sinh().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ siteChecksum()

template<typename Arg >

__device__ __host__ uint64_t quda::siteChecksum	(	const Arg &	arg,
		int	d,
		int	parity,
		int	x_cb
	)

inline

Definition at line 17 of file checksum.cu.

References quda::Matrix< T, N >::checksum(), and nColor.

Referenced by ChecksumCPU().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ spinorNoise() [1/2]

void quda::spinorNoise	(	ColorSpinorField &	in,
		RNG &	rngstate,
		QudaNoiseType	type
	)

Generate a random noise spinor. This variant allows the user to manage the RNG state.

Parameters

in	The colorspinorfield
rngstate	Random state
type	The type of noise to create (QUDA_NOISE_GAUSS or QUDA_NOISE_UNIFORM)

Decide on the input order

Definition at line 122 of file spinor_noise.cu.

References quda::SpinorNoise< real, Ns, Nc, type, Arg >::apply(), arg(), quda::ColorSpinorParam::create, quda::ColorSpinorField::Create(), errorQuda, quda::ColorSpinorField::FieldOrder(), in, quda::ColorSpinorParam::location, quda::LatticeField::Location(), quda::ColorSpinorField::Ncolor(), quda::ColorSpinorField::Nspin(), param, prec, quda::LatticeField::Precision(), QUDA_CPU_FIELD_LOCATION, QUDA_CUDA_FIELD_LOCATION, QUDA_DOUBLE_PRECISION, QUDA_FLOAT2_FIELD_ORDER, QUDA_FLOAT4_FIELD_ORDER, QUDA_NOISE_GAUSS, QUDA_NOISE_UNIFORM, QUDA_NULL_FIELD_CREATE, QUDA_SINGLE_PRECISION, and quda::ColorSpinorParam::setPrecision().

Referenced by construct_spinor_source(), quda::MG::MG(), quda::TRLM::operator()(), quda::MG::reset(), spinorNoise(), and quda::MG::verify().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ spinorNoise() [2/2]

void quda::spinorNoise	(	ColorSpinorField &	src,
		unsigned long long	seed,
		QudaNoiseType	type
	)

Generate a random noise spinor. This variant just requires a seed and will create and destroy the random number state.

Parameters

src	The colorspinorfield
seed	Seed
type	The type of noise to create (QUDA_NOISE_GAUSS or QUDA_NOISE_UNIFORM)

Definition at line 210 of file spinor_noise.cu.

References quda::RNG::Init(), quda::RNG::Release(), and spinorNoise().

Here is the call graph for this function:

◆ SpinorNoiseCPU()

template<typename real , int Ns, int Nc, QudaNoiseType type, typename Arg >

void quda::SpinorNoiseCPU ( Arg & arg )

CPU function to fill a spinor field with random noise.

Definition at line 47 of file spinor_noise.cu.

References arg(), quda::Arg< real, Ns, Nc, order >::nParity, parity, QUDA_NOISE_GAUSS, QUDA_NOISE_UNIFORM, quda::Arg< real, Ns, Nc, order >::rng, s, quda::RNG::State(), and quda::Arg< real, Ns, Nc, order >::volumeCB.

Here is the call graph for this function:

◆ SpinorNoiseGPU()

template<typename real , int Ns, int Nc, QudaNoiseType type, typename Arg >

__global__ void quda::SpinorNoiseGPU ( Arg arg )

CUDA kernel to fill a spinor field with random noise. Adopts a similar form as the CPU version, using the same inlined functions.

Definition at line 68 of file spinor_noise.cu.

References arg(), quda::Arg< real, Ns, Nc, order >::nParity, parity, QUDA_NOISE_GAUSS, QUDA_NOISE_UNIFORM, quda::Arg< real, Ns, Nc, order >::rng, s, quda::RNG::State(), and quda::Arg< real, Ns, Nc, order >::volumeCB.

Here is the call graph for this function:

◆ sqrt() [1/3]

template<typename ValueType >

__host__ __device__ ValueType quda::sqrt ( ValueType x )

inline

Definition at line 120 of file complex_quda.h.

References sqrt().

Here is the call graph for this function:

◆ sqrt() [2/3]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::sqrt ( const complex< ValueType > & z )

inline

Definition at line 1246 of file complex_quda.h.

References abs(), arg(), polar(), and sqrt().

Here is the call graph for this function:

◆ sqrt() [3/3]

template<>

__host__ __device__ complex<float> quda::sqrt ( const complex< float > & z )

inline

Definition at line 1252 of file complex_quda.h.

References abs(), arg(), and polar().

Referenced by sqrt().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ staggered()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >

__device__ __host__ void quda::staggered	(	Arg &	arg,
		int	idx,
		int	parity
	)

inline

Definition at line 164 of file dslash_staggered.cuh.

References arg(), quda::DslashArg< Float >::dagger, EXTERIOR_KERNEL_ALL, INTERIOR_KERNEL, quda::DslashArg< Float >::kernel_type, quda::DslashArg< Float >::nParity, quda::StaggeredArg< Float, nColor, reconstruct_u_, reconstruct_l_, improved_, phase_ >::out, quda::DslashArg< Float >::parity, quda::StaggeredArg< Float, nColor, reconstruct_u_, reconstruct_l_, improved_, phase_ >::x, and quda::DslashArg< Float >::xpay.

Referenced by quda::ImprovedStaggeredApply< Float, nColor, recon_l >::ImprovedStaggeredApply(), and quda::StaggeredApply< Float, nColor, recon_u >::StaggeredApply().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ staggeredGPU()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >

__global__ void quda::staggeredGPU ( Arg arg )

Definition at line 197 of file dslash_staggered.cuh.

References arg(), quda::DslashArg< Float >::nParity, and quda::DslashArg< Float >::parity.

Here is the call graph for this function:

◆ StaggeredPhase()

template<typename Arg >

__device__ __host__ auto quda::StaggeredPhase	(	const int	coords[],
		int	dim,
		int	dir,
		const Arg &	arg
	)		-> typename Arg::real

inline

Compute the staggered phase factor at unit shift from the current lattice coordinates. The routine below optimizes out the shift where possible, hence is only visible where we need to consider the boundary condition.

Parameters

[in]	coords	Lattice coordinates
[in]	X	Lattice dimensions
[in]	dim	Dimension we are hopping
[in]	dir	Direction of the unit hop (+1 or -1)
[in]	tboundary	Boundary condition

Definition at line 868 of file index_helper.cuh.

References arg(), QUDA_STAGGERED_PHASE_MILC, QUDA_STAGGERED_PHASE_TIFR, and X.

Referenced by applyStaggered().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ store_streaming_double2()

__device__ void quda::store_streaming_double2	(	double2 *	addr,
		double	x,
		double	y
	)

inline

Definition at line 88 of file inline_ptx.h.

References __PTR.

Referenced by vector_store().

Here is the caller graph for this function:

◆ store_streaming_float2()

__device__ void quda::store_streaming_float2	(	float2 *	addr,
		float	x,
		float	y
	)

inline

Definition at line 93 of file inline_ptx.h.

References __PTR.

Referenced by vector_store().

Here is the caller graph for this function:

◆ store_streaming_float4()

__device__ void quda::store_streaming_float4	(	float4 *	addr,
		float	x,
		float	y,
		float	z,
		float	w
	)

inline

Definition at line 78 of file inline_ptx.h.

References __PTR.

Referenced by vector_store().

Here is the caller graph for this function:

◆ store_streaming_short2()

__device__ void quda::store_streaming_short2	(	short2 *	addr,
		short	x,
		short	y
	)

inline

Definition at line 98 of file inline_ptx.h.

References __PTR.

Referenced by vector_store().

Here is the caller graph for this function:

◆ store_streaming_short4()

__device__ void quda::store_streaming_short4	(	short4 *	addr,
		short	x,
		short	y,
		short	z,
		short	w
	)

inline

Definition at line 83 of file inline_ptx.h.

References __PTR.

Referenced by vector_store().

Here is the caller graph for this function:

◆ STOUTStep()

void quda::STOUTStep	(	GaugeField &	dataDs,
		const GaugeField &	dataOr,
		double	rho
	)

Apply STOUT smearing to the gauge field.

Parameters

[out]	dataDs	Output smeared field
[in]	dataOr	Input gauge field
[in]	rho	smearing parameter

Definition at line 129 of file gauge_stout.cu.

References errorQuda, quda::GaugeField::isNative(), quda::GaugeField::Order(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_SINGLE_PRECISION, and quda::GaugeField::Reconstruct().

Referenced by performSTOUTnStep().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ str_end()

constexpr const char* quda::str_end ( const char * str )

inline

Definition at line 47 of file malloc_quda.h.

Referenced by file_name().

Here is the caller graph for this function:

◆ str_slant()

constexpr bool quda::str_slant ( const char * str )

inline

Definition at line 48 of file malloc_quda.h.

Referenced by file_name().

Here is the caller graph for this function:

◆ SubTraceUnit()

template<class T >

__device__ __host__ void quda::SubTraceUnit ( Matrix< T, 3 > & a )

inline

Definition at line 1125 of file quda_matrix.h.

◆ sum() [1/4]

__host__ __device__ void quda::sum	(	double &	a,
		double &	b
	)

inline

Definition at line 62 of file blas_helper.cuh.

Here is the caller graph for this function:

◆ sum() [2/4]

__host__ __device__ void quda::sum	(	double2 &	a,
		double2 &	b
	)

inline

Definition at line 63 of file blas_helper.cuh.

◆ sum() [3/4]

__host__ __device__ void quda::sum	(	double3 &	a,
		double3 &	b
	)

inline

Definition at line 68 of file blas_helper.cuh.

◆ sum() [4/4]

__host__ __device__ void quda::sum	(	double4 &	a,
		double4 &	b
	)

inline

Definition at line 74 of file blas_helper.cuh.

References doubledouble::head(), sum(), doubledouble2::x, doubledouble3::x, doubledouble2::y, doubledouble3::y, and doubledouble3::z.

Here is the call graph for this function:

◆ tan() [1/2]

template<typename ValueType >

__host__ __device__ ValueType quda::tan ( ValueType x )

inline

Definition at line 56 of file complex_quda.h.

References tan().

Here is the call graph for this function:

◆ tan() [2/2]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::tan ( const complex< ValueType > & z )

inline

Definition at line 1258 of file complex_quda.h.

References cos(), and sin().

Referenced by tan().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ tanh() [1/2]

template<typename ValueType >

__host__ __device__ ValueType quda::tanh ( ValueType x )

inline

Definition at line 91 of file complex_quda.h.

References tanh().

Here is the call graph for this function:

◆ tanh() [2/2]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::tanh ( const complex< ValueType > & z )

inline

Definition at line 1264 of file complex_quda.h.

References exp().

Referenced by tanh().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ timeInterval()

double quda::timeInterval	(	struct timeval	start,
		struct timeval	end
	)

Definition at line 18 of file inv_gcr_quda.cpp.

◆ traceEnabled()

int quda::traceEnabled ( )

Definition at line 73 of file tune.cpp.

References enable_trace, and quda::cublas::init().

Referenced by postTrace_(), saveProfile(), and tuneLaunch().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ track_free()

static void quda::track_free	(	const AllocType &	type,
		void *	ptr
	)

static

Definition at line 121 of file malloc.cpp.

References DEVICE, DEVICE_PINNED, MAPPED, PINNED, and quda::MemAlloc::size.

Referenced by device_free_(), device_pinned_free_(), and host_free_().

Here is the caller graph for this function:

◆ track_malloc()

static void quda::track_malloc	(	const AllocType &	type,
		const MemAlloc &	a,
		void *	ptr
	)

static

Definition at line 99 of file malloc.cpp.

References quda::MemAlloc::base_size, DEVICE, DEVICE_PINNED, MAPPED, PINNED, total_host_bytes, and total_pinned_bytes.

Referenced by device_malloc_(), device_pinned_malloc_(), mapped_malloc_(), pinned_malloc_(), and safe_malloc_().

Here is the caller graph for this function:

◆ tuneLaunch()

TuneParam & quda::tuneLaunch	(	Tunable &	tunable,
		QudaTune	enabled,
		QudaVerbosity	verbosity
	)

Return the optimal launch parameters for a given kernel, either by retrieving them from tunecache or autotuning on the spot.

Definition at line 643 of file tune.cpp.

References quda::Tunable::advanceTuneParam(), quda::Tunable::apply(), quda::TuneKey::aux, quda::TuneParam::aux, quda::TuneParam::block, broadcastTuneCache(), quda::Tunable::checkLaunchParam(), comm_rank(), quda::TuneParam::comment, commGlobalReduction(), quda::Tunable::defaultTuneParam(), quda::blas::end(), errorQuda, quda::TuneParam::grid, quda::Tunable::initTuneParam(), quda::Tunable::jitifyError(), quda::TraceKey::key, quda::Timer::Last(), last_key, quda::TuneParam::n_calls, quda::TuneKey::name, param, quda::Tunable::paramString(), quda::Tunable::perfString(), policyTuning(), quda::Tunable::postTune(), quda::Tunable::preTune(), printfQuda, QUDA_DEBUG_VERBOSE, QUDA_PROFILE_COMPUTE, QUDA_PROFILE_EPILOGUE, QUDA_PROFILE_INIT, QUDA_PROFILE_PREAMBLE, QUDA_PROFILE_TOTAL, QUDA_TUNE_NO, QUDA_TUNE_YES, QUDA_VERBOSE, quda::TuneParam::shared_bytes, quda::Timer::Start(), quda::Timer::Stop(), quda::TuneParam::time, quda::TraceKey::time, traceEnabled(), quda::Tunable::tuneKey(), quda::Tunable::tuningIter(), and quda::TuneKey::volume.

Here is the call graph for this function:

◆ twistCloverApply()

template<bool inverse, typename Float , int nSpin, int nColor, typename Arg >

__device__ __host__ void quda::twistCloverApply	(	Arg &	arg,
		int	x_cb,
		int	parity
	)

inline

Definition at line 665 of file dslash_quda.cu.

References Mat(), nColor, quda::Arg< real, Ns, Nc, order >::nParity, out, and parity.

Here is the call graph for this function:

◆ twistCloverCPU()

template<bool inverse, typename Float , int nSpin, int nColor, typename Arg >

void quda::twistCloverCPU ( Arg & arg )

Definition at line 709 of file dslash_quda.cu.

References quda::Arg< real, Ns, Nc, order >::nParity, and quda::Arg< real, Ns, Nc, order >::volumeCB.

◆ twistCloverGPU()

template<bool inverse, typename Float , int nSpin, int nColor, typename Arg >

__global__ void quda::twistCloverGPU ( Arg arg )

Definition at line 717 of file dslash_quda.cu.

References arg(), quda::Arg< real, Ns, Nc, order >::nParity, parity, and quda::Arg< real, Ns, Nc, order >::volumeCB.

Here is the call graph for this function:

◆ twistedClover()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >

__device__ __host__ void quda::twistedClover	(	Arg &	arg,
		int	idx,
		int	parity
	)

inline

Apply the preconditioned twisted-clover dslash.

no xpay: out(x) = M*in = A(x)^{-1}D * in(x-mu)
with xpay: out(x) = M*in = (1 + a*A(x)^{-1}D) * in(x-mu)

Definition at line 40 of file dslash_twisted_clover_preconditioned.cuh.

References quda::TwistedCloverArg< Float, nColor, reconstruct_, dynamic_clover_ >::A, quda::TwistedCloverArg< Float, nColor, reconstruct_, dynamic_clover_ >::A2inv, arg(), quda::TwistedCloverArg< Float, nColor, reconstruct_, dynamic_clover_ >::b, EXTERIOR_KERNEL_ALL, INTERIOR_KERNEL, quda::DslashArg< Float >::kernel_type, Mat(), nColor, quda::DslashArg< Float >::nParity, quda::WilsonArg< Float, nColor, reconstruct_ >::out, quda::DslashArg< Float >::parity, tmp, quda::WilsonArg< Float, nColor, reconstruct_ >::x, and quda::DslashArg< Float >::xpay.

Here is the call graph for this function:

◆ twistedCloverPreconditionedCPU()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >

void quda::twistedCloverPreconditionedCPU ( Arg arg )

Definition at line 109 of file dslash_twisted_clover_preconditioned.cuh.

References arg(), quda::DslashArg< Float >::nParity, and quda::DslashArg< Float >::parity.

Here is the call graph for this function:

◆ twistedCloverPreconditionedGPU()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >

__global__ void quda::twistedCloverPreconditionedGPU ( Arg arg )

Definition at line 124 of file dslash_twisted_clover_preconditioned.cuh.

References arg(), quda::DslashArg< Float >::nParity, and quda::DslashArg< Float >::parity.

Referenced by quda::TwistedCloverPreconditionedLaunch< Float, nDim, nColor, nParity, dagger, xpay, kernel_type, Arg >::launch().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ twistedMass() [1/2]

template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, typename Arg >

__device__ __host__ void quda::twistedMass	(	Arg &	arg,
		int	idx,
		int	parity
	)

inline

Apply the twisted-mass dslash: out(x) = M*in = a * D * in + (1 + i*b*gamma_5)*x. Note this routine only exists in xpay form.

Definition at line 29 of file dslash_twisted_mass.cuh.

References arg(), EXTERIOR_KERNEL_ALL, INTERIOR_KERNEL, quda::DslashArg< Float >::kernel_type, quda::DslashArg< Float >::nParity, quda::WilsonArg< Float, nColor, reconstruct_ >::out, quda::DslashArg< Float >::parity, and quda::WilsonArg< Float, nColor, reconstruct_ >::x.

Here is the call graph for this function:

◆ twistedMass() [2/2]

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool asymmetric, bool xpay, KernelType kernel_type, typename Arg >

__device__ __host__ void quda::twistedMass	(	Arg &	arg,
		int	idx,
		int	parity
	)

inline

Apply the preconditioned twisted-mass dslash.

no xpay: out(x) = M*in = a*(1+i*b*gamma_5)D * in
with xpay: out(x) = M*in = x + a*(1+i*b*gamma_5)D * in

Definition at line 146 of file dslash_twisted_mass_preconditioned.cuh.

References arg(), EXTERIOR_KERNEL_ALL, INTERIOR_KERNEL, quda::WilsonArg< Float, nColor, reconstruct_ >::out, quda::DslashArg< Float >::parity, and quda::WilsonArg< Float, nColor, reconstruct_ >::x.

Here is the call graph for this function:

◆ twistedMassCPU()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, typename Arg >

void quda::twistedMassCPU ( Arg arg )

Definition at line 62 of file dslash_twisted_mass.cuh.

References arg(), quda::DslashArg< Float >::nParity, and quda::DslashArg< Float >::parity.

Here is the call graph for this function:

◆ twistedMassGPU()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >

__global__ void quda::twistedMassGPU ( Arg arg )

Definition at line 76 of file dslash_twisted_mass.cuh.

References arg(), quda::DslashArg< Float >::nParity, and quda::DslashArg< Float >::parity.

Here is the call graph for this function:

◆ twistedMassPreconditionedCPU()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >

void quda::twistedMassPreconditionedCPU ( Arg arg )

Definition at line 191 of file dslash_twisted_mass_preconditioned.cuh.

References arg(), quda::DslashArg< Float >::nParity, and quda::DslashArg< Float >::parity.

Here is the call graph for this function:

◆ twistedMassPreconditionedGPU()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >

__global__ void quda::twistedMassPreconditionedGPU ( Arg arg )

Definition at line 217 of file dslash_twisted_mass_preconditioned.cuh.

References arg(), and quda::DslashArg< Float >::parity.

Here is the call graph for this function:

◆ twistGammaCPU()

template<bool doublet, typename Float , int nColor, typename Arg >

void quda::twistGammaCPU ( Arg arg )

Definition at line 332 of file dslash_quda.cu.

References quda::Arg< real, Ns, Nc, order >::nParity, parity, and quda::Arg< real, Ns, Nc, order >::volumeCB.

◆ twistGammaGPU()

template<bool doublet, typename Float , int nColor, int d, typename Arg >

__global__ void quda::twistGammaGPU ( Arg arg )

Definition at line 353 of file dslash_quda.cu.

References parity, and quda::Arg< real, Ns, Nc, order >::volumeCB.

◆ u32toa()

void quda::u32toa	(	char *	buffer,
		uint32_t	value
	)

inline

Definition at line 45 of file uint_to_char.h.

Referenced by i32toa(), and quda::Laplace< Float, nDim, nColor, Arg >::tuneKey().

Here is the caller graph for this function:

◆ u64toa()

void quda::u64toa	(	char *	buffer,
		uint64_t	value
	)

inline

Definition at line 127 of file uint_to_char.h.

Referenced by i64toa(), quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >::TileSizeTune(), and quda::QudaMemCopy::tuneKey().

Here is the caller graph for this function:

◆ unitarizeLinks() [1/2]

void quda::unitarizeLinks	(	cudaGaugeField &	outfield,
		const cudaGaugeField &	infield,
		int *	fails
	)

Definition at line 500 of file unitarize_links_quda.cu.

References errorQuda, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, and QUDA_SINGLE_PRECISION.

Referenced by GaugeAlgTest::CallUnitarizeLinks(), CallUnitarizeLinks(), computeKSLinkQuda(), isUnitary(), unitarize_link_test(), and unitarizeLinks().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ unitarizeLinks() [2/2]

void quda::unitarizeLinks	(	cudaGaugeField &	outfield,
		int *	fails
	)

Definition at line 517 of file unitarize_links_quda.cu.

References unitarizeLinks().

Here is the call graph for this function:

◆ unitarizeLinksCPU()

void quda::unitarizeLinksCPU	(	cpuGaugeField &	outfield,
		const cpuGaugeField &	infield
	)

Definition at line 299 of file unitarize_links_quda.cu.

References copyArrayToLink(), copyLinkToArray(), errorQuda, quda::cpuGaugeField::Gauge_p(), num_failures, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION, and quda::LatticeField::Volume().

Referenced by computeHISQLinksCPU(), and TEST().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ updateAlphaZeta()

void quda::updateAlphaZeta	(	double *	alpha,
		double *	zeta,
		double *	zeta_old,
		const double *	r2,
		const double *	beta,
		const double	pAp,
		const double *	offset,
		const int	nShift,
		const int	j_low
	)

Compute the new values of alpha and zeta

Definition at line 128 of file inv_multi_cg_quda.cpp.

References QUDA_MAX_MULTI_SHIFT.

Referenced by quda::MultiShiftCG::operator()().

Here is the caller graph for this function:

◆ updateAp()

void quda::updateAp	(	Complex **	beta,
		std::vector< ColorSpinorField *>	Ap,
		int	begin,
		int	size,
		int	k
	)

Definition at line 82 of file inv_gcr_quda.cpp.

References quda::blas::caxpy(), and size.

Referenced by orthoDir().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ updateGaugeField()

void quda::updateGaugeField	(	GaugeField &	out,
		double	dt,
		const GaugeField &	in,
		const GaugeField &	mom,
		bool	conj_mom,
		bool	exact
	)

Evolve the gauge field by step size dt using the momentum field

Parameters

out	Updated gauge field
dt	Step size
in	Input gauge field
mom	Momentum field
conj_mom	Whether we conjugate the momentum in the exponential
exact	Calculate exact exponential or use an expansion

Definition at line 227 of file gauge_update_quda.cu.

References errorQuda, in, quda::LatticeField::Location(), out, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, and QUDA_SINGLE_PRECISION.

Referenced by updateGaugeFieldQuda().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ updateMomentum()

void quda::updateMomentum	(	GaugeField &	mom,
		double	coeff,
		GaugeField &	force,
		const char *	fname
	)

Update the momentum field from the force field

mom = mom - coeff * [force]_TA

where [A]_TA means the traceless anti-hermitian projection of A

Parameters

mom	Momentum field
coeff	Integration stepsize
force	Force field
fname	The function calling this (fname will be printed if force monitoring is enabled)

Definition at line 328 of file momentum.cu.

References applyU(), arg(), quda::TuneParam::block, quda::blas::bytes, checkCudaError, errorQuda, quda::blas::flops, getTuning(), getVerbosity(), quda::TuneParam::grid, quda::LatticeField::Location(), quda::GaugeField::Order(), parity, quda::LatticeField::Precision(), QUDA_CUDA_FIELD_LOCATION, QUDA_DOUBLE_PRECISION, QUDA_FLOAT2_GAUGE_ORDER, QUDA_RECONSTRUCT_NO, QUDA_SINGLE_PRECISION, qudaDeviceSynchronize, quda::GaugeField::Reconstruct(), quda::TuneParam::shared_bytes, stream, tuneLaunch(), quda::LatticeField::VolString(), quda::LatticeField::VolumeCB(), X, and quda::LatticeField::X().

Referenced by computeCloverForceQuda(), computeGaugeForceQuda(), computeHISQForceQuda(), computeMomAction(), computeStaggeredForceQuda(), and hisq_force_test().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ updateSolution()

void quda::updateSolution	(	ColorSpinorField &	x,
		const Complex *	alpha,
		Complex **const	beta,
		double *	gamma,
		int	k,
		std::vector< ColorSpinorField *>	p
	)

Definition at line 145 of file inv_gcr_quda.cpp.

References backSubs(), quda::blas::caxpy(), and X.

Referenced by quda::GCR::operator()().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ variableInv()

template<typename real , int nColor, bool dagger, Dslash5Type type, bool shared, typename Vector , typename Arg >

__device__ __host__ Vector quda::variableInv	(	Arg &	arg,
		int	parity,
		int	x_cb,
		int	s_
	)

inline

Apply the M5 inverse operator at a given site on the lattice. This is an alternative algorithm that is applicable to variable b and c coefficients: here each thread in the s dimension starts computing at s = s_, and computes the left- and right-handed contributions in two separate passes. For the left-handed contribution we sweep through increasing s, e.g., s=s_, s_+1, s_+2, and for the right-handed one we do the transpose, s=s_, s_-1, s_-2. This allows us to progressively build up the scalar coefficients needed in a SIMD-friendly fashion.

Template Parameters

shared Whether to use a shared memory scratch pad to store the input field across the Ls dimension to minimize global memory reads.

Parameters

[in]	arg	Argument struct containing any meta data and accessors
[in]	parity	Parity we are on
[in]	x_cb	Checkerboarded 4-d space-time index
[in]	s_	Ls dimension coordinate

template<typename VectorType >

__device__ __host__ VectorType quda::vector_load	(	void *	ptr,
		int	idx
	)

inline

Definition at line 412 of file register_traits.h.

◆ vector_store() [1/8]

template<typename VectorType >

__device__ __host__ void quda::vector_store	(	void *	ptr,
		int	idx,
		const VectorType &	value
	)

inline

Definition at line 422 of file register_traits.h.

Referenced by quda::clover::FloatNOrder< Float, length, N, add_rho, huge_alloc >::save(), quda::colorspinor::FloatNOrder< Float, Ns, Nc, N, spin_project, huge_alloc >::save(), quda::gauge::FloatNOrder< Float, length, N, reconLenParam, stag_phase, huge_alloc, ghostExchange_, use_inphase >::save(), quda::colorspinor::FloatNOrder< Float, Ns, Nc, N, spin_project, huge_alloc >::saveGhost(), quda::gauge::FloatNOrder< Float, length, N, reconLenParam, stag_phase, huge_alloc, ghostExchange_, use_inphase >::saveGhost(), quda::gauge::FloatNOrder< Float, length, N, reconLenParam, stag_phase, huge_alloc, ghostExchange_, use_inphase >::saveGhostEx(), and vector_store().

Here is the caller graph for this function:

◆ vector_store() [2/8]

template<>

__device__ __host__ void quda::vector_store	(	void *	ptr,
		int	idx,
		const double2 &	value
	)

inline

Definition at line 427 of file register_traits.h.

References store_streaming_double2().

Here is the call graph for this function:

◆ vector_store() [3/8]

template<>

__device__ __host__ void quda::vector_store	(	void *	ptr,
		int	idx,
		const float4 &	value
	)

inline

Definition at line 436 of file register_traits.h.

References store_streaming_float4().

Here is the call graph for this function:

◆ vector_store() [4/8]

template<>

__device__ __host__ void quda::vector_store	(	void *	ptr,
		int	idx,
		const float2 &	value
	)

inline

Definition at line 445 of file register_traits.h.

References store_streaming_float2().

Here is the call graph for this function:

◆ vector_store() [5/8]

template<>

__device__ __host__ void quda::vector_store	(	void *	ptr,
		int	idx,
		const short4 &	value
	)

inline

Definition at line 454 of file register_traits.h.

References store_streaming_short4().

Here is the call graph for this function:

◆ vector_store() [6/8]

template<>

__device__ __host__ void quda::vector_store	(	void *	ptr,
		int	idx,
		const short2 &	value
	)

inline

Definition at line 463 of file register_traits.h.

References store_streaming_short2().

Here is the call graph for this function:

◆ vector_store() [7/8]

template<>

__device__ __host__ void quda::vector_store	(	void *	ptr,
		int	idx,
		const char4 &	value
	)

inline

Definition at line 473 of file register_traits.h.

References store_streaming_short2().

Here is the call graph for this function:

◆ vector_store() [8/8]

template<>

__device__ __host__ void quda::vector_store	(	void *	ptr,
		int	idx,
		const char2 &	value
	)

inline

Definition at line 484 of file register_traits.h.

References vector_store().

Here is the call graph for this function:

◆ virtualBlockDim()

template<typename Arg >

__device__ __host__ int quda::virtualBlockDim ( const Arg & arg )

inline

Definition at line 613 of file coarse_op_kernel.cuh.

Referenced by getIndicesShared().

Here is the caller graph for this function:

◆ virtualThreadIdx()

template<typename Arg >

__device__ __host__ int quda::virtualThreadIdx ( const Arg & arg )

inline

Definition at line 604 of file coarse_op_kernel.cuh.

Referenced by computeVUV(), and getIndicesShared().

Here is the caller graph for this function:

◆ wilson()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >

__device__ __host__ void quda::wilson	(	Arg &	arg,
		int	idx,
		int	s,
		int	parity
	)

inline

Definition at line 135 of file dslash_wilson.cuh.

References arg(), EXTERIOR_KERNEL_ALL, INTERIOR_KERNEL, quda::DslashArg< Float >::kernel_type, quda::DslashArg< Float >::nParity, quda::WilsonArg< Float, nColor, reconstruct_ >::out, quda::DslashArg< Float >::parity, s, quda::WilsonArg< Float, nColor, reconstruct_ >::x, and quda::DslashArg< Float >::xpay.

Referenced by quda::WilsonApply< Float, nColor, recon >::WilsonApply(), quda::WilsonCloverApply< Float, nColor, recon >::WilsonCloverApply(), and quda::WilsonCloverPreconditionedApply< Float, nColor, recon >::WilsonCloverPreconditionedApply().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ wilsonClover() [1/2]

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >

__device__ __host__ void quda::wilsonClover	(	Arg &	arg,
		int	idx,
		int	parity
	)

inline

Apply the clover preconditioned Wilson dslash.

no xpay: out(x) = M*in = A(x)^{-1}D * in(x-mu)
with xpay: out(x) = M*in = (1 - kappa*A(x)^{-1}D) * in(x-mu)

Definition at line 37 of file dslash_wilson_clover_preconditioned.cuh.

References quda::WilsonCloverArg< Float, nColor, reconstruct_, twist_ >::A, arg(), EXTERIOR_KERNEL_ALL, INTERIOR_KERNEL, quda::DslashArg< Float >::kernel_type, nColor, quda::DslashArg< Float >::nParity, quda::WilsonArg< Float, nColor, reconstruct_ >::out, quda::DslashArg< Float >::parity, tmp, quda::WilsonArg< Float, nColor, reconstruct_ >::x, and quda::DslashArg< Float >::xpay.

Here is the call graph for this function:

◆ wilsonClover() [2/2]

template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, typename Arg >

__device__ __host__ void quda::wilsonClover	(	Arg &	arg,
		int	idx,
		int	parity
	)

inline

Apply the Wilson-clover dslash: out(x) = M*in = A(x)*x(x) + D * in(x-mu). Note this routine only exists in xpay form.

Definition at line 38 of file dslash_wilson_clover.cuh.

References quda::WilsonCloverArg< Float, nColor, reconstruct_, twist_ >::A, arg(), quda::WilsonCloverArg< Float, nColor, reconstruct_, twist_ >::b, EXTERIOR_KERNEL_ALL, INTERIOR_KERNEL, quda::DslashArg< Float >::kernel_type, nColor, quda::DslashArg< Float >::nParity, quda::WilsonArg< Float, nColor, reconstruct_ >::out, quda::DslashArg< Float >::parity, tmp, and quda::WilsonArg< Float, nColor, reconstruct_ >::x.

Here is the call graph for this function:

◆ wilsonCloverCPU()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >

void quda::wilsonCloverCPU ( Arg arg )

Definition at line 89 of file dslash_wilson_clover.cuh.

References arg(), quda::DslashArg< Float >::nParity, and quda::DslashArg< Float >::parity.

Here is the call graph for this function:

◆ wilsonCloverGPU()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >

__global__ void quda::wilsonCloverGPU ( Arg arg )

Definition at line 103 of file dslash_wilson_clover.cuh.

References arg(), quda::DslashArg< Float >::nParity, and quda::DslashArg< Float >::parity.

Here is the call graph for this function:

◆ wilsonCloverPreconditionedCPU()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >

void quda::wilsonCloverPreconditionedCPU ( Arg arg )

Definition at line 100 of file dslash_wilson_clover_preconditioned.cuh.

References arg(), quda::DslashArg< Float >::nParity, and quda::DslashArg< Float >::parity.

Here is the call graph for this function:

◆ wilsonCloverPreconditionedGPU()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >

__global__ void quda::wilsonCloverPreconditionedGPU ( Arg arg )

Definition at line 115 of file dslash_wilson_clover_preconditioned.cuh.

References arg(), quda::DslashArg< Float >::nParity, and quda::DslashArg< Float >::parity.

Referenced by quda::WilsonCloverPreconditionedLaunch< Float, nDim, nColor, nParity, dagger, xpay, kernel_type, Arg >::launch().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ wilsonCPU()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >

void quda::wilsonCPU ( Arg arg )

Definition at line 165 of file dslash_wilson.cuh.

References arg(), quda::DslashArg< Float >::nParity, and quda::DslashArg< Float >::parity.

Here is the call graph for this function:

◆ wilsonGPU()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >

__global__ void quda::wilsonGPU ( Arg arg )

Definition at line 180 of file dslash_wilson.cuh.

References arg(), quda::DslashArg< Float >::nParity, and quda::DslashArg< Float >::parity.

Here is the call graph for this function:

◆ writeLinkVariableToArray() [1/2]

template<class T , class U >

__device__ void quda::writeLinkVariableToArray	(	const Matrix< T, 3 > &	link,
		const int	dir,
		const int	idx,
		const int	stride,
		U *const	array
	)

inline

Definition at line 926 of file quda_matrix.h.

References quda::Matrix< T, N >::data.

◆ writeLinkVariableToArray() [2/2]

__device__ void quda::writeLinkVariableToArray	(	const Matrix< complex< double >, 3 > &	link,
		const int	dir,
		const int	idx,
		const int	stride,
		float2 *const	array
	)

inline

Definition at line 939 of file quda_matrix.h.

◆ writeMatrixToArray()

template<class T , int N, class U >

__device__ void quda::writeMatrixToArray	(	const Matrix< T, N > &	mat,
		const int	idx,
		const int	stride,
		U *const	array
	)

inline

Definition at line 895 of file quda_matrix.h.

References quda::Matrix< T, N >::data.

◆ writeMomentumToArray()

template<class T , class U >

__device__ void quda::writeMomentumToArray	(	const Matrix< T, 3 > &	mom,
		const int	dir,
		const int	idx,
		const U	coeff,
		const int	stride,
		T *const	array
	)

inline

Definition at line 991 of file quda_matrix.h.

References quda::Matrix< T, N >::data.

◆ wuppertalStep() [1/2]

void quda::wuppertalStep	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		int	parity,
		const GaugeField &	U,
		double	A,
		double	B
	)

Apply a generic Wuppertal smearing step. Computes out(x) = A*in(x) + B * sum_mu (U_{-mu}(x) in(x+mu) + U^dagger_mu(x-mu) in(x-mu))

Parameters

[out]	out	The out result field
[in]	in	The in spinor field
[in]	U	The gauge field
[in]	A	The scaling factor for in(x)
[in]	B	The scaling factor for sum_mu (U_{-mu}(x) in(x+mu) + U^dagger_mu(x-mu) in(x-mu))

Definition at line 186 of file color_spinor_wuppertal.cu.

Referenced by performWuppertalnStep(), and wuppertalStep().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ wuppertalStep() [2/2]

void quda::wuppertalStep	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		int	parity,
		const GaugeField &	U,
		double	alpha
	)

Apply a standard Wuppertal smearing step. Computes out(x) = 1/(1+6*alpha) * (in(x) + alpha * sum_mu (U_{-mu}(x) in(x+mu) + U^dagger_mu(x-mu) in(x-mu)))

Parameters

[out]	out	The out result field
[in]	in	The in spinor field
[in]	U	The gauge field
[in]	alpha	The smearing parameter

Definition at line 291 of file color_spinor_wuppertal.cu.

References wuppertalStep().

Here is the call graph for this function:

◆ wuppertalStepCPU()

template<typename Float , int Ns, int Nc, typename Arg >

void quda::wuppertalStepCPU ( Arg arg )

Definition at line 117 of file color_spinor_wuppertal.cu.

References arg(), quda::Arg< real, Ns, Nc, order >::nParity, quda::WuppertalSmearingArg< Float, Ns, Nc, gRecon >::parity, and quda::Arg< real, Ns, Nc, order >::volumeCB.

Here is the call graph for this function:

◆ wuppertalStepGPU()

template<typename Float , int Ns, int Nc, typename Arg >

__device__ __host__ void quda::zero ( float4 & a )

inline

Definition at line 22 of file float_vector.h.

◆ zero() [9/10]

template<typename T >

static void quda::zero	(	T	d[],
		int	N
	)

static

Definition at line 52 of file inv_mpcg_quda.cpp.

◆ zero() [10/10]

template<typename scalar , int n>

__device__ __host__ void quda::zero ( vector_type< scalar, n > & v )

inline

Definition at line 54 of file cub_helper.cuh.

References quda::vector_type< scalar, n >::data.

Referenced by quda::ShiftUpdate::apply(), applyThirdTerm(), quda::MG::buildFreeVectors(), quda::MPBiCGstab::computeMatrixPowers(), quda::GMResDR::FlexArnoldiProcedure(), quda::cpuGaugeField::Gauge_p(), quda::MG::generateNullVectors(), quda::cudaColorSpinorField::Ghost2(), quda::HMatrix< T, N >::HMatrix(), quda::Matrix< T, N >::Matrix(), quda::cudaGaugeField::Odd_p(), quda::clover::Accessor< Float, nColor, nSpin, QUDA_PACKED_CLOVER_ORDER >::operator()(), quda::MG::operator()(), quda::MPCG::operator()(), quda::PreconCG::operator()(), quda::MPBiCGstab::operator()(), quda::SD::operator()(), quda::GMResDR::operator()(), reduce2d(), quda::ReduceArg< vector_type< ReduceType, NXZ > >::ReduceArg(), quda::MG::reset(), quda::GaugeField::SiteSize(), quda::vector_type< ReduceType, NXZ >::vector_type(), quda::Deflation::verify(), and quda::MG::verify().

Here is the caller graph for this function:

Variable Documentation

◆ alloc

std::map<void *, MemAlloc> quda::alloc[N_ALLOC_TYPE]

static

Definition at line 53 of file malloc.cpp.

Referenced by quda::colorspinor::FieldOrderCB< Float, coarseSpin, coarseColor, 1, csOrder, Float, ghostFloat >::abs_max(), quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >::apply(), quda::colorspinor::FieldOrderCB< Float, coarseSpin, coarseColor, 1, csOrder, Float, ghostFloat >::norm2(), quda::clover::Accessor< Float, nColor, nSpin, QUDA_FLOAT2_CLOVER_ORDER >::transform_reduce(), quda::clover::Accessor< Float, nColor, nSpin, QUDA_FLOAT4_CLOVER_ORDER >::transform_reduce(), quda::gauge::Accessor< Float, nColor, QUDA_QDP_GAUGE_ORDER, storeFloat, use_tex >::transform_reduce(), quda::gauge::Accessor< Float, nColor, QUDA_MILC_GAUGE_ORDER, storeFloat, use_tex >::transform_reduce(), and quda::gauge::Accessor< Float, nColor, QUDA_FLOAT2_GAUGE_ORDER, storeFloat, use_tex >::transform_reduce().

◆ apiTimer

TimeProfile quda::apiTimer("CUDA API calls (driver)")

static

◆ B_array_d

__constant__ signed char quda::B_array_d[MAX_MATRIX_SIZE]

static

Definition at line 16 of file block_orthogonalize.cuh.

Referenced by __launch_bounds__().

◆ B_array_h

signed char quda::B_array_h[MAX_MATRIX_SIZE]

static

Definition at line 19 of file block_orthogonalize.cuh.

◆ bidirectional_debug

bool quda::bidirectional_debug = false

static

Definition at line 11 of file coarse_op.cuh.

◆ commDim

int quda::commDim[QUDA_MAX_DIM]

static

Definition at line 9 of file dslash_pack.cuh.

Referenced by quda::dslash::DslashCommsPattern::DslashCommsPattern(), exchange_cpu_sitelink_ex(), exchangeExtendedGhost(), extractGhost(), extractGhostEx(), quda::Pack< Float, nColor, spin_project >::fillAux(), quda::MG::generateNullVectors(), quda::Pack< Float, nColor, spin_project >::gridStep(), last_node_in_t(), quda::Pack< Float, nColor, spin_project >::maxGridSize(), quda::Pack< Float, nColor, spin_project >::minGridSize(), quda::Pack< Float, nColor, spin_project >::Pack(), PackGhost(), and setPackComms().

◆ complete_recv_back

bool quda::complete_recv_back[QUDA_MAX_DIM] = { }

static

Definition at line 1123 of file cuda_color_spinor_field.cpp.

◆ complete_recv_fwd

bool quda::complete_recv_fwd[QUDA_MAX_DIM] = { }

static

Definition at line 1122 of file cuda_color_spinor_field.cpp.

◆ complete_send_back

bool quda::complete_send_back[QUDA_MAX_DIM] = { }

static

Definition at line 1125 of file cuda_color_spinor_field.cpp.

◆ complete_send_fwd

bool quda::complete_send_fwd[QUDA_MAX_DIM] = { }

static

Definition at line 1124 of file cuda_color_spinor_field.cpp.

◆ count

__device__ unsigned int quda::count[QUDA_MAX_MULTI_REDUCE] = { }

◆ debug

bool quda::debug = false

static

Definition at line 12 of file multigrid.cpp.

long long quda::force_count = 0

static

Definition at line 26 of file momentum.cu.

◆ force_flush

long long quda::force_flush = 1000

static

Definition at line 27 of file momentum.cu.

◆ force_stream

std::stringstream quda::force_stream

static

Definition at line 25 of file momentum.cu.

◆ gDigitsLut

const char quda::gDigitsLut[200]

static

Initial value:

= {
    '0','0','0','1','0','2','0','3','0','4','0','5','0','6','0','7','0','8','0','9',
    '1','0','1','1','1','2','1','3','1','4','1','5','1','6','1','7','1','8','1','9',
    '2','0','2','1','2','2','2','3','2','4','2','5','2','6','2','7','2','8','2','9',
    '3','0','3','1','3','2','3','3','3','4','3','5','3','6','3','7','3','8','3','9',
    '4','0','4','1','4','2','4','3','4','4','4','5','4','6','4','7','4','8','4','9',
    '5','0','5','1','5','2','5','3','5','4','5','5','5','6','5','7','5','8','5','9',
    '6','0','6','1','6','2','6','3','6','4','6','5','6','6','6','7','6','8','6','9',
    '7','0','7','1','7','2','7','3','7','4','7','5','7','6','7','7','7','8','7','9',
    '8','0','8','1','8','2','8','3','8','4','8','5','8','6','8','7','8','8','8','9',
    '9','0','9','1','9','2','9','3','9','4','9','5','9','6','9','7','9','8','9','9'
  }

Definition at line 32 of file uint_to_char.h.

◆ initial_cache_size

size_t quda::initial_cache_size = 0

static

Definition at line 110 of file tune.cpp.

◆ isLastBlockDone

__shared__ bool quda::isLastBlockDone

Definition at line 91 of file cub_helper.cuh.

◆ isLastWarpDone

__shared__ volatile bool quda::isLastWarpDone[16]

Definition at line 140 of file cub_helper.cuh.

◆ it

map::iterator quda::it

long quda::max_total_bytes[N_ALLOC_TYPE] = {0}

static

Definition at line 55 of file malloc.cpp.

◆ max_total_host_bytes

long quda::max_total_host_bytes

static

Definition at line 56 of file malloc.cpp.

◆ max_total_pinned_bytes

long quda::max_total_pinned_bytes

static

Definition at line 57 of file malloc.cpp.

◆ mobius_d [1/2]

__constant__ char quda::mobius_d[size]

static

Definition at line 9 of file dslash_domain_wall_4d.cuh.

Referenced by quda::DomainWall4DArg< Float, nColor, reconstruct_ >::a5(), quda::DomainWall4D< Float, nDim, nColor, Arg >::apply(), and quda::coeff_type< real, true, Arg >::coeff().

◆ mobius_d [2/2]

__constant__ char quda::mobius_d[size]

static

Definition at line 19 of file dslash_domain_wall_m5.cuh.

◆ Nstream

const int quda::Nstream = 9

Definition at line 83 of file quda_internal.h.

◆ pinned_allocator

auto quda::pinned_allocator = [] (size_t bytes ) { return static_cast<Complex*>(pool_pinned_malloc(bytes)); }

static

Definition at line 20 of file deflation.cpp.

Referenced by quda::Deflation::reduce(), and quda::Deflation::verify().

◆ pinned_deleter

auto quda::pinned_deleter = [] (Complex *hptr) { pool_pinned_free(hptr); }

static

Definition at line 21 of file deflation.cpp.

Referenced by quda::Deflation::reduce(), and quda::Deflation::verify().

◆ policy_string

char quda::policy_string[TuneKey::aux_n]

static

Definition at line 600 of file dslash_coarse.cu.

◆ policy_tuning

bool quda::policy_tuning = false

static

Definition at line 494 of file tune.cpp.

Referenced by policyTuning().

◆ profile_count

bool quda::profile_count = true

static

Definition at line 123 of file tune.cpp.

__shared__ float quda::s[]

Applies the coarse dslash on a given parity and checkerboard site index

Parameters

out	The result - kappa * Dslash in
Y	The coarse gauge field
kappa	Kappa value
in	The input field
parity	The site parity
x_cb	The checkerboarded site index

◆ size

constexpr int quda::size = 4096

Definition at line 8 of file dslash_domain_wall_4d.cuh.

Referenced by quda::cublas::BatchInvertMatrix(), broadcastTuneCache(), quda::EigenSolver::computeEvals(), quda::BiCGstabL::computeTau(), cpu_axy(), cpu_xpy(), quda::Dslash5Arg< Float, nColor >::Dslash5Arg(), quda::CopyGauge< FloatOut, FloatIn, length, Arg >::minThreads(), quda::ExtractGhost< nDim, Arg >::minThreads(), quda::ExtractGhostEx< Float, length, nDim, dim, Order >::minThreads(), quda::Object::operator new(), quda::Object::operator new[](), qChargeDensityQuda(), setUnitarizeLinksConstants(), quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >::TileSizeTune(), updateAp(), and quda::BiCGstabL::updateR().

◆ stream

cudaStream_t* quda::stream

Definition at line 897 of file cuda_color_spinor_field.cpp.

std::list<TraceKey> quda::trace_list

static

Definition at line 70 of file tune.cpp.

◆ tunecache

map quda::tunecache

static

Definition at line 108 of file tune.cpp.

Referenced by getTuneCache().

◆ tuning

bool quda::tuning = false

static

tuning in progress?

Definition at line 119 of file tune.cpp.

Referenced by activeTuning().

◆ unscaled_shifts

double quda::unscaled_shifts[QUDA_MAX_MULTI_SHIFT]

static

Definition at line 1767 of file interface_quda.cpp.

Referenced by invertMultiShiftQuda().

Namespaces

Classes

Typedefs

Enumerations

Functions

Variables

Detailed Description

Typedef Documentation

◆ ColorSpinorFieldSet

◆ Complex

◆ CompositeColorSpinorField

◆ cuRNGState

◆ DenseMatrix

◆ DynamicStride

◆ map

◆ RealVector

◆ RowMajorDenseMatrix

◆ storeType

◆ Vector

◆ VectorSet

Enumeration Type Documentation

◆ AllocType

◆ BiCGstabLUpdateType

◆ ComputeType

◆ Dslash5Type

◆ DslashCoarsePolicy

◆ DslashType

◆ KernelType

◆ libtype [1/2]

◆ libtype [2/2]

◆ MemoryLocation

◆ norm_type_ [1/2]

◆ norm_type_ [2/2]

◆ QudaProfileType

Function Documentation

◆ __fast_pow()

◆ __launch_bounds__()

◆ _norm()

◆ abs() [1/4]

◆ abs() [2/4]

◆ abs() [3/4]

◆ abs() [4/4]

◆ acos() [1/2]

◆ acos() [2/2]

◆ acosh()

◆ activeTuning()

◆ AddCoarseDiagonalCPU()

◆ AddCoarseDiagonalGPU()

◆ AddCoarseTmDiagonalCPU()

◆ AddCoarseTmDiagonalGPU()

◆ aligned_malloc()

◆ APEStep()

◆ appendMatrixToArray() [1/2]

◆ appendMatrixToArray() [2/2]

◆ applyB()

◆ applyClover()

◆ ApplyClover()

◆ ApplyCoarse()

◆ applyCovDev()

◆ ApplyCovDev()

◆ ApplyDomainWall4D()

◆ ApplyDomainWall5D()

◆ applyDslash()

◆ ApplyDslash5()

◆ ApplyGamma() [1/2]

◆ ApplyGamma() [2/2]

◆ applyGaugePhase()

◆ ApplyImprovedStaggered()

◆ applyLaplace()

◆ ApplyLaplace()

◆ ApplyNdegTwistedMass()

◆ ApplyNdegTwistedMassPreconditioned()

◆ applyStaggered()

◆ ApplyStaggered()

◆ applyT()

◆ applyThirdTerm()

◆ ApplyTwistClover()

◆ ApplyTwistedClover()

◆ ApplyTwistedCloverPreconditioned()

◆ ApplyTwistedMass()