Namespaces
	blas

	blas_lapack

	clover

	colorspinor

	device

	dslash

	fermion_force

	gauge

	mma

	mobius_eofa

	mobius_tensor_core

	pool

	reducer

Classes
struct	CloverFieldParam

class	CloverField

class	cudaCloverField

class	cpuCloverField

struct	FullClover

struct	clover_wrapper
	clover_wrapper is an internal class that is used to wrap instances of clover accessors, currying in a specific location and chirality on the field. The operator() accessors in clover-field accessors return instances to this class, allowing us to then use operator overloading upon this class to interact with the HMatrix class. As a result we can include clover-field accessors directly in HMatrix expressions in kernels without having to declare temporaries with explicit calls to the load/save methods in the clover-field accessors. More...

struct	clover_mapper

struct	clover_mapper< double, N, add_rho >

struct	clover_mapper< float, N, add_rho >

struct	clover_mapper< short, N, add_rho >

struct	clover_mapper< int8_t, N, add_rho >

struct	colorspinor_wrapper
	colorspinor_wrapper is an internal class that is used to wrap instances of colorspinor accessors, currying in a specific location on the field. The operator() accessors in colorspinor-field accessors return instances to this class, allowing us to then use operator overloading upon this class to interact with the ColorSpinor class. As a result we can include colorspinor-field accessors directly in ColorSpinor expressions in kernels without having to declare temporaries with explicit calls to the load/save methods in the colorspinor-field accessors. More...

struct	colorspinor_ghost_wrapper
	colorspinor_ghost_wrapper is an internal class that is used to wrap instances of colorspinor accessors, currying in a specific location on the field. The Ghost() accessors in colorspinor-field accessors return instances to this class, allowing us to then use operator overloading upon this class to interact with the ColorSpinor class. As a result we can include colorspinor-field accessors directly in ColorSpinor expressions in kernels without having to declare temporaries with explicit calls to the loadGhost/saveGhost methods in the colorspinor-field accessors. More...

struct	ColorSpinor

struct	ColorSpinor< Float, Nc, 4 >

struct	ColorSpinor< Float, Nc, 2 >

struct	CompositeColorSpinorFieldDescriptor

class	ColorSpinorParam

struct	DslashConstant
	Constants used by dslash and packing kernels. More...

class	ColorSpinorField

class	cudaColorSpinorField

class	cpuColorSpinorField

struct	colorspinor_mapper

struct	colorspinor_mapper< double, 4, Nc, false, huge_alloc >

struct	colorspinor_mapper< double, 4, Nc, true, huge_alloc >

struct	colorspinor_mapper< double, 2, Nc, false, huge_alloc >

struct	colorspinor_mapper< double, 1, Nc, false, huge_alloc >

struct	colorspinor_mapper< float, 4, Nc, false, huge_alloc >

struct	colorspinor_mapper< float, 4, Nc, true, huge_alloc >

struct	colorspinor_mapper< float, 2, Nc, false, huge_alloc >

struct	colorspinor_mapper< float, 1, Nc, false, huge_alloc >

struct	colorspinor_mapper< short, 4, Nc, false, huge_alloc >

struct	colorspinor_mapper< short, 4, Nc, true, huge_alloc >

struct	colorspinor_mapper< short, 2, Nc, false, huge_alloc >

struct	colorspinor_mapper< short, 1, Nc, false, huge_alloc >

struct	colorspinor_mapper< int8_t, 4, Nc, false, huge_alloc >

struct	colorspinor_mapper< int8_t, 4, Nc, true, huge_alloc >

struct	colorspinor_mapper< int8_t, 2, Nc, false, huge_alloc >

struct	colorspinor_mapper< int8_t, 1, Nc, false, huge_alloc >

struct	colorspinor_order_mapper

struct	colorspinor_order_mapper< T, QUDA_SPACE_COLOR_SPIN_FIELD_ORDER, Ns, Nc >

struct	colorspinor_order_mapper< T, QUDA_SPACE_SPIN_COLOR_FIELD_ORDER, Ns, Nc >

struct	colorspinor_order_mapper< T, QUDA_FLOAT2_FIELD_ORDER, Ns, Nc >

struct	CommKey

struct	complex

struct	norm_type

struct	norm_type< complex< T > >

struct	complex< float >

struct	complex< double >

struct	complex< int8_t >

struct	complex< short >

struct	complex< int >

struct	DeflationParam

class	Deflation

struct	deflated_solver

class	DiracParam

class	Dirac

class	DiracWilson

class	DiracWilsonPC

class	DiracClover

class	DiracCloverPC

class	DiracCloverHasenbuschTwist

class	DiracCloverHasenbuschTwistPC

class	DiracDomainWall

class	DiracDomainWallPC

class	DiracDomainWall4D

class	DiracDomainWall4DPC

class	DiracMobius

class	DiracMobiusPC

class	DiracMobiusEofa

class	DiracMobiusEofaPC

class	DiracTwistedMass

class	DiracTwistedMassPC

class	DiracTwistedClover

class	DiracTwistedCloverPC

class	DiracStaggered

class	DiracStaggeredPC

class	DiracStaggeredKD

class	DiracImprovedStaggered

class	DiracImprovedStaggeredPC

class	DiracImprovedStaggeredKD

class	DiracCoarse

class	DiracCoarsePC

class	GaugeLaplace
	Full Gauge Laplace operator. Although not a Dirac operator per se, it's a linear operator so it's convenient to put in the Dirac operator abstraction. More...

class	GaugeLaplacePC
	Even-odd preconditioned Gauge Laplace operator. More...

class	GaugeCovDev
	Full Covariant Derivative operator. Although not a Dirac operator per se, it's a linear operator so it's convenient to put in the Dirac operator abstraction. More...

class	DiracMatrix

class	DiracM

class	DiracMdagM

class	DiracMdagMLocal

class	DiracMMdag

class	DiracMdag

class	DiracDagger

class	DiracG5M

class	Dslash
	This is the generic driver for launching Dslash kernels (the base kernel of which is defined in dslash_helper.cuh). This is templated on a template template parameter which is the underlying operator wrapped in a class. More...

class	EigenSolver

class	TRLM
	Thick Restarted Lanczos Method. More...

class	BLKTRLM
	Block Thick Restarted Lanczos Method. More...

class	IRAM
	Implicitly Restarted Arnoldi Method. More...

struct	RealType

struct	RealType< double >

struct	RealType< double2 >

struct	RealType< complex< double > >

struct	RealType< float >

struct	RealType< float2 >

struct	RealType< complex< float > >

struct	RealType< float4 >

struct	RealType< short >

struct	RealType< short2 >

struct	RealType< complex< short > >

struct	RealType< short4 >

struct	RealType< int8_t >

struct	RealType< char2 >

struct	RealType< complex< int8_t > >

struct	RealType< char4 >

struct	vector_type

struct	GaugeFieldParam

class	GaugeField

class	cudaGaugeField

class	cpuGaugeField

struct	gauge_wrapper
	gauge_wrapper is an internal class that is used to wrap instances of gauge accessors, currying in a specific location on the field. The operator() accessors in gauge-field accessors return instances to this class, allowing us to then use operator overloading upon this class to interact with the Matrix class. As a result we can include gauge-field accessors directly in Matrix expressions in kernels without having to declare temporaries with explicit calls to the load/save methods in the gauge-field accessors. More...

struct	gauge_ghost_wrapper
	gauge_ghost_wrapper is an internal class that is used to wrap instances of gauge ghost accessors, currying in a specific location and dimension on the field. The Ghost() accessors in gauge-field accessors return instances to this class, allowing us to then use operator overloading upon this class to interact with the Matrix class. As a result we can include gauge-field ghost accessors directly in Matrix expressions in kernels without having to declare temporaries with explicit calls to the load/save methods in the gauge-field accessors. More...

struct	gauge_mapper

struct	gauge_mapper< double, QUDA_RECONSTRUCT_NO, N, stag, huge_alloc, ghostExchange, use_inphase, QUDA_NATIVE_GAUGE_ORDER >

struct	gauge_mapper< double, QUDA_RECONSTRUCT_13, N, stag, huge_alloc, ghostExchange, use_inphase, QUDA_NATIVE_GAUGE_ORDER >

struct	gauge_mapper< double, QUDA_RECONSTRUCT_12, N, stag, huge_alloc, ghostExchange, use_inphase, QUDA_NATIVE_GAUGE_ORDER >

struct	gauge_mapper< double, QUDA_RECONSTRUCT_10, N, stag, huge_alloc, ghostExchange, use_inphase, QUDA_NATIVE_GAUGE_ORDER >

struct	gauge_mapper< double, QUDA_RECONSTRUCT_9, N, stag, huge_alloc, ghostExchange, use_inphase, QUDA_NATIVE_GAUGE_ORDER >

struct	gauge_mapper< double, QUDA_RECONSTRUCT_8, N, stag, huge_alloc, ghostExchange, use_inphase, QUDA_NATIVE_GAUGE_ORDER >

struct	gauge_mapper< float, QUDA_RECONSTRUCT_NO, N, stag, huge_alloc, ghostExchange, use_inphase, QUDA_NATIVE_GAUGE_ORDER >

struct	gauge_mapper< float, QUDA_RECONSTRUCT_13, N, stag, huge_alloc, ghostExchange, use_inphase, QUDA_NATIVE_GAUGE_ORDER >

struct	gauge_mapper< float, QUDA_RECONSTRUCT_12, N, stag, huge_alloc, ghostExchange, use_inphase, QUDA_NATIVE_GAUGE_ORDER >

struct	gauge_mapper< float, QUDA_RECONSTRUCT_10, N, stag, huge_alloc, ghostExchange, use_inphase, QUDA_NATIVE_GAUGE_ORDER >

struct	gauge_mapper< float, QUDA_RECONSTRUCT_9, N, stag, huge_alloc, ghostExchange, use_inphase, QUDA_NATIVE_GAUGE_ORDER >

struct	gauge_mapper< float, QUDA_RECONSTRUCT_8, N, stag, huge_alloc, ghostExchange, use_inphase, QUDA_NATIVE_GAUGE_ORDER >

struct	gauge_mapper< short, QUDA_RECONSTRUCT_NO, N, stag, huge_alloc, ghostExchange, use_inphase, QUDA_NATIVE_GAUGE_ORDER >

struct	gauge_mapper< short, QUDA_RECONSTRUCT_13, N, stag, huge_alloc, ghostExchange, use_inphase, QUDA_NATIVE_GAUGE_ORDER >

struct	gauge_mapper< short, QUDA_RECONSTRUCT_12, N, stag, huge_alloc, ghostExchange, use_inphase, QUDA_NATIVE_GAUGE_ORDER >

struct	gauge_mapper< short, QUDA_RECONSTRUCT_10, N, stag, huge_alloc, ghostExchange, use_inphase, QUDA_NATIVE_GAUGE_ORDER >

struct	gauge_mapper< short, QUDA_RECONSTRUCT_9, N, stag, huge_alloc, ghostExchange, use_inphase, QUDA_NATIVE_GAUGE_ORDER >

struct	gauge_mapper< short, QUDA_RECONSTRUCT_8, N, stag, huge_alloc, ghostExchange, use_inphase, QUDA_NATIVE_GAUGE_ORDER >

struct	gauge_mapper< int8_t, QUDA_RECONSTRUCT_NO, N, stag, huge_alloc, ghostExchange, use_inphase, QUDA_NATIVE_GAUGE_ORDER >

struct	gauge_mapper< int8_t, QUDA_RECONSTRUCT_13, N, stag, huge_alloc, ghostExchange, use_inphase, QUDA_NATIVE_GAUGE_ORDER >

struct	gauge_mapper< int8_t, QUDA_RECONSTRUCT_12, N, stag, huge_alloc, ghostExchange, use_inphase, QUDA_NATIVE_GAUGE_ORDER >

struct	gauge_mapper< int8_t, QUDA_RECONSTRUCT_10, N, stag, huge_alloc, ghostExchange, use_inphase, QUDA_NATIVE_GAUGE_ORDER >

struct	gauge_mapper< int8_t, QUDA_RECONSTRUCT_9, N, stag, huge_alloc, ghostExchange, use_inphase, QUDA_NATIVE_GAUGE_ORDER >

struct	gauge_mapper< int8_t, QUDA_RECONSTRUCT_8, N, stag, huge_alloc, ghostExchange, use_inphase, QUDA_NATIVE_GAUGE_ORDER >

struct	gauge_mapper< T, recon, N, stag, huge_alloc, ghostExchange, use_inphase, QUDA_MILC_GAUGE_ORDER >

struct	gauge_mapper< T, recon, N, stag, huge_alloc, ghostExchange, use_inphase, QUDA_QDP_GAUGE_ORDER >

struct	gauge_order_mapper

struct	gauge_order_mapper< T, QUDA_QDP_GAUGE_ORDER, Nc >

struct	gauge_order_mapper< T, QUDA_QDPJIT_GAUGE_ORDER, Nc >

struct	gauge_order_mapper< T, QUDA_MILC_GAUGE_ORDER, Nc >

struct	gauge_order_mapper< T, QUDA_CPS_WILSON_GAUGE_ORDER, Nc >

struct	gauge_order_mapper< T, QUDA_BQCD_GAUGE_ORDER, Nc >

struct	gauge_order_mapper< T, QUDA_TIFR_GAUGE_ORDER, Nc >

struct	gauge_order_mapper< T, QUDA_TIFR_PADDED_GAUGE_ORDER, Nc >

struct	gauge_order_mapper< T, QUDA_FLOAT2_GAUGE_ORDER, Nc >

struct	ReconstructFull

struct	ReconstructWilson

struct	ReconstructStaggered

struct	ReconstructNo12

struct	ReconstructNone

struct	ReconstructMom

struct	Reconstruct10

struct	instantiateApply
	This class instantiates the Apply class based on the instantiated templates below. More...

struct	instantiateApply< false, Apply, Float, nColor, recon, G, Args... >
	This class is a specialization which does not instantiate the Apply class if is_enabled has evaluated to false. More...

struct	instantiateReconstruct
	Instantiate the reconstruction template at index i and recurse to prior element. More...

struct	instantiateReconstruct< Apply, Float, nColor, Recon, 0, G, Args... >
	Termination specialization of instantiateReconstruct. More...

struct	WilsonReconstruct

struct	StaggeredReconstruct

struct	SolverParam

class	Solver

class	CG
	Conjugate-Gradient Solver. More...

class	CGNE

class	CGNR

class	CG3

class	CG3NE

class	CG3NR

class	MPCG

class	PreconCG

class	BiCGstab

class	SimpleBiCGstab

class	MPBiCGstab

class	BiCGstabL

class	GCR

class	MR

class	CACG
	Communication-avoiding CG solver. This solver does un-preconditioned CG, running in steps of n_krylov, building up a polynomial in the linear operator of length n_krylov, and then performs a steepest descent minimization on the resulting basis vectors. For now only implemented using the power basis so is only useful as a preconditioner. More...

class	CACGNE

class	CACGNR

class	CAGCR
	Communication-avoiding GCR solver. This solver does un-preconditioned GCR, first building up a polynomial in the linear operator of length n_krylov, and then performs a minimum residual extrapolation on the resulting basis vectors. For use as a multigrid smoother with minimum global synchronization. More...

class	SD

class	XSD

class	PreconditionedSolver

class	MultiShiftSolver

class	MultiShiftCG
	Multi-Shift Conjugate Gradient Solver. More...

class	MinResExt
	This computes the optimum guess for the system Ax=b in the L2 residual norm. For use in the HMD force calculations using a minimal residual chronological method. This computes the guess solution as a linear combination of a given number of previous solutions. Following Brower et al, only the orthogonalised vector basis is stored to conserve memory. More...

class	IncEigCG

class	GMResDR

struct	deflation_space
	This is an object that captures the state required for a deflated solver. More...

struct	LatticeFieldParam

class	LatticeField

struct	matrix_field

struct	MGParam

class	MG

struct	multigrid_solver

class	Object

struct	char8

struct	short8

struct	float8

struct	double8

struct	fixedMaxValue

struct	fixedMaxValue< short >

struct	fixedMaxValue< short2 >

struct	fixedMaxValue< short4 >

struct	fixedMaxValue< short8 >

struct	fixedMaxValue< int8_t >

struct	fixedMaxValue< char2 >

struct	fixedMaxValue< char4 >

struct	fixedMaxValue< char8 >

struct	fixedInvMaxValue

struct	fixedInvMaxValue< short >

struct	fixedInvMaxValue< short2 >

struct	fixedInvMaxValue< short4 >

struct	fixedInvMaxValue< short8 >

struct	fixedInvMaxValue< int8_t >

struct	fixedInvMaxValue< char2 >

struct	fixedInvMaxValue< char4 >

struct	fixedInvMaxValue< char8 >

struct	Zero

struct	Identity

class	HMatrix
	Specialized container for Hermitian matrices (e.g., used for wrapping clover matrices) More...

class	Matrix

struct	HMatrix_wrapper
	Wrapper class that enables us to write to HMatrices in packed format. More...

class	Array

class	RNG
	Class declaration to initialize and hold CURAND RNG states. More...

struct	uniform

struct	uniform< float >

struct	uniform< double >

struct	normal

struct	normal< float >

struct	normal< double >

struct	atomic_type
	The atomic word size we use for a given reduction type. This type should be lock-free to guarantee correct behaviour on platforms that are not coherent with respect to the host. More...

struct	atomic_type< float >

struct	ReduceArg

struct	PromoteTypeId

struct	PromoteTypeId< complex< float >, float >

struct	PromoteTypeId< float, complex< float > >

struct	PromoteTypeId< complex< double >, double >

struct	PromoteTypeId< double, complex< double > >

struct	PromoteTypeId< double, int >

struct	PromoteTypeId< int, double >

struct	PromoteTypeId< float, int >

struct	PromoteTypeId< int, float >

struct	PromoteTypeId< double, float >

struct	PromoteTypeId< float, double >

struct	PromoteTypeId< double, short >

struct	PromoteTypeId< short, double >

struct	PromoteTypeId< double, int8_t >

struct	PromoteTypeId< int8_t, double >

struct	PromoteTypeId< float, short >

struct	PromoteTypeId< short, float >

struct	PromoteTypeId< float, int8_t >

struct	PromoteTypeId< int8_t, float >

struct	PromoteTypeId< short, int8_t >

struct	PromoteTypeId< int8_t, short >

struct	mapper

struct	mapper< double >

struct	mapper< float >

struct	mapper< short >

struct	mapper< int8_t >

struct	mapper< double2 >

struct	mapper< float2 >

struct	mapper< short2 >

struct	mapper< char2 >

struct	mapper< double4 >

struct	mapper< float4 >

struct	mapper< short4 >

struct	mapper< char4 >

struct	mapper< double8 >

struct	mapper< float8 >

struct	mapper< short8 >

struct	mapper< char8 >

struct	bridge_mapper

struct	bridge_mapper< double2, double2 >

struct	bridge_mapper< double2, float2 >

struct	bridge_mapper< double2, short2 >

struct	bridge_mapper< double2, char2 >

struct	bridge_mapper< double2, float4 >

struct	bridge_mapper< double2, short4 >

struct	bridge_mapper< double2, char4 >

struct	bridge_mapper< float4, double2 >

struct	bridge_mapper< float4, float4 >

struct	bridge_mapper< float4, short4 >

struct	bridge_mapper< float4, char4 >

struct	bridge_mapper< float2, double2 >

struct	bridge_mapper< float2, float2 >

struct	bridge_mapper< float2, short2 >

struct	bridge_mapper< float2, char2 >

struct	bridge_mapper< double2, short8 >

struct	bridge_mapper< double2, char8 >

struct	bridge_mapper< float8, short8 >

struct	bridge_mapper< float8, char8 >

struct	bridge_mapper< float4, short8 >

struct	bridge_mapper< float4, char8 >

struct	vec_length

struct	vec_length< double8 >

struct	vec_length< double4 >

struct	vec_length< double3 >

struct	vec_length< double2 >

struct	vec_length< double >

struct	vec_length< float8 >

struct	vec_length< float4 >

struct	vec_length< float3 >

struct	vec_length< float2 >

struct	vec_length< float >

struct	vec_length< short8 >

struct	vec_length< short4 >

struct	vec_length< short3 >

struct	vec_length< short2 >

struct	vec_length< short >

struct	vec_length< char8 >

struct	vec_length< char4 >

struct	vec_length< char3 >

struct	vec_length< char2 >

struct	vec_length< int8_t >

struct	vec_length< Complex >

struct	vec_length< complex< double > >

struct	vec_length< complex< float > >

struct	vec_length< complex< short > >

struct	vec_length< complex< int8_t > >

struct	vector

struct	vector< double, 2 >

struct	vector< float, 2 >

struct	vector< int, 2 >

struct	scalar

struct	scalar< double8 >

struct	scalar< double4 >

struct	scalar< double3 >

struct	scalar< double2 >

struct	scalar< double >

struct	scalar< float8 >

struct	scalar< float4 >

struct	scalar< float3 >

struct	scalar< float2 >

struct	scalar< float >

struct	scalar< short8 >

struct	scalar< short4 >

struct	scalar< short3 >

struct	scalar< short2 >

struct	scalar< short >

struct	scalar< char8 >

struct	scalar< char4 >

struct	scalar< char3 >

struct	scalar< char2 >

struct	scalar< int8_t >

struct	scalar< complex< double > >

struct	scalar< complex< float > >

struct	isHalf

struct	isHalf< short >

struct	isHalf< short2 >

struct	isHalf< short4 >

struct	isHalf< short8 >

struct	isQuarter

struct	isQuarter< int8_t >

struct	isQuarter< char2 >

struct	isQuarter< char4 >

struct	isQuarter< char8 >

struct	isFixed

struct	isFixed< short >

struct	isFixed< short2 >

struct	isFixed< short4 >

struct	isFixed< short8 >

struct	isFixed< int8_t >

struct	isFixed< char2 >

struct	isFixed< char4 >

struct	isFixed< char8 >

struct	Trig

struct	Trig< false, float >

struct	Trig< true, float >

struct	VectorType

struct	VectorType< double, 1 >

struct	VectorType< double, 2 >

struct	VectorType< double, 3 >

struct	VectorType< double, 4 >

struct	VectorType< double, 8 >

struct	VectorType< float, 1 >

struct	VectorType< float, 2 >

struct	VectorType< float, 3 >

struct	VectorType< float, 4 >

struct	VectorType< float, 8 >

struct	VectorType< short, 1 >

struct	VectorType< short, 2 >

struct	VectorType< short, 3 >

struct	VectorType< short, 4 >

struct	VectorType< short, 8 >

struct	VectorType< int8_t, 1 >

struct	VectorType< int8_t, 2 >

struct	VectorType< int8_t, 3 >

struct	VectorType< int8_t, 4 >

struct	VectorType< int8_t, 8 >

struct	AllocType

struct	AllocType< true >

struct	AllocType< false >

struct	Timer

class	TimeProfile

class	Transfer

struct	plus

struct	maximum

struct	minimum

struct	identity

struct	TransformReduceArg

class	TransformReduce

struct	TuneKey

class	TuneParam

class	Tunable

class	TunableLocalParityReduction

class	TunableVectorY

class	TunableVectorYZ

class	VectorIO
	VectorIO is a simple wrapper class for loading and saving sets of vector fields using QIO. More...

class	Worker

class	BiCGstabLUpdate

class	EigCGArgs

struct	SortedEvals

class	GMResDRArgs

class	ShiftUpdate

class	MemAlloc

class	QudaMem

struct	Int2

struct	TraceKey

struct	less_significant

Typedefs
typedef std::vector< ColorSpinorField * >	CompositeColorSpinorField

using	ColorSpinorFieldSet = ColorSpinorField

typedef std::complex< double >	Complex

typedef struct curandStateMRG32k3a	cuRNGState

using	DynamicStride = Stride< Dynamic, Dynamic >

using	DenseMatrix = MatrixXcd

using	VectorSet = MatrixXcd

using	Vector = VectorXcd

using	RealVector = VectorXd

using	RowMajorDenseMatrix = Matrix< Complex, Dynamic, Dynamic, RowMajor >

typedef std::map< TuneKey, TuneParam >	map

template<typename T >
using	mgarray = std::array< T, QUDA_MAX_MG_LEVEL >

Enumerations
enum class	CloverPrefetchType { BOTH_CLOVER_PREFETCH_TYPE , CLOVER_CLOVER_PREFETCH_TYPE , INVERSE_CLOVER_PREFETCH_TYPE , INVALID_CLOVER_PREFETCH_TYPE = QUDA_INVALID_ENUM }

enum	MemoryLocation { Device = 1 , Host = 2 , Remote = 4 , Shmem = 8 }

enum	Dslash5Type { DSLASH5_DWF , DSLASH5_MOBIUS_PRE , DSLASH5_MOBIUS , M5_INV_DWF , M5_INV_MOBIUS , M5_INV_ZMOBIUS , M5_EOFA , M5INV_EOFA }

enum class	MdwfFusedDslashType { D4_D5INV_D5PRE , D4_D5INV_D5INVDAG , D4DAG_D5PREDAG_D5INVDAG , D4DAG_D5PREDAG , D5PRE }

enum	blockType { PENCIL , LOWER_TRI , UPPER_TRI }

enum class	QudaOffsetCopyMode { COLLECT , DISPERSE }

enum	QudaProfileType { QUDA_PROFILE_H2D , QUDA_PROFILE_D2H , QUDA_PROFILE_INIT , QUDA_PROFILE_PREAMBLE , QUDA_PROFILE_COMPUTE , QUDA_PROFILE_COMMS , QUDA_PROFILE_EPILOGUE , QUDA_PROFILE_FREE , QUDA_PROFILE_IO , QUDA_PROFILE_CHRONO , QUDA_PROFILE_EIGEN , QUDA_PROFILE_EIGENLU , QUDA_PROFILE_EIGENEV , QUDA_PROFILE_EIGENQR , QUDA_PROFILE_ARPACK , QUDA_PROFILE_HOST_COMPUTE , QUDA_PROFILE_LOWER_LEVEL , QUDA_PROFILE_PACK_KERNEL , QUDA_PROFILE_DSLASH_KERNEL , QUDA_PROFILE_GATHER , QUDA_PROFILE_SCATTER , QUDA_PROFILE_LAUNCH_KERNEL , QUDA_PROFILE_EVENT_RECORD , QUDA_PROFILE_EVENT_QUERY , QUDA_PROFILE_STREAM_WAIT_EVENT , QUDA_PROFILE_FUNC_SET_ATTRIBUTE , QUDA_PROFILE_EVENT_SYNCHRONIZE , QUDA_PROFILE_STREAM_SYNCHRONIZE , QUDA_PROFILE_DEVICE_SYNCHRONIZE , QUDA_PROFILE_MEMCPY_D2D_ASYNC , QUDA_PROFILE_MEMCPY_D2H_ASYNC , QUDA_PROFILE_MEMCPY2D_D2H_ASYNC , QUDA_PROFILE_MEMCPY_H2D_ASYNC , QUDA_PROFILE_MEMCPY_DEFAULT_ASYNC , QUDA_PROFILE_COMMS_START , QUDA_PROFILE_COMMS_QUERY , QUDA_PROFILE_CONSTANT , QUDA_PROFILE_TOTAL , QUDA_PROFILE_COUNT }

enum	BiCGstabLUpdateType { BICGSTABL_UPDATE_U = 0 , BICGSTABL_UPDATE_R = 1 }

enum class	libtype { eigen_lib , magma_lib , lapack_lib , mkl_lib , eigen_lib , magma_lib , lapack_lib , mkl_lib }

enum class	libtype { eigen_lib , magma_lib , lapack_lib , mkl_lib , eigen_lib , magma_lib , lapack_lib , mkl_lib }

enum	AllocType { DEVICE , DEVICE_PINNED , HOST , PINNED , MAPPED , MANAGED , SHMEM , N_ALLOC_TYPE , DEVICE , DEVICE_PINNED , HOST , PINNED , MAPPED , MANAGED , N_ALLOC_TYPE }

enum	AllocType { DEVICE , DEVICE_PINNED , HOST , PINNED , MAPPED , MANAGED , SHMEM , N_ALLOC_TYPE , DEVICE , DEVICE_PINNED , HOST , PINNED , MAPPED , MANAGED , N_ALLOC_TYPE }

Functions
std::ostream &	operator<< (std::ostream &output, const CloverFieldParam &param)

double	norm1 (const CloverField &u, bool inverse=false)

double	norm2 (const CloverField &a, bool inverse=false)

void	computeClover (CloverField &clover, const GaugeField &fmunu, double coeff)
	Driver for computing the clover field from the field strength tensor. More...

void	copyGenericClover (CloverField &out, const CloverField &in, bool inverse, QudaFieldLocation location, void *Out=0, void *In=0, void *outNorm=0, void *inNorm=0)
	This generic function is used for copying the clover field where the input and output can be in any order and location. More...

void	cloverInvert (CloverField &clover, bool computeTraceLog)
	This function computes the Cholesky decomposition of each clover matrix and stores the clover inverse field. More...

void	cloverRho (CloverField &clover, double rho)
	This function adds a real scalar onto the clover diagonal (only to the direct field not the inverse) More...

void	computeCloverForce (GaugeField &force, const GaugeField &U, std::vector< ColorSpinorField * > &x, std::vector< ColorSpinorField * > &p, std::vector< double > &coeff)
	Compute the force contribution from the solver solution fields. More...

void	computeCloverSigmaOprod (GaugeField &oprod, std::vector< ColorSpinorField * > &x, std::vector< ColorSpinorField * > &p, std::vector< std::vector< double > > &coeff)
	Compute the outer product from the solver solution fields arising from the diagonal term of the fermion bilinear in direction mu,nu and sum to outer product field. More...

void	computeCloverSigmaTrace (GaugeField &output, const CloverField &clover, double coeff)
	Compute the matrix tensor field necessary for the force calculation from the clover trace action. This computes a tensor field [mu,nu]. More...

void	cloverDerivative (cudaGaugeField &force, cudaGaugeField &gauge, cudaGaugeField &oprod, double coeff, QudaParity parity)
	Compute the derivative of the clover matrix in the direction mu,nu and compute the resulting force given the outer-product field. More...

void	copyFieldOffset (CloverField &out, const CloverField &in, CommKey offset, QudaPCType pc_type)
	This function is used for copying from a source clover field to a destination clover field with an offset. More...

constexpr bool	dynamic_clover_inverse ()
	Helper function that returns whether we have enabled dynamic clover inversion or not. More...

template<typename Float , int Nc, int Ns>
__device__ __host__ complex< Float >	innerProduct (const ColorSpinor< Float, Nc, Ns > &a, const ColorSpinor< Float, Nc, Ns > &b)
	Compute the inner product over color and spin: dot = \sum_{s,c} conj(a(s,c)) * b(s,c). More...

template<typename Float , int Nc, int Ns>
__device__ __host__ complex< Float >	colorContract (const ColorSpinor< Float, Nc, Ns > &a, const ColorSpinor< Float, Nc, Ns > &b, int sa, int sb)
	Compute the color contraction over color at spins sa and sb: dot = \sum_c a(sa,c) * b(sb,c). More...

template<typename Float , int Nc, int Ns>
__device__ __host__ complex< Float >	innerProduct (const ColorSpinor< Float, Nc, Ns > &a, const ColorSpinor< Float, Nc, Ns > &b, int s)

template<typename Float , int Nc, int Ns>
__device__ __host__ complex< Float >	innerProduct (const ColorSpinor< Float, Nc, Ns > &a, const ColorSpinor< Float, Nc, Ns > &b, int sa, int sb)

template<typename Float , int Nc, int Nsa, int Nsb>
__device__ __host__ complex< Float >	innerProduct (const ColorSpinor< Float, Nc, Nsa > &a, const ColorSpinor< Float, Nc, Nsb > &b, int sa, int sb)
	Compute the inner product over color at spin sa and sb between color spinors a and b of different spin length: dot = \sum_c conj(a(c)) * b(s,c). More...

template<typename Float , int Ns>
__device__ __host__ ColorSpinor< Float, 3, 1 >	crossProduct (const ColorSpinor< Float, 3, Ns > &a, const ColorSpinor< Float, 3, Ns > &b, int sa, int sb)

template<typename Float , int Nc, int Ns>
__device__ __host__ Matrix< complex< Float >, Nc >	outerProdSpinTrace (const ColorSpinor< Float, Nc, Ns > &a, const ColorSpinor< Float, Nc, Ns > &b)

template<typename Float , int Nc>
__device__ __host__ Matrix< complex< Float >, Nc >	outerProduct (const ColorSpinor< Float, Nc, 1 > &a, const ColorSpinor< Float, Nc, 1 > &b)

template<typename Float , int Nc, int Ns>
__device__ __host__ ColorSpinor< Float, Nc, Ns >	operator+ (const ColorSpinor< Float, Nc, Ns > &x, const ColorSpinor< Float, Nc, Ns > &y)
	ColorSpinor addition operator. More...

template<typename Float , int Nc, int Ns>
__device__ __host__ ColorSpinor< Float, Nc, Ns >	operator- (const ColorSpinor< Float, Nc, Ns > &x, const ColorSpinor< Float, Nc, Ns > &y)
	ColorSpinor subtraction operator. More...

template<typename Float , int Nc, int Ns, typename S >
__device__ __host__ ColorSpinor< Float, Nc, Ns >	operator* (const S &a, const ColorSpinor< Float, Nc, Ns > &x)
	Compute the scalar-vector product y = a * x. More...

template<typename Float , int Nc, int Ns>
__device__ __host__ ColorSpinor< Float, Nc, Ns >	operator* (const Matrix< complex< Float >, Nc > &A, const ColorSpinor< Float, Nc, Ns > &x)
	Compute the matrix-vector product y = A * x. More...

template<typename Float , int Nc, int Ns>
__device__ __host__ ColorSpinor< Float, Nc, Ns >	mv_add (const Matrix< complex< Float >, Nc > &A, const ColorSpinor< Float, Nc, Ns > &x, const ColorSpinor< Float, Nc, Ns > &y)
	Compute the matrix-vector product z = A * x + y. More...

template<typename Float , int Nc, int Ns>
__device__ __host__ ColorSpinor< Float, Nc, Ns >	operator* (const HMatrix< Float, Nc *Ns > &A, const ColorSpinor< Float, Nc, Ns > &x)
	Compute the matrix-vector product y = A * x. More...

constexpr QudaParity	impliedParityFromMatPC (const QudaMatPCType &matpc_type)
	Helper function for getting the implied spinor parity from a matrix preconditioning type. More...

void	copyGenericColorSpinor (ColorSpinorField &dst, const ColorSpinorField &src, QudaFieldLocation location, void *Dst=0, void *Src=0, void *dstNorm=0, void *srcNorm=0)

void	genericSource (cpuColorSpinorField &a, QudaSourceType sourceType, int x, int s, int c)

int	genericCompare (const cpuColorSpinorField &a, const cpuColorSpinorField &b, int tol)

void	copyFieldOffset (ColorSpinorField &out, const ColorSpinorField &in, CommKey offset, QudaPCType pc_type)
	This function is used for copying from a source colorspinor field to a destination field with an offset. More...

void	genericPrintVector (const cpuColorSpinorField &a, unsigned int x)

void	genericCudaPrintVector (const cudaColorSpinorField &a, unsigned x)

void	exchangeExtendedGhost (cudaColorSpinorField *spinor, int R[], int parity, qudaStream_t *stream_p)

void	copyExtendedColorSpinor (ColorSpinorField &dst, const ColorSpinorField &src, QudaFieldLocation location, const int parity, void *Dst, void *Src, void *dstNorm, void *srcNorm)

void	genericPackGhost (void *ghost, const ColorSpinorField &a, QudaParity parity, int nFace, int dagger, MemoryLocation *destination=nullptr)
	Generic ghost packing routine. More...

void	spinorNoise (ColorSpinorField &src, RNG &randstates, QudaNoiseType type)
	Generate a random noise spinor. This variant allows the user to manage the RNG state. More...

void	spinorNoise (ColorSpinorField &src, unsigned long long seed, QudaNoiseType type)
	Generate a random noise spinor. This variant just requires a seed and will create and destroy the random number state. More...

QudaPCType	PCType_ (const char *func, const char *file, int line, const ColorSpinorField &a, const ColorSpinorField &b)
	Helper function for determining if the preconditioning type of the fields is the same. More...

template<typename... Args>
QudaPCType	PCType_ (const char *func, const char *file, int line, const ColorSpinorField &a, const ColorSpinorField &b, const Args &... args)
	Helper function for determining if the preconditioning type of the fields is the same. More...

QudaFieldOrder	Order_ (const char *func, const char *file, int line, const ColorSpinorField &a, const ColorSpinorField &b)
	Helper function for determining if the order of the fields is the same. More...

template<typename... Args>
QudaFieldOrder	Order_ (const char *func, const char *file, int line, const ColorSpinorField &a, const ColorSpinorField &b, const Args &... args)
	Helper function for determining if the order of the fields is the same. More...

int	Length_ (const char *func, const char *file, int line, const ColorSpinorField &a, const ColorSpinorField &b)
	Helper function for determining if the length of the fields is the same. More...

template<typename... Args>
int	Length_ (const char *func, const char *file, int line, const ColorSpinorField &a, const ColorSpinorField &b, const Args &... args)
	Helper function for determining if the length of the fields is the same. More...

constexpr int	product (const CommKey &input)

constexpr CommKey	operator+ (const CommKey &lhs, const CommKey &rhs)

constexpr CommKey	operator* (const CommKey &lhs, const CommKey &rhs)

constexpr CommKey	operator/ (const CommKey &lhs, const CommKey &rhs)

constexpr CommKey	operator% (const CommKey &lhs, const CommKey &rhs)

constexpr bool	operator< (const CommKey &lhs, const CommKey &rhs)

constexpr bool	operator> (const CommKey &lhs, const CommKey &rhs)

constexpr CommKey	coordinate_from_index (int index, CommKey dim)

constexpr int	index_from_coordinate (CommKey coord, CommKey dim)

template<typename ValueType >
__host__ __device__ ValueType	cos (ValueType x)

template<typename ValueType >
__host__ __device__ ValueType	sin (ValueType x)

template<typename ValueType >
__host__ __device__ ValueType	tan (ValueType x)

template<typename ValueType >
__host__ __device__ ValueType	acos (ValueType x)

template<typename ValueType >
__host__ __device__ ValueType	asin (ValueType x)

template<typename ValueType >
__host__ __device__ ValueType	atan (ValueType x)

template<typename ValueType >
__host__ __device__ ValueType	atan2 (ValueType x, ValueType y)

template<typename ValueType >
__host__ __device__ ValueType	cosh (ValueType x)

template<typename ValueType >
__host__ __device__ ValueType	sinh (ValueType x)

template<typename ValueType >
__host__ __device__ ValueType	tanh (ValueType x)

template<typename ValueType >
__host__ __device__ ValueType	exp (ValueType x)

template<typename ValueType >
__host__ __device__ ValueType	log (ValueType x)

template<typename ValueType >
__host__ __device__ ValueType	log10 (ValueType x)

template<typename ValueType , typename ExponentType >
__host__ __device__ ValueType	pow (ValueType x, ExponentType e)

template<typename ValueType >
__host__ __device__ ValueType	sqrt (ValueType x)

template<typename ValueType >
__host__ __device__ ValueType	abs (ValueType x)

template<typename ValueType >
__host__ __device__ ValueType	conj (ValueType x)

template<typename ValueType >
__host__ __device__ ValueType	abs (const complex< ValueType > &z)
	Returns the magnitude of z. More...

template<typename ValueType >
__host__ __device__ ValueType	arg (const complex< ValueType > &z)
	Returns the phase angle of z. More...

template<typename ValueType >
__host__ __device__ ValueType	norm (const complex< ValueType > &z)
	Returns the magnitude of z squared. More...

template<typename ValueType >
__host__ __device__ complex< ValueType >	conj (const complex< ValueType > &z)
	Returns the complex conjugate of z. More...

template<typename ValueType >
__host__ __device__ complex< ValueType >	polar (const ValueType &m, const ValueType &theta=0)
	Returns the complex with magnitude m and angle theta in radians. More...

template<typename ValueType >
__host__ __device__ complex< ValueType >	operator* (const complex< ValueType > &lhs, const complex< ValueType > &rhs)

template<typename ValueType >
__host__ __device__ complex< ValueType >	operator* (const complex< ValueType > &lhs, const ValueType &rhs)

template<typename ValueType >
__host__ __device__ complex< ValueType >	operator* (const ValueType &lhs, const complex< ValueType > &rhs)

template<typename ValueType >
__host__ __device__ complex< ValueType >	operator/ (const complex< ValueType > &lhs, const complex< ValueType > &rhs)

template<>
__host__ __device__ complex< float >	operator/ (const complex< float > &lhs, const complex< float > &rhs)

template<>
__host__ __device__ complex< double >	operator/ (const complex< double > &lhs, const complex< double > &rhs)

template<typename ValueType >
__host__ __device__ complex< ValueType >	operator+ (const complex< ValueType > &lhs, const complex< ValueType > &rhs)

template<typename ValueType >
__host__ __device__ complex< ValueType >	operator+ (const complex< ValueType > &lhs, const ValueType &rhs)

template<typename ValueType >
__host__ __device__ complex< ValueType >	operator+ (const ValueType &lhs, const complex< ValueType > &rhs)

template<typename ValueType >
__host__ __device__ complex< ValueType >	operator- (const complex< ValueType > &lhs, const complex< ValueType > &rhs)

template<typename ValueType >
__host__ __device__ complex< ValueType >	operator- (const complex< ValueType > &lhs, const ValueType &rhs)

template<typename ValueType >
__host__ __device__ complex< ValueType >	operator- (const ValueType &lhs, const complex< ValueType > &rhs)

template<typename ValueType >
__host__ __device__ complex< ValueType >	operator+ (const complex< ValueType > &rhs)

template<typename ValueType >
__host__ __device__ complex< ValueType >	operator- (const complex< ValueType > &rhs)

template<typename ValueType >
__host__ __device__ complex< ValueType >	cos (const complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex< ValueType >	cosh (const complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex< ValueType >	exp (const complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex< ValueType >	log (const complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex< ValueType >	log10 (const complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex< ValueType >	pow (const complex< ValueType > &z, const int &n)

template<typename ValueType >
__host__ __device__ complex< ValueType >	pow (const complex< ValueType > &z, const ValueType &x)

template<typename ValueType >
__host__ __device__ complex< ValueType >	pow (const complex< ValueType > &z, const complex< ValueType > &z2)

template<typename ValueType >
__host__ __device__ complex< ValueType >	pow (const ValueType &x, const complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex< ValueType >	sin (const complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex< ValueType >	sinh (const complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex< ValueType >	sqrt (const complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex< ValueType >	tan (const complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex< ValueType >	tanh (const complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex< ValueType >	acos (const complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex< ValueType >	asin (const complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex< ValueType >	atan (const complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex< ValueType >	acosh (const complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex< ValueType >	asinh (const complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex< ValueType >	atanh (const complex< ValueType > &z)

template<typename ValueType , class charT , class traits >
std::basic_ostream< charT, traits > &	operator<< (std::basic_ostream< charT, traits > &os, const complex< ValueType > &z)

template<typename ValueType , typename charT , class traits >
std::basic_istream< charT, traits > &	operator>> (std::basic_istream< charT, traits > &is, complex< ValueType > &z)

template<typename ValueType >
__host__ __device__ complex< ValueType >	operator+ (const volatile complex< ValueType > &lhs, const volatile complex< ValueType > &rhs)

template<typename ValueType >
__host__ __device__ complex< ValueType >	operator/ (const complex< ValueType > &lhs, const ValueType &rhs)

template<typename ValueType >
__host__ __device__ complex< ValueType >	operator/ (const ValueType &lhs, const complex< ValueType > &rhs)

template<>
__host__ __device__ complex< float >	operator/ (const float &lhs, const complex< float > &rhs)

template<>
__host__ __device__ complex< double >	operator/ (const double &lhs, const complex< double > &rhs)

template<typename ValueType >
__host__ __device__ bool	operator== (const complex< ValueType > &lhs, const complex< ValueType > &rhs)

template<typename ValueType >
__host__ __device__ bool	operator== (const ValueType &lhs, const complex< ValueType > &rhs)

template<typename ValueType >
__host__ __device__ bool	operator== (const complex< ValueType > &lhs, const ValueType &rhs)

template<typename ValueType >
__host__ __device__ bool	operator!= (const complex< ValueType > &lhs, const complex< ValueType > &rhs)

template<typename ValueType >
__host__ __device__ bool	operator!= (const ValueType &lhs, const complex< ValueType > &rhs)

template<typename ValueType >
__host__ __device__ bool	operator!= (const complex< ValueType > &lhs, const ValueType &rhs)

template<>
__host__ __device__ float	abs (const complex< float > &z)

template<>
__host__ __device__ double	abs (const complex< double > &z)

template<>
__host__ __device__ float	arg (const complex< float > &z)

template<>
__host__ __device__ double	arg (const complex< double > &z)

template<>
__host__ __device__ complex< float >	polar (const float &magnitude, const float &angle)

template<>
__host__ __device__ complex< double >	polar (const double &magnitude, const double &angle)

template<>
__host__ __device__ complex< float >	cos (const complex< float > &z)

template<>
__host__ __device__ complex< float >	cosh (const complex< float > &z)

template<>
__host__ __device__ complex< float >	exp (const complex< float > &z)

template<>
__host__ __device__ complex< float >	log (const complex< float > &z)

template<>
__host__ __device__ complex< float >	pow (const float &x, const complex< float > &exponent)

template<>
__host__ __device__ complex< float >	sin (const complex< float > &z)

template<>
__host__ __device__ complex< float >	sinh (const complex< float > &z)

template<>
__host__ __device__ complex< float >	sqrt (const complex< float > &z)

template<typename ValueType >
__host__ __device__ complex< float >	atanh (const complex< float > &z)

template<typename real >
__host__ __device__ complex< real >	cmul (const complex< real > &x, const complex< real > &y)

template<typename real >
__host__ __device__ complex< real >	cmac (const complex< real > &x, const complex< real > &y, const complex< real > &z)

template<typename real >
__host__ __device__ complex< real >	i_ (const complex< real > &a)

void	contractQuda (const ColorSpinorField &x, const ColorSpinorField &y, void *result, QudaContractType cType)

template<typename T >
__host__ __device__ float	i2f (T a)

__device__ __host__ int	f2i (float f)

__device__ __host__ int	d2i (double d)

template<typename T1 , typename T2 >
__host__ __device__ std::enable_if<!isFixed< T1 >::value &&!isFixed< T2 >::value, void >::type	copy (T1 &a, const T2 &b)
	Copy function which is trivial between floating point types. When converting to an integer type, the input float is assumed to be in the range [-1,1] and we rescale to saturate the integer range. When converting from an integer type, we scale the output to be on the same range. More...

template<typename T1 , typename T2 >
__host__ __device__ std::enable_if<!isFixed< T1 >::value &&isFixed< T2 >::value, void >::type	copy (T1 &a, const T2 &b)

template<typename T1 , typename T2 >
__host__ __device__ std::enable_if< isFixed< T1 >::value &&!isFixed< T2 >::value, void >::type	copy (T1 &a, const T2 &b)

template<typename T1 , typename T2 >
__host__ __device__ std::enable_if<!isFixed< T1 >::value, void >::type	copy_scaled (T1 &a, const T2 &b)
	Specialized variants of the copy function that assumes the scaling factor has already been done. More...

template<typename T1 , typename T2 >
__host__ __device__ std::enable_if< isFixed< T1 >::value, void >::type	copy_scaled (T1 &a, const T2 &b)

template<typename T1 , typename T2 , typename T3 >
__host__ __device__ std::enable_if<!isFixed< T2 >::value, void >::type	copy_and_scale (T1 &a, const T2 &b, const T3 &c)
	Specialized variants of the copy function that include an additional scale factor. Note the scale factor is ignored unless the input type (b) is either a short or char vector. More...

template<typename T1 , typename T2 , typename T3 >
__host__ __device__ std::enable_if< isFixed< T2 >::value, void >::type	copy_and_scale (T1 &a, const T2 &b, const T3 &c)

void	setDiracParam (DiracParam &diracParam, QudaInvertParam *inv_param, bool pc)

void	setDiracSloppyParam (DiracParam &diracParam, QudaInvertParam *inv_param, bool pc)

void	createDirac (Dirac &d, Dirac &dSloppy, Dirac *&dPre, QudaInvertParam &param, const bool pc_solve)

void	createDiracWithRefine (Dirac &d, Dirac &dSloppy, Dirac &dPre, Dirac &dRef, QudaInvertParam &param, const bool pc_solve)

void	createDiracWithEig (Dirac &d, Dirac &dSloppy, Dirac &dPre, Dirac &dRef, QudaInvertParam &param, const bool pc_solve)

void	setKernelPackT (bool pack)

bool	getKernelPackT ()

void	pushKernelPackT (bool pack)

void	popKernelPackT ()

void	setPackComms (const int *dim_pack)
	Helper function that sets which dimensions the packing kernel should be packing for. More...

bool	getDslashLaunch ()

void	createDslashEvents ()

void	destroyDslashEvents ()

void	ApplyWilson (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double kappa, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
	Driver for applying the Wilson stencil. More...

void	ApplyWilsonClover (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, const CloverField &A, double kappa, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
	Driver for applying the Wilson-clover stencil. More...

void	ApplyWilsonCloverHasenbuschTwist (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, const CloverField &A, double kappa, double mu, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
	Driver for applying the Wilson-clover stencil. More...

void	ApplyWilsonCloverPreconditioned (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, const CloverField &A, double kappa, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
	Driver for applying the preconditioned Wilson-clover stencil. More...

void	ApplyWilsonCloverHasenbuschTwistPCClovInv (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, const CloverField &A, double kappa, double mu, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
	Driver for applying the Wilson-clover stencil with twist for Hasenbusch. More...

void	ApplyWilsonCloverHasenbuschTwistPCNoClovInv (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, const CloverField &A, double kappa, double mu, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
	Driver for applying the Wilson-clover stencil with twist for Hasenbusch. More...

void	ApplyTwistedMass (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double b, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)

void	ApplyTwistedMassPreconditioned (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double b, bool xpay, const ColorSpinorField &x, int parity, bool dagger, bool asymmetric, const int *comm_override, TimeProfile &profile)
	Driver for applying the preconditioned twisted-mass stencil. More...

void	ApplyNdegTwistedMass (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double b, double c, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
	Driver for applying the non-degenerate twisted-mass stencil. More...

void	ApplyNdegTwistedMassPreconditioned (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double b, double c, bool xpay, const ColorSpinorField &x, int parity, bool dagger, bool asymmetric, const int *comm_override, TimeProfile &profile)
	Driver for applying the preconditioned non-degenerate twisted-mass stencil. More...

void	ApplyTwistedClover (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, const CloverField &C, double a, double b, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
	Driver for applying the twisted-clover stencil. More...

void	ApplyTwistedCloverPreconditioned (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, const CloverField &C, double a, double b, bool xpay, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
	Driver for applying the preconditioned twisted-clover stencil. More...

void	ApplyDomainWall5D (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double m_f, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
	Driver for applying the Domain-wall 5-d stencil to a 5-d vector with 5-d preconditioned data order. More...

void	ApplyDomainWall4D (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double m_5, const Complex b_5, const Complex c_5, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
	Driver for applying the batched Wilson 4-d stencil to a 5-d vector with 4-d preconditioned data order. More...

void	ApplyDslash5 (ColorSpinorField &out, const ColorSpinorField &in, const ColorSpinorField &x, double m_f, double m_5, const Complex b_5, const Complex c_5, double a, bool dagger, Dslash5Type type)
	Apply either the domain-wall / mobius Dslash5 operator or the M5 inverse operator. In the current implementation, it is expected that the color-spinor fields are 4-d preconditioned. More...

void	ApplyLaplace (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, int dir, double a, double b, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
	Driver for applying the Laplace stencil. More...

void	ApplyCovDev (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, int mu, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
	Driver for applying the covariant derivative. More...

void	ApplyClover (ColorSpinorField &out, const ColorSpinorField &in, const CloverField &clover, bool inverse, int parity)
	Apply clover-matrix field to a color-spinor field. More...

void	ApplyStaggered (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
	Apply the staggered dslash operator to a color-spinor field. More...

void	ApplyImprovedStaggered (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, const GaugeField &L, double a, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
	Apply the improved staggered dslash operator to a color-spinor field. More...

void	ApplyStaggeredKahlerDiracInverse (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &Xinv, bool dagger)
	Apply the (improved) staggered Kahler-Dirac inverse block to a color-spinor field. More...

void	ApplyTwistGamma (ColorSpinorField &out, const ColorSpinorField &in, int d, double kappa, double mu, double epsilon, int dagger, QudaTwistGamma5Type type)
	Apply the twisted-mass gamma operator to a color-spinor field. More...

void	ApplyTwistClover (ColorSpinorField &out, const ColorSpinorField &in, const CloverField &clover, double kappa, double mu, double epsilon, int parity, int dagger, QudaTwistGamma5Type twist)
	Apply twisted clover-matrix field to a color-spinor field. More...

void	PackGhost (void *ghost[2 * QUDA_MAX_DIM], const ColorSpinorField &field, MemoryLocation location, int nFace, bool dagger, int parity, bool spin_project, double a, double b, double c, int shmem, const qudaStream_t &stream)
	Dslash face packing routine. More...

void	gamma5 (ColorSpinorField &out, const ColorSpinorField &in)
	Applies a gamma5 matrix to a spinor (wrapper to ApplyGamma) More...

void	arpack_solve (std::vector< ColorSpinorField * > &h_evecs, std::vector< Complex > &h_evals, const DiracMatrix &mat, QudaEigParam *eig_param, TimeProfile &profile)
	The QUDA interface function. One passes two allocated arrays to hold the eigenmode data, the problem matrix, the arpack parameters defining what problem is to be solved, and a container for QUDA data structure types. More...

__host__ __device__ double2	operator+ (const double2 &x, const double2 &y)

__host__ __device__ double2	operator- (const double2 &x, const double2 &y)

__host__ __device__ float2	operator- (const float2 &x, const float2 &y)

__host__ __device__ float4	operator- (const float4 &x, const float4 &y)

__host__ __device__ float8	operator- (const float8 &x, const float8 &y)

__host__ __device__ double3	operator+ (const double3 &x, const double3 &y)

__host__ __device__ double4	operator+ (const double4 &x, const double4 &y)

__host__ __device__ float4	operator* (const float &a, const float4 &x)

__host__ __device__ float2	operator* (const float &a, const float2 &x)

__host__ __device__ double2	operator* (const double &a, const double2 &x)

__host__ __device__ double4	operator* (const double &a, const double4 &x)

__host__ __device__ float8	operator* (const float &a, const float8 &x)

__host__ __device__ float2	operator+ (const float2 &x, const float2 &y)

__host__ __device__ float4	operator+ (const float4 &x, const float4 &y)

__host__ __device__ float8	operator+ (const float8 &x, const float8 &y)

__host__ __device__ float4	operator+= (float4 &x, const float4 &y)

__host__ __device__ float2	operator+= (float2 &x, const float2 &y)

__host__ __device__ float8	operator+= (float8 &x, const float8 &y)

__host__ __device__ double2	operator+= (double2 &x, const double2 &y)

__host__ __device__ double3	operator+= (double3 &x, const double3 &y)

__host__ __device__ double4	operator+= (double4 &x, const double4 &y)

__host__ __device__ float4	operator-= (float4 &x, const float4 &y)

__host__ __device__ float2	operator-= (float2 &x, const float2 &y)

__host__ __device__ float8	operator-= (float8 &x, const float8 &y)

__host__ __device__ double2	operator-= (double2 &x, const double2 &y)

__host__ __device__ float2	operator*= (float2 &x, const float &a)

__host__ __device__ double2	operator*= (double2 &x, const float &a)

__host__ __device__ float4	operator*= (float4 &a, const float &b)

__host__ __device__ float8	operator*= (float8 &a, const float &b)

__host__ __device__ double2	operator*= (double2 &a, const double &b)

__host__ __device__ double4	operator*= (double4 &a, const double &b)

__host__ __device__ float2	operator- (const float2 &x)

__host__ __device__ double2	operator- (const double2 &x)

std::ostream &	operator<< (std::ostream &output, const double2 &a)

std::ostream &	operator<< (std::ostream &output, const double3 &a)

std::ostream &	operator<< (std::ostream &output, const double4 &a)

__device__ __host__ void	zero (double &a)

__device__ __host__ void	zero (double2 &a)

__device__ __host__ void	zero (double3 &a)

__device__ __host__ void	zero (double4 &a)

__device__ __host__ void	zero (float &a)

__device__ __host__ void	zero (float2 &a)

__device__ __host__ void	zero (float3 &a)

__device__ __host__ void	zero (float4 &a)

__device__ __host__ void	zero (short &a)

__device__ __host__ void	zero (char &a)

template<typename T , int n>
std::ostream &	operator<< (std::ostream &output, const vector_type< T, n > &a)

template<typename scalar , int n>
__device__ __host__ void	zero (vector_type< scalar, n > &v)

template<typename scalar , int n>
__device__ __host__ vector_type< scalar, n >	operator+ (const vector_type< scalar, n > &a, const vector_type< scalar, n > &b)

std::ostream &	operator<< (std::ostream &output, const GaugeFieldParam &param)

double	norm1 (const GaugeField &u)
	This is a debugging function, where we cast a gauge field into a spinor field so we can compute its L1 norm. More...

double	norm2 (const GaugeField &u)
	This is a debugging function, where we cast a gauge field into a spinor field so we can compute its L2 norm. More...

void	ax (const double &a, GaugeField &u)
	Scale the gauge field by the scalar a. More...

void	copyGenericGauge (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out=0, void *In=0, void **ghostOut=0, void **ghostIn=0, int type=0)

void	copyFieldOffset (GaugeField &out, const GaugeField &in, CommKey offset, QudaPCType pc_type)
	This function is used for copying from a source gauge field to a destination gauge field with an offset. More...

void	copyExtendedGauge (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out=0, void *In=0)

cudaGaugeField *	createExtendedGauge (cudaGaugeField &in, const int *R, TimeProfile &profile, bool redundant_comms=false, QudaReconstructType recon=QUDA_RECONSTRUCT_INVALID)

cpuGaugeField *	createExtendedGauge (void *gauge, QudaGaugeParam &gauge_param, const int R)

void	extractGaugeGhost (const GaugeField &u, void **ghost, bool extract=true, int offset=0)

void	extractExtendedGaugeGhost (const GaugeField &u, int dim, const int R, void *ghost, bool extract)

void	applyGaugePhase (GaugeField &u)

uint64_t	Checksum (const GaugeField &u, bool mini=false)

QudaReconstructType	Reconstruct_ (const char *func, const char *file, int line, const GaugeField &a, const GaugeField &b)
	Helper function for determining if the reconstruct of the fields is the same. More...

template<typename... Args>
QudaReconstructType	Reconstruct_ (const char *func, const char *file, int line, const GaugeField &a, const GaugeField &b, const Args &... args)
	Helper function for determining if the reconstruct of the fields is the same. More...

void	gaugeForce (GaugeField &mom, const GaugeField &u, double coeff, int **input_path, int length, double *path_coeff, int num_paths, int max_length)
	Compute the gauge-force contribution to the momentum. More...

void	gaugeObservables (GaugeField &u, QudaGaugeObservableParam &param, TimeProfile &profile)
	Calculates a variety of gauge-field observables. More...

void	projectSU3 (GaugeField &U, double tol, int *fails)
	Project the input gauge field onto the SU(3) group. This is a destructive operation. The number of link failures is reported so appropriate action can be taken. More...

double3	plaquette (const GaugeField &U)
	Compute the plaquette of the gauge field. More...

void	gaugeGauss (GaugeField &U, RNG &rngstate, double epsilon)
	Generate Gaussian distributed su(N) or SU(N) fields. If U is a momentum field, then we generate random Gaussian distributed field in the Lie algebra using the anti-Hermitian convention. If U is in the group then we create a Gaussian distributed su(n) field and exponentiate it, e.g., U = exp(sigma * H), where H is the distributed su(n) field and sigma is the width of the distribution (sigma = 0 results in a free field, and sigma = 1 has maximum disorder). More...

void	gaugeGauss (GaugeField &U, unsigned long long seed, double epsilon)
	Generate Gaussian distributed su(N) or SU(N) fields. If U is a momentum field, then we generate random Gaussian distributed field in the Lie algebra using the anti-Hermitian convention. If U is in the group then we create a Gaussian distributed su(n) field and exponentiate it, e.g., U = exp(sigma * H), where H is the distributed su(n) field and sigma is the width of the distribution (sigma = 0 results in a free field, and sigma = 1 has maximum disorder). More...

void	APEStep (GaugeField &dataDs, GaugeField &dataOr, double alpha)
	Apply APE smearing to the gauge field. More...

void	STOUTStep (GaugeField &dataDs, GaugeField &dataOr, double rho)
	Apply STOUT smearing to the gauge field. More...

void	OvrImpSTOUTStep (GaugeField &dataDs, GaugeField &dataOr, double rho, double epsilon)
	Apply Over Improved STOUT smearing to the gauge field. More...

void	WFlowStep (GaugeField &out, GaugeField &temp, GaugeField &in, double epsilon, QudaWFlowType wflow_type)
	Apply Wilson Flow steps W1, W2, Vt to the gauge field. This routine assumes that the input and output fields are extended, with the input field being exchanged prior to calling this function. On exit from this routine, the output field will have been exchanged. More...

void	gaugeFixingOVR (GaugeField &data, const int gauge_dir, const int Nsteps, const int verbose_interval, const double relax_boost, const double tolerance, const int reunit_interval, const int stopWtheta)
	Gauge fixing with overrelaxation with support for single and multi GPU. More...

void	gaugeFixingFFT (GaugeField &data, const int gauge_dir, const int Nsteps, const int verbose_interval, const double alpha, const int autotune, const double tolerance, const int stopWtheta)
	Gauge fixing with Steepest descent method with FFTs with support for single GPU only. More...

void	computeFmunu (GaugeField &Fmunu, const GaugeField &gauge)
	Compute the Fmunu tensor. More...

void	computeQCharge (double energy[3], double &qcharge, const GaugeField &Fmunu)
	Compute the topological charge and field energy. More...

void	computeQChargeDensity (double energy[3], double &qcharge, void *qdensity, const GaugeField &Fmunu)
	Compute the topological charge, field energy and the topological charge density per lattice site. More...

void	updateGaugeField (GaugeField &out, double dt, const GaugeField &in, const GaugeField &mom, bool conj_mom, bool exact)

__device__ void	load_streaming_double2 (double2 &a, const double2 *addr)

__device__ void	load_streaming_float4 (float4 &a, const float4 *addr)

__device__ void	load_cached_short4 (short4 &a, const short4 *addr)

__device__ void	load_cached_short2 (short2 &a, const short2 *addr)

__device__ void	load_global_short4 (short4 &a, const short4 *addr)

__device__ void	load_global_short2 (short2 &a, const short2 *addr)

__device__ void	load_global_float4 (float4 &a, const float4 *addr)

__device__ void	store_streaming_float4 (float4 *addr, float x, float y, float z, float w)

__device__ void	store_streaming_short4 (short4 *addr, short x, short y, short z, short w)

__device__ void	store_streaming_double2 (double2 *addr, double x, double y)

__device__ void	store_streaming_float2 (float2 *addr, float x, float y)

__device__ void	store_streaming_short2 (short2 *addr, short x, short y)

template<QudaReconstructType recon>
constexpr bool	is_enabled ()

template<>
constexpr bool	is_enabled< QUDA_RECONSTRUCT_NO > ()

template<>
constexpr bool	is_enabled< QUDA_RECONSTRUCT_13 > ()

template<>
constexpr bool	is_enabled< QUDA_RECONSTRUCT_12 > ()

template<>
constexpr bool	is_enabled< QUDA_RECONSTRUCT_9 > ()

template<>
constexpr bool	is_enabled< QUDA_RECONSTRUCT_8 > ()

template<template< typename, int, QudaReconstructType > class Apply, typename Recon , typename Float , typename G , typename... Args>
constexpr void	instantiate (G &U, Args &&... args)
	This instantiate function is used to instantiate the colors. More...

template<template< typename, int, QudaReconstructType > class Apply, typename Recon = ReconstructFull, typename G , typename... Args>
constexpr void	instantiate (G &U, Args &&... args)
	This instantiate function is used to instantiate the precisions. More...

template<template< typename > class Apply, typename C , typename... Args>
constexpr void	instantiate (C &c, Args &&... args)
	This instantiate function is used to instantiate the clover precision. More...

template<template< typename, int > class Apply, typename store_t , typename F , typename... Args>
constexpr void	instantiate (F &field, Args &&... args)
	This instantiate function is used to instantiate the colors. More...

template<template< typename, int > class Apply, typename F , typename... Args>
constexpr void	instantiate (F &field, Args &&... args)
	This instantiate function is used to instantiate the precision and number of colors. More...

template<template< typename > class Apply, typename F , typename... Args>
constexpr void	instantiatePrecision (F &field, Args &&... args)
	The instantiatePrecision function is used to instantiate the precision. Note unlike the "instantiate" functions above, this helper always instantiates double precision regardless of the QUDA_PRECISION value: this enables its use for copy interface routines which should always enable double precision support. More...

template<template< typename, typename > class Apply, typename T , typename F , typename... Args>
constexpr void	instantiatePrecision2 (F &field, Args &&... args)
	The instantiatePrecision2 function is used to instantiate the precision for a class that accepts 2 typename arguments, with the first typename corresponding to the precision being instantiated at hand. This is useful for copy routines, where we need to instantiate a second, e.g., destination, precision after already instantiating the first, e.g., source, precision. Similar to the "instantiatePrecision" function above, this helper always instantiates double precision regardless of the QUDA_PRECISION value: this enables its use for copy interface routines which should always enable double precision support. More...

template<template< typename > class Apply, typename F , typename... Args>
constexpr void	instantiatePrecisionMG (F &field, Args &&... args)
	The instantiatePrecision function is used to instantiate the precision. More...

template<template< typename, int, QudaReconstructType > class Apply, typename Recon , typename Float , int nColor, typename... Args>
void	instantiate (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, Args &&... args)
	This instantiate function is used to instantiate the reconstruct types used. More...

template<template< typename, int, QudaReconstructType > class Apply, typename Recon , typename Float , typename... Args>
void	instantiate (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, Args &&... args)
	This instantiate function is used to instantiate the colors. More...

template<template< typename, int, QudaReconstructType > class Apply, typename Recon = WilsonReconstruct, typename... Args>
void	instantiate (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, Args &&... args)
	This instantiate function is used to instantiate the precisions. More...

template<template< typename, int, QudaReconstructType > class Apply, typename Recon = WilsonReconstruct, typename... Args>
void	instantiatePreconditioner (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, Args &&... args)
	This instantiatePreconditioner function is used to instantiate the precisions for a preconditioner. This is the same as the instantiate helper above, except it only handles half and quarter precision. More...

void	completeKSForce (GaugeField &mom, const GaugeField &oprod, const GaugeField &gauge, QudaFieldLocation location, long long *flops=NULL)

std::ostream &	operator<< (std::ostream &output, const LatticeFieldParam &param)

QudaFieldLocation	Location_ (const char *func, const char *file, int line, const LatticeField &a, const LatticeField &b)
	Helper function for determining if the location of the fields is the same. More...

template<typename... Args>
QudaFieldLocation	Location_ (const char *func, const char *file, int line, const LatticeField &a, const LatticeField &b, const Args &... args)
	Helper function for determining if the location of the fields is the same. More...

QudaPrecision	Precision_ (const char *func, const char *file, int line, const LatticeField &a, const LatticeField &b)
	Helper function for determining if the precision of the fields is the same. More...

template<typename... Args>
QudaPrecision	Precision_ (const char *func, const char *file, int line, const LatticeField &a, const LatticeField &b, const Args &... args)
	Helper function for determining if the precision of the fields is the same. More...

bool	Native_ (const char *func, const char *file, int line, const LatticeField &a)
	Helper function for determining if the field is in native order. More...

template<typename... Args>
bool	Native_ (const char *func, const char *file, int line, const LatticeField &a, const Args &... args)
	Helper function for determining if the fields are in native order. More...

QudaFieldLocation	reorder_location ()
	Return whether data is reordered on the CPU or GPU. This can be set at QUDA initialization using the environment variable QUDA_REORDER_LOCATION. More...

void	reorder_location_set (QudaFieldLocation reorder_location_)
	Set whether data is reordered on the CPU or GPU. This can be set at QUDA initialization using the environment variable QUDA_REORDER_LOCATION. More...

const char *	compile_type_str (const LatticeField &meta, QudaFieldLocation location_=QUDA_INVALID_FIELD_LOCATION)
	Helper function for setting the auxiliary string. More...

void	fatKSLink (GaugeField *fat, const GaugeField &u, const double *coeff)
	Compute the fat links for improved staggered (Kogut-Susskind) fermions. More...

void	longKSLink (GaugeField *lng, const GaugeField &u, const double *coeff)
	Compute the long links for improved staggered (Kogut-Susskind) fermions. More...

void	printPeakMemUsage ()

void	assertAllMemFree ()

size_t	device_allocated ()

size_t	pinned_allocated ()

size_t	mapped_allocated ()

size_t	host_allocated ()

size_t	device_allocated_peak ()

size_t	pinned_allocated_peak ()

size_t	mapped_allocated_peak ()

size_t	host_allocated_peak ()

bool	use_managed_memory ()

bool	is_prefetch_enabled ()

void *	device_malloc_ (const char *func, const char *file, int line, size_t size)

void *	device_pinned_malloc_ (const char *func, const char *file, int line, size_t size)

void *	device_comms_pinned_malloc_ (const char *func, const char *file, int line, size_t size)

void *	safe_malloc_ (const char *func, const char *file, int line, size_t size)

void *	pinned_malloc_ (const char *func, const char *file, int line, size_t size)

void *	mapped_malloc_ (const char *func, const char *file, int line, size_t size)

void *	managed_malloc_ (const char *func, const char *file, int line, size_t size)

void	device_free_ (const char *func, const char *file, int line, void *ptr)

void	device_pinned_free_ (const char *func, const char *file, int line, void *ptr)

void	device_comms_pinned_free_ (const char *func, const char *file, int line, void *ptr)

void	managed_free_ (const char *func, const char *file, int line, void *ptr)

void	host_free_ (const char *func, const char *file, int line, void *ptr)

constexpr const char *	str_end (const char *str)

constexpr bool	str_slant (const char *str)

constexpr const char *	r_slant (const char *str)

constexpr const char *	file_name (const char *str)

QudaFieldLocation	get_pointer_location (const void *ptr)

void *	get_mapped_device_pointer_ (const char *func, const char *file, int line, const void *ptr)

bool	is_aligned (const void *ptr, size_t alignment)

double	computeMomAction (const GaugeField &mom)
	Compute and return the global momentum action 1/2 mom^2. More...

void	updateMomentum (GaugeField &mom, double coeff, GaugeField &force, const char *fname)

void	applyU (GaugeField &force, GaugeField &U)

bool	forceMonitor ()
	Whether we are monitoring the force or not. More...

void	flushForceMonitor ()
	Flush any outstanding force monitoring information. More...

void	ApplyCoarse (ColorSpinorField &out, const ColorSpinorField &inA, const ColorSpinorField &inB, const GaugeField &Y, const GaugeField &X, double kappa, int parity=QUDA_INVALID_PARITY, bool dslash=true, bool clover=true, bool dagger=false, const int *commDim=0, QudaPrecision halo_precision=QUDA_INVALID_PRECISION)
	Apply the coarse dslash stencil. This single driver accounts for all variations with and without the clover field, with and without dslash, and both single and full parity fields. More...

void	CoarseOp (GaugeField &Y, GaugeField &X, const Transfer &T, const cudaGaugeField &gauge, const cudaCloverField *clover, double kappa, double mass, double mu, double mu_factor, QudaDiracType dirac, QudaMatPCType matpc)
	Coarse operator construction from a fine-grid operator (Wilson / Clover) More...

void	StaggeredCoarseOp (GaugeField &Y, GaugeField &X, const Transfer &T, const cudaGaugeField &gauge, const cudaGaugeField *XinvKD, double mass, QudaDiracType dirac, QudaMatPCType matpc)
	Coarse operator construction from a fine-grid operator (Staggered) More...

void	CoarseCoarseOp (GaugeField &Y, GaugeField &X, const Transfer &T, const GaugeField &gauge, const GaugeField &clover, const GaugeField &cloverInv, double kappa, double mass, double mu, double mu_factor, QudaDiracType dirac, QudaMatPCType matpc, bool need_bidirectional, bool use_mma=false)
	Coarse operator construction from an intermediate-grid operator (Coarse) More...

void	calculateYhat (GaugeField &Yhat, GaugeField &Xinv, const GaugeField &Y, const GaugeField &X, bool use_mma=false)
	Calculate preconditioned coarse links and coarse clover inverse field. More...

void	Monte (GaugeField &data, RNG &rngstate, double Beta, int nhb, int nover)
	Perform heatbath and overrelaxation. Performs nhb heatbath steps followed by nover overrelaxation steps. More...

void	InitGaugeField (GaugeField &data)
	Perform a cold start to the gauge field, identity SU(3) matrix, also fills the ghost links in multi-GPU case (no need to exchange data) More...

void	InitGaugeField (GaugeField &data, RNG &rngstate)
	Perform a hot start to the gauge field, random SU(3) matrix, followed by reunitarization, also exchange borders links in multi-GPU case. More...

void	PGaugeExchange (GaugeField &data, const int n_dim, const int parity)
	Exchange "borders" between nodes. Although the radius border is 2, it only updates the interior radius border, i.e., at 1 and X[d-2] where X[d] already includes the Radius border, and doesn't update at the 0 and X[d-1] faces. More...

void	PGaugeExchangeFree ()
	Release all allocated memory used to exchange data between nodes. More...

double2	getLinkDeterminant (GaugeField &data)
	Calculate the Determinant. More...

double2	getLinkTrace (GaugeField &data)
	Calculate the Trace. More...

qudaError_t	qudaLaunchKernel (const void *func, const TuneParam &tp, void **args, qudaStream_t stream)
	Wrapper around cudaLaunchKernel. More...

template<typename T , typename... Arg>
qudaError_t	qudaLaunchKernel (T *func, const TuneParam &tp, qudaStream_t stream, const Arg &... arg)
	Templated wrapper around qudaLaunchKernel which can accept a templated kernel, and expects a kernel with a single Arg argument. More...

void	qudaMemcpy_ (void *dst, const void *src, size_t count, cudaMemcpyKind kind, const char *func, const char *file, const char *line)
	Wrapper around cudaMemcpy or driver API equivalent. More...

void	qudaMemcpyAsync_ (void *dst, const void *src, size_t count, cudaMemcpyKind kind, const qudaStream_t &stream, const char *func, const char *file, const char *line)
	Wrapper around cudaMemcpyAsync or driver API equivalent. More...

void	qudaMemcpy2D_ (void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, cudaMemcpyKind kind, const char *func, const char *file, const char *line)
	Wrapper around cudaMemcpy2D or driver API equivalent. More...

void	qudaMemcpy2DAsync_ (void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, cudaMemcpyKind kind, const qudaStream_t &stream, const char *func, const char *file, const char *line)
	Wrapper around cudaMemcpy2DAsync or driver API equivalent. More...

void	qudaMemset_ (void *ptr, int value, size_t count, const char *func, const char *file, const char *line)
	Wrapper around cudaMemset or driver API equivalent. More...

void	qudaMemset2D_ (void *ptr, size_t pitch, int value, size_t width, size_t height, const char *func, const char *file, const char *line)
	Wrapper around cudaMemset2D or driver API equivalent. More...

void	qudaMemsetAsync_ (void *ptr, int value, size_t count, const qudaStream_t &stream, const char *func, const char *file, const char *line)
	Wrapper around cudaMemsetAsync or driver API equivalent. More...

void	qudaMemset2DAsync_ (void *ptr, size_t pitch, int value, size_t width, size_t height, const qudaStream_t &stream, const char *func, const char *file, const char *line)
	Wrapper around cudaMemset2DAsync or driver API equivalent. More...

void	qudaMemPrefetchAsync_ (void *ptr, size_t count, QudaFieldLocation mem_space, const qudaStream_t &stream, const char *func, const char *file, const char *line)
	Wrapper around cudaMemPrefetchAsync or driver API equivalent. More...

bool	qudaEventQuery_ (cudaEvent_t &event, const char *func, const char *file, const char *line)
	Wrapper around cudaEventQuery or cuEventQuery with built-in error checking. More...

void	qudaEventRecord_ (cudaEvent_t &event, qudaStream_t stream, const char *func, const char *file, const char *line)
	Wrapper around cudaEventRecord or cuEventRecord with built-in error checking. More...

void	qudaStreamWaitEvent_ (qudaStream_t stream, cudaEvent_t event, unsigned int flags, const char *func, const char *file, const char *line)
	Wrapper around cudaStreamWaitEvent or cuStreamWaitEvent with built-in error checking. More...

void	qudaEventSynchronize_ (cudaEvent_t &event, const char *func, const char *file, const char *line)
	Wrapper around cudaEventSynchronize or cuEventSynchronize with built-in error checking. More...

void	qudaStreamSynchronize_ (qudaStream_t &stream, const char *func, const char *file, const char *line)
	Wrapper around cudaStreamSynchronize or cuStreamSynchronize with built-in error checking. More...

void	qudaDeviceSynchronize_ (const char *func, const char *file, const char *line)
	Wrapper around cudaDeviceSynchronize or cuDeviceSynchronize with built-in error checking. More...

void	printAPIProfile ()
	Print out the timer profile for CUDA API calls. More...

bool	canReuseResidentGauge (QudaInvertParam *inv_param)

template<class T >
__device__ __host__ T	getTrace (const Matrix< T, 3 > &a)

template<template< typename, int > class Mat, class T >
__device__ __host__ T	getDeterminant (const Mat< T, 3 > &a)

template<template< typename, int > class Mat, class T , int N>
__device__ __host__ Mat< T, N >	operator+ (const Mat< T, N > &a, const Mat< T, N > &b)

template<template< typename, int > class Mat, class T , int N>
__device__ __host__ Mat< T, N >	operator+= (Mat< T, N > &a, const Mat< T, N > &b)

template<template< typename, int > class Mat, class T , int N>
__device__ __host__ Mat< T, N >	operator+= (Mat< T, N > &a, const T &b)

template<template< typename, int > class Mat, class T , int N>
__device__ __host__ Mat< T, N >	operator-= (Mat< T, N > &a, const Mat< T, N > &b)

template<template< typename, int > class Mat, class T , int N>
__device__ __host__ Mat< T, N >	operator- (const Mat< T, N > &a, const Mat< T, N > &b)

template<template< typename, int > class Mat, class T , int N, class S >
__device__ __host__ Mat< T, N >	operator* (const S &scalar, const Mat< T, N > &a)

template<template< typename, int > class Mat, class T , int N, class S >
__device__ __host__ Mat< T, N >	operator* (const Mat< T, N > &a, const S &scalar)

template<template< typename, int > class Mat, class T , int N, class S >
__device__ __host__ Mat< T, N >	operator*= (Mat< T, N > &a, const S &scalar)

template<template< typename, int > class Mat, class T , int N>
__device__ __host__ Mat< T, N >	operator- (const Mat< T, N > &a)

template<template< typename, int > class Mat, class T , int N>
__device__ __host__ Mat< T, N >	operator* (const Mat< T, N > &a, const Mat< T, N > &b)
	Generic implementation of matrix multiplication. More...

template<template< typename > class complex, typename T , int N>
__device__ __host__ Matrix< complex< T >, N >	operator* (const Matrix< complex< T >, N > &a, const Matrix< complex< T >, N > &b)
	Specialization of complex matrix multiplication that will issue optimal fma instructions. More...

template<class T , int N>
__device__ __host__ Matrix< T, N >	operator*= (Matrix< T, N > &a, const Matrix< T, N > &b)

template<class T , class U , int N>
__device__ __host__ Matrix< typename PromoteTypeId< T, U >::type, N >	operator* (const Matrix< T, N > &a, const Matrix< U, N > &b)

template<class T >
__device__ __host__ Matrix< T, 2 >	operator* (const Matrix< T, 2 > &a, const Matrix< T, 2 > &b)

template<class T , int N>
__device__ __host__ Matrix< T, N >	conj (const Matrix< T, N > &other)

template<class T >
__device__ __host__ Matrix< T, 3 >	inverse (const Matrix< T, 3 > &u)

template<class T , int N>
__device__ __host__ void	setIdentity (Matrix< T, N > *m)

template<int N>
__device__ __host__ void	setIdentity (Matrix< float2, N > *m)

template<int N>
__device__ __host__ void	setIdentity (Matrix< double2, N > *m)

template<class T , int N>
__device__ __host__ void	setZero (Matrix< T, N > *m)

template<int N>
__device__ __host__ void	setZero (Matrix< float2, N > *m)

template<int N>
__device__ __host__ void	setZero (Matrix< double2, N > *m)

template<typename Complex , int N>
__device__ __host__ void	makeAntiHerm (Matrix< Complex, N > &m)

template<typename Complex , int N>
__device__ __host__ void	makeHerm (Matrix< Complex, N > &m)

template<class T , int N>
__device__ __host__ void	copyColumn (const Matrix< T, N > &m, int c, Array< T, N > *a)

template<class T , int N>
std::ostream &	operator<< (std::ostream &os, const Matrix< T, N > &m)

template<class T , int N>
std::ostream &	operator<< (std::ostream &os, const Array< T, N > &a)

template<class Cmplx >
__device__ __host__ void	computeLinkInverse (Matrix< Cmplx, 3 > *uinv, const Matrix< Cmplx, 3 > &u)

void	copyArrayToLink (Matrix< float2, 3 > *link, float *array)

template<class Cmplx , class Real >
void	copyArrayToLink (Matrix< Cmplx, 3 > *link, Real *array)

void	copyLinkToArray (float *array, const Matrix< float2, 3 > &link)

template<class Cmplx , class Real >
void	copyLinkToArray (Real *array, const Matrix< Cmplx, 3 > &link)

template<class T >
__device__ __host__ Matrix< T, 3 >	getSubTraceUnit (const Matrix< T, 3 > &a)

template<class T >
__device__ __host__ void	SubTraceUnit (Matrix< T, 3 > &a)

template<class T >
__device__ __host__ double	getRealTraceUVdagger (const Matrix< T, 3 > &a, const Matrix< T, 3 > &b)

template<class Cmplx >
__host__ __device__ void	printLink (const Matrix< Cmplx, 3 > &link)

template<class Cmplx >
__device__ __host__ double	ErrorSU3 (const Matrix< Cmplx, 3 > &matrix)

template<class T >
__device__ __host__ auto	exponentiate_iQ (const Matrix< T, 3 > &Q)

template<typename Float >
__device__ __host__ void	expsu3 (Matrix< complex< Float >, 3 > &q)

template<class Real >
__device__ Real	Random (cuRNGState &state, Real a, Real b)
	Return a random number between a and b. More...

template<>
__device__ float	Random< float > (cuRNGState &state, float a, float b)

template<>
__device__ double	Random< double > (cuRNGState &state, double a, double b)

template<class Real >
__device__ Real	Random (cuRNGState &state)
	Return a random number between 0 and 1. More...

template<>
__device__ float	Random< float > (cuRNGState &state)

template<>
__device__ double	Random< double > (cuRNGState &state)

constexpr int	max_n_reduce ()

template<typename T >
constexpr T	init_value ()
	The initialization value we used to check for completion. More...

template<typename T >
constexpr T	terminate_value ()
	The termination value we use to prevent a possible hang in case the computed reduction is equal to the initialization. More...

template<typename VectorType >
__device__ __host__ VectorType	vector_load (const void *ptr, int idx)

template<>
__device__ __host__ short8	vector_load (const void *ptr, int idx)

template<typename VectorType >
__device__ __host__ void	vector_store (void *ptr, int idx, const VectorType &value)

template<>
__device__ __host__ void	vector_store (void *ptr, int idx, const double2 &value)

template<>
__device__ __host__ void	vector_store (void *ptr, int idx, const float4 &value)

template<>
__device__ __host__ void	vector_store (void *ptr, int idx, const float2 &value)

template<>
__device__ __host__ void	vector_store (void *ptr, int idx, const short4 &value)

template<>
__device__ __host__ void	vector_store (void *ptr, int idx, const short2 &value)

template<>
__device__ __host__ void	vector_store (void *ptr, int idx, const char4 &value)

template<>
__device__ __host__ void	vector_store (void *ptr, int idx, const char2 &value)

template<>
__device__ __host__ void	vector_store (void *ptr, int idx, const short8 &value)

template<>
__device__ __host__ void	vector_store (void *ptr, int idx, const char8 &value)

template<class Field >
void	split_field (Field &collect_field, std::vector< Field * > &v_base_field, const CommKey &comm_key, QudaPCType pc_type=QUDA_4D_PC)

template<class Field >
void	join_field (std::vector< Field * > &v_base_field, const Field &collect_field, const CommKey &comm_key, QudaPCType pc_type=QUDA_4D_PC)

void	BuildStaggeredKahlerDiracInverse (GaugeField &Xinv, const cudaGaugeField &gauge, const double mass)
	Build the Kahler-Dirac inverse block for KD operators. More...

cudaGaugeField *	AllocateAndBuildStaggeredKahlerDiracInverse (const cudaGaugeField &gauge, const double mass, const QudaPrecision override_prec)
	Allocate and build the Kahler-Dirac inverse block for KD operators. More...

void	computeStaggeredOprod (GaugeField *out[], ColorSpinorField &in, const double coeff[], int nFace)
	Compute the outer-product field between the staggered quark field's one-hop and (for HISQ and ASQTAD) three-hop sites. More...

void	BlockOrthogonalize (ColorSpinorField &V, const std::vector< ColorSpinorField * > &B, const int *fine_to_coarse, const int *coarse_to_fine, const int *geo_bs, const int spin_bs, const int n_block_ortho)
	Block orthogonalize the matrix field, where the blocks are defined by lookup tables that map the fine grid points to the coarse grid points, and similarly for the spin degrees of freedom. More...

void	Prolongate (ColorSpinorField &out, const ColorSpinorField &in, const ColorSpinorField &v, int Nvec, const int *fine_to_coarse, const int *const *spin_map, int parity=QUDA_INVALID_PARITY)
	Apply the prolongation operator. More...

void	Restrict (ColorSpinorField &out, const ColorSpinorField &in, const ColorSpinorField &v, int Nvec, const int *fine_to_coarse, const int *coarse_to_fine, const int *const *spin_map, int parity=QUDA_INVALID_PARITY)
	Apply the restriction operator. More...

void	StaggeredProlongate (ColorSpinorField &out, const ColorSpinorField &in, const int *fine_to_coarse, const int *const *spin_map, int parity=QUDA_INVALID_PARITY)
	Apply the unitary "prolongation" operator for Kahler-Dirac preconditioning. More...

void	StaggeredRestrict (ColorSpinorField &out, const ColorSpinorField &in, const int *fine_to_coarse, const int *const *spin_map, int parity=QUDA_INVALID_PARITY)
	Apply the unitary "restriction" operator for Kahler-Dirac preconditioning. More...

template<typename Arg >
void	transform_reduce (Arg &arg)

template<typename Arg >
	__launch_bounds__ (Arg::block_size) __global__ void transform_reduce_kernel(Arg arg)

template<typename reduce_t , typename T , typename I , typename transformer , typename reducer >
void	transform_reduce (QudaFieldLocation location, std::vector< reduce_t > &result, const std::vector< T * > &v, I n_items, transformer h, reduce_t init, reducer r)
	QUDA implementation providing thrust::transform_reduce like functionality. Improves upon thrust's implementation since a single kernel is used which writes the result directly to host memory, and is a batched implementation. More...

template<typename reduce_t , typename T , typename I , typename transformer , typename reducer >
reduce_t	transform_reduce (QudaFieldLocation location, const T *v, I n_items, transformer h, reduce_t init, reducer r)
	QUDA implementation providing thrust::transform_reduce like functionality. Improves upon thrust's implementation since a single kernel is used which writes the result directly to host memory. More...

template<typename reduce_t , typename T , typename I , typename transformer , typename reducer >
void	reduce (QudaFieldLocation location, std::vector< reduce_t > &result, const std::vector< T * > &v, I n_items, reduce_t init, reducer r)
	QUDA implementation providing thrust::reduce like functionality. Improves upon thrust's implementation since a single kernel is used which writes the result directly to host memory, and is a batched implementation. More...

template<typename reduce_t , typename T , typename I , typename reducer >
reduce_t	reduce (QudaFieldLocation location, const T *v, I n_items, reduce_t init, reducer r)
	QUDA implementation providing thrust::reduce like functionality. Improves upon thrust's implementation since a single kernel is used which writes the result directly to host memory. More...

const std::map< TuneKey, TuneParam > &	getTuneCache ()
	Returns a reference to the tunecache map. More...

bool	activeTuning ()
	Query if tuning is in progress. More...

void	loadTuneCache ()

void	saveTuneCache (bool error=false)

void	saveProfile (const std::string label="")
	Save profile to disk. More...

void	flushProfile ()
	Flush profile contents, setting all counts to zero. More...

TuneParam	tuneLaunch (Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity)

void	postTrace_ (const char *func, const char *file, int line)
	Post an event in the trace, recording where it was posted. More...

void	enableProfileCount ()
	Enable the profile kernel counting. More...

void	disableProfileCount ()
	Disable the profile kernel counting. More...

void	setPolicyTuning (bool)
	Enable / disable whether we are tuning a policy. More...

bool	policyTuning ()
	Query whether we are currently tuning a policy. More...

void	setUberTuning (bool)
	Enable / disable whether we are tuning an uber kernel. More...

bool	uberTuning ()
	Query whether we are tuning an uber kernel. More...

void	u32toa (char *buffer, uint32_t value)

void	i32toa (char *buffer, int32_t value)

void	u64toa (char *buffer, uint64_t value)

void	i64toa (char *buffer, int64_t value)

void	setUnitarizeLinksConstants (double unitarize_eps, double max_error, bool allow_svd, bool svd_only, double svd_rel_error, double svd_abs_error)

void	unitarizeLinksCPU (GaugeField &outfield, const GaugeField &infield)

void	unitarizeLinks (GaugeField &outfield, const GaugeField &infield, int *fails)

void	unitarizeLinks (GaugeField &outfield, int *fails)

bool	isUnitary (const cpuGaugeField &field, double max_error)

ColorSpinorParam	colorSpinorParam (const CloverField &a, bool inverse)

std::ostream &	operator<< (std::ostream &out, const ColorSpinorField &a)

void	copyGenericColorSpinorDD (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorDS (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorDH (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorDQ (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorSD (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorSS (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorSH (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorSQ (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorHD (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorHS (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorHH (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorHQ (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorQD (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorQS (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorQH (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorQQ (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorMGDD (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorMGDS (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorMGSD (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorMGSS (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorMGSH (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorMGSQ (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorMGHS (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorMGHH (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorMGHQ (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorMGQS (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorMGQH (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericColorSpinorMGQQ (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)

void	copyGenericGaugeDoubleIn (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out, void *In, void **ghostOut, void **ghostIn, int type)

void	copyGenericGaugeSingleIn (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out, void *In, void **ghostOut, void **ghostIn, int type)

void	copyGenericGaugeHalfIn (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out, void *In, void **ghostOut, void **ghostIn, int type)

void	copyGenericGaugeQuarterIn (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out, void *In, void **ghostOut, void **ghostIn, int type)

void	copyGenericGaugeMG (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out, void *In, void **ghostOut, void **ghostIn, int type)

void	checkMomOrder (const GaugeField &u)

void *	create_gauge_buffer (size_t bytes, QudaGaugeFieldOrder order, QudaFieldGeometry geometry)

void **	create_ghost_buffer (size_t bytes[], QudaGaugeFieldOrder order, QudaFieldGeometry geometry)

void	free_gauge_buffer (void *buffer, QudaGaugeFieldOrder order, QudaFieldGeometry geometry)

void	free_ghost_buffer (void **buffer, QudaGaugeFieldOrder order, QudaFieldGeometry geometry)

std::ostream &	operator<< (std::ostream &out, const cudaColorSpinorField &a)

ColorSpinorParam	colorSpinorParam (const GaugeField &a)

void	printLaunchTimer ()

void	setDiracRefineParam (DiracParam &diracParam, QudaInvertParam *inv_param, const bool pc)

void	setDiracPreParam (DiracParam &diracParam, QudaInvertParam *inv_param, const bool pc, bool comms)

void	setDiracEigParam (DiracParam &diracParam, QudaInvertParam *inv_param, const bool pc, bool comms)

void	massRescale (cudaColorSpinorField &b, QudaInvertParam &param, bool for_multishift)

void	fillInnerSolveParam (SolverParam &inner, const SolverParam &outer)

int	reliable (double &rNorm, double &maxrx, double &maxrr, const double &r2, const double &delta)

template<int N>
void	compute_alpha_N (Complex *Q_AQandg, Complex *alpha)

template<int N>
void	compute_beta_N (Complex *Q_AQandg, Complex *Q_AS, Complex *beta)

template<libtype which_lib>
void	ComputeRitz (EigCGArgs &args)

template<>
void	ComputeRitz< libtype::eigen_lib > (EigCGArgs &args)

template<>
void	ComputeRitz< libtype::magma_lib > (EigCGArgs &args)

double	timeInterval (struct timeval start, struct timeval end)

void	computeBeta (Complex *beta, std::vector< ColorSpinorField > Ap, int i, int N, int k)

void	updateAp (Complex *beta, std::vector< ColorSpinorField > Ap, int begin, int size, int k)

void	orthoDir (Complex *beta, std::vector< ColorSpinorField > Ap, int k, int pipeline)

void	backSubs (const Complex alpha, Complex const beta, const double gamma, Complex *delta, int n)

void	updateSolution (ColorSpinorField &x, const Complex alpha, Complex const beta, double gamma, int k, std::vector< ColorSpinorField * > p)

template<libtype which_lib>
void	ComputeHarmonicRitz (GMResDRArgs &args)

template<>
void	ComputeHarmonicRitz< libtype::magma_lib > (GMResDRArgs &args)

template<>
void	ComputeHarmonicRitz< libtype::eigen_lib > (GMResDRArgs &args)

template<libtype which_lib>
void	ComputeEta (GMResDRArgs &args)

template<>
void	ComputeEta< libtype::magma_lib > (GMResDRArgs &args)

template<>
void	ComputeEta< libtype::eigen_lib > (GMResDRArgs &args)

void	fillFGMResDRInnerSolveParam (SolverParam &inner, const SolverParam &outer)

void	print (const double d[], int n)

void	updateAlphaZeta (double alpha, double zeta, double zeta_old, const double r2, const double beta, const double pAp, const double offset, const int nShift, const int j_low)

size_t	managed_allocated ()

size_t	managed_allocated_peak ()

void	qudaFuncSetAttribute_ (const void *kernel, cudaFuncAttribute attr, int value, const char *func, const char *file, const char *line)
	Wrapper around cudaFuncSetAttribute with built-in error checking. More...

void	qudaFuncGetAttributes_ (cudaFuncAttributes &attr, const void *kernel, const char *func, const char *file, const char *line)
	Wrapper around cudaFuncGetAttributes with built-in error checking. More...

int	traceEnabled ()

void	setTransferGPU (bool)

Variables
const int	Nstream = 9

qudaStream_t *	stream

Detailed Description

Here we detail how the MMA kernels for computeUV and computeVUV should be launched. Specifically:

bM, bN, bK: the CTA-local MMA shape sizes.
block_y, block_z: number of threads in each direction. (blockDim.x has nothing to do with the MMA shape)

Here we detail how the MMA kernels for computeYhat should be launched. Specifically:

bM, bN, bK: the CTA-local MMA shape sizes.
block_y, block_z: number of threads in each direction. (blockDim.x has nothing to do with the MMA shape)

This file contains definitions required when compiling with C++14. Without these, we can end up with undefined references at link time. We can remove this file when we jump to C++17 and declare these as inline variables in instantiate.h.

Generic Multi Shift Solver

For staggered, the mass is folded into the Dirac operator. Otherwise the matrix mass is 'unmodified'.

The lowest offset is in offsets[0]

Typedef Documentation

◆ ColorSpinorFieldSet

using quda::ColorSpinorFieldSet = typedef ColorSpinorField

Definition at line 1352 of file invert_quda.h.

◆ Complex

typedef std::complex<double> quda::Complex

Definition at line 86 of file quda_internal.h.

◆ CompositeColorSpinorField

typedef std::vector<ColorSpinorField*> quda::CompositeColorSpinorField

Typedef for a set of spinors. Can be further divided into subsets, e.g., with different precisions (not implemented currently).

Definition at line 71 of file color_spinor_field.h.

◆ cuRNGState

typedef struct curandStateMRG32k3a quda::cuRNGState

Definition at line 1 of file random_quda.h.

◆ DenseMatrix

typedef MatrixXcd quda::DenseMatrix

Definition at line 32 of file inv_eigcg_quda.cpp.

◆ DynamicStride

typedef Stride< Dynamic, Dynamic > quda::DynamicStride

Definition at line 17 of file deflation.cpp.

◆ map

typedef std::map<TuneKey, TuneParam> quda::map

Definition at line 34 of file tune.cpp.

◆ mgarray

template<typename T >

using quda::mgarray = typedef std::array<T, QUDA_MAX_MG_LEVEL>

Definition at line 12 of file command_line_params.h.

◆ RealVector

using quda::RealVector = typedef VectorXd

Definition at line 35 of file inv_eigcg_quda.cpp.

◆ RowMajorDenseMatrix

typedef Matrix< Complex, Dynamic, Dynamic, RowMajor > quda::RowMajorDenseMatrix

Definition at line 38 of file inv_eigcg_quda.cpp.

◆ Vector

typedef VectorXcd quda::Vector

Definition at line 34 of file inv_eigcg_quda.cpp.

◆ VectorSet

typedef MatrixXcd quda::VectorSet

Definition at line 33 of file inv_eigcg_quda.cpp.

Enumeration Type Documentation

◆ AllocType [1/2]

enum quda::AllocType

Enumerator
DEVICE
DEVICE_PINNED
HOST
PINNED
MAPPED
MANAGED
SHMEM
N_ALLOC_TYPE
DEVICE
DEVICE_PINNED
HOST
PINNED
MAPPED
MANAGED
N_ALLOC_TYPE

Definition at line 22 of file malloc.cpp.

◆ AllocType [2/2]

enum quda::AllocType

Enumerator
DEVICE
DEVICE_PINNED
HOST
PINNED
MAPPED
MANAGED
SHMEM
N_ALLOC_TYPE
DEVICE
DEVICE_PINNED
HOST
PINNED
MAPPED
MANAGED
N_ALLOC_TYPE

Definition at line 20 of file malloc.cpp.

◆ BiCGstabLUpdateType

enum quda::BiCGstabLUpdateType

The following code is based on Kate's worker class in Multi-CG.

This worker class is used to update most of the u and r vectors. On BiCG iteration j, r[0] through r[j] and u[0] through u[j] all get updated, but the subsequent mat-vec operation only gets applied to r[j] and u[j]. Thus, we can hide updating r[0] through r[j-1] and u[0] through u[j-1], respectively, in the comms for the matvec on r[j] and u[j]. This results in improved strong scaling for BiCGstab-L.

See paragraphs 2 and 3 in the comments on the Worker class in Multi-CG for more remarks.

Enumerator
BICGSTABL_UPDATE_U
BICGSTABL_UPDATE_R

Definition at line 168 of file inv_bicgstabl_quda.cpp.

◆ blockType

enum quda::blockType

Enumerator
PENCIL
LOWER_TRI
UPPER_TRI

Definition at line 12 of file eigensolve_quda.h.

◆ CloverPrefetchType

enum quda::CloverPrefetchType

strong

Enumerator
BOTH_CLOVER_PREFETCH_TYPE
CLOVER_CLOVER_PREFETCH_TYPE
INVERSE_CLOVER_PREFETCH_TYPE
INVALID_CLOVER_PREFETCH_TYPE

Definition at line 28 of file clover_field.h.

◆ Dslash5Type

enum quda::Dslash5Type

Enumerator
DSLASH5_DWF
DSLASH5_MOBIUS_PRE
DSLASH5_MOBIUS
M5_INV_DWF
M5_INV_MOBIUS
M5_INV_ZMOBIUS
M5_EOFA
M5INV_EOFA

Definition at line 557 of file dslash_quda.h.

◆ libtype [1/2]

enum quda::libtype

strong

Enumerator
eigen_lib
magma_lib
lapack_lib
mkl_lib
eigen_lib
magma_lib
lapack_lib
mkl_lib

Definition at line 42 of file inv_eigcg_quda.cpp.

◆ libtype [2/2]

enum quda::libtype

strong

Enumerator
eigen_lib
magma_lib
lapack_lib
mkl_lib
eigen_lib
magma_lib
lapack_lib
mkl_lib

Definition at line 52 of file inv_gmresdr_quda.cpp.

◆ MdwfFusedDslashType

enum quda::MdwfFusedDslashType

strong

Applying the following five kernels in the order of 4-0-1-2-3 is equivalent to applying the full even-odd preconditioned symmetric MdagM operator: op = (1 - M5inv * D4 * D5pre * M5inv * D4 * D5pre)^dag (1 - M5inv * D4 * D5pre * M5inv * D4 * D5pre)

Enumerator
D4_D5INV_D5PRE
D4_D5INV_D5INVDAG
D4DAG_D5PREDAG_D5INVDAG
D4DAG_D5PREDAG
D5PRE

Definition at line 574 of file dslash_quda.h.

◆ MemoryLocation

enum quda::MemoryLocation

Enumerator
Device
Host
Remote
Shmem

Definition at line 50 of file color_spinor_field.h.

◆ QudaOffsetCopyMode

enum quda::QudaOffsetCopyMode

strong

Enumerator
COLLECT
DISPERSE

Definition at line 46 of file lattice_field.h.

◆ QudaProfileType

enum quda::QudaProfileType

Enumerator
QUDA_PROFILE_H2D	host -> device transfers
QUDA_PROFILE_D2H	The time in seconds for device -> host transfers
QUDA_PROFILE_INIT	The time in seconds taken for initiation
QUDA_PROFILE_PREAMBLE	The time in seconds taken for any preamble
QUDA_PROFILE_COMPUTE	The time in seconds taken for the actual computation
QUDA_PROFILE_COMMS	synchronous communication
QUDA_PROFILE_EPILOGUE	The time in seconds taken for any epilogue
QUDA_PROFILE_FREE	The time in seconds for freeing resources
QUDA_PROFILE_IO	time spent on file i/o
QUDA_PROFILE_CHRONO	time spent on chronology
QUDA_PROFILE_EIGEN	time spent on host-side Eigen
QUDA_PROFILE_EIGENLU	time spent on host-side Eigen LU
QUDA_PROFILE_EIGENEV	time spent on host-side Eigen EV
QUDA_PROFILE_EIGENQR	time spent on host-side Eigen QR
QUDA_PROFILE_ARPACK	time spent on host-side ARPACK
QUDA_PROFILE_HOST_COMPUTE	time spent on miscellaneous host-side computation
QUDA_PROFILE_LOWER_LEVEL	dummy timer to mark beginning of lower level timers which do not count towards global time
QUDA_PROFILE_PACK_KERNEL	face packing kernel
QUDA_PROFILE_DSLASH_KERNEL	dslash kernel
QUDA_PROFILE_GATHER	gather (device -> host)
QUDA_PROFILE_SCATTER	scatter (host -> device)
QUDA_PROFILE_LAUNCH_KERNEL	cudaLaunchKernel
QUDA_PROFILE_EVENT_RECORD	cuda event record
QUDA_PROFILE_EVENT_QUERY	cuda event querying
QUDA_PROFILE_STREAM_WAIT_EVENT	stream waiting for event completion
QUDA_PROFILE_FUNC_SET_ATTRIBUTE	set function attribute
QUDA_PROFILE_EVENT_SYNCHRONIZE	event synchronization
QUDA_PROFILE_STREAM_SYNCHRONIZE	stream synchronization
QUDA_PROFILE_DEVICE_SYNCHRONIZE	device synchronization
QUDA_PROFILE_MEMCPY_D2D_ASYNC	device to device async copy
QUDA_PROFILE_MEMCPY_D2H_ASYNC	device to host async copy
QUDA_PROFILE_MEMCPY2D_D2H_ASYNC	device to host 2-d memcpy async copy
QUDA_PROFILE_MEMCPY_H2D_ASYNC	host to device async copy
QUDA_PROFILE_MEMCPY_DEFAULT_ASYNC	default async copy
QUDA_PROFILE_COMMS_START	initiating communication
QUDA_PROFILE_COMMS_QUERY	querying communication
QUDA_PROFILE_CONSTANT	time spent setting CUDA constant parameters
QUDA_PROFILE_TOTAL	The total time in seconds for the algorithm. Must be the penultimate type.
QUDA_PROFILE_COUNT	The total number of timers we have. Must be last enum type.

Definition at line 103 of file timer.h.

Function Documentation

◆ __launch_bounds__()

template<typename Arg >

quda::__launch_bounds__ ( Arg::block_size )

Definition at line 74 of file transform_reduce.h.

◆ abs() [1/4]

template<>

__host__ __device__ double quda::abs ( const complex< double > & z )

inline

Definition at line 1066 of file complex_quda.h.

◆ abs() [2/4]

template<>

__host__ __device__ float quda::abs ( const complex< float > & z )

inline

Definition at line 1061 of file complex_quda.h.

◆ abs() [3/4]

template<typename ValueType >

__host__ __device__ ValueType quda::abs ( const complex< ValueType > & z )

inline

Returns the magnitude of z.

Definition at line 1056 of file complex_quda.h.

◆ abs() [4/4]

template<typename ValueType >

__host__ __device__ ValueType quda::abs ( ValueType x )

inline

Definition at line 125 of file complex_quda.h.

◆ acos() [1/2]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::acos ( const complex< ValueType > & z )

inline

Definition at line 1270 of file complex_quda.h.

◆ acos() [2/2]

template<typename ValueType >

__host__ __device__ ValueType quda::acos ( ValueType x )

inline

Definition at line 61 of file complex_quda.h.

◆ acosh()

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::acosh ( const complex< ValueType > & z )

inline

Definition at line 1291 of file complex_quda.h.

◆ activeTuning()

bool quda::activeTuning ( )

query if tuning is in progress

Returns: tuning in progress?

Definition at line 137 of file tune.cpp.

◆ AllocateAndBuildStaggeredKahlerDiracInverse()

cudaGaugeField* quda::AllocateAndBuildStaggeredKahlerDiracInverse	(	const cudaGaugeField &	gauge,
		const double	mass,
		const QudaPrecision	override_prec
	)

Allocate and build the Kahler-Dirac inverse block for KD operators.

Parameters

[in]	gauge	original fine gauge field
[in]	mass	the mass of the original staggered operator w/out factor of 2 convention
[in]	override_prec	precision of Xinv field

Returns: constructed Xinv, which needs to be deleted manually

◆ APEStep()

void quda::APEStep	(	GaugeField &	dataDs,
		GaugeField &	dataOr,
		double	alpha
	)

Apply APE smearing to the gauge field.

Parameters

[out]	dataDs	Output smeared field
[in]	dataOr	Input gauge field
[in]	alpha	smearing parameter

◆ ApplyClover()

void quda::ApplyClover	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const CloverField &	clover,
		bool	inverse,
		int	parity
	)

Apply clover-matrix field to a color-spinor field.

Parameters

[out]	out	Result color-spinor field
[in]	in	Input color-spinor field
[in]	clover	Clover-matrix field
[in]	inverse	Whether we are applying the inverse or not
[in]	parity	Field parity (if color-spinor field is single parity)

◆ ApplyCoarse()

void quda::ApplyCoarse	(	ColorSpinorField &	out,
		const ColorSpinorField &	inA,
		const ColorSpinorField &	inB,
		const GaugeField &	Y,
		const GaugeField &	X,
		double	kappa,
		int	parity = `QUDA_INVALID_PARITY`,
		bool	dslash = `true`,
		bool	clover = `true`,
		bool	dagger = `false`,
		const int *	commDim = `0`,
		QudaPrecision	halo_precision = `QUDA_INVALID_PRECISION`
	)

Apply the coarse dslash stencil. This single driver accounts for all variations with and without the clover field, with and without dslash, and both single and full parity fields.

Parameters

[out]	out	The result vector
[in]	inA	The first input vector
[in]	inB	The second input vector
[in]	Y	Coarse link field
[in]	X	Coarse clover field
[in]	kappa	Scaling parameter
[in]	parity	Parity of the field (if single parity)
[in]	dslash	Are we applying dslash?
[in]	clover	Are we applying clover?
[in]	dagger	Apply dagger operator?
[in]	commDim	Which dimensions are partitioned?
[in]	halo_precision	What precision to use for the halos (if QUDA_INVALID_PRECISION, use field precision)

◆ ApplyCovDev()

void quda::ApplyCovDev	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const GaugeField &	U,
		int	mu,
		int	parity,
		bool	dagger,
		const int *	comm_override,
		TimeProfile &	profile
	)

Driver for applying the covariant derivative.

out = U * in

where U is the gauge field in a particular direction.

This operator can be applied to both single parity (checker-boarded) fields, or to full fields.

Parameters

[out]	out	The output result field
[in]	in	The input field
[in]	U	The gauge field used for the covariant derivative
[in]	mu	Direction of the derivative. For mu > 3 it goes backwards
[in]	parity	Destination parity
[in]	dagger	Whether this is for the dagger operator
[in]	comm_override	Override for which dimensions are partitioned
[in]	profile	The TimeProfile used for profiling the dslash

◆ ApplyDomainWall4D()

void quda::ApplyDomainWall4D	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const GaugeField &	U,
		double	a,
		double	m_5,
		const Complex *	b_5,
		const Complex *	c_5,
		const ColorSpinorField &	x,
		int	parity,
		bool	dagger,
		const int *	comm_override,
		TimeProfile &	profile
	)

Driver for applying the batched Wilson 4-d stencil to a 5-d vector with 4-d preconditioned data order.

out = D * in

where D is the gauged Wilson linear operator.

If a is non-zero, the operation is given by out = x + a * D in. This operator can be applied to both single parity (checker-boarded) fields, or to full fields.

Parameters

[out]	out	The output result field
[in]	in	The input field
[in]	U	The gauge field used for the operator
[in]	a	Scale factor applied
[in]	m_5	Wilson mass shift
[in]	b_5	Mobius coefficient array (length Ls)
[in]	c_5	Mobius coefficient array (length Ls)
[in]	x	Vector field we accumulate onto
[in]	parity	Destination parity
[in]	dagger	Whether this is for the dagger operator
[in]	comm_override	Override for which dimensions are partitioned
[in]	profile	The TimeProfile used for profiling the dslash

◆ ApplyDomainWall5D()

void quda::ApplyDomainWall5D	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const GaugeField &	U,
		double	a,
		double	m_f,
		const ColorSpinorField &	x,
		int	parity,
		bool	dagger,
		const int *	comm_override,
		TimeProfile &	profile
	)

Driver for applying the Domain-wall 5-d stencil to a 5-d vector with 5-d preconditioned data order.

out = D_5 * in

where D_5 is the 5-d wilson linear operator with fifth dimension boundary condition set by the fermion mass.

If a is non-zero, the operation is given by out = x + a * D_5 in. This operator can be applied to both single parity (checker-boarded) fields, or to full fields.

Parameters

[out]	out	The output result field
[in]	in	The input field
[in]	U	The gauge field used for the operator
[in]	a	Scale factor applied (typically -kappa_5)
[in]	m_f	Fermion mass parameter
[in]	x	Vector field we accumulate onto
[in]	parity	Destination parity
[in]	dagger	Whether this is for the dagger operator
[in]	comm_override	Override for which dimensions are partitioned
[in]	profile	The TimeProfile used for profiling the dslash

◆ ApplyDslash5()

void quda::ApplyDslash5	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const ColorSpinorField &	x,
		double	m_f,
		double	m_5,
		const Complex *	b_5,
		const Complex *	c_5,
		double	a,
		bool	dagger,
		Dslash5Type	type
	)

Apply either the domain-wall / mobius Dslash5 operator or the M5 inverse operator. In the current implementation, it is expected that the color-spinor fields are 4-d preconditioned.

Parameters

[out]	out	Result color-spinor field
[in]	in	Input color-spinor field
[in]	x	Auxiliary input color-spinor field
[in]	m_f	Fermion mass parameter
[in]	m_5	Wilson mass shift
[in]	b_5	Mobius coefficient array (length Ls)
[in]	c_5	Mobius coefficient array (length Ls)
[in]	a	Scale factor used in xpay operator
[in]	dagger	Whether this is for the dagger operator
[in]	type	Type of dslash we are applying

◆ applyGaugePhase()

void quda::applyGaugePhase ( GaugeField & u )

Apply the staggered phase factor to the gauge field.

Parameters

[in] u The gauge field to which we apply the staggered phase factors

◆ ApplyImprovedStaggered()

void quda::ApplyImprovedStaggered	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const GaugeField &	U,
		const GaugeField &	L,
		double	a,
		const ColorSpinorField &	x,
		int	parity,
		bool	dagger,
		const int *	comm_override,
		TimeProfile &	profile
	)

Apply the improved staggered dslash operator to a color-spinor field.

Parameters

[out]	out	Result color-spinor field
[in]	in	Input color-spinor field
[in]	U	Gauge-Link (1-link or fat-link)
[in]	L	Long-Links for asqtad
[in]	a	xpay parameter (set to 0.0 for non-xpay version)
[in]	x	Vector field we accumulate onto
[in]	parity	parity parameter
[in]	dagger	Whether we are applying the dagger or not
[in]	improved	whether to apply the standard-staggered (false) or asqtad (true) operator

◆ ApplyLaplace()

void quda::ApplyLaplace	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const GaugeField &	U,
		int	dir,
		double	a,
		double	b,
		const ColorSpinorField &	x,
		int	parity,
		bool	dagger,
		const int *	comm_override,
		TimeProfile &	profile
	)

Driver for applying the Laplace stencil.

out = - kappa * A * in

where A is the gauge laplace linear operator.

If x is defined, the operation is given by out = x - kappa * A in. This operator can be applied to both single parity (checker-boarded) fields, or to full fields.

Parameters

[out]	out	The output result field
[in]	in	The input field
[in]	U	The gauge field used for the gauge Laplace
[in]	dir	Direction of the derivative 0,1,2,3 to omit (-1 is full 4D)
[in]	a	Scale factor applied to derivative
[in]	b	Scale factor applied to aux field
[in]	x	Vector field we accumulate onto

◆ ApplyNdegTwistedMass()

void quda::ApplyNdegTwistedMass	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const GaugeField &	U,
		double	a,
		double	b,
		double	c,
		const ColorSpinorField &	x,
		int	parity,
		bool	dagger,
		const int *	comm_override,
		TimeProfile &	profile
	)

Driver for applying the non-degenerate twisted-mass stencil.

out = a * D * in + (1 + i*b*gamma_5*tau_3 + c*tau_1) * x

where D is the gauged Wilson linear operator. The quark fields out, in and x are five dimensional, with the fifth dimension corresponding to the flavor dimension. The convention is that the first 4-d slice (s=0) corresponds to the positive twist and the second slice (s=1) corresponds to the negative twist.

This operator can be applied to both single parity (4d checker-boarded) fields, or to full fields.

Parameters

[out]	out	The output result field
[in]	in	The input field
[in]	U	The gauge field used for the operator
[in]	a	Scale factor applied to Wilson term (typically -kappa)
[in]	b	Chiral twist factor applied (typically 2*mu*kappa)
[in]	c	Flavor twist factor applied (typically -2*epsilon*kappa)
[in]	x	Vector field we accumulate onto
[in]	parity	Destination parity
[in]	dagger	Whether this is for the dagger operator
[in]	comm_override	Override for which dimensions are partitioned
[in]	profile	The TimeProfile used for profiling the dslash

◆ ApplyNdegTwistedMassPreconditioned()

void quda::ApplyNdegTwistedMassPreconditioned	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const GaugeField &	U,
		double	a,
		double	b,
		double	c,
		bool	xpay,
		const ColorSpinorField &	x,
		int	parity,
		bool	dagger,
		bool	asymmetric,
		const int *	comm_override,
		TimeProfile &	profile
	)

Driver for applying the preconditioned non-degenerate twisted-mass stencil.

out = a * (1 + i*b*gamma_5*tau_3 + c*tau_1) * D * in + x

where D is the gauged Wilson linear operator. The quark fields out, in and x are five dimensional, with the fifth dimension corresponding to the flavor dimension. The convention is that the first 4-d slice (s=0) corresponds to the positive twist and the second slice (s=1) corresponds to the negative twist.

This operator can (at present) be applied to only single parity (checker-boarded) fields.

For the dagger operator, we generally apply the conjugate transpose operator

out = x + D^\dagger A^{-\dagger}

with the additional asymmetric special case, where we do not transpose the order of operations

out = A^{-\dagger} D^\dagger (no xpay term)

This variant is required when we have the asymmetric preconditioned operator and require the preconditioned twist term to remain in between the applications of D. This would be combined with a subsequent non-preconditioned dagger operator, A*x - kappa^2 D, to form the full operator.

Parameters

[out]	out	The output result field
[in]	in	The input field
[in]	U	The gauge field used for the operator
[in]	a	Scale factor applied to Wilson term (typically -kappa^2 / (1 + b*b - c*c))
[in]	b	Chiral twist factor applied (typically -2*mu*kappa)
[in]	c	Flavor twist factor applied (typically 2*epsilon*kappa)
[in]	xpay	Whether to do xpay or not
[in]	x	Vector field we accumulate onto
[in]	parity	Destination parity
[in]	dagger	Whether this is for the dagger operator
[in]	asymmetric	Whether this is for the asymmetric preconditioned dagger operator (a * (1 - i*b*gamma_5) * D^dagger * in)
[in]	comm_override	Override for which dimensions are partitioned
[in]	profile	The TimeProfile used for profiling the dslash

◆ ApplyStaggered()

void quda::ApplyStaggered	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const GaugeField &	U,
		double	a,
		const ColorSpinorField &	x,
		int	parity,
		bool	dagger,
		const int *	comm_override,
		TimeProfile &	profile
	)

Apply the staggered dslash operator to a color-spinor field.

Parameters

[out]	out	Result color-spinor field
[in]	in	Input color-spinor field
[in]	U	Gauge-Link (1-link or fat-link)
[in]	a	xpay parameter (set to 0.0 for non-xpay version)
[in]	x	Vector field we accumulate onto
[in]	parity	parity parameter
[in]	dagger	Whether we are applying the dagger or not
[in]	improved	whether to apply the standard-staggered (false) or asqtad (true) operator

◆ ApplyStaggeredKahlerDiracInverse()

void quda::ApplyStaggeredKahlerDiracInverse	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const GaugeField &	Xinv,
		bool	dagger
	)

Apply the (improved) staggered Kahler-Dirac inverse block to a color-spinor field.

Parameters

[out]	out	Result color-spinor field
[in]	in	Input color-spinor field
[in]	Xinv	Kahler-Dirac inverse field
[in]	dagger	Whether we are applying the dagger or not

◆ ApplyTwistClover()

void quda::ApplyTwistClover	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const CloverField &	clover,
		double	kappa,
		double	mu,
		double	epsilon,
		int	parity,
		int	dagger,
		QudaTwistGamma5Type	twist
	)

Apply twisted clover-matrix field to a color-spinor field.

Parameters

[out]	out	Result color-spinor field
[in]	in	Input color-spinor field
[in]	clover	Clover-matrix field
[in]	kappa	kappa parameter
[in]	mu	mu parameter
[in]	epsilon	epsilon parameter
[in]	parity	Field parity (if color-spinor field is single parity)
[in]	dagger	Whether we are applying the dagger or not
[in]	twist	The type of kernel we are doing: if (twist == QUDA_TWIST_GAMMA5_DIRECT) apply (Clover + i*a*gamma_5) to the input spinor; else if (twist == QUDA_TWIST_GAMMA5_INVERSE) apply (Clover + i*a*gamma_5)/(Clover^2 + a^2) to the input spinor

◆ ApplyTwistedClover()

void quda::ApplyTwistedClover	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const GaugeField &	U,
		const CloverField &	C,
		double	a,
		double	b,
		const ColorSpinorField &	x,
		int	parity,
		bool	dagger,
		const int *	comm_override,
		TimeProfile &	profile
	)

Driver for applying the twisted-clover stencil.

out = a * D * in + (C + i*b*gamma_5) * x

where D is the gauged Wilson linear operator, and C is the clover field.

This operator can be applied to both single parity (4d checker-boarded) fields, or to full fields.

Parameters

[out]	out	The output result field
[in]	in	The input field
[in]	U	The gauge field used for the operator
[in]	C	The clover field used for the operator
[in]	a	Scale factor applied to Wilson term (typically -kappa)
[in]	b	Chiral twist factor applied (typically 2*mu*kappa)
[in]	x	Vector field we accumulate onto
[in]	parity	Destination parity
[in]	dagger	Whether this is for the dagger operator
[in]	comm_override	Override for which dimensions are partitioned
[in]	profile	The TimeProfile used for profiling the dslash

◆ ApplyTwistedCloverPreconditioned()

void quda::ApplyTwistedCloverPreconditioned	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const GaugeField &	U,
		const CloverField &	C,
		double	a,
		double	b,
		bool	xpay,
		const ColorSpinorField &	x,
		int	parity,
		bool	dagger,
		const int *	comm_override,
		TimeProfile &	profile
	)

Driver for applying the preconditioned twisted-clover stencil.

out = a * (C + i*b*gamma_5)^{-1} * D * in + x = a * C^{-2} (C - i*b*gamma_5) * D * in + x = A^{-1} * D * in + x

where D is the gauged Wilson linear operator and C is the clover field. This operator can (at present) be applied to only single parity (checker-boarded) fields. When the dagger operator is requested, we do not transpose the order of operations, e.g.

out = A^{-\dagger} D^\dagger (no xpay term)

Although not a conjugate transpose of the regular operator, this variant is used to enable kernel fusion between the application of D and the subsequent application of A, e.g., in the symmetric dagger operator we need to apply

M = (1 - kappa^2 D^{\dagger} A^{-\dagger} D^{\dagger} A^{-\dagger})

and since we cannot fuse D^{\dagger} A^{-\dagger}, we instead fuse A^{-\dagger} D^{\dagger}.

Parameters

[out]	out	The output result field
[in]	in	The input field
[in]	U	The gauge field used for the operator
[in]	C	The clover field used for the operator
[in]	a	Scale factor applied to Wilson term (typically 1 / (1 + b*b) or kappa^2 / (1 + b*b))
[in]	b	Twist factor applied (typically -2*kappa*mu)
[in]	xpay	Whether to do xpay or not
[in]	x	Vector field we accumulate onto when xpay is true
[in]	parity	Destination parity
[in]	dagger	Whether this is for the dagger operator
[in]	comm_override	Override for which dimensions are partitioned
[in]	profile	The TimeProfile used for profiling the dslash

◆ ApplyTwistedMass()

void quda::ApplyTwistedMass	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const GaugeField &	U,
		double	a,
		double	b,
		const ColorSpinorField &	x,
		int	parity,
		bool	dagger,
		const int *	comm_override,
		TimeProfile &	profile
	)

◆ ApplyTwistedMassPreconditioned()

void quda::ApplyTwistedMassPreconditioned	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const GaugeField &	U,
		double	a,
		double	b,
		bool	xpay,
		const ColorSpinorField &	x,
		int	parity,
		bool	dagger,
		bool	asymmetric,
		const int *	comm_override,
		TimeProfile &	profile
	)

Driver for applying the preconditioned twisted-mass stencil.

out = a*(1 + i*b*gamma_5) * D * in + x

where D is the gauged Wilson linear operator. This operator can (at present) be applied to only single parity (checker-boarded) fields. For the dagger operator, we generally apply the conjugate transpose operator

out = x + D^\dagger A^{-\dagger}

with the additional asymmetric special case, where we do not transpose the order of operations

out = A^{-\dagger} D^\dagger (no xpay term)

This variant is required when we have the asymmetric preconditioned operator and require the preconditioned twist term to remain in between the applications of D. This would be combined with a subsequent non-preconditioned dagger operator, A*x - kappa^2 D, to form the full operator.

Parameters

[out]	out	The output result field
[in]	in	The input field
[in]	U	The gauge field used for the operator
[in]	a	Scale factor applied to Wilson term ( typically kappa^2 / (1 + b*b) )
[in]	b	Twist factor applied (typically -2*kappa*mu)
[in]	xpay	Whether to do xpay or not
[in]	x	Vector field we accumulate onto when xpay is true
[in]	parity	Destination parity
[in]	dagger	Whether this is for the dagger operator
[in]	asymmetric	Whether this is for the asymmetric preconditioned dagger operator (a * (1 - i*b*gamma_5) * D^dagger * in)
[in]	comm_override	Override for which dimensions are partitioned
[in]	profile	The TimeProfile used for profiling the dslash

◆ ApplyTwistGamma()

void quda::ApplyTwistGamma	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		int	d,
		double	kappa,
		double	mu,
		double	epsilon,
		int	dagger,
		QudaTwistGamma5Type	type
	)

Apply the twisted-mass gamma operator to a color-spinor field.

Parameters

[out]	out	Result color-spinor field
[in]	in	Input color-spinor field
[in]	d	Which gamma matrix we are applying (C counting, so gamma_5 has d=4)
[in]	kappa	kappa parameter
[in]	mu	mu parameter
[in]	epsilon	epsilon parameter
[in]	dagger	Whether we are applying the dagger or not
[in]	type	The type of kernel we are doing

◆ applyU()

void quda::applyU	(	GaugeField &	force,
		GaugeField &	U
	)

Left multiply the force field by the gauge field

force = U * force

Parameters

force	Force field
U	Gauge field

◆ ApplyWilson()

void quda::ApplyWilson	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const GaugeField &	U,
		double	kappa,
		const ColorSpinorField &	x,
		int	parity,
		bool	dagger,
		const int *	comm_override,
		TimeProfile &	profile
	)

Driver for applying the Wilson stencil.

out = D * in

where D is the gauged Wilson linear operator.

If kappa is non-zero, the operation is given by out = x + kappa * D in. This operator can be applied to both single parity (checker-boarded) fields, or to full fields.

Parameters

[out]	out	The output result field
[in]	in	The input field
[in]	U	The gauge field used for the operator
[in]	kappa	Scale factor applied
[in]	x	Vector field we accumulate onto
[in]	parity	Destination parity
[in]	dagger	Whether this is for the dagger operator
[in]	comm_override	Override for which dimensions are partitioned
[in]	profile	The TimeProfile used for profiling the dslash

◆ ApplyWilsonClover()

void quda::ApplyWilsonClover	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const GaugeField &	U,
		const CloverField &	A,
		double	kappa,
		const ColorSpinorField &	x,
		int	parity,
		bool	dagger,
		const int *	comm_override,
		TimeProfile &	profile
	)

Driver for applying the Wilson-clover stencil.

out = A * x + kappa * D * in

where D is the gauged Wilson linear operator.

This operator can be applied to both single parity (checker-boarded) fields, or to full fields.

Parameters

[out]	out	The output result field
[in]	in	Input field that D is applied to
[in]	x	Input field that A is applied to
[in]	U	The gauge field used for the operator
[in]	A	The clover field used for the operator
[in]	kappa	Scale factor applied
[in]	x	Vector field we accumulate onto
[in]	parity	Destination parity
[in]	dagger	Whether this is for the dagger operator
[in]	comm_override	Override for which dimensions are partitioned
[in]	profile	The TimeProfile used for profiling the dslash

◆ ApplyWilsonCloverHasenbuschTwist()

void quda::ApplyWilsonCloverHasenbuschTwist	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const GaugeField &	U,
		const CloverField &	A,
		double	kappa,
		double	mu,
		const ColorSpinorField &	x,
		int	parity,
		bool	dagger,
		const int *	comm_override,
		TimeProfile &	profile
	)

Driver for applying the Wilson-clover stencil.

out = A * x + kappa * D * in

where D is the gauged Wilson linear operator.

This operator can be applied to both single parity (checker-boarded) fields, or to full fields.

Parameters

[out]	out	The output result field
[in]	in	Input field that D is applied to
[in]	x	Input field that A is applied to
[in]	U	The gauge field used for the operator
[in]	A	The clover field used for the operator
[in]	kappa	Scale factor applied
[in]	mu	Twist factor
[in]	x	Vector field we accumulate onto
[in]	parity	Destination parity
[in]	dagger	Whether this is for the dagger operator
[in]	comm_override	Override for which dimensions are partitioned
[in]	profile	The TimeProfile used for profiling the dslash

◆ ApplyWilsonCloverHasenbuschTwistPCClovInv()

void quda::ApplyWilsonCloverHasenbuschTwistPCClovInv	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const GaugeField &	U,
		const CloverField &	A,
		double	kappa,
		double	mu,
		const ColorSpinorField &	x,
		int	parity,
		bool	dagger,
		const int *	comm_override,
		TimeProfile &	profile
	)

Driver for applying the twisted-mass stencil.

out = a * D * in + (1 + i*b*gamma_5) * x

where D is the gauged Wilson linear operator.

This operator can be applied to both single parity (checker-boarded) fields, or to full fields.

Parameters

[out]	out	The output result field
[in]	in	The input field
[in]	U	The gauge field used for the operator
[in]	a	Scale factor applied to Wilson term (typically -kappa)
[in]	b	Twist factor applied (typically 2mukappa)
[in]	x	Vector field we accumulate onto
[in]	parity	Destination parity
[in]	dagger	Whether this is for the dagger operator
[in]	comm_override	Override for which dimensions are partitioned
[in]	profile	The TimeProfile used for profiling the dslash

Driver for applying the Wilson-clover with twist for Hasenbusch

out = (1 +/- ig5 b A) * x + kappa * A^{-1}D * in

where D is the gauged Wilson linear operator.

This operator can be applied to both single parity (checker-boarded) fields, or to full fields.

Parameters

[out]	out	The output result field
[in]	in	Input field that D is applied to
[in]	x	Input field that A is applied to
[in]	U	The gauge field used for the operator
[in]	A	The clover field used for the operator
[in]	kappa	Scale factor applied
[in]	b	Twist factor applied
[in]	x	Vector field we accumulate onto
[in]	parity	Destination parity
[in]	dagger	Whether this is for the dagger operator
[in]	comm_override	Override for which dimensions are partitioned
[in]	profile	The TimeProfile used for profiling the dslash

◆ ApplyWilsonCloverHasenbuschTwistPCNoClovInv()

void quda::ApplyWilsonCloverHasenbuschTwistPCNoClovInv	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const GaugeField &	U,
		const CloverField &	A,
		double	kappa,
		double	mu,
		const ColorSpinorField &	x,
		int	parity,
		bool	dagger,
		const int *	comm_override,
		TimeProfile &	profile
	)

Driver for applying the Wilson-clover stencil with twist for Hasenbusch.

out = (1 +/- ig5 b A) * x + kappa * D * in

where D is the gauged Wilson linear operator.

This operator can be applied to both single parity (checker-boarded) fields, or to full fields.

Parameters

[out]	out	The output result field
[in]	in	Input field that D is applied to
[in]	x	Input field that A is applied to
[in]	U	The gauge field used for the operator
[in]	A	The clover field used for the operator
[in]	kappa	Scale factor applied
[in]	b	Twist factor applied
[in]	x	Vector field we accumulate onto
[in]	parity	Destination parity
[in]	dagger	Whether this is for the dagger operator
[in]	comm_override	Override for which dimensions are partitioned
[in]	profile	The TimeProfile used for profiling the dslash

◆ ApplyWilsonCloverPreconditioned()

void quda::ApplyWilsonCloverPreconditioned	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const GaugeField &	U,
		const CloverField &	A,
		double	kappa,
		const ColorSpinorField &	x,
		int	parity,
		bool	dagger,
		const int *	comm_override,
		TimeProfile &	profile
	)

Driver for applying the preconditioned Wilson-clover stencil.

out = A^{-1} * D * in + x

where D is the gauged Wilson linear operator and A is the clover field. This operator can (at present) be applied to only single parity (checker-boarded) fields. When the dagger operator is requested, we do not transpose the order of operations, e.g.

out = A^{-\dagger} D^\dagger (no xpay term)

Although not a conjugate transpose of the regular operator, this variant is used to enable kernel fusion between the application of D and the subsequent application of A, e.g., in the symmetric dagger operator we need to apply

M = (1 - kappa^2 D^{\dagger} A^{-1} D^{\dagger} A^{-1} )

and since we cannot fuse D^{\dagger} A^{-\dagger}, we instead fuse A^{-\dagger} D^{\dagger}.

If kappa is non-zero, the operation is given by out = x + kappa * A^{-1} D in. This operator can (at present) be applied to only single parity (checker-boarded) fields.

Parameters

[out]	out	The output result field
[in]	in	The input field
[in]	U	The gauge field used for the operator
[in]	A	The clover field used for the operator
[in]	kappa	Scale factor applied
[in]	x	Vector field we accumulate onto
[in]	parity	Destination parity
[in]	dagger	Whether this is for the dagger operator
[in]	comm_override	Override for which dimensions are partitioned
[in]	profile	The TimeProfile used for profiling the dslash

◆ arg() [1/3]

template<>

__host__ __device__ double quda::arg ( const complex< double > & z )

inline

Definition at line 1082 of file complex_quda.h.

◆ arg() [2/3]

template<>

__host__ __device__ float quda::arg ( const complex< float > & z )

inline

Definition at line 1077 of file complex_quda.h.

◆ arg() [3/3]

template<typename ValueType >

__host__ __device__ ValueType quda::arg ( const complex< ValueType > & z )

inline

Returns the phase angle of z.

Definition at line 1072 of file complex_quda.h.

◆ arpack_solve()

void quda::arpack_solve	(	std::vector< ColorSpinorField * > &	h_evecs,
		std::vector< Complex > &	h_evals,
		const DiracMatrix &	mat,
		QudaEigParam *	eig_param,
		TimeProfile &	profile
	)

The QUDA interface function. One passes two allocated arrays to hold the eigenmode data, the problem matrix, the arpack parameters defining what problem is to be solved, and a container for QUDA data structure types.

arpack_solve()

Parameters

[out]	h_evecs	Host fields where the e-vectors will be copied to
[out]	h_evals	Where the e-values will be copied to
[in]	mat	An explicit construction of the problem matrix.
[in]	param	Parameter container defining how the matrix is to be solved.
[in]	eig_param	Parameter structure for all QUDA eigensolvers
[in,out]	profile	TimeProfile instance used for profiling

Definition at line 507 of file quda_arpack_interface.cpp.

◆ asin() [1/2]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::asin ( const complex< ValueType > & z )

inline

Definition at line 1277 of file complex_quda.h.

◆ asin() [2/2]

template<typename ValueType >

__host__ __device__ ValueType quda::asin ( ValueType x )

inline

Definition at line 66 of file complex_quda.h.

◆ asinh()

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::asinh ( const complex< ValueType > & z )

inline

Definition at line 1316 of file complex_quda.h.

◆ assertAllMemFree()

void quda::assertAllMemFree ( )

Definition at line 549 of file malloc.cpp.

◆ atan() [1/2]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::atan ( const complex< ValueType > & z )

inline

Definition at line 1284 of file complex_quda.h.

◆ atan() [2/2]

template<typename ValueType >

__host__ __device__ ValueType quda::atan ( ValueType x )

inline

Definition at line 71 of file complex_quda.h.

◆ atan2()

template<typename ValueType >

__host__ __device__ ValueType quda::atan2	(	ValueType	x,
		ValueType	y
	)

inline

Definition at line 76 of file complex_quda.h.

◆ atanh() [1/2]

template<typename ValueType >

__host__ __device__ complex<float> quda::atanh ( const complex< float > & z )

inline

Definition at line 1340 of file complex_quda.h.

◆ atanh() [2/2]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::atanh ( const complex< ValueType > & z )

inline

Definition at line 1322 of file complex_quda.h.

◆ ax()

void quda::ax	(	const double &	a,
		GaugeField &	u
	)

Scale the gauge field by the scalar a.

Parameters

[in]	a	scalar multiplier
[in]	u	The gauge field we want to multiply

Definition at line 339 of file gauge_field.cpp.

◆ backSubs()

void quda::backSubs	(	const Complex *	alpha,
		Complex **const	beta,
		const double *	gamma,
		Complex *	delta,
		int	n
	)

Definition at line 136 of file inv_gcr_quda.cpp.

◆ BlockOrthogonalize()

void quda::BlockOrthogonalize	(	ColorSpinorField &	V,
		const std::vector< ColorSpinorField * > &	B,
		const int *	fine_to_coarse,
		const int *	coarse_to_fine,
		const int *	geo_bs,
		const int	spin_bs,
		const int	n_block_ortho
	)

Block orthogonalize the matrix field, where the blocks are defined by lookup tables that map the fine grid points to the coarse grid points, and similarly for the spin degrees of freedom.

Parameters

[in,out]	V	Matrix field to be orthogonalized
[in]	B	input vectors
[in]	geo_bs	Geometric block size
[in]	fine_to_coarse	Fine-to-coarse lookup table (linear indices)
[in]	coarse_to_fine	Coarse-to-fine lookup table (linear indices)
[in]	spin_bs	Spin block size
[in]	n_block_ortho	Number of times to Gram-Schmidt

◆ BuildStaggeredKahlerDiracInverse()

void quda::BuildStaggeredKahlerDiracInverse	(	GaugeField &	Xinv,
		const cudaGaugeField &	gauge,
		const double	mass
	)

Build the Kahler-Dirac inverse block for KD operators.

Parameters

[out]	Xinv	resulting Kahler-Dirac inverse (assumed allocated)
[in]	gauge	original fine gauge field
[in]	mass	the mass of the original staggered operator w/out factor of 2 convention

◆ calculateYhat()

void quda::calculateYhat	(	GaugeField &	Yhat,
		GaugeField &	Xinv,
		const GaugeField &	Y,
		const GaugeField &	X,
		bool	use_mma = `false`
	)

Calculate preconditioned coarse links and coarse clover inverse field.

Parameters

Yhat[out]	Preconditioned coarse link field
Xinv[out]	Coarse clover inverse field
Y[in]	Coarse link field
X[in]	Coarse clover field
use_mma[in]	Whether or not use MMA (tensor core) to do the calculation, default to false

◆ canReuseResidentGauge()

bool quda::canReuseResidentGauge ( QudaInvertParam * inv_param )

Check that the resident gauge field is compatible with the requested inv_param

Parameters

inv_param Contains all metadata regarding host and device storage

Definition at line 2173 of file interface_quda.cpp.

◆ checkMomOrder()

void quda::checkMomOrder ( const GaugeField & u )

Definition at line 22 of file copy_gauge.cpp.

◆ Checksum()

uint64_t quda::Checksum	(	const GaugeField &	u,
		bool	mini = `false`
	)

Compute XOR-based checksum of this gauge field: each gauge field entry is converted to type uint64_t, and the cumulative XOR of these values is computed.

Parameters

[in] mini Whether to compute a mini checksum or global checksum. A mini checksum only computes over a subset of the lattice sites and is to be used for online comparisons, e.g., checking a field has changed with a global update algorithm.

Returns: checksum value

◆ cloverDerivative()

void quda::cloverDerivative	(	cudaGaugeField &	force,
		cudaGaugeField &	gauge,
		cudaGaugeField &	oprod,
		double	coeff,
		QudaParity	parity
	)

Compute the derivative of the clover matrix in the direction mu,nu and compute the resulting force given the outer-product field.

Parameters

force	The computed force field (read/write update)
gauge	The input gauge field
oprod	The input outer-product field (tensor matrix field)
coeff	Multiplicative coefficient (e.g., clover coefficient)
parity	The field parity we are working on

◆ cloverInvert()

void quda::cloverInvert	(	CloverField &	clover,
		bool	computeTraceLog
	)

This function computes the Cholesky decomposition of each clover matrix and stores the clover inverse field.

Parameters

clover	The clover field (contains both the field itself and its inverse)
computeTraceLog	Whether to compute the trace logarithm of the clover term

◆ cloverRho()

void quda::cloverRho	(	CloverField &	clover,
		double	rho
	)

This function adds a real scalar onto the clover diagonal (only to the direct field not the inverse)

Parameters

clover	The clover field
rho	Real scalar to be added on

◆ cmac()

template<typename real >

__host__ __device__ complex<real> quda::cmac	(	const complex< real > &	x,
		const complex< real > &	y,
		const complex< real > &	z
	)

inline

Definition at line 1368 of file complex_quda.h.

◆ cmul()

template<typename real >

__host__ __device__ complex<real> quda::cmul	(	const complex< real > &	x,
		const complex< real > &	y
	)

inline

Definition at line 1357 of file complex_quda.h.

◆ CoarseCoarseOp()

void quda::CoarseCoarseOp	(	GaugeField &	Y,
		GaugeField &	X,
		const Transfer &	T,
		const GaugeField &	gauge,
		const GaugeField &	clover,
		const GaugeField &	cloverInv,
		double	kappa,
		double	mass,
		double	mu,
		double	mu_factor,
		QudaDiracType	dirac,
		QudaMatPCType	matpc,
		bool	need_bidirectional,
		bool	use_mma = `false`
	)

Coarse operator construction from an intermediate-grid operator (Coarse)

Parameters

Y[out]	Coarse link field
X[out]	Coarse clover field
T[in]	Transfer operator that defines the new coarse space
gauge[in]	Link field from fine grid
clover[in]	Clover field on fine grid
cloverInv[in]	Clover inverse field on fine grid
kappa[in]	Kappa parameter
mass[in]	Mass parameter
mu[in]	Mu parameter (set to non-zero for twisted-mass/twisted-clover)
mu_factor[in]	Multiplicative factor for the mu parameter
matpc[in]	The type of even-odd preconditioned fine-grid operator we are constructing the coarse grid operator from. If matpc==QUDA_MATPC_INVALID then we assume the operator is not even-odd preconditioned and we coarsen the full operator.
need_bidirectional[in]	Whether or not we need to force a bi-directional build, even if the given level isn't preconditioned—if any previous level is preconditioned, we've violated that symmetry.
use_mma[in]	Whether or not use MMA (tensor core) to do the calculation, default to false

◆ CoarseOp()

void quda::CoarseOp	(	GaugeField &	Y,
		GaugeField &	X,
		const Transfer &	T,
		const cudaGaugeField &	gauge,
		const cudaCloverField *	clover,
		double	kappa,
		double	mass,
		double	mu,
		double	mu_factor,
		QudaDiracType	dirac,
		QudaMatPCType	matpc
	)

Coarse operator construction from a fine-grid operator (Wilson / Clover)

Parameters

Y[out]	Coarse link field
X[out]	Coarse clover field
T[in]	Transfer operator that defines the coarse space
gauge[in]	Gauge field from fine grid
clover[in]	Clover field on fine grid (optional)
kappa[in]	Kappa parameter
mass[in]	Mass parameter
mu[in]	Mu parameter (set to non-zero for twisted-mass/twisted-clover)
mu_factor[in]	Multiplicative factor for the mu parameter
matpc[in]	The type of even-odd preconditioned fine-grid operator we are constructing the coarse grid operator from. If matpc==QUDA_MATPC_INVALID then we assume the operator is not even-odd preconditioned and we coarsen the full operator.

◆ colorContract()

template<typename Float , int Nc, int Ns>

__device__ __host__ complex<Float> quda::colorContract	(	const ColorSpinor< Float, Nc, Ns > &	a,
		const ColorSpinor< Float, Nc, Ns > &	b,
		int	sa,
		int	sb
	)

inline

Compute the color contraction over color at spin s dot = \sum_s,c a(s,c) * b(s,c)

Parameters

a	Left-hand side ColorSpinor
b	Right-hand side ColorSpinor

Returns: The color contraction

Definition at line 930 of file color_spinor.h.

◆ colorSpinorParam() [1/2]

ColorSpinorParam quda::colorSpinorParam	(	const CloverField &	a,
		bool	inverse
	)

Definition at line 460 of file clover_field.cpp.

◆ colorSpinorParam() [2/2]

ColorSpinorParam quda::colorSpinorParam ( const GaugeField & a )

Definition at line 296 of file gauge_field.cpp.

◆ compile_type_str()

const char* quda::compile_type_str	(	const LatticeField &	meta,
		QudaFieldLocation	location_ = `QUDA_INVALID_FIELD_LOCATION`
	)

inline

Helper function for setting auxiliary string.

Parameters

[in] meta LatticeField used for querying field location

Returns: String containing location and compilation type

Definition at line 839 of file lattice_field.h.

◆ completeKSForce()

void quda::completeKSForce	(	GaugeField &	mom,
		const GaugeField &	oprod,
		const GaugeField &	gauge,
		QudaFieldLocation	location,
		long long *	flops = `NULL`
	)

◆ compute_alpha_N()

template<int N>

void quda::compute_alpha_N	(	Complex *	Q_AQandg,
		Complex *	alpha
	)

Definition at line 298 of file inv_ca_cg.cpp.

◆ compute_beta_N()

template<int N>

void quda::compute_beta_N	(	Complex *	Q_AQandg,
		Complex *	Q_AS,
		Complex *	beta
	)

Definition at line 372 of file inv_ca_cg.cpp.

◆ computeBeta()

void quda::computeBeta	(	Complex **	beta,
		std::vector< ColorSpinorField * >	Ap,
		int	i,
		int	N,
		int	k
	)

Definition at line 63 of file inv_gcr_quda.cpp.

◆ computeClover()

void quda::computeClover	(	CloverField &	clover,
		const GaugeField &	fmunu,
		double	coeff
	)

Driver for computing the clover field from the field strength tensor.

Parameters

[out]	clover	Compute clover field
[in]	fmunu	Field strength tensor
[in]	coeff	Clover coefficient

◆ computeCloverForce()

void quda::computeCloverForce	(	GaugeField &	force,
		const GaugeField &	U,
		std::vector< ColorSpinorField * > &	x,
		std::vector< ColorSpinorField * > &	p,
		std::vector< double > &	coeff
	)

Compute the force contribution from the solver solution fields.

Force(x, mu) = U(x, mu) * sum_i=1^nvec ( P_mu^+ x(x+mu) p(x)^\dag + P_mu^- p(x+mu) x(x)^\dag )

M = A_even - kappa^2 * Dslash * A_odd^{-1} * Dslash
x(even) = M^{-1} b(even)
x(odd) = A_odd^{-1} * Dslash * x(even)
p(even) = M * x(even)
p(odd) = A_odd^{-1} * Dslash^dag * M * x(even)

Parameters

force[out,in]	The resulting force field
U	The input gauge field
x	Solution field (both parities)
p	Intermediate vectors (both parities)
coeff	Multiplicative coefficient (e.g., dt * residue)

◆ computeCloverSigmaOprod()

void quda::computeCloverSigmaOprod	(	GaugeField &	oprod,
		std::vector< ColorSpinorField * > &	x,
		std::vector< ColorSpinorField * > &	p,
		std::vector< std::vector< double > > &	coeff
	)

Compute the outer product from the solver solution fields arising from the diagonal term of the fermion bilinear in direction mu,nu and sum to outer product field.

Parameters

oprod[out,in]	Computed outer product field (tensor matrix field)
x[in]	Solution field (both parities)
p[in]	Intermediate vectors (both parities)
coeff[in]	Multiplicative coefficient (e.g., dt * residue), one for each parity

◆ computeCloverSigmaTrace()

void quda::computeCloverSigmaTrace	(	GaugeField &	output,
		const CloverField &	clover,
		double	coeff
	)

Compute the matrix tensor field necessary for the force calculation from the clover trace action. This computes a tensor field [mu,nu].

Parameters

output	The computed matrix field (tensor matrix field)
clover	The input clover field
coeff	Scalar coefficient multiplying the result (e.g., stepsize)

◆ ComputeEta()

template<libtype which_lib>

void quda::ComputeEta ( GMResDRArgs & args )

Definition at line 165 of file inv_gmresdr_quda.cpp.

◆ ComputeEta< libtype::eigen_lib >()

template<>

void quda::ComputeEta< libtype::eigen_lib > ( GMResDRArgs & args )

Definition at line 188 of file inv_gmresdr_quda.cpp.

◆ ComputeEta< libtype::magma_lib >()

template<>

void quda::ComputeEta< libtype::magma_lib > ( GMResDRArgs & args )

Definition at line 167 of file inv_gmresdr_quda.cpp.

◆ computeFmunu()

void quda::computeFmunu	(	GaugeField &	Fmunu,
		const GaugeField &	gauge
	)

Compute the Fmunu tensor.

Parameters

[out]	Fmunu	The Fmunu tensor
[in]	gauge	The gauge field upon which to compute the Fmunu tensor

◆ ComputeHarmonicRitz()

template<libtype which_lib>

void quda::ComputeHarmonicRitz ( GMResDRArgs & args )

Definition at line 95 of file inv_gmresdr_quda.cpp.

◆ ComputeHarmonicRitz< libtype::eigen_lib >()

template<>

void quda::ComputeHarmonicRitz< libtype::eigen_lib > ( GMResDRArgs & args )

Definition at line 135 of file inv_gmresdr_quda.cpp.

◆ ComputeHarmonicRitz< libtype::magma_lib >()

template<>

void quda::ComputeHarmonicRitz< libtype::magma_lib > ( GMResDRArgs & args )

Definition at line 97 of file inv_gmresdr_quda.cpp.

◆ computeLinkInverse()

template<class Cmplx >

__device__ __host__ void quda::computeLinkInverse	(	Matrix< Cmplx, 3 > *	uinv,
		const Matrix< Cmplx, 3 > &	u
	)

inline

Definition at line 830 of file quda_matrix.h.

◆ computeMomAction()

double quda::computeMomAction ( const GaugeField & mom )

Compute and return the global momentum action 1/2 mom^2.

Parameters

mom	Momentum field

Returns: Momentum action contribution

◆ computeQCharge()

void quda::computeQCharge	(	double	energy[3],
		double &	qcharge,
		const GaugeField &	Fmunu
	)

Compute the topological charge and field energy.

Parameters

[out]	energy	The total, spatial, and temporal field energy
[out]	qcharge	The total topological charge
[in]	Fmunu	The Fmunu tensor, usually calculated from a smeared configuration

◆ computeQChargeDensity()

void quda::computeQChargeDensity	(	double	energy[3],
		double &	qcharge,
		void *	qdensity,
		const GaugeField &	Fmunu
	)

Compute the topological charge, field energy and the topological charge density per lattice site.

Parameters

[out]	energy	The total, spatial, and temporal field energy
[out]	qcharge	The total topological charge
[out]	qdensity	The topological charge at each lattice site
[in]	Fmunu	The Fmunu tensor, usually calculated from a smeared configuration

◆ ComputeRitz()

template<libtype which_lib>

void quda::ComputeRitz ( EigCGArgs & args )

Definition at line 147 of file inv_eigcg_quda.cpp.

◆ ComputeRitz< libtype::eigen_lib >()

template<>

void quda::ComputeRitz< libtype::eigen_lib > ( EigCGArgs & args )

Definition at line 150 of file inv_eigcg_quda.cpp.

◆ ComputeRitz< libtype::magma_lib >()

template<>

void quda::ComputeRitz< libtype::magma_lib > ( EigCGArgs & args )

Definition at line 178 of file inv_eigcg_quda.cpp.

◆ computeStaggeredOprod()

void quda::computeStaggeredOprod	(	GaugeField *	out[],
		ColorSpinorField &	in,
		const double	coeff[],
		int	nFace
	)

Compute the outer-product field between the staggered quark field's one and (for HISQ and ASQTAD) three hop sites. E.g.,

out[0][d](x) = (in(x+1_d) x conj(in(x)))
out[1][d](x) = (in(x+3_d) x conj(in(x)))

where 1_d and 3_d represent a relative shift of magnitude 1 and 3 in dimension d, respectively

Note out[1] is only computed if nFace=3

Parameters

[out]	out	Array of nFace outer-product matrix fields
[in]	in	Input quark field
[in]	coeff	Coefficient
[in]	nFace	Number of faces (1 or 3)

◆ conj() [1/3]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::conj ( const complex< ValueType > & z )

inline

Returns the complex conjugate of z.

Definition at line 1050 of file complex_quda.h.

◆ conj() [2/3]

template<class T , int N>

__device__ __host__ Matrix<T,N> quda::conj ( const Matrix< T, N > & other )

inline

Definition at line 590 of file quda_matrix.h.

◆ conj() [3/3]

template<typename ValueType >

__host__ __device__ ValueType quda::conj ( ValueType x )

inline

Definition at line 130 of file complex_quda.h.

◆ contractQuda()

void quda::contractQuda	(	const ColorSpinorField &	x,
		const ColorSpinorField &	y,
		void *	result,
		QudaContractType	cType
	)

◆ coordinate_from_index()

constexpr CommKey quda::coordinate_from_index	(	int	index,
		CommKey	dim
	)

inline constexpr

Definition at line 74 of file comm_key.h.

◆ copy() [1/3]

template<typename T1 , typename T2 >

__host__ __device__ std::enable_if<!isFixed<T1>::value && !isFixed<T2>::value, void>::type quda::copy	(	T1 &	a,
		const T2 &	b
	)

inline

Copy function which is trivial between floating point types. When converting to an integer type, the input float is assumed to be in the range [-1,1] and we rescale to saturate the integer range. When converting from an integer type, we scale the output to be on the same range.

Definition at line 64 of file convert.h.

◆ copy() [2/3]

template<typename T1 , typename T2 >

__host__ __device__ std::enable_if<!isFixed<T1>::value && isFixed<T2>::value, void>::type quda::copy	(	T1 &	a,
		const T2 &	b
	)

inline

Definition at line 71 of file convert.h.

◆ copy() [3/3]

template<typename T1 , typename T2 >

__host__ __device__ std::enable_if<isFixed<T1>::value && !isFixed<T2>::value, void>::type quda::copy	(	T1 &	a,
		const T2 &	b
	)

inline

Definition at line 78 of file convert.h.

◆ copy_and_scale() [1/2]

template<typename T1 , typename T2 , typename T3 >

__host__ __device__ std::enable_if<!isFixed<T2>::value, void>::type quda::copy_and_scale	(	T1 &	a,
		const T2 &	b,
		const T3 &	c
	)

inline

Specialized variants of the copy function that include an additional scale factor. Note the scale factor is ignored unless the input type (b) is either a short or char vector.

Definition at line 105 of file convert.h.

◆ copy_and_scale() [2/2]

template<typename T1 , typename T2 , typename T3 >

__host__ __device__ std::enable_if<isFixed<T2>::value, void>::type quda::copy_and_scale	(	T1 &	a,
		const T2 &	b,
		const T3 &	c
	)

inline

Definition at line 112 of file convert.h.

◆ copy_scaled() [1/2]

template<typename T1 , typename T2 >

__host__ __device__ std::enable_if<!isFixed<T1>::value, void>::type quda::copy_scaled	(	T1 &	a,
		const T2 &	b
	)

inline

Specialized variants of the copy function that assume the scale factor has already been applied.

Definition at line 88 of file convert.h.

◆ copy_scaled() [2/2]

template<typename T1 , typename T2 >

__host__ __device__ std::enable_if<isFixed<T1>::value, void>::type quda::copy_scaled	(	T1 &	a,
		const T2 &	b
	)

inline

Definition at line 94 of file convert.h.

◆ copyArrayToLink() [1/2]

template<class Cmplx , class Real >

void quda::copyArrayToLink	(	Matrix< Cmplx, 3 > *	link,
		Real *	array
	)

inline

Definition at line 877 of file quda_matrix.h.

◆ copyArrayToLink() [2/2]

void quda::copyArrayToLink	(	Matrix< float2, 3 > *	link,
		float *	array
	)

inline

Definition at line 865 of file quda_matrix.h.

◆ copyColumn()

template<class T , int N>

__device__ __host__ void quda::copyColumn	(	const Matrix< T, N > &	m,
		int	c,
		Array< T, N > *	a
	)

inline

Definition at line 796 of file quda_matrix.h.

◆ copyExtendedColorSpinor()

void quda::copyExtendedColorSpinor	(	ColorSpinorField &	dst,
		const ColorSpinorField &	src,
		QudaFieldLocation	location,
		const int	parity,
		void *	Dst,
		void *	Src,
		void *	dstNorm,
		void *	srcNorm
	)

◆ copyExtendedGauge()

void quda::copyExtendedGauge	(	GaugeField &	out,
		const GaugeField &	in,
		QudaFieldLocation	location,
		void *	Out = `0`,
		void *	In = `0`
	)

This function is used for copying the gauge field into an extended gauge field. Defined in copy_extended_gauge.cu.

Parameters

out	The extended output field to which we are copying
in	The input field from which we are copying
location	The location of where we are doing the copying (CPU or CUDA)
Out	The output buffer (optional)
In	The input buffer (optional)

◆ copyFieldOffset() [1/3]

void quda::copyFieldOffset	(	CloverField &	out,
		const CloverField &	in,
		CommKey	offset,
		QudaPCType	pc_type
	)

This function is used for copying from a source clover field to a destination clover field with an offset.

Parameters

out	The output field to which we are copying
in	The input field from which we are copying
offset	The offset for the larger field between out and in.
pc_type	Whether the field order uses 4d or 5d even-odd preconditioning.

◆ copyFieldOffset() [2/3]

void quda::copyFieldOffset	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		CommKey	offset,
		QudaPCType	pc_type
	)

This function is used for copying from a source colorspinor field to a destination field with an offset.

Parameters

out	The output field to which we are copying
in	The input field from which we are copying
offset	The offset for the larger field between out and in.
pc_type	Whether the field order uses 4d or 5d even-odd preconditioning.

◆ copyFieldOffset() [3/3]

void quda::copyFieldOffset	(	GaugeField &	out,
		const GaugeField &	in,
		CommKey	offset,
		QudaPCType	pc_type
	)

This function is used for copying from a source gauge field to a destination gauge field with an offset.

Parameters

out	The output field to which we are copying
in	The input field from which we are copying
offset	The offset for the larger field between out and in.
pc_type	Whether the field order uses 4d or 5d even-odd preconditioning.

◆ copyGenericClover()

void quda::copyGenericClover	(	CloverField &	out,
		const CloverField &	in,
		bool	inverse,
		QudaFieldLocation	location,
		void *	Out = `0`,
		void *	In = `0`,
		void *	outNorm = `0`,
		void *	inNorm = `0`
	)

This generic function is used for copying the clover field where the input and output can be in any order and location.

Parameters

out	The output field to which we are copying
in	The input field from which we are copying
inverse	Whether we are copying the inverse term or not
location	The location of where we are doing the copying (CPU or CUDA)
Out	The output buffer (optional)
In	The input buffer (optional)
outNorm	The output norm buffer (optional)
inNorm	The input norm buffer (optional)

◆ copyGenericColorSpinor()

void quda::copyGenericColorSpinor	(	ColorSpinorField &	dst,
		const ColorSpinorField &	src,
		QudaFieldLocation	location,
		void *	Dst = `0`,
		void *	Src = `0`,
		void *	dstNorm = `0`,
		void *	srcNorm = `0`
	)

Definition at line 39 of file copy_color_spinor.cpp.

◆ copyGenericColorSpinorDD()

void quda::copyGenericColorSpinorDD	(	ColorSpinorField &	,
		const ColorSpinorField &	,
		QudaFieldLocation	,
		void *	,
		void *	,
		void *	a = `0`,
		void *	b = `0`
	)

◆ copyGenericColorSpinorDH()

void quda::copyGenericColorSpinorDH	(	ColorSpinorField &	,
		const ColorSpinorField &	,
		QudaFieldLocation	,
		void *	,
		void *	,
		void *	a = `0`,
		void *	b = `0`
	)

◆ copyGenericColorSpinorDQ()

void quda::copyGenericColorSpinorDQ	(	ColorSpinorField &	,
		const ColorSpinorField &	,
		QudaFieldLocation	,
		void *	,
		void *	,
		void *	a = `0`,
		void *	b = `0`
	)

◆ copyGenericColorSpinorDS()

void quda::copyGenericColorSpinorDS	(	ColorSpinorField &	,
		const ColorSpinorField &	,
		QudaFieldLocation	,
		void *	,
		void *	,
		void *	a = `0`,
		void *	b = `0`
	)

◆ copyGenericColorSpinorHD()

void quda::copyGenericColorSpinorHD	(	ColorSpinorField &	,
		const ColorSpinorField &	,
		QudaFieldLocation	,
		void *	,
		void *	,
		void *	a = `0`,
		void *	b = `0`
	)

◆ copyGenericColorSpinorHH()

void quda::copyGenericColorSpinorHH	(	ColorSpinorField &	,
		const ColorSpinorField &	,
		QudaFieldLocation	,
		void *	,
		void *	,
		void *	a = `0`,
		void *	b = `0`
	)

◆ copyGenericColorSpinorHQ()

void quda::copyGenericColorSpinorHQ	(	ColorSpinorField &	,
		const ColorSpinorField &	,
		QudaFieldLocation	,
		void *	,
		void *	,
		void *	a = `0`,
		void *	b = `0`
	)

◆ copyGenericColorSpinorHS()

void quda::copyGenericColorSpinorHS	(	ColorSpinorField &	,
		const ColorSpinorField &	,
		QudaFieldLocation	,
		void *	,
		void *	,
		void *	a = `0`,
		void *	b = `0`
	)

◆ copyGenericColorSpinorMGDD()

void quda::copyGenericColorSpinorMGDD	(	ColorSpinorField &	,
		const ColorSpinorField &	,
		QudaFieldLocation	,
		void *	,
		void *	,
		void *	a = `0`,
		void *	b = `0`
	)

◆ copyGenericColorSpinorMGDS()

void quda::copyGenericColorSpinorMGDS	(	ColorSpinorField &	,
		const ColorSpinorField &	,
		QudaFieldLocation	,
		void *	,
		void *	,
		void *	a = `0`,
		void *	b = `0`
	)

◆ copyGenericColorSpinorMGHH()

void quda::copyGenericColorSpinorMGHH	(	ColorSpinorField &	,
		const ColorSpinorField &	,
		QudaFieldLocation	,
		void *	,
		void *	,
		void *	a = `0`,
		void *	b = `0`
	)

◆ copyGenericColorSpinorMGHQ()

void quda::copyGenericColorSpinorMGHQ	(	ColorSpinorField &	,
		const ColorSpinorField &	,
		QudaFieldLocation	,
		void *	,
		void *	,
		void *	a = `0`,
		void *	b = `0`
	)

◆ copyGenericColorSpinorMGHS()

void quda::copyGenericColorSpinorMGHS	(	ColorSpinorField &	,
		const ColorSpinorField &	,
		QudaFieldLocation	,
		void *	,
		void *	,
		void *	a = `0`,
		void *	b = `0`
	)

◆ copyGenericColorSpinorMGQH()

void quda::copyGenericColorSpinorMGQH	(	ColorSpinorField &	,
		const ColorSpinorField &	,
		QudaFieldLocation	,
		void *	,
		void *	,
		void *	a = `0`,
		void *	b = `0`
	)

◆ copyGenericColorSpinorMGQQ()

void quda::copyGenericColorSpinorMGQQ	(	ColorSpinorField &	,
		const ColorSpinorField &	,
		QudaFieldLocation	,
		void *	,
		void *	,
		void *	a = `0`,
		void *	b = `0`
	)

◆ copyGenericColorSpinorMGQS()

void quda::copyGenericColorSpinorMGQS	(	ColorSpinorField &	,
		const ColorSpinorField &	,
		QudaFieldLocation	,
		void *	,
		void *	,
		void *	a = `0`,
		void *	b = `0`
	)

◆ copyGenericColorSpinorMGSD()

void quda::copyGenericColorSpinorMGSD	(	ColorSpinorField &	,
		const ColorSpinorField &	,
		QudaFieldLocation	,
		void *	,
		void *	,
		void *	a = `0`,
		void *	b = `0`
	)

◆ copyGenericColorSpinorMGSH()

void quda::copyGenericColorSpinorMGSH	(	ColorSpinorField &	,
		const ColorSpinorField &	,
		QudaFieldLocation	,
		void *	,
		void *	,
		void *	a = `0`,
		void *	b = `0`
	)

◆ copyGenericColorSpinorMGSQ()

void quda::copyGenericColorSpinorMGSQ	(	ColorSpinorField &	,
		const ColorSpinorField &	,
		QudaFieldLocation	,
		void *	,
		void *	,
		void *	a = `0`,
		void *	b = `0`
	)

◆ copyGenericColorSpinorMGSS()

void quda::copyGenericColorSpinorMGSS	(	ColorSpinorField &	,
		const ColorSpinorField &	,
		QudaFieldLocation	,
		void *	,
		void *	,
		void *	a = `0`,
		void *	b = `0`
	)

◆ copyGenericColorSpinorQD()

void quda::copyGenericColorSpinorQD	(	ColorSpinorField &	,
		const ColorSpinorField &	,
		QudaFieldLocation	,
		void *	,
		void *	,
		void *	a = `0`,
		void *	b = `0`
	)

◆ copyGenericColorSpinorQH()

void quda::copyGenericColorSpinorQH	(	ColorSpinorField &	,
		const ColorSpinorField &	,
		QudaFieldLocation	,
		void *	,
		void *	,
		void *	a = `0`,
		void *	b = `0`
	)

◆ copyGenericColorSpinorQQ()

void quda::copyGenericColorSpinorQQ	(	ColorSpinorField &	,
		const ColorSpinorField &	,
		QudaFieldLocation	,
		void *	,
		void *	,
		void *	a = `0`,
		void *	b = `0`
	)

◆ copyGenericColorSpinorQS()

void quda::copyGenericColorSpinorQS	(	ColorSpinorField &	,
		const ColorSpinorField &	,
		QudaFieldLocation	,
		void *	,
		void *	,
		void *	a = `0`,
		void *	b = `0`
	)

◆ copyGenericColorSpinorSD()

void quda::copyGenericColorSpinorSD	(	ColorSpinorField &	,
		const ColorSpinorField &	,
		QudaFieldLocation	,
		void *	,
		void *	,
		void *	a = `0`,
		void *	b = `0`
	)

◆ copyGenericColorSpinorSH()

void quda::copyGenericColorSpinorSH	(	ColorSpinorField &	,
		const ColorSpinorField &	,
		QudaFieldLocation	,
		void *	,
		void *	,
		void *	a = `0`,
		void *	b = `0`
	)

◆ copyGenericColorSpinorSQ()

void quda::copyGenericColorSpinorSQ	(	ColorSpinorField &	,
		const ColorSpinorField &	,
		QudaFieldLocation	,
		void *	,
		void *	,
		void *	a = `0`,
		void *	b = `0`
	)

◆ copyGenericColorSpinorSS()

void quda::copyGenericColorSpinorSS	(	ColorSpinorField &	,
		const ColorSpinorField &	,
		QudaFieldLocation	,
		void *	,
		void *	,
		void *	a = `0`,
		void *	b = `0`
	)

◆ copyGenericGauge()

void quda::copyGenericGauge	(	GaugeField &	out,
		const GaugeField &	in,
		QudaFieldLocation	location,
		void *	Out = `0`,
		void *	In = `0`,
		void **	ghostOut = `0`,
		void **	ghostIn = `0`,
		int	type = `0`
	)

This generic function is used for copying the gauge field (body and/or ghost zones, as selected by type), where the input and output can be in any order and location. Defined in copy_gauge.cu.

Parameters

out	The output field to which we are copying
in	The input field from which we are copying
location	The location of where we are doing the copying (CPU or CUDA)
Out	The output buffer (optional)
In	The input buffer (optional)
ghostOut	The output ghost buffer (optional)
ghostIn	The input ghost buffer (optional)
type	The type of copy we are doing (0: body and ghost, otherwise ghost only)

Definition at line 44 of file copy_gauge.cpp.

◆ copyGenericGaugeDoubleIn()

void quda::copyGenericGaugeDoubleIn	(	GaugeField &	out,
		const GaugeField &	in,
		QudaFieldLocation	location,
		void *	Out,
		void *	In,
		void **	ghostOut,
		void **	ghostIn,
		int	type
	)

◆ copyGenericGaugeHalfIn()

void quda::copyGenericGaugeHalfIn	(	GaugeField &	out,
		const GaugeField &	in,
		QudaFieldLocation	location,
		void *	Out,
		void *	In,
		void **	ghostOut,
		void **	ghostIn,
		int	type
	)

◆ copyGenericGaugeMG()

void quda::copyGenericGaugeMG	(	GaugeField &	out,
		const GaugeField &	in,
		QudaFieldLocation	location,
		void *	Out,
		void *	In,
		void **	ghostOut,
		void **	ghostIn,
		int	type
	)

◆ copyGenericGaugeQuarterIn()

void quda::copyGenericGaugeQuarterIn	(	GaugeField &	out,
		const GaugeField &	in,
		QudaFieldLocation	location,
		void *	Out,
		void *	In,
		void **	ghostOut,
		void **	ghostIn,
		int	type
	)

◆ copyGenericGaugeSingleIn()

void quda::copyGenericGaugeSingleIn	(	GaugeField &	out,
		const GaugeField &	in,
		QudaFieldLocation	location,
		void *	Out,
		void *	In,
		void **	ghostOut,
		void **	ghostIn,
		int	type
	)

◆ copyLinkToArray() [1/2]

void quda::copyLinkToArray	(	float *	array,
		const Matrix< float2, 3 > &	link
	)

inline

Definition at line 890 of file quda_matrix.h.

◆ copyLinkToArray() [2/2]

template<class Cmplx , class Real >

void quda::copyLinkToArray	(	Real *	array,
		const Matrix< Cmplx, 3 > &	link
	)

inline

Definition at line 903 of file quda_matrix.h.

◆ cos() [1/3]

template<>

__host__ __device__ complex<float> quda::cos ( const complex< float > & z )

inline

Definition at line 1121 of file complex_quda.h.

◆ cos() [2/3]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::cos ( const complex< ValueType > & z )

inline

Definition at line 1113 of file complex_quda.h.

◆ cos() [3/3]

template<typename ValueType >

__host__ __device__ ValueType quda::cos ( ValueType x )

inline

Definition at line 46 of file complex_quda.h.

◆ cosh() [1/3]

template<>

__host__ __device__ complex<float> quda::cosh ( const complex< float > & z )

inline

Definition at line 1137 of file complex_quda.h.

◆ cosh() [2/3]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::cosh ( const complex< ValueType > & z )

inline

Definition at line 1129 of file complex_quda.h.

◆ cosh() [3/3]

template<typename ValueType >

__host__ __device__ ValueType quda::cosh ( ValueType x )

inline

Definition at line 81 of file complex_quda.h.

◆ create_gauge_buffer()

void * quda::create_gauge_buffer	(	size_t	bytes,
		QudaGaugeFieldOrder	order,
		QudaFieldGeometry	geometry
	)

Definition at line 492 of file cuda_gauge_field.cpp.

◆ create_ghost_buffer()

void ** quda::create_ghost_buffer	(	size_t	bytes[],
		QudaGaugeFieldOrder	order,
		QudaFieldGeometry	geometry
	)

Definition at line 503 of file cuda_gauge_field.cpp.

◆ createDirac()

void quda::createDirac	(	Dirac *&	d,
		Dirac *&	dSloppy,
		Dirac *&	dPre,
		QudaInvertParam &	param,
		const bool	pc_solve
	)

Create the Dirac operator. By default, we also create operators with possibly different precisions: Sloppy, and Preconditioner.

Parameters

	[in/out]	d User prec
	[in/out]	dSloppy Sloppy prec
	[in/out]	dPre Preconditioner prec
[in]	param	Invert param container
[in]	pc_solve	Whether or not to perform an even/odd preconditioned solve

Definition at line 1787 of file interface_quda.cpp.

◆ createDiracWithEig()

void quda::createDiracWithEig	(	Dirac *&	d,
		Dirac *&	dSloppy,
		Dirac *&	dPre,
		Dirac *&	dRef,
		QudaInvertParam &	param,
		const bool	pc_solve
	)

Create the Dirac operator. By default, we also create operators with possibly different precisions: Sloppy, and Preconditioner. This function also creates a dirac operator for an eigensolver that creates a deflation space, dEig. We may not use dPrecon for this as, for example, the MSPCG solver uses dPrecon for a different purpose.

Parameters

	[in/out]	d User prec
	[in/out]	dSloppy Sloppy prec
	[in/out]	dPre Preconditioner prec
	[in/out]	dEig Eigensolver prec
[in]	param	Invert param container
[in]	pc_solve	Whether or not to perform an even/odd preconditioned solve

Definition at line 1825 of file interface_quda.cpp.

◆ createDiracWithRefine()

void quda::createDiracWithRefine	(	Dirac *&	d,
		Dirac *&	dSloppy,
		Dirac *&	dPre,
		Dirac *&	dRef,
		QudaInvertParam &	param,
		const bool	pc_solve
	)

Create the Dirac operator. By default, we also create operators with possibly different precisions: Sloppy, and Preconditioner. This function also creates a dirac operator for refinement, dRef, used in invertMultiShiftQuda().

Parameters

	[in/out]	d User prec
	[in/out]	dSloppy Sloppy prec
	[in/out]	dPre Preconditioner prec
	[in/out]	dRef Refine prec (EigCG and deflation)
[in]	param	Invert param container
[in]	pc_solve	Whether or not to perform an even/odd preconditioned solve

Definition at line 1804 of file interface_quda.cpp.

◆ createDslashEvents()

void quda::createDslashEvents ( )

◆ createExtendedGauge() [1/2]

cudaGaugeField * quda::createExtendedGauge	(	cudaGaugeField &	in,
		const int *	R,
		TimeProfile &	profile,
		bool	redundant_comms = `false`,
		QudaReconstructType	recon = `QUDA_RECONSTRUCT_INVALID`
	)

This function is used for creating an extended gauge field from the input, and copying the gauge field into the extended gauge field. Defined in lib/gauge_field.cpp.

Parameters

in	The input field from which we are extending
R	By how many do we want to extend the gauge field in each direction
profile	The `TimeProfile`
redundant_comms
recon	The reconstruction type

Returns: the pointer to the extended gauge field

Definition at line 364 of file gauge_field.cpp.

◆ createExtendedGauge() [2/2]

cpuGaugeField * quda::createExtendedGauge	(	void **	gauge,
		QudaGaugeParam &	gauge_param,
		const int *	R
	)

This function is used for creating an extended (cpu) gauge field from the input, and copying the gauge field into the extended gauge field. Defined in lib/gauge_field.cpp.

Parameters

in	The input field from which we are extending
R	By how many do we want to extend the gauge field in each direction

Returns: the pointer to the extended gauge field

Definition at line 393 of file gauge_field.cpp.

◆ crossProduct()

template<typename Float , int Ns>

__device__ __host__ ColorSpinor<Float, 3, 1> quda::crossProduct	(	const ColorSpinor< Float, 3, Ns > &	a,
		const ColorSpinor< Float, 3, Ns > &	b,
		int	sa,
		int	sb
	)

inline

Compute the cross product of two color vectors at spin sa and sb: cProd_i = \sum_{j,k} \epsilon_{ijk} a(sa,j) b(sb,k). NB: Implemented for Nc=3 only.

Parameters

a	j ColorSpinor
b	k ColorSpinor
sa	j spin index
sb	k spin index

Returns: The cross product

Definition at line 1017 of file color_spinor.h.

◆ d2i()

__device__ __host__ int quda::d2i ( double d )

inline

Definition at line 45 of file convert.h.

◆ destroyDslashEvents()

void quda::destroyDslashEvents ( )

◆ device_allocated()

size_t quda::device_allocated ( )

Returns: device memory allocated

Definition at line 69 of file malloc.cpp.

◆ device_allocated_peak()

long quda::device_allocated_peak ( )

Returns: peak device memory allocated

Definition at line 79 of file malloc.cpp.

◆ device_comms_pinned_free_()

void quda::device_comms_pinned_free_	(	const char *	func,
		const char *	file,
		int	line,
		void *	ptr
	)

Free device comms memory allocated with device_comms_pinned_malloc(). This function should only be called via the device_comms_pinned_free() macro, defined in malloc_quda.h

Definition at line 530 of file malloc.cpp.

◆ device_comms_pinned_malloc_()

void * quda::device_comms_pinned_malloc_	(	const char *	func,
		const char *	file,
		int	line,
		size_t	size
	)

Allocate pinned or symmetric (shmem) device memory for comms. This function should only be called via the device_comms_pinned_malloc() macro, defined in malloc_quda.h

Definition at line 401 of file malloc.cpp.

◆ device_free_()

void quda::device_free_	(	const char *	func,
		const char *	file,
		int	line,
		void *	ptr
	)

Free device memory allocated with device_malloc(). This function should only be called via the device_free() macro, defined in malloc_quda.h

Definition at line 415 of file malloc.cpp.

◆ device_malloc_()

void * quda::device_malloc_	(	const char *	func,
		const char *	file,
		int	line,
		size_t	size
	)

Perform a standard cudaMalloc() with error-checking. This function should only be called via the device_malloc() macro, defined in malloc_quda.h

Definition at line 223 of file malloc.cpp.

◆ device_pinned_free_()

void quda::device_pinned_free_	(	const char *	func,
		const char *	file,
		int	line,
		void *	ptr
	)

Free device memory allocated with device_pinned_malloc(). This function should only be called via the device_pinned_free() macro, defined in malloc_quda.h

Definition at line 440 of file malloc.cpp.

◆ device_pinned_malloc_()

void * quda::device_pinned_malloc_	(	const char *	func,
		const char *	file,
		int	line,
		size_t	size
	)

Perform a cuMemAlloc with error-checking. This function is to guarantee a unique memory allocation on the device, since cudaMalloc can be redirected (as is the case with QDPJIT). This should only be called via the device_pinned_malloc() macro, defined in malloc_quda.h.

Definition at line 255 of file malloc.cpp.

◆ disableProfileCount()

void quda::disableProfileCount ( )

Disable the profile kernel counting.

Definition at line 141 of file tune.cpp.

◆ dynamic_clover_inverse()

constexpr bool quda::dynamic_clover_inverse ( )

constexpr

Helper function that returns whether we have enabled dynamic clover inversion or not.

Definition at line 518 of file clover_field.h.

◆ enableProfileCount()

void quda::enableProfileCount ( )

Enable the profile kernel counting.

Definition at line 142 of file tune.cpp.

◆ ErrorSU3()

template<class Cmplx >

__device__ __host__ double quda::ErrorSU3 ( const Matrix< Cmplx, 3 > & matrix )

Definition at line 962 of file quda_matrix.h.

◆ exchangeExtendedGhost()

void quda::exchangeExtendedGhost	(	cudaColorSpinorField *	spinor,
		int	R[],
		int	parity,
		qudaStream_t *	stream_p
	)

◆ exp() [1/3]

template<>

__host__ __device__ complex<float> quda::exp ( const complex< float > & z )

inline

Definition at line 1152 of file complex_quda.h.

◆ exp() [2/3]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::exp ( const complex< ValueType > & z )

inline

Definition at line 1146 of file complex_quda.h.

◆ exp() [3/3]

template<typename ValueType >

__host__ __device__ ValueType quda::exp ( ValueType x )

inline

Definition at line 96 of file complex_quda.h.

◆ exponentiate_iQ()

template<class T >

__device__ __host__ auto quda::exponentiate_iQ ( const Matrix< T, 3 > & Q )

inline

Definition at line 987 of file quda_matrix.h.

◆ expsu3()

template<typename Float >

__device__ __host__ void quda::expsu3 ( Matrix< complex< Float >, 3 > & q )

Direct port of the TIFR expsu3 algorithm

Definition at line 1124 of file quda_matrix.h.

◆ extractExtendedGaugeGhost()

void quda::extractExtendedGaugeGhost	(	const GaugeField &	u,
		int	dim,
		const int *	R,
		void **	ghost,
		bool	extract
	)

This function is used for extracting the gauge ghost zone from a gauge field array. Defined in extract_gauge_ghost.cu.

Parameters

u	The gauge field from which we want to extract/pack the ghost zone
dim	The dimension in which we are packing/unpacking
ghost	The array where we want to pack/unpack the ghost zone into/from
extract	Whether we are extracting into ghost or injecting from ghost

◆ extractGaugeGhost()

void quda::extractGaugeGhost	(	const GaugeField &	u,
		void **	ghost,
		bool	extract = `true`,
		int	offset = `0`
	)

This function is used for extracting the gauge ghost zone from a gauge field array. Defined in extract_gauge_ghost.cu.

Parameters

u	The gauge field from which we want to extract the ghost zone
ghost	The array where we want to pack the ghost zone into
extract	Whether we are extracting into ghost or injecting from ghost
offset	By default we exchange the nDim site-vector of links in the first nDim dimensions; offset allows us to instead exchange the links in nDim+offset dimensions. This is used to facilitate sending bi-directional links, which is needed for the coarse links.

◆ f2i()

__device__ __host__ int quda::f2i ( float f )

inline

Definition at line 34 of file convert.h.

◆ fatKSLink()

void quda::fatKSLink	(	GaugeField *	fat,
		const GaugeField &	u,
		const double *	coeff
	)

Compute the fat links for improved staggered (Kogut-Susskind) fermions.

Parameters

fat[out]	The computed fat link
u[in]	The input gauge field
coeff[in]	Array of path coefficients

◆ file_name()

constexpr const char* quda::file_name ( const char * str )

inlineconstexpr

Definition at line 83 of file malloc_quda.h.

◆ fillFGMResDRInnerSolveParam()

void quda::fillFGMResDRInnerSolveParam	(	SolverParam &	inner,
		const SolverParam &	outer
	)

Definition at line 197 of file inv_gmresdr_quda.cpp.

◆ fillInnerSolveParam()

void quda::fillInnerSolveParam	(	SolverParam &	inner,
		const SolverParam &	outer
	)

Definition at line 25 of file inv_gcr_quda.cpp.

◆ flushForceMonitor()

void quda::flushForceMonitor ( )

Flush any outstanding force monitoring information.

◆ flushProfile()

void quda::flushProfile ( )

Flush profile contents, setting all counts to zero.

Definition at line 522 of file tune.cpp.

◆ forceMonitor()

bool quda::forceMonitor ( )

Whether we are monitoring the force or not.

Returns: Boolean whether we are monitoring the force

◆ free_gauge_buffer()

void quda::free_gauge_buffer	(	void *	buffer,
		QudaGaugeFieldOrder	order,
		QudaFieldGeometry	geometry
	)

Definition at line 515 of file cuda_gauge_field.cpp.

◆ free_ghost_buffer()

void quda::free_ghost_buffer	(	void **	buffer,
		QudaGaugeFieldOrder	order,
		QudaFieldGeometry	geometry
	)

Definition at line 524 of file cuda_gauge_field.cpp.

◆ gamma5()

void quda::gamma5	(	ColorSpinorField &	out,
		const ColorSpinorField &	in
	)

Applies a gamma5 matrix to a spinor (wrapper to ApplyGamma)

Parameters

[out]	out	Output field
[in]	in	Input field

◆ gaugeFixingFFT()

void quda::gaugeFixingFFT	(	GaugeField &	data,
		const int	gauge_dir,
		const int	Nsteps,
		const int	verbose_interval,
		const double	alpha,
		const int	autotune,
		const double	tolerance,
		const int	stopWtheta
	)

Gauge fixing with Steepest descent method with FFTs with support for single GPU only.

Parameters

[in,out]	data	quda gauge field
[in]	gauge_dir	3 for Coulomb gauge fixing, other for Landau gauge fixing
[in]	Nsteps	maximum number of steps to perform gauge fixing
[in]	verbose_interval	print gauge fixing info when iteration count is a multiple of this
[in]	alpha	gauge fixing parameter of the method, most common value is 0.08
[in]	autotune	1 to autotune the method, i.e., if the Fg inverts its tendency we decrease the alpha value
[in]	tolerance	tolerance value to stop the method, if this value is zero then the method stops when the iteration count reaches the maximum number of steps defined by Nsteps
[in]	stopWtheta	0 for MILC criterion and 1 to use the theta value

◆ gaugeFixingOVR()

void quda::gaugeFixingOVR	(	GaugeField &	data,
		const int	gauge_dir,
		const int	Nsteps,
		const int	verbose_interval,
		const double	relax_boost,
		const double	tolerance,
		const int	reunit_interval,
		const int	stopWtheta
	)

Gauge fixing with overrelaxation with support for single and multi GPU.

Parameters

[in,out]	data	quda gauge field
[in]	gauge_dir	3 for Coulomb gauge fixing, other for Landau gauge fixing
[in]	Nsteps	maximum number of steps to perform gauge fixing
[in]	verbose_interval	print gauge fixing info when iteration count is a multiple of this
[in]	relax_boost	gauge fixing parameter of the overrelaxation method, most common value is 1.5 or 1.7.
[in]	tolerance	tolerance value to stop the method, if this value is zero then the method stops when the iteration count reaches the maximum number of steps defined by Nsteps
[in]	reunit_interval	reunitarize gauge field when iteration count is a multiple of this
[in]	stopWtheta	0 for MILC criterion and 1 to use the theta value

◆ gaugeForce()

void quda::gaugeForce	(	GaugeField &	mom,
		const GaugeField &	u,
		double	coeff,
		int ***	input_path,
		int *	length,
		double *	path_coeff,
		int	num_paths,
		int	max_length
	)

Compute the gauge-force contribution to the momentum.

Parameters

[out]	mom	Momentum field
[in]	u	Gauge field (extended when running on multiple GPUs)
[in]	coeff	Step-size coefficient
[in]	input_path	Host-array holding all path contributions for the gauge action
[in]	length	Host array holding the length of all paths
[in]	path_coeff	Coefficient of each path
[in]	num_paths	Number of paths
[in]	max_length	Maximum length of each path

◆ gaugeGauss() [1/2]

void quda::gaugeGauss	(	GaugeField &	U,
		RNG &	rngstate,
		double	epsilon
	)

Generate Gaussian distributed su(N) or SU(N) fields. If U is a momentum field, then we generate a random Gaussian distributed field in the Lie algebra using the anti-Hermitian convention. If U is in the group then we create a Gaussian distributed su(n) field and exponentiate it, e.g., U = exp(sigma * H), where H is the distributed su(n) field and sigma is the width of the distribution (sigma = 0 results in a free field, and sigma = 1 has maximum disorder).

Parameters

[out]	U	The output gauge field
[in]	rngstate	random states
[in]	sigma	Width of Gaussian distribution

◆ gaugeGauss() [2/2]

void quda::gaugeGauss	(	GaugeField &	U,
		unsigned long long	seed,
		double	epsilon
	)

Generate Gaussian distributed su(N) or SU(N) fields. If U is a momentum field, then we generate a random Gaussian distributed field in the Lie algebra using the anti-Hermitian convention. If U is in the group then we create a Gaussian distributed su(n) field and exponentiate it, e.g., U = exp(sigma * H), where H is the distributed su(n) field and sigma is the width of the distribution (sigma = 0 results in a free field, and sigma = 1 has maximum disorder).

Parameters

[out]	U	The GaugeField
[in]	seed	The seed used for the RNG
[in]	sigma	Width of the Gaussian distribution

◆ gaugeObservables()

void quda::gaugeObservables	(	GaugeField &	u,
		QudaGaugeObservableParam &	param,
		TimeProfile &	profile
	)

Calculates a variety of gauge-field observables.

Parameters

[in]	u	Gauge field upon which we are measuring.
[in,out]	param	Parameter struct that defines which observables we are making and the resulting observables.
[in]	profile	TimeProfile instance used for profiling.

Definition at line 7 of file gauge_observable.cpp.

◆ genericCompare()

int quda::genericCompare	(	const cpuColorSpinorField &	a,
		const cpuColorSpinorField &	b,
		int	tol
	)

◆ genericCudaPrintVector()

void quda::genericCudaPrintVector	(	const cudaColorSpinorField &	a,
		unsigned	x
	)

◆ genericPackGhost()

void quda::genericPackGhost	(	void **	ghost,
		const ColorSpinorField &	a,
		QudaParity	parity,
		int	nFace,
		int	dagger,
		MemoryLocation *	destination = `nullptr`
	)

Generic ghost packing routine.

Parameters

[out]	ghost	Array of packed ghosts with array ordering [2*dim+dir]
[in]	a	Input field that is being packed
[in]	parity	Which parity are we packing
[in]	dagger	Is for a dagger operator (presently ignored)
[in]	location	Array specifying the memory location of each resulting ghost [2*dim+dir]

◆ genericPrintVector()

void quda::genericPrintVector	(	const cpuColorSpinorField &	a,
		unsigned int	x
	)

◆ genericSource()

void quda::genericSource	(	cpuColorSpinorField &	a,
		QudaSourceType	sourceType,
		int	x,
		int	s,
		int	c
	)

◆ get_mapped_device_pointer_()

void * quda::get_mapped_device_pointer_	(	const char *	func,
		const char *	file,
		int	line,
		const void *	ptr
	)

Definition at line 590 of file malloc.cpp.

◆ get_pointer_location()

QudaFieldLocation quda::get_pointer_location ( const void * ptr )

Definition at line 566 of file malloc.cpp.

◆ getDeterminant()

template<template< typename, int > class Mat, class T >

__device__ __host__ T quda::getDeterminant ( const Mat< T, 3 > & a )

inline

Definition at line 417 of file quda_matrix.h.

◆ getDslashLaunch()

bool quda::getDslashLaunch ( )

◆ getKernelPackT()

bool quda::getKernelPackT ( )

Returns: Whether the T dimension is kernel packed or not

◆ getLinkDeterminant()

double2 quda::getLinkDeterminant ( GaugeField & data )

Calculate the Determinant.

Parameters

[in] data Gauge field

Returns: double2 complex Determinant value

◆ getLinkTrace()

double2 quda::getLinkTrace ( GaugeField & data )

Calculate the Trace.

Parameters

[in] data Gauge field

Returns: double2 complex trace value

◆ getRealTraceUVdagger()

template<class T >

__device__ __host__ double quda::getRealTraceUVdagger	(	const Matrix< T, 3 > &	a,
		const Matrix< T, 3 > &	b
	)

inline

Definition at line 931 of file quda_matrix.h.

◆ getSubTraceUnit()

template<class T >

__device__ __host__ Matrix<T,3> quda::getSubTraceUnit ( const Matrix< T, 3 > & a )

inline

Definition at line 915 of file quda_matrix.h.

◆ getTrace()

template<class T >

__device__ __host__ T quda::getTrace ( const Matrix< T, 3 > & a )

inline

Definition at line 410 of file quda_matrix.h.

◆ getTuneCache()

const map & quda::getTuneCache ( )

Returns a reference to the tunecache map.

Returns: tunecache reference

Definition at line 144 of file tune.cpp.

◆ host_allocated()

size_t quda::host_allocated ( )

Returns: host memory allocated

Definition at line 77 of file malloc.cpp.

◆ host_allocated_peak()

long quda::host_allocated_peak ( )

Returns: peak host memory allocated

Definition at line 87 of file malloc.cpp.

◆ host_free_()

void quda::host_free_	(	const char *	func,
		const char *	file,
		int	line,
		void *	ptr
	)

Free host memory allocated with safe_malloc(), pinned_malloc(), or mapped_malloc(). This function should only be called via the host_free() macro, defined in malloc_quda.h

Definition at line 477 of file malloc.cpp.

◆ i2f()

template<typename T >

__host__ __device__ float quda::i2f ( T a )

inline

Definition at line 18 of file convert.h.

◆ i32toa()

void quda::i32toa	(	char *	buffer,
		int32_t	value
	)

inline

Definition at line 117 of file uint_to_char.h.

◆ i64toa()

void quda::i64toa	(	char *	buffer,
		int64_t	value
	)

inline

Definition at line 284 of file uint_to_char.h.

◆ i_()

template<typename real >

__host__ __device__ complex<real> quda::i_ ( const complex< real > & a )

inline

Definition at line 1378 of file complex_quda.h.

◆ impliedParityFromMatPC()

constexpr QudaParity quda::impliedParityFromMatPC ( const QudaMatPCType & matpc_type )

constexpr

Helper function for getting the implied spinor parity from a matrix preconditioning type.

Parameters

[in] matpc_type The matrix preconditioning type

Returns: Even or Odd as appropriate, invalid if the preconditioning type is invalid (implicitly non-preconditioned)

Definition at line 59 of file color_spinor_field.h.

◆ index_from_coordinate()

constexpr int quda::index_from_coordinate	(	CommKey	coord,
		CommKey	dim
	)

inlineconstexpr

Definition at line 84 of file comm_key.h.

◆ init_value()

template<typename T >

constexpr T quda::init_value ( )

constexpr

The initialization value we used to check for completion.

Definition at line 38 of file reduce_helper.h.

◆ InitGaugeField() [1/2]

void quda::InitGaugeField ( GaugeField & data )

Perform a cold start to the gauge field, identity SU(3) matrix, also fills the ghost links in multi-GPU case (no need to exchange data)

Parameters

[in,out] data Gauge field

◆ InitGaugeField() [2/2]

void quda::InitGaugeField	(	GaugeField &	data,
		RNG &	rngstate
	)

Perform a hot start to the gauge field, random SU(3) matrix, followed by reunitarization, also exchange borders links in multi-GPU case.

Parameters

[in,out]	data	Gauge field
[in,out]	rngstate	state of the CURAND random number generator

◆ innerProduct() [1/4]

template<typename Float , int Nc, int Ns>

__device__ __host__ complex<Float> quda::innerProduct	(	const ColorSpinor< Float, Nc, Ns > &	a,
		const ColorSpinor< Float, Nc, Ns > &	b
	)

inline

Compute the inner product over color and spin: dot = \sum_{s,c} conj(a(s,c)) * b(s,c)

Parameters

a	Left-hand side ColorSpinor
b	Right-hand side ColorSpinor

Returns: The inner product

Definition at line 913 of file color_spinor.h.

◆ innerProduct() [2/4]

template<typename Float , int Nc, int Ns>

__device__ __host__ complex<Float> quda::innerProduct	(	const ColorSpinor< Float, Nc, Ns > &	a,
		const ColorSpinor< Float, Nc, Ns > &	b,
		int	s
	)

inline

Compute the inner product over color at spin s between two ColorSpinor fields dot = \sum_c conj(a(s,c)) * b(s,c)

Parameters

a	Left-hand side ColorSpinor
b	Right-hand side ColorSpinor
s	diagonal spin index

Returns: The inner product

Definition at line 953 of file color_spinor.h.

◆ innerProduct() [3/4]

template<typename Float , int Nc, int Ns>

__device__ __host__ complex<Float> quda::innerProduct	(	const ColorSpinor< Float, Nc, Ns > &	a,
		const ColorSpinor< Float, Nc, Ns > &	b,
		int	sa,
		int	sb
	)

inline

Compute the inner product over color at spin sa and sb between two ColorSpinor fields: dot = \sum_c conj(a(sa,c)) * b(sb,c)

Parameters

a	Left-hand side ColorSpinor
b	Right-hand side ColorSpinor
sa	Left-hand side spin index
sb	Right-hand side spin index

Returns: The inner product

Definition at line 969 of file color_spinor.h.

◆ innerProduct() [4/4]

template<typename Float , int Nc, int Nsa, int Nsb>

__device__ __host__ complex<Float> quda::innerProduct	(	const ColorSpinor< Float, Nc, Nsa > &	a,
		const ColorSpinor< Float, Nc, Nsb > &	b,
		int	sa,
		int	sb
	)

inline

Compute the inner product over color at spin sa and sb between color spinors a and b of different spin length: dot = \sum_c conj(a(sa,c)) * b(sb,c)

Parameters

a	Left-hand side ColorSpinor
b	Right-hand side ColorSpinor
sa	Left-hand side spin index
sb	Right-hand side spin index

Returns: The inner product

Definition at line 992 of file color_spinor.h.

◆ instantiate() [1/8]

template<template< typename > class Apply, typename C , typename... Args>

constexpr void quda::instantiate	(	C &	c,
		Args &&...	args
	)

constexpr

This instantiate function is used to instantiate the clover precision.

Parameters

[in]	c	CloverField we wish to instantiate
[in,out]	args	Any additional arguments required for the computation at hand

Definition at line 163 of file instantiate.h.

◆ instantiate() [2/8]

template<template< typename, int, QudaReconstructType > class Apply, typename Recon , typename Float , int nColor, typename... Args>

void quda::instantiate	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const GaugeField &	U,
		Args &&...	args
	)

inline

This instantiate function is used to instantiate the reconstruct types used.

Parameters

[out]	out	Output result field
[in]	in	Input field
[in]	U	Gauge field
[in]	args	Additional arguments for different dslash kernels

Definition at line 21 of file instantiate_dslash.h.

◆ instantiate() [3/8]

template<template< typename, int, QudaReconstructType > class Apply, typename Recon , typename Float , typename... Args>

void quda::instantiate	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const GaugeField &	U,
		Args &&...	args
	)

inline

This instantiate function is used to instantiate the colors.

Parameters

[out]	out	Output result field
[in]	in	Input field
[in]	U	Gauge field
[in]	args	Additional arguments for different dslash kernels

Definition at line 54 of file instantiate_dslash.h.

◆ instantiate() [4/8]

template<template< typename, int, QudaReconstructType > class Apply, typename Recon = WilsonReconstruct, typename... Args>

void quda::instantiate	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const GaugeField &	U,
		Args &&...	args
	)

inline

This instantiate function is used to instantiate the precisions.

Parameters

[out]	out	Output result field
[in]	in	Input field
[in]	U	Gauge field
[in]	args	Additional arguments for different dslash kernels

Definition at line 71 of file instantiate_dslash.h.

◆ instantiate() [5/8]

template<template< typename, int > class Apply, typename store_t , typename F , typename... Args>

constexpr void quda::instantiate	(	F &	field,
		Args &&...	args
	)

constexpr

This instantiate function is used to instantiate the colors.

Parameters

[in]	field	LatticeField we wish to instantiate
[in,out]	args	Additional arguments for kernels

Definition at line 200 of file instantiate.h.

◆ instantiate() [6/8]

template<template< typename, int > class Apply, typename F , typename... Args>

constexpr void quda::instantiate	(	F &	field,
		Args &&...	args
	)

constexpr

This instantiate function is used to instantiate the precision and number of colors.

Parameters

[in]	field	LatticeField we wish to instantiate
[in,out]	args	Any additional arguments required for the computation at hand

Definition at line 221 of file instantiate.h.

◆ instantiate() [7/8]

template<template< typename, int, QudaReconstructType > class Apply, typename Recon , typename Float , typename G , typename... Args>

constexpr void quda::instantiate	(	G &	U,
		Args &&...	args
	)

constexpr

This instantiate function is used to instantiate the colors.

Parameters

[in]	U	Gauge field
[in,out]	args	Additional arguments for kernels

Definition at line 117 of file instantiate.h.

◆ instantiate() [8/8]

template<template< typename, int, QudaReconstructType > class Apply, typename Recon = ReconstructFull, typename G , typename... Args>

constexpr void quda::instantiate	(	G &	U,
		Args &&...	args
	)

constexpr

This instantiate function is used to instantiate the precisions.

Parameters

[in]	U	Gauge field
[in,out]	args	Any additional arguments required for the computation at hand

Definition at line 134 of file instantiate.h.

◆ instantiatePrecision()

template<template< typename > class Apply, typename F , typename... Args>

constexpr void quda::instantiatePrecision	(	F &	field,
		Args &&...	args
	)

constexpr

The instantiatePrecision function is used to instantiate the precision. Note unlike the "instantiate" functions above, this helper always instantiates double precision regardless of the QUDA_PRECISION value: this enables its use for copy interface routines which should always enable double precision support.

Parameters

[in]	field	LatticeField we wish to instantiate
[in,out]	args	Any additional arguments required for the computation at hand

Definition at line 264 of file instantiate.h.

◆ instantiatePrecision2()

template<template< typename, typename > class Apply, typename T , typename F , typename... Args>

constexpr void quda::instantiatePrecision2	(	F &	field,
		Args &&...	args
	)

constexpr

The instantiatePrecision2 function is used to instantiate the precision for a class that accepts 2 typename arguments, with the first typename corresponding to the precision being instantiated at hand. This is useful for copy routines, where we need to instantiate a second, e.g., destination, precision after already instantiating the first, e.g., source, precision. Similar to the "instantiatePrecision" function above, this helper always instantiates double precision regardless of the QUDA_PRECISION value: this enables its use for copy interface routines which should always enable double precision support.

Parameters

[in]	field	LatticeField we wish to instantiate
[in,out]	args	Any additional arguments required for the computation at hand

Definition at line 309 of file instantiate.h.

◆ instantiatePrecisionMG()

template<template< typename > class Apply, typename F , typename... Args>

constexpr void quda::instantiatePrecisionMG	(	F &	field,
		Args &&...	args
	)

constexpr

The instantiatePrecisionMG function is used to instantiate the precision.

Parameters

[in]	field	LatticeField we wish to instantiate
[in,out]	args	Any additional arguments required for the computation at hand

Definition at line 345 of file instantiate.h.

◆ instantiatePreconditioner()

template<template< typename, int, QudaReconstructType > class Apply, typename Recon = WilsonReconstruct, typename... Args>

void quda::instantiatePreconditioner	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const GaugeField &	U,
		Args &&...	args
	)

inline

This instantiatePreconditioner function is used to instantiate the precisions for a preconditioner. This is the same as the instantiate helper above, except it only handles half and quarter precision.

Parameters

[out]	out	Output result field
[in]	in	Input field
[in]	U	Gauge field
[in]	args	Additional arguments for different dslash kernels

Definition at line 113 of file instantiate_dslash.h.

◆ inverse()

template<class T >

__device__ __host__ Matrix<T,3> quda::inverse ( const Matrix< T, 3 > & u )

inline

Definition at line 605 of file quda_matrix.h.

◆ is_aligned()

bool quda::is_aligned	(	const void *	ptr,
		size_t	alignment
	)

inline

Returns: whether the pointer is aligned

Definition at line 95 of file malloc_quda.h.

◆ is_enabled()

template<QudaReconstructType recon>

constexpr bool quda::is_enabled ( )

constexpr

Definition at line 10 of file instantiate.h.

◆ is_enabled< QUDA_RECONSTRUCT_12 >()

template<>

constexpr bool quda::is_enabled< QUDA_RECONSTRUCT_12 > ( )

constexpr

Definition at line 16 of file instantiate.h.

◆ is_enabled< QUDA_RECONSTRUCT_13 >()

template<>

constexpr bool quda::is_enabled< QUDA_RECONSTRUCT_13 > ( )

constexpr

Definition at line 15 of file instantiate.h.

◆ is_enabled< QUDA_RECONSTRUCT_8 >()

template<>

constexpr bool quda::is_enabled< QUDA_RECONSTRUCT_8 > ( )

constexpr

Definition at line 20 of file instantiate.h.

◆ is_enabled< QUDA_RECONSTRUCT_9 >()

template<>

constexpr bool quda::is_enabled< QUDA_RECONSTRUCT_9 > ( )

constexpr

Definition at line 19 of file instantiate.h.

◆ is_enabled< QUDA_RECONSTRUCT_NO >()

template<>

constexpr bool quda::is_enabled< QUDA_RECONSTRUCT_NO > ( )

constexpr

Definition at line 12 of file instantiate.h.

◆ is_prefetch_enabled()

bool quda::is_prefetch_enabled ( )

Returns: is prefetching support enabled (assumes managed memory is enabled)

Definition at line 198 of file malloc.cpp.

◆ isUnitary()

bool quda::isUnitary	(	const cpuGaugeField &	field,
		double	max_error
	)

◆ join_field()

template<class Field >

void quda::join_field	(	std::vector< Field * > &	v_base_field,
		const Field &	collect_field,
		const CommKey &	comm_key,
		QudaPCType	pc_type = `QUDA_4D_PC`
	)

inline

Definition at line 121 of file split_grid.h.

◆ Length_() [1/2]

int quda::Length_	(	const char *	func,
		const char *	file,
		int	line,
		const ColorSpinorField &	a,
		const ColorSpinorField &	b
	)

inline

Helper function for determining if the length of the fields is the same.

Parameters

[in]	a	Input field
[in]	b	Input field

Returns: If length is unique return the length

Definition at line 1207 of file color_spinor_field.h.

◆ Length_() [2/2]

template<typename... Args>

int quda::Length_	(	const char *	func,
		const char *	file,
		int	line,
		const ColorSpinorField &	a,
		const ColorSpinorField &	b,
		const Args &...	args
	)

inline

Helper function for determining if the length of the fields is the same.

Parameters

[in]	a	Input field
[in]	b	Input field
[in]	args	List of additional fields to check length on

Returns: If length is unique return the length

Definition at line 1225 of file color_spinor_field.h.

◆ load_cached_short2()

__device__ void quda::load_cached_short2	(	short2 &	a,
		const short2 *	addr
	)

inline

Definition at line 45 of file inline_ptx.h.

◆ load_cached_short4()

__device__ void quda::load_cached_short4	(	short4 &	a,
		const short4 *	addr
	)

inline

Definition at line 35 of file inline_ptx.h.

◆ load_global_float4()

__device__ void quda::load_global_float4	(	float4 &	a,
		const float4 *	addr
	)

inline

Definition at line 71 of file inline_ptx.h.

◆ load_global_short2()

__device__ void quda::load_global_short2	(	short2 &	a,
		const short2 *	addr
	)

inline

Definition at line 63 of file inline_ptx.h.

◆ load_global_short4()

__device__ void quda::load_global_short4	(	short4 &	a,
		const short4 *	addr
	)

inline

Definition at line 53 of file inline_ptx.h.

◆ load_streaming_double2()

__device__ void quda::load_streaming_double2	(	double2 &	a,
		const double2 *	addr
	)

inline

Definition at line 21 of file inline_ptx.h.

◆ load_streaming_float4()

__device__ void quda::load_streaming_float4	(	float4 &	a,
		const float4 *	addr
	)

inline

Definition at line 28 of file inline_ptx.h.

◆ loadTuneCache()

void quda::loadTuneCache ( )

Definition at line 337 of file tune.cpp.

◆ Location_() [1/2]

QudaFieldLocation quda::Location_	(	const char *	func,
		const char *	file,
		int	line,
		const LatticeField &	a,
		const LatticeField &	b
	)

inline

Helper function for determining if the location of the fields is the same.

Parameters

[in]	a	Input field
[in]	b	Input field

Returns: If location is unique return the location

Definition at line 738 of file lattice_field.h.

◆ Location_() [2/2]

template<typename... Args>

QudaFieldLocation quda::Location_	(	const char *	func,
		const char *	file,
		int	line,
		const LatticeField &	a,
		const LatticeField &	b,
		const Args &...	args
	)

inline

Helper function for determining if the location of the fields is the same.

Parameters

[in]	a	Input field
[in]	b	Input field
[in]	args	List of additional fields to check location on

Returns: If location is unique return the location

Definition at line 755 of file lattice_field.h.

◆ log() [1/3]

template<>

__host__ __device__ complex<float> quda::log ( const complex< float > & z )

inline

Definition at line 1164 of file complex_quda.h.

◆ log() [2/3]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::log ( const complex< ValueType > & z )

inline

Definition at line 1158 of file complex_quda.h.

◆ log() [3/3]

template<typename ValueType >

__host__ __device__ ValueType quda::log ( ValueType x )

inline

Definition at line 101 of file complex_quda.h.

◆ log10() [1/2]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::log10 ( const complex< ValueType > & z )

inline

Definition at line 1171 of file complex_quda.h.

◆ log10() [2/2]

template<typename ValueType >

__host__ __device__ ValueType quda::log10 ( ValueType x )

inline

Definition at line 106 of file complex_quda.h.

◆ longKSLink()

void quda::longKSLink	(	GaugeField *	lng,
		const GaugeField &	u,
		const double *	coeff
	)

Compute the long links for improved staggered (Kogut-Susskind) fermions.

Parameters

lng[out]	The computed long link (only computed if lng!=0)
u[in]	The input gauge field
coeff[in]	Array of path coefficients

◆ makeAntiHerm()

template<typename Complex , int N>

__device__ __host__ void quda::makeAntiHerm ( Matrix< Complex, N > & m )

inline

Definition at line 734 of file quda_matrix.h.

◆ makeHerm()

template<typename Complex , int N>

__device__ __host__ void quda::makeHerm ( Matrix< Complex, N > & m )

inline

Definition at line 750 of file quda_matrix.h.

◆ managed_allocated()

size_t quda::managed_allocated ( )

Definition at line 75 of file malloc.cpp.

◆ managed_allocated_peak()

long quda::managed_allocated_peak ( )

Definition at line 85 of file malloc.cpp.

◆ managed_free_()

void quda::managed_free_	(	const char *	func,
		const char *	file,
		int	line,
		void *	ptr
	)

Free managed memory allocated with managed_malloc(). This function should only be called via the managed_free() macro, defined in malloc_quda.h

Definition at line 461 of file malloc.cpp.

◆ managed_malloc_()

void * quda::managed_malloc_	(	const char *	func,
		const char *	file,
		int	line,
		size_t	size
	)

Perform a standard cudaMallocManaged() with error-checking. This function should only be called via the managed_malloc() macro, defined in malloc_quda.h

Definition at line 356 of file malloc.cpp.

◆ mapped_allocated()

size_t quda::mapped_allocated ( )

Returns: mapped memory allocated

Definition at line 73 of file malloc.cpp.

◆ mapped_allocated_peak()

long quda::mapped_allocated_peak ( )

Returns: peak mapped memory allocated

Definition at line 83 of file malloc.cpp.

◆ mapped_malloc_()

void * quda::mapped_malloc_	(	const char *	func,
		const char *	file,
		int	line,
		size_t	size
	)

Allocate page-locked ("pinned") host memory, and map it into the GPU address space. This function should only be called via the mapped_malloc() macro, defined in malloc_quda.h

Definition at line 324 of file malloc.cpp.

◆ massRescale()

void quda::massRescale	(	cudaColorSpinorField &	b,
		QudaInvertParam &	param,
		bool	for_multishift
	)

Definition at line 1846 of file interface_quda.cpp.

◆ max_n_reduce()

constexpr int quda::max_n_reduce ( )

constexpr

Definition at line 33 of file reduce_helper.h.

◆ Monte()

void quda::Monte	(	GaugeField &	data,
		RNG &	rngstate,
		double	Beta,
		int	nhb,
		int	nover
	)

Perform heatbath and overrelaxation. Performs nhb heatbath steps followed by nover overrelaxation steps.

Parameters

[in,out]	data	Gauge field
[in,out]	rngstate	state of the CURAND random number generator
[in]	Beta	inverse of the gauge coupling, beta = 2 Nc / g_0^2
[in]	nhb	number of heatbath steps
[in]	nover	number of overrelaxation steps

◆ mv_add()

template<typename Float , int Nc, int Ns>

__device__ __host__ ColorSpinor<Float,Nc,Ns> quda::mv_add	(	const Matrix< complex< Float >, Nc > &	A,
		const ColorSpinor< Float, Nc, Ns > &	x,
		const ColorSpinor< Float, Nc, Ns > &	y
	)

inline

Compute the matrix-vector product z = A * x + y.

Parameters

[in]	A	Input matrix
[in]	x	Input vector
[in]	y	Input vector

Returns: The vector z = A * x + y

Definition at line 1203 of file color_spinor.h.

◆ Native_() [1/2]

bool quda::Native_	(	const char *	func,
		const char *	file,
		int	line,
		const LatticeField &	a
	)

inline

Helper function for determining if the field is in native order.

Parameters

[in] a Input field

Returns: true if field is in native order

Definition at line 798 of file lattice_field.h.

◆ Native_() [2/2]

template<typename... Args>

bool quda::Native_	(	const char *	func,
		const char *	file,
		int	line,
		const LatticeField &	a,
		const Args &...	args
	)

inline

Helper function for determining if the fields are in native order.

Parameters

[in]	a	Input field
[in]	args	List of additional fields to check

Returns: true if all fields are in native order

Definition at line 811 of file lattice_field.h.

◆ norm()

template<typename ValueType >

__host__ __device__ ValueType quda::norm ( const complex< ValueType > & z )

inline

Returns the squared magnitude of z.

Definition at line 1088 of file complex_quda.h.

◆ norm1() [1/2]

double quda::norm1	(	const CloverField &	u,
		bool	inverse = `false`
	)

This is a debugging function, where we cast a clover field into a spinor field so we can compute its L1 norm.

Parameters

u	The clover field that we want the norm of

Returns: The L1 norm of the clover field

Definition at line 493 of file clover_field.cpp.

◆ norm1() [2/2]

double quda::norm1 ( const GaugeField & u )

This is a debugging function, where we cast a gauge field into a spinor field so we can compute its L1 norm.

Parameters

u	The gauge field that we want the norm of

Returns: The L1 norm of the gauge field

Definition at line 331 of file gauge_field.cpp.

◆ norm2() [1/2]

double quda::norm2	(	const CloverField &	a,
		bool	inverse = `false`
	)

This is a debugging function, where we cast a clover field into a spinor field so we can compute its L2 norm.

Parameters

a	The clover field that we want the norm of

Returns: The L2 norm squared of the clover field

Definition at line 485 of file clover_field.cpp.

◆ norm2() [2/2]

double quda::norm2 ( const GaugeField & u )

This is a debugging function, where we cast a gauge field into a spinor field so we can compute its L2 norm.

Parameters

u	The gauge field that we want the norm of

Returns: The L2 norm squared of the gauge field

Definition at line 323 of file gauge_field.cpp.

◆ operator!=() [1/3]

template<typename ValueType >

__host__ __device__ bool quda::operator!=	(	const complex< ValueType > &	lhs,
		const complex< ValueType > &	rhs
	)

inline

Definition at line 1031 of file complex_quda.h.

◆ operator!=() [2/3]

template<typename ValueType >

__host__ __device__ bool quda::operator!=	(	const complex< ValueType > &	lhs,
		const ValueType &	rhs
	)

inline

Definition at line 1043 of file complex_quda.h.

◆ operator!=() [3/3]

template<typename ValueType >

__host__ __device__ bool quda::operator!=	(	const ValueType &	lhs,
		const complex< ValueType > &	rhs
	)

inline

Definition at line 1037 of file complex_quda.h.

◆ operator%()

constexpr CommKey quda::operator%	(	const CommKey &	lhs,
		const CommKey &	rhs
	)

inline constexpr

Definition at line 51 of file comm_key.h.

◆ operator*() [1/18]

constexpr CommKey quda::operator*	(	const CommKey &	lhs,
		const CommKey &	rhs
	)

inline constexpr

Definition at line 37 of file comm_key.h.

◆ operator*() [2/18]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::operator*	(	const complex< ValueType > &	lhs,
		const complex< ValueType > &	rhs
	)

inline

Definition at line 898 of file complex_quda.h.

◆ operator*() [3/18]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::operator*	(	const complex< ValueType > &	lhs,
		const ValueType &	rhs
	)

inline

Definition at line 907 of file complex_quda.h.

◆ operator*() [4/18]

__host__ __device__ double2 quda::operator*	(	const double &	a,
		const double2 &	x
	)

inline

Definition at line 70 of file float_vector.h.

◆ operator*() [5/18]

__host__ __device__ double4 quda::operator*	(	const double &	a,
		const double4 &	x
	)

inline

Definition at line 78 of file float_vector.h.

◆ operator*() [6/18]

__host__ __device__ float2 quda::operator*	(	const float &	a,
		const float2 &	x
	)

inline

Definition at line 62 of file float_vector.h.

◆ operator*() [7/18]

__host__ __device__ float4 quda::operator*	(	const float &	a,
		const float4 &	x
	)

inline

Definition at line 52 of file float_vector.h.

◆ operator*() [8/18]

__host__ __device__ float8 quda::operator*	(	const float &	a,
		const float8 &	x
	)

inline

Definition at line 88 of file float_vector.h.

◆ operator*() [9/18]

template<typename Float , int Nc, int Ns>

__device__ __host__ ColorSpinor<Float,Nc,Ns> quda::operator*	(	const HMatrix< Float, Nc *Ns > &	A,
		const ColorSpinor< Float, Nc, Ns > &	x
	)

inline

Compute the matrix-vector product y = A * x.

Parameters

[in]	A	Input Hermitian matrix with dimensions NcxNs x NcxNs
[in]	x	Input vector

Returns: The vector A * x

Definition at line 1238 of file color_spinor.h.

◆ operator*() [10/18]

template<template< typename, int > class Mat, class T , int N>

__device__ __host__ Mat<T,N> quda::operator*	(	const Mat< T, N > &	a,
		const Mat< T, N > &	b
	)

inline

Generic implementation of matrix multiplication.

Definition at line 502 of file quda_matrix.h.

◆ operator*() [11/18]

template<template< typename, int > class Mat, class T , int N, class S >

__device__ __host__ Mat<T,N> quda::operator*	(	const Mat< T, N > &	a,
		const S &	scalar
	)

inline

Definition at line 479 of file quda_matrix.h.

◆ operator*() [12/18]

template<typename Float , int Nc, int Ns>

__device__ __host__ ColorSpinor<Float,Nc,Ns> quda::operator*	(	const Matrix< complex< Float >, Nc > &	A,
		const ColorSpinor< Float, Nc, Ns > &	x
	)

inline

Compute the matrix-vector product y = A * x.

Parameters

[in]	A	Input matrix
[in]	x	Input vector

Returns: The vector A * x

Definition at line 1167 of file color_spinor.h.

◆ operator*() [13/18]

template<template< typename > class complex, typename T , int N>

__device__ __host__ Matrix<complex<T>,N> quda::operator*	(	const Matrix< complex< T >, N > &	a,
		const Matrix< complex< T >, N > &	b
	)

inline

Specialization of complex matrix multiplication that will issue optimal fma instructions.

Definition at line 523 of file quda_matrix.h.

◆ operator*() [14/18]

template<class T >

__device__ __host__ Matrix<T,2> quda::operator*	(	const Matrix< T, 2 > &	a,
		const Matrix< T, 2 > &	b
	)

inline

Definition at line 577 of file quda_matrix.h.

◆ operator*() [15/18]

template<class T , class U , int N>

__device__ __host__ Matrix<typename PromoteTypeId<T, U>::type, N> quda::operator*	(	const Matrix< T, N > &	a,
		const Matrix< U, N > &	b
	)

inline

Definition at line 557 of file quda_matrix.h.

◆ operator*() [16/18]

template<typename Float , int Nc, int Ns, typename S >

__device__ __host__ ColorSpinor<Float,Nc,Ns> quda::operator*	(	const S &	a,
		const ColorSpinor< Float, Nc, Ns > &	x
	)

inline

Compute the scalar-vector product y = a * x.

Parameters

[in]	a	Input scalar
[in]	x	Input vector

Returns: The vector a * x

Definition at line 1145 of file color_spinor.h.

◆ operator*() [17/18]

template<template< typename, int > class Mat, class T , int N, class S >

__device__ __host__ Mat<T,N> quda::operator*	(	const S &	scalar,
		const Mat< T, N > &	a
	)

inline

Definition at line 471 of file quda_matrix.h.

◆ operator*() [18/18]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::operator*	(	const ValueType &	lhs,
		const complex< ValueType > &	rhs
	)

inline

Definition at line 914 of file complex_quda.h.

◆ operator*=() [1/8]

__host__ __device__ double2 quda::operator*=	(	double2 &	a,
		const double &	b
	)

inline

Definition at line 228 of file float_vector.h.

◆ operator*=() [2/8]

__host__ __device__ double2 quda::operator*=	(	double2 &	x,
		const float &	a
	)

inline

Definition at line 206 of file float_vector.h.

◆ operator*=() [3/8]

__host__ __device__ double4 quda::operator*=	(	double4 &	a,
		const double &	b
	)

inline

Definition at line 234 of file float_vector.h.

◆ operator*=() [4/8]

__host__ __device__ float2 quda::operator*=	(	float2 &	x,
		const float &	a
	)

inline

Definition at line 199 of file float_vector.h.

◆ operator*=() [5/8]

__host__ __device__ float4 quda::operator*=	(	float4 &	a,
		const float &	b
	)

inline

Definition at line 213 of file float_vector.h.

◆ operator*=() [6/8]

__host__ __device__ float8 quda::operator*=	(	float8 &	a,
		const float &	b
	)

inline

Definition at line 221 of file float_vector.h.

◆ operator*=() [7/8]

template<template< typename, int > class Mat, class T , int N, class S >

__device__ __host__ Mat<T,N> quda::operator*=	(	Mat< T, N > &	a,
		const S &	scalar
	)

inline

Definition at line 484 of file quda_matrix.h.

◆ operator*=() [8/8]

template<class T , int N>

__device__ __host__ Matrix<T,N> quda::operator*=	(	Matrix< T, N > &	a,
		const Matrix< T, N > &	b
	)

inline

Definition at line 547 of file quda_matrix.h.

◆ operator+() [1/15]

template<typename Float , int Nc, int Ns>

__device__ __host__ ColorSpinor<Float,Nc,Ns> quda::operator+	(	const ColorSpinor< Float, Nc, Ns > &	x,
		const ColorSpinor< Float, Nc, Ns > &	y
	)

inline

ColorSpinor addition operator.

Parameters

[in]	x	Input vector
[in]	y	Input vector

Returns: The vector x + y

Definition at line 1101 of file color_spinor.h.

◆ operator+() [2/15]

constexpr CommKey quda::operator+	(	const CommKey &	lhs,
		const CommKey &	rhs
	)

inline constexpr

Definition at line 30 of file comm_key.h.

◆ operator+() [3/15]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::operator+	(	const complex< ValueType > &	lhs,
		const complex< ValueType > &	rhs
	)

inline

Definition at line 850 of file complex_quda.h.

◆ operator+() [4/15]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::operator+	(	const complex< ValueType > &	lhs,
		const ValueType &	rhs
	)

inline

Definition at line 866 of file complex_quda.h.

◆ operator+() [5/15]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::operator+ ( const complex< ValueType > & rhs )

inline

Definition at line 992 of file complex_quda.h.

◆ operator+() [6/15]

__host__ __device__ double2 quda::operator+	(	const double2 &	x,
		const double2 &	y
	)

inline

Definition at line 14 of file float_vector.h.

◆ operator+() [7/15]

__host__ __device__ double3 quda::operator+	(	const double3 &	x,
		const double3 &	y
	)

inline

Definition at line 42 of file float_vector.h.

◆ operator+() [8/15]

__host__ __device__ double4 quda::operator+	(	const double4 &	x,
		const double4 &	y
	)

inline

Definition at line 47 of file float_vector.h.

◆ operator+() [9/15]

__host__ __device__ float2 quda::operator+	(	const float2 &	x,
		const float2 &	y
	)

inline

Definition at line 96 of file float_vector.h.

◆ operator+() [10/15]

__host__ __device__ float4 quda::operator+	(	const float4 &	x,
		const float4 &	y
	)

inline

Definition at line 104 of file float_vector.h.

◆ operator+() [11/15]

__host__ __device__ float8 quda::operator+	(	const float8 &	x,
		const float8 &	y
	)

inline

Definition at line 114 of file float_vector.h.

◆ operator+() [12/15]

template<template< typename, int > class Mat, class T , int N>

__device__ __host__ Mat<T,N> quda::operator+	(	const Mat< T, N > &	a,
		const Mat< T, N > &	b
	)

inline

Definition at line 428 of file quda_matrix.h.

◆ operator+() [13/15]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::operator+	(	const ValueType &	lhs,
		const complex< ValueType > &	rhs
	)

inline

Definition at line 872 of file complex_quda.h.

◆ operator+() [14/15]

template<typename scalar , int n>

__device__ __host__ vector_type<scalar, n> quda::operator+	(	const vector_type< scalar, n > &	a,
		const vector_type< scalar, n > &	b
	)

inline

Definition at line 415 of file float_vector.h.

◆ operator+() [15/15]

template<typename ValueType >

__host__ __device__ complex<ValueType> quda::operator+	(	const volatile complex< ValueType > &	lhs,
		const volatile complex< ValueType > &	rhs
	)

inline

Definition at line 858 of file complex_quda.h.

◆ operator+=() [1/8]

__host__ __device__ double2 quda::operator+=	(	double2 &	x,
		const double2 &	y
	)

inline

Definition at line 145 of file float_vector.h.

◆ operator+=() [2/8]

__host__ __device__ double3 quda::operator+=	(	double3 &	x,
		const double3 &	y
	)

inline

Definition at line 152 of file float_vector.h.

◆ operator+=() [3/8]

__host__ __device__ double4 quda::operator+=	(	double4 &	x,
		const double4 &	y
	)

inline

Definition at line 160 of file float_vector.h.

◆ operator+=() [4/8]

__host__ __device__ float2 quda::operator+=	(	float2 &	x,
		const float2 &	y
	)

inline

Definition at line 131 of file float_vector.h.

◆ operator+=() [5/8]

__host__ __device__ float4 quda::operator+=	(	float4 &	x,
		const float4 &	y
	)

inline

Definition at line 122 of file float_vector.h.

◆ operator+=() [6/8]

__host__ __device__ float8 quda::operator+=	(	float8 &	x,
		const float8 &	y
	)

inline

Definition at line 138 of file float_vector.h.

◆ operator+=() [7/8]

template<template< typename, int > class Mat, class T , int N>

__device__ __host__ Mat<T,N> quda::operator+=	(	Mat< T, N > &	a,
		const Mat< T, N > &	b
	)

inline

Definition at line 438 of file quda_matrix.h.

◆ operator+=() [8/8]

template<template< typename, int > class Mat, class T , int N>

__device__ __host__ Mat<T,N> quda::operator+=	(	Mat< T, N > &	a,
		const T &	b
	)

inline

Definition at line 446 of file quda_matrix.h.

◆ operator-() [1/13]

template<typename Float , int Nc, int Ns>

__device__ __host__ ColorSpinor<Float,Nc,Ns> quda::operator-	(	const ColorSpinor< Float, Nc, Ns > &	x,
		const ColorSpinor< Float, Nc, Ns > &	y
	)

inline

ColorSpinor subtraction operator.

Parameters

[in]	x	Input vector
[in]	y	Input vector

Returns: The vector x - y

Definition at line 1123 of file color_spinor.h.

◆ operator-() [2/13]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::operator-	(	const complex< ValueType > &	lhs,
		const complex< ValueType > &	rhs
	)

inline

Definition at line 879 of file complex_quda.h.

◆ operator-() [3/13]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::operator-	(	const complex< ValueType > &	lhs,
		const ValueType &	rhs
	)

inline

Definition at line 885 of file complex_quda.h.

◆ operator-() [4/13]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::operator- ( const complex< ValueType > & rhs )

inline

Definition at line 997 of file complex_quda.h.

◆ operator-() [5/13]

__host__ __device__ double2 quda::operator- ( const double2 & x )

inline

Definition at line 246 of file float_vector.h.

◆ operator-() [6/13]

__host__ __device__ double2 quda::operator-	(	const double2 &	x,
		const double2 &	y
	)

inline

Definition at line 19 of file float_vector.h.

◆ operator-() [7/13]

__host__ __device__ float2 quda::operator- ( const float2 & x )

inline

Definition at line 242 of file float_vector.h.

◆ operator-() [8/13]

__host__ __device__ float2 quda::operator-	(	const float2 &	x,
		const float2 &	y
	)

inline

Definition at line 24 of file float_vector.h.

◆ operator-() [9/13]

__host__ __device__ float4 quda::operator-	(	const float4 &	x,
		const float4 &	y
	)

inline

Definition at line 29 of file float_vector.h.

◆ operator-() [10/13]

__host__ __device__ float8 quda::operator-	(	const float8 &	x,
		const float8 &	y
	)

inline

Definition at line 34 of file float_vector.h.

◆ operator-() [11/13]

template<template< typename, int > class Mat, class T , int N>

__device__ __host__ Mat<T,N> quda::operator- ( const Mat< T, N > & a )

inline

Definition at line 490 of file quda_matrix.h.

◆ operator-() [12/13]

template<template< typename, int > class Mat, class T , int N>

__device__ __host__ Mat<T,N> quda::operator-	(	const Mat< T, N > &	a,
		const Mat< T, N > &	b
	)

inline

Definition at line 462 of file quda_matrix.h.

◆ operator-() [13/13]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::operator-	(	const ValueType &	lhs,
		const complex< ValueType > &	rhs
	)

inline

Definition at line 891 of file complex_quda.h.

◆ operator-=() [1/5]

__host__ __device__ double2 quda::operator-=	(	double2 &	x,
		const double2 &	y
	)

inline

Definition at line 192 of file float_vector.h.

◆ operator-=() [2/5]

__host__ __device__ float2 quda::operator-=	(	float2 &	x,
		const float2 &	y
	)

inline

Definition at line 178 of file float_vector.h.

◆ operator-=() [3/5]

__host__ __device__ float4 quda::operator-=	(	float4 &	x,
		const float4 &	y
	)

inline

Definition at line 169 of file float_vector.h.

◆ operator-=() [4/5]

__host__ __device__ float8 quda::operator-=	(	float8 &	x,
		const float8 &	y
	)

inline

Definition at line 185 of file float_vector.h.

◆ operator-=() [5/5]

template<template< typename, int > class Mat, class T , int N>

__device__ __host__ Mat<T,N> quda::operator-=	(	Mat< T, N > &	a,
		const Mat< T, N > &	b
	)

inline

Definition at line 454 of file quda_matrix.h.

◆ operator/() [1/8]

constexpr CommKey quda::operator/	(	const CommKey &	lhs,
		const CommKey &	rhs
	)

inline constexpr

Definition at line 44 of file comm_key.h.

◆ operator/() [2/8]

template<>

__host__ __device__ complex< double > quda::operator/	(	const complex< double > &	lhs,
		const complex< double > &	rhs
	)

inline

Definition at line 948 of file complex_quda.h.

◆ operator/() [3/8]

template<>

__host__ __device__ complex< float > quda::operator/	(	const complex< float > &	lhs,
		const complex< float > &	rhs
	)

inline

Definition at line 931 of file complex_quda.h.

◆ operator/() [4/8]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::operator/	(	const complex< ValueType > &	lhs,
		const complex< ValueType > &	rhs
	)

inline

Definition at line 922 of file complex_quda.h.

◆ operator/() [5/8]

template<typename ValueType >

__host__ __device__ complex<ValueType> quda::operator/	(	const complex< ValueType > &	lhs,
		const ValueType &	rhs
	)

inline

Definition at line 965 of file complex_quda.h.

◆ operator/() [6/8]

template<>

__host__ __device__ complex<double> quda::operator/	(	const double &	lhs,
		const complex< double > &	rhs
	)

inline

Definition at line 985 of file complex_quda.h.

◆ operator/() [7/8]

template<>

__host__ __device__ complex<float> quda::operator/	(	const float &	lhs,
		const complex< float > &	rhs
	)

inline

Definition at line 980 of file complex_quda.h.

◆ operator/() [8/8]

template<typename ValueType >

__host__ __device__ complex<ValueType> quda::operator/	(	const ValueType &	lhs,
		const complex< ValueType > &	rhs
	)

inline

Definition at line 972 of file complex_quda.h.

◆ operator<()

constexpr bool quda::operator<	(	const CommKey &	lhs,
		const CommKey &	rhs
	)

inline constexpr

Definition at line 58 of file comm_key.h.

◆ operator<<() [1/12]

template<typename ValueType , class charT , class traits >

std::basic_ostream< charT, traits > & quda::operator<<	(	std::basic_ostream< charT, traits > &	os,
		const complex< ValueType > &	z
	)

Definition at line 305 of file complex_quda.h.

◆ operator<<() [2/12]

template<class T , int N>

std::ostream& quda::operator<<	(	std::ostream &	os,
		const Array< T, N > &	a
	)

Definition at line 821 of file quda_matrix.h.

◆ operator<<() [3/12]

template<class T , int N>

std::ostream& quda::operator<<	(	std::ostream &	os,
		const Matrix< T, N > &	m
	)

Definition at line 807 of file quda_matrix.h.

◆ operator<<() [4/12]

std::ostream& quda::operator<<	(	std::ostream &	out,
		const ColorSpinorField &	a
	)

Definition at line 865 of file color_spinor_field.cpp.

◆ operator<<() [5/12]

std::ostream& quda::operator<<	(	std::ostream &	out,
		const cudaColorSpinorField &	a
	)

Definition at line 1199 of file cuda_color_spinor_field.cpp.

◆ operator<<() [6/12]

std::ostream & quda::operator<<	(	std::ostream &	output,
		const CloverFieldParam &	param
	)

Definition at line 441 of file clover_field.cpp.

◆ operator<<() [7/12]

std::ostream& quda::operator<<	(	std::ostream &	output,
		const double2 &	a
	)

inline

Definition at line 299 of file float_vector.h.

◆ operator<<() [8/12]

std::ostream& quda::operator<<	(	std::ostream &	output,
		const double3 &	a
	)

inline

Definition at line 305 of file float_vector.h.

◆ operator<<() [9/12]

std::ostream& quda::operator<<	(	std::ostream &	output,
		const double4 &	a
	)

inline

Definition at line 311 of file float_vector.h.

◆ operator<<() [10/12]

std::ostream & quda::operator<<	(	std::ostream &	output,
		const GaugeFieldParam &	param
	)

Definition at line 274 of file gauge_field.cpp.

◆ operator<<() [11/12]

std::ostream & quda::operator<<	(	std::ostream &	output,
		const LatticeFieldParam &	param
	)

Definition at line 727 of file lattice_field.cpp.

◆ operator<<() [12/12]

template<typename T , int n>

std::ostream& quda::operator<<	(	std::ostream &	output,
		const vector_type< T, n > &	a
	)

Definition at line 400 of file float_vector.h.

◆ operator==() [1/3]

template<typename ValueType >

__host__ __device__ bool quda::operator==	(	const complex< ValueType > &	lhs,
		const complex< ValueType > &	rhs
	)

inline

Definition at line 1004 of file complex_quda.h.

◆ operator==() [2/3]

template<typename ValueType >

__host__ __device__ bool quda::operator==	(	const complex< ValueType > &	lhs,
		const ValueType &	rhs
	)

inline

Definition at line 1021 of file complex_quda.h.

◆ operator==() [3/3]

template<typename ValueType >

__host__ __device__ bool quda::operator==	(	const ValueType &	lhs,
		const complex< ValueType > &	rhs
	)

inline

Definition at line 1013 of file complex_quda.h.

◆ operator>()

constexpr bool quda::operator>	(	const CommKey &	lhs,
		const CommKey &	rhs
	)

inline constexpr

Definition at line 66 of file comm_key.h.

◆ operator>>()

template<typename ValueType , typename charT , class traits >

std::basic_istream< charT, traits > & quda::operator>>	(	std::basic_istream< charT, traits > &	is,
		complex< ValueType > &	z
	)

Definition at line 318 of file complex_quda.h.

◆ Order_() [1/2]

QudaFieldOrder quda::Order_	(	const char *	func,
		const char *	file,
		int	line,
		const ColorSpinorField &	a,
		const ColorSpinorField &	b
	)

inline

Helper function for determining if the order of the fields is the same.

Parameters

[in]	a	Input field
[in]	b	Input field

Returns: If order is unique return the order

Definition at line 1174 of file color_spinor_field.h.

◆ Order_() [2/2]

template<typename... Args>

QudaFieldOrder quda::Order_	(	const char *	func,
		const char *	file,
		int	line,
		const ColorSpinorField &	a,
		const ColorSpinorField &	b,
		const Args &...	args
	)

inline

Helper function for determining if the order of the fields is the same.

Parameters

[in]	a	Input field
[in]	b	Input field
[in]	args	List of additional fields to check order on

Returns: If order is unique return the order

Definition at line 1193 of file color_spinor_field.h.

◆ orthoDir()

void quda::orthoDir	(	Complex **	beta,
		std::vector< ColorSpinorField * >	Ap,
		int	k,
		int	pipeline
	)

Definition at line 96 of file inv_gcr_quda.cpp.

◆ outerProdSpinTrace()

template<typename Float , int Nc, int Ns>

__device__ __host__ Matrix<complex<Float>, Nc> quda::outerProdSpinTrace	(	const ColorSpinor< Float, Nc, Ns > &	a,
		const ColorSpinor< Float, Nc, Ns > &	b
	)

inline

Compute the outer product over color and take the spin trace out(j,i) = \sum_s a(s,j) * conj (b(s,i))

Parameters

a	Left-hand side ColorSpinor
b	Right-hand side ColorSpinor

Returns: The spin traced matrix

Definition at line 1035 of file color_spinor.h.

◆ outerProduct()

template<typename Float , int Nc>

__device__ __host__ Matrix<complex<Float>, Nc> quda::outerProduct	(	const ColorSpinor< Float, Nc, 1 > &	a,
		const ColorSpinor< Float, Nc, 1 > &	b
	)

inline

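Compute the outer product over color of two single-spin (Ns = 1) ColorSpinors, out(j,i) = a(j) * conj (b(i)); with only one spin component the spin trace \sum_s a(s,j) * conj (b(s,i)) reduces to this single term.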

Parameters

a	Left-hand side ColorSpinor
b	Right-hand side ColorSpinor

Returns: The spin traced matrix

Definition at line 1073 of file color_spinor.h.

◆ OvrImpSTOUTStep()

void quda::OvrImpSTOUTStep	(	GaugeField &	dataDs,
		GaugeField &	dataOr,
		double	rho,
		double	epsilon
	)

Apply Over Improved STOUT smearing to the gauge field.

Parameters

[out]	dataDs	Output smeared field
[in]	dataOr	Input gauge field
[in]	rho	smearing parameter
[in]	epsilon	smearing parameter

◆ PackGhost()

void quda::PackGhost	(	void *	ghost[2 *QUDA_MAX_DIM],
		const ColorSpinorField &	field,
		MemoryLocation	location,
		int	nFace,
		bool	dagger,
		int	parity,
		bool	spin_project,
		double	a,
		double	b,
		double	c,
		int	shmem,
		const qudaStream_t &	stream
	)

Dslash face packing routine.

Parameters

[out]	ghost	Array of packed halos, order is [2*dim+dir]
[in]	field	ColorSpinorField to be packed
[in]	location	Locations where the packed fields are (Device, Host and/or Remote)
[in]	nFace	Depth of halo
[in]	dagger	Whether this is for the dagger operator
[in]	parity	Field parity
[in]	spin_project	Whether to spin_project when packing
[in]	a	Twisted mass scale factor (for preconditioned twisted-mass dagger operator)
[in]	b	Twisted mass chiral twist factor (for preconditioned twisted-mass dagger operator)
[in]	c	Twisted mass flavor twist factor (for preconditioned non degenerate twisted-mass dagger operator)
[in]	stream	Which stream are we executing in

◆ PCType_() [1/2]

QudaPCType quda::PCType_	(	const char *	func,
		const char *	file,
		int	line,
		const ColorSpinorField &	a,
		const ColorSpinorField &	b
	)

inline

Helper function for determining if the preconditioning type of the fields is the same.

Parameters

[in]	a	Input field
[in]	b	Input field

Returns: If PCType is unique return this

Definition at line 1141 of file color_spinor_field.h.

◆ PCType_() [2/2]

template<typename... Args>

QudaPCType quda::PCType_	(	const char *	func,
		const char *	file,
		int	line,
		const ColorSpinorField &	a,
		const ColorSpinorField &	b,
		const Args &...	args
	)

inline

Helper function for determining if the preconditioning type of the fields is the same.

Parameters

[in]	a	Input field
[in]	b	Input field
[in]	args	List of additional fields to check preconditioning type on

Returns: If PCType is unique return this

Definition at line 1160 of file color_spinor_field.h.

◆ PGaugeExchange()

void quda::PGaugeExchange	(	GaugeField &	data,
		const int	n_dim,
		const int	parity
	)

Exchange "borders" between nodes. Although the border radius is 2, this only updates the interior radius border, i.e., the faces at 1 and X[d-2], where X[d] already includes the radius border; it does not update the faces at 0 and X[d-1].

Parameters

[in,out]	data	Gauge field
[in]	n_dim	Number of dimensions to exchange
[in]	parity	Field parity

◆ PGaugeExchangeFree()

void quda::PGaugeExchangeFree ( )

Release all allocated memory used to exchange data between nodes.

◆ pinned_allocated()

size_t quda::pinned_allocated ( )

Returns: pinned memory allocated

Definition at line 71 of file malloc.cpp.

◆ pinned_allocated_peak()

long quda::pinned_allocated_peak ( )

Returns: peak pinned memory allocated

Definition at line 81 of file malloc.cpp.

◆ pinned_malloc_()

void * quda::pinned_malloc_	(	const char *	func,
		const char *	file,
		int	line,
		size_t	size
	)

Allocate page-locked ("pinned") host memory. This function should only be called via the pinned_malloc() macro, defined in malloc_quda.h

Note that we do not rely on cudaHostAlloc(), since buffers allocated in this way have been observed to cause problems when shared with MPI via GPU Direct on some systems.

Definition at line 303 of file malloc.cpp.

◆ plaquette()

double3 quda::plaquette ( const GaugeField & U )

Compute the plaquette of the gauge field.

Parameters

[in] U The gauge field upon which to compute the plaquette

Returns: double3 variable returning (plaquette, spatial plaquette, temporal plaquette) site averages normalized such that each plaquette is in the range [0,1]

◆ polar() [1/3]

template<>

__host__ __device__ complex<double> quda::polar	(	const double &	magnitude,
		const double &	angle
	)

inline

Definition at line 1106 of file complex_quda.h.

◆ polar() [2/3]

template<>

__host__ __device__ complex<float> quda::polar	(	const float &	magnitude,
		const float &	angle
	)

inline

Definition at line 1100 of file complex_quda.h.

◆ polar() [3/3]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::polar	(	const ValueType &	m,
		const ValueType &	theta = `0`
	)

inline

Returns the complex with magnitude m and angle theta in radians.

Definition at line 1094 of file complex_quda.h.

◆ policyTuning()

bool quda::policyTuning ( )

Query whether we are currently tuning a policy.

Definition at line 512 of file tune.cpp.

◆ popKernelPackT()

void quda::popKernelPackT ( )

◆ postTrace_()

void quda::postTrace_	(	const char *	func,
		const char *	file,
		int	line
	)

Post an event in the trace, recording where it was posted.

Definition at line 106 of file tune.cpp.

◆ pow() [1/6]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::pow	(	const complex< ValueType > &	z,
		const complex< ValueType > &	z2
	)

inline

Definition at line 1186 of file complex_quda.h.

◆ pow() [2/6]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::pow	(	const complex< ValueType > &	z,
		const int &	n
	)

inline

Definition at line 1204 of file complex_quda.h.

◆ pow() [3/6]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::pow	(	const complex< ValueType > &	z,
		const ValueType &	x
	)

inline

Definition at line 1180 of file complex_quda.h.

◆ pow() [4/6]

template<>

__host__ __device__ complex<float> quda::pow	(	const float &	x,
		const complex< float > &	exponent
	)

inline

Definition at line 1198 of file complex_quda.h.

◆ pow() [5/6]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::pow	(	const ValueType &	x,
		const complex< ValueType > &	z
	)

inline

Definition at line 1192 of file complex_quda.h.

◆ pow() [6/6]

template<typename ValueType , typename ExponentType >

__host__ __device__ ValueType quda::pow	(	ValueType	x,
		ExponentType	e
	)

inline

Definition at line 111 of file complex_quda.h.

◆ Precision_() [1/2]

QudaPrecision quda::Precision_	(	const char *	func,
		const char *	file,
		int	line,
		const LatticeField &	a,
		const LatticeField &	b
	)

inline

Helper function for determining if the precision of the fields is the same.

Parameters

[in]	a	Input field
[in]	b	Input field

Returns: If precision is unique return the precision

Definition at line 768 of file lattice_field.h.

◆ Precision_() [2/2]

template<typename... Args>

QudaPrecision quda::Precision_	(	const char *	func,
		const char *	file,
		int	line,
		const LatticeField &	a,
		const LatticeField &	b,
		const Args &...	args
	)

inline

Helper function for determining if the precision of the fields is the same.

Parameters

[in]	a	Input field
[in]	b	Input field
[in]	args	List of additional fields to check precision on

Returns: If precision is unique return the precision

Definition at line 785 of file lattice_field.h.

◆ print()

void quda::print	(	const double	d[],
		int	n
	)

Definition at line 44 of file inv_mpcg_quda.cpp.

◆ printAPIProfile()

void quda::printAPIProfile ( )

Print out the timer profile for CUDA API calls.

Definition at line 495 of file quda_api.cpp.

◆ printLaunchTimer()

void quda::printLaunchTimer ( )

Definition at line 880 of file tune.cpp.

◆ printLink()

template<class Cmplx >

__host__ __device__ void quda::printLink ( const Matrix< Cmplx, 3 > & link )

inline

Definition at line 947 of file quda_matrix.h.

◆ printPeakMemUsage()

void quda::printPeakMemUsage ( )

Definition at line 539 of file malloc.cpp.

◆ product()

constexpr int quda::product ( const CommKey & input )

inline constexpr

Definition at line 28 of file comm_key.h.

◆ projectSU3()

void quda::projectSU3	(	GaugeField &	U,
		double	tol,
		int *	fails
	)

Project the input gauge field onto the SU(3) group. This is a destructive operation. The number of link failures is reported so appropriate action can be taken.

Parameters

U	Gauge field that we are projecting onto SU(3)
tol	Tolerance to which the iterative algorithm works
fails	Number of link failures (device pointer)

◆ Prolongate()

void quda::Prolongate	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const ColorSpinorField &	v,
		int	Nvec,
		const int *	fine_to_coarse,
		const int *const *	spin_map,
		int	parity = `QUDA_INVALID_PARITY`
	)

Apply the prolongation operator.

Parameters

[out]	out	Resulting fine grid field
[in]	in	Input field on coarse grid
[in]	v	Matrix field containing the null-space components
[in]	Nvec	Number of null-space components
[in]	fine_to_coarse	Fine-to-coarse lookup table (linear indices)
[in]	spin_map	Spin blocking lookup table
[in]	parity	Parity of the output fine field (if single parity output field)

◆ pushKernelPackT()

void quda::pushKernelPackT ( bool pack )

◆ qudaDeviceSynchronize_()

void quda::qudaDeviceSynchronize_	(	const char *	func,
		const char *	file,
		const char *	line
	)

Wrapper around cudaDeviceSynchronize or cuCtxSynchronize with built-in error checking.

Definition at line 464 of file quda_api.cpp.

◆ qudaEventQuery_()

bool quda::qudaEventQuery_	(	cudaEvent_t &	event,
		const char *	func,
		const char *	file,
		const char *	line
	)

Wrapper around cudaEventQuery or cuEventQuery with built-in error checking.

Parameters

[in] event Event we are querying

Returns: true if event has been reached

Definition at line 378 of file quda_api.cpp.

◆ qudaEventRecord_()

void quda::qudaEventRecord_	(	cudaEvent_t &	event,
		qudaStream_t	stream,
		const char *	func,
		const char *	file,
		const char *	line
	)

Wrapper around cudaEventRecord or cuEventRecord with built-in error checking.

Parameters

[in,out]	event	Event we are recording
[in,out]	stream	Stream where to record the event

Definition at line 402 of file quda_api.cpp.

◆ qudaEventSynchronize_()

void quda::qudaEventSynchronize_	(	cudaEvent_t &	event,
		const char *	func,
		const char *	file,
		const char *	line
	)

Wrapper around cudaEventSynchronize or cuEventSynchronize with built-in error checking.

Parameters

[in] event Event which we are synchronizing with respect to

Definition at line 433 of file quda_api.cpp.

◆ qudaFuncGetAttributes_()

void quda::qudaFuncGetAttributes_	(	cudaFuncAttributes &	attr,
		const void *	kernel,
		const char *	func,
		const char *	file,
		const char *	line
	)

Wrapper around cudaFuncGetAttributes with built-in error checking.

Parameters

[out]	attr	The cudaFuncAttributes object in which to store the output
[in]	kernel	Kernel function for which we are querying the attributes

Definition at line 487 of file quda_api.cpp.

◆ qudaFuncSetAttribute_()

void quda::qudaFuncSetAttribute_	(	const void *	kernel,
		cudaFuncAttribute	attr,
		int	value,
		const char *	func,
		const char *	file,
		const char *	line
	)

Wrapper around cudaFuncSetAttribute with built-in error checking.

Parameters

[in]	kernel	Kernel function for which we are setting the attribute
[in]	attr	Attribute to set
[in]	value	Value to set

Definition at line 479 of file quda_api.cpp.

◆ qudaLaunchKernel() [1/2]

qudaError_t quda::qudaLaunchKernel	(	const void *	func,
		const TuneParam &	tp,
		void **	args,
		qudaStream_t	stream
	)

Wrapper around cudaLaunchKernel.

Parameters

[in]	func	Device function symbol
[in]	tp	TuneParam containing the launch parameters
[in]	args	Arguments
[in]	stream	Stream identifier

Definition at line 57 of file quda_api.cpp.

◆ qudaLaunchKernel() [2/2]

template<typename T , typename... Arg>

qudaError_t quda::qudaLaunchKernel	(	T *	func,
		const TuneParam &	tp,
		qudaStream_t	stream,
		const Arg &...	arg
	)

Templated wrapper around qudaLaunchKernel which can accept a templated kernel, and expects a kernel with a single Arg argument.

Parameters

[in]	func	Device function symbol
[in]	tp	TuneParam containing the launch parameters
[in]	arg	Arguments
[in]	stream	Stream identifier

Definition at line 43 of file quda_api.h.

◆ qudaMemcpy2D_()

void quda::qudaMemcpy2D_	(	void *	dst,
		size_t	dpitch,
		const void *	src,
		size_t	spitch,
		size_t	width,
		size_t	height,
		cudaMemcpyKind	kind,
		const char *	func,
		const char *	file,
		const char *	line
	)

Wrapper around cudaMemcpy2D or driver API equivalent.

Parameters

[out]	dst	Destination pointer
[in]	dpitch	Destination pitch in bytes
[in]	src	Source pointer
[in]	spitch	Source pitch in bytes
[in]	width	Width in bytes
[in]	height	Number of rows
[in]	kind	Type of memory copy

Definition at line 272 of file quda_api.cpp.

◆ qudaMemcpy2DAsync_()

void quda::qudaMemcpy2DAsync_	(	void *	dst,
		size_t	dpitch,
		const void *	src,
		size_t	spitch,
		size_t	width,
		size_t	height,
		cudaMemcpyKind	kind,
		const qudaStream_t &	stream,
		const char *	func,
		const char *	file,
		const char *	line
	)

Wrapper around cudaMemcpy2DAsync or driver API equivalent.

Parameters

[out]	dst	Destination pointer
[in]	dpitch	Destination pitch in bytes
[in]	src	Source pointer
[in]	spitch	Source pitch in bytes
[in]	width	Width in bytes
[in]	height	Number of rows
[in]	kind	Type of memory copy
[in]	stream	Stream to issue copy

Definition at line 301 of file quda_api.cpp.

◆ qudaMemcpy_()

void quda::qudaMemcpy_	(	void *	dst,
		const void *	src,
		size_t	count,
		cudaMemcpyKind	kind,
		const char *	func,
		const char *	file,
		const char *	line
	)

Wrapper around cudaMemcpy or driver API equivalent.

Parameters

[out]	dst	Destination pointer
[in]	src	Source pointer
[in]	count	Size of transfer
[in]	kind	Type of memory copy

Definition at line 232 of file quda_api.cpp.

◆ qudaMemcpyAsync_()

void quda::qudaMemcpyAsync_	(	void *	dst,
		const void *	src,
		size_t	count,
		cudaMemcpyKind	kind,
		const qudaStream_t &	stream,
		const char *	func,
		const char *	file,
		const char *	line
	)

Wrapper around cudaMemcpyAsync or driver API equivalent.

Parameters

[out]	dst	Destination pointer
[in]	src	Source pointer
[in]	count	Size of transfer
[in]	kind	Type of memory copy
[in]	stream	Stream to issue copy

Definition at line 241 of file quda_api.cpp.

◆ qudaMemPrefetchAsync_()

void quda::qudaMemPrefetchAsync_	(	void *	ptr,
		size_t	count,
		QudaFieldLocation	mem_space,
		const qudaStream_t &	stream,
		const char *	func,
		const char *	file,
		const char *	line
	)

Wrapper around cudaMemPrefetchAsync or driver API equivalent.

Parameters

[out]	ptr	Starting address pointer to be prefetched
[in]	count	Size in bytes to prefetch
[in]	mem_space	Memory space to prefetch to
[in]	stream	Stream to issue prefetch

Definition at line 363 of file quda_api.cpp.

◆ qudaMemset2D_()

void quda::qudaMemset2D_	(	void *	ptr,
		size_t	pitch,
		int	value,
		size_t	width,
		size_t	height,
		const char *	func,
		const char *	file,
		const char *	line
	)

Wrapper around cudaMemset2D or driver API equivalent.

Parameters

[out]	ptr	Starting address pointer
[in]	pitch	Pitch in bytes
[in]	value	Value to set for each byte of specified memory
[in]	width	Width in bytes
[in]	height	Number of rows

Definition at line 349 of file quda_api.cpp.

◆ qudaMemset2DAsync_()

void quda::qudaMemset2DAsync_	(	void *	ptr,
		size_t	pitch,
		int	value,
		size_t	width,
		size_t	height,
		const qudaStream_t &	stream,
		const char *	func,
		const char *	file,
		const char *	line
	)

Wrapper around cudaMemset2DAsync or driver API equivalent.

Parameters

[out]	ptr	Starting address pointer
[in]	pitch	Pitch in bytes
[in]	value	Value to set for each byte of specified memory
[in]	width	Width in bytes
[in]	height	Number of rows
[in]	stream	Stream to issue memset

Definition at line 356 of file quda_api.cpp.

◆ qudaMemset_()

void quda::qudaMemset_	(	void *	ptr,
		int	value,
		size_t	count,
		const char *	func,
		const char *	file,
		const char *	line
	)

Wrapper around cudaMemset or driver API equivalent.

Parameters

[out]	ptr	Starting address pointer
[in]	value	Value to set for each byte of specified memory
[in]	count	Size in bytes to set

Definition at line 331 of file quda_api.cpp.

◆ qudaMemsetAsync_()

void quda::qudaMemsetAsync_	(	void *	ptr,
		int	value,
		size_t	count,
		const qudaStream_t &	stream,
		const char *	func,
		const char *	file,
		const char *	line
	)

Wrapper around cudaMemsetAsync or driver API equivalent.

Parameters

[out]	ptr	Starting address pointer
[in]	value	Value to set for each byte of specified memory
[in]	count	Size in bytes to set
[in]	stream	Stream to issue memset

Definition at line 340 of file quda_api.cpp.

◆ qudaStreamSynchronize_()

void quda::qudaStreamSynchronize_	(	qudaStream_t &	stream,
		const char *	func,
		const char *	file,
		const char *	line
	)

Wrapper around cudaStreamSynchronize or cuStreamSynchronize with built-in error checking.

Parameters

[in] stream Stream which we are synchronizing

Definition at line 448 of file quda_api.cpp.

◆ qudaStreamWaitEvent_()

void quda::qudaStreamWaitEvent_	(	qudaStream_t	stream,
		cudaEvent_t	event,
		unsigned int	flags,
		const char *	func,
		const char *	file,
		const char *	line
	)

Wrapper around cudaStreamWaitEvent or cuStreamWaitEvent with built-in error checking.

Parameters

[in,out]	stream	Stream which we are instructing to wait
[in]	event	Event we are waiting on
[in]	flags	Flags to pass to function

Definition at line 417 of file quda_api.cpp.

◆ r_slant()

constexpr const char* quda::r_slant ( const char * str )

inline constexpr

Definition at line 82 of file malloc_quda.h.

◆ Random() [1/2]

template<class Real >

__device__ Real quda::Random ( cuRNGState & state )

inline

Return a random number between 0 and 1.

Parameters

state curand rng state

Returns: random number in range 0,1

Definition at line 96 of file random_quda.h.

◆ Random() [2/2]

template<class Real >

__device__ Real quda::Random	(	cuRNGState &	state,
		Real	a,
		Real	b
	)

inline

Return a random number between a and b.

Parameters

state	curand rng state
a	lower range
b	upper range

Returns: random number in range a,b

Definition at line 75 of file random_quda.h.

◆ Random< double >() [1/2]

template<>

__device__ double quda::Random< double > ( cuRNGState & state )

inline

Definition at line 107 of file random_quda.h.

◆ Random< double >() [2/2]

template<>

__device__ double quda::Random< double >	(	cuRNGState &	state,
		double	a,
		double	b
	)

inline

Definition at line 86 of file random_quda.h.

◆ Random< float >() [1/2]

template<>

__device__ float quda::Random< float > ( cuRNGState & state )

inline

Definition at line 102 of file random_quda.h.

◆ Random< float >() [2/2]

template<>

__device__ float quda::Random< float >	(	cuRNGState &	state,
		float	a,
		float	b
	)

inline

Definition at line 81 of file random_quda.h.

◆ Reconstruct_() [1/2]

QudaReconstructType quda::Reconstruct_	(	const char *	func,
		const char *	file,
		int	line,
		const GaugeField &	a,
		const GaugeField &	b
	)

inline

Helper function for determining if the reconstruct of the fields is the same.

Parameters

[in]	a	Input field
[in]	b	Input field

Returns: If reconstruct is unique return the reconstruct

Definition at line 850 of file gauge_field.h.

◆ Reconstruct_() [2/2]

template<typename... Args>

QudaReconstructType quda::Reconstruct_	(	const char *	func,
		const char *	file,
		int	line,
		const GaugeField &	a,
		const GaugeField &	b,
		const Args &...	args
	)

inline

Helper function for determining if the reconstruct of the fields is the same.

Parameters

[in]	a	Input field
[in]	b	Input field
[in]	args	List of additional fields to check reconstruct on

Returns: If reconstruct is unique return the reconstruct

Definition at line 869 of file gauge_field.h.

◆ reduce() [1/2]

template<typename reduce_t , typename T , typename I , typename reducer >

reduce_t quda::reduce	(	QudaFieldLocation	location,
		const T *	v,
		I	n_items,
		reduce_t	init,
		reducer	r
	)

QUDA implementation providing thrust::reduce like functionality. Improves upon thrust's implementation since a single kernel is used which writes the result directly to host memory.

Parameters

[in]	location	Location where the reduction will take place
[out]	result	Result
[in]	v	Input vector
[in]	n_items	Number of elements to be reduced
[in]	init	Result is initialized to this value
[in]	r	Functor that applies the reduction to each transformed element

Definition at line 240 of file transform_reduce.h.

◆ reduce() [2/2]

template<typename reduce_t , typename T , typename I , typename transformer , typename reducer >

void quda::reduce	(	QudaFieldLocation	location,
		std::vector< reduce_t > &	result,
		const std::vector< T * > &	v,
		I	n_items,
		reduce_t	init,
		reducer	r
	)

QUDA implementation providing thrust::reduce like functionality. Improves upon thrust's implementation since a single kernel is used which writes the result directly to host memory, and is a batched implementation.

Parameters

[in]	location	Location where the reduction will take place
[out]	result	Result
[in]	v	Input vector
[in]	n_items	Number of elements to be reduced
[in]	init	The results are initialized to this value
[in]	r	Functor that applies the reduction to each transformed element

Definition at line 221 of file transform_reduce.h.

◆ reliable()

int quda::reliable	(	double &	rNorm,
		double &	maxrx,
		double &	maxrr,
		const double &	r2,
		const double &	delta
	)

Definition at line 39 of file inv_bicgstab_quda.cpp.

◆ reorder_location()

QudaFieldLocation quda::reorder_location ( )

Return whether data is reordered on the CPU or GPU. This can be set at QUDA initialization using the environment variable QUDA_REORDER_LOCATION.

Returns: Reorder location

Definition at line 748 of file lattice_field.cpp.

◆ reorder_location_set()

void quda::reorder_location_set ( QudaFieldLocation reorder_location_ )

Set whether data is reordered on the CPU or GPU. This can be set at QUDA initialization using the environment variable QUDA_REORDER_LOCATION.

Parameters

reorder_location_ The location to set where data will be reordered

Definition at line 749 of file lattice_field.cpp.

◆ Restrict()

void quda::Restrict	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const ColorSpinorField &	v,
		int	Nvec,
		const int *	fine_to_coarse,
		const int *	coarse_to_fine,
		const int *const *	spin_map,
		int	parity = `QUDA_INVALID_PARITY`
	)

Apply the restriction operator.

Parameters

[out]	out	Resulting coarsened field
[in]	in	Input field on fine grid
[in]	v	Matrix field containing the null-space components
[in]	Nvec	Number of null-space components
[in]	fine_to_coarse	Fine-to-coarse lookup table (linear indices)
[in]	spin_map	Spin blocking lookup table
[in]	parity	Parity of the input fine field (if single parity input field)

◆ safe_malloc_()

void * quda::safe_malloc_	(	const char *	func,
		const char *	file,
		int	line,
		size_t	size
	)

Perform a standard malloc() with error-checking. This function should only be called via the safe_malloc() macro, defined in malloc_quda.h

Definition at line 280 of file malloc.cpp.

◆ saveProfile()

void quda::saveProfile ( const std::string label = "" )

Save profile to disk.

Definition at line 532 of file tune.cpp.

◆ saveTuneCache()

void quda::saveTuneCache ( bool error )

Write tunecache to disk.

Definition at line 439 of file tune.cpp.

◆ setDiracEigParam()

void quda::setDiracEigParam	(	DiracParam &	diracParam,
		QudaInvertParam *	inv_param,
		const bool	pc,
		bool	comms
	)

Definition at line 1758 of file interface_quda.cpp.

◆ setDiracParam()

void quda::setDiracParam	(	DiracParam &	diracParam,
		QudaInvertParam *	inv_param,
		bool	pc
	)

Definition at line 1570 of file interface_quda.cpp.

◆ setDiracPreParam()

void quda::setDiracPreParam	(	DiracParam &	diracParam,
		QudaInvertParam *	inv_param,
		const bool	pc,
		bool	comms
	)

Definition at line 1726 of file interface_quda.cpp.

◆ setDiracRefineParam()

void quda::setDiracRefineParam	(	DiracParam &	diracParam,
		QudaInvertParam *	inv_param,
		const bool	pc
	)

Definition at line 1707 of file interface_quda.cpp.

◆ setDiracSloppyParam()

void quda::setDiracSloppyParam	(	DiracParam &	diracParam,
		QudaInvertParam *	inv_param,
		bool	pc
	)

Definition at line 1689 of file interface_quda.cpp.

◆ setIdentity() [1/3]

template<int N>

__device__ __host__ void quda::setIdentity ( Matrix< double2, N > * m )

inline

Definition at line 677 of file quda_matrix.h.

◆ setIdentity() [2/3]

template<int N>

__device__ __host__ void quda::setIdentity ( Matrix< float2, N > * m )

inline

Definition at line 662 of file quda_matrix.h.

◆ setIdentity() [3/3]

template<class T , int N>

__device__ __host__ void quda::setIdentity ( Matrix< T, N > * m )

inline

Definition at line 647 of file quda_matrix.h.

◆ setKernelPackT()

void quda::setKernelPackT ( bool pack )

Parameters

pack	Sets whether to use a kernel to pack the T dimension

◆ setPackComms()

void quda::setPackComms ( const int * dim_pack )

Helper function that sets which dimensions the packing kernel should be packing for.

Parameters

[in] dim_pack Array that specifies which dimensions need to be packed.

◆ setPolicyTuning()

void quda::setPolicyTuning ( bool policy_tuning_ )

Enable / disable whether we are tuning a policy.

Definition at line 514 of file tune.cpp.

◆ setTransferGPU()

void quda::setTransferGPU ( bool )

◆ setUberTuning()

void quda::setUberTuning ( bool uber_tuning_ )

Enable / disable whether we are tuning an uber kernel.

Definition at line 519 of file tune.cpp.

◆ setUnitarizeLinksConstants()

void quda::setUnitarizeLinksConstants	(	double	unitarize_eps,
		double	max_error,
		bool	allow_svd,
		bool	svd_only,
		double	svd_rel_error,
		double	svd_abs_error
	)

◆ setZero() [1/3]

template<int N>

__device__ __host__ void quda::setZero ( Matrix< double2, N > * m )

inline

Definition at line 721 of file quda_matrix.h.

◆ setZero() [2/3]

template<int N>

__device__ __host__ void quda::setZero ( Matrix< float2, N > * m )

inline

Definition at line 707 of file quda_matrix.h.

◆ setZero() [3/3]

template<class T , int N>

__device__ __host__ void quda::setZero ( Matrix< T, N > * m )

inline

Definition at line 693 of file quda_matrix.h.

◆ sin() [1/3]

template<>

__host__ __device__ complex<float> quda::sin ( const complex< float > & z )

inline

Definition at line 1218 of file complex_quda.h.

◆ sin() [2/3]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::sin ( const complex< ValueType > & z )

inline

Definition at line 1210 of file complex_quda.h.

◆ sin() [3/3]

template<typename ValueType >

__host__ __device__ ValueType quda::sin ( ValueType x )

inline

Definition at line 51 of file complex_quda.h.

◆ sinh() [1/3]

template<>

__host__ __device__ complex<float> quda::sinh ( const complex< float > & z )

inline

Definition at line 1234 of file complex_quda.h.

◆ sinh() [2/3]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::sinh ( const complex< ValueType > & z )

inline

Definition at line 1226 of file complex_quda.h.

◆ sinh() [3/3]

template<typename ValueType >

__host__ __device__ ValueType quda::sinh ( ValueType x )

inline

Definition at line 86 of file complex_quda.h.

◆ spinorNoise() [1/2]

void quda::spinorNoise	(	ColorSpinorField &	src,
		RNG &	randstates,
		QudaNoiseType	type
	)

Generate a random noise spinor. This variant allows the user to manage the RNG state.

Parameters

src	The colorspinorfield
randstates	Random state
type	The type of noise to create (QUDA_NOISE_GAUSSIAN or QUDA_NOISE_UNIFORM)

◆ spinorNoise() [2/2]

void quda::spinorNoise	(	ColorSpinorField &	src,
		unsigned long long	seed,
		QudaNoiseType	type
	)

Generate a random noise spinor. This variant just requires a seed and will create and destroy the random number state.

Parameters

src	The colorspinorfield
seed	Seed
type	The type of noise to create (QUDA_NOISE_GAUSSIAN or QUDA_NOISE_UNIFORM)

◆ split_field()

template<class Field >

void quda::split_field	(	Field &	collect_field,
		std::vector< Field * > &	v_base_field,
		const CommKey &	comm_key,
		QudaPCType	pc_type = `QUDA_4D_PC`
	)

inline

The term partition in the variable names and comments can mean two things:

The processor grid (with dimension comm_grid_dim) is divided into (sub)partitions.
For the collecting field, on each processor it contains several partitions, each partition is a copy of the base field. The term partition_dim means the number of partitions in each direction, and (unsurprisingly) partition_dim is the same for the above two meanings, i.e. if I divide the overall processor grid by 3 in one direction, the collect field will be 3 times fatter compared to the base field, in that direction.

In this file the terms *_dim and *_idx all refer to arrays of 4 ints - one can simplify them as 1d-int to understand things and the extension to 4d is trivial.

Definition at line 17 of file split_grid.h.

◆ sqrt() [1/3]

template<>

__host__ __device__ complex<float> quda::sqrt ( const complex< float > & z )

inline

Definition at line 1248 of file complex_quda.h.

◆ sqrt() [2/3]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::sqrt ( const complex< ValueType > & z )

inline

Definition at line 1242 of file complex_quda.h.

◆ sqrt() [3/3]

template<typename ValueType >

__host__ __device__ ValueType quda::sqrt ( ValueType x )

inline

Definition at line 120 of file complex_quda.h.

◆ StaggeredCoarseOp()

void quda::StaggeredCoarseOp	(	GaugeField &	Y,
		GaugeField &	X,
		const Transfer &	T,
		const cudaGaugeField &	gauge,
		const cudaGaugeField *	XinvKD,
		double	mass,
		QudaDiracType	dirac,
		QudaMatPCType	matpc
	)

Coarse operator construction from a fine-grid operator (Staggered)

Parameters

Y[out]	Coarse link field
X[out]	Coarse clover field
T[in]	Transfer operator that defines the coarse space
gauge[in]	Gauge field from fine grid, needs to be generalized for long link.
XinvKD[in]	Inverse Kahler-Dirac block
mass[in]	Mass parameter
matpc[in]	The type of even-odd preconditioned fine-grid operator we are constructing the coarse grid operator from. For staggered, should always be QUDA_MATPC_INVALID.

◆ StaggeredProlongate()

void quda::StaggeredProlongate	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const int *	fine_to_coarse,
		const int *const *	spin_map,
		int	parity = `QUDA_INVALID_PARITY`
	)

Apply the unitary "prolongation" operator for Kahler-Dirac preconditioning.

Parameters

[out]	out	Resulting fine grid field
[in]	in	Input field on coarse grid
[in]	fine_to_coarse	Fine-to-coarse lookup table (linear indices)
[in]	spin_map	Spin blocking lookup table
[in]	parity	of the output fine field (if single parity output field)

◆ StaggeredRestrict()

void quda::StaggeredRestrict	(	ColorSpinorField &	out,
		const ColorSpinorField &	in,
		const int *	fine_to_coarse,
		const int *const *	spin_map,
		int	parity = `QUDA_INVALID_PARITY`
	)

Apply the unitary "restriction" operator for Kahler-Dirac preconditioning.

Parameters

[out]	out	Resulting coarse grid field
[in]	in	Input field on fine grid
[in]	fine_to_coarse	Fine-to-coarse lookup table (linear indices)
[in]	spin_map	Spin blocking lookup table
[in]	parity	of the input fine field (if single parity input field)

◆ store_streaming_double2()

__device__ void quda::store_streaming_double2	(	double2 *	addr,
		double	x,
		double	y
	)

inline

Definition at line 88 of file inline_ptx.h.

◆ store_streaming_float2()

__device__ void quda::store_streaming_float2	(	float2 *	addr,
		float	x,
		float	y
	)

inline

Definition at line 93 of file inline_ptx.h.

◆ store_streaming_float4()

__device__ void quda::store_streaming_float4	(	float4 *	addr,
		float	x,
		float	y,
		float	z,
		float	w
	)

inline

Definition at line 78 of file inline_ptx.h.

◆ store_streaming_short2()

__device__ void quda::store_streaming_short2	(	short2 *	addr,
		short	x,
		short	y
	)

inline

Definition at line 98 of file inline_ptx.h.

◆ store_streaming_short4()

__device__ void quda::store_streaming_short4	(	short4 *	addr,
		short	x,
		short	y,
		short	z,
		short	w
	)

inline

Definition at line 83 of file inline_ptx.h.

◆ STOUTStep()

void quda::STOUTStep	(	GaugeField &	dataDs,
		GaugeField &	dataOr,
		double	rho
	)

Apply STOUT smearing to the gauge field.

Parameters

[out]	dataDs	Output smeared field
[in]	dataOr	Input gauge field
[in]	rho	smearing parameter

◆ str_end()

constexpr const char* quda::str_end ( const char * str )

inline constexpr

Definition at line 80 of file malloc_quda.h.

◆ str_slant()

constexpr bool quda::str_slant ( const char * str )

inline constexpr

Definition at line 81 of file malloc_quda.h.

◆ SubTraceUnit()

template<class T >

__device__ __host__ void quda::SubTraceUnit ( Matrix< T, 3 > & a )

inline

Definition at line 925 of file quda_matrix.h.

◆ tan() [1/2]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::tan ( const complex< ValueType > & z )

inline

Definition at line 1254 of file complex_quda.h.

◆ tan() [2/2]

template<typename ValueType >

__host__ __device__ ValueType quda::tan ( ValueType x )

inline

Definition at line 56 of file complex_quda.h.

◆ tanh() [1/2]

template<typename ValueType >

__host__ __device__ complex< ValueType > quda::tanh ( const complex< ValueType > & z )

inline

Definition at line 1260 of file complex_quda.h.

◆ tanh() [2/2]

template<typename ValueType >

__host__ __device__ ValueType quda::tanh ( ValueType x )

inline

Definition at line 91 of file complex_quda.h.

◆ terminate_value()

template<typename T >

constexpr T quda::terminate_value ( )

constexpr

The termination value we use to prevent a possible hang in case the computed reduction is equal to the initialization.

Definition at line 44 of file reduce_helper.h.

◆ timeInterval()

double quda::timeInterval	(	struct timeval	start,
		struct timeval	end
	)

Definition at line 18 of file inv_gcr_quda.cpp.

◆ traceEnabled()

int quda::traceEnabled ( )

Definition at line 86 of file tune.cpp.

◆ transform_reduce() [1/3]

template<typename Arg >

void quda::transform_reduce ( Arg & arg )

Definition at line 58 of file transform_reduce.h.

◆ transform_reduce() [2/3]

template<typename reduce_t , typename T , typename I , typename transformer , typename reducer >

reduce_t quda::transform_reduce	(	QudaFieldLocation	location,
		const T *	v,
		I	n_items,
		transformer	h,
		reduce_t	init,
		reducer	r
	)

QUDA implementation providing thrust::transform_reduce like functionality. Improves upon thrust's implementation since a single kernel is used which writes the result directly to host memory.

Parameters

[in]	location	Location where the reduction will take place
[out]	result	Result
[in]	v	Input vector
[in]	n_items	Number of elements to be reduced
[in]	transformer	Functor that applies transform to each element
[in]	init	Result is initialized to this value
[in]	reducer	Functor that applies the reduction to each transformed element

Definition at line 200 of file transform_reduce.h.

◆ transform_reduce() [3/3]

template<typename reduce_t , typename T , typename I , typename transformer , typename reducer >

void quda::transform_reduce	(	QudaFieldLocation	location,
		std::vector< reduce_t > &	result,
		const std::vector< T * > &	v,
		I	n_items,
		transformer	h,
		reduce_t	init,
		reducer	r
	)

QUDA implementation providing thrust::transform_reduce like functionality. Improves upon thrust's implementation since a single kernel is used which writes the result directly to host memory, and is a batched implementation.

Parameters

[in]	location	Location where the reduction will take place
[out]	result	Vector of results
[in]	v	Vector of inputs
[in]	n_items	Number of elements to be reduced in each input
[in]	transformer	Functor that applies transform to each element
[in]	init	The results are initialized to this value
[in]	reducer	Functor that applies the reduction to each transformed element

Definition at line 178 of file transform_reduce.h.

◆ tuneLaunch()

TuneParam quda::tuneLaunch	(	Tunable &	tunable,
		QudaTune	enabled,
		QudaVerbosity	verbosity
	)

Return the optimal launch parameters for a given kernel, either by retrieving them from tunecache or autotuning on the spot.

Definition at line 677 of file tune.cpp.

◆ u32toa()

void quda::u32toa	(	char *	buffer,
		uint32_t	value
	)

inline

Definition at line 45 of file uint_to_char.h.

◆ u64toa()

void quda::u64toa	(	char *	buffer,
		uint64_t	value
	)

inline

Definition at line 127 of file uint_to_char.h.

◆ uberTuning()

bool quda::uberTuning ( )

Query whether we are tuning an uber kernel.

Definition at line 517 of file tune.cpp.

◆ unitarizeLinks() [1/2]

void quda::unitarizeLinks	(	GaugeField &	outfield,
		const GaugeField &	infield,
		int *	fails
	)

◆ unitarizeLinks() [2/2]

void quda::unitarizeLinks	(	GaugeField &	outfield,
		int *	fails
	)

◆ unitarizeLinksCPU()

void quda::unitarizeLinksCPU	(	GaugeField &	outfield,
		const GaugeField &	infield
	)

◆ updateAlphaZeta()

void quda::updateAlphaZeta	(	double *	alpha,
		double *	zeta,
		double *	zeta_old,
		const double *	r2,
		const double *	beta,
		const double	pAp,
		const double *	offset,
		const int	nShift,
		const int	j_low
	)

Compute the new values of alpha and zeta

Definition at line 126 of file inv_multi_cg_quda.cpp.

◆ updateAp()

void quda::updateAp	(	Complex **	beta,
		std::vector< ColorSpinorField * >	Ap,
		int	begin,
		int	size,
		int	k
	)

Definition at line 83 of file inv_gcr_quda.cpp.

◆ updateGaugeField()

void quda::updateGaugeField	(	GaugeField &	out,
		double	dt,
		const GaugeField &	in,
		const GaugeField &	mom,
		bool	conj_mom,
		bool	exact
	)

Evolve the gauge field by step size dt using the momentum field

Parameters

out	Updated gauge field
dt	Step size
in	Input gauge field
mom	Momentum field
conj_mom	Whether we conjugate the momentum in the exponential
exact	Calculate exact exponential or use an expansion

◆ updateMomentum()

void quda::updateMomentum	(	GaugeField &	mom,
		double	coeff,
		GaugeField &	force,
		const char *	fname
	)

Update the momentum field from the force field

mom = mom - coeff * [force]_TA

where [A]_TA means the traceless anti-hermitian projection of A

Parameters

mom	Momentum field
coeff	Integration stepsize
force	Force field
fname	The function calling this (fname will be printed if force monitoring is enabled)

◆ updateSolution()

void quda::updateSolution	(	ColorSpinorField &	x,
		const Complex *	alpha,
		Complex **const	beta,
		double *	gamma,
		int	k,
		std::vector< ColorSpinorField * >	p
	)

Definition at line 146 of file inv_gcr_quda.cpp.

◆ use_managed_memory()

bool quda::use_managed_memory ( )

Returns: are we using managed memory for device allocations

Definition at line 178 of file malloc.cpp.

◆ vector_load() [1/2]

template<typename VectorType >

__device__ __host__ VectorType quda::vector_load	(	const void *	ptr,
		int	idx
	)

inline

Definition at line 494 of file register_traits.h.

◆ vector_load() [2/2]

template<>

__device__ __host__ char8 quda::vector_load	(	const void *	ptr,
		int	idx
	)

inline

Definition at line 503 of file register_traits.h.

◆ vector_store() [1/10]

template<>

__device__ __host__ void quda::vector_store	(	void *	ptr,
		int	idx,
		const char2 &	value
	)

inline

Definition at line 580 of file register_traits.h.

◆ vector_store() [2/10]

template<>

__device__ __host__ void quda::vector_store	(	void *	ptr,
		int	idx,
		const char4 &	value
	)

inline

Definition at line 571 of file register_traits.h.

◆ vector_store() [3/10]

template<>

__device__ __host__ void quda::vector_store	(	void *	ptr,
		int	idx,
		const char8 &	value
	)

inline

Definition at line 597 of file register_traits.h.

◆ vector_store() [4/10]

template<>

__device__ __host__ void quda::vector_store	(	void *	ptr,
		int	idx,
		const double2 &	value
	)

inline

Definition at line 525 of file register_traits.h.

◆ vector_store() [5/10]

template<>

__device__ __host__ void quda::vector_store	(	void *	ptr,
		int	idx,
		const float2 &	value
	)

inline

Definition at line 543 of file register_traits.h.

◆ vector_store() [6/10]

template<>

__device__ __host__ void quda::vector_store	(	void *	ptr,
		int	idx,
		const float4 &	value
	)

inline

Definition at line 534 of file register_traits.h.

◆ vector_store() [7/10]

template<>

__device__ __host__ void quda::vector_store	(	void *	ptr,
		int	idx,
		const short2 &	value
	)

inline

Definition at line 561 of file register_traits.h.

◆ vector_store() [8/10]

template<>

__device__ __host__ void quda::vector_store	(	void *	ptr,
		int	idx,
		const short4 &	value
	)

inline

Definition at line 552 of file register_traits.h.

◆ vector_store() [9/10]

template<>

__device__ __host__ void quda::vector_store	(	void *	ptr,
		int	idx,
		const short8 &	value
	)

inline

Definition at line 588 of file register_traits.h.

◆ vector_store() [10/10]

template<typename VectorType >

__device__ __host__ void quda::vector_store	(	void *	ptr,
		int	idx,
		const VectorType &	value
	)

inline

Definition at line 520 of file register_traits.h.

◆ WFlowStep()

void quda::WFlowStep	(	GaugeField &	out,
		GaugeField &	temp,
		GaugeField &	in,
		double	epsilon,
		QudaWFlowType	wflow_type
	)

Apply Wilson Flow steps W1, W2, Vt to the gauge field. This routine assumes that the input and output fields are extended, with the input field being exchanged prior to calling this function. On exit from this routine, the output field will have been exchanged.

Parameters

[out]	out	Output smeared field
[in]	temp	Temp space
[in]	in	Input gauge field
[in]	epsilon	Step size
[in]	wflow_type	Wilson (1x1) or Symanzik improved (2x1) staples

◆ zero() [1/11]

__device__ __host__ void quda::zero ( char & a )

inline

Definition at line 359 of file float_vector.h.

◆ zero() [2/11]

__device__ __host__ void quda::zero ( double & a )

inline

Definition at line 318 of file float_vector.h.

◆ zero() [3/11]

__device__ __host__ void quda::zero ( double2 & a )

inline

Definition at line 319 of file float_vector.h.

◆ zero() [4/11]

__device__ __host__ void quda::zero ( double3 & a )

inline

Definition at line 324 of file float_vector.h.

◆ zero() [5/11]

__device__ __host__ void quda::zero ( double4 & a )

inline

Definition at line 330 of file float_vector.h.

◆ zero() [6/11]

__device__ __host__ void quda::zero ( float & a )

inline

Definition at line 338 of file float_vector.h.

◆ zero() [7/11]

__device__ __host__ void quda::zero ( float2 & a )

inline

Definition at line 339 of file float_vector.h.

◆ zero() [8/11]

__device__ __host__ void quda::zero ( float3 & a )

inline

Definition at line 344 of file float_vector.h.

◆ zero() [9/11]

__device__ __host__ void quda::zero ( float4 & a )

inline

Definition at line 350 of file float_vector.h.

◆ zero() [10/11]

__device__ __host__ void quda::zero ( short & a )

inline

Definition at line 358 of file float_vector.h.

◆ zero() [11/11]

template<typename scalar , int n>

__device__ __host__ void quda::zero ( vector_type< scalar, n > & v )

inline

Definition at line 408 of file float_vector.h.

Variable Documentation

◆ Nstream

const int quda::Nstream = 9

Definition at line 137 of file quda_internal.h.

◆ stream

qudaStream_t* quda::stream

Definition at line 644 of file cuda_color_spinor_field.cpp.

Namespaces

Classes

Typedefs

Enumerations

Functions

Variables

Detailed Description

Typedef Documentation

◆ ColorSpinorFieldSet

◆ Complex

◆ CompositeColorSpinorField

◆ cuRNGState

◆ DenseMatrix

◆ DynamicStride

◆ map

◆ mgarray

◆ RealVector

◆ RowMajorDenseMatrix

◆ Vector

◆ VectorSet

Enumeration Type Documentation

◆ AllocType [1/2]

◆ AllocType [2/2]

◆ BiCGstabLUpdateType

◆ blockType

◆ CloverPrefetchType

◆ Dslash5Type

◆ libtype [1/2]

◆ libtype [2/2]

◆ MdwfFusedDslashType

◆ MemoryLocation

◆ QudaOffsetCopyMode

◆ QudaProfileType

Function Documentation

◆ __launch_bounds__()

◆ abs() [1/4]

◆ abs() [2/4]

◆ abs() [3/4]

◆ abs() [4/4]

◆ acos() [1/2]

◆ acos() [2/2]

◆ acosh()

◆ activeTuning()

◆ AllocateAndBuildStaggeredKahlerDiracInverse()

◆ APEStep()

◆ ApplyClover()

◆ ApplyCoarse()

◆ ApplyCovDev()

◆ ApplyDomainWall4D()

◆ ApplyDomainWall5D()

◆ ApplyDslash5()

◆ applyGaugePhase()

◆ ApplyImprovedStaggered()

◆ ApplyLaplace()

◆ ApplyNdegTwistedMass()

◆ ApplyNdegTwistedMassPreconditioned()

◆ ApplyStaggered()

◆ ApplyStaggeredKahlerDiracInverse()

◆ ApplyTwistClover()

◆ ApplyTwistedClover()

◆ ApplyTwistedCloverPreconditioned()

◆ ApplyTwistedMass()

◆ ApplyTwistedMassPreconditioned()

◆ ApplyTwistGamma()

◆ applyU()

◆ ApplyWilson()

◆ ApplyWilsonClover()

◆ ApplyWilsonCloverHasenbuschTwist()

◆ ApplyWilsonCloverHasenbuschTwistPCClovInv()

◆ ApplyWilsonCloverHasenbuschTwistPCNoClovInv()

◆ ApplyWilsonCloverPreconditioned()

◆ arg() [1/3]

◆ arg() [2/3]

◆ arg() [3/3]

◆ arpack_solve()

◆ asin() [1/2]

◆ asin() [2/2]

◆ asinh()

◆ assertAllMemFree()

◆ atan() [1/2]