Namespaces
namespace	copy

namespace	fermion_force

namespace	quda

namespace	reduce

Classes
struct	CloverFieldParam

class	CloverField

class	cudaCloverField

class	cpuCloverField

struct	FullClover

class	ColorSpinorParam

class	ColorSpinorField

class	cudaColorSpinorField

class	cpuColorSpinorField

class	ColorSpinorFieldOrder

class	SpaceSpinColorOrder

class	SpaceColorSpinOrder

class	QOPDomainWallOrder

class	DiracParam

class	Dirac

class	DiracWilson

class	DiracWilsonPC

class	DiracClover

class	DiracCloverPC

class	DiracDomainWall

class	DiracDomainWallPC

class	DiracTwistedMass

class	DiracTwistedMassPC

class	DiracStaggered

class	DiracStaggeredPC

class	DiracMatrix

class	DiracM

class	DiracMdagM

class	DiracMdag

class	FaceBuffer

struct	GaugeFieldParam

class	GaugeField

class	cudaGaugeField

class	cpuGaugeField

struct	kernel_param_s

class	Solver

class	CG

class	BiCGstab

class	GCR

class	MR

class	alphaSA

class	MultiShiftSolver

class	MultiShiftCG

class	MinResExt

struct	LatticeFieldParam

class	LatticeField

struct	llfat_kernel_param_s

struct	Timer

struct	TimeProfile

class	TuneKey

class	TuneParam

class	Tunable

struct	axpby

struct	xpy

struct	axpy

struct	xpay

struct	mxpy

struct	ax

struct	caxpy

struct	caxpby

struct	cxpaypbz

struct	axpyBzpcx

struct	axpyZpbx

struct	caxpbypzYmbw

struct	cabxpyAx

struct	caxpbypz

struct	caxpbypczpw

struct	caxpyxmaz

struct	tripleCGUpdate

struct	CloverParam

struct	DslashParam

class	DslashCuda

class	SharedDslashCuda

class	WilsonDslashCuda

class	CloverDslashCuda

class	AsymCloverDslashCuda

class	TwistedDslashCuda

class	DomainWallDslashCuda

class	StaggeredDslashCuda

class	CloverCuda

class	TwistGamma5Cuda

class	GaugeForceCuda

class	MemAlloc

struct	ComplexTypeId< float >

struct	ComplexTypeId< double >

struct	RealTypeId< float2 >

struct	RealTypeId< double2 >

struct	PromoteTypeId

struct	PromoteTypeId< float2, float >

struct	PromoteTypeId< float, float2 >

struct	PromoteTypeId< double2, double >

struct	PromoteTypeId< double, double2 >

struct	PromoteTypeId< double, int >

struct	PromoteTypeId< int, double >

struct	PromoteTypeId< float, int >

struct	PromoteTypeId< int, float >

struct	Zero

struct	Identity

class	Matrix

class	Array

struct	ReduceFunctor

struct	Norm2

struct	Dot

struct	axpyNorm2

struct	xmyNorm2

struct	caxpyNorm2

struct	caxpyxmaznormx

struct	cabxpyaxnorm

struct	Cdot

struct	xpaycdotzy

struct	caxpydotzy

struct	CdotNormA

struct	CdotNormB

struct	caxpbypzYmbwcDotProductUYNormY

struct	axpyCGNorm2

struct	tripleCGReduction

class	UnitarizeLinksCuda

Typedefs
typedef std::complex< double >	Complex

typedef struct quda::kernel_param_s	kernel_param_t

typedef struct quda::llfat_kernel_param_s	llfat_kernel_param_t

Enumerations
enum	QudaProfileType { QUDA_PROFILE_H2D, QUDA_PROFILE_D2H, QUDA_PROFILE_INIT, QUDA_PROFILE_PREAMBLE, QUDA_PROFILE_COMPUTE, QUDA_PROFILE_EPILOGUE, QUDA_PROFILE_FREE, QUDA_PROFILE_TOTAL, QUDA_PROFILE_COUNT }

enum	AllocType { DEVICE, HOST, PINNED, MAPPED, N_ALLOC_TYPE }

Functions
void	initBlas ()

void	endBlas (void)

void	setBlasTuning (QudaTune tune, QudaVerbosity verbose)

void	setBlasParam (int kernel, int prec, int threads, int blocks)

double	norm2 (const ColorSpinorField &)

void	zeroCuda (cudaColorSpinorField &a)

void	copyCuda (cudaColorSpinorField &dst, const cudaColorSpinorField &src)

double	axpyNormCuda (const double &a, cudaColorSpinorField &x, cudaColorSpinorField &y)

double	normCuda (const cudaColorSpinorField &b)

double	reDotProductCuda (cudaColorSpinorField &a, cudaColorSpinorField &b)

double	xmyNormCuda (cudaColorSpinorField &a, cudaColorSpinorField &b)

void	axpbyCuda (const double &a, cudaColorSpinorField &x, const double &b, cudaColorSpinorField &y)

void	axpyCuda (const double &a, cudaColorSpinorField &x, cudaColorSpinorField &y)

void	axCuda (const double &a, cudaColorSpinorField &x)

void	xpyCuda (cudaColorSpinorField &x, cudaColorSpinorField &y)

void	xpayCuda (cudaColorSpinorField &x, const double &a, cudaColorSpinorField &y)

void	mxpyCuda (cudaColorSpinorField &x, cudaColorSpinorField &y)

void	axpyZpbxCuda (const double &a, cudaColorSpinorField &x, cudaColorSpinorField &y, cudaColorSpinorField &z, const double &b)

void	axpyBzpcxCuda (const double &a, cudaColorSpinorField &x, cudaColorSpinorField &y, const double &b, cudaColorSpinorField &z, const double &c)

void	caxpbyCuda (const Complex &a, cudaColorSpinorField &x, const Complex &b, cudaColorSpinorField &y)

void	caxpyCuda (const Complex &a, cudaColorSpinorField &x, cudaColorSpinorField &y)

void	cxpaypbzCuda (cudaColorSpinorField &, const Complex &b, cudaColorSpinorField &y, const Complex &c, cudaColorSpinorField &z)

void	caxpbypzYmbwCuda (const Complex &, cudaColorSpinorField &, const Complex &, cudaColorSpinorField &, cudaColorSpinorField &, cudaColorSpinorField &)

Complex	cDotProductCuda (cudaColorSpinorField &, cudaColorSpinorField &)

Complex	xpaycDotzyCuda (cudaColorSpinorField &x, const double &a, cudaColorSpinorField &y, cudaColorSpinorField &z)

double3	cDotProductNormACuda (cudaColorSpinorField &a, cudaColorSpinorField &b)

double3	cDotProductNormBCuda (cudaColorSpinorField &a, cudaColorSpinorField &b)

double3	caxpbypzYmbwcDotProductUYNormYCuda (const Complex &a, cudaColorSpinorField &x, const Complex &b, cudaColorSpinorField &y, cudaColorSpinorField &z, cudaColorSpinorField &w, cudaColorSpinorField &u)

void	cabxpyAxCuda (const double &a, const Complex &b, cudaColorSpinorField &x, cudaColorSpinorField &y)

double	caxpyNormCuda (const Complex &a, cudaColorSpinorField &x, cudaColorSpinorField &y)

void	caxpyXmazCuda (const Complex &a, cudaColorSpinorField &x, cudaColorSpinorField &y, cudaColorSpinorField &z)

double	caxpyXmazNormXCuda (const Complex &a, cudaColorSpinorField &x, cudaColorSpinorField &y, cudaColorSpinorField &z)

double	cabxpyAxNormCuda (const double &a, const Complex &b, cudaColorSpinorField &x, cudaColorSpinorField &y)

void	caxpbypzCuda (const Complex &, cudaColorSpinorField &, const Complex &, cudaColorSpinorField &, cudaColorSpinorField &)

void	caxpbypczpwCuda (const Complex &, cudaColorSpinorField &, const Complex &, cudaColorSpinorField &, const Complex &, cudaColorSpinorField &, cudaColorSpinorField &)

Complex	caxpyDotzyCuda (const Complex &a, cudaColorSpinorField &x, cudaColorSpinorField &y, cudaColorSpinorField &z)

Complex	axpyCGNormCuda (const double &a, cudaColorSpinorField &x, cudaColorSpinorField &y)

double3	HeavyQuarkResidualNormCuda (cudaColorSpinorField &x, cudaColorSpinorField &r)

double3	xpyHeavyQuarkResidualNormCuda (cudaColorSpinorField &x, cudaColorSpinorField &y, cudaColorSpinorField &r)

void	tripleCGUpdateCuda (const double &alpha, const double &beta, cudaColorSpinorField &q, cudaColorSpinorField &r, cudaColorSpinorField &x, cudaColorSpinorField &p)

double3	tripleCGReductionCuda (cudaColorSpinorField &x, cudaColorSpinorField &y, cudaColorSpinorField &z)

double	axpyNormCpu (const double &a, const cpuColorSpinorField &x, cpuColorSpinorField &y)

double	normCpu (const cpuColorSpinorField &b)

double	reDotProductCpu (const cpuColorSpinorField &a, const cpuColorSpinorField &b)

double	xmyNormCpu (const cpuColorSpinorField &a, cpuColorSpinorField &b)

void	axpbyCpu (const double &a, const cpuColorSpinorField &x, const double &b, cpuColorSpinorField &y)

void	axpyCpu (const double &a, const cpuColorSpinorField &x, cpuColorSpinorField &y)

void	axCpu (const double &a, cpuColorSpinorField &x)

void	xpyCpu (const cpuColorSpinorField &x, cpuColorSpinorField &y)

void	xpayCpu (const cpuColorSpinorField &x, const double &a, cpuColorSpinorField &y)

void	mxpyCpu (const cpuColorSpinorField &x, cpuColorSpinorField &y)

void	axpyZpbxCpu (const double &a, cpuColorSpinorField &x, cpuColorSpinorField &y, const cpuColorSpinorField &z, const double &b)

void	axpyBzpcxCpu (const double &a, cpuColorSpinorField &x, cpuColorSpinorField &y, const double &b, const cpuColorSpinorField &z, const double &c)

void	caxpbyCpu (const Complex &a, const cpuColorSpinorField &x, const Complex &b, cpuColorSpinorField &y)

void	caxpyCpu (const Complex &a, const cpuColorSpinorField &x, cpuColorSpinorField &y)

void	cxpaypbzCpu (const cpuColorSpinorField &x, const Complex &b, const cpuColorSpinorField &y, const Complex &c, cpuColorSpinorField &z)

void	caxpbypzYmbwCpu (const Complex &, const cpuColorSpinorField &, const Complex &, cpuColorSpinorField &, cpuColorSpinorField &, const cpuColorSpinorField &)

Complex	cDotProductCpu (const cpuColorSpinorField &, const cpuColorSpinorField &)

Complex	xpaycDotzyCpu (const cpuColorSpinorField &x, const double &a, cpuColorSpinorField &y, const cpuColorSpinorField &z)

double3	cDotProductNormACpu (const cpuColorSpinorField &a, const cpuColorSpinorField &b)

double3	cDotProductNormBCpu (const cpuColorSpinorField &a, const cpuColorSpinorField &b)

double3	caxpbypzYmbwcDotProductUYNormYCpu (const Complex &a, const cpuColorSpinorField &x, const Complex &b, cpuColorSpinorField &y, cpuColorSpinorField &z, const cpuColorSpinorField &w, const cpuColorSpinorField &u)

void	cabxpyAxCpu (const double &a, const Complex &b, cpuColorSpinorField &x, cpuColorSpinorField &y)

double	caxpyNormCpu (const Complex &a, cpuColorSpinorField &x, cpuColorSpinorField &y)

void	caxpyXmazCpu (const Complex &a, cpuColorSpinorField &x, cpuColorSpinorField &y, cpuColorSpinorField &z)

double	caxpyXmazNormXCpu (const Complex &a, cpuColorSpinorField &x, cpuColorSpinorField &y, cpuColorSpinorField &z)

double	cabxpyAxNormCpu (const double &a, const Complex &b, cpuColorSpinorField &x, cpuColorSpinorField &y)

void	caxpbypzCpu (const Complex &, cpuColorSpinorField &, const Complex &, cpuColorSpinorField &, cpuColorSpinorField &)

void	caxpbypczpwCpu (const Complex &, cpuColorSpinorField &, const Complex &, cpuColorSpinorField &, const Complex &, cpuColorSpinorField &, cpuColorSpinorField &)

Complex	caxpyDotzyCpu (const Complex &a, cpuColorSpinorField &x, cpuColorSpinorField &y, cpuColorSpinorField &z)

double3	HeavyQuarkResidualNormCpu (cpuColorSpinorField &x, cpuColorSpinorField &r)

double3	xpyHeavyQuarkResidualNormCpu (cpuColorSpinorField &x, cpuColorSpinorField &y, cpuColorSpinorField &r)

void	computeCloverCuda (cudaCloverField &clover, const cudaGaugeField &gauge)

void	setDiracParam (DiracParam &diracParam, QudaInvertParam *inv_param, bool pc)

void	setDiracSloppyParam (DiracParam &diracParam, QudaInvertParam *inv_param, bool pc)

void	setDslashTuning (QudaTune tune, QudaVerbosity verbose)

void	setKernelPackT (bool pack)

bool	getKernelPackT ()

void	setFace (const FaceBuffer &face)

bool	getDslashLaunch ()

void	createDslashEvents ()

void	destroyDslashEvents ()

void	initLatticeConstants (const LatticeField &lat)

void	initGaugeConstants (const cudaGaugeField &gauge)

void	initSpinorConstants (const cudaColorSpinorField &spinor)

void	initDslashConstants ()

void	initCloverConstants (const cudaCloverField &clover)

void	initStaggeredConstants (const cudaGaugeField &fatgauge, const cudaGaugeField &longgauge)

void	initTwistedMassConstants (const int flv_stride)
	ndeg tm:

void	wilsonDslashCuda (cudaColorSpinorField out, const cudaGaugeField &gauge, const cudaColorSpinorField in, const int oddBit, const int daggerBit, const cudaColorSpinorField x, const double &k, const int commDim)

void	cloverDslashCuda (cudaColorSpinorField out, const cudaGaugeField &gauge, const FullClover cloverInv, const cudaColorSpinorField in, const int oddBit, const int daggerBit, const cudaColorSpinorField x, const double &k, const int commDim)

void	asymCloverDslashCuda (cudaColorSpinorField out, const cudaGaugeField &gauge, const FullClover cloverInv, const cudaColorSpinorField in, const int oddBit, const int daggerBit, const cudaColorSpinorField x, const double &k, const int commDim)

void	cloverCuda (cudaColorSpinorField out, const cudaGaugeField &gauge, const FullClover clover, const cudaColorSpinorField in, const int oddBit)

void	domainWallDslashCuda (cudaColorSpinorField out, const cudaGaugeField &gauge, const cudaColorSpinorField in, const int parity, const int dagger, const cudaColorSpinorField x, const double &m_f, const double &k, const int commDim)

void	staggeredDslashCuda (cudaColorSpinorField out, const cudaGaugeField &fatGauge, const cudaGaugeField &longGauge, const cudaColorSpinorField in, const int parity, const int dagger, const cudaColorSpinorField x, const double &k, const int commDim)
	NEW:extra argument.

void	twistedMassDslashCuda (cudaColorSpinorField out, const cudaGaugeField &gauge, const cudaColorSpinorField in, const int parity, const int dagger, const cudaColorSpinorField x, const double &kappa, const double &mu, const double &epsilon, const int commDim)
	ndeg tm:

void	twistGamma5Cuda (cudaColorSpinorField out, const cudaColorSpinorField in, const int dagger, const double &kappa, const double &mu, const double &epsilon, const QudaTwistGamma5Type)
	ndeg tm:

void	packFace (void *ghost_buf, cudaColorSpinorField &in, const int dagger, const int parity, const cudaStream_t &stream)

void	loadLinkToGPU (cudaGaugeField cudaGauge, cpuGaugeField cpuGauge, QudaGaugeParam *param)

void	loadLinkToGPU_ex (cudaGaugeField cudaGauge, cpuGaugeField cpuGauge)

void	loadLinkToGPU_gf (cudaGaugeField cudaGauge, cpuGaugeField cpuGauge, QudaGaugeParam *param)

void	storeLinkToCPU (cpuGaugeField cpuGauge, cudaGaugeField cudaGauge, QudaGaugeParam *param)

void	packGhostStaple (int X, void even, void odd, int volume, QudaPrecision prec, int stride, int dir, int whichway, void fwd_nbr_buf_gpu, void back_nbr_buf_gpu, void fwd_nbr_buf, void back_nbr_buf, cudaStream_t stream)

void	unpackGhostStaple (int X, void _even, void _odd, int volume, QudaPrecision prec, int stride, int dir, int whichway, void fwd_nbr_buf, void back_nbr_buf, cudaStream_t stream)

void	pack_ghost_all_staples_cpu (void staple, void cpuGhostStapleBack, void cpuGhostStapleFwd, int nFace, QudaPrecision precision, int X)

void	pack_ghost_all_links (void cpuLink, void cpuGhostBack, void *cpuGhostFwd, int dir, int nFace, QudaPrecision precision, int X)

void	pack_gauge_diag (void buf, int X, void **sitelink, int nu, int mu, int dir1, int dir2, QudaPrecision prec)

void	fermion_force_init_cuda (QudaGaugeParam *param)

void	fermion_force_cuda (double eps, double weight1, double weight2, void act_path_coeff, FullHw cudaHw, cudaGaugeField &cudaSiteLink, cudaGaugeField &cudaMom, QudaGaugeParam param)

std::ostream &	operator<< (std::ostream &output, const GaugeFieldParam &param)

double	norm2 (const cudaGaugeField &a)

void	gauge_force_init_cuda (QudaGaugeParam *param, int max_length)

void	gauge_force_cuda (cudaGaugeField &cudaMom, double eb3, cudaGaugeField &cudaSiteLink, QudaGaugeParam param, int *input_path, int length, void *path_coeff, int num_paths, int max_length)

void	setUnitarizeLinksPadding (int input_padding, int output_padding)

void	setUnitarizeLinksConstants (double unitarize_eps, double max_error, bool allow_svd, bool svd_only, double svd_rel_error, double svd_abs_error, bool check_unitarization=true)

void	unitarizeLinksCuda (const QudaGaugeParam &param, cudaGaugeField &infield, cudaGaugeField *outfield, int *num_failures)

void	unitarizeLinksCPU (const QudaGaugeParam &param, cpuGaugeField &infield, cpuGaugeField *outfield)

bool	isUnitary (const QudaGaugeParam &param, cpuGaugeField &field, double max_error)

std::ostream &	operator<< (std::ostream &output, const LatticeFieldParam &param)

void	llfat_cuda (cudaGaugeField &cudaFatLink, cudaGaugeField &cudaSiteLink, cudaGaugeField &cudaStaple, cudaGaugeField &cudaStaple1, QudaGaugeParam param, double act_path_coeff)

void	llfat_cuda_ex (cudaGaugeField &cudaFatLink, cudaGaugeField &cudaSiteLink, cudaGaugeField &cudaStaple, cudaGaugeField &cudaStaple1, QudaGaugeParam param, double act_path_coeff)

void	llfat_init_cuda (QudaGaugeParam *param)

void	llfat_init_cuda_ex (QudaGaugeParam *param_ex)

void	computeGenStapleFieldParityKernel (void staple_even, void staple_odd, const void sitelink_even, const void sitelink_odd, void fatlink_even, void fatlink_odd, const void mulink_even, const void mulink_odd, int mu, int nu, int save_staple, double mycoeff, QudaReconstructType recon, QudaPrecision prec, dim3 halfGridDim, llfat_kernel_param_t kparam, cudaStream_t *stream)

void	computeGenStapleFieldParityKernel_ex (void staple_even, void staple_odd, const void sitelink_even, const void sitelink_odd, void fatlink_even, void fatlink_odd, const void mulink_even, const void mulink_odd, int mu, int nu, int save_staple, double mycoeff, QudaReconstructType recon, QudaPrecision prec, llfat_kernel_param_t kparam)

void	siteComputeGenStapleParityKernel (void staple_even, void staple_odd, const void sitelink_even, const void sitelink_odd, void fatlink_even, void fatlink_odd, int mu, int nu, double mycoeff, QudaReconstructType recon, QudaPrecision prec, dim3 halfGridDim, llfat_kernel_param_t kparam, cudaStream_t *stream)

void	siteComputeGenStapleParityKernel_ex (void staple_even, void staple_odd, const void sitelink_even, const void sitelink_odd, void fatlink_even, void fatlink_odd, int mu, int nu, double mycoeff, QudaReconstructType recon, QudaPrecision prec, llfat_kernel_param_t kparam)

void	llfatOneLinkKernel (cudaGaugeField &cudaFatLink, cudaGaugeField &cudaSiteLink, cudaGaugeField &cudaStaple, cudaGaugeField &cudaStaple1, QudaGaugeParam param, double act_path_coeff)

void	llfatOneLinkKernel_ex (cudaGaugeField &cudaFatLink, cudaGaugeField &cudaSiteLink, cudaGaugeField &cudaStaple, cudaGaugeField &cudaStaple1, QudaGaugeParam param, double act_path_coeff, llfat_kernel_param_t kparam)

void	computeFatLinkCore (cudaGaugeField cudaSiteLink, double act_path_coeff, QudaGaugeParam qudaGaugeParam, QudaComputeFatMethod method, cudaGaugeField cudaFatLink, struct timeval time_array[])

void	printPeakMemUsage ()

void	assertAllMemFree ()

void *	device_malloc_ (const char *func, const char *file, int line, size_t size)

void *	safe_malloc_ (const char *func, const char *file, int line, size_t size)

void *	pinned_malloc_ (const char *func, const char *file, int line, size_t size)

void *	mapped_malloc_ (const char *func, const char *file, int line, size_t size)

void	device_free_ (const char *func, const char *file, int line, void *ptr)

void	host_free_ (const char *func, const char *file, int line, void *ptr)

void	link_format_cpu_to_gpu (void dst, void src, int reconstruct, int Vh, int pad, int ghostV, QudaPrecision prec, QudaGaugeFieldOrder cpu_order, cudaStream_t stream)

void	link_format_gpu_to_cpu (void dst, void src, int Vh, int stride, QudaPrecision prec, cudaStream_t stream)

void	collectGhostStaple (int X, void even, void odd, int volume, QudaPrecision precision, void ghost_staple_gpu, int dir, int whichway, cudaStream_t *stream)

void	loadTuneCache (QudaVerbosity verbosity)

void	saveTuneCache (QudaVerbosity verbosity)

TuneParam	tuneLaunch (Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity)

template<typename Float >
void	axpby (const Float &a, const Float *x, const Float &b, Float *y, const int N)

template<typename Float >
void	caxpby (const std::complex< Float > &a, const std::complex< Float > *x, const std::complex< Float > &b, std::complex< Float > *y, int N)

template<typename Float >
void	caxpbypcz (const std::complex< Float > &a, const std::complex< Float > *x, const std::complex< Float > &b, const std::complex< Float > *y, const std::complex< Float > &c, std::complex< Float > *z, int N)

template<typename Float >
double	norm (const Float *a, const int N)

template<typename Float >
double	reDotProduct (const Float *a, const Float *b, const int N)

template<typename Float >
Complex	cDotProduct (const std::complex< Float > *a, const std::complex< Float > *b, const int N)

template<typename Float >
double3	HeavyQuarkResidualNorm (const Float *x, const Float *r, const int volume, const int Nint)

double3	HeavyQuarkResidualNormCpu (cpuColorSpinorField &x, cpuColorSpinorField &y, cpuColorSpinorField &r)

void	initReduce ()

void	endReduce ()

QudaTune	getBlasTuning ()

QudaVerbosity	getBlasVerbosity ()

cudaStream_t *	getBlasStream ()

__device__ void	caxpy_ (const float2 &a, const float4 &x, float4 &y)

__device__ void	caxpy_ (const float2 &a, const float2 &x, float2 &y)

__device__ void	caxpy_ (const double2 &a, const double2 &x, double2 &y)

__device__ void	caxpby_ (const float2 &a, const float4 &x, const float2 &b, float4 &y)

__device__ void	caxpby_ (const float2 &a, const float2 &x, const float2 &b, float2 &y)

__device__ void	caxpby_ (const double2 &a, const double2 &x, const double2 &b, double2 &y)

__device__ void	cxpaypbz_ (const float4 &x, const float2 &a, const float4 &y, const float2 &b, float4 &z)

__device__ void	cxpaypbz_ (const float2 &x, const float2 &a, const float2 &y, const float2 &b, float2 &z)

__device__ void	cxpaypbz_ (const double2 &x, const double2 &a, const double2 &y, const double2 &b, double2 &z)

__device__ int	linkIndex (int x[], int dx[], const CloverParam &param)

template<typename Cmplx >
__global__ void	computeFmunuKernel (Cmplx Fmunu, const Cmplx gauge, const CloverParam param)

std::ostream &	operator<< (std::ostream &out, const ColorSpinorField &a)

template<class D , class S >
void	genericCopy (D &dst, const S &src)

template<class T >
void	random (T &t)

template<class T >
void	point (T &t, const int x, const int s, const int c)

template<class U , class V >
int	compareSpinor (const U &u, const V &v, const int tol)

template<class Order >
void	print_vector (const Order &o, unsigned int x)

template<typename Float >
void	transpose (Float gT, const Float g)

template<typename Float >
void	packGhost (Float ghost, const Float gauge, const int nFace, const int X, const int volumeCB, const int surfaceCB, const QudaGaugeFieldOrder order)

std::ostream &	operator<< (std::ostream &out, const cudaColorSpinorField &a)

template<typename Float , typename Float2 >
void	loadMomField (Float2 even, Float2 odd, Float mom, int bytes, int Vh, int pad, void buffer)

template<typename Float , typename Float2 >
void	storeMomToCPUArray (Float mom, Float2 even, Float2 odd, int bytes, int V, int pad, void buffer)

void	setTwistParam (double &a, double &b, const double &kappa, const double &mu, const int dagger, const QudaTwistGamma5Type twist)

void	initDslashCommsPattern ()

void	dslashCuda (DslashCuda &dslash, const size_t regSize, const int parity, const int dagger, const int volume, const int *faceVolumeCB)

template<int sig_positive, int mu_positive, int oddBit, typename Float2 >
__global__ void	do_middle_link_kernel (Float2 tempxEven, Float2 tempxOdd, Float2 PmuEven, Float2 PmuOdd, Float2 P3Even, Float2 P3Odd, int sig, int mu, Float2 coeff, float4 linkEven, float4 linkOdd, Float2 momEven, Float2 momOdd)

template<int sig_positive, int mu_positive, int oddBit, typename Float2 >
__global__ void	do_side_link_kernel (Float2 P3Even, Float2 P3Odd, Float2 P3muEven, Float2 P3muOdd, Float2 TempxEven, Float2 TempxOdd, Float2 PmuEven, Float2 PmuOdd, Float2 shortPEven, Float2 shortPOdd, int sig, int mu, Float2 coeff, Float2 accumu_coeff, float4 linkEven, float4 linkOdd, Float2 momEven, Float2 momOdd)

template<int sig_positive, int mu_positive, int oddBit, typename Float2 >
__global__ void	do_all_link_kernel (Float2 tempxEven, Float2 tempxOdd, Float2 PmuEven, Float2 PmuOdd, Float2 P3Even, Float2 P3Odd, Float2 P3muEven, Float2 P3muOdd, Float2 shortPEven, Float2 shortPOdd, int sig, int mu, Float2 coeff, Float2 mcoeff, Float2 accumu_coeff, float4 linkEven, float4 linkOdd, Float2 momEven, Float2 momOdd)

template<int oddBit, typename Float2 >
__global__ void	do_one_and_naik_terms_kernel (Float2 TempxEven, Float2 TempxOdd, Float2 PmuEven, Float2 PmuOdd, Float2 PnumuEven, Float2 PnumuOdd, int mu, Float2 OneLink, Float2 Naik, Float2 mNaik, float4 linkEven, float4 linkOdd, Float2 momEven, Float2 momOdd)

template<int oddBit, typename Float2 , typename FloatN , typename Float >
__global__ void	GAUGE_FORCE_KERN_NAME (Float2 momEven, Float2 momOdd, const int dir, const double eb3, const FloatN linkEven, const FloatN linkOdd, const int input_path, const int length, const Float *path_coeff, const int num_paths, const kernel_param_t kparam)

void	gauge_force_cuda_dir (cudaGaugeField &cudaMom, const int dir, const double eb3, const cudaGaugeField &cudaSiteLink, const QudaGaugeParam param, int input_path, const int length, const void *path_coeff, const int num_paths, const int max_length)

void	setDiracPreParam (DiracParam &diracParam, QudaInvertParam *inv_param, const bool pc)

void	createDirac (Dirac &d, Dirac &dSloppy, Dirac *&dPre, QudaInvertParam &param, const bool pc_solve)

void	massRescale (QudaDslashType dslash_type, double &kappa, QudaSolutionType solution_type, QudaMassNormalization mass_normalization, cudaColorSpinorField &b)

void	massRescaleCoeff (QudaDslashType dslash_type, double &kappa, QudaSolutionType solution_type, QudaMassNormalization mass_normalization, double &coeff)

void	fillInnerInvertParam (QudaInvertParam &inner, const QudaInvertParam &outer)

double	resNorm (const DiracMatrix &mat, cudaColorSpinorField &b, cudaColorSpinorField &x)

double	timeInterval (struct timeval start, struct timeval end)

void	orthoDir (Complex *beta, cudaColorSpinorField Ap[], int k)

void	backSubs (const Complex alpha, Complex const beta, const double gamma, Complex *delta, int n)

void	updateSolution (cudaColorSpinorField &x, const Complex alpha, Complex const beta, double gamma, int k, cudaColorSpinorField *p[])

void	updateAlphaZeta (double alpha, double zeta, double zeta_old, const double r2, const double beta, const double pAp, const double offset, const int nShift, const int j_low)

template<int mu, int nu, int odd_bit>
__global__ void	LLFAT_KERNEL (do_siteComputeGenStapleParity, RECONSTRUCT)(FloatM *staple_even

	if (kparam.kernel_type==LLFAT_EXTERIOR_KERNEL_FWD_X &&x1!=X1m1) return

	COMPUTE_RECONSTRUCT_SIGN (sign, nu, x1, x2, x3, x4)

	RECONSTRUCT_SITE_LINK (sign, a)

	LLFAT_COMPUTE_NEW_IDX_PLUS (nu, X)

	LOAD_ODD_SITE_MATRIX (mu, new_mem_idx, B)

	COMPUTE_RECONSTRUCT_SIGN (sign, mu, new_x1, new_x2, new_x3, new_x4)

	RECONSTRUCT_SITE_LINK (sign, b)

	MULT_SU3_NN (a, b, tempa)

	LLFAT_COMPUTE_NEW_IDX_PLUS (mu, X)

	LOAD_ODD_SITE_MATRIX (nu, new_mem_idx, C)

	COMPUTE_RECONSTRUCT_SIGN (sign, nu, new_x1, new_x2, new_x3, new_x4)

	RECONSTRUCT_SITE_LINK (sign, c)

	MULT_SU3_NA (tempa, c, staple)

	LOAD_ODD_SITE_MATRIX (nu,(new_mem_idx), A)

	LOAD_ODD_SITE_MATRIX (mu,(new_mem_idx), B)

	MULT_SU3_AN (a, b, tempa)

	LLFAT_COMPUTE_NEW_IDX_LOWER_STAPLE (nu, mu)

	LOAD_EVEN_SITE_MATRIX (nu, new_mem_idx, C)

	MULT_SU3_NN (tempa, c, b)

	LLFAT_ADD_SU3_MATRIX (b, staple, staple)

	WRITE_STAPLE_MATRIX (staple_even, mem_idx)

template<int mu, int nu, int odd_bit, int save_staple>
__global__ void	LLFAT_KERNEL (do_computeGenStapleFieldParity, RECONSTRUCT)(FloatM *staple_even

	LOAD_ODD_MULINK_MATRIX (0, new_mem_idx, BB)

	MULT_SU3_NN (a, bb, tempa)

	if (save_staple)

	LOAD_ODD_SITE_MATRIX (nu, new_mem_idx, A)

	LLFAT_COMPUTE_NEW_IDX_MINUS (nu, X)

	MULT_SU3_AN (a, bb, tempa)

	MULT_SU3_NN (tempa, c, a)

	LOAD_EVEN_FAT_MATRIX (mu, mem_idx)

__global__ void	LLFAT_KERNEL (llfatOneLink, RECONSTRUCT)(const FloatN *sitelink_even

	if (mem_idx >=Vh)

	for (int dir=0;dir< 4;dir++)

template<int mu, int nu, int odd_bit>
__global__ void	LLFAT_KERNEL_EX (do_siteComputeGenStapleParity, RECONSTRUCT)(FloatM *staple_even

	if (mem_idx >=kparam.threads) return

	COMPUTE_RECONSTRUCT_SIGN (sign, nu,(x1-2),(x2-2),(x3-2),(x4-2))

	LLFAT_COMPUTE_NEW_IDX_PLUS_EX (nu, X)

	COMPUTE_RECONSTRUCT_SIGN (sign, mu,(new_x1-2),(new_x2-2),(new_x3-2),(new_x4-2))

	LLFAT_COMPUTE_NEW_IDX_PLUS_EX (mu, X)

	COMPUTE_RECONSTRUCT_SIGN (sign, nu,(new_x1-2),(new_x2-2),(new_x3-2),(new_x4-2))

	LLFAT_COMPUTE_NEW_IDX_LOWER_STAPLE_EX (nu, mu)

	if (!(x1==1||x1==X1+2||x2==1||x2==X2+2||x3==1||x3==X3+2||x4==1||x4==X4+2))

template<int mu, int nu, int odd_bit, int save_staple>
__global__ void	LLFAT_KERNEL_EX (do_computeGenStapleFieldParity, RECONSTRUCT)(FloatM *staple_even

	LLFAT_COMPUTE_NEW_IDX_MINUS_EX (nu, X)

	LLFAT_ADD_SU3_MATRIX (a, staple, staple)

__global__ void	LLFAT_KERNEL_EX (llfatOneLink, RECONSTRUCT)(const FloatN *sitelink_even

	if (sid >=2 *kparam.threads) return

	if (idx >=kparam.threads)

template<int N, typename FloatN , typename Float2 >
__global__ void	do_link_format_cpu_to_gpu (FloatN dst, Float2 src, int reconstruct, int Vh, int pad, int ghostV, size_t threads)

template<int N, typename FloatN , typename Float2 >
__global__ void	do_link_format_cpu_to_gpu_milc (FloatN dst, Float2 src, int reconstruct, int Vh, int pad, int ghostV, size_t threads)

template<typename FloatN >
__global__ void	do_link_format_gpu_to_cpu (FloatN dst, FloatN src, int Vh, int stride)

template<int dir, int whichway, typename Float2 >
__global__ void	collectGhostStapleKernel (Float2 in, const int oddBit, Float2 nbr_staple_gpu)

template<class Cmplx >
__device__ __host__ Cmplx	makeComplex (const typename RealTypeId< Cmplx >::Type &a, const typename RealTypeId< Cmplx >::Type &b)

__device__ __host__ double2	makeComplex (const double &a, const double &b)

__device__ __host__ float2	makeComplex (const float &a, const float &b)

template<class Cmplx >
__device__ __host__ Cmplx &	operator+= (Cmplx &a, const Cmplx &b)

template<class Cmplx >
__device__ __host__ Cmplx	operator+ (const Cmplx &a, const Cmplx &b)

template<class Cmplx >
__device__ __host__ Cmplx	operator- (const Cmplx &a, const Cmplx &b)

template<class Cmplx >
__device__ __host__ Cmplx	operator* (const Cmplx &a, const typename RealTypeId< Cmplx >::Type &scalar)

template<class Cmplx >
__device__ __host__ Cmplx	operator/ (const Cmplx &a, const typename RealTypeId< Cmplx >::Type &scalar)

template<class Cmplx >
__device__ __host__ Cmplx	operator+ (const Cmplx &a, const typename RealTypeId< Cmplx >::Type &scalar)

template<class Cmplx >
__device__ __host__ Cmplx	operator+ (const typename RealTypeId< Cmplx >::Type &scalar, const Cmplx &a)

template<class Cmplx >
__device__ __host__ Cmplx	operator- (const Cmplx &a, const typename RealTypeId< Cmplx >::Type &scalar)

template<class Cmplx >
__device__ __host__ Cmplx	operator- (const typename RealTypeId< Cmplx >::Type &scalar, const Cmplx &a)

template<class Cmplx >
__device__ __host__ Cmplx	operator* (const typename RealTypeId< Cmplx >::Type &scalar, const Cmplx &b)

template<class Cmplx >
__device__ __host__ Cmplx	operator* (const Cmplx &a, const Cmplx &b)

template<class Cmplx >
__device__ __host__ Cmplx	conj (const Cmplx &a)

__device__ __host__ double	conj (const double &a)

__device__ __host__ float	conj (const float &a)

template<class Cmplx >
__device__ __host__ Cmplx	getPreciseInverse (const Cmplx &z)

std::ostream &	operator<< (std::ostream &os, const float2 &z)

std::ostream &	operator<< (std::ostream &os, const double2 &z)

template<int N>
__device__ __host__ int	index (int i, int j)

template<class T >
__device__ __host__ T	getTrace (const Matrix< T, 3 > &a)

template<class T >
__device__ __host__ T	getDeterminant (const Matrix< T, 3 > &a)

template<class T , int N>
__device__ __host__ Matrix< T, N >	operator+ (const Matrix< T, N > &a, const Matrix< T, N > &b)

template<class T , int N>
__device__ __host__ Matrix< T, N >	operator+= (Matrix< T, N > &a, const Matrix< T, N > &b)

template<class T , int N>
__device__ __host__ Matrix< T, N >	operator- (const Matrix< T, N > &a, const Matrix< T, N > &b)

template<class T , int N, class S >
__device__ __host__ Matrix< T, N >	operator* (const S &scalar, const Matrix< T, N > &a)

template<class T , int N, class S >
__device__ __host__ Matrix< T, N >	operator* (const Matrix< T, N > &a, const S &scalar)

template<class T >
__device__ __host__ Matrix< T, 3 >	operator* (const Matrix< T, 3 > &a, const Matrix< T, 3 > &b)

template<class T , class U >
__device__ __host__ Matrix < typename PromoteTypeId< T, U > ::Type, 3 >	operator* (const Matrix< T, 3 > &a, const Matrix< U, 3 > &b)

template<class T >
__device__ __host__ Matrix< T, 2 >	operator* (const Matrix< T, 2 > &a, const Matrix< T, 2 > &b)

template<class T , int N>
__device__ __host__ Matrix< T, N >	conj (const Matrix< T, N > &other)

template<class T >
__device__ __host__ void	computeMatrixInverse (const Matrix< T, 3 > &u, Matrix< T, 3 > *uinv)

template<class T , int N>
__device__ __host__ void	setIdentity (Matrix< T, N > *m)

template<int N>
__device__ __host__ void	setIdentity (Matrix< float2, N > *m)

template<int N>
__device__ __host__ void	setIdentity (Matrix< double2, N > *m)

template<class T , int N>
__device__ __host__ void	setZero (Matrix< T, N > *m)

template<int N>
__device__ __host__ void	setZero (Matrix< float2, N > *m)

template<int N>
__device__ __host__ void	setZero (Matrix< double2, N > *m)

template<class T , int N>
__device__ __host__ void	copyColumn (const Matrix< T, N > &m, int c, Array< T, N > *a)

template<class T , int N>
__device__ __host__ void	outerProd (const Array< T, N > &a, const Array< T, N > &b, Matrix< T, N > *m)

template<class T , int N>
std::ostream &	operator<< (std::ostream &os, const Matrix< T, N > &m)

template<class T , int N>
std::ostream &	operator<< (std::ostream &os, const Array< T, N > &a)

template<class T >
__device__ void	loadLinkVariableFromArray (const T const array, int dir, int idx, int stride, Matrix< T, 3 > link)

__device__ void	loadLinkVariableFromArray (const float2 const array, int dir, int idx, int stride, Matrix< double2, 3 > link)

template<class T >
__device__ void	writeLinkVariableToArray (const Matrix< T, 3 > &link, int dir, int idx, int stride, T *const array)

__device__ void	writeLinkVariableToArray (const Matrix< double2, 3 > &link, int dir, int idx, int stride, float2 *const array)

template<class Cmplx >
__device__ __host__ void	computeLinkInverse (Matrix< Cmplx, 3 > *uinv, const Matrix< Cmplx, 3 > &u)

void	copyArrayToLink (Matrix< float2, 3 > link, float array)

template<class Cmplx , class Real >
void	copyArrayToLink (Matrix< Cmplx, 3 > link, Real array)

void	copyLinkToArray (float *array, const Matrix< float2, 3 > &link)

template<class Cmplx , class Real >
void	copyLinkToArray (Real *array, const Matrix< Cmplx, 3 > &link)

template<class Cmplx >
__host__ __device__ void	printLink (const Matrix< Cmplx, 3 > &link)

__device__ double	norm2_ (const double2 &a)

__device__ float	norm2_ (const float2 &a)

__device__ float	norm2_ (const float4 &a)

__device__ double	dot_ (const double2 &a, const double2 &b)

__device__ float	dot_ (const float2 &a, const float2 &b)

__device__ float	dot_ (const float4 &a, const float4 &b)

__device__ void	Caxpy_ (const float2 &a, const float4 &x, float4 &y)

__device__ void	Caxpy_ (const float2 &a, const float2 &x, float2 &y)

__device__ void	Caxpy_ (const double2 &a, const double2 &x, double2 &y)

__device__ double2	cdot_ (const double2 &a, const double2 &b)

__device__ double2	cdot_ (const float2 &a, const float2 &b)

__device__ double2	cdot_ (const float4 &a, const float4 &b)

__device__ double3	cdotNormA_ (const double2 &a, const double2 &b)

__device__ double3	cdotNormA_ (const float2 &a, const float2 &b)

__device__ double3	cdotNormA_ (const float4 &a, const float4 &b)

__device__ double3	cdotNormB_ (const double2 &a, const double2 &b)

__device__ double3	cdotNormB_ (const float2 &a, const float2 &b)

__device__ double3	cdotNormB_ (const float4 &a, const float4 &b)

template<class Cmplx >
DEVICEHOST RealTypeId< Cmplx > ::Type	cabs (const Cmplx &z)

template<class T , class U >
DEVICEHOST PromoteTypeId< T, U > ::Type	quadSum (const T &a, const U &b)

DEVICEHOST float	getNorm (const Array< float2, 3 > &a)

DEVICEHOST double	getNorm (const Array< double2, 3 > &a)

template<class T >
DEVICEHOST void	constructHHMat (const T &tau, const Array< T, 3 > &v, Matrix< T, 3 > &hh)

template<class Real >
DEVICEHOST void	getLambdaMax (const Matrix< Real, 3 > &b, Real &lambda_max)

template<class Real >
DEVICEHOST void	getGivensRotation (const Real &alpha, const Real &beta, Real &c, Real &s)

template<class Real >
DEVICEHOST void	accumGivensRotation (int index, const Real &c, const Real &s, Matrix< Real, 3 > &m)

template<class Real >
DEVICEHOST void	assignGivensRotation (const Real &c, const Real &s, Matrix< Real, 2 > &m)

template<class Real >
DEVICEHOST void	swap (Real &a, Real &b)

template<class Real >
DEVICEHOST void	smallSVD (Matrix< Real, 2 > &u, Matrix< Real, 2 > &v, Matrix< Real, 2 > &m)

template<class Cmplx >
DEVICEHOST void	getRealBidiagMatrix (const Matrix< Cmplx, 3 > &mat, Matrix< Cmplx, 3 > &u, Matrix< Cmplx, 3 > &v)

template<class Real >
DEVICEHOST void	bdSVD (Matrix< Real, 3 > &u, Matrix< Real, 3 > &v, Matrix< Real, 3 > &b, int max_it)

template<class Cmplx >
DEVICEHOST void	computeSVD (const Matrix< Cmplx, 3 > &m, Matrix< Cmplx, 3 > &u, Matrix< Cmplx, 3 > &v, typename RealTypeId< Cmplx >::Type singular_values[3])

template<class Cmplx >
__device__ __host__ bool	isUnitary (const Matrix< Cmplx, 3 > &matrix, double max_error)

template<class Cmplx >
__device__ __host__ bool	isUnitarizedLinkConsistent (const Matrix< Cmplx, 3 > &initial_matrix, const Matrix< Cmplx, 3 > &unitary_matrix, double max_error)

template<class T >
__device__ __host__ T	getAbsMin (const T *const array, int size)

template<class Real >
__device__ __host__ bool	checkAbsoluteError (Real a, Real b, Real epsilon)

template<class Real >
__device__ __host__ bool	checkRelativeError (Real a, Real b, Real epsilon)

template<class Cmplx >
__device__ __host__ bool	reciprocalRoot (const Matrix< Cmplx, 3 > &q, Matrix< Cmplx, 3 > *res)

template<class Cmplx >
__host__ __device__ bool	unitarizeLinkMILC (const Matrix< Cmplx, 3 > &in, Matrix< Cmplx, 3 > *const result)

template<class Cmplx >
__host__ __device__ bool	unitarizeLinkSVD (const Matrix< Cmplx, 3 > &in, Matrix< Cmplx, 3 > *const result)

template<class Cmplx >
__host__ __device__ bool	unitarizeLinkNewton (const Matrix< Cmplx, 3 > &in, Matrix< Cmplx, 3 > *const result)

template<class Cmplx >
__global__ void	getUnitarizedField (const Cmplx inlink_even, const Cmplx inlink_odd, Cmplx outlink_even, Cmplx outlink_odd, int *num_failures, const int threads)

Variables
unsigned long long	blas_flops

unsigned long long	blas_bytes

const int	Nstream = 1

DslashParam	dslashParam

int	Vspatial

int	gatherCompleted [Nstream]

int	previousDir [Nstream]

int	commsCompleted [Nstream]

int	dslashCompleted [Nstream]

int	commDimTotal

__constant__ int	dir1_array [16]

__constant__ int	dir2_array [16]

unsigned long	staple_bytes =0

__global__ void FloatM *	staple_odd

__global__ void FloatM const FloatN *	sitelink_even

__global__ void FloatM const FloatN const FloatN *	sitelink_odd

__global__ void FloatM const FloatN const FloatN FloatM *	fatlink_even

__global__ void FloatM const FloatN const FloatN FloatM FloatM *	fatlink_odd

__global__ void FloatM const FloatN const FloatN FloatM FloatM Float	mycoeff

__global__ void FloatM const FloatN const FloatN FloatM FloatM Float llfat_kernel_param_t	kparam

FloatM	TEMPA5

FloatM	TEMPA6

FloatM	TEMPA7

FloatM	TEMPA8

FloatM	STAPLE0

FloatM	STAPLE1

FloatM	STAPLE2

FloatM	STAPLE3

FloatM	STAPLE4

FloatM	STAPLE5

FloatM	STAPLE6

FloatM	STAPLE7

FloatM	STAPLE8

int	mem_idx = blockIdx.x*blockDim.x + threadIdx.x

int	z1 = mem_idx / X1h

short	x1h = mem_idx - z1*X1h

int	z2 = z1 / X2

short	x2 = z1 - z2*X2

short	x4 = z2 / X3

short	x3 = z2 - x4*X3

short	x1odd = (x2 + x3 + x4 + odd_bit) & 1

short	x1 = 2*x1h + x1odd

int	X = 2*mem_idx + x1odd

int	new_mem_idx

	DECLARE_VAR_SIGN

	DECLARE_NEW_X

	DECLARE_X_ARRAY

	return

__global__ void FloatM const FloatN const FloatN FloatM FloatM const FloatM *	mulink_even

__global__ void FloatM const FloatN const FloatN FloatM FloatM const FloatM const FloatM *	mulink_odd

FloatM	TEMPB0

FloatM	TEMPB1

FloatM	TEMPB2

FloatM	TEMPB3

FloatM	TEMPB4

FloatM	TEMPB5

FloatM	TEMPB6

FloatM	TEMPB7

FloatM	TEMPB8

	else

__global__ void const FloatN FloatM FloatM Float	coeff0

__global__ void const FloatN FloatM FloatM Float Float	coeff5

FloatM *	my_fatlink = fatlink_even

int	sid = blockIdx.x*blockDim.x + threadIdx.x

int	odd_bit = 0

	my_sitelink = sitelink_even

int	idx = sid

__constant__ double	DEV_HISQ_UNITARIZE_EPS

__constant__ double	DEV_HISQ_FORCE_FILTER

__constant__ double	DEV_MAX_DET_ERROR

__constant__ bool	DEV_REUNIT_ALLOW_SVD

__constant__ bool	DEV_REUNIT_SVD_ONLY

__constant__ double	DEV_REUNIT_SVD_REL_ERROR

__constant__ double	DEV_REUNIT_SVD_ABS_ERROR

__constant__ int	INPUT_PADDING =0

__constant__ int	OUTPUT_PADDING =0

__constant__ int	DEV_MAX_ITER = 20

__constant__ double	DEV_FL_MAX_ERROR

__constant__ double	DEV_FL_UNITARIZE_EPS

__constant__ bool	DEV_FL_REUNIT_ALLOW_SVD

__constant__ bool	DEV_FL_REUNIT_SVD_ONLY

__constant__ double	DEV_FL_REUNIT_SVD_REL_ERROR

__constant__ double	DEV_FL_REUNIT_SVD_ABS_ERROR

__constant__ bool	DEV_FL_CHECK_UNITARIZATION

Detailed Description

Generic Multi Shift Solver

For staggered, the mass is folded into the Dirac operator. Otherwise, the matrix mass is 'unmodified'.

The lowest offset is in offsets[0]

Typedef Documentation

typedef std::complex<double> quda::Complex

Definition at line 13 of file color_spinor_field.h.

typedef struct quda::kernel_param_s quda::kernel_param_t

typedef struct quda::llfat_kernel_param_s quda::llfat_kernel_param_t

Enumeration Type Documentation

enum quda::AllocType

Enumerator:

DEVICE
HOST
PINNED
MAPPED
N_ALLOC_TYPE

Definition at line 10 of file malloc.cpp.

enum quda::QudaProfileType

Enumerator:

QUDA_PROFILE_H2D	The time in seconds for host -> device transfers
QUDA_PROFILE_D2H	The time in seconds for device -> host transfers
QUDA_PROFILE_INIT	The time in seconds taken for initialization
QUDA_PROFILE_PREAMBLE	The time in seconds taken for any preamble
QUDA_PROFILE_COMPUTE	The time in seconds taken for the actual computation
QUDA_PROFILE_EPILOGUE	The time in seconds taken for any epilogue
QUDA_PROFILE_FREE	The time in seconds for freeing resources
QUDA_PROFILE_TOTAL	The total time in seconds for the algorithm. Must be the penultimate type.
QUDA_PROFILE_COUNT	The total number of timers we have. Must be last enum type.

Definition at line 126 of file quda_internal.h.

Function Documentation

template<class Real >

DEVICEHOST void quda::accumGivensRotation	(	int	index,
		const Real &	c,
		const Real &	s,
		Matrix< Real, 3 > &	m
	)

inline

Definition at line 136 of file svd_quda.h.

void quda::assertAllMemFree ( )

Definition at line 290 of file malloc.cpp.

template<class Real >

DEVICEHOST void quda::assignGivensRotation	(	const Real &	c,
		const Real &	s,
		Matrix< Real, 2 > &	m
	)

inline

Definition at line 150 of file svd_quda.h.

void quda::asymCloverDslashCuda	(	cudaColorSpinorField *	out,
		const cudaGaugeField &	gauge,
		const FullClover	cloverInv,
		const cudaColorSpinorField *	in,
		const int	oddBit,
		const int	daggerBit,
		const cudaColorSpinorField *	x,
		const double &	k,
		const int *	commDim
	)

Definition at line 1451 of file dslash_quda.cu.

void quda::axCpu	(	const double &	a,
		cpuColorSpinorField &	x
	)

Definition at line 60 of file blas_cpu.cpp.

void quda::axCuda	(	const double &	a,
		cudaColorSpinorField &	x
	)

Definition at line 166 of file blas_quda.cu.

template<typename Float >

void quda::axpby	(	const Float &	a,
		const Float *	x,
		const Float &	b,
		Float *	y,
		const int	N
	)

Definition at line 8 of file blas_cpu.cpp.

void quda::axpbyCpu	(	const double &	a,
		const cpuColorSpinorField &	x,
		const double &	b,
		cpuColorSpinorField &	y
	)

Definition at line 12 of file blas_cpu.cpp.

void quda::axpbyCuda	(	const double &	a,
		cudaColorSpinorField &	x,
		const double &	b,
		cudaColorSpinorField &	y
	)

Definition at line 83 of file blas_quda.cu.

void quda::axpyBzpcxCpu	(	const double &	a,
		cpuColorSpinorField &	x,
		cpuColorSpinorField &	y,
		const double &	b,
		const cpuColorSpinorField &	z,
		const double &	c
	)

Definition at line 129 of file blas_cpu.cpp.

void quda::axpyBzpcxCuda	(	const double &	a,
		cudaColorSpinorField &	x,
		cudaColorSpinorField &	y,
		const double &	b,
		cudaColorSpinorField &	z,
		const double &	c
	)

Definition at line 304 of file blas_quda.cu.

Complex quda::axpyCGNormCuda	(	const double &	a,
		cudaColorSpinorField &	x,
		cudaColorSpinorField &	y
	)

Definition at line 496 of file reduce_quda.cu.

void quda::axpyCpu	(	const double &	a,
		const cpuColorSpinorField &	x,
		cpuColorSpinorField &	y
	)

Definition at line 31 of file blas_cpu.cpp.

void quda::axpyCuda	(	const double &	a,
		cudaColorSpinorField &	x,
		cudaColorSpinorField &	y
	)

Definition at line 116 of file blas_quda.cu.

double quda::axpyNormCpu	(	const double &	a,
		const cpuColorSpinorField &	x,
		cpuColorSpinorField &	y
	)

Definition at line 178 of file blas_cpu.cpp.

double quda::axpyNormCuda	(	const double &	a,
		cudaColorSpinorField &	x,
		cudaColorSpinorField &	y
	)

Definition at line 182 of file reduce_quda.cu.

void quda::axpyZpbxCpu	(	const double &	a,
		cpuColorSpinorField &	x,
		cpuColorSpinorField &	y,
		const cpuColorSpinorField &	z,
		const double &	b
	)

Definition at line 136 of file blas_cpu.cpp.

void quda::axpyZpbxCuda	(	const double &	a,
		cudaColorSpinorField &	x,
		cudaColorSpinorField &	y,
		cudaColorSpinorField &	z,
		const double &	b
	)

Definition at line 324 of file blas_quda.cu.

void quda::backSubs	(	const Complex *	alpha,
		Complex **const	beta,
		const double *	gamma,
		Complex *	delta,
		int	n
	)

Definition at line 103 of file inv_gcr_quda.cpp.

template<class Real >

DEVICEHOST void quda::bdSVD	(	Matrix< Real, 3 > &	u,
		Matrix< Real, 3 > &	v,
		Matrix< Real, 3 > &	b,
		int	max_it
	)

Definition at line 409 of file svd_quda.h.

template<class Cmplx >

DEVICEHOST RealTypeId<Cmplx>::Type quda::cabs ( const Cmplx & z )

inline

Definition at line 20 of file svd_quda.h.

void quda::cabxpyAxCpu	(	const double &	a,
		const Complex &	b,
		cpuColorSpinorField &	x,
		cpuColorSpinorField &	y
	)

Definition at line 259 of file blas_cpu.cpp.

void quda::cabxpyAxCuda	(	const double &	a,
		const Complex &	b,
		cudaColorSpinorField &	x,
		cudaColorSpinorField &	y
	)

Definition at line 366 of file blas_quda.cu.

double quda::cabxpyAxNormCpu	(	const double &	a,
		const Complex &	b,
		cpuColorSpinorField &	x,
		cpuColorSpinorField &	y
	)

Definition at line 283 of file blas_cpu.cpp.

double quda::cabxpyAxNormCuda	(	const double &	a,
		const Complex &	b,
		cudaColorSpinorField &	x,
		cudaColorSpinorField &	y
	)

Definition at line 301 of file reduce_quda.cu.

template<typename Float >

void quda::caxpby	(	const std::complex< Float > &	a,
		const std::complex< Float > *	x,
		const std::complex< Float > &	b,
		std::complex< Float > *	y,
		int	N
	)

Definition at line 70 of file blas_cpu.cpp.

__device__ void quda::caxpby_	(	const float2 &	a,
		const float4 &	x,
		const float2 &	b,
		float4 &	y
	)

Functor to perform the operation y = a*x + b*y (complex-valued)

Definition at line 210 of file blas_quda.cu.

__device__ void quda::caxpby_	(	const float2 &	a,
		const float2 &	x,
		const float2 &	b,
		float2 &	y
	)

Definition at line 218 of file blas_quda.cu.

__device__ void quda::caxpby_	(	const double2 &	a,
		const double2 &	x,
		const double2 &	b,
		double2 &	y
	)

Definition at line 224 of file blas_quda.cu.

void quda::caxpbyCpu	(	const Complex &	a,
		const cpuColorSpinorField &	x,
		const Complex &	b,
		cpuColorSpinorField &	y
	)

Definition at line 92 of file blas_cpu.cpp.

void quda::caxpbyCuda	(	const Complex &	a,
		cudaColorSpinorField &	x,
		const Complex &	b,
		cudaColorSpinorField &	y
	)

Definition at line 240 of file blas_quda.cu.

template<typename Float >

void quda::caxpbypcz	(	const std::complex< Float > &	a,
		const std::complex< Float > *	x,
		const std::complex< Float > &	b,
		const std::complex< Float > *	y,
		const std::complex< Float > &	c,
		std::complex< Float > *	z,
		int	N
	)

Definition at line 105 of file blas_cpu.cpp.

void quda::caxpbypczpwCpu	(	const Complex &	a,
		cpuColorSpinorField &	x,
		const Complex &	b,
		cpuColorSpinorField &	y,
		const Complex &	c,
		cpuColorSpinorField &	z,
		cpuColorSpinorField &	w
	)

Definition at line 295 of file blas_cpu.cpp.

void quda::caxpbypczpwCuda	(	const Complex &	a,
		cudaColorSpinorField &	x,
		const Complex &	b,
		cudaColorSpinorField &	y,
		const Complex &	c,
		cudaColorSpinorField &	z,
		cudaColorSpinorField &	w
	)

Definition at line 409 of file blas_quda.cu.

void quda::caxpbypzCpu	(	const Complex &	a,
		cpuColorSpinorField &	x,
		const Complex &	b,
		cpuColorSpinorField &	y,
		cpuColorSpinorField &	z
	)

Definition at line 289 of file blas_cpu.cpp.

void quda::caxpbypzCuda	(	const Complex &	a,
		cudaColorSpinorField &	x,
		const Complex &	b,
		cudaColorSpinorField &	y,
		cudaColorSpinorField &	z
	)

Definition at line 387 of file blas_quda.cu.

double3 quda::caxpbypzYmbwcDotProductUYNormYCpu	(	const Complex &	a,
		const cpuColorSpinorField &	x,
		const Complex &	b,
		cpuColorSpinorField &	y,
		cpuColorSpinorField &	z,
		const cpuColorSpinorField &	w,
		const cpuColorSpinorField &	u
	)

Definition at line 250 of file blas_cpu.cpp.

double3 quda::caxpbypzYmbwcDotProductUYNormYCuda	(	const Complex &	a,
		cudaColorSpinorField &	x,
		const Complex &	b,
		cudaColorSpinorField &	y,
		cudaColorSpinorField &	z,
		cudaColorSpinorField &	w,
		cudaColorSpinorField &	u
	)

Definition at line 463 of file reduce_quda.cu.

void quda::caxpbypzYmbwCpu	(	const Complex &	a,
		const cpuColorSpinorField &	x,
		const Complex &	b,
		cpuColorSpinorField &	y,
		cpuColorSpinorField &	z,
		const cpuColorSpinorField &	w
	)

Definition at line 143 of file blas_cpu.cpp.

void quda::caxpbypzYmbwCuda	(	const Complex &	a,
		cudaColorSpinorField &	x,
		const Complex &	b,
		cudaColorSpinorField &	y,
		cudaColorSpinorField &	z,
		cudaColorSpinorField &	w
	)

Definition at line 346 of file blas_quda.cu.

__device__ void quda::caxpy_	(	const float2 &	a,
		const float4 &	x,
		float4 &	y
	)

Functor to perform the operation y += a * x (complex-valued)

Definition at line 175 of file blas_quda.cu.

__device__ void quda::caxpy_	(	const float2 &	a,
		const float2 &	x,
		float2 &	y
	)

Definition at line 182 of file blas_quda.cu.

__device__ void quda::caxpy_	(	const double2 &	a,
		const double2 &	x,
		double2 &	y
	)

Definition at line 187 of file blas_quda.cu.

__device__ void quda::Caxpy_	(	const float2 &	a,
		const float4 &	x,
		float4 &	y
	)

Functor to perform the operation y += a * x (complex-valued)

Definition at line 214 of file reduce_quda.cu.

__device__ void quda::Caxpy_	(	const float2 &	a,
		const float2 &	x,
		float2 &	y
	)

Definition at line 221 of file reduce_quda.cu.

__device__ void quda::Caxpy_	(	const double2 &	a,
		const double2 &	x,
		double2 &	y
	)

Definition at line 226 of file reduce_quda.cu.

void quda::caxpyCpu	(	const Complex &	a,
		const cpuColorSpinorField &	x,
		cpuColorSpinorField &	y
	)

Definition at line 79 of file blas_cpu.cpp.

void quda::caxpyCuda	(	const Complex &	a,
		cudaColorSpinorField &	x,
		cudaColorSpinorField &	y
	)

Definition at line 201 of file blas_quda.cu.

Complex quda::caxpyDotzyCpu	(	const Complex &	a,
		cpuColorSpinorField &	x,
		cpuColorSpinorField &	y,
		cpuColorSpinorField &	z
	)

Definition at line 303 of file blas_cpu.cpp.

Complex quda::caxpyDotzyCuda	(	const Complex &	a,
		cudaColorSpinorField &	x,
		cudaColorSpinorField &	y,
		cudaColorSpinorField &	z
	)

Definition at line 379 of file reduce_quda.cu.

double quda::caxpyNormCpu	(	const Complex &	a,
		cpuColorSpinorField &	x,
		cpuColorSpinorField &	y
	)

Definition at line 264 of file blas_cpu.cpp.

double quda::caxpyNormCuda	(	const Complex &	a,
		cudaColorSpinorField &	x,
		cudaColorSpinorField &	y
	)

Definition at line 249 of file reduce_quda.cu.

void quda::caxpyXmazCpu	(	const Complex &	a,
		cpuColorSpinorField &	x,
		cpuColorSpinorField &	y,
		cpuColorSpinorField &	z
	)

Definition at line 277 of file blas_cpu.cpp.

void quda::caxpyXmazCuda	(	const Complex &	a,
		cudaColorSpinorField &	x,
		cudaColorSpinorField &	y,
		cudaColorSpinorField &	z
	)

Definition at line 432 of file blas_quda.cu.

double quda::caxpyXmazNormXCpu	(	const Complex &	a,
		cpuColorSpinorField &	x,
		cpuColorSpinorField &	y,
		cpuColorSpinorField &	z
	)

Definition at line 270 of file blas_cpu.cpp.

double quda::caxpyXmazNormXCuda	(	const Complex &	a,
		cudaColorSpinorField &	x,
		cudaColorSpinorField &	y,
		cudaColorSpinorField &	z
	)

Definition at line 274 of file reduce_quda.cu.

__device__ double2 quda::cdot_	(	const double2 &	a,
		const double2 &	b
	)

Returns complex-valued dot product of x and y

Definition at line 310 of file reduce_quda.cu.

__device__ double2 quda::cdot_	(	const float2 &	a,
		const float2 &	b
	)

Definition at line 312 of file reduce_quda.cu.

__device__ double2 quda::cdot_	(	const float4 &	a,
		const float4 &	b
	)

Definition at line 314 of file reduce_quda.cu.

__device__ double3 quda::cdotNormA_	(	const double2 &	a,
		const double2 &	b
	)

Returns the dot product (x,y) first, followed by the norm of x.

Definition at line 390 of file reduce_quda.cu.

__device__ double3 quda::cdotNormA_	(	const float2 &	a,
		const float2 &	b
	)

Definition at line 392 of file reduce_quda.cu.

__device__ double3 quda::cdotNormA_	(	const float4 &	a,
		const float4 &	b
	)

Definition at line 394 of file reduce_quda.cu.

__device__ double3 quda::cdotNormB_	(	const double2 &	a,
		const double2 &	b
	)

Returns the dot product (x,y) first, followed by the norm of y.

Definition at line 420 of file reduce_quda.cu.

__device__ double3 quda::cdotNormB_	(	const float2 &	a,
		const float2 &	b
	)

Definition at line 422 of file reduce_quda.cu.

__device__ double3 quda::cdotNormB_	(	const float4 &	a,
		const float4 &	b
	)

Definition at line 424 of file reduce_quda.cu.

template<typename Float >

Complex quda::cDotProduct	(	const std::complex< Float > *	a,
		const std::complex< Float > *	b,
		const int	N
	)

Definition at line 211 of file blas_cpu.cpp.

Complex quda::cDotProductCpu	(	const cpuColorSpinorField &	a,
		const cpuColorSpinorField &	b
	)

Definition at line 217 of file blas_cpu.cpp.

Complex quda::cDotProductCuda	(	cudaColorSpinorField &	x,
		cudaColorSpinorField &	y
	)

Definition at line 329 of file reduce_quda.cu.

double3 quda::cDotProductNormACpu	(	const cpuColorSpinorField &	a,
		const cpuColorSpinorField &	b
	)

Definition at line 237 of file blas_cpu.cpp.

double3 quda::cDotProductNormACuda	(	cudaColorSpinorField &	a,
		cudaColorSpinorField &	b
	)

Definition at line 411 of file reduce_quda.cu.

double3 quda::cDotProductNormBCpu	(	const cpuColorSpinorField &	a,
		const cpuColorSpinorField &	b
	)

Definition at line 243 of file blas_cpu.cpp.

double3 quda::cDotProductNormBCuda	(	cudaColorSpinorField &	a,
		cudaColorSpinorField &	b
	)

Definition at line 440 of file reduce_quda.cu.

template<class Real >

__device__ __host__ bool quda::checkAbsoluteError	(	Real	a,
		Real	b,
		Real	epsilon
	)

inline

Definition at line 141 of file unitarize_links_quda.cu.

template<class Real >

__device__ __host__ bool quda::checkRelativeError	(	Real	a,
		Real	b,
		Real	epsilon
	)

inline

Definition at line 150 of file unitarize_links_quda.cu.

void quda::cloverCuda	(	cudaColorSpinorField *	out,
		const cudaGaugeField &	gauge,
		const FullClover	clover,
		const cudaColorSpinorField *	in,
		const int	oddBit
	)

Definition at line 1781 of file dslash_quda.cu.

void quda::cloverDslashCuda	(	cudaColorSpinorField *	out,
		const cudaGaugeField &	gauge,
		const FullClover	cloverInv,
		const cudaColorSpinorField *	in,
		const int	oddBit,
		const int	daggerBit,
		const cudaColorSpinorField *	x,
		const double &	k,
		const int *	commDim
	)

Definition at line 1388 of file dslash_quda.cu.

void quda::collectGhostStaple	(	int *	X,
		void *	even,
		void *	odd,
		int	volume,
		QudaPrecision	precision,
		void *	ghost_staple_gpu,
		int	dir,
		int	whichway,
		cudaStream_t *	stream
	)

Definition at line 472 of file misc_helpers.cu.

template<int dir, int whichway, typename Float2 >

__global__ void quda::collectGhostStapleKernel	(	Float2 *	in,
		const int	oddBit,
		Float2 *	nbr_staple_gpu
	)

Definition at line 392 of file misc_helpers.cu.

template<class U , class V >

int quda::compareSpinor	(	const U &	u,
		const V &	v,
		const int	tol
	)

Definition at line 288 of file cpu_color_spinor_field.cpp.

quda::COMPUTE_RECONSTRUCT_SIGN	(	sign	,
		nu	,
		x1	,
		x2	,
		x3	,
		x4
	)

Initial value:

{               
    
    LOAD_EVEN_SITE_MATRIX(nu, mem_idx, A)

quda::COMPUTE_RECONSTRUCT_SIGN	(	sign	,
		mu	,
		new_x1	,
		new_x2	,
		new_x3	,
		new_x4
	)

quda::COMPUTE_RECONSTRUCT_SIGN	(	sign	,
		nu	,
		new_x1	,
		new_x2	,
		new_x3	,
		new_x4
	)

quda::COMPUTE_RECONSTRUCT_SIGN	(	sign	,
		nu	,
		(x1-2)	,
		(x2-2)	,
		(x3-2)	,
		(x4-2)
	)

Initial value:

{               
    
    LOAD_EVEN_SITE_MATRIX(nu, mem_idx, A)

quda::COMPUTE_RECONSTRUCT_SIGN	(	sign	,
		mu	,
		(new_x1-2)	,
		(new_x2-2)	,
		(new_x3-2)	,
		(new_x4-2)
	)

quda::COMPUTE_RECONSTRUCT_SIGN	(	sign	,
		nu	,
		(new_x1-2)	,
		(new_x2-2)	,
		(new_x3-2)	,
		(new_x4-2)
	)

void quda::computeCloverCuda	(	cudaCloverField &	clover,
		const cudaGaugeField &	gauge
	)

Definition at line 248 of file clover_quda.cu.

void quda::computeFatLinkCore	(	cudaGaugeField *	cudaSiteLink,
		double *	act_path_coeff,
		QudaGaugeParam *	qudaGaugeParam,
		QudaComputeFatMethod	method,
		cudaGaugeField *	cudaFatLink,
		struct timeval	time_array[]
	)

template<typename Cmplx >

__global__ void quda::computeFmunuKernel	(	Cmplx *	Fmunu,
		const Cmplx *	gauge,
		const CloverParam	param
	)

Construct the field-strength tensor field Fmunu. The first pass only supports no reconstruct, for expediency.

Parameters

Fmunu	- Pointer to field-strength tensor array. Result is stored here.
gauge	- Pointer to gauge field.
param	- CloverParam struct

Definition at line 41 of file clover_quda.cu.

void quda::computeGenStapleFieldParityKernel	(	void *	staple_even,
		void *	staple_odd,
		const void *	sitelink_even,
		const void *	sitelink_odd,
		void *	fatlink_even,
		void *	fatlink_odd,
		const void *	mulink_even,
		const void *	mulink_odd,
		int	mu,
		int	nu,
		int	save_staple,
		double	mycoeff,
		QudaReconstructType	recon,
		QudaPrecision	prec,
		dim3	halfGridDim,
		llfat_kernel_param_t	kparam,
		cudaStream_t *	stream
	)

Definition at line 856 of file llfat_quda.cu.

void quda::computeGenStapleFieldParityKernel_ex	(	void *	staple_even,
		void *	staple_odd,
		const void *	sitelink_even,
		const void *	sitelink_odd,
		void *	fatlink_even,
		void *	fatlink_odd,
		const void *	mulink_even,
		const void *	mulink_odd,
		int	mu,
		int	nu,
		int	save_staple,
		double	mycoeff,
		QudaReconstructType	recon,
		QudaPrecision	prec,
		llfat_kernel_param_t	kparam
	)

Definition at line 1013 of file llfat_quda.cu.

template<class Cmplx >

__device__ __host__ void quda::computeLinkInverse	(	Matrix< Cmplx, 3 > *	uinv,
		const Matrix< Cmplx, 3 > &	u
	)

inline

Definition at line 692 of file quda_matrix.h.

template<class T >

__device__ __host__ void quda::computeMatrixInverse	(	const Matrix< T, 3 > &	u,
		Matrix< T, 3 > *	uinv
	)

inline

Definition at line 438 of file quda_matrix.h.

template<class Cmplx >

DEVICEHOST void quda::computeSVD	(	const Matrix< Cmplx, 3 > &	m,
		Matrix< Cmplx, 3 > &	u,
		Matrix< Cmplx, 3 > &	v,
		typename RealTypeId< Cmplx >::Type	singular_values[3]
	)

Definition at line 637 of file svd_quda.h.

template<class Cmplx >

__device__ __host__ Cmplx quda::conj ( const Cmplx & a )

inline

Definition at line 195 of file quda_matrix.h.

__device__ __host__ double quda::conj ( const double & a )

inline

Definition at line 200 of file quda_matrix.h.

__device__ __host__ float quda::conj ( const float & a )

inline

Definition at line 205 of file quda_matrix.h.

template<class T , int N>

__device__ __host__ Matrix<T,N> quda::conj ( const Matrix< T, N > & other )

inline

Definition at line 425 of file quda_matrix.h.

template<class T >

DEVICEHOST void quda::constructHHMat	(	const T &	tau,
		const Array< T, 3 > &	v,
		Matrix< T, 3 > &	hh
	)

Definition at line 82 of file svd_quda.h.

void quda::copyArrayToLink	(	Matrix< float2, 3 > *	link,
		float *	array
	)

inline

Definition at line 730 of file quda_matrix.h.

template<class Cmplx , class Real >

void quda::copyArrayToLink	(	Matrix< Cmplx, 3 > *	link,
		Real *	array
	)

inline

Definition at line 741 of file quda_matrix.h.

template<class T , int N>

__device__ __host__ void quda::copyColumn	(	const Matrix< T, N > &	m,
		int	c,
		Array< T, N > *	a
	)

inline

Definition at line 592 of file quda_matrix.h.

void quda::copyCuda	(	cudaColorSpinorField &	dst,
		const cudaColorSpinorField &	src
	)

Definition at line 234 of file copy_quda.cu.

void quda::copyLinkToArray	(	float *	array,
		const Matrix< float2, 3 > &	link
	)

inline

Definition at line 753 of file quda_matrix.h.

template<class Cmplx , class Real >

void quda::copyLinkToArray	(	Real *	array,
		const Matrix< Cmplx, 3 > &	link
	)

inline

Definition at line 765 of file quda_matrix.h.

void quda::createDirac	(	Dirac *&	d,
		Dirac *&	dSloppy,
		Dirac *&	dPre,
		QudaInvertParam &	param,
		const bool	pc_solve
	)

Definition at line 695 of file interface_quda.cpp.

void quda::createDslashEvents ( )

Definition at line 220 of file dslash_quda.cu.

__device__ void quda::cxpaypbz_	(	const float4 &	x,
		const float2 &	a,
		const float4 &	y,
		const float2 &	b,
		float4 &	z
	)

Functor to perform the operation z[i] = x[i] + a*y[i] + b*z[i]

Definition at line 249 of file blas_quda.cu.

__device__ void quda::cxpaypbz_	(	const float2 &	x,
		const float2 &	a,
		const float2 &	y,
		const float2 &	b,
		float2 &	z
	)

Definition at line 258 of file blas_quda.cu.

__device__ void quda::cxpaypbz_	(	const double2 &	x,
		const double2 &	a,
		const double2 &	y,
		const double2 &	b,
		double2 &	z
	)

Definition at line 265 of file blas_quda.cu.

void quda::cxpaypbzCpu	(	const cpuColorSpinorField &	x,
		const Complex &	b,
		const cpuColorSpinorField &	y,
		const Complex &	c,
		cpuColorSpinorField &	z
	)

Definition at line 115 of file blas_cpu.cpp.

void quda::cxpaypbzCuda	(	cudaColorSpinorField &	x,
		const Complex &	b,
		cudaColorSpinorField &	y,
		const Complex &	c,
		cudaColorSpinorField &	z
	)

Definition at line 283 of file blas_quda.cu.

void quda::destroyDslashEvents ( )

Definition at line 267 of file dslash_quda.cu.

void quda::device_free_	(	const char *	func,
		const char *	file,
		int	line,
		void *	ptr
	)

Free device memory allocated with device_malloc(). This function should only be called via the device_free() macro, defined in malloc_quda.h.

void* quda::device_malloc_	(	const char *	func,
		const char *	file,
		int	line,
		size_t	size
	)

Perform a standard cudaMalloc() with error-checking. This function should only be called via the device_malloc() macro, defined in malloc_quda.h.

template<int sig_positive, int mu_positive, int oddBit, typename Float2 >

__global__ void quda::do_all_link_kernel	(	Float2 *	tempxEven,
		Float2 *	tempxOdd,
		Float2 *	PmuEven,
		Float2 *	PmuOdd,
		Float2 *	P3Even,
		Float2 *	P3Odd,
		Float2 *	P3muEven,
		Float2 *	P3muOdd,
		Float2 *	shortPEven,
		Float2 *	shortPOdd,
		int	sig,
		int	mu,
		Float2	coeff,
		Float2	mcoeff,
		Float2	accumu_coeff,
		float4 *	linkEven,
		float4 *	linkOdd,
		Float2 *	momEven,
		Float2 *	momOdd
	)

Definition at line 811 of file fermion_force_quda.cu.

template<int N, typename FloatN , typename Float2 >

__global__ void quda::do_link_format_cpu_to_gpu	(	FloatN *	dst,
		Float2 *	src,
		int	reconstruct,
		int	Vh,
		int	pad,
		int	ghostV,
		size_t	threads
	)

Definition at line 42 of file misc_helpers.cu.

template<int N, typename FloatN , typename Float2 >

__global__ void quda::do_link_format_cpu_to_gpu_milc	(	FloatN *	dst,
		Float2 *	src,
		int	reconstruct,
		int	Vh,
		int	pad,
		int	ghostV,
		size_t	threads
	)

Definition at line 102 of file misc_helpers.cu.

template<typename FloatN >

__global__ void quda::do_link_format_gpu_to_cpu	(	FloatN *	dst,
		FloatN *	src,
		int	Vh,
		int	stride
	)

Definition at line 320 of file misc_helpers.cu.

template<int sig_positive, int mu_positive, int oddBit, typename Float2 >

__global__ void quda::do_middle_link_kernel	(	Float2 *	tempxEven,
		Float2 *	tempxOdd,
		Float2 *	PmuEven,
		Float2 *	PmuOdd,
		Float2 *	P3Even,
		Float2 *	P3Odd,
		int	sig,
		int	mu,
		Float2	coeff,
		float4 *	linkEven,
		float4 *	linkOdd,
		Float2 *	momEven,
		Float2 *	momOdd
	)

Definition at line 427 of file fermion_force_quda.cu.

template<int oddBit, typename Float2 >

__global__ void quda::do_one_and_naik_terms_kernel	(	Float2 *	TempxEven,
		Float2 *	TempxOdd,
		Float2 *	PmuEven,
		Float2 *	PmuOdd,
		Float2 *	PnumuEven,
		Float2 *	PnumuOdd,
		int	mu,
		Float2	OneLink,
		Float2	Naik,
		Float2	mNaik,
		float4 *	linkEven,
		float4 *	linkOdd,
		Float2 *	momEven,
		Float2 *	momOdd
	)

Definition at line 1065 of file fermion_force_quda.cu.

template<int sig_positive, int mu_positive, int oddBit, typename Float2 >

__global__ void quda::do_side_link_kernel	(	Float2 *	P3Even,
		Float2 *	P3Odd,
		Float2 *	P3muEven,
		Float2 *	P3muOdd,
		Float2 *	TempxEven,
		Float2 *	TempxOdd,
		Float2 *	PmuEven,
		Float2 *	PmuOdd,
		Float2 *	shortPEven,
		Float2 *	shortPOdd,
		int	sig,
		int	mu,
		Float2	coeff,
		Float2	accumu_coeff,
		float4 *	linkEven,
		float4 *	linkOdd,
		Float2 *	momEven,
		Float2 *	momOdd
	)

Definition at line 633 of file fermion_force_quda.cu.

void quda::domainWallDslashCuda	(	cudaColorSpinorField *	out,
		const cudaGaugeField &	gauge,
		const cudaColorSpinorField *	in,
		const int	parity,
		const int	dagger,
		const cudaColorSpinorField *	x,
		const double &	m_f,
		const double &	k,
		const int *	commDim
	)

Definition at line 1567 of file dslash_quda.cu.

__device__ double quda::dot_	(	const double2 &	a,
		const double2 &	b
	)

Return the real dot product of a and b

Definition at line 143 of file reduce_quda.cu.

__device__ float quda::dot_	(	const float2 &	a,
		const float2 &	b
	)

Definition at line 144 of file reduce_quda.cu.

__device__ float quda::dot_	(	const float4 &	a,
		const float4 &	b
	)

Definition at line 145 of file reduce_quda.cu.

void quda::dslashCuda	(	DslashCuda &	dslash,
		const size_t	regSize,
		const int	parity,
		const int	dagger,
		const int	volume,
		const int *	faceVolumeCB
	)

Definition at line 1238 of file dslash_quda.cu.

void quda::endBlas ( void )

Definition at line 53 of file blas_quda.cu.

void quda::endReduce ( void )

Definition at line 75 of file reduce_quda.cu.

void quda::fermion_force_cuda	(	double	eps,
		double	weight1,
		double	weight2,
		void *	act_path_coeff,
		FullHw	cudaHw,
		cudaGaugeField &	cudaSiteLink,
		cudaGaugeField &	cudaMom,
		QudaGaugeParam *	param
	)

Definition at line 1387 of file fermion_force_quda.cu.

void quda::fermion_force_init_cuda ( QudaGaugeParam * param )

Definition at line 401 of file fermion_force_quda.cu.

void quda::fillInnerInvertParam	(	QudaInvertParam &	inner,
		const QudaInvertParam &	outer
	)

Definition at line 28 of file inv_gcr_quda.cpp.

quda::for ( )

Definition at line 1091 of file llfat_quda.cu.

void quda::gauge_force_cuda	(	cudaGaugeField &	cudaMom,
		double	eb3,
		cudaGaugeField &	cudaSiteLink,
		QudaGaugeParam *	param,
		int ***	input_path,
		int *	length,
		void *	path_coeff,
		int	num_paths,
		int	max_length
	)

Definition at line 334 of file gauge_force_quda.cu.

void quda::gauge_force_cuda_dir	(	cudaGaugeField &	cudaMom,
		const int	dir,
		const double	eb3,
		const cudaGaugeField &	cudaSiteLink,
		const QudaGaugeParam *	param,
		int **	input_path,
		const int *	length,
		const void *	path_coeff,
		const int	num_paths,
		const int	max_length
	)

Definition at line 279 of file gauge_force_quda.cu.

void quda::gauge_force_init_cuda	(	QudaGaugeParam *	param,
		int	max_length
	)

Definition at line 102 of file gauge_force_quda.cu.

template<int oddBit, typename Float2 , typename FloatN , typename Float >

__global__ void quda::GAUGE_FORCE_KERN_NAME	(	Float2 *	momEven,
		Float2 *	momOdd,
		const int	dir,
		const double	eb3,
		const FloatN *	linkEven,
		const FloatN *	linkOdd,
		const int *	input_path,
		const int *	length,
		const Float *	path_coeff,
		const int	num_paths,
		const kernel_param_t	kparam
	)

Definition at line 477 of file gauge_force_quda.cu.

template<class D , class S >

void quda::genericCopy	(	D &	dst,
		const S &	src
	)

Definition at line 201 of file cpu_color_spinor_field.cpp.

template<class T >

__device__ __host__ T quda::getAbsMin	(	const T *const	array,
		int	size
	)

Definition at line 129 of file unitarize_links_quda.cu.

cudaStream_t * quda::getBlasStream ( )

Definition at line 66 of file blas_quda.cu.

QudaTune quda::getBlasTuning ( )

Definition at line 64 of file blas_quda.cu.

QudaVerbosity quda::getBlasVerbosity ( )

Definition at line 65 of file blas_quda.cu.

template<class T >

__device__ __host__ T quda::getDeterminant ( const Matrix< T, 3 > & a )

inline

Definition at line 308 of file quda_matrix.h.

bool quda::getDslashLaunch ( )

template<class Real >

DEVICEHOST void quda::getGivensRotation	(	const Real &	alpha,
		const Real &	beta,
		Real &	c,
		Real &	s
	)

Definition at line 117 of file svd_quda.h.

bool quda::getKernelPackT ( )

Returns: Whether the T dimension is kernel packed or not

Definition at line 157 of file dslash_quda.cu.

template<class Real >

DEVICEHOST void quda::getLambdaMax	(	const Matrix< Real, 3 > &	b,
		Real &	lambda_max
	)

Definition at line 98 of file svd_quda.h.

DEVICEHOST float quda::getNorm ( const Array< float2, 3 > & a )

Definition at line 59 of file svd_quda.h.

DEVICEHOST double quda::getNorm ( const Array< double2, 3 > & a )

Definition at line 70 of file svd_quda.h.

template<class Cmplx >

__device__ __host__ Cmplx quda::getPreciseInverse ( const Cmplx & z )

inline

Definition at line 214 of file quda_matrix.h.

template<class Cmplx >

DEVICEHOST void quda::getRealBidiagMatrix	(	const Matrix< Cmplx, 3 > &	mat,
		Matrix< Cmplx, 3 > &	u,
		Matrix< Cmplx, 3 > &	v
	)

Definition at line 286 of file svd_quda.h.

template<class T >

__device__ __host__ T quda::getTrace ( const Matrix< T, 3 > & a )

inline

Definition at line 301 of file quda_matrix.h.

template<class Cmplx >

__global__ void quda::getUnitarizedField	(	const Cmplx *	inlink_even,
		const Cmplx *	inlink_odd,
		Cmplx *	outlink_even,
		Cmplx *	outlink_odd,
		int *	num_failures,
		const int	threads
	)

Definition at line 350 of file unitarize_links_quda.cu.

template<typename Float >

double3 quda::HeavyQuarkResidualNorm	(	const Float *	x,
		const Float *	r,
		const int	volume,
		const int	Nint
	)

Definition at line 310 of file blas_cpu.cpp.

double3 quda::HeavyQuarkResidualNormCpu	(	cpuColorSpinorField &	x,
		cpuColorSpinorField &	r
	)

Definition at line 331 of file blas_cpu.cpp.

double3 quda::HeavyQuarkResidualNormCpu	(	cpuColorSpinorField &	x,
		cpuColorSpinorField &	y,
		cpuColorSpinorField &	r
	)

Definition at line 350 of file blas_cpu.cpp.

double3 quda::HeavyQuarkResidualNormCuda	(	cudaColorSpinorField &	x,
		cudaColorSpinorField &	r
	)

Definition at line 591 of file reduce_quda.cu.

void quda::host_free_	(	const char *	func,
		const char *	file,
		int	line,
		void *	ptr
	)

Free host memory allocated with safe_malloc(), pinned_malloc(), or mapped_malloc(). This function should only be called via the host_free() macro, defined in malloc_quda.h

quda::if ( kparam.kernel_type == LLFAT_EXTERIOR_KERNEL_FWD_X && x1 != X1m1 )

Definition at line 890 of file llfat_quda.cu.

quda::if ( save_staple )

Definition at line 976 of file llfat_quda.cu.

quda::if ( mem_idx >= Vh )

Definition at line 1070 of file llfat_quda.cu.

quda::if ( mem_idx >=kparam. threads )

quda::if ( ! x1==1||x1==X1+2||x2==1||x2==X2+2||x3==1||x3==X3+2||x4==1||x4==X4+2 )

Definition at line 1226 of file llfat_quda.cu.

quda::if ( sid >=2 *kparam. threads )

quda::if ( idx >=kparam. threads )

Definition at line 1382 of file llfat_quda.cu.

template<int N>

__device__ __host__ int quda::index	(	int	i,
		int	j
	)

inline

Definition at line 280 of file quda_matrix.h.

void quda::initBlas ( )

Definition at line 47 of file blas_quda.cu.

void quda::initCloverConstants ( const cudaCloverField & clover )

void quda::initDslashCommsPattern ( )

Initialize the arrays used for the dynamic scheduling.

Definition at line 1209 of file dslash_quda.cu.

void quda::initDslashConstants ( )

Definition at line 362 of file dslash_constants.h.

void quda::initGaugeConstants ( const cudaGaugeField & gauge )

void quda::initLatticeConstants ( const LatticeField & lat )

void quda::initReduce ( )

Definition at line 45 of file reduce_quda.cu.

void quda::initSpinorConstants ( const cudaColorSpinorField & spinor )

void quda::initStaggeredConstants	(	const cudaGaugeField &	fatgauge,
		const cudaGaugeField &	longgauge
	)

void quda::initTwistedMassConstants ( const int flv_stride )

Non-degenerate twisted mass (ndeg tm):

Definition at line 402 of file dslash_constants.h.

template<class Cmplx >

__device__ __host__ bool quda::isUnitarizedLinkConsistent	(	const Matrix< Cmplx, 3 > &	initial_matrix,
		const Matrix< Cmplx, 3 > &	unitary_matrix,
		double	max_error
	)

Definition at line 73 of file unitarize_links_quda.cu.

bool quda::isUnitary	(	const QudaGaugeParam &	param,
		cpuGaugeField &	field,
		double	max_error
	)

Definition at line 517 of file unitarize_links_quda.cu.

template<class Cmplx >

__device__ __host__ bool quda::isUnitary	(	const Matrix< Cmplx, 3 > &	matrix,
		double	max_error
	)

Definition at line 53 of file unitarize_links_quda.cu.

void quda::link_format_cpu_to_gpu	(	void *	dst,
		void *	src,
		int	reconstruct,
		int	Vh,
		int	pad,
		int	ghostV,
		QudaPrecision	prec,
		QudaGaugeFieldOrder	cpu_order,
		cudaStream_t	stream
	)

Definition at line 143 of file misc_helpers.cu.

void quda::link_format_gpu_to_cpu	(	void *	dst,
		void *	src,
		int	Vh,
		int	stride,
		QudaPrecision	prec,
		cudaStream_t	stream
	)

Definition at line 345 of file misc_helpers.cu.

__device__ int quda::linkIndex	(	int	x[],
		int	dx[],
		const CloverParam &	param
	)

inline

linkIndex computes the spacetime index of the link with coordinate y = x + dx.

Parameters

x	- coordinate in spacetime
dx	- coordinate offsets in spacetime
param	- CloverParam struct

Definition at line 25 of file clover_quda.cu.

quda::LLFAT_ADD_SU3_MATRIX	(	b	,
		staple	,
		staple
	)

quda::LLFAT_ADD_SU3_MATRIX	(	a	,
		staple	,
		staple
	)

quda::LLFAT_COMPUTE_NEW_IDX_LOWER_STAPLE	(	nu	,
		mu
	)

quda::LLFAT_COMPUTE_NEW_IDX_LOWER_STAPLE_EX	(	nu	,
		mu
	)

quda::LLFAT_COMPUTE_NEW_IDX_MINUS	(	nu	,
		X
	)

quda::LLFAT_COMPUTE_NEW_IDX_MINUS_EX	(	nu	,
		X
	)

quda::LLFAT_COMPUTE_NEW_IDX_PLUS	(	nu	,
		X
	)

quda::LLFAT_COMPUTE_NEW_IDX_PLUS	(	mu	,
		X
	)

quda::LLFAT_COMPUTE_NEW_IDX_PLUS_EX	(	nu	,
		X
	)

quda::LLFAT_COMPUTE_NEW_IDX_PLUS_EX	(	mu	,
		X
	)

void quda::llfat_cuda	(	cudaGaugeField &	cudaFatLink,
		cudaGaugeField &	cudaSiteLink,
		cudaGaugeField &	cudaStaple,
		cudaGaugeField &	cudaStaple1,
		QudaGaugeParam *	param,
		double *	act_path_coeff
	)

Definition at line 22 of file llfat_quda_itf.cpp.

void quda::llfat_cuda_ex	(	cudaGaugeField &	cudaFatLink,
		cudaGaugeField &	cudaSiteLink,
		cudaGaugeField &	cudaStaple,
		cudaGaugeField &	cudaStaple1,
		QudaGaugeParam *	param,
		double *	act_path_coeff
	)

Definition at line 249 of file llfat_quda_itf.cpp.

void quda::llfat_init_cuda ( QudaGaugeParam * param )

Definition at line 197 of file llfat_quda.cu.

void quda::llfat_init_cuda_ex ( QudaGaugeParam * param_ex )

Definition at line 246 of file llfat_quda.cu.

template<int mu, int nu, int odd_bit>

__global__ void quda::LLFAT_KERNEL	(	do_siteComputeGenStapleParity	,
		RECONSTRUCT
	)

template<int mu, int nu, int odd_bit, int save_staple>

__global__ void quda::LLFAT_KERNEL	(	do_computeGenStapleFieldParity	,
		RECONSTRUCT
	)

__global__ void quda::LLFAT_KERNEL	(	llfatOneLink	,
		RECONSTRUCT
	)		const

template<int mu, int nu, int odd_bit>

__global__ void quda::LLFAT_KERNEL_EX	(	do_siteComputeGenStapleParity	,
		RECONSTRUCT
	)

template<int mu, int nu, int odd_bit, int save_staple>

__global__ void quda::LLFAT_KERNEL_EX	(	do_computeGenStapleFieldParity	,
		RECONSTRUCT
	)

__global__ void quda::LLFAT_KERNEL_EX	(	llfatOneLink	,
		RECONSTRUCT
	)		const

void quda::llfatOneLinkKernel	(	cudaGaugeField &	cudaFatLink,
		cudaGaugeField &	cudaSiteLink,
		cudaGaugeField &	cudaStaple,
		cudaGaugeField &	cudaStaple1,
		QudaGaugeParam *	param,
		double *	act_path_coeff
	)

Definition at line 1100 of file llfat_quda.cu.

void quda::llfatOneLinkKernel_ex	(	cudaGaugeField &	cudaFatLink,
		cudaGaugeField &	cudaSiteLink,
		cudaGaugeField &	cudaStaple,
		cudaGaugeField &	cudaStaple1,
		QudaGaugeParam *	param,
		double *	act_path_coeff,
		llfat_kernel_param_t	kparam
	)

Definition at line 1141 of file llfat_quda.cu.

quda::LOAD_EVEN_FAT_MATRIX	(	mu	,
		mem_idx
	)

quda::LOAD_EVEN_SITE_MATRIX	(	nu	,
		new_mem_idx	,
		C
	)

quda::LOAD_ODD_MULINK_MATRIX	(	0	,
		new_mem_idx	,
		BB
	)

quda::LOAD_ODD_SITE_MATRIX	(	mu	,
		new_mem_idx	,
		B
	)

quda::LOAD_ODD_SITE_MATRIX	(	nu	,
		new_mem_idx	,
		C
	)

quda::LOAD_ODD_SITE_MATRIX	(	nu	,
		(new_mem_idx)	,
		A
	)

Initial value:

{
    
    LLFAT_COMPUTE_NEW_IDX_MINUS_EX(nu,X)

quda::LOAD_ODD_SITE_MATRIX	(	mu	,
		(new_mem_idx)	,
		B
	)

quda::LOAD_ODD_SITE_MATRIX	(	nu	,
		new_mem_idx	,
		A
	)

Initial value:

{
    
    LLFAT_COMPUTE_NEW_IDX_MINUS_EX(nu, X)

void quda::loadLinkToGPU	(	cudaGaugeField *	cudaGauge,
		cpuGaugeField *	cpuGauge,
		QudaGaugeParam *	param
	)

void quda::loadLinkToGPU_ex	(	cudaGaugeField *	cudaGauge,
		cpuGaugeField *	cpuGauge
	)

void quda::loadLinkToGPU_gf	(	cudaGaugeField *	cudaGauge,
		cpuGaugeField *	cpuGauge,
		QudaGaugeParam *	param
	)

template<class T >

__device__ void quda::loadLinkVariableFromArray	(	const T *const	array,
		int	dir,
		int	idx,
		int	stride,
		Matrix< T, 3 > *	link
	)

inline

Definition at line 638 of file quda_matrix.h.

__device__ void quda::loadLinkVariableFromArray	(	const float2 *const	array,
		int	dir,
		int	idx,
		int	stride,
		Matrix< double2, 3 > *	link
	)

inline

Definition at line 648 of file quda_matrix.h.

template<typename Float , typename Float2 >

void quda::loadMomField	(	Float2 *	even,
		Float2 *	odd,
		Float *	mom,
		int	bytes,
		int	Vh,
		int	pad,
		void *	buffer
	)

Definition at line 187 of file cuda_gauge_field.cpp.

void quda::loadTuneCache ( QudaVerbosity verbosity )

Definition at line 104 of file tune.cpp.

template<class Cmplx >

__device__ __host__ Cmplx quda::makeComplex	(	const typename RealTypeId< Cmplx >::Type &	a,
		const typename RealTypeId< Cmplx >::Type &	b
	)

inline

Definition at line 107 of file quda_matrix.h.

__device__ __host__ double2 quda::makeComplex	(	const double &	a,
		const double &	b
	)

inline

Definition at line 116 of file quda_matrix.h.

__device__ __host__ float2 quda::makeComplex	(	const float &	a,
		const float &	b
	)

inline

Definition at line 121 of file quda_matrix.h.

void* quda::mapped_malloc_	(	const char *	func,
		const char *	file,
		int	line,
		size_t	size
	)

Allocate page-locked ("pinned") host memory, and map it into the GPU address space. This function should only be called via the mapped_malloc() macro, defined in malloc_quda.h

void quda::massRescale	(	QudaDslashType	dslash_type,
		double &	kappa,
		QudaSolutionType	solution_type,
		QudaMassNormalization	mass_normalization,
		cudaColorSpinorField &	b
	)

Definition at line 710 of file interface_quda.cpp.

void quda::massRescaleCoeff	(	QudaDslashType	dslash_type,
		double &	kappa,
		QudaSolutionType	solution_type,
		QudaMassNormalization	mass_normalization,
		double &	coeff
	)

Definition at line 769 of file interface_quda.cpp.

quda::MULT_SU3_AN	(	a	,
		b	,
		tempa
	)

quda::MULT_SU3_AN	(	a	,
		bb	,
		tempa
	)

quda::MULT_SU3_NA	(	tempa	,
		c	,
		staple
	)

quda::MULT_SU3_NN	(	a	,
		b	,
		tempa
	)

quda::MULT_SU3_NN	(	tempa	,
		c	,
		b
	)

quda::MULT_SU3_NN	(	a	,
		bb	,
		tempa
	)

quda::MULT_SU3_NN	(	tempa	,
		c	,
		a
	)

void quda::mxpyCpu	(	const cpuColorSpinorField &	x,
		cpuColorSpinorField &	y
	)

Definition at line 51 of file blas_cpu.cpp.

void quda::mxpyCuda	(	cudaColorSpinorField &	x,
		cudaColorSpinorField &	y
	)

Definition at line 149 of file blas_quda.cu.

template<typename Float >

double quda::norm	(	const Float *	a,
		const int	N
	)

Definition at line 160 of file blas_cpu.cpp.

double quda::norm2 ( const ColorSpinorField & a )

Definition at line 315 of file color_spinor_field.cpp.

double quda::norm2 ( const cudaGaugeField & a )

This is a debugging function, where we cast a gauge field into a spinor field so we can compute its L2 norm.

Parameters

a	The gauge field that we want the norm of

Returns: The L2 norm squared of the gauge field

Definition at line 530 of file cuda_gauge_field.cpp.

__device__ double quda::norm2_ ( const double2 & a )

Return the L2 norm of a

Definition at line 118 of file reduce_quda.cu.

__device__ float quda::norm2_ ( const float2 & a )

Definition at line 119 of file reduce_quda.cu.

__device__ float quda::norm2_ ( const float4 & a )

Definition at line 120 of file reduce_quda.cu.

double quda::normCpu ( const cpuColorSpinorField & b )

Definition at line 166 of file blas_cpu.cpp.

double quda::normCuda ( const cudaColorSpinorField & b )

Definition at line 134 of file reduce_quda.cu.

template<class Cmplx >

__device__ __host__ Cmplx quda::operator*	(	const Cmplx &	a,
		const typename RealTypeId< Cmplx >::Type &	scalar
	)

inline

Definition at line 146 of file quda_matrix.h.

template<class Cmplx >

__device__ __host__ Cmplx quda::operator*	(	const typename RealTypeId< Cmplx >::Type &	scalar,
		const Cmplx &	b
	)

inline

Definition at line 183 of file quda_matrix.h.

template<class Cmplx >

__device__ __host__ Cmplx quda::operator*	(	const Cmplx &	a,
		const Cmplx &	b
	)

inline

Definition at line 189 of file quda_matrix.h.

template<class T , int N, class S >

__device__ __host__ Matrix<T,N> quda::operator*	(	const S &	scalar,
		const Matrix< T, N > &	a
	)

inline

Definition at line 353 of file quda_matrix.h.

template<class T , int N, class S >

__device__ __host__ Matrix<T,N> quda::operator*	(	const Matrix< T, N > &	a,
		const S &	scalar
	)

inline

Definition at line 363 of file quda_matrix.h.

template<class T >

__device__ __host__ Matrix<T,3> quda::operator*	(	const Matrix< T, 3 > &	a,
		const Matrix< T, 3 > &	b
	)

inline

Definition at line 371 of file quda_matrix.h.

template<class T , class U >

__device__ __host__ Matrix<typename PromoteTypeId<T,U>::Type,3> quda::operator*	(	const Matrix< T, 3 > &	a,
		const Matrix< U, 3 > &	b
	)

inline

Definition at line 393 of file quda_matrix.h.

template<class T >

__device__ __host__ Matrix<T,2> quda::operator*	(	const Matrix< T, 2 > &	a,
		const Matrix< T, 2 > &	b
	)

inline

Definition at line 412 of file quda_matrix.h.

template<class Cmplx >

__device__ __host__ Cmplx quda::operator+	(	const Cmplx &	a,
		const Cmplx &	b
	)

inline

Definition at line 135 of file quda_matrix.h.

template<class Cmplx >

__device__ __host__ Cmplx quda::operator+	(	const Cmplx &	a,
		const typename RealTypeId< Cmplx >::Type &	scalar
	)

inline

Definition at line 158 of file quda_matrix.h.

template<class Cmplx >

__device__ __host__ Cmplx quda::operator+	(	const typename RealTypeId< Cmplx >::Type &	scalar,
		const Cmplx &	a
	)

inline

Definition at line 164 of file quda_matrix.h.

template<class T , int N>

__device__ __host__ Matrix<T,N> quda::operator+	(	const Matrix< T, N > &	a,
		const Matrix< T, N > &	b
	)

inline

Definition at line 319 of file quda_matrix.h.

template<class Cmplx >

__device__ __host__ Cmplx& quda::operator+=	(	Cmplx &	a,
		const Cmplx &	b
	)

inline

Definition at line 127 of file quda_matrix.h.

template<class T , int N>

__device__ __host__ Matrix<T,N> quda::operator+=	(	Matrix< T, N > &	a,
		const Matrix< T, N > &	b
	)

inline

Definition at line 330 of file quda_matrix.h.

template<class Cmplx >

__device__ __host__ Cmplx quda::operator-	(	const Cmplx &	a,
		const Cmplx &	b
	)

inline

Definition at line 140 of file quda_matrix.h.

template<class Cmplx >

__device__ __host__ Cmplx quda::operator-	(	const Cmplx &	a,
		const typename RealTypeId< Cmplx >::Type &	scalar
	)

inline

Definition at line 170 of file quda_matrix.h.

template<class Cmplx >

__device__ __host__ Cmplx quda::operator-	(	const typename RealTypeId< Cmplx >::Type &	scalar,
		const Cmplx &	a
	)

inline

Definition at line 176 of file quda_matrix.h.

template<class T , int N>

__device__ __host__ Matrix<T,N> quda::operator-	(	const Matrix< T, N > &	a,
		const Matrix< T, N > &	b
	)

inline

Definition at line 341 of file quda_matrix.h.

template<class Cmplx >

__device__ __host__ Cmplx quda::operator/	(	const Cmplx &	a,
		const typename RealTypeId< Cmplx >::Type &	scalar
	)

inline

Definition at line 152 of file quda_matrix.h.

std::ostream & quda::operator<<	(	std::ostream &	output,
		const LatticeFieldParam &	param
	)

Definition at line 75 of file lattice_field.cpp.

std::ostream & quda::operator<<	(	std::ostream &	output,
		const GaugeFieldParam &	param
	)

Definition at line 48 of file gauge_field.cpp.

std::ostream& quda::operator<<	(	std::ostream &	os,
		const float2 &	z
	)

inline

Definition at line 223 of file quda_matrix.h.

std::ostream& quda::operator<<	(	std::ostream &	os,
		const double2 &	z
	)

inline

Definition at line 228 of file quda_matrix.h.

std::ostream& quda::operator<<	(	std::ostream &	out,
		const ColorSpinorField &	a
	)

Definition at line 329 of file color_spinor_field.cpp.

template<class T , int N>

std::ostream& quda::operator<<	(	std::ostream &	os,
		const Matrix< T, N > &	m
	)

Definition at line 616 of file quda_matrix.h.

template<class T , int N>

std::ostream& quda::operator<<	(	std::ostream &	os,
		const Array< T, N > &	a
	)

Definition at line 628 of file quda_matrix.h.

std::ostream& quda::operator<<	(	std::ostream &	out,
		const cudaColorSpinorField &	a
	)

Definition at line 767 of file cuda_color_spinor_field.cu.

void quda::orthoDir	(	Complex **	beta,
		cudaColorSpinorField *	Ap[],
		int	k
	)

Definition at line 50 of file inv_gcr_quda.cpp.

template<class T , int N>

__device__ __host__ void quda::outerProd	(	const Array< T, N > &	a,
		const Array< T, N > &	b,
		Matrix< T, N > *	m
	)

inline

Definition at line 603 of file quda_matrix.h.

void quda::pack_gauge_diag	(	void *	buf,
		int *	X,
		void **	sitelink,
		int	nu,
		int	mu,
		int	dir1,
		int	dir2,
		QudaPrecision	prec
	)

void quda::pack_ghost_all_links	(	void **	cpuLink,
		void **	cpuGhostBack,
		void **	cpuGhostFwd,
		int	dir,
		int	nFace,
		QudaPrecision	precision,
		int *	X
	)

void quda::pack_ghost_all_staples_cpu	(	void *	staple,
		void **	cpuGhostStapleBack,
		void **	cpuGhostStapleFwd,
		int	nFace,
		QudaPrecision	precision,
		int *	X
	)

void quda::packFace	(	void *	ghost_buf,
		cudaColorSpinorField &	in,
		const int	dagger,
		const int	parity,
		const cudaStream_t &	stream
	)

template<typename Float >

void quda::packGhost	(	Float **	ghost,
		const Float **	gauge,
		const int	nFace,
		const int *	X,
		const int	volumeCB,
		const int *	surfaceCB,
		const QudaGaugeFieldOrder	order
	)

Definition at line 108 of file cpu_gauge_field.cpp.

void quda::packGhostStaple	(	int *	X,
		void *	even,
		void *	odd,
		int	volume,
		QudaPrecision	prec,
		int	stride,
		int	dir,
		int	whichway,
		void **	fwd_nbr_buf_gpu,
		void **	back_nbr_buf_gpu,
		void **	fwd_nbr_buf,
		void **	back_nbr_buf,
		cudaStream_t *	stream
	)

void* quda::pinned_malloc_	(	const char *	func,
		const char *	file,
		int	line,
		size_t	size
	)

Allocate page-locked ("pinned") host memory. This function should only be called via the pinned_malloc() macro, defined in malloc_quda.h

Note that we do not rely on cudaHostAlloc(), since buffers allocated in this way have been observed to cause problems when shared with MPI via GPU Direct on some systems.

template<class T >

void quda::point	(	T &	t,
		const int	x,
		const int	s,
		const int	c
	)

Definition at line 260 of file cpu_color_spinor_field.cpp.

template<class Order >

void quda::print_vector	(	const Order &	o,
		unsigned int	x
	)

Definition at line 352 of file cpu_color_spinor_field.cpp.

template<class Cmplx >

__host__ __device__ void quda::printLink ( const Matrix< Cmplx, 3 > & link )

inline

Definition at line 780 of file quda_matrix.h.

void quda::printPeakMemUsage ( )

Definition at line 282 of file malloc.cpp.

template<class T , class U >

DEVICEHOST PromoteTypeId<T,U>::Type quda::quadSum	(	const T &	a,
		const U &	b
	)

inline

Definition at line 39 of file svd_quda.h.

template<class T >

void quda::random ( T & t )

Definition at line 246 of file cpu_color_spinor_field.cpp.

template<class Cmplx >

__device__ __host__ bool quda::reciprocalRoot	(	const Matrix< Cmplx, 3 > &	q,
		Matrix< Cmplx, 3 > *	res
	)

Definition at line 163 of file unitarize_links_quda.cu.

quda::RECONSTRUCT_SITE_LINK	(	sign	,
		a
	)

quda::RECONSTRUCT_SITE_LINK	(	sign	,
		b
	)

quda::RECONSTRUCT_SITE_LINK	(	sign	,
		c
	)

template<typename Float >

double quda::reDotProduct	(	const Float *	a,
		const Float *	b,
		const int	N
	)

Definition at line 185 of file blas_cpu.cpp.

double quda::reDotProductCpu	(	const cpuColorSpinorField &	a,
		const cpuColorSpinorField &	b
	)

Definition at line 191 of file blas_cpu.cpp.

double quda::reDotProductCuda	(	cudaColorSpinorField &	a,
		cudaColorSpinorField &	b
	)

Definition at line 159 of file reduce_quda.cu.

double quda::resNorm	(	const DiracMatrix &	mat,
		cudaColorSpinorField &	b,
		cudaColorSpinorField &	x
	)

Definition at line 22 of file inv_bicgstab_quda.cpp.

void* quda::safe_malloc_	(	const char *	func,
		const char *	file,
		int	line,
		size_t	size
	)

Perform a standard malloc() with error-checking. This function should only be called via the safe_malloc() macro, defined in malloc_quda.h

void quda::saveTuneCache ( QudaVerbosity verbosity )

Write tunecache to disk.

void quda::setBlasParam	(	int	kernel,
		int	prec,
		int	threads,
		int	blocks
	)

void quda::setBlasTuning	(	QudaTune	tune,
		QudaVerbosity	verbose
	)

Definition at line 58 of file blas_quda.cu.

void quda::setDiracParam	(	DiracParam &	diracParam,
		QudaInvertParam *	inv_param,
		bool	pc
	)

Definition at line 608 of file interface_quda.cpp.

void quda::setDiracPreParam	(	DiracParam &	diracParam,
		QudaInvertParam *	inv_param,
		const bool	pc
	)

Definition at line 680 of file interface_quda.cpp.

void quda::setDiracSloppyParam	(	DiracParam &	diracParam,
		QudaInvertParam *	inv_param,
		bool	pc
	)

Definition at line 664 of file interface_quda.cpp.

void quda::setDslashTuning	(	QudaTune	tune,
		QudaVerbosity	verbose
	)

Parameters

tune	Sets whether to tune the dslash kernels or not
verbose	The verbosity level to use in the dslash tuning functions

Definition at line 145 of file dslash_quda.cu.

void quda::setFace ( const FaceBuffer & face )

Definition at line 215 of file dslash_quda.cu.

template<class T , int N>

__device__ __host__ void quda::setIdentity ( Matrix< T, N > * m )

inline

Definition at line 480 of file quda_matrix.h.

template<int N>

__device__ __host__ void quda::setIdentity ( Matrix< float2, N > * m )

inline

Definition at line 494 of file quda_matrix.h.

template<int N>

__device__ __host__ void quda::setIdentity ( Matrix< double2, N > * m )

inline

Definition at line 508 of file quda_matrix.h.

void quda::setKernelPackT ( bool pack )

Parameters

pack	Sets whether to use a kernel to pack the T dimension

Definition at line 155 of file dslash_quda.cu.

void quda::setTwistParam	(	double &	a,
		double &	b,
		const double &	kappa,
		const double &	mu,
		const int	dagger,
		const QudaTwistGamma5Type	twist
	)

Definition at line 840 of file dslash_quda.cu.

void quda::setUnitarizeLinksConstants	(	double	unitarize_eps,
		double	max_error,
		bool	allow_svd,
		bool	svd_only,
		double	svd_rel_error,
		double	svd_abs_error,
		bool	check_unitarization = `true`
	)

Definition at line 93 of file unitarize_links_quda.cu.

void quda::setUnitarizeLinksPadding	(	int	input_padding,
		int	output_padding
	)

Definition at line 43 of file unitarize_links_quda.cu.

template<class T , int N>

__device__ __host__ void quda::setZero ( Matrix< T, N > * m )

inline

Definition at line 523 of file quda_matrix.h.

template<int N>

__device__ __host__ void quda::setZero ( Matrix< float2, N > * m )

inline

Definition at line 536 of file quda_matrix.h.

template<int N>

__device__ __host__ void quda::setZero ( Matrix< double2, N > * m )

inline

Definition at line 549 of file quda_matrix.h.

void quda::siteComputeGenStapleParityKernel	(	void *	staple_even,
		void *	staple_odd,
		const void *	sitelink_even,
		const void *	sitelink_odd,
		void *	fatlink_even,
		void *	fatlink_odd,
		int	mu,
		int	nu,
		double	mycoeff,
		QudaReconstructType	recon,
		QudaPrecision	prec,
		dim3	halfGridDim,
		llfat_kernel_param_t	kparam,
		cudaStream_t *	stream
	)

Definition at line 783 of file llfat_quda.cu.

void quda::siteComputeGenStapleParityKernel_ex	(	void *	staple_even,
		void *	staple_odd,
		const void *	sitelink_even,
		const void *	sitelink_odd,
		void *	fatlink_even,
		void *	fatlink_odd,
		int	mu,
		int	nu,
		double	mycoeff,
		QudaReconstructType	recon,
		QudaPrecision	prec,
		llfat_kernel_param_t	kparam
	)

Definition at line 937 of file llfat_quda.cu.

template<class Real >

DEVICEHOST void quda::smallSVD	(	Matrix< Real, 2 > &	u,
		Matrix< Real, 2 > &	v,
		Matrix< Real, 2 > &	m
	)

inline

Definition at line 169 of file svd_quda.h.

void quda::staggeredDslashCuda	(	cudaColorSpinorField *	out,
		const cudaGaugeField &	fatGauge,
		const cudaGaugeField &	longGauge,
		const cudaColorSpinorField *	in,
		const int	parity,
		const int	dagger,
		const cudaColorSpinorField *	x,
		const double &	k,
		const int *	commDim
	)

NEW: extra argument.

Definition at line 1626 of file dslash_quda.cu.

void quda::storeLinkToCPU	(	cpuGaugeField *	cpuGauge,
		cudaGaugeField *	cudaGauge,
		QudaGaugeParam *	param
	)

template<typename Float , typename Float2 >

void quda::storeMomToCPUArray	(	Float *	mom,
		Float2 *	even,
		Float2 *	odd,
		int	bytes,
		int	V,
		int	pad,
		void *	buffer
	)

Definition at line 389 of file cuda_gauge_field.cpp.

template<class Real >

DEVICEHOST void quda::swap	(	Real &	a,
		Real &	b
	)

inline

Definition at line 160 of file svd_quda.h.

double quda::timeInterval	(	struct timeval	start,
		struct timeval	end
	)

Definition at line 21 of file inv_gcr_quda.cpp.

template<typename Float >

void quda::transpose	(	Float *	gT,
		const Float *	g
	)

inline

Definition at line 96 of file cpu_gauge_field.cpp.

double3 quda::tripleCGReductionCuda	(	cudaColorSpinorField &	x,
		cudaColorSpinorField &	y,
		cudaColorSpinorField &	z
	)

Definition at line 625 of file reduce_quda.cu.

void quda::tripleCGUpdateCuda	(	const double &	alpha,
		const double &	beta,
		cudaColorSpinorField &	q,
		cudaColorSpinorField &	r,
		cudaColorSpinorField &	x,
		cudaColorSpinorField &	p
	)

Definition at line 455 of file blas_quda.cu.

TuneParam quda::tuneLaunch	(	Tunable &	tunable,
		QudaTune	enabled,
		QudaVerbosity	verbosity
	)

Return the optimal launch parameters for a given kernel, either by retrieving them from tunecache or autotuning on the spot.

void quda::twistedMassDslashCuda	(	cudaColorSpinorField *	out,
		const cudaGaugeField &	gauge,
		const cudaColorSpinorField *	in,
		const int	parity,
		const int	dagger,
		const cudaColorSpinorField *	x,
		const double &	kappa,
		const double &	mu,
		const double &	epsilon,
		const int *	commDim
	)

Non-degenerate twisted mass (ndeg tm):

Definition at line 1513 of file dslash_quda.cu.

void quda::twistGamma5Cuda	(	cudaColorSpinorField *	out,
		const cudaColorSpinorField *	in,
		const int	dagger,
		const double &	kappa,
		const double &	mu,
		const double &	epsilon,
		const QudaTwistGamma5Type	twist
	)

Non-degenerate twisted mass (ndeg tm):

template<class Cmplx >

__host__ __device__ bool quda::unitarizeLinkMILC	(	const Matrix< Cmplx, 3 > &	in,
		Matrix< Cmplx, 3 > *const	result
	)

Definition at line 256 of file unitarize_links_quda.cu.

template<class Cmplx >

__host__ __device__ bool quda::unitarizeLinkNewton	(	const Matrix< Cmplx, 3 > &	in,
		Matrix< Cmplx, 3 > *const	result
	)

Definition at line 315 of file unitarize_links_quda.cu.

void quda::unitarizeLinksCPU	(	const QudaGaugeParam &	param,
		cpuGaugeField &	infield,
		cpuGaugeField *	outfield
	)

Definition at line 495 of file unitarize_links_quda.cu.

void quda::unitarizeLinksCuda	(	const QudaGaugeParam &	param,
		cudaGaugeField &	infield,
		cudaGaugeField *	outfield,
		int *	num_failures
	)

Definition at line 487 of file unitarize_links_quda.cu.

template<class Cmplx >

__host__ __device__ bool quda::unitarizeLinkSVD	(	const Matrix< Cmplx, 3 > &	in,
		Matrix< Cmplx, 3 > *const	result
	)

Definition at line 287 of file unitarize_links_quda.cu.

void quda::unpackGhostStaple	(	int *	X,
		void *	_even,
		void *	_odd,
		int	volume,
		QudaPrecision	prec,
		int	stride,
		int	dir,
		int	whichway,
		void **	fwd_nbr_buf,
		void **	back_nbr_buf,
		cudaStream_t *	stream
	)

void quda::updateAlphaZeta	(	double *	alpha,
		double *	zeta,
		double *	zeta_old,
		const double *	r2,
		const double *	beta,
		const double	pAp,
		const double *	offset,
		const int	nShift,
		const int	j_low
	)

Compute the new values of alpha and zeta.

Definition at line 38 of file inv_multi_cg_quda.cpp.

void quda::updateSolution	(	cudaColorSpinorField &	x,
		const Complex *	alpha,
		Complex **const	beta,
		double *	gamma,
		int	k,
		cudaColorSpinorField *	p[]
	)

Definition at line 113 of file inv_gcr_quda.cpp.

void quda::wilsonDslashCuda	(	cudaColorSpinorField *	out,
		const cudaGaugeField &	gauge,
		const cudaColorSpinorField *	in,
		const int	oddBit,
		const int	daggerBit,
		const cudaColorSpinorField *	x,
		const double &	k,
		const int *	commDim
	)

Definition at line 1338 of file dslash_quda.cu.

quda::WRITE_STAPLE_MATRIX	(	staple_even	,
		mem_idx
	)

template<class T >

__device__ void quda::writeLinkVariableToArray	(	const Matrix< T, 3 > &	link,
		int	dir,
		int	idx,
		int	stride,
		T *const	array
	)

inline

Definition at line 665 of file quda_matrix.h.

__device__ void quda::writeLinkVariableToArray	(	const Matrix< double2, 3 > &	link,
		int	dir,
		int	idx,
		int	stride,
		float2 *const	array
	)

inline

Definition at line 677 of file quda_matrix.h.

double quda::xmyNormCpu	(	const cpuColorSpinorField &	a,
		cpuColorSpinorField &	b
	)

Definition at line 205 of file blas_cpu.cpp.

double quda::xmyNormCuda	(	cudaColorSpinorField &	a,
		cudaColorSpinorField &	b
	)

Definition at line 204 of file reduce_quda.cu.

Complex quda::xpaycDotzyCpu	(	const cpuColorSpinorField &	x,
		const double &	a,
		cpuColorSpinorField &	y,
		const cpuColorSpinorField &	z
	)

Definition at line 231 of file blas_cpu.cpp.

Complex quda::xpaycDotzyCuda	(	cudaColorSpinorField &	x,
		const double &	a,
		cudaColorSpinorField &	y,
		cudaColorSpinorField &	z
	)

Definition at line 354 of file reduce_quda.cu.

void quda::xpayCpu	(	const cpuColorSpinorField &	x,
		const double &	a,
		cpuColorSpinorField &	y
	)

Definition at line 41 of file blas_cpu.cpp.

void quda::xpayCuda	(	cudaColorSpinorField &	x,
		const double &	a,
		cudaColorSpinorField &	y
	)

Definition at line 133 of file blas_quda.cu.

void quda::xpyCpu	(	const cpuColorSpinorField &	x,
		cpuColorSpinorField &	y
	)

Definition at line 22 of file blas_cpu.cpp.

void quda::xpyCuda	(	cudaColorSpinorField &	x,
		cudaColorSpinorField &	y
	)

Definition at line 99 of file blas_quda.cu.

double3 quda::xpyHeavyQuarkResidualNormCpu	(	cpuColorSpinorField &	x,
		cpuColorSpinorField &	y,
		cpuColorSpinorField &	r
	)

double3 quda::xpyHeavyQuarkResidualNormCuda	(	cudaColorSpinorField &	x,
		cudaColorSpinorField &	y,
		cudaColorSpinorField &	r
	)

Definition at line 596 of file reduce_quda.cu.

void quda::zeroCuda ( cudaColorSpinorField & a )

Definition at line 32 of file blas_quda.cu.

Variable Documentation

unsigned long long quda::blas_bytes

Definition at line 30 of file blas_quda.cu.

unsigned long long quda::blas_flops

Definition at line 29 of file blas_quda.cu.

__global__ void const FloatN FloatM FloatM Float quda::coeff0

Definition at line 1055 of file llfat_quda.cu.

__global__ void const FloatN FloatM FloatM Float Float quda::coeff5

Initial value:

{

const FloatN* my_sitelink

Definition at line 1058 of file llfat_quda.cu.

int quda::commDimTotal

Definition at line 1204 of file dslash_quda.cu.

int quda::commsCompleted[Nstream]

Definition at line 1202 of file dslash_quda.cu.

quda::DECLARE_NEW_X

Definition at line 795 of file llfat_quda.cu.

quda::DECLARE_VAR_SIGN

Definition at line 794 of file llfat_quda.cu.

quda::DECLARE_X_ARRAY

Definition at line 796 of file llfat_quda.cu.

__constant__ bool quda::DEV_FL_CHECK_UNITARIZATION

Definition at line 33 of file unitarize_links_quda.cu.

__constant__ double quda::DEV_FL_MAX_ERROR

Definition at line 27 of file unitarize_links_quda.cu.

__constant__ bool quda::DEV_FL_REUNIT_ALLOW_SVD

Definition at line 29 of file unitarize_links_quda.cu.

__constant__ double quda::DEV_FL_REUNIT_SVD_ABS_ERROR

Definition at line 32 of file unitarize_links_quda.cu.

__constant__ bool quda::DEV_FL_REUNIT_SVD_ONLY

Definition at line 30 of file unitarize_links_quda.cu.

__constant__ double quda::DEV_FL_REUNIT_SVD_REL_ERROR

Definition at line 31 of file unitarize_links_quda.cu.

__constant__ double quda::DEV_FL_UNITARIZE_EPS

Definition at line 28 of file unitarize_links_quda.cu.

__constant__ double quda::DEV_HISQ_FORCE_FILTER

Definition at line 18 of file unitarize_force_quda.cu.

__constant__ double quda::DEV_HISQ_UNITARIZE_EPS

Definition at line 17 of file unitarize_force_quda.cu.

__constant__ double quda::DEV_MAX_DET_ERROR

Definition at line 19 of file unitarize_force_quda.cu.

__constant__ int quda::DEV_MAX_ITER = 20

Definition at line 23 of file unitarize_links_quda.cu.

__constant__ bool quda::DEV_REUNIT_ALLOW_SVD

Definition at line 20 of file unitarize_force_quda.cu.

__constant__ double quda::DEV_REUNIT_SVD_ABS_ERROR

Definition at line 23 of file unitarize_force_quda.cu.

__constant__ bool quda::DEV_REUNIT_SVD_ONLY

Definition at line 21 of file unitarize_force_quda.cu.

__constant__ double quda::DEV_REUNIT_SVD_REL_ERROR

Definition at line 22 of file unitarize_force_quda.cu.

__constant__ int quda::dir1_array[16]

Definition at line 191 of file llfat_quda.cu.

__constant__ int quda::dir2_array[16]

Definition at line 192 of file llfat_quda.cu.

int quda::dslashCompleted[Nstream]

Definition at line 1203 of file dslash_quda.cu.

DslashParam quda::dslashParam

Definition at line 88 of file dslash_quda.cu.

quda::else

Initial value:

{

MULT_SU3_NA(tempa, c, tempb)

Definition at line 978 of file llfat_quda.cu.

__global__ void const FloatN FloatM * quda::fatlink_even

Definition at line 759 of file llfat_quda.cu.

__global__ void const FloatN FloatM FloatM * quda::fatlink_odd

Definition at line 759 of file llfat_quda.cu.

int quda::gatherCompleted[Nstream]

Definition at line 1200 of file dslash_quda.cu.

int quda::idx = sid

Definition at line 1374 of file llfat_quda.cu.

__constant__ int quda::INPUT_PADDING =0

Definition at line 21 of file unitarize_links_quda.cu.

__global__ void const FloatN FloatM FloatM Float Float llfat_kernel_param_t quda::kparam

Initial value:

{

__shared__ FloatM sd_data[NUM_FLOATS*64]

Definition at line 763 of file llfat_quda.cu.

int quda::Ls

Definition at line 135 of file dslash_quda.cu.

int quda::mem_idx = blockIdx.x*blockDim.x + threadIdx.x

Definition at line 771 of file llfat_quda.cu.

__global__ void FloatM const FloatN const FloatN FloatM FloatM const FloatM * quda::mulink_even

Definition at line 902 of file llfat_quda.cu.

__global__ void FloatM const FloatN const FloatN FloatM FloatM const FloatM const FloatM * quda::mulink_odd

Definition at line 902 of file llfat_quda.cu.

FloatM * quda::my_fatlink = fatlink_even

Definition at line 1060 of file llfat_quda.cu.

quda::my_sitelink = sitelink_even

Definition at line 1068 of file llfat_quda.cu.

__global__ void FloatM const FloatN const FloatN FloatM FloatM const FloatM const FloatM Float quda::mycoeff

Definition at line 759 of file llfat_quda.cu.

int quda::new_mem_idx

Definition at line 793 of file llfat_quda.cu.

const int quda::Nstream = 1

Definition at line 156 of file quda_internal.h.

short quda::odd_bit = 0

Definition at line 1065 of file llfat_quda.cu.

__constant__ int quda::OUTPUT_PADDING =0

Definition at line 22 of file unitarize_links_quda.cu.

int quda::previousDir[Nstream]

Definition at line 1201 of file dslash_quda.cu.

quda::return

Definition at line 897 of file llfat_quda.cu.

int quda::sid = blockIdx.x*blockDim.x + threadIdx.x

Definition at line 1061 of file llfat_quda.cu.

__global__ void FloatM const FloatN * quda::sitelink_even

Definition at line 759 of file llfat_quda.cu.

__global__ void const FloatN * quda::sitelink_odd

Definition at line 759 of file llfat_quda.cu.

FloatM quda::STAPLE0

Definition at line 768 of file llfat_quda.cu.

FloatM quda::STAPLE1

Definition at line 768 of file llfat_quda.cu.

FloatM quda::STAPLE2

Definition at line 768 of file llfat_quda.cu.

FloatM quda::STAPLE3

Definition at line 768 of file llfat_quda.cu.

FloatM quda::STAPLE4

Definition at line 768 of file llfat_quda.cu.

FloatM quda::STAPLE5

Definition at line 768 of file llfat_quda.cu.

FloatM quda::STAPLE6

Definition at line 768 of file llfat_quda.cu.

FloatM quda::STAPLE7

Definition at line 768 of file llfat_quda.cu.

FloatM quda::STAPLE8

Definition at line 768 of file llfat_quda.cu.

unsigned long quda::staple_bytes =0

Definition at line 194 of file llfat_quda.cu.

__global__ void FloatM * quda::staple_odd

Definition at line 759 of file llfat_quda.cu.

int quda::stride

Definition at line 41 of file blas_quda.cu.

FloatM quda::TEMPA5

Definition at line 767 of file llfat_quda.cu.

FloatM quda::TEMPA6

Definition at line 767 of file llfat_quda.cu.

FloatM quda::TEMPA7

Definition at line 767 of file llfat_quda.cu.

FloatM quda::TEMPA8

Definition at line 767 of file llfat_quda.cu.

FloatM quda::TEMPB0

Definition at line 911 of file llfat_quda.cu.

FloatM quda::TEMPB1

Definition at line 911 of file llfat_quda.cu.

FloatM quda::TEMPB2

Definition at line 911 of file llfat_quda.cu.

FloatM quda::TEMPB3

Definition at line 911 of file llfat_quda.cu.

FloatM quda::TEMPB4

Definition at line 911 of file llfat_quda.cu.

FloatM quda::TEMPB5

Definition at line 911 of file llfat_quda.cu.

FloatM quda::TEMPB6

Definition at line 911 of file llfat_quda.cu.

FloatM quda::TEMPB7

Definition at line 911 of file llfat_quda.cu.

FloatM quda::TEMPB8

Definition at line 911 of file llfat_quda.cu.

int quda::Vspatial

Definition at line 91 of file dslash_quda.cu.

int quda::x[4]

Definition at line 40 of file blas_quda.cu.

int quda::X = 2*mem_idx + x1odd

Definition at line 782 of file llfat_quda.cu.

short quda::x1 = 2*x1h + x1odd

Definition at line 781 of file llfat_quda.cu.

short quda::x1h = mem_idx - z1*X1h

Definition at line 774 of file llfat_quda.cu.

short quda::x1odd = (x2 + x3 + x4 + odd_bit) & 1

Definition at line 780 of file llfat_quda.cu.

short quda::x2 = z1 - z2*X2

Definition at line 776 of file llfat_quda.cu.

short quda::x3 = z2 - x4*X3

Definition at line 778 of file llfat_quda.cu.

int quda::x4 = z2 / X3

Definition at line 777 of file llfat_quda.cu.

int quda::z1 = mem_idx / X1h

Definition at line 773 of file llfat_quda.cu.

int quda::z2 = z1 / X2

Definition at line 775 of file llfat_quda.cu.

Namespaces

Classes

Typedefs

Enumerations

Functions

Variables

Detailed Description

Typedef Documentation

Enumeration Type Documentation

Function Documentation

Variable Documentation