QUDA  v0.7.0
A library for QCD on GPUs
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Namespaces | Classes | Typedefs | Enumerations | Functions | Variables
quda Namespace Reference

Namespaces

 asym_clover
 
 clover
 
 copy
 
 domainwall
 
 domainwall4d
 
 dslash
 
 dslash_aux
 
 
 fermion_force
 
 fermionforce
 
 gaugeforce
 
 improvedstaggered
 
 mobius
 
 ndegtwisted
 
 pack
 
 reduce
 
 staggered
 
 twisted
 
 twistedclover
 
 wilson
 

Classes

struct  CloverFieldParam
 
class  CloverField
 
class  cudaCloverField
 
class  cpuCloverField
 
struct  FullClover
 
struct  FloatNOrder
 
struct  QDPOrder
 
struct  QDPJITOrder
 
struct  BQCDOrder
 
class  ColorSpinorParam
 
class  ColorSpinorField
 
class  cudaColorSpinorField
 
class  ColorSpinorFieldOrder
 
class  SpaceColorSpinOrder
 
class  SpaceSpinColorOrder
 
class  QOPDomainWallOrder
 
class  cpuColorSpinorField
 
struct  SpaceColorSpinorOrder
 
struct  SpaceSpinorColorOrder
 
struct  QDPJITDiracOrder
 
struct  complex
 
struct  norm_type
 
struct  norm_type< complex< T > >
 
struct  complex< float >
 
struct  complex< double >
 
class  DiracParam
 
class  Dirac
 
class  DiracWilson
 
class  DiracWilsonPC
 
class  DiracClover
 
class  DiracCloverPC
 
class  DiracDomainWall
 
class  DiracDomainWallPC
 
class  DiracDomainWall4DPC
 
class  DiracMobiusDomainWallPC
 
class  DiracTwistedMass
 
class  DiracTwistedMassPC
 
class  DiracTwistedClover
 
class  DiracTwistedCloverPC
 
class  DiracStaggered
 
class  DiracStaggeredPC
 
class  DiracImprovedStaggered
 
class  DiracImprovedStaggeredPC
 
class  DiracMatrix
 
class  DiracM
 
class  DiracMdagM
 
class  DiracMMdag
 
class  DiracMdag
 
class  EigParam
 
class  FaceBuffer
 
struct  GaugeFieldParam
 
class  GaugeField
 
class  cudaGaugeField
 
class  cpuGaugeField
 
struct  Reconstruct
 
struct  Reconstruct< 19, Float >
 
struct  Reconstruct< 12, Float >
 
struct  Reconstruct< 11, Float >
 
struct  Reconstruct< 13, Float >
 
struct  Reconstruct< 8, Float >
 
struct  Reconstruct< 9, Float >
 
struct  LegacyOrder
 
struct  MILCOrder
 
struct  CPSOrder
 
struct  TIFROrder
 
struct  kernel_param_s
 
struct  SolverParam
 
class  Solver
 
class  CG
 
class  MPCG
 
class  PreconCG
 
class  BiCGstab
 
class  SimpleBiCGstab
 
class  MPBiCGstab
 
class  GCR
 
class  MR
 
class  SD
 
class  XSD
 
class  alphaSA
 
class  MultiShiftSolver
 
class  MultiShiftCG
 
class  MinResExt
 
class  DeflatedSolver
 
class  IncEigCG
 
class  Eig_Solver
 
class  Lanczos
 
class  ImpRstLanczos
 
struct  LatticeFieldParam
 
class  LatticeField
 
struct  llfat_kernel_param_s
 
struct  Timer
 
struct  TimeProfile
 
struct  mapper
 
struct  mapper< double >
 
struct  mapper< float >
 
struct  mapper< short >
 
struct  mapper< double2 >
 
struct  mapper< float2 >
 
struct  mapper< short2 >
 
struct  mapper< double4 >
 
struct  mapper< float4 >
 
struct  mapper< short4 >
 
struct  isHalf
 
struct  isHalf< short >
 
struct  Trig
 
struct  Trig< true >
 
class  RitzMat
 
struct  TuneKey
 
class  TuneParam
 
class  Tunable
 
struct  axpby
 
struct  xpy
 
struct  axpy
 
struct  xpay
 
struct  mxpy
 
struct  ax
 
struct  caxpy
 
struct  caxpby
 
struct  cxpaypbz
 
struct  axpyBzpcx
 
struct  axpyZpbx
 
struct  caxpbypzYmbw
 
struct  cabxpyAx
 
struct  caxpbypz
 
struct  caxpbypczpw
 
struct  caxpyxmaz
 
struct  tripleCGUpdate
 
class  Gamma5Cuda
 
class  ContractCuda
 
class  PreserveBasis
 
struct  NonRelBasis
 
struct  RelBasis
 
struct  ChiralToNonRelBasis
 
struct  NonRelToChiralBasis
 
class  PackSpinor
 
struct  CopyGaugeExArg
 
class  CopyGaugeEx
 
struct  CopyGaugeArg
 
class  CopyGauge
 
struct  RealType
 
struct  RealType< double2 >
 
struct  RealType< float2 >
 
struct  RealType< float4 >
 
struct  RealType< short2 >
 
struct  RealType< short4 >
 
class  CloverCuda
 
class  TwistGamma5Cuda
 
class  TwistCloverGamma5Cuda
 
struct  CopySpinorExArg
 
class  CopySpinorEx
 
struct  ExtractGhostArg
 
class  ExtractGhost
 
struct  ExtractGhostExArg
 
class  ExtractGhostEx
 
class  GaugeForceCuda
 
struct  DeflationParam
 
class  EigCGArgs
 
struct  KSForceArg
 
class  KSForceComplete
 
struct  KSLongLinkArg
 
class  KSLongLinkForce
 
class  MemAlloc
 
struct  GhostStapleParam
 
struct  ComplexTypeId
 
struct  ComplexTypeId< float >
 
struct  ComplexTypeId< double >
 
struct  RealTypeId
 
struct  RealTypeId< float >
 
struct  RealTypeId< double >
 
struct  RealTypeId< float2 >
 
struct  RealTypeId< double2 >
 
struct  PromoteTypeId
 
struct  PromoteTypeId< float2, float >
 
struct  PromoteTypeId< float, float2 >
 
struct  PromoteTypeId< double2, double >
 
struct  PromoteTypeId< double, double2 >
 
struct  PromoteTypeId< double, int >
 
struct  PromoteTypeId< int, double >
 
struct  PromoteTypeId< float, int >
 
struct  PromoteTypeId< int, float >
 
struct  Zero
 
struct  Identity
 
class  Matrix
 
class  Array
 
struct  ReduceFunctor
 
struct  Norm2
 
struct  Dot
 
struct  DotNormA
 
struct  axpyNorm2
 
struct  xmyNorm2
 
struct  caxpyNorm2
 
struct  caxpyxmaznormx
 
struct  cabxpyaxnorm
 
struct  Cdot
 
struct  xpaycdotzy
 
struct  caxpydotzy
 
struct  CdotNormA
 
struct  CdotNormB
 
struct  caxpbypzYmbwcDotProductUYNormY
 
struct  axpyCGNorm2
 
struct  tripleCGReduction
 
struct  ShiftColorSpinorFieldArg
 
class  ShiftColorSpinorField
 

Typedefs

typedef std::complex< double > Complex
 
typedef struct quda::kernel_param_s kernel_param_t
 
typedef struct quda::llfat_kernel_param_s llfat_kernel_param_t
 
typedef std::map< TuneKey, TuneParam > map
 

Enumerations

enum  QudaProfileType {
  QUDA_PROFILE_H2D, QUDA_PROFILE_D2H, QUDA_PROFILE_INIT, QUDA_PROFILE_PREAMBLE,
  QUDA_PROFILE_COMPUTE, QUDA_PROFILE_EPILOGUE, QUDA_PROFILE_FREE, QUDA_PROFILE_PACK_KERNEL,
  QUDA_PROFILE_DSLASH_KERNEL, QUDA_PROFILE_GATHER, QUDA_PROFILE_SCATTER, QUDA_PROFILE_EVENT_RECORD,
  QUDA_PROFILE_EVENT_QUERY, QUDA_PROFILE_STREAM_WAIT_EVENT, QUDA_PROFILE_COMMS, QUDA_PROFILE_COMMS_START,
  QUDA_PROFILE_COMMS_QUERY, QUDA_PROFILE_CONSTANT, QUDA_PROFILE_TOTAL, QUDA_PROFILE_COUNT
}
 
enum  AllocType {
  DEVICE, HOST, PINNED, MAPPED,
  N_ALLOC_TYPE
}
 

Functions

void initBlas ()
 
void endBlas (void)
 
void setBlasParam (int kernel, int prec, int threads, int blocks)
 
double norm2 (const ColorSpinorField &)
 
void zeroCuda (cudaColorSpinorField &a)
 
void copyCuda (cudaColorSpinorField &dst, const cudaColorSpinorField &src)
 
double axpyNormCuda (const double &a, cudaColorSpinorField &x, cudaColorSpinorField &y)
 
double normCuda (const cudaColorSpinorField &b)
 
double reDotProductCuda (cudaColorSpinorField &a, cudaColorSpinorField &b)
 
void reDotProductCuda (double *result, std::vector< cudaColorSpinorField * > &a, std::vector< cudaColorSpinorField * > &b)
 
double xmyNormCuda (cudaColorSpinorField &a, cudaColorSpinorField &b)
 
double2 reDotProductNormACuda (cudaColorSpinorField &a, cudaColorSpinorField &b)
 
void axpbyCuda (const double &a, cudaColorSpinorField &x, const double &b, cudaColorSpinorField &y)
 
void axpyCuda (const double &a, cudaColorSpinorField &x, cudaColorSpinorField &y)
 
void axCuda (const double &a, cudaColorSpinorField &x)
 
void xpyCuda (cudaColorSpinorField &x, cudaColorSpinorField &y)
 
void xpayCuda (cudaColorSpinorField &x, const double &a, cudaColorSpinorField &y)
 
void mxpyCuda (cudaColorSpinorField &x, cudaColorSpinorField &y)
 
void axpyZpbxCuda (const double &a, cudaColorSpinorField &x, cudaColorSpinorField &y, cudaColorSpinorField &z, const double &b)
 
void axpyBzpcxCuda (const double &a, cudaColorSpinorField &x, cudaColorSpinorField &y, const double &b, cudaColorSpinorField &z, const double &c)
 
void caxpbyCuda (const Complex &a, cudaColorSpinorField &x, const Complex &b, cudaColorSpinorField &y)
 
void caxpyCuda (const Complex &a, cudaColorSpinorField &x, cudaColorSpinorField &y)
 
void cxpaypbzCuda (cudaColorSpinorField &, const Complex &b, cudaColorSpinorField &y, const Complex &c, cudaColorSpinorField &z)
 
void caxpbypzYmbwCuda (const Complex &, cudaColorSpinorField &, const Complex &, cudaColorSpinorField &, cudaColorSpinorField &, cudaColorSpinorField &)
 
Complex cDotProductCuda (cudaColorSpinorField &, cudaColorSpinorField &)
 
void cDotProductCuda (Complex *result, std::vector< cudaColorSpinorField * > &a, std::vector< cudaColorSpinorField * > &b)
 
Complex xpaycDotzyCuda (cudaColorSpinorField &x, const double &a, cudaColorSpinorField &y, cudaColorSpinorField &z)
 
double3 cDotProductNormACuda (cudaColorSpinorField &a, cudaColorSpinorField &b)
 
double3 cDotProductNormBCuda (cudaColorSpinorField &a, cudaColorSpinorField &b)
 
double3 caxpbypzYmbwcDotProductUYNormYCuda (const Complex &a, cudaColorSpinorField &x, const Complex &b, cudaColorSpinorField &y, cudaColorSpinorField &z, cudaColorSpinorField &w, cudaColorSpinorField &u)
 
void cabxpyAxCuda (const double &a, const Complex &b, cudaColorSpinorField &x, cudaColorSpinorField &y)
 
double caxpyNormCuda (const Complex &a, cudaColorSpinorField &x, cudaColorSpinorField &y)
 
void caxpyXmazCuda (const Complex &a, cudaColorSpinorField &x, cudaColorSpinorField &y, cudaColorSpinorField &z)
 
double caxpyXmazNormXCuda (const Complex &a, cudaColorSpinorField &x, cudaColorSpinorField &y, cudaColorSpinorField &z)
 
double cabxpyAxNormCuda (const double &a, const Complex &b, cudaColorSpinorField &x, cudaColorSpinorField &y)
 
void caxpbypzCuda (const Complex &, cudaColorSpinorField &, const Complex &, cudaColorSpinorField &, cudaColorSpinorField &)
 
void caxpbypczpwCuda (const Complex &, cudaColorSpinorField &, const Complex &, cudaColorSpinorField &, const Complex &, cudaColorSpinorField &, cudaColorSpinorField &)
 
Complex caxpyDotzyCuda (const Complex &a, cudaColorSpinorField &x, cudaColorSpinorField &y, cudaColorSpinorField &z)
 
Complex axpyCGNormCuda (const double &a, cudaColorSpinorField &x, cudaColorSpinorField &y)
 
double3 HeavyQuarkResidualNormCuda (cudaColorSpinorField &x, cudaColorSpinorField &r)
 
double3 xpyHeavyQuarkResidualNormCuda (cudaColorSpinorField &x, cudaColorSpinorField &y, cudaColorSpinorField &r)
 
void tripleCGUpdateCuda (const double &alpha, const double &beta, cudaColorSpinorField &q, cudaColorSpinorField &r, cudaColorSpinorField &x, cudaColorSpinorField &p)
 
double3 tripleCGReductionCuda (cudaColorSpinorField &x, cudaColorSpinorField &y, cudaColorSpinorField &z)
 
double axpyNormCpu (const double &a, const cpuColorSpinorField &x, cpuColorSpinorField &y)
 
double normCpu (const cpuColorSpinorField &b)
 
double reDotProductCpu (const cpuColorSpinorField &a, const cpuColorSpinorField &b)
 
double xmyNormCpu (const cpuColorSpinorField &a, cpuColorSpinorField &b)
 
void axpbyCpu (const double &a, const cpuColorSpinorField &x, const double &b, cpuColorSpinorField &y)
 
void axpyCpu (const double &a, const cpuColorSpinorField &x, cpuColorSpinorField &y)
 
void axCpu (const double &a, cpuColorSpinorField &x)
 
void xpyCpu (const cpuColorSpinorField &x, cpuColorSpinorField &y)
 
void xpayCpu (const cpuColorSpinorField &x, const double &a, cpuColorSpinorField &y)
 
void mxpyCpu (const cpuColorSpinorField &x, cpuColorSpinorField &y)
 
void axpyZpbxCpu (const double &a, cpuColorSpinorField &x, cpuColorSpinorField &y, const cpuColorSpinorField &z, const double &b)
 
void axpyBzpcxCpu (const double &a, cpuColorSpinorField &x, cpuColorSpinorField &y, const double &b, const cpuColorSpinorField &z, const double &c)
 
void caxpbyCpu (const Complex &a, const cpuColorSpinorField &x, const Complex &b, cpuColorSpinorField &y)
 
void caxpyCpu (const Complex &a, const cpuColorSpinorField &x, cpuColorSpinorField &y)
 
void cxpaypbzCpu (const cpuColorSpinorField &x, const Complex &b, const cpuColorSpinorField &y, const Complex &c, cpuColorSpinorField &z)
 
void caxpbypzYmbwCpu (const Complex &, const cpuColorSpinorField &, const Complex &, cpuColorSpinorField &, cpuColorSpinorField &, const cpuColorSpinorField &)
 
Complex cDotProductCpu (const cpuColorSpinorField &, const cpuColorSpinorField &)
 
Complex xpaycDotzyCpu (const cpuColorSpinorField &x, const double &a, cpuColorSpinorField &y, const cpuColorSpinorField &z)
 
double3 cDotProductNormACpu (const cpuColorSpinorField &a, const cpuColorSpinorField &b)
 
double3 cDotProductNormBCpu (const cpuColorSpinorField &a, const cpuColorSpinorField &b)
 
double3 caxpbypzYmbwcDotProductUYNormYCpu (const Complex &a, const cpuColorSpinorField &x, const Complex &b, cpuColorSpinorField &y, cpuColorSpinorField &z, const cpuColorSpinorField &w, const cpuColorSpinorField &u)
 
void cabxpyAxCpu (const double &a, const Complex &b, cpuColorSpinorField &x, cpuColorSpinorField &y)
 
double caxpyNormCpu (const Complex &a, cpuColorSpinorField &x, cpuColorSpinorField &y)
 
void caxpyXmazCpu (const Complex &a, cpuColorSpinorField &x, cpuColorSpinorField &y, cpuColorSpinorField &z)
 
double caxpyXmazNormXCpu (const Complex &a, cpuColorSpinorField &x, cpuColorSpinorField &y, cpuColorSpinorField &z)
 
double cabxpyAxNormCpu (const double &a, const Complex &b, cpuColorSpinorField &x, cpuColorSpinorField &y)
 
void caxpbypzCpu (const Complex &, cpuColorSpinorField &, const Complex &, cpuColorSpinorField &, cpuColorSpinorField &)
 
void caxpbypczpwCpu (const Complex &, cpuColorSpinorField &, const Complex &, cpuColorSpinorField &, const Complex &, cpuColorSpinorField &, cpuColorSpinorField &)
 
Complex caxpyDotzyCpu (const Complex &a, cpuColorSpinorField &x, cpuColorSpinorField &y, cpuColorSpinorField &z)
 
double3 HeavyQuarkResidualNormCpu (cpuColorSpinorField &x, cpuColorSpinorField &r)
 
double3 xpyHeavyQuarkResidualNormCpu (cpuColorSpinorField &x, cpuColorSpinorField &y, cpuColorSpinorField &r)
 
std::ostream & operator<< (std::ostream &output, const CloverFieldParam &param)
 
void computeClover (CloverField &clover, const GaugeField &gauge, double coeff, QudaFieldLocation location)
 
void computeCloverSigmaTrace (GaugeField &gauge, const CloverField &clover, int dir1, int dir2, QudaFieldLocation location)
 
void copyGenericClover (CloverField &out, const CloverField &in, bool inverse, QudaFieldLocation location, void *Out=0, void *In=0, void *outNorm=0, void *inNorm=0)
 
void cloverDerivative (cudaGaugeField &out, cudaGaugeField &gauge, cudaGaugeField &oprod, int mu, int nu, double coeff, QudaParity parity, int conjugate)
 
void cloverInvert (CloverField &clover, bool computeTraceLog, QudaFieldLocation location)
 
void copyGenericColorSpinor (ColorSpinorField &dst, const ColorSpinorField &src, QudaFieldLocation location, void *Dst=0, void *Src=0, void *dstNorm=0, void *srcNorm=0)
 
void genericSource (cpuColorSpinorField &a, QudaSourceType sourceType, int x, int s, int c)
 
int genericCompare (const cpuColorSpinorField &a, const cpuColorSpinorField &b, int tol)
 
void genericPrintVector (cpuColorSpinorField &a, unsigned int x)
 
void exchangeExtendedGhost (cudaColorSpinorField *spinor, int R[], int parity, cudaStream_t *stream_p)
 
void copyExtendedColorSpinor (ColorSpinorField &dst, const ColorSpinorField &src, QudaFieldLocation location, const int parity, void *Dst, void *Src, void *dstNorm, void *srcNorm)
 
template<typename Float , int Ns, int Nc>
__device__ void load_shared (typename mapper< Float >::type v[Ns *Nc *2], Float *field, int x, int volume)
 
template<typename Float , int Ns, int Nc>
__device__ void save_shared (Float *field, const typename mapper< Float >::type v[Ns *Nc *2], int x, int volumeCB)
 
template<typename ValueType >
__host__ __device__ ValueType cos (ValueType x)
 
template<typename ValueType >
__host__ __device__ ValueType sin (ValueType x)
 
template<typename ValueType >
__host__ __device__ ValueType tan (ValueType x)
 
template<typename ValueType >
__host__ __device__ ValueType acos (ValueType x)
 
template<typename ValueType >
__host__ __device__ ValueType asin (ValueType x)
 
template<typename ValueType >
__host__ __device__ ValueType atan (ValueType x)
 
template<typename ValueType >
__host__ __device__ ValueType atan2 (ValueType x, ValueType y)
 
template<typename ValueType >
__host__ __device__ ValueType cosh (ValueType x)
 
template<typename ValueType >
__host__ __device__ ValueType sinh (ValueType x)
 
template<typename ValueType >
__host__ __device__ ValueType tanh (ValueType x)
 
template<typename ValueType >
__host__ __device__ ValueType exp (ValueType x)
 
template<typename ValueType >
__host__ __device__ ValueType log (ValueType x)
 
template<typename ValueType >
__host__ __device__ ValueType log10 (ValueType x)
 
template<typename ValueType , typename ExponentType >
__host__ __device__ ValueType pow (ValueType x, ExponentType e)
 
template<typename ValueType >
__host__ __device__ ValueType sqrt (ValueType x)
 
template<typename ValueType >
__host__ __device__ ValueType abs (ValueType x)
 
template<typename ValueType >
__host__ __device__ ValueType conj (ValueType x)
 
template<typename ValueType >
__host__ __device__ ValueType abs (const complex< ValueType > &z)
 Returns the magnitude of z. More...
 
template<typename ValueType >
__host__ __device__ ValueType arg (const complex< ValueType > &z)
 Returns the phase angle of z. More...
 
template<typename ValueType >
__host__ __device__ ValueType norm (const complex< ValueType > &z)
 Returns the magnitude of z squared. More...
 
template<typename ValueType >
__host__ __device__ complex
< ValueType > 
conj (const complex< ValueType > &z)
 Returns the complex conjugate of z. More...
 
template<typename ValueType >
__host__ __device__ complex
< ValueType > 
polar (const ValueType &m, const ValueType &theta=0)
 Returns the complex with magnitude m and angle theta in radians. More...
 
template<typename ValueType >
__host__ __device__ complex
< ValueType > 
operator* (const complex< ValueType > &lhs, const complex< ValueType > &rhs)
 
template<typename ValueType >
__host__ __device__ complex
< ValueType > 
operator* (const complex< ValueType > &lhs, const ValueType &rhs)
 
template<typename ValueType >
__host__ __device__ complex
< ValueType > 
operator* (const ValueType &lhs, const complex< ValueType > &rhs)
 
template<typename ValueType >
__host__ __device__ complex
< ValueType > 
operator/ (const complex< ValueType > &lhs, const complex< ValueType > &rhs)
 
template<>
__host__ __device__ complex
< float > 
operator/ (const complex< float > &lhs, const complex< float > &rhs)
 
template<>
__host__ __device__ complex
< double > 
operator/ (const complex< double > &lhs, const complex< double > &rhs)
 
template<typename ValueType >
__host__ __device__ complex
< ValueType > 
operator+ (const complex< ValueType > &lhs, const complex< ValueType > &rhs)
 
template<typename ValueType >
__host__ __device__ complex
< ValueType > 
operator+ (const complex< ValueType > &lhs, const ValueType &rhs)
 
template<typename ValueType >
__host__ __device__ complex
< ValueType > 
operator+ (const ValueType &lhs, const complex< ValueType > &rhs)
 
template<typename ValueType >
__host__ __device__ complex
< ValueType > 
operator- (const complex< ValueType > &lhs, const complex< ValueType > &rhs)
 
template<typename ValueType >
__host__ __device__ complex
< ValueType > 
operator- (const complex< ValueType > &lhs, const ValueType &rhs)
 
template<typename ValueType >
__host__ __device__ complex
< ValueType > 
operator- (const ValueType &lhs, const complex< ValueType > &rhs)
 
template<typename ValueType >
__host__ __device__ complex
< ValueType > 
operator+ (const complex< ValueType > &rhs)
 
template<typename ValueType >
__host__ __device__ complex
< ValueType > 
operator- (const complex< ValueType > &rhs)
 
template<typename ValueType >
__host__ __device__ complex
< ValueType > 
cos (const complex< ValueType > &z)
 
template<typename ValueType >
__host__ __device__ complex
< ValueType > 
cosh (const complex< ValueType > &z)
 
template<typename ValueType >
__host__ __device__ complex
< ValueType > 
exp (const complex< ValueType > &z)
 
template<typename ValueType >
__host__ __device__ complex
< ValueType > 
log (const complex< ValueType > &z)
 
template<typename ValueType >
__host__ __device__ complex
< ValueType > 
log10 (const complex< ValueType > &z)
 
template<typename ValueType >
__host__ __device__ complex
< ValueType > 
pow (const complex< ValueType > &z, const int &n)
 
template<typename ValueType >
__host__ __device__ complex
< ValueType > 
pow (const complex< ValueType > &z, const ValueType &x)
 
template<typename ValueType >
__host__ __device__ complex
< ValueType > 
pow (const complex< ValueType > &z, const complex< ValueType > &z2)
 
template<typename ValueType >
__host__ __device__ complex
< ValueType > 
pow (const ValueType &x, const complex< ValueType > &z)
 
template<typename ValueType >
__host__ __device__ complex
< ValueType > 
sin (const complex< ValueType > &z)
 
template<typename ValueType >
__host__ __device__ complex
< ValueType > 
sinh (const complex< ValueType > &z)
 
template<typename ValueType >
__host__ __device__ complex
< ValueType > 
sqrt (const complex< ValueType > &z)
 
template<typename ValueType >
__host__ __device__ complex
< ValueType > 
tan (const complex< ValueType > &z)
 
template<typename ValueType >
__host__ __device__ complex
< ValueType > 
tanh (const complex< ValueType > &z)
 
template<typename ValueType >
__host__ __device__ complex
< ValueType > 
acos (const complex< ValueType > &z)
 
template<typename ValueType >
__host__ __device__ complex
< ValueType > 
asin (const complex< ValueType > &z)
 
template<typename ValueType >
__host__ __device__ complex
< ValueType > 
atan (const complex< ValueType > &z)
 
template<typename ValueType >
__host__ __device__ complex
< ValueType > 
acosh (const complex< ValueType > &z)
 
template<typename ValueType >
__host__ __device__ complex
< ValueType > 
asinh (const complex< ValueType > &z)
 
template<typename ValueType >
__host__ __device__ complex
< ValueType > 
atanh (const complex< ValueType > &z)
 
template<typename ValueType , class charT , class traits >
std::basic_ostream< charT,
traits > & 
operator<< (std::basic_ostream< charT, traits > &os, const complex< ValueType > &z)
 
template<typename ValueType , typename charT , class traits >
std::basic_istream< charT,
traits > & 
operator>> (std::basic_istream< charT, traits > &is, complex< ValueType > &z)
 
template<typename ValueType >
__host__ __device__ complex
< ValueType > 
operator+ (const volatile complex< ValueType > &lhs, const volatile complex< ValueType > &rhs)
 
template<typename ValueType >
__host__ __device__ complex
< ValueType > 
operator/ (const complex< ValueType > &lhs, const ValueType &rhs)
 
template<typename ValueType >
__host__ __device__ complex
< ValueType > 
operator/ (const ValueType &lhs, const complex< ValueType > &rhs)
 
template<>
__host__ __device__ complex
< float > 
operator/ (const float &lhs, const complex< float > &rhs)
 
template<>
__host__ __device__ complex
< double > 
operator/ (const double &lhs, const complex< double > &rhs)
 
template<typename ValueType >
__host__ __device__ bool operator== (const complex< ValueType > &lhs, const complex< ValueType > &rhs)
 
template<typename ValueType >
__host__ __device__ bool operator== (const ValueType &lhs, const complex< ValueType > &rhs)
 
template<typename ValueType >
__host__ __device__ bool operator== (const complex< ValueType > &lhs, const ValueType &rhs)
 
template<typename ValueType >
__host__ __device__ bool operator!= (const complex< ValueType > &lhs, const complex< ValueType > &rhs)
 
template<typename ValueType >
__host__ __device__ bool operator!= (const ValueType &lhs, const complex< ValueType > &rhs)
 
template<typename ValueType >
__host__ __device__ bool operator!= (const complex< ValueType > &lhs, const ValueType &rhs)
 
template<>
__host__ __device__ float abs (const complex< float > &z)
 
template<>
__host__ __device__ double abs (const complex< double > &z)
 
template<>
__host__ __device__ float arg (const complex< float > &z)
 
template<>
__host__ __device__ double arg (const complex< double > &z)
 
template<>
__host__ __device__ complex< float > polar (const float &magnitude, const float &angle)
 
template<>
__host__ __device__ complex< double > polar (const double &magnitude, const double &angle)
 
template<>
__host__ __device__ complex< float > cos (const complex< float > &z)
 
template<>
__host__ __device__ complex< float > cosh (const complex< float > &z)
 
template<>
__host__ __device__ complex< float > exp (const complex< float > &z)
 
template<>
__host__ __device__ complex< float > log (const complex< float > &z)
 
template<>
__host__ __device__ complex< float > pow (const float &x, const complex< float > &exponent)
 
template<>
__host__ __device__ complex< float > sin (const complex< float > &z)
 
template<>
__host__ __device__ complex< float > sinh (const complex< float > &z)
 
template<typename ValueType >
__host__ __device__ complex< float > sqrt (const complex< float > &z)
 
template<typename ValueType >
__host__ __device__ complex< float > atanh (const complex< float > &z)
 
void contractCuda (const cudaColorSpinorField &x, const cudaColorSpinorField &y, void *result, const QudaContractType contract_type, const QudaParity parity)
 
void contractCuda (const cudaColorSpinorField &x, const cudaColorSpinorField &y, void *result, const QudaContractType contract_type, const int tSlice, const QudaParity parity)
 
void gamma5Cuda (cudaColorSpinorField *out, const cudaColorSpinorField *in)
 
void covDev (cudaColorSpinorField *out, cudaGaugeField &gauge, const cudaColorSpinorField *in, const int parity, const int mu, TimeProfile &profile)
 
void setDiracParam (DiracParam &diracParam, QudaInvertParam *inv_param, bool pc)
 
void setDiracSloppyParam (DiracParam &diracParam, QudaInvertParam *inv_param, bool pc)
 
void setKernelPackT (bool pack)
 
bool getKernelPackT ()
 
void setTwistPack (bool pack)
 
bool getTwistPack ()
 
void setPackComms (const int *commDim)
 
bool getDslashLaunch ()
 
void createDslashEvents ()
 
void destroyDslashEvents ()
 
void wilsonDslashCuda (cudaColorSpinorField *out, const cudaGaugeField &gauge, const cudaColorSpinorField *in, const int oddBit, const int daggerBit, const cudaColorSpinorField *x, const double &k, const int *commDim, TimeProfile &profile, const QudaDslashPolicy &dslashPolicy=QUDA_DSLASH2)
 
void cloverDslashCuda (cudaColorSpinorField *out, const cudaGaugeField &gauge, const FullClover cloverInv, const cudaColorSpinorField *in, const int oddBit, const int daggerBit, const cudaColorSpinorField *x, const double &k, const int *commDim, TimeProfile &profile, const QudaDslashPolicy &dslashPolicy=QUDA_DSLASH2)
 
void asymCloverDslashCuda (cudaColorSpinorField *out, const cudaGaugeField &gauge, const FullClover cloverInv, const cudaColorSpinorField *in, const int oddBit, const int daggerBit, const cudaColorSpinorField *x, const double &k, const int *commDim, TimeProfile &profile, const QudaDslashPolicy &dslashPolicy=QUDA_DSLASH2)
 
void cloverCuda (cudaColorSpinorField *out, const cudaGaugeField &gauge, const FullClover clover, const cudaColorSpinorField *in, const int oddBit)
 
void domainWallDslashCuda (cudaColorSpinorField *out, const cudaGaugeField &gauge, const cudaColorSpinorField *in, const int parity, const int dagger, const cudaColorSpinorField *x, const double &m_f, const double &k, const int *commDim, TimeProfile &profile, const QudaDslashPolicy &dslashPolicy=QUDA_DSLASH)
 
void domainWallDslashCuda (cudaColorSpinorField *out, const cudaGaugeField &gauge, const cudaColorSpinorField *in, const int parity, const int dagger, const cudaColorSpinorField *x, const double &m_f, const double &k, const int *commDim, const int DS_type, TimeProfile &profile, const QudaDslashPolicy &dslashPolicy=QUDA_DSLASH2)
 
void MDWFDslashCuda (cudaColorSpinorField *out, const cudaGaugeField &gauge, const cudaColorSpinorField *in, const int parity, const int dagger, const cudaColorSpinorField *x, const double &m_f, const double &k, const int *commDim, const int DS_type, TimeProfile &profile, const QudaDslashPolicy &dslashPolicy=QUDA_DSLASH2)
 
void staggeredDslashCuda (cudaColorSpinorField *out, const cudaGaugeField &gauge, const cudaColorSpinorField *in, const int parity, const int dagger, const cudaColorSpinorField *x, const double &k, const int *commDim, TimeProfile &profile, const QudaDslashPolicy &dslashPolicy=QUDA_DSLASH2)
 
void improvedStaggeredDslashCuda (cudaColorSpinorField *out, const cudaGaugeField &fatGauge, const cudaGaugeField &longGauge, const cudaColorSpinorField *in, const int parity, const int dagger, const cudaColorSpinorField *x, const double &k, const int *commDim, TimeProfile &profile, const QudaDslashPolicy &dslashPolicy=QUDA_DSLASH2)
 
void twistedMassDslashCuda (cudaColorSpinorField *out, const cudaGaugeField &gauge, const cudaColorSpinorField *in, const int parity, const int dagger, const cudaColorSpinorField *x, const QudaTwistDslashType type, const double &kappa, const double &mu, const double &epsilon, const double &k, const int *commDim, TimeProfile &profile, const QudaDslashPolicy &dslashPolicy=QUDA_DSLASH2)
 
void ndegTwistedMassDslashCuda (cudaColorSpinorField *out, const cudaGaugeField &gauge, const cudaColorSpinorField *in, const int parity, const int dagger, const cudaColorSpinorField *x, const QudaTwistDslashType type, const double &kappa, const double &mu, const double &epsilon, const double &k, const int *commDim, TimeProfile &profile, const QudaDslashPolicy &dslashPolicy=QUDA_DSLASH)
 
void twistedCloverDslashCuda (cudaColorSpinorField *out, const cudaGaugeField &gauge, const FullClover *clover, const FullClover *cloverInv, const cudaColorSpinorField *in, const int parity, const int dagger, const cudaColorSpinorField *x, const QudaTwistCloverDslashType type, const double &kappa, const double &mu, const double &epsilon, const double &k, const int *commDim, TimeProfile &profile, const QudaDslashPolicy &dslashPolicy=QUDA_DSLASH2)
 
void twistGamma5Cuda (cudaColorSpinorField *out, const cudaColorSpinorField *in, const int dagger, const double &kappa, const double &mu, const double &epsilon, const QudaTwistGamma5Type twist)
 ndeg tm: More...
 
void twistCloverGamma5Cuda (cudaColorSpinorField *out, const cudaColorSpinorField *in, const int dagger, const double &kappa, const double &mu, const double &epsilon, const QudaTwistGamma5Type twist, const FullClover *clov, const FullClover *clovInv, const int parity)
 
void packFace (void *ghost_buf, cudaColorSpinorField &in, const int nFace, const int dagger, const int parity, const int dim, const int face_num, const cudaStream_t &stream, const double a=0.0, const double b=0.0)
 
void packFaceExtended (void *ghost_buf, cudaColorSpinorField &field, const int nFace, const int R[], const int dagger, const int parity, const int dim, const int face_num, const cudaStream_t &stream, const bool unpack=false)
 
void packFace (void *ghost_buf, cudaColorSpinorField &in, FullClover &clov, FullClover &clovInv, const int nFace, const int dagger, const int parity, const int dim, const int face_num, const cudaStream_t &stream, const double a=0.0)
 
void loadLinkToGPU (cudaGaugeField *cudaGauge, cpuGaugeField *cpuGauge, QudaGaugeParam *param)
 
void loadLinkToGPU_ex (cudaGaugeField *cudaGauge, cpuGaugeField *cpuGauge)
 
void loadLinkToGPU_gf (cudaGaugeField *cudaGauge, cpuGaugeField *cpuGauge, QudaGaugeParam *param)
 
void storeLinkToCPU (cpuGaugeField *cpuGauge, cudaGaugeField *cudaGauge, QudaGaugeParam *param)
 
void packGhostStaple (int *X, void *even, void *odd, int volume, QudaPrecision prec, int stride, int dir, int whichway, void **fwd_nbr_buf_gpu, void **back_nbr_buf_gpu, void **fwd_nbr_buf, void **back_nbr_buf, cudaStream_t *stream)
 
void unpackGhostStaple (int *X, void *_even, void *_odd, int volume, QudaPrecision prec, int stride, int dir, int whichway, void **fwd_nbr_buf, void **back_nbr_buf, cudaStream_t *stream)
 
void pack_ghost_all_staples_cpu (void *staple, void **cpuGhostStapleBack, void **cpuGhostStapleFwd, int nFace, QudaPrecision precision, int *X)
 
void pack_ghost_all_links (void **cpuLink, void **cpuGhostBack, void **cpuGhostFwd, int dir, int nFace, QudaPrecision precision, int *X)
 
void pack_gauge_diag (void *buf, int *X, void **sitelink, int nu, int mu, int dir1, int dir2, QudaPrecision prec)
 
void fermion_force_init_cuda (QudaGaugeParam *param)
 
void fermion_force_cuda (double eps, double weight1, double weight2, void *act_path_coeff, FullHw cudaHw, cudaGaugeField &cudaSiteLink, cudaGaugeField &cudaMom, QudaGaugeParam *param)
 
__host__ __device__ double2 operator+ (const double2 &x, const double2 &y)
 
__host__ __device__ double2 operator- (const double2 &x, const double2 &y)
 
__host__ __device__ float2 operator- (const float2 &x, const float2 &y)
 
__host__ __device__ float4 operator- (const float4 &x, const float4 &y)
 
__host__ __device__ double3 operator+ (const double3 &x, const double3 &y)
 
__host__ __device__ float4 operator* (const float a, const float4 x)
 
__host__ __device__ float2 operator* (const float a, const float2 x)
 
__host__ __device__ double2 operator* (const double a, const double2 x)
 
__host__ __device__ double4 operator* (const double a, const double4 x)
 
__host__ __device__ float2 operator+ (const float2 x, const float2 y)
 
__host__ __device__ float4 operator+ (const float4 x, const float4 y)
 
__host__ __device__ float4 operator+= (float4 &x, const float4 y)
 
__host__ __device__ float2 operator+= (float2 &x, const float2 y)
 
__host__ __device__ double2 operator+= (double2 &x, const double2 y)
 
__host__ __device__ double3 operator+= (double3 &x, const double3 y)
 
__host__ __device__ float4 operator-= (float4 &x, const float4 y)
 
__host__ __device__ float2 operator-= (float2 &x, const float2 y)
 
__host__ __device__ double2 operator-= (double2 &x, const double2 y)
 
__host__ __device__ float2 operator*= (float2 &x, const float a)
 
__host__ __device__ double2 operator*= (double2 &x, const float a)
 
__host__ __device__ float4 operator*= (float4 &a, const float &b)
 
__host__ __device__ double2 operator*= (double2 &a, const double &b)
 
__host__ __device__ double4 operator*= (double4 &a, const double &b)
 
__host__ __device__ float2 operator- (const float2 &x)
 
__host__ __device__ double2 operator- (const double2 &x)
 
__forceinline__ __host__ __device__ float max_fabs (const float4 &c)
 
__forceinline__ __host__ __device__ float max_fabs (const float2 &b)
 
__forceinline__ __host__ __device__ double max_fabs (const double4 &c)
 
__forceinline__ __host__ __device__ double max_fabs (const double2 &b)
 
__forceinline__ __host__ __device__ float2 make_FloatN (const double2 &a)
 
__forceinline__ __host__ __device__ float4 make_FloatN (const double4 &a)
 
__forceinline__ __host__ __device__ double2 make_FloatN (const float2 &a)
 
__forceinline__ __host__ __device__ double4 make_FloatN (const float4 &a)
 
__forceinline__ __host__ __device__ short4 make_shortN (const float4 &a)
 
__forceinline__ __host__ __device__ short2 make_shortN (const float2 &a)
 
__forceinline__ __host__ __device__ short4 make_shortN (const double4 &a)
 
__forceinline__ __host__ __device__ short2 make_shortN (const double2 &a)
 
std::ostream & operator<< (std::ostream &output, const GaugeFieldParam &param)
 
double norm2 (const cudaGaugeField &u)
 
void copyGenericGauge (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out=0, void *In=0, void **ghostOut=0, void **ghostIn=0, int type=0)
 
void copyExtendedGauge (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out=0, void *In=0)
 
void extractGaugeGhost (const GaugeField &u, void **ghost)
 
void extractExtendedGaugeGhost (const GaugeField &u, int dim, const int *R, void **ghost, bool extract)
 
double maxGauge (const GaugeField &u)
 
void applyGaugePhase (GaugeField &u)
 
template<typename Float >
__device__ __host__ void accumulateComplexProduct (Float *a, const Float *b, const Float *c, Float sign)
 
template<typename Float >
__device__ __host__ void complexProduct (Float *a, const Float *b, const Float *c)
 
template<typename Float >
__device__ __host__ void complexDotProduct (Float *a, const Float *b, const Float *c)
 
template<typename Float >
__device__ __host__ void complexQuotient (Float *a, const Float *b, const Float *c)
 
template<typename Float >
__device__ __host__ void accumulateConjugateProduct (Float *a, const Float *b, const Float *c, int sign)
 
template<typename Float >
__device__ __host__ void complexConjugateProduct (Float *a, const Float *b, const Float *c)
 
template<typename Float >
__device__ __host__ Float timeBoundary (int idx, const int X[QUDA_MAX_DIM], QudaTboundary tBoundary, bool isFirstTimeSlice, bool isLastTimeSlice)
 
template<typename Float >
__device__ __host__ Float timeBoundary (int idx, const int X[QUDA_MAX_DIM], const int R[QUDA_MAX_DIM], QudaTboundary tBoundary, bool isFirstTimeSlice, bool isLastTimeSlice, QudaGhostExchange ghostExchange)
 
void gauge_force_init_cuda (QudaGaugeParam *param, int max_length)
 
void gauge_force_cuda (cudaGaugeField &cudaMom, double eb3, cudaGaugeField &cudaSiteLink, QudaGaugeParam *param, int ***input_path, int *length, double *path_coeff, int num_paths, int max_length)
 
double plaquette (const GaugeField &data, QudaFieldLocation location)
 
void APEStep (GaugeField &dataDs, const GaugeField &dataOr, double alpha, QudaFieldLocation location)
 
void updateGaugeField (GaugeField &out, double dt, const GaugeField &in, const GaugeField &mom, bool conj_mom, bool exact)
 
void setUnitarizeLinksPadding (int input_padding, int output_padding)
 
void setUnitarizeLinksConstants (double unitarize_eps, double max_error, bool allow_svd, bool svd_only, double svd_rel_error, double svd_abs_error, bool check_unitarization=true)
 
void unitarizeLinksCuda (const QudaGaugeParam &param, cudaGaugeField &infield, cudaGaugeField *outfield, int *num_failures)
 
void unitarizeLinksCPU (const QudaGaugeParam &param, cpuGaugeField &infield, cpuGaugeField *outfield)
 
bool isUnitary (const QudaGaugeParam &param, cpuGaugeField &field, double max_error)
 
void completeKSForce (GaugeField &mom, const GaugeField &oprod, const GaugeField &gauge, QudaFieldLocation location, long long *flops=NULL)
 
std::ostream & operator<< (std::ostream &output, const LatticeFieldParam &param)
 
void llfat_cuda (cudaGaugeField *cudaFatLink, cudaGaugeField *cudaLongLink, cudaGaugeField &cudaSiteLink, cudaGaugeField &cudaStaple, cudaGaugeField &cudaStaple1, QudaGaugeParam *param, double *act_path_coeff)
 
void llfat_cuda_ex (cudaGaugeField *cudaFatLink, cudaGaugeField *cudaLongLink, cudaGaugeField &cudaSiteLink, cudaGaugeField &cudaStaple, cudaGaugeField &cudaStaple1, QudaGaugeParam *param, double *act_path_coeff)
 
void llfat_init_cuda (QudaGaugeParam *param)
 
void llfat_init_cuda_ex (QudaGaugeParam *param_ex)
 
void computeLongLinkCuda (void *outEven, void *outOdd, const void *const inEven, const void *const inOdd, double coeff, QudaReconstructType recon, QudaPrecision prec, dim3 halfGridDim, llfat_kernel_param_t kparam)
 
void computeGenStapleFieldParityKernel (void *staple_even, void *staple_odd, const void *sitelink_even, const void *sitelink_odd, void *fatlink_even, void *fatlink_odd, const void *mulink_even, const void *mulink_odd, int mu, int nu, int save_staple, double mycoeff, QudaReconstructType recon, QudaPrecision prec, dim3 halfGridDim, llfat_kernel_param_t kparam, cudaStream_t *stream)
 
void computeGenStapleFieldParityKernel_ex (void *staple_even, void *staple_odd, const void *sitelink_even, const void *sitelink_odd, void *fatlink_even, void *fatlink_odd, const void *mulink_even, const void *mulink_odd, int mu, int nu, int save_staple, double mycoeff, QudaReconstructType recon, QudaPrecision prec, llfat_kernel_param_t kparam)
 
void siteComputeGenStapleParityKernel (void *staple_even, void *staple_odd, const void *sitelink_even, const void *sitelink_odd, void *fatlink_even, void *fatlink_odd, int mu, int nu, double mycoeff, QudaReconstructType recon, QudaPrecision prec, dim3 halfGridDim, llfat_kernel_param_t kparam, cudaStream_t *stream)
 
void siteComputeGenStapleParityKernel_ex (void *staple_even, void *staple_odd, const void *sitelink_even, const void *sitelink_odd, void *fatlink_even, void *fatlink_odd, int mu, int nu, double mycoeff, QudaReconstructType recon, QudaPrecision prec, llfat_kernel_param_t kparam)
 
void llfatOneLinkKernel (cudaGaugeField &cudaFatLink, cudaGaugeField &cudaSiteLink, cudaGaugeField &cudaStaple, cudaGaugeField &cudaStaple1, QudaGaugeParam *param, double *act_path_coeff)
 
void llfatOneLinkKernel_ex (cudaGaugeField &cudaFatLink, cudaGaugeField &cudaSiteLink, cudaGaugeField &cudaStaple, cudaGaugeField &cudaStaple1, QudaGaugeParam *param, double *act_path_coeff, llfat_kernel_param_t kparam)
 
void computeFatLinkCore (cudaGaugeField *cudaSiteLink, double *act_path_coeff, QudaGaugeParam *qudaGaugeParam, QudaComputeFatMethod method, cudaGaugeField *cudaFatLink, cudaGaugeField *cudaLongLink, TimeProfile &profile)
 
void printPeakMemUsage ()
 
void assertAllMemFree ()
 
void * device_malloc_ (const char *func, const char *file, int line, size_t size)
 
void * safe_malloc_ (const char *func, const char *file, int line, size_t size)
 
void * pinned_malloc_ (const char *func, const char *file, int line, size_t size)
 
void * mapped_malloc_ (const char *func, const char *file, int line, size_t size)
 
void device_free_ (const char *func, const char *file, int line, void *ptr)
 
void host_free_ (const char *func, const char *file, int line, void *ptr)
 
void link_format_cpu_to_gpu (void *dst, void *src, int reconstruct, int Vh, int pad, int ghostV, QudaPrecision prec, QudaGaugeFieldOrder cpu_order, cudaStream_t stream)
 
void link_format_gpu_to_cpu (void *dst, void *src, int Vh, int stride, QudaPrecision prec, cudaStream_t stream)
 
void collectGhostStaple (int *X, void *even, void *odd, int volumeCB, int stride, QudaPrecision precision, void *ghost_staple_gpu, int dir, int whichway, cudaStream_t *stream)
 
template<typename T1 , typename T2 >
__host__ __device__ void copy (T1 &a, const T2 &b)
 
template<>
__host__ __device__ void copy (float &a, const short &b)
 
template<>
__host__ __device__ void copy (short &a, const float &b)
 
void createStaggeredOprodEvents ()
 
void destroyStaggeredOprodEvents ()
 
void computeStaggeredOprod (cudaGaugeField &out, cudaColorSpinorField &in, FaceBuffer &facebuffer, const unsigned int parity, const double coeff, const unsigned int displacement)
 
void computeStaggeredOprod (cudaGaugeField &outA, cudaGaugeField &outB, cudaColorSpinorField &inEven, cudaColorSpinorField &inOdd, FaceBuffer &faceBuffer, const unsigned int parity, const double coeff[2])
 
void loadTuneCache (QudaVerbosity verbosity)
 
void saveTuneCache (QudaVerbosity verbosity)
 
TuneParam tuneLaunch (Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity)
 
template<typename Float >
void axpby (const Float &a, const Float *x, const Float &b, Float *y, const int N)
 
template<typename Float >
void caxpby (const std::complex< Float > &a, const std::complex< Float > *x, const std::complex< Float > &b, std::complex< Float > *y, int N)
 
template<typename Float >
void caxpbypcz (const std::complex< Float > &a, const std::complex< Float > *x, const std::complex< Float > &b, const std::complex< Float > *y, const std::complex< Float > &c, std::complex< Float > *z, int N)
 
template<typename Float >
double norm (const Float *a, const int N)
 
template<typename Float >
double reDotProduct (const Float *a, const Float *b, const int N)
 
template<typename Float >
Complex cDotProduct (const std::complex< Float > *a, const std::complex< Float > *b, const int N)
 
template<typename Float >
double3 HeavyQuarkResidualNorm (const Float *x, const Float *r, const int volume, const int Nint)
 
double3 HeavyQuarkResidualNormCpu (cpuColorSpinorField &x, cpuColorSpinorField &y, cpuColorSpinorField &r)
 
void initReduce ()
 
void endReduce ()
 
cudaStream_t * getBlasStream ()
 
__device__ void caxpy_ (const float2 &a, const float4 &x, float4 &y)
 
__device__ void caxpy_ (const float2 &a, const float2 &x, float2 &y)
 
__device__ void caxpy_ (const double2 &a, const double2 &x, double2 &y)
 
__device__ void caxpby_ (const float2 &a, const float4 &x, const float2 &b, float4 &y)
 
__device__ void caxpby_ (const float2 &a, const float2 &x, const float2 &b, float2 &y)
 
__device__ void caxpby_ (const double2 &a, const double2 &x, const double2 &b, double2 &y)
 
__device__ void cxpaypbz_ (const float4 &x, const float2 &a, const float4 &y, const float2 &b, float4 &z)
 
__device__ void cxpaypbz_ (const float2 &x, const float2 &a, const float2 &y, const float2 &b, float2 &z)
 
__device__ void cxpaypbz_ (const double2 &x, const double2 &a, const double2 &y, const double2 &b, double2 &z)
 
void setGhostSpinor (bool value)
 
std::ostream & operator<< (std::ostream &out, const ColorSpinorField &a)
 
template<typename Float >
ColorSpinorFieldOrder< Float > * createOrder (const cpuColorSpinorField &a)
 
template<class T >
void random (T &t)
 
template<class T >
void point (T &t, int x, int s, int c)
 
template<class U , class V >
int compareSpinor (const U &u, const V &v, const int tol)
 
template<class Order >
void print_vector (const Order &o, unsigned int x)
 
template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis >
void packSpinor (OutOrder &outOrder, const InOrder &inOrder, Basis basis, int volume)
 
template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis >
__global__ void packSpinorKernel (OutOrder outOrder, const InOrder inOrder, Basis basis, int volume)
 
template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder >
void genericCopyColorSpinor (OutOrder &outOrder, const InOrder &inOrder, QudaGammaBasis dstBasis, QudaGammaBasis srcBasis, const ColorSpinorField &out, QudaFieldLocation location)
 
template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename InOrder >
void genericCopyColorSpinor (InOrder &inOrder, ColorSpinorField &out, QudaGammaBasis inBasis, QudaFieldLocation location, FloatOut *Out, float *outNorm)
 
template<typename FloatOut , typename FloatIn , int Ns, int Nc>
void genericCopyColorSpinor (ColorSpinorField &out, const ColorSpinorField &in, QudaFieldLocation location, FloatOut *Out, FloatIn *In, float *outNorm, float *inNorm)
 
template<int Ns, typename dstFloat , typename srcFloat >
void copyGenericColorSpinor (ColorSpinorField &dst, const ColorSpinorField &src, QudaFieldLocation location, dstFloat *Dst, srcFloat *Src, float *dstNorm, float *srcNorm)
 
template<typename dstFloat , typename srcFloat >
void CopyGenericColorSpinor (ColorSpinorField &dst, const ColorSpinorField &src, QudaFieldLocation location, dstFloat *Dst, srcFloat *Src, float *dstNorm=0, float *srcNorm=0)
 
void copyGenericGaugeDoubleOut (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out, void *In, void **ghostOut, void **ghostIn, int type)
 
void copyGenericGaugeHalfOut (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out, void *In, void **ghostOut, void **ghostIn, int type)
 
void copyGenericGaugeSingleOut (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out, void *In, void **ghostOut, void **ghostIn, int type)
 
void checkMomOrder (const GaugeField &u)
 
template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder >
__device__ __host__ void copyGaugeEx (CopyGaugeExArg< OutOrder, InOrder > &arg, int X, int parity)
 
template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder >
void copyGaugeEx (CopyGaugeExArg< OutOrder, InOrder > arg)
 
template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder >
__global__ void copyGaugeExKernel (CopyGaugeExArg< OutOrder, InOrder > arg)
 
template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder >
void copyGaugeEx (OutOrder outOrder, const InOrder inOrder, const int *E, const int *X, const int *faceVolumeCB, const GaugeField &meta, QudaFieldLocation location)
 
template<typename FloatOut , typename FloatIn , int length, typename InOrder >
void copyGaugeEx (const InOrder &inOrder, const int *X, GaugeField &out, QudaFieldLocation location, FloatOut *Out)
 
template<typename FloatOut , typename FloatIn , int length>
void copyGaugeEx (GaugeField &out, const GaugeField &in, QudaFieldLocation location, FloatOut *Out, FloatIn *In)
 
template<typename FloatOut , typename FloatIn >
void copyGaugeEx (GaugeField &out, const GaugeField &in, QudaFieldLocation location, FloatOut *Out, FloatIn *In)
 
template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder >
void copyGauge (CopyGaugeArg< OutOrder, InOrder > arg)
 
template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder >
__global__ void copyGaugeKernel (CopyGaugeArg< OutOrder, InOrder > arg)
 
template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder >
void copyGhost (CopyGaugeArg< OutOrder, InOrder > arg)
 
template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder >
__global__ void copyGhostKernel (CopyGaugeArg< OutOrder, InOrder > arg)
 
template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder >
void copyGauge (OutOrder outOrder, const InOrder inOrder, int volume, const int *faceVolumeCB, int nDim, int geometry, const GaugeField &out, QudaFieldLocation location, int type)
 
template<typename FloatOut , typename FloatIn , int length, typename InOrder >
void copyGauge (const InOrder &inOrder, GaugeField &out, QudaFieldLocation location, FloatOut *Out, FloatOut **outGhost, int type)
 
template<typename FloatOut , typename FloatIn , int length>
void copyGauge (GaugeField &out, const GaugeField &in, QudaFieldLocation location, FloatOut *Out, FloatIn *In, FloatOut **outGhost, FloatIn **inGhost, int type)
 
template<typename FloatOut , typename FloatIn >
void copyGauge (GaugeField &out, const GaugeField &in, QudaFieldLocation location, FloatOut *Out, FloatIn *In, FloatOut **outGhost, FloatIn **inGhost, int type)
 
std::ostream & operator<< (std::ostream &out, const cudaColorSpinorField &a)
 
template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis , bool extend>
__device__ __host__ void copyInterior (CopySpinorExArg< OutOrder, InOrder, Basis > &arg, int X)
 
template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis , bool extend>
__global__ void copyInteriorKernel (CopySpinorExArg< OutOrder, InOrder, Basis > arg)
 
template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis , bool extend>
void copyInterior (CopySpinorExArg< OutOrder, InOrder, Basis > &arg)
 
template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis >
void copySpinorEx (OutOrder outOrder, const InOrder inOrder, const Basis basis, const int *E, const int *X, const int parity, const bool extend, const ColorSpinorField &meta, QudaFieldLocation location)
 
template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder >
void copySpinorEx (OutOrder outOrder, InOrder inOrder, const QudaGammaBasis outBasis, const QudaGammaBasis inBasis, const int *E, const int *X, const int parity, const bool extend, const ColorSpinorField &meta, QudaFieldLocation location)
 
template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename InOrder >
void extendedCopyColorSpinor (InOrder &inOrder, ColorSpinorField &out, QudaGammaBasis inBasis, const int *E, const int *X, const int parity, const bool extend, QudaFieldLocation location, FloatOut *Out, float *outNorm)
 
template<typename FloatOut , typename FloatIn , int Ns, int Nc>
void extendedCopyColorSpinor (ColorSpinorField &out, const ColorSpinorField &in, const int parity, const QudaFieldLocation location, FloatOut *Out, FloatIn *In, float *outNorm, float *inNorm)
 
template<int Ns, typename dstFloat , typename srcFloat >
void copyExtendedColorSpinor (ColorSpinorField &dst, const ColorSpinorField &src, const int parity, const QudaFieldLocation location, dstFloat *Dst, srcFloat *Src, float *dstNorm, float *srcNorm)
 
template<typename dstFloat , typename srcFloat >
void CopyExtendedColorSpinor (ColorSpinorField &dst, const ColorSpinorField &src, const int parity, const QudaFieldLocation location, dstFloat *Dst, srcFloat *Src, float *dstNorm=0, float *srcNorm=0)
 
template<typename Float , int length, int nDim, typename Order >
void extractGhost (ExtractGhostArg< Order, nDim > arg)
 
template<typename Float , int length, int nDim, typename Order >
__global__ void extractGhostKernel (ExtractGhostArg< Order, nDim > arg)
 
template<typename Float , int length, typename Order >
void extractGhost (Order order, const GaugeField &u, QudaFieldLocation location)
 
template<typename Float >
void extractGhost (const GaugeField &u, Float **Ghost)
 
template<typename Float , int length, typename Arg >
__device__ __host__ void extractor (Arg &arg, int dir, int a, int b, int c, int d, int g, int parity)
 
template<typename Float , int length, typename Arg >
__device__ __host__ void injector (Arg &arg, int dir, int a, int b, int c, int d, int g, int parity)
 
template<typename Float , int length, int nDim, typename Order , bool extract>
void extractGhostEx (ExtractGhostExArg< Order, nDim > arg)
 
template<typename Float , int length, int nDim, typename Order , bool extract>
__global__ void extractGhostExKernel (ExtractGhostExArg< Order, nDim > arg)
 
template<typename Float , int length, typename Order >
void extractGhostEx (Order order, const int dim, const int *surfaceCB, const int *E, const int *R, bool extract, const GaugeField &u, QudaFieldLocation location)
 
template<typename Float >
void extractGhostEx (const GaugeField &u, int dim, const int *R, Float **Ghost, bool extract)
 
template<int oddBit, typename Float , typename Float2 , typename FloatN >
__global__ void GAUGE_FORCE_KERN_NAME (Float2 *momEven, Float2 *momOdd, const int dir, const double eb3, const FloatN *linkEven, const FloatN *linkOdd, const int *input_path, const int *length, const double *path_coeff, const int num_paths, const kernel_param_t kparam)
 
void gauge_force_cuda_dir (cudaGaugeField &cudaMom, const int dir, const double eb3, const cudaGaugeField &cudaSiteLink, const QudaGaugeParam *param, int **input_path, const int *length, const double *path_coeff, const int num_paths, const int max_length)
 
void printLaunchTimer ()
 
void setDiracPreParam (DiracParam &diracParam, QudaInvertParam *inv_param, const bool pc)
 
void createDirac (Dirac *&d, Dirac *&dSloppy, Dirac *&dPre, QudaInvertParam &param, const bool pc_solve)
 
void massRescale (cudaColorSpinorField &b, QudaInvertParam &param)
 
void fillInnerSolveParam (SolverParam &inner, const SolverParam &outer)
 
double resNorm (const DiracMatrix &mat, cudaColorSpinorField &b, cudaColorSpinorField &x)
 
int reliable (double &rNorm, double &maxrx, double &maxrr, const double &r2, const double &delta)
 
void fillInitCGSolveParam (SolverParam &initCGparam)
 
double timeInterval (struct timeval start, struct timeval end)
 
void orthoDir (Complex **beta, cudaColorSpinorField *Ap[], int k)
 
void backSubs (const Complex *alpha, Complex **const beta, const double *gamma, Complex *delta, int n)
 
void updateSolution (cudaColorSpinorField &x, const Complex *alpha, Complex **const beta, double *gamma, int k, cudaColorSpinorField *p[])
 
void print (const double d[], int n)
 
void updateAlphaZeta (double *alpha, double *zeta, double *zeta_old, const double *r2, const double *beta, const double pAp, const double *offset, const int nShift, const int j_low)
 
__device__ __host__ int linkIndex (int x[], int dx[], const int X[4])
 
__device__ __host__ void getCoords (int x[4], int cb_index, const int X[4], int parity)
 
template<typename Float , typename Oprod , typename Gauge , typename Mom >
__host__ __device__ void completeKSForceCore (KSForceArg< Oprod, Gauge, Mom > &arg, int idx)
 
template<typename Float , typename Oprod , typename Gauge , typename Mom >
__global__ void completeKSForceKernel (KSForceArg< Oprod, Gauge, Mom > arg)
 
template<typename Float , typename Oprod , typename Gauge , typename Mom >
void completeKSForceCPU (KSForceArg< Oprod, Gauge, Mom > &arg)
 
template<typename Float , typename Oprod , typename Gauge , typename Mom >
void completeKSForce (Oprod oprod, Gauge gauge, Mom mom, int dim[4], const GaugeField &meta, QudaFieldLocation location, long long *flops)
 
template<typename Float , typename Result , typename Oprod , typename Gauge >
__host__ __device__ void computeKSLongLinkForceCore (KSLongLinkArg< Result, Oprod, Gauge > &arg, int idx)
 
template<typename Float , typename Result , typename Oprod , typename Gauge >
__global__ void computeKSLongLinkForceKernel (KSLongLinkArg< Result, Oprod, Gauge > arg)
 
template<typename Float , typename Result , typename Oprod , typename Gauge >
void computeKSLongLinkForceCPU (KSLongLinkArg< Result, Oprod, Gauge > &arg)
 
template<typename Float , typename Result , typename Oprod , typename Gauge >
void computeKSLongLinkForce (Result res, Oprod oprod, Gauge gauge, int dim[4], const GaugeField &meta, QudaFieldLocation location)
 
template<typename Float >
void computeKSLongLinkForce (GaugeField &result, const GaugeField &oprod, const GaugeField &gauge, QudaFieldLocation location)
 
template<typename Float , int Nc, typename Order >
double maxGauge (const Order order, int volume, int nDim)
 
template<int N, typename FloatN , typename Float2 >
__global__ void do_link_format_cpu_to_gpu (FloatN *dst, Float2 *src, int reconstruct, int Vh, int pad, int ghostV, size_t threads)
 
template<int N, typename FloatN , typename Float2 >
__global__ void do_link_format_cpu_to_gpu_milc (FloatN *dst, Float2 *src, int reconstruct, int Vh, int pad, int ghostV, size_t threads)
 
template<typename FloatN >
__global__ void do_link_format_gpu_to_cpu (FloatN *dst, FloatN *src, int Vh, int stride)
 
template<int dir, int whichway, typename Float2 >
__global__ void collectGhostStapleKernel (Float2 *out, Float2 *in, int parity, GhostStapleParam param)
 
template<class Cmplx >
__device__ __host__ Cmplx makeComplex (const typename RealTypeId< Cmplx >::Type &a, const typename RealTypeId< Cmplx >::Type &b)
 
__device__ __host__ double2 makeComplex (const double &a, const double &b)
 
__device__ __host__ float2 makeComplex (const float &a, const float &b)
 
template<class Cmplx >
__device__ __host__ Cmplx operator- (const Cmplx &a)
 
template<class Cmplx >
__device__ __host__ Cmplx & operator+= (Cmplx &a, const Cmplx &b)
 
template<class Cmplx >
__device__ __host__ Cmplx & operator-= (Cmplx &a, const Cmplx &b)
 
template<class Cmplx >
__device__ __host__ Cmplx operator+ (const Cmplx &a, const Cmplx &b)
 
template<class Cmplx >
__device__ __host__ Cmplx operator- (const Cmplx &a, const Cmplx &b)
 
__device__ __host__ double2 operator* (const double2 &a, const double &scalar)
 
__device__ __host__ float2 operator* (const float2 &a, const float &scalar)
 
template<class Cmplx , class Float >
__device__ __host__ Cmplx operator+ (const Cmplx &a, const Float &scalar)
 
template<class Cmplx >
__device__ __host__ Cmplx operator/ (const Cmplx &a, const typename RealTypeId< Cmplx >::Type &scalar)
 
template<class Cmplx >
__device__ __host__ Cmplx operator+ (const typename RealTypeId< Cmplx >::Type &scalar, const Cmplx &a)
 
template<class Cmplx >
__device__ __host__ Cmplx operator- (const Cmplx &a, const typename RealTypeId< Cmplx >::Type &scalar)
 
template<class Cmplx >
__device__ __host__ Cmplx operator- (const typename RealTypeId< Cmplx >::Type &scalar, const Cmplx &a)
 
template<class Cmplx >
__device__ __host__ Cmplx operator* (const Cmplx &a, const Cmplx &b)
 
template<class Cmplx >
__device__ __host__ Cmplx conj (const Cmplx &a)
 
__device__ __host__ double conj (const double &a)
 
__device__ __host__ float conj (const float &a)
 
template<typename Cmplx >
__device__ __host__ Cmplx Conj (const Cmplx &a)
 
template<class Cmplx >
__device__ __host__ Cmplx getPreciseInverse (const Cmplx &z)
 
std::ostream & operator<< (std::ostream &os, const float2 &z)
 
std::ostream & operator<< (std::ostream &os, const double2 &z)
 
template<int N>
__device__ __host__ int index (int i, int j)
 
template<class T >
__device__ __host__ T getTrace (const Matrix< T, 3 > &a)
 
template<class T >
__device__ __host__ T getDeterminant (const Matrix< T, 3 > &a)
 
template<class T , int N>
__device__ __host__ Matrix< T, N > operator+ (const Matrix< T, N > &a, const Matrix< T, N > &b)
 
template<class T , int N>
__device__ __host__ Matrix< T, N > operator+= (Matrix< T, N > &a, const Matrix< T, N > &b)
 
template<class T , int N>
__device__ __host__ Matrix< T, N > operator-= (Matrix< T, N > &a, const Matrix< T, N > &b)
 
template<class T , int N>
__device__ __host__ Matrix< T, N > operator- (const Matrix< T, N > &a, const Matrix< T, N > &b)
 
template<class T , int N, class S >
__device__ __host__ Matrix< T, N > operator* (const S &scalar, const Matrix< T, N > &a)
 
template<class T , int N, class S >
__device__ __host__ Matrix< T, N > operator* (const Matrix< T, N > &a, const S &scalar)
 
template<class T , int N, class S >
__device__ __host__ Matrix< T, N > operator*= (Matrix< T, N > &a, const S &scalar)
 
template<class T , int N>
__device__ __host__ Matrix< T, N > operator- (const Matrix< T, N > &a)
 
template<class T >
__device__ __host__ Matrix< T, 3 > operator* (const Matrix< T, 3 > &a, const Matrix< T, 3 > &b)
 
template<class T , int N>
__device__ __host__ Matrix< T, N > operator*= (Matrix< T, N > &a, const Matrix< T, N > &b)
 
template<class T , class U >
__device__ __host__ Matrix
< typename PromoteTypeId< T, U >
::Type, 3 > 
operator* (const Matrix< T, 3 > &a, const Matrix< U, 3 > &b)
 
template<class T >
__device__ __host__ Matrix< T, 2 > operator* (const Matrix< T, 2 > &a, const Matrix< T, 2 > &b)
 
template<class T , int N>
__device__ __host__ Matrix< T, N > conj (const Matrix< T, N > &other)
 
template<class T >
__device__ __host__ void computeMatrixInverse (const Matrix< T, 3 > &u, Matrix< T, 3 > *uinv)
 
template<class T , int N>
__device__ __host__ void setIdentity (Matrix< T, N > *m)
 
template<int N>
__device__ __host__ void setIdentity (Matrix< float2, N > *m)
 
template<int N>
__device__ __host__ void setIdentity (Matrix< double2, N > *m)
 
template<class T , int N>
__device__ __host__ void setZero (Matrix< T, N > *m)
 
template<int N>
__device__ __host__ void setZero (Matrix< float2, N > *m)
 
template<int N>
__device__ __host__ void setZero (Matrix< double2, N > *m)
 
template<class T , int N>
__device__ __host__ void copyColumn (const Matrix< T, N > &m, int c, Array< T, N > *a)
 
template<class T , int N>
__device__ __host__ void outerProd (const Array< T, N > &a, const Array< T, N > &b, Matrix< T, N > *m)
 
template<class T , int N>
__device__ __host__ void outerProd (const T(&a)[N], const T(&b)[N], Matrix< T, N > *m)
 
template<class T , int N>
std::ostream & operator<< (std::ostream &os, const Matrix< T, N > &m)
 
template<class T , int N>
std::ostream & operator<< (std::ostream &os, const Array< T, N > &a)
 
template<class T >
__device__ void loadLinkVariableFromArray (const T *const array, const int dir, const int idx, const int stride, Matrix< T, 3 > *link)
 
template<class T , int N>
__device__ void loadMatrixFromArray (const T *const array, const int idx, const int stride, Matrix< T, N > *mat)
 
__device__ void loadLinkVariableFromArray (const float2 *const array, const int dir, const int idx, const int stride, Matrix< double2, 3 > *link)
 
template<class T , int N>
__device__ void writeMatrixToArray (const Matrix< T, N > &mat, const int idx, const int stride, T *const array)
 
__device__ void appendMatrixToArray (const Matrix< double2, 3 > &mat, const int idx, const int stride, double2 *const array)
 
__device__ void appendMatrixToArray (const Matrix< float2, 3 > &mat, const int idx, const int stride, float2 *const array)
 
template<class T >
__device__ void writeLinkVariableToArray (const Matrix< T, 3 > &link, const int dir, const int idx, const int stride, T *const array)
 
__device__ void writeLinkVariableToArray (const Matrix< double2, 3 > &link, const int dir, const int idx, const int stride, float2 *const array)
 
template<class T >
__device__ void loadMomentumFromArray (const T *const array, const int dir, const int idx, const int stride, Matrix< T, 3 > *mom)
 
template<class T , class U >
__device__ void writeMomentumToArray (const Matrix< T, 3 > &mom, const int dir, const int idx, const U coeff, const int stride, T *const array)
 
template<class Cmplx >
__device__ __host__ void computeLinkInverse (Matrix< Cmplx, 3 > *uinv, const Matrix< Cmplx, 3 > &u)
 
void copyArrayToLink (Matrix< float2, 3 > *link, float *array)
 
template<class Cmplx , class Real >
void copyArrayToLink (Matrix< Cmplx, 3 > *link, Real *array)
 
void copyLinkToArray (float *array, const Matrix< float2, 3 > &link)
 
template<class Cmplx , class Real >
void copyLinkToArray (Real *array, const Matrix< Cmplx, 3 > &link)
 
template<class Cmplx >
__host__ __device__ void printLink (const Matrix< Cmplx, 3 > &link)
 
__device__ double norm2_ (const double2 &a)
 
__device__ float norm2_ (const float2 &a)
 
__device__ float norm2_ (const float4 &a)
 
__device__ double dot_ (const double2 &a, const double2 &b)
 
__device__ float dot_ (const float2 &a, const float2 &b)
 
__device__ float dot_ (const float4 &a, const float4 &b)
 
__device__ double2 dotNormA_ (const double2 &a, const double2 &b)
 
__device__ double2 dotNormA_ (const float2 &a, const float2 &b)
 
__device__ double2 dotNormA_ (const float4 &a, const float4 &b)
 
__device__ void Caxpy_ (const float2 &a, const float4 &x, float4 &y)
 
__device__ void Caxpy_ (const float2 &a, const float2 &x, float2 &y)
 
__device__ void Caxpy_ (const double2 &a, const double2 &x, double2 &y)
 
__device__ double2 cdot_ (const double2 &a, const double2 &b)
 
__device__ double2 cdot_ (const float2 &a, const float2 &b)
 
__device__ double2 cdot_ (const float4 &a, const float4 &b)
 
__device__ double3 cdotNormA_ (const double2 &a, const double2 &b)
 
__device__ double3 cdotNormA_ (const float2 &a, const float2 &b)
 
__device__ double3 cdotNormA_ (const float4 &a, const float4 &b)
 
__device__ double3 cdotNormB_ (const double2 &a, const double2 &b)
 
__device__ double3 cdotNormB_ (const float2 &a, const float2 &b)
 
__device__ double3 cdotNormB_ (const float4 &a, const float4 &b)
 
template<IndexType idxType, typename Int >
__device__ __forceinline__ int neighborIndex (const unsigned int &cb_idx, const int(&shift)[4], const bool(&partitioned)[4], const unsigned int &parity)
 
template<typename FloatN , int N, typename Output , typename Input >
__global__ void shiftColorSpinorFieldKernel (ShiftQuarkArg< Output, Input > arg)
 
template<typename FloatN , int N, typename Output , typename Input >
__global__ void shiftColorSpinorFieldExternalKernel (ShiftQuarkArg< Output, Input > arg)
 
void shiftColorSpinorField (cudaColorSpinorField &dst, const cudaColorSpinorField &src, const unsigned int parity, const unsigned int dim, const int shift)
 

Variables

unsigned long long blas_flops
 
unsigned long long blas_bytes
 
const int maxNface = 3
 
const int Nstream = 1
 
cudaStream_t * stream
 

Detailed Description

This code has not been checked. In particular, I suspect it is erroneous in multi-GPU since it looks like the halo ghost region isn't being treated here.

Generic Multi Shift Solver

For staggered, the mass is folded into the Dirac operator. Otherwise the matrix mass is 'unmodified'.

The lowest offset is in offsets[0]

Typedef Documentation

typedef std::complex< double > quda::Complex

Definition at line 13 of file eig_variables.h.

typedef std::map<TuneKey, TuneParam> quda::map

Definition at line 24 of file tune.cpp.

Enumeration Type Documentation

Enumerator
DEVICE 
HOST 
PINNED 
MAPPED 
N_ALLOC_TYPE 

Definition at line 14 of file malloc.cpp.

Enumerator
QUDA_PROFILE_H2D 

host -> device transfers

QUDA_PROFILE_D2H 

The time in seconds for device -> host transfers

QUDA_PROFILE_INIT 

The time in seconds taken for initialization

QUDA_PROFILE_PREAMBLE 

The time in seconds taken for any preamble

QUDA_PROFILE_COMPUTE 

The time in seconds taken for the actual computation

QUDA_PROFILE_EPILOGUE 

The time in seconds taken for any epilogue

QUDA_PROFILE_FREE 

The time in seconds for freeing resources

QUDA_PROFILE_PACK_KERNEL 

face packing kernel

QUDA_PROFILE_DSLASH_KERNEL 

dslash kernel

QUDA_PROFILE_GATHER 

gather (device -> host)

QUDA_PROFILE_SCATTER 

scatter (host -> device)

QUDA_PROFILE_EVENT_RECORD 

cuda event record

QUDA_PROFILE_EVENT_QUERY 

cuda event querying

QUDA_PROFILE_STREAM_WAIT_EVENT 

stream waiting for event completion

QUDA_PROFILE_COMMS 

synchronous communication

QUDA_PROFILE_COMMS_START 

initiating communication

QUDA_PROFILE_COMMS_QUERY 

querying communication

QUDA_PROFILE_CONSTANT 

time spent setting CUDA constant parameters

QUDA_PROFILE_TOTAL 

The total time in seconds for the algorithm. Must be the penultimate type.

QUDA_PROFILE_COUNT 

The total number of timers we have. Must be last enum type.

Definition at line 143 of file quda_internal.h.

Function Documentation

template<typename ValueType >
__host__ __device__ ValueType quda::abs ( ValueType  x)
inline

Definition at line 110 of file complex_quda.h.

template<typename ValueType >
__host__ __device__ ValueType quda::abs ( const complex< ValueType > &  z)
inline

Returns the magnitude of z.

Definition at line 827 of file complex_quda.h.

template<>
__host__ __device__ float quda::abs ( const complex< float > &  z)
inline

Definition at line 832 of file complex_quda.h.

template<>
__host__ __device__ double quda::abs ( const complex< double > &  z)
inline

Definition at line 837 of file complex_quda.h.

template<typename Float >
__device__ __host__ void quda::accumulateComplexProduct ( Float *  a,
const Float *  b,
const Float *  c,
Float  sign 
)
inline

Definition at line 9 of file gauge_field_order.h.

template<typename Float >
__device__ __host__ void quda::accumulateConjugateProduct ( Float *  a,
const Float *  b,
const Float *  c,
int  sign 
)
inline

Definition at line 40 of file gauge_field_order.h.

template<typename ValueType >
__host__ __device__ ValueType quda::acos ( ValueType  x)
inline

Definition at line 50 of file complex_quda.h.

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::acos ( const complex< ValueType > &  z)
inline

Definition at line 1041 of file complex_quda.h.

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::acosh ( const complex< ValueType > &  z)
inline

Definition at line 1062 of file complex_quda.h.

void quda::APEStep ( GaugeField &  dataDs,
const GaugeField &  dataOr,
double  alpha,
QudaFieldLocation  location 
)

Definition at line 497 of file gauge_ape.cu.

__device__ void quda::appendMatrixToArray ( const Matrix< double2, 3 > &  mat,
const int  idx,
const int  stride,
double2 *const  array 
)
inline

Definition at line 810 of file quda_matrix.h.

__device__ void quda::appendMatrixToArray ( const Matrix< float2, 3 > &  mat,
const int  idx,
const int  stride,
float2 *const  array 
)
inline

Definition at line 819 of file quda_matrix.h.

void quda::applyGaugePhase ( GaugeField &  u)

Apply the staggered phase factor to the gauge field.

Parameters
u - The gauge field to which we apply the staggered phase factors

Definition at line 261 of file gauge_phase.cu.

template<typename ValueType >
__host__ __device__ ValueType quda::arg ( const complex< ValueType > &  z)
inline

Returns the phase angle of z.

Definition at line 843 of file complex_quda.h.

template<>
__host__ __device__ float quda::arg ( const complex< float > &  z)
inline

Definition at line 848 of file complex_quda.h.

template<>
__host__ __device__ double quda::arg ( const complex< double > &  z)
inline

Definition at line 853 of file complex_quda.h.

template<typename ValueType >
__host__ __device__ ValueType quda::asin ( ValueType  x)
inline

Definition at line 55 of file complex_quda.h.

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::asin ( const complex< ValueType > &  z)
inline

Definition at line 1048 of file complex_quda.h.

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::asinh ( const complex< ValueType > &  z)
inline

Definition at line 1087 of file complex_quda.h.

void quda::assertAllMemFree ( )

Definition at line 294 of file malloc.cpp.

void quda::asymCloverDslashCuda ( cudaColorSpinorField *  out,
const cudaGaugeField &  gauge,
const FullClover  cloverInv,
const cudaColorSpinorField *  in,
const int  oddBit,
const int  daggerBit,
const cudaColorSpinorField *  x,
const double &  k,
const int *  commDim,
TimeProfile &  profile,
const QudaDslashPolicy dslashPolicy = QUDA_DSLASH2 
)

Definition at line 118 of file dslash_clover_asym.cu.

template<typename ValueType >
__host__ __device__ ValueType quda::atan ( ValueType  x)
inline

Definition at line 60 of file complex_quda.h.

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::atan ( const complex< ValueType > &  z)
inline

Definition at line 1055 of file complex_quda.h.

template<typename ValueType >
__host__ __device__ ValueType quda::atan2 ( ValueType  x,
ValueType  y 
)
inline

Definition at line 65 of file complex_quda.h.

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::atanh ( const complex< ValueType > &  z)
inline

Definition at line 1093 of file complex_quda.h.

template<typename ValueType >
__host__ __device__ complex<float> quda::atanh ( const complex< float > &  z)
inline

Definition at line 1111 of file complex_quda.h.

void quda::axCpu ( const double &  a,
cpuColorSpinorField &  x 
)

Definition at line 60 of file blas_cpu.cpp.

void quda::axCuda ( const double &  a,
cudaColorSpinorField &  x 
)

Definition at line 171 of file blas_quda.cu.

template<typename Float >
void quda::axpby ( const Float &  a,
const Float *  x,
const Float &  b,
Float *  y,
const int  N 
)

Definition at line 8 of file blas_cpu.cpp.

void quda::axpbyCpu ( const double &  a,
const cpuColorSpinorField &  x,
const double &  b,
cpuColorSpinorField &  y 
)

Definition at line 12 of file blas_cpu.cpp.

void quda::axpbyCuda ( const double &  a,
cudaColorSpinorField &  x,
const double &  b,
cudaColorSpinorField &  y 
)

Definition at line 82 of file blas_quda.cu.

void quda::axpyBzpcxCpu ( const double &  a,
cpuColorSpinorField &  x,
cpuColorSpinorField &  y,
const double &  b,
const cpuColorSpinorField &  z,
const double &  c 
)

Definition at line 129 of file blas_cpu.cpp.

void quda::axpyBzpcxCuda ( const double &  a,
cudaColorSpinorField &  x,
cudaColorSpinorField &  y,
const double &  b,
cudaColorSpinorField &  z,
const double &  c 
)

Definition at line 311 of file blas_quda.cu.

Complex quda::axpyCGNormCuda ( const double &  a,
cudaColorSpinorField &  x,
cudaColorSpinorField &  y 
)

Definition at line 682 of file reduce_quda.cu.

void quda::axpyCpu ( const double &  a,
const cpuColorSpinorField &  x,
cpuColorSpinorField &  y 
)

Definition at line 31 of file blas_cpu.cpp.

void quda::axpyCuda ( const double &  a,
cudaColorSpinorField &  x,
cudaColorSpinorField &  y 
)

Definition at line 115 of file blas_quda.cu.

double quda::axpyNormCpu ( const double &  a,
const cpuColorSpinorField &  x,
cpuColorSpinorField &  y 
)

Definition at line 178 of file blas_cpu.cpp.

double quda::axpyNormCuda ( const double &  a,
cudaColorSpinorField &  x,
cudaColorSpinorField &  y 
)

Definition at line 321 of file reduce_quda.cu.

void quda::axpyZpbxCpu ( const double &  a,
cpuColorSpinorField &  x,
cpuColorSpinorField &  y,
const cpuColorSpinorField &  z,
const double &  b 
)

Definition at line 136 of file blas_cpu.cpp.

void quda::axpyZpbxCuda ( const double &  a,
cudaColorSpinorField &  x,
cudaColorSpinorField &  y,
cudaColorSpinorField &  z,
const double &  b 
)

Definition at line 338 of file blas_quda.cu.

void quda::backSubs ( const Complex *  alpha,
Complex **const  beta,
const double *  gamma,
Complex *  delta,
int  n 
)

Definition at line 101 of file inv_gcr_quda.cpp.

void quda::cabxpyAxCpu ( const double &  a,
const Complex &  b,
cpuColorSpinorField &  x,
cpuColorSpinorField &  y 
)

Definition at line 259 of file blas_cpu.cpp.

void quda::cabxpyAxCuda ( const double &  a,
const Complex &  b,
cudaColorSpinorField &  x,
cudaColorSpinorField &  y 
)

Definition at line 386 of file blas_quda.cu.

double quda::cabxpyAxNormCpu ( const double &  a,
const Complex &  b,
cpuColorSpinorField &  x,
cpuColorSpinorField &  y 
)

Definition at line 283 of file blas_cpu.cpp.

double quda::cabxpyAxNormCuda ( const double &  a,
const Complex &  b,
cudaColorSpinorField &  x,
cudaColorSpinorField &  y 
)

Definition at line 440 of file reduce_quda.cu.

template<typename Float >
void quda::caxpby ( const std::complex< Float > &  a,
const std::complex< Float > *  x,
const std::complex< Float > &  b,
std::complex< Float > *  y,
int  N 
)

Definition at line 70 of file blas_cpu.cpp.

__device__ void quda::caxpby_ ( const float2 &  a,
const float4 &  x,
const float2 &  b,
float4 &  y 
)

Functor to perform the operation y = a*x + b*y (complex-valued)

Definition at line 217 of file blas_quda.cu.

__device__ void quda::caxpby_ ( const float2 &  a,
const float2 &  x,
const float2 &  b,
float2 &  y 
)

Definition at line 225 of file blas_quda.cu.

__device__ void quda::caxpby_ ( const double2 &  a,
const double2 &  x,
const double2 &  b,
double2 &  y 
)

Definition at line 231 of file blas_quda.cu.

void quda::caxpbyCpu ( const Complex &  a,
const cpuColorSpinorField &  x,
const Complex &  b,
cpuColorSpinorField &  y 
)

Definition at line 92 of file blas_cpu.cpp.

void quda::caxpbyCuda ( const Complex &  a,
cudaColorSpinorField &  x,
const Complex &  b,
cudaColorSpinorField &  y 
)

Definition at line 247 of file blas_quda.cu.

template<typename Float >
void quda::caxpbypcz ( const std::complex< Float > &  a,
const std::complex< Float > *  x,
const std::complex< Float > &  b,
const std::complex< Float > *  y,
const std::complex< Float > &  c,
std::complex< Float > *  z,
int  N 
)

Definition at line 105 of file blas_cpu.cpp.

void quda::caxpbypczpwCpu ( const Complex &  a,
cpuColorSpinorField &  x,
const Complex &  b,
cpuColorSpinorField &  y,
const Complex &  c,
cpuColorSpinorField &  z,
cpuColorSpinorField &  w 
)

Definition at line 295 of file blas_cpu.cpp.

void quda::caxpbypczpwCuda ( const Complex &  a,
cudaColorSpinorField &  x,
const Complex &  b,
cudaColorSpinorField &  y,
const Complex &  c,
cudaColorSpinorField &  z,
cudaColorSpinorField &  w 
)

Definition at line 429 of file blas_quda.cu.

void quda::caxpbypzCpu ( const Complex &  a,
cpuColorSpinorField &  x,
const Complex &  b,
cpuColorSpinorField &  y,
cpuColorSpinorField &  z 
)

Definition at line 289 of file blas_cpu.cpp.

void quda::caxpbypzCuda ( const Complex &  a,
cudaColorSpinorField &  x,
const Complex &  b,
cudaColorSpinorField &  y,
cudaColorSpinorField &  z 
)

Definition at line 407 of file blas_quda.cu.

double3 quda::caxpbypzYmbwcDotProductUYNormYCpu ( const Complex &  a,
const cpuColorSpinorField &  x,
const Complex &  b,
cpuColorSpinorField &  y,
cpuColorSpinorField &  z,
const cpuColorSpinorField &  w,
const cpuColorSpinorField &  u 
)

Definition at line 250 of file blas_cpu.cpp.

double3 quda::caxpbypzYmbwcDotProductUYNormYCuda ( const Complex &  a,
cudaColorSpinorField &  x,
const Complex &  b,
cudaColorSpinorField &  y,
cudaColorSpinorField &  z,
cudaColorSpinorField &  w,
cudaColorSpinorField &  u 
)

Definition at line 643 of file reduce_quda.cu.

void quda::caxpbypzYmbwCpu ( const Complex &  a,
const cpuColorSpinorField &  x,
const Complex &  b,
cpuColorSpinorField &  y,
cpuColorSpinorField &  z,
const cpuColorSpinorField &  w 
)

Definition at line 143 of file blas_cpu.cpp.

void quda::caxpbypzYmbwCuda ( const Complex &  a,
cudaColorSpinorField &  x,
const Complex &  b,
cudaColorSpinorField &  y,
cudaColorSpinorField &  z,
cudaColorSpinorField &  w 
)

Definition at line 366 of file blas_quda.cu.

__device__ void quda::caxpy_ ( const float2 &  a,
const float4 &  x,
float4 &  y 
)

Functor to perform the operation y += a * x (complex-valued)

Definition at line 180 of file blas_quda.cu.

__device__ void quda::caxpy_ ( const float2 &  a,
const float2 &  x,
float2 &  y 
)

Definition at line 187 of file blas_quda.cu.

__device__ void quda::caxpy_ ( const double2 &  a,
const double2 &  x,
double2 &  y 
)

Definition at line 192 of file blas_quda.cu.

__device__ void quda::Caxpy_ ( const float2 &  a,
const float4 &  x,
float4 &  y 
)

Functor to perform the operation y += a * x (complex-valued)

Definition at line 353 of file reduce_quda.cu.

__device__ void quda::Caxpy_ ( const float2 &  a,
const float2 &  x,
float2 &  y 
)

Definition at line 360 of file reduce_quda.cu.

__device__ void quda::Caxpy_ ( const double2 &  a,
const double2 &  x,
double2 &  y 
)

Definition at line 365 of file reduce_quda.cu.

void quda::caxpyCpu ( const Complex &  a,
const cpuColorSpinorField &  x,
cpuColorSpinorField &  y 
)

Definition at line 79 of file blas_cpu.cpp.

void quda::caxpyCuda ( const Complex &  a,
cudaColorSpinorField &  x,
cudaColorSpinorField &  y 
)

Definition at line 207 of file blas_quda.cu.

Complex quda::caxpyDotzyCpu ( const Complex &  a,
cpuColorSpinorField &  x,
cpuColorSpinorField &  y,
cpuColorSpinorField &  z 
)

Definition at line 303 of file blas_cpu.cpp.

Complex quda::caxpyDotzyCuda ( const Complex &  a,
cudaColorSpinorField &  x,
cudaColorSpinorField &  y,
cudaColorSpinorField &  z 
)

Definition at line 559 of file reduce_quda.cu.

double quda::caxpyNormCpu ( const Complex &  a,
cpuColorSpinorField &  x,
cpuColorSpinorField &  y 
)

Definition at line 264 of file blas_cpu.cpp.

double quda::caxpyNormCuda ( const Complex &  a,
cudaColorSpinorField &  x,
cudaColorSpinorField &  y 
)

Definition at line 388 of file reduce_quda.cu.

void quda::caxpyXmazCpu ( const Complex &  a,
cpuColorSpinorField &  x,
cpuColorSpinorField &  y,
cpuColorSpinorField &  z 
)

Definition at line 277 of file blas_cpu.cpp.

void quda::caxpyXmazCuda ( const Complex &  a,
cudaColorSpinorField &  x,
cudaColorSpinorField &  y,
cudaColorSpinorField &  z 
)

Definition at line 452 of file blas_quda.cu.

double quda::caxpyXmazNormXCpu ( const Complex &  a,
cpuColorSpinorField &  x,
cpuColorSpinorField &  y,
cpuColorSpinorField &  z 
)

Definition at line 270 of file blas_cpu.cpp.

double quda::caxpyXmazNormXCuda ( const Complex &  a,
cudaColorSpinorField &  x,
cudaColorSpinorField &  y,
cudaColorSpinorField &  z 
)

Definition at line 413 of file reduce_quda.cu.

__device__ double2 quda::cdot_ ( const double2 &  a,
const double2 &  b 
)

Returns complex-valued dot product of x and y

Definition at line 449 of file reduce_quda.cu.

__device__ double2 quda::cdot_ ( const float2 &  a,
const float2 &  b 
)

Definition at line 451 of file reduce_quda.cu.

__device__ double2 quda::cdot_ ( const float4 &  a,
const float4 &  b 
)

Definition at line 453 of file reduce_quda.cu.

__device__ double3 quda::cdotNormA_ ( const double2 &  a,
const double2 &  b 
)

First returns the dot product (x,y). Returns the norm of x.

Definition at line 570 of file reduce_quda.cu.

__device__ double3 quda::cdotNormA_ ( const float2 &  a,
const float2 &  b 
)

Definition at line 572 of file reduce_quda.cu.

__device__ double3 quda::cdotNormA_ ( const float4 &  a,
const float4 &  b 
)

Definition at line 574 of file reduce_quda.cu.

__device__ double3 quda::cdotNormB_ ( const double2 &  a,
const double2 &  b 
)

First returns the dot product (x,y). Returns the norm of y.

Definition at line 600 of file reduce_quda.cu.

__device__ double3 quda::cdotNormB_ ( const float2 &  a,
const float2 &  b 
)

Definition at line 602 of file reduce_quda.cu.

__device__ double3 quda::cdotNormB_ ( const float4 &  a,
const float4 &  b 
)

Definition at line 604 of file reduce_quda.cu.

template<typename Float >
Complex quda::cDotProduct ( const std::complex< Float > *  a,
const std::complex< Float > *  b,
const int  N 
)

Definition at line 211 of file blas_cpu.cpp.

Complex quda::cDotProductCpu ( const cpuColorSpinorField &  a,
const cpuColorSpinorField &  b 
)

Definition at line 217 of file blas_cpu.cpp.

Complex quda::cDotProductCuda ( cudaColorSpinorField &  x,
cudaColorSpinorField &  y 
)

Definition at line 468 of file reduce_quda.cu.

void quda::cDotProductCuda ( Complex *  result,
std::vector< cudaColorSpinorField * > &  a,
std::vector< cudaColorSpinorField * > &  b 
)

Definition at line 474 of file reduce_quda.cu.

double3 quda::cDotProductNormACpu ( const cpuColorSpinorField &  a,
const cpuColorSpinorField &  b 
)

Definition at line 237 of file blas_cpu.cpp.

double3 quda::cDotProductNormACuda ( cudaColorSpinorField &  a,
cudaColorSpinorField &  b 
)

Definition at line 591 of file reduce_quda.cu.

double3 quda::cDotProductNormBCpu ( const cpuColorSpinorField &  a,
const cpuColorSpinorField &  b 
)

Definition at line 243 of file blas_cpu.cpp.

double3 quda::cDotProductNormBCuda ( cudaColorSpinorField &  a,
cudaColorSpinorField &  b 
)

Definition at line 620 of file reduce_quda.cu.

void quda::checkMomOrder ( const GaugeField &  u)

Definition at line 14 of file copy_gauge.cu.

void quda::cloverCuda ( cudaColorSpinorField *  out,
const cudaGaugeField &  gauge,
const FullClover  clover,
const cudaColorSpinorField *  in,
const int  oddBit 
)

Definition at line 229 of file dslash_quda.cu.

void quda::cloverDerivative ( cudaGaugeField &  out,
cudaGaugeField &  gauge,
cudaGaugeField &  oprod,
int  mu,
int  nu,
double  coeff,
QudaParity  parity,
int  conjugate 
)

Definition at line 369 of file clover_deriv_quda.cu.

void quda::cloverDslashCuda ( cudaColorSpinorField *  out,
const cudaGaugeField &  gauge,
const FullClover  cloverInv,
const cudaColorSpinorField *  in,
const int  oddBit,
const int  daggerBit,
const cudaColorSpinorField *  x,
const double &  k,
const int *  commDim,
TimeProfile &  profile,
const QudaDslashPolicy dslashPolicy = QUDA_DSLASH2 
)

Definition at line 117 of file dslash_clover.cu.

void quda::cloverInvert ( CloverField &  clover,
bool  computeTraceLog,
QudaFieldLocation  location 
)

This function computes the Cholesky decomposition of each clover matrix and stores the clover inverse field.

Parameters
clover - The clover field (contains both the field itself and its inverse)
computeTraceLog - Whether to compute the trace logarithm of the clover term
location - The location of the field

Definition at line 298 of file clover_invert.cu.

void quda::collectGhostStaple ( int *  X,
void *  even,
void *  odd,
int  volumeCB,
int  stride,
QudaPrecision  precision,
void *  ghost_staple_gpu,
int  dir,
int  whichway,
cudaStream_t *  stream 
)

Definition at line 481 of file misc_helpers.cu.

template<int dir, int whichway, typename Float2 >
__global__ void quda::collectGhostStapleKernel ( Float2 *  out,
Float2 *  in,
int  parity,
GhostStapleParam  param 
)

Definition at line 403 of file misc_helpers.cu.

template<class U , class V >
int quda::compareSpinor ( const U &  u,
const V v,
const int  tol 
)

Definition at line 60 of file color_spinor_util.cu.

void quda::completeKSForce ( GaugeField &  mom,
const GaugeField &  oprod,
const GaugeField &  gauge,
QudaFieldLocation  location,
long long *  flops = NULL 
)

Definition at line 206 of file ks_force_quda.cu.

template<typename Float , typename Oprod , typename Gauge , typename Mom >
void quda::completeKSForce ( Oprod  oprod,
Gauge  gauge,
Mom  mom,
int  dim[4],
const GaugeField &  meta,
QudaFieldLocation  location,
long long *  flops 
)

Definition at line 195 of file ks_force_quda.cu.

template<typename Float , typename Oprod , typename Gauge , typename Mom >
__host__ __device__ void quda::completeKSForceCore ( KSForceArg< Oprod, Gauge, Mom > &  arg,
int  idx 
)

Definition at line 59 of file ks_force_quda.cu.

template<typename Float , typename Oprod , typename Gauge , typename Mom >
void quda::completeKSForceCPU ( KSForceArg< Oprod, Gauge, Mom > &  arg)

Definition at line 133 of file ks_force_quda.cu.

template<typename Float , typename Oprod , typename Gauge , typename Mom >
__global__ void quda::completeKSForceKernel ( KSForceArg< Oprod, Gauge, Mom >  arg)

Definition at line 121 of file ks_force_quda.cu.

template<typename Float >
__device__ __host__ void quda::complexConjugateProduct ( Float *  a,
const Float *  b,
const Float *  c 
)
inline

Definition at line 47 of file gauge_field_order.h.

template<typename Float >
__device__ __host__ void quda::complexDotProduct ( Float *  a,
const Float *  b,
const Float *  c 
)
inline

Definition at line 23 of file gauge_field_order.h.

template<typename Float >
__device__ __host__ void quda::complexProduct ( Float *  a,
const Float *  b,
const Float *  c 
)
inline

Definition at line 16 of file gauge_field_order.h.

template<typename Float >
__device__ __host__ void quda::complexQuotient ( Float *  a,
const Float *  b,
const Float *  c 
)
inline

Definition at line 31 of file gauge_field_order.h.

void quda::computeClover ( CloverField clover,
const GaugeField gauge,
double  coeff,
QudaFieldLocation  location 
)

Definition at line 602 of file clover_quda.cu.

void quda::computeCloverSigmaTrace ( GaugeField gauge,
const CloverField clover,
int  dir1,
int  dir2,
QudaFieldLocation  location 
)

Definition at line 310 of file clover_trace_quda.cu.

void quda::computeFatLinkCore ( cudaGaugeField *  cudaSiteLink,
double *  act_path_coeff,
QudaGaugeParam qudaGaugeParam,
QudaComputeFatMethod  method,
cudaGaugeField *  cudaFatLink,
cudaGaugeField *  cudaLongLink,
TimeProfile &  profile 
)
void quda::computeGenStapleFieldParityKernel ( void *  staple_even,
void *  staple_odd,
const void *  sitelink_even,
const void *  sitelink_odd,
void *  fatlink_even,
void *  fatlink_odd,
const void *  mulink_even,
const void *  mulink_odd,
int  mu,
int  nu,
int  save_staple,
double  mycoeff,
QudaReconstructType  recon,
QudaPrecision  prec,
dim3  halfGridDim,
llfat_kernel_param_t  kparam,
cudaStream_t *  stream 
)
void quda::computeGenStapleFieldParityKernel_ex ( void *  staple_even,
void *  staple_odd,
const void *  sitelink_even,
const void *  sitelink_odd,
void *  fatlink_even,
void *  fatlink_odd,
const void *  mulink_even,
const void *  mulink_odd,
int  mu,
int  nu,
int  save_staple,
double  mycoeff,
QudaReconstructType  recon,
QudaPrecision  prec,
llfat_kernel_param_t  kparam 
)
template<typename Float , typename Result , typename Oprod , typename Gauge >
void quda::computeKSLongLinkForce ( Result  res,
Oprod  oprod,
Gauge  gauge,
int  dim[4],
const GaugeField &  meta,
QudaFieldLocation  location 
)

Definition at line 421 of file ks_force_quda.cu.

template<typename Float >
void quda::computeKSLongLinkForce ( GaugeField &  result,
const GaugeField &  oprod,
const GaugeField &  gauge,
QudaFieldLocation  location 
)

Definition at line 430 of file ks_force_quda.cu.

template<typename Float , typename Result , typename Oprod , typename Gauge >
__host__ __device__ void quda::computeKSLongLinkForceCore ( KSLongLinkArg< Result, Oprod, Gauge > &  arg,
int  idx 
)

Definition at line 276 of file ks_force_quda.cu.

template<typename Float , typename Result , typename Oprod , typename Gauge >
void quda::computeKSLongLinkForceCPU ( KSLongLinkArg< Result, Oprod, Gauge > &  arg)

Definition at line 352 of file ks_force_quda.cu.

template<typename Float , typename Result , typename Oprod , typename Gauge >
__global__ void quda::computeKSLongLinkForceKernel ( KSLongLinkArg< Result, Oprod, Gauge >  arg)

Definition at line 340 of file ks_force_quda.cu.

template<class Cmplx >
__device__ __host__ void quda::computeLinkInverse ( Matrix< Cmplx, 3 > *  uinv,
const Matrix< Cmplx, 3 > &  u 
)
inline

Definition at line 924 of file quda_matrix.h.

void quda::computeLongLinkCuda ( void *  outEven,
void *  outOdd,
const void *const  inEven,
const void *const  inOdd,
double  coeff,
QudaReconstructType  recon,
QudaPrecision  prec,
dim3  halfGridDim,
llfat_kernel_param_t  kparam 
)
template<class T >
__device__ __host__ void quda::computeMatrixInverse ( const Matrix< T, 3 > &  u,
Matrix< T, 3 > *  uinv 
)
inline

Definition at line 555 of file quda_matrix.h.

void quda::computeStaggeredOprod ( cudaGaugeField &  out,
cudaColorSpinorField &  in,
FaceBuffer &  facebuffer,
const unsigned int  parity,
const double  coeff,
const unsigned int  displacement 
)
void quda::computeStaggeredOprod ( cudaGaugeField &  outA,
cudaGaugeField &  outB,
cudaColorSpinorField &  inEven,
cudaColorSpinorField &  inOdd,
FaceBuffer &  faceBuffer,
const unsigned int  parity,
const double  coeff[2] 
)

Definition at line 635 of file staggered_oprod.cu.

template<typename ValueType >
__host__ __device__ ValueType quda::conj ( ValueType  x)
inline

Definition at line 115 of file complex_quda.h.

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::conj ( const complex< ValueType > &  z)
inline

Returns the complex conjugate of z.

Definition at line 821 of file complex_quda.h.

template<class Cmplx >
__device__ __host__ Cmplx quda::conj ( const Cmplx &  a)
inline

Definition at line 251 of file quda_matrix.h.

__device__ __host__ double quda::conj ( const double &  a)
inline

Definition at line 256 of file quda_matrix.h.

__device__ __host__ float quda::conj ( const float &  a)
inline

Definition at line 261 of file quda_matrix.h.

template<typename Cmplx >
__device__ __host__ Cmplx quda::Conj ( const Cmplx &  a)
inline

Definition at line 267 of file quda_matrix.h.

template<class T , int N>
__device__ __host__ Matrix<T,N> quda::conj ( const Matrix< T, N > &  other)
inline

Definition at line 540 of file quda_matrix.h.

void quda::contractCuda ( const cudaColorSpinorField &  x,
const cudaColorSpinorField &  y,
void *  result,
const QudaContractType  contract_type,
const QudaParity  parity 
)

Contracts the x and y spinors (x is daggered) and stores the result in the array result. One must specify the contract type (time-sliced or volumed contract, and whether we should include a gamma5 in the middle), as well as the time-slice (see overloaded version of the same function) in case we don't want a volume contraction. The function works only with parity spinors, and the parity must be specified.

Definition at line 290 of file contract.cu.

void quda::contractCuda ( const cudaColorSpinorField &  x,
const cudaColorSpinorField &  y,
void *  result,
const QudaContractType  contract_type,
const int  nTSlice,
const QudaParity  parity 
)

Contracts the x and y spinors (x is daggered) and stores the result in the array result. One must specify the contract type (time-sliced or volumed contract, and whether we should include a gamma5 in the middle), as well as the time-slice in case we don't want a volume contraction. The function works only with parity spinors, and the parity must be specified.

Definition at line 325 of file contract.cu.

template<typename T1 , typename T2 >
__host__ __device__ void quda::copy ( T1 &  a,
const T2 &  b 
)
inline

Definition at line 33 of file register_traits.h.

template<>
__host__ __device__ void quda::copy ( float &  a,
const short &  b 
)
inline

Definition at line 34 of file register_traits.h.

template<>
__host__ __device__ void quda::copy ( short &  a,
const float &  b 
)
inline

Definition at line 35 of file register_traits.h.

void quda::copyArrayToLink ( Matrix< float2, 3 > *  link,
float *  array 
)
inline

Definition at line 962 of file quda_matrix.h.

template<class Cmplx , class Real >
void quda::copyArrayToLink ( Matrix< Cmplx, 3 > *  link,
Real *  array 
)
inline

Definition at line 973 of file quda_matrix.h.

template<class T , int N>
__device__ __host__ void quda::copyColumn ( const Matrix< T, N > &  m,
int  c,
Array< T, N > *  a 
)
inline

Definition at line 709 of file quda_matrix.h.

void quda::copyCuda ( cudaColorSpinorField &  dst,
const cudaColorSpinorField &  src 
)

Definition at line 235 of file copy_quda.cu.

template<int Ns, typename dstFloat , typename srcFloat >
void quda::copyExtendedColorSpinor ( ColorSpinorField &  dst,
const ColorSpinorField &  src,
const int  parity,
const QudaFieldLocation  location,
dstFloat *  Dst,
srcFloat *  Src,
float *  dstNorm,
float *  srcNorm 
)

Definition at line 413 of file extended_color_spinor_utilities.cu.

template<typename dstFloat , typename srcFloat >
void quda::CopyExtendedColorSpinor ( ColorSpinorField &  dst,
const ColorSpinorField &  src,
const int  parity,
const QudaFieldLocation  location,
dstFloat *  Dst,
srcFloat *  Src,
float *  dstNorm = 0,
float *  srcNorm = 0 
)

Definition at line 481 of file extended_color_spinor_utilities.cu.

void quda::copyExtendedColorSpinor ( ColorSpinorField &  dst,
const ColorSpinorField &  src,
QudaFieldLocation  location,
const int  parity,
void *  Dst,
void *  Src,
void *  dstNorm,
void *  srcNorm 
)

Definition at line 507 of file extended_color_spinor_utilities.cu.

void quda::copyExtendedGauge ( GaugeField &  out,
const GaugeField &  in,
QudaFieldLocation  location,
void *  Out = 0,
void *  In = 0 
)

This function is used for copying the gauge field into an extended gauge field. Defined in copy_gauge_extended.cu.

Parameters
out: The extended output field to which we are copying
in: The input field from which we are copying
location: The location of where we are doing the copying (CPU or CUDA)
Out: The output buffer (optional)
In: The input buffer (optional)

Definition at line 337 of file copy_gauge_extended.cu.

template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder >
void quda::copyGauge ( CopyGaugeArg< OutOrder, InOrder >  arg)

Generic CPU gauge reordering and packing

Definition at line 27 of file copy_gauge_inc.cu.

template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder >
void quda::copyGauge ( OutOrder  outOrder,
const InOrder  inOrder,
int  volume,
const int *  faceVolumeCB,
int  nDim,
int  geometry,
const GaugeField &  out,
QudaFieldLocation  location,
int  type 
)

Definition at line 185 of file copy_gauge_inc.cu.

template<typename FloatOut , typename FloatIn , int length, typename InOrder >
void quda::copyGauge ( const InOrder &  inOrder,
GaugeField &  out,
QudaFieldLocation  location,
FloatOut *  Out,
FloatOut **  outGhost,
int  type 
)

Definition at line 224 of file copy_gauge_inc.cu.

template<typename FloatOut , typename FloatIn , int length>
void quda::copyGauge ( GaugeField &  out,
const GaugeField &  in,
QudaFieldLocation  location,
FloatOut *  Out,
FloatIn *  In,
FloatOut **  outGhost,
FloatIn **  inGhost,
int  type 
)

Definition at line 349 of file copy_gauge_inc.cu.

template<typename FloatOut , typename FloatIn >
void quda::copyGauge ( GaugeField &  out,
const GaugeField &  in,
QudaFieldLocation  location,
FloatOut *  Out,
FloatIn *  In,
FloatOut **  outGhost,
FloatIn **  inGhost,
int  type 
)

Definition at line 460 of file copy_gauge_inc.cu.

template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder >
__device__ __host__ void quda::copyGaugeEx ( CopyGaugeExArg< OutOrder, InOrder > &  arg,
int  X,
int  parity 
)

Copy a regular gauge field into an extended gauge field

Definition at line 35 of file copy_gauge_extended.cu.

template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder >
void quda::copyGaugeEx ( CopyGaugeExArg< OutOrder, InOrder >  arg)

Definition at line 64 of file copy_gauge_extended.cu.

template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder >
void quda::copyGaugeEx ( OutOrder  outOrder,
const InOrder  inOrder,
const int *  E,
const int *  X,
const int *  faceVolumeCB,
const GaugeField &  meta,
QudaFieldLocation  location 
)

Definition at line 141 of file copy_gauge_extended.cu.

template<typename FloatOut , typename FloatIn , int length, typename InOrder >
void quda::copyGaugeEx ( const InOrder &  inOrder,
const int *  X,
GaugeField &  out,
QudaFieldLocation  location,
FloatOut *  Out 
)

Definition at line 152 of file copy_gauge_extended.cu.

template<typename FloatOut , typename FloatIn , int length>
void quda::copyGaugeEx ( GaugeField &  out,
const GaugeField &  in,
QudaFieldLocation  location,
FloatOut *  Out,
FloatIn *  In 
)

Definition at line 236 of file copy_gauge_extended.cu.

template<typename FloatOut , typename FloatIn >
void quda::copyGaugeEx ( GaugeField &  out,
const GaugeField &  in,
QudaFieldLocation  location,
FloatOut *  Out,
FloatIn *  In 
)

Definition at line 318 of file copy_gauge_extended.cu.

template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder >
__global__ void quda::copyGaugeExKernel ( CopyGaugeExArg< OutOrder, InOrder >  arg)

Definition at line 73 of file copy_gauge_extended.cu.

template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder >
__global__ void quda::copyGaugeKernel ( CopyGaugeArg< OutOrder, InOrder >  arg)

Generic CUDA gauge reordering and packing. Adopts a similar form as the CPU version, using the same inlined functions.

Definition at line 51 of file copy_gauge_inc.cu.

void quda::copyGenericClover ( CloverField &  out,
const CloverField &  in,
bool  inverse,
QudaFieldLocation  location,
void *  Out = 0,
void *  In = 0,
void *  outNorm = 0,
void *  inNorm = 0 
)

This generic function is used for copying the clover field, where the input and output can be in any order and location.

Parameters
out: The output field to which we are copying
in: The input field from which we are copying
inverse: Whether we are copying the inverse term or not
location: The location of where we are doing the copying (CPU or CUDA)
Out: The output buffer (optional)
In: The input buffer (optional)
outNorm: The output norm buffer (optional)
inNorm: The input norm buffer (optional)

Definition at line 182 of file copy_clover.cu.

template<int Ns, typename dstFloat , typename srcFloat >
void quda::copyGenericColorSpinor ( ColorSpinorField &  dst,
const ColorSpinorField &  src,
QudaFieldLocation  location,
dstFloat *  Dst,
srcFloat *  Src,
float *  dstNorm,
float *  srcNorm 
)

Definition at line 337 of file copy_color_spinor.cu.

template<typename dstFloat , typename srcFloat >
void quda::CopyGenericColorSpinor ( ColorSpinorField &  dst,
const ColorSpinorField &  src,
QudaFieldLocation  location,
dstFloat *  Dst,
srcFloat *  Src,
float *  dstNorm = 0,
float *  srcNorm = 0 
)

Definition at line 405 of file copy_color_spinor.cu.

void quda::copyGenericColorSpinor ( ColorSpinorField &  dst,
const ColorSpinorField &  src,
QudaFieldLocation  location,
void *  Dst = 0,
void *  Src = 0,
void *  dstNorm = 0,
void *  srcNorm = 0 
)

Definition at line 422 of file copy_color_spinor.cu.

void quda::copyGenericGauge ( GaugeField &  out,
const GaugeField &  in,
QudaFieldLocation  location,
void *  Out = 0,
void *  In = 0,
void **  ghostOut = 0,
void **  ghostIn = 0,
int  type = 0 
)

This function is used for copying one gauge field into another (the body and ghost zone, or the ghost zone only, depending on type). Defined in copy_gauge.cu.

Parameters
out: The output field to which we are copying
in: The input field from which we are copying
location: The location of where we are doing the copying (CPU or CUDA)
Out: The output buffer (optional)
In: The input buffer (optional)
ghostOut: The output ghost buffer (optional)
ghostIn: The input ghost buffer (optional)
type: The type of copy we are doing (0: body and ghost, otherwise ghost only)

Definition at line 30 of file copy_gauge.cu.

void quda::copyGenericGaugeDoubleOut ( GaugeField &  out,
const GaugeField &  in,
QudaFieldLocation  location,
void *  Out,
void *  In,
void **  ghostOut,
void **  ghostIn,
int  type 
)

Definition at line 5 of file copy_gauge_double.cu.

void quda::copyGenericGaugeHalfOut ( GaugeField &  out,
const GaugeField &  in,
QudaFieldLocation  location,
void *  Out,
void *  In,
void **  ghostOut,
void **  ghostIn,
int  type 
)

Definition at line 5 of file copy_gauge_half.cu.

void quda::copyGenericGaugeSingleOut ( GaugeField &  out,
const GaugeField &  in,
QudaFieldLocation  location,
void *  Out,
void *  In,
void **  ghostOut,
void **  ghostIn,
int  type 
)

Definition at line 5 of file copy_gauge_single.cu.

template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder >
void quda::copyGhost ( CopyGaugeArg< OutOrder, InOrder >  arg)

Generic CPU gauge ghost reordering and packing

Definition at line 74 of file copy_gauge_inc.cu.

template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder >
__global__ void quda::copyGhostKernel ( CopyGaugeArg< OutOrder, InOrder >  arg)

Generic CUDA kernel for copying the ghost zone. Adopts a similar form as the CPU version, using the same inlined functions.

Definition at line 98 of file copy_gauge_inc.cu.

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis , bool extend>
__device__ __host__ void quda::copyInterior ( CopySpinorExArg< OutOrder, InOrder, Basis > &  arg,
int  X 
)

Definition at line 170 of file extended_color_spinor_utilities.cu.

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis , bool extend>
void quda::copyInterior ( CopySpinorExArg< OutOrder, InOrder, Basis > &  arg)

Definition at line 220 of file extended_color_spinor_utilities.cu.

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis , bool extend>
__global__ void quda::copyInteriorKernel ( CopySpinorExArg< OutOrder, InOrder, Basis >  arg)

Definition at line 206 of file extended_color_spinor_utilities.cu.

void quda::copyLinkToArray ( float *  array,
const Matrix< float2, 3 > &  link 
)
inline

Definition at line 985 of file quda_matrix.h.

template<class Cmplx , class Real >
void quda::copyLinkToArray ( Real *  array,
const Matrix< Cmplx, 3 > &  link 
)
inline

Definition at line 997 of file quda_matrix.h.

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis >
void quda::copySpinorEx ( OutOrder  outOrder,
const InOrder  inOrder,
const Basis  basis,
const int *  E,
const int *  X,
const int  parity,
const bool  extend,
const ColorSpinorField &  meta,
QudaFieldLocation  location 
)

Definition at line 281 of file extended_color_spinor_utilities.cu.

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder >
void quda::copySpinorEx ( OutOrder  outOrder,
InOrder  inOrder,
const QudaGammaBasis  outBasis,
const QudaGammaBasis  inBasis,
const int *  E,
const int *  X,
const int  parity,
const bool  extend,
const ColorSpinorField &  meta,
QudaFieldLocation  location 
)

Definition at line 296 of file extended_color_spinor_utilities.cu.

template<typename ValueType >
__host__ __device__ ValueType quda::cos ( ValueType  x)
inline

Definition at line 35 of file complex_quda.h.

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::cos ( const complex< ValueType > &  z)
inline

Definition at line 884 of file complex_quda.h.

template<>
__host__ __device__ complex<float> quda::cos ( const complex< float > &  z)
inline

Definition at line 892 of file complex_quda.h.

template<typename ValueType >
__host__ __device__ ValueType quda::cosh ( ValueType  x)
inline

Definition at line 70 of file complex_quda.h.

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::cosh ( const complex< ValueType > &  z)
inline

Definition at line 900 of file complex_quda.h.

template<>
__host__ __device__ complex<float> quda::cosh ( const complex< float > &  z)
inline

Definition at line 908 of file complex_quda.h.

void quda::covDev ( cudaColorSpinorField *  out,
cudaGaugeField &  gauge,
const cudaColorSpinorField *  in,
const int  parity,
const int  mu,
TimeProfile &  profile 
)
void quda::createDirac ( Dirac *&  d,
Dirac *&  dSloppy,
Dirac *&  dPre,
QudaInvertParam param,
const bool  pc_solve 
)

Definition at line 1228 of file interface_quda.cpp.

void quda::createDslashEvents ( )

Definition at line 108 of file dslash_quda.cu.

template<typename Float >
ColorSpinorFieldOrder<Float>* quda::createOrder ( const cpuColorSpinorField &  a)

Definition at line 7 of file color_spinor_util.cu.

void quda::createStaggeredOprodEvents ( )
__device__ void quda::cxpaypbz_ ( const float4 &  x,
const float2 &  a,
const float4 &  y,
const float2 &  b,
float4 &  z 
)

Functor that performs the operation z[i] = x[i] + a*y[i] + b*z[i]

Definition at line 256 of file blas_quda.cu.

__device__ void quda::cxpaypbz_ ( const float2 &  x,
const float2 &  a,
const float2 &  y,
const float2 &  b,
float2 &  z 
)

Definition at line 265 of file blas_quda.cu.

__device__ void quda::cxpaypbz_ ( const double2 &  x,
const double2 &  a,
const double2 &  y,
const double2 &  b,
double2 &  z 
)

Definition at line 272 of file blas_quda.cu.

void quda::cxpaypbzCpu ( const cpuColorSpinorField &  x,
const Complex &  b,
const cpuColorSpinorField &  y,
const Complex &  c,
cpuColorSpinorField &  z 
)

Definition at line 115 of file blas_cpu.cpp.

void quda::cxpaypbzCuda ( cudaColorSpinorField &  x,
const Complex &  b,
cudaColorSpinorField &  y,
const Complex &  c,
cudaColorSpinorField &  z 
)

Definition at line 290 of file blas_quda.cu.

void quda::destroyDslashEvents ( )

Definition at line 129 of file dslash_quda.cu.

void quda::destroyStaggeredOprodEvents ( )
void quda::device_free_ ( const char *  func,
const char *  file,
int  line,
void *  ptr 
)

Free device memory allocated with device_malloc(). This function should only be called via the device_free() macro, defined in malloc_quda.h

Definition at line 232 of file malloc.cpp.

void * quda::device_malloc_ ( const char *  func,
const char *  file,
int  line,
size_t  size 
)

Perform a standard cudaMalloc() with error-checking. This function should only be called via the device_malloc() macro, defined in malloc_quda.h

Definition at line 146 of file malloc.cpp.

template<int N, typename FloatN , typename Float2 >
__global__ void quda::do_link_format_cpu_to_gpu ( FloatN *  dst,
Float2 *  src,
int  reconstruct,
int  Vh,
int  pad,
int  ghostV,
size_t  threads 
)

Definition at line 43 of file misc_helpers.cu.

template<int N, typename FloatN , typename Float2 >
__global__ void quda::do_link_format_cpu_to_gpu_milc ( FloatN *  dst,
Float2 *  src,
int  reconstruct,
int  Vh,
int  pad,
int  ghostV,
size_t  threads 
)

Definition at line 103 of file misc_helpers.cu.

template<typename FloatN >
__global__ void quda::do_link_format_gpu_to_cpu ( FloatN *  dst,
FloatN *  src,
int  Vh,
int  stride 
)

Definition at line 322 of file misc_helpers.cu.

void quda::domainWallDslashCuda ( cudaColorSpinorField *  out,
const cudaGaugeField &  gauge,
const cudaColorSpinorField *  in,
const int  parity,
const int  dagger,
const cudaColorSpinorField *  x,
const double &  m_f,
const double &  k,
const int *  commDim,
TimeProfile &  profile,
const QudaDslashPolicy dslashPolicy = QUDA_DSLASH 
)

Definition at line 172 of file dslash_domain_wall.cu.

void quda::domainWallDslashCuda ( cudaColorSpinorField *  out,
const cudaGaugeField &  gauge,
const cudaColorSpinorField *  in,
const int  parity,
const int  dagger,
const cudaColorSpinorField *  x,
const double &  m_f,
const double &  k,
const int *  commDim,
const int  DS_type,
TimeProfile &  profile,
const QudaDslashPolicy dslashPolicy = QUDA_DSLASH2 
)

Definition at line 234 of file dslash_domain_wall_4d.cu.

__device__ double quda::dot_ ( const double2 &  a,
const double2 &  b 
)

Return the real dot product of a and b

Definition at line 154 of file reduce_quda.cu.

__device__ float quda::dot_ ( const float2 &  a,
const float2 &  b 
)

Definition at line 155 of file reduce_quda.cu.

__device__ float quda::dot_ ( const float4 &  a,
const float4 &  b 
)

Definition at line 156 of file reduce_quda.cu.

__device__ double2 quda::dotNormA_ ( const double2 &  a,
const double2 &  b 
)

Definition at line 273 of file reduce_quda.cu.

__device__ double2 quda::dotNormA_ ( const float2 &  a,
const float2 &  b 
)

Definition at line 276 of file reduce_quda.cu.

__device__ double2 quda::dotNormA_ ( const float4 &  a,
const float4 &  b 
)

Definition at line 280 of file reduce_quda.cu.

void quda::endBlas ( void  )

Definition at line 59 of file blas_quda.cu.

void quda::endReduce ( void  )

Definition at line 85 of file reduce_quda.cu.

void quda::exchangeExtendedGhost ( cudaColorSpinorField *  spinor,
int  R[],
int  parity,
cudaStream_t *  stream_p 
)

Definition at line 24 of file extended_color_spinor_utilities.cu.

template<typename ValueType >
__host__ __device__ ValueType quda::exp ( ValueType  x)
inline

Definition at line 85 of file complex_quda.h.

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::exp ( const complex< ValueType > &  z)
inline

Definition at line 917 of file complex_quda.h.

template<>
__host__ __device__ complex<float> quda::exp ( const complex< float > &  z)
inline

Definition at line 923 of file complex_quda.h.

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename InOrder >
void quda::extendedCopyColorSpinor ( InOrder &  inOrder,
ColorSpinorField &  out,
QudaGammaBasis  inBasis,
const int *  E,
const int *  X,
const int  parity,
const bool  extend,
QudaFieldLocation  location,
FloatOut *  Out,
float *  outNorm 
)

Definition at line 323 of file extended_color_spinor_utilities.cu.

template<typename FloatOut , typename FloatIn , int Ns, int Nc>
void quda::extendedCopyColorSpinor ( ColorSpinorField &  out,
const ColorSpinorField &  in,
const int  parity,
const QudaFieldLocation  location,
FloatOut *  Out,
FloatIn *  In,
float *  outNorm,
float *  inNorm 
)

Definition at line 359 of file extended_color_spinor_utilities.cu.

void quda::extractExtendedGaugeGhost ( const GaugeField &  u,
int  dim,
const int *  R,
void **  ghost,
bool  extract 
)

This function is used for extracting the gauge ghost zone from a gauge field array. Defined in extract_gauge_ghost_extended.cu.

Parameters
u: The gauge field from which we want to extract/pack the ghost zone
dim: The dimension in which we are packing/unpacking
ghost: The array where we want to pack/unpack the ghost zone into/from
extract: Whether we are extracting into ghost or injecting from ghost

Definition at line 440 of file extract_gauge_ghost_extended.cu.

void quda::extractGaugeGhost ( const GaugeField &  u,
void **  ghost 
)

This function is used for extracting the gauge ghost zone from a gauge field array. Defined in extract_gauge_ghost.cu.

Parameters
u: The gauge field from which we want to extract the ghost zone
ghost: The array where we want to pack the ghost zone into

Definition at line 307 of file extract_gauge_ghost.cu.

template<typename Float , int length, int nDim, typename Order >
void quda::extractGhost ( ExtractGhostArg< Order, nDim >  arg)

Generic CPU gauge ghost extraction and packing. NB: This routine is specialized to four dimensions.

Definition at line 33 of file extract_gauge_ghost.cu.

template<typename Float , int length, typename Order >
void quda::extractGhost ( Order  order,
const GaugeField &  u,
QudaFieldLocation  location 
)

Generic CPU gauge ghost extraction and packing. NB: This routine is specialized to four dimensions.

Definition at line 172 of file extract_gauge_ghost.cu.

template<typename Float >
void quda::extractGhost ( const GaugeField &  u,
Float **  Ghost 
)

This is the template driver for extractGhost

Definition at line 214 of file extract_gauge_ghost.cu.

template<typename Float , int length, int nDim, typename Order , bool extract>
void quda::extractGhostEx ( ExtractGhostExArg< Order, nDim >  arg)

Generic CPU gauge ghost extraction and packing. NB: This routine is specialized to four dimensions.

Definition at line 93 of file extract_gauge_ghost_extended.cu.

template<typename Float , int length, typename Order >
void quda::extractGhostEx ( Order  order,
const int  dim,
const int *  surfaceCB,
const int *  E,
const int *  R,
bool  extract,
const GaugeField &  u,
QudaFieldLocation  location 
)

Generic CPU gauge ghost extraction and packing. NB: This routine is specialized to four dimensions.

Parameters
E: the extended gauge dimensions
R: array holding the radius of the extended region
extract: Whether we are extracting or injecting the ghost zone

Definition at line 274 of file extract_gauge_ghost_extended.cu.

template<typename Float >
void quda::extractGhostEx ( const GaugeField &  u,
int  dim,
const int *  R,
Float **  Ghost,
bool  extract 
)

This is the template driver for extractGhost

Definition at line 329 of file extract_gauge_ghost_extended.cu.

template<typename Float , int length, int nDim, typename Order , bool extract>
__global__ void quda::extractGhostExKernel ( ExtractGhostExArg< Order, nDim >  arg)

Generic GPU gauge ghost extraction and packing. NB: This routine is specialized to four dimensions. FIXME: this implementation will have two-way warp divergence. Generic CPU gauge ghost extraction and packing. NB: This routine is specialized to four dimensions.

Definition at line 140 of file extract_gauge_ghost_extended.cu.

template<typename Float , int length, int nDim, typename Order >
__global__ void quda::extractGhostKernel ( ExtractGhostArg< Order, nDim >  arg)

Generic GPU gauge ghost extraction and packing. NB: This routine is specialized to four dimensions. FIXME: this implementation will have two-way warp divergence.

Definition at line 78 of file extract_gauge_ghost.cu.

template<typename Float , int length, typename Arg >
__device__ __host__ void quda::extractor ( Arg &  arg,
int  dir,
int  a,
int  b,
int  c,
int  d,
int  g,
int  parity 
)

Definition at line 49 of file extract_gauge_ghost_extended.cu.

void quda::fermion_force_cuda ( double  eps,
double  weight1,
double  weight2,
void *  act_path_coeff,
FullHw  cudaHw,
cudaGaugeField &  cudaSiteLink,
cudaGaugeField &  cudaMom,
QudaGaugeParam param 
)
void quda::fermion_force_init_cuda ( QudaGaugeParam param)
void quda::fillInitCGSolveParam ( SolverParam &  initCGparam)

Definition at line 394 of file inv_eigcg_quda.cpp.

void quda::fillInnerSolveParam ( SolverParam &  inner,
const SolverParam &  outer 
)

Definition at line 28 of file inv_gcr_quda.cpp.

void quda::gamma5Cuda ( cudaColorSpinorField *  out,
const cudaColorSpinorField *  in 
)

Applies a gamma5 matrix to a spinor. This is the function to be called in interfaces; it requires only pointers to the output spinor (out) and the input spinor (in), in that order.

Definition at line 85 of file contract.cu.

void quda::gauge_force_cuda ( cudaGaugeField &  cudaMom,
double  eb3,
cudaGaugeField &  cudaSiteLink,
QudaGaugeParam param,
int ***  input_path,
int *  length,
double *  path_coeff,
int  num_paths,
int  max_length 
)

Definition at line 328 of file gauge_force_quda.cu.

void quda::gauge_force_cuda_dir ( cudaGaugeField &  cudaMom,
const int  dir,
const double  eb3,
const cudaGaugeField &  cudaSiteLink,
const QudaGaugeParam param,
int **  input_path,
const int *  length,
const double *  path_coeff,
const int  num_paths,
const int  max_length 
)

Definition at line 274 of file gauge_force_quda.cu.

void quda::gauge_force_init_cuda ( QudaGaugeParam param,
int  max_length 
)

Definition at line 112 of file gauge_force_quda.cu.

template<int oddBit, typename Float , typename Float2 , typename FloatN >
__global__ void quda::GAUGE_FORCE_KERN_NAME ( Float2 *  momEven,
Float2 *  momOdd,
const int  dir,
const double  eb3,
const FloatN *  linkEven,
const FloatN *  linkOdd,
const int *  input_path,
const int *  length,
const double *  path_coeff,
const int  num_paths,
const kernel_param_t  kparam 
)

Definition at line 477 of file gauge_force_quda.cu.

int quda::genericCompare ( const cpuColorSpinorField &  a,
const cpuColorSpinorField &  b,
int  tol 
)

Definition at line 118 of file color_spinor_util.cu.

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder >
void quda::genericCopyColorSpinor ( OutOrder &  outOrder,
const InOrder &  inOrder,
QudaGammaBasis  dstBasis,
QudaGammaBasis  srcBasis,
const ColorSpinorField &  out,
QudaFieldLocation  location 
)

Decide whether we are changing basis or not

Definition at line 209 of file copy_color_spinor.cu.

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename InOrder >
void quda::genericCopyColorSpinor ( InOrder &  inOrder,
ColorSpinorField &  out,
QudaGammaBasis  inBasis,
QudaFieldLocation  location,
FloatOut *  Out,
float *  outNorm 
)

Decide on the output order

Definition at line 268 of file copy_color_spinor.cu.

template<typename FloatOut , typename FloatIn , int Ns, int Nc>
void quda::genericCopyColorSpinor ( ColorSpinorField &  out,
const ColorSpinorField &  in,
QudaFieldLocation  location,
FloatOut *  Out,
FloatIn *  In,
float *  outNorm,
float *  inNorm 
)

Decide on the input order

Definition at line 305 of file copy_color_spinor.cu.

void quda::genericPrintVector ( cpuColorSpinorField &  a,
unsigned int  x 
)

Definition at line 165 of file color_spinor_util.cu.

void quda::genericSource ( cpuColorSpinorField &  a,
QudaSourceType  sourceType,
int  x,
int  s,
int  c 
)

Definition at line 38 of file color_spinor_util.cu.

cudaStream_t * quda::getBlasStream ( )

Definition at line 64 of file blas_quda.cu.

__device__ __host__ void quda::getCoords ( int  x[4],
int  cb_index,
const int  X[4],
int  parity 
)
inline

Definition at line 48 of file ks_force_quda.cu.

template<class T >
__device__ __host__ T quda::getDeterminant ( const Matrix< T, 3 > &  a)
inline

Definition at line 385 of file quda_matrix.h.

bool quda::getDslashLaunch ( )
bool quda::getKernelPackT ( )
Returns
Whether the T dimension is kernel packed or not

Definition at line 84 of file dslash_quda.cu.

template<class Cmplx >
__device__ __host__ Cmplx quda::getPreciseInverse ( const Cmplx &  z)
inline

Definition at line 276 of file quda_matrix.h.

template<class T >
__device__ __host__ T quda::getTrace ( const Matrix< T, 3 > &  a)
inline

Definition at line 378 of file quda_matrix.h.

bool quda::getTwistPack ( )
Returns
Whether a kernel requires twisted pack or not

Definition at line 91 of file dslash_quda.cu.

template<typename Float >
double3 quda::HeavyQuarkResidualNorm ( const Float *  x,
const Float *  r,
const int  volume,
const int  Nint 
)

Definition at line 310 of file blas_cpu.cpp.

double3 quda::HeavyQuarkResidualNormCpu ( cpuColorSpinorField &  x,
cpuColorSpinorField &  r 
)

Definition at line 331 of file blas_cpu.cpp.

double3 quda::HeavyQuarkResidualNormCpu ( cpuColorSpinorField &  x,
cpuColorSpinorField &  y,
cpuColorSpinorField &  r 
)

Definition at line 352 of file blas_cpu.cpp.

double3 quda::HeavyQuarkResidualNormCuda ( cudaColorSpinorField &  x,
cudaColorSpinorField &  r 
)

Definition at line 777 of file reduce_quda.cu.

void quda::host_free_ ( const char *  func,
const char *  file,
int  line,
void *  ptr 
)

Free host memory allocated with safe_malloc(), pinned_malloc(), or mapped_malloc(). This function should only be called via the host_free() macro, defined in malloc_quda.h.

Definition at line 256 of file malloc.cpp.

void quda::improvedStaggeredDslashCuda ( cudaColorSpinorField *  out,
const cudaGaugeField &  fatGauge,
const cudaGaugeField &  longGauge,
const cudaColorSpinorField *  in,
const int  parity,
const int  dagger,
const cudaColorSpinorField *  x,
const double &  k,
const int *  commDim,
TimeProfile &  profile,
const QudaDslashPolicy dslashPolicy = QUDA_DSLASH2 
)

Definition at line 135 of file dslash_improved_staggered.cu.

template<int N>
__device__ __host__ int quda::index ( int  i,
int  j 
)
inline

Definition at line 342 of file quda_matrix.h.

void quda::initBlas ( )

Definition at line 53 of file blas_quda.cu.

void quda::initReduce ( )

Definition at line 52 of file reduce_quda.cu.

template<typename Float , int length, typename Arg >
__device__ __host__ void quda::injector ( Arg &  arg,
int  dir,
int  a,
int  b,
int  c,
int  d,
int  g,
int  parity 
)

Definition at line 70 of file extract_gauge_ghost_extended.cu.

bool quda::isUnitary ( const QudaGaugeParam param,
cpuGaugeField &  field,
double  max_error 
)
void quda::link_format_cpu_to_gpu ( void *  dst,
void *  src,
int  reconstruct,
int  Vh,
int  pad,
int  ghostV,
QudaPrecision  prec,
QudaGaugeFieldOrder  cpu_order,
cudaStream_t  stream 
)

Definition at line 144 of file misc_helpers.cu.

void quda::link_format_gpu_to_cpu ( void *  dst,
void *  src,
int  Vh,
int  stride,
QudaPrecision  prec,
cudaStream_t  stream 
)

Definition at line 347 of file misc_helpers.cu.

__device__ __host__ int quda::linkIndex ( int  x[],
int  dx[],
const int  X[4] 
)
inline

Definition at line 40 of file ks_force_quda.cu.

void quda::llfat_cuda ( cudaGaugeField *  cudaFatLink,
cudaGaugeField *  cudaLongLink,
cudaGaugeField &  cudaSiteLink,
cudaGaugeField &  cudaStaple,
cudaGaugeField &  cudaStaple1,
QudaGaugeParam param,
double *  act_path_coeff 
)

Definition at line 23 of file llfat_quda_itf.cpp.

void quda::llfat_cuda_ex ( cudaGaugeField *  cudaFatLink,
cudaGaugeField *  cudaLongLink,
cudaGaugeField &  cudaSiteLink,
cudaGaugeField &  cudaStaple,
cudaGaugeField &  cudaStaple1,
QudaGaugeParam param,
double *  act_path_coeff 
)

Definition at line 276 of file llfat_quda_itf.cpp.

void quda::llfat_init_cuda ( QudaGaugeParam param)
void quda::llfat_init_cuda_ex ( QudaGaugeParam param_ex)
void quda::llfatOneLinkKernel ( cudaGaugeField &  cudaFatLink,
cudaGaugeField &  cudaSiteLink,
cudaGaugeField &  cudaStaple,
cudaGaugeField &  cudaStaple1,
QudaGaugeParam param,
double *  act_path_coeff 
)

Definition at line 1187 of file llfat_quda.cu.

void quda::llfatOneLinkKernel_ex ( cudaGaugeField &  cudaFatLink,
cudaGaugeField &  cudaSiteLink,
cudaGaugeField &  cudaStaple,
cudaGaugeField &  cudaStaple1,
QudaGaugeParam param,
double *  act_path_coeff,
llfat_kernel_param_t  kparam 
)

Definition at line 1232 of file llfat_quda.cu.

template<typename Float , int Ns, int Nc>
__device__ void quda::load_shared ( typename mapper< Float >::type  v[Ns *Nc *2],
Float *  field,
int  x,
int  volume 
)
inline

Definition at line 236 of file color_spinor_field_order.h.

void quda::loadLinkToGPU ( cudaGaugeField *  cudaGauge,
cpuGaugeField *  cpuGauge,
QudaGaugeParam param 
)
void quda::loadLinkToGPU_ex ( cudaGaugeField *  cudaGauge,
cpuGaugeField *  cpuGauge 
)
void quda::loadLinkToGPU_gf ( cudaGaugeField *  cudaGauge,
cpuGaugeField *  cpuGauge,
QudaGaugeParam param 
)
template<class T >
__device__ void quda::loadLinkVariableFromArray ( const T *const  array,
const int  dir,
const int  idx,
const int  stride,
Matrix< T, 3 > *  link 
)
inline

Definition at line 767 of file quda_matrix.h.

__device__ void quda::loadLinkVariableFromArray ( const float2 *const  array,
const int  dir,
const int  idx,
const int  stride,
Matrix< double2, 3 > *  link 
)
inline

Definition at line 787 of file quda_matrix.h.

template<class T , int N>
__device__ void quda::loadMatrixFromArray ( const T *const  array,
const int  idx,
const int  stride,
Matrix< T, N > *  mat 
)
inline

Definition at line 778 of file quda_matrix.h.

template<class T >
__device__ void quda::loadMomentumFromArray ( const T *const  array,
const int  dir,
const int  idx,
const int  stride,
Matrix< T, 3 > *  mom 
)
inline

Definition at line 857 of file quda_matrix.h.

void quda::loadTuneCache ( QudaVerbosity  verbosity)

Definition at line 131 of file tune.cpp.

template<typename ValueType >
__host__ __device__ ValueType quda::log ( ValueType  x)
inline

Definition at line 90 of file complex_quda.h.

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::log ( const complex< ValueType > &  z)
inline

Definition at line 929 of file complex_quda.h.

template<>
__host__ __device__ complex<float> quda::log ( const complex< float > &  z)
inline

Definition at line 935 of file complex_quda.h.

template<typename ValueType >
__host__ __device__ ValueType quda::log10 ( ValueType  x)
inline

Definition at line 95 of file complex_quda.h.

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::log10 ( const complex< ValueType > &  z)
inline

Definition at line 942 of file complex_quda.h.

__forceinline__ __host__ __device__ float2 quda::make_FloatN ( const double2 &  a)

Definition at line 201 of file float_vector.h.

__forceinline__ __host__ __device__ float4 quda::make_FloatN ( const double4 &  a)

Definition at line 205 of file float_vector.h.

__forceinline__ __host__ __device__ double2 quda::make_FloatN ( const float2 &  a)

Definition at line 209 of file float_vector.h.

__forceinline__ __host__ __device__ double4 quda::make_FloatN ( const float4 &  a)

Definition at line 213 of file float_vector.h.

__forceinline__ __host__ __device__ short4 quda::make_shortN ( const float4 &  a)

Definition at line 217 of file float_vector.h.

__forceinline__ __host__ __device__ short2 quda::make_shortN ( const float2 &  a)

Definition at line 221 of file float_vector.h.

__forceinline__ __host__ __device__ short4 quda::make_shortN ( const double4 &  a)

Definition at line 225 of file float_vector.h.

__forceinline__ __host__ __device__ short2 quda::make_shortN ( const double2 &  a)

Definition at line 229 of file float_vector.h.

template<class Cmplx >
__device__ __host__ Cmplx quda::makeComplex ( const typename RealTypeId< Cmplx >::Type &  a,
const typename RealTypeId< Cmplx >::Type &  b 
)
inline

Definition at line 125 of file quda_matrix.h.

__device__ __host__ double2 quda::makeComplex ( const double &  a,
const double &  b 
)
inline

Definition at line 134 of file quda_matrix.h.

__device__ __host__ float2 quda::makeComplex ( const float &  a,
const float &  b 
)
inline

Definition at line 139 of file quda_matrix.h.

void * quda::mapped_malloc_ ( const char *  func,
const char *  file,
int  line,
size_t  size 
)

Allocate page-locked ("pinned") host memory, and map it into the GPU address space. This function should only be called via the mapped_malloc() macro, defined in malloc_quda.h.

Definition at line 212 of file malloc.cpp.

void quda::massRescale ( cudaColorSpinorField b,
QudaInvertParam param 
)

Definition at line 1245 of file interface_quda.cpp.

__forceinline__ __host__ __device__ float quda::max_fabs ( const float4 &  c)

Definition at line 177 of file float_vector.h.

__forceinline__ __host__ __device__ float quda::max_fabs ( const float2 &  b)

Definition at line 183 of file float_vector.h.

__forceinline__ __host__ __device__ double quda::max_fabs ( const double4 &  c)

Definition at line 187 of file float_vector.h.

__forceinline__ __host__ __device__ double quda::max_fabs ( const double2 &  b)

Definition at line 193 of file float_vector.h.

template<typename Float , int Nc, typename Order >
double quda::maxGauge ( const Order  order,
int  volume,
int  nDim 
)

Generic CPU function to find the gauge maximum.

Definition at line 9 of file max_gauge.cu.

double quda::maxGauge ( const GaugeField &  u)

This function is used to calculate the maximum absolute value of a gauge field array. Defined in max_gauge.cu.

Parameters
u: The gauge field from which we want to compute the max

Definition at line 29 of file max_gauge.cu.

void quda::MDWFDslashCuda ( cudaColorSpinorField *  out,
const cudaGaugeField &  gauge,
const cudaColorSpinorField *  in,
const int  parity,
const int  dagger,
const cudaColorSpinorField *  x,
const double &  m_f,
const double &  k,
const int *  commDim,
const int  DS_type,
TimeProfile &  profile,
const QudaDslashPolicy dslashPolicy = QUDA_DSLASH2 
)

Definition at line 247 of file dslash_mobius.cu.

void quda::mxpyCpu ( const cpuColorSpinorField &  x,
cpuColorSpinorField &  y 
)

Definition at line 51 of file blas_cpu.cpp.

void quda::mxpyCuda ( cudaColorSpinorField &  x,
cudaColorSpinorField &  y 
)

Definition at line 154 of file blas_quda.cu.

void quda::ndegTwistedMassDslashCuda ( cudaColorSpinorField *  out,
const cudaGaugeField &  gauge,
const cudaColorSpinorField *  in,
const int  parity,
const int  dagger,
const cudaColorSpinorField *  x,
const QudaTwistDslashType  type,
const double &  kappa,
const double &  mu,
const double &  epsilon,
const double &  k,
const int *  commDim,
TimeProfile &  profile,
const QudaDslashPolicy dslashPolicy = QUDA_DSLASH 
)

Definition at line 127 of file dslash_ndeg_twisted_mass.cu.

template<IndexType idxType, typename Int >
__device__ __forceinline__ int quda::neighborIndex ( const unsigned int &  cb_idx,
const int(&)  shift[4],
const bool(&)  partitioned[4],
const unsigned int &  parity 
)

Definition at line 41 of file shift_quark_field.cu.

template<typename ValueType >
__host__ __device__ ValueType quda::norm ( const complex< ValueType > &  z)
inline

Returns the magnitude of z squared.

Definition at line 859 of file complex_quda.h.

template<typename Float >
double quda::norm ( const Float *  a,
const int  N 
)

Definition at line 160 of file blas_cpu.cpp.

double quda::norm2 ( const ColorSpinorField &  a)

Definition at line 486 of file color_spinor_field.cpp.

double quda::norm2 ( const cudaGaugeField &  u)

This is a debugging function, where we cast a gauge field into a spinor field so we can compute its L2 norm.

Parameters
u: The gauge field that we want the norm of
Returns
The L2 norm squared of the gauge field

Definition at line 494 of file cuda_gauge_field.cu.

__device__ double quda::norm2_ ( const double2 &  a)

Return the L2 norm of a

Definition at line 129 of file reduce_quda.cu.

__device__ float quda::norm2_ ( const float2 &  a)

Definition at line 130 of file reduce_quda.cu.

__device__ float quda::norm2_ ( const float4 &  a)

Definition at line 131 of file reduce_quda.cu.

double quda::normCpu ( const cpuColorSpinorField &  b)

Definition at line 166 of file blas_cpu.cpp.

double quda::normCuda ( const cudaColorSpinorField &  b)

Definition at line 145 of file reduce_quda.cu.

template<typename ValueType >
__host__ __device__ bool quda::operator!= ( const complex< ValueType > &  lhs,
const complex< ValueType > &  rhs 
)
inline

Definition at line 802 of file complex_quda.h.

template<typename ValueType >
__host__ __device__ bool quda::operator!= ( const ValueType &  lhs,
const complex< ValueType > &  rhs 
)
inline

Definition at line 808 of file complex_quda.h.

template<typename ValueType >
__host__ __device__ bool quda::operator!= ( const complex< ValueType > &  lhs,
const ValueType &  rhs 
)
inline

Definition at line 814 of file complex_quda.h.

__host__ __device__ float4 quda::operator* ( const float  a,
const float4  x 
)
inline

Definition at line 35 of file float_vector.h.

__host__ __device__ float2 quda::operator* ( const float  a,
const float2  x 
)
inline

Definition at line 44 of file float_vector.h.

__host__ __device__ double2 quda::operator* ( const double  a,
const double2  x 
)
inline

Definition at line 51 of file float_vector.h.

__host__ __device__ double4 quda::operator* ( const double  a,
const double4  x 
)
inline

Definition at line 58 of file float_vector.h.

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::operator* ( const complex< ValueType > &  lhs,
const complex< ValueType > &  rhs 
)
inline

Definition at line 692 of file complex_quda.h.

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::operator* ( const complex< ValueType > &  lhs,
const ValueType &  rhs 
)
inline

Definition at line 701 of file complex_quda.h.

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::operator* ( const ValueType &  lhs,
const complex< ValueType > &  rhs 
)
inline

Definition at line 708 of file complex_quda.h.

__device__ __host__ double2 quda::operator* ( const double2 &  a,
const double &  scalar 
)
inline

Definition at line 193 of file quda_matrix.h.

__device__ __host__ float2 quda::operator* ( const float2 &  a,
const float &  scalar 
)
inline

Definition at line 198 of file quda_matrix.h.

template<class Cmplx >
__device__ __host__ Cmplx quda::operator* ( const Cmplx &  a,
const Cmplx &  b 
)
inline

Definition at line 245 of file quda_matrix.h.

template<class T , int N, class S >
__device__ __host__ Matrix<T,N> quda::operator* ( const S &  scalar,
const Matrix< T, N > &  a 
)
inline

Definition at line 439 of file quda_matrix.h.

template<class T , int N, class S >
__device__ __host__ Matrix<T,N> quda::operator* ( const Matrix< T, N > &  a,
const S &  scalar 
)
inline

Definition at line 449 of file quda_matrix.h.

template<class T >
__device__ __host__ Matrix<T,3> quda::operator* ( const Matrix< T, 3 > &  a,
const Matrix< T, 3 > &  b 
)
inline

Definition at line 472 of file quda_matrix.h.

template<class T , class U >
__device__ __host__ Matrix<typename PromoteTypeId<T,U>::Type,3> quda::operator* ( const Matrix< T, 3 > &  a,
const Matrix< U, 3 > &  b 
)
inline

Definition at line 508 of file quda_matrix.h.

template<class T >
__device__ __host__ Matrix<T,2> quda::operator* ( const Matrix< T, 2 > &  a,
const Matrix< T, 2 > &  b 
)
inline

Definition at line 527 of file quda_matrix.h.

__host__ __device__ float2 quda::operator*= ( float2 &  x,
const float  a 
)
inline

Definition at line 130 of file float_vector.h.

__host__ __device__ double2 quda::operator*= ( double2 &  x,
const float  a 
)
inline

Definition at line 136 of file float_vector.h.

__host__ __device__ float4 quda::operator*= ( float4 &  a,
const float &  b 
)
inline

Definition at line 142 of file float_vector.h.

__host__ __device__ double2 quda::operator*= ( double2 &  a,
const double &  b 
)
inline

Definition at line 150 of file float_vector.h.

__host__ __device__ double4 quda::operator*= ( double4 &  a,
const double &  b 
)
inline

Definition at line 156 of file float_vector.h.

template<class T , int N, class S >
__device__ __host__ Matrix<T,N> quda::operator*= ( Matrix< T, N > &  a,
const S &  scalar 
)
inline

Definition at line 454 of file quda_matrix.h.

template<class T , int N>
__device__ __host__ Matrix<T,N> quda::operator*= ( Matrix< T, N > &  a,
const Matrix< T, N > &  b 
)
inline

Definition at line 491 of file quda_matrix.h.

__host__ __device__ double2 quda::operator+ ( const double2 &  x,
const double2 &  y 
)
inline

Definition at line 13 of file float_vector.h.

__host__ __device__ double3 quda::operator+ ( const double3 &  x,
const double3 &  y 
)
inline

Definition at line 29 of file float_vector.h.

__host__ __device__ float2 quda::operator+ ( const float2  x,
const float2  y 
)
inline

Definition at line 67 of file float_vector.h.

__host__ __device__ float4 quda::operator+ ( const float4  x,
const float4  y 
)
inline

Definition at line 74 of file float_vector.h.

template<class Cmplx >
__device__ __host__ Cmplx quda::operator+ ( const Cmplx &  a,
const Cmplx &  b 
)
inline

Definition at line 164 of file quda_matrix.h.

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::operator+ ( const complex< ValueType > &  lhs,
const complex< ValueType > &  rhs 
)
inline

Definition at line 644 of file complex_quda.h.

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::operator+ ( const complex< ValueType > &  lhs,
const ValueType &  rhs 
)
inline

Definition at line 660 of file complex_quda.h.

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::operator+ ( const ValueType &  lhs,
const complex< ValueType > &  rhs 
)
inline

Definition at line 666 of file complex_quda.h.

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::operator+ ( const complex< ValueType > &  rhs)
inline

Definition at line 765 of file complex_quda.h.

template<class Cmplx , class Float >
__device__ __host__ Cmplx quda::operator+ ( const Cmplx &  a,
const Float &  scalar 
)
inline

Definition at line 204 of file quda_matrix.h.

template<class Cmplx >
__device__ __host__ Cmplx quda::operator+ ( const typename RealTypeId< Cmplx >::Type &  scalar,
const Cmplx &  a 
)
inline

Definition at line 227 of file quda_matrix.h.

template<class T , int N>
__device__ __host__ Matrix<T,N> quda::operator+ ( const Matrix< T, N > &  a,
const Matrix< T, N > &  b 
)
inline

Definition at line 396 of file quda_matrix.h.

template<typename ValueType >
__host__ __device__ complex<ValueType> quda::operator+ ( const volatile complex< ValueType > &  lhs,
const volatile complex< ValueType > &  rhs 
)
inline

Definition at line 652 of file complex_quda.h.

__host__ __device__ float4 quda::operator+= ( float4 &  x,
const float4  y 
)
inline

Definition at line 83 of file float_vector.h.

__host__ __device__ float2 quda::operator+= ( float2 &  x,
const float2  y 
)
inline

Definition at line 91 of file float_vector.h.

__host__ __device__ double2 quda::operator+= ( double2 &  x,
const double2  y 
)
inline

Definition at line 97 of file float_vector.h.

__host__ __device__ double3 quda::operator+= ( double3 &  x,
const double3  y 
)
inline

Definition at line 103 of file float_vector.h.

template<class Cmplx >
__device__ __host__ Cmplx& quda::operator+= ( Cmplx &  a,
const Cmplx &  b 
)
inline

Definition at line 149 of file quda_matrix.h.

template<class T , int N>
__device__ __host__ Matrix<T,N> quda::operator+= ( Matrix< T, N > &  a,
const Matrix< T, N > &  b 
)
inline

Definition at line 407 of file quda_matrix.h.

__host__ __device__ double2 quda::operator- ( const double2 &  x,
const double2 &  y 
)
inline

Definition at line 17 of file float_vector.h.

__host__ __device__ float2 quda::operator- ( const float2 &  x,
const float2 &  y 
)
inline

Definition at line 21 of file float_vector.h.

__host__ __device__ float4 quda::operator- ( const float4 &  x,
const float4 &  y 
)
inline

Definition at line 25 of file float_vector.h.

template<class Cmplx >
__device__ __host__ Cmplx quda::operator- ( const Cmplx &  a)
inline

Definition at line 144 of file quda_matrix.h.

__host__ __device__ float2 quda::operator- ( const float2 &  x)
inline

Definition at line 164 of file float_vector.h.

__host__ __device__ double2 quda::operator- ( const double2 &  x)
inline

Definition at line 168 of file float_vector.h.

template<class Cmplx >
__device__ __host__ Cmplx quda::operator- ( const Cmplx &  a,
const Cmplx &  b 
)
inline

Definition at line 169 of file quda_matrix.h.

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::operator- ( const complex< ValueType > &  lhs,
const complex< ValueType > &  rhs 
)
inline

Definition at line 673 of file complex_quda.h.

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::operator- ( const complex< ValueType > &  lhs,
const ValueType &  rhs 
)
inline

Definition at line 679 of file complex_quda.h.

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::operator- ( const ValueType &  lhs,
const complex< ValueType > &  rhs 
)
inline

Definition at line 685 of file complex_quda.h.

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::operator- ( const complex< ValueType > &  rhs)
inline

Definition at line 770 of file complex_quda.h.

template<class Cmplx >
__device__ __host__ Cmplx quda::operator- ( const Cmplx &  a,
const typename RealTypeId< Cmplx >::Type &  scalar 
)
inline

Definition at line 233 of file quda_matrix.h.

template<class Cmplx >
__device__ __host__ Cmplx quda::operator- ( const typename RealTypeId< Cmplx >::Type &  scalar,
const Cmplx &  a 
)
inline

Definition at line 239 of file quda_matrix.h.

template<class T , int N>
__device__ __host__ Matrix<T,N> quda::operator- ( const Matrix< T, N > &  a,
const Matrix< T, N > &  b 
)
inline

Definition at line 427 of file quda_matrix.h.

template<class T , int N>
__device__ __host__ Matrix<T,N> quda::operator- ( const Matrix< T, N > &  a)
inline

Definition at line 460 of file quda_matrix.h.

__host__ __device__ float4 quda::operator-= ( float4 &  x,
const float4  y 
)
inline

Definition at line 110 of file float_vector.h.

__host__ __device__ float2 quda::operator-= ( float2 &  x,
const float2  y 
)
inline

Definition at line 118 of file float_vector.h.

__host__ __device__ double2 quda::operator-= ( double2 &  x,
const double2  y 
)
inline

Definition at line 124 of file float_vector.h.

template<class Cmplx >
__device__ __host__ Cmplx& quda::operator-= ( Cmplx &  a,
const Cmplx &  b 
)
inline

Definition at line 156 of file quda_matrix.h.

template<class T , int N>
__device__ __host__ Matrix<T,N> quda::operator-= ( Matrix< T, N > &  a,
const Matrix< T, N > &  b 
)
inline

Definition at line 417 of file quda_matrix.h.

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::operator/ ( const complex< ValueType > &  lhs,
const complex< ValueType > &  rhs 
)
inline

Definition at line 716 of file complex_quda.h.

template<>
__host__ __device__ complex< float > quda::operator/ ( const complex< float > &  lhs,
const complex< float > &  rhs 
)
inline

Definition at line 725 of file complex_quda.h.

template<>
__host__ __device__ complex< double > quda::operator/ ( const complex< double > &  lhs,
const complex< double > &  rhs 
)
inline

Definition at line 731 of file complex_quda.h.

template<class Cmplx >
__device__ __host__ Cmplx quda::operator/ ( const Cmplx &  a,
const typename RealTypeId< Cmplx >::Type &  scalar 
)
inline

Definition at line 221 of file quda_matrix.h.

template<typename ValueType >
__host__ __device__ complex<ValueType> quda::operator/ ( const complex< ValueType > &  lhs,
const ValueType &  rhs 
)
inline

Definition at line 737 of file complex_quda.h.

template<typename ValueType >
__host__ __device__ complex<ValueType> quda::operator/ ( const ValueType &  lhs,
const complex< ValueType > &  rhs 
)
inline

Definition at line 744 of file complex_quda.h.

template<>
__host__ __device__ complex<float> quda::operator/ ( const float &  lhs,
const complex< float > &  rhs 
)
inline

Definition at line 752 of file complex_quda.h.

template<>
__host__ __device__ complex<double> quda::operator/ ( const double &  lhs,
const complex< double > &  rhs 
)
inline

Definition at line 757 of file complex_quda.h.

std::ostream & quda::operator<< ( std::ostream &  output,
const CloverFieldParam &  param 
)

Definition at line 273 of file clover_field.cpp.

std::ostream & quda::operator<< ( std::ostream &  output,
const LatticeFieldParam &  param 
)

Definition at line 145 of file lattice_field.cpp.

std::ostream & quda::operator<< ( std::ostream &  output,
const GaugeFieldParam &  param 
)

Definition at line 122 of file gauge_field.cpp.

template<typename ValueType , class charT , class traits >
std::basic_ostream< charT, traits > & quda::operator<< ( std::basic_ostream< charT, traits > &  os,
const complex< ValueType > &  z 
)

Definition at line 295 of file complex_quda.h.

std::ostream& quda::operator<< ( std::ostream &  os,
const float2 &  z 
)
inline

Definition at line 285 of file quda_matrix.h.

std::ostream& quda::operator<< ( std::ostream &  os,
const double2 &  z 
)
inline

Definition at line 290 of file quda_matrix.h.

std::ostream& quda::operator<< ( std::ostream &  out,
const ColorSpinorField &  a 
)

Definition at line 500 of file color_spinor_field.cpp.

template<class T , int N>
std::ostream& quda::operator<< ( std::ostream &  os,
const Matrix< T, N > &  m 
)

Definition at line 745 of file quda_matrix.h.

template<class T , int N>
std::ostream& quda::operator<< ( std::ostream &  os,
const Array< T, N > &  a 
)

Definition at line 757 of file quda_matrix.h.

std::ostream& quda::operator<< ( std::ostream &  out,
const cudaColorSpinorField &  a 
)

Definition at line 1368 of file cuda_color_spinor_field.cu.

template<typename ValueType >
__host__ __device__ bool quda::operator== ( const complex< ValueType > &  lhs,
const complex< ValueType > &  rhs 
)
inline

Definition at line 777 of file complex_quda.h.

template<typename ValueType >
__host__ __device__ bool quda::operator== ( const ValueType &  lhs,
const complex< ValueType > &  rhs 
)
inline

Definition at line 785 of file complex_quda.h.

template<typename ValueType >
__host__ __device__ bool quda::operator== ( const complex< ValueType > &  lhs,
const ValueType &  rhs 
)
inline

Definition at line 793 of file complex_quda.h.

template<typename ValueType , typename charT , class traits >
std::basic_istream< charT, traits > & quda::operator>> ( std::basic_istream< charT, traits > &  is,
complex< ValueType > &  z 
)

Definition at line 303 of file complex_quda.h.

void quda::orthoDir ( Complex **  beta,
cudaColorSpinorField *  Ap[],
int  k 
)

Definition at line 48 of file inv_gcr_quda.cpp.

template<class T , int N>
__device__ __host__ void quda::outerProd ( const Array< T, N > &  a,
const Array< T, N > &  b,
Matrix< T, N > *  m 
)
inline

Definition at line 720 of file quda_matrix.h.

template<class T , int N>
__device__ __host__ void quda::outerProd ( const T(&)  a[N],
const T(&)  b[N],
Matrix< T, N > *  m 
)
inline

Definition at line 732 of file quda_matrix.h.

void quda::pack_gauge_diag ( void *  buf,
int *  X,
void **  sitelink,
int  nu,
int  mu,
int  dir1,
int  dir2,
QudaPrecision  prec 
)
void quda::pack_ghost_all_links ( void **  cpuLink,
void **  cpuGhostBack,
void **  cpuGhostFwd,
int  dir,
int  nFace,
QudaPrecision  precision,
int *  X 
)
void quda::pack_ghost_all_staples_cpu ( void *  staple,
void **  cpuGhostStapleBack,
void **  cpuGhostStapleFwd,
int  nFace,
QudaPrecision  precision,
int *  X 
)
void quda::packFace ( void *  ghost_buf,
cudaColorSpinorField &  in,
const int  nFace,
const int  dagger,
const int  parity,
const int  dim,
const int  face_num,
const cudaStream_t &  stream,
const double  a = 0.0,
const double  b = 0.0 
)
void quda::packFace ( void *  ghost_buf,
cudaColorSpinorField &  in,
FullClover &  clov,
FullClover &  clovInv,
const int  nFace,
const int  dagger,
const int  parity,
const int  dim,
const int  face_num,
const cudaStream_t &  stream,
const double  a = 0.0 
)
void quda::packFaceExtended ( void *  ghost_buf,
cudaColorSpinorField &  field,
const int  nFace,
const int  R[],
const int  dagger,
const int  parity,
const int  dim,
const int  face_num,
const cudaStream_t &  stream,
const bool  unpack = false 
)
void quda::packGhostStaple ( int *  X,
void *  even,
void *  odd,
int  volume,
QudaPrecision  prec,
int  stride,
int  dir,
int  whichway,
void **  fwd_nbr_buf_gpu,
void **  back_nbr_buf_gpu,
void **  fwd_nbr_buf,
void **  back_nbr_buf,
cudaStream_t *  stream 
)
template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis >
void quda::packSpinor ( OutOrder &  outOrder,
const InOrder &  inOrder,
Basis  basis,
int  volume 
)

CPU function to reorder spinor fields.

Definition at line 127 of file copy_color_spinor.cu.

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis >
__global__ void quda::packSpinorKernel ( OutOrder  outOrder,
const InOrder  inOrder,
Basis  basis,
int  volume 
)

CUDA kernel to reorder spinor fields. Adopts a form similar to the CPU version, using the same inlined functions.

Definition at line 141 of file copy_color_spinor.cu.

void * quda::pinned_malloc_ ( const char *  func,
const char *  file,
int  line,
size_t  size 
)

Allocate page-locked ("pinned") host memory. This function should only be called via the pinned_malloc() macro, defined in malloc_quda.h.

Note that we do not rely on cudaHostAlloc(), since buffers allocated in this way have been observed to cause problems when shared with MPI via GPU Direct on some systems.

Definition at line 192 of file malloc.cpp.

double quda::plaquette ( const GaugeField &  data,
QudaFieldLocation  location 
)

Definition at line 242 of file gauge_plaq.cu.

template<class T >
void quda::point ( T &  t,
int  x,
int  s,
int  c 
)

Definition at line 36 of file color_spinor_util.cu.

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::polar ( const ValueType &  m,
const ValueType &  theta = 0 
)
inline

Returns the complex with magnitude m and angle theta in radians.

Definition at line 865 of file complex_quda.h.

template<>
__host__ __device__ complex<float> quda::polar ( const float &  magnitude,
const float &  angle 
)
inline

Definition at line 871 of file complex_quda.h.

template<>
__host__ __device__ complex<double> quda::polar ( const double &  magnitude,
const double &  angle 
)
inline

Definition at line 877 of file complex_quda.h.

template<typename ValueType , typename ExponentType >
__host__ __device__ ValueType quda::pow ( ValueType  x,
ExponentType  e 
)
inline

Definition at line 100 of file complex_quda.h.

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::pow ( const complex< ValueType > &  z,
const int &  n 
)
inline

Definition at line 975 of file complex_quda.h.

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::pow ( const complex< ValueType > &  z,
const ValueType &  x 
)
inline

Definition at line 951 of file complex_quda.h.

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::pow ( const complex< ValueType > &  z,
const complex< ValueType > &  z2 
)
inline

Definition at line 957 of file complex_quda.h.

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::pow ( const ValueType &  x,
const complex< ValueType > &  z 
)
inline

Definition at line 963 of file complex_quda.h.

template<>
__host__ __device__ complex<float> quda::pow ( const float &  x,
const complex< float > &  exponent 
)
inline

Definition at line 969 of file complex_quda.h.

void quda::print ( const double  d[],
int  n 
)

Definition at line 47 of file inv_mpcg_quda.cpp.

template<class Order >
void quda::print_vector ( const Order &  o,
unsigned int  x 
)

Definition at line 150 of file color_spinor_util.cu.

void quda::printLaunchTimer ( )

Definition at line 437 of file tune.cpp.

template<class Cmplx >
__host__ __device__ void quda::printLink ( const Matrix< Cmplx, 3 > &  link)
inline

Definition at line 1012 of file quda_matrix.h.

void quda::printPeakMemUsage ( )

Definition at line 286 of file malloc.cpp.

template<class T >
void quda::random ( T &  t)

Definition at line 22 of file color_spinor_util.cu.

template<typename Float >
double quda::reDotProduct ( const Float *  a,
const Float *  b,
const int  N 
)

Definition at line 185 of file blas_cpu.cpp.

double quda::reDotProductCpu ( const cpuColorSpinorField &  a,
const cpuColorSpinorField &  b 
)

Definition at line 191 of file blas_cpu.cpp.

double quda::reDotProductCuda ( cudaColorSpinorField &  a,
cudaColorSpinorField &  b 
)

Definition at line 170 of file reduce_quda.cu.

void quda::reDotProductCuda ( double *  result,
std::vector< cudaColorSpinorField * > &  a,
std::vector< cudaColorSpinorField * > &  b 
)

Definition at line 176 of file reduce_quda.cu.

double2 quda::reDotProductNormACuda ( cudaColorSpinorField &  a,
cudaColorSpinorField &  b 
)

Definition at line 297 of file reduce_quda.cu.

int quda::reliable ( double &  rNorm,
double &  maxrx,
double &  maxrr,
const double &  r2,
const double &  delta 
)

Definition at line 47 of file inv_bicgstab_quda.cpp.

double quda::resNorm ( const DiracMatrix &  mat,
cudaColorSpinorField &  b,
cudaColorSpinorField &  x 
)

Definition at line 20 of file inv_bicgstab_quda.cpp.

void * quda::safe_malloc_ ( const char *  func,
const char *  file,
int  line,
size_t  size 
)

Perform a standard malloc() with error-checking. This function should only be called via the safe_malloc() macro, defined in malloc_quda.h

Definition at line 168 of file malloc.cpp.

template<typename Float , int Ns, int Nc>
__device__ void quda::save_shared ( Float *  field,
const typename mapper< Float >::type  v[Ns *Nc *2],
int  x,
int  volumeCB 
)
inline

Definition at line 271 of file color_spinor_field_order.h.

void quda::saveTuneCache ( QudaVerbosity  verbosity)

Write tunecache to disk.

Definition at line 205 of file tune.cpp.

void quda::setBlasParam ( int  kernel,
int  prec,
int  threads,
int  blocks 
)
void quda::setDiracParam ( DiracParam diracParam,
QudaInvertParam inv_param,
bool  pc 
)

Definition at line 1102 of file interface_quda.cpp.

void quda::setDiracPreParam ( DiracParam diracParam,
QudaInvertParam inv_param,
const bool  pc 
)

Definition at line 1199 of file interface_quda.cpp.

void quda::setDiracSloppyParam ( DiracParam diracParam,
QudaInvertParam inv_param,
bool  pc 
)

Definition at line 1182 of file interface_quda.cpp.

void quda::setGhostSpinor ( bool  value)

Definition at line 42 of file color_spinor_field.cpp.

template<class T , int N>
__device__ __host__ void quda::setIdentity ( Matrix< T, N > *  m)
inline

Definition at line 597 of file quda_matrix.h.

template<int N>
__device__ __host__ void quda::setIdentity ( Matrix< float2, N > *  m)
inline

Definition at line 611 of file quda_matrix.h.

template<int N>
__device__ __host__ void quda::setIdentity ( Matrix< double2, N > *  m)
inline

Definition at line 625 of file quda_matrix.h.

void quda::setKernelPackT ( bool  pack)
Parameters
pack  Sets whether to use a kernel to pack the T dimension

Definition at line 82 of file dslash_quda.cu.

void quda::setPackComms ( const int *  commDim)

Sets commDim array used in dslash_pack.cu

Definition at line 39 of file dslash_pack.cu.

void quda::setTwistPack ( bool  pack)
Parameters
pack  Sets whether to use a kernel to pack twisted spinor

Definition at line 90 of file dslash_quda.cu.

void quda::setUnitarizeLinksConstants ( double  unitarize_eps,
double  max_error,
bool  allow_svd,
bool  svd_only,
double  svd_rel_error,
double  svd_abs_error,
bool  check_unitarization = true 
)
void quda::setUnitarizeLinksPadding ( int  input_padding,
int  output_padding 
)
template<class T , int N>
__device__ __host__ void quda::setZero ( Matrix< T, N > *  m)
inline

Definition at line 640 of file quda_matrix.h.

template<int N>
__device__ __host__ void quda::setZero ( Matrix< float2, N > *  m)
inline

Definition at line 653 of file quda_matrix.h.

template<int N>
__device__ __host__ void quda::setZero ( Matrix< double2, N > *  m)
inline

Definition at line 666 of file quda_matrix.h.

void quda::shiftColorSpinorField ( cudaColorSpinorField &  dst,
const cudaColorSpinorField &  src,
const unsigned int  parity,
const unsigned int  dim,
const int  shift 
)

Definition at line 210 of file shift_quark_field.cu.

template<typename FloatN , int N, typename Output , typename Input >
__global__ void quda::shiftColorSpinorFieldExternalKernel ( ShiftQuarkArg< Output, Input >  arg)

Definition at line 93 of file shift_quark_field.cu.

template<typename FloatN , int N, typename Output , typename Input >
__global__ void quda::shiftColorSpinorFieldKernel ( ShiftQuarkArg< Output, Input >  arg)

Definition at line 68 of file shift_quark_field.cu.

template<typename ValueType >
__host__ __device__ ValueType quda::sin ( ValueType  x)
inline

Definition at line 40 of file complex_quda.h.

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::sin ( const complex< ValueType > &  z)
inline

Definition at line 981 of file complex_quda.h.

template<>
__host__ __device__ complex<float> quda::sin ( const complex< float > &  z)
inline

Definition at line 989 of file complex_quda.h.

template<typename ValueType >
__host__ __device__ ValueType quda::sinh ( ValueType  x)
inline

Definition at line 75 of file complex_quda.h.

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::sinh ( const complex< ValueType > &  z)
inline

Definition at line 997 of file complex_quda.h.

template<>
__host__ __device__ complex<float> quda::sinh ( const complex< float > &  z)
inline

Definition at line 1005 of file complex_quda.h.

void quda::siteComputeGenStapleParityKernel ( void *  staple_even,
void *  staple_odd,
const void *  sitelink_even,
const void *  sitelink_odd,
void *  fatlink_even,
void *  fatlink_odd,
int  mu,
int  nu,
double  mycoeff,
QudaReconstructType  recon,
QudaPrecision  prec,
dim3  halfGridDim,
llfat_kernel_param_t  kparam,
cudaStream_t *  stream 
)
void quda::siteComputeGenStapleParityKernel_ex ( void *  staple_even,
void *  staple_odd,
const void *  sitelink_even,
const void *  sitelink_odd,
void *  fatlink_even,
void *  fatlink_odd,
int  mu,
int  nu,
double  mycoeff,
QudaReconstructType  recon,
QudaPrecision  prec,
llfat_kernel_param_t  kparam 
)
template<typename ValueType >
__host__ __device__ ValueType quda::sqrt ( ValueType  x)
inline

Definition at line 105 of file complex_quda.h.

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::sqrt ( const complex< ValueType > &  z)
inline

Definition at line 1013 of file complex_quda.h.

template<typename ValueType >
__host__ __device__ complex<float> quda::sqrt ( const complex< float > &  z)
inline

Definition at line 1019 of file complex_quda.h.

void quda::staggeredDslashCuda ( cudaColorSpinorField *  out,
const cudaGaugeField &  gauge,
const cudaColorSpinorField *  in,
const int  parity,
const int  dagger,
const cudaColorSpinorField *  x,
const double &  k,
const int *  commDim,
TimeProfile &  profile,
const QudaDslashPolicy dslashPolicy = QUDA_DSLASH2 
)

Definition at line 119 of file dslash_staggered.cu.

void quda::storeLinkToCPU ( cpuGaugeField *  cpuGauge,
cudaGaugeField *  cudaGauge,
QudaGaugeParam param 
)
template<typename ValueType >
__host__ __device__ ValueType quda::tan ( ValueType  x)
inline

Definition at line 45 of file complex_quda.h.

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::tan ( const complex< ValueType > &  z)
inline

Definition at line 1025 of file complex_quda.h.

template<typename ValueType >
__host__ __device__ ValueType quda::tanh ( ValueType  x)
inline

Definition at line 80 of file complex_quda.h.

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::tanh ( const complex< ValueType > &  z)
inline

Definition at line 1031 of file complex_quda.h.

template<typename Float >
__device__ __host__ Float quda::timeBoundary ( int  idx,
const int  X[QUDA_MAX_DIM],
QudaTboundary  tBoundary,
bool  isFirstTimeSlice,
bool  isLastTimeSlice 
)
inline

Definition at line 92 of file gauge_field_order.h.

template<typename Float >
__device__ __host__ Float quda::timeBoundary ( int  idx,
const int  X[QUDA_MAX_DIM],
const int  R[QUDA_MAX_DIM],
QudaTboundary  tBoundary,
bool  isFirstTimeSlice,
bool  isLastTimeSlice,
QudaGhostExchange  ghostExchange 
)
inline

timeBoundary variant for extended gauge field

Parameters
idx  extended field linear index
X  the gauge field dimensions
R  the radii dimensions of the extended region
tBoundary  the boundary condition
isFirstTimeSlice  if we're on the first time slice of nodes
isLastTimeSlice  if we're on the last time slice of nodes
ghostExchange  if the field is extended or not (determines indexing type)

Definition at line 107 of file gauge_field_order.h.

double quda::timeInterval ( struct timeval  start,
struct timeval  end 
)

Definition at line 21 of file inv_gcr_quda.cpp.

double3 quda::tripleCGReductionCuda ( cudaColorSpinorField &  x,
cudaColorSpinorField &  y,
cudaColorSpinorField &  z 
)

Definition at line 811 of file reduce_quda.cu.

void quda::tripleCGUpdateCuda ( const double &  alpha,
const double &  beta,
cudaColorSpinorField &  q,
cudaColorSpinorField &  r,
cudaColorSpinorField &  x,
cudaColorSpinorField &  p 
)

Definition at line 480 of file blas_quda.cu.

TuneParam & quda::tuneLaunch ( Tunable &  tunable,
QudaTune  enabled,
QudaVerbosity  verbosity 
)

Return the optimal launch parameters for a given kernel, either by retrieving them from tunecache or autotuning on the spot.

Definition at line 271 of file tune.cpp.

void quda::twistCloverGamma5Cuda ( cudaColorSpinorField *  out,
const cudaColorSpinorField *  in,
const int  dagger,
const double &  kappa,
const double &  mu,
const double &  epsilon,
const QudaTwistGamma5Type  twist,
const FullClover *  clov,
const FullClover *  clovInv,
const int  parity 
)

Definition at line 495 of file dslash_quda.cu.

void quda::twistedCloverDslashCuda ( cudaColorSpinorField *  out,
const cudaGaugeField &  gauge,
const FullClover *  clover,
const FullClover *  cloverInv,
const cudaColorSpinorField *  in,
const int  parity,
const int  dagger,
const cudaColorSpinorField *  x,
const QudaTwistCloverDslashType  type,
const double &  kappa,
const double &  mu,
const double &  epsilon,
const double &  k,
const int *  commDim,
TimeProfile &  profile,
const QudaDslashPolicy dslashPolicy = QUDA_DSLASH2 
)

Definition at line 155 of file dslash_twisted_clover.cu.

void quda::twistedMassDslashCuda ( cudaColorSpinorField *  out,
const cudaGaugeField &  gauge,
const cudaColorSpinorField *  in,
const int  parity,
const int  dagger,
const cudaColorSpinorField *  x,
const QudaTwistDslashType  type,
const double &  kappa,
const double &  mu,
const double &  epsilon,
const double &  k,
const int *  commDim,
TimeProfile &  profile,
const QudaDslashPolicy dslashPolicy = QUDA_DSLASH2 
)

Definition at line 151 of file dslash_twisted_mass.cu.

void quda::twistGamma5Cuda ( cudaColorSpinorField *  out,
const cudaColorSpinorField *  in,
const int  dagger,
const double &  kappa,
const double &  mu,
const double &  epsilon,
const QudaTwistGamma5Type  twist 
)

ndeg tm:

Definition at line 356 of file dslash_quda.cu.

void quda::unitarizeLinksCPU ( const QudaGaugeParam param,
cpuGaugeField &  infield,
cpuGaugeField *  outfield 
)
void quda::unitarizeLinksCuda ( const QudaGaugeParam param,
cudaGaugeField &  infield,
cudaGaugeField *  outfield,
int *  num_failures 
)
void quda::unpackGhostStaple ( int *  X,
void *  _even,
void *  _odd,
int  volume,
QudaPrecision  prec,
int  stride,
int  dir,
int  whichway,
void **  fwd_nbr_buf,
void **  back_nbr_buf,
cudaStream_t *  stream 
)
void quda::updateAlphaZeta ( double *  alpha,
double *  zeta,
double *  zeta_old,
const double *  r2,
const double *  beta,
const double  pAp,
const double *  offset,
const int  nShift,
const int  j_low 
)

Compute the new values of alpha and zeta

Definition at line 38 of file inv_multi_cg_quda.cpp.

void quda::updateGaugeField ( GaugeField &  out,
double  dt,
const GaugeField &  in,
const GaugeField &  mom,
bool  conj_mom,
bool  exact 
)

Evolve the gauge field by step size dt using the momentum field

Parameters
out  Updated gauge field
dt  Step size
in  Input gauge field
mom  Momentum field
conj_mom  Whether we conjugate the momentum in the exponential
exact  Calculate exact exponential or use an expansion

Definition at line 348 of file gauge_update_quda.cu.

void quda::updateSolution ( cudaColorSpinorField &  x,
const Complex *  alpha,
Complex **const  beta,
double *  gamma,
int  k,
cudaColorSpinorField *  p[] 
)

Definition at line 111 of file inv_gcr_quda.cpp.

void quda::wilsonDslashCuda ( cudaColorSpinorField *  out,
const cudaGaugeField &  gauge,
const cudaColorSpinorField *  in,
const int  oddBit,
const int  daggerBit,
const cudaColorSpinorField *  x,
const double &  k,
const int *  commDim,
TimeProfile &  profile,
const QudaDslashPolicy dslashPolicy = QUDA_DSLASH2 
)

Definition at line 113 of file dslash_wilson.cu.

template<class T >
__device__ void quda::writeLinkVariableToArray ( const Matrix< T, 3 > &  link,
const int  dir,
const int  idx,
const int  stride,
T *const  array 
)
inline

Definition at line 830 of file quda_matrix.h.

__device__ void quda::writeLinkVariableToArray ( const Matrix< double2, 3 > &  link,
const int  dir,
const int  idx,
const int  stride,
float2 *const  array 
)
inline

Definition at line 842 of file quda_matrix.h.

template<class T , int N>
__device__ void quda::writeMatrixToArray ( const Matrix< T, N > &  mat,
const int  idx,
const int  stride,
T *const  array 
)
inline

Definition at line 802 of file quda_matrix.h.

template<class T , class U >
__device__ void quda::writeMomentumToArray ( const Matrix< T, 3 > &  mom,
const int  dir,
const int  idx,
const U  coeff,
const int  stride,
T *const  array 
)
inline

Definition at line 893 of file quda_matrix.h.

double quda::xmyNormCpu ( const cpuColorSpinorField &  a,
cpuColorSpinorField &  b 
)

Definition at line 205 of file blas_cpu.cpp.

double quda::xmyNormCuda ( cudaColorSpinorField &  a,
cudaColorSpinorField &  b 
)

Definition at line 343 of file reduce_quda.cu.

Complex quda::xpaycDotzyCpu ( const cpuColorSpinorField &  x,
const double &  a,
cpuColorSpinorField &  y,
const cpuColorSpinorField &  z 
)

Definition at line 231 of file blas_cpu.cpp.

Complex quda::xpaycDotzyCuda ( cudaColorSpinorField &  x,
const double &  a,
cudaColorSpinorField &  y,
cudaColorSpinorField &  z 
)

Definition at line 534 of file reduce_quda.cu.

void quda::xpayCpu ( const cpuColorSpinorField &  x,
const double &  a,
cpuColorSpinorField &  y 
)

Definition at line 41 of file blas_cpu.cpp.

void quda::xpayCuda ( cudaColorSpinorField &  x,
const double &  a,
cudaColorSpinorField &  y 
)

Definition at line 138 of file blas_quda.cu.

void quda::xpyCpu ( const cpuColorSpinorField &  x,
cpuColorSpinorField &  y 
)

Definition at line 22 of file blas_cpu.cpp.

void quda::xpyCuda ( cudaColorSpinorField &  x,
cudaColorSpinorField &  y 
)

Definition at line 98 of file blas_quda.cu.

double3 quda::xpyHeavyQuarkResidualNormCpu ( cpuColorSpinorField &  x,
cpuColorSpinorField &  y,
cpuColorSpinorField &  r 
)
double3 quda::xpyHeavyQuarkResidualNormCuda ( cudaColorSpinorField &  x,
cudaColorSpinorField &  y,
cudaColorSpinorField &  r 
)

Definition at line 782 of file reduce_quda.cu.

void quda::zeroCuda ( cudaColorSpinorField &  a)

Definition at line 40 of file blas_quda.cu.

Variable Documentation

const char* quda::aux_str

Definition at line 46 of file blas_quda.cu.

char quda::aux_tmp[TuneKey::aux_n]

Definition at line 47 of file blas_quda.cu.

unsigned long long quda::blas_bytes

Definition at line 38 of file blas_quda.cu.

unsigned long long quda::blas_flops

Definition at line 37 of file blas_quda.cu.

const int quda::maxNface = 3

The maximum number of faces that can be exchanged

Definition at line 11 of file lattice_field.h.

const int quda::Nstream = 1

Definition at line 217 of file quda_internal.h.

cudaStream_t* quda::stream

Definition at line 816 of file cuda_color_spinor_field.cu.

const char* quda::vol_str

Definition at line 45 of file blas_quda.cu.