QUDA  1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
Namespaces | Classes | Typedefs | Enumerations | Functions | Variables
quda Namespace Reference

Namespaces

 blas
 
 clover
 
 colorspinor
 
 cublas
 
 dslash
 
 fermion_force
 
 gauge
 
 linalg
 
 pool
 

Classes

struct  AllocType
 
struct  AllocType< false >
 
struct  AllocType< true >
 
struct  Arg
 
class  Array
 
class  BiCGstab
 
class  BiCGstabL
 
class  BiCGstabLUpdate
 
struct  BlockOrthoArg
 
struct  bridge_mapper
 
struct  bridge_mapper< double2, char2 >
 
struct  bridge_mapper< double2, char4 >
 
struct  bridge_mapper< double2, double2 >
 
struct  bridge_mapper< double2, float2 >
 
struct  bridge_mapper< double2, float4 >
 
struct  bridge_mapper< double2, short2 >
 
struct  bridge_mapper< double2, short4 >
 
struct  bridge_mapper< float2, char2 >
 
struct  bridge_mapper< float2, double2 >
 
struct  bridge_mapper< float2, float2 >
 
struct  bridge_mapper< float2, short2 >
 
struct  bridge_mapper< float4, char4 >
 
struct  bridge_mapper< float4, double2 >
 
struct  bridge_mapper< float4, float4 >
 
struct  bridge_mapper< float4, short4 >
 
class  CACG
 Communication-avoiding CG solver. This solver does un-preconditioned CG, running in steps of nKrylov, building up a polynomial in the linear operator of length nKrylov, and then performing a steepest descent minimization on the resulting basis vectors. For now only implemented using the power basis so is only useful as a preconditioner. More...
 
class  CACGNE
 
class  CACGNR
 
class  CAGCR
 Communication-avoiding GCR solver. This solver does un-preconditioned GCR, first building up a polynomial in the linear operator of length nKrylov, and then performs a minimum residual extrapolation on the resulting basis vectors. For use as a multigrid smoother with minimum global synchronization. More...
 
class  CalculateY
 
struct  CalculateYArg
 
struct  CalculateYhatArg
 
class  CG
 Conjugate-Gradient Solver. More...
 
class  CG3
 
class  CG3NE
 
class  CGNE
 
class  CGNR
 
struct  ChecksumArg
 
struct  ChiralToNonRelBasis
 
class  Clover
 
struct  clover_mapper
 
struct  clover_mapper< char, N, add_rho >
 
struct  clover_mapper< double, N, add_rho >
 
struct  clover_mapper< float, N, add_rho >
 
struct  clover_mapper< short, N, add_rho >
 
struct  clover_wrapper
 clover_wrapper is an internal class that is used to wrap instances of colorspinor accessors, currying in a specific location and chirality on the field. The operator() accessors in clover-field accessors return instances to this class, allowing us to then use operator overloading upon this class to interact with the HMatrix class. As a result we can include clover-field accessors directly in HMatrix expressions in kernels without having to declare temporaries with explicit calls to the load/save methods in the clover-field accessors. More...
 
struct  CloverArg
 Parameter structure for driving the clover and twist-clover application kernels. More...
 
struct  CloverDerivArg
 
class  CloverField
 
struct  CloverFieldParam
 
struct  CloverInvertArg
 
struct  CloverSigmaOprodArg
 
struct  coeff_5
 Structure containing zMobius / Zolotarev coefficients. More...
 
class  coeff_type
 Helper class for grabbing the constant struct, whether we are on the GPU or CPU. More...
 
class  coeff_type< real, true, Arg >
 Specialization for variable complex coefficients. More...
 
struct  ColorSpinor
 
struct  ColorSpinor< Float, Nc, 2 >
 
struct  ColorSpinor< Float, Nc, 4 >
 
struct  colorspinor_ghost_wrapper
 colorspinor_ghost_wrapper is an internal class that is used to wrap instances of colorspinor accessors, currying in a specific location on the field. The Ghost() accessors in colorspinor-field accessors return instances to this class, allowing us to then use operator overloading upon this class to interact with the ColorSpinor class. As a result we can include colorspinor-field accessors directly in ColorSpinor expressions in kernels without having to declare temporaries with explicit calls to the loadGhost/saveGhost methods in the colorspinor-field accessors. More...
 
struct  colorspinor_mapper
 
struct  colorspinor_mapper< char, 1, Nc, false, huge_alloc >
 
struct  colorspinor_mapper< char, 2, Nc, false, huge_alloc >
 
struct  colorspinor_mapper< char, 4, Nc, false, huge_alloc >
 
struct  colorspinor_mapper< char, 4, Nc, true, huge_alloc >
 
struct  colorspinor_mapper< double, 1, Nc, false, huge_alloc >
 
struct  colorspinor_mapper< double, 2, Nc, false, huge_alloc >
 
struct  colorspinor_mapper< double, 4, Nc, false, huge_alloc >
 
struct  colorspinor_mapper< double, 4, Nc, true, huge_alloc >
 
struct  colorspinor_mapper< float, 1, Nc, false, huge_alloc >
 
struct  colorspinor_mapper< float, 2, Nc, false, huge_alloc >
 
struct  colorspinor_mapper< float, 4, Nc, false, huge_alloc >
 
struct  colorspinor_mapper< float, 4, Nc, true, huge_alloc >
 
struct  colorspinor_mapper< short, 1, Nc, false, huge_alloc >
 
struct  colorspinor_mapper< short, 2, Nc, false, huge_alloc >
 
struct  colorspinor_mapper< short, 4, Nc, false, huge_alloc >
 
struct  colorspinor_mapper< short, 4, Nc, true, huge_alloc >
 
struct  colorspinor_order_mapper
 
struct  colorspinor_order_mapper< T, QUDA_FLOAT2_FIELD_ORDER, Ns, Nc >
 
struct  colorspinor_order_mapper< T, QUDA_SPACE_COLOR_SPIN_FIELD_ORDER, Ns, Nc >
 
struct  colorspinor_order_mapper< T, QUDA_SPACE_SPIN_COLOR_FIELD_ORDER, Ns, Nc >
 
struct  colorspinor_wrapper
 colorspinor_wrapper is an internal class that is used to wrap instances of colorspinor accessors, currying in a specific location on the field. The operator() accessors in colorspinor-field accessors return instances to this class, allowing us to then use operator overloading upon this class to interact with the ColorSpinor class. As a result we can include colorspinor-field accessors directly in ColorSpinor expressions in kernels without having to declare temporaries with explicit calls to the load/save methods in the colorspinor-field accessors. More...
 
class  ColorSpinorField
 
class  ColorSpinorParam
 
struct  complex
 
struct  complex< char >
 
struct  complex< double >
 
struct  complex< float >
 
struct  complex< int >
 
struct  complex< short >
 
struct  CompositeColorSpinorFieldDescriptor
 
struct  ContractionArg
 
class  CopyColorSpinor
 
class  CopyColorSpinor< 4, Arg >
 
struct  CopyColorSpinorArg
 
class  CopyGauge
 
struct  CopyGaugeArg
 
class  CopyGaugeEx
 
struct  CopyGaugeExArg
 
class  CopySpinor
 
class  CopySpinorEx
 
struct  CopySpinorExArg
 
struct  CovDevArg
 Parameter structure for driving the covariant derivative operator. More...
 
class  cpuCloverField
 
class  cpuColorSpinorField
 
class  cpuGaugeField
 
class  cudaCloverField
 
class  cudaColorSpinorField
 
class  cudaGaugeField
 
struct  deflated_solver
 
class  Deflation
 
struct  DeflationParam
 
class  Dirac
 
class  DiracClover
 
class  DiracCloverPC
 
class  DiracCoarse
 
class  DiracCoarsePC
 
class  DiracDagger
 
class  DiracDomainWall
 
class  DiracDomainWall4D
 
class  DiracDomainWall4DPC
 
class  DiracDomainWallPC
 
class  DiracImprovedStaggered
 
class  DiracImprovedStaggeredPC
 
class  DiracM
 
class  DiracMatrix
 
class  DiracMdag
 
class  DiracMdagM
 
class  DiracMMdag
 
class  DiracMobius
 
class  DiracMobiusPC
 
class  DiracParam
 
class  DiracStaggered
 
class  DiracStaggeredPC
 
class  DiracTwistedClover
 
class  DiracTwistedCloverPC
 
class  DiracTwistedMass
 
class  DiracTwistedMassPC
 
class  DiracWilson
 
class  DiracWilsonPC
 
class  DomainWall4D
 
struct  DomainWall4DApply
 
struct  DomainWall4DArg
 
struct  DomainWall4DLaunch
 This is a helper class that is used to instantiate the correct templated kernel for the dslash. More...
 
class  DomainWall5D
 
struct  DomainWall5DApply
 
struct  DomainWall5DArg
 
struct  DomainWall5DLaunch
 This is a helper class that is used to instantiate the correct templated kernel for the dslash. More...
 
class  Dslash
 
class  Dslash5
 
struct  Dslash5Arg
 Parameter structure for applying the Dslash. More...
 
struct  DslashArg
 
struct  DslashCoarseArg
 
struct  DslashCoarseLaunch
 
class  DslashCoarsePolicyTune
 
struct  DslashConstant
 Constants used by dslash and packing kernels. More...
 
class  EigCGArgs
 
class  EigenSolver
 
class  ExtractGhost
 
struct  ExtractGhostArg
 
class  ExtractGhostEx
 
struct  ExtractGhostExArg
 
struct  fixedInvMaxValue
 
struct  fixedInvMaxValue< char >
 
struct  fixedInvMaxValue< char2 >
 
struct  fixedInvMaxValue< char4 >
 
struct  fixedInvMaxValue< short >
 
struct  fixedInvMaxValue< short2 >
 
struct  fixedInvMaxValue< short4 >
 
struct  fixedMaxValue
 
struct  fixedMaxValue< char >
 
struct  fixedMaxValue< char2 >
 
struct  fixedMaxValue< char4 >
 
struct  fixedMaxValue< short >
 
struct  fixedMaxValue< short2 >
 
struct  fixedMaxValue< short4 >
 
struct  float4_precision_mapper
 
struct  float4_precision_mapper< char >
 
struct  float4_precision_mapper< double >
 
struct  float4_precision_mapper< short >
 
struct  FmunuArg
 
struct  FullClover
 
class  Gamma
 
struct  GammaArg
 Parameter structure for driving the Gamma operator. More...
 
struct  gauge_ghost_wrapper
 gauge_ghost_wrapper is an internal class that is used to wrap instances of gauge ghost accessors, currying in a specific location and dimension on the field. The Ghost() accessors in gauge-field accessors return instances to this class, allowing us to then use operator overloading upon this class to interact with the Matrix class. As a result we can include gauge-field ghost accessors directly in Matrix expressions in kernels without having to declare temporaries with explicit calls to the load/save methods in the gauge-field accessors. More...
 
struct  gauge_mapper
 
struct  gauge_mapper< char, QUDA_RECONSTRUCT_10, N, stag, huge_alloc, ghostExchange, use_inphase >
 
struct  gauge_mapper< char, QUDA_RECONSTRUCT_12, N, stag, huge_alloc, ghostExchange, use_inphase >
 
struct  gauge_mapper< char, QUDA_RECONSTRUCT_13, N, stag, huge_alloc, ghostExchange, use_inphase >
 
struct  gauge_mapper< char, QUDA_RECONSTRUCT_8, N, stag, huge_alloc, ghostExchange, use_inphase >
 
struct  gauge_mapper< char, QUDA_RECONSTRUCT_9, N, stag, huge_alloc, ghostExchange, use_inphase >
 
struct  gauge_mapper< char, QUDA_RECONSTRUCT_NO, N, stag, huge_alloc, ghostExchange, use_inphase >
 
struct  gauge_mapper< double, QUDA_RECONSTRUCT_10, N, stag, huge_alloc, ghostExchange, use_inphase >
 
struct  gauge_mapper< double, QUDA_RECONSTRUCT_12, N, stag, huge_alloc, ghostExchange, use_inphase >
 
struct  gauge_mapper< double, QUDA_RECONSTRUCT_13, N, stag, huge_alloc, ghostExchange, use_inphase >
 
struct  gauge_mapper< double, QUDA_RECONSTRUCT_8, N, stag, huge_alloc, ghostExchange, use_inphase >
 
struct  gauge_mapper< double, QUDA_RECONSTRUCT_9, N, stag, huge_alloc, ghostExchange, use_inphase >
 
struct  gauge_mapper< double, QUDA_RECONSTRUCT_NO, N, stag, huge_alloc, ghostExchange, use_inphase >
 
struct  gauge_mapper< float, QUDA_RECONSTRUCT_10, N, stag, huge_alloc, ghostExchange, use_inphase >
 
struct  gauge_mapper< float, QUDA_RECONSTRUCT_12, N, stag, huge_alloc, ghostExchange, use_inphase >
 
struct  gauge_mapper< float, QUDA_RECONSTRUCT_13, N, stag, huge_alloc, ghostExchange, use_inphase >
 
struct  gauge_mapper< float, QUDA_RECONSTRUCT_8, N, stag, huge_alloc, ghostExchange, use_inphase >
 
struct  gauge_mapper< float, QUDA_RECONSTRUCT_9, N, stag, huge_alloc, ghostExchange, use_inphase >
 
struct  gauge_mapper< float, QUDA_RECONSTRUCT_NO, N, stag, huge_alloc, ghostExchange, use_inphase >
 
struct  gauge_mapper< short, QUDA_RECONSTRUCT_10, N, stag, huge_alloc, ghostExchange, use_inphase >
 
struct  gauge_mapper< short, QUDA_RECONSTRUCT_12, N, stag, huge_alloc, ghostExchange, use_inphase >
 
struct  gauge_mapper< short, QUDA_RECONSTRUCT_13, N, stag, huge_alloc, ghostExchange, use_inphase >
 
struct  gauge_mapper< short, QUDA_RECONSTRUCT_8, N, stag, huge_alloc, ghostExchange, use_inphase >
 
struct  gauge_mapper< short, QUDA_RECONSTRUCT_9, N, stag, huge_alloc, ghostExchange, use_inphase >
 
struct  gauge_mapper< short, QUDA_RECONSTRUCT_NO, N, stag, huge_alloc, ghostExchange, use_inphase >
 
struct  gauge_order_mapper
 
struct  gauge_order_mapper< T, QUDA_BQCD_GAUGE_ORDER, Nc >
 
struct  gauge_order_mapper< T, QUDA_FLOAT2_GAUGE_ORDER, Nc >
 
struct  gauge_order_mapper< T, QUDA_MILC_GAUGE_ORDER, Nc >
 
struct  gauge_order_mapper< T, QUDA_QDP_GAUGE_ORDER, Nc >
 
struct  gauge_order_mapper< T, QUDA_QDPJIT_GAUGE_ORDER, Nc >
 
struct  gauge_order_mapper< T, QUDA_TIFR_GAUGE_ORDER, Nc >
 
struct  gauge_order_mapper< T, QUDA_TIFR_PADDED_GAUGE_ORDER, Nc >
 
struct  gauge_wrapper
 gauge_wrapper is an internal class that is used to wrap instances of gauge accessors, currying in a specific location on the field. The operator() accessors in gauge-field accessors return instances to this class, allowing us to then use operator overloading upon this class to interact with the Matrix class. As a result we can include gauge-field accessors directly in Matrix expressions in kernels without having to declare temporaries with explicit calls to the load/save methods in the gauge-field accessors. More...
 
struct  GaugeAPEArg
 
class  GaugeCovDev
 Full Covariant Derivative operator. Although not a Dirac operator per se, it's a linear operator so it's convenient to put in the Dirac operator abstraction. More...
 
class  GaugeField
 
struct  GaugeFieldParam
 
class  GaugeGauss
 
struct  GaugeGaussArg
 
class  GaugeLaplace
 Full Gauge Laplace operator. Although not a Dirac operator per se, it's a linear operator so it's convenient to put in the Dirac operator abstraction. More...
 
class  GaugeLaplacePC
 Even-odd preconditioned Gauge Laplace operator. More...
 
class  GaugeOvrImpSTOUT
 
struct  GaugeOvrImpSTOUTArg
 
class  GaugePlaq
 
struct  GaugePlaqArg
 
struct  GaugeSTOUTArg
 
class  GCR
 
class  GenericPackGhostLauncher
 
class  GMResDR
 
class  GMResDRArgs
 
class  HMatrix
 Specialized container for Hermitian matrices (e.g., used for wrapping clover matrices) More...
 
struct  HMatrix_wrapper
 Wrapper class that enables us to write to HMatrix objects in packed format. More...
 
struct  Identity
 
struct  ImprovedStaggeredApply
 
class  IncEigCG
 
struct  Int2
 
struct  is_variable
 
struct  is_variable< DSLASH5_MOBIUS >
 
struct  is_variable< DSLASH5_MOBIUS_PRE >
 
struct  is_variable< M5_INV_ZMOBIUS >
 
struct  isFixed
 
struct  isFixed< char >
 
struct  isFixed< char2 >
 
struct  isFixed< char4 >
 
struct  isFixed< short >
 
struct  isFixed< short2 >
 
struct  isFixed< short4 >
 
struct  isHalf
 
struct  isHalf< short >
 
struct  isHalf< short2 >
 
struct  isHalf< short4 >
 
struct  isQuarter
 
struct  isQuarter< char >
 
struct  isQuarter< char2 >
 
struct  isQuarter< char4 >
 
struct  KSForceArg
 
class  KSForceComplete
 
struct  KSLongLinkArg
 
class  KSLongLinkForce
 
class  Laplace
 
struct  LaplaceApply
 
struct  LaplaceArg
 Parameter structure for driving the covariant derivative operator. More...
 
struct  LaplaceLaunch
 This is a helper class that is used to instantiate the correct templated kernel for the dslash. More...
 
class  LatticeField
 
struct  LatticeFieldParam
 
struct  less_significant
 
struct  mapper
 
struct  mapper< char >
 
struct  mapper< char2 >
 
struct  mapper< char4 >
 
struct  mapper< double >
 
struct  mapper< double2 >
 
struct  mapper< double4 >
 
struct  mapper< float >
 
struct  mapper< float2 >
 
struct  mapper< float4 >
 
struct  mapper< short >
 
struct  mapper< short2 >
 
struct  mapper< short4 >
 
class  Matrix
 
struct  matrix_field
 
class  MemAlloc
 
class  MG
 
struct  MGParam
 
class  MinResExt
 This computes the optimum guess for the system Ax=b in the L2 residual norm. For use in the HMD force calculations using a minimal residual chronological method. This computes the guess solution as a linear combination of a given number of previous solutions. Following Brower et al, only the orthogonalised vector basis is stored to conserve memory. More...
 
class  MPBiCGstab
 
class  MPCG
 
class  MR
 
struct  multigrid_solver
 
class  MultiShiftCG
 Multi-Shift Conjugate Gradient Solver. More...
 
class  MultiShiftSolver
 
class  NdegTwistedMass
 
struct  NdegTwistedMassApply
 
struct  NdegTwistedMassArg
 
struct  NdegTwistedMassLaunch
 This is a helper class that is used to instantiate the correct templated kernel for the dslash. More...
 
class  NdegTwistedMassPreconditioned
 
struct  NdegTwistedMassPreconditionedApply
 
struct  NdegTwistedMassPreconditionedLaunch
 This is a helper class that is used to instantiate the correct templated kernel for the dslash. More...
 
struct  non_native_precision_mapper
 
struct  non_native_precision_mapper< char >
 
struct  non_native_precision_mapper< double >
 
struct  non_native_precision_mapper< float >
 
struct  non_native_precision_mapper< short >
 
struct  NonRelBasis
 
struct  NonRelToChiralBasis
 
struct  norm_type
 
struct  norm_type< complex< T > >
 
struct  normal
 
struct  normal< double >
 
struct  normal< float >
 
class  Object
 
class  Pack
 
struct  PackArg
 
struct  PackGhostArg
 
struct  precision_spin_color_mapper
 
struct  precision_spin_color_mapper< double, double, 1, nColor_ >
 
struct  precision_spin_color_mapper< double, double, 2, nColor_ >
 
struct  precision_spin_color_mapper< double, double, 4, nColor_ >
 
struct  precision_spin_color_mapper< float, char, 4, nColor_ >
 
struct  precision_spin_color_mapper< float, short, 4, nColor_ >
 
struct  precision_spin_color_mapper< T, G, 1, nColor_ >
 
class  PreconCG
 
class  PreconditionedSolver
 
class  PreserveBasis
 
class  ProjectSU3
 
struct  ProjectSU3Arg
 
struct  PromoteTypeId
 
struct  PromoteTypeId< complex< double >, double >
 
struct  PromoteTypeId< complex< float >, float >
 
struct  PromoteTypeId< double, complex< double > >
 
struct  PromoteTypeId< double, float >
 
struct  PromoteTypeId< double, int >
 
struct  PromoteTypeId< float, complex< float > >
 
struct  PromoteTypeId< float, double >
 
struct  PromoteTypeId< float, int >
 
struct  PromoteTypeId< int, double >
 
struct  PromoteTypeId< int, float >
 
struct  QChargeArg
 
class  QudaMemCopy
 
struct  RealType
 
struct  RealType< char >
 
struct  RealType< char2 >
 
struct  RealType< char4 >
 
struct  RealType< complex< char > >
 
struct  RealType< complex< double > >
 
struct  RealType< complex< float > >
 
struct  RealType< complex< short > >
 
struct  RealType< double >
 
struct  RealType< double2 >
 
struct  RealType< float >
 
struct  RealType< float2 >
 
struct  RealType< float4 >
 
struct  RealType< short >
 
struct  RealType< short2 >
 
struct  RealType< short4 >
 
struct  reduce_vector
 
struct  ReduceArg
 
struct  RelBasis
 
struct  RestrictArg
 
class  RNG
 Class declaration to initialize and hold CURAND RNG states. More...
 
struct  rngArg
 
struct  scalar
 
struct  scalar< char >
 
struct  scalar< char2 >
 
struct  scalar< char3 >
 
struct  scalar< char4 >
 
struct  scalar< double >
 
struct  scalar< double2 >
 
struct  scalar< double3 >
 
struct  scalar< double4 >
 
struct  scalar< float >
 
struct  scalar< float2 >
 
struct  scalar< float3 >
 
struct  scalar< float4 >
 
struct  scalar< short >
 
struct  scalar< short2 >
 
struct  scalar< short3 >
 
struct  scalar< short4 >
 
class  SD
 
struct  SharedMemory
 
class  ShiftColorSpinorField
 
struct  ShiftColorSpinorFieldArg
 
class  ShiftUpdate
 
class  SimpleBiCGstab
 
class  Solver
 
struct  SolverParam
 
struct  SortedEvals
 
struct  spin_mapper
 
struct  spin_order_mapper
 
struct  spin_order_mapper< 1, QUDA_FLOAT4_FIELD_ORDER >
 
struct  spin_order_mapper< 2, QUDA_FLOAT4_FIELD_ORDER >
 
class  SpinorNoise
 
class  Staggered
 
struct  StaggeredApply
 
struct  StaggeredArg
 Parameter structure for driving the Staggered Dslash operator. More...
 
struct  StaggeredLaunch
 
struct  StaggeredReconstruct
 
struct  TexVectorType
 
struct  TexVectorType< char, 1 >
 
struct  TexVectorType< char, 2 >
 
struct  TexVectorType< char, 4 >
 
struct  TexVectorType< double, 1 >
 
struct  TexVectorType< double, 2 >
 
struct  TexVectorType< float, 1 >
 
struct  TexVectorType< float, 2 >
 
struct  TexVectorType< float, 4 >
 
struct  TexVectorType< short, 1 >
 
struct  TexVectorType< short, 2 >
 
struct  TexVectorType< short, 4 >
 
class  TimeProfile
 
struct  Timer
 
struct  TraceKey
 
class  Transfer
 
struct  Trig
 
struct  Trig< false, float >
 
struct  Trig< true, float >
 
class  TRLM
 Thick Restarted Lanczos Method. More...
 
class  Tunable
 
class  TunableLocalParity
 
class  TunableVectorY
 
class  TunableVectorYZ
 
struct  TuneKey
 
class  TuneParam
 
class  TwistClover
 
class  TwistedClover
 
struct  TwistedCloverApply
 
struct  TwistedCloverArg
 
struct  TwistedCloverLaunch
 This is a helper class that is used to instantiate the correct templated kernel for the dslash. More...
 
class  TwistedCloverPreconditioned
 
struct  TwistedCloverPreconditionedApply
 
struct  TwistedCloverPreconditionedLaunch
 This is a helper class that is used to instantiate the correct templated kernel for the dslash. More...
 
class  TwistedMass
 
struct  TwistedMassApply
 
struct  TwistedMassArg
 
struct  TwistedMassLaunch
 This is a helper class that is used to instantiate the correct templated kernel for the dslash. More...
 
class  TwistedMassPreconditioned
 
struct  TwistedMassPreconditionedApply
 
struct  TwistedMassPreconditionedLaunch
 This is a helper class that is used to instantiate the correct templated kernel for the dslash. More...
 
class  TwistGamma
 
struct  uniform
 
struct  uniform< double >
 
struct  uniform< float >
 
struct  vec_length
 
struct  vec_length< char >
 
struct  vec_length< char2 >
 
struct  vec_length< char4 >
 
struct  vec_length< double >
 
struct  vec_length< double2 >
 
struct  vec_length< double4 >
 
struct  vec_length< float >
 
struct  vec_length< float2 >
 
struct  vec_length< float4 >
 
struct  vec_length< short >
 
struct  vec_length< short2 >
 
struct  vec_length< short4 >
 
struct  vector
 
struct  vector< double, 2 >
 
struct  vector< float, 2 >
 
struct  vector< int, 2 >
 
struct  vector_type
 
class  VectorCache
 Class which wraps around a shared memory cache for a Vector type, where each thread in the thread block stores a unique Vector in the cache which any other thread can access. Presently, the expectation is that Vector is synonymous with the ColorSpinor class, but we could extend this to apply to the Matrix class as well. More...
 
struct  VectorType
 
struct  VectorType< char, 1 >
 
struct  VectorType< char, 2 >
 
struct  VectorType< char, 4 >
 
struct  VectorType< double, 1 >
 
struct  VectorType< double, 2 >
 
struct  VectorType< double, 4 >
 
struct  VectorType< float, 1 >
 
struct  VectorType< float, 2 >
 
struct  VectorType< float, 4 >
 
struct  VectorType< short, 1 >
 
struct  VectorType< short, 2 >
 
struct  VectorType< short, 4 >
 
class  Wilson
 
struct  WilsonApply
 
struct  WilsonArg
 Parameter structure for driving the Wilson operator. More...
 
class  WilsonClover
 
struct  WilsonCloverApply
 
struct  WilsonCloverArg
 
struct  WilsonCloverLaunch
 This is a helper class that is used to instantiate the correct templated kernel for the dslash. More...
 
class  WilsonCloverPreconditioned
 
struct  WilsonCloverPreconditionedApply
 
struct  WilsonCloverPreconditionedLaunch
 This is a helper class that is used to instantiate the correct templated kernel for the dslash. More...
 
struct  WilsonLaunch
 This is a helper class that is used to instantiate the correct templated kernel for the dslash. More...
 
struct  WilsonReconstruct
 
class  Worker
 
class  WuppertalSmearing
 
struct  WuppertalSmearingArg
 
class  XSD
 
struct  Zero
 

Typedefs

typedef std::vector< ColorSpinorField * > CompositeColorSpinorField
 
using ColorSpinorFieldSet = ColorSpinorField
 
typedef int storeType
 
typedef std::complex< double > Complex
 
typedef struct curandStateMRG32k3a cuRNGState
 
using DynamicStride = Stride< Dynamic, Dynamic >
 
using DenseMatrix = MatrixXcd
 
using VectorSet = MatrixXcd
 
using Vector = VectorXcd
 
using RealVector = VectorXd
 
using RowMajorDenseMatrix = Matrix< Complex, Dynamic, Dynamic, RowMajor >
 
typedef std::map< TuneKey, TuneParam > map
 

Enumerations

enum  MemoryLocation { Device = 1, Host = 2, Remote = 4 }
 
enum  Dslash5Type {
  DSLASH5_DWF, DSLASH5_MOBIUS_PRE, DSLASH5_MOBIUS, M5_INV_DWF,
  M5_INV_MOBIUS, M5_INV_ZMOBIUS
}
 
enum  KernelType {
  INTERIOR_KERNEL = 5, EXTERIOR_KERNEL_ALL = 6, EXTERIOR_KERNEL_X = 0, EXTERIOR_KERNEL_Y = 1,
  EXTERIOR_KERNEL_Z = 2, EXTERIOR_KERNEL_T = 3, KERNEL_POLICY = 7
}
 
enum  DslashType { DSLASH_INTERIOR, DSLASH_EXTERIOR, DSLASH_FULL }
 
enum  QudaProfileType {
  QUDA_PROFILE_H2D, QUDA_PROFILE_D2H, QUDA_PROFILE_INIT, QUDA_PROFILE_PREAMBLE,
  QUDA_PROFILE_COMPUTE, QUDA_PROFILE_COMMS, QUDA_PROFILE_EPILOGUE, QUDA_PROFILE_FREE,
  QUDA_PROFILE_IO, QUDA_PROFILE_CHRONO, QUDA_PROFILE_EIGEN, QUDA_PROFILE_ARPACK,
  QUDA_PROFILE_LOWER_LEVEL, QUDA_PROFILE_PACK_KERNEL, QUDA_PROFILE_DSLASH_KERNEL, QUDA_PROFILE_GATHER,
  QUDA_PROFILE_SCATTER, QUDA_PROFILE_LAUNCH_KERNEL, QUDA_PROFILE_EVENT_RECORD, QUDA_PROFILE_EVENT_QUERY,
  QUDA_PROFILE_STREAM_WAIT_EVENT, QUDA_PROFILE_FUNC_SET_ATTRIBUTE, QUDA_PROFILE_EVENT_SYNCHRONIZE, QUDA_PROFILE_STREAM_SYNCHRONIZE,
  QUDA_PROFILE_DEVICE_SYNCHRONIZE, QUDA_PROFILE_MEMCPY_D2D_ASYNC, QUDA_PROFILE_MEMCPY_D2H_ASYNC, QUDA_PROFILE_MEMCPY2D_D2H_ASYNC,
  QUDA_PROFILE_MEMCPY_H2D_ASYNC, QUDA_PROFILE_COMMS_START, QUDA_PROFILE_COMMS_QUERY, QUDA_PROFILE_CONSTANT,
  QUDA_PROFILE_TOTAL, QUDA_PROFILE_COUNT
}
 
enum  ComputeType {
  COMPUTE_UV, COMPUTE_AV, COMPUTE_TMAV, COMPUTE_TMCAV,
  COMPUTE_CLOVER_INV_MAX, COMPUTE_TWISTED_CLOVER_INV_MAX, COMPUTE_VUV, COMPUTE_COARSE_CLOVER,
  COMPUTE_REVERSE_Y, COMPUTE_DIAGONAL, COMPUTE_TMDIAGONAL, COMPUTE_CONVERT,
  COMPUTE_RESCALE, COMPUTE_INVALID
}
 
enum  DslashCoarsePolicy {
  DslashCoarsePolicy::DSLASH_COARSE_BASIC, DslashCoarsePolicy::DSLASH_COARSE_ZERO_COPY_PACK, DslashCoarsePolicy::DSLASH_COARSE_ZERO_COPY_READ, DslashCoarsePolicy::DSLASH_COARSE_ZERO_COPY,
  DslashCoarsePolicy::DSLASH_COARSE_GDR_SEND, DslashCoarsePolicy::DSLASH_COARSE_GDR_RECV, DslashCoarsePolicy::DSLASH_COARSE_GDR, DslashCoarsePolicy::DSLASH_COARSE_ZERO_COPY_PACK_GDR_RECV,
  DslashCoarsePolicy::DSLASH_COARSE_GDR_SEND_ZERO_COPY_READ, DslashCoarsePolicy::DSLASH_COARSE_POLICY_DISABLED
}
 
enum  BiCGstabLUpdateType { BICGSTABL_UPDATE_U = 0, BICGSTABL_UPDATE_R = 1 }
 
enum  libtype {
  libtype::eigen_lib, libtype::magma_lib, libtype::lapack_lib, libtype::mkl_lib,
  libtype::eigen_lib, libtype::magma_lib, libtype::lapack_lib, libtype::mkl_lib
}
 
enum  libtype {
  libtype::eigen_lib, libtype::magma_lib, libtype::lapack_lib, libtype::mkl_lib,
  libtype::eigen_lib, libtype::magma_lib, libtype::lapack_lib, libtype::mkl_lib
}
 
enum  AllocType {
  DEVICE, DEVICE_PINNED, HOST, PINNED,
  MAPPED, N_ALLOC_TYPE
}
 
enum  norm_type_ {
  NORM1, NORM2, ABS_MAX, ABS_MIN,
  NORM1, NORM2, ABS_MAX, ABS_MIN
}
 
enum  norm_type_ {
  NORM1, NORM2, ABS_MAX, ABS_MIN,
  NORM1, NORM2, ABS_MAX, ABS_MIN
}
 

Functions

void checkSpinor (const ColorSpinorField &a, const ColorSpinorField &b)
 
void checkLength (const ColorSpinorField &a, const ColorSpinorField &b)
 
__host__ __device__ double set (double &x)
 
__host__ __device__ double2 set (double2 &x)
 
__host__ __device__ double3 set (double3 &x)
 
__host__ __device__ double4 set (double4 &x)
 
__host__ __device__ void sum (double &a, double &b)
 
__host__ __device__ void sum (double2 &a, double2 &b)
 
__host__ __device__ void sum (double3 &a, double3 &b)
 
__host__ __device__ void sum (double4 &a, double4 &b)
 
std::ostream & operator<< (std::ostream &output, const CloverFieldParam &param)
 
double norm1 (const CloverField &u, bool inverse=false)
 
double norm2 (const CloverField &a, bool inverse=false)
 
void computeClover (CloverField &clover, const GaugeField &gauge, double coeff, QudaFieldLocation location)
 
void copyGenericClover (CloverField &out, const CloverField &in, bool inverse, QudaFieldLocation location, void *Out=0, void *In=0, void *outNorm=0, void *inNorm=0)
 This generic function is used for copying the clover field where the input and output can be in any order and location. More...
 
void cloverInvert (CloverField &clover, bool computeTraceLog)
 This function computes the Cholesky decomposition of each clover matrix and stores the clover inverse field. More...
 
void cloverRho (CloverField &clover, double rho)
 This function adds a real scalar onto the clover diagonal (only to the direct field not the inverse) More...
 
void computeCloverForce (GaugeField &force, const GaugeField &U, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &p, std::vector< double > &coeff)
 Compute the force contribution from the solver solution fields. More...
 
void computeCloverSigmaOprod (GaugeField &oprod, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &p, std::vector< std::vector< double > > &coeff)
 Compute the outer product from the solver solution fields arising from the diagonal term of the fermion bilinear in direction mu,nu and sum to outer product field. More...
 
void computeCloverSigmaTrace (GaugeField &output, const CloverField &clover, double coeff)
 Compute the matrix tensor field necessary for the force calculation from the clover trace action. This computes a tensor field [mu,nu]. More...
 
void cloverDerivative (cudaGaugeField &force, cudaGaugeField &gauge, cudaGaugeField &oprod, double coeff, QudaParity parity)
 Compute the derivative of the clover matrix in the direction mu,nu and compute the resulting force given the outer-product field. More...
 
template<typename Float , int Nc, int Ns>
__device__ __host__ complex< Float > innerProduct (const ColorSpinor< Float, Nc, Ns > &a, const ColorSpinor< Float, Nc, Ns > &b)
 Compute the inner product over color and spin: dot = \sum_{s,c} conj(a(s,c)) * b(s,c). More...
 
template<typename Float , int Nc, int Ns>
__device__ __host__ complex< Float > innerProduct (const ColorSpinor< Float, Nc, Ns > &a, const ColorSpinor< Float, Nc, Ns > &b, int s)
 
template<typename Float , int Nc, int Ns>
__device__ __host__ complex< Float > innerProduct (const ColorSpinor< Float, Nc, Ns > &a, const ColorSpinor< Float, Nc, Ns > &b, int sa, int sb)
 
template<typename Float , int Nc, int Ns>
__device__ __host__ complex< Float > innerProduct (const ColorSpinor< Float, Nc, 1 > &a, const ColorSpinor< Float, Nc, Ns > &b, int s)
 Compute the inner product over color at spin s between a color vector and a color spinor: dot = \sum_{c} conj(a(c)) * b(s,c). More...
 
template<typename Float , int Nc, int Ns>
__device__ __host__ Matrix< complex< Float >, Nc > outerProdSpinTrace (const ColorSpinor< Float, Nc, Ns > &a, const ColorSpinor< Float, Nc, Ns > &b)
 
template<typename Float , int Nc, int Ns>
__device__ __host__ ColorSpinor< Float, Nc, Ns > operator+ (const ColorSpinor< Float, Nc, Ns > &x, const ColorSpinor< Float, Nc, Ns > &y)
 ColorSpinor addition operator. More...
 
template<typename Float , int Nc, int Ns>
__device__ __host__ ColorSpinor< Float, Nc, Ns > operator- (const ColorSpinor< Float, Nc, Ns > &x, const ColorSpinor< Float, Nc, Ns > &y)
 ColorSpinor subtraction operator. More...
 
template<typename Float , int Nc, int Ns, typename S >
__device__ __host__ ColorSpinor< Float, Nc, Ns > operator* (const S &a, const ColorSpinor< Float, Nc, Ns > &x)
 Compute the scalar-vector product y = a * x. More...
 
template<typename Float , int Nc, int Ns>
__device__ __host__ ColorSpinor< Float, Nc, Ns > operator* (const Matrix< complex< Float >, Nc > &A, const ColorSpinor< Float, Nc, Ns > &x)
 Compute the matrix-vector product y = A * x. More...
 
template<typename Float , int Nc, int Ns>
__device__ __host__ ColorSpinor< Float, Nc, Ns > operator* (const HMatrix< Float, Nc *Ns > &A, const ColorSpinor< Float, Nc, Ns > &x)
 Compute the matrix-vector product y = A * x. More...
 
void copyGenericColorSpinor (ColorSpinorField &dst, const ColorSpinorField &src, QudaFieldLocation location, void *Dst=0, void *Src=0, void *dstNorm=0, void *srcNorm=0)
 
void genericSource (cpuColorSpinorField &a, QudaSourceType sourceType, int x, int s, int c)
 
int genericCompare (const cpuColorSpinorField &a, const cpuColorSpinorField &b, int tol)
 
void genericPrintVector (const cpuColorSpinorField &a, unsigned int x)
 
void genericCudaPrintVector (const cudaColorSpinorField &a, unsigned x)
 
void wuppertalStep (ColorSpinorField &out, const ColorSpinorField &in, int parity, const GaugeField &U, double A, double B)
 
void wuppertalStep (ColorSpinorField &out, const ColorSpinorField &in, int parity, const GaugeField &U, double alpha)
 
void exchangeExtendedGhost (cudaColorSpinorField *spinor, int R[], int parity, cudaStream_t *stream_p)
 
void copyExtendedColorSpinor (ColorSpinorField &dst, const ColorSpinorField &src, QudaFieldLocation location, const int parity, void *Dst, void *Src, void *dstNorm, void *srcNorm)
 
void genericPackGhost (void **ghost, const ColorSpinorField &a, QudaParity parity, int nFace, int dagger, MemoryLocation *destination=nullptr)
 Generic ghost packing routine. More...
 
void spinorNoise (ColorSpinorField &src, RNG &randstates, QudaNoiseType type)
 Generate a random noise spinor. This variant allows the user to manage the RNG state. More...
 
void spinorNoise (ColorSpinorField &src, unsigned long long seed, QudaNoiseType type)
 Generate a random noise spinor. This variant just requires a seed and will create and destroy the random number state. More...
 
QudaPCType PCType_ (const char *func, const char *file, int line, const ColorSpinorField &a, const ColorSpinorField &b)
 Helper function for determining if the preconditioning type of the fields is the same. More...
 
template<typename... Args>
QudaPCType PCType_ (const char *func, const char *file, int line, const ColorSpinorField &a, const ColorSpinorField &b, const Args &... args)
 Helper function for determining if the preconditioning type of the fields is the same. More...
 
template<typename ValueType >
__host__ __device__ ValueType cos (ValueType x)
 
template<typename ValueType >
__host__ __device__ ValueType sin (ValueType x)
 
template<typename ValueType >
__host__ __device__ ValueType tan (ValueType x)
 
template<typename ValueType >
__host__ __device__ ValueType acos (ValueType x)
 
template<typename ValueType >
__host__ __device__ ValueType asin (ValueType x)
 
template<typename ValueType >
__host__ __device__ ValueType atan (ValueType x)
 
template<typename ValueType >
__host__ __device__ ValueType atan2 (ValueType x, ValueType y)
 
template<typename ValueType >
__host__ __device__ ValueType cosh (ValueType x)
 
template<typename ValueType >
__host__ __device__ ValueType sinh (ValueType x)
 
template<typename ValueType >
__host__ __device__ ValueType tanh (ValueType x)
 
template<typename ValueType >
__host__ __device__ ValueType exp (ValueType x)
 
template<typename ValueType >
__host__ __device__ ValueType log (ValueType x)
 
template<typename ValueType >
__host__ __device__ ValueType log10 (ValueType x)
 
template<typename ValueType , typename ExponentType >
__host__ __device__ ValueType pow (ValueType x, ExponentType e)
 
template<typename ValueType >
__host__ __device__ ValueType sqrt (ValueType x)
 
template<typename ValueType >
__host__ __device__ ValueType abs (ValueType x)
 
template<typename ValueType >
__host__ __device__ ValueType conj (ValueType x)
 
template<typename ValueType >
__host__ __device__ ValueType abs (const complex< ValueType > &z)
 Returns the magnitude of z. More...
 
template<typename ValueType >
__host__ __device__ ValueType arg (const complex< ValueType > &z)
 Returns the phase angle of z. More...
 
template<typename ValueType >
__host__ __device__ ValueType norm (const complex< ValueType > &z)
 Returns the magnitude of z squared. More...
 
template<typename ValueType >
__host__ __device__ complex< ValueType > conj (const complex< ValueType > &z)
 Returns the complex conjugate of z. More...
 
template<typename ValueType >
__host__ __device__ complex< ValueType > polar (const ValueType &m, const ValueType &theta=0)
 Returns the complex with magnitude m and angle theta in radians. More...
 
template<typename ValueType >
__host__ __device__ complex< ValueType > operator* (const complex< ValueType > &lhs, const complex< ValueType > &rhs)
 
template<typename ValueType >
__host__ __device__ complex< ValueType > operator* (const complex< ValueType > &lhs, const ValueType &rhs)
 
template<typename ValueType >
__host__ __device__ complex< ValueType > operator* (const ValueType &lhs, const complex< ValueType > &rhs)
 
template<typename ValueType >
__host__ __device__ complex< ValueType > operator/ (const complex< ValueType > &lhs, const complex< ValueType > &rhs)
 
template<>
__host__ __device__ complex< float > operator/ (const complex< float > &lhs, const complex< float > &rhs)
 
template<>
__host__ __device__ complex< double > operator/ (const complex< double > &lhs, const complex< double > &rhs)
 
template<typename ValueType >
__host__ __device__ complex< ValueType > operator+ (const complex< ValueType > &lhs, const complex< ValueType > &rhs)
 
template<typename ValueType >
__host__ __device__ complex< ValueType > operator+ (const complex< ValueType > &lhs, const ValueType &rhs)
 
template<typename ValueType >
__host__ __device__ complex< ValueType > operator+ (const ValueType &lhs, const complex< ValueType > &rhs)
 
template<typename ValueType >
__host__ __device__ complex< ValueType > operator- (const complex< ValueType > &lhs, const complex< ValueType > &rhs)
 
template<typename ValueType >
__host__ __device__ complex< ValueType > operator- (const complex< ValueType > &lhs, const ValueType &rhs)
 
template<typename ValueType >
__host__ __device__ complex< ValueType > operator- (const ValueType &lhs, const complex< ValueType > &rhs)
 
template<typename ValueType >
__host__ __device__ complex< ValueType > operator+ (const complex< ValueType > &rhs)
 
template<typename ValueType >
__host__ __device__ complex< ValueType > operator- (const complex< ValueType > &rhs)
 
template<typename ValueType >
__host__ __device__ complex< ValueType > cos (const complex< ValueType > &z)
 
template<typename ValueType >
__host__ __device__ complex< ValueType > cosh (const complex< ValueType > &z)
 
template<typename ValueType >
__host__ __device__ complex< ValueType > exp (const complex< ValueType > &z)
 
template<typename ValueType >
__host__ __device__ complex< ValueType > log (const complex< ValueType > &z)
 
template<typename ValueType >
__host__ __device__ complex< ValueType > log10 (const complex< ValueType > &z)
 
template<typename ValueType >
__host__ __device__ complex< ValueType > pow (const complex< ValueType > &z, const int &n)
 
template<typename ValueType >
__host__ __device__ complex< ValueType > pow (const complex< ValueType > &z, const ValueType &x)
 
template<typename ValueType >
__host__ __device__ complex< ValueType > pow (const complex< ValueType > &z, const complex< ValueType > &z2)
 
template<typename ValueType >
__host__ __device__ complex< ValueType > pow (const ValueType &x, const complex< ValueType > &z)
 
template<typename ValueType >
__host__ __device__ complex< ValueType > sin (const complex< ValueType > &z)
 
template<typename ValueType >
__host__ __device__ complex< ValueType > sinh (const complex< ValueType > &z)
 
template<typename ValueType >
__host__ __device__ complex< ValueType > sqrt (const complex< ValueType > &z)
 
template<typename ValueType >
__host__ __device__ complex< ValueType > tan (const complex< ValueType > &z)
 
template<typename ValueType >
__host__ __device__ complex< ValueType > tanh (const complex< ValueType > &z)
 
template<typename ValueType >
__host__ __device__ complex< ValueType > acos (const complex< ValueType > &z)
 
template<typename ValueType >
__host__ __device__ complex< ValueType > asin (const complex< ValueType > &z)
 
template<typename ValueType >
__host__ __device__ complex< ValueType > atan (const complex< ValueType > &z)
 
template<typename ValueType >
__host__ __device__ complex< ValueType > acosh (const complex< ValueType > &z)
 
template<typename ValueType >
__host__ __device__ complex< ValueType > asinh (const complex< ValueType > &z)
 
template<typename ValueType >
__host__ __device__ complex< ValueType > atanh (const complex< ValueType > &z)
 
template<typename ValueType , class charT , class traits >
std::basic_ostream< charT, traits > & operator<< (std::basic_ostream< charT, traits > &os, const complex< ValueType > &z)
 
template<typename ValueType , typename charT , class traits >
std::basic_istream< charT, traits > & operator>> (std::basic_istream< charT, traits > &is, complex< ValueType > &z)
 
template<typename ValueType >
__host__ __device__ complex< ValueType > operator+ (const volatile complex< ValueType > &lhs, const volatile complex< ValueType > &rhs)
 
template<typename ValueType >
__host__ __device__ complex< ValueType > operator/ (const complex< ValueType > &lhs, const ValueType &rhs)
 
template<typename ValueType >
__host__ __device__ complex< ValueType > operator/ (const ValueType &lhs, const complex< ValueType > &rhs)
 
template<>
__host__ __device__ complex< float > operator/ (const float &lhs, const complex< float > &rhs)
 
template<>
__host__ __device__ complex< double > operator/ (const double &lhs, const complex< double > &rhs)
 
template<typename ValueType >
__host__ __device__ bool operator== (const complex< ValueType > &lhs, const complex< ValueType > &rhs)
 
template<typename ValueType >
__host__ __device__ bool operator== (const ValueType &lhs, const complex< ValueType > &rhs)
 
template<typename ValueType >
__host__ __device__ bool operator== (const complex< ValueType > &lhs, const ValueType &rhs)
 
template<typename ValueType >
__host__ __device__ bool operator!= (const complex< ValueType > &lhs, const complex< ValueType > &rhs)
 
template<typename ValueType >
__host__ __device__ bool operator!= (const ValueType &lhs, const complex< ValueType > &rhs)
 
template<typename ValueType >
__host__ __device__ bool operator!= (const complex< ValueType > &lhs, const ValueType &rhs)
 
template<>
__host__ __device__ float abs (const complex< float > &z)
 
template<>
__host__ __device__ double abs (const complex< double > &z)
 
template<>
__host__ __device__ float arg (const complex< float > &z)
 
template<>
__host__ __device__ double arg (const complex< double > &z)
 
template<>
__host__ __device__ complex< float > polar (const float &magnitude, const float &angle)
 
template<>
__host__ __device__ complex< double > polar (const double &magnitude, const double &angle)
 
template<>
__host__ __device__ complex< float > cos (const complex< float > &z)
 
template<>
__host__ __device__ complex< float > cosh (const complex< float > &z)
 
template<>
__host__ __device__ complex< float > exp (const complex< float > &z)
 
template<>
__host__ __device__ complex< float > log (const complex< float > &z)
 
template<>
__host__ __device__ complex< float > pow (const float &x, const complex< float > &exponent)
 
template<>
__host__ __device__ complex< float > sin (const complex< float > &z)
 
template<>
__host__ __device__ complex< float > sinh (const complex< float > &z)
 
template<>
__host__ __device__ complex< float > sqrt (const complex< float > &z)
 
template<typename ValueType >
__host__ __device__ complex< float > atanh (const complex< float > &z)
 
template<typename real >
__host__ __device__ complex< real > cmul (const complex< real > &x, const complex< real > &y)
 
template<typename real >
__host__ __device__ complex< real > cmac (const complex< real > &x, const complex< real > &y, const complex< real > &z)
 
void contractQuda (const ColorSpinorField &x, const ColorSpinorField &y, void *result, QudaContractType cType)
 
template<typename type >
int vecLength ()
 
template<>
int vecLength< char > ()
 
template<>
int vecLength< short > ()
 
template<>
int vecLength< float > ()
 
template<>
int vecLength< double > ()
 
template<>
int vecLength< char2 > ()
 
template<>
int vecLength< short2 > ()
 
template<>
int vecLength< float2 > ()
 
template<>
int vecLength< double2 > ()
 
template<>
int vecLength< char4 > ()
 
template<>
int vecLength< short4 > ()
 
template<>
int vecLength< float4 > ()
 
template<>
int vecLength< double4 > ()
 
__host__ __device__ float s2f (short a)
 
__host__ __device__ double s2d (short a)
 
__host__ __device__ float c2f (char a)
 
__host__ __device__ double c2d (char a)
 
__host__ __device__ float s2f (short a, float c)
 
__host__ __device__ double s2d (short a, double c)
 
__host__ __device__ float c2f (char a, float c)
 
__host__ __device__ double c2d (char a, double c)
 
template<typename FloatN >
__device__ void copyFloatN (FloatN &a, const FloatN &b)
 
__device__ void copyFloatN (float2 &a, const char2 &b)
 
__device__ void copyFloatN (float4 &a, const char4 &b)
 
__device__ void copyFloatN (double2 &a, const char2 &b)
 
__device__ void copyFloatN (double4 &a, const char4 &b)
 
__device__ void copyFloatN (float2 &a, const short2 &b)
 
__device__ void copyFloatN (float4 &a, const short4 &b)
 
__device__ void copyFloatN (double2 &a, const short2 &b)
 
__device__ void copyFloatN (double4 &a, const short4 &b)
 
__device__ void copyFloatN (float2 &a, const double2 &b)
 
__device__ void copyFloatN (double2 &a, const float2 &b)
 
__device__ void copyFloatN (float4 &a, const double4 &b)
 
__device__ void copyFloatN (double4 &a, const float4 &b)
 
__device__ __host__ int f2i (float f)
 
__device__ __host__ int d2i (double d)
 
__device__ void copyFloatN (short2 &a, const float2 &b)
 
__device__ void copyFloatN (short4 &a, const float4 &b)
 
__device__ void copyFloatN (short2 &a, const double2 &b)
 
__device__ void copyFloatN (short4 &a, const double4 &b)
 
__device__ void copyFloatN (char2 &a, const float2 &b)
 
__device__ void copyFloatN (char4 &a, const float4 &b)
 
__device__ void copyFloatN (char2 &a, const double2 &b)
 
__device__ void copyFloatN (char4 &a, const double4 &b)
 
template<typename OutputType , typename InputType >
__device__ void convert (OutputType x[], InputType y[], const int N)
 
template<>
__device__ void convert< float2, short2 > (float2 x[], short2 y[], const int N)
 
template<>
__device__ void convert< float4, short4 > (float4 x[], short4 y[], const int N)
 
template<>
__device__ void convert< double4, double2 > (double4 x[], double2 y[], const int N)
 
template<>
__device__ void convert< double2, double4 > (double2 x[], double4 y[], const int N)
 
template<>
__device__ void convert< float4, float2 > (float4 x[], float2 y[], const int N)
 
template<>
__device__ void convert< float2, float4 > (float2 x[], float4 y[], const int N)
 
template<>
__device__ void convert< short4, float2 > (short4 x[], float2 y[], const int N)
 
template<>
__device__ void convert< float2, short4 > (float2 x[], short4 y[], const int N)
 
template<>
__device__ void convert< float4, short2 > (float4 x[], short2 y[], const int N)
 
template<>
__device__ void convert< short2, float4 > (short2 x[], float4 y[], const int N)
 
template<>
__device__ void convert< short4, double2 > (short4 x[], double2 y[], const int N)
 
template<>
__device__ void convert< double2, short4 > (double2 x[], short4 y[], const int N)
 
template<>
__device__ void convert< double4, short2 > (double4 x[], short2 y[], const int N)
 
template<>
__device__ void convert< short2, double4 > (short2 x[], double4 y[], const int N)
 
template<>
__device__ void convert< float4, double2 > (float4 x[], double2 y[], const int N)
 
template<>
__device__ void convert< double2, float4 > (double2 x[], float4 y[], const int N)
 
template<>
__device__ void convert< double4, float2 > (double4 x[], float2 y[], const int N)
 
template<>
__device__ void convert< float2, double4 > (float2 x[], double4 y[], const int N)
 
template<typename scalar , int n>
__device__ __host__ void zero (vector_type< scalar, n > &v)
 
template<typename scalar , int n>
__device__ __host__ vector_type< scalar, n > operator+ (const vector_type< scalar, n > &a, const vector_type< scalar, n > &b)
 
template<int block_size_x, int block_size_y, typename T , bool do_sum = true, typename Reducer = cub::Sum>
__device__ void reduce2d (ReduceArg< T > arg, const T &in, const int idx=0)
 
template<int block_size, typename T , bool do_sum = true, typename Reducer = cub::Sum>
__device__ void reduce (ReduceArg< T > arg, const T &in, const int idx=0)
 
template<int block_size_x, int block_size_y, typename T >
__device__ void reduceRow (ReduceArg< T > arg, const T &in)
 
void setDiracParam (DiracParam &diracParam, QudaInvertParam *inv_param, bool pc)
 
void setDiracSloppyParam (DiracParam &diracParam, QudaInvertParam *inv_param, bool pc)
 
template<template< typename, int, QudaReconstructType > class Apply, typename Recon , typename Float , int nColor, typename... Args>
void instantiate (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, Args &&... args)
 This instantiate function is used to instantiate the reconstruct types used. More...
 
template<template< typename, int, QudaReconstructType > class Apply, typename Recon , typename Float , typename... Args>
void instantiate (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, Args &&... args)
 This instantiate function is used to instantiate the colors. More...
 
template<template< typename, int, QudaReconstructType > class Apply, typename Recon = WilsonReconstruct, typename... Args>
void instantiate (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, Args &&... args)
 This instantiate function is used to instantiate the precisions. More...
 
template<KernelType type>
__host__ __device__ bool doHalo (int dim=-1)
 Helper function to determine if we should do halo computation. More...
 
template<KernelType type>
__host__ __device__ bool doBulk ()
 Helper function to determine if we should do interior computation. More...
 
template<KernelType type, typename Arg >
__host__ __device__ bool isComplete (const Arg &arg, int coord[])
 Helper function to determine if the application of the derivative in the dslash is complete. More...
 
template<int nDim, QudaPCType pc_type, KernelType kernel_type, typename Arg , int nface_ = 1>
__host__ __device__ int getCoords (int coord[], const Arg &arg, int &idx, int parity, int &dim)
 Compute the space-time coordinates we are at. More...
 
template<int dim, typename Arg >
__host__ __device__ bool inBoundary (const int coord[], const Arg &arg)
 Compute whether the provided coordinate is within the halo region boundary of a given dimension. More...
 
template<KernelType kernel_type, typename Arg >
__device__ bool isActive (bool &active, int threadDim, int offsetDim, const int coord[], const Arg &arg)
 Compute whether this thread should be active for updating a given offsetDim halo. For non-fused halo update kernels this is a trivial kernel that just checks if the given dimension is partitioned and, if so, returns true. More...
 
template<typename Float >
std::ostream & operator<< (std::ostream &out, const DslashArg< Float > &arg)
 
void setKernelPackT (bool pack)
 
bool getKernelPackT ()
 
void pushKernelPackT (bool pack)
 
void popKernelPackT ()
 
void setPackComms (const int *dim_pack)
 Helper function that sets which dimensions the packing kernel should be packing for. More...
 
bool getDslashLaunch ()
 
void createDslashEvents ()
 
void destroyDslashEvents ()
 
void ApplyWilson (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double kappa, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
 Driver for applying the Wilson stencil. More...
 
void ApplyWilsonClover (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, const CloverField &A, double kappa, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
 Driver for applying the Wilson-clover stencil. More...
 
void ApplyWilsonCloverPreconditioned (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, const CloverField &A, double kappa, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
 Driver for applying the preconditioned Wilson-clover stencil. More...
 
void ApplyTwistedMass (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double b, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
 Driver for applying the twisted-mass stencil. More...
 
void ApplyTwistedMassPreconditioned (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double b, bool xpay, const ColorSpinorField &x, int parity, bool dagger, bool asymmetric, const int *comm_override, TimeProfile &profile)
 Driver for applying the preconditioned twisted-mass stencil. More...
 
void ApplyNdegTwistedMass (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double b, double c, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
 Driver for applying the non-degenerate twisted-mass stencil. More...
 
void ApplyNdegTwistedMassPreconditioned (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double b, double c, bool xpay, const ColorSpinorField &x, int parity, bool dagger, bool asymmetric, const int *comm_override, TimeProfile &profile)
 Driver for applying the preconditioned non-degenerate twisted-mass stencil. More...
 
void ApplyTwistedClover (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, const CloverField &C, double a, double b, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
 Driver for applying the twisted-clover stencil. More...
 
void ApplyTwistedCloverPreconditioned (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, const CloverField &C, double a, double b, bool xpay, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
 Driver for applying the preconditioned twisted-clover stencil. More...
 
void ApplyDomainWall5D (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double m_f, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
 Driver for applying the Domain-wall 5-d stencil to a 5-d vector with 5-d preconditioned data order. More...
 
void ApplyDomainWall4D (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double m_5, const Complex *b_5, const Complex *c_5, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
 Driver for applying the batched Wilson 4-d stencil to a 5-d vector with 4-d preconditioned data order. More...
 
void ApplyDslash5 (ColorSpinorField &out, const ColorSpinorField &in, const ColorSpinorField &x, double m_f, double m_5, const Complex *b_5, const Complex *c_5, double a, bool dagger, Dslash5Type type)
 Apply either the domain-wall / mobius Dslash5 operator or the M5 inverse operator. In the current implementation, it is expected that the color-spinor fields are 4-d preconditioned. More...
 
void ApplyLaplace (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, int dir, double kappa, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
 Driver for applying the Laplace stencil. More...
 
void ApplyCovDev (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, int mu, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
 Driver for applying the covariant derivative. More...
 
void ApplyClover (ColorSpinorField &out, const ColorSpinorField &in, const CloverField &clover, bool inverse, int parity)
 Apply clover-matrix field to a color-spinor field. More...
 
void ApplyStaggered (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
 Apply the staggered dslash operator to a color-spinor field. More...
 
void ApplyImprovedStaggered (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, const GaugeField &L, double a, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
 Apply the improved staggered dslash operator to a color-spinor field. More...
 
void ApplyTwistGamma (ColorSpinorField &out, const ColorSpinorField &in, int d, double kappa, double mu, double epsilon, int dagger, QudaTwistGamma5Type type)
 Apply the twisted-mass gamma operator to a color-spinor field. More...
 
void ApplyTwistClover (ColorSpinorField &out, const ColorSpinorField &in, const CloverField &clover, double kappa, double mu, double epsilon, int parity, int dagger, QudaTwistGamma5Type twist)
 Apply twisted clover-matrix field to a color-spinor field. More...
 
void PackGhost (void *ghost[2 *QUDA_MAX_DIM], const ColorSpinorField &field, MemoryLocation location, int nFace, bool dagger, int parity, bool spin_project, double a, double b, double c, const cudaStream_t &stream)
 Dslash face packing routine. More...
 
void gamma5 (ColorSpinorField &out, const ColorSpinorField &in)
 Applies a gamma5 matrix to a spinor (wrapper to ApplyGamma) More...
 
void arpack_solve (std::vector< ColorSpinorField *> &h_evecs, std::vector< Complex > &h_evals, const DiracMatrix &mat, QudaEigParam *eig_param, TimeProfile &profile)
 The QUDA interface function. One passes two allocated arrays to hold the eigenmode data, the problem matrix, the arpack parameters defining what problem is to be solved, and a container for QUDA data structure types. More...
 
__device__ __host__ void zero (double &a)
 
__device__ __host__ void zero (double2 &a)
 
__device__ __host__ void zero (double3 &a)
 
__device__ __host__ void zero (double4 &a)
 
__device__ __host__ void zero (float &a)
 
__device__ __host__ void zero (float2 &a)
 
__device__ __host__ void zero (float3 &a)
 
__device__ __host__ void zero (float4 &a)
 
__host__ __device__ double2 operator+ (const double2 &x, const double2 &y)
 
__host__ __device__ double2 operator- (const double2 &x, const double2 &y)
 
__host__ __device__ float2 operator- (const float2 &x, const float2 &y)
 
__host__ __device__ float4 operator- (const float4 &x, const float4 &y)
 
__host__ __device__ double3 operator+ (const double3 &x, const double3 &y)
 
__host__ __device__ double4 operator+ (const double4 &x, const double4 &y)
 
__host__ __device__ float4 operator* (const float a, const float4 x)
 
__host__ __device__ float2 operator* (const float a, const float2 x)
 
__host__ __device__ double2 operator* (const double a, const double2 x)
 
__host__ __device__ double4 operator* (const double a, const double4 x)
 
__host__ __device__ float2 operator+ (const float2 x, const float2 y)
 
__host__ __device__ float4 operator+ (const float4 x, const float4 y)
 
__host__ __device__ float4 operator+= (float4 &x, const float4 y)
 
__host__ __device__ float2 operator+= (float2 &x, const float2 y)
 
__host__ __device__ double2 operator+= (double2 &x, const double2 y)
 
__host__ __device__ double3 operator+= (double3 &x, const double3 y)
 
__host__ __device__ double4 operator+= (double4 &x, const double4 y)
 
__host__ __device__ float4 operator-= (float4 &x, const float4 y)
 
__host__ __device__ float2 operator-= (float2 &x, const float2 y)
 
__host__ __device__ double2 operator-= (double2 &x, const double2 y)
 
__host__ __device__ float2 operator*= (float2 &x, const float a)
 
__host__ __device__ double2 operator*= (double2 &x, const float a)
 
__host__ __device__ float4 operator*= (float4 &a, const float &b)
 
__host__ __device__ double2 operator*= (double2 &a, const double &b)
 
__host__ __device__ double4 operator*= (double4 &a, const double &b)
 
__host__ __device__ float2 operator- (const float2 &x)
 
__host__ __device__ double2 operator- (const double2 &x)
 
__forceinline__ __host__ __device__ float max_fabs (const float4 &c)
 
__forceinline__ __host__ __device__ float max_fabs (const float2 &b)
 
__forceinline__ __host__ __device__ double max_fabs (const double4 &c)
 
__forceinline__ __host__ __device__ double max_fabs (const double2 &b)
 
__forceinline__ __host__ __device__ float2 make_FloatN (const double2 &a)
 
__forceinline__ __host__ __device__ float4 make_FloatN (const double4 &a)
 
__forceinline__ __host__ __device__ double2 make_FloatN (const float2 &a)
 
__forceinline__ __host__ __device__ double4 make_FloatN (const float4 &a)
 
__forceinline__ __host__ __device__ short4 make_shortN (const char4 &a)
 
__forceinline__ __host__ __device__ short2 make_shortN (const char2 &a)
 
__forceinline__ __host__ __device__ short4 make_shortN (const float4 &a)
 
__forceinline__ __host__ __device__ short2 make_shortN (const float2 &a)
 
__forceinline__ __host__ __device__ short4 make_shortN (const double4 &a)
 
__forceinline__ __host__ __device__ short2 make_shortN (const double2 &a)
 
__forceinline__ __host__ __device__ char4 make_charN (const short4 &a)
 
__forceinline__ __host__ __device__ char2 make_charN (const short2 &a)
 
__forceinline__ __host__ __device__ char4 make_charN (const float4 &a)
 
__forceinline__ __host__ __device__ char2 make_charN (const float2 &a)
 
__forceinline__ __host__ __device__ char4 make_charN (const double4 &a)
 
__forceinline__ __host__ __device__ char2 make_charN (const double2 &a)
 
template<typename Float2 , typename Complex >
Float2 make_Float2 (const Complex &a)
 
template<>
double2 make_Float2 (const complex< double > &a)
 
template<>
double2 make_Float2 (const complex< float > &a)
 
template<>
float2 make_Float2 (const complex< double > &a)
 
template<>
float2 make_Float2 (const complex< float > &a)
 
template<>
double2 make_Float2 (const std::complex< double > &a)
 
template<>
double2 make_Float2 (const std::complex< float > &a)
 
template<>
float2 make_Float2 (const std::complex< double > &a)
 
template<>
float2 make_Float2 (const std::complex< float > &a)
 
complex< double > make_Complex (const double2 &a)
 
complex< float > make_Complex (const float2 &a)
 
std::ostream & operator<< (std::ostream &output, const GaugeFieldParam &param)
 
double norm1 (const GaugeField &u)
 This is a debugging function, where we cast a gauge field into a spinor field so we can compute its L1 norm. More...
 
double norm2 (const GaugeField &u)
 This is a debugging function, where we cast a gauge field into a spinor field so we can compute its L2 norm. More...
 
void ax (const double &a, GaugeField &u)
 Scale the gauge field by the scalar a. More...
 
void copyGenericGauge (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out=0, void *In=0, void **ghostOut=0, void **ghostIn=0, int type=0)
 
void copyExtendedGauge (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out=0, void *In=0)
 
void extractGaugeGhost (const GaugeField &u, void **ghost, bool extract=true, int offset=0)
 
void extractExtendedGaugeGhost (const GaugeField &u, int dim, const int *R, void **ghost, bool extract)
 
void applyGaugePhase (GaugeField &u)
 
uint64_t Checksum (const GaugeField &u, bool mini=false)
 
void gaugeForce (GaugeField &mom, const GaugeField &u, double coeff, int ***input_path, int *length, double *path_coeff, int num_paths, int max_length)
 Compute the gauge-force contribution to the momentum. More...
 
double3 plaquette (const GaugeField &U)
 Compute the plaquette of the gauge field. More...
 
void gaugeGauss (GaugeField &U, RNG &rngstate, double epsilon)
 Generate Gaussian distributed su(N) or SU(N) fields. If U is a momentum field, then we generate a random Gaussian distributed field in the Lie algebra using the anti-Hermitian convention. If U is in the group then we create a Gaussian distributed su(n) field and exponentiate it, e.g., U = exp(sigma * H), where H is the distributed su(n) field and sigma is the width of the distribution (sigma = 0 results in a free field, and sigma = 1 has maximum disorder). More...
 
void gaugeGauss (GaugeField &U, unsigned long long seed, double epsilon)
 Generate Gaussian distributed su(N) or SU(N) fields. If U is a momentum field, then we generate a random Gaussian distributed field in the Lie algebra using the anti-Hermitian convention. If U is in the group then we create a Gaussian distributed su(n) field and exponentiate it, e.g., U = exp(sigma * H), where H is the distributed su(n) field and sigma is the width of the distribution (sigma = 0 results in a free field, and sigma = 1 has maximum disorder). More...
 
void APEStep (GaugeField &dataDs, const GaugeField &dataOr, double alpha)
 Apply APE smearing to the gauge field. More...
 
void STOUTStep (GaugeField &dataDs, const GaugeField &dataOr, double rho)
 Apply STOUT smearing to the gauge field. More...
 
void OvrImpSTOUTStep (GaugeField &dataDs, const GaugeField &dataOr, double rho, double epsilon)
 Apply Over Improved STOUT smearing to the gauge field. More...
 
void gaugefixingOVR (cudaGaugeField &data, const int gauge_dir, const int Nsteps, const int verbose_interval, const double relax_boost, const double tolerance, const int reunit_interval, const int stopWtheta)
 Gauge fixing with overrelaxation with support for single and multi GPU. More...
 
void gaugefixingFFT (cudaGaugeField &data, const int gauge_dir, const int Nsteps, const int verbose_interval, const double alpha, const int autotune, const double tolerance, const int stopWtheta)
 Gauge fixing with Steepest descent method with FFTs with support for single GPU only. More...
 
void computeFmunu (GaugeField &Fmunu, const GaugeField &gauge)
 Compute the Fmunu tensor. More...
 
double computeQCharge (const GaugeField &Fmunu)
 Compute the topological charge. More...
 
double computeQChargeDensity (const GaugeField &Fmunu, void *result)
 Compute the topological charge density per lattice site. More...
 
void updateGaugeField (GaugeField &out, double dt, const GaugeField &in, const GaugeField &mom, bool conj_mom, bool exact)
 
template<typename I , typename J , typename K >
static __device__ __host__ int linkIndexShift (const I x[], const J dx[], const K X[4])
 
template<typename I , typename J , typename K >
static __device__ __host__ int linkIndexShift (I y[], const I x[], const J dx[], const K X[4])
 
template<typename I >
static __device__ __host__ int linkIndex (const int x[], const I X[4])
 
template<typename I >
static __device__ __host__ int linkIndex (int y[], const int x[], const I X[4])
 
template<typename I , int n>
static __device__ __host__ int linkIndexDn (const int x[], const I X[4], const int mu)
 
template<typename I >
static __device__ __host__ int linkIndexM1 (const int x[], const I X[4], const int mu)
 
template<typename I >
static __device__ __host__ int linkIndexM3 (const int x[], const I X[4], const int mu)
 
template<typename I >
static __device__ __host__ int linkNormalIndexP1 (const int x[], const I X[4], const int mu)
 
template<typename I >
static __device__ __host__ int linkIndexP1 (const int x[], const I X[4], const int mu)
 
template<typename I >
static __device__ __host__ int linkIndexP3 (const int x[], const I X[4], const int mu)
 
template<int nDim = 4, typename Arg >
static __device__ __host__ int getNeighborIndexCB (const int x[], int mu, int dir, const Arg &arg)
 Compute the checkerboard 1-d index for the nearest neighbor. More...
 
template<typename I , typename J >
static __device__ __host__ void getCoordsCB (int x[], int cb_index, const I X[], J X0h, int parity)
 
template<typename I >
static __device__ __host__ void getCoords (int x[], int cb_index, const I X[], int parity)
 
template<typename I , typename J >
static __device__ __host__ void getCoordsExtended (I x[], int cb_index, const J X[], int parity, const int R[])
 
template<typename I , typename J >
static __device__ __host__ void getCoords5CB (int x[5], int cb_index, const I X[5], J X0h, int parity, QudaPCType pc_type)
 
template<typename I >
static __device__ __host__ void getCoords5 (int x[5], int cb_index, const I X[5], int parity, QudaPCType pc_type)
 
template<typename I >
static __device__ __host__ int getIndexFull (int cb_index, const I X[4], int parity)
 
template<int dir, int nDim = 4, typename I >
__device__ __host__ int ghostFaceIndex (const int x_[], const I X_[], int dim, int nFace)
 
template<int dir, int nDim = 4, typename I >
__device__ __host__ int ghostFaceIndexStaggered (const int x_[], const I X_[], int dim, int nFace)
 
template<int nDim, QudaPCType type, int dim_, int nLayers, typename Int , typename Arg >
__device__ __host__ void coordsFromFaceIndex (int &idx, int &cb_idx, Int *const x, int face_idx, const int &face_num, int parity, const Arg &arg)
 Compute the full-lattice coordinates from the input face index. This is used by the Wilson-like halo update kernels, and can deal with 4-d or 5-d field and 4-d or 5-d preconditioning. More...
 
template<int nDim, QudaPCType type, int dim_, int nLayers, typename Int , typename Arg >
__device__ __host__ void coordsFromFaceIndex (int &idx, int &cb_idx, Int *const x, int face_idx, const int &face_num, const Arg &arg)
 Overloaded variant of coordsFromFaceIndex where we use the parity declared in arg. More...
 
template<int nDim, QudaPCType type, int dim, int nLayers, int face_num, typename Arg >
__device__ __host__ int indexFromFaceIndex (int face_idx, int parity, const Arg &arg)
 Compute the checkerboard lattice index from the input face index. This is used by the Wilson-like halo packing kernels, and can deal with 4-d or 5-d field and 4-d or 5-d preconditioning. More...
 
template<int nDim, QudaPCType type, int dim, int nLayers, int face_num, typename Arg >
__device__ __host__ int indexFromFaceIndex (int face_idx, const Arg &arg)
 Overloaded variant of indexFromFaceIndex where we use the parity declared in arg. More...
 
template<int nDim, QudaPCType type, int dim, int nLayers, int face_num, typename Arg >
static __device__ int indexFromFaceIndexStaggered (int face_idx_in, int parity, const Arg &arg)
 Compute global checkerboard index from face index. The following indexing routines work for arbitrary lattice dimensions (though perhaps not odd like the Wilson variant?) Specifically, we compute an index into the local volume from an index into the face. This is used by the staggered-like face packing routines, and is different from the Wilson variant since here the halo depth is traversed in a different order - here the halo depth is the faster running dimension. More...
 
template<int nDim = 4, typename Arg >
__host__ __device__ int dimFromFaceIndex (int &face_idx, int tid, const Arg &arg)
 Determines which face a given thread is computing. Also rescale face_idx so that it is relative to a given dimension. If the 5-d variant is called, then it is assumed that arg.threads contains only the 3-d surface of threads but face_idx is a 4-d index (surface * fifth dimension). At present multi-src staggered uses the 4-d variant since the face_idx that is passed in is the 3-d surface not the 4-d one. More...
 
template<int nDim = 4, typename Arg >
__host__ __device__ int dimFromFaceIndex (int &face_idx, const Arg &arg)
 
template<typename T >
__device__ int block_idx (const T &swizzle)
 Swizzler for reordering the (x) thread block indices - use in conjunction with swizzle-factor autotuning to find the optimum swizzle factor. Specifically, the thread block id is remapped by transposing its coordinates: if the original order can be parametrized by. More...
 
template<typename Arg >
__device__ __host__ auto StaggeredPhase (const int coords[], int dim, int dir, const Arg &arg) -> typename Arg::real
 Compute the staggered phase factor at unit shift from the current lattice coordinates. The routine below optimizes out the shift where possible, hence is only visible where we need to consider the boundary condition. More...
 
__device__ void load_streaming_double2 (double2 &a, const double2 *addr)
 
__device__ void load_streaming_float4 (float4 &a, const float4 *addr)
 
__device__ void load_cached_short4 (short4 &a, const short4 *addr)
 
__device__ void load_cached_short2 (short2 &a, const short2 *addr)
 
__device__ void load_global_short4 (short4 &a, const short4 *addr)
 
__device__ void load_global_short2 (short2 &a, const short2 *addr)
 
__device__ void load_global_float4 (float4 &a, const float4 *addr)
 
__device__ void store_streaming_float4 (float4 *addr, float x, float y, float z, float w)
 
__device__ void store_streaming_short4 (short4 *addr, short x, short y, short z, short w)
 
__device__ void store_streaming_double2 (double2 *addr, double x, double y)
 
__device__ void store_streaming_float2 (float2 *addr, float x, float y)
 
__device__ void store_streaming_short2 (short2 *addr, short x, short y)
 
template<int nColor, typename sumType , typename real >
__device__ __host__ void colorInnerProduct (complex< sumType > &dot, int i, complex< real > v[nColor], complex< real > w[nColor])
 
template<int nColor, typename sumType , typename real >
__device__ __host__ void colorNorm (sumType &nrm, complex< real > v[nColor])
 
template<typename real , int nColor>
__device__ __host__ void colorScaleSubtract (complex< real > v[nColor], complex< real > a, complex< real > w[nColor])
 
template<typename real , int nColor>
__device__ __host__ void colorScale (complex< real > v[nColor], real a)
 
template<typename sumFloat , typename Float , int nSpin, int spinBlockSize, int nColor, int coarseSpin, int nVec, typename Arg >
void blockOrthoCPU (Arg &arg)
 
template<int block_size, typename sumFloat , typename Float , int nSpin, int spinBlockSize, int nColor, int coarseSpin, int nVec, typename Arg >
 __launch_bounds__ (2 *block_size) __global__ void blockOrthoGPU(Arg arg)
 
template<typename real , typename Link >
__device__ void axpy (real a, const real *x, Link &y)
 
template<typename real , typename Link >
__device__ void operator+= (real *y, const Link &x)
 
template<typename real , typename Link >
__device__ void operator-= (real *y, const Link &x)
 
template<typename real , typename Arg , typename Link >
__device__ void computeForce (LINK force, Arg &arg, int xIndex, int yIndex, int mu, int nu)
 
template<typename real , typename Arg >
__global__ void cloverDerivativeKernel (Arg arg)
 
template<typename Float , typename Arg , bool computeTrLog, bool twist>
__device__ __host__ double cloverInvertCompute (Arg &arg, int x_cb, int parity)
 
template<typename Float , typename Arg , bool computeTrLog, bool twist>
void cloverInvert (Arg &arg)
 
template<int blockSize, typename Float , typename Arg , bool computeTrLog, bool twist>
__global__ void cloverInvertKernel (Arg arg)
 
template<typename real , int nvector, int mu, int nu, int parity, typename Arg >
__device__ void sigmaOprod (Arg &arg, int idx)
 
template<int nvector, typename real , typename Arg >
__global__ void sigmaOprodKernel (Arg arg)
 
template<typename Float >
__device__ __host__ void caxpy (const complex< Float > &a, const complex< Float > &x, complex< Float > &y)
 
template<bool from_coarse, typename Float , int dim, QudaDirection dir, int fineSpin, int fineColor, int coarseSpin, int coarseColor, typename Wtype , typename Arg >
__device__ __host__ void computeUV (Arg &arg, const Wtype &W, int parity, int x_cb, int ic_c)
 
template<bool from_coarse, typename Float , int dim, QudaDirection dir, int fineSpin, int fineColor, int coarseSpin, int coarseColor, typename Arg >
void ComputeUVCPU (Arg &arg)
 
template<bool from_coarse, typename Float , int dim, QudaDirection dir, int fineSpin, int fineColor, int coarseSpin, int coarseColor, typename Arg >
__global__ void ComputeUVGPU (Arg arg)
 
template<typename Float , int fineSpin, int fineColor, int coarseColor, typename Arg >
__device__ __host__ void computeAV (Arg &arg, int parity, int x_cb, int ch, int ic_c)
 
template<typename Float , int fineSpin, int fineColor, int coarseColor, typename Arg >
void ComputeAVCPU (Arg &arg)
 
template<typename Float , int fineSpin, int fineColor, int coarseColor, typename Arg >
__global__ void ComputeAVGPU (Arg arg)
 
template<typename Float , int fineSpin, int fineColor, int coarseColor, typename Arg >
__device__ __host__ void computeTMAV (Arg &arg, int parity, int x_cb, int v)
 
template<typename Float , int fineSpin, int fineColor, int coarseColor, typename Arg >
void ComputeTMAVCPU (Arg &arg)
 
template<typename Float , int fineSpin, int fineColor, int coarseColor, typename Arg >
__global__ void ComputeTMAVGPU (Arg arg)
 
template<typename Float , int fineSpin, int fineColor, int coarseColor, typename Arg >
__device__ __host__ void computeTMCAV (Arg &arg, int parity, int x_cb, int ch, int ic_c)
 
template<typename Float , int fineSpin, int fineColor, int coarseColor, typename Arg >
void ComputeTMCAVCPU (Arg &arg)
 
template<typename Float , int fineSpin, int fineColor, int coarseColor, typename Arg >
__global__ void ComputeTMCAVGPU (Arg arg)
 
template<bool from_coarse, typename Float , int dim, QudaDirection dir, int fineSpin, int fineColor, int coarseSpin, int coarseColor, typename Arg , typename Gamma >
__device__ __host__ void multiplyVUV (complex< Float > vuv[], const Arg &arg, const Gamma &gamma, int parity, int x_cb, int ic_c, int jc_c)
 Do a single (AV)^ * UV product, where for preconditioned clover, AV correspond to the clover inverse multiplied by the packed null space vectors, else AV is simply the packed null space vectors. More...
 
template<typename Arg >
__device__ __host__ int virtualThreadIdx (const Arg &arg)
 
template<typename Arg >
__device__ __host__ int virtualBlockDim (const Arg &arg)
 
template<typename Arg >
__device__ __host__ int coarseIndex (const Arg &arg)
 
template<bool shared_atomic, bool parity_flip, bool from_coarse, typename Float , int dim, QudaDirection dir, int fineSpin, int fineColor, int coarseSpin, int coarseColor, typename Arg , typename Gamma >
__device__ __host__ void computeVUV (Arg &arg, const Gamma &gamma, int parity, int x_cb, int c_row, int c_col, int parity_coarse_, int coarse_x_cb_)
 
template<bool from_coarse, typename Float , int dim, QudaDirection dir, int fineSpin, int fineColor, int coarseSpin, int coarseColor, typename Arg >
void ComputeVUVCPU (Arg arg)
 
template<bool parity_flip, typename Arg >
__device__ void getIndicesShared (const Arg &arg, int &parity, int &x_cb, int &parity_coarse, int &x_coarse_cb, int &c_col, int &c_row)
 
template<bool parity_flip, typename Arg >
__device__ void getIndicesGlobal (const Arg &arg, int &parity, int &x_cb, int &parity_coarse, int &x_coarse_cb, int &c_col, int &c_row)
 
template<bool shared_atomic, bool parity_flip, bool from_coarse, typename Float , int dim, QudaDirection dir, int fineSpin, int fineColor, int coarseSpin, int coarseColor, typename Arg >
__global__ void ComputeVUVGPU (Arg arg)
 
template<typename Float , int nSpin, int nColor, typename Arg >
__device__ __host__ void computeYreverse (Arg &arg, int parity, int x_cb, int ic_c, int jc_c)
 
template<typename Float , int nSpin, int nColor, typename Arg >
void ComputeYReverseCPU (Arg &arg)
 
template<typename Float , int nSpin, int nColor, typename Arg >
__global__ void ComputeYReverseGPU (Arg arg)
 
template<bool from_coarse, typename Float , int fineSpin, int coarseSpin, int fineColor, int coarseColor, typename Arg >
__device__ __host__ void computeCoarseClover (Arg &arg, int parity, int x_cb, int ic_c, int jc_c)
 
template<bool from_coarse, typename Float , int fineSpin, int coarseSpin, int fineColor, int coarseColor, typename Arg >
void ComputeCoarseCloverCPU (Arg &arg)
 
template<bool from_coarse, typename Float , int fineSpin, int coarseSpin, int fineColor, int coarseColor, typename Arg >
__global__ void ComputeCoarseCloverGPU (Arg arg)
 
template<typename Float , int nSpin, int nColor, typename Arg >
void AddCoarseDiagonalCPU (Arg &arg)
 
template<typename Float , int nSpin, int nColor, typename Arg >
__global__ void AddCoarseDiagonalGPU (Arg arg)
 
template<typename Float , int nSpin, int nColor, typename Arg >
void AddCoarseTmDiagonalCPU (Arg &arg)
 
template<typename Float , int nSpin, int nColor, typename Arg >
__global__ void AddCoarseTmDiagonalGPU (Arg arg)
 
template<typename Float , int nSpin, int nColor, typename Arg >
__device__ __host__ void convert (Arg &arg, int parity, int x_cb, int c_row, int c_col)
 
template<typename Float , int nSpin, int nColor, typename Arg >
void ConvertCPU (Arg &arg)
 
template<typename Float , int nSpin, int nColor, typename Arg >
__global__ void ConvertGPU (Arg arg)
 
template<typename Float , int nSpin, int nColor, typename Arg >
__device__ __host__ void rescaleY (Arg &arg, int parity, int x_cb, int c_row, int c_col)
 
template<typename Float , int nSpin, int nColor, typename Arg >
void RescaleYCPU (Arg &arg)
 
template<typename Float , int nSpin, int nColor, typename Arg >
__global__ void RescaleYGPU (Arg arg)
 
template<typename Float , int n, bool compute_max_only, typename Arg >
__device__ __host__ Float computeYhat (Arg &arg, int d, int x_cb, int parity, int i, int j)
 
template<typename Float , int n, bool compute_max_only, typename Arg >
void CalculateYhatCPU (Arg &arg)
 
template<typename Float , int n, bool compute_max_only, typename Arg >
__global__ void CalculateYhatGPU (Arg arg)
 
template<typename Float , int Ns, int Ms, int Nc, int Mc, typename Arg >
__device__ __host__ __forceinline__ Float compute_site_max (Arg &arg, int x_cb, int parity, int spinor_parity, int spin_block, int color_block, bool active)
 
template<typename Float , bool block_float, int Ns, int Ms, int Nc, int Mc, int nDim, int dim, int dir, typename Arg >
__device__ __host__ __forceinline__ void packGhost (Arg &arg, int x_cb, int parity, int spinor_parity, int spin_block, int color_block)
 
template<typename Float , bool block_float, int Ns, int Ms, int Nc, int Mc, int nDim, typename Arg >
void GenericPackGhost (Arg &arg)
 
template<typename Float , bool block_float, int Ns, int Ms, int Nc, int Mc, int nDim, int dim_threads, typename Arg >
__global__ void GenericPackGhostKernel (Arg arg)
 
template<typename real , typename Arg >
__global__ void computeColorContraction (Arg arg)
 
template<typename real , typename Arg >
__global__ void computeDegrandRossiContraction (Arg arg)
 
template<typename FloatOut , typename FloatIn , int length, typename Arg >
void copyGauge (Arg &arg)
 
template<typename Float , int length, typename Arg >
void checkNan (Arg &arg)
 
template<typename FloatOut , typename FloatIn , int length, typename Arg >
__global__ void copyGaugeKernel (Arg arg)
 
template<typename FloatOut , typename FloatIn , int length, typename Arg >
void copyGhost (Arg &arg)
 
template<typename FloatOut , typename FloatIn , int length, typename Arg >
__global__ void copyGhostKernel (Arg arg)
 
template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, int mu, typename Arg , typename Vector >
__device__ __host__ void applyCovDev (Vector &out, Arg &arg, int coord[nDim], int x_cb, int parity, int idx, int thread_dim, bool &active)
 
template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, typename Arg >
__device__ __host__ void covDev (Arg &arg, int idx, int parity)
 
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__global__ void covDevGPU (Arg arg)
 
template<DslashType type>
static __host__ __device__ bool doHalo ()
 Helper function to determine if we should do the halo computation. More...
 
template<DslashType type>
static __host__ __device__ bool doBulk ()
 Helper function to determine if we should do the interior computation. More...
 
template<typename Float , int nDim, int Ns, int Nc, int Mc, int color_stride, int dim_stride, int thread_dir, int thread_dim, bool dagger, DslashType type, typename Arg >
__device__ __host__ void applyDslash (complex< Float > out[], Arg &arg, int x_cb, int src_idx, int parity, int s_row, int color_block, int color_offset)
 
template<typename Float , int Ns, int Nc, int Mc, int color_stride, bool dagger, typename Arg >
__device__ __host__ void applyClover (complex< Float > out[], Arg &arg, int x_cb, int src_idx, int parity, int s, int color_block, int color_offset)
 
template<typename Float , int nDim, int Ns, int Nc, int Mc, int color_stride, int dim_thread_split, bool dslash, bool clover, bool dagger, DslashType type, int dir, int dim, typename Arg >
__device__ __host__ void coarseDslash (Arg &arg, int x_cb, int src_idx, int parity, int s, int color_block, int color_offset)
 
template<typename Float , int nDim, int Ns, int Nc, int Mc, bool dslash, bool clover, bool dagger, DslashType type, typename Arg >
void coarseDslash (Arg arg)
 
template<typename Float , int nDim, int Ns, int Nc, int Mc, int color_stride, int dim_thread_split, bool dslash, bool clover, bool dagger, DslashType type, typename Arg >
__global__ void coarseDslashKernel (Arg arg)
 
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__device__ __host__ void domainWall4D (Arg &arg, int idx, int s, int parity)
 
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
void domainWall4DCPU (Arg &arg)
 
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__global__ void domainWall4DGPU (Arg arg)
 
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__device__ __host__ void domainWall5D (Arg &arg, int idx, int parity)
 
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
void domainWall5DCPU (Arg &arg)
 
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__global__ void domainWall5DGPU (Arg arg)
 
template<typename Float , int nColor, bool dagger, bool xpay, Dslash5Type type, typename Arg >
__device__ __host__ void dslash5 (Arg &arg, int parity, int x_cb, int s)
 Apply the D5 operator at given site. More...
 
template<typename Float , int nColor, bool dagger, bool xpay, Dslash5Type type, typename Arg >
void dslash5CPU (Arg &arg)
 CPU kernel for applying the D5 operator. More...
 
template<typename Float , int nColor, bool dagger, bool xpay, Dslash5Type type, typename Arg >
__global__ void dslash5GPU (Arg arg)
 GPU kernel for applying the D5 operator. More...
 
template<typename real , int nColor, bool dagger, Dslash5Type type, bool shared, typename Vector , typename Arg >
__device__ __host__ Vector constantInv (Arg &arg, int parity, int x_cb, int s_)
 Apply the M5 inverse operator at a given site on the lattice. This is the original algorithm as described in Kim and Izubuchi (LATTICE 2013_033), where the b and c coefficients are constant along the Ls dimension, so is suitable for Shamir and Mobius domain-wall fermions. More...
 
template<typename real , int nColor, bool dagger, Dslash5Type type, bool shared, typename Vector , typename Arg >
__device__ __host__ Vector variableInv (Arg &arg, int parity, int x_cb, int s_)
 Apply the M5 inverse operator at a given site on the lattice. This is an alternative algorithm that is applicable to variable b and c coefficients: here each thread in the s dimension starts computing at s = s_, and computes the left- and right-handed contributions in two separate passes. For the left-handed contribution we sweep through increasing s, e.g., s=s_, s_+1, s_+2, and for the right-handed one we do the transpose, s=s_, s_-1, s_-2. This allows us to progressively build up the scalar coefficients needed in a SIMD-friendly fashion. More...
 
template<typename Float , int nColor, bool dagger, bool xpay, Dslash5Type type, bool shared, bool var_inverse, typename Arg >
__device__ __host__ void dslash5inv (Arg &arg, int parity, int x_cb, int s)
 Apply the M5 inverse operator at a given site on the lattice. More...
 
template<typename Float , int nColor, bool dagger, bool xpay, Dslash5Type type, bool shared, bool var_inverse, typename Arg >
__global__ void dslash5invGPU (Arg arg)
 GPU kernel for applying the M5 inverse operator. More...
 
template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, typename Arg >
__device__ __host__ void ndegTwistedMass (Arg &arg, int idx, int flavor, int parity)
 Apply the twisted-mass dslash out(x) = M*in = a * D * in + (1 + i*b*gamma_5*tau_3 + c*tau_1)*x. Note this routine only exists in xpay form. More...
 
template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, typename Arg >
void ndegTwistedMassCPU (Arg arg)
 
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__global__ void ndegTwistedMassGPU (Arg arg)
 
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool asymmetric, bool xpay, KernelType kernel_type, typename Arg >
__device__ __host__ void ndegTwistedMass (Arg &arg, int idx, int flavor, int parity)
 Apply the twisted-mass dslash out(x) = M*in = a * D * in + (1 + i*b*gamma_5*tau_3 + c*tau_1)*x. Note this routine only exists in xpay form. More...
 
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
void ndegTwistedMassPreconditionedCPU (Arg arg)
 
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__global__ void ndegTwistedMassPreconditionedGPU (Arg arg)
 
template<bool dagger, int twist, int dim, QudaPCType pc, typename Arg >
__device__ __host__ void pack (Arg &arg, int ghost_idx, int s, int parity)
 
template<int dim, int nFace = 1, typename Arg >
__device__ __host__ void packStaggered (Arg &arg, int ghost_idx, int s, int parity)
 
template<bool dagger, int twist, QudaPCType pc, typename Arg >
__global__ void packKernel (Arg arg)
 
template<bool dagger, int twist, QudaPCType pc, typename Arg >
__global__ void packShmemKernel (Arg arg)
 
template<typename Arg >
__global__ void packStaggeredKernel (Arg arg)
 
template<typename Arg >
__global__ void packStaggeredShmemKernel (Arg arg)
 
template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, typename Arg , typename Vector >
__device__ __host__ void applyStaggered (Vector &out, Arg &arg, int coord[nDim], int x_cb, int parity, int idx, int thread_dim, bool &active)
 Applies the off-diagonal part of the Staggered / Asqtad operator. More...
 
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__device__ __host__ void staggered (Arg &arg, int idx, int parity)
 
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__global__ void staggeredGPU (Arg arg)
 
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__device__ __host__ void twistedClover (Arg &arg, int idx, int parity)
 Apply the preconditioned twisted-clover dslash. More...
 
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
void twistedCloverPreconditionedCPU (Arg arg)
 
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__global__ void twistedCloverPreconditionedGPU (Arg arg)
 
template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, typename Arg >
__device__ __host__ void twistedMass (Arg &arg, int idx, int parity)
 Apply the twisted-mass dslash out(x) = M*in = a * D * in + (1 + i*b*gamma_5)*x. Note this routine only exists in xpay form. More...
 
template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, typename Arg >
void twistedMassCPU (Arg arg)
 
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__global__ void twistedMassGPU (Arg arg)
 
template<typename Float , int nDim, int nColor, int nParity, bool dagger, int twist, KernelType kernel_type, typename Arg , typename Vector >
__device__ __host__ void applyWilsonTM (Vector &out, Arg &arg, int coord[nDim], int x_cb, int s, int parity, int idx, int thread_dim, bool &active)
 Applies the off-diagonal part of the Wilson operator premultiplied by twist rotation - this is required for applying the symmetric preconditioned twisted-mass dagger operator. More...
 
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool asymmetric, bool xpay, KernelType kernel_type, typename Arg >
__device__ __host__ void twistedMass (Arg &arg, int idx, int parity)
 Apply the preconditioned twisted-mass dslash. More...
 
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
void twistedMassPreconditionedCPU (Arg arg)
 
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__global__ void twistedMassPreconditionedGPU (Arg arg)
 
template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, typename Arg , typename Vector >
__device__ __host__ void applyWilson (Vector &out, Arg &arg, int coord[nDim], int x_cb, int s, int parity, int idx, int thread_dim, bool &active)
 Applies the off-diagonal part of the Wilson operator. More...
 
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__device__ __host__ void wilson (Arg &arg, int idx, int s, int parity)
 
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
void wilsonCPU (Arg arg)
 
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__global__ void wilsonGPU (Arg arg)
 
template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, typename Arg >
__device__ __host__ void wilsonClover (Arg &arg, int idx, int parity)
 Apply the Wilson-clover dslash out(x) = M*in = A(x)*x(x) + D * in(x-mu). Note this routine only exists in xpay form. More...
 
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
void wilsonCloverCPU (Arg arg)
 
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__global__ void wilsonCloverGPU (Arg arg)
 
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__device__ __host__ void wilsonClover (Arg &arg, int idx, int parity)
 Apply the clover preconditioned Wilson dslash. More...
 
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
void wilsonCloverPreconditionedCPU (Arg arg)
 
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__global__ void wilsonCloverPreconditionedGPU (Arg arg)
 
template<int mu, int nu, typename Float , typename Arg >
__device__ __host__ __forceinline__ void computeFmunuCore (Arg &arg, int idx, int parity)
 
template<typename Float , typename Arg >
__global__ void computeFmunuKernel (Arg arg)
 
template<typename Float , typename Arg >
void computeFmunuCPU (Arg &arg)
 
template<typename Float , typename Arg , typename Link >
__host__ __device__ void computeStaple (Arg &arg, int idx, int parity, int dir, Link &staple)
 
template<typename Float , typename Arg >
__global__ void computeAPEStep (Arg arg)
 
template<typename Float , typename Arg >
__device__ double plaquette (Arg &arg, int x[], int parity, int mu, int nu)
 
template<int blockSize, typename Float , typename Gauge >
__global__ void computePlaq (GaugePlaqArg< Gauge > arg)
 
template<int blockSize, typename Float , typename Arg >
__global__ void qChargeComputeKernel (Arg arg)
 
template<typename Float , typename Arg >
__global__ void computeSTOUTStep (Arg arg)
 
template<typename Float , typename Arg , typename Link >
__host__ __device__ void computeStapleRectangle (Arg &arg, int idx, int parity, int dir, Link &staple, Link &rectangle)
 
template<typename Float , typename Arg >
__global__ void computeOvrImpSTOUTStep (Arg arg)
 
template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, int dir, typename Arg , typename Vector >
__device__ __host__ void applyLaplace (Vector &out, Arg &arg, int coord[nDim], int x_cb, int parity, int idx, int thread_dim, bool &active)
 
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__device__ __host__ void laplace (Arg &arg, int idx, int parity)
 
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__global__ void laplaceGPU (Arg arg)
 
template<typename Float , int fineSpin, int fineColor, int coarseColor, int coarse_colors_per_thread, class FineColor , class Rotator >
__device__ __host__ void rotateCoarseColor (complex< Float > out[fineSpin *coarse_colors_per_thread], const FineColor &in, const Rotator &V, int parity, int nParity, int x_cb, int coarse_color_block)
 
template<typename Float , int fineSpin, int fineColor, int coarseSpin, int coarseColor, int coarse_colors_per_thread, typename Arg >
void Restrict (Arg arg)
 
template<int block_size, typename Float , int fineSpin, int fineColor, int coarseSpin, int coarseColor, int coarse_colors_per_thread, typename Arg >
__global__ void RestrictKernel (Arg arg)
 
void completeKSForce (GaugeField &mom, const GaugeField &oprod, const GaugeField &gauge, QudaFieldLocation location, long long *flops=NULL)
 
std::ostream & operator<< (std::ostream &output, const LatticeFieldParam &param)
 
QudaFieldLocation Location_ (const char *func, const char *file, int line, const LatticeField &a, const LatticeField &b)
 Helper function for determining if the location of the fields is the same. More...
 
template<typename... Args>
QudaFieldLocation Location_ (const char *func, const char *file, int line, const LatticeField &a, const LatticeField &b, const Args &... args)
 Helper function for determining if the location of the fields is the same. More...
 
QudaPrecision Precision_ (const char *func, const char *file, int line, const LatticeField &a, const LatticeField &b)
 Helper function for determining if the precision of the fields is the same. More...
 
template<typename... Args>
QudaPrecision Precision_ (const char *func, const char *file, int line, const LatticeField &a, const LatticeField &b, const Args &... args)
 Helper function for determining if the precision of the fields is the same. More...
 
QudaFieldLocation reorder_location ()
 Return whether data is reordered on the CPU or GPU. This can be set at QUDA initialization using the environment variable QUDA_REORDER_LOCATION. More...
 
void reorder_location_set (QudaFieldLocation reorder_location_)
 Set whether data is reordered on the CPU or GPU. This can be set at QUDA initialization using the environment variable QUDA_REORDER_LOCATION. More...
 
const char * compile_type_str (const LatticeField &meta, QudaFieldLocation location_=QUDA_INVALID_FIELD_LOCATION)
 Helper function for setting auxiliary string. More...
 
void fatLongKSLink (cudaGaugeField *fat, cudaGaugeField *lng, const cudaGaugeField &gauge, const double *coeff)
 Compute the fat and long links for improved staggered (Kogut-Susskind) fermions. More...
 
void printPeakMemUsage ()
 
void assertAllMemFree ()
 
long device_allocated_peak ()
 
long pinned_allocated_peak ()
 
long mapped_allocated_peak ()
 
long host_allocated_peak ()
 
void * device_malloc_ (const char *func, const char *file, int line, size_t size)
 
void * device_pinned_malloc_ (const char *func, const char *file, int line, size_t size)
 
void * safe_malloc_ (const char *func, const char *file, int line, size_t size)
 
void * pinned_malloc_ (const char *func, const char *file, int line, size_t size)
 
void * mapped_malloc_ (const char *func, const char *file, int line, size_t size)
 
void device_free_ (const char *func, const char *file, int line, void *ptr)
 
void device_pinned_free_ (const char *func, const char *file, int line, void *ptr)
 
void host_free_ (const char *func, const char *file, int line, void *ptr)
 
constexpr const char * str_end (const char *str)
 
constexpr bool str_slant (const char *str)
 
constexpr const char * r_slant (const char *str)
 
constexpr const char * file_name (const char *str)
 
QudaFieldLocation get_pointer_location (const void *ptr)
 
bool is_aligned (const void *ptr, size_t alignment)
 
template<typename real >
__device__ __host__ real __fast_pow (real a, int b)
 
double computeMomAction (const GaugeField &mom)
 Compute and return the global momentum action 1/2 mom^2. More...
 
void updateMomentum (GaugeField &mom, double coeff, GaugeField &force, const char *fname)
 
void applyU (GaugeField &force, GaugeField &U)
 
bool forceMonitor ()
 Whether we are monitoring the force or not. More...
 
void flushForceMonitor ()
 Flush any outstanding force monitoring information. More...
 
void ApplyCoarse (ColorSpinorField &out, const ColorSpinorField &inA, const ColorSpinorField &inB, const GaugeField &Y, const GaugeField &X, double kappa, int parity=QUDA_INVALID_PARITY, bool dslash=true, bool clover=true, bool dagger=false, const int *commDim=0, QudaPrecision halo_precision=QUDA_INVALID_PRECISION)
 Apply the coarse dslash stencil. This single driver accounts for all variations with and without the clover field, with and without dslash, and both single and full parity fields. More...
 
void CoarseOp (GaugeField &Y, GaugeField &X, const Transfer &T, const cudaGaugeField &gauge, const cudaCloverField *clover, double kappa, double mu, double mu_factor, QudaDiracType dirac, QudaMatPCType matpc)
 Coarse operator construction from a fine-grid operator (Wilson / Clover) More...
 
void CoarseCoarseOp (GaugeField &Y, GaugeField &X, const Transfer &T, const GaugeField &gauge, const GaugeField &clover, const GaugeField &cloverInv, double kappa, double mu, double mu_factor, QudaDiracType dirac, QudaMatPCType matpc, bool need_bidirectional)
 Coarse operator construction from an intermediate-grid operator (Coarse) More...
 
void calculateYhat (GaugeField &Yhat, GaugeField &Xinv, const GaugeField &Y, const GaugeField &X)
 Calculate preconditioned coarse links and coarse clover inverse field. More...
 
void Monte (cudaGaugeField &data, RNG &rngstate, double Beta, int nhb, int nover)
 Perform heatbath and overrelaxation. Performs nhb heatbath steps followed by nover overrelaxation steps. More...
 
void InitGaugeField (cudaGaugeField &data)
 Perform a cold start to the gauge field, identity SU(3) matrix, also fills the ghost links in multi-GPU case (no need to exchange data) More...
 
void InitGaugeField (cudaGaugeField &data, RNG &rngstate)
 Perform a hot start to the gauge field, random SU(3) matrix, followed by reunitarization, also exchanges border links in multi-GPU case. More...
 
void PGaugeExchange (cudaGaugeField &data, const int dir, const int parity)
 Exchange gauge field border data between nodes for the given direction and parity. More...
 
void PGaugeExchangeFree ()
 Release all allocated memory used to exchange data between nodes. More...
 
double2 getLinkDeterminant (cudaGaugeField &data)
 Calculate the Determinant. More...
 
double2 getLinkTrace (cudaGaugeField &data)
 Calculate the Trace. More...
 
void qudaMemcpy_ (void *dst, const void *src, size_t count, cudaMemcpyKind kind, const char *func, const char *file, const char *line)
 Wrapper around cudaMemcpy used for auto-profiling. Do not call directly, rather call macro below which will grab the location of the call. More...
 
void qudaMemcpyAsync_ (void *dst, const void *src, size_t count, cudaMemcpyKind kind, const cudaStream_t &stream, const char *func, const char *file, const char *line)
 Wrapper around cudaMemcpyAsync or driver API equivalent. Potentially add auto-profiling support. More...
 
void qudaMemcpy2DAsync_ (void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t hieght, cudaMemcpyKind kind, const cudaStream_t &stream, const char *func, const char *file, const char *line)
 Wrapper around cudaMemcpy2DAsync or driver API equivalent. Potentially add auto-profiling support. More...
 
cudaError_t qudaLaunchKernel (const void *func, dim3 gridDim, dim3 blockDim, void **args, size_t sharedMem, cudaStream_t stream)
 Wrapper around cudaLaunchKernel. More...
 
cudaError_t qudaEventQuery (cudaEvent_t &event)
 Wrapper around cudaEventQuery or cuEventQuery. More...
 
cudaError_t qudaEventRecord (cudaEvent_t &event, cudaStream_t stream=0)
 Wrapper around cudaEventRecord or cuEventRecord. More...
 
cudaError_t qudaStreamWaitEvent (cudaStream_t stream, cudaEvent_t event, unsigned int flags)
 Wrapper around cudaStreamWaitEvent or cuStreamWaitEvent. More...
 
cudaError_t qudaStreamSynchronize (cudaStream_t &stream)
 Wrapper around cudaStreamSynchronize or cuStreamSynchronize. More...
 
cudaError_t qudaEventSynchronize (cudaEvent_t &event)
 Wrapper around cudaEventSynchronize or cuEventSynchronize. More...
 
cudaError_t qudaDeviceSynchronize_ (const char *func, const char *file, const char *line)
 Wrapper around cudaDeviceSynchronize or cuDeviceSynchronize. More...
 
void printAPIProfile ()
 Print out the timer profile for CUDA API calls. More...
 
bool canReuseResidentGauge (QudaInvertParam *inv_param)
 
template<class T >
__device__ __host__ T getTrace (const Matrix< T, 3 > &a)
 
template<template< typename, int > class Mat, class T >
__device__ __host__ T getDeterminant (const Mat< T, 3 > &a)
 
template<template< typename, int > class Mat, class T , int N>
__device__ __host__ Mat< T, N > operator+ (const Mat< T, N > &a, const Mat< T, N > &b)
 
template<template< typename, int > class Mat, class T , int N>
__device__ __host__ Mat< T, N > operator+= (Mat< T, N > &a, const Mat< T, N > &b)
 
template<template< typename, int > class Mat, class T , int N>
__device__ __host__ Mat< T, N > operator+= (Mat< T, N > &a, const T &b)
 
template<template< typename, int > class Mat, class T , int N>
__device__ __host__ Mat< T, N > operator-= (Mat< T, N > &a, const Mat< T, N > &b)
 
template<template< typename, int > class Mat, class T , int N>
__device__ __host__ Mat< T, N > operator- (const Mat< T, N > &a, const Mat< T, N > &b)
 
template<template< typename, int > class Mat, class T , int N, class S >
__device__ __host__ Mat< T, N > operator* (const S &scalar, const Mat< T, N > &a)
 
template<template< typename, int > class Mat, class T , int N, class S >
__device__ __host__ Mat< T, N > operator* (const Mat< T, N > &a, const S &scalar)
 
template<template< typename, int > class Mat, class T , int N, class S >
__device__ __host__ Mat< T, N > operator*= (Mat< T, N > &a, const S &scalar)
 
template<template< typename, int > class Mat, class T , int N>
__device__ __host__ Mat< T, N > operator- (const Mat< T, N > &a)
 
template<template< typename, int > class Mat, class T , int N>
__device__ __host__ Mat< T, N > operator* (const Mat< T, N > &a, const Mat< T, N > &b)
 Generic implementation of matrix multiplication. More...
 
template<template< typename > class complex, typename T , int N>
__device__ __host__ Matrix< complex< T >, N > operator* (const Matrix< complex< T >, N > &a, const Matrix< complex< T >, N > &b)
 Specialization of complex matrix multiplication that will issue optimal fma instructions. More...
 
template<class T , int N>
__device__ __host__ Matrix< T, N > operator*= (Matrix< T, N > &a, const Matrix< T, N > &b)
 
template<class T , class U , int N>
__device__ __host__ Matrix< typename PromoteTypeId< T, U >::Type, N > operator* (const Matrix< T, N > &a, const Matrix< U, N > &b)
 
template<class T >
__device__ __host__ Matrix< T, 2 > operator* (const Matrix< T, 2 > &a, const Matrix< T, 2 > &b)
 
template<class T , int N>
__device__ __host__ Matrix< T, N > conj (const Matrix< T, N > &other)
 
template<class T >
__device__ __host__ Matrix< T, 3 > inverse (const Matrix< T, 3 > &u)
 
template<class T , int N>
__device__ __host__ void setIdentity (Matrix< T, N > *m)
 
template<int N>
__device__ __host__ void setIdentity (Matrix< float2, N > *m)
 
template<int N>
__device__ __host__ void setIdentity (Matrix< double2, N > *m)
 
template<class T , int N>
__device__ __host__ void setZero (Matrix< T, N > *m)
 
template<int N>
__device__ __host__ void setZero (Matrix< float2, N > *m)
 
template<int N>
__device__ __host__ void setZero (Matrix< double2, N > *m)
 
template<typename Complex , int N>
__device__ __host__ void makeAntiHerm (Matrix< Complex, N > &m)
 
template<class T , int N>
__device__ __host__ void copyColumn (const Matrix< T, N > &m, int c, Array< T, N > *a)
 
template<class T , int N>
__device__ __host__ void outerProd (const Array< T, N > &a, const Array< T, N > &b, Matrix< T, N > *m)
 
template<class T , int N>
__device__ __host__ void outerProd (const T(&a)[N], const T(&b)[N], Matrix< T, N > *m)
 
template<class T , int N>
std::ostream & operator<< (std::ostream &os, const Matrix< T, N > &m)
 
template<class T , int N>
std::ostream & operator<< (std::ostream &os, const Array< T, N > &a)
 
template<class T , class U >
__device__ void loadLinkVariableFromArray (const T *const array, const int dir, const int idx, const int stride, Matrix< U, 3 > *link)
 
template<class T , class U , int N>
__device__ void loadMatrixFromArray (const T *const array, const int idx, const int stride, Matrix< U, N > *mat)
 
__device__ void loadLinkVariableFromArray (const float2 *const array, const int dir, const int idx, const int stride, Matrix< complex< double >, 3 > *link)
 
template<class T , int N, class U >
__device__ void writeMatrixToArray (const Matrix< T, N > &mat, const int idx, const int stride, U *const array)
 
__device__ void appendMatrixToArray (const Matrix< complex< double >, 3 > &mat, const int idx, const int stride, double2 *const array)
 
__device__ void appendMatrixToArray (const Matrix< complex< float >, 3 > &mat, const int idx, const int stride, float2 *const array)
 
template<class T , class U >
__device__ void writeLinkVariableToArray (const Matrix< T, 3 > &link, const int dir, const int idx, const int stride, U *const array)
 
__device__ void writeLinkVariableToArray (const Matrix< complex< double >, 3 > &link, const int dir, const int idx, const int stride, float2 *const array)
 
template<class T >
__device__ void loadMomentumFromArray (const T *const array, const int dir, const int idx, const int stride, Matrix< T, 3 > *mom)
 
template<class T , class U >
__device__ void writeMomentumToArray (const Matrix< T, 3 > &mom, const int dir, const int idx, const U coeff, const int stride, T *const array)
 
template<class Cmplx >
__device__ __host__ void computeLinkInverse (Matrix< Cmplx, 3 > *uinv, const Matrix< Cmplx, 3 > &u)
 
void copyArrayToLink (Matrix< float2, 3 > *link, float *array)
 
template<class Cmplx , class Real >
void copyArrayToLink (Matrix< Cmplx, 3 > *link, Real *array)
 
void copyLinkToArray (float *array, const Matrix< float2, 3 > &link)
 
template<class Cmplx , class Real >
void copyLinkToArray (Real *array, const Matrix< Cmplx, 3 > &link)
 
template<class T >
__device__ __host__ Matrix< T, 3 > getSubTraceUnit (const Matrix< T, 3 > &a)
 
template<class T >
__device__ __host__ void SubTraceUnit (Matrix< T, 3 > &a)
 
template<class T >
__device__ __host__ double getRealTraceUVdagger (const Matrix< T, 3 > &a, const Matrix< T, 3 > &b)
 
template<class Cmplx >
__host__ __device__ void printLink (const Matrix< Cmplx, 3 > &link)
 
template<class Cmplx >
__device__ __host__ double ErrorSU3 (const Matrix< Cmplx, 3 > &matrix)
 
template<class T >
__device__ __host__ void exponentiate_iQ (const Matrix< T, 3 > &Q, Matrix< T, 3 > *exp_iQ)
 
template<typename Float >
__device__ __host__ void expsu3 (Matrix< complex< Float >, 3 > &q)
 
template<class Real >
__device__ Real Random (cuRNGState &state, Real a, Real b)
 Return a random number between a and b. More...
 
template<>
__device__ float Random< float > (cuRNGState &state, float a, float b)
 
template<>
__device__ double Random< double > (cuRNGState &state, double a, double b)
 
template<class Real >
__device__ Real Random (cuRNGState &state)
 Return a random number between 0 and 1. More...
 
template<>
__device__ float Random< float > (cuRNGState &state)
 
template<>
__device__ double Random< double > (cuRNGState &state)
 
template<typename T1 , typename T2 >
__host__ __device__ void copy (T1 &a, const T2 &b)
 
template<>
__host__ __device__ void copy (double &a, const int2 &b)
 
template<>
__host__ __device__ void copy (double2 &a, const int4 &b)
 
template<>
__host__ __device__ void copy (float &a, const short &b)
 
template<>
__host__ __device__ void copy (short &a, const float &b)
 
template<>
__host__ __device__ void copy (float2 &a, const short2 &b)
 
template<>
__host__ __device__ void copy (short2 &a, const float2 &b)
 
template<>
__host__ __device__ void copy (float4 &a, const short4 &b)
 
template<>
__host__ __device__ void copy (short4 &a, const float4 &b)
 
template<>
__host__ __device__ void copy (float &a, const char &b)
 
template<>
__host__ __device__ void copy (char &a, const float &b)
 
template<>
__host__ __device__ void copy (float2 &a, const char2 &b)
 
template<>
__host__ __device__ void copy (char2 &a, const float2 &b)
 
template<>
__host__ __device__ void copy (float4 &a, const char4 &b)
 
template<>
__host__ __device__ void copy (char4 &a, const float4 &b)
 
template<typename T1 , typename T2 >
__host__ __device__ void copy_scaled (T1 &a, const T2 &b)
 
template<>
__host__ __device__ void copy_scaled (short4 &a, const float4 &b)
 
template<>
__host__ __device__ void copy_scaled (char4 &a, const float4 &b)
 
template<>
__host__ __device__ void copy_scaled (short2 &a, const float2 &b)
 
template<>
__host__ __device__ void copy_scaled (char2 &a, const float2 &b)
 
template<>
__host__ __device__ void copy_scaled (short &a, const float &b)
 
template<>
__host__ __device__ void copy_scaled (char &a, const float &b)
 
template<typename T1 , typename T2 , typename T3 >
__host__ __device__ void copy_and_scale (T1 &a, const T2 &b, const T3 &c)
 Specialized variants of the copy function that include an additional scale factor. Note the scale factor is ignored unless the input type (b) is either a short or char vector. More...
 
template<>
__host__ __device__ void copy_and_scale (float4 &a, const short4 &b, const float &c)
 
template<>
__host__ __device__ void copy_and_scale (float4 &a, const char4 &b, const float &c)
 
template<>
__host__ __device__ void copy_and_scale (float2 &a, const short2 &b, const float &c)
 
template<>
__host__ __device__ void copy_and_scale (float2 &a, const char2 &b, const float &c)
 
template<>
__host__ __device__ void copy_and_scale (float &a, const short &b, const float &c)
 
template<>
__host__ __device__ void copy_and_scale (float &a, const char &b, const float &c)
 
template<typename VectorType >
__device__ __host__ VectorType vector_load (void *ptr, int idx)
 
template<typename VectorType >
__device__ __host__ void vector_store (void *ptr, int idx, const VectorType &value)
 
template<>
__device__ __host__ void vector_store (void *ptr, int idx, const double2 &value)
 
template<>
__device__ __host__ void vector_store (void *ptr, int idx, const float4 &value)
 
template<>
__device__ __host__ void vector_store (void *ptr, int idx, const float2 &value)
 
template<>
__device__ __host__ void vector_store (void *ptr, int idx, const short4 &value)
 
template<>
__device__ __host__ void vector_store (void *ptr, int idx, const short2 &value)
 
template<>
__device__ __host__ void vector_store (void *ptr, int idx, const char4 &value)
 
template<>
__device__ __host__ void vector_store (void *ptr, int idx, const char2 &value)
 
void computeStaggeredOprod (GaugeField *out[], ColorSpinorField &in, const double coeff[], int nFace)
 Compute the outer-product field between the staggered quark field's one-hop and (for HISQ and ASQTAD) three-hop sites. More...
 
template<typename Matrix , typename Float >
__host__ __device__ bool checkUnitary (const Matrix &inv, const Matrix &in, const Float tol)
 Check the unitarity of the input matrix to a given tolerance. More...
 
template<typename Matrix >
__host__ __device__ void checkUnitaryPrint (const Matrix &inv, const Matrix &in)
 Print out deviation for each component (used for debugging only). More...
 
template<typename Float >
__host__ __device__ void polarSu3 (Matrix< complex< Float >, 3 > &in, Float tol)
 Project the input matrix on the SU(3) group. First unitarize the matrix and then project onto the special unitary group. More...
 
void BlockOrthogonalize (ColorSpinorField &V, const std::vector< ColorSpinorField *> &B, const int *fine_to_coarse, const int *coarse_to_fine, const int *geo_bs, const int spin_bs, const int n_block_ortho)
 Block orthogonalize the matrix field, where the blocks are defined by lookup tables that map the fine grid points to the coarse grid points, and similarly for the spin degrees of freedom. More...
 
void Prolongate (ColorSpinorField &out, const ColorSpinorField &in, const ColorSpinorField &v, int Nvec, const int *fine_to_coarse, const int *const *spin_map, int parity=QUDA_INVALID_PARITY)
 Apply the prolongation operator. More...
 
void Restrict (ColorSpinorField &out, const ColorSpinorField &in, const ColorSpinorField &v, int Nvec, const int *fine_to_coarse, const int *coarse_to_fine, const int *const *spin_map, int parity=QUDA_INVALID_PARITY)
 Apply the restriction operator. More...
 
bool activeTuning ()
 Query if tuning is in progress. More...
 
void loadTuneCache ()
 
void saveTuneCache (bool error=false)
 
void saveProfile (const std::string label="")
 Save profile to disk. More...
 
void flushProfile ()
 Flush profile contents, setting all counts to zero. More...
 
TuneParam & tuneLaunch (Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity)
 
void postTrace_ (const char *func, const char *file, int line)
 Post an event in the trace, recording where it was posted. More...
 
const std::map< TuneKey, TuneParam > & getTuneCache ()
 Returns a reference to the tunecache map. More...
 
void enableProfileCount ()
 Enable the profile kernel counting. More...
 
void disableProfileCount ()
 Disable the profile kernel counting. More...
 
void setPolicyTuning (bool)
 Enable / disable whether we are tuning a policy. More...
 
void u32toa (char *buffer, uint32_t value)
 
void i32toa (char *buffer, int32_t value)
 
void u64toa (char *buffer, uint64_t value)
 
void i64toa (char *buffer, int64_t value)
 
void setUnitarizeLinksConstants (double unitarize_eps, double max_error, bool allow_svd, bool svd_only, double svd_rel_error, double svd_abs_error)
 
void unitarizeLinksCPU (cpuGaugeField &outfield, const cpuGaugeField &infield)
 
void unitarizeLinks (cudaGaugeField &outfield, const cudaGaugeField &infield, int *fails)
 
void unitarizeLinks (cudaGaugeField &outfield, int *fails)
 
bool isUnitary (const cpuGaugeField &field, double max_error)
 
void projectSU3 (cudaGaugeField &U, double tol, int *fails)
 Project the input gauge field onto the SU(3) group. This is a destructive operation. The number of link failures is reported so appropriate action can be taken. More...
 
template<typename Arg >
__device__ __host__ uint64_t siteChecksum (const Arg &arg, int d, int parity, int x_cb)
 
template<typename Arg >
uint64_t ChecksumCPU (const Arg &arg)
 
ColorSpinorParam colorSpinorParam (const CloverField &a, bool inverse)
 
template<bool from_coarse, typename Float , int fineSpin, int fineColor, int coarseSpin, int coarseColor, typename F , typename Ftmp , typename Vt , typename coarseGauge , typename coarseGaugeAtomic , typename fineGauge , typename fineClover >
void calculateY (coarseGauge &Y, coarseGauge &X, coarseGaugeAtomic &Y_atomic, coarseGaugeAtomic &X_atomic, Ftmp &UV, F &AV, Vt &V, fineGauge &G, fineClover &C, fineClover &Cinv, GaugeField &Y_, GaugeField &X_, GaugeField &Y_atomic_, GaugeField &X_atomic_, ColorSpinorField &uv, ColorSpinorField &av, const ColorSpinorField &v, double kappa, double mu, double mu_factor, QudaDiracType dirac, QudaMatPCType matpc, bool need_bidirectional, const int *fine_to_coarse, const int *coarse_to_fine)
 Calculate the coarse-link field, including the coarse clover field. More...
 
std::ostream & operator<< (std::ostream &out, const ColorSpinorField &a)
 
template<class T >
void random (T &t)
 
template<class T >
void point (T &t, int x, int s, int c)
 
template<class T >
void constant (T &t, int k, int s, int c)
 
template<class P >
void sin (P &p, int d, int n, int offset)
 
template<class T >
void corner (T &p, int v, int s, int c)
 
template<class U , class V >
int compareSpinor (const U &u, const V &v, const int tol)
 
template<class Order >
void print_vector (const Order &o, unsigned int x)
 
template<typename StoreType , int Ns, int Nc, QudaFieldOrder FieldOrder>
void genericCudaPrintVector (const cudaColorSpinorField &field, unsigned int i)
 
template<typename Float , int Ns, int Nc>
void genericCudaPrintVector (const cudaColorSpinorField &field, unsigned int i)
 
template<typename Float >
void genericCudaPrintVector (const cudaColorSpinorField &field, unsigned int i)
 
template<typename Float , int Nc, typename Vector , typename Arg >
__device__ __host__ void computeNeighborSum (Vector &out, Arg &arg, int x_cb, int parity)
 
template<typename Float , int Ns, int Nc, typename Arg >
__device__ __host__ void computeWupperalStep (Arg &arg, int x_cb, int parity)
 
template<typename Float , int Ns, int Nc, typename Arg >
void wuppertalStepCPU (Arg arg)
 
template<typename Float , int Ns, int Nc, typename Arg >
__global__ void wuppertalStepGPU (Arg arg)
 
void copyGenericColorSpinorDD (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)
 
void copyGenericColorSpinorDS (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)
 
void copyGenericColorSpinorDH (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)
 
void copyGenericColorSpinorDQ (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)
 
void copyGenericColorSpinorSD (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)
 
void copyGenericColorSpinorSS (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)
 
void copyGenericColorSpinorSH (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)
 
void copyGenericColorSpinorSQ (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)
 
void copyGenericColorSpinorHD (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)
 
void copyGenericColorSpinorHS (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)
 
void copyGenericColorSpinorHH (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)
 
void copyGenericColorSpinorHQ (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)
 
void copyGenericColorSpinorQD (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)
 
void copyGenericColorSpinorQS (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)
 
void copyGenericColorSpinorQH (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)
 
void copyGenericColorSpinorQQ (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)
 
void copyGenericColorSpinorMGDD (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)
 
void copyGenericColorSpinorMGDS (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)
 
void copyGenericColorSpinorMGSD (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)
 
void copyGenericColorSpinorMGSS (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)
 
void copyGenericColorSpinorMGSH (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)
 
void copyGenericColorSpinorMGSQ (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)
 
void copyGenericColorSpinorMGHS (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)
 
void copyGenericColorSpinorMGHH (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)
 
void copyGenericColorSpinorMGHQ (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)
 
void copyGenericColorSpinorMGQS (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)
 
void copyGenericColorSpinorMGQH (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)
 
void copyGenericColorSpinorMGQQ (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0)
 
template<typename Arg , typename Basis >
void copyColorSpinor (Arg &arg, const Basis &basis)
 
template<typename Arg , typename Basis >
__global__ void copyColorSpinorKernel (Arg arg, Basis basis)
 
template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename Out , typename In >
void genericCopyColorSpinor (Out &outOrder, const In &inOrder, const ColorSpinorField &out, const ColorSpinorField &in, QudaFieldLocation location)
 
template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename InOrder >
void genericCopyColorSpinor (InOrder &inOrder, ColorSpinorField &out, const ColorSpinorField &in, QudaFieldLocation location, FloatOut *Out, float *outNorm)
 
template<typename FloatOut , typename FloatIn , int Ns, int Nc>
void genericCopyColorSpinor (ColorSpinorField &out, const ColorSpinorField &in, QudaFieldLocation location, FloatOut *Out, FloatIn *In, float *outNorm, float *inNorm)
 
template<int Ns, int Nc, typename dstFloat , typename srcFloat >
void copyGenericColorSpinor (ColorSpinorField &dst, const ColorSpinorField &src, QudaFieldLocation location, dstFloat *Dst, srcFloat *Src, float *dstNorm, float *srcNorm)
 
template<int Nc, typename dstFloat , typename srcFloat >
void CopyGenericColorSpinor (ColorSpinorField &dst, const ColorSpinorField &src, QudaFieldLocation location, dstFloat *Dst, srcFloat *Src, float *dstNorm=0, float *srcNorm=0)
 
template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder >
void packSpinor (OutOrder &outOrder, const InOrder &inOrder, int volume)
 
template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder >
__global__ void packSpinorKernel (OutOrder outOrder, const InOrder inOrder, int volume)
 
template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder >
void genericCopyColorSpinor (OutOrder &outOrder, const InOrder &inOrder, const ColorSpinorField &out, QudaFieldLocation location)
 
template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename InOrder >
void genericCopyColorSpinor (InOrder &inOrder, ColorSpinorField &out, QudaFieldLocation location, FloatOut *Out)
 
template<typename FloatOut , typename FloatIn , int Ns, int Nc>
void genericCopyColorSpinor (ColorSpinorField &out, const ColorSpinorField &in, QudaFieldLocation location, FloatOut *Out, FloatIn *In)
 
template<int Ns, int Nc, typename dstFloat , typename srcFloat >
void copyGenericColorSpinor (ColorSpinorField &dst, const ColorSpinorField &src, QudaFieldLocation location, dstFloat *Dst, srcFloat *Src)
 
template<int Nc, typename dstFloat , typename srcFloat >
void CopyGenericColorSpinor (ColorSpinorField &dst, const ColorSpinorField &src, QudaFieldLocation location, dstFloat *Dst, srcFloat *Src)
 
void copyGenericGaugeDoubleOut (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out, void *In, void **ghostOut, void **ghostIn, int type)
 
void copyGenericGaugeSingleOut (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out, void *In, void **ghostOut, void **ghostIn, int type)
 
void copyGenericGaugeHalfOut (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out, void *In, void **ghostOut, void **ghostIn, int type)
 
void copyGenericGaugeQuarterOut (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out, void *In, void **ghostOut, void **ghostIn, int type)
 
void copyGenericGaugeMG (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out, void *In, void **ghostOut, void **ghostIn, int type)
 
void checkMomOrder (const GaugeField &u)
 
template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder , bool regularToextended>
__device__ __host__ void copyGaugeEx (CopyGaugeExArg< OutOrder, InOrder > &arg, int X, int parity)
 
template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder , bool regularToextended>
void copyGaugeEx (CopyGaugeExArg< OutOrder, InOrder > arg)
 
template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder , bool regularToextended>
__global__ void copyGaugeExKernel (CopyGaugeExArg< OutOrder, InOrder > arg)
 
template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder >
void copyGaugeEx (OutOrder outOrder, const InOrder inOrder, const int *E, const int *X, const int *faceVolumeCB, const GaugeField &meta, QudaFieldLocation location)
 
template<typename FloatOut , typename FloatIn , int length, typename InOrder >
void copyGaugeEx (const InOrder &inOrder, const int *X, GaugeField &out, QudaFieldLocation location, FloatOut *Out)
 
template<typename FloatOut , typename FloatIn , int length>
void copyGaugeEx (GaugeField &out, const GaugeField &in, QudaFieldLocation location, FloatOut *Out, FloatIn *In)
 
template<typename FloatOut , typename FloatIn >
void copyGaugeEx (GaugeField &out, const GaugeField &in, QudaFieldLocation location, FloatOut *Out, FloatIn *In)
 
template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder >
void copyGauge (OutOrder &&outOrder, const InOrder &inOrder, const GaugeField &out, const GaugeField &in, QudaFieldLocation location, int type)
 
template<typename FloatOut , typename FloatIn , int length, typename InOrder >
void copyGauge (const InOrder &inOrder, const GaugeField &out, const GaugeField &in, QudaFieldLocation location, FloatOut *Out, FloatOut **outGhost, int type)
 
template<typename FloatOut , typename FloatIn , int length>
void copyGauge (GaugeField &out, const GaugeField &in, QudaFieldLocation location, FloatOut *Out, FloatIn *In, FloatOut **outGhost, FloatIn **inGhost, int type)
 
template<typename FloatOut , typename FloatIn , int length, typename Out , typename In , typename Arg >
void copyMom (Arg &arg, const GaugeField &out, const GaugeField &in, QudaFieldLocation location)
 
template<typename FloatOut , typename FloatIn >
void copyGauge (GaugeField &out, const GaugeField &in, QudaFieldLocation location, FloatOut *Out, FloatIn *In, FloatOut **outGhost, FloatIn **inGhost, int type)
 
template<typename sFloatOut , typename sFloatIn , int Nc, typename InOrder >
void copyGaugeMG (const InOrder &inOrder, GaugeField &out, const GaugeField &in, QudaFieldLocation location, sFloatOut *Out, sFloatOut **outGhost, int type)
 
template<typename sFloatOut , typename sFloatIn , int Nc>
void copyGaugeMG (GaugeField &out, const GaugeField &in, QudaFieldLocation location, sFloatOut *Out, sFloatIn *In, sFloatOut **outGhost, sFloatIn **inGhost, int type)
 
template<typename FloatOut , typename FloatIn >
void copyGaugeMG (GaugeField &out, const GaugeField &in, QudaFieldLocation location, FloatOut *Out, FloatIn *In, FloatOut **outGhost, FloatIn **inGhost, int type)
 
void * create_gauge_buffer (size_t bytes, QudaGaugeFieldOrder order, QudaFieldGeometry geometry)
 
void ** create_ghost_buffer (size_t bytes[], QudaGaugeFieldOrder order, QudaFieldGeometry geometry)
 
void free_gauge_buffer (void *buffer, QudaGaugeFieldOrder order, QudaFieldGeometry geometry)
 
void free_ghost_buffer (void **buffer, QudaGaugeFieldOrder order, QudaFieldGeometry geometry)
 
std::ostream & operator<< (std::ostream &out, const cudaColorSpinorField &a)
 
static std::vector< DslashCoarsePolicy > policies (static_cast< int >(DslashCoarsePolicy::DSLASH_COARSE_POLICY_DISABLED), DslashCoarsePolicy::DSLASH_COARSE_POLICY_DISABLED)
 
void enable_policy (DslashCoarsePolicy p)
 
void disable_policy (DslashCoarsePolicy p)
 
template<typename Float , int nSpin, int nColor, bool spin_project>
std::ostream & operator<< (std::ostream &out, const PackArg< Float, nSpin, nColor, spin_project > &arg)
 
template<typename Float , int nColor>
void PackGhost (void *ghost[], const ColorSpinorField &in, MemoryLocation location, int nFace, bool dagger, int parity, bool spin_project, double a, double b, double c, const cudaStream_t &stream)
 
template<typename Float >
void PackGhost (void *ghost[], const ColorSpinorField &in, MemoryLocation location, int nFace, bool dagger, int parity, bool spin_project, double a, double b, double c, const cudaStream_t &stream)
 
template<typename Float , int nColor, typename Arg >
void gammaCPU (Arg arg)
 
template<typename Float , int nColor, int d, typename Arg >
__global__ void gammaGPU (Arg arg)
 
template<typename Float , int nColor>
void ApplyGamma (ColorSpinorField &out, const ColorSpinorField &in, int d)
 
template<typename Float >
void ApplyGamma (ColorSpinorField &out, const ColorSpinorField &in, int d)
 
template<bool doublet, typename Float , int nColor, typename Arg >
void twistGammaCPU (Arg arg)
 
template<bool doublet, typename Float , int nColor, int d, typename Arg >
__global__ void twistGammaGPU (Arg arg)
 
template<typename Float , int nSpin, int nColor, typename Arg >
__device__ __host__ void cloverApply (Arg &arg, int x_cb, int parity)
 
template<typename Float , int nSpin, int nColor, typename Arg >
void cloverCPU (Arg &arg)
 
template<typename Float , int nSpin, int nColor, typename Arg >
__global__ void cloverGPU (Arg arg)
 
template<bool inverse, typename Float , int nSpin, int nColor, typename Arg >
__device__ __host__ void twistCloverApply (Arg &arg, int x_cb, int parity)
 
template<bool inverse, typename Float , int nSpin, int nColor, typename Arg >
void twistCloverCPU (Arg &arg)
 
template<bool inverse, typename Float , int nSpin, int nColor, typename Arg >
__global__ void twistCloverGPU (Arg arg)
 
template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis , bool extend>
__device__ __host__ void copyInterior (CopySpinorExArg< OutOrder, InOrder, Basis > &arg, int X)
 
template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis , bool extend>
__global__ void copyInteriorKernel (CopySpinorExArg< OutOrder, InOrder, Basis > arg)
 
template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis , bool extend>
void copyInterior (CopySpinorExArg< OutOrder, InOrder, Basis > &arg)
 
template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis >
void copySpinorEx (OutOrder outOrder, const InOrder inOrder, const Basis basis, const int *E, const int *X, const int parity, const bool extend, const ColorSpinorField &meta, QudaFieldLocation location)
 
template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder >
void copySpinorEx (OutOrder outOrder, InOrder inOrder, const QudaGammaBasis outBasis, const QudaGammaBasis inBasis, const int *E, const int *X, const int parity, const bool extend, const ColorSpinorField &meta, QudaFieldLocation location)
 
template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename InOrder >
void extendedCopyColorSpinor (InOrder &inOrder, ColorSpinorField &out, QudaGammaBasis inBasis, const int *E, const int *X, const int parity, const bool extend, QudaFieldLocation location, FloatOut *Out, float *outNorm)
 
template<typename FloatOut , typename FloatIn , int Ns, int Nc>
void extendedCopyColorSpinor (ColorSpinorField &out, const ColorSpinorField &in, const int parity, const QudaFieldLocation location, FloatOut *Out, FloatIn *In, float *outNorm, float *inNorm)
 
template<int Ns, typename dstFloat , typename srcFloat >
void copyExtendedColorSpinor (ColorSpinorField &dst, const ColorSpinorField &src, const int parity, const QudaFieldLocation location, dstFloat *Dst, srcFloat *Src, float *dstNorm, float *srcNorm)
 
template<typename dstFloat , typename srcFloat >
void CopyExtendedColorSpinor (ColorSpinorField &dst, const ColorSpinorField &src, const int parity, const QudaFieldLocation location, dstFloat *Dst, srcFloat *Src, float *dstNorm=0, float *srcNorm=0)
 
template<typename Float >
void extractGhost (const GaugeField &u, Float **Ghost, bool extract, int offset)
 
void extractGaugeGhostMG (const GaugeField &u, void **ghost, bool extract, int offset)
 
template<typename Float , int length, int dim, typename Arg >
__device__ __host__ void extractor (Arg &arg, int dir, int a, int b, int c, int d, int g, int parity)
 
template<typename Float , int length, int dim, typename Arg >
__device__ __host__ void injector (Arg &arg, int dir, int a, int b, int c, int d, int g, int parity)
 
template<typename Float , int length, int nDim, int dim, typename Order , bool extract>
void extractGhostEx (ExtractGhostExArg< Order, nDim, dim > arg)
 
template<typename Float , int length, int nDim, int dim, typename Order , bool extract>
__global__ void extractGhostExKernel (ExtractGhostExArg< Order, nDim, dim > arg)
 
template<typename Float , int length, typename Order >
void extractGhostEx (Order order, const int dim, const int *surfaceCB, const int *E, const int *R, bool extract, const GaugeField &u, QudaFieldLocation location)
 
template<typename Float >
void extractGhostEx (const GaugeField &u, int dim, const int *R, Float **Ghost, bool extract)
 
template<int nDim, bool extract, typename Arg >
void extractGhost (Arg &arg)
 
template<int nDim, bool extract, typename Arg >
__global__ void extractGhostKernel (Arg arg)
 
template<typename Float , int length, typename Order >
void extractGhost (Order order, const GaugeField &u, QudaFieldLocation location, bool extract, int offset)
 
template<typename storeFloat , int Nc>
void extractGhostMG (const GaugeField &u, storeFloat **Ghost, bool extract, int offset)
 
template<typename Float >
void extractGhostMG (const GaugeField &u, Float **Ghost, bool extract, int offset)
 
ColorSpinorParam colorSpinorParam (const GaugeField &a)
 
template<int NCOLORS>
static __host__ __device__ void IndexBlock (int block, int &p, int &q)
 
template<int blockSize, typename Float , int gauge_dir, int NCOLORS>
__forceinline__ __device__ void GaugeFixHit_AtomicAdd (Matrix< complex< Float >, NCOLORS > &link, const Float relax_boost, const int tid)
 
template<int blockSize, typename Float , int gauge_dir, int NCOLORS>
__forceinline__ __device__ void GaugeFixHit_NoAtomicAdd (Matrix< complex< Float >, NCOLORS > &link, const Float relax_boost, const int tid)
 
template<int blockSize, typename Float , int gauge_dir, int NCOLORS>
__forceinline__ __device__ void GaugeFixHit_NoAtomicAdd_LessSM (Matrix< complex< Float >, NCOLORS > &link, const Float relax_boost, const int tid)
 
template<int blockSize, typename Float , int gauge_dir, int NCOLORS>
__forceinline__ __device__ void GaugeFixHit_AtomicAdd (Matrix< complex< Float >, NCOLORS > &link, Matrix< complex< Float >, NCOLORS > &link1, const Float relax_boost, const int tid)
 
template<int blockSize, typename Float , int gauge_dir, int NCOLORS>
__forceinline__ __device__ void GaugeFixHit_NoAtomicAdd (Matrix< complex< Float >, NCOLORS > &link, Matrix< complex< Float >, NCOLORS > &link1, const Float relax_boost, const int tid)
 
template<int blockSize, typename Float , int gauge_dir, int NCOLORS>
__forceinline__ __device__ void GaugeFixHit_NoAtomicAdd_LessSM (Matrix< complex< Float >, NCOLORS > &link, Matrix< complex< Float >, NCOLORS > &link1, const Float relax_boost, const int tid)
 
template<typename Float , typename Gauge >
void plaquette (const Gauge dataOr, const GaugeField &data, double2 &plq, QudaFieldLocation location)
 
template<typename Float >
void plaquette (const GaugeField &data, double2 &plq, QudaFieldLocation location)
 
template<typename real , typename Link >
__device__ __host__ Link gauss_su3 (cuRNGState &localState)
 
template<typename Float , typename Arg >
__global__ void computeGenGauss (Arg arg)
 
template<typename Float , QudaReconstructType recon, bool group>
void genGauss (GaugeField &U, RNG &rngstate, double sigma)
 
template<typename Float , typename GaugeOr , typename GaugeDs >
void OvrImpSTOUTStep (GaugeOr origin, GaugeDs dest, const GaugeField &dataOr, Float rho, Float epsilon)
 
template<typename Float >
void OvrImpSTOUTStep (GaugeField &dataDs, const GaugeField &dataOr, Float rho, Float epsilon)
 
void printLaunchTimer ()
 
void setDiracRefineParam (DiracParam &diracParam, QudaInvertParam *inv_param, const bool pc)
 
void setDiracPreParam (DiracParam &diracParam, QudaInvertParam *inv_param, const bool pc, bool comms)
 
void createDirac (Dirac *&d, Dirac *&dSloppy, Dirac *&dPre, QudaInvertParam &param, const bool pc_solve)
 
void createDirac (Dirac *&d, Dirac *&dSloppy, Dirac *&dPre, Dirac *&dRef, QudaInvertParam &param, const bool pc_solve)
 
void massRescale (cudaColorSpinorField &b, QudaInvertParam &param)
 
void fillInnerSolveParam (SolverParam &inner, const SolverParam &outer)
 
int reliable (double &rNorm, double &maxrx, double &maxrr, const double &r2, const double &delta)
 
template<int N>
void compute_alpha_N (Complex *Q_AQandg, Complex *alpha)
 
template<int N>
void compute_beta_N (Complex *Q_AQandg, Complex *Q_AS, Complex *beta)
 
template<libtype which_lib>
void ComputeRitz (EigCGArgs &args)
 
template<>
void ComputeRitz< libtype::eigen_lib > (EigCGArgs &args)
 
template<>
void ComputeRitz< libtype::magma_lib > (EigCGArgs &args)
 
static void fillEigCGInnerSolverParam (SolverParam &inner, const SolverParam &outer, bool use_sloppy_partial_accumulator=true)
 
static void fillInitCGSolverParam (SolverParam &inner, const SolverParam &outer)
 
double timeInterval (struct timeval start, struct timeval end)
 
void computeBeta (Complex **beta, std::vector< ColorSpinorField *> Ap, int i, int N, int k)
 
void updateAp (Complex **beta, std::vector< ColorSpinorField *> Ap, int begin, int size, int k)
 
void orthoDir (Complex **beta, std::vector< ColorSpinorField *> Ap, int k, int pipeline)
 
void backSubs (const Complex *alpha, Complex **const beta, const double *gamma, Complex *delta, int n)
 
void updateSolution (ColorSpinorField &x, const Complex *alpha, Complex **const beta, double *gamma, int k, std::vector< ColorSpinorField *> p)
 
template<libtype which_lib>
void ComputeHarmonicRitz (GMResDRArgs &args)
 
template<>
void ComputeHarmonicRitz< libtype::magma_lib > (GMResDRArgs &args)
 
template<>
void ComputeHarmonicRitz< libtype::eigen_lib > (GMResDRArgs &args)
 
template<libtype which_lib>
void ComputeEta (GMResDRArgs &args)
 
template<>
void ComputeEta< libtype::magma_lib > (GMResDRArgs &args)
 
template<>
void ComputeEta< libtype::eigen_lib > (GMResDRArgs &args)
 
void fillFGMResDRInnerSolveParam (SolverParam &inner, const SolverParam &outer)
 
template<typename T >
static void applyT (T d_out[], const T d_in[], const T gamma[], const T rho[], int N)
 
template<typename T >
static void applyB (T d_out[], const T d_in[], int N)
 
void print (const double d[], int n)
 
template<typename T >
static void zero (T d[], int N)
 
template<typename T >
static void applyThirdTerm (T d_out[], const T d_in[], int k, int j, int s, const T gamma[], const T rho[], const T gamma_kprev[], const T rho_kprev[])
 
template<typename T >
static void computeCoeffs (T d_out[], const T d_p1[], const T d_p2[], int k, int j, int s, const T gamma[], const T rho[], const T gamma_kprev[], const T rho_kprev[])
 
void updateAlphaZeta (double *alpha, double *zeta, double *zeta_old, const double *r2, const double *beta, const double pAp, const double *offset, const int nShift, const int j_low)
 
static void fillInnerSolverParam (SolverParam &inner, const SolverParam &outer)
 
template<typename Float , typename Oprod , typename Gauge , typename Mom >
__host__ __device__ void completeKSForceCore (KSForceArg< Oprod, Gauge, Mom > &arg, int idx)
 
template<typename Float , typename Oprod , typename Gauge , typename Mom >
__global__ void completeKSForceKernel (KSForceArg< Oprod, Gauge, Mom > arg)
 
template<typename Float , typename Oprod , typename Gauge , typename Mom >
void completeKSForceCPU (KSForceArg< Oprod, Gauge, Mom > &arg)
 
template<typename Float , typename Oprod , typename Gauge , typename Mom >
void completeKSForce (Oprod oprod, Gauge gauge, Mom mom, int dim[4], const GaugeField &meta, QudaFieldLocation location, long long *flops)
 
template<typename Float , typename Result , typename Oprod , typename Gauge >
__host__ __device__ void computeKSLongLinkForceCore (KSLongLinkArg< Result, Oprod, Gauge > &arg, int idx)
 
template<typename Float , typename Result , typename Oprod , typename Gauge >
__global__ void computeKSLongLinkForceKernel (KSLongLinkArg< Result, Oprod, Gauge > arg)
 
template<typename Float , typename Result , typename Oprod , typename Gauge >
void computeKSLongLinkForceCPU (KSLongLinkArg< Result, Oprod, Gauge > &arg)
 
template<typename Float , typename Result , typename Oprod , typename Gauge >
void computeKSLongLinkForce (Result res, Oprod oprod, Gauge gauge, int dim[4], const GaugeField &meta, QudaFieldLocation location)
 
template<typename Float >
void computeKSLongLinkForce (GaugeField &result, const GaugeField &oprod, const GaugeField &gauge, QudaFieldLocation location)
 
static void print_trace (void)
 
static void print_alloc_header ()
 
static void print_alloc (AllocType type)
 
static void track_malloc (const AllocType &type, const MemAlloc &a, void *ptr)
 
static void track_free (const AllocType &type, void *ptr)
 
static void * aligned_malloc (MemAlloc &a, size_t size)
 
template<typename real , int Nc, QudaCloverFieldOrder order>
double norm (const CloverField &u, norm_type_ type)
 
template<typename real , int Nc>
double norm (const CloverField &u, norm_type_ type)
 
template<typename real >
double _norm (const CloverField &u, norm_type_ type)
 
template<typename real , int Nc, QudaGaugeFieldOrder order>
double norm (const GaugeField &u, int d, norm_type_ type)
 
template<typename real , int Nc>
double norm (const GaugeField &u, int d, norm_type_ type)
 
template<typename real >
double norm (const GaugeField &u, int d, norm_type_ type)
 
void forceRecord (double2 &force, double dt, const char *fname)
 
dim3 GetBlockDim (size_t threads, size_t size)
 
__global__ void kernel_random (cuRNGState *state, unsigned long long seed, int size_cb, rngArg arg)
 CUDA kernel to initialize CURAND RNG states. More...
 
void launch_kernel_random (cuRNGState *state, unsigned long long seed, int size_cb, int n_parity, int X[4])
 Call CUDA kernel to initialize CURAND RNG states. More...
 
template<IndexType idxType, typename Int >
__device__ __forceinline__ int neighborIndex (const unsigned int &cb_idx, const int(&shift)[4], const bool(&partitioned)[4], const unsigned int &parity)
 
template<typename FloatN , int N, typename Output , typename Input >
__global__ void shiftColorSpinorFieldKernel (ShiftQuarkArg< Output, Input > arg)
 
template<typename FloatN , int N, typename Output , typename Input >
__global__ void shiftColorSpinorFieldExternalKernel (ShiftQuarkArg< Output, Input > arg)
 
void shiftColorSpinorField (cudaColorSpinorField &dst, const cudaColorSpinorField &src, const unsigned int parity, const unsigned int dim, const int shift)
 
static void report (const char *type)
 
template<typename real , typename Arg >
__device__ __host__ void genGauss (Arg &arg, cuRNGState &localState, int parity, int x_cb, int s, int c)
 
template<typename real , typename Arg >
__device__ __host__ void genUniform (Arg &arg, cuRNGState &localState, int parity, int x_cb, int s, int c)
 
template<typename real , int Ns, int Nc, QudaNoiseType type, typename Arg >
void SpinorNoiseCPU (Arg &arg)
 
template<typename real , int Ns, int Nc, QudaNoiseType type, typename Arg >
__global__ void SpinorNoiseGPU (Arg arg)
 
void computeStaggeredOprod (GaugeField &outA, GaugeField &outB, ColorSpinorField &inEven, ColorSpinorField &inOdd, int parity, const double coeff[2], int nFace)
 
int traceEnabled ()
 
static void deserializeTuneCache (std::istream &in)
 
static void serializeTuneCache (std::ostream &out)
 
static void serializeProfile (std::ostream &out, std::ostream &async_out)
 
static void serializeTrace (std::ostream &out)
 
static void broadcastTuneCache ()
 
bool policyTuning ()
 
template<typename Float , typename G >
__global__ void ProjectSU3kernel (ProjectSU3Arg< Float, G > arg)
 
void setTransferGPU (bool)
 

Variables

__device__ unsigned int count [QUDA_MAX_MULTI_REDUCE] = { }
 
__shared__ bool isLastBlockDone
 
__shared__ volatile bool isLastWarpDone [16]
 
static __constant__ signed char B_array_d [MAX_MATRIX_SIZE]
 
static signed char B_array_h [MAX_MATRIX_SIZE]
 
__shared__ float s []
 
constexpr int size = 4096
 
static __constant__ char mobius_d [size]
 
static __constant__ char mobius_d [size]
 
static int commDim [QUDA_MAX_DIM]
 
const int Nstream = 9
 
static const char gDigitsLut [200]
 
static bool bidirectional_debug = false
 
cudaStream_t * stream
 
static bool complete_recv_fwd [QUDA_MAX_DIM] = { }
 
static bool complete_recv_back [QUDA_MAX_DIM] = { }
 
static bool complete_send_fwd [QUDA_MAX_DIM] = { }
 
static bool complete_send_back [QUDA_MAX_DIM] = { }
 
static auto pinned_allocator = [] (size_t bytes ) { return static_cast<Complex*>(pool_pinned_malloc(bytes)); }
 
static auto pinned_deleter = [] (Complex *hptr) { pool_pinned_free(hptr); }
 
static bool dslash_init = false
 
static int first_active_policy =static_cast<int>(DslashCoarsePolicy::DSLASH_COARSE_POLICY_DISABLED)
 
static char policy_string [TuneKey::aux_n]
 
static bool kernelPackT = false
 
static std::stack< bool > kptstack
 
static double unscaled_shifts [QUDA_MAX_MULTI_SHIFT]
 
static int max_eigcg_cycles = 4
 
static QudaFieldLocation reorder_location_ = QUDA_CUDA_FIELD_LOCATION
 
static std::map< void *, MemAlloc > alloc [N_ALLOC_TYPE]
 
static long total_bytes [N_ALLOC_TYPE] = {0}
 
static long max_total_bytes [N_ALLOC_TYPE] = {0}
 
static long total_host_bytes
 
static long max_total_host_bytes
 
static long total_pinned_bytes
 
static long max_total_pinned_bytes
 
static std::stringstream force_stream
 
static long long force_count = 0
 
static long long force_flush = 1000
 
static bool debug = false
 
static TimeProfile apiTimer ("CUDA API calls (driver)")
 
static TuneKey last_key
 
static std::list< TraceKey > trace_list
 
static int enable_trace = 0
 
static const std::string quda_hash = QUDA_HASH
 
static std::string resource_path
 
static map tunecache
 
static map::iterator it
 
static size_t initial_cache_size = 0
 
static const std::string quda_version = STR(QUDA_VERSION_MAJOR) "." STR(QUDA_VERSION_MINOR) "." STR(QUDA_VERSION_SUBMINOR)
 
static bool tuning = false
 
static bool profile_count = true
 
static bool policy_tuning = false
 
static TimeProfile launchTimer ("tuneLaunch")
 

Detailed Description

This is the covariant derivative based on the basic gauged Laplace operator

This is the gauged domain-wall 4-d preconditioned operator.

Note, for now, this just applies a batched 4-d dslash across the fifth dimension.

This is the gauged domain-wall 5-d preconditioned operator.

This is a staggered Dirac operator

This is the gauged twisted-mass operator acting on a non-degenerate quark doublet.

This is the preconditioned twisted-mass operator acting on a non-degenerate quark doublet.

This is the basic gauged twisted-clover operator

This is the preconditioned gauged twisted-mass operator

This is the basic gauged twisted-mass operator

This is the basic gauged Wilson operator

TODO

This is the Wilson-clover linear operator

This is the Wilson-clover preconditioned linear operator

This code has not been checked. In particular, I suspect it is erroneous in multi-GPU since it looks like the halo ghost region isn't being treated here.

Generic Multi Shift Solver

For staggered, the mass is folded into the Dirac operator. Otherwise the matrix mass is 'unmodified'.

The lowest offset is in offsets[0]

This is the laplacian derivative based on the basic gauged differential operator

Typedef Documentation

◆ ColorSpinorFieldSet

Definition at line 1220 of file invert_quda.h.

◆ Complex

typedef std::complex<double> quda::Complex

Definition at line 46 of file quda_internal.h.

◆ CompositeColorSpinorField

Typedef for a set of spinors. Can be further divided into subsets, e.g., with different precisions (not implemented currently).

Definition at line 17 of file color_spinor_field.h.

◆ cuRNGState

typedef struct curandStateMRG32k3a quda::cuRNGState

Definition at line 17 of file random_quda.h.

◆ DenseMatrix

typedef MatrixXcd quda::DenseMatrix

Definition at line 36 of file inv_eigcg_quda.cpp.

◆ DynamicStride

typedef Stride< Dynamic, Dynamic > quda::DynamicStride

Definition at line 18 of file deflation.cpp.

◆ map

typedef std::map<TuneKey, TuneParam> quda::map

Definition at line 28 of file tune.cpp.

◆ RealVector

using quda::RealVector = VectorXd

Definition at line 39 of file inv_eigcg_quda.cpp.

◆ RowMajorDenseMatrix

typedef Matrix< Complex, Dynamic, Dynamic, RowMajor > quda::RowMajorDenseMatrix

Definition at line 42 of file inv_eigcg_quda.cpp.

◆ storeType

typedef int quda::storeType

Definition at line 15 of file coarse_op_kernel.cuh.

◆ Vector

typedef VectorXcd quda::Vector

Definition at line 38 of file inv_eigcg_quda.cpp.

◆ VectorSet

typedef MatrixXcd quda::VectorSet

Definition at line 37 of file inv_eigcg_quda.cpp.

Enumeration Type Documentation

◆ AllocType

Enumerator
DEVICE 
DEVICE_PINNED 
HOST 
PINNED 
MAPPED 
N_ALLOC_TYPE 

Definition at line 16 of file malloc.cpp.

◆ BiCGstabLUpdateType

The following code is based on Kate's worker class in Multi-CG.

This worker class is used to update most of the u and r vectors. On BiCG iteration j, r[0] through r[j] and u[0] through u[j] all get updated, but the subsequent mat-vec operation only gets applied to r[j] and u[j]. Thus, we can hide updating r[0] through r[j-1] and u[0] through u[j-1], respectively, in the comms for the matvec on r[j] and u[j]. This results in improved strong scaling for BiCGstab-L.

See paragraphs 2 and 3 in the comments on the Worker class in Multi-CG for more remarks.

Enumerator
BICGSTABL_UPDATE_U 
BICGSTABL_UPDATE_R 

Definition at line 173 of file inv_bicgstabl_quda.cpp.

◆ ComputeType

Enumerator
COMPUTE_UV 
COMPUTE_AV 
COMPUTE_TMAV 
COMPUTE_TMCAV 
COMPUTE_CLOVER_INV_MAX 
COMPUTE_TWISTED_CLOVER_INV_MAX 
COMPUTE_VUV 
COMPUTE_COARSE_CLOVER 
COMPUTE_REVERSE_Y 
COMPUTE_DIAGONAL 
COMPUTE_TMDIAGONAL 
COMPUTE_CONVERT 
COMPUTE_RESCALE 
COMPUTE_INVALID 

Definition at line 13 of file coarse_op.cuh.

◆ Dslash5Type

Enumerator
DSLASH5_DWF 
DSLASH5_MOBIUS_PRE 
DSLASH5_MOBIUS 
M5_INV_DWF 
M5_INV_MOBIUS 
M5_INV_ZMOBIUS 

Definition at line 396 of file dslash_quda.h.

◆ DslashCoarsePolicy

Enumerator
DSLASH_COARSE_BASIC 
DSLASH_COARSE_ZERO_COPY_PACK 
DSLASH_COARSE_ZERO_COPY_READ 
DSLASH_COARSE_ZERO_COPY 
DSLASH_COARSE_GDR_SEND 
DSLASH_COARSE_GDR_RECV 
DSLASH_COARSE_GDR 
DSLASH_COARSE_ZERO_COPY_PACK_GDR_RECV 
DSLASH_COARSE_GDR_SEND_ZERO_COPY_READ 
DSLASH_COARSE_POLICY_DISABLED 

Definition at line 458 of file dslash_coarse.cu.

◆ DslashType

Enumerator
DSLASH_INTERIOR 
DSLASH_EXTERIOR 
DSLASH_FULL 

Definition at line 16 of file dslash_coarse.cuh.

◆ KernelType

Enumerator
INTERIOR_KERNEL 
EXTERIOR_KERNEL_ALL 
EXTERIOR_KERNEL_X 
EXTERIOR_KERNEL_Y 
EXTERIOR_KERNEL_Z 
EXTERIOR_KERNEL_T 
KERNEL_POLICY 

Definition at line 464 of file index_helper.cuh.

◆ libtype [1/2]

enum quda::libtype
strong
Enumerator
eigen_lib 
magma_lib 
lapack_lib 
mkl_lib 
eigen_lib 
magma_lib 
lapack_lib 
mkl_lib 

Definition at line 47 of file inv_eigcg_quda.cpp.

◆ libtype [2/2]

enum quda::libtype
strong
Enumerator
eigen_lib 
magma_lib 
lapack_lib 
mkl_lib 
eigen_lib 
magma_lib 
lapack_lib 
mkl_lib 

Definition at line 57 of file inv_gmresdr_quda.cpp.

◆ MemoryLocation

Enumerator
Device 
Host 
Remote 

Definition at line 15 of file color_spinor_field.h.

◆ norm_type_ [1/2]

Enumerator
NORM1 
NORM2 
ABS_MAX 
ABS_MIN 
NORM1 
NORM2 
ABS_MAX 
ABS_MIN 

Definition at line 7 of file max_gauge.cu.

◆ norm_type_ [2/2]

Enumerator
NORM1 
NORM2 
ABS_MAX 
ABS_MIN 
NORM1 
NORM2 
ABS_MAX 
ABS_MIN 

Definition at line 7 of file max_clover.cu.

◆ QudaProfileType

Enumerator
QUDA_PROFILE_H2D 

host -> device transfers

QUDA_PROFILE_D2H 

The time in seconds for device -> host transfers

QUDA_PROFILE_INIT 

The time in seconds taken for initialization

QUDA_PROFILE_PREAMBLE 

The time in seconds taken for any preamble

QUDA_PROFILE_COMPUTE 

The time in seconds taken for the actual computation

QUDA_PROFILE_COMMS 

synchronous communication

QUDA_PROFILE_EPILOGUE 

The time in seconds taken for any epilogue

QUDA_PROFILE_FREE 

The time in seconds for freeing resources

QUDA_PROFILE_IO 

time spent on file i/o

QUDA_PROFILE_CHRONO 

time spent on chronology

QUDA_PROFILE_EIGEN 

time spent on host-side Eigen

QUDA_PROFILE_ARPACK 

time spent on host-side ARPACK

QUDA_PROFILE_LOWER_LEVEL 

dummy timer to mark the beginning of lower-level timers, which do not count towards global time

QUDA_PROFILE_PACK_KERNEL 

face packing kernel

QUDA_PROFILE_DSLASH_KERNEL 

dslash kernel

QUDA_PROFILE_GATHER 

gather (device -> host)

QUDA_PROFILE_SCATTER 

scatter (host -> device)

QUDA_PROFILE_LAUNCH_KERNEL 

cudaLaunchKernel

QUDA_PROFILE_EVENT_RECORD 

cuda event record

QUDA_PROFILE_EVENT_QUERY 

cuda event querying

QUDA_PROFILE_STREAM_WAIT_EVENT 

stream waiting for event completion

QUDA_PROFILE_FUNC_SET_ATTRIBUTE 

set function attribute

QUDA_PROFILE_EVENT_SYNCHRONIZE 

event synchronization

QUDA_PROFILE_STREAM_SYNCHRONIZE 

stream synchronization

QUDA_PROFILE_DEVICE_SYNCHRONIZE 

device synchronization

QUDA_PROFILE_MEMCPY_D2D_ASYNC 

device to device async copy

QUDA_PROFILE_MEMCPY_D2H_ASYNC 

device to host async copy

QUDA_PROFILE_MEMCPY2D_D2H_ASYNC 

device to host 2-d memcpy async copy

QUDA_PROFILE_MEMCPY_H2D_ASYNC 

host to device async copy

QUDA_PROFILE_COMMS_START 

initiating communication

QUDA_PROFILE_COMMS_QUERY 

querying communication

QUDA_PROFILE_CONSTANT 

time spent setting CUDA constant parameters

QUDA_PROFILE_TOTAL 

The total time in seconds for the algorithm. Must be the penultimate type.

QUDA_PROFILE_COUNT 

The total number of timers we have. Must be last enum type.

Definition at line 103 of file timer.h.

Function Documentation

◆ __fast_pow()

template<typename real >
__device__ __host__ real quda::__fast_pow ( real  a,
int  b 
)
inline

Definition at line 15 of file math_helper.cuh.

References pow().

Referenced by constantInv().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ __launch_bounds__()

template<int block_size, typename sumFloat , typename Float , int nSpin, int spinBlockSize, int nColor, int coarseSpin, int nVec, typename Arg >
quda::__launch_bounds__ ( 2 *  block_size)

◆ _norm()

template<typename real >
double quda::_norm ( const CloverField &  u,
norm_type_  type 
)

Definition at line 40 of file max_clover.cu.

References errorQuda, and quda::CloverField::Ncolor().

Here is the call graph for this function:

◆ abs() [1/4]

template<typename ValueType >
__host__ __device__ ValueType quda::abs ( ValueType  x)
inline

◆ abs() [2/4]

template<typename ValueType >
__host__ __device__ ValueType quda::abs ( const complex< ValueType > &  z)
inline

Returns the magnitude of z.

Definition at line 1060 of file complex_quda.h.

◆ abs() [3/4]

template<>
__host__ __device__ float quda::abs ( const complex< float > &  z)
inline

Definition at line 1065 of file complex_quda.h.

References quda::complex< float >::imag(), and quda::complex< float >::real().

Here is the call graph for this function:

◆ abs() [4/4]

template<>
__host__ __device__ double quda::abs ( const complex< double > &  z)
inline

Definition at line 1070 of file complex_quda.h.

References quda::complex< double >::imag(), and quda::complex< double >::real().

Referenced by abs().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ acos() [1/2]

template<typename ValueType >
__host__ __device__ ValueType quda::acos ( ValueType  x)
inline

Definition at line 61 of file complex_quda.h.

References acos().

Referenced by exponentiate_iQ(), and setUnitarizeLinksConstants().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ acos() [2/2]

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::acos ( const complex< ValueType > &  z)
inline

Definition at line 1274 of file complex_quda.h.

References asin().

Referenced by acos().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ acosh()

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::acosh ( const complex< ValueType > &  z)
inline

Definition at line 1295 of file complex_quda.h.

References log(), and sqrt().

Here is the call graph for this function:

◆ activeTuning()

bool quda::activeTuning ( )

query if tuning is in progress

Returns
tuning in progress?

Definition at line 121 of file tune.cpp.

References tuning.

Referenced by quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >::apply(), qudaLaunchKernel(), and quda::TunableVectorYZ::resizeStep().

Here is the caller graph for this function:

◆ AddCoarseDiagonalCPU()

template<typename Float , int nSpin, int nColor, typename Arg >
void quda::AddCoarseDiagonalCPU ( Arg arg)

Definition at line 1020 of file coarse_op_kernel.cuh.

References nColor, and s.

◆ AddCoarseDiagonalGPU()

template<typename Float , int nSpin, int nColor, typename Arg >
__global__ void quda::AddCoarseDiagonalGPU ( Arg  arg)

Definition at line 1036 of file coarse_op_kernel.cuh.

References nColor, and s.

◆ AddCoarseTmDiagonalCPU()

template<typename Float , int nSpin, int nColor, typename Arg >
void quda::AddCoarseTmDiagonalCPU ( Arg arg)

◆ AddCoarseTmDiagonalGPU()

template<typename Float , int nSpin, int nColor, typename Arg >
__global__ void quda::AddCoarseTmDiagonalGPU ( Arg  arg)

◆ aligned_malloc()

static void* quda::aligned_malloc ( MemAlloc a,
size_t  size 
)
static

Under CUDA 4.0, cudaHostRegister seems to require that both the beginning and end of the buffer be aligned on page boundaries. This local function takes care of the alignment and gets called by pinned_malloc_() and mapped_malloc_().

Definition at line 141 of file malloc.cpp.

References quda::MemAlloc::base_size, errorQuda, quda::MemAlloc::file, quda::MemAlloc::func, quda::MemAlloc::line, and quda::MemAlloc::size.

Referenced by mapped_malloc_(), and pinned_malloc_().

Here is the caller graph for this function:

◆ APEStep()

void quda::APEStep ( GaugeField dataDs,
const GaugeField dataOr,
double  alpha 
)

Apply APE smearing to the gauge field.

Parameters
[out]dataDsOutput smeared field
[in]dataOrInput gauge field
[in]alphasmearing parameter

Definition at line 128 of file gauge_ape.cu.

References errorQuda, quda::GaugeField::isNative(), quda::GaugeField::Order(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_SINGLE_PRECISION, and quda::GaugeField::Reconstruct().

Referenced by performAPEnStep().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ appendMatrixToArray() [1/2]

__device__ void quda::appendMatrixToArray ( const Matrix< complex< double >, 3 > &  mat,
const int  idx,
const int  stride,
double2 *const  array 
)
inline

Definition at line 904 of file quda_matrix.h.

References mat().

Here is the call graph for this function:

◆ appendMatrixToArray() [2/2]

__device__ void quda::appendMatrixToArray ( const Matrix< complex< float >, 3 > &  mat,
const int  idx,
const int  stride,
float2 *const  array 
)
inline

Definition at line 914 of file quda_matrix.h.

References mat().

Here is the call graph for this function:

◆ applyB()

template<typename T >
static void quda::applyB ( T  d_out[],
const T  d_in[],
int  N 
)
static

Definition at line 37 of file inv_mpcg_quda.cpp.

Referenced by applyThirdTerm().

Here is the caller graph for this function:

◆ applyClover()

template<typename Float , int Ns, int Nc, int Mc, int color_stride, bool dagger, typename Arg >
__device__ __host__ void quda::applyClover ( complex< Float >  out[],
Arg arg,
int  x_cb,
int  src_idx,
int  parity,
int  s,
int  color_block,
int  color_offset 
)
inline

Applies the coarse clover matrix on a given parity and checkerboard site index

Parameters
outThe result out += X * in
XThe coarse clover field
inThe input field
parityThe site parity
x_cbThe checkerboarded site index

Definition at line 280 of file dslash_coarse.cuh.

References conj(), dagger, quda::DslashCoarseArg< Float, yFloat, ghostFloat, coarseSpin, coarseColor, csOrder, gOrder >::dim, quda::Arg< real, Ns, Nc, order >::nParity, and quda::Arg< real, Ns, Nc, order >::volumeCB.

Here is the call graph for this function:

◆ ApplyClover()

void quda::ApplyClover ( ColorSpinorField out,
const ColorSpinorField in,
const CloverField clover,
bool  inverse,
int  parity 
)

Apply clover-matrix field to a color-spinor field.

Parameters
[out]outResult color-spinor field
[in]inInput color-spinor field
[in]cloverClover-matrix field
[in]inverseWhether we are applying the inverse or not
[in]parityField parity (if color-spinor field is single parity)

Definition at line 604 of file dslash_quda.cu.

References quda::Clover< Float, nSpin, nColor, Arg >::apply(), arg(), checkCudaError, checkLocation, checkPrecision, errorQuda, in, inverse(), quda::ColorSpinorField::Ncolor(), quda::ColorSpinorField::Nspin(), Nstream, out, parity, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, QUDA_SINGLE_PRECISION, and streams.

Referenced by quda::DiracClover::Clover(), and quda::DiracCloverPC::CloverInv().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ ApplyCoarse()

void quda::ApplyCoarse ( ColorSpinorField out,
const ColorSpinorField inA,
const ColorSpinorField inB,
const GaugeField Y,
const GaugeField X,
double  kappa,
int  parity = QUDA_INVALID_PARITY,
bool  dslash = true,
bool  clover = true,
bool  dagger = false,
const int *  commDim = 0,
QudaPrecision  halo_precision = QUDA_INVALID_PRECISION 
)

Apply the coarse dslash stencil. This single driver accounts for all variations with and without the clover field, with and without dslash, and both single and full parity fields.

Parameters
[out]outThe result vector
[in]inAThe first input vector
[in]inBThe second input vector
[in]YCoarse link field
[in]XCoarse clover field
[in]kappaScaling parameter
[in]parityParity of the field (if single parity)
[in]dslashAre we applying dslash?
[in]cloverAre we applying clover?
[in]daggerApply dagger operator?
[in]commDimWhich dimensions are partitioned?
[in]halo_precisionWhat precision to use for the halos (if QUDA_INVALID_PRECISION, use field precision)

Definition at line 772 of file dslash_coarse.cu.

References quda::DslashCoarsePolicyTune::apply().

Referenced by quda::DiracCoarse::Clover(), quda::DiracCoarse::CloverInv(), quda::DiracCoarse::Dslash(), quda::DiracCoarsePC::Dslash(), quda::DiracCoarse::DslashXpay(), and quda::DiracCoarse::M().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ applyCovDev()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, int mu, typename Arg , typename Vector >
__device__ __host__ void quda::applyCovDev ( Vector out,
Arg arg,
int  coord[nDim],
int  x_cb,
int  parity,
int  idx,
int  thread_dim,
bool &  active 
)
inline

Applies the off-diagonal part of the covariant derivative operator

Parameters
[out]outThe out result field
[in,out]argParameter struct
[in]UThe gauge field
[in]coordSite coordinate
[in]x_cbThe checker-boarded site index. This is a 4-d index only
[in]parityThe site parity
[in]idxThread index (equal to face index for exterior kernels)
[in]thread_dimWhich dimension this thread corresponds to (fused exterior only)

Definition at line 63 of file covDev.cuh.

References conj(), quda::CovDevArg< Float, nColor, reconstruct_ >::ghost, quda::CovDevArg< Float, nColor, reconstruct_ >::in, quda::Arg< real, Ns, Nc, order >::nParity, and quda::CovDevArg< Float, nColor, reconstruct_ >::U.

Here is the call graph for this function:

◆ ApplyCovDev()

void quda::ApplyCovDev ( ColorSpinorField out,
const ColorSpinorField in,
const GaugeField U,
int  mu,
int  parity,
bool  dagger,
const int *  comm_override,
TimeProfile profile 
)

Driver for applying the covariant derivative.

out = U * in

where U is the gauge field in a particular direction.

This operator can be applied to both single parity (checker-boarded) fields, or to full fields.

Parameters
[out]outThe output result field
[in]inThe input field
[in]UThe gauge field used for the covariant derivative
[in]muDirection of the derivative. For mu > 3 it goes backwards
[in]parityDestination parity
[in]daggerWhether this is for the dagger operator
[in]comm_overrideOverride for which dimensions are partitioned
[in]profileThe TimeProfile used for profiling the dslash

Definition at line 185 of file covDev.cu.

References checkLocation, checkPrecision, dagger, errorQuda, quda::ColorSpinorField::FieldOrder(), in, mu, out, parity, popKernelPackT(), pushKernelPackT(), and quda::ColorSpinorField::V().

Referenced by quda::GaugeCovDev::DslashCD().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ ApplyDomainWall4D()

void quda::ApplyDomainWall4D ( ColorSpinorField out,
const ColorSpinorField in,
const GaugeField U,
double  a,
double  m_5,
const Complex b_5,
const Complex c_5,
const ColorSpinorField x,
int  parity,
bool  dagger,
const int *  comm_override,
TimeProfile profile 
)

Driver for applying the batched Wilson 4-d stencil to a 5-d vector with 4-d preconditioned data order.

out = D * in

where D is the gauged Wilson linear operator.

If a is non-zero, the operation is given by out = x + a * D in. This operator can be applied to both single parity (checker-boarded) fields, or to full fields.

Parameters
[out]outThe output result field
[in]inThe input field
[in]UThe gauge field used for the operator
[in]aScale factor applied
[in]m_5Wilson mass shift
[in]b_5Mobius coefficient array (length Ls)
[in]c_5Mobius coefficient array (length Ls)
[in]xVector field we accumulate onto
[in]parityDestination parity
[in]daggerWhether this is for the dagger operator
[in]comm_overrideOverride for which dimensions are partitioned
[in]profileThe TimeProfile used for profiling the dslash

Definition at line 99 of file dslash_domain_wall_4d.cu.

References checkLocation, checkPrecision, dagger, errorQuda, quda::ColorSpinorField::FieldOrder(), in, out, parity, and quda::ColorSpinorField::V().

Referenced by quda::DiracDomainWall4D::Dslash4(), quda::DiracMobius::Dslash4(), quda::DiracDomainWall4D::Dslash4Xpay(), quda::DiracMobius::Dslash4Xpay(), quda::DiracDomainWall4D::M(), and quda::DiracMobius::M().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ ApplyDomainWall5D()

void quda::ApplyDomainWall5D ( ColorSpinorField out,
const ColorSpinorField in,
const GaugeField U,
double  a,
double  m_f,
const ColorSpinorField x,
int  parity,
bool  dagger,
const int *  comm_override,
TimeProfile profile 
)

Driver for applying the Domain-wall 5-d stencil to a 5-d vector with 5-d preconditioned data order.

out = D_5 * in

where D_5 is the 5-d Wilson linear operator with fifth dimension boundary condition set by the fermion mass.

If a is non-zero, the operation is given by out = x + a * D_5 in. This operator can be applied to both single parity (checker-boarded) fields, or to full fields.

Parameters
[out]outThe output result field
[in]inThe input field
[in]UThe gauge field used for the operator
[in]aScale factor applied (typically -kappa_5)
[in]m_fFermion mass parameter
[in]xVector field we accumulate onto
[in]parityDestination parity
[in]daggerWhether this is for the dagger operator
[in]comm_overrideOverride for which dimensions are partitioned
[in]profileThe TimeProfile used for profiling the dslash

Definition at line 118 of file dslash_domain_wall_5d.cu.

References checkLocation, checkPrecision, dagger, errorQuda, quda::ColorSpinorField::FieldOrder(), in, out, parity, popKernelPackT(), pushKernelPackT(), and quda::ColorSpinorField::V().

Referenced by quda::DiracDomainWall::Dslash(), quda::DiracDomainWall::DslashXpay(), and quda::DiracDomainWall::M().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ applyDslash()

template<typename Float , int nDim, int Ns, int Nc, int Mc, int color_stride, int dim_stride, int thread_dir, int thread_dim, bool dagger, DslashType type, typename Arg >
__device__ __host__ void quda::applyDslash ( complex< Float >  out[],
Arg arg,
int  x_cb,
int  src_idx,
int  parity,
int  s_row,
int  color_block,
int  color_offset 
)
inline

Definition at line 94 of file dslash_coarse.cuh.

References conj(), dagger, getCoordsCB(), linkIndexM1(), linkIndexP1(), quda::Arg< real, Ns, Nc, order >::nParity, and quda::Arg< real, Ns, Nc, order >::volumeCB.

Here is the call graph for this function:

◆ ApplyDslash5()

void quda::ApplyDslash5 ( ColorSpinorField out,
const ColorSpinorField in,
const ColorSpinorField x,
double  m_f,
double  m_5,
const Complex b_5,
const Complex c_5,
double  a,
bool  dagger,
Dslash5Type  type 
)

Apply either the domain-wall / mobius Dslash5 operator or the M5 inverse operator. In the current implementation, it is expected that the color-spinor fields are 4-d preconditioned.

Parameters
[out]outResult color-spinor field
[in]inInput color-spinor field
[in]xAuxiliary input color-spinor field
[in]m_fFermion mass parameter
[in]m_5Wilson mass shift
[in]b_5Mobius coefficient array (length Ls)
[in]c_5Mobius coefficient array (length Ls)
[in]aScale factor used in xpay operator
[in]daggerWhether this is for the dagger operator
[in]typeType of dslash we are applying

Definition at line 216 of file dslash5_domain_wall.cu.

References quda::Dslash5< Float, nColor, Arg >::apply(), quda::Dslash5< Float, nColor, Arg >::arg, checkLocation, checkPrecision, dagger, errorQuda, in, quda::ColorSpinorField::Ncolor(), Nstream, out, quda::ColorSpinorField::PCType(), QUDA_4D_PC, QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, QUDA_SINGLE_PRECISION, and streams.

Referenced by quda::DiracMobius::Dslash4pre(), quda::DiracMobius::Dslash4preXpay(), quda::DiracDomainWall4D::Dslash5(), quda::DiracMobius::Dslash5(), quda::DiracDomainWall4DPC::Dslash5inv(), quda::DiracMobiusPC::Dslash5inv(), quda::DiracDomainWall4DPC::Dslash5invXpay(), quda::DiracMobiusPC::Dslash5invXpay(), quda::DiracDomainWall4D::Dslash5Xpay(), quda::DiracMobius::Dslash5Xpay(), quda::DiracDomainWall4D::M(), and quda::DiracMobius::M().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ ApplyGamma() [1/2]

template<typename Float , int nColor>
void quda::ApplyGamma ( ColorSpinorField out,
const ColorSpinorField in,
int  d 
)

Definition at line 292 of file dslash_quda.cu.

References quda::Gamma< ValueType, basis, dir >::apply(), arg(), Nstream, and streams.

Referenced by ApplyGamma(), and gamma5().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ ApplyGamma() [2/2]

template<typename Float >
void quda::ApplyGamma ( ColorSpinorField out,
const ColorSpinorField in,
int  d 
)

◆ applyGaugePhase()

void quda::applyGaugePhase ( GaugeField u)

Apply the staggered phase factor to the gauge field.

Parameters
[in]uThe gauge field to which we apply the staggered phase factors

Definition at line 223 of file gauge_phase.cu.

References errorQuda, quda::GaugeField::exchangeGhost(), quda::LatticeField::GhostExchange(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_GHOST_EXCHANGE_PAD, and QUDA_SINGLE_PRECISION.

Referenced by quda::GaugeField::applyStaggeredPhase(), quda::cpuGaugeField::Gauge_p(), and quda::GaugeField::removeStaggeredPhase().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ ApplyImprovedStaggered()

void quda::ApplyImprovedStaggered ( ColorSpinorField out,
const ColorSpinorField in,
const GaugeField U,
const GaugeField L,
double  a,
const ColorSpinorField x,
int  parity,
bool  dagger,
const int *  comm_override,
TimeProfile profile 
)

Apply the improved staggered dslash operator to a color-spinor field.

Parameters
[out]outResult color-spinor field
[in]inInput color-spinor field
[in]UGauge-Link (1-link or fat-link)
[in]LLong-Links for asqtad
[in]aScale factor for the xpay operation (set to 0.0 for non-xpay version)
[in]xVector field we accumulate onto
[in]parityparity parameter
[in]daggerWhether we are applying the dagger or not
[in]improvedwhether to apply the standard-staggered (false) or asqtad (true) operator

Definition at line 181 of file dslash_improved_staggered.cu.

References checkLocation, checkPrecision, comm_dim_partitioned(), dagger, errorQuda, quda::ColorSpinorField::FieldOrder(), in, out, parity, quda::ColorSpinorField::V(), and quda::LatticeField::X().

Referenced by quda::DiracImprovedStaggered::Dslash(), quda::DiracImprovedStaggered::DslashXpay(), and quda::DiracImprovedStaggered::M().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ applyLaplace()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, int dir, typename Arg , typename Vector >
__device__ __host__ void quda::applyLaplace ( Vector out,
Arg arg,
int  coord[nDim],
int  x_cb,
int  parity,
int  idx,
int  thread_dim,
bool &  active 
)
inline

Applies the off-diagonal part of the Laplace operator

Parameters
[out]outThe out result field
[in,out]argParameter struct
[in]UThe gauge field
[in]coordSite coordinate
[in]x_cbThe checker-boarded site index. This is a 4-d index only
[in]parityThe site parity
[in]idxThread index (equal to face index for exterior kernels)
[in]thread_dimWhich dimension this thread corresponds to (fused exterior only)

Definition at line 69 of file laplace.cuh.

References conj(), quda::LaplaceArg< Float, nColor, reconstruct_ >::ghost, quda::LaplaceArg< Float, nColor, reconstruct_ >::in, linkIndexM1(), linkIndexP1(), quda::Arg< real, Ns, Nc, order >::nParity, and quda::LaplaceArg< Float, nColor, reconstruct_ >::U.

Referenced by laplace().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ ApplyLaplace()

void quda::ApplyLaplace ( ColorSpinorField out,
const ColorSpinorField in,
const GaugeField U,
int  dir,
double  kappa,
const ColorSpinorField x,
int  parity,
bool  dagger,
const int *  comm_override,
TimeProfile profile 
)

Driver for applying the Laplace stencil.

out = - kappa * A * in

where A is the gauge laplace linear operator.

If x is defined, the operation is given by out = x - kappa * A in. This operator can be applied to both single parity (checker-boarded) fields, or to full fields.

Parameters
[out]outThe output result field
[in]inThe input field
[in]UThe gauge field used for the gauge Laplace
[in]dirDirection of the derivative 0,1,2,3 to omit (-1 is full 4D)
[in]kappaScale factor applied
[in]xVector field we accumulate onto

Definition at line 188 of file laplace.cu.

References checkLocation, checkPrecision, dagger, errorQuda, quda::ColorSpinorField::FieldOrder(), in, kappa, out, parity, and quda::ColorSpinorField::V().

Referenced by quda::GaugeLaplace::Dslash(), and quda::GaugeLaplace::DslashXpay().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ ApplyNdegTwistedMass()

void quda::ApplyNdegTwistedMass ( ColorSpinorField out,
const ColorSpinorField in,
const GaugeField U,
double  a,
double  b,
double  c,
const ColorSpinorField x,
int  parity,
bool  dagger,
const int *  comm_override,
TimeProfile profile 
)

Driver for applying the non-degenerate twisted-mass stencil.

out = a * D * in + (1 + i*b*gamma_5*tau_3 + c*tau_1) * x

where D is the gauged Wilson linear operator. The quark fields out, in and x are five dimensional, with the fifth dimension corresponding to the flavor dimension. The convention is that the first 4-d slice (s=0) corresponds to the positive twist and the second slice (s=1) corresponds to the negative twist.

This operator can be applied to both single parity (4d checker-boarded) fields, or to full fields.

Parameters
[out]outThe output result field
[in]inThe input field
[in]UThe gauge field used for the operator
[in]aScale factor applied to Wilson term (typically -kappa)
[in]bChiral twist factor applied (typically 2*mu*kappa)
[in]cFlavor twist factor applied (typically -2*epsilon*kappa)
[in]xVector field we accumulate onto
[in]parityDestination parity
[in]daggerWhether this is for the dagger operator
[in]comm_overrideOverride for which dimensions are partitioned
[in]profileThe TimeProfile used for profiling the dslash

Definition at line 102 of file dslash_ndeg_twisted_mass.cu.

References checkLocation, checkPrecision, dagger, errorQuda, quda::ColorSpinorField::FieldOrder(), in, out, parity, and quda::ColorSpinorField::V().

Referenced by quda::DiracTwistedMass::Dslash(), quda::DiracTwistedMass::DslashXpay(), and quda::DiracTwistedMass::M().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ ApplyNdegTwistedMassPreconditioned()

void quda::ApplyNdegTwistedMassPreconditioned ( ColorSpinorField out,
const ColorSpinorField in,
const GaugeField U,
double  a,
double  b,
double  c,
bool  xpay,
const ColorSpinorField x,
int  parity,
bool  dagger,
bool  asymmetric,
const int *  comm_override,
TimeProfile profile 
)

Driver for applying the preconditioned non-degenerate twisted-mass stencil.

out = a * (1 + i*b*gamma_5*tau_3 + c*tau_1) * D * in + x

where D is the gauged Wilson linear operator. The quark fields out, in and x are five dimensional, with the fifth dimension corresponding to the flavor dimension. The convention is that the first 4-d slice (s=0) corresponds to the positive twist and the second slice (s=1) corresponds to the negative twist.

This operator can (at present) be applied to only single parity (checker-boarded) fields.

For the dagger operator, we generally apply the conjugate transpose operator

out = x + D^\dagger A^{-\dagger}

with the additional asymmetric special case, where we do not transpose the order of operations

out = A^{-\dagger} D^\dagger (no xpay term)

This variant is required when we have the asymmetric preconditioned operator and require the preconditioned twist term to remain in between the applications of D. This would be combined with a subsequent non-preconditioned dagger operator, A*x - kappa^2 D, to form the full operator.

Parameters
[out]outThe output result field
[in]inThe input field
[in]UThe gauge field used for the operator
[in]aScale factor applied to Wilson term (typically -kappa^2/(1 + b*b -c*c) )
[in]bChiral twist factor applied (typically -2*mu*kappa)
[in]cFlavor twist factor applied (typically 2*epsilon*kappa)
[in]xpayWhether to do xpay or not
[in]xVector field we accumulate onto
[in]parityDestination parity
[in]daggerWhether this is for the dagger operator
[in]asymmetricWhether this is for the asymmetric preconditioned dagger operator (a*(1 - i*b*gamma_5) * D^dagger * in)
[in]comm_overrideOverride for which dimensions are partitioned
[in]profileThe TimeProfile used for profiling the dslash

Definition at line 146 of file dslash_ndeg_twisted_mass_preconditioned.cu.

References checkLocation, checkPrecision, dagger, errorQuda, quda::ColorSpinorField::FieldOrder(), in, out, parity, popKernelPackT(), pushKernelPackT(), quda::ColorSpinorField::V(), and quda::blas::xpay().

Referenced by quda::DiracTwistedMassPC::Dslash(), and quda::DiracTwistedMassPC::DslashXpay().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ applyStaggered()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, typename Arg , typename Vector >
__device__ __host__ void quda::applyStaggered ( Vector out,
Arg arg,
int  coord[nDim],
int  x_cb,
int  parity,
int  idx,
int  thread_dim,
bool &  active 
)
inline

◆ ApplyStaggered()

void quda::ApplyStaggered ( ColorSpinorField out,
const ColorSpinorField in,
const GaugeField U,
double  a,
const ColorSpinorField x,
int  parity,
bool  dagger,
const int *  comm_override,
TimeProfile profile 
)

Apply the staggered dslash operator to a color-spinor field.

Parameters
[out]outResult color-spinor field
[in]inInput color-spinor field
[in]UGauge-Link (1-link or fat-link)
[in]aScale factor for the xpay operation (set to 0.0 for non-xpay version)
[in]xVector field we accumulate onto
[in]parityparity parameter
[in]daggerWhether we are applying the dagger or not
[in]improvedwhether to apply the standard-staggered (false) or asqtad (true) operator

Definition at line 112 of file dslash_staggered.cu.

References checkLocation, checkPrecision, dagger, errorQuda, quda::ColorSpinorField::FieldOrder(), in, out, parity, and quda::ColorSpinorField::V().

Referenced by quda::DiracStaggered::Dslash(), quda::DiracStaggered::DslashXpay(), and quda::DiracStaggered::M().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ applyT()

template<typename T >
static void quda::applyT ( T  d_out[],
const T  d_in[],
const T  gamma[],
const T  rho[],
int  N 
)
static

Definition at line 18 of file inv_mpcg_quda.cpp.

Referenced by applyThirdTerm().

Here is the caller graph for this function:

◆ applyThirdTerm()

template<typename T >
static void quda::applyThirdTerm ( T  d_out[],
const T  d_in[],
int  k,
int  j,
int  s,
const T  gamma[],
const T  rho[],
const T  gamma_kprev[],
const T  rho_kprev[] 
)
static

Definition at line 57 of file inv_mpcg_quda.cpp.

References applyB(), applyT(), s, and zero().

Referenced by computeCoeffs().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ ApplyTwistClover()

void quda::ApplyTwistClover ( ColorSpinorField out,
const ColorSpinorField in,
const CloverField clover,
double  kappa,
double  mu,
double  epsilon,
int  parity,
int  dagger,
QudaTwistGamma5Type  twist 
)

Apply twisted clover-matrix field to a color-spinor field.

Parameters
[out]outResult color-spinor field
[in]inInput color-spinor field
[in]cloverClover-matrix field
[in]kappakappa parameter
[in]mumu parameter
[in]epsilonepsilon parameter
[in]parityField parity (if color-spinor field is single parity)
[in]daggerWhether we are applying the dagger or not
[in]twistThe type of kernel we are doing: if (twist == QUDA_TWIST_GAMMA5_DIRECT), apply (Clover + i*a*gamma_5) to the input spinor; else if (twist == QUDA_TWIST_GAMMA5_INVERSE), apply (Clover + i*a*gamma_5)/(Clover^2 + a^2) to the input spinor

Definition at line 769 of file dslash_quda.cu.

References quda::TwistClover< Float, nSpin, nColor, Arg >::apply(), arg(), checkCudaError, checkLocation, checkPrecision, dagger, epsilon, errorQuda, in, kappa, mu, quda::ColorSpinorField::Ncolor(), quda::ColorSpinorField::Nspin(), Nstream, out, parity, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, QUDA_SINGLE_PRECISION, QUDA_TWIST_GAMMA5_DIRECT, and streams.

Referenced by quda::DiracTwistedClover::twistedCloverApply().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ ApplyTwistedClover()

void quda::ApplyTwistedClover ( ColorSpinorField out,
const ColorSpinorField in,
const GaugeField U,
const CloverField C,
double  a,
double  b,
const ColorSpinorField x,
int  parity,
bool  dagger,
const int *  comm_override,
TimeProfile profile 
)

Driver for applying the twisted-clover stencil.

out = a * D * in + (C + i*b*gamma_5) * x

where D is the gauged Wilson linear operator, and C is the clover field.

This operator can be applied to both single parity (4d checker-boarded) fields, or to full fields.

Parameters
[out]outThe output result field
[in]inThe input field
[in]UThe gauge field used for the operator
[in]CThe clover field used for the operator
[in]aScale factor applied to Wilson term (typically -kappa)
[in]bChiral twist factor applied (typically 2*mu*kappa)
[in]xVector field we accumulate onto
[in]parityDestination parity
[in]daggerWhether this is for the dagger operator
[in]comm_overrideOverride for which dimensions are partitioned
[in]profileThe TimeProfile used for profiling the dslash

Definition at line 122 of file dslash_twisted_clover.cu.

References checkLocation, checkPrecision, dagger, errorQuda, quda::ColorSpinorField::FieldOrder(), in, out, parity, and quda::ColorSpinorField::V().

Referenced by quda::DiracTwistedClover::DslashXpay(), and quda::DiracTwistedClover::M().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ ApplyTwistedCloverPreconditioned()

void quda::ApplyTwistedCloverPreconditioned ( ColorSpinorField out,
const ColorSpinorField in,
const GaugeField U,
const CloverField C,
double  a,
double  b,
bool  xpay,
const ColorSpinorField x,
int  parity,
bool  dagger,
const int *  comm_override,
TimeProfile profile 
)

Driver for applying the preconditioned twisted-clover stencil.

out = a * (C + i*b*gamma_5)^{-1} * D * in + x = a * C^{-2} (C - i*b*gamma_5) * D * in + x = A^{-1} * D * in + x

where D is the gauged Wilson linear operator and C is the clover field. This operator can (at present) be applied to only single parity (checker-boarded) fields. When the dagger operator is requested, we do not transpose the order of operations, e.g.

out = A^{-\dagger} D^\dagger (no xpay term)

Although not a conjugate transpose of the regular operator, this variant is used to enable kernel fusion between the application of D and the subsequent application of A, e.g., in the symmetric dagger operator we need to apply

M = (1 - kappa^2 D^\dagger A^{-\dagger} D^\dagger A^{-\dagger})

and since we cannot fuse D^\dagger A^{-\dagger}, we instead fuse A^{-\dagger} D^\dagger.

Parameters
[out]outThe output result field
[in]inThe input field
[in]UThe gauge field used for the operator
[in]CThe clover field used for the operator
[in]aScale factor applied to Wilson term ( typically 1 / (1 + b*b) or kappa^2 / (1 + b*b) )
[in]bTwist factor applied (typically -2*kappa*mu)
[in]xpayWhether to do xpay or not
[in]xVector field we accumulate onto when xpay is true
[in]parityDestination parity
[in]daggerWhether this is for the dagger operator
[in]comm_overrideOverride for which dimensions are partitioned
[in]profileThe TimeProfile used for profiling the dslash

Definition at line 161 of file dslash_twisted_clover_preconditioned.cu.

References checkLocation, checkPrecision, dagger, errorQuda, quda::ColorSpinorField::FieldOrder(), in, out, parity, quda::ColorSpinorField::V(), and quda::blas::xpay().

Referenced by quda::DiracTwistedCloverPC::Dslash(), and quda::DiracTwistedCloverPC::DslashXpay().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ ApplyTwistedMass()

void quda::ApplyTwistedMass ( ColorSpinorField out,
const ColorSpinorField in,
const GaugeField U,
double  a,
double  b,
const ColorSpinorField x,
int  parity,
bool  dagger,
const int *  comm_override,
TimeProfile profile 
)

Driver for applying the twisted-mass stencil.

out = a * D * in + (1 + i*b*gamma_5) * x

where D is the gauged Wilson linear operator.

This operator can be applied to both single parity (checker-boarded) fields, or to full fields.

Parameters
[out]outThe output result field
[in]inThe input field
[in]UThe gauge field used for the operator
[in]aScale factor applied to Wilson term (typically -kappa)
[in]bTwist factor applied (typically 2*mu*kappa)
[in]xVector field we accumulate onto
[in]parityDestination parity
[in]daggerWhether this is for the dagger operator
[in]comm_overrideOverride for which dimensions are partitioned
[in]profileThe TimeProfile used for profiling the dslash

Definition at line 103 of file dslash_twisted_mass.cu.

References checkLocation, checkPrecision, dagger, errorQuda, quda::ColorSpinorField::FieldOrder(), in, out, parity, and quda::ColorSpinorField::V().

Referenced by quda::DiracTwistedMass::Dslash(), quda::DiracTwistedMass::DslashXpay(), and quda::DiracTwistedMass::M().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ ApplyTwistedMassPreconditioned()

void quda::ApplyTwistedMassPreconditioned ( ColorSpinorField out,
const ColorSpinorField in,
const GaugeField U,
double  a,
double  b,
bool  xpay,
const ColorSpinorField x,
int  parity,
bool  dagger,
bool  asymmetric,
const int *  comm_override,
TimeProfile profile 
)

Driver for applying the preconditioned twisted-mass stencil.

out = a*(1 + i*b*gamma_5) * D * in + x

where D is the gauged Wilson linear operator. This operator can (at present) be applied to only single parity (checker-boarded) fields. For the dagger operator, we generally apply the conjugate transpose operator

out = x + D^{\dagger} A^{-\dagger}

with the additional asymmetric special case, where we do not transpose the order of operations

out = A^{-\dagger} D^{\dagger} (no xpay term)

This variant is required when we have the asymmetric preconditioned operator and require the preconditioned twist term to remain in between the applications of D. This would be combined with a subsequent non-preconditioned dagger operator, A*x - kappa^2 D, to form the full operator.

Parameters
[out]outThe output result field
[in]inThe input field
[in]UThe gauge field used for the operator
[in]aScale factor applied to Wilson term ( typically kappa^2 / (1 + b*b) )
[in]bTwist factor applied (typically -2*kappa*mu)
[in]xpayWhether to do xpay or not
[in]xVector field we accumulate onto when xpay is true
[in]parityDestination parity
[in]daggerWhether this is for the dagger operator
[in]asymmetricWhether this is for the asymmetric preconditioned dagger operator (a*(1 - i*b*gamma_5) * D^dagger * in)
[in]comm_overrideOverride for which dimensions are partitioned
[in]profileThe TimeProfile used for profiling the dslash

Definition at line 116 of file dslash_twisted_mass_preconditioned.cu.

References checkLocation, checkPrecision, dagger, errorQuda, quda::ColorSpinorField::FieldOrder(), in, out, parity, popKernelPackT(), pushKernelPackT(), quda::ColorSpinorField::V(), and quda::blas::xpay().

Referenced by quda::DiracTwistedMassPC::Dslash(), and quda::DiracTwistedMassPC::DslashXpay().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ ApplyTwistGamma()

void quda::ApplyTwistGamma ( ColorSpinorField out,
const ColorSpinorField in,
int  d,
double  kappa,
double  mu,
double  epsilon,
int  dagger,
QudaTwistGamma5Type  type 
)

Apply the twisted-mass gamma operator to a color-spinor field.

Parameters
[out]outResult color-spinor field
[in]inInput color-spinor field
[in]dWhich gamma matrix we are applying (C counting, so gamma_5 has d=4)
[in]kappakappa parameter
[in]mumu parameter
[in]epsilonepsilon parameter
[in]daggerWhether we are applying the dagger or not
[in]typeThe type of kernel we are doing

Definition at line 416 of file dslash_quda.cu.

References quda::TwistGamma< Float, nColor, Arg >::apply(), arg(), checkCudaError, checkLocation, checkPrecision, dagger, epsilon, errorQuda, in, kappa, mu, quda::ColorSpinorField::Ncolor(), Nstream, out, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, QUDA_SINGLE_PRECISION, and streams.

Referenced by quda::DiracTwistedMass::twistedApply().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ applyU()

void quda::applyU ( GaugeField force,
GaugeField U 
)

Left multiply the force field by the gauge field

force = U * force

Parameters
forceForce field
UGauge field

Definition at line 446 of file momentum.cu.

References checkCudaError, errorQuda, quda::GaugeField::Order(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, and QUDA_FLOAT2_GAUGE_ORDER.

Referenced by computeStaggeredForceQuda(), and updateMomentum().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ ApplyWilson()

void quda::ApplyWilson ( ColorSpinorField out,
const ColorSpinorField in,
const GaugeField U,
double  kappa,
const ColorSpinorField x,
int  parity,
bool  dagger,
const int *  comm_override,
TimeProfile profile 
)

Driver for applying the Wilson stencil.

out = D * in

where D is the gauged Wilson linear operator.

If kappa is non-zero, the operation is given by out = x + kappa * D in. This operator can be applied to both single parity (checker-boarded) fields, or to full fields.

Parameters
[out]outThe output result field
[in]inThe input field
[in]UThe gauge field used for the operator
[in]kappaScale factor applied
[in]xVector field we accumulate onto
[in]parityDestination parity
[in]daggerWhether this is for the dagger operator
[in]comm_overrideOverride for which dimensions are partitioned
[in]profileThe TimeProfile used for profiling the dslash

Definition at line 86 of file dslash_wilson.cu.

References checkLocation, checkPrecision, dagger, errorQuda, quda::ColorSpinorField::FieldOrder(), in, out, parity, and quda::ColorSpinorField::V().

Referenced by quda::DiracWilson::Dslash(), quda::DiracWilson::DslashXpay(), and quda::DiracWilson::M().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ applyWilson()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, typename Arg , typename Vector >
__device__ __host__ void quda::applyWilson ( Vector out,
Arg arg,
int  coord[nDim],
int  x_cb,
int  s,
int  parity,
int  idx,
int  thread_dim,
bool &  active 
)
inline

Applies the off-diagonal part of the Wilson operator.

Parameters
[out]outThe out result field
[in,out]argParameter struct
[in]coordSite coordinate
[in]x_cbThe checker-boarded site index (at present this is a 4-d index only)
[in]sThe fifth-dimension index
[in]paritySite parity
[in]idxThread index (equal to face index for exterior kernels)
[in]thread_dimWhich dimension this thread corresponds to (fused exterior only)

Definition at line 62 of file dslash_wilson.cuh.

References conj(), quda::DslashArg< Float >::dagger, EXTERIOR_KERNEL_ALL, quda::WilsonArg< Float, nColor, reconstruct_ >::ghost, quda::WilsonArg< Float, nColor, reconstruct_ >::in, quda::DslashArg< Float >::kernel_type, nColor, quda::DslashArg< Float >::nParity, quda::DslashArg< Float >::parity, quda::WilsonArg< Float, nColor, reconstruct_ >::reconstruct, and quda::WilsonArg< Float, nColor, reconstruct_ >::U.

Here is the call graph for this function:

◆ ApplyWilsonClover()

void quda::ApplyWilsonClover ( ColorSpinorField out,
const ColorSpinorField in,
const GaugeField U,
const CloverField A,
double  kappa,
const ColorSpinorField x,
int  parity,
bool  dagger,
const int *  comm_override,
TimeProfile profile 
)

Driver for applying the Wilson-clover stencil.

out = A * x + kappa * D * in

where D is the gauged Wilson linear operator.

This operator can be applied to both single parity (checker-boarded) fields, or to full fields.

Parameters
[out]outThe output result field
[in]inInput field that D is applied to
[in]xInput field that A is applied to
[in]UThe gauge field used for the operator
[in]AThe clover field used for the operator
[in]kappaScale factor applied
[in]xVector field we accumulate onto
[in]parityDestination parity
[in]daggerWhether this is for the dagger operator
[in]comm_overrideOverride for which dimensions are partitioned
[in]profileThe TimeProfile used for profiling the dslash

Definition at line 121 of file dslash_wilson_clover.cu.

References checkLocation, checkPrecision, dagger, errorQuda, quda::ColorSpinorField::FieldOrder(), in, out, parity, and quda::ColorSpinorField::V().

Referenced by quda::DiracClover::DslashXpay(), and quda::DiracClover::M().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ ApplyWilsonCloverPreconditioned()

void quda::ApplyWilsonCloverPreconditioned ( ColorSpinorField out,
const ColorSpinorField in,
const GaugeField U,
const CloverField A,
double  kappa,
const ColorSpinorField x,
int  parity,
bool  dagger,
const int *  comm_override,
TimeProfile profile 
)

Driver for applying the preconditioned Wilson-clover stencil.

out = A^{-1} * D * in + x

where D is the gauged Wilson linear operator and A is the clover field. This operator can (at present) be applied to only single parity (checker-boarded) fields. When the dagger operator is requested, we do not transpose the order of operations, e.g.

out = A^{-\dagger} D^{\dagger} (no xpay term)

Although not a conjugate transpose of the regular operator, this variant is used to enable kernel fusion between the application of D and the subsequent application of A, e.g., in the symmetric dagger operator we need to apply

M = (1 - kappa^2 D^{\dagger} A^{-1} D^{\dagger} A^{-1} )

and since we cannot fuse D^{\dagger} A^{-\dagger}, we instead fuse A^{-\dagger} D^{\dagger}.

If kappa is non-zero, the operation is given by out = x + kappa * A^{-1} D in. This operator can (at present) be applied to only single parity (checker-boarded) fields.

Parameters
[out]outThe output result field
[in]inThe input field
[in]UThe gauge field used for the operator
[in]AThe clover field used for the operator
[in]kappaScale factor applied
[in]xVector field we accumulate onto
[in]parityDestination parity
[in]daggerWhether this is for the dagger operator
[in]comm_overrideOverride for which dimensions are partitioned
[in]profileThe TimeProfile used for profiling the dslash

Definition at line 158 of file dslash_wilson_clover_preconditioned.cu.

References checkLocation, checkPrecision, dagger, errorQuda, quda::ColorSpinorField::FieldOrder(), in, out, parity, and quda::ColorSpinorField::V().

Referenced by quda::DiracCloverPC::Dslash(), and quda::DiracCloverPC::DslashXpay().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ applyWilsonTM()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, int twist, KernelType kernel_type, typename Arg , typename Vector >
__device__ __host__ void quda::applyWilsonTM ( Vector out,
Arg arg,
int  coord[nDim],
int  x_cb,
int  s,
int  parity,
int  idx,
int  thread_dim,
bool &  active 
)
inline

Applies the off-diagonal part of the Wilson operator premultiplied by twist rotation - this is required for applying the symmetric preconditioned twisted-mass dagger operator.

Parameters
[out]outThe out result field
[in,out]argParameter struct
[in]coordSite coordinate
[in]x_cbThe checker-boarded site index
[in]sFifth-dimension index
[in]paritySite parity
[in]idxThread index (equal to face index for exterior kernels)
[in]thread_dimWhich dimension this thread corresponds to (fused exterior only)

Definition at line 52 of file dslash_twisted_mass_preconditioned.cuh.

References quda::TwistedMassArg< Float, nColor, reconstruct_ >::asymmetric, conj(), quda::DslashArg< Float >::dagger, EXTERIOR_KERNEL_ALL, getNeighborIndexCB(), quda::WilsonArg< Float, nColor, reconstruct_ >::ghost, quda::WilsonArg< Float, nColor, reconstruct_ >::in, quda::DslashArg< Float >::kernel_type, nColor, quda::DslashArg< Float >::nParity, quda::DslashArg< Float >::parity, quda::WilsonArg< Float, nColor, reconstruct_ >::reconstruct, quda::WilsonArg< Float, nColor, reconstruct_ >::U, and quda::DslashArg< Float >::xpay.

Here is the call graph for this function:

◆ arg() [1/3]

template<typename ValueType >
__host__ __device__ ValueType quda::arg ( const complex< ValueType > &  z)
inline

Returns the phase angle of z.

Definition at line 1076 of file complex_quda.h.

References atan2().

Referenced by quda::DomainWall5D< Float, nDim, nColor, Arg >::apply(), quda::TwistedMass< Float, nDim, nColor, Arg >::apply(), quda::Staggered< Float, nDim, nColor, Arg >::apply(), quda::TwistedClover< Float, nDim, nColor, Arg >::apply(), quda::WilsonClover< Float, nDim, nColor, Arg >::apply(), quda::NdegTwistedMass< Float, nDim, nColor, Arg >::apply(), quda::TwistedCloverPreconditioned< Float, nDim, nColor, Arg >::apply(), quda::WilsonCloverPreconditioned< Float, nDim, nColor, Arg >::apply(), quda::DomainWall4D< Float, nDim, nColor, Arg >::apply(), quda::TwistedMassPreconditioned< Float, nDim, nColor, Arg >::apply(), quda::Wilson< Float, nDim, nColor, Arg >::apply(), quda::Laplace< Float, nDim, nColor, Arg >::apply(), quda::NdegTwistedMassPreconditioned< Float, nDim, nColor, Arg >::apply(), quda::CopyGauge< FloatOut, FloatIn, length, Arg >::apply(), quda::SpinorNoise< real, Ns, Nc, type, Arg >::apply(), quda::blas::MultiBlas< NXZ, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Functor, T >::apply(), quda::GaugeGauss< Float, Arg >::apply(), quda::KSForceComplete< Float, Oprod, Gauge, Mom >::apply(), quda::CopyGaugeEx< FloatOut, FloatIn, length, OutOrder, InOrder >::apply(), quda::ShiftColorSpinorField< Output, Input >::apply(), quda::WuppertalSmearing< Float, Ns, Nc, Arg >::apply(), quda::GaugeOvrImpSTOUT< Float, Arg >::apply(), quda::CopyColorSpinor< Ns, Arg >::apply(), quda::blas::MultiReduceCuda< NXZ, doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Reducer >::apply(), quda::Pack< Float, nColor, spin_project >::apply(), quda::ExtractGhost< nDim, Arg >::apply(), quda::ExtractGhostEx< Float, length, nDim, dim, Order >::apply(), quda::CopyColorSpinor< 4, Arg >::apply(), quda::blas::ReduceCuda< doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer >::apply(), quda::CopySpinorEx< FloatOut, FloatIn, Ns, Nc, OutOrder, InOrder, Basis, extend >::apply(), quda::Gamma< ValueType, basis, dir >::apply(), 
quda::KSLongLinkForce< Float, Result, Oprod, Gauge >::apply(), quda::TwistGamma< Float, nColor, Arg >::apply(), quda::ProjectSU3< Float, G >::apply(), quda::Clover< Float, nSpin, nColor, Arg >::apply(), quda::TwistClover< Float, nSpin, nColor, Arg >::apply(), ApplyClover(), ApplyGamma(), ApplyTwistClover(), ApplyTwistGamma(), CalculateYhatCPU(), CalculateYhatGPU(), Checksum(), cloverDerivativeKernel(), cloverGPU(), cloverInvert(), cloverInvertKernel(), coarseDslash(), coarseDslashKernel(), quda::coeff_type< real, true, Arg >::coeff(), completeKSForce(), completeKSForceCPU(), completeKSForceKernel(), computeAPEStep(), ComputeAVCPU(), ComputeAVGPU(), ComputeCoarseCloverCPU(), ComputeCoarseCloverGPU(), computeFmunuCPU(), computeFmunuKernel(), computeKSLongLinkForce(), computeKSLongLinkForceCPU(), computeKSLongLinkForceKernel(), computeMomAction(), computeOvrImpSTOUTStep(), computePlaq(), computeSTOUTStep(), ComputeTMAVGPU(), ComputeTMCAVCPU(), ComputeTMCAVGPU(), ComputeUVCPU(), ComputeUVGPU(), computeVUV(), ComputeVUVGPU(), computeWupperalStep(), ComputeYReverseCPU(), ComputeYReverseGPU(), ConvertCPU(), ConvertGPU(), coordsFromFaceIndex(), copyGauge(), copyGaugeEx(), copyGaugeExKernel(), copyInterior(), copyInteriorKernel(), copySpinorEx(), covDev(), covDevGPU(), dimFromFaceIndex(), domainWall4D(), quda::DomainWall4DApply< Float, nColor, recon >::DomainWall4DApply(), domainWall4DCPU(), domainWall4DGPU(), domainWall5D(), quda::DomainWall5DApply< Float, nColor, recon >::DomainWall5DApply(), domainWall5DCPU(), domainWall5DGPU(), dslash5CPU(), dslash5GPU(), dslash5inv(), dslash5invGPU(), quda::ExtractGhost< nDim, Arg >::ExtractGhost(), extractGhost(), extractGhostEx(), extractGhostExKernel(), forceRecord(), quda::GaugeSTOUTArg< Float, GaugeOr, GaugeDs >::GaugeSTOUTArg(), genericCopyColorSpinor(), GenericPackGhost(), GenericPackGhostKernel(), genGauss(), getCoords(), quda::gauge::Reconstruct< 13, Float, ghostExchange_, stag_phase >::getPhase(), quda::gauge::Reconstruct< 9, 
Float, ghostExchange_, stag_phase >::getPhase(), quda::ImprovedStaggeredApply< Float, nColor, recon_l >::ImprovedStaggeredApply(), indexFromFaceIndex(), quda::dslash::issuePack(), isUnitary(), laplace(), quda::LaplaceApply< Float, nColor, recon >::LaplaceApply(), laplaceGPU(), quda::StaggeredLaunch< Float, nDim, nColor, nParity, dagger, xpay, kernel_type, Arg >::launch(), launch_kernel_random(), log(), quda::blas::multiBlasKernel(), quda::blas::multiReduceKernel(), ndegTwistedMass(), quda::NdegTwistedMassApply< Float, nColor, recon >::NdegTwistedMassApply(), ndegTwistedMassCPU(), ndegTwistedMassGPU(), quda::NdegTwistedMassPreconditionedApply< Float, nColor, recon >::NdegTwistedMassPreconditionedApply(), ndegTwistedMassPreconditionedCPU(), ndegTwistedMassPreconditionedGPU(), operator<<(), OvrImpSTOUTStep(), pack(), packGhost(), packKernel(), packShmemKernel(), packStaggered(), packStaggeredKernel(), packStaggeredShmemKernel(), polarSu3(), projectSU3(), qChargeComputeKernel(), reduce(), quda::blas::reduceKernel(), RescaleYCPU(), RescaleYGPU(), setUnitarizeLinksConstants(), shiftColorSpinorField(), sigmaOprodKernel(), spinorNoise(), SpinorNoiseCPU(), SpinorNoiseGPU(), sqrt(), staggered(), quda::StaggeredApply< Float, nColor, recon_u >::StaggeredApply(), staggeredGPU(), StaggeredPhase(), twistCloverGPU(), twistedClover(), quda::TwistedCloverApply< Float, nColor, recon >::TwistedCloverApply(), quda::TwistedCloverPreconditionedApply< Float, nColor, recon >::TwistedCloverPreconditionedApply(), twistedCloverPreconditionedCPU(), twistedCloverPreconditionedGPU(), twistedMass(), quda::TwistedMassApply< Float, nColor, recon >::TwistedMassApply(), twistedMassCPU(), twistedMassGPU(), quda::TwistedMassPreconditionedApply< Float, nColor, recon >::TwistedMassPreconditionedApply(), twistedMassPreconditionedCPU(), twistedMassPreconditionedGPU(), updateMomentum(), wilson(), quda::WilsonApply< Float, nColor, recon >::WilsonApply(), wilsonClover(), quda::WilsonCloverApply< Float, 
nColor, recon >::WilsonCloverApply(), wilsonCloverCPU(), wilsonCloverGPU(), quda::WilsonCloverPreconditionedApply< Float, nColor, recon >::WilsonCloverPreconditionedApply(), wilsonCloverPreconditionedCPU(), wilsonCloverPreconditionedGPU(), wilsonCPU(), wilsonGPU(), wuppertalStep(), and wuppertalStepCPU().

Here is the call graph for this function:

◆ arg() [2/3]

template<>
__host__ __device__ float quda::arg ( const complex< float > &  z)
inline

Definition at line 1081 of file complex_quda.h.

References quda::complex< float >::imag(), and quda::complex< float >::real().

Here is the call graph for this function:

◆ arg() [3/3]

template<>
__host__ __device__ double quda::arg ( const complex< double > &  z)
inline

Definition at line 1086 of file complex_quda.h.

References atan2(), quda::complex< double >::imag(), and quda::complex< double >::real().

Here is the call graph for this function:

◆ arpack_solve()

void quda::arpack_solve ( std::vector< ColorSpinorField *> &  h_evecs,
std::vector< Complex > &  h_evals,
const DiracMatrix mat,
QudaEigParam eig_param,
TimeProfile profile 
)

The QUDA interface function. One passes two allocated arrays to hold the eigenmode data, the problem matrix, the arpack parameters defining what problem is to be solved, and a container for QUDA data structure types.

arpack_solve()

Parameters
[out]h_evecsHost fields where the e-vectors will be copied to
[out]h_evalsWhere the e-values will be copied to
[in]matAn explicit construction of the problem matrix.
[in]paramParameter container defining how the matrix is to be solved.
[in]eig_paramParameter structure for all QUDA eigensolvers
[in,out]profileTimeProfile instance used for profiling

Definition at line 507 of file quda_arpack_interface.cpp.

References errorQuda.

Referenced by eigensolveQuda().

Here is the caller graph for this function:

◆ asin() [1/2]

template<typename ValueType >
__host__ __device__ ValueType quda::asin ( ValueType  x)
inline

Definition at line 66 of file complex_quda.h.

References asin().

Referenced by acos().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ asin() [2/2]

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::asin ( const complex< ValueType > &  z)
inline

Definition at line 1281 of file complex_quda.h.

References asinh().

Referenced by asin().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ asinh()

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::asinh ( const complex< ValueType > &  z)
inline

Definition at line 1320 of file complex_quda.h.

References log(), and sqrt().

Referenced by asin().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ assertAllMemFree()

void quda::assertAllMemFree ( )

Definition at line 384 of file malloc.cpp.

References DEVICE, DEVICE_PINNED, HOST, MAPPED, PINNED, print_alloc(), print_alloc_header(), printfQuda, and warningQuda.

Referenced by endQuda().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ atan() [1/2]

template<typename ValueType >
__host__ __device__ ValueType quda::atan ( ValueType  x)
inline

Definition at line 71 of file complex_quda.h.

References atan().

Here is the call graph for this function:

◆ atan() [2/2]

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::atan ( const complex< ValueType > &  z)
inline

Definition at line 1288 of file complex_quda.h.

References atanh().

Referenced by atan().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ atan2()

template<typename ValueType >
__host__ __device__ ValueType quda::atan2 ( ValueType  x,
ValueType  y 
)
inline

Definition at line 76 of file complex_quda.h.

Referenced by arg(), quda::Trig< isFixed, T >::Atan2(), atanh(), link_sanity_check_internal_8(), new_save_half(), and su3Construct8().

Here is the caller graph for this function:

◆ atanh() [1/2]

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::atanh ( const complex< ValueType > &  z)
inline

Definition at line 1326 of file complex_quda.h.

References atan2(), and log().

Referenced by atan().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ atanh() [2/2]

template<typename ValueType >
__host__ __device__ complex<float> quda::atanh ( const complex< float > &  z)
inline

Definition at line 1344 of file complex_quda.h.

References quda::complex< float >::imag(), and quda::complex< float >::real().

Here is the call graph for this function:

◆ ax()

void quda::ax ( const double &  a,
GaugeField u 
)

Scale the gauge field by the scalar a.

Parameters
[in]ascalar multiplier
[in]uThe gauge field we want to multiply

Definition at line 349 of file gauge_field.cpp.

References quda::blas::ax(), colorSpinorParam(), and quda::ColorSpinorField::Create().

Referenced by quda::MG::buildFreeVectors(), computeHISQForceQuda(), dslashReference_5th(), dslashReference_5th_inv(), quda::cpuGaugeField::Gauge_p(), quda::MG::generateNullVectors(), main(), and mdslashReference_5th_inv().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ axpy()

template<typename real , typename Link >
__device__ void quda::axpy ( real  a,
const real *  x,
Link &  y 
)
inline

◆ backSubs()

void quda::backSubs ( const Complex alpha,
Complex **const  beta,
const double *  gamma,
Complex delta,
int  n 
)

Definition at line 135 of file inv_gcr_quda.cpp.

Referenced by updateSolution().

Here is the caller graph for this function:

◆ block_idx()

template<typename T >
__device__ int quda::block_idx ( const T &  swizzle)
inline

Swizzler for reordering the (x) thread block indices - use in conjunction with swizzle-factor autotuning to find the optimum swizzle factor. Specifically, the thread block id is remapped by transposing its coordinates: if the original order can be parametrized by

blockIdx.x = j * swizzle + i,

then the new order is

block_idx = i * (gridDim.x / swizzle) + j

We need to factor out any remainder and leave this in original ordering.

Parameters
[in]swizzleSwizzle factor to be applied
Returns
Swizzled block index

Definition at line 834 of file index_helper.cuh.

◆ blockOrthoCPU()

template<typename sumFloat , typename Float , int nSpin, int spinBlockSize, int nColor, int coarseSpin, int nVec, typename Arg >
void quda::blockOrthoCPU ( Arg arg)

◆ BlockOrthogonalize()

void quda::BlockOrthogonalize ( ColorSpinorField V,
const std::vector< ColorSpinorField *> &  B,
const int *  fine_to_coarse,
const int *  coarse_to_fine,
const int *  geo_bs,
const int  spin_bs,
const int  n_block_ortho 
)

Block orthogonalize the matrix field, where the blocks are defined by lookup tables that map the fine grid points to the coarse grid points, and similarly for the spin degrees of freedom.

Parameters
[in,out]VMatrix field to be orthogonalized
[in]Binput vectors
[in]geo_bsGeometric block size
[in]fine_to_coarseFine-to-coarse lookup table (linear indices)
[in]coarse_to_fineCoarse-to-fine lookup table (linear indices)
[in]spin_bsSpin block size
[in]n_block_orthoNumber of times to Gram-Schmidt

Definition at line 317 of file block_orthogonalize.cu.

References errorQuda, n_block_ortho, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_SINGLE_PRECISION, and V.

Referenced by quda::Transfer::reset(), and quda::Transfer::setTransferGPU().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ broadcastTuneCache()

static void quda::broadcastTuneCache ( )
static

Distribute the tunecache from node 0 to all other nodes.

Definition at line 290 of file tune.cpp.

References comm_broadcast(), comm_rank(), deserializeTuneCache(), serializeTuneCache(), and size.

Referenced by loadTuneCache(), and tuneLaunch().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ c2d() [1/2]

__host__ __device__ double quda::c2d ( char  a)
inline

Definition at line 39 of file convert.h.

Referenced by copyFloatN().

Here is the caller graph for this function:

◆ c2d() [2/2]

__host__ __device__ double quda::c2d ( char  a,
double  c 
)
inline

Definition at line 56 of file convert.h.

◆ c2f() [1/2]

__host__ __device__ float quda::c2f ( char  a)
inline

Definition at line 38 of file convert.h.

Referenced by copy(), copy_and_scale(), and copyFloatN().

Here is the caller graph for this function:

◆ c2f() [2/2]

__host__ __device__ float quda::c2f ( char  a,
float  c 
)
inline

Definition at line 52 of file convert.h.

◆ calculateY()

template<bool from_coarse, typename Float , int fineSpin, int fineColor, int coarseSpin, int coarseColor, typename F , typename Ftmp , typename Vt , typename coarseGauge , typename coarseGaugeAtomic , typename fineGauge , typename fineClover >
void quda::calculateY ( coarseGauge &  Y,
coarseGauge &  X,
coarseGaugeAtomic &  Y_atomic,
coarseGaugeAtomic &  X_atomic,
Ftmp &  UV,
F &  AV,
Vt &  V,
fineGauge &  G,
fineClover &  C,
fineClover &  Cinv,
GaugeField Y_,
GaugeField X_,
GaugeField Y_atomic_,
GaugeField X_atomic_,
ColorSpinorField uv,
ColorSpinorField av,
const ColorSpinorField v,
double  kappa,
double  mu,
double  mu_factor,
QudaDiracType  dirac,
QudaMatPCType  matpc,
bool  need_bidirectional,
const int *  fine_to_coarse,
const int *  coarse_to_fine 
)

Calculate the coarse-link field, including the coarse clover field.

Parameters
Y[out]Coarse link field accessor
X[out]Coarse clover field accessor
UV[out]Temporary accessor used to store fine link field * null space vectors
AV[out]Temporary accessor used to store fine clover inverse * null space vectors (only applicable when fine-grid operator is the preconditioned clover operator, else in general this just aliases V)
V[in]Packed null-space vector accessor
G[in]Fine grid link / gauge field accessor
C[in]Fine grid clover field accessor
Cinv[in]Fine grid clover inverse field accessor
Y_[out]Coarse link field
X_[out]Coarse clover field
X_[out]Coarse clover inverse field (used as temporary here)
v[in]Packed null-space vectors
kappa[in]Kappa parameter
mu[in]Twisted-mass parameter
matpc[in]The type of preconditioning of the source fine-grid operator
need_bidirectional[in]If we need to force bi-directional build or not. Required if some previous level was preconditioned, even if this one isn't

Definition at line 869 of file coarse_op.cuh.

References abs(), quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >::apply(), quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >::arg, quda::LatticeField::bufferIndex, checkLocation, COMPUTE_AV, COMPUTE_CLOVER_INV_MAX, COMPUTE_COARSE_CLOVER, COMPUTE_CONVERT, COMPUTE_DIAGONAL, COMPUTE_RESCALE, COMPUTE_REVERSE_Y, COMPUTE_TMAV, COMPUTE_TMCAV, COMPUTE_TMDIAGONAL, COMPUTE_TWISTED_CLOVER_INV_MAX, COMPUTE_UV, COMPUTE_VUV, errorQuda, quda::ColorSpinorField::exchangeGhost(), getVerbosity(), quda::ColorSpinorField::Ghost(), quda::colorspinor::FieldOrderCB< Float, nSpin, nColor, nVec, order, storeFloat, ghostFloat, disable_ghost, block_float, use_tex >::norm2(), quda::LatticeField::Precision(), printfQuda, QUDA_BACKWARDS, QUDA_CLOVER_DIRAC, QUDA_CLOVERPC_DIRAC, QUDA_COARSE_DIRAC, QUDA_COARSEPC_DIRAC, QUDA_CUDA_FIELD_LOCATION, QUDA_DEBUG_VERBOSE, QUDA_FORWARDS, QUDA_HALF_PRECISION, QUDA_INVALID_PARITY, QUDA_MATPC_EVEN_EVEN, QUDA_MATPC_EVEN_EVEN_ASYMMETRIC, QUDA_MATPC_ODD_ODD, QUDA_MATPC_ODD_ODD_ASYMMETRIC, QUDA_MAX_DIM, QUDA_TWISTED_CLOVER_DIRAC, QUDA_TWISTED_CLOVERPC_DIRAC, QUDA_TWISTED_MASS_DIRAC, QUDA_TWISTED_MASSPC_DIRAC, QUDA_VERBOSE, quda::colorspinor::FieldOrderCB< Float, nSpin, nColor, nVec, order, storeFloat, ghostFloat, disable_ghost, block_float, use_tex >::resetGhost(), quda::LatticeField::Scale(), quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >::setComputeType(), quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >::setDimension(), quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >::setDirection(), sqrt(), quda::ColorSpinorField::X(), quda::LatticeField::X(), and quda::GaugeField::zero().

Referenced by CoarseOp().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ calculateYhat()

void quda::calculateYhat ( GaugeField Yhat,
GaugeField Xinv,
const GaugeField Y,
const GaugeField X 
)

Calculate preconditioned coarse links and coarse clover inverse field.

Parameters
Yhat[out]Preconditioned coarse link field
Xinv[out]Coarse clover inverse field
Y[in]Coarse link field
X[in]Coarse clover field

Definition at line 245 of file coarse_op_preconditioned.cu.

References checkPrecision, errorQuda, getVerbosity(), quda::LatticeField::Precision(), printfQuda, QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_SINGLE_PRECISION, QUDA_SUMMARIZE, and X.

Referenced by quda::DiracCoarse::createPreconditionedCoarseOp().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ CalculateYhatCPU()

template<typename Float , int n, bool compute_max_only, typename Arg >
void quda::CalculateYhatCPU ( Arg arg)

Definition at line 100 of file coarse_op_preconditioned.cuh.

References arg(), and parity.

Here is the call graph for this function:

◆ CalculateYhatGPU()

template<typename Float , int n, bool compute_max_only, typename Arg >
__global__ void quda::CalculateYhatGPU ( Arg  arg)

Definition at line 118 of file coarse_op_preconditioned.cuh.

References arg(), atomicMax(), and parity.

Here is the call graph for this function:

◆ canReuseResidentGauge()

bool quda::canReuseResidentGauge ( QudaInvertParam inv_param)

Check that the resident gauge field is compatible with the requested inv_param

Parameters
inv_paramContains all metadata regarding host and device storage

Definition at line 2232 of file interface_quda.cpp.

References QudaInvertParam_s::cuda_prec, QudaInvertParam_s::dslash_type, quda::LatticeField::Precision(), and QUDA_ASQTAD_DSLASH.

Here is the call graph for this function:

◆ caxpy()

template<typename Float >
__device__ __host__ void quda::caxpy ( const complex< Float > &  a,
const complex< Float > &  x,
complex< Float > &  y 
)
inline

◆ checkLength()

void quda::checkLength ( const ColorSpinorField a,
const ColorSpinorField b 
)
inline

Definition at line 26 of file blas_helper.cuh.

References errorQuda, quda::ColorSpinorField::Length(), and quda::ColorSpinorField::Stride().

Referenced by quda::blas::nativeBlas(), and quda::blas::nativeReduce().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ checkMomOrder()

void quda::checkMomOrder ( const GaugeField u)

Definition at line 22 of file copy_gauge.cu.

References errorQuda, quda::GaugeField::Order(), QUDA_FLOAT2_GAUGE_ORDER, QUDA_MILC_GAUGE_ORDER, QUDA_MILC_SITE_GAUGE_ORDER, QUDA_RECONSTRUCT_10, QUDA_RECONSTRUCT_NO, QUDA_TIFR_GAUGE_ORDER, QUDA_TIFR_PADDED_GAUGE_ORDER, and quda::GaugeField::Reconstruct().

Referenced by copyGauge().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ checkNan()

template<typename Float , int length, typename Arg >
void quda::checkNan ( Arg arg)

Check whether the field contains NaNs.

Definition at line 63 of file copy_gauge.cuh.

References errorQuda, length, nColor, quda::gauge::Ncolor(), and parity.

Here is the call graph for this function:

◆ checkSpinor()

void quda::checkSpinor ( const ColorSpinorField a,
const ColorSpinorField b 
)
inline

Definition at line 20 of file blas_helper.cuh.

References errorQuda, quda::ColorSpinorField::Length(), and quda::ColorSpinorField::Stride().

Referenced by quda::blas::multiReduce().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ Checksum()

uint64_t quda::Checksum ( const GaugeField u,
bool  mini = false 
)

Compute an XOR-based checksum of this gauge field: each gauge field entry is converted to type uint64_t, and the cumulative XOR of these values is computed.

Parameters
[in]miniWhether to compute a mini checksum or global checksum. A mini checksum only computes over a subset of the lattice sites and is to be used for online comparisons, e.g., checking a field has changed with a global update algorithm.
Returns
checksum value

Definition at line 34 of file checksum.cu.

References arg(), ChecksumCPU(), comm_allreduce_xor(), errorQuda, quda::GaugeField::Ncolor(), quda::GaugeField::Order(), quda::LatticeField::Precision(), QUDA_BQCD_GAUGE_ORDER, QUDA_DOUBLE_PRECISION, QUDA_MILC_GAUGE_ORDER, QUDA_QDP_GAUGE_ORDER, QUDA_QDPJIT_GAUGE_ORDER, QUDA_SINGLE_PRECISION, QUDA_TIFR_GAUGE_ORDER, and QUDA_TIFR_PADDED_GAUGE_ORDER.

Referenced by quda::GaugeField::checksum(), and quda::cpuGaugeField::Gauge_p().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ ChecksumCPU()

template<typename Arg >
uint64_t quda::ChecksumCPU ( const Arg arg)

Definition at line 23 of file checksum.cu.

References parity, siteChecksum(), and quda::Arg< real, Ns, Nc, order >::volumeCB.

Referenced by Checksum().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ checkUnitary()

template<typename Matrix , typename Float >
__host__ __device__ bool quda::checkUnitary ( const Matrix inv,
const Matrix in,
const Float  tol 
)
inline

Check the unitarity of the input matrix to a given tolerance.

Parameters
invThe inverse of the input matrix
inThe input matrix whose unitarity is being checked
tolTolerance to which this check is applied

Definition at line 24 of file su3_project.cuh.

References conj(), in, quda::Matrix< T, N >::size(), and tol.

Referenced by polarSu3().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ checkUnitaryPrint()

template<typename Matrix >
__host__ __device__ void quda::checkUnitaryPrint ( const Matrix inv,
const Matrix in 
)

Print out deviation for each component (used for debugging only).

Parameters
invThe inverse of the input matrix
inThe input matrix whose deviation from unitarity is being reported

Definition at line 66 of file su3_project.cuh.

References in, and quda::Matrix< T, N >::size().

Here is the call graph for this function:

◆ cloverApply()

template<typename Float , int nSpin, int nColor, typename Arg >
__device__ __host__ void quda::cloverApply ( Arg arg,
int  x_cb,
int  parity 
)
inline

Definition at line 519 of file dslash_quda.cu.

References nColor, quda::Arg< real, Ns, Nc, order >::nParity, out, and parity.

◆ cloverCPU()

template<typename Float , int nSpin, int nColor, typename Arg >
void quda::cloverCPU ( Arg arg)

◆ cloverDerivative()

void quda::cloverDerivative ( cudaGaugeField force,
cudaGaugeField gauge,
cudaGaugeField oprod,
double  coeff,
QudaParity  parity 
)

Compute the derivative of the clover matrix in the direction mu,nu and compute the resulting force given the outer-product field.

Parameters
forceThe computed force field (read/write update)
gaugeThe input gauge field
oprodThe input outer-product field (tensor matrix field)
coeffMultiplicative coefficient (e.g., clover coefficient)
parityThe field parity we are working on

Definition at line 174 of file clover_deriv_quda.cu.

References errorQuda, quda::GaugeField::Geometry(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_EVEN_PARITY, QUDA_SINGLE_PRECISION, QUDA_TENSOR_GEOMETRY, QUDA_VECTOR_GEOMETRY, and quda::LatticeField::X().

Referenced by computeCloverForceQuda(), and quda::FullClover::FullClover().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ cloverDerivativeKernel()

template<typename real , typename Arg >
__global__ void quda::cloverDerivativeKernel ( Arg  arg)

Definition at line 320 of file clover_deriv.cuh.

References arg(), axpy(), DECLARE_LINK, quda::CloverDerivArg< Float, Force, Gauge, Oprod >::force, index(), mu, and quda::Arg< real, Ns, Nc, order >::volumeCB.

Here is the call graph for this function:

◆ cloverGPU()

template<typename Float , int nSpin, int nColor, typename Arg >
__global__ void quda::cloverGPU ( Arg  arg)

Definition at line 560 of file dslash_quda.cu.

References arg(), quda::Arg< real, Ns, Nc, order >::nParity, parity, and quda::Arg< real, Ns, Nc, order >::volumeCB.

Here is the call graph for this function:

◆ cloverInvert() [1/2]

template<typename Float , typename Arg , bool computeTrLog, bool twist>
void quda::cloverInvert ( Arg arg)

Definition at line 65 of file clover_invert.cuh.

References arg(), parity, and quda::Arg< real, Ns, Nc, order >::volumeCB.

Here is the call graph for this function:

◆ cloverInvert() [2/2]

void quda::cloverInvert ( CloverField clover,
bool  computeTraceLog 
)

This function computes the Cholesky decomposition of each clover matrix and stores the clover inverse field.

Parameters
cloverThe clover field (contains both the field itself and its inverse)
computeTraceLogWhether to compute the trace logarithm of the clover term

Definition at line 106 of file clover_invert.cu.

References errorQuda, quda::CloverField::Order(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, and QUDA_SINGLE_PRECISION.

Referenced by quda::FullClover::FullClover(), and loadCloverQuda().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ cloverInvertCompute()

template<typename Float , typename Arg , bool computeTrLog, bool twist>
__device__ __host__ double quda::cloverInvertCompute ( Arg arg,
int  x_cb,
int  parity 
)
inline

Use a Cholesky decomposition and invert the clover matrix

Definition at line 33 of file clover_invert.cuh.

References quda::linalg::Cholesky< Mat, T, N, fast >::D(), quda::linalg::Cholesky< Mat, T, N, fast >::invert(), log(), Mat(), nColor, and quda::CloverInvertArg< Float >::twist.

Here is the call graph for this function:

◆ cloverInvertKernel()

template<int blockSize, typename Float , typename Arg , bool computeTrLog, bool twist>
__global__ void quda::cloverInvertKernel ( Arg  arg)

Definition at line 82 of file clover_invert.cuh.

References arg(), parity, and quda::Arg< real, Ns, Nc, order >::volumeCB.

Here is the call graph for this function:

◆ cloverRho()

void quda::cloverRho ( CloverField clover,
double  rho 
)

This function adds a real scalar onto the clover diagonal (only to the direct field not the inverse)

Parameters
cloverThe clover field
rhoReal scalar to be added on

Referenced by quda::FullClover::FullClover().

Here is the caller graph for this function:

◆ cmac()

template<typename real >
__host__ __device__ complex<real> quda::cmac ( const complex< real > &  x,
const complex< real > &  y,
const complex< real > &  z 
)
inline

◆ cmul()

template<typename real >
__host__ __device__ complex<real> quda::cmul ( const complex< real > &  x,
const complex< real > &  y 
)
inline

◆ CoarseCoarseOp()

void quda::CoarseCoarseOp ( GaugeField Y,
GaugeField X,
const Transfer T,
const GaugeField gauge,
const GaugeField clover,
const GaugeField cloverInv,
double  kappa,
double  mu,
double  mu_factor,
QudaDiracType  dirac,
QudaMatPCType  matpc,
bool  need_bidirectional 
)

Coarse operator construction from an intermediate-grid operator (Coarse)

Parameters
Y[out]Coarse link field
X[out]Coarse clover field
T[in]Transfer operator that defines the new coarse space
gauge[in]Link field from fine grid
clover[in]Clover field on fine grid
cloverInv[in]Clover inverse field on fine grid
kappa[in]Kappa parameter
mu[in]Mu parameter (set to non-zero for twisted-mass/twisted-clover)
mu_factor[in]Multiplicative factor for the mu parameter
matpc[in]The type of even-odd preconditioned fine-grid operator we are constructing the coarse grid operator from. If matpc==QUDA_MATPC_INVALID then we assume the operator is not even-odd preconditioned and we coarsen the full operator.
need_bidirectional[in]Whether or not we need to force a bi-directional build, even if the given level isn't preconditioned—if any previous level is preconditioned, we've violated that symmetry.

Definition at line 192 of file coarsecoarse_op.cu.

References checkLocation, quda::ColorSpinorParam::create, quda::GaugeField::Create(), quda::ColorSpinorField::Create(), errorQuda, quda::GaugeFieldParam::location, quda::LatticeField::MemType(), param, quda::LatticeField::Precision(), QUDA_CUDA_FIELD_LOCATION, QUDA_SINGLE_PRECISION, QUDA_ZERO_FIELD_CREATE, quda::GaugeFieldParam::setPrecision(), quda::Transfer::Vectors(), and X.

Referenced by quda::DiracCoarse::createCoarseOp(), and quda::DiracCoarsePC::createCoarseOp().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ coarseDslash() [1/2]

template<typename Float , int nDim, int Ns, int Nc, int Mc, int color_stride, int dim_thread_split, bool dslash, bool clover, bool dagger, DslashType type, int dir, int dim, typename Arg >
__device__ __host__ void quda::coarseDslash ( Arg arg,
int  x_cb,
int  src_idx,
int  parity,
int  s,
int  color_block,
int  color_offset 
)
inline

◆ coarseDslash() [2/2]

template<typename Float , int nDim, int Ns, int Nc, int Mc, bool dslash, bool clover, bool dagger, DslashType type, typename Arg >
void quda::coarseDslash ( Arg  arg)

◆ coarseDslashKernel()

template<typename Float , int nDim, int Ns, int Nc, int Mc, int color_stride, int dim_thread_split, bool dslash, bool clover, bool dagger, DslashType type, typename Arg >
__global__ void quda::coarseDslashKernel ( Arg  arg)

◆ coarseIndex()

template<typename Arg >
__device__ __host__ int quda::coarseIndex ( const Arg arg)
inline

Definition at line 619 of file coarse_op_kernel.cuh.

Referenced by getIndicesShared().

Here is the caller graph for this function:

◆ CoarseOp()

void quda::CoarseOp ( GaugeField Y,
GaugeField X,
const Transfer T,
const cudaGaugeField gauge,
const cudaCloverField clover,
double  kappa,
double  mu,
double  mu_factor,
QudaDiracType  dirac,
QudaMatPCType  matpc 
)

Coarse operator construction from a fine-grid operator (Wilson / Clover)

Parameters
Y[out]Coarse link field
X[out]Coarse clover field
T[in]Transfer operator that defines the coarse space
gauge[in]Gauge field from fine grid
clover[in]Clover field on fine grid (optional)
kappa[in]Kappa parameter
mu[in]Mu parameter (set to non-zero for twisted-mass/twisted-clover)
mu_factor[in]Multiplicative factor for the mu parameter
matpc[in]The type of even-odd preconditioned fine-grid operator we are constructing the coarse grid operator from. If matpc==QUDA_MATPC_INVALID then we assume the operator is not even-odd preconditioned and we coarsen the full operator.

Definition at line 201 of file coarse_op.cu.

References quda::GaugeField::Anisotropy(), calculateY(), checkLocation, quda::CloverFieldParam::clover, quda::CloverFieldParam::cloverInv, quda::GaugeField::copy(), quda::CloverFieldParam::create, quda::ColorSpinorParam::create, quda::GaugeField::Create(), quda::ColorSpinorField::Create(), quda::CloverFieldParam::direct, errorQuda, quda::GaugeField::GaugeFixed(), quda::GaugeField::Geometry(), quda::CloverFieldParam::inverse, quda::CloverFieldParam::invNorm, quda::GaugeField::LinkType(), quda::GaugeFieldParam::location, quda::LatticeField::MemType(), quda::LatticeFieldParam::nDim, quda::CloverFieldParam::norm, quda::GaugeFieldParam::order, quda::CloverFieldParam::order, quda::LatticeFieldParam::pad, param, quda::LatticeFieldParam::Precision(), quda::LatticeField::Precision(), QUDA_CPU_FIELD_LOCATION, QUDA_CUDA_FIELD_LOCATION, QUDA_FLOAT2_GAUGE_ORDER, QUDA_FULL_SITE_SUBSET, QUDA_GHOST_EXCHANGE_PAD, QUDA_INVALID_CLOVER_ORDER, QUDA_MATPC_INVALID, QUDA_NULL_FIELD_CREATE, QUDA_PACKED_CLOVER_ORDER, QUDA_QDP_GAUGE_ORDER, QUDA_RECONSTRUCT_NO, QUDA_SINGLE_PRECISION, QUDA_TWISTED_MASSPC_DIRAC, QUDA_ZERO_FIELD_CREATE, quda::GaugeFieldParam::reconstruct, quda::GaugeField::Reconstruct(), quda::cudaCloverField::saveCPUField(), quda::cudaGaugeField::saveCPUField(), quda::CloverFieldParam::setPrecision(), quda::GaugeFieldParam::setPrecision(), quda::LatticeFieldParam::siteSubset, quda::GaugeField::TBoundary(), quda::Transfer::Vectors(), quda::LatticeFieldParam::x, X, and quda::LatticeField::X().

Referenced by quda::DiracWilson::createCoarseOp(), quda::DiracClover::createCoarseOp(), quda::DiracCloverPC::createCoarseOp(), quda::DiracTwistedMass::createCoarseOp(), quda::DiracTwistedMassPC::createCoarseOp(), quda::DiracTwistedClover::createCoarseOp(), and quda::DiracTwistedCloverPC::createCoarseOp().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ colorInnerProduct()

template<int nColor, typename sumType , typename real >
__device__ __host__ void quda::colorInnerProduct ( complex< sumType > &  dot,
int  i,
complex< real >  v[nColor],
complex< real >  w[nColor] 
)
inline

Definition at line 63 of file block_orthogonalize.cuh.

References nColor.

◆ colorNorm()

template<int nColor, typename sumType , typename real >
__device__ __host__ void quda::colorNorm ( sumType &  nrm,
complex< real >  v[nColor] 
)
inline

Definition at line 76 of file block_orthogonalize.cuh.

References nColor.

◆ colorScale()

template<typename real , int nColor>
__device__ __host__ void quda::colorScale ( complex< real >  v[nColor],
real  a 
)
inline

Definition at line 97 of file block_orthogonalize.cuh.

References nColor.

◆ colorScaleSubtract()

template<typename real , int nColor>
__device__ __host__ void quda::colorScaleSubtract ( complex< real >  v[nColor],
complex< real >  a,
complex< real >  w[nColor] 
)
inline

Definition at line 86 of file block_orthogonalize.cuh.

References nColor.

◆ colorSpinorParam() [1/2]

ColorSpinorParam quda::colorSpinorParam ( const GaugeField a)

◆ colorSpinorParam() [2/2]

ColorSpinorParam quda::colorSpinorParam ( const CloverField a,
bool  inverse 
)

◆ compareSpinor()

template<class U , class V >
int quda::compareSpinor ( const U &  u,
const V v,
const int  tol 
)

Definition at line 184 of file color_spinor_util.cu.

References comm_allreduce_int(), comm_size(), parity, pow(), printfQuda, and tol.

Referenced by genericCompare().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ compile_type_str()

const char* quda::compile_type_str ( const LatticeField meta,
QudaFieldLocation  location_ = QUDA_INVALID_FIELD_LOCATION 
)
inline

Helper function for setting the auxiliary string.

Parameters
[in]metaLatticeField used for querying field location
Returns
String containing location and compilation type

Definition at line 718 of file lattice_field.h.

References quda::LatticeField::Location(), QUDA_CUDA_FIELD_LOCATION, and QUDA_INVALID_FIELD_LOCATION.

Referenced by quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >::CalculateY(), quda::CopyGauge< FloatOut, FloatIn, length, Arg >::CopyGauge(), quda::GaugePlaq< Float, Gauge >::GaugePlaq(), and quda::GenericPackGhostLauncher< Float, block_float, Ns, Ms, Nc, Mc, Arg >::GenericPackGhostLauncher().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ completeKSForce() [1/2]

void quda::completeKSForce ( GaugeField mom,
const GaugeField oprod,
const GaugeField gauge,
QudaFieldLocation  location,
long long *  flops = NULL 
)

◆ completeKSForce() [2/2]

template<typename Float , typename Oprod , typename Gauge , typename Mom >
void quda::completeKSForce ( Oprod  oprod,
Gauge  gauge,
Mom  mom,
int  dim[4],
const GaugeField meta,
QudaFieldLocation  location,
long long *  flops 
)

◆ completeKSForceCore()

template<typename Float , typename Oprod , typename Gauge , typename Mom >
__host__ __device__ void quda::completeKSForceCore ( KSForceArg< Oprod, Gauge, Mom > &  arg,
int  idx 
)

◆ completeKSForceCPU()

template<typename Float , typename Oprod , typename Gauge , typename Mom >
void quda::completeKSForceCPU ( KSForceArg< Oprod, Gauge, Mom > &  arg)

Definition at line 93 of file ks_force_quda.cu.

References arg(), and quda::KSForceArg< Oprod, Gauge, Mom >::threads.

Here is the call graph for this function:

◆ completeKSForceKernel()

template<typename Float , typename Oprod , typename Gauge , typename Mom >
__global__ void quda::completeKSForceKernel ( KSForceArg< Oprod, Gauge, Mom >  arg)

Definition at line 84 of file ks_force_quda.cu.

References arg(), and quda::KSForceArg< Oprod, Gauge, Mom >::threads.

Here is the call graph for this function:

◆ compute_alpha_N()

template<int N>
void quda::compute_alpha_N ( Complex Q_AQandg,
Complex alpha 
)

Definition at line 280 of file inv_ca_cg.cpp.

◆ compute_beta_N()

template<int N>
void quda::compute_beta_N ( Complex Q_AQandg,
Complex Q_AS,
Complex beta 
)

Definition at line 356 of file inv_ca_cg.cpp.

◆ compute_site_max()

template<typename Float , int Ns, int Ms, int Nc, int Mc, typename Arg >
__device__ __host__ __forceinline__ Float quda::compute_site_max ( Arg arg,
int  x_cb,
int  parity,
int  spinor_parity,
int  spin_block,
int  color_block,
bool  active 
)

Compute the max element over the spin-color components of a given site.

Definition at line 48 of file color_spinor_pack.cuh.

References errorQuda, MAX_BLOCK_FLOAT_NC, and s.

◆ computeAPEStep()

template<typename Float , typename Arg >
__global__ void quda::computeAPEStep ( Arg  arg)

Definition at line 96 of file gauge_ape.cuh.

References arg(), conj(), getCoords(), linkIndexShift(), parity, setIdentity(), and quda::GaugeAPEArg< Float, GaugeOr, GaugeDs >::X.

Here is the call graph for this function:

◆ computeAV()

template<typename Float , int fineSpin, int fineColor, int coarseColor, typename Arg >
__device__ __host__ void quda::computeAV ( Arg arg,
int  parity,
int  x_cb,
int  ch,
int  ic_c 
)
inline

◆ ComputeAVCPU()

template<typename Float , int fineSpin, int fineColor, int coarseColor, typename Arg >
void quda::ComputeAVCPU ( Arg arg)

Definition at line 272 of file coarse_op_kernel.cuh.

References arg(), and parity.

Here is the call graph for this function:

◆ ComputeAVGPU()

template<typename Float , int fineSpin, int fineColor, int coarseColor, typename Arg >
__global__ void quda::ComputeAVGPU ( Arg  arg)

Definition at line 288 of file coarse_op_kernel.cuh.

References arg(), and parity.

Here is the call graph for this function:

◆ computeBeta()

void quda::computeBeta ( Complex **  beta,
std::vector< ColorSpinorField *>  Ap,
int  i,
int  N,
int  k 
)

Definition at line 62 of file inv_gcr_quda.cpp.

References quda::blas::cDotProduct(), and printfQuda.

Referenced by orthoDir().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ computeClover()

void quda::computeClover ( CloverField clover,
const GaugeField gauge,
double  coeff,
QudaFieldLocation  location 
)

Definition at line 204 of file clover_quda.cu.

References quda::CloverArg< Float, nSpin, nColor, dynamic_clover_ >::clover, errorQuda, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, and QUDA_SINGLE_PRECISION.

Referenced by quda::cudaCloverField::compute(), createCloverQuda(), and quda::FullClover::FullClover().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ computeCloverForce()

void quda::computeCloverForce ( GaugeField force,
const GaugeField U,
std::vector< ColorSpinorField *> &  x,
std::vector< ColorSpinorField *> &  p,
std::vector< double > &  coeff 
)

Compute the force contribution from the solver solution fields.

Force(x, mu) = U(x, mu) * sum_{i=1}^{nvec} ( P_mu^+ x(x+mu) p(x)^\dagger + P_mu^- p(x+mu) x(x)^\dagger )

where
M = A_even - kappa^2 * Dslash * A_odd^{-1} * Dslash
x(even) = M^{-1} b(even)
x(odd) = A_odd^{-1} * Dslash * x(even)
p(even) = M * x(even)
p(odd) = A_odd^{-1} * Dslash^\dagger * M * x(even)

Parameters
force[out,in]The resulting force field
UThe input gauge field
xSolution field (both parities)
pIntermediate vectors (both parities)
coeffMultiplicative coefficient (e.g., dt * residue)

Definition at line 465 of file clover_outer_product.cu.

References checkCudaError, errorQuda, quda::ColorSpinorField::GhostFace(), quda::GaugeField::Order(), parity, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_FLOAT2_GAUGE_ORDER, QUDA_RECONSTRUCT_12, QUDA_RECONSTRUCT_NO, QUDA_SINGLE_PRECISION, and quda::GaugeField::Reconstruct().

Referenced by computeCloverForceQuda(), and quda::FullClover::FullClover().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ computeCloverSigmaOprod()

void quda::computeCloverSigmaOprod ( GaugeField oprod,
std::vector< ColorSpinorField *> &  x,
std::vector< ColorSpinorField *> &  p,
std::vector< std::vector< double > > &  coeff 
)

Compute the outer product from the solver solution fields arising from the diagonal term of the fermion bilinear in direction mu,nu and sum to outer product field.

Parameters
oprod[out,in]Computed outer product field (tensor matrix field)
x[in]Solution field (both parities)
p[in]Intermediate vectors (both parities)
coeff[in]Multiplicative coefficient (e.g., dt * residue), one for each parity

Definition at line 98 of file clover_sigma_outer_product.cu.

References checkCudaError, errorQuda, MAX_NVECTOR, quda::GaugeField::Order(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_FLOAT2_GAUGE_ORDER, and Spinor< RegType, StoreType, N, write >::set().

Referenced by computeCloverForceQuda(), and quda::FullClover::FullClover().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ computeCloverSigmaTrace()

void quda::computeCloverSigmaTrace ( GaugeField output,
const CloverField clover,
double  coeff 
)

Compute the matrix tensor field necessary for the force calculation from the clover trace action. This computes a tensor field [mu,nu].

Parameters
outputThe computed matrix field (tensor matrix field)
cloverThe input clover field
coeffScalar coefficient multiplying the result (e.g., stepsize)

Definition at line 242 of file clover_trace_quda.cu.

References errorQuda, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, and QUDA_SINGLE_PRECISION.

Referenced by computeCloverForceQuda(), and quda::FullClover::FullClover().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ computeCoarseClover()

template<bool from_coarse, typename Float , int fineSpin, int coarseSpin, int fineColor, int coarseColor, typename Arg >
__device__ __host__ void quda::computeCoarseClover ( Arg arg,
int  parity,
int  x_cb,
int  ic_c,
int  jc_c 
)

◆ ComputeCoarseCloverCPU()

template<bool from_coarse, typename Float , int fineSpin, int coarseSpin, int fineColor, int coarseColor, typename Arg >
void quda::ComputeCoarseCloverCPU ( Arg arg)

Definition at line 988 of file coarse_op_kernel.cuh.

References arg(), and parity.

Here is the call graph for this function:

◆ ComputeCoarseCloverGPU()

template<bool from_coarse, typename Float , int fineSpin, int coarseSpin, int fineColor, int coarseColor, typename Arg >
__global__ void quda::ComputeCoarseCloverGPU ( Arg  arg)

Definition at line 1002 of file coarse_op_kernel.cuh.

References arg(), and parity.

Here is the call graph for this function:

◆ computeCoeffs()

template<typename T >
static void quda::computeCoeffs ( T  d_out[],
const T  d_p1[],
const T  d_p2[],
int  k,
int  j,
int  s,
const T  gamma[],
const T  rho[],
const T  gamma_kprev[],
const T  rho_kprev[] 
)
static

Definition at line 79 of file inv_mpcg_quda.cpp.

References applyThirdTerm().

Referenced by quda::MPCG::operator()().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ computeColorContraction()

template<typename real , typename Arg >
__global__ void quda::computeColorContraction ( Arg  arg)

◆ computeDegrandRossiContraction()

template<typename real , typename Arg >
__global__ void quda::computeDegrandRossiContraction ( Arg  arg)

◆ ComputeEta()

template<libtype which_lib>
void quda::ComputeEta ( GMResDRArgs args)

Definition at line 157 of file inv_gmresdr_quda.cpp.

References errorQuda.

◆ ComputeEta< libtype::eigen_lib >()

template<>
void quda::ComputeEta< libtype::eigen_lib > ( GMResDRArgs args)

Definition at line 179 of file inv_gmresdr_quda.cpp.

◆ ComputeEta< libtype::magma_lib >()

template<>
void quda::ComputeEta< libtype::magma_lib > ( GMResDRArgs args)

Definition at line 159 of file inv_gmresdr_quda.cpp.

References errorQuda, magma_Xgels(), and memset().

Here is the call graph for this function:

◆ computeFmunu()

void quda::computeFmunu ( GaugeField Fmunu,
const GaugeField gauge 
)

Compute the Fmunu tensor.

Parameters
[out]FmunuThe Fmunu tensor
[in]gaugeThe gauge field upon which to compute the Fmunu tensor

Definition at line 99 of file gauge_field_strength_tensor.cu.

References errorQuda, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, and QUDA_SINGLE_PRECISION.

Referenced by createCloverQuda(), qChargeDensityQuda(), and qChargeQuda().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ computeFmunuCore()

template<int mu, int nu, typename Float , typename Arg >
__device__ __host__ __forceinline__ void quda::computeFmunuCore ( Arg arg,
int  idx,
int  parity 
)

Definition at line 28 of file field_strength_tensor.cuh.

References conj(), getCoords(), linkIndexShift(), mu, and quda::FmunuArg< Float, Fmunu, Gauge >::X.

Here is the call graph for this function:

◆ computeFmunuCPU()

template<typename Float , typename Arg >
void quda::computeFmunuCPU ( Arg arg)

Definition at line 184 of file field_strength_tensor.cuh.

References arg(), mu, and parity.

Here is the call graph for this function:

◆ computeFmunuKernel()

template<typename Float , typename Arg >
__global__ void quda::computeFmunuKernel ( Arg  arg)

Definition at line 166 of file field_strength_tensor.cuh.

References arg(), and parity.

Here is the call graph for this function:

◆ computeForce()

template<typename real , typename Arg , typename Link >
__device__ void quda::computeForce ( LINK  force,
Arg arg,
int  xIndex,
int  yIndex,
int  mu,
int  nu 
)

Definition at line 128 of file clover_deriv.cuh.

References conj(), DECLARE_ARRAY, getCoordsExtended(), LINK, linkIndexShift(), and mu.

Here is the call graph for this function:

◆ computeGenGauss()

template<typename Float , typename Arg >
__global__ void quda::computeGenGauss ( Arg  arg)

Definition at line 71 of file gauge_random.cu.

References getCoords(), linkIndex(), mu, parity, and setIdentity().

Here is the call graph for this function:

◆ ComputeHarmonicRitz()

template<libtype which_lib>
void quda::ComputeHarmonicRitz ( GMResDRArgs args)

Definition at line 88 of file inv_gmresdr_quda.cpp.

References errorQuda.

◆ ComputeHarmonicRitz< libtype::eigen_lib >()

Definition at line 127 of file inv_gmresdr_quda.cpp.

References abs(), norm(), and quda::SortedEvals::SelectSmall().

Here is the call graph for this function:

◆ ComputeHarmonicRitz< libtype::magma_lib >()

Definition at line 90 of file inv_gmresdr_quda.cpp.

References abs(), errorQuda, magma_Xgeev(), magma_Xgesv(), norm(), and quda::SortedEvals::SelectSmall().

Here is the call graph for this function:

◆ computeKSLongLinkForce() [1/2]

template<typename Float , typename Result , typename Oprod , typename Gauge >
void quda::computeKSLongLinkForce ( Result  res,
Oprod  oprod,
Gauge  gauge,
int  dim[4],
const GaugeField meta,
QudaFieldLocation  location 
)

Definition at line 353 of file ks_force_quda.cu.

References quda::KSLongLinkForce< Float, Result, Oprod, Gauge >::apply(), arg(), and qudaDeviceSynchronize.

Referenced by computeKSLongLinkForce().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ computeKSLongLinkForce() [2/2]

template<typename Float >
void quda::computeKSLongLinkForce ( GaugeField result,
const GaugeField oprod,
const GaugeField gauge,
QudaFieldLocation  location 
)

◆ computeKSLongLinkForceCore()

template<typename Float , typename Result , typename Oprod , typename Gauge >
__host__ __device__ void quda::computeKSLongLinkForceCore ( KSLongLinkArg< Result, Oprod, Gauge > &  arg,
int  idx 
)

Definition at line 222 of file ks_force_quda.cu.

◆ computeKSLongLinkForceCPU()

template<typename Float , typename Result , typename Oprod , typename Gauge >
void quda::computeKSLongLinkForceCPU ( KSLongLinkArg< Result, Oprod, Gauge > &  arg)

Definition at line 298 of file ks_force_quda.cu.

References arg(), and quda::KSLongLinkArg< Result, Oprod, Gauge >::threads.

Here is the call graph for this function:

◆ computeKSLongLinkForceKernel()

template<typename Float , typename Result , typename Oprod , typename Gauge >
__global__ void quda::computeKSLongLinkForceKernel ( KSLongLinkArg< Result, Oprod, Gauge >  arg)

Definition at line 286 of file ks_force_quda.cu.

References arg(), and quda::KSLongLinkArg< Result, Oprod, Gauge >::threads.

Here is the call graph for this function:

◆ computeLinkInverse()

template<class Cmplx >
__device__ __host__ void quda::computeLinkInverse ( Matrix< Cmplx, 3 > *  uinv,
const Matrix< Cmplx, 3 > &  u 
)
inline

Definition at line 1023 of file quda_matrix.h.

References getDeterminant().

Here is the call graph for this function:

◆ computeMomAction()

double quda::computeMomAction ( const GaugeField mom)

◆ computeNeighborSum()

template<typename Float , int Nc, typename Vector , typename Arg >
__device__ __host__ void quda::computeNeighborSum ( Vector out,
Arg arg,
int  x_cb,
int  parity 
)
inline

Computes out = sum_mu U_mu(x)in(x+d) + U^\dagger_mu(x-d)in(x-d)

Parameters
[out] out: The out result field
[in] U: The gauge field
[in] in: The input field
[in] x_cb: The checkerboarded site index
[in] parity: The site parity

Definition at line 51 of file color_spinor_wuppertal.cu.

References conj(), getCoords(), quda::WuppertalSmearingArg< Float, Ns, Nc, gRecon >::in, linkIndexM1(), linkIndexP1(), quda::Arg< real, Ns, Nc, order >::nParity, and quda::WuppertalSmearingArg< Float, Ns, Nc, gRecon >::U.

Here is the call graph for this function:

◆ computeOvrImpSTOUTStep()

template<typename Float , typename Arg >
__global__ void quda::computeOvrImpSTOUTStep ( Arg  arg)

◆ computePlaq()

template<int blockSize, typename Float , typename Gauge >
__global__ void quda::computePlaq ( GaugePlaqArg< Gauge >  arg)

Definition at line 49 of file gauge_plaq.cuh.

References arg(), quda::GaugePlaqArg< Gauge >::border, getCoords(), mu, parity, quda::GaugePlaqArg< Gauge >::threads, and quda::GaugePlaqArg< Gauge >::X.

Referenced by quda::GaugePlaq< Float, Gauge >::apply().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ computeQCharge()

double quda::computeQCharge ( const GaugeField Fmunu)

Compute the topological charge.

Parameters
[in] Fmunu: The Fmunu tensor, usually calculated from a smeared configuration
Returns
double The total topological charge

Definition at line 97 of file gauge_qcharge.cu.

References errorQuda, quda::GaugeField::isNative(), quda::GaugeField::Order(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION, and quda::GaugeField::Reconstruct().

Referenced by qChargeQuda().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ computeQChargeDensity()

double quda::computeQChargeDensity ( const GaugeField Fmunu,
void *  result 
)

Compute the topological charge density per lattice site.

Parameters
[in] Fmunu: The Fmunu tensor, usually calculated from a smeared configuration
[out] result: The topological charge density at each lattice site (named qDensity in the original comment)
Returns
double The total topological charge

Definition at line 116 of file gauge_qcharge.cu.

References errorQuda, quda::GaugeField::isNative(), quda::GaugeField::Order(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION, and quda::GaugeField::Reconstruct().

Referenced by qChargeDensityQuda().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ ComputeRitz()

template<libtype which_lib>
void quda::ComputeRitz ( EigCGArgs args)

Definition at line 133 of file inv_eigcg_quda.cpp.

References errorQuda.

◆ ComputeRitz< libtype::eigen_lib >()

template<>
void quda::ComputeRitz< libtype::eigen_lib > ( EigCGArgs args)

Definition at line 136 of file inv_eigcg_quda.cpp.

◆ ComputeRitz< libtype::magma_lib >()

template<>
void quda::ComputeRitz< libtype::magma_lib > ( EigCGArgs args)

Definition at line 164 of file inv_eigcg_quda.cpp.

References errorQuda, and magma_Xheev().

Here is the call graph for this function:

◆ computeStaggeredOprod() [1/2]

void quda::computeStaggeredOprod ( GaugeField out[],
ColorSpinorField in,
const double  coeff[],
int  nFace 
)

Compute the outer-product field between the staggered quark field's one and (for HISQ and ASQTAD) three hop sites. E.g.,

out[0][d](x) = (in(x+1_d) x conj(in(x)))
out[1][d](x) = (in(x+3_d) x conj(in(x)))

where 1_d and 3_d represent a relative shift of magnitude 1 and 3 in dimension d, respectively

Note out[1] is only computed if nFace=3

Parameters
[out] out: Array of nFace outer-product matrix fields
[in] in: Input quark field
[in] coeff: Coefficient
[in] nFace: Number of faces (1 or 3)

Definition at line 447 of file staggered_oprod.cu.

References errorQuda, quda::ColorSpinorField::Even(), and quda::ColorSpinorField::Odd().

Referenced by computeHISQForceQuda(), and computeStaggeredForceQuda().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ computeStaggeredOprod() [2/2]

void quda::computeStaggeredOprod ( GaugeField outA,
GaugeField outB,
ColorSpinorField inEven,
ColorSpinorField inOdd,
int  parity,
const double  coeff[2],
int  nFace 
)

◆ computeStaple()

template<typename Float , typename Arg , typename Link >
__host__ __device__ void quda::computeStaple ( Arg arg,
int  idx,
int  parity,
int  dir,
Link &  staple 
)

Definition at line 36 of file gauge_ape.cuh.

References conj(), getCoords(), linkIndexShift(), mu, setZero(), and quda::GaugeAPEArg< Float, GaugeOr, GaugeDs >::X.

Referenced by fatLongKSLink(), and quda::GaugeSTOUTArg< Float, GaugeOr, GaugeDs >::GaugeSTOUTArg().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ computeStapleRectangle()

template<typename Float , typename Arg , typename Link >
__host__ __device__ void quda::computeStapleRectangle ( Arg arg,
int  idx,
int  parity,
int  dir,
Link &  staple,
Link &  rectangle 
)

Definition at line 232 of file gauge_stout.cuh.

References conj(), getCoords(), linkIndexShift(), mu, setZero(), and quda::GaugeSTOUTArg< Float, GaugeOr, GaugeDs >::X.

Here is the call graph for this function:

◆ computeSTOUTStep()

template<typename Float , typename Arg >
__global__ void quda::computeSTOUTStep ( Arg  arg)

◆ computeTMAV()

template<typename Float , int fineSpin, int fineColor, int coarseColor, typename Arg >
__device__ __host__ void quda::computeTMAV ( Arg arg,
int  parity,
int  x_cb,
int  v 
)
inline

Calculates the matrix A V^{s,c'}(x) = A^{c}(x) * V^{s,c}(x) for twisted-mass fermions, where s = fine spin, c' = coarse color, c = fine color.

Definition at line 312 of file coarse_op_kernel.cuh.

References s.

◆ ComputeTMAVCPU()

template<typename Float , int fineSpin, int fineColor, int coarseColor, typename Arg >
void quda::ComputeTMAVCPU ( Arg arg)

Definition at line 332 of file coarse_op_kernel.cuh.

References parity.

◆ ComputeTMAVGPU()

template<typename Float , int fineSpin, int fineColor, int coarseColor, typename Arg >
__global__ void quda::ComputeTMAVGPU ( Arg  arg)

Definition at line 343 of file coarse_op_kernel.cuh.

References arg(), quda::linalg::Cholesky< Mat, T, N, fast >::invert(), Mat(), nColor, and parity.

Here is the call graph for this function:

◆ computeTMCAV()

template<typename Float , int fineSpin, int fineColor, int coarseColor, typename Arg >
__device__ __host__ void quda::computeTMCAV ( Arg arg,
int  parity,
int  x_cb,
int  ch,
int  ic_c 
)
inline

◆ ComputeTMCAVCPU()

template<typename Float , int fineSpin, int fineColor, int coarseColor, typename Arg >
void quda::ComputeTMCAVCPU ( Arg arg)

Definition at line 491 of file coarse_op_kernel.cuh.

References arg(), and parity.

Here is the call graph for this function:

◆ ComputeTMCAVGPU()

template<typename Float , int fineSpin, int fineColor, int coarseColor, typename Arg >
__global__ void quda::ComputeTMCAVGPU ( Arg  arg)

Definition at line 506 of file coarse_op_kernel.cuh.

References arg(), and parity.

Here is the call graph for this function:

◆ computeUV()

template<bool from_coarse, typename Float , int dim, QudaDirection dir, int fineSpin, int fineColor, int coarseSpin, int coarseColor, typename Wtype , typename Arg >
__device__ __host__ void quda::computeUV ( Arg arg,
const Wtype &  W,
int  parity,
int  x_cb,
int  ic_c 
)
inline

Calculates the matrix UV^{s,c'}_mu(x) = U^{c}_mu(x) * V^{s,c}_mu(x+mu), where mu = dir, s = fine spin, c' = coarse color, c = fine color.

Definition at line 130 of file coarse_op_kernel.cuh.

References caxpy(), getCoords(), linkIndexP1(), QUDA_FORWARDS, s, and quda::CalculateYArg< Float, fineSpin, coarseSpin, fineColor, coarseColor, coarseGauge, coarseGaugeAtomic, fineGauge, fineSpinor, fineSpinorTmp, fineSpinorV, fineClover >::UV.

Here is the call graph for this function:

◆ ComputeUVCPU()

template<bool from_coarse, typename Float , int dim, QudaDirection dir, int fineSpin, int fineColor, int coarseSpin, int coarseColor, typename Arg >
void quda::ComputeUVCPU ( Arg arg)

Definition at line 197 of file coarse_op_kernel.cuh.

References arg(), parity, and QUDA_FORWARDS.

Here is the call graph for this function:

◆ ComputeUVGPU()

template<bool from_coarse, typename Float , int dim, QudaDirection dir, int fineSpin, int fineColor, int coarseSpin, int coarseColor, typename Arg >
__global__ void quda::ComputeUVGPU ( Arg  arg)

Definition at line 212 of file coarse_op_kernel.cuh.

References arg(), parity, and QUDA_FORWARDS.

Here is the call graph for this function:

◆ computeVUV()

template<bool shared_atomic, bool parity_flip, bool from_coarse, typename Float , int dim, QudaDirection dir, int fineSpin, int fineColor, int coarseSpin, int coarseColor, typename Arg , typename Gamma >
__device__ __host__ void quda::computeVUV ( Arg arg,
const Gamma gamma,
int  parity,
int  x_cb,
int  c_row,
int  c_col,
int  parity_coarse_,
int  coarse_x_cb_ 
)

◆ ComputeVUVCPU()

template<bool from_coarse, typename Float , int dim, QudaDirection dir, int fineSpin, int fineColor, int coarseSpin, int coarseColor, typename Arg >
void quda::ComputeVUVCPU ( Arg  arg)

Definition at line 779 of file coarse_op_kernel.cuh.

◆ ComputeVUVGPU()

template<bool shared_atomic, bool parity_flip, bool from_coarse, typename Float , int dim, QudaDirection dir, int fineSpin, int fineColor, int coarseSpin, int coarseColor, typename Arg >
__global__ void quda::ComputeVUVGPU ( Arg  arg)

Definition at line 857 of file coarse_op_kernel.cuh.

References arg(), and parity.

Here is the call graph for this function:

◆ computeWupperalStep()

template<typename Float , int Ns, int Nc, typename Arg >
__device__ __host__ void quda::computeWupperalStep ( Arg arg,
int  x_cb,
int  parity 
)
inline

◆ computeYhat()

template<typename Float , int n, bool compute_max_only, typename Arg >
__device__ __host__ Float quda::computeYhat ( Arg arg,
int  d,
int  x_cb,
int  parity,
int  i,
int  j 
)
inline

Definition at line 45 of file coarse_op_preconditioned.cuh.

References caxpy(), conj(), getCoords(), and linkIndexM1().

Here is the call graph for this function:

◆ computeYreverse()

template<typename Float , int nSpin, int nColor, typename Arg >
__device__ __host__ void quda::computeYreverse ( Arg arg,
int  parity,
int  x_cb,
int  ic_c,
int  jc_c 
)

Compute the forward links from backwards links by flipping the sign of the spin projector

Definition at line 877 of file coarse_op_kernel.cuh.

References quda::CalculateYArg< Float, fineSpin, coarseSpin, fineColor, coarseColor, coarseGauge, coarseGaugeAtomic, fineGauge, fineSpinor, fineSpinorTmp, fineSpinorV, fineClover >::Y.

◆ ComputeYReverseCPU()

template<typename Float , int nSpin, int nColor, typename Arg >
void quda::ComputeYReverseCPU ( Arg arg)

Definition at line 898 of file coarse_op_kernel.cuh.

References arg(), nColor, and parity.

Here is the call graph for this function:

◆ ComputeYReverseGPU()

template<typename Float , int nSpin, int nColor, typename Arg >
__global__ void quda::ComputeYReverseGPU ( Arg  arg)

Definition at line 912 of file coarse_op_kernel.cuh.

References arg(), nColor, and parity.

Here is the call graph for this function:

◆ conj() [1/3]

template<typename ValueType >
__host__ __device__ ValueType quda::conj ( ValueType  x)
inline

◆ conj() [2/3]

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::conj ( const complex< ValueType > &  z)
inline

Returns the complex conjugate of z.

Definition at line 1054 of file complex_quda.h.

◆ conj() [3/3]

template<class T , int N>
__device__ __host__ Matrix<T,N> quda::conj ( const Matrix< T, N > &  other)
inline

Definition at line 596 of file quda_matrix.h.

References conj().

Here is the call graph for this function:

◆ constant()

template<class T >
void quda::constant ( T &  t,
int  k,
int  s,
int  c 
)

Set all space-time real elements at spin s and color c of the field equal to k

Definition at line 38 of file color_spinor_util.cu.

References parity.

Referenced by genericSource().

Here is the caller graph for this function:

◆ constantInv()

template<typename real , int nColor, bool dagger, Dslash5Type type, bool shared, typename Vector , typename Arg >
__device__ __host__ Vector quda::constantInv ( Arg arg,
int  parity,
int  x_cb,
int  s_ 
)
inline

Apply the M5 inverse operator at a given site on the lattice. This is the original algorithm as described in Kim and Izubuchi (LATTICE 2013_033), where the b and c coefficients are constant along the Ls dimension, so it is suitable for Shamir and Mobius domain-wall fermions.

Template Parameters
shared: Whether to use a shared memory scratch pad to store the input field across the Ls dimension to minimize global memory reads.
Parameters
[in] arg: Argument struct containing any meta data and accessors
[in] parity: Parity we are on
[in] x_cb: Checkerboarded 4-d space-time index
[in] s_: Ls dimension coordinate

Definition at line 295 of file dslash_domain_wall_m5.cuh.

References __fast_pow(), dagger, exp(), in, quda::VectorCache< real, Vector >::load(), out, s, quda::VectorCache< real, Vector >::save(), and quda::VectorCache< real, Vector >::sync().

Here is the call graph for this function:

◆ contractQuda()

void quda::contractQuda ( const ColorSpinorField x,
const ColorSpinorField y,
void *  result,
QudaContractType  cType 
)

◆ convert() [1/2]

template<typename OutputType , typename InputType >
__device__ void quda::convert ( OutputType  x[],
InputType  y[],
const int  N 
)
inline

Convert a vector of type InputType to type OutputType.

The main current limitation is that there is an implicit assumption that N * sizeof(OutputType) / sizeof(InputType) is an integer. E.g., you cannot convert a vector of 9 float2s into a vector of 5 float4s.

Parameters
x: Output vector.
y: Input vector.
N: Length of output vector.

Definition at line 149 of file convert.h.

References copyFloatN().

Here is the call graph for this function:

◆ convert() [2/2]

template<typename Float , int nSpin, int nColor, typename Arg >
__device__ __host__ void quda::convert ( Arg arg,
int  parity,
int  x_cb,
int  c_row,
int  c_col 
)

Convert the field from the atomic format to the required computation format, e.g. fixed point to floating point

Definition at line 1096 of file coarse_op_kernel.cuh.

References in.

◆ convert< double2, double4 >()

template<>
__device__ void quda::convert< double2, double4 > ( double2  x[],
double4  y[],
const int  N 
)
inline

Definition at line 176 of file convert.h.

◆ convert< double2, float4 >()

template<>
__device__ void quda::convert< double2, float4 > ( double2  x[],
float4  y[],
const int  N 
)
inline

Definition at line 268 of file convert.h.

◆ convert< double2, short4 >()

template<>
__device__ void quda::convert< double2, short4 > ( double2  x[],
short4  y[],
const int  N 
)
inline

Definition at line 238 of file convert.h.

◆ convert< double4, double2 >()

template<>
__device__ void quda::convert< double4, double2 > ( double4  x[],
double2  y[],
const int  N 
)
inline

Definition at line 170 of file convert.h.

◆ convert< double4, float2 >()

template<>
__device__ void quda::convert< double4, float2 > ( double4  x[],
float2  y[],
const int  N 
)
inline

Definition at line 277 of file convert.h.

◆ convert< double4, short2 >()

template<>
__device__ void quda::convert< double4, short2 > ( double4  x[],
short2  y[],
const int  N 
)
inline

Definition at line 247 of file convert.h.

◆ convert< float2, double4 >()

template<>
__device__ void quda::convert< float2, double4 > ( float2  x[],
double4  y[],
const int  N 
)
inline

Definition at line 283 of file convert.h.

◆ convert< float2, float4 >()

template<>
__device__ void quda::convert< float2, float4 > ( float2  x[],
float4  y[],
const int  N 
)
inline

Definition at line 191 of file convert.h.

◆ convert< float2, short2 >()

template<>
__device__ void quda::convert< float2, short2 > ( float2  x[],
short2  y[],
const int  N 
)
inline

Definition at line 156 of file convert.h.

◆ convert< float2, short4 >()

template<>
__device__ void quda::convert< float2, short4 > ( float2  x[],
short4  y[],
const int  N 
)
inline

Definition at line 207 of file convert.h.

◆ convert< float4, double2 >()

template<>
__device__ void quda::convert< float4, double2 > ( float4  x[],
double2  y[],
const int  N 
)
inline

Definition at line 262 of file convert.h.

◆ convert< float4, float2 >()

template<>
__device__ void quda::convert< float4, float2 > ( float4  x[],
float2  y[],
const int  N 
)
inline

Definition at line 185 of file convert.h.

◆ convert< float4, short2 >()

template<>
__device__ void quda::convert< float4, short2 > ( float4  x[],
short2  y[],
const int  N 
)
inline

Definition at line 216 of file convert.h.

◆ convert< float4, short4 >()

template<>
__device__ void quda::convert< float4, short4 > ( float4  x[],
short4  y[],
const int  N 
)
inline

Definition at line 162 of file convert.h.

◆ convert< short2, double4 >()

template<>
__device__ void quda::convert< short2, double4 > ( short2  x[],
double4  y[],
const int  N 
)
inline

Definition at line 253 of file convert.h.

References d2i().

Here is the call graph for this function:

◆ convert< short2, float4 >()

template<>
__device__ void quda::convert< short2, float4 > ( short2  x[],
float4  y[],
const int  N 
)
inline

Definition at line 222 of file convert.h.

References f2i().

Here is the call graph for this function:

◆ convert< short4, double2 >()

template<>
__device__ void quda::convert< short4, double2 > ( short4  x[],
double2  y[],
const int  N 
)
inline

Definition at line 231 of file convert.h.

References d2i().

Here is the call graph for this function:

◆ convert< short4, float2 >()

template<>
__device__ void quda::convert< short4, float2 > ( short4  x[],
float2  y[],
const int  N 
)
inline

Definition at line 200 of file convert.h.

References f2i().

Here is the call graph for this function:

◆ ConvertCPU()

template<typename Float , int nSpin, int nColor, typename Arg >
void quda::ConvertCPU ( Arg arg)

Definition at line 1133 of file coarse_op_kernel.cuh.

References arg(), nColor, and parity.

Here is the call graph for this function:

◆ ConvertGPU()

template<typename Float , int nSpin, int nColor, typename Arg >
__global__ void quda::ConvertGPU ( Arg  arg)

Definition at line 1147 of file coarse_op_kernel.cuh.

References arg(), nColor, and parity.

Here is the call graph for this function:

◆ coordsFromFaceIndex() [1/2]

template<int nDim, QudaPCType type, int dim_, int nLayers, typename Int , typename Arg >
__device__ __host__ void quda::coordsFromFaceIndex ( int &  idx,
int &  cb_idx,
Int *const  x,
int  face_idx,
const int &  face_num,
int  parity,
const Arg arg 
)
inline

Compute the full-lattice coordinates from the input face index. This is used by the Wilson-like halo update kernels, and can deal with 4-d or 5-d fields and 4-d or 5-d preconditioning.

Parameters
[out] idx: The full lattice coordinate
[out] cb_idx: The checkerboarded lattice coordinate
[out] x: Coordinates we are computing
[in] face_idx: Input checkerboarded face index
[in] face_num: Face number
[in] parity: Parity index
[in] arg: Argument struct with required meta data

Definition at line 488 of file index_helper.cuh.

References EXTERIOR_KERNEL_ALL, INTERIOR_KERNEL, QUDA_4D_PC, QUDA_5D_PC, and X.

◆ coordsFromFaceIndex() [2/2]

template<int nDim, QudaPCType type, int dim_, int nLayers, typename Int , typename Arg >
__device__ __host__ void quda::coordsFromFaceIndex ( int &  idx,
int &  cb_idx,
Int *const  x,
int  face_idx,
const int &  face_num,
const Arg arg 
)
inline

Overloaded variant of coordsFromFaceIndex where we use the parity declared in arg.

Definition at line 585 of file index_helper.cuh.

References arg().

Here is the call graph for this function:

◆ copy() [1/15]

template<typename T1 , typename T2 >
__host__ __device__ void quda::copy ( T1 &  a,
const T2 &  b 
)
inline

◆ copy() [2/15]

template<>
__host__ __device__ void quda::copy ( double &  a,
const int2 &  b 
)
inline

Definition at line 154 of file register_traits.h.

References errorQuda.

◆ copy() [3/15]

template<>
__host__ __device__ void quda::copy ( double2 &  a,
const int4 &  b 
)
inline

Definition at line 162 of file register_traits.h.

References errorQuda.

◆ copy() [4/15]

template<>
__host__ __device__ void quda::copy ( float &  a,
const short &  b 
)
inline

Definition at line 170 of file register_traits.h.

References s2f().

Here is the call graph for this function:

◆ copy() [5/15]

template<>
__host__ __device__ void quda::copy ( short &  a,
const float &  b 
)
inline

Definition at line 171 of file register_traits.h.

References f2i().

Here is the call graph for this function:

◆ copy() [6/15]

template<>
__host__ __device__ void quda::copy ( float2 &  a,
const short2 &  b 
)
inline

Definition at line 173 of file register_traits.h.

References s2f().

Here is the call graph for this function:

◆ copy() [7/15]

template<>
__host__ __device__ void quda::copy ( short2 &  a,
const float2 &  b 
)
inline

Definition at line 177 of file register_traits.h.

References f2i().

Here is the call graph for this function:

◆ copy() [8/15]

template<>
__host__ __device__ void quda::copy ( float4 &  a,
const short4 &  b 
)
inline

Definition at line 181 of file register_traits.h.

References s2f().

Here is the call graph for this function:

◆ copy() [9/15]

template<>
__host__ __device__ void quda::copy ( short4 &  a,
const float4 &  b 
)
inline

Definition at line 185 of file register_traits.h.

References f2i().

Here is the call graph for this function:

◆ copy() [10/15]

template<>
__host__ __device__ void quda::copy ( float &  a,
const char &  b 
)
inline

Definition at line 189 of file register_traits.h.

References c2f().

Here is the call graph for this function:

◆ copy() [11/15]

template<>
__host__ __device__ void quda::copy ( char &  a,
const float &  b 
)
inline

Definition at line 190 of file register_traits.h.

References f2i().

Here is the call graph for this function:

◆ copy() [12/15]

template<>
__host__ __device__ void quda::copy ( float2 &  a,
const char2 &  b 
)
inline

Definition at line 192 of file register_traits.h.

References c2f().

Here is the call graph for this function:

◆ copy() [13/15]

template<>
__host__ __device__ void quda::copy ( char2 &  a,
const float2 &  b 
)
inline

Definition at line 196 of file register_traits.h.

References f2i().

Here is the call graph for this function:

◆ copy() [14/15]

template<>
__host__ __device__ void quda::copy ( float4 &  a,
const char4 &  b 
)
inline

Definition at line 200 of file register_traits.h.

References c2f().

Here is the call graph for this function:

◆ copy() [15/15]

template<>
__host__ __device__ void quda::copy ( char4 &  a,
const float4 &  b 
)
inline

Definition at line 204 of file register_traits.h.

References f2i().

Here is the call graph for this function:

◆ copy_and_scale() [1/7]

template<typename T1 , typename T2 , typename T3 >
__host__ __device__ void quda::copy_and_scale ( T1 &  a,
const T2 &  b,
const T3 &  c 
)
inline

Specialized variants of the copy function that include an additional scale factor. Note the scale factor is ignored unless the input type (b) is either a short or char vector.

Definition at line 249 of file register_traits.h.

References copy().

Referenced by quda::clover::FloatNOrder< Float, length, N, add_rho, huge_alloc >::load().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ copy_and_scale() [2/7]

template<>
__host__ __device__ void quda::copy_and_scale ( float4 &  a,
const short4 &  b,
const float &  c 
)
inline

Definition at line 254 of file register_traits.h.

References s2f().

Here is the call graph for this function:

◆ copy_and_scale() [3/7]

template<>
__host__ __device__ void quda::copy_and_scale ( float4 &  a,
const char4 &  b,
const float &  c 
)
inline

Definition at line 262 of file register_traits.h.

References c2f().

Here is the call graph for this function:

◆ copy_and_scale() [4/7]

template<>
__host__ __device__ void quda::copy_and_scale ( float2 &  a,
const short2 &  b,
const float &  c 
)
inline

Definition at line 270 of file register_traits.h.

References s2f().

Here is the call graph for this function:

◆ copy_and_scale() [5/7]

template<>
__host__ __device__ void quda::copy_and_scale ( float2 &  a,
const char2 &  b,
const float &  c 
)
inline

Definition at line 276 of file register_traits.h.

References c2f().

Here is the call graph for this function:

◆ copy_and_scale() [6/7]

template<>
__host__ __device__ void quda::copy_and_scale ( float &  a,
const short &  b,
const float &  c 
)
inline

Definition at line 282 of file register_traits.h.

References s2f().

Here is the call graph for this function:

◆ copy_and_scale() [7/7]

template<>
__host__ __device__ void quda::copy_and_scale ( float &  a,
const char &  b,
const float &  c 
)
inline

Definition at line 287 of file register_traits.h.

References c2f().

Here is the call graph for this function:

◆ copy_scaled() [1/7]

template<typename T1 , typename T2 >
__host__ __device__ void quda::copy_scaled ( T1 &  a,
const T2 &  b 
)
inline

Definition at line 209 of file register_traits.h.

References copy().

Here is the call graph for this function:

◆ copy_scaled() [2/7]

template<>
__host__ __device__ void quda::copy_scaled ( short4 &  a,
const float4 &  b 
)
inline

Definition at line 211 of file register_traits.h.

References f2i().

Here is the call graph for this function:

◆ copy_scaled() [3/7]

template<>
__host__ __device__ void quda::copy_scaled ( char4 &  a,
const float4 &  b 
)
inline

Definition at line 219 of file register_traits.h.

References f2i().

Here is the call graph for this function:

◆ copy_scaled() [4/7]

template<>
__host__ __device__ void quda::copy_scaled ( short2 &  a,
const float2 &  b 
)
inline

Definition at line 227 of file register_traits.h.

References f2i().

Here is the call graph for this function:

◆ copy_scaled() [5/7]

template<>
__host__ __device__ void quda::copy_scaled ( char2 &  a,
const float2 &  b 
)
inline

Definition at line 233 of file register_traits.h.

References f2i().

Here is the call graph for this function:

◆ copy_scaled() [6/7]

template<>
__host__ __device__ void quda::copy_scaled ( short &  a,
const float &  b 
)
inline

Definition at line 239 of file register_traits.h.

References f2i().

Here is the call graph for this function:

◆ copy_scaled() [7/7]

template<>
__host__ __device__ void quda::copy_scaled ( char &  a,
const float &  b 
)
inline

Definition at line 241 of file register_traits.h.

References f2i().

Here is the call graph for this function:

◆ copyArrayToLink() [1/2]

void quda::copyArrayToLink ( Matrix< float2, 3 > *  link,
float *  array 
)
inline

Definition at line 1061 of file quda_matrix.h.

Referenced by isUnitary(), and unitarizeLinksCPU().

Here is the caller graph for this function:

◆ copyArrayToLink() [2/2]

template<class Cmplx , class Real >
void quda::copyArrayToLink ( Matrix< Cmplx, 3 > *  link,
Real *  array 
)
inline

Definition at line 1074 of file quda_matrix.h.

◆ copyColorSpinor()

template<typename Arg , typename Basis >
void quda::copyColorSpinor ( Arg arg,
const Basis &  basis 
)

CPU function to reorder spinor fields.

Definition at line 136 of file copy_color_spinor.cuh.

References quda::ColorSpinor< Float, Nc, Ns >::data, in, quda::Arg< real, Ns, Nc, order >::nParity, out, parity, and quda::Arg< real, Ns, Nc, order >::volumeCB.

Referenced by quda::CopyColorSpinor< Ns, Arg >::apply(), and quda::CopyColorSpinor< 4, Arg >::apply().

Here is the caller graph for this function:

◆ copyColorSpinorKernel()

template<typename Arg , typename Basis >
__global__ void quda::copyColorSpinorKernel ( Arg  arg,
Basis  basis 
)

CUDA kernel to reorder spinor fields. Adopts a similar form as the CPU version, using the same inlined functions.

Definition at line 149 of file copy_color_spinor.cuh.

References quda::ColorSpinor< Float, Nc, Ns >::data, in, out, parity, and quda::Arg< real, Ns, Nc, order >::volumeCB.

Referenced by quda::CopyColorSpinor< Ns, Arg >::apply(), and quda::CopyColorSpinor< 4, Arg >::apply().

Here is the caller graph for this function:

◆ copyColumn()

template<class T , int N>
__device__ __host__ void quda::copyColumn ( const Matrix< T, N > &  m,
int  c,
Array< T, N > *  a 
)
inline

Definition at line 793 of file quda_matrix.h.

Referenced by getRealBidiagMatrix().

Here is the caller graph for this function:

◆ copyExtendedColorSpinor() [1/2]

template<int Ns, typename dstFloat , typename srcFloat >
void quda::copyExtendedColorSpinor ( ColorSpinorField dst,
const ColorSpinorField src,
const int  parity,
const QudaFieldLocation  location,
dstFloat *  Dst,
srcFloat *  Src,
float *  dstNorm,
float *  srcNorm 
)

◆ CopyExtendedColorSpinor()

template<typename dstFloat , typename srcFloat >
void quda::CopyExtendedColorSpinor ( ColorSpinorField dst,
const ColorSpinorField src,
const int  parity,
const QudaFieldLocation  location,
dstFloat *  Dst,
srcFloat *  Src,
float *  dstNorm = 0,
float *  srcNorm = 0 
)

Definition at line 428 of file extended_color_spinor_utilities.cu.

References errorQuda, quda::ColorSpinorField::Nspin(), and parity.

Referenced by copyExtendedColorSpinor().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ copyExtendedColorSpinor() [2/2]

void quda::copyExtendedColorSpinor ( ColorSpinorField dst,
const ColorSpinorField src,
QudaFieldLocation  location,
const int  parity,
void *  Dst,
void *  Src,
void *  dstNorm,
void *  srcNorm 
)

Definition at line 454 of file extended_color_spinor_utilities.cu.

References CopyExtendedColorSpinor(), errorQuda, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, and QUDA_SINGLE_PRECISION.

Referenced by quda::XSD::operator()().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ copyExtendedGauge()

void quda::copyExtendedGauge ( GaugeField out,
const GaugeField in,
QudaFieldLocation  location,
void *  Out = 0,
void *  In = 0 
)

This function is used for copying the gauge field into an extended gauge field. Defined in copy_gauge_extended.cu.

Parameters
out: The extended output field to which we are copying
in: The input field from which we are copying
location: The location of where we are doing the copying (CPU or CUDA)
Out: The output buffer (optional)
In: The input buffer (optional)

Definition at line 343 of file copy_gauge_extended.cu.

References copyGaugeEx(), errorQuda, quda::LatticeField::Ndim(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION, and quda::LatticeField::X().

Referenced by computeGaugeFixingOVRQuda(), computeHISQForceQuda(), quda::cudaGaugeField::copy(), quda::cpuGaugeField::copy(), copyExtendedResidentGaugeQuda(), createExtendedGauge(), quda::cpuGaugeField::Gauge_p(), hisq_force_init(), main(), performWuppertalnStep(), quda::cudaGaugeField::saveCPUField(), and saveGaugeQuda().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ copyFloatN() [1/21]

template<typename FloatN >
__device__ void quda::copyFloatN ( FloatN &  a,
const FloatN &  b 
)
inline

◆ copyFloatN() [2/21]

__device__ void quda::copyFloatN ( float2 &  a,
const char2 &  b 
)
inline

Definition at line 64 of file convert.h.

References c2f().

Here is the call graph for this function:

◆ copyFloatN() [3/21]

__device__ void quda::copyFloatN ( float4 &  a,
const char4 &  b 
)
inline

Definition at line 65 of file convert.h.

References c2f().

Here is the call graph for this function:

◆ copyFloatN() [4/21]

__device__ void quda::copyFloatN ( double2 &  a,
const char2 &  b 
)
inline

Definition at line 69 of file convert.h.

References c2d().

Here is the call graph for this function:

◆ copyFloatN() [5/21]

__device__ void quda::copyFloatN ( double4 &  a,
const char4 &  b 
)
inline

Definition at line 70 of file convert.h.

References c2d().

Here is the call graph for this function:

◆ copyFloatN() [6/21]

__device__ void quda::copyFloatN ( float2 &  a,
const short2 &  b 
)
inline

Definition at line 76 of file convert.h.

References s2f().

Here is the call graph for this function:

◆ copyFloatN() [7/21]

__device__ void quda::copyFloatN ( float4 &  a,
const short4 &  b 
)
inline

Definition at line 77 of file convert.h.

References s2f().

Here is the call graph for this function:

◆ copyFloatN() [8/21]

__device__ void quda::copyFloatN ( double2 &  a,
const short2 &  b 
)
inline

Definition at line 81 of file convert.h.

References s2d().

Here is the call graph for this function:

◆ copyFloatN() [9/21]

__device__ void quda::copyFloatN ( double4 &  a,
const short4 &  b 
)
inline

Definition at line 82 of file convert.h.

References s2d().

Here is the call graph for this function:

◆ copyFloatN() [10/21]

__device__ void quda::copyFloatN ( float2 &  a,
const double2 &  b 
)
inline

Definition at line 87 of file convert.h.

◆ copyFloatN() [11/21]

__device__ void quda::copyFloatN ( double2 &  a,
const float2 &  b 
)
inline

Definition at line 88 of file convert.h.

◆ copyFloatN() [12/21]

__device__ void quda::copyFloatN ( float4 &  a,
const double4 &  b 
)
inline

Definition at line 89 of file convert.h.

◆ copyFloatN() [13/21]

__device__ void quda::copyFloatN ( double4 &  a,
const float4 &  b 
)
inline

Definition at line 90 of file convert.h.

◆ copyFloatN() [14/21]

__device__ void quda::copyFloatN ( short2 &  a,
const float2 &  b 
)
inline

Definition at line 115 of file convert.h.

References f2i().

Here is the call graph for this function:

◆ copyFloatN() [15/21]

__device__ void quda::copyFloatN ( short4 &  a,
const float4 &  b 
)
inline

Definition at line 116 of file convert.h.

References f2i().

Here is the call graph for this function:

◆ copyFloatN() [16/21]

__device__ void quda::copyFloatN ( short2 &  a,
const double2 &  b 
)
inline

Definition at line 120 of file convert.h.

References d2i().

Here is the call graph for this function:

◆ copyFloatN() [17/21]

__device__ void quda::copyFloatN ( short4 &  a,
const double4 &  b 
)
inline

Definition at line 121 of file convert.h.

References d2i().

Here is the call graph for this function:

◆ copyFloatN() [18/21]

__device__ void quda::copyFloatN ( char2 &  a,
const float2 &  b 
)
inline

Definition at line 126 of file convert.h.

References f2i().

Here is the call graph for this function:

◆ copyFloatN() [19/21]

__device__ void quda::copyFloatN ( char4 &  a,
const float4 &  b 
)
inline

Definition at line 127 of file convert.h.

References f2i().

Here is the call graph for this function:

◆ copyFloatN() [20/21]

__device__ void quda::copyFloatN ( char2 &  a,
const double2 &  b 
)
inline

Definition at line 131 of file convert.h.

References d2i().

Here is the call graph for this function:

◆ copyFloatN() [21/21]

__device__ void quda::copyFloatN ( char4 &  a,
const double4 &  b 
)
inline

Definition at line 132 of file convert.h.

References d2i().

Here is the call graph for this function:

◆ copyGauge() [1/5]

template<typename FloatOut , typename FloatIn , int length, typename InOrder >
void quda::copyGauge ( const InOrder &  inOrder,
const GaugeField out,
const GaugeField in,
QudaFieldLocation  location,
FloatOut *  Out,
FloatOut **  outGhost,
int  type 
)

◆ copyGauge() [2/5]

template<typename FloatOut , typename FloatIn , int length, typename Arg >
void quda::copyGauge ( Arg arg)

Generic CPU gauge reordering and packing

Definition at line 32 of file copy_gauge.cuh.

References in, length, nColor, quda::gauge::Ncolor(), out, and parity.

Referenced by copyGauge().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ copyGauge() [3/5]

template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder >
void quda::copyGauge ( OutOrder &&  outOrder,
const InOrder &  inOrder,
const GaugeField out,
const GaugeField in,
QudaFieldLocation  location,
int  type 
)

◆ copyGauge() [4/5]

template<typename FloatOut , typename FloatIn , int length>
void quda::copyGauge ( GaugeField out,
const GaugeField in,
QudaFieldLocation  location,
FloatOut *  Out,
FloatIn *  In,
FloatOut **  outGhost,
FloatIn **  inGhost,
int  type 
)

◆ copyGauge() [5/5]

template<typename FloatOut , typename FloatIn >
void quda::copyGauge ( GaugeField out,
const GaugeField in,
QudaFieldLocation  location,
FloatOut *  Out,
FloatIn *  In,
FloatOut **  outGhost,
FloatIn **  inGhost,
int  type 
)

◆ copyGaugeEx() [1/6]

template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder , bool regularToextended>
__device__ __host__ void quda::copyGaugeEx ( CopyGaugeExArg< OutOrder, InOrder > &  arg,
int  X,
int  parity 
)

Copy a regular/extended gauge field into an extended/regular gauge field

Definition at line 50 of file copy_gauge_extended.cu.

References quda::CopyGaugeExArg< OutOrder, InOrder >::geometry, quda::CopyGaugeExArg< OutOrder, InOrder >::in, in, length, nColor, quda::gauge::Ncolor(), quda::CopyGaugeExArg< OutOrder, InOrder >::out, out, parity, R, X, quda::CopyGaugeExArg< OutOrder, InOrder >::Xin, and quda::CopyGaugeExArg< OutOrder, InOrder >::Xout.

Referenced by copyExtendedGauge().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ copyGaugeEx() [2/6]

template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder , bool regularToextended>
void quda::copyGaugeEx ( CopyGaugeExArg< OutOrder, InOrder >  arg)

Definition at line 93 of file copy_gauge_extended.cu.

References arg(), parity, quda::CopyGaugeExArg< OutOrder, InOrder >::volume, and X.

Here is the call graph for this function:

◆ copyGaugeEx() [3/6]

template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder >
void quda::copyGaugeEx ( OutOrder  outOrder,
const InOrder  inOrder,
const int *  E,
const int *  X,
const int *  faceVolumeCB,
const GaugeField meta,
QudaFieldLocation  location 
)

Definition at line 158 of file copy_gauge_extended.cu.

References arg(), checkCudaError, quda::GaugeField::Geometry(), quda::LatticeField::Ndim(), and QUDA_CUDA_FIELD_LOCATION.

Here is the call graph for this function:

◆ copyGaugeEx() [4/6]

template<typename FloatOut , typename FloatIn , int length, typename InOrder >
void quda::copyGaugeEx ( const InOrder &  inOrder,
const int *  X,
GaugeField out,
QudaFieldLocation  location,
FloatOut *  Out 
)

◆ copyGaugeEx() [5/6]

template<typename FloatOut , typename FloatIn , int length>
void quda::copyGaugeEx ( GaugeField out,
const GaugeField in,
QudaFieldLocation  location,
FloatOut *  Out,
FloatIn *  In 
)

◆ copyGaugeEx() [6/6]

template<typename FloatOut , typename FloatIn >
void quda::copyGaugeEx ( GaugeField out,
const GaugeField in,
QudaFieldLocation  location,
FloatOut *  Out,
FloatIn *  In 
)

Definition at line 324 of file copy_gauge_extended.cu.

References errorQuda, quda::GaugeField::Geometry(), in, quda::GaugeField::LinkType(), quda::GaugeField::Ncolor(), out, and QUDA_ASQTAD_MOM_LINKS.

Here is the call graph for this function:

◆ copyGaugeExKernel()

template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder , bool regularToextended>
__global__ void quda::copyGaugeExKernel ( CopyGaugeExArg< OutOrder, InOrder >  arg)

Definition at line 102 of file copy_gauge_extended.cu.

References arg(), parity, quda::CopyGaugeExArg< OutOrder, InOrder >::volume, and X.

Here is the call graph for this function:

◆ copyGaugeKernel()

template<typename FloatOut , typename FloatIn , int length, typename Arg >
__global__ void quda::copyGaugeKernel ( Arg  arg)

Generic CUDA gauge reordering and packing. Adopts a similar form to the CPU version, using the same inlined functions.

Definition at line 96 of file copy_gauge.cuh.

References in, length, nColor, quda::gauge::Ncolor(), out, and parity.

Here is the call graph for this function:

◆ copyGaugeMG() [1/3]

template<typename sFloatOut , typename sFloatIn , int Nc, typename InOrder >
void quda::copyGaugeMG ( const InOrder &  inOrder,
GaugeField out,
const GaugeField in,
QudaFieldLocation  location,
sFloatOut *  Out,
sFloatOut **  outGhost,
int  type 
)

◆ copyGaugeMG() [2/3]

template<typename sFloatOut , typename sFloatIn , int Nc>
void quda::copyGaugeMG ( GaugeField out,
const GaugeField in,
QudaFieldLocation  location,
sFloatOut *  Out,
sFloatIn *  In,
sFloatOut **  outGhost,
sFloatIn **  inGhost,
int  type 
)

◆ copyGaugeMG() [3/3]

template<typename FloatOut , typename FloatIn >
void quda::copyGaugeMG ( GaugeField out,
const GaugeField in,
QudaFieldLocation  location,
FloatOut *  Out,
FloatIn *  In,
FloatOut **  outGhost,
FloatIn **  inGhost,
int  type 
)

Definition at line 126 of file copy_gauge_mg.cu.

References errorQuda, in, quda::GaugeField::Ncolor(), and out.

Here is the call graph for this function:

◆ copyGenericClover()

void quda::copyGenericClover ( CloverField out,
const CloverField in,
bool  inverse,
QudaFieldLocation  location,
void *  Out = 0,
void *  In = 0,
void *  outNorm = 0,
void *  inNorm = 0 
)

This generic function is used for copying the clover field, where the input and output can be in any order and location.

Parameters
out: The output field to which we are copying
in: The input field from which we are copying
inverse: Whether we are copying the inverse term or not
location: The location of where we are doing the copying (CPU or CUDA)
Out: The output buffer (optional)
In: The input buffer (optional)
outNorm: The output norm buffer (optional)
inNorm: The input norm buffer (optional)

Definition at line 175 of file copy_clover.cu.

References errorQuda, in, inverse(), quda::CloverField::Order(), out, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, and QUDA_SINGLE_PRECISION.

Referenced by quda::cudaCloverField::copy(), quda::FullClover::FullClover(), and quda::cudaCloverField::saveCPUField().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ copyGenericColorSpinor() [1/3]

template<int Ns, int Nc, typename dstFloat , typename srcFloat >
void quda::copyGenericColorSpinor ( ColorSpinorField dst,
const ColorSpinorField src,
QudaFieldLocation  location,
dstFloat *  Dst,
srcFloat *  Src 
)

◆ CopyGenericColorSpinor() [1/2]

template<int Nc, typename dstFloat , typename srcFloat >
void quda::CopyGenericColorSpinor ( ColorSpinorField dst,
const ColorSpinorField src,
QudaFieldLocation  location,
dstFloat *  Dst,
srcFloat *  Src 
)

Definition at line 184 of file copy_color_spinor_mg.cuh.

References errorQuda, and quda::ColorSpinorField::Nspin().

Here is the call graph for this function:

◆ copyGenericColorSpinor() [2/3]

template<int Ns, int Nc, typename dstFloat , typename srcFloat >
void quda::copyGenericColorSpinor ( ColorSpinorField dst,
const ColorSpinorField src,
QudaFieldLocation  location,
dstFloat *  Dst,
srcFloat *  Src,
float *  dstNorm,
float *  srcNorm 
)

◆ CopyGenericColorSpinor() [2/2]

template<int Nc, typename dstFloat , typename srcFloat >
void quda::CopyGenericColorSpinor ( ColorSpinorField dst,
const ColorSpinorField src,
QudaFieldLocation  location,
dstFloat *  Dst,
srcFloat *  Src,
float *  dstNorm = 0,
float *  srcNorm = 0 
)

Definition at line 409 of file copy_color_spinor.cuh.

References errorQuda, and quda::ColorSpinorField::Nspin().

Here is the call graph for this function:

◆ copyGenericColorSpinor() [3/3]

void quda::copyGenericColorSpinor ( ColorSpinorField dst,
const ColorSpinorField src,
QudaFieldLocation  location,
void *  Dst = 0,
void *  Src = 0,
void *  dstNorm = 0,
void *  srcNorm = 0 
)

◆ copyGenericColorSpinorDD()

void quda::copyGenericColorSpinorDD ( ColorSpinorField dst,
const ColorSpinorField src,
QudaFieldLocation  location,
void *  Dst,
void *  Src,
void *  a = 0,
void *  b = 0 
)

Definition at line 5 of file copy_color_spinor_dd.cu.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorDH()

void quda::copyGenericColorSpinorDH ( ColorSpinorField dst,
const ColorSpinorField src,
QudaFieldLocation  location,
void *  Dst,
void *  Src,
void *  a = 0,
void *  b = 0 
)

Definition at line 5 of file copy_color_spinor_dh.cu.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorDQ()

void quda::copyGenericColorSpinorDQ ( ColorSpinorField dst,
const ColorSpinorField src,
QudaFieldLocation  location,
void *  Dst,
void *  Src,
void *  a = 0,
void *  b = 0 
)

Definition at line 5 of file copy_color_spinor_dq.cu.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorDS()

void quda::copyGenericColorSpinorDS ( ColorSpinorField dst,
const ColorSpinorField src,
QudaFieldLocation  location,
void *  Dst,
void *  Src,
void *  a = 0,
void *  b = 0 
)

Definition at line 5 of file copy_color_spinor_ds.cu.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorHD()

void quda::copyGenericColorSpinorHD ( ColorSpinorField dst,
const ColorSpinorField src,
QudaFieldLocation  location,
void *  Dst,
void *  Src,
void *  a = 0,
void *  b = 0 
)

Definition at line 5 of file copy_color_spinor_hd.cu.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorHH()

void quda::copyGenericColorSpinorHH ( ColorSpinorField dst,
const ColorSpinorField src,
QudaFieldLocation  location,
void *  Dst,
void *  Src,
void *  a = 0,
void *  b = 0 
)

Definition at line 5 of file copy_color_spinor_hh.cu.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorHQ()

void quda::copyGenericColorSpinorHQ ( ColorSpinorField dst,
const ColorSpinorField src,
QudaFieldLocation  location,
void *  Dst,
void *  Src,
void *  a = 0,
void *  b = 0 
)

Definition at line 5 of file copy_color_spinor_hq.cu.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorHS()

void quda::copyGenericColorSpinorHS ( ColorSpinorField dst,
const ColorSpinorField src,
QudaFieldLocation  location,
void *  Dst,
void *  Src,
void *  a = 0,
void *  b = 0 
)

Definition at line 5 of file copy_color_spinor_hs.cu.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorMGDD()

void quda::copyGenericColorSpinorMGDD ( ColorSpinorField dst,
const ColorSpinorField src,
QudaFieldLocation  location,
void *  Dst,
void *  Src,
void *  a = 0,
void *  b = 0 
)

Definition at line 5 of file copy_color_spinor_mg_dd.cu.

References errorQuda, and INSTANTIATE_COLOR.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorMGDS()

void quda::copyGenericColorSpinorMGDS ( ColorSpinorField dst,
const ColorSpinorField src,
QudaFieldLocation  location,
void *  Dst,
void *  Src,
void *  a = 0,
void *  b = 0 
)

Definition at line 5 of file copy_color_spinor_mg_ds.cu.

References errorQuda, and INSTANTIATE_COLOR.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorMGHH()

void quda::copyGenericColorSpinorMGHH ( ColorSpinorField dst,
const ColorSpinorField src,
QudaFieldLocation  location,
void *  Dst,
void *  Src,
void *  a = 0,
void *  b = 0 
)

Definition at line 5 of file copy_color_spinor_mg_hh.cu.

References errorQuda, and INSTANTIATE_COLOR.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorMGHQ()

void quda::copyGenericColorSpinorMGHQ ( ColorSpinorField dst,
const ColorSpinorField src,
QudaFieldLocation  location,
void *  Dst,
void *  Src,
void *  a = 0,
void *  b = 0 
)

Definition at line 5 of file copy_color_spinor_mg_hq.cu.

References errorQuda, and INSTANTIATE_COLOR.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorMGHS()

void quda::copyGenericColorSpinorMGHS ( ColorSpinorField dst,
const ColorSpinorField src,
QudaFieldLocation  location,
void *  Dst,
void *  Src,
void *  a = 0,
void *  b = 0 
)

Definition at line 5 of file copy_color_spinor_mg_hs.cu.

References errorQuda, and INSTANTIATE_COLOR.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorMGQH()

void quda::copyGenericColorSpinorMGQH ( ColorSpinorField dst,
const ColorSpinorField src,
QudaFieldLocation  location,
void *  Dst,
void *  Src,
void *  a = 0,
void *  b = 0 
)

Definition at line 5 of file copy_color_spinor_mg_qh.cu.

References errorQuda, and INSTANTIATE_COLOR.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorMGQQ()

void quda::copyGenericColorSpinorMGQQ ( ColorSpinorField dst,
const ColorSpinorField src,
QudaFieldLocation  location,
void *  Dst,
void *  Src,
void *  a = 0,
void *  b = 0 
)

Definition at line 5 of file copy_color_spinor_mg_qq.cu.

References errorQuda, and INSTANTIATE_COLOR.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorMGQS()

void quda::copyGenericColorSpinorMGQS ( ColorSpinorField dst,
const ColorSpinorField src,
QudaFieldLocation  location,
void *  Dst,
void *  Src,
void *  a = 0,
void *  b = 0 
)

Definition at line 5 of file copy_color_spinor_mg_qs.cu.

References errorQuda, and INSTANTIATE_COLOR.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorMGSD()

void quda::copyGenericColorSpinorMGSD ( ColorSpinorField dst,
const ColorSpinorField src,
QudaFieldLocation  location,
void *  Dst,
void *  Src,
void *  a = 0,
void *  b = 0 
)

Definition at line 5 of file copy_color_spinor_mg_sd.cu.

References errorQuda, and INSTANTIATE_COLOR.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorMGSH()

void quda::copyGenericColorSpinorMGSH ( ColorSpinorField dst,
const ColorSpinorField src,
QudaFieldLocation  location,
void *  Dst,
void *  Src,
void *  a = 0,
void *  b = 0 
)

Definition at line 5 of file copy_color_spinor_mg_sh.cu.

References errorQuda, and INSTANTIATE_COLOR.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorMGSQ()

void quda::copyGenericColorSpinorMGSQ ( ColorSpinorField dst,
const ColorSpinorField src,
QudaFieldLocation  location,
void *  Dst,
void *  Src,
void *  a = 0,
void *  b = 0 
)

Definition at line 5 of file copy_color_spinor_mg_sq.cu.

References errorQuda, and INSTANTIATE_COLOR.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorMGSS()

void quda::copyGenericColorSpinorMGSS ( ColorSpinorField dst,
const ColorSpinorField src,
QudaFieldLocation  location,
void *  Dst,
void *  Src,
void *  a = 0,
void *  b = 0 
)

Definition at line 5 of file copy_color_spinor_mg_ss.cu.

References errorQuda, and INSTANTIATE_COLOR.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorQD()

void quda::copyGenericColorSpinorQD ( ColorSpinorField dst,
const ColorSpinorField src,
QudaFieldLocation  location,
void *  Dst,
void *  Src,
void *  a = 0,
void *  b = 0 
)

Definition at line 5 of file copy_color_spinor_qd.cu.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorQH()

void quda::copyGenericColorSpinorQH ( ColorSpinorField dst,
const ColorSpinorField src,
QudaFieldLocation  location,
void *  Dst,
void *  Src,
void *  a = 0,
void *  b = 0 
)

Definition at line 5 of file copy_color_spinor_qh.cu.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorQQ()

void quda::copyGenericColorSpinorQQ ( ColorSpinorField dst,
const ColorSpinorField src,
QudaFieldLocation  location,
void *  Dst,
void *  Src,
void *  a = 0,
void *  b = 0 
)

Definition at line 5 of file copy_color_spinor_qq.cu.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorQS()

void quda::copyGenericColorSpinorQS ( ColorSpinorField dst,
const ColorSpinorField src,
QudaFieldLocation  location,
void *  Dst,
void *  Src,
void *  a = 0,
void *  b = 0 
)

Definition at line 5 of file copy_color_spinor_qs.cu.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorSD()

void quda::copyGenericColorSpinorSD ( ColorSpinorField dst,
const ColorSpinorField src,
QudaFieldLocation  location,
void *  Dst,
void *  Src,
void *  a = 0,
void *  b = 0 
)

Definition at line 5 of file copy_color_spinor_sd.cu.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorSH()

void quda::copyGenericColorSpinorSH ( ColorSpinorField dst,
const ColorSpinorField src,
QudaFieldLocation  location,
void *  Dst,
void *  Src,
void *  a = 0,
void *  b = 0 
)

Definition at line 5 of file copy_color_spinor_sh.cu.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorSQ()

void quda::copyGenericColorSpinorSQ ( ColorSpinorField dst,
const ColorSpinorField src,
QudaFieldLocation  location,
void *  Dst,
void *  Src,
void *  a = 0,
void *  b = 0 
)

Definition at line 5 of file copy_color_spinor_sq.cu.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericColorSpinorSS()

void quda::copyGenericColorSpinorSS ( ColorSpinorField dst,
const ColorSpinorField src,
QudaFieldLocation  location,
void *  Dst,
void *  Src,
void *  a = 0,
void *  b = 0 
)

Definition at line 5 of file copy_color_spinor_ss.cu.

Referenced by copyGenericColorSpinor().

Here is the caller graph for this function:

◆ copyGenericGauge()

void quda::copyGenericGauge ( GaugeField out,
const GaugeField in,
QudaFieldLocation  location,
void *  Out = 0,
void *  In = 0,
void **  ghostOut = 0,
void **  ghostIn = 0,
int  type = 0 
)

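This function is used for copying a gauge field, including its ghost zone (or the ghost zone only, depending on type), from an input gauge field or buffer to an output gauge field or buffer. Defined in copy_gauge.cu.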

Parameters
out: The output field to which we are copying
in: The input field from which we are copying
location: The location of where we are doing the copying (CPU or CUDA)
Out: The output buffer (optional)
In: The input buffer (optional)
ghostOut: The output ghost buffer (optional)
ghostIn: The input ghost buffer (optional)
type: The type of copy we are doing (0: body and ghost; otherwise: ghost only)

Definition at line 41 of file copy_gauge.cu.

References copyGenericGaugeDoubleOut(), copyGenericGaugeHalfOut(), copyGenericGaugeMG(), copyGenericGaugeQuarterOut(), copyGenericGaugeSingleOut(), errorQuda, quda::GaugeField::Geometry(), quda::LatticeField::GhostExchange(), quda::GaugeField::Ncolor(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_GHOST_EXCHANGE_PAD, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, and QUDA_SINGLE_PRECISION.

Referenced by quda::cudaGaugeField::copy(), quda::cpuGaugeField::copy(), copyGauge(), quda::cudaGaugeField::exchangeGhost(), quda::cpuGaugeField::Gauge_p(), quda::cudaGaugeField::injectGhost(), and quda::cudaGaugeField::saveCPUField().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ copyGenericGaugeDoubleOut()

void quda::copyGenericGaugeDoubleOut ( GaugeField out,
const GaugeField in,
QudaFieldLocation  location,
void *  Out,
void *  In,
void **  ghostOut,
void **  ghostIn,
int  type 
)

Definition at line 5 of file copy_gauge_double.cu.

References in, and out.

Referenced by copyGenericGauge().

Here is the caller graph for this function:

◆ copyGenericGaugeHalfOut()

void quda::copyGenericGaugeHalfOut ( GaugeField out,
const GaugeField in,
QudaFieldLocation  location,
void *  Out,
void *  In,
void **  ghostOut,
void **  ghostIn,
int  type 
)

Definition at line 5 of file copy_gauge_half.cu.

References errorQuda, in, and out.

Referenced by copyGenericGauge().

Here is the caller graph for this function:

◆ copyGenericGaugeMG()

void quda::copyGenericGaugeMG ( GaugeField out,
const GaugeField in,
QudaFieldLocation  location,
void *  Out,
void *  In,
void **  ghostOut,
void **  ghostIn,
int  type 
)

Definition at line 146 of file copy_gauge_mg.cu.

References copyGaugeMG(), errorQuda, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, and QUDA_SINGLE_PRECISION.

Referenced by copyGenericGauge().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ copyGenericGaugeQuarterOut()

void quda::copyGenericGaugeQuarterOut ( GaugeField out,
const GaugeField in,
QudaFieldLocation  location,
void *  Out,
void *  In,
void **  ghostOut,
void **  ghostIn,
int  type 
)

Definition at line 6 of file copy_gauge_quarter.cu.

References errorQuda, in, and out.

Referenced by copyGenericGauge().

Here is the caller graph for this function:

◆ copyGenericGaugeSingleOut()

void quda::copyGenericGaugeSingleOut ( GaugeField out,
const GaugeField in,
QudaFieldLocation  location,
void *  Out,
void *  In,
void **  ghostOut,
void **  ghostIn,
int  type 
)

Definition at line 5 of file copy_gauge_single.cu.

References errorQuda, in, and out.

Referenced by copyGenericGauge().

Here is the caller graph for this function:

◆ copyGhost()

template<typename FloatOut , typename FloatIn , int length, typename Arg >
void quda::copyGhost ( Arg arg)

Generic CPU gauge ghost reordering and packing

Definition at line 126 of file copy_gauge.cuh.

References in, length, nColor, quda::gauge::Ncolor(), out, and parity.

Here is the call graph for this function:

◆ copyGhostKernel()

template<typename FloatOut , typename FloatIn , int length, typename Arg >
__global__ void quda::copyGhostKernel ( Arg  arg)

Generic CUDA kernel for copying the ghost zone. Adopts a similar form to the CPU version, using the same inlined functions.

Definition at line 157 of file copy_gauge.cuh.

References in, length, nColor, quda::gauge::Ncolor(), out, and parity.

Here is the call graph for this function:

◆ copyInterior() [1/2]

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis , bool extend>
__device__ __host__ void quda::copyInterior ( CopySpinorExArg< OutOrder, InOrder, Basis > &  arg,
int  X 
)

◆ copyInterior() [2/2]

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis , bool extend>
void quda::copyInterior ( CopySpinorExArg< OutOrder, InOrder, Basis > &  arg)

Definition at line 217 of file extended_color_spinor_utilities.cu.

References arg(), and quda::CopySpinorExArg< OutOrder, InOrder, Basis >::length.

Here is the call graph for this function:

◆ copyInteriorKernel()

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis , bool extend>
__global__ void quda::copyInteriorKernel ( CopySpinorExArg< OutOrder, InOrder, Basis >  arg)

Definition at line 203 of file extended_color_spinor_utilities.cu.

References arg(), and quda::CopySpinorExArg< OutOrder, InOrder, Basis >::length.

Here is the call graph for this function:

◆ copyLinkToArray() [1/2]

void quda::copyLinkToArray ( float *  array,
const Matrix< float2, 3 > &  link 
)
inline

Definition at line 1088 of file quda_matrix.h.

Referenced by unitarizeLinksCPU().

Here is the caller graph for this function:

◆ copyLinkToArray() [2/2]

template<class Cmplx , class Real >
void quda::copyLinkToArray ( Real *  array,
const Matrix< Cmplx, 3 > &  link 
)
inline

Definition at line 1102 of file quda_matrix.h.

◆ copyMom()

template<typename FloatOut , typename FloatIn , int length, typename Out , typename In , typename Arg >
void quda::copyMom ( Arg arg,
const GaugeField out,
const GaugeField in,
QudaFieldLocation  location 
)

Definition at line 278 of file copy_gauge_inc.cu.

References quda::CopyGauge< FloatOut, FloatIn, length, Arg >::apply().

Here is the call graph for this function:

◆ copySpinorEx() [1/2]

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis >
void quda::copySpinorEx ( OutOrder  outOrder,
const InOrder  inOrder,
const Basis  basis,
const int *  E,
const int *  X,
const int  parity,
const bool  extend,
const ColorSpinorField meta,
QudaFieldLocation  location 
)

◆ copySpinorEx() [2/2]

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder >
void quda::copySpinorEx ( OutOrder  outOrder,
InOrder  inOrder,
const QudaGammaBasis  outBasis,
const QudaGammaBasis  inBasis,
const int *  E,
const int *  X,
const int  parity,
const bool  extend,
const ColorSpinorField meta,
QudaFieldLocation  location 
)

◆ corner()

template<class T >
void quda::corner ( T &  p,
int  v,
int  s,
int  c 
)

Create a corner source with value "v" on color "c" on a single corner, where the corner is selected by "s". "s" is encoded as a bitmap; for example, 1010 -> the corner at x = 0, y = 1, z = 0, t = 1.

Definition at line 82 of file color_spinor_util.cu.

References errorQuda, getCoords(), parity, and X.

Referenced by genericSource().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ cos() [1/3]

template<typename ValueType >
__host__ __device__ ValueType quda::cos ( ValueType  x)
inline

Definition at line 46 of file complex_quda.h.

References cos().

Referenced by cos(), quda::Trig< isFixed, T >::Cos(), cosh(), exponentiate_iQ(), genGauss(), link_sanity_check_internal_8(), new_load_half(), polar(), setUnitarizeLinksConstants(), sin(), sinh(), su3Reconstruct8(), and tan().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ cos() [2/3]

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::cos ( const complex< ValueType > &  z)
inline

Definition at line 1117 of file complex_quda.h.

References cos(), cosh(), sin(), and sinh().

Here is the call graph for this function:

◆ cos() [3/3]

template<>
__host__ __device__ complex<float> quda::cos ( const complex< float > &  z)
inline

Definition at line 1125 of file complex_quda.h.

References quda::complex< float >::imag(), and quda::complex< float >::real().

Referenced by cos().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ cosh() [1/3]

template<typename ValueType >
__host__ __device__ ValueType quda::cosh ( ValueType  x)
inline

Definition at line 81 of file complex_quda.h.

References cosh().

Referenced by cos(), cosh(), sin(), and sinh().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ cosh() [2/3]

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::cosh ( const complex< ValueType > &  z)
inline

Definition at line 1133 of file complex_quda.h.

References cos(), cosh(), sin(), and sinh().

Here is the call graph for this function:

◆ cosh() [3/3]

template<>
__host__ __device__ complex<float> quda::cosh ( const complex< float > &  z)
inline

Definition at line 1141 of file complex_quda.h.

References quda::complex< float >::imag(), and quda::complex< float >::real().

Referenced by cosh().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ covDev()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, typename Arg >
__device__ __host__ void quda::covDev ( Arg arg,
int  idx,
int  parity 
)
inline

Definition at line 119 of file covDev.cuh.

References arg(), EXTERIOR_KERNEL_ALL, INTERIOR_KERNEL, quda::CovDevArg< Float, nColor, reconstruct_ >::out, and quda::DslashArg< Float >::parity.

Here is the call graph for this function:

◆ covDevGPU()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__global__ void quda::covDevGPU ( Arg  arg)

Definition at line 182 of file covDev.cuh.

References arg(), and quda::DslashArg< Float >::parity.

Here is the call graph for this function:

◆ create_gauge_buffer()

void * quda::create_gauge_buffer ( size_t  bytes,
QudaGaugeFieldOrder  order,
QudaFieldGeometry  geometry 
)

◆ create_ghost_buffer()

void ** quda::create_ghost_buffer ( size_t  bytes[],
QudaGaugeFieldOrder  order,
QudaFieldGeometry  geometry 
)

◆ createDirac() [1/2]

void quda::createDirac ( Dirac *&  d,
Dirac *&  dSloppy,
Dirac *&  dPre,
QudaInvertParam param,
const bool  pc_solve 
)

Definition at line 1730 of file interface_quda.cpp.

References quda::Dirac::create(), QudaInvertParam_s::inv_type, QUDA_INC_EIGCG_INVERTER, setDiracParam(), setDiracPreParam(), and setDiracSloppyParam().

Referenced by eigensolveQuda(), invertMultiShiftQuda(), invertMultiSrcQuda(), and invertQuda().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ createDirac() [2/2]

void quda::createDirac ( Dirac *&  d,
Dirac *&  dSloppy,
Dirac *&  dPre,
Dirac *&  dRef,
QudaInvertParam param,
const bool  pc_solve 
)

◆ createDslashEvents()

void quda::createDslashEvents ( )

◆ d2i()

__device__ __host__ int quda::d2i ( double  d)
inline

Definition at line 104 of file convert.h.

Referenced by convert< short2, double4 >(), convert< short4, double2 >(), and copyFloatN().

Here is the caller graph for this function:

◆ deserializeTuneCache()

static void quda::deserializeTuneCache ( std::istream &  in)
static

Deserialize tunecache from an istream, useful for reading a file or receiving from other nodes.

Definition at line 134 of file tune.cpp.

References quda::TuneKey::aux, quda::TuneParam::aux, quda::TuneKey::aux_n, quda::TuneParam::block, quda::TuneParam::comment, errorQuda, quda::TuneParam::grid, quda::TraceKey::key, quda::TuneKey::name, quda::TuneKey::name_n, param, quda::TuneParam::shared_bytes, quda::TuneParam::time, quda::TuneKey::volume, and quda::TuneKey::volume_n.

Referenced by broadcastTuneCache(), and loadTuneCache().

Here is the caller graph for this function:

◆ destroyDslashEvents()

void quda::destroyDslashEvents ( )

◆ device_allocated_peak()

long quda::device_allocated_peak ( )
Returns
peak device memory allocated

Definition at line 59 of file malloc.cpp.

References DEVICE.

◆ device_free_()

void quda::device_free_ ( const char *  func,
const char *  file,
int  line,
void *  ptr 
)

Free device memory allocated with device_malloc(). This function should only be called via the device_free() macro, defined in malloc_quda.h

Definition at line 301 of file malloc.cpp.

References count, DEVICE, device_pinned_free_(), errorQuda, and track_free().

Referenced by quda::pool::device_free_(), quda::pool::device_malloc_(), and device_pinned_free_().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ device_malloc_()

void * quda::device_malloc_ ( const char *  func,
const char *  file,
int  line,
size_t  size 
)

Perform a standard cudaMalloc() with error-checking. This function should only be called via the device_malloc() macro, defined in malloc_quda.h

Definition at line 169 of file malloc.cpp.

References quda::MemAlloc::base_size, DEVICE, device_pinned_malloc_(), errorQuda, quda::MemAlloc::size, and track_malloc().

Referenced by quda::pool::device_malloc_(), and device_pinned_malloc_().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ device_pinned_free_()

void quda::device_pinned_free_ ( const char *  func,
const char *  file,
int  line,
void *  ptr 
)

Free device memory allocated with device_pinned_malloc(). This function should only be called via the device_pinned_free() macro, defined in malloc_quda.h.

Definition at line 322 of file malloc.cpp.

References comm_peer2peer_present(), count, device_free_(), DEVICE_PINNED, errorQuda, printfQuda, and track_free().

Referenced by device_free_().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ device_pinned_malloc_()

void * quda::device_pinned_malloc_ ( const char *  func,
const char *  file,
int  line,
size_t  size 
)

Perform a cuMemAlloc with error-checking. This function is to guarantee a unique memory allocation on the device, since cudaMalloc can be redirected (as is the case with QDPJIT). This should only be called via the device_pinned_malloc() macro, defined in malloc_quda.h.

Definition at line 200 of file malloc.cpp.

References quda::MemAlloc::base_size, comm_peer2peer_present(), device_malloc_(), DEVICE_PINNED, errorQuda, quda::MemAlloc::size, and track_malloc().

Referenced by device_malloc_().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ dimFromFaceIndex() [1/2]

template<int nDim = 4, typename Arg >
__host__ __device__ int quda::dimFromFaceIndex ( int &  face_idx,
int  tid,
const Arg arg 
)
inline

Determines which face a given thread is computing. Also rescales face_idx so that it is relative to a given dimension. If the 5-d variant is called, then it is assumed that arg.threads contains only the 3-d surface of threads but face_idx is a 4-d index (surface * fifth dimension). At present multi-src staggered uses the 4-d variant since the face_idx that is passed in is the 3-d surface, not the 4-d one.

Parameters
[out] face_idx: Face index
[in] tid: Checkerboard volume index
[in] arg: Input parameters
Returns
dimension this face_idx corresponds to

Definition at line 783 of file index_helper.cuh.

References s.

Referenced by packKernel(), and packStaggeredKernel().

Here is the caller graph for this function:

◆ dimFromFaceIndex() [2/2]

template<int nDim = 4, typename Arg >
__host__ __device__ int quda::dimFromFaceIndex ( int &  face_idx,
const Arg arg 
)
inline

Definition at line 809 of file index_helper.cuh.

References arg().

Here is the call graph for this function:

◆ disable_policy()

void quda::disable_policy ( DslashCoarsePolicy  p)

Definition at line 606 of file dslash_coarse.cu.

References DSLASH_COARSE_POLICY_DISABLED, and policies().

Here is the call graph for this function:

◆ disableProfileCount()

void quda::disableProfileCount ( )

◆ doBulk() [1/2]

template<KernelType type>
__host__ __device__ bool quda::doBulk ( )
inline

Helper function to determine if we should do interior computation.

Parameters
[in] dim: Dimension we are working on

Definition at line 35 of file dslash_helper.cuh.

References EXTERIOR_KERNEL_ALL, EXTERIOR_KERNEL_T, EXTERIOR_KERNEL_X, EXTERIOR_KERNEL_Y, EXTERIOR_KERNEL_Z, and INTERIOR_KERNEL.

◆ doBulk() [2/2]

template<DslashType type>
static __host__ __device__ bool quda::doBulk ( )
static

Helper function to determine if we should do interior computation.

Definition at line 72 of file dslash_coarse.cuh.

References DSLASH_FULL, DSLASH_INTERIOR, and s.

◆ doHalo() [1/2]

template<KernelType type>
__host__ __device__ bool quda::doHalo ( int  dim = -1)
inline

Helper function to determine if we should do halo computation.

Parameters
[in] dim: Dimension we are working on. If dim=-1 (default argument) then we return true if type is any halo kernel.

Definition at line 17 of file dslash_helper.cuh.

References EXTERIOR_KERNEL_ALL, EXTERIOR_KERNEL_T, EXTERIOR_KERNEL_X, EXTERIOR_KERNEL_Y, EXTERIOR_KERNEL_Z, and INTERIOR_KERNEL.

◆ doHalo() [2/2]

template<DslashType type>
static __host__ __device__ bool quda::doHalo ( )
static

Helper function to determine if we should do halo computation.

Definition at line 58 of file dslash_coarse.cuh.

References DSLASH_EXTERIOR, and DSLASH_FULL.

◆ domainWall4D()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__device__ __host__ void quda::domainWall4D ( Arg arg,
int  idx,
int  s,
int  parity 
)
inline

◆ domainWall4DCPU()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
void quda::domainWall4DCPU ( Arg arg)

Definition at line 74 of file dslash_domain_wall_4d.cuh.

References arg(), quda::DslashArg< Float >::nParity, quda::DslashArg< Float >::parity, and s.

Here is the call graph for this function:

◆ domainWall4DGPU()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__global__ void quda::domainWall4DGPU ( Arg  arg)

Definition at line 90 of file dslash_domain_wall_4d.cuh.

References arg(), quda::DslashArg< Float >::nParity, quda::DslashArg< Float >::parity, and s.

Here is the call graph for this function:

◆ domainWall5D()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__device__ __host__ void quda::domainWall5D ( Arg arg,
int  idx,
int  parity 
)
inline

◆ domainWall5DCPU()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
void quda::domainWall5DCPU ( Arg arg)

Definition at line 86 of file dslash_domain_wall_5d.cuh.

References arg(), quda::DslashArg< Float >::nParity, and quda::DslashArg< Float >::parity.

Here is the call graph for this function:

◆ domainWall5DGPU()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__global__ void quda::domainWall5DGPU ( Arg  arg)

Definition at line 100 of file dslash_domain_wall_5d.cuh.

References arg(), quda::DslashArg< Float >::nParity, quda::DslashArg< Float >::parity, and s.

Here is the call graph for this function:

◆ dslash5()

template<typename Float , int nColor, bool dagger, bool xpay, Dslash5Type type, typename Arg >
__device__ __host__ void quda::dslash5 ( Arg arg,
int  parity,
int  x_cb,
int  s 
)
inline

Apply the D5 operator at given site.

Parameters
[in] arg: Argument struct containing any meta data and accessors
[in] parity: Parity we are on
[in] x_cb: Checkerboarded 4-d space-time index
[in] s: Ls dimension coordinate

Definition at line 191 of file dslash_domain_wall_m5.cuh.

References quda::coeff_type< real, is_variable, Arg >::a(), quda::coeff_type< real, is_variable, Arg >::b(), quda::coeff_type< real, is_variable, Arg >::c(), dagger, DSLASH5_DWF, DSLASH5_MOBIUS, DSLASH5_MOBIUS_PRE, in, out, and quda::blas::xpay().

Here is the call graph for this function:

◆ dslash5CPU()

template<typename Float , int nColor, bool dagger, bool xpay, Dslash5Type type, typename Arg >
void quda::dslash5CPU ( Arg arg)

CPU kernel for applying the D5 operator.

Parameters
[in] arg: Argument struct containing any meta data and accessors

Definition at line 250 of file dslash_domain_wall_m5.cuh.

References arg(), quda::Arg< real, Ns, Nc, order >::nParity, parity, and s.

Here is the call graph for this function:

◆ dslash5GPU()

template<typename Float , int nColor, bool dagger, bool xpay, Dslash5Type type, typename Arg >
__global__ void quda::dslash5GPU ( Arg  arg)

GPU kernel for applying the D5 operator.

Parameters
[in] arg: Argument struct containing any meta data and accessors

Definition at line 266 of file dslash_domain_wall_m5.cuh.

References arg(), quda::Arg< real, Ns, Nc, order >::nParity, parity, and s.

Here is the call graph for this function:

◆ dslash5inv()

template<typename Float , int nColor, bool dagger, bool xpay, Dslash5Type type, bool shared, bool var_inverse, typename Arg >
__device__ __host__ void quda::dslash5inv ( Arg arg,
int  parity,
int  x_cb,
int  s 
)
inline

Apply the M5 inverse operator at a given site on the lattice.

Template Parameters
shared: Whether to use a shared memory scratch pad to store the input field across the Ls dimension to minimize global memory reads.
Parameters
[in] arg: Argument struct containing any meta data and accessors
[in] parity: Parity we are on
[in] x_cb: Checkerboarded 4-d space-time index
[in] s: Ls dimension coordinate

Definition at line 433 of file dslash_domain_wall_m5.cuh.

References quda::coeff_type< real, is_variable, Arg >::a(), arg(), out, parity, s, and quda::blas::xpay().

Here is the call graph for this function:

◆ dslash5invGPU()

template<typename Float , int nColor, bool dagger, bool xpay, Dslash5Type type, bool shared, bool var_inverse, typename Arg >
__global__ void quda::dslash5invGPU ( Arg  arg)

GPU kernel for applying the M5 inverse operator.

Template Parameters
shared: Whether to use a shared memory scratch pad to store the input field across the Ls dimension to minimize global memory reads.
Parameters
[in] arg: Argument struct containing any meta data and accessors

Definition at line 463 of file dslash_domain_wall_m5.cuh.

References arg(), quda::Arg< real, Ns, Nc, order >::nParity, parity, and s.

Here is the call graph for this function:

◆ enable_policy()

void quda::enable_policy ( DslashCoarsePolicy  p)

Definition at line 602 of file dslash_coarse.cu.

References policies().

Referenced by quda::DslashCoarsePolicyTune::DslashCoarsePolicyTune().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ enableProfileCount()

void quda::enableProfileCount ( )

◆ ErrorSU3()

template<class Cmplx >
__device__ __host__ double quda::ErrorSU3 ( const Matrix< Cmplx, 3 > &  matrix)

Definition at line 1164 of file quda_matrix.h.

References conj(), and norm().

Referenced by computeOvrImpSTOUTStep(), and computeSTOUTStep().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ exchangeExtendedGhost()

void quda::exchangeExtendedGhost ( cudaColorSpinorField spinor,
int  R[],
int  parity,
cudaStream_t *  stream_p 
)

◆ exp() [1/3]

template<typename ValueType >
__host__ __device__ ValueType quda::exp ( ValueType  x)
inline

Definition at line 96 of file complex_quda.h.

References exp().

Referenced by constantInv(), exp(), expsu3(), pow(), smallSVD(), tanh(), and test().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ exp() [2/3]

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::exp ( const complex< ValueType > &  z)
inline

Definition at line 1150 of file complex_quda.h.

References exp(), and polar().

Here is the call graph for this function:

◆ exp() [3/3]

template<>
__host__ __device__ complex<float> quda::exp ( const complex< float > &  z)
inline

Definition at line 1156 of file complex_quda.h.

References quda::complex< float >::imag(), polar(), and quda::complex< float >::real().

Referenced by exp().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ exponentiate_iQ()

template<class T >
__device__ __host__ void quda::exponentiate_iQ ( const Matrix< T, 3 > &  Q,
Matrix< T, 3 > *  exp_iQ 
)
inline

Definition at line 1191 of file quda_matrix.h.

References acos(), cos(), getDeterminant(), getTrace(), parity, pow(), setIdentity(), setZero(), sin(), and sqrt().

Referenced by computeOvrImpSTOUTStep(), and computeSTOUTStep().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ expsu3()

template<typename Float >
__device__ __host__ void quda::expsu3 ( Matrix< complex< Float >, 3 > &  q)

Direct port of the TIFR expsu3 algorithm

Definition at line 1325 of file quda_matrix.h.

References conj(), exp(), log(), and sqrt().

Here is the call graph for this function:

◆ extendedCopyColorSpinor() [1/2]

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename InOrder >
void quda::extendedCopyColorSpinor ( InOrder &  inOrder,
ColorSpinorField out,
QudaGammaBasis  inBasis,
const int *  E,
const int *  X,
const int  parity,
const bool  extend,
QudaFieldLocation  location,
FloatOut *  Out,
float *  outNorm 
)

Definition at line 313 of file extended_color_spinor_utilities.cu.

References E, errorQuda, quda::ColorSpinorField::GammaBasis(), quda::ColorSpinorField::isNative(), out, parity, and X.

Here is the call graph for this function:

◆ extendedCopyColorSpinor() [2/2]

template<typename FloatOut , typename FloatIn , int Ns, int Nc>
void quda::extendedCopyColorSpinor ( ColorSpinorField out,
const ColorSpinorField in,
const int  parity,
const QudaFieldLocation  location,
FloatOut *  Out,
FloatIn *  In,
float *  outNorm,
float *  inNorm 
)

◆ extractExtendedGaugeGhost()

void quda::extractExtendedGaugeGhost ( const GaugeField u,
int  dim,
const int *  R,
void **  ghost,
bool  extract 
)

This function is used for extracting the gauge ghost zone from a gauge field array. Defined in extract_gauge_ghost_extended.cu.

Parameters
u: The gauge field from which we want to extract/pack the ghost zone
dim: The dimension in which we are packing/unpacking
ghost: The array where we want to pack/unpack the ghost zone into/from
extract: Whether we are extracting into ghost or injecting from ghost

Definition at line 418 of file extract_gauge_ghost_extended.cu.

References errorQuda, extractGhostEx(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, and QUDA_SINGLE_PRECISION.

Referenced by quda::cudaGaugeField::exchangeExtendedGhost(), quda::cpuGaugeField::exchangeExtendedGhost(), and quda::cpuGaugeField::Gauge_p().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ extractGaugeGhost()

void quda::extractGaugeGhost ( const GaugeField u,
void **  ghost,
bool  extract = true,
int  offset = 0 
)

This function is used for extracting the gauge ghost zone from a gauge field array. Defined in extract_gauge_ghost.cu.

Parameters
u: The gauge field from which we want to extract the ghost zone
ghost: The array where we want to pack the ghost zone into
extract: Whether we are extracting into ghost or injecting from ghost
offset: By default we exchange the nDim site-vector of links in the first nDim dimensions; offset allows us to instead exchange the links in nDim+offset dimensions. This is used to facilitate sending bi-directional links, which is needed for the coarse links.

Definition at line 105 of file extract_gauge_ghost.cu.

References errorQuda, extractGaugeGhostMG(), extractGhost(), quda::GaugeField::Ncolor(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, and QUDA_SINGLE_PRECISION.

Referenced by quda::cudaGaugeField::exchangeGhost(), quda::cpuGaugeField::exchangeGhost(), quda::cpuGaugeField::Gauge_p(), quda::cudaGaugeField::injectGhost(), and quda::cpuGaugeField::injectGhost().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ extractGaugeGhostMG()

void quda::extractGaugeGhostMG ( const GaugeField u,
void **  ghost,
bool  extract,
int  offset 
)

Definition at line 75 of file extract_gauge_ghost_mg.cu.

References errorQuda, extractGhostMG(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, and QUDA_SINGLE_PRECISION.

Referenced by extractGaugeGhost(), and extractGhost().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ extractGhost() [1/3]

template<typename Float >
void quda::extractGhost ( const GaugeField u,
Float **  Ghost,
bool  extract,
int  offset 
)

◆ extractGhost() [2/3]

template<int nDim, bool extract, typename Arg >
void quda::extractGhost ( Arg arg)

Generic CPU gauge ghost extraction and packing. NB: This routine is specialized to four dimensions.

Definition at line 46 of file extract_gauge_ghost_helper.cuh.

References nColor, and parity.

◆ extractGhost() [3/3]

template<typename Float , int length, typename Order >
void quda::extractGhost ( Order  order,
const GaugeField u,
QudaFieldLocation  location,
bool  extract,
int  offset 
)

Generic gauge ghost extraction and packing (or the converse). NB: This routine is specialized to four dimensions.

Definition at line 236 of file extract_gauge_ghost_helper.cuh.

References quda::ExtractGhost< nDim, Arg >::apply(), arg(), commDim, extractor(), X, and quda::LatticeField::X().

Here is the call graph for this function:

◆ extractGhostEx() [1/3]

template<typename Float , int length, int nDim, int dim, typename Order , bool extract>
void quda::extractGhostEx ( ExtractGhostExArg< Order, nDim, dim >  arg)

◆ extractGhostEx() [2/3]

template<typename Float , int length, typename Order >
void quda::extractGhostEx ( Order  order,
const int  dim,
const int *  surfaceCB,
const int *  E,
const int *  R,
bool  extract,
const GaugeField u,
QudaFieldLocation  location 
)

Generic CPU gauge ghost extraction and packing. NB: This routine is specialized to four dimensions.

Parameters
E: the extended gauge dimensions
R: array holding the radius of the extended region
extract: Whether we are extracting or injecting the ghost zone

Definition at line 258 of file extract_gauge_ghost_extended.cu.

References quda::ExtractGhostEx< Float, length, nDim, dim, Order >::apply(), arg(), checkCudaError, commDim, errorQuda, extractor(), and X.

Here is the call graph for this function:

◆ extractGhostEx() [3/3]

template<typename Float >
void quda::extractGhostEx ( const GaugeField u,
int  dim,
const int *  R,
Float **  Ghost,
bool  extract 
)

◆ extractGhostExKernel()

template<typename Float , int length, int nDim, int dim, typename Order , bool extract>
__global__ void quda::extractGhostExKernel ( ExtractGhostExArg< Order, nDim, dim >  arg)

Generic GPU gauge ghost extraction and packing. NB: This routine is specialized to four dimensions. FIXME: this implementation will have two-way warp divergence. Generic CPU gauge ghost extraction and packing. NB: This routine is specialized to four dimensions.

Definition at line 144 of file extract_gauge_ghost_extended.cu.

References quda::ExtractGhostExArg< Order, nDim, dim >::A0, quda::ExtractGhostExArg< Order, nDim, dim >::A1, arg(), quda::ExtractGhostExArg< Order, nDim, dim >::B0, quda::ExtractGhostExArg< Order, nDim, dim >::B1, quda::ExtractGhostExArg< Order, nDim, dim >::C0, quda::ExtractGhostExArg< Order, nDim, dim >::C1, parity, quda::ExtractGhostExArg< Order, nDim, dim >::R, quda::ExtractGhostExArg< Order, nDim, dim >::threads, quda::ExtractGhostExArg< Order, nDim, dim >::X, and X.

Here is the call graph for this function:

◆ extractGhostKernel()

template<int nDim, bool extract, typename Arg >
__global__ void quda::extractGhostKernel ( Arg  arg)

Generic GPU gauge ghost extraction and packing. NB: This routine is specialized to four dimensions. FIXME: this implementation will have two-way warp divergence.

Definition at line 114 of file extract_gauge_ghost_helper.cuh.

References nColor, parity, and X.

◆ extractGhostMG() [1/2]

template<typename storeFloat , int Nc>
void quda::extractGhostMG ( const GaugeField u,
storeFloat **  Ghost,
bool  extract,
int  offset 
)

This is the template driver for extractGhost

Definition at line 15 of file extract_gauge_ghost_mg.cu.

References errorQuda, quda::GaugeField::isNative(), length, quda::GaugeField::Order(), QUDA_CPU_FIELD_LOCATION, QUDA_CUDA_FIELD_LOCATION, QUDA_QDP_GAUGE_ORDER, QUDA_RECONSTRUCT_NO, and quda::GaugeField::Reconstruct().

Referenced by extractGaugeGhostMG().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ extractGhostMG() [2/2]

template<typename Float >
void quda::extractGhostMG ( const GaugeField u,
Float **  Ghost,
bool  extract,
int  offset 
)

This is the template driver for extractGhost

Definition at line 54 of file extract_gauge_ghost_mg.cu.

References errorQuda, quda::GaugeField::LinkType(), quda::GaugeField::Ncolor(), QUDA_COARSE_LINKS, QUDA_RECONSTRUCT_NO, and quda::GaugeField::Reconstruct().

Here is the call graph for this function:

◆ extractor()

template<typename Float , int length, int dim, typename Arg >
__device__ __host__ void quda::extractor ( Arg arg,
int  dir,
int  a,
int  b,
int  c,
int  d,
int  g,
int  parity 
)

Definition at line 56 of file extract_gauge_ghost_extended.cu.

References quda::Matrix< T, N >::data, length, and quda::gauge::Ncolor().

Referenced by extractGhost(), and extractGhostEx().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ f2i()

__device__ __host__ int quda::f2i ( float  f)
inline

Definition at line 93 of file convert.h.

Referenced by convert< short2, float4 >(), convert< short4, float2 >(), copy(), copy_scaled(), and copyFloatN().

Here is the caller graph for this function:

◆ fatLongKSLink()

void quda::fatLongKSLink ( cudaGaugeField fat,
cudaGaugeField lng,
const cudaGaugeField gauge,
const double *  coeff 
)

Compute the fat and long links for improved staggered (Kogut-Susskind) fermions.

Parameters
[out] fat: The computed fat link
[out] lng: The computed long link (only computed if lng!=0)
[in] gauge: The input gauge field
[in] coeff: Array of path coefficients

Definition at line 532 of file llfat_quda.cu.

References checkCudaError, computeStaple(), quda::GaugeFieldParam::create, errorQuda, gParam, MIN_COEFF, quda::LatticeFieldParam::Precision(), QUDA_NULL_FIELD_CREATE, QUDA_RECONSTRUCT_NO, qudaDeviceSynchronize, quda::GaugeFieldParam::reconstruct, quda::GaugeField::Reconstruct(), quda::GaugeFieldParam::setPrecision(), and quda::LatticeField::X().

Referenced by computeKSLinkQuda().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ file_name()

constexpr const char* quda::file_name ( const char *  str)
inline

Definition at line 50 of file malloc_quda.h.

References get_pointer_location(), r_slant(), str_end(), and str_slant().

Here is the call graph for this function:

◆ fillEigCGInnerSolverParam()

static void quda::fillEigCGInnerSolverParam ( SolverParam inner,
const SolverParam outer,
bool  use_sloppy_partial_accumulator = true 
)
static

◆ fillFGMResDRInnerSolveParam()

void quda::fillFGMResDRInnerSolveParam ( SolverParam inner,
const SolverParam outer 
)

◆ fillInitCGSolverParam()

static void quda::fillInitCGSolverParam ( SolverParam inner,
const SolverParam outer 
)
static

◆ fillInnerSolveParam()

void quda::fillInnerSolveParam ( SolverParam inner,
const SolverParam outer 
)

◆ fillInnerSolverParam()

static void quda::fillInnerSolverParam ( SolverParam inner,
const SolverParam outer 
)
static

◆ flushForceMonitor()

void quda::flushForceMonitor ( )

Flush any outstanding force monitoring information.

Definition at line 29 of file momentum.cu.

References comm_rank(), count, forceMonitor(), getVerbosity(), printfQuda, and QUDA_VERBOSE.

Referenced by endQuda(), and forceRecord().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ flushProfile()

void quda::flushProfile ( )

Flush profile contents, setting all counts to zero.

Definition at line 504 of file tune.cpp.

References quda::TuneParam::n_calls, and param.

Referenced by newDeflationQuda(), and quda::TunableVectorYZ::resizeStep().

Here is the caller graph for this function:

◆ forceMonitor()

bool quda::forceMonitor ( )

Whether we are monitoring the force or not.

Returns
Boolean whether we are monitoring the force

Definition at line 13 of file momentum.cu.

References quda::cublas::init().

Referenced by computeGaugeForceQuda(), computeMomAction(), and flushForceMonitor().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ forceRecord()

void quda::forceRecord ( double2 &  force,
double  dt,
const char *  fname 
)

◆ free_gauge_buffer()

void quda::free_gauge_buffer ( void *  buffer,
QudaGaugeFieldOrder  order,
QudaFieldGeometry  geometry 
)

◆ free_ghost_buffer()

void quda::free_ghost_buffer ( void **  buffer,
QudaGaugeFieldOrder  order,
QudaFieldGeometry  geometry 
)

◆ gamma5()

void quda::gamma5 ( ColorSpinorField out,
const ColorSpinorField in 
)

Applies a gamma5 matrix to a spinor (wrapper to ApplyGamma)

Parameters
[out] out: Output field
[in] in: Input field

Definition at line 461 of file dslash_quda.cu.

References ApplyGamma().

Referenced by computeCloverForceQuda().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ gammaCPU()

template<typename Float , int nColor, typename Arg >
void quda::gammaCPU ( Arg  arg)

◆ gammaGPU()

template<typename Float , int nColor, int d, typename Arg >
__global__ void quda::gammaGPU ( Arg  arg)

◆ GaugeFixHit_AtomicAdd() [1/2]

template<int blockSize, typename Float , int gauge_dir, int NCOLORS>
__forceinline__ __device__ void quda::GaugeFixHit_AtomicAdd ( Matrix< complex< Float >, NCOLORS > &  link,
const Float  relax_boost,
const int  tid 
)

Device function to perform gauge fixing with overrelaxation. Uses 8 threads per lattice site; the reduction is performed by shared memory without using atomicadd. This implementation needs 8x more shared memory than the implementation using atomicadd.

Definition at line 69 of file gauge_fix_ovr_hit_devf.cuh.

References atomicAdd().

Here is the call graph for this function:

◆ GaugeFixHit_AtomicAdd() [2/2]

template<int blockSize, typename Float , int gauge_dir, int NCOLORS>
__forceinline__ __device__ void quda::GaugeFixHit_AtomicAdd ( Matrix< complex< Float >, NCOLORS > &  link,
Matrix< complex< Float >, NCOLORS > &  link1,
const Float  relax_boost,
const int  tid 
)

Device function to perform gauge fixing with overrelaxation. Uses 8 threads per lattice site; the reduction is performed by shared memory without using atomicadd. This implementation needs 8x more shared memory than the implementation using atomicadd.

Definition at line 392 of file gauge_fix_ovr_hit_devf.cuh.

References atomicAdd().

Here is the call graph for this function:

◆ GaugeFixHit_NoAtomicAdd() [1/2]

template<int blockSize, typename Float , int gauge_dir, int NCOLORS>
__forceinline__ __device__ void quda::GaugeFixHit_NoAtomicAdd ( Matrix< complex< Float >, NCOLORS > &  link,
const Float  relax_boost,
const int  tid 
)

Device function to perform gauge fixing with overrelaxation. Uses 4 threads per lattice site; the reduction is performed by shared memory using atomicadd.

Definition at line 159 of file gauge_fix_ovr_hit_devf.cuh.

◆ GaugeFixHit_NoAtomicAdd() [2/2]

template<int blockSize, typename Float , int gauge_dir, int NCOLORS>
__forceinline__ __device__ void quda::GaugeFixHit_NoAtomicAdd ( Matrix< complex< Float >, NCOLORS > &  link,
Matrix< complex< Float >, NCOLORS > &  link1,
const Float  relax_boost,
const int  tid 
)

Device function to perform gauge fixing with overrelaxation. Uses 4 threads per lattice site; the reduction is performed by shared memory using atomicadd.

Definition at line 486 of file gauge_fix_ovr_hit_devf.cuh.

◆ GaugeFixHit_NoAtomicAdd_LessSM() [1/2]

template<int blockSize, typename Float , int gauge_dir, int NCOLORS>
__forceinline__ __device__ void quda::GaugeFixHit_NoAtomicAdd_LessSM ( Matrix< complex< Float >, NCOLORS > &  link,
const Float  relax_boost,
const int  tid 
)

Device function to perform gauge fixing with overrelaxation. Uses 8 threads per lattice site; the reduction is performed by shared memory without using atomicadd. This implementation uses the same amount of shared memory as the atomicadd implementation, with more thread block synchronization.

Definition at line 254 of file gauge_fix_ovr_hit_devf.cuh.

◆ GaugeFixHit_NoAtomicAdd_LessSM() [2/2]

template<int blockSize, typename Float , int gauge_dir, int NCOLORS>
__forceinline__ __device__ void quda::GaugeFixHit_NoAtomicAdd_LessSM ( Matrix< complex< Float >, NCOLORS > &  link,
Matrix< complex< Float >, NCOLORS > &  link1,
const Float  relax_boost,
const int  tid 
)

Device function to perform gauge fixing with overrelaxation. Uses 4 threads per lattice site; the reduction is performed by shared memory without using atomicadd. This implementation uses the same amount of shared memory as the atomicadd implementation, with more thread block synchronization.

Definition at line 563 of file gauge_fix_ovr_hit_devf.cuh.

◆ gaugefixingFFT()

void quda::gaugefixingFFT ( cudaGaugeField data,
const int  gauge_dir,
const int  Nsteps,
const int  verbose_interval,
const double  alpha,
const int  autotune,
const double  tolerance,
const int  stopWtheta 
)

Gauge fixing with Steepest descent method with FFTs with support for single GPU only.

Parameters
[in,out] data quda gauge field
[in] gauge_dir 3 for Coulomb gauge fixing, other for Landau gauge fixing
[in] Nsteps maximum number of steps to perform gauge fixing
[in] verbose_interval print gauge fixing info when iteration count is a multiple of this
[in] alpha gauge fixing parameter of the method, most common value is 0.08
[in] autotune 1 to autotune the method, i.e., if the Fg inverts its tendency we decrease the alpha value
[in] tolerance tolerance value to stop the method, if this value is zero then the method stops when iteration reaches the maximum number of steps defined by Nsteps
[in] stopWtheta 0 for MILC criterion and 1 to use the theta value

Definition at line 1083 of file gauge_fix_fft.cu.

References comm_dim_partitioned(), errorQuda, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, and QUDA_SINGLE_PRECISION.

Referenced by computeGaugeFixingFFTQuda(), and TEST_F().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ gaugefixingOVR()

void quda::gaugefixingOVR ( cudaGaugeField data,
const int  gauge_dir,
const int  Nsteps,
const int  verbose_interval,
const double  relax_boost,
const double  tolerance,
const int  reunit_interval,
const int  stopWtheta 
)

Gauge fixing with overrelaxation with support for single and multi GPU.

Parameters
[in,out] data quda gauge field
[in] gauge_dir 3 for Coulomb gauge fixing, other for Landau gauge fixing
[in] Nsteps maximum number of steps to perform gauge fixing
[in] verbose_interval print gauge fixing info when iteration count is a multiple of this
[in] relax_boost gauge fixing parameter of the overrelaxation method, most common value is 1.5 or 1.7.
[in] tolerance tolerance value to stop the method, if this value is zero then the method stops when iteration reaches the maximum number of steps defined by Nsteps
[in] reunit_interval reunitarize gauge field when iteration count is a multiple of this
[in] stopWtheta 0 for MILC criterion and 1 to use the theta value

Definition at line 1606 of file gauge_fix_ovr.cu.

References errorQuda, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, and QUDA_SINGLE_PRECISION.

Referenced by computeGaugeFixingOVRQuda(), and TEST_F().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ gaugeForce()

void quda::gaugeForce ( GaugeField mom,
const GaugeField u,
double  coeff,
int ***  input_path,
int *  length,
double *  path_coeff,
int  num_paths,
int  max_length 
)

Compute the gauge-force contribution to the momentum.

Parameters
[out] mom Momentum field
[in] u Gauge field (extended when running on multiple GPUs)
[in] coeff Step-size coefficient
[in] input_path Host-array holding all path contributions for the gauge action
[in] length Host array holding the length of all paths
[in] path_coeff Coefficient of each path
[in] num_paths Number of paths
[in] max_length Maximum length of each path

Definition at line 340 of file gauge_force.cu.

References errorQuda, length, quda::LatticeField::Location(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, and QUDA_SINGLE_PRECISION.

Referenced by computeGaugeForceQuda().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ gaugeGauss() [1/2]

void quda::gaugeGauss ( GaugeField U,
RNG rngstate,
double  epsilon 
)

Generate Gaussian distributed su(N) or SU(N) fields. If U is a momentum field, then we generate random Gaussian distributed field in the Lie algebra using the anti-Hermitian convention. If U is in the group then we create a Gaussian distributed su(n) field and exponentiate it, e.g., U = exp(sigma * H), where H is the distributed su(n) field and sigma is the width of the distribution (sigma = 0 results in a free field, and sigma = 1 has maximum disorder).

Parameters
[out]UThe output gauge field
[in]rngstaterandom states
[in] sigma Width of Gaussian distribution

Definition at line 145 of file gauge_random.cu.

References errorQuda, quda::GaugeField::exchangeExtendedGhost(), quda::GaugeField::exchangeGhost(), getVerbosity(), quda::LatticeField::GhostExchange(), quda::GaugeField::isNative(), quda::GaugeField::LinkType(), quda::GaugeField::Ncolor(), quda::GaugeField::Order(), quda::LatticeField::Precision(), printfQuda, QUDA_DOUBLE_PRECISION, QUDA_GHOST_EXCHANGE_EXTENDED, QUDA_GHOST_EXCHANGE_PAD, QUDA_MOMENTUM_LINKS, QUDA_RECONSTRUCT_10, QUDA_RECONSTRUCT_12, QUDA_RECONSTRUCT_13, QUDA_RECONSTRUCT_8, QUDA_RECONSTRUCT_9, QUDA_RECONSTRUCT_NO, QUDA_SINGLE_PRECISION, QUDA_SU3_LINKS, QUDA_SUMMARIZE, quda::LatticeField::R(), quda::GaugeField::Reconstruct(), quda::GaugeGaussArg< Float, recon, group_ >::rngstate, quda::GaugeGaussArg< Float, recon, group_ >::sigma, and quda::GaugeGaussArg< Float, recon, group_ >::U.

Referenced by gaugeGauss(), gaussGaugeQuda(), and genGauss().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ gaugeGauss() [2/2]

void quda::gaugeGauss ( GaugeField U,
unsigned long long  seed,
double  epsilon 
)

Generate Gaussian distributed su(N) or SU(N) fields. If U is a momentum field, then we generate random Gaussian distributed field in the Lie algebra using the anti-Hermitian convention. If U is in the group then we create a Gaussian distributed su(n) field and exponentiate it, e.g., U = exp(sigma * H), where H is the distributed su(n) field and sigma is the width of the distribution (sigma = 0 results in a free field, and sigma = 1 has maximum disorder).

Parameters
[out]UThe GaugeField
[in]seedThe seed used for the RNG
[in] sigma Width of the Gaussian distribution

Definition at line 187 of file gauge_random.cu.

References gaugeGauss(), quda::RNG::Init(), and quda::RNG::Release().

Here is the call graph for this function:

◆ gauss_su3()

template<typename real , typename Link >
__device__ __host__ Link quda::gauss_su3 ( cuRNGState localState)

Definition at line 39 of file gauge_random.cu.

References log(), and sqrt().

Here is the call graph for this function:

◆ genericCompare()

int quda::genericCompare ( const cpuColorSpinorField a,
const cpuColorSpinorField b,
int  tol 
)

◆ genericCopyColorSpinor() [1/6]

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder >
void quda::genericCopyColorSpinor ( OutOrder &  outOrder,
const InOrder &  inOrder,
const ColorSpinorField out,
QudaFieldLocation  location 
)

Definition at line 84 of file copy_color_spinor_mg.cuh.

References quda::CopySpinor< FloatOut, FloatIn, Ns, Nc, OutOrder, InOrder >::apply(), and copy().

Here is the call graph for this function:

◆ genericCopyColorSpinor() [2/6]

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename InOrder >
void quda::genericCopyColorSpinor ( InOrder &  inOrder,
ColorSpinorField out,
QudaFieldLocation  location,
FloatOut *  Out 
)

Decide on the output order

Definition at line 92 of file copy_color_spinor_mg.cuh.

References errorQuda, quda::ColorSpinorField::FieldOrder(), out, QUDA_FLOAT2_FIELD_ORDER, and QUDA_SPACE_SPIN_COLOR_FIELD_ORDER.

Here is the call graph for this function:

◆ genericCopyColorSpinor() [3/6]

template<typename FloatOut , typename FloatIn , int Ns, int Nc>
void quda::genericCopyColorSpinor ( ColorSpinorField out,
const ColorSpinorField in,
QudaFieldLocation  location,
FloatOut *  Out,
FloatIn *  In 
)

Decide on the input order

Definition at line 111 of file copy_color_spinor_mg.cuh.

References errorQuda, quda::ColorSpinorField::FieldOrder(), out, QUDA_FLOAT2_FIELD_ORDER, and QUDA_SPACE_SPIN_COLOR_FIELD_ORDER.

Here is the call graph for this function:

◆ genericCopyColorSpinor() [4/6]

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename Out , typename In >
void quda::genericCopyColorSpinor ( Out &  outOrder,
const In &  inOrder,
const ColorSpinorField out,
const ColorSpinorField in,
QudaFieldLocation  location 
)

Decide whether we are changing basis or not

Definition at line 270 of file copy_color_spinor.cuh.

References quda::CopyColorSpinor< Ns, Arg >::apply(), arg(), and copy().

Here is the call graph for this function:

◆ genericCopyColorSpinor() [5/6]

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename InOrder >
void quda::genericCopyColorSpinor ( InOrder &  inOrder,
ColorSpinorField out,
const ColorSpinorField in,
QudaFieldLocation  location,
FloatOut *  Out,
float *  outNorm 
)

◆ genericCopyColorSpinor() [6/6]

template<typename FloatOut , typename FloatIn , int Ns, int Nc>
void quda::genericCopyColorSpinor ( ColorSpinorField out,
const ColorSpinorField in,
QudaFieldLocation  location,
FloatOut *  Out,
FloatIn *  In,
float *  outNorm,
float *  inNorm 
)

◆ genericCudaPrintVector() [1/4]

template<typename StoreType , int Ns, int Nc, QudaFieldOrder FieldOrder>
void quda::genericCudaPrintVector ( const cudaColorSpinorField field,
unsigned int  i 
)

Definition at line 397 of file color_spinor_util.cu.

References quda::ColorSpinorField::Norm(), printfQuda, s, and quda::ColorSpinorField::V().

Here is the call graph for this function:

◆ genericCudaPrintVector() [2/4]

template<typename Float , int Ns, int Nc>
void quda::genericCudaPrintVector ( const cudaColorSpinorField field,
unsigned int  i 
)

◆ genericCudaPrintVector() [3/4]

template<typename Float >
void quda::genericCudaPrintVector ( const cudaColorSpinorField field,
unsigned int  i 
)

◆ genericCudaPrintVector() [4/4]

void quda::genericCudaPrintVector ( const cudaColorSpinorField a,
unsigned  x 
)

Referenced by genericCudaPrintVector(), and quda::cudaColorSpinorField::PrintVector().

Here is the caller graph for this function:

◆ GenericPackGhost()

template<typename Float , bool block_float, int Ns, int Ms, int Nc, int Mc, int nDim, typename Arg >
void quda::GenericPackGhost ( Arg arg)

◆ genericPackGhost()

void quda::genericPackGhost ( void **  ghost,
const ColorSpinorField a,
QudaParity  parity,
int  nFace,
int  dagger,
MemoryLocation destination = nullptr 
)
inline

Generic ghost packing routine.

Parameters
[out]ghostArray of packed ghosts with array ordering [2*dim+dir]
[in]aInput field that is being packed
[in]parityWhich parity are we packing
[in]daggerIs for a dagger operator (presently ignored)

Definition at line 180 of file color_spinor_pack.cu.

References quda::GenericPackGhostLauncher< Float, block_float, Ns, Ms, Nc, Mc, Arg >::apply(), quda::GenericPackGhostLauncher< Float, block_float, Ns, Ms, Nc, Mc, Arg >::arg, errorQuda, MAX_BLOCK_FLOAT_NC, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, and QUDA_SINGLE_PRECISION.

Referenced by quda::cudaColorSpinorField::exchangeGhost(), and quda::cpuColorSpinorField::packGhost().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ GenericPackGhostKernel()

template<typename Float , bool block_float, int Ns, int Ms, int Nc, int Mc, int nDim, int dim_threads, typename Arg >
__global__ void quda::GenericPackGhostKernel ( Arg  arg)

◆ genericPrintVector()

void quda::genericPrintVector ( const cpuColorSpinorField a,
unsigned int  x 
)

◆ genericSource()

void quda::genericSource ( cpuColorSpinorField a,
QudaSourceType  sourceType,
int  x,
int  s,
int  c 
)

◆ genGauss() [1/2]

template<typename real , typename Arg >
__device__ __host__ void quda::genGauss ( Arg arg,
cuRNGState localState,
int  parity,
int  x_cb,
int  s,
int  c 
)
inline

Definition at line 32 of file spinor_noise.cu.

References cos(), log(), sin(), sqrt(), and quda::Arg< real, Ns, Nc, order >::v.

Here is the call graph for this function:

◆ genGauss() [2/2]

template<typename Float , QudaReconstructType recon, bool group>
void quda::genGauss ( GaugeField U,
RNG rngstate,
double  sigma 
)

Definition at line 138 of file gauge_random.cu.

References quda::GaugeGauss< Float, Arg >::apply(), arg(), and gaugeGauss().

Here is the call graph for this function:

◆ genUniform()

template<typename real , typename Arg >
__device__ __host__ void quda::genUniform ( Arg arg,
cuRNGState localState,
int  parity,
int  x_cb,
int  s,
int  c 
)
inline

Definition at line 40 of file spinor_noise.cu.

References quda::Arg< real, Ns, Nc, order >::v.

◆ get_pointer_location()

QudaFieldLocation quda::get_pointer_location ( const void *  ptr)

Definition at line 399 of file malloc.cpp.

References errorQuda, QUDA_CPU_FIELD_LOCATION, QUDA_CUDA_FIELD_LOCATION, and QUDA_INVALID_FIELD_LOCATION.

Referenced by file_name(), and printQudaInvertParam().

Here is the caller graph for this function:

◆ GetBlockDim()

dim3 quda::GetBlockDim ( size_t  threads,
size_t  size 
)

Definition at line 25 of file random.cu.

References BLOCKSDIVUP.

Referenced by launch_kernel_random().

Here is the caller graph for this function:

◆ getCoords() [1/2]

template<int nDim, QudaPCType pc_type, KernelType kernel_type, typename Arg , int nface_ = 1>
__host__ __device__ int quda::getCoords ( int  coord[],
const Arg arg,
int &  idx,
int  parity,
int &  dim 
)
inline

Compute the space-time coordinates we are at.

Parameters
[out]coordThe computed space-time coordinates
[in]argDslashArg struct
[in,out]idxSpace-time index (usually equal to global x-thread index). When doing EXTERIOR kernels we overwrite this with the index into our face (ghost index).
[in]parityField parity
[out] dim The dimension we are working on (fused kernel only)
Returns
checkerboard space-time index

Definition at line 88 of file dslash_helper.cuh.

References arg(), EXTERIOR_KERNEL_ALL, getCoords5CB(), getCoordsCB(), INTERIOR_KERNEL, Ls, parity, QUDA_5D_PC, and X.

Referenced by completeKSForceCore(), computeAPEStep(), computeCoarseClover(), computeFmunuCore(), computeGenGauss(), computeMomAction(), computeNeighborSum(), computeOvrImpSTOUTStep(), computePlaq(), computeStaple(), computeStapleRectangle(), computeSTOUTStep(), computeUV(), computeVUV(), computeYhat(), corner(), quda::GaugeSTOUTArg< Float, GaugeOr, GaugeDs >::GaugeSTOUTArg(), quda::colorspinor::PaddedSpaceSpinorColorOrder< Float, Ns, Nc >::getPaddedIndex(), quda::gauge::TIFRPaddedOrder< Float, length >::getPaddedIndex(), kernel_random(), packGhost(), and sin().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ getCoords() [2/2]

template<typename I >
static __device__ __host__ void quda::getCoords ( int  x[],
int  cb_index,
const I  X[],
int  parity 
)
inlinestatic

Compute the 4-d spatial index from the checkerboarded 1-d index at parity parity. Wrapper around getCoordsCB.

Parameters
[out]xComputed spatial index
[in]cb_index1-d checkerboarded index
[in]XFull lattice dimensions
[in]X0hHalf of x-dim lattice dimension
[in]paritySite parity

Definition at line 228 of file index_helper.cuh.

References getCoordsCB().

Here is the call graph for this function:

◆ getCoords5()

template<typename I >
static __device__ __host__ void quda::getCoords5 ( int  x[5],
int  cb_index,
const I  X[5],
int  parity,
QudaPCType  pc_type 
)
inlinestatic

Compute the 5-d spatial index from the checkerboarded 1-d index at parity parity. Wrapper around getCoords5CB.

Parameters
[out]xComputed spatial index
[in]cb_index1-d checkerboarded index
[in]XFull lattice dimensions
[in]paritySite parity

Definition at line 301 of file index_helper.cuh.

References getCoords5CB().

Referenced by packGhost().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ getCoords5CB()

template<typename I , typename J >
static __device__ __host__ void quda::getCoords5CB ( int  x[5],
int  cb_index,
const I  X[5],
J  X0h,
int  parity,
QudaPCType  pc_type 
)
inlinestatic

Compute the 5-d spatial index from the checkerboarded 1-d index at parity parity

Parameters
[out]xComputed spatial index
[in]cb_index1-d checkerboarded index
[in]XFull lattice dimensions
[in]X0hHalf of x-dim lattice dimension
[in]paritySite parity

Definition at line 270 of file index_helper.cuh.

References QUDA_5D_PC.

Referenced by getCoords(), and getCoords5().

Here is the caller graph for this function:

◆ getCoordsCB()

template<typename I , typename J >
static __device__ __host__ void quda::getCoordsCB ( int  x[],
int  cb_index,
const I  X[],
J  X0h,
int  parity 
)
inlinestatic

Compute the 4-d spatial index from the checkerboarded 1-d index at parity parity

Parameters
[out]xComputed spatial index
[in]cb_index1-d checkerboarded index
[in]XFull lattice dimensions
[in]X0hHalf of x-dim lattice dimension
[in]paritySite parity

Definition at line 201 of file index_helper.cuh.

References parity.

Referenced by applyDslash(), and getCoords().

Here is the caller graph for this function:

◆ getCoordsExtended()

template<typename I , typename J >
static __device__ __host__ void quda::getCoordsExtended ( I  x[],
int  cb_index,
const J  X[],
int  parity,
const int  R[] 
)
inlinestatic

Compute the 4-d spatial index from the checkerboarded 1-d index at parity parity

Parameters
xComputed spatial index
cb_index1-d checkerboarded index
XFull lattice dimensions
paritySite parity

Definition at line 242 of file index_helper.cuh.

References parity.

Referenced by computeForce().

Here is the caller graph for this function:

◆ getDeterminant()

template<template< typename, int > class Mat, class T >
__device__ __host__ T quda::getDeterminant ( const Mat< T, 3 > &  a)
inline

◆ getDslashLaunch()

bool quda::getDslashLaunch ( )

◆ getIndexFull()

template<typename I >
static __device__ __host__ int quda::getIndexFull ( int  cb_index,
const I  X[4],
int  parity 
)
inlinestatic

Compute the 1-d global index from 1-d checkerboard index and parity. This should never be used to index into QUDA fields due to the potential of padding between even and odd regions.

Parameters
cb_index1-d checkerboard index
Xlattice dimensions
paritySite parity

Definition at line 316 of file index_helper.cuh.

References parity.

◆ getIndicesGlobal()

template<bool parity_flip, typename Arg >
__device__ void quda::getIndicesGlobal ( const Arg arg,
int &  parity,
int &  x_cb,
int &  parity_coarse,
int &  x_coarse_cb,
int &  c_col,
int &  c_row 
)
inline

◆ getIndicesShared()

template<bool parity_flip, typename Arg >
__device__ void quda::getIndicesShared ( const Arg arg,
int &  parity,
int &  x_cb,
int &  parity_coarse,
int &  x_coarse_cb,
int &  c_col,
int &  c_row 
)
inline

Definition at line 797 of file coarse_op_kernel.cuh.

References coarseIndex(), parity, virtualBlockDim(), and virtualThreadIdx().

Here is the call graph for this function:

◆ getKernelPackT()

bool quda::getKernelPackT ( )

◆ getLinkDeterminant()

double2 quda::getLinkDeterminant ( cudaGaugeField data)

Calculate the Determinant.

Parameters
[in]dataGauge field
Returns
double2 complex Determinant value

Definition at line 194 of file pgauge_det_trace.cu.

References errorQuda, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, and QUDA_SINGLE_PRECISION.

Referenced by GaugeAlgTest::TearDown(), and TEST_F().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ getLinkTrace()

double2 quda::getLinkTrace ( cudaGaugeField data)

Calculate the Trace.

Parameters
[in]dataGauge field
Returns
double2 complex trace value

Definition at line 215 of file pgauge_det_trace.cu.

References errorQuda, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, and QUDA_SINGLE_PRECISION.

Referenced by GaugeAlgTest::TearDown().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ getNeighborIndexCB()

template<int nDim = 4, typename Arg >
static __device__ __host__ int quda::getNeighborIndexCB ( const int  x[],
int  mu,
int  dir,
const Arg arg 
)
inlinestatic

Compute the checkerboard 1-d index for the nearest neighbor.

Parameters
[in]xnDim lattice coordinates
[in]mudimension in which to add 1
[in]dirdirection (+1 or -1)
[in]argparameter struct
Returns
1-d checkerboard index

Definition at line 166 of file index_helper.cuh.

Referenced by applyWilsonTM().

Here is the caller graph for this function:

◆ getRealTraceUVdagger()

template<class T >
__device__ __host__ double quda::getRealTraceUVdagger ( const Matrix< T, 3 > &  a,
const Matrix< T, 3 > &  b 
)
inline

Definition at line 1131 of file quda_matrix.h.

References sum().

Here is the call graph for this function:

◆ getSubTraceUnit()

template<class T >
__device__ __host__ Matrix<T,3> quda::getSubTraceUnit ( const Matrix< T, 3 > &  a)
inline

Definition at line 1115 of file quda_matrix.h.

◆ getTrace()

template<class T >
__device__ __host__ T quda::getTrace ( const Matrix< T, 3 > &  a)
inline

Definition at line 415 of file quda_matrix.h.

References Mat().

Referenced by computeOvrImpSTOUTStep(), computeSTOUTStep(), exponentiate_iQ(), plaquette(), qChargeComputeKernel(), and setUnitarizeLinksConstants().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ getTuneCache()

const map & quda::getTuneCache ( )

◆ ghostFaceIndex()

template<int dir, int nDim = 4, typename I >
__device__ __host__ int quda::ghostFaceIndex ( const int  x_[],
const I  X_[],
int  dim,
int  nFace 
)
inline

Compute the checkerboarded index into the ghost field corresponding to full (local) site index x[]

Parameters
x_local site
X_local lattice dimensions
dimdimension
nFacedepth of ghost

Definition at line 335 of file index_helper.cuh.

References index(), and X.

Here is the call graph for this function:

◆ ghostFaceIndexStaggered()

template<int dir, int nDim = 4, typename I >
__device__ __host__ int quda::ghostFaceIndexStaggered ( const int  x_[],
const I  X_[],
int  dim,
int  nFace 
)
inline

Compute the checkerboarded index into the ghost field corresponding to full (local) site index x[] for staggered

Parameters
x_local site
X_local lattice dimensions
dimdimension
nFacedepth of ghost

Definition at line 396 of file index_helper.cuh.

References index(), and X.

Here is the call graph for this function:

◆ host_allocated_peak()

long quda::host_allocated_peak ( )
Returns
peak host memory allocated

Definition at line 65 of file malloc.cpp.

References HOST.

◆ host_free_()

void quda::host_free_ ( const char *  func,
const char *  file,
int  line,
void *  ptr 
)

Free host memory allocated with safe_malloc(), pinned_malloc(), or mapped_malloc(). This function should only be called via the host_free() macro, defined in malloc_quda.h

Definition at line 344 of file malloc.cpp.

References count, errorQuda, HOST, MAPPED, PINNED, print_trace(), printfQuda, and track_free().

Referenced by quda::pool::pinned_free_().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ i32toa()

void quda::i32toa ( char *  buffer,
int32_t  value 
)
inline

Definition at line 117 of file uint_to_char.h.

References u32toa().

Referenced by quda::DslashCoarsePolicyTune::DslashCoarsePolicyTune(), and postTrace_().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ i64toa()

void quda::i64toa ( char *  buffer,
int64_t  value 
)
inline

Definition at line 284 of file uint_to_char.h.

References u64toa().

Here is the call graph for this function:

◆ inBoundary()

template<int dim, typename Arg >
__host__ __device__ bool quda::inBoundary ( const int  coord[],
const Arg arg 
)
inline

Compute whether the provided coordinate is within the halo region boundary of a given dimension.

Parameters
[in]coordCoordinates
[in] arg Dslash argument struct
Returns
True if in boundary, else false

Definition at line 155 of file dslash_helper.cuh.

◆ IndexBlock()

template<int NCOLORS>
static __host__ __device__ void quda::IndexBlock ( int  block,
int &  p,
int &  q 
)
inlinestatic

Retrieve the SU(N) indices for the current block number

Parameters
[in] block current block number, from 0 to (NCOLORS * (NCOLORS - 1) / 2)
[out] p row index pointing to the SU(N) matrix
[out] q column index pointing to the SU(N) matrix

Definition at line 36 of file gauge_fix_ovr_hit_devf.cuh.

References index().

Here is the call graph for this function:

◆ indexFromFaceIndex() [1/2]

template<int nDim, QudaPCType type, int dim, int nLayers, int face_num, typename Arg >
__device__ __host__ int quda::indexFromFaceIndex ( int  face_idx,
int  parity,
const Arg arg 
)
inline

Compute the checkerboard lattice index from the input face index. This is used by the Wilson-like halo packing kernels, and can deal with 4-d or 5-d field and 4-d or 5-d preconditioning.

Parameters
[in]face_idxCheckerboard halo index
[in]parityParity index
[in]argArgument struct with required meta data
Returns
Checkerboard lattice index

Definition at line 601 of file index_helper.cuh.

References QUDA_4D_PC, QUDA_5D_PC, and s.

◆ indexFromFaceIndex() [2/2]

template<int nDim, QudaPCType type, int dim, int nLayers, int face_num, typename Arg >
__device__ __host__ int quda::indexFromFaceIndex ( int  face_idx,
const Arg arg 
)
inline

Overloaded variant of indexFromFaceIndex where we use the parity declared in arg.

Definition at line 694 of file index_helper.cuh.

References arg().

Here is the call graph for this function:

◆ indexFromFaceIndexStaggered()

template<int nDim, QudaPCType type, int dim, int nLayers, int face_num, typename Arg >
static __device__ int quda::indexFromFaceIndexStaggered ( int  face_idx_in,
int  parity,
const Arg arg 
)
inlinestatic

Compute global checkerboard index from face index. The following indexing routines work for arbitrary lattice dimensions (though perhaps not odd like the Wilson variant?) Specifically, we compute an index into the local volume from an index into the face. This is used by the staggered-like face packing routines, and is different from the Wilson variant since here the halo depth is traversed in a different order - here the halo depth is the faster running dimension.

Parameters
[in]face_idx_inCheckerboarded face index
[in]paramParameter struct with required meta data
Returns
Global checkerboard coordinate

Definition at line 717 of file index_helper.cuh.

References dims, s, and X.

◆ InitGaugeField() [1/2]

void quda::InitGaugeField ( cudaGaugeField data)

Perform a cold start to the gauge field, identity SU(3) matrix, also fills the ghost links in multi-GPU case (no need to exchange data)

Parameters
[in,out]dataGauge field

Referenced by main(), and GaugeAlgTest::SetUp().

Here is the caller graph for this function:

◆ InitGaugeField() [2/2]

void quda::InitGaugeField ( cudaGaugeField data,
RNG rngstate 
)

Perform a hot start to the gauge field, random SU(3) matrix, followed by reunitarization, also exchanges border links in multi-GPU case.

Parameters
[in,out]dataGauge field
[in,out]rngstatestate of the CURAND random number generator

Definition at line 450 of file pgauge_init.cu.

References errorQuda, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, and QUDA_SINGLE_PRECISION.

Here is the call graph for this function:

◆ injector()

template<typename Float , int length, int dim, typename Arg >
__device__ __host__ void quda::injector ( Arg arg,
int  dir,
int  a,
int  b,
int  c,
int  d,
int  g,
int  parity 
)

Definition at line 76 of file extract_gauge_ghost_extended.cu.

References quda::Matrix< T, N >::data, length, and quda::gauge::Ncolor().

Here is the call graph for this function:

◆ innerProduct() [1/4]

template<typename Float , int Nc, int Ns>
__device__ __host__ complex<Float> quda::innerProduct ( const ColorSpinor< Float, Nc, Ns > &  a,
const ColorSpinor< Float, Nc, Ns > &  b 
)
inline

Compute the inner product over color and spin: dot = \sum_{s,c} conj(a(s,c)) * b(s,c)

Parameters
aLeft-hand side ColorSpinor
bRight-hand side ColorSpinor
Returns
The inner product

Definition at line 914 of file color_spinor.h.

References dot(), and s.

Referenced by computeColorContraction(), computeDegrandRossiContraction(), and innerProduct().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ innerProduct() [2/4]

template<typename Float , int Nc, int Ns>
__device__ __host__ complex<Float> quda::innerProduct ( const ColorSpinor< Float, Nc, Ns > &  a,
const ColorSpinor< Float, Nc, Ns > &  b,
int  s 
)
inline

Compute the inner product over color at spin s between two ColorSpinor fields: dot = \sum_{c} conj(a(s,c)) * b(s,c)

Parameters
aLeft-hand side ColorSpinor
bRight-hand side ColorSpinor
sdiagonal spin index
Returns
The inner product

Definition at line 932 of file color_spinor.h.

References innerProduct().

Here is the call graph for this function:

◆ innerProduct() [3/4]

template<typename Float , int Nc, int Ns>
__device__ __host__ complex<Float> quda::innerProduct ( const ColorSpinor< Float, Nc, Ns > &  a,
const ColorSpinor< Float, Nc, Ns > &  b,
int  sa,
int  sb 
)
inline

Compute the inner product over color at spin sa and sb between two ColorSpinor fields: dot = \sum_{c} conj(a(sa,c)) * b(sb,c)

Parameters
aLeft-hand side ColorSpinor
bRight-hand side ColorSpinor
saLeft-hand side spin index
sbRight-hand side spin index
Returns
The inner product

Definition at line 948 of file color_spinor.h.

References dot().

Here is the call graph for this function:

◆ innerProduct() [4/4]

template<typename Float , int Nc, int Ns>
__device__ __host__ complex<Float> quda::innerProduct ( const ColorSpinor< Float, Nc, 1 > &  a,
const ColorSpinor< Float, Nc, Ns > &  b,
int  s 
)
inline

Compute the inner product over color at spin s between a color vector and a color spinor: dot = \sum_{c} conj(a(c)) * b(s,c)

Parameters
aLeft-hand side ColorVector
bRight-hand side ColorSpinor
Returns
The inner product

Definition at line 971 of file color_spinor.h.

References innerProduct().

Here is the call graph for this function:

◆ instantiate() [1/3]

template<template< typename, int, QudaReconstructType > class Apply, typename Recon , typename Float , int nColor, typename... Args>
void quda::instantiate ( ColorSpinorField out,
const ColorSpinorField in,
const GaugeField U,
Args &&...  args 
)
inline

This instantiate function is used to instantiate the reconstruct types used.

Parameters
[out]outOutput result field
[in]inInput field
[in]UGauge field
[in]argsAdditional arguments for different dslash kernels

Definition at line 426 of file dslash.h.

References errorQuda, quda::Dslash< Float >::in, quda::Dslash< Float >::out, and quda::GaugeField::Reconstruct().

Here is the call graph for this function:

◆ instantiate() [2/3]

template<template< typename, int, QudaReconstructType > class Apply, typename Recon , typename Float , typename... Args>
void quda::instantiate ( ColorSpinorField out,
const ColorSpinorField in,
const GaugeField U,
Args &&...  args 
)
inline

This instantiate function is used to instantiate the colors.

Parameters
[out]outOutput result field
[in]inInput field
[in]UGauge field
[in]argsAdditional arguments for different dslash kernels

Definition at line 459 of file dslash.h.

References errorQuda, quda::Dslash< Float >::in, quda::GaugeField::Ncolor(), quda::ColorSpinorField::Ncolor(), and quda::Dslash< Float >::out.

Here is the call graph for this function:

◆ instantiate() [3/3]

template<template< typename, int, QudaReconstructType > class Apply, typename Recon = WilsonReconstruct, typename... Args>
void quda::instantiate ( ColorSpinorField out,
const ColorSpinorField in,
const GaugeField U,
Args &&...  args 
)
inline

This instantiate function is used to instantiate the precisions.

Parameters
[out]outOutput result field
[in]inInput field
[in]UGauge field
[in]argsAdditional arguments for different dslash kernels

Definition at line 476 of file dslash.h.

References errorQuda, quda::Dslash< Float >::in, quda::Dslash< Float >::out, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, and QUDA_SINGLE_PRECISION.

Here is the call graph for this function:

◆ inverse()

template<class T >
__device__ __host__ Matrix<T,3> quda::inverse ( const Matrix< T, 3 > &  u)
inline

◆ is_aligned()

bool quda::is_aligned ( const void *  ptr,
size_t  alignment 
)
inline
Returns
whether the pointer is aligned

Definition at line 57 of file malloc_quda.h.

Referenced by quda::cudaColorSpinorField::create(), quda::cudaCloverField::cudaCloverField(), and quda::cudaGaugeField::zeroPad().

Here is the caller graph for this function:

◆ isActive()

template<KernelType kernel_type, typename Arg >
__device__ bool quda::isActive ( bool &  active,
int  threadDim,
int  offsetDim,
const int  coord[],
const Arg arg 
)
inline

Compute whether this thread should be active for updating a given offsetDim halo. For non-fused halo update kernels this is a trivial kernel that just checks if the given dimension is partitioned and if so, returns true.

For fused halo region update kernels: here every thread has a prescribed dimension it is tasked with updating, but for the edges and vertices, the thread responsible for the entire update is the "greatest" one. Hence some threads may be labelled as a given dimension, but they have to update other dimensions too. Conversely, a given thread may be labelled for a given dimension, but if that thread lies at an edge or vertex, and we have partitioned a higher dimension, then that thread will cede to the higher thread.

Parameters
[in,out]activeWhether this thread is "cumulatively" active (cumulative over all dimensions)
[in]threadDimPrescribed dimension of this thread
[in]offsetDimThe dimension we are querying whether this thread should be responsible
[in]offsetThe size of the hop
[in]ySite coordinate
[in]partitionedArray of which dimensions have been partitioned
[in]XLattice dimensions
Returns
true if this thread is active

Definition at line 188 of file dslash_helper.cuh.

References EXTERIOR_KERNEL_ALL.

◆ isComplete()

template<KernelType type, typename Arg >
__host__ __device__ bool quda::isComplete ( const Arg arg,
int  coord[] 
)
inline

Helper function to determine if the application of the derivative in the dslash is complete.

Parameters
[in]argArgument parameter struct
[in]Checkerboardspace-time index
[in]Paritywe are acting on

Definition at line 55 of file dslash_helper.cuh.

References EXTERIOR_KERNEL_ALL, EXTERIOR_KERNEL_T, EXTERIOR_KERNEL_X, EXTERIOR_KERNEL_Y, EXTERIOR_KERNEL_Z, and INTERIOR_KERNEL.

◆ isUnitary()

bool quda::isUnitary ( const cpuGaugeField field,
double  max_error 
)

◆ kernel_random()

__global__ void quda::kernel_random ( cuRNGState state,
unsigned long long  seed,
int  size_cb,
rngArg  arg 
)

CUDA kernel to initialize CURAND RNG states.

Parameters
stateCURAND RNG state array
seedinitial seed for RNG
size_cbsize of the CURAND RNG state array
argMetadata needed for computing multi-gpu offsets

Definition at line 51 of file random.cu.

References quda::rngArg::commCoord, quda::rngArg::commDim, getCoords(), parity, and quda::rngArg::X.

Here is the call graph for this function:

◆ laplace()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__device__ __host__ void quda::laplace ( Arg arg,
int  idx,
int  parity 
)
inline

◆ laplaceGPU()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__global__ void quda::laplaceGPU ( Arg  arg)

Definition at line 178 of file laplace.cuh.

References arg(), and quda::DslashArg< Float >::parity.

Here is the call graph for this function:

◆ launch_kernel_random()

void quda::launch_kernel_random ( cuRNGState state,
unsigned long long  seed,
int  size_cb,
int  n_parity,
int  X[4] 
)

Call CUDA kernel to initialize CURAND RNG states.

Parameters
stateCURAND RNG state array
seedinitial seed for RNG
size_cbCheckerboarded size of the CURAND RNG state array
n_parityNumber of parities (1 or 2)
Xarray of lattice dimensions

Definition at line 75 of file random.cu.

References arg(), GetBlockDim(), and qudaDeviceSynchronize.

Referenced by quda::RNG::Init().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ linkIndex() [1/2]

template<typename I >
static __device__ __host__ int quda::linkIndex ( const int  x[],
const I  X[4] 
)
inlinestatic

Compute the checkerboard 1-d index from the 4-d coordinate x[]

Returns
1-d checkerboard index
Parameters
x4-d lattice index
XFull lattice dimensions

Definition at line 46 of file index_helper.cuh.

Referenced by computeGenGauss(), computeMomAction(), quda::colorspinor::PaddedSpaceSpinorColorOrder< Float, Ns, Nc >::getPaddedIndex(), and quda::gauge::TIFRPaddedOrder< Float, length >::getPaddedIndex().

Here is the caller graph for this function:

◆ linkIndex() [2/2]

template<typename I >
static __device__ __host__ int quda::linkIndex ( int  y[],
const int  x[],
const I  X[4] 
)
inlinestatic

Compute the checkerboard 1-d index from the 4-d coordinate x[]

Returns
1-d checkerboard index
Parameters
ycopy of 4-d lattice index
x4-d lattice index
XFull lattice dimensions

Definition at line 60 of file index_helper.cuh.

◆ linkIndexDn()

template<typename I , int n>
static __device__ __host__ int quda::linkIndexDn ( const int  x[],
const I  X[4],
const int  mu 
)
inlinestatic

Compute the checkerboard 1-d index from the 4-d coordinate x[] +n in the mu direction

Returns
1-d checkerboard index
Template Parameters
nnumber of hops (+/-) in the mu direction
Parameters
x4-d lattice index
XFull lattice dimensions
mudirection in which to add n hops

Definition at line 76 of file index_helper.cuh.

References mu.

Referenced by linkIndexM1(), and linkIndexM3().

Here is the caller graph for this function:

◆ linkIndexM1()

template<typename I >
static __device__ __host__ int quda::linkIndexM1 ( const int  x[],
const I  X[4],
const int  mu 
)
inlinestatic

Compute the checkerboard 1-d index from the 4-d coordinate x[] -1 in the mu direction

Returns
1-d checkerboard index
Parameters
x4-d lattice index
XFull lattice dimensions
mudirection in which to subtract 1

Definition at line 94 of file index_helper.cuh.

References linkIndexDn(), mu, and X.

Referenced by applyDslash(), applyLaplace(), applyStaggered(), computeNeighborSum(), and computeYhat().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ linkIndexM3()

template<typename I >
static __device__ __host__ int quda::linkIndexM3 ( const int  x[],
const I  X[4],
const int  mu 
)
inlinestatic

Compute the checkerboard 1-d index from the 4-d coordinate x[] -3 in the mu direction

Returns
1-d checkerboard index
Parameters
x4-d lattice index
XFull lattice dimensions
mudirection in which to subtract 3

Definition at line 107 of file index_helper.cuh.

References linkIndexDn(), mu, and X.

Referenced by applyStaggered().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ linkIndexP1()

template<typename I >
static __device__ __host__ int quda::linkIndexP1 ( const int  x[],
const I  X[4],
const int  mu 
)
inlinestatic

Compute the checkerboard 1-d index from the 4-d coordinate x[] +1 in the mu direction

Returns
1-d checkerboard index
Parameters
x4-d lattice index
XFull lattice dimensions
mudirection in which to add 1

Definition at line 139 of file index_helper.cuh.

References mu, and X.

Referenced by applyDslash(), applyLaplace(), applyStaggered(), computeNeighborSum(), and computeUV().

Here is the caller graph for this function:

◆ linkIndexP3()

template<typename I >
static __device__ __host__ int quda::linkIndexP3 ( const int  x[],
const I  X[4],
const int  mu 
)
inlinestatic

Compute the checkerboard 1-d index from the 4-d coordinate x[] +3 in the mu direction

Returns
1-d checkerboard index
Parameters
x4-d lattice index
XFull lattice dimensions
mudirection in which to add 3

Definition at line 151 of file index_helper.cuh.

References mu, and X.

Referenced by applyStaggered().

Here is the caller graph for this function:

◆ linkIndexShift() [1/2]

template<typename I , typename J , typename K >
static __device__ __host__ int quda::linkIndexShift ( const I  x[],
const J  dx[],
const K  X[4] 
)
inlinestatic

Compute the checkerboard 1-d index from the 4-d coordinate x[] + dx[]

Returns
1-d checkerboard index
Parameters
x4-d lattice index
dx4-d shift index
XFull lattice dimensions

Definition at line 13 of file index_helper.cuh.

Referenced by completeKSForceCore(), computeAPEStep(), computeFmunuCore(), computeForce(), computeOvrImpSTOUTStep(), computeStaple(), computeStapleRectangle(), computeSTOUTStep(), quda::GaugeSTOUTArg< Float, GaugeOr, GaugeDs >::GaugeSTOUTArg(), and plaquette().

Here is the caller graph for this function:

◆ linkIndexShift() [2/2]

template<typename I , typename J , typename K >
static __device__ __host__ int quda::linkIndexShift ( I  y[],
const I  x[],
const J  dx[],
const K  X[4] 
)
inlinestatic

Compute the checkerboard 1-d index from the 4-d coordinate x[] + dx[]

Returns
1-d checkerboard index
Parameters
ynew 4-d lattice index
xoriginal 4-d lattice index
dx4-d shift index
XFull lattice dimensions

Definition at line 31 of file index_helper.cuh.

◆ linkNormalIndexP1()

template<typename I >
static __device__ __host__ int quda::linkNormalIndexP1 ( const int  x[],
const I  X[4],
const int  mu 
)
inlinestatic

Compute the full 1-d index from the 4-d coordinate x[] +1 in the mu direction

Returns
1-d full (non-checkerboarded) index
Parameters
x4-d lattice index
XFull lattice dimensions
mudirection in which to add 1

Definition at line 121 of file index_helper.cuh.

References mu.

◆ load_cached_short2()

__device__ void quda::load_cached_short2 ( short2 &  a,
const short2 *  addr 
)
inline

Definition at line 45 of file inline_ptx.h.

References __PTR.

◆ load_cached_short4()

__device__ void quda::load_cached_short4 ( short4 &  a,
const short4 *  addr 
)
inline

Definition at line 35 of file inline_ptx.h.

References __PTR.

◆ load_global_float4()

__device__ void quda::load_global_float4 ( float4 &  a,
const float4 *  addr 
)
inline

Definition at line 71 of file inline_ptx.h.

References __PTR.

◆ load_global_short2()

__device__ void quda::load_global_short2 ( short2 &  a,
const short2 *  addr 
)
inline

Definition at line 63 of file inline_ptx.h.

References __PTR.

◆ load_global_short4()

__device__ void quda::load_global_short4 ( short4 &  a,
const short4 *  addr 
)
inline

Definition at line 53 of file inline_ptx.h.

References __PTR.

◆ load_streaming_double2()

__device__ void quda::load_streaming_double2 ( double2 &  a,
const double2 *  addr 
)
inline

Definition at line 21 of file inline_ptx.h.

References __PTR.

◆ load_streaming_float4()

__device__ void quda::load_streaming_float4 ( float4 &  a,
const float4 *  addr 
)
inline

Definition at line 28 of file inline_ptx.h.

References __PTR.

◆ loadLinkVariableFromArray() [1/2]

template<class T , class U >
__device__ void quda::loadLinkVariableFromArray ( const T *const  array,
const int  dir,
const int  idx,
const int  stride,
Matrix< U, 3 > *  link 
)
inline

Definition at line 857 of file quda_matrix.h.

References quda::Matrix< T, N >::data.

◆ loadLinkVariableFromArray() [2/2]

__device__ void quda::loadLinkVariableFromArray ( const float2 *const  array,
const int  dir,
const int  idx,
const int  stride,
Matrix< complex< double >, 3 > *  link 
)
inline

Definition at line 879 of file quda_matrix.h.

◆ loadMatrixFromArray()

template<class T , class U , int N>
__device__ void quda::loadMatrixFromArray ( const T *const  array,
const int  idx,
const int  stride,
Matrix< U, N > *  mat 
)
inline

Definition at line 869 of file quda_matrix.h.

References quda::Matrix< T, N >::data.

◆ loadMomentumFromArray()

template<class T >
__device__ void quda::loadMomentumFromArray ( const T *const  array,
const int  dir,
const int  idx,
const int  stride,
Matrix< T, 3 > *  mom 
)
inline

Definition at line 955 of file quda_matrix.h.

References quda::Matrix< T, N >::data.

◆ loadTuneCache()

void quda::loadTuneCache ( )

Definition at line 322 of file tune.cpp.

References broadcastTuneCache(), comm_rank(), deserializeTuneCache(), errorQuda, getTuning(), getVerbosity(), gitversion, printfQuda, QUDA_SUMMARIZE, QUDA_TUNE_NO, resource_path, and warningQuda.

Referenced by initQudaMemory(), and quda::TunableVectorYZ::resizeStep().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ Location_() [1/2]

QudaFieldLocation quda::Location_ ( const char *  func,
const char *  file,
int  line,
const LatticeField a,
const LatticeField b 
)
inline

Helper function for determining if the location of the fields is the same.

Parameters
[in]aInput field
[in]bInput field
Returns
If location is unique return the location

Definition at line 642 of file lattice_field.h.

References errorQuda, quda::LatticeField::Location(), and QUDA_INVALID_FIELD_LOCATION.

Referenced by Location_().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ Location_() [2/2]

template<typename... Args>
QudaFieldLocation quda::Location_ ( const char *  func,
const char *  file,
int  line,
const LatticeField a,
const LatticeField b,
const Args &...  args 
)
inline

Helper function for determining if the location of the fields is the same.

Parameters
[in]aInput field
[in]bInput field
[in]argsList of additional fields to check location on
Returns
If location is unique return the location

Definition at line 659 of file lattice_field.h.

References Location_().

Here is the call graph for this function:

◆ log() [1/3]

template<typename ValueType >
__host__ __device__ ValueType quda::log ( ValueType  x)
inline

Definition at line 101 of file complex_quda.h.

References log().

Referenced by acosh(), asinh(), atanh(), cloverInvertCompute(), expsu3(), gauss_su3(), genGauss(), log(), log10(), pow(), and smallSVD().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ log() [2/3]

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::log ( const complex< ValueType > &  z)
inline

Definition at line 1162 of file complex_quda.h.

References abs(), arg(), and log().

Here is the call graph for this function:

◆ log() [3/3]

template<>
__host__ __device__ complex<float> quda::log ( const complex< float > &  z)
inline

Definition at line 1168 of file complex_quda.h.

References abs(), and arg().

Referenced by log().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ log10() [1/2]

template<typename ValueType >
__host__ __device__ ValueType quda::log10 ( ValueType  x)
inline

Definition at line 106 of file complex_quda.h.

References log10().

Here is the call graph for this function:

◆ log10() [2/2]

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::log10 ( const complex< ValueType > &  z)
inline

Definition at line 1175 of file complex_quda.h.

References log().

Referenced by log10().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ make_charN() [1/6]

__forceinline__ __host__ __device__ char4 quda::make_charN ( const short4 &  a)

Definition at line 263 of file float_vector.h.

◆ make_charN() [2/6]

__forceinline__ __host__ __device__ char2 quda::make_charN ( const short2 &  a)

Definition at line 267 of file float_vector.h.

◆ make_charN() [3/6]

__forceinline__ __host__ __device__ char4 quda::make_charN ( const float4 &  a)

Definition at line 271 of file float_vector.h.

◆ make_charN() [4/6]

__forceinline__ __host__ __device__ char2 quda::make_charN ( const float2 &  a)

Definition at line 275 of file float_vector.h.

◆ make_charN() [5/6]

__forceinline__ __host__ __device__ char4 quda::make_charN ( const double4 &  a)

Definition at line 279 of file float_vector.h.

◆ make_charN() [6/6]

__forceinline__ __host__ __device__ char2 quda::make_charN ( const double2 &  a)

Definition at line 283 of file float_vector.h.

◆ make_Complex() [1/2]

complex<double> quda::make_Complex ( const double2 &  a)
inline

Definition at line 309 of file float_vector.h.

◆ make_Complex() [2/2]

complex<float> quda::make_Complex ( const float2 &  a)
inline

Definition at line 310 of file float_vector.h.

◆ make_Float2() [1/9]

template<typename Float2 , typename Complex >
Float2 quda::make_Float2 ( const Complex a)
inline

Definition at line 288 of file float_vector.h.

◆ make_Float2() [2/9]

template<>
double2 quda::make_Float2 ( const complex< double > &  a)
inline

Definition at line 291 of file float_vector.h.

References quda::complex< double >::imag(), and quda::complex< double >::real().

Here is the call graph for this function:

◆ make_Float2() [3/9]

template<>
double2 quda::make_Float2 ( const complex< float > &  a)
inline

Definition at line 293 of file float_vector.h.

References quda::complex< float >::imag(), and quda::complex< float >::real().

Here is the call graph for this function:

◆ make_Float2() [4/9]

template<>
float2 quda::make_Float2 ( const complex< double > &  a)
inline

Definition at line 295 of file float_vector.h.

References quda::complex< double >::imag(), and quda::complex< double >::real().

Here is the call graph for this function:

◆ make_Float2() [5/9]

template<>
float2 quda::make_Float2 ( const complex< float > &  a)
inline

Definition at line 297 of file float_vector.h.

References quda::complex< float >::imag(), and quda::complex< float >::real().

Here is the call graph for this function:

◆ make_Float2() [6/9]

template<>
double2 quda::make_Float2 ( const std::complex< double > &  a)
inline

Definition at line 300 of file float_vector.h.

◆ make_Float2() [7/9]

template<>
double2 quda::make_Float2 ( const std::complex< float > &  a)
inline

Definition at line 302 of file float_vector.h.

◆ make_Float2() [8/9]

template<>
float2 quda::make_Float2 ( const std::complex< double > &  a)
inline

Definition at line 304 of file float_vector.h.

◆ make_Float2() [9/9]

template<>
float2 quda::make_Float2 ( const std::complex< float > &  a)
inline

Definition at line 306 of file float_vector.h.

◆ make_FloatN() [1/4]

__forceinline__ __host__ __device__ float2 quda::make_FloatN ( const double2 &  a)

Definition at line 223 of file float_vector.h.

◆ make_FloatN() [2/4]

__forceinline__ __host__ __device__ float4 quda::make_FloatN ( const double4 &  a)

Definition at line 227 of file float_vector.h.

◆ make_FloatN() [3/4]

__forceinline__ __host__ __device__ double2 quda::make_FloatN ( const float2 &  a)

Definition at line 231 of file float_vector.h.

◆ make_FloatN() [4/4]

__forceinline__ __host__ __device__ double4 quda::make_FloatN ( const float4 &  a)

Definition at line 235 of file float_vector.h.

◆ make_shortN() [1/6]

__forceinline__ __host__ __device__ short4 quda::make_shortN ( const char4 &  a)

Definition at line 239 of file float_vector.h.

◆ make_shortN() [2/6]

__forceinline__ __host__ __device__ short2 quda::make_shortN ( const char2 &  a)

Definition at line 243 of file float_vector.h.

◆ make_shortN() [3/6]

__forceinline__ __host__ __device__ short4 quda::make_shortN ( const float4 &  a)

Definition at line 247 of file float_vector.h.

◆ make_shortN() [4/6]

__forceinline__ __host__ __device__ short2 quda::make_shortN ( const float2 &  a)

Definition at line 251 of file float_vector.h.

◆ make_shortN() [5/6]

__forceinline__ __host__ __device__ short4 quda::make_shortN ( const double4 &  a)

Definition at line 255 of file float_vector.h.

◆ make_shortN() [6/6]

__forceinline__ __host__ __device__ short2 quda::make_shortN ( const double2 &  a)

Definition at line 259 of file float_vector.h.

◆ makeAntiHerm()

template<typename Complex , int N>
__device__ __host__ void quda::makeAntiHerm ( Matrix< Complex, N > &  m)
inline

Definition at line 746 of file quda_matrix.h.

References conj().

Referenced by completeKSForceCore(), and computeMomAction().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ mapped_allocated_peak()

long quda::mapped_allocated_peak ( )
Returns
peak mapped memory allocated

Definition at line 63 of file malloc.cpp.

References MAPPED.

◆ mapped_malloc_()

void * quda::mapped_malloc_ ( const char *  func,
const char *  file,
int  line,
size_t  size 
)

Allocate page-locked ("pinned") host memory, and map it into the GPU address space. This function should only be called via the mapped_malloc() macro, defined in malloc_quda.h.

Definition at line 273 of file malloc.cpp.

References aligned_malloc(), quda::MemAlloc::base_size, errorQuda, MAPPED, memset(), and track_malloc().

Here is the call graph for this function:

◆ massRescale()

void quda::massRescale ( cudaColorSpinorField b,
QudaInvertParam param 
)

◆ max_fabs() [1/4]

__forceinline__ __host__ __device__ float quda::max_fabs ( const float4 &  c)

Definition at line 198 of file float_vector.h.

Referenced by store_norm().

Here is the caller graph for this function:

◆ max_fabs() [2/4]

__forceinline__ __host__ __device__ float quda::max_fabs ( const float2 &  b)

Definition at line 204 of file float_vector.h.

◆ max_fabs() [3/4]

__forceinline__ __host__ __device__ double quda::max_fabs ( const double4 &  c)

Definition at line 208 of file float_vector.h.

◆ max_fabs() [4/4]

__forceinline__ __host__ __device__ double quda::max_fabs ( const double2 &  b)

Definition at line 214 of file float_vector.h.

◆ Monte()

void quda::Monte ( cudaGaugeField data,
RNG rngstate,
double  Beta,
int  nhb,
int  nover 
)

Perform heatbath and overrelaxation. Performs nhb heatbath steps followed by nover overrelaxation steps.

Parameters
[in,out]dataGauge field
[in,out]rngstatestate of the CURAND random number generator
[in]Betainverse of the gauge coupling, beta = 2 Nc / g_0^2
[in]nhbnumber of heatbath steps
[in]novernumber of overrelaxation steps

Definition at line 856 of file pgauge_heatbath.cu.

References errorQuda, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, and QUDA_SINGLE_PRECISION.

Referenced by main(), and GaugeAlgTest::SetUp().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ multiplyVUV()

template<bool from_coarse, typename Float , int dim, QudaDirection dir, int fineSpin, int fineColor, int coarseSpin, int coarseColor, typename Arg , typename Gamma >
__device__ __host__ void quda::multiplyVUV ( complex< Float >  vuv[],
const Arg arg,
const Gamma gamma,
int  parity,
int  x_cb,
int  ic_c,
int  jc_c 
)
inline

Do a single (AV)^† * UV product, where for preconditioned clover, AV corresponds to the clover inverse multiplied by the packed null space vectors, else AV is simply the packed null space vectors.

Parameters
[out]vuvResult array
[in,out]argArg storing the fields and parameters
[in]parityFine grid parity we're working on
[in]x_cbCheckerboarded x dimension

Definition at line 537 of file coarse_op_kernel.cuh.

References quda::Gamma< ValueType, basis, dir >::apply(), quda::CalculateYArg< Float, fineSpin, coarseSpin, fineColor, coarseColor, coarseGauge, coarseGaugeAtomic, fineGauge, fineSpinor, fineSpinorTmp, fineSpinorV, fineClover >::AV, caxpy(), conj(), quda::Gamma< ValueType, basis, dir >::getcol(), QUDA_BACKWARDS, s, quda::CalculateYArg< Float, fineSpin, coarseSpin, fineColor, coarseColor, coarseGauge, coarseGaugeAtomic, fineGauge, fineSpinor, fineSpinorTmp, fineSpinorV, fineClover >::UV, and quda::CalculateYArg< Float, fineSpin, coarseSpin, fineColor, coarseColor, coarseGauge, coarseGaugeAtomic, fineGauge, fineSpinor, fineSpinorTmp, fineSpinorV, fineClover >::V.

Here is the call graph for this function:

◆ ndegTwistedMass() [1/2]

template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, typename Arg >
__device__ __host__ void quda::ndegTwistedMass ( Arg arg,
int  idx,
int  flavor,
int  parity 
)
inline

Apply the twisted-mass dslash out(x) = M*in = a * D * in + (1 + i*b*gamma_5*tau_3 + c*tau_1)*x. Note this routine only exists in xpay form.

Definition at line 31 of file dslash_ndeg_twisted_mass.cuh.

References arg(), EXTERIOR_KERNEL_ALL, INTERIOR_KERNEL, quda::DslashArg< Float >::kernel_type, quda::DslashArg< Float >::nParity, quda::WilsonArg< Float, nColor, reconstruct_ >::out, quda::DslashArg< Float >::parity, and quda::WilsonArg< Float, nColor, reconstruct_ >::x.

Here is the call graph for this function:

◆ ndegTwistedMass() [2/2]

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool asymmetric, bool xpay, KernelType kernel_type, typename Arg >
__device__ __host__ void quda::ndegTwistedMass ( Arg arg,
int  idx,
int  flavor,
int  parity 
)
inline

Apply the twisted-mass dslash out(x) = M*in = a * D * in + (1 + i*b*gamma_5*tau_3 + c*tau_1)*x. Note this routine only exists in xpay form.

Definition at line 49 of file dslash_ndeg_twisted_mass_preconditioned.cuh.

References arg(), EXTERIOR_KERNEL_ALL, INTERIOR_KERNEL, quda::VectorCache< real, Vector >::load(), quda::WilsonArg< Float, nColor, reconstruct_ >::out, quda::DslashArg< Float >::parity, quda::VectorCache< real, Vector >::save(), quda::VectorCache< real, Vector >::sync(), and quda::WilsonArg< Float, nColor, reconstruct_ >::x.

Here is the call graph for this function:

◆ ndegTwistedMassCPU()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, typename Arg >
void quda::ndegTwistedMassCPU ( Arg  arg)

Definition at line 78 of file dslash_ndeg_twisted_mass.cuh.

References arg(), quda::DslashArg< Float >::nParity, and quda::DslashArg< Float >::parity.

Here is the call graph for this function:

◆ ndegTwistedMassGPU()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__global__ void quda::ndegTwistedMassGPU ( Arg  arg)

Definition at line 94 of file dslash_ndeg_twisted_mass.cuh.

References arg(), quda::DslashArg< Float >::nParity, and quda::DslashArg< Float >::parity.

Here is the call graph for this function:

◆ ndegTwistedMassPreconditionedCPU()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
void quda::ndegTwistedMassPreconditionedCPU ( Arg  arg)

Definition at line 113 of file dslash_ndeg_twisted_mass_preconditioned.cuh.

References arg(), quda::DslashArg< Float >::nParity, and quda::DslashArg< Float >::parity.

Here is the call graph for this function:

◆ ndegTwistedMassPreconditionedGPU()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__global__ void quda::ndegTwistedMassPreconditionedGPU ( Arg  arg)

Definition at line 142 of file dslash_ndeg_twisted_mass_preconditioned.cuh.

References arg(), and quda::DslashArg< Float >::parity.

Here is the call graph for this function:

◆ neighborIndex()

template<IndexType idxType, typename Int >
__device__ __forceinline__ int quda::neighborIndex ( const unsigned int &  cb_idx,
const int(&)  shift[4],
const bool(&)  partitioned[4],
const unsigned int &  parity 
)

Definition at line 41 of file shift_quark_field.cu.

References coordsFromIndex(), quda::ShiftColorSpinorFieldArg< Output, Input >::partitioned, quda::ShiftColorSpinorFieldArg< Output, Input >::shift, X1, X2, X3, and X4.

Referenced by gaugeLink(), shiftColorSpinorFieldKernel(), and spinorNeighbor().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ norm() [1/6]

template<typename real , int Nc, QudaGaugeFieldOrder order>
double quda::norm ( const GaugeField u,
int  d,
norm_type_  type 
)

◆ norm() [2/6]

template<typename real , int Nc, QudaCloverFieldOrder order>
double quda::norm ( const CloverField u,
norm_type_  type 
)

◆ norm() [3/6]

template<typename real , int Nc>
double quda::norm ( const GaugeField u,
int  d,
norm_type_  type 
)

Definition at line 28 of file max_gauge.cu.

References errorQuda, quda::GaugeField::FieldOrder(), QUDA_FLOAT2_GAUGE_ORDER, QUDA_MILC_GAUGE_ORDER, and QUDA_QDP_GAUGE_ORDER.

Here is the call graph for this function:

◆ norm() [4/6]

template<typename real , int Nc>
double quda::norm ( const CloverField u,
norm_type_  type 
)

Definition at line 29 of file max_clover.cu.

References errorQuda, quda::CloverField::Order(), QUDA_FLOAT2_CLOVER_ORDER, and QUDA_FLOAT4_CLOVER_ORDER.

Here is the call graph for this function:

◆ norm() [5/6]

template<typename real >
double quda::norm ( const GaugeField u,
int  d,
norm_type_  type 
)

Definition at line 40 of file max_gauge.cu.

References errorQuda, and quda::GaugeField::Ncolor().

Here is the call graph for this function:

◆ norm() [6/6]

template<typename ValueType >
__host__ __device__ ValueType quda::norm ( const complex< ValueType > &  z)
inline

◆ norm1() [1/2]

double quda::norm1 ( const CloverField u,
bool  inverse = false 
)

This is a debugging function, where we cast a clover field into a spinor field so we can compute its L1 norm.

Parameters
uThe clover field that we want the norm of
Returns
The L1 norm of the clover field

Definition at line 478 of file clover_field.cpp.

References colorSpinorParam(), quda::ColorSpinorField::Create(), and quda::blas::norm1().

Referenced by quda::cpuGaugeField::Gauge_p(), quda::CloverField::Rho(), and quda::GaugeField::SiteSize().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ norm1() [2/2]

double quda::norm1 ( const GaugeField u)

This is a debugging function, where we cast a gauge field into a spinor field so we can compute its L1 norm.

Parameters
uThe gauge field that we want the norm of
Returns
The L1 norm of the gauge field

Definition at line 341 of file gauge_field.cpp.

References colorSpinorParam(), quda::ColorSpinorField::Create(), and quda::blas::norm1().

Here is the call graph for this function:

◆ norm2() [1/2]

double quda::norm2 ( const CloverField a,
bool  inverse = false 
)

This is a debugging function, where we cast a clover field into a spinor field so we can compute its L2 norm.

Parameters
aThe clover field that we want the norm of
Returns
The L2 norm squared of the clover field

Definition at line 470 of file clover_field.cpp.

References colorSpinorParam(), quda::ColorSpinorField::Create(), and quda::blas::norm2().

Referenced by quda::MG::buildFreeVectors(), computeMomAction(), quda::DiracMobiusPC::Dslash5inv(), quda::GMResDR::FlexArnoldiProcedure(), quda::cpuGaugeField::Gauge_p(), quda::MG::generateNullVectors(), quda::Deflation::operator()(), quda::MG::operator()(), quda::PreconCG::operator()(), quda::SimpleBiCGstab::operator()(), quda::SD::operator()(), quda::IncEigCG::operator()(), quda::GMResDR::operator()(), quda::Deflation::reduce(), quda::CloverField::Rho(), quda::GaugeField::SiteSize(), quda::Deflation::verify(), and quda::MG::verify().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ norm2() [2/2]

double quda::norm2 ( const GaugeField u)

This is a debugging function, where we cast a gauge field into a spinor field so we can compute its L2 norm.

Parameters
uThe gauge field that we want the norm of
Returns
The L2 norm squared of the gauge field

Definition at line 333 of file gauge_field.cpp.

References colorSpinorParam(), quda::ColorSpinorField::Create(), and quda::blas::norm2().

Here is the call graph for this function:

◆ operator!=() [1/3]

template<typename ValueType >
__host__ __device__ bool quda::operator!= ( const complex< ValueType > &  lhs,
const complex< ValueType > &  rhs 
)
inline

Definition at line 1035 of file complex_quda.h.

◆ operator!=() [2/3]

template<typename ValueType >
__host__ __device__ bool quda::operator!= ( const ValueType &  lhs,
const complex< ValueType > &  rhs 
)
inline

Definition at line 1041 of file complex_quda.h.

◆ operator!=() [3/3]

template<typename ValueType >
__host__ __device__ bool quda::operator!= ( const complex< ValueType > &  lhs,
const ValueType &  rhs 
)
inline

Definition at line 1047 of file complex_quda.h.

◆ operator*() [1/16]

__host__ __device__ float4 quda::operator* ( const float  a,
const float4  x 
)
inline

Definition at line 48 of file float_vector.h.

◆ operator*() [2/16]

__host__ __device__ float2 quda::operator* ( const float  a,
const float2  x 
)
inline

Definition at line 57 of file float_vector.h.

◆ operator*() [3/16]

__host__ __device__ double2 quda::operator* ( const double  a,
const double2  x 
)
inline

Definition at line 64 of file float_vector.h.

◆ operator*() [4/16]

__host__ __device__ double4 quda::operator* ( const double  a,
const double4  x 
)
inline

Definition at line 71 of file float_vector.h.

◆ operator*() [5/16]

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::operator* ( const complex< ValueType > &  lhs,
const complex< ValueType > &  rhs 
)
inline

Definition at line 902 of file complex_quda.h.

◆ operator*() [6/16]

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::operator* ( const complex< ValueType > &  lhs,
const ValueType &  rhs 
)
inline

Definition at line 911 of file complex_quda.h.

◆ operator*() [7/16]

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::operator* ( const ValueType &  lhs,
const complex< ValueType > &  rhs 
)
inline

Definition at line 918 of file complex_quda.h.

◆ operator*() [8/16]

template<template< typename, int > class Mat, class T , int N, class S >
__device__ __host__ Mat<T,N> quda::operator* ( const S scalar,
const Mat< T, N > &  a 
)
inline

Definition at line 476 of file quda_matrix.h.

◆ operator*() [9/16]

template<template< typename, int > class Mat, class T , int N, class S >
__device__ __host__ Mat<T,N> quda::operator* ( const Mat< T, N > &  a,
const S scalar 
)
inline

Definition at line 484 of file quda_matrix.h.

References Mat().

Here is the call graph for this function:

◆ operator*() [10/16]

template<template< typename, int > class Mat, class T , int N>
__device__ __host__ Mat<T,N> quda::operator* ( const Mat< T, N > &  a,
const Mat< T, N > &  b 
)
inline

Generic implementation of matrix multiplication.

Definition at line 507 of file quda_matrix.h.

◆ operator*() [11/16]

template<template< typename > class complex, typename T , int N>
__device__ __host__ Matrix<complex<T>,N> quda::operator* ( const Matrix< complex< T >, N > &  a,
const Matrix< complex< T >, N > &  b 
)
inline

Specialization of complex matrix multiplication that will issue optimal fma instructions.

Definition at line 528 of file quda_matrix.h.

◆ operator*() [12/16]

template<class T , class U , int N>
__device__ __host__ Matrix<typename PromoteTypeId<T,U>::Type,N> quda::operator* ( const Matrix< T, N > &  a,
const Matrix< U, N > &  b 
)
inline

Definition at line 563 of file quda_matrix.h.

◆ operator*() [13/16]

template<class T >
__device__ __host__ Matrix<T,2> quda::operator* ( const Matrix< T, 2 > &  a,
const Matrix< T, 2 > &  b 
)
inline

Definition at line 583 of file quda_matrix.h.

◆ operator*() [14/16]

template<typename Float , int Nc, int Ns, typename S >
__device__ __host__ ColorSpinor<Float,Nc,Ns> quda::operator* ( const S a,
const ColorSpinor< Float, Nc, Ns > &  x 
)
inline

Compute the scalar-vector product y = a * x.

Parameters
[in]aInput scalar
[in]xInput vector
Returns
The vector a * x

Definition at line 1067 of file color_spinor.h.

References quda::ColorSpinor< Float, Nc, Ns >::data, and s.

◆ operator*() [15/16]

template<typename Float , int Nc, int Ns>
__device__ __host__ ColorSpinor<Float,Nc,Ns> quda::operator* ( const Matrix< complex< Float >, Nc > &  A,
const ColorSpinor< Float, Nc, Ns > &  x 
)
inline

Compute the matrix-vector product y = A * x.

Parameters
[in]AInput matrix
[in]xInput vector
Returns
The vector A * x

Definition at line 1089 of file color_spinor.h.

References quda::ColorSpinor< Float, Nc, Ns >::data, and s.

◆ operator*() [16/16]

template<typename Float , int Nc, int Ns>
__device__ __host__ ColorSpinor<Float,Nc,Ns> quda::operator* ( const HMatrix< Float, Nc *Ns > &  A,
const ColorSpinor< Float, Nc, Ns > &  x 
)
inline

Compute the matrix-vector product y = A * x.

Parameters
[in]AInput Hermitian matrix with dimensions NcxNs x NcxNs
[in]xInput vector
Returns
The vector A * x

Definition at line 1124 of file color_spinor.h.

References quda::ColorSpinor< Float, Nc, Ns >::data.

◆ operator*=() [1/7]

__host__ __device__ float2 quda::operator*= ( float2 &  x,
const float  a 
)
inline

Definition at line 151 of file float_vector.h.

◆ operator*=() [2/7]

__host__ __device__ double2 quda::operator*= ( double2 &  x,
const float  a 
)
inline

Definition at line 157 of file float_vector.h.

◆ operator*=() [3/7]

__host__ __device__ float4 quda::operator*= ( float4 &  a,
const float &  b 
)
inline

Definition at line 163 of file float_vector.h.

◆ operator*=() [4/7]

__host__ __device__ double2 quda::operator*= ( double2 &  a,
const double &  b 
)
inline

Definition at line 171 of file float_vector.h.

◆ operator*=() [5/7]

__host__ __device__ double4 quda::operator*= ( double4 &  a,
const double &  b 
)
inline

Definition at line 177 of file float_vector.h.

◆ operator*=() [6/7]

template<template< typename, int > class Mat, class T , int N, class S >
__device__ __host__ Mat<T,N> quda::operator*= ( Mat< T, N > &  a,
const S scalar 
)
inline

Definition at line 489 of file quda_matrix.h.

References Mat().

Here is the call graph for this function:

◆ operator*=() [7/7]

template<class T , int N>
__device__ __host__ Matrix<T,N> quda::operator*= ( Matrix< T, N > &  a,
const Matrix< T, N > &  b 
)
inline

Definition at line 552 of file quda_matrix.h.

◆ operator+() [1/13]

__host__ __device__ double2 quda::operator+ ( const double2 &  x,
const double2 &  y 
)
inline

Definition at line 24 of file float_vector.h.

◆ operator+() [2/13]

__host__ __device__ double3 quda::operator+ ( const double3 &  x,
const double3 &  y 
)
inline

Definition at line 40 of file float_vector.h.

◆ operator+() [3/13]

__host__ __device__ double4 quda::operator+ ( const double4 &  x,
const double4 &  y 
)
inline

Definition at line 44 of file float_vector.h.

◆ operator+() [4/13]

template<typename scalar , int n>
__device__ __host__ vector_type<scalar,n> quda::operator+ ( const vector_type< scalar, n > &  a,
const vector_type< scalar, n > &  b 
)
inline

Definition at line 60 of file cub_helper.cuh.

◆ operator+() [5/13]

__host__ __device__ float2 quda::operator+ ( const float2  x,
const float2  y 
)
inline

Definition at line 80 of file float_vector.h.

◆ operator+() [6/13]

__host__ __device__ float4 quda::operator+ ( const float4  x,
const float4  y 
)
inline

Definition at line 87 of file float_vector.h.

◆ operator+() [7/13]

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::operator+ ( const complex< ValueType > &  lhs,
const complex< ValueType > &  rhs 
)
inline

Definition at line 854 of file complex_quda.h.

◆ operator+() [8/13]

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::operator+ ( const complex< ValueType > &  lhs,
const ValueType &  rhs 
)
inline

Definition at line 870 of file complex_quda.h.

◆ operator+() [9/13]

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::operator+ ( const ValueType &  lhs,
const complex< ValueType > &  rhs 
)
inline

Definition at line 876 of file complex_quda.h.

◆ operator+() [10/13]

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::operator+ ( const complex< ValueType > &  rhs)
inline

Definition at line 996 of file complex_quda.h.

◆ operator+() [11/13]

template<template< typename, int > class Mat, class T , int N>
__device__ __host__ Mat<T,N> quda::operator+ ( const Mat< T, N > &  a,
const Mat< T, N > &  b 
)
inline

Definition at line 433 of file quda_matrix.h.

◆ operator+() [12/13]

template<typename ValueType >
__host__ __device__ complex<ValueType> quda::operator+ ( const volatile complex< ValueType > &  lhs,
const volatile complex< ValueType > &  rhs 
)
inline

Definition at line 862 of file complex_quda.h.

◆ operator+() [13/13]

template<typename Float , int Nc, int Ns>
__device__ __host__ ColorSpinor<Float,Nc,Ns> quda::operator+ ( const ColorSpinor< Float, Nc, Ns > &  x,
const ColorSpinor< Float, Nc, Ns > &  y 
)
inline

ColorSpinor addition operator.

Parameters
[in]xInput vector
[in]yInput vector
Returns
The vector x + y

Definition at line 1023 of file color_spinor.h.

References quda::ColorSpinor< Float, Nc, Ns >::data, and s.

◆ operator+=() [1/8]

template<typename real , typename Link >
__device__ void quda::operator+= ( real *  y,
const Link &  x 
)
inline

Definition at line 47 of file clover_deriv.cuh.

◆ operator+=() [2/8]

__host__ __device__ float4 quda::operator+= ( float4 &  x,
const float4  y 
)
inline

Definition at line 96 of file float_vector.h.

◆ operator+=() [3/8]

__host__ __device__ float2 quda::operator+= ( float2 &  x,
const float2  y 
)
inline

Definition at line 104 of file float_vector.h.

◆ operator+=() [4/8]

__host__ __device__ double2 quda::operator+= ( double2 &  x,
const double2  y 
)
inline

Definition at line 110 of file float_vector.h.

◆ operator+=() [5/8]

__host__ __device__ double3 quda::operator+= ( double3 &  x,
const double3  y 
)
inline

Definition at line 116 of file float_vector.h.

◆ operator+=() [6/8]

__host__ __device__ double4 quda::operator+= ( double4 &  x,
const double4  y 
)
inline

Definition at line 123 of file float_vector.h.

◆ operator+=() [7/8]

template<template< typename, int > class Mat, class T , int N>
__device__ __host__ Mat<T,N> quda::operator+= ( Mat< T, N > &  a,
const Mat< T, N > &  b 
)
inline

Definition at line 443 of file quda_matrix.h.

◆ operator+=() [8/8]

template<template< typename, int > class Mat, class T , int N>
__device__ __host__ Mat<T,N> quda::operator+= ( Mat< T, N > &  a,
const T &  b 
)
inline

Definition at line 451 of file quda_matrix.h.

References Mat().

Here is the call graph for this function:

◆ operator-() [1/12]

__host__ __device__ double2 quda::operator- ( const double2 &  x,
const double2 &  y 
)
inline

Definition at line 28 of file float_vector.h.

◆ operator-() [2/12]

__host__ __device__ float2 quda::operator- ( const float2 &  x,
const float2 &  y 
)
inline

Definition at line 32 of file float_vector.h.

◆ operator-() [3/12]

__host__ __device__ float4 quda::operator- ( const float4 &  x,
const float4 &  y 
)
inline

Definition at line 36 of file float_vector.h.

◆ operator-() [4/12]

__host__ __device__ float2 quda::operator- ( const float2 &  x)
inline

Definition at line 185 of file float_vector.h.

◆ operator-() [5/12]

__host__ __device__ double2 quda::operator- ( const double2 &  x)
inline

Definition at line 189 of file float_vector.h.

◆ operator-() [6/12]

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::operator- ( const complex< ValueType > &  lhs,
const complex< ValueType > &  rhs 
)
inline

Definition at line 883 of file complex_quda.h.

◆ operator-() [7/12]

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::operator- ( const complex< ValueType > &  lhs,
const ValueType &  rhs 
)
inline

Definition at line 889 of file complex_quda.h.

◆ operator-() [8/12]

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::operator- ( const ValueType &  lhs,
const complex< ValueType > &  rhs 
)
inline

Definition at line 895 of file complex_quda.h.

◆ operator-() [9/12]

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::operator- ( const complex< ValueType > &  rhs)
inline

Definition at line 1001 of file complex_quda.h.

◆ operator-() [10/12]

template<template< typename, int > class Mat, class T , int N>
__device__ __host__ Mat<T,N> quda::operator- ( const Mat< T, N > &  a,
const Mat< T, N > &  b 
)
inline

Definition at line 467 of file quda_matrix.h.

◆ operator-() [11/12]

template<template< typename, int > class Mat, class T , int N>
__device__ __host__ Mat<T,N> quda::operator- ( const Mat< T, N > &  a)
inline

Definition at line 495 of file quda_matrix.h.

◆ operator-() [12/12]

template<typename Float , int Nc, int Ns>
__device__ __host__ ColorSpinor<Float,Nc,Ns> quda::operator- ( const ColorSpinor< Float, Nc, Ns > &  x,
const ColorSpinor< Float, Nc, Ns > &  y 
)
inline

ColorSpinor subtraction operator.

Parameters
[in]xInput vector
[in]yInput vector
Returns
The vector x - y

Definition at line 1045 of file color_spinor.h.

References quda::ColorSpinor< Float, Nc, Ns >::data, and s.

◆ operator-=() [1/5]

template<typename real , typename Link >
__device__ void quda::operator-= ( real *  y,
const Link &  x 
)
inline

Definition at line 58 of file clover_deriv.cuh.

References axpy().

Here is the call graph for this function:

◆ operator-=() [2/5]

__host__ __device__ float4 quda::operator-= ( float4 &  x,
const float4  y 
)
inline

Definition at line 131 of file float_vector.h.

◆ operator-=() [3/5]

__host__ __device__ float2 quda::operator-= ( float2 &  x,
const float2  y 
)
inline

Definition at line 139 of file float_vector.h.

◆ operator-=() [4/5]

__host__ __device__ double2 quda::operator-= ( double2 &  x,
const double2  y 
)
inline

Definition at line 145 of file float_vector.h.

◆ operator-=() [5/5]

template<template< typename, int > class Mat, class T , int N>
__device__ __host__ Mat<T,N> quda::operator-= ( Mat< T, N > &  a,
const Mat< T, N > &  b 
)
inline

Definition at line 459 of file quda_matrix.h.

◆ operator/() [1/7]

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::operator/ ( const complex< ValueType > &  lhs,
const complex< ValueType > &  rhs 
)
inline

Definition at line 926 of file complex_quda.h.

References norm().

Here is the call graph for this function:

◆ operator/() [2/7]

template<>
__host__ __device__ complex< float > quda::operator/ ( const complex< float > &  lhs,
const complex< float > &  rhs 
)
inline

Definition at line 935 of file complex_quda.h.

References quda::complex< float >::imag(), quda::complex< float >::real(), and s.

Here is the call graph for this function:

◆ operator/() [3/7]

template<>
__host__ __device__ complex< double > quda::operator/ ( const complex< double > &  lhs,
const complex< double > &  rhs 
)
inline

Definition at line 952 of file complex_quda.h.

References quda::complex< double >::imag(), quda::complex< double >::real(), and s.

Here is the call graph for this function:

◆ operator/() [4/7]

template<typename ValueType >
__host__ __device__ complex<ValueType> quda::operator/ ( const complex< ValueType > &  lhs,
const ValueType &  rhs 
)
inline

Definition at line 969 of file complex_quda.h.

◆ operator/() [5/7]

template<typename ValueType >
__host__ __device__ complex<ValueType> quda::operator/ ( const ValueType &  lhs,
const complex< ValueType > &  rhs 
)
inline

Definition at line 976 of file complex_quda.h.

References norm().

Here is the call graph for this function:

◆ operator/() [6/7]

template<>
__host__ __device__ complex<float> quda::operator/ ( const float &  lhs,
const complex< float > &  rhs 
)
inline

Definition at line 984 of file complex_quda.h.

◆ operator/() [7/7]

template<>
__host__ __device__ complex<double> quda::operator/ ( const double &  lhs,
const complex< double > &  rhs 
)
inline

Definition at line 989 of file complex_quda.h.

◆ operator<<() [1/10]

template<typename Float , int nSpin, int nColor, bool spin_project>
std::ostream& quda::operator<< ( std::ostream &  out,
const PackArg< Float, nSpin, nColor, spin_project > &  arg 
)

Definition at line 21 of file dslash_pack2.cu.

References arg(), and out.

Here is the call graph for this function:

◆ operator<<() [2/10]

std::ostream & quda::operator<< ( std::ostream &  output,
const CloverFieldParam param 
)

◆ operator<<() [3/10]

std::ostream & quda::operator<< ( std::ostream &  output,
const LatticeFieldParam param 
)

◆ operator<<() [4/10]

std::ostream & quda::operator<< ( std::ostream &  output,
const GaugeFieldParam param 
)

◆ operator<<() [5/10]

template<typename ValueType , class charT , class traits >
std::basic_ostream< charT, traits > & quda::operator<< ( std::basic_ostream< charT, traits > &  os,
const complex< ValueType > &  z 
)

Definition at line 310 of file complex_quda.h.

◆ operator<<() [6/10]

template<typename Float >
std::ostream& quda::operator<< ( std::ostream &  out,
const DslashArg< Float > &  arg 
)

Definition at line 300 of file dslash_helper.cuh.

References arg(), and out.

Here is the call graph for this function:

◆ operator<<() [7/10]

template<class T , int N>
std::ostream& quda::operator<< ( std::ostream &  os,
const Matrix< T, N > &  m 
)

Definition at line 833 of file quda_matrix.h.

◆ operator<<() [8/10]

template<class T , int N>
std::ostream& quda::operator<< ( std::ostream &  os,
const Array< T, N > &  a 
)

Definition at line 847 of file quda_matrix.h.

◆ operator<<() [9/10]

std::ostream& quda::operator<< ( std::ostream &  out,
const ColorSpinorField a 
)

◆ operator<<() [10/10]

std::ostream& quda::operator<< ( std::ostream &  out,
const cudaColorSpinorField a 
)

◆ operator==() [1/3]

template<typename ValueType >
__host__ __device__ bool quda::operator== ( const complex< ValueType > &  lhs,
const complex< ValueType > &  rhs 
)
inline

Definition at line 1008 of file complex_quda.h.

◆ operator==() [2/3]

template<typename ValueType >
__host__ __device__ bool quda::operator== ( const ValueType &  lhs,
const complex< ValueType > &  rhs 
)
inline

Definition at line 1017 of file complex_quda.h.

◆ operator==() [3/3]

template<typename ValueType >
__host__ __device__ bool quda::operator== ( const complex< ValueType > &  lhs,
const ValueType &  rhs 
)
inline

Definition at line 1025 of file complex_quda.h.

◆ operator>>()

template<typename ValueType , typename charT , class traits >
std::basic_istream< charT, traits > & quda::operator>> ( std::basic_istream< charT, traits > &  is,
complex< ValueType > &  z 
)

Definition at line 318 of file complex_quda.h.

◆ orthoDir()

void quda::orthoDir ( Complex **  beta,
std::vector< ColorSpinorField *>  Ap,
int  k,
int  pipeline 
)

Definition at line 95 of file inv_gcr_quda.cpp.

References quda::blas::caxpy(), quda::blas::caxpyDotzy(), quda::blas::cDotProduct(), computeBeta(), pipeline, and updateAp().

Referenced by quda::GCR::operator()().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ outerProd() [1/2]

template<class T , int N>
__device__ __host__ void quda::outerProd ( const Array< T, N > &  a,
const Array< T, N > &  b,
Matrix< T, N > *  m 
)
inline

Definition at line 805 of file quda_matrix.h.

References conj().

Referenced by constructHHMat().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ outerProd() [2/2]

template<class T , int N>
__device__ __host__ void quda::outerProd ( const T(&a)[N],
const T(&b)[N],
Matrix< T, N > *  m 
)
inline

Definition at line 818 of file quda_matrix.h.

References conj().

Here is the call graph for this function:

◆ outerProdSpinTrace()

template<typename Float , int Nc, int Ns>
__device__ __host__ Matrix<complex<Float>, Nc> quda::outerProdSpinTrace ( const ColorSpinor< Float, Nc, Ns > &  a,
const ColorSpinor< Float, Nc, Ns > &  b 
)
inline

Compute the outer product over color and take the spin trace: out(j,i) = sum_s a(s,j) * conj(b(s,i)).

Parameters
aLeft-hand side ColorSpinor
bRight-hand side ColorSpinor
Returns
The spin traced matrix

Definition at line 985 of file color_spinor.h.

References out, and s.

Referenced by sigmaOprod().

Here is the caller graph for this function:

◆ OvrImpSTOUTStep() [1/3]

void quda::OvrImpSTOUTStep ( GaugeField dataDs,
const GaugeField dataOr,
double  rho,
double  epsilon 
)

Apply Over Improved STOUT smearing to the gauge field.

Parameters
[out]dataDsOutput smeared field
[in]dataOrInput gauge field
[in]rhosmearing parameter
[in]epsilonsmearing parameter

Definition at line 269 of file gauge_stout.cu.

References epsilon, errorQuda, quda::GaugeField::isNative(), quda::GaugeField::Order(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_SINGLE_PRECISION, and quda::GaugeField::Reconstruct().

Referenced by OvrImpSTOUTStep(), and performOvrImpSTOUTnStep().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ OvrImpSTOUTStep() [2/3]

template<typename Float , typename GaugeOr , typename GaugeDs >
void quda::OvrImpSTOUTStep ( GaugeOr  origin,
GaugeDs  dest,
const GaugeField dataOr,
Float  rho,
Float  epsilon 
)

Definition at line 208 of file gauge_stout.cu.

References arg(), DOUBLE_TOL, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, qudaDeviceSynchronize, and SINGLE_TOL.

Here is the call graph for this function:

◆ OvrImpSTOUTStep() [3/3]

template<typename Float >
void quda::OvrImpSTOUTStep ( GaugeField dataDs,
const GaugeField dataOr,
Float  rho,
Float  epsilon 
)

Definition at line 217 of file gauge_stout.cu.

References errorQuda, OvrImpSTOUTStep(), QUDA_RECONSTRUCT_12, QUDA_RECONSTRUCT_8, QUDA_RECONSTRUCT_NO, and quda::GaugeField::Reconstruct().

Here is the call graph for this function:

◆ pack()

template<bool dagger, int twist, int dim, QudaPCType pc, typename Arg >
__device__ __host__ void quda::pack ( Arg arg,
int  ghost_idx,
int  s,
int  parity 
)
inline

◆ packGhost()

template<typename Float , bool block_float, int Ns, int Ms, int Nc, int Mc, int nDim, int dim, int dir, typename Arg >
__device__ __host__ __forceinline__ void quda::packGhost ( Arg arg,
int  x_cb,
int  parity,
int  spinor_parity,
int  spin_block,
int  color_block 
)

Definition at line 95 of file color_spinor_pack.cuh.

References arg(), getCoords(), getCoords5(), quda::PackGhostArg< Field >::nDim, quda::PackGhostArg< Field >::parity, and s.

Here is the call graph for this function:

◆ PackGhost() [1/3]

template<typename Float , int nColor>
void quda::PackGhost ( void *  ghost[],
const ColorSpinorField in,
MemoryLocation  location,
int  nFace,
bool  dagger,
int  parity,
bool  spin_project,
double  a,
double  b,
double  c,
const cudaStream_t &  stream 
)

Definition at line 342 of file dslash_pack2.cu.

References quda::Pack< Float, nColor, spin_project >::apply(), and pack().

Here is the call graph for this function:

◆ PackGhost() [2/3]

template<typename Float >
void quda::PackGhost ( void *  ghost[],
const ColorSpinorField in,
MemoryLocation  location,
int  nFace,
bool  dagger,
int  parity,
bool  spin_project,
double  a,
double  b,
double  c,
const cudaStream_t &  stream 
)

◆ PackGhost() [3/3]

void quda::PackGhost ( void *  ghost[2 *QUDA_MAX_DIM],
const ColorSpinorField field,
MemoryLocation  location,
int  nFace,
bool  dagger,
int  parity,
bool  spin_project,
double  a,
double  b,
double  c,
const cudaStream_t &  stream 
)

Dslash face packing routine.

Parameters
[out]ghostArray of packed halos, order is [2*dim+dir]
[in]fieldColorSpinorField to be packed
[in]locationLocations where the packed fields are (Device, Host and/or Remote)
[in]nFaceDepth of halo
[in]daggerWhether this is for the dagger operator
[in]parityField parity
[in]spin_projectWhether to spin_project when packing
[in]aTwisted mass scale factor (for preconditioned twisted-mass dagger operator)
[in]bTwisted mass chiral twist factor (for preconditioned twisted-mass dagger operator)
[in]cTwisted mass flavor twist factor (for preconditioned non degenerate twisted-mass dagger operator)
[in]streamWhich stream are we executing in

Definition at line 367 of file dslash_pack2.cu.

References quda::Pack< Float, nColor, spin_project >::a, quda::Pack< Float, nColor, spin_project >::b, quda::Pack< Float, nColor, spin_project >::c, commDim, quda::Pack< Float, nColor, spin_project >::dagger, errorQuda, getKernelPackT(), quda::Pack< Float, nColor, spin_project >::ghost, quda::Pack< Float, nColor, spin_project >::in, quda::Pack< Float, nColor, spin_project >::location, quda::Pack< Float, nColor, spin_project >::nFace, quda::Pack< Float, nColor, spin_project >::parity, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, QUDA_SINGLE_PRECISION, and stream.

Referenced by quda::cudaColorSpinorField::packGhost().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ packKernel()

template<bool dagger, int twist, QudaPCType pc, typename Arg >
__global__ void quda::packKernel ( Arg  arg)

◆ packShmemKernel()

template<bool dagger, int twist, QudaPCType pc, typename Arg >
__global__ void quda::packShmemKernel ( Arg  arg)

Definition at line 222 of file dslash_pack.cuh.

References arg(), quda::Arg< real, Ns, Nc, order >::nParity, quda::PackArg< Float_, nColor_, nSpin_, spin_project_ >::parity, QUDA_5D_PC, and s.

Here is the call graph for this function:

◆ packSpinor()

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder >
void quda::packSpinor ( OutOrder &  outOrder,
const InOrder &  inOrder,
int  volume 
)

CPU function to reorder spinor fields.

Definition at line 22 of file copy_color_spinor_mg.cuh.

References s.

◆ packSpinorKernel()

template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder >
__global__ void quda::packSpinorKernel ( OutOrder  outOrder,
const InOrder  inOrder,
int  volume 
)

CUDA kernel to reorder spinor fields. Adopts a similar form as the CPU version, using the same inlined functions.

Definition at line 34 of file copy_color_spinor_mg.cuh.

References s.

◆ packStaggered()

template<int dim, int nFace = 1, typename Arg >
__device__ __host__ void quda::packStaggered ( Arg arg,
int  ghost_idx,
int  s,
int  parity 
)
inline

◆ packStaggeredKernel()

template<typename Arg >
__global__ void quda::packStaggeredKernel ( Arg  arg)

◆ packStaggeredShmemKernel()

template<typename Arg >
__global__ void quda::packStaggeredShmemKernel ( Arg  arg)

Definition at line 325 of file dslash_pack.cuh.

References arg(), quda::Arg< real, Ns, Nc, order >::nParity, quda::PackArg< Float_, nColor_, nSpin_, spin_project_ >::parity, and s.

Here is the call graph for this function:

◆ PCType_() [1/2]

QudaPCType quda::PCType_ ( const char *  func,
const char *  file,
int  line,
const ColorSpinorField a,
const ColorSpinorField b 
)
inline

Helper function for determining if the preconditioning type of the fields is the same.

Parameters
[in]aInput field
[in]bInput field
Returns
If PCType is unique return this

Definition at line 1011 of file color_spinor_field.h.

References errorQuda, quda::ColorSpinorField::PCType(), and QUDA_PC_INVALID.

Referenced by PCType_().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ PCType_() [2/2]

template<typename... Args>
QudaPCType quda::PCType_ ( const char *  func,
const char *  file,
int  line,
const ColorSpinorField a,
const ColorSpinorField b,
const Args &...  args 
)
inline

Helper function for determining if the preconditioning type of the fields is the same.

Parameters
[in]aInput field
[in]bInput field
[in]argsList of additional fields to check the preconditioning type on
Returns
If PCType is unique return this

Definition at line 1030 of file color_spinor_field.h.

References PCType_().

Here is the call graph for this function:

◆ PGaugeExchange()

void quda::PGaugeExchange ( cudaGaugeField data,
const int  dir,
const int  parity 
)

Exchange gauge field data between nodes for the given direction and parity.

Parameters
[in,out]dataGauge field
[in]dirDirection in which to exchange
[in]parityParity to exchange

Definition at line 342 of file pgauge_exchange.cu.

References comm_dim_partitioned(), errorQuda, parity, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, and QUDA_SINGLE_PRECISION.

Here is the call graph for this function:

◆ PGaugeExchangeFree()

void quda::PGaugeExchangeFree ( )

Release all allocated memory used to exchange data between nodes.

Referenced by main(), and GaugeAlgTest::TearDown().

Here is the caller graph for this function:

◆ pinned_allocated_peak()

long quda::pinned_allocated_peak ( )
Returns
peak pinned memory allocated

Definition at line 61 of file malloc.cpp.

References PINNED.

◆ pinned_malloc_()

void * quda::pinned_malloc_ ( const char *  func,
const char *  file,
int  line,
size_t  size 
)

Allocate page-locked ("pinned") host memory. This function should only be called via the pinned_malloc() macro, defined in malloc_quda.h

Note that we do not rely on cudaHostAlloc(), since buffers allocated in this way have been observed to cause problems when shared with MPI via GPU Direct on some systems.

Definition at line 250 of file malloc.cpp.

References aligned_malloc(), quda::MemAlloc::base_size, errorQuda, memset(), PINNED, and track_malloc().

Referenced by quda::pool::pinned_malloc_().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ plaquette() [1/4]

double3 quda::plaquette ( const GaugeField U)

Compute the plaquette of the gauge field.

Parameters
[in]UThe gauge field upon which to compute the plaquette
Returns
double3 variable returning (plaquette, spatial plaquette, temporal plaquette) site averages normalized such that each plaquette is in the range [0,1]

Definition at line 65 of file gauge_plaq.cu.

References INSTANTIATE_PRECISION, and quda::LatticeField::Location().

Referenced by main(), performAPEnStep(), performOvrImpSTOUTnStep(), performSTOUTnStep(), plaqQuda(), GaugeAlgTest::SetUp(), and TEST_F().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ plaquette() [2/4]

template<typename Float , typename Arg >
__device__ double quda::plaquette ( Arg arg,
int  x[],
int  parity,
int  mu,
int  nu 
)
inline

Definition at line 32 of file gauge_plaq.cuh.

References conj(), getTrace(), linkIndexShift(), and mu.

Here is the call graph for this function:

◆ plaquette() [3/4]

template<typename Float , typename Gauge >
void quda::plaquette ( const Gauge  dataOr,
const GaugeField data,
double2 &  plq,
QudaFieldLocation  location 
)

◆ plaquette() [4/4]

template<typename Float >
void quda::plaquette ( const GaugeField data,
double2 &  plq,
QudaFieldLocation  location 
)

Definition at line 61 of file gauge_plaq.cu.

References INSTANTIATE_RECONSTRUCT.

◆ point()

template<class T >
void quda::point ( T &  t,
int  x,
int  s,
int  c 
)

Create a point source at spacetime point x, spin s and colour c

Definition at line 31 of file color_spinor_util.cu.

Referenced by genericSource().

Here is the caller graph for this function:

◆ polar() [1/3]

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::polar ( const ValueType &  m,
const ValueType &  theta = 0 
)
inline

Returns the complex with magnitude m and angle theta in radians.

Definition at line 1098 of file complex_quda.h.

References cos(), and sin().

Referenced by construct_fat_long_gauge_field(), exp(), and sqrt().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ polar() [2/3]

template<>
__host__ __device__ complex<float> quda::polar ( const float &  magnitude,
const float &  angle 
)
inline

Definition at line 1104 of file complex_quda.h.

◆ polar() [3/3]

template<>
__host__ __device__ complex<double> quda::polar ( const double &  magnitude,
const double &  angle 
)
inline

Definition at line 1110 of file complex_quda.h.

References cos(), and sin().

Here is the call graph for this function:

◆ polarSu3()

template<typename Float >
__host__ __device__ void quda::polarSu3 ( Matrix< complex< Float >, 3 > &  in,
Float  tol 
)
inline

Project the input matrix on the SU(3) group. First unitarize the matrix and then project onto the special unitary group.

Parameters
inThe input matrix to be projected
tolTolerance to which the unitarity check is applied

Definition at line 87 of file su3_project.cuh.

References arg(), checkUnitary(), conj(), getDeterminant(), in, inverse(), mod(), norm(), out, and pow().

Here is the call graph for this function:

◆ policies()

static std::vector<DslashCoarsePolicy> quda::policies ( static_cast< int >  DslashCoarsePolicy::DSLASH_COARSE_POLICY_DISABLED,
DslashCoarsePolicy::DSLASH_COARSE_POLICY_DISABLED   
)
static

◆ policyTuning()

bool quda::policyTuning ( )

Definition at line 495 of file tune.cpp.

References policy_tuning.

Referenced by tuneLaunch().

Here is the caller graph for this function:

◆ popKernelPackT()

void quda::popKernelPackT ( )

◆ postTrace_()

void quda::postTrace_ ( const char *  func,
const char *  file,
int  line 
)

Post an event in the trace, recording where it was posted.

Definition at line 92 of file tune.cpp.

References quda::TuneKey::aux_n, i32toa(), quda::TraceKey::key, tmp, and traceEnabled().

Referenced by quda::TunableVectorYZ::resizeStep().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ pow() [1/6]

template<typename ValueType , typename ExponentType >
__host__ __device__ ValueType quda::pow ( ValueType  x,
ExponentType  e 
)
inline

◆ pow() [2/6]

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::pow ( const complex< ValueType > &  z,
const int &  n 
)
inline

Definition at line 1208 of file complex_quda.h.

References exp(), and log().

Here is the call graph for this function:

◆ pow() [3/6]

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::pow ( const complex< ValueType > &  z,
const ValueType &  x 
)
inline

Definition at line 1184 of file complex_quda.h.

References exp(), and log().

Here is the call graph for this function:

◆ pow() [4/6]

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::pow ( const complex< ValueType > &  z,
const complex< ValueType > &  z2 
)
inline

Definition at line 1190 of file complex_quda.h.

References exp(), and log().

Here is the call graph for this function:

◆ pow() [5/6]

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::pow ( const ValueType &  x,
const complex< ValueType > &  z 
)
inline

Definition at line 1196 of file complex_quda.h.

References exp(), and log().

Here is the call graph for this function:

◆ pow() [6/6]

template<>
__host__ __device__ complex<float> quda::pow ( const float &  x,
const complex< float > &  exponent 
)
inline

Definition at line 1202 of file complex_quda.h.

References exp().

Referenced by pow().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ Precision_() [1/2]

QudaPrecision quda::Precision_ ( const char *  func,
const char *  file,
int  line,
const LatticeField a,
const LatticeField b 
)
inline

Helper function for determining if the precision of the fields is the same.

Parameters
[in]aInput field
[in]bInput field
Returns
If precision is unique return the precision

Definition at line 672 of file lattice_field.h.

References errorQuda, quda::LatticeFieldParam::precision, quda::LatticeField::Precision(), and QUDA_INVALID_PRECISION.

Referenced by Precision_().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ Precision_() [2/2]

template<typename... Args>
QudaPrecision quda::Precision_ ( const char *  func,
const char *  file,
int  line,
const LatticeField a,
const LatticeField b,
const Args &...  args 
)
inline

Helper function for determining if the precision of the fields is the same.

Parameters
[in]aInput field
[in]bInput field
[in]argsList of additional fields to check precision on
Returns
If precision is unique return the precision

Definition at line 689 of file lattice_field.h.

References Precision_().

Here is the call graph for this function:

◆ print()

void quda::print ( const double  d[],
int  n 
)

Definition at line 44 of file inv_mpcg_quda.cpp.

Referenced by quda::MPBiCGstab::computeMatrixPowers().

Here is the caller graph for this function:

◆ print_alloc()

static void quda::print_alloc ( AllocType  type)
static

Definition at line 85 of file malloc.cpp.

References quda::MemAlloc::base_size, quda::MemAlloc::file, quda::MemAlloc::func, quda::MemAlloc::line, and printfQuda.

Referenced by assertAllMemFree().

Here is the caller graph for this function:

◆ print_alloc_header()

static void quda::print_alloc_header ( )
static

Definition at line 78 of file malloc.cpp.

References printfQuda.

Referenced by assertAllMemFree().

Here is the caller graph for this function:

◆ print_trace()

static void quda::print_trace ( void  )
static

Definition at line 67 of file malloc.cpp.

References printfQuda, and quda::MemAlloc::size.

Referenced by host_free_().

Here is the caller graph for this function:

◆ print_vector()

template<class Order >
void quda::print_vector ( const Order &  o,
unsigned int  x 
)

Definition at line 321 of file color_spinor_util.cu.

References parity, and printfQuda.

Referenced by genericPrintVector().

Here is the caller graph for this function:

◆ printAPIProfile()

void quda::printAPIProfile ( )

Print out the timer profile for CUDA API calls.

Definition at line 336 of file quda_cuda_api.cpp.

Referenced by endQuda().

Here is the caller graph for this function:

◆ printLaunchTimer()

void quda::printLaunchTimer ( )

Definition at line 843 of file tune.cpp.

References quda::TimeProfile::Print().

Referenced by endQuda(), and profilerStop().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ printLink()

template<class Cmplx >
__host__ __device__ void quda::printLink ( const Matrix< Cmplx, 3 > &  link)
inline

Definition at line 1149 of file quda_matrix.h.

Referenced by applyStaggered(), and isUnitary().

Here is the caller graph for this function:

◆ printPeakMemUsage()

void quda::printPeakMemUsage ( )

Definition at line 375 of file malloc.cpp.

References DEVICE, DEVICE_PINNED, and printfQuda.

Referenced by endQuda().

Here is the caller graph for this function:

◆ projectSU3()

void quda::projectSU3 ( cudaGaugeField U,
double  tol,
int *  fails 
)

Project the input gauge field onto the SU(3) group. This is a destructive operation. The number of link failures is reported so appropriate action can be taken.

Parameters
UGauge field that we are projecting onto SU(3)
tolTolerance to which the iterative algorithm works
failsNumber of link failures (device pointer)

Definition at line 590 of file unitarize_links_quda.cu.

References quda::ProjectSU3< Float, G >::apply(), arg(), checkCudaError, errorQuda, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_RECONSTRUCT_NO, QUDA_SINGLE_PRECISION, qudaDeviceSynchronize, quda::GaugeField::Reconstruct(), quda::GaugeField::StaggeredPhaseApplied(), and tol.

Referenced by projectSU3Quda().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ ProjectSU3kernel()

template<typename Float , typename G >
__global__ void quda::ProjectSU3kernel ( ProjectSU3Arg< Float, G >  arg)

◆ Prolongate()

void quda::Prolongate ( ColorSpinorField out,
const ColorSpinorField in,
const ColorSpinorField v,
int  Nvec,
const int *  fine_to_coarse,
const int *const *  spin_map,
int  parity = QUDA_INVALID_PARITY 
)

Apply the prolongation operator.

Parameters
[out]outResulting fine grid field
[in]inInput field on coarse grid
[in]vMatrix field containing the null-space components
[in]NvecNumber of null-space components
[in]fine_to_coarseFine-to-coarse lookup table (linear indices)
[in]spin_mapSpin blocking lookup table
[in]parityParity of the output fine field (if single parity output field)

Definition at line 296 of file prolongator.cu.

References checkCudaError, checkLocation, checkPrecision, errorQuda, quda::ColorSpinorField::FieldOrder(), in, out, parity, quda::LatticeField::Precision(), QUDA_CUDA_FIELD_LOCATION, QUDA_DOUBLE_PRECISION, and QUDA_SINGLE_PRECISION.

Referenced by quda::Transfer::P(), and quda::Transfer::setTransferGPU().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ pushKernelPackT()

void quda::pushKernelPackT ( bool  pack)

◆ qChargeComputeKernel()

template<int blockSize, typename Float , typename Arg >
__global__ void quda::qChargeComputeKernel ( Arg  arg)

Definition at line 28 of file gauge_qcharge.cuh.

References arg(), getTrace(), parity, and Pi2.

Here is the call graph for this function:

◆ qudaDeviceSynchronize_()

cudaError_t quda::qudaDeviceSynchronize_ ( const char *  func,
const char *  file,
const char *  line 
)

Wrapper around cudaDeviceSynchronize or cuDeviceSynchronize.

Definition at line 306 of file quda_cuda_api.cpp.

References errorQuda, PROFILE, QUDA_PROFILE_DEVICE_SYNCHRONIZE, and QUDA_PROFILE_FUNC_SET_ATTRIBUTE.

◆ qudaEventQuery()

cudaError_t quda::qudaEventQuery ( cudaEvent_t &  event)

Wrapper around cudaEventQuery or cuEventQuery.

Parameters
[in]eventEvent we are querying
Returns
Status of event query

Definition at line 209 of file quda_cuda_api.cpp.

References errorQuda, PROFILE, and QUDA_PROFILE_EVENT_QUERY.

Referenced by quda::blas::multiReduceLaunch(), quda::dslash::DslashBasic< Dslash >::operator()(), quda::dslash::DslashFusedExterior< Dslash >::operator()(), quda::dslash::DslashGDRRecv< Dslash >::operator()(), quda::dslash::DslashFusedGDRRecv< Dslash >::operator()(), and quda::blas::reduceLaunch().

Here is the caller graph for this function:

◆ qudaEventRecord()

cudaError_t quda::qudaEventRecord ( cudaEvent_t &  event,
cudaStream_t  stream = 0 
)

◆ qudaEventSynchronize()

cudaError_t quda::qudaEventSynchronize ( cudaEvent_t &  event)

Wrapper around cudaEventSynchronize or cuEventSynchronize.

Parameters
[in]eventEvent which we are synchronizing with respect to

Definition at line 287 of file quda_cuda_api.cpp.

References errorQuda, PROFILE, and QUDA_PROFILE_EVENT_SYNCHRONIZE.

Referenced by quda::cudaGaugeField::commsComplete().

Here is the caller graph for this function:

◆ qudaLaunchKernel()

cudaError_t quda::qudaLaunchKernel ( const void *  func,
dim3  gridDim,
dim3  blockDim,
void **  args,
size_t  sharedMem,
cudaStream_t  stream 
)

Wrapper around cudaLaunchKernel.

Parameters
[in]funcDevice function symbol
[in]gridDimGrid dimensions
[in]blockDimBlock dimensions
[in]argsArguments
[in]sharedMemShared memory requested per thread block
[in]streamStream identifier

Definition at line 201 of file quda_cuda_api.cpp.

References activeTuning(), errorQuda, PROFILE, and QUDA_PROFILE_LAUNCH_KERNEL.

Referenced by quda::Dslash< Float >::launch(), quda::Dslash5< Float, nColor, Arg >::launch(), and quda::Pack< Float, nColor, spin_project >::launch().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ qudaMemcpy2DAsync_()

void quda::qudaMemcpy2DAsync_ ( void *  dst,
size_t  dpitch,
const void *  src,
size_t  spitch,
size_t  width,
size_t  hieght,
cudaMemcpyKind  kind,
const cudaStream_t &  stream,
const char *  func,
const char *  file,
const char *  line 
)

Wrapper around cudaMemcpy2DAsync or driver API equivalent. Potentially add auto-profiling support.

Parameters
[out]dstDestination pointer
[in]dpitchDestination pitch
[in]srcSource pointer
[in]spitchSource pitch
[in]widthWidth in bytes
[in]heightNumber of rows
[in]kindType of memory copy
[in]streamStream to issue copy

Definition at line 170 of file quda_cuda_api.cpp.

References quda::QudaMemCopy::dst, errorQuda, param, PROFILE, and QUDA_PROFILE_MEMCPY2D_D2H_ASYNC.

◆ qudaMemcpy_()

void quda::qudaMemcpy_ ( void *  dst,
const void *  src,
size_t  count,
cudaMemcpyKind  kind,
const char *  func,
const char *  file,
const char *  line 
)

Wrapper around cudaMemcpy used for auto-profiling. Do not call directly, rather call macro below which will grab the location of the call.

Parameters
[out]dstDestination pointer
[in]srcSource pointer
[in]countSize of transfer
[in]kindType of memory copy

Definition at line 126 of file quda_cuda_api.cpp.

References quda::QudaMemCopy::apply(), copy(), and errorQuda.

Here is the call graph for this function:

◆ qudaMemcpyAsync_()

void quda::qudaMemcpyAsync_ ( void *  dst,
const void *  src,
size_t  count,
cudaMemcpyKind  kind,
const cudaStream_t &  stream,
const char *  func,
const char *  file,
const char *  line 
)

Wrapper around cudaMemcpyAsync or driver API equivalent. Potentially add auto-profiling support.

Parameters
[out]dstDestination pointer
[in]srcSource pointer
[in]countSize of transfer
[in]kindType of memory copy
[in]streamStream to issue copy

Definition at line 140 of file quda_cuda_api.cpp.

References quda::QudaMemCopy::apply(), copy(), errorQuda, PROFILE, QUDA_PROFILE_MEMCPY_D2D_ASYNC, QUDA_PROFILE_MEMCPY_D2H_ASYNC, and QUDA_PROFILE_MEMCPY_H2D_ASYNC.

Here is the call graph for this function:

◆ qudaStreamSynchronize()

cudaError_t quda::qudaStreamSynchronize ( cudaStream_t &  stream)

◆ qudaStreamWaitEvent()

cudaError_t quda::qudaStreamWaitEvent ( cudaStream_t  stream,
cudaEvent_t  event,
unsigned int  flags 
)

◆ r_slant()

constexpr const char* quda::r_slant ( const char *  str)
inline

Definition at line 49 of file malloc_quda.h.

Referenced by file_name().

Here is the caller graph for this function:

◆ random()

template<class T >
void quda::random ( T &  t)

Random number insertion over all field elements

Definition at line 14 of file color_spinor_util.cu.

References comm_drand(), parity, and s.

Referenced by genericSource().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ Random() [1/2]

template<class Real >
__device__ Real quda::Random ( cuRNGState state,
Real  a,
Real  b 
)
inline

Return a random number between a and b.

Parameters
statecurand rng state
alower range
bupper range
Returns
random number in range a,b

Definition at line 75 of file random_quda.h.

◆ Random() [2/2]

template<class Real >
__device__ Real quda::Random ( cuRNGState state)
inline

Return a random number between 0 and 1.

Parameters
statecurand rng state
Returns
random number in range 0,1

Definition at line 96 of file random_quda.h.

◆ Random< double >() [1/2]

template<>
__device__ double quda::Random< double > ( cuRNGState state,
double  a,
double  b 
)
inline

Definition at line 86 of file random_quda.h.

◆ Random< double >() [2/2]

template<>
__device__ double quda::Random< double > ( cuRNGState state)
inline

Definition at line 107 of file random_quda.h.

◆ Random< float >() [1/2]

template<>
__device__ float quda::Random< float > ( cuRNGState state,
float  a,
float  b 
)
inline

Definition at line 81 of file random_quda.h.

◆ Random< float >() [2/2]

template<>
__device__ float quda::Random< float > ( cuRNGState state)
inline

Definition at line 102 of file random_quda.h.

◆ reduce()

template<int block_size, typename T , bool do_sum = true, typename Reducer = cub::Sum>
__device__ void quda::reduce ( ReduceArg< T >  arg,
const T &  in,
const int  idx = 0 
)
inline

Definition at line 137 of file cub_helper.cuh.

References arg(), and in.

Referenced by quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >::apply(), quda::blas::multiReduce(), and quda::blas::nativeReduce().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ reduce2d()

template<int block_size_x, int block_size_y, typename T , bool do_sum = true, typename Reducer = cub::Sum>
__device__ void quda::reduce2d ( ReduceArg< T >  arg,
const T &  in,
const int  idx = 0 
)
inline

Definition at line 94 of file cub_helper.cuh.

References quda::ReduceArg< T >::partial, quda::ReduceArg< T >::result_d, sum(), and zero().

Here is the call graph for this function:

◆ reduceRow()

template<int block_size_x, int block_size_y, typename T >
__device__ void quda::reduceRow ( ReduceArg< T >  arg,
const T &  in 
)
inline

◆ reliable()

int quda::reliable ( double &  rNorm,
double &  maxrx,
double &  maxrr,
const double &  r2,
const double &  delta 
)

Definition at line 37 of file inv_bicgstab_quda.cpp.

References sqrt(), and updateR().

Referenced by quda::BiCGstab::operator()(), and quda::MultiShiftCG::operator()().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ reorder_location()

QudaFieldLocation quda::reorder_location ( )

Return whether data is reordered on the CPU or GPU. This can be set at QUDA initialization using the environment variable QUDA_REORDER_LOCATION.

Returns
Reorder location

Definition at line 725 of file lattice_field.cpp.

References reorder_location_.

Referenced by quda::cudaCloverField::copy(), quda::cudaGaugeField::copy(), quda::cpuGaugeField::copy(), quda::cudaColorSpinorField::loadSpinorField(), quda::cudaGaugeField::saveCPUField(), and quda::cudaColorSpinorField::saveSpinorField().

Here is the caller graph for this function:

◆ reorder_location_set()

void quda::reorder_location_set ( QudaFieldLocation  reorder_location_)

Set whether data is reordered on the CPU or GPU. This can be set at QUDA initialization using the environment variable QUDA_REORDER_LOCATION.

Parameters
reorder_location_The location to set where data will be reordered

Definition at line 726 of file lattice_field.cpp.

Referenced by initQudaDevice().

Here is the caller graph for this function:

◆ report()

static void quda::report ( const char *  type)
static

Definition at line 9 of file solver.cpp.

References getVerbosity(), printfQuda, and QUDA_VERBOSE.

Referenced by quda::Solver::create().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ rescaleY()

template<typename Float , int nSpin, int nColor, typename Arg >
__device__ __host__ void quda::rescaleY ( Arg arg,
int  parity,
int  x_cb,
int  c_row,
int  c_col 
)

Rescale the matrix elements by arg.rescale

Definition at line 1167 of file coarse_op_kernel.cuh.

◆ RescaleYCPU()

template<typename Float , int nSpin, int nColor, typename Arg >
void quda::RescaleYCPU ( Arg arg)

Definition at line 1181 of file coarse_op_kernel.cuh.

References arg(), nColor, and parity.

Here is the call graph for this function:

◆ RescaleYGPU()

template<typename Float , int nSpin, int nColor, typename Arg >
__global__ void quda::RescaleYGPU ( Arg  arg)

Definition at line 1195 of file coarse_op_kernel.cuh.

References arg(), nColor, and parity.

Here is the call graph for this function:

◆ Restrict() [1/2]

template<typename Float , int fineSpin, int fineColor, int coarseSpin, int coarseColor, int coarse_colors_per_thread, typename Arg >
void quda::Restrict ( Arg  arg)

Definition at line 90 of file restrictor.cuh.

References quda::Arg< real, Ns, Nc, order >::nParity, parity, s, and tmp.

Referenced by quda::Transfer::R(), and quda::Transfer::setTransferGPU().

Here is the caller graph for this function:

◆ Restrict() [2/2]

void quda::Restrict ( ColorSpinorField out,
const ColorSpinorField in,
const ColorSpinorField v,
int  Nvec,
const int *  fine_to_coarse,
const int *  coarse_to_fine,
const int *const *  spin_map,
int  parity = QUDA_INVALID_PARITY 
)

Apply the restriction operator.

Parameters
[out]outResulting coarsened field
[in]inInput field on fine grid
[in]vMatrix field containing the null-space components
[in]NvecNumber of null-space components
[in]fine_to_coarseFine-to-coarse lookup table (linear indices)
[in]spin_mapSpin blocking lookup table
[in]parityParity of the input fine field (if single parity input field)

Definition at line 263 of file restrictor.cu.

References checkPrecision, errorQuda, quda::ColorSpinorField::FieldOrder(), in, out, parity, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, and QUDA_SINGLE_PRECISION.

Here is the call graph for this function:

◆ RestrictKernel()

template<int block_size, typename Float , int fineSpin, int fineColor, int coarseSpin, int coarseColor, int coarse_colors_per_thread, typename Arg >
__global__ void quda::RestrictKernel ( Arg  arg)

Here, we ensure that each thread block maps exactly to a geometric block. Each thread block corresponds to one geometric block, with number of threads equal to the number of fine grid points per aggregate, so each thread represents a fine-grid point. The look up table coarse_to_fine is the mapping to each fine grid point.

Definition at line 136 of file restrictor.cuh.

References quda::Arg< real, Ns, Nc, order >::nParity, parity, s, and tmp.

◆ rotateCoarseColor()

template<typename Float , int fineSpin, int fineColor, int coarseColor, int coarse_colors_per_thread, class FineColor , class Rotator >
__device__ __host__ void quda::rotateCoarseColor ( complex< Float >  out[fineSpin *coarse_colors_per_thread],
const FineColor &  in,
const Rotator &  V,
int  parity,
int  nParity,
int  x_cb,
int  coarse_color_block 
)
inline

Rotates from the fine-color basis into the coarse-color basis.

Definition at line 50 of file restrictor.cuh.

References conj(), in, out, s, and V.

Here is the call graph for this function:

◆ s2d() [1/2]

__host__ __device__ double quda::s2d ( short  a)
inline

Definition at line 35 of file convert.h.

Referenced by copyFloatN().

Here is the caller graph for this function:

◆ s2d() [2/2]

__host__ __device__ double quda::s2d ( short  a,
double  c 
)
inline

Definition at line 46 of file convert.h.

◆ s2f() [1/2]

__host__ __device__ float quda::s2f ( short  a)
inline

Definition at line 34 of file convert.h.

Referenced by copy(), copy_and_scale(), and copyFloatN().

Here is the caller graph for this function:

◆ s2f() [2/2]

__host__ __device__ float quda::s2f ( short  a,
float  c 
)
inline

Definition at line 42 of file convert.h.

◆ safe_malloc_()

void * quda::safe_malloc_ ( const char *  func,
const char *  file,
int  line,
size_t  size 
)

Perform a standard malloc() with error-checking. This function should only be called via the safe_malloc() macro, defined in malloc_quda.h

Definition at line 226 of file malloc.cpp.

References quda::MemAlloc::base_size, errorQuda, HOST, memset(), quda::MemAlloc::size, and track_malloc().

Here is the call graph for this function:

◆ saveProfile()

void quda::saveProfile ( const std::string  label = "")

Save profile to disk.

Definition at line 514 of file tune.cpp.

References quda::TuneKey::aux_n, comm_rank(), count, getVerbosity(), gitversion, launchTimer, quda::TuneParam::n_calls, param, printfQuda, QUDA_SUMMARIZE, quda_version, serializeProfile(), serializeTrace(), quda::TraceKey::time, tmp, traceEnabled(), and warningQuda.

Referenced by endQuda(), newDeflationQuda(), and quda::TunableVectorYZ::resizeStep().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ saveTuneCache()

void quda::saveTuneCache ( bool  error)

◆ serializeProfile()

static void quda::serializeProfile ( std::ostream &  out,
std::ostream &  async_out 
)
static

Serialize tunecache to an ostream, useful for writing to a file or sending to other nodes.

Definition at line 199 of file tune.cpp.

References quda::TuneKey::aux, quda::TuneKey::aux_n, quda::TuneParam::comment, quda::TraceKey::key, quda::TuneParam::n_calls, quda::TuneKey::name, param, quda::TuneParam::time, quda::TraceKey::time, tmp, and quda::TuneKey::volume.

Referenced by saveProfile().

Here is the caller graph for this function:

◆ serializeTrace()

static void quda::serializeTrace ( std::ostream &  out)
static

Serialize trace to an ostream, useful for writing to a file or sending to other nodes.

Definition at line 261 of file tune.cpp.

References quda::TuneKey::aux, quda::TuneKey::aux_n, quda::TraceKey::key, quda::TuneKey::name, tmp, and quda::TuneKey::volume.

Referenced by saveProfile().

Here is the caller graph for this function:

◆ serializeTuneCache()

static void quda::serializeTuneCache ( std::ostream &  out)
static

Serialize tunecache to an ostream, useful for writing to a file or sending to other nodes.

Definition at line 172 of file tune.cpp.

References quda::TuneKey::aux, quda::TuneParam::aux, quda::TuneParam::block, quda::TuneParam::comment, quda::TuneParam::grid, quda::TraceKey::key, quda::TuneKey::name, param, quda::TuneParam::shared_bytes, quda::TuneParam::time, and quda::TuneKey::volume.

Referenced by broadcastTuneCache(), and saveTuneCache().

Here is the caller graph for this function:

◆ set() [1/4]

__host__ __device__ double quda::set ( double &  x)
inline

Definition at line 58 of file blas_helper.cuh.

Referenced by Spinor< RegType, StoreType, N, write >::set().

Here is the caller graph for this function:

◆ set() [2/4]

__host__ __device__ double2 quda::set ( double2 &  x)
inline

Definition at line 59 of file blas_helper.cuh.

◆ set() [3/4]

__host__ __device__ double3 quda::set ( double3 &  x)
inline

Definition at line 60 of file blas_helper.cuh.

◆ set() [4/4]

__host__ __device__ double4 quda::set ( double4 &  x)
inline

Definition at line 61 of file blas_helper.cuh.

◆ setDiracParam()

void quda::setDiracParam ( DiracParam diracParam,
QudaInvertParam inv_param,
bool  pc 
)

Definition at line 1562 of file interface_quda.cpp.

References quda::GaugeField::Anisotropy(), quda::DiracParam::b_5, QudaInvertParam_s::b_5, quda::DiracParam::c_5, QudaInvertParam_s::c_5, quda::DiracParam::clover, cloverPrecise, quda::DiracParam::commDim, QudaInvertParam_s::cuda_prec, quda::DiracParam::dagger, QudaInvertParam_s::dagger, QudaInvertParam_s::dirac_order, QudaInvertParam_s::dslash_type, quda::DiracParam::epsilon, QudaInvertParam_s::epsilon, errorQuda, quda::DiracParam::fatGauge, quda::DiracParam::gauge, gaugeFatPrecise, gaugeLongPrecise, gaugePrecise, getVerbosity(), quda::DiracParam::kappa, kappa, QudaInvertParam_s::kappa, quda::DiracParam::laplace3D, QudaInvertParam_s::laplace3D, quda::DiracParam::longGauge, quda::DiracParam::Ls, QudaInvertParam_s::Ls, quda::DiracParam::m5, QudaInvertParam_s::m5, quda::DiracParam::mass, QudaInvertParam_s::mass, QudaInvertParam_s::matpc_type, quda::DiracParam::matpcType, quda::DiracParam::mu, QudaInvertParam_s::mu, quda::LatticeField::Precision(), printfQuda, QUDA_ASQTAD_DIRAC, QUDA_ASQTAD_DSLASH, QUDA_ASQTADPC_DIRAC, QUDA_CLOVER_DIRAC, QUDA_CLOVER_WILSON_DSLASH, QUDA_CLOVERPC_DIRAC, QUDA_COVDEV_DSLASH, QUDA_CPS_WILSON_DIRAC_ORDER, QUDA_DEBUG_VERBOSE, QUDA_DOMAIN_WALL_4D_DIRAC, QUDA_DOMAIN_WALL_4D_DSLASH, QUDA_DOMAIN_WALL_4DPC_DIRAC, QUDA_DOMAIN_WALL_DIRAC, QUDA_DOMAIN_WALL_DSLASH, QUDA_DOMAIN_WALLPC_DIRAC, QUDA_GAUGE_COVDEV_DIRAC, QUDA_GAUGE_LAPLACE_DIRAC, QUDA_GAUGE_LAPLACEPC_DIRAC, QUDA_LAPLACE_DSLASH, QUDA_MAX_DWF_LS, QUDA_MOBIUS_DOMAIN_WALL_DIRAC, QUDA_MOBIUS_DOMAIN_WALLPC_DIRAC, QUDA_MOBIUS_DWF_DSLASH, QUDA_STAGGERED_DIRAC, QUDA_STAGGERED_DSLASH, QUDA_STAGGEREDPC_DIRAC, QUDA_TWIST_NONDEG_DOUBLET, QUDA_TWIST_SINGLET, QUDA_TWISTED_CLOVER_DIRAC, QUDA_TWISTED_CLOVER_DSLASH, QUDA_TWISTED_CLOVERPC_DIRAC, QUDA_TWISTED_MASS_DIRAC, QUDA_TWISTED_MASS_DSLASH, QUDA_TWISTED_MASSPC_DIRAC, QUDA_WILSON_DIRAC, QUDA_WILSON_DSLASH, QUDA_WILSONPC_DIRAC, QudaInvertParam_s::twist_flavor, and quda::DiracParam::type.

Referenced by cloverQuda(), computeCloverForceQuda(), computeStaggeredForceQuda(), createDirac(), quda::deflated_solver::deflated_solver(), dslashQuda(), dslashQuda_4dpc(), dslashQuda_mdwf(), init(), MatDagMatQuda(), MatQuda(), quda::DiracParam::print(), setDiracPreParam(), setDiracRefineParam(), and setDiracSloppyParam().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ setDiracPreParam()

void quda::setDiracPreParam ( DiracParam diracParam,
QudaInvertParam inv_param,
const bool  pc,
bool  comms 
)

◆ setDiracRefineParam()

void quda::setDiracRefineParam ( DiracParam diracParam,
QudaInvertParam inv_param,
const bool  pc 
)

◆ setDiracSloppyParam()

void quda::setDiracSloppyParam ( DiracParam diracParam,
QudaInvertParam inv_param,
bool  pc 
)

◆ setIdentity() [1/3]

template<class T , int N>
__device__ __host__ void quda::setIdentity ( Matrix< T, N > *  m)
inline

Definition at line 653 of file quda_matrix.h.

Referenced by bdSVD(), computeAPEStep(), computeGenGauss(), computeOvrImpSTOUTStep(), computeSTOUTStep(), constructHHMat(), exponentiate_iQ(), getRealBidiagMatrix(), and smallSVD().

Here is the caller graph for this function:

◆ setIdentity() [2/3]

template<int N>
__device__ __host__ void quda::setIdentity ( Matrix< float2, N > *  m)
inline

Definition at line 669 of file quda_matrix.h.

◆ setIdentity() [3/3]

template<int N>
__device__ __host__ void quda::setIdentity ( Matrix< double2, N > *  m)
inline

Definition at line 685 of file quda_matrix.h.

◆ setKernelPackT()

void quda::setKernelPackT ( bool  pack)
Parameters
packSets whether to use a kernel to pack the T dimension

Definition at line 24 of file dslash_quda.cu.

Referenced by quda::dslash::DslashPolicyTune< Dslash >::apply(), quda::dslash::DslashPolicyTune< Dslash >::DslashPolicyTune(), eigensolveQuda(), popKernelPackT(), pushKernelPackT(), and set_kernel_pack_t_().

Here is the caller graph for this function:

◆ setPackComms()

void quda::setPackComms ( const int *  dim_pack)

Helper function that sets which dimensions the packing kernel should be packing for.

Parameters
[in]dim_packArray that specifies which dimensions need to be packed.

Definition at line 14 of file dslash_pack2.cu.

References commDim, and QUDA_MAX_DIM.

Referenced by quda::Dslash< Float >::Dslash(), and DslashCuda::DslashCuda().

Here is the caller graph for this function:

◆ setPolicyTuning()

void quda::setPolicyTuning ( bool  policy_tuning_)

◆ setTransferGPU()

void quda::setTransferGPU ( bool  )

◆ setUnitarizeLinksConstants()

void quda::setUnitarizeLinksConstants ( double  unitarize_eps,
double  max_error,
bool  allow_svd,
bool  svd_only,
double  svd_rel_error,
double  svd_abs_error 
)

Definition at line 72 of file unitarize_links_quda.cu.

References acos(), arg(), conj(), cos(), epsilon, errorQuda, getDeterminant(), getTrace(), in, inverse(), s, size, and sqrt().

Referenced by computeKSLinkQuda(), hisq_test(), setActionPaths(), GaugeAlgTest::SetReunitarizationConsts(), setReunitarizationConsts(), and unitarize_link_test().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ setZero() [1/3]

template<class T , int N>
__device__ __host__ void quda::setZero ( Matrix< T, N > *  m)
inline

Definition at line 702 of file quda_matrix.h.

Referenced by computeStaple(), computeStapleRectangle(), exponentiate_iQ(), and quda::GaugeSTOUTArg< Float, GaugeOr, GaugeDs >::GaugeSTOUTArg().

Here is the caller graph for this function:

◆ setZero() [2/3]

template<int N>
__device__ __host__ void quda::setZero ( Matrix< float2, N > *  m)
inline

Definition at line 717 of file quda_matrix.h.

◆ setZero() [3/3]

template<int N>
__device__ __host__ void quda::setZero ( Matrix< double2, N > *  m)
inline

Definition at line 732 of file quda_matrix.h.

◆ shiftColorSpinorField()

void quda::shiftColorSpinorField ( cudaColorSpinorField dst,
const cudaColorSpinorField src,
const unsigned int  parity,
const unsigned int  dim,
const int  shift 
)

◆ shiftColorSpinorFieldExternalKernel()

template<typename FloatN , int N, typename Output , typename Input >
__global__ void quda::shiftColorSpinorFieldExternalKernel ( ShiftQuarkArg< Output, Input >  arg)

Definition at line 93 of file shift_quark_field.cu.

◆ shiftColorSpinorFieldKernel()

template<typename FloatN , int N, typename Output , typename Input >
__global__ void quda::shiftColorSpinorFieldKernel ( ShiftQuarkArg< Output, Input >  arg)

Definition at line 68 of file shift_quark_field.cu.

References neighborIndex(), and quda::ShiftColorSpinorFieldArg< Output, Input >::shift.

Here is the call graph for this function:

◆ sigmaOprod()

template<typename real , int nvector, int mu, int nu, int parity, typename Arg >
__device__ void quda::sigmaOprod ( Arg arg,
int  idx 
)
inline

◆ sigmaOprodKernel()

template<int nvector, typename real , typename Arg >
__global__ void quda::sigmaOprodKernel ( Arg  arg)

Definition at line 66 of file clover_sigma_outer_product.cuh.

References arg(), and parity.

Here is the call graph for this function:

◆ sin() [1/4]

template<typename ValueType >
__host__ __device__ ValueType quda::sin ( ValueType  x)
inline

Definition at line 51 of file complex_quda.h.

References sin().

Referenced by cos(), cosh(), exponentiate_iQ(), genericSource(), genGauss(), link_sanity_check_internal_8(), new_load_half(), polar(), sin(), quda::Trig< isFixed, T >::Sin(), sinh(), su3Reconstruct8(), and tan().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ sin() [2/4]

template<class P >
void quda::sin ( P &  p,
int  d,
int  n,
int  offset 
)

Insert a sinusoidal wave sin ( n * (x[d] / X[d]) * pi ) in dimension d

Definition at line 56 of file color_spinor_util.cu.

References getCoords(), parity, s, sin(), and X.

Here is the call graph for this function:

◆ sin() [3/4]

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::sin ( const complex< ValueType > &  z)
inline

Definition at line 1214 of file complex_quda.h.

References cos(), cosh(), sin(), and sinh().

Here is the call graph for this function:

◆ sin() [4/4]

template<>
__host__ __device__ complex<float> quda::sin ( const complex< float > &  z)
inline

Definition at line 1222 of file complex_quda.h.

References quda::complex< float >::imag(), and quda::complex< float >::real().

Referenced by sin().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ sinh() [1/3]

template<typename ValueType >
__host__ __device__ ValueType quda::sinh ( ValueType  x)
inline

Definition at line 86 of file complex_quda.h.

References sinh().

Referenced by cos(), cosh(), sin(), and sinh().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ sinh() [2/3]

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::sinh ( const complex< ValueType > &  z)
inline

Definition at line 1230 of file complex_quda.h.

References cos(), cosh(), sin(), and sinh().

Here is the call graph for this function:

◆ sinh() [3/3]

template<>
__host__ __device__ complex<float> quda::sinh ( const complex< float > &  z)
inline

Definition at line 1238 of file complex_quda.h.

References quda::complex< float >::imag(), and quda::complex< float >::real().

Referenced by sinh().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ siteChecksum()

template<typename Arg >
__device__ __host__ uint64_t quda::siteChecksum ( const Arg arg,
int  d,
int  parity,
int  x_cb 
)
inline

Definition at line 17 of file checksum.cu.

References quda::Matrix< T, N >::checksum(), and nColor.

Referenced by ChecksumCPU().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ spinorNoise() [1/2]

void quda::spinorNoise ( ColorSpinorField in,
RNG rngstate,
QudaNoiseType  type 
)

◆ spinorNoise() [2/2]

void quda::spinorNoise ( ColorSpinorField src,
unsigned long long  seed,
QudaNoiseType  type 
)

Generate a random noise spinor. This variant just requires a seed and will create and destroy the random number state.

Parameters
src: The colorspinorfield
seed: Seed
type: The type of noise to create (QUDA_NOISE_GAUSS or QUDA_NOISE_UNIFORM)

Definition at line 210 of file spinor_noise.cu.

References quda::RNG::Init(), quda::RNG::Release(), and spinorNoise().

Here is the call graph for this function:

◆ SpinorNoiseCPU()

template<typename real , int Ns, int Nc, QudaNoiseType type, typename Arg >
void quda::SpinorNoiseCPU ( Arg arg)

CPU function to fill spinor fields with random noise.

Definition at line 47 of file spinor_noise.cu.

References arg(), quda::Arg< real, Ns, Nc, order >::nParity, parity, QUDA_NOISE_GAUSS, QUDA_NOISE_UNIFORM, quda::Arg< real, Ns, Nc, order >::rng, s, quda::RNG::State(), and quda::Arg< real, Ns, Nc, order >::volumeCB.

Here is the call graph for this function:

◆ SpinorNoiseGPU()

template<typename real , int Ns, int Nc, QudaNoiseType type, typename Arg >
__global__ void quda::SpinorNoiseGPU ( Arg  arg)

CUDA kernel to fill spinor fields with random noise. Adopts a similar form to the CPU version, using the same inlined functions.

Definition at line 68 of file spinor_noise.cu.

References arg(), quda::Arg< real, Ns, Nc, order >::nParity, parity, QUDA_NOISE_GAUSS, QUDA_NOISE_UNIFORM, quda::Arg< real, Ns, Nc, order >::rng, s, quda::RNG::State(), and quda::Arg< real, Ns, Nc, order >::volumeCB.

Here is the call graph for this function:

◆ sqrt() [1/3]

template<typename ValueType >
__host__ __device__ ValueType quda::sqrt ( ValueType  x)
inline

Definition at line 120 of file complex_quda.h.

References sqrt().

Referenced by acosh(), asinh(), quda::CG::blocksolve(), quda::MG::buildFreeVectors(), cabs(), calculateY(), quda::linalg::Cholesky< Mat, T, N, fast >::Cholesky(), quda::EigenSolver::computeEvals(), quda::EigenSolver::computeSVD(), quda::IncEigCG::eigCGsolve(), exponentiate_iQ(), expsu3(), quda::GMResDR::FlexArnoldiProcedure(), gauss_su3(), quda::MG::generateNullVectors(), genGauss(), quda::Deflation::increment(), invert_test(), invertMultiShiftQuda(), invertMultiSrcQuda(), invertQuda(), l2(), quda::Matrix< T, N >::L2(), quda::TRLM::lanczosStep(), link_sanity_check_internal_8(), main(), new_save_half(), quda::blas::norm1_(), normalize(), quda::Deflation::operator()(), quda::TRLM::operator()(), quda::CG::operator()(), quda::CG3::operator()(), quda::CG3NE::operator()(), quda::CGNE::operator()(), quda::CGNR::operator()(), quda::MPCG::operator()(), quda::PreconCG::operator()(), quda::BiCGstab::operator()(), quda::SimpleBiCGstab::operator()(), quda::MPBiCGstab::operator()(), quda::BiCGstabL::operator()(), quda::GCR::operator()(), quda::MR::operator()(), quda::CACG::operator()(), quda::CACGNE::operator()(), quda::CACGNR::operator()(), quda::CAGCR::operator()(), quda::SD::operator()(), quda::MultiShiftCG::operator()(), quda::MinResExt::operator()(), quda::IncEigCG::operator()(), quda::GMResDR::operator()(), quda::Solver::PrintStats(), quda::Solver::PrintSummary(), quadSum(), quda::Deflation::reduce(), reliable(), quda::BiCGstabL::reliable(), quda::CACG::reliable(), quda::GMResDR::RestartVZH(), setUnitarizeLinksConstants(), sqrt(), su3Reconstruct8(), test(), quda::Deflation::verify(), and quda::MG::verify().

Here is the call graph for this function:

◆ sqrt() [2/3]

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::sqrt ( const complex< ValueType > &  z)
inline

Definition at line 1246 of file complex_quda.h.

References abs(), arg(), polar(), and sqrt().

Here is the call graph for this function:

◆ sqrt() [3/3]

template<>
__host__ __device__ complex<float> quda::sqrt ( const complex< float > &  z)
inline

Definition at line 1252 of file complex_quda.h.

References abs(), arg(), and polar().

Referenced by sqrt().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ staggered()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__device__ __host__ void quda::staggered ( Arg arg,
int  idx,
int  parity 
)
inline

◆ staggeredGPU()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__global__ void quda::staggeredGPU ( Arg  arg)

Definition at line 197 of file dslash_staggered.cuh.

References arg(), quda::DslashArg< Float >::nParity, and quda::DslashArg< Float >::parity.

Here is the call graph for this function:

◆ StaggeredPhase()

template<typename Arg >
__device__ __host__ auto quda::StaggeredPhase ( const int  coords[],
int  dim,
int  dir,
const Arg arg 
) -> typename Arg::real
inline

Compute the staggered phase factor at unit shift from the current lattice coordinates. The routine below optimizes out the shift where possible, hence is only visible where we need to consider the boundary condition.

Parameters
[in] coords: Lattice coordinates
[in] X: Lattice dimensions
[in] dim: Dimension we are hopping
[in] dir: Direction of the unit hop (+1 or -1)
[in] tboundary: Boundary condition

Definition at line 868 of file index_helper.cuh.

References arg(), QUDA_STAGGERED_PHASE_MILC, QUDA_STAGGERED_PHASE_TIFR, and X.

Referenced by applyStaggered().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ store_streaming_double2()

__device__ void quda::store_streaming_double2 ( double2 *  addr,
double  x,
double  y 
)
inline

Definition at line 88 of file inline_ptx.h.

References __PTR.

Referenced by vector_store().

Here is the caller graph for this function:

◆ store_streaming_float2()

__device__ void quda::store_streaming_float2 ( float2 *  addr,
float  x,
float  y 
)
inline

Definition at line 93 of file inline_ptx.h.

References __PTR.

Referenced by vector_store().

Here is the caller graph for this function:

◆ store_streaming_float4()

__device__ void quda::store_streaming_float4 ( float4 *  addr,
float  x,
float  y,
float  z,
float  w 
)
inline

Definition at line 78 of file inline_ptx.h.

References __PTR.

Referenced by vector_store().

Here is the caller graph for this function:

◆ store_streaming_short2()

__device__ void quda::store_streaming_short2 ( short2 *  addr,
short  x,
short  y 
)
inline

Definition at line 98 of file inline_ptx.h.

References __PTR.

Referenced by vector_store().

Here is the caller graph for this function:

◆ store_streaming_short4()

__device__ void quda::store_streaming_short4 ( short4 *  addr,
short  x,
short  y,
short  z,
short  w 
)
inline

Definition at line 83 of file inline_ptx.h.

References __PTR.

Referenced by vector_store().

Here is the caller graph for this function:

◆ STOUTStep()

void quda::STOUTStep ( GaugeField dataDs,
const GaugeField dataOr,
double  rho 
)

Apply STOUT smearing to the gauge field.

Parameters
[out] dataDs: Output smeared field
[in] dataOr: Input gauge field
[in] rho: Smearing parameter

Definition at line 129 of file gauge_stout.cu.

References errorQuda, quda::GaugeField::isNative(), quda::GaugeField::Order(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_SINGLE_PRECISION, and quda::GaugeField::Reconstruct().

Referenced by performSTOUTnStep().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ str_end()

constexpr const char* quda::str_end ( const char *  str)
inline

Definition at line 47 of file malloc_quda.h.

Referenced by file_name().

Here is the caller graph for this function:

◆ str_slant()

constexpr bool quda::str_slant ( const char *  str)
inline

Definition at line 48 of file malloc_quda.h.

Referenced by file_name().

Here is the caller graph for this function:

◆ SubTraceUnit()

template<class T >
__device__ __host__ void quda::SubTraceUnit ( Matrix< T, 3 > &  a)
inline

Definition at line 1125 of file quda_matrix.h.

◆ sum() [1/4]

__host__ __device__ void quda::sum ( double &  a,
double &  b 
)
inline

Definition at line 62 of file blas_helper.cuh.

Referenced by quda::EigenSolver::blockOrthogonalize(), quda::blas::cdotNormA_(), quda::blas::cdotNormB_(), dslashReference(), dslashReference_4d_sgpu(), dslashReference_5th(), quda::blas::Norm2< ReduceType, Float2, FloatN >::flops(), getRealTraceUVdagger(), quda::blas::multiReduceKernel(), quda::blas::multiReduceLaunch(), quda::blas::Norm2< ReduceType, Float2, FloatN >::operator()(), quda::blas::Dot< NXZ, ReduceType, Float2, FloatN >::operator()(), quda::reduce_vector< T >::operator()(), quda::blas::axpbyzNorm2< ReduceType, Float2, FloatN >::operator()(), quda::blas::AxpyReDot< ReduceType, Float2, FloatN >::operator()(), quda::blas::Cdot< NXZ, ReduceType, Float2, FloatN >::operator()(), quda::blas::CdotCopy< NXZ, ReduceType, Float2, FloatN >::operator()(), quda::blas::caxpyNorm2< ReduceType, Float2, FloatN >::operator()(), quda::blas::caxpyxmaznormx< ReduceType, Float2, FloatN >::operator()(), quda::blas::cabxpyzaxnorm< ReduceType, Float2, FloatN >::operator()(), quda::blas::caxpydotzy< ReduceType, Float2, FloatN >::operator()(), quda::blas::CdotNormA< ReduceType, Float2, FloatN >::operator()(), quda::blas::caxpbypzYmbwcDotProductUYNormY_< ReduceType, Float2, FloatN >::operator()(), quda::blas::quadrupleCG3InitNorm_< ReduceType, Float2, FloatN >::operator()(), quda::blas::quadrupleCG3UpdateNorm_< ReduceType, Float2, FloatN >::operator()(), quda::blas::doubleCG3InitNorm_< ReduceType, Float2, FloatN >::operator()(), quda::blas::ReduceFunctor< ReduceType, Float2, FloatN >::pre(), quda::blas::MultiReduceFunctor< NXZ, ReduceType, Float2, FloatN >::pre(), reduce2d(), quda::blas::reduceKernel(), quda::blas::reduceLaunch(), reduceRow(), and sum().

Here is the caller graph for this function:

◆ sum() [2/4]

__host__ __device__ void quda::sum ( double2 &  a,
double2 &  b 
)
inline

Definition at line 63 of file blas_helper.cuh.

◆ sum() [3/4]

__host__ __device__ void quda::sum ( double3 &  a,
double3 &  b 
)
inline

Definition at line 68 of file blas_helper.cuh.

◆ sum() [4/4]

__host__ __device__ void quda::sum ( double4 &  a,
double4 &  b 
)
inline

Definition at line 74 of file blas_helper.cuh.

References doubledouble::head(), sum(), doubledouble2::x, doubledouble3::x, doubledouble2::y, doubledouble3::y, and doubledouble3::z.

Here is the call graph for this function:

◆ tan() [1/2]

template<typename ValueType >
__host__ __device__ ValueType quda::tan ( ValueType  x)
inline

Definition at line 56 of file complex_quda.h.

References tan().

Here is the call graph for this function:

◆ tan() [2/2]

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::tan ( const complex< ValueType > &  z)
inline

Definition at line 1258 of file complex_quda.h.

References cos(), and sin().

Referenced by tan().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ tanh() [1/2]

template<typename ValueType >
__host__ __device__ ValueType quda::tanh ( ValueType  x)
inline

Definition at line 91 of file complex_quda.h.

References tanh().

Here is the call graph for this function:

◆ tanh() [2/2]

template<typename ValueType >
__host__ __device__ complex< ValueType > quda::tanh ( const complex< ValueType > &  z)
inline

Definition at line 1264 of file complex_quda.h.

References exp().

Referenced by tanh().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ timeInterval()

double quda::timeInterval ( struct timeval  start,
struct timeval  end 
)

Definition at line 18 of file inv_gcr_quda.cpp.

◆ traceEnabled()

int quda::traceEnabled ( )

Definition at line 73 of file tune.cpp.

References enable_trace, and quda::cublas::init().

Referenced by postTrace_(), saveProfile(), and tuneLaunch().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ track_free()

static void quda::track_free ( const AllocType type,
void *  ptr 
)
static

Definition at line 121 of file malloc.cpp.

References DEVICE, DEVICE_PINNED, MAPPED, PINNED, and quda::MemAlloc::size.

Referenced by device_free_(), device_pinned_free_(), and host_free_().

Here is the caller graph for this function:

◆ track_malloc()

static void quda::track_malloc ( const AllocType type,
const MemAlloc a,
void *  ptr 
)
static

Definition at line 99 of file malloc.cpp.

References quda::MemAlloc::base_size, DEVICE, DEVICE_PINNED, MAPPED, PINNED, total_host_bytes, and total_pinned_bytes.

Referenced by device_malloc_(), device_pinned_malloc_(), mapped_malloc_(), pinned_malloc_(), and safe_malloc_().

Here is the caller graph for this function:

◆ tuneLaunch()

TuneParam & quda::tuneLaunch ( Tunable tunable,
QudaTune  enabled,
QudaVerbosity  verbosity 
)

Return the optimal launch parameters for a given kernel, either by retrieving them from tunecache or autotuning on the spot.

Definition at line 643 of file tune.cpp.

References quda::Tunable::advanceTuneParam(), quda::Tunable::apply(), quda::TuneKey::aux, quda::TuneParam::aux, quda::TuneParam::block, broadcastTuneCache(), quda::Tunable::checkLaunchParam(), comm_rank(), quda::TuneParam::comment, commGlobalReduction(), quda::Tunable::defaultTuneParam(), quda::blas::end(), errorQuda, quda::TuneParam::grid, quda::Tunable::initTuneParam(), quda::Tunable::jitifyError(), quda::TraceKey::key, quda::Timer::Last(), last_key, quda::TuneParam::n_calls, quda::TuneKey::name, param, quda::Tunable::paramString(), quda::Tunable::perfString(), policyTuning(), quda::Tunable::postTune(), quda::Tunable::preTune(), printfQuda, QUDA_DEBUG_VERBOSE, QUDA_PROFILE_COMPUTE, QUDA_PROFILE_EPILOGUE, QUDA_PROFILE_INIT, QUDA_PROFILE_PREAMBLE, QUDA_PROFILE_TOTAL, QUDA_TUNE_NO, QUDA_TUNE_YES, QUDA_VERBOSE, quda::TuneParam::shared_bytes, quda::Timer::Start(), quda::Timer::Stop(), quda::TuneParam::time, quda::TraceKey::time, traceEnabled(), quda::Tunable::tuneKey(), quda::Tunable::tuningIter(), and quda::TuneKey::volume.

Referenced by quda::GaugePlaq< Float, Gauge >::apply(), quda::DomainWall5D< Float, nDim, nColor, Arg >::apply(), quda::Staggered< Float, nDim, nColor, Arg >::apply(), quda::TwistedMass< Float, nDim, nColor, Arg >::apply(), quda::TwistedClover< Float, nDim, nColor, Arg >::apply(), quda::WilsonClover< Float, nDim, nColor, Arg >::apply(), quda::NdegTwistedMass< Float, nDim, nColor, Arg >::apply(), quda::DomainWall4D< Float, nDim, nColor, Arg >::apply(), quda::TwistedCloverPreconditioned< Float, nDim, nColor, Arg >::apply(), quda::WilsonCloverPreconditioned< Float, nDim, nColor, Arg >::apply(), quda::Wilson< Float, nDim, nColor, Arg >::apply(), quda::TwistedMassPreconditioned< Float, nDim, nColor, Arg >::apply(), quda::Laplace< Float, nDim, nColor, Arg >::apply(), quda::NdegTwistedMassPreconditioned< Float, nDim, nColor, Arg >::apply(), quda::CopySpinor< FloatOut, FloatIn, Ns, Nc, OutOrder, InOrder >::apply(), quda::QudaMemCopy::apply(), quda::blas::copy_ns::CopyCuda< FloatN, N, Output, Input >::apply(), quda::CopyGauge< FloatOut, FloatIn, length, Arg >::apply(), quda::GenericPackGhostLauncher< Float, block_float, Ns, Ms, Nc, Mc, Arg >::apply(), quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::apply(), quda::SpinorNoise< real, Ns, Nc, type, Arg >::apply(), quda::blas::MultiBlas< NXZ, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Functor, T >::apply(), quda::GaugeGauss< Float, Arg >::apply(), quda::CopyGaugeEx< FloatOut, FloatIn, length, OutOrder, InOrder >::apply(), quda::Dslash5< Float, nColor, Arg >::apply(), quda::ShiftColorSpinorField< Output, Input >::apply(), quda::WuppertalSmearing< Float, Ns, Nc, Arg >::apply(), quda::GaugeOvrImpSTOUT< Float, Arg >::apply(), quda::CopyColorSpinor< Ns, Arg >::apply(), quda::blas::MultiReduceCuda< NXZ, doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Reducer >::apply(), quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >::apply(), 
quda::Pack< Float, nColor, spin_project >::apply(), quda::ExtractGhost< nDim, Arg >::apply(), quda::ExtractGhostEx< Float, length, nDim, dim, Order >::apply(), quda::CopyColorSpinor< 4, Arg >::apply(), quda::blas::ReduceCuda< doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer >::apply(), quda::CopySpinorEx< FloatOut, FloatIn, Ns, Nc, OutOrder, InOrder, Basis, extend >::apply(), quda::Gamma< ValueType, basis, dir >::apply(), quda::TwistGamma< Float, nColor, Arg >::apply(), quda::ProjectSU3< Float, G >::apply(), quda::Clover< Float, nSpin, nColor, Arg >::apply(), quda::DslashCoarsePolicyTune::apply(), quda::TwistClover< Float, nSpin, nColor, Arg >::apply(), quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >::apply(), quda::dslash::DslashPolicyTune< Dslash >::apply(), computeMomAction(), forceRecord(), isUnitary(), quda::TunableVectorYZ::resizeStep(), and updateMomentum().

Here is the call graph for this function:

◆ twistCloverApply()

template<bool inverse, typename Float , int nSpin, int nColor, typename Arg >
__device__ __host__ void quda::twistCloverApply ( Arg arg,
int  x_cb,
int  parity 
)
inline

Definition at line 665 of file dslash_quda.cu.

References Mat(), nColor, quda::Arg< real, Ns, Nc, order >::nParity, out, and parity.

Here is the call graph for this function:

◆ twistCloverCPU()

template<bool inverse, typename Float , int nSpin, int nColor, typename Arg >
void quda::twistCloverCPU ( Arg arg)

◆ twistCloverGPU()

template<bool inverse, typename Float , int nSpin, int nColor, typename Arg >
__global__ void quda::twistCloverGPU ( Arg  arg)

Definition at line 717 of file dslash_quda.cu.

References arg(), quda::Arg< real, Ns, Nc, order >::nParity, parity, and quda::Arg< real, Ns, Nc, order >::volumeCB.

Here is the call graph for this function:

◆ twistedClover()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__device__ __host__ void quda::twistedClover ( Arg arg,
int  idx,
int  parity 
)
inline

◆ twistedCloverPreconditionedCPU()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
void quda::twistedCloverPreconditionedCPU ( Arg  arg)

Definition at line 109 of file dslash_twisted_clover_preconditioned.cuh.

References arg(), quda::DslashArg< Float >::nParity, and quda::DslashArg< Float >::parity.

Here is the call graph for this function:

◆ twistedCloverPreconditionedGPU()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__global__ void quda::twistedCloverPreconditionedGPU ( Arg  arg)

Definition at line 124 of file dslash_twisted_clover_preconditioned.cuh.

References arg(), quda::DslashArg< Float >::nParity, and quda::DslashArg< Float >::parity.

Referenced by quda::TwistedCloverPreconditionedLaunch< Float, nDim, nColor, nParity, dagger, xpay, kernel_type, Arg >::launch().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ twistedMass() [1/2]

template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, typename Arg >
__device__ __host__ void quda::twistedMass ( Arg arg,
int  idx,
int  parity 
)
inline

Apply the twisted-mass dslash: out(x) = M*in = a * D * in + (1 + i*b*gamma_5)*x. Note that this routine only exists in xpay form.

Definition at line 29 of file dslash_twisted_mass.cuh.

References arg(), EXTERIOR_KERNEL_ALL, INTERIOR_KERNEL, quda::DslashArg< Float >::kernel_type, quda::DslashArg< Float >::nParity, quda::WilsonArg< Float, nColor, reconstruct_ >::out, quda::DslashArg< Float >::parity, and quda::WilsonArg< Float, nColor, reconstruct_ >::x.

Here is the call graph for this function:

◆ twistedMass() [2/2]

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool asymmetric, bool xpay, KernelType kernel_type, typename Arg >
__device__ __host__ void quda::twistedMass ( Arg arg,
int  idx,
int  parity 
)
inline

Apply the preconditioned twisted-mass dslash.

  • no xpay: out(x) = M*in = a*(1+i*b*gamma_5)D * in
  • with xpay: out(x) = M*in = x + a*(1+i*b*gamma_5)D * in

Definition at line 146 of file dslash_twisted_mass_preconditioned.cuh.

References arg(), EXTERIOR_KERNEL_ALL, INTERIOR_KERNEL, quda::WilsonArg< Float, nColor, reconstruct_ >::out, quda::DslashArg< Float >::parity, and quda::WilsonArg< Float, nColor, reconstruct_ >::x.

Here is the call graph for this function:

◆ twistedMassCPU()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, typename Arg >
void quda::twistedMassCPU ( Arg  arg)

Definition at line 62 of file dslash_twisted_mass.cuh.

References arg(), quda::DslashArg< Float >::nParity, and quda::DslashArg< Float >::parity.

Here is the call graph for this function:

◆ twistedMassGPU()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__global__ void quda::twistedMassGPU ( Arg  arg)

Definition at line 76 of file dslash_twisted_mass.cuh.

References arg(), quda::DslashArg< Float >::nParity, and quda::DslashArg< Float >::parity.

Here is the call graph for this function:

◆ twistedMassPreconditionedCPU()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
void quda::twistedMassPreconditionedCPU ( Arg  arg)

Definition at line 191 of file dslash_twisted_mass_preconditioned.cuh.

References arg(), quda::DslashArg< Float >::nParity, and quda::DslashArg< Float >::parity.

Here is the call graph for this function:

◆ twistedMassPreconditionedGPU()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__global__ void quda::twistedMassPreconditionedGPU ( Arg  arg)

Definition at line 217 of file dslash_twisted_mass_preconditioned.cuh.

References arg(), and quda::DslashArg< Float >::parity.

Here is the call graph for this function:

◆ twistGammaCPU()

template<bool doublet, typename Float , int nColor, typename Arg >
void quda::twistGammaCPU ( Arg  arg)

◆ twistGammaGPU()

template<bool doublet, typename Float , int nColor, int d, typename Arg >
__global__ void quda::twistGammaGPU ( Arg  arg)

Definition at line 353 of file dslash_quda.cu.

References parity, and quda::Arg< real, Ns, Nc, order >::volumeCB.

◆ u32toa()

void quda::u32toa ( char *  buffer,
uint32_t  value 
)
inline

Definition at line 45 of file uint_to_char.h.

Referenced by i32toa(), and quda::Laplace< Float, nDim, nColor, Arg >::tuneKey().

Here is the caller graph for this function:

◆ u64toa()

void quda::u64toa ( char *  buffer,
uint64_t  value 
)
inline

◆ unitarizeLinks() [1/2]

void quda::unitarizeLinks ( cudaGaugeField outfield,
const cudaGaugeField infield,
int *  fails 
)

Definition at line 500 of file unitarize_links_quda.cu.

References errorQuda, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, and QUDA_SINGLE_PRECISION.

Referenced by GaugeAlgTest::CallUnitarizeLinks(), CallUnitarizeLinks(), computeKSLinkQuda(), isUnitary(), unitarize_link_test(), and unitarizeLinks().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ unitarizeLinks() [2/2]

void quda::unitarizeLinks ( cudaGaugeField outfield,
int *  fails 
)

Definition at line 517 of file unitarize_links_quda.cu.

References unitarizeLinks().

Here is the call graph for this function:

◆ unitarizeLinksCPU()

void quda::unitarizeLinksCPU ( cpuGaugeField outfield,
const cpuGaugeField infield 
)

Definition at line 299 of file unitarize_links_quda.cu.

References copyArrayToLink(), copyLinkToArray(), errorQuda, quda::cpuGaugeField::Gauge_p(), num_failures, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION, and quda::LatticeField::Volume().

Referenced by computeHISQLinksCPU(), and TEST().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ updateAlphaZeta()

void quda::updateAlphaZeta ( double *  alpha,
double *  zeta,
double *  zeta_old,
const double *  r2,
const double *  beta,
const double  pAp,
const double *  offset,
const int  nShift,
const int  j_low 
)

Compute the new values of alpha and zeta

Definition at line 128 of file inv_multi_cg_quda.cpp.

References QUDA_MAX_MULTI_SHIFT.

Referenced by quda::MultiShiftCG::operator()().

Here is the caller graph for this function:

◆ updateAp()

void quda::updateAp ( Complex **  beta,
std::vector< ColorSpinorField *>  Ap,
int  begin,
int  size,
int  k 
)

Definition at line 82 of file inv_gcr_quda.cpp.

References quda::blas::caxpy(), and size.

Referenced by orthoDir().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ updateGaugeField()

void quda::updateGaugeField ( GaugeField out,
double  dt,
const GaugeField in,
const GaugeField mom,
bool  conj_mom,
bool  exact 
)

Evolve the gauge field by step size dt using the momentum field.

Parameters
out: Updated gauge field
dt: Step size
in: Input gauge field
mom: Momentum field
conj_mom: Whether we conjugate the momentum in the exponential
exact: Calculate exact exponential or use an expansion

Definition at line 227 of file gauge_update_quda.cu.

References errorQuda, in, quda::LatticeField::Location(), out, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, and QUDA_SINGLE_PRECISION.

Referenced by updateGaugeFieldQuda().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ updateMomentum()

void quda::updateMomentum ( GaugeField mom,
double  coeff,
GaugeField force,
const char *  fname 
)

Update the momentum field from the force field

mom = mom - coeff * [force]_TA

where [A]_TA means the traceless anti-hermitian projection of A

Parameters
mom: Momentum field
coeff: Integration stepsize
force: Force field
fname: The function calling this (fname will be printed if force monitoring is enabled)

Definition at line 328 of file momentum.cu.

References applyU(), arg(), quda::TuneParam::block, quda::blas::bytes, checkCudaError, errorQuda, quda::blas::flops, getTuning(), getVerbosity(), quda::TuneParam::grid, quda::LatticeField::Location(), quda::GaugeField::Order(), parity, quda::LatticeField::Precision(), QUDA_CUDA_FIELD_LOCATION, QUDA_DOUBLE_PRECISION, QUDA_FLOAT2_GAUGE_ORDER, QUDA_RECONSTRUCT_NO, QUDA_SINGLE_PRECISION, qudaDeviceSynchronize, quda::GaugeField::Reconstruct(), quda::TuneParam::shared_bytes, stream, tuneLaunch(), quda::LatticeField::VolString(), quda::LatticeField::VolumeCB(), X, and quda::LatticeField::X().

Referenced by computeCloverForceQuda(), computeGaugeForceQuda(), computeHISQForceQuda(), computeMomAction(), computeStaggeredForceQuda(), and hisq_force_test().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ updateSolution()

void quda::updateSolution ( ColorSpinorField x,
const Complex alpha,
Complex **const  beta,
double *  gamma,
int  k,
std::vector< ColorSpinorField *>  p 
)

Definition at line 145 of file inv_gcr_quda.cpp.

References backSubs(), quda::blas::caxpy(), and X.

Referenced by quda::GCR::operator()().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ variableInv()

template<typename real , int nColor, bool dagger, Dslash5Type type, bool shared, typename Vector , typename Arg >
__device__ __host__ Vector quda::variableInv ( Arg arg,
int  parity,
int  x_cb,
int  s_ 
)
inline

Apply the M5 inverse operator at a given site on the lattice. This is an alternative algorithm that is applicable to variable b and c coefficients: here each thread in the s dimension starts computing at s = s_, and computes the left- and right-handed contributions in two separate passes. For the left-handed contribution we sweep through increasing s, e.g., s=s_, s_+1, s_+2, and for the right-handed one we do the transpose, s=s_, s_-1, s_-2. This allows us to progressively build up the scalar coefficients needed in a SIMD-friendly fashion.

Template Parameters
shared: Whether to use a shared memory scratch pad to store the input field across the Ls dimension to minimize global memory reads.
Parameters
[in] arg: Argument struct containing any meta data and accessors
[in] parity: Parity we are on
[in] x_cb: Checkerboarded 4-d space-time index
[in] s_: Ls dimension coordinate

Definition at line 352 of file dslash_domain_wall_m5.cuh.

References quda::coeff_type< real, is_variable, Arg >::b(), quda::coeff_type< real, is_variable, Arg >::c(), dagger, in, quda::VectorCache< real, Vector >::load(), nColor, out, R, s, quda::VectorCache< real, Vector >::save(), and quda::VectorCache< real, Vector >::sync().

Here is the call graph for this function:

◆ vecLength()

template<typename type >
int quda::vecLength ( )
inline

Definition at line 16 of file convert.h.

◆ vecLength< char >()

template<>
int quda::vecLength< char > ( )
inline

Definition at line 18 of file convert.h.

◆ vecLength< char2 >()

template<>
int quda::vecLength< char2 > ( )
inline

Definition at line 23 of file convert.h.

◆ vecLength< char4 >()

template<>
int quda::vecLength< char4 > ( )
inline

Definition at line 28 of file convert.h.

◆ vecLength< double >()

template<>
int quda::vecLength< double > ( )
inline

Definition at line 21 of file convert.h.

◆ vecLength< double2 >()

template<>
int quda::vecLength< double2 > ( )
inline

Definition at line 26 of file convert.h.

◆ vecLength< double4 >()

template<>
int quda::vecLength< double4 > ( )
inline

Definition at line 31 of file convert.h.

◆ vecLength< float >()

template<>
int quda::vecLength< float > ( )
inline

Definition at line 20 of file convert.h.

◆ vecLength< float2 >()

template<>
int quda::vecLength< float2 > ( )
inline

Definition at line 25 of file convert.h.

◆ vecLength< float4 >()

template<>
int quda::vecLength< float4 > ( )
inline

Definition at line 30 of file convert.h.

◆ vecLength< short >()

template<>
int quda::vecLength< short > ( )
inline

Definition at line 19 of file convert.h.

◆ vecLength< short2 >()

template<>
int quda::vecLength< short2 > ( )
inline

Definition at line 24 of file convert.h.

◆ vecLength< short4 >()

template<>
int quda::vecLength< short4 > ( )
inline

Definition at line 29 of file convert.h.

◆ vector_load()

template<typename VectorType >
__device__ __host__ VectorType quda::vector_load ( void *  ptr,
int  idx 
)
inline

Definition at line 412 of file register_traits.h.

◆ vector_store() [1/8]

template<typename VectorType >
__device__ __host__ void quda::vector_store ( void *  ptr,
int  idx,
const VectorType value 
)
inline

◆ vector_store() [2/8]

template<>
__device__ __host__ void quda::vector_store ( void *  ptr,
int  idx,
const double2 &  value 
)
inline

Definition at line 427 of file register_traits.h.

References store_streaming_double2().

Here is the call graph for this function:

◆ vector_store() [3/8]

template<>
__device__ __host__ void quda::vector_store ( void *  ptr,
int  idx,
const float4 &  value 
)
inline

Definition at line 436 of file register_traits.h.

References store_streaming_float4().

Here is the call graph for this function:

◆ vector_store() [4/8]

template<>
__device__ __host__ void quda::vector_store ( void *  ptr,
int  idx,
const float2 &  value 
)
inline

Definition at line 445 of file register_traits.h.

References store_streaming_float2().

Here is the call graph for this function:

◆ vector_store() [5/8]

template<>
__device__ __host__ void quda::vector_store ( void *  ptr,
int  idx,
const short4 &  value 
)
inline

Definition at line 454 of file register_traits.h.

References store_streaming_short4().

Here is the call graph for this function:

◆ vector_store() [6/8]

template<>
__device__ __host__ void quda::vector_store ( void *  ptr,
int  idx,
const short2 &  value 
)
inline

Definition at line 463 of file register_traits.h.

References store_streaming_short2().

Here is the call graph for this function:

◆ vector_store() [7/8]

template<>
__device__ __host__ void quda::vector_store ( void *  ptr,
int  idx,
const char4 &  value 
)
inline

Definition at line 473 of file register_traits.h.

References store_streaming_short2().

Here is the call graph for this function:

◆ vector_store() [8/8]

template<>
__device__ __host__ void quda::vector_store ( void *  ptr,
int  idx,
const char2 &  value 
)
inline

Definition at line 484 of file register_traits.h.

References vector_store().

Here is the call graph for this function:

◆ virtualBlockDim()

template<typename Arg >
__device__ __host__ int quda::virtualBlockDim ( const Arg arg)
inline

Definition at line 613 of file coarse_op_kernel.cuh.

Referenced by getIndicesShared().

Here is the caller graph for this function:

◆ virtualThreadIdx()

template<typename Arg >
__device__ __host__ int quda::virtualThreadIdx ( const Arg arg)
inline

Definition at line 604 of file coarse_op_kernel.cuh.

Referenced by computeVUV(), and getIndicesShared().

Here is the caller graph for this function:

◆ wilson()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__device__ __host__ void quda::wilson ( Arg arg,
int  idx,
int  s,
int  parity 
)
inline

◆ wilsonClover() [1/2]

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__device__ __host__ void quda::wilsonClover ( Arg arg,
int  idx,
int  parity 
)
inline

Apply the clover preconditioned Wilson dslash.

  • no xpay: out(x) = M*in = A(x)^{-1}D * in(x-mu)
  • with xpay: out(x) = M*in = (1 - kappa*A(x)^{-1}D) * in(x-mu)

Definition at line 37 of file dslash_wilson_clover_preconditioned.cuh.

References quda::WilsonCloverArg< Float, nColor, reconstruct_, twist_ >::A, arg(), EXTERIOR_KERNEL_ALL, INTERIOR_KERNEL, quda::DslashArg< Float >::kernel_type, nColor, quda::DslashArg< Float >::nParity, quda::WilsonArg< Float, nColor, reconstruct_ >::out, quda::DslashArg< Float >::parity, tmp, quda::WilsonArg< Float, nColor, reconstruct_ >::x, and quda::DslashArg< Float >::xpay.

Here is the call graph for this function:

◆ wilsonClover() [2/2]

template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, typename Arg >
__device__ __host__ void quda::wilsonClover ( Arg arg,
int  idx,
int  parity 
)
inline

◆ wilsonCloverCPU()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
void quda::wilsonCloverCPU ( Arg  arg)

Definition at line 89 of file dslash_wilson_clover.cuh.

References arg(), quda::DslashArg< Float >::nParity, and quda::DslashArg< Float >::parity.

Here is the call graph for this function:

◆ wilsonCloverGPU()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__global__ void quda::wilsonCloverGPU ( Arg  arg)

Definition at line 103 of file dslash_wilson_clover.cuh.

References arg(), quda::DslashArg< Float >::nParity, and quda::DslashArg< Float >::parity.

Here is the call graph for this function:

◆ wilsonCloverPreconditionedCPU()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
void quda::wilsonCloverPreconditionedCPU ( Arg  arg)

Definition at line 100 of file dslash_wilson_clover_preconditioned.cuh.

References arg(), quda::DslashArg< Float >::nParity, and quda::DslashArg< Float >::parity.

Here is the call graph for this function:

◆ wilsonCloverPreconditionedGPU()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__global__ void quda::wilsonCloverPreconditionedGPU ( Arg  arg)

Definition at line 115 of file dslash_wilson_clover_preconditioned.cuh.

References arg(), quda::DslashArg< Float >::nParity, and quda::DslashArg< Float >::parity.

Referenced by quda::WilsonCloverPreconditionedLaunch< Float, nDim, nColor, nParity, dagger, xpay, kernel_type, Arg >::launch().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ wilsonCPU()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
void quda::wilsonCPU ( Arg  arg)

Definition at line 165 of file dslash_wilson.cuh.

References arg(), quda::DslashArg< Float >::nParity, and quda::DslashArg< Float >::parity.

Here is the call graph for this function:

◆ wilsonGPU()

template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg >
__global__ void quda::wilsonGPU ( Arg  arg)

Definition at line 180 of file dslash_wilson.cuh.

References arg(), quda::DslashArg< Float >::nParity, and quda::DslashArg< Float >::parity.

Here is the call graph for this function:

◆ writeLinkVariableToArray() [1/2]

template<class T , class U >
__device__ void quda::writeLinkVariableToArray ( const Matrix< T, 3 > &  link,
const int  dir,
const int  idx,
const int  stride,
U *const  array 
)
inline

Definition at line 926 of file quda_matrix.h.

References quda::Matrix< T, N >::data.

◆ writeLinkVariableToArray() [2/2]

__device__ void quda::writeLinkVariableToArray ( const Matrix< complex< double >, 3 > &  link,
const int  dir,
const int  idx,
const int  stride,
float2 *const  array 
)
inline

Definition at line 939 of file quda_matrix.h.

◆ writeMatrixToArray()

template<class T , int N, class U >
__device__ void quda::writeMatrixToArray ( const Matrix< T, N > &  mat,
const int  idx,
const int  stride,
U *const  array 
)
inline

Definition at line 895 of file quda_matrix.h.

References quda::Matrix< T, N >::data.

◆ writeMomentumToArray()

template<class T , class U >
__device__ void quda::writeMomentumToArray ( const Matrix< T, 3 > &  mom,
const int  dir,
const int  idx,
const U  coeff,
const int  stride,
T *const  array 
)
inline

Definition at line 991 of file quda_matrix.h.

References quda::Matrix< T, N >::data.

◆ wuppertalStep() [1/2]

void quda::wuppertalStep ( ColorSpinorField out,
const ColorSpinorField in,
int  parity,
const GaugeField U,
double  A,
double  B 
)

◆ wuppertalStep() [2/2]

void quda::wuppertalStep ( ColorSpinorField out,
const ColorSpinorField in,
int  parity,
const GaugeField U,
double  alpha 
)

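Apply a standard Wuppertal smearing step. Computes $\mathrm{out}(x) = \frac{1}{1+6\alpha}\left(\mathrm{in}(x) + \alpha \sum_{\mu} \left[ U_{\mu}(x)\,\mathrm{in}(x+\hat{\mu}) + U_{\mu}^{\dagger}(x-\hat{\mu})\,\mathrm{in}(x-\hat{\mu}) \right]\right)$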

Parameters
[out] out  The out result field
[in] in  The in spinor field
[in] U  The gauge field
[in] alpha  The smearing parameter

Definition at line 291 of file color_spinor_wuppertal.cu.

References wuppertalStep().

Here is the call graph for this function:

◆ wuppertalStepCPU()

template<typename Float , int Ns, int Nc, typename Arg >
void quda::wuppertalStepCPU ( Arg  arg)

◆ wuppertalStepGPU()

template<typename Float , int Ns, int Nc, typename Arg >
__global__ void quda::wuppertalStepGPU ( Arg  arg)

◆ zero() [1/10]

__device__ __host__ void quda::zero ( double &  a)
inline

Definition at line 14 of file float_vector.h.

◆ zero() [2/10]

__device__ __host__ void quda::zero ( double2 &  a)
inline

Definition at line 15 of file float_vector.h.

◆ zero() [3/10]

__device__ __host__ void quda::zero ( double3 &  a)
inline

Definition at line 16 of file float_vector.h.

◆ zero() [4/10]

__device__ __host__ void quda::zero ( double4 &  a)
inline

Definition at line 17 of file float_vector.h.

◆ zero() [5/10]

__device__ __host__ void quda::zero ( float &  a)
inline

Definition at line 19 of file float_vector.h.

◆ zero() [6/10]

__device__ __host__ void quda::zero ( float2 &  a)
inline

Definition at line 20 of file float_vector.h.

◆ zero() [7/10]

__device__ __host__ void quda::zero ( float3 &  a)
inline

Definition at line 21 of file float_vector.h.

◆ zero() [8/10]

__device__ __host__ void quda::zero ( float4 &  a)
inline

Definition at line 22 of file float_vector.h.

◆ zero() [9/10]

template<typename T >
static void quda::zero ( T  d[],
int  N 
)
static

Definition at line 52 of file inv_mpcg_quda.cpp.

◆ zero() [10/10]

template<typename scalar , int n>
__device__ __host__ void quda::zero ( vector_type< scalar, n > &  v)
inline

Variable Documentation

◆ alloc

std::map<void *, MemAlloc> quda::alloc[N_ALLOC_TYPE]
static

◆ apiTimer

TimeProfile quda::apiTimer("CUDA API calls (driver)")
static

◆ B_array_d

__constant__ signed char quda::B_array_d[MAX_MATRIX_SIZE]
static

Definition at line 16 of file block_orthogonalize.cuh.

Referenced by __launch_bounds__().

◆ B_array_h

signed char quda::B_array_h[MAX_MATRIX_SIZE]
static

Definition at line 19 of file block_orthogonalize.cuh.

◆ bidirectional_debug

bool quda::bidirectional_debug = false
static

Definition at line 11 of file coarse_op.cuh.

◆ commDim

int quda::commDim[QUDA_MAX_DIM]
static

◆ complete_recv_back

bool quda::complete_recv_back[QUDA_MAX_DIM] = { }
static

Definition at line 1123 of file cuda_color_spinor_field.cpp.

◆ complete_recv_fwd

bool quda::complete_recv_fwd[QUDA_MAX_DIM] = { }
static

Definition at line 1122 of file cuda_color_spinor_field.cpp.

◆ complete_send_back

bool quda::complete_send_back[QUDA_MAX_DIM] = { }
static

Definition at line 1125 of file cuda_color_spinor_field.cpp.

◆ complete_send_fwd

bool quda::complete_send_fwd[QUDA_MAX_DIM] = { }
static

Definition at line 1124 of file cuda_color_spinor_field.cpp.

◆ count

__device__ unsigned int quda::count[QUDA_MAX_MULTI_REDUCE] = { }

◆ debug

bool quda::debug = false
static

Definition at line 12 of file multigrid.cpp.

◆ dslash_init

bool quda::dslash_init = false
static

Definition at line 595 of file dslash_coarse.cu.

◆ enable_trace

int quda::enable_trace = 0
static

Definition at line 71 of file tune.cpp.

Referenced by traceEnabled().

◆ first_active_policy

int quda::first_active_policy =static_cast<int>(DslashCoarsePolicy::DSLASH_COARSE_POLICY_DISABLED)
static

◆ force_count

long long quda::force_count = 0
static

Definition at line 26 of file momentum.cu.

◆ force_flush

long long quda::force_flush = 1000
static

Definition at line 27 of file momentum.cu.

◆ force_stream

std::stringstream quda::force_stream
static

Definition at line 25 of file momentum.cu.

◆ gDigitsLut

const char quda::gDigitsLut[200]
static
Initial value:
= {
'0','0','0','1','0','2','0','3','0','4','0','5','0','6','0','7','0','8','0','9',
'1','0','1','1','1','2','1','3','1','4','1','5','1','6','1','7','1','8','1','9',
'2','0','2','1','2','2','2','3','2','4','2','5','2','6','2','7','2','8','2','9',
'3','0','3','1','3','2','3','3','3','4','3','5','3','6','3','7','3','8','3','9',
'4','0','4','1','4','2','4','3','4','4','4','5','4','6','4','7','4','8','4','9',
'5','0','5','1','5','2','5','3','5','4','5','5','5','6','5','7','5','8','5','9',
'6','0','6','1','6','2','6','3','6','4','6','5','6','6','6','7','6','8','6','9',
'7','0','7','1','7','2','7','3','7','4','7','5','7','6','7','7','7','8','7','9',
'8','0','8','1','8','2','8','3','8','4','8','5','8','6','8','7','8','8','8','9',
'9','0','9','1','9','2','9','3','9','4','9','5','9','6','9','7','9','8','9','9'
}

Definition at line 32 of file uint_to_char.h.

◆ initial_cache_size

size_t quda::initial_cache_size = 0
static

Definition at line 110 of file tune.cpp.

◆ isLastBlockDone

__shared__ bool quda::isLastBlockDone

Definition at line 91 of file cub_helper.cuh.

◆ isLastWarpDone

__shared__ volatile bool quda::isLastWarpDone[16]

Definition at line 140 of file cub_helper.cuh.

◆ it

map::iterator quda::it
static

◆ kernelPackT

bool quda::kernelPackT = false
static

Definition at line 22 of file dslash_quda.cu.

Referenced by getKernelPackT().

◆ kptstack

std::stack<bool> quda::kptstack
static

Definition at line 28 of file dslash_quda.cu.

◆ last_key

TuneKey quda::last_key
static

Definition at line 22 of file tune.cpp.

Referenced by getLastTuneKey(), and tuneLaunch().

◆ launchTimer

TimeProfile quda::launchTimer("tuneLaunch")
static

Referenced by saveProfile().

◆ max_eigcg_cycles

int quda::max_eigcg_cycles = 4
static

Definition at line 44 of file inv_eigcg_quda.cpp.

Referenced by quda::IncEigCG::operator()().

◆ max_total_bytes

long quda::max_total_bytes[N_ALLOC_TYPE] = {0}
static

Definition at line 55 of file malloc.cpp.

◆ max_total_host_bytes

long quda::max_total_host_bytes
static

Definition at line 56 of file malloc.cpp.

◆ max_total_pinned_bytes

long quda::max_total_pinned_bytes
static

Definition at line 57 of file malloc.cpp.

◆ mobius_d [1/2]

__constant__ char quda::mobius_d[size]
static

◆ mobius_d [2/2]

__constant__ char quda::mobius_d[size]
static

Definition at line 19 of file dslash_domain_wall_m5.cuh.

◆ Nstream

const int quda::Nstream = 9

◆ pinned_allocator

auto quda::pinned_allocator = [] (size_t bytes ) { return static_cast<Complex*>(pool_pinned_malloc(bytes)); }
static

Definition at line 20 of file deflation.cpp.

Referenced by quda::Deflation::reduce(), and quda::Deflation::verify().

◆ pinned_deleter

auto quda::pinned_deleter = [] (Complex *hptr) { pool_pinned_free(hptr); }
static

Definition at line 21 of file deflation.cpp.

Referenced by quda::Deflation::reduce(), and quda::Deflation::verify().

◆ policy_string

char quda::policy_string[TuneKey::aux_n]
static

Definition at line 600 of file dslash_coarse.cu.

◆ policy_tuning

bool quda::policy_tuning = false
static

Definition at line 494 of file tune.cpp.

Referenced by policyTuning().

◆ profile_count

bool quda::profile_count = true
static

Definition at line 123 of file tune.cpp.

◆ quda_hash

const std::string quda::quda_hash = QUDA_HASH
static

Definition at line 106 of file tune.cpp.

◆ quda_version

const std::string quda::quda_version = STR(QUDA_VERSION_MAJOR) "." STR(QUDA_VERSION_MINOR) "." STR(QUDA_VERSION_SUBMINOR)
static

Definition at line 114 of file tune.cpp.

Referenced by initQudaDevice(), saveProfile(), and saveTuneCache().

◆ reorder_location_

QudaFieldLocation quda::reorder_location_ = QUDA_CUDA_FIELD_LOCATION
static

Definition at line 723 of file lattice_field.cpp.

Referenced by reorder_location().

◆ resource_path

std::string quda::resource_path
static

Definition at line 107 of file tune.cpp.

Referenced by loadTuneCache().

◆ s

__shared__ float quda::s[]

Applies the coarse dslash on a given parity and checkerboard site index

Parameters
out  The result - kappa * Dslash in
Y  The coarse gauge field
kappa  Kappa value
in  The input field
parity  The site parity
x_cb  The checkerboarded site index

Referenced by __launch_bounds__(), quda::coeff_type< real, true, Arg >::a(), quda::DomainWall4DArg< Float, nColor, reconstruct_ >::a5(), accumGivensRotation(), AddCoarseDiagonalCPU(), AddCoarseDiagonalGPU(), AddCoarseTmDiagonalCPU(), AddCoarseTmDiagonalGPU(), applyThirdTerm(), applyTwist(), assignGivensRotation(), quda::coeff_type< real, true, Arg >::b(), bdSVD(), blockOrthoCPU(), quda::EigenSolver::blockOrthogonalize(), quda::MG::buildFreeVectors(), quda::coeff_type< real, true, Arg >::c(), quda::ColorSpinor< Float, Nc, 4 >::chiral_project(), quda::ColorSpinor< Float, Nc, 2 >::chiral_reconstruct(), quda::linalg::Cholesky< Mat, T, N, fast >::Cholesky(), coarseDslash(), coarseDslashKernel(), quda::ColorSpinorField::Components(), compute_site_max(), computeAV(), computeCoarseClover(), computeTMAV(), computeTMCAV(), computeUV(), constantInv(), constructSpinorField(), coordsFromIndex(), covdevReference(), quda::Transfer::createSpinMap(), quda::EigenSolver::deflate(), quda::EigenSolver::deflateSVD(), dimFromFaceIndex(), doBulk(), domainWall4D(), quda::DomainWall4DArg< Float, nColor, reconstruct_ >::DomainWall4DArg(), domainWall4DCPU(), domainWall4DGPU(), domainWall5D(), domainWall5DGPU(), quda::Dslash5Arg< Float, nColor >::Dslash5Arg(), dslash5CPU(), dslash5GPU(), dslash5inv(), dslash5invGPU(), dslashReference(), dslashReference_4d_sgpu(), genericBlas(), genericCudaPrintVector(), genericReduce(), genericSource(), get_coords(), get_mass_normalization_str(), get_memory_type_str(), get_ritz_location_str(), getGivensRotation(), quda::cudaColorSpinorField::Ghost2(), quda::colorspinor::AccessorCB< Float, nSpin, nColor, nVec, QUDA_SPACE_SPIN_COLOR_FIELD_ORDER >::index(), quda::colorspinor::GhostAccessorCB< Float, nSpin, nColor, nVec, QUDA_SPACE_SPIN_COLOR_FIELD_ORDER >::index(), quda::colorspinor::AccessorCB< Float, nSpin, nColor, nVec, QUDA_FLOAT4_FIELD_ORDER >::index(), quda::colorspinor::GhostAccessorCB< Float, nSpin, nColor, nVec, QUDA_FLOAT4_FIELD_ORDER >::index(), 
indexFromFaceIndex(), indexFromFaceIndexStaggered(), indexFromFaceIndexStaggered(), innerProduct(), quda::colorspinor::SpaceColorSpinorOrder< Float, Ns, Nc >::load(), quda::colorspinor::SpaceSpinorColorOrder< Float, Ns, Nc >::load(), quda::colorspinor::PaddedSpaceSpinorColorOrder< Float, Ns, Nc >::load(), quda::colorspinor::QDPJITDiracOrder< Float, Ns, Nc >::load(), quda::colorspinor::SpaceColorSpinorOrder< Float, Ns, Nc >::loadGhost(), quda::colorspinor::SpaceSpinorColorOrder< Float, Ns, Nc >::loadGhost(), quda::colorspinor::PaddedSpaceSpinorColorOrder< Float, Ns, Nc >::loadGhost(), matdagmat(), multiplySpinorByDiracProjector(), multiplySpinorByDiracProjector5(), multiplyVUV(), ndegTwistGamma5(), quda::PreserveBasis< Arg >::operator()(), quda::NonRelBasis< Arg >::operator()(), quda::RelBasis< Arg >::operator()(), quda::ChiralToNonRelBasis< Arg >::operator()(), quda::NonRelToChiralBasis< Arg >::operator()(), quda::ColorSpinor< Float, Nc, 4 >::operator()(), quda::MPCG::operator()(), quda::MPBiCGstab::operator()(), quda::ColorSpinor< Float, Nc, 2 >::operator()(), operator*(), operator+(), operator-(), quda::ColorSpinor< Float, Nc, Ns >::operator-=(), operator/(), outerProdSpinTrace(), packGhost(), packKernel(), packShmemKernel(), packSpinor(), packSpinorKernel(), packStaggeredKernel(), packStaggeredShmemKernel(), printSpinorElement(), random(), quda::EigCGArgs::RestartLanczos(), Restrict(), RestrictKernel(), rotateCoarseColor(), rsqrt_dbldbl(), quda::colorspinor::SpaceColorSpinorOrder< Float, Ns, Nc >::save(), quda::colorspinor::SpaceSpinorColorOrder< Float, Ns, Nc >::save(), quda::colorspinor::PaddedSpaceSpinorColorOrder< Float, Ns, Nc >::save(), quda::colorspinor::QDPJITDiracOrder< Float, Ns, Nc >::save(), quda::colorspinor::SpaceColorSpinorOrder< Float, Ns, Nc >::saveGhost(), quda::colorspinor::SpaceSpinorColorOrder< Float, Ns, Nc >::saveGhost(), quda::colorspinor::PaddedSpaceSpinorColorOrder< Float, Ns, Nc >::saveGhost(), scale_su3_matrix(), 
quda::cudaColorSpinorField::sendGhost(), quda::cudaColorSpinorField::sendStart(), setUnitarizeLinksConstants(), sin(), smallSVD(), SpinorNoiseCPU(), SpinorNoiseGPU(), sqrt_dbldbl(), quda::Transfer::Transfer(), twistGamma5(), variableInv(), wilson(), and quda::Transfer::~Transfer().

◆ size

constexpr int quda::size = 4096

◆ stream

cudaStream_t* quda::stream

◆ total_bytes

long quda::total_bytes[N_ALLOC_TYPE] = {0}
static

Definition at line 54 of file malloc.cpp.

◆ total_host_bytes

long quda::total_host_bytes
static

Definition at line 56 of file malloc.cpp.

Referenced by track_malloc().

◆ total_pinned_bytes

long quda::total_pinned_bytes
static

Definition at line 57 of file malloc.cpp.

Referenced by track_malloc().

◆ trace_list

std::list<TraceKey> quda::trace_list
static

Definition at line 70 of file tune.cpp.

◆ tunecache

map quda::tunecache
static

Definition at line 108 of file tune.cpp.

Referenced by getTuneCache().

◆ tuning

bool quda::tuning = false
static

tuning in progress?

Definition at line 119 of file tune.cpp.

Referenced by activeTuning().

◆ unscaled_shifts

double quda::unscaled_shifts[QUDA_MAX_MULTI_SHIFT]
static

Definition at line 1767 of file interface_quda.cpp.

Referenced by invertMultiShiftQuda().