11 #include <cuda/atomic>
71 using shmem_retcount_intra_t = cuda::atomic<int, cuda::thread_scope_system>;
72 using shmem_retcount_inter_t = cuda::atomic<int, cuda::thread_scope_device>;
73 using shmem_interior_done_t = cuda::atomic<shmem_sync_t, cuda::thread_scope_device>;
74 using shmem_interior_count_t = cuda::atomic<int, cuda::thread_scope_block>;
88 shmem_retcount_intra_t *get_shmem_retcount_intra();
95 shmem_retcount_inter_t *get_shmem_retcount_inter();
102 shmem_interior_done_t *get_shmem_interior_done();
109 shmem_interior_count_t *get_shmem_interior_count();
601 namespace mobius_tensor_core
609 namespace mobius_eofa
613 double kappa,
const double *eofa_u,
const double *eofa_x,
const double *eofa_y,
758 bool dagger,
int parity,
bool spin_project,
double a,
double b,
double c,
int shmem,
enum QudaTwistGamma5Type_s QudaTwistGamma5Type
void xpay(ColorSpinorField &x, double a, ColorSpinorField &y)
shmem_sync_t set_shmem_sync_counter(shmem_sync_t count)
Set the shmem sync counter to count.
uint64_t shmem_sync_t
type used for shmem signaling
shmem_sync_t inc_shmem_sync_counter()
increase the shmem sync counter for the next dslash application
shmem_sync_t get_shmem_sync_counter()
Get the shmem sync counter.
void apply_dslash5(ColorSpinorField &out, const ColorSpinorField &in, const ColorSpinorField &x, double m_f, double m_5, const Complex *b_5, const Complex *c_5, double a, int eofa_pm, double inv, double kappa, const double *eofa_u, const double *eofa_x, const double *eofa_y, double sherman_morrison, bool dagger, Dslash5Type type)
void apply_fused_dslash(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, ColorSpinorField &y, const ColorSpinorField &x, double m_f, double m_5, const Complex *b_5, const Complex *c_5, bool dagger, int parity, int shift[4], int halo_shift[4], MdwfFusedDslashType type)
void createDslashEvents()
void setKernelPackT(bool pack)
void ApplyStaggered(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
Apply the staggered dslash operator to a color-spinor field.
void ApplyClover(ColorSpinorField &out, const ColorSpinorField &in, const CloverField &clover, bool inverse, int parity)
Apply clover-matrix field to a color-spinor field.
void ApplyTwistedCloverPreconditioned(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, const CloverField &C, double a, double b, bool xpay, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
Driver for applying the preconditioned twisted-clover stencil.
void setPackComms(const int *dim_pack)
Helper function that sets which dimensions the packing kernel should be packing for.
void ApplyWilsonCloverHasenbuschTwistPCClovInv(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, const CloverField &A, double kappa, double mu, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
Driver for applying the twisted-mass stencil.
void PackGhost(void *ghost[2 *QUDA_MAX_DIM], const ColorSpinorField &field, MemoryLocation location, int nFace, bool dagger, int parity, bool spin_project, double a, double b, double c, int shmem, const qudaStream_t &stream)
Dslash face packing routine.
void destroyDslashEvents()
@ D4DAG_D5PREDAG_D5INVDAG
__device__ __host__ Matrix< T, 3 > inverse(const Matrix< T, 3 > &u)
std::complex< double > Complex
void ApplyTwistClover(ColorSpinorField &out, const ColorSpinorField &in, const CloverField &clover, double kappa, double mu, double epsilon, int parity, int dagger, QudaTwistGamma5Type twist)
Apply twisted clover-matrix field to a color-spinor field.
void ApplyCovDev(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, int mu, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
Driver for applying the covariant derivative.
void ApplyNdegTwistedMass(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double b, double c, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
Driver for applying the non-degenerate twisted-mass stencil.
void ApplyTwistGamma(ColorSpinorField &out, const ColorSpinorField &in, int d, double kappa, double mu, double epsilon, int dagger, QudaTwistGamma5Type type)
Apply the twisted-mass gamma operator to a color-spinor field.
void ApplyLaplace(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, int dir, double a, double b, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
Driver for applying the Laplace stencil.
void ApplyWilsonClover(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, const CloverField &A, double kappa, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
Driver for applying the Wilson-clover stencil.
void ApplyTwistedMass(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double b, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
void ApplyWilsonCloverPreconditioned(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, const CloverField &A, double kappa, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
Driver for applying the preconditioned Wilson-clover stencil.
void ApplyStaggeredKahlerDiracInverse(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &Xinv, bool dagger)
Apply the (improved) staggered Kahler-Dirac inverse block to a color-spinor field.
void ApplyTwistedMassPreconditioned(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double b, bool xpay, const ColorSpinorField &x, int parity, bool dagger, bool asymmetric, const int *comm_override, TimeProfile &profile)
Driver for applying the preconditioned twisted-mass stencil.
void ApplyTwistedClover(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, const CloverField &C, double a, double b, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
Driver for applying the twisted-clover stencil.
void pushKernelPackT(bool pack)
void ApplyDomainWall4D(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double m_5, const Complex *b_5, const Complex *c_5, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
Driver for applying the batched Wilson 4-d stencil to a 5-d vector with 4-d preconditioned data order...
void gamma5(ColorSpinorField &out, const ColorSpinorField &in)
Applies a gamma5 matrix to a spinor (wrapper to ApplyGamma)
void ApplyWilsonCloverHasenbuschTwistPCNoClovInv(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, const CloverField &A, double kappa, double mu, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
Driver for applying the Wilson-clover stencil with thist for Hasenbusch.
void ApplyImprovedStaggered(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, const GaugeField &L, double a, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
Apply the improved staggered dslash operator to a color-spinor field.
void ApplyWilson(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double kappa, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
Driver for applying the Wilson stencil.
void ApplyNdegTwistedMassPreconditioned(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double b, double c, bool xpay, const ColorSpinorField &x, int parity, bool dagger, bool asymmetric, const int *comm_override, TimeProfile &profile)
Driver for applying the preconditioned non-degenerate twisted-mass stencil.
void ApplyWilsonCloverHasenbuschTwist(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, const CloverField &A, double kappa, double mu, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
Driver for applying the Wilson-clover stencil.
void ApplyDslash5(ColorSpinorField &out, const ColorSpinorField &in, const ColorSpinorField &x, double m_f, double m_5, const Complex *b_5, const Complex *c_5, double a, bool dagger, Dslash5Type type)
Apply either the domain-wall / mobius Dslash5 operator or the M5 inverse operator....
void ApplyDomainWall5D(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double m_f, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
Driver for applying the Domain-wall 5-d stencil to a 5-d vector with 5-d preconditioned data order.
cudaStream_t qudaStream_t
#define QUDA_MAX_DIM
Maximum number of dimensions supported by QUDA. In practice, no routines make use of more than 5.