20 template <
typename Float,
int nDim,
int nColor,
int nParity,
bool dagger,
bool xpay, KernelType kernel_type,
typename Arg>
22 static constexpr
const char *
kernel =
"quda::domainWall5DGPU";
23 template <
typename Dslash>
26 dslash.
launch(domainWall5DGPU<Float, nDim, nColor, nParity, dagger, xpay, kernel_type, Arg>, tp, arg, stream);
30 template <
typename Float,
int nDim,
int nColor,
typename Arg>
class DomainWall5D :
public Dslash<Float>
39 Dslash<Float>(arg, out, in,
"kernels/dslash_domain_wall_5d.cuh"),
58 switch (arg.kernel_type) {
67 long long bulk = (Ls - 2) * (in.
Volume() /
Ls);
68 long long wall = 2 * (in.
Volume() /
Ls);
69 flops += 96ll * bulk + 120ll * wall;
78 int spinor_bytes = 2 * in.
Ncolor() * in.
Nspin() * in.
Precision() + (isFixed ?
sizeof(float) : 0);
80 switch (arg.kernel_type) {
103 constexpr
int nDim = 5;
104 DomainWall5DArg<Float, nColor, recon> arg(out, in, U, a, m_f, a != 0.0, x, parity, dagger, comm_override);
108 const_cast<cudaColorSpinorField *>(static_cast<const cudaColorSpinorField *>(&in)),
121 #ifdef GPU_DOMAIN_WALL_DIRAC 122 if (in.
V() == out.
V())
errorQuda(
"Aliasing pointers");
135 instantiate<DomainWall5DApply>(
out,
in, U, a, m_f, x,
parity,
dagger, comm_override, profile);
139 errorQuda(
"Domain-wall dslash has not been built");
140 #endif // GPU_DOMAIN_WALL_DIRAC
void launch(T *f, const TuneParam &tp, Arg &arg, const cudaStream_t &stream)
void apply(const cudaStream_t &stream)
This is a helper class that is used to instantiate the correct templated kernel for the dslash...
QudaVerbosity getVerbosity()
#define checkPrecision(...)
DomainWall5DApply(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double m_f, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
const char * VolString() const
void ApplyDomainWall5D(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double m_f, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
Driver for applying the Domain-wall 5-d stencil to a 5-d vector with 5-d preconditioned data order...
void apply(const cudaStream_t &stream)
virtual long long bytes() const
const ColorSpinorField & in
TuneParam & tuneLaunch(Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity)
#define checkLocation(...)
static void launch(Dslash &dslash, TuneParam &tp, Arg &arg, const cudaStream_t &stream)
static constexpr const char * kernel
int ghostFaceCB[QUDA_MAX_DIM+1]
const DslashConstant & getDslashConstant() const
Get the dslash_constant structure from this field.
DomainWall5D(Arg &arg, const ColorSpinorField &out, const ColorSpinorField &in)
cpuColorSpinorField * out
void resizeVector(int y, int z) const
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
void pushKernelPackT(bool pack)
virtual long long flops() const
QudaTune getTuning()
Query whether autotuning is enabled or not. Default is enabled but can be overridden by setting QUDA_...
QudaPrecision Precision() const
QudaFieldOrder FieldOrder() const