23 template <
typename Float,
int nDim,
int nColor,
int nParity,
bool dagger,
bool xpay, KernelType kernel_type,
typename Arg>
25 static constexpr
const char *
kernel =
"quda::domainWall4DGPU";
26 template <
typename Dslash>
29 dslash.
launch(domainWall4DGPU<Float, nDim, nColor, nParity, dagger, xpay, kernel_type, Arg>, tp, arg, stream);
33 template <
typename Float,
int nDim,
int nColor,
typename Arg>
class DomainWall4D :
public Dslash<Float>
42 Dslash<Float>(arg, out, in,
"kernels/dslash_domain_wall_4d.cuh"),
58 using namespace jitify::reflection;
61 Type<Float>(), nDim,
nColor, arg.
nParity, arg.dagger, arg.xpay, arg.kernel_type, Type<Arg>());
62 cuMemcpyHtoDAsync(instance.get_constant_ptr(
"quda::mobius_d"), arg.a_5,
QUDA_MAX_DWF_LS *
sizeof(complex<real>),
84 constexpr
int nDim = 4;
85 DomainWall4DArg<Float, nColor, recon> arg(out, in, U, a, m_5, b_5, c_5, a != 0.0, x, parity, dagger, comm_override);
89 twisted, const_cast<cudaColorSpinorField *>(static_cast<const cudaColorSpinorField *>(&in)),
103 #ifdef GPU_DOMAIN_WALL_DIRAC 104 if (in.
V() == out.
V())
errorQuda(
"Aliasing pointers");
114 instantiate<DomainWall4DApply>(
out,
in, U, a, m_5, b_5, c_5, x,
parity,
dagger, comm_override, profile);
116 errorQuda(
"Domain-wall dslash has not been built");
117 #endif // GPU_DOMAIN_WALL_DIRAC
static __constant__ char mobius_d[size]
void launch(T *f, const TuneParam &tp, Arg &arg, const cudaStream_t &stream)
void apply(const cudaStream_t &stream)
QudaVerbosity getVerbosity()
#define checkPrecision(...)
DomainWall4D(Arg &arg, const ColorSpinorField &out, const ColorSpinorField &in)
const char * VolString() const
void apply(const cudaStream_t &stream)
static constexpr const char * kernel
TuneParam & tuneLaunch(Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity)
#define checkLocation(...)
This is a helper class that is used to instantiate the correct templated kernel for the dslash...
std::complex< double > Complex
void ApplyDomainWall4D(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double m_5, const Complex *b_5, const Complex *c_5, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
Driver for applying the batched Wilson 4-d stencil to a 5-d vector with 4-d preconditioned data order...
const ColorSpinorField & in
int ghostFaceCB[QUDA_MAX_DIM+1]
DomainWall4DApply(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double m_5, const Complex *b_5, const Complex *c_5, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
const DslashConstant & getDslashConstant() const
Get the dslash_constant structure from this field.
cpuColorSpinorField * out
#define QUDA_MAX_DWF_LS
Maximum length of the Ls dimension for domain-wall fermions.
void resizeVector(int y, int z) const
void instantiate(TuneParam &tp, Arg &arg, const cudaStream_t &stream)
This instantiate function is used to instantiate the KernelType template required for the multi-G...
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
static void launch(Dslash &dslash, TuneParam &tp, Arg &arg, const cudaStream_t &stream)
QudaTune getTuning()
Query whether autotuning is enabled or not. Default is enabled but can be overridden by setting QUDA_...
QudaFieldOrder FieldOrder() const