21 template <
typename Float,
int nDim,
int nColor,
int nParity,
bool dagger,
bool xpay, KernelType kernel_type,
typename Arg>
23 static constexpr
const char *
kernel =
"quda::wilsonCloverGPU";
24 template <
typename Dslash>
27 static_assert(
xpay ==
true,
"Twisted-clover operator only defined for xpay");
28 dslash.
launch(wilsonCloverGPU<Float, nDim, nColor, nParity, dagger, xpay, kernel_type, Arg>, tp, arg, stream);
32 template <
typename Float,
int nDim,
int nColor,
typename Arg>
class TwistedClover :
public Dslash<Float>
41 Dslash<Float>(arg, out, in,
"kernels/dslash_wilson_clover.cuh"),
56 errorQuda(
"Twisted-clover operator only defined for xpay=true");
61 int clover_flops = 504 + 48;
63 switch (arg.kernel_type) {
78 int clover_bytes = 72 * in.
Precision() + (isFixed ? 2 *
sizeof(float) : 0);
81 switch (arg.kernel_type) {
106 constexpr
int nDim = 4;
107 WilsonCloverArg<Float, nColor, recon, true> arg(out, in, U, C, a, b, x, parity, dagger, comm_override);
111 twisted, const_cast<cudaColorSpinorField *>(static_cast<const cudaColorSpinorField *>(&in)), in.
VolumeCB(),
126 #ifdef GPU_TWISTED_CLOVER_DIRAC 127 if (in.
V() == out.
V())
errorQuda(
"Aliasing pointers");
137 instantiate<TwistedCloverApply>(
out,
in, U, C, a, b, x,
parity,
dagger, comm_override, profile);
139 errorQuda(
"Twisted-clover dslash has not been built");
140 #endif // GPU_TWISTED_CLOVER_DIRAC void apply(const cudaStream_t &stream)
void launch(T *f, const TuneParam &tp, Arg &arg, const cudaStream_t &stream)
void apply(const cudaStream_t &stream)
QudaVerbosity getVerbosity()
TwistedClover(Arg &arg, const ColorSpinorField &out, const ColorSpinorField &in)
#define checkPrecision(...)
void ApplyTwistedClover(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, const CloverField &C, double a, double b, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
Driver for applying the twisted-clover stencil.
const char * VolString() const
void xpay(ColorSpinorField &x, double a, ColorSpinorField &y)
static void launch(Dslash &dslash, TuneParam &tp, Arg &arg, const cudaStream_t &stream)
virtual long long bytes() const
const int * GhostFaceCB() const
TuneParam & tuneLaunch(Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity)
#define checkLocation(...)
const ColorSpinorField & in
cpuColorSpinorField * out
static constexpr const char * kernel
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
TwistedCloverApply(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, const CloverField &C, double a, double b, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
This is a helper class that is used to instantiate the correct templated kernel for the dslash...
virtual long long flops() const
QudaTune getTuning()
Query whether autotuning is enabled or not. Default is enabled but can be overridden by setting QUDA_...
QudaPrecision Precision() const
QudaFieldOrder FieldOrder() const