21 template <
typename Float,
int nDim,
int nColor,
int nParity,
bool dagger,
bool xpay, KernelType kernel_type,
typename Arg>
23 static constexpr
const char *
kernel =
"quda::twistedCloverPreconditionedGPU";
24 template <
typename Dslash>
27 static_assert(nParity == 1,
"preconditioned twisted-mass operator only defined for nParity=1");
30 kernel_type,
Arg >, tp, arg, stream);
43 Dslash<Float>(arg, out, in,
"kernels/dslash_twisted_clover_preconditioned.cuh"),
61 errorQuda(
"Preconditioned twisted-clover operator not defined nParity=%d", arg.
nParity);
67 int clover_flops = 504 + 48;
69 switch (arg.kernel_type) {
79 flops += clover_flops * in.
Volume();
83 long long ghost_sites = 0;
84 for (
int d = 0; d < 4; d++)
85 if (arg.commDim[d]) ghost_sites += 2 * in.
GhostFace()[d];
86 flops -= clover_flops * ghost_sites;
96 int clover_bytes = 72 * in.
Precision() + (isFixed ? 2 *
sizeof(float) : 0);
97 if (!arg.dynamic_clover) clover_bytes *= 2;
100 switch (arg.kernel_type) {
110 bytes += clover_bytes * in.
Volume();
114 long long ghost_sites = 0;
115 for (
int d = 0; d < 4; d++)
116 if (arg.commDim[d]) ghost_sites += 2 * in.
GhostFace()[d];
117 bytes -= clover_bytes * ghost_sites;
137 constexpr
int nDim = 4;
138 #ifdef DYNAMIC_CLOVER 139 constexpr
bool dynamic_clover =
true;
141 constexpr
bool dynamic_clover =
false;
144 out, in, U, C, a, b, xpay, x, parity, dagger, comm_override);
149 const_cast<cudaColorSpinorField *>(static_cast<const cudaColorSpinorField *>(&in)), in.
VolumeCB(),
165 #ifdef GPU_TWISTED_CLOVER_DIRAC 166 if (in.
V() == out.
V())
errorQuda(
"Aliasing pointers");
176 instantiate<TwistedCloverPreconditionedApply>(
out,
in, U, C, a, b,
xpay, x,
parity,
dagger, comm_override, profile);
178 errorQuda(
"Twisted-clover dslash has not been built");
179 #endif // GPU_TWISTED_CLOVER_DIRAC void launch(T *f, const TuneParam &tp, Arg &arg, const cudaStream_t &stream)
void apply(const cudaStream_t &stream)
QudaVerbosity getVerbosity()
#define checkPrecision(...)
virtual ~TwistedCloverPreconditioned()
__global__ void twistedCloverPreconditionedGPU(Arg arg)
const char * VolString() const
void xpay(ColorSpinorField &x, double a, ColorSpinorField &y)
virtual long long bytes() const
const ColorSpinorField & in
const int * GhostFaceCB() const
TuneParam & tuneLaunch(Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity)
#define checkLocation(...)
void apply(const cudaStream_t &stream)
cpuColorSpinorField * out
This is a helper class that is used to instantiate the correct templated kernel for the dslash...
void ApplyTwistedCloverPreconditioned(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, const CloverField &C, double a, double b, bool xpay, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
Driver for applying the preconditioned twisted-clover stencil.
static void launch(Dslash &dslash, TuneParam &tp, Arg &arg, const cudaStream_t &stream)
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
const int * GhostFace() const
virtual long long flops() const
QudaTune getTuning()
Query whether autotuning is enabled or not. Default is enabled but can be overridden by setting QUDA_...
TwistedCloverPreconditioned(Arg &arg, const ColorSpinorField &out, const ColorSpinorField &in)
QudaPrecision Precision() const
TwistedCloverPreconditionedApply(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, const CloverField &C, double a, double b, bool xpay, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
QudaFieldOrder FieldOrder() const
static constexpr const char * kernel