21 template <
typename Float,
int nDim,
int nColor,
int nParity,
bool dagger,
bool xpay, KernelType kernel_type,
typename Arg>
23 static constexpr
const char *
kernel =
"quda::ndegTwistedMassPreconditionedGPU";
24 template <
typename Dslash>
27 static_assert(nParity == 1,
"Non-degenerate twisted-mass operator only defined for nParity=1");
28 dslash.
launch(ndegTwistedMassPreconditionedGPU<Float, nDim, nColor, nParity, dagger, xpay, kernel_type, Arg>, tp,
33 template <
typename Float,
int nDim,
int nColor,
typename Arg>
48 Dslash<Float>(arg, out, in,
"kernels/dslash_ndeg_twisted_mass_preconditioned.cuh"),
51 shared(arg.asymmetric || !arg.
dagger)
55 for (
int i = 0; i < 8; i++)
65 if (arg.asymmetric && !arg.dagger)
errorQuda(
"asymmetric operator only defined for dagger");
66 if (arg.asymmetric && arg.xpay)
errorQuda(
"asymmetric operator not defined for xpay");
76 errorQuda(
"Preconditioned non-degenerate twisted-mass operator not defined nParity=%d", arg.
nParity);
103 switch (arg.kernel_type) {
129 constexpr
int nDim = 4;
131 out, in, U, a, b, c, xpay, x, parity, dagger, asymmetric, comm_override);
135 const_cast<cudaColorSpinorField *>(static_cast<const cudaColorSpinorField *>(&in)),
150 #ifdef GPU_NDEG_TWISTED_MASS_DIRAC 151 if (in.
V() == out.
V())
errorQuda(
"Aliasing pointers");
164 instantiate<NdegTwistedMassPreconditionedApply>(
165 out,
in, U, a, b, c,
xpay, x,
parity,
dagger, asymmetric, comm_override, profile);
169 errorQuda(
"Non-degenerate twisted-mass dslash has not been built");
170 #endif // GPU_NDEG_TWISTED_MASS_DIRAC
void launch(T *f, const TuneParam &tp, Arg &arg, const cudaStream_t &stream)
void apply(const cudaStream_t &stream)
QudaVerbosity getVerbosity()
#define checkPrecision(...)
unsigned int sharedBytesPerThread() const
const char * VolString() const
void initTuneParam(TuneParam &param) const
virtual ~NdegTwistedMassPreconditioned()
void xpay(ColorSpinorField &x, double a, ColorSpinorField &y)
static constexpr const char * kernel
This is a helper class that is used to instantiate the correct templated kernel for the dslash...
TuneParam & tuneLaunch(Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity)
#define checkLocation(...)
NdegTwistedMassPreconditioned(Arg &arg, const ColorSpinorField &out, const ColorSpinorField &in)
int ghostFaceCB[QUDA_MAX_DIM+1]
void initTuneParam(TuneParam &param) const
const DslashConstant & getDslashConstant() const
Get the dslash_constant structure from this field.
void defaultTuneParam(TuneParam &param) const
cpuColorSpinorField * out
void apply(const cudaStream_t &stream)
void resizeVector(int y, int z) const
static void launch(Dslash &dslash, TuneParam &tp, Arg &arg, const cudaStream_t &stream)
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
NdegTwistedMassPreconditionedApply(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double b, double c, bool xpay, const ColorSpinorField &x, int parity, bool dagger, bool asymmetric, const int *comm_override, TimeProfile &profile)
void pushKernelPackT(bool pack)
virtual long long flops() const
QudaTune getTuning()
Query whether autotuning is enabled or not. Default is enabled but can be overridden by setting QUDA_...
const ColorSpinorField & in
void ApplyNdegTwistedMassPreconditioned(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double b, double c, bool xpay, const ColorSpinorField &x, int parity, bool dagger, bool asymmetric, const int *comm_override, TimeProfile &profile)
Driver for applying the preconditioned non-degenerate twisted-mass stencil.
QudaFieldOrder FieldOrder() const
void defaultTuneParam(TuneParam &param) const