1 #include <gauge_field.h>
2 #include <color_spinor_field.h>
3 #include <clover_field.h>
7 #include <dslash_policy.cuh>
8 #include <kernels/dslash_twisted_clover_preconditioned.cuh>
11 This is the preconditioned gauged twisted-clover operator
// Host-side driver class for the preconditioned twisted-clover dslash. It
// derives from Dslash<twistedCloverPreconditioned, Arg> and supplies the launch
// routine (apply) together with the flops() and bytes() estimates.
17 template <typename Arg> class TwistedCloverPreconditioned : public Dslash<twistedCloverPreconditioned, Arg>
// Shorthand for the base class; used below for Dslash::instantiate,
// Dslash::flops and Dslash::bytes.
19 using Dslash = Dslash<twistedCloverPreconditioned, Arg>;
// Construct from the kernel argument struct and the output / input spinor fields.
24 TwistedCloverPreconditioned(Arg &arg, const ColorSpinorField &out, const ColorSpinorField &in) :
// Launch the kernel on `stream`, selecting the template instantiation from the
// run-time flags held in `arg`.
29 void apply(const qudaStream_t &stream)
// Fetch the launch parameters for this object via tuneLaunch.
31 TuneParam tp = tuneLaunch(*this, getTuning(), getVerbosity());
33 // specialize here to constrain the template instantiation
34 if (arg.nParity == 1) {
// The error text states that the xpay form is only defined without dagger, so
// a set dagger flag is rejected here.
// NOTE(review): the enclosing xpay test is not visible in this view -- confirm.
36 if (arg.dagger) errorQuda("xpay operator only defined for not dagger");
// NOTE(review): the template arguments after packShmem look like
// <nParity, dagger, xpay> -- confirm against Dslash::instantiate.
37 Dslash::template instantiate<packShmem, 1, false, true>(tp, stream);
40 Dslash::template instantiate<packShmem, 1, true, false>(tp, stream);
42 Dslash::template instantiate<packShmem, 1, false, false>(tp, stream);
// Reports an unsupported nParity value.
// NOTE(review): presumably the nParity != 1 branch; the `else` is not visible here.
45 errorQuda("Preconditioned twisted-clover operator not defined nParity=%d", arg.nParity);
// Flop estimate for the current arg.kernel_type: the base-class count from
// Dslash::flops() plus clover_flops for each site the kernel touches.
49 long long flops() const
// Extra per-site flop cost added on top of the base dslash count.
// NOTE(review): the breakdown of 504 + 48 is not documented in this view.
// NOTE(review): clover_flops is an int; if Volume()/GhostFace() also return int,
// the products below are evaluated in int before being widened to long long --
// confirm this cannot overflow for large local volumes.
51 int clover_flops = 504 + 48;
52 long long flops = Dslash::flops();
53 switch (arg.kernel_type) {
// Single-dimension exterior kernels: add the clover cost for
// 2 * GhostFace()[kernel_type] sites.
54 case EXTERIOR_KERNEL_X:
55 case EXTERIOR_KERNEL_Y:
56 case EXTERIOR_KERNEL_Z:
57 case EXTERIOR_KERNEL_T: flops += clover_flops * 2 * in.GhostFace()[arg.kernel_type]; break;
// Fused exterior kernel: same as above, summed over all four dimensions.
58 case EXTERIOR_KERNEL_ALL:
59 flops += clover_flops * 2 * (in.GhostFace()[0] + in.GhostFace()[1] + in.GhostFace()[2] + in.GhostFace()[3]);
// Remaining kernel types (their case labels are not visible in this view):
// start from the clover cost over in.Volume() sites.
64 flops += clover_flops * in.Volume();
// For KERNEL_POLICY the full-volume count is kept as is.
66 if (arg.kernel_type == KERNEL_POLICY) break;
67 // now correct for flops done by exterior kernel
// Only dimensions with commDim[d] set contribute ghost sites.
68 long long ghost_sites = 0;
69 for (int d = 0; d < 4; d++)
70 if (arg.commDim[d]) ghost_sites += 2 * in.GhostFace()[d];
71 flops -= clover_flops * ghost_sites;
// Memory-traffic estimate for the current arg.kernel_type: the base-class
// count from Dslash::bytes() plus clover_bytes for each site the kernel touches.
78 long long bytes() const
// Per-site clover traffic: 72 values at the input field's precision, plus two
// floats when isFixed<Arg::Float>::value is true.
// NOTE(review): the two extra floats are presumably per-site norm/scale values
// for fixed-point storage -- confirm.
80 int clover_bytes = 72 * in.Precision() + (isFixed<typename Arg::Float>::value ? 2 * sizeof(float) : 0);
// Doubled when dynamic_clover is not set.
// NOTE(review): presumably because both the clover term and its inverse are
// read in that case -- confirm.
81 if (!arg.dynamic_clover) clover_bytes *= 2;
83 long long bytes = Dslash::bytes();
84 switch (arg.kernel_type) {
// Single-dimension exterior kernels: add clover traffic for
// 2 * GhostFace()[kernel_type] sites.
85 case EXTERIOR_KERNEL_X:
86 case EXTERIOR_KERNEL_Y:
87 case EXTERIOR_KERNEL_Z:
88 case EXTERIOR_KERNEL_T: bytes += clover_bytes * 2 * in.GhostFace()[arg.kernel_type]; break;
// Fused exterior kernel: summed over all four dimensions.
89 case EXTERIOR_KERNEL_ALL:
90 bytes += clover_bytes * 2 * (in.GhostFace()[0] + in.GhostFace()[1] + in.GhostFace()[2] + in.GhostFace()[3]);
// Remaining kernel types (their case labels are not visible in this view):
// start from the clover traffic over in.Volume() sites.
95 bytes += clover_bytes * in.Volume();
// For KERNEL_POLICY the full-volume count is kept as is.
97 if (arg.kernel_type == KERNEL_POLICY) break;
98 // now correct for bytes done by exterior kernel
// Only dimensions with commDim[d] set contribute ghost sites.
99 long long ghost_sites = 0;
100 for (int d = 0; d < 4; d++)
101 if (arg.commDim[d]) ghost_sites += 2 * in.GhostFace()[d];
102 bytes -= clover_bytes * ghost_sites;
// Helper struct templated on the floating-point type, the number of colors and
// the gauge reconstruction type. All the work happens in its constructor, which
// builds the kernel argument, wraps it in a TwistedCloverPreconditioned dslash
// and creates a DslashPolicyTune object for it.
111 template <typename Float, int nColor, QudaReconstructType recon> struct TwistedCloverPreconditionedApply {
// Parameters mirror ApplyTwistedCloverPreconditioned: output and input spinors,
// gauge field U, clover field C, coefficients a and b, the xpay flag with its
// accumulation field x, the parity, the dagger flag, the communication override
// array and the time profile.
113 inline TwistedCloverPreconditionedApply(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U,
114 const CloverField &C, double a, double b, bool xpay, const ColorSpinorField &x, int parity, bool dagger,
115 const int *comm_override, TimeProfile &profile)
// The operator is instantiated with a fixed dimension count of four.
117 constexpr int nDim = 4;
// Bundle every input into the kernel argument struct.
118 TwistedCloverArg<Float, nColor, nDim, recon> arg(out, in, U, C, a, b, xpay, x, parity, dagger, comm_override);
// Dslash driver specialised on the concrete argument type.
119 TwistedCloverPreconditioned<decltype(arg)> twisted(arg, out, in);
// Policy tuner over the dslash. The input field is passed as a non-const
// cudaColorSpinorField pointer, together with its checkerboard volume and
// ghost-face sizes and the profile.
// NOTE(review): the static_cast assumes `in` really is a cudaColorSpinorField,
// and the const_cast drops constness -- confirm the policy does not modify the
// field data in a way callers would observe.
121 dslash::DslashPolicyTune<decltype(twisted)> policy(twisted,
122 const_cast<cudaColorSpinorField *>(static_cast<const cudaColorSpinorField *>(&in)), in.VolumeCB(),
123 in.GhostFaceCB(), profile);
129 Apply the preconditioned twisted-clover Dslash operator
131 out = x + a*A^{-1} D * in = x + a*(C + i*b*gamma_5)^{-1} * \sum_\mu [ U_\mu(x) in(x+\mu) + U^\dagger_\mu(x-\mu) in(x-\mu) ]
// Public entry point for applying the preconditioned twisted-clover dslash.
// It forwards every argument unchanged to
// instantiate<TwistedCloverPreconditionedApply>.
//   out            output spinor field
//   in             input spinor field
//   U              gauge field
//   C              clover field
//   a, b           scalar coefficients forwarded to the kernel argument
//   xpay, x        xpay flag and the accompanying spinor field
//   parity         parity forwarded to the kernel argument
//   dagger         dagger flag
//   comm_override  per-dimension communication override array
//   profile        time profile handed to the policy tuner
133 void ApplyTwistedCloverPreconditioned(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U,
134 const CloverField &C, double a, double b, bool xpay, const ColorSpinorField &x, int parity, bool dagger,
135 const int *comm_override, TimeProfile &profile)
// The dispatch below is only compiled when GPU_TWISTED_CLOVER_DIRAC is defined.
137 #ifdef GPU_TWISTED_CLOVER_DIRAC
138 instantiate<TwistedCloverPreconditionedApply>(out, in, U, C, a, b, xpay, x, parity, dagger, comm_override, profile);
// NOTE(review): presumably the #else branch (the directive is not visible in
// this view): builds without twisted-clover support report an error.
140 errorQuda("Twisted-clover dslash has not been built");
141 #endif // GPU_TWISTED_CLOVER_DIRAC