8 template <
typename Float,
int nColor, QudaReconstructType reconstruct_>
9 struct TwistedMassArg : WilsonArg<Float, nColor, reconstruct_> {
20 WilsonArg<Float,
nColor, reconstruct_>(out, in, U, xpay ? 1.0 : 0.0, x, parity, dagger, comm_override),
24 a_inv(1.0 / (a * (1 + b * b))),
25 b_inv(dagger ? b : -b),
26 asymmetric(asymmetric)
29 if (dagger && !asymmetric) {
53 Vector &
out, Arg &
arg,
int coord[nDim],
int x_cb,
int s,
int parity,
int idx,
int thread_dim,
bool &active)
55 static_assert(twist == 1 || twist == 2,
"twist template must equal 1 or 2");
59 const int their_spinor_parity = nParity == 2 ? 1 -
parity : 0;
62 for (
int d = 0; d < nDim; d++) {
65 constexpr
int proj_dir = dagger ? +1 : -1;
67 = (coord[d] + arg.nFace >= arg.dim[d]) && isActive<kernel_type>(active, thread_dim, d, coord, arg);
69 if (doHalo<kernel_type>(d) &&
ghost) {
72 ghostFaceIndex<1, nDim>(coord, arg.dim, d, arg.nFace) :
75 Link
U = arg.U(d, x_cb, parity);
76 HalfVector
in = arg.in.Ghost(d, 1, ghost_idx + s * arg.dc.ghostFaceCB[d], their_spinor_parity);
77 if (d == 3) in *= arg.t_proj_scale;
80 }
else if (doBulk<kernel_type>() && !
ghost) {
82 Link
U = arg.U(d, x_cb, parity);
85 in = arg.in(fwd_idx + s * arg.dc.volume_4d_cb, their_spinor_parity);
86 in = arg.a * (in + arg.b * in.igamma(4));
88 Vector in0 = arg.in(fwd_idx + 0 * arg.dc.volume_4d_cb, their_spinor_parity);
89 Vector in1 = arg.in(fwd_idx + 1 * arg.dc.volume_4d_cb, their_spinor_parity);
91 in = arg.a * (in0 + arg.b * in0.igamma(4) + arg.c * in1);
93 in = arg.a * (in1 - arg.b * in1.igamma(4) + arg.c * in0);
96 out += (U * in.project(d, proj_dir)).
reconstruct(d, proj_dir);
102 const int gauge_idx = back_idx;
103 constexpr
int proj_dir = dagger ? -1 : +1;
104 const bool ghost = (coord[d] - arg.nFace < 0) && isActive<kernel_type>(active, thread_dim, d, coord, arg);
106 if (doHalo<kernel_type>(d) &&
ghost) {
109 ghostFaceIndex<0, nDim>(coord, arg.dim, d, arg.nFace) :
112 Link
U = arg.U.Ghost(d, ghost_idx, 1 - parity);
113 HalfVector
in = arg.in.Ghost(d, 0, ghost_idx + s * arg.dc.ghostFaceCB[d], their_spinor_parity);
114 if (d == 3) in *= arg.t_proj_scale;
117 }
else if (doBulk<kernel_type>() && !
ghost) {
119 Link
U = arg.U(d, gauge_idx, 1 - parity);
122 in = arg.in(back_idx + s * arg.dc.volume_4d_cb, their_spinor_parity);
123 in = arg.a * (in + arg.b * in.igamma(4));
125 Vector in0 = arg.in(back_idx + 0 * arg.dc.volume_4d_cb, their_spinor_parity);
126 Vector in1 = arg.in(back_idx + 1 * arg.dc.volume_4d_cb, their_spinor_parity);
128 in = arg.a * (in0 + arg.b * in0.igamma(4) + arg.c * in1);
130 in = arg.a * (in1 - arg.b * in1.igamma(4) + arg.c * in0);
156 int x_cb = getCoords<nDim, QUDA_4D_PC, kernel_type>(coord,
arg, idx,
parity, thread_dim);
158 const int my_spinor_parity = nParity == 2 ?
parity : 0;
162 if (!dagger || asymmetric)
163 applyWilson<Float, nDim, nColor, nParity, dagger, kernel_type>(
164 out,
arg, coord, x_cb, 0,
parity, idx, thread_dim, active);
166 applyWilsonTM<Float, nDim, nColor, nParity, dagger, 1, kernel_type>(
167 out,
arg, coord, x_cb, 0,
parity, idx, thread_dim, active);
170 Vector
x = arg.x(x_cb, my_spinor_parity);
171 if (!dagger || asymmetric) {
172 out += arg.a_inv * (x + arg.b_inv * x.igamma(4));
178 Vector
x = arg.out(x_cb, my_spinor_parity);
182 if (isComplete<kernel_type>(arg, coord) && active) {
183 if (!dagger || asymmetric) out = arg.a * (out + arg.b * out.igamma(4));
190 template <
typename Float,
int nDim,
int nColor,
int nParity,
bool dagger,
bool xpay, KernelType kernel_type,
typename Arg>
194 if (arg.asymmetric) {
199 for (
int x_cb = 0; x_cb < arg.threads; x_cb++) {
200 twistedMass<Float, nDim, nColor, nParity, dagger, true, xpay, kernel_type>(
arg, x_cb,
parity);
208 for (
int x_cb = 0; x_cb < arg.threads; x_cb++) {
209 twistedMass<Float, nDim, nColor, nParity, dagger, false, xpay, kernel_type>(
arg, x_cb,
parity);
216 template <
typename Float,
int nDim,
int nColor,
int nParity,
bool dagger,
bool xpay, KernelType kernel_type,
typename Arg>
219 int x_cb = blockIdx.x * blockDim.x + threadIdx.x;
220 if (x_cb >= arg.threads)
return;
223 int parity = nParity == 2 ? blockDim.z * blockIdx.z + threadIdx.z : arg.parity;
225 if (arg.asymmetric) {
228 case 0: twistedMass<Float, nDim, nColor, nParity, true, true, false, kernel_type>(
arg, x_cb, 0);
break;
229 case 1: twistedMass<Float, nDim, nColor, nParity, true, true, false, kernel_type>(
arg, x_cb, 1);
break;
233 case 0: twistedMass<Float, nDim, nColor, nParity, dagger, false, xpay, kernel_type>(
arg, x_cb, 0);
break;
234 case 1: twistedMass<Float, nDim, nColor, nParity, dagger, false, xpay, kernel_type>(
arg, x_cb, 1);
break;
__device__ __host__ void twistedMass(Arg &arg, int idx, int parity)
Apply the twisted-mass dslash out(x) = M*in = a * D * in + (1 + i*b*gamma_5)*x Note this routine only...
void twistedMassPreconditionedCPU(Arg arg)
static constexpr QudaGhostExchange ghost
static constexpr QudaReconstructType reconstruct
Parameter structure for driving the Wilson operator.
__global__ void twistedMassPreconditionedGPU(Arg arg)
mapper< Float >::type real
TwistedMassArg(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double b, bool xpay, const ColorSpinorField &x, int parity, bool dagger, bool asymmetric, const int *comm_override)
static __device__ __host__ int getNeighborIndexCB(const int x[], int mu, int dir, const Arg &arg)
Compute the checkerboard 1-d index for the nearest neighbor.
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
__host__ __device__ ValueType conj(ValueType x)
__device__ __host__ void applyWilsonTM(Vector &out, Arg &arg, int coord[nDim], int x_cb, int s, int parity, int idx, int thread_dim, bool &active)
Applies the off-diagonal part of the Wilson operator premultiplied by twist rotation - this is requir...