10 template <
typename Float,
int nColor, QudaReconstructType reconstruct_>
11 struct NdegTwistedMassArg : WilsonArg<Float, nColor, reconstruct_> {
23 const int *comm_override) :
24 WilsonArg<Float,
nColor, reconstruct_>(out, in, U, xpay ? 1.0 : 0.0, x, parity, dagger, comm_override),
28 a_inv(1.0 / (a * (1.0 + b * b - c * c))),
29 b_inv(dagger ? b : -b),
31 asymmetric(asymmetric)
34 if (dagger && !asymmetric) {
59 int x_cb = getCoords<nDim, QUDA_4D_PC, kernel_type>(coord,
arg, idx,
parity, thread_dim);
61 const int my_spinor_parity = nParity == 2 ?
parity : 0;
64 if (!dagger || asymmetric)
65 applyWilson<Float, nDim, nColor, nParity, dagger, kernel_type>(
66 out,
arg, coord, x_cb, flavor,
parity, idx, thread_dim, active);
68 applyWilsonTM<Float, nDim, nColor, nParity, dagger, 2, kernel_type>(
69 out,
arg, coord, x_cb, flavor,
parity, idx, thread_dim, active);
71 int my_flavor_idx = x_cb + flavor * arg.dc.volume_4d_cb;
75 if (!dagger || asymmetric) {
77 Vector x0 = arg.x(x_cb + 0 * arg.dc.volume_4d_cb, my_spinor_parity);
78 Vector x1 = arg.x(x_cb + 1 * arg.dc.volume_4d_cb, my_spinor_parity);
80 out += arg.a_inv * (x0 + arg.b_inv * x0.igamma(4) + arg.c_inv * x1);
82 out += arg.a_inv * (x1 - arg.b_inv * x1.igamma(4) + arg.c_inv * x0);
84 Vector
x = arg.x(my_flavor_idx, my_spinor_parity);
90 Vector
x = arg.out(my_flavor_idx, my_spinor_parity);
94 if (isComplete<kernel_type>(arg, coord) && active) {
95 if (!dagger || asymmetric) {
102 out = arg.a * (out + arg.b * out.igamma(4) + arg.c * cache.
load(threadIdx.x, 1, threadIdx.z));
104 out = arg.a * (out - arg.b * out.igamma(4) + arg.c * cache.
load(threadIdx.x, 0, threadIdx.z));
112 template <
typename Float,
int nDim,
int nColor,
int nParity,
bool dagger,
bool xpay, KernelType kernel_type,
typename Arg>
115 if (arg.asymmetric) {
120 for (
int x_cb = 0; x_cb < arg.threads; x_cb++) {
121 for (
int flavor = 0; flavor < 2; flavor++) {
122 ndegTwistedMass<Float, nDim, nColor, nParity, dagger, true, xpay, kernel_type>(
arg, x_cb, flavor,
parity);
131 for (
int x_cb = 0; x_cb < arg.threads; x_cb++) {
132 for (
int flavor = 0; flavor < 2; flavor++) {
133 ndegTwistedMass<Float, nDim, nColor, nParity, dagger, false, xpay, kernel_type>(
arg, x_cb, flavor,
parity);
141 template <
typename Float,
int nDim,
int nColor,
int nParity,
bool dagger,
bool xpay, KernelType kernel_type,
typename Arg>
144 int x_cb = blockIdx.x * blockDim.x + threadIdx.x;
145 if (x_cb >= arg.threads)
return;
148 int flavor = blockIdx.y * blockDim.y + threadIdx.y;
151 int parity = nParity == 2 ? blockDim.z * blockIdx.z + threadIdx.z : arg.parity;
153 if (arg.asymmetric) {
157 ndegTwistedMass<Float, nDim, nColor, nParity, true, true, false, kernel_type>(
arg, x_cb, flavor, 0);
160 ndegTwistedMass<Float, nDim, nColor, nParity, true, true, false, kernel_type>(
arg, x_cb, flavor, 1);
166 ndegTwistedMass<Float, nDim, nColor, nParity, dagger, false, xpay, kernel_type>(
arg, x_cb, flavor, 0);
169 ndegTwistedMass<Float, nDim, nColor, nParity, dagger, false, xpay, kernel_type>(
arg, x_cb, flavor, 1);
NdegTwistedMassArg(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double b, double c, bool xpay, const ColorSpinorField &x, int parity, bool dagger, bool asymmetric, const int *comm_override)
__device__ __host__ void ndegTwistedMass(Arg &arg, int idx, int flavor, int parity)
Apply the twisted-mass dslash out(x) = M*in = a * D * in + (1 + i*b*gamma_5*tau_3 + c*tau_1)*x Note t...
Parameter structure for driving the Wilson operator.
__device__ Vector load(int x, int y, int z)
Load a vector from the shared memory cache.
__global__ void ndegTwistedMassPreconditionedGPU(Arg arg)
__device__ void sync()
Synchronize the cache.
Class which wraps around a shared memory cache for a Vector type, where each thread in the thread blo...
void ndegTwistedMassPreconditionedCPU(Arg arg)
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
mapper< Float >::type real
__device__ void save(const Vector &a)
Save the vector into the 3-d shared memory cache. Implicitly store the vector at coordinates given by...