8 constexpr
int size = 4096;
11 template <
typename Float,
int nColor, QudaReconstructType reconstruct_>
21 inline __device__ __host__ complex<real>
a5(
int s)
24 return reinterpret_cast<const complex<real> *
>(
mobius_d)[s];
32 const int *comm_override) :
33 WilsonArg<Float,
nColor, reconstruct_>(out, in, U, xpay ? a : 0.0, x, parity, dagger, comm_override),
36 if (b_5 ==
nullptr || c_5 ==
nullptr)
37 for (
int s = 0;
s <
Ls;
s++) a_5[
s] = a;
39 for (
int s = 0;
s <
Ls;
s++) a_5[
s] = 0.5 * a / (b_5[
s] * (m_5 + 4.0) + 1.0);
43 template <
typename Float,
int nDim,
int nColor,
int nParity,
bool dagger,
bool xpay, KernelType kernel_type,
typename Arg>
53 int x_cb = getCoords<nDim, QUDA_4D_PC, kernel_type>(coord,
arg, idx,
parity, thread_dim);
57 applyWilson<Float, nDim, nColor, nParity, dagger, kernel_type>(
60 int xs = x_cb + s * arg.dc.volume_4d_cb;
62 Vector
x = arg.x(xs, my_spinor_parity);
63 out = x + arg.a5(s) *
out;
65 Vector
x = arg.out(xs, my_spinor_parity);
73 template <
typename Float,
int nDim,
int nColor,
int nParity,
bool dagger,
bool xpay, KernelType kernel_type,
typename Arg>
80 for (
int x_cb = 0; x_cb < arg.threads; x_cb++) {
81 for (
int s = 0;
s < arg.Ls;
s++) {
82 domainWall4D<Float, nDim, nColor, nParity, dagger, xpay, kernel_type>(
arg, x_cb,
s,
parity);
89 template <
typename Float,
int nDim,
int nColor,
int nParity,
bool dagger,
bool xpay, KernelType kernel_type,
typename Arg>
92 int x_cb = blockIdx.x * blockDim.x + threadIdx.x;
93 if (x_cb >= arg.threads)
return;
96 int s = blockIdx.y * blockDim.y + threadIdx.y;
97 if (s >= arg.Ls)
return;
100 int parity =
nParity == 2 ? blockDim.z * blockIdx.z + threadIdx.z : arg.parity;
103 case 0: domainWall4D<Float, nDim, nColor, nParity, dagger, xpay, kernel_type>(
arg, x_cb,
s, 0);
break;
104 case 1: domainWall4D<Float, nDim, nColor, nParity, dagger, xpay, kernel_type>(
arg, x_cb,
s, 1);
break;
static __constant__ char mobius_d[size]
__device__ __host__ complex< real > a5(int s)
Helper function for grabbing the constant struct, whether we are on the GPU or CPU.
DomainWall4DArg(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double m_5, const Complex *b_5, const Complex *c_5, bool xpay, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override)
Parameter structure for driving the Wilson operator.
__global__ void domainWall4DGPU(Arg arg)
mapper< Float >::type real
std::complex< double > Complex
__device__ __host__ void domainWall4D(Arg &arg, int idx, int s, int parity)
complex< real > a_5[QUDA_MAX_DWF_LS]
#define QUDA_MAX_DWF_LS
Maximum length of the Ls dimension for domain-wall fermions.
void domainWall4DCPU(Arg &arg)
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.