13 template <
typename Float,
int Ns,
int Nc, QudaReconstructType gRecon>
32 : out(out), in(in), U(U), A(A), B(B), parity(parity), nParity(in.SiteSubset()), nFace(1),
33 dim{ (3-
nParity) * in.
X(0), in.
X(1), in.
X(2), in.
X(3), 1 },
38 errorQuda(
"Unsupported field order colorspinor=%d gauge=%d combination\n", in.FieldOrder(), U.FieldOrder());
50 template <
typename Float,
int Nc,
typename Vector,
typename Arg>
54 const int their_spinor_parity = (arg.
nParity == 2) ? 1-parity : 0;
61 for (
int dir=0; dir<3; dir++) {
64 const int fwd_idx =
linkIndexP1(coord, arg.dim, dir);
66 if ( arg.commDim[dir] && (coord[dir] + arg.nFace >= arg.dim[dir]) ) {
67 const int ghost_idx = ghostFaceIndex<1>(coord, arg.dim, dir, arg.nFace);
69 const Link
U = arg.U(dir, x_cb, parity);
70 const Vector in = arg.in.Ghost(dir, 1, ghost_idx, their_spinor_parity);
74 const Link
U = arg.U(dir, x_cb, parity);
75 const Vector in = arg.in(fwd_idx, their_spinor_parity);
81 const int back_idx =
linkIndexM1(coord, arg.dim, dir);
82 const int gauge_idx = back_idx;
84 if ( arg.commDim[dir] && (coord[dir] - arg.nFace < 0) ) {
85 const int ghost_idx = ghostFaceIndex<0>(coord, arg.dim, dir, arg.nFace);
87 const Link
U = arg.U.Ghost(dir, ghost_idx, 1-parity);
88 const Vector in = arg.in.Ghost(dir, 0, ghost_idx, their_spinor_parity);
92 const Link
U = arg.U(dir, gauge_idx, 1-parity);
93 const Vector in = arg.in(back_idx, their_spinor_parity);
101 template <
typename Float,
int Ns,
int Nc,
typename Arg>
109 Vector
in = arg.in(x_cb, parity);
110 out = arg.A*in + arg.B*
out;
112 arg.out(x_cb, parity) =
out;
116 template <
typename Float,
int Ns,
int Nc,
typename Arg>
124 for (
int x_cb = 0; x_cb < arg.
volumeCB; x_cb++) {
125 computeWupperalStep<Float,Ns,Nc>(
arg, x_cb,
parity);
132 template <
typename Float,
int Ns,
int Nc,
typename Arg>
135 int x_cb = blockIdx.x*blockDim.x + threadIdx.x;
138 int parity = blockDim.y*blockIdx.y + threadIdx.y;
141 if (parity >= arg.
nParity)
return;
142 parity = (arg.
nParity == 2) ? parity : arg.parity;
144 computeWupperalStep<Float,Ns,Nc>(arg, x_cb, parity);
147 template <
typename Float,
int Ns,
int Nc,
typename Arg>
156 return (2*3*Ns*Nc*(8*Nc-2) + 2*3*Nc*Ns )*arg.
nParity*(
long long)meta.
VolumeCB();
160 return arg.out.Bytes() + (2*3+1)*arg.in.Bytes() + arg.
nParity*2*3*arg.U.Bytes()*meta.
VolumeCB();
175 wuppertalStepCPU<Float,Ns,Nc>(
arg);
185 template<
typename Float,
int Ns,
int Nc, QudaReconstructType gRecon>
195 template<
typename Float,
int Ns,
int Nc>
200 wuppertalStep<Float,Ns,Nc,QUDA_RECONSTRUCT_NO>(
out,
in,
parity,
U,
A,
B);
202 wuppertalStep<Float,Ns,Nc,QUDA_RECONSTRUCT_12>(
out,
in,
parity,
U,
A,
B);
204 wuppertalStep<Float,Ns,Nc,QUDA_RECONSTRUCT_8>(
out,
in,
parity,
U,
A,
B);
212 template<
typename Float,
int Ns>
217 errorQuda(
"Orign and destination fields must have the same number of colors\n");
223 errorQuda(
" is not implemented for Ncolor!=3");
228 template<
typename Float>
233 errorQuda(
"Orign and destination fields must have the same number of spins\n");
236 if (out.
Nspin() == 4 ){
238 }
else if (in.
Nspin() == 1 ){
258 if (in.
V() == out.
V()) {
259 errorQuda(
"Orign and destination fields must be different pointers");
293 wuppertalStep(out, in, parity, U, 1./(1.+6.*alpha), alpha/(1.+6.*alpha));
gauge_mapper< Float, gRecon >::type G
colorspinor_mapper< Float, Ns, Nc >::type F
const char * AuxString() const
__device__ __host__ void computeNeighborSum(Vector &out, Arg &arg, int x_cb, int parity)
QudaVerbosity getVerbosity()
#define checkPrecision(...)
void apply(const cudaStream_t &stream)
virtual ~WuppertalSmearing()
const char * VolString() const
const char * comm_dim_partitioned_string(const int *comm_dim_override=0)
Return a string that defines the comm partitioning (used as a tuneKey)
static __device__ __host__ int linkIndexM1(const int x[], const I X[4], const int mu)
__device__ __host__ void computeWupperalStep(Arg &arg, int x_cb, int parity)
TuneParam & tuneLaunch(Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity)
const ColorSpinorField & meta
#define checkLocation(...)
WuppertalSmearing(Arg &arg, const ColorSpinorField &meta)
Main header file for host and device accessors to GaugeFields.
enum QudaParity_s QudaParity
QudaFieldLocation Location() const
void wuppertalStepCPU(Arg arg)
void wuppertalStep(ColorSpinorField &out, const ColorSpinorField &in, int parity, const GaugeField &U, double A, double B)
WuppertalSmearingArg(ColorSpinorField &out, const ColorSpinorField &in, int parity, const GaugeField &U, Float A, Float B)
__global__ void wuppertalStepGPU(Arg arg)
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
QudaReconstructType Reconstruct() const
unsigned int minThreads() const
virtual void exchangeGhost(QudaParity parity, int nFace, int dagger, const MemoryLocation *pack_destination=nullptr, const MemoryLocation *halo_location=nullptr, bool gdr_send=false, bool gdr_recv=false, QudaPrecision ghost_precision=QUDA_INVALID_PRECISION) const =0
__host__ __device__ ValueType conj(ValueType x)
QudaTune getTuning()
Query whether autotuning is enabled or not. Default is enabled but can be overridden by setting QUDA_...
QudaPrecision Precision() const
static __device__ __host__ int linkIndexP1(const int x[], const I X[4], const int mu)
int comm_dim_partitioned(int dim)
__host__ __device__ int getCoords(int coord[], const Arg &arg, int &idx, int parity, int &dim)
Compute the space-time coordinates we are at.