14 template <
typename Float,
int Ns,
int Nc, QudaReconstructType gRecon>
39 errorQuda(
"Unsupported field order colorspinor=%d gauge=%d combination\n",
in.FieldOrder(),
U.FieldOrder());
51 template <
typename Float,
int Nc,
typename Vector,
typename Arg>
55 const int their_spinor_parity = (
arg.nParity == 2) ? 1-
parity : 0;
62 for (
int dir=0; dir<3; dir++) {
67 if (
arg.commDim[dir] && (
coord[dir] +
arg.nFace >=
arg.dim[dir]) ) {
68 const int ghost_idx = ghostFaceIndex<1>(
coord,
arg.dim, dir,
arg.nFace);
71 const Vector in =
arg.in.Ghost(dir, 1, ghost_idx, their_spinor_parity);
76 const Vector in =
arg.in(fwd_idx, their_spinor_parity);
83 const int gauge_idx = back_idx;
85 if (
arg.commDim[dir] && (
coord[dir] -
arg.nFace < 0) ) {
86 const int ghost_idx = ghostFaceIndex<0>(
coord,
arg.dim, dir,
arg.nFace);
88 const Link U =
arg.U.Ghost(dir, ghost_idx, 1-
parity);
89 const Vector in =
arg.in.Ghost(dir, 0, ghost_idx, their_spinor_parity);
93 const Link U =
arg.U(dir, gauge_idx, 1-
parity);
94 const Vector in =
arg.in(back_idx, their_spinor_parity);
102 template <
typename Float,
int Ns,
int Nc,
typename Arg>
118 template <
typename Float,
int Ns,
int Nc,
typename Arg>
126 for (
int x_cb = 0; x_cb <
arg.volumeCB; x_cb++) {
127 computeWupperalStep<Float,Ns,Nc>(
arg, x_cb,
parity);
134 template <
typename Float,
int Ns,
int Nc,
typename Arg>
137 int x_cb = blockIdx.x*
blockDim.x + threadIdx.x;
142 if (x_cb >=
arg.volumeCB)
return;
146 computeWupperalStep<Float,Ns,Nc>(
arg, x_cb,
parity);
149 template <
typename Float,
int Ns,
int Nc,
typename Arg>
158 return (2*3*Ns*Nc*(8*Nc-2) + 2*3*Nc*Ns )*
arg.nParity*(
long long)
meta.
VolumeCB();
178 wuppertalStepCPU<Float,Ns,Nc>(
arg);
188 template<
typename Float,
int Ns,
int Nc, QudaReconstructType gRecon>
198 template<
typename Float,
int Ns,
int Nc>
200 const GaugeField& U,
double A,
double B)
203 wuppertalStep<Float,Ns,Nc,QUDA_RECONSTRUCT_NO>(
out,
in,
parity, U, A, B);
205 wuppertalStep<Float,Ns,Nc,QUDA_RECONSTRUCT_12>(
out,
in,
parity, U, A, B);
207 wuppertalStep<Float,Ns,Nc,QUDA_RECONSTRUCT_8>(
out,
in,
parity, U, A, B);
209 errorQuda(
"Reconstruction type %d of origin gauge field not supported", U.Reconstruct());
215 template<
typename Float,
int Ns>
217 const GaugeField& U,
double A,
double B)
220 errorQuda(
"Orign and destination fields must have the same number of colors\n");
224 wuppertalStep<Float,Ns,3>(
out,
in,
parity, U, A, B);
226 errorQuda(
" is not implemented for Ncolor!=3");
231 template<
typename Float>
233 const GaugeField& U,
double A,
double B)
236 errorQuda(
"Orign and destination fields must have the same number of spins\n");
259 const GaugeField& U,
double A,
double B)
262 errorQuda(
"Orign and destination fields must be different pointers");
gauge_mapper< Float, gRecon >::type G
colorspinor_mapper< Float, Ns, Nc >::type F
const char * comm_dim_partitioned_string()
Return a string that defines the comm partitioning (used as a tuneKey)
cudaDeviceProp deviceProp
const char * AuxString() const
__device__ __host__ void computeNeighborSum(Vector &out, Arg &arg, int x_cb, int parity)
QudaVerbosity getVerbosity()
#define checkPrecision(...)
void apply(const cudaStream_t &stream)
virtual ~WuppertalSmearing()
char * strcpy(char *__dst, const char *__src)
const char * VolString() const
char * strcat(char *__s1, const char *__s2)
void exchangeGhost(QudaParity parity, int nFace, int dagger, const MemoryLocation *pack_destination=nullptr, const MemoryLocation *halo_location=nullptr, bool gdr_send=false, bool gdr_recv=false) const
This is a unified ghost exchange function for doing a complete halo exchange regardless of the type o...
unsigned int maxBlockSize() const
static __device__ __host__ int linkIndexM1(const int x[], const I X[4], const int mu)
for(int s=0;s< param.dc.Ls;s++)
__device__ __host__ void computeWupperalStep(Arg &arg, int x_cb, int parity)
TuneParam & tuneLaunch(Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity)
const ColorSpinorField & meta
#define checkLocation(...)
WuppertalSmearing(Arg &arg, const ColorSpinorField &meta)
Main header file for host and device accessors to GaugeFields.
enum QudaParity_s QudaParity
QudaFieldLocation Location() const
void wuppertalStepCPU(Arg arg)
void wuppertalStep(ColorSpinorField &out, const ColorSpinorField &in, int parity, const GaugeField &U, double A, double B)
cpuColorSpinorField * out
WuppertalSmearingArg(ColorSpinorField &out, const ColorSpinorField &in, int parity, const GaugeField &U, Float A, Float B)
__global__ void wuppertalStepGPU(Arg arg)
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
unsigned int minThreads() const
__host__ __device__ ValueType conj(ValueType x)
QudaTune getTuning()
Query whether autotuning is enabled or not. Default is enabled but can be overridden by setting QUDA_...
QudaPrecision Precision() const
static __device__ __host__ int linkIndexP1(const int x[], const I X[4], const int mu)
int comm_dim_partitioned(int dim)
static __device__ __host__ void getCoords(int x[], int cb_index, const I X[], int parity)