17 template <
typename Float,
int nColor, QudaReconstructType reconstruct,
bool xpay>
34 __host__ __device__
static constexpr
bool isXpay() {
return xpay; }
44 errorQuda(
"Unsupported field order colorspinor=%d gauge=%d combination\n",
in.FieldOrder(),
U.FieldOrder());
58 template <
typename Float,
int nDim,
int nColor,
typename Vector,
typename Arg>
61 const int their_spinor_parity = (
arg.nParity == 2) ? 1-
parity : 0;
68 for (
int d = 0;
d<nDim;
d++)
74 const int ghost_idx = ghostFaceIndex<1>(
coord,
arg.dim,
d,
arg.nFace);
77 const Vector in =
arg.in.Ghost(
d, 1, ghost_idx, their_spinor_parity);
83 const Vector in =
arg.in(fwd_idx, their_spinor_parity);
90 const int gauge_idx = back_idx;
93 const int ghost_idx = ghostFaceIndex<0>(
coord,
arg.dim,
d,
arg.nFace);
95 const Link U =
arg.U.Ghost(
d, ghost_idx, 1-
parity);
96 const Vector in =
arg.in.Ghost(
d, 0, ghost_idx, their_spinor_parity);
102 const Vector in =
arg.in(back_idx, their_spinor_parity);
112 template <
typename Float,
int nDim,
int nColor,
typename Arg>
118 applyLaplace<Float,nDim,nColor>(
out,
arg, x_cb,
parity);
128 template <
typename Float,
int nDim,
int nColor,
typename Arg>
136 for (
int x_cb = 0; x_cb <
arg.volumeCB; x_cb++) {
137 laplace<Float,nDim,nColor>(
arg, x_cb,
parity);
144 template <
typename Float,
int nDim,
int nColor,
typename Arg>
147 int x_cb = blockIdx.x*
blockDim.x + threadIdx.x;
152 if (x_cb >=
arg.volumeCB)
return;
155 laplace<Float,nDim,nColor>(
arg, x_cb,
parity);
158 template <
typename Float,
int nDim,
int nColor,
typename Arg>
172 (
arg.isXpay() ?
arg.x.Bytes() : 0);
184 comm[0] = (
arg.commDim[0] ?
'1' :
'0');
185 comm[1] = (
arg.commDim[1] ?
'1' :
'0');
186 comm[2] = (
arg.commDim[2] ?
'1' :
'0');
187 comm[3] = (
arg.commDim[3] ?
'1' :
'0');
198 laplaceCPU<Float,nDim,nColor>(
arg);
209 template <
typename Float,
int nColor, QudaReconstructType recon>
213 constexpr
int nDim = 4;
226 template <
typename Float,
int nColor>
227 void ApplyLaplace(ColorSpinorField &
out,
const ColorSpinorField &
in,
const GaugeField &U,
237 errorQuda(
"Unsupported reconstruct type %d\n", U.Reconstruct());
242 template <
typename Float>
243 void ApplyLaplace(ColorSpinorField &
out,
const ColorSpinorField &
in,
const GaugeField &U,
249 errorQuda(
"Unsupported number of colors %d\n", U.Ncolor());
262 void ApplyLaplace(ColorSpinorField &
out,
const ColorSpinorField &
in,
const GaugeField &U,
285 errorQuda(
"Unsupported precision %d\n", U.Precision());
virtual void apply(const cudaStream_t &stream)=0
__device__ __host__ void applyLaplace(Vector &out, Arg &arg, int x_cb, int parity)
void xpay(ColorSpinorField &x, const double &a, ColorSpinorField &y)
Laplace(Arg &arg, const ColorSpinorField &meta)
__global__ void laplaceGPU(Arg arg)
cudaDeviceProp deviceProp
const char * AuxString() const
QudaVerbosity getVerbosity()
#define checkPrecision(...)
char * strcpy(char *__dst, const char *__src)
const char * VolString() const
char * strcat(char *__s1, const char *__s2)
void exchangeGhost(QudaParity parity, int nFace, int dagger, const MemoryLocation *pack_destination=nullptr, const MemoryLocation *halo_location=nullptr, bool gdr_send=false, bool gdr_recv=false) const
This is a unified ghost exchange function for doing a complete halo exchange regardless of the type o...
__device__ __host__ void laplace(Arg &arg, int x_cb, int parity)
__host__ static __device__ constexpr bool isXpay()
Parameter structure for driving the Laplace operator.
LaplaceArg(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, Float kappa, const ColorSpinorField *x, int parity)
VOLATILE spinorFloat kappa
unsigned int minThreads() const
static __device__ __host__ int linkIndexM1(const int x[], const I X[4], const int mu)
for(int s=0;s< param.dc.Ls;s++)
TuneParam & tuneLaunch(Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity)
#define checkLocation(...)
Main header file for host and device accessors to GaugeFields.
enum QudaParity_s QudaParity
QudaFieldLocation Location() const
cpuColorSpinorField * out
colorspinor_mapper< Float, 1, nColor >::type F
const ColorSpinorField & meta
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
void ApplyLaplace(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double kappa, const ColorSpinorField *x, int parity)
Driver for applying the Laplace stencil.
__host__ __device__ ValueType conj(ValueType x)
QudaTune getTuning()
Query whether autotuning is enabled or not. Default is enabled but can be overridden by setting QUDA_...
static __inline__ size_t size_t d
static __device__ __host__ int linkIndexP1(const int x[], const I X[4], const int mu)
void apply(const cudaStream_t &stream)
gauge_mapper< Float, reconstruct >::type G
QudaFieldOrder FieldOrder() const
int comm_dim_partitioned(int dim)
unsigned int maxBlockSize() const
static __device__ __host__ void getCoords(int x[], int cb_index, const I X[], int parity)