8 template <
typename Order,
int nDim,
int dim>
20 int fBody[nDim][nDim];
22 int localParity[nDim];
25 const int *surfaceCB_,
26 const int *A0_,
const int *A1_,
const int *B0_,
const int *B1_,
27 const int *C0_,
const int *C1_,
const int fBody_[nDim][nDim],
28 const int fBuf_[nDim][nDim],
const int *localParity_)
29 : order(order), threads(0) {
33 for (
int d=0;
d<nDim;
d++) {
36 surfaceCB[
d] = surfaceCB_[
d];
43 for (
int e=0;
e<nDim;
e++) {
44 fBody[
d][
e] = fBody_[
d][
e];
45 fBuf[
d][
e] = fBuf_[
d][
e];
47 localParity[
d] = localParity_[
d];
53 template <
typename Float,
int length,
int dim,
typename Arg>
72 template <
typename Float,
int length,
int dim,
typename Arg>
86 arg.order.loadGhostEx(u, srcIdx, dstIdx, dir,
dim, g, oddness,
arg.R);
95 template <
typename Float,
int length,
int nDim,
int dim,
typename Order,
bool extract>
103 for (
int dir = 0; dir<2; dir++) {
111 for (
int g=0; g<
arg.order.geometry; g++) {
114 int oddness = (
a+
b+
c+
d) & 1;
116 if (extract) extractor<Float,length,dim>(
arg, dir,
a,
b,
c,
d, g,
parity);
117 else injector<Float,length,dim>(
arg, dir,
a,
b,
c,
d, g,
parity);
140 template <
typename Float,
int length,
int nDim,
int dim,
typename Order,
bool extract>
153 int dir = blockIdx.y;
158 int X = blockIdx.x *
blockDim.x + threadIdx.x;
159 if (
X >=
arg.threads)
return;
171 int b =
arg.B0[
dim] + gdab - gda *dB;
173 int a =
arg.A0[
dim] + gda - gd *dA;
175 int d = D0 + gd - g *
arg.R[
dim];
178 int oddness = (
a+
b+
c+
d) & 1;
180 if (extract) extractor<Float,length,dim>(
arg, dir,
a,
b,
c,
d, g,
parity);
181 else injector<Float,length,dim>(
arg, dir,
a,
b,
c,
d, g,
parity);
189 template <
typename Float,
int length,
int nDim,
int dim,
typename Order>
207 :
arg(
arg), extract(extract), meta(meta), location(location) {
212 writeAuxString(
"prec=%lu,stride=%d,extract=%d,dimension=%d,geometry=%d",
213 sizeof(Float),
arg.order.stride, extract,
dim,
arg.order.geometry);
220 extractGhostEx<Float,length,nDim,dim,Order,true>(
arg);
225 extractGhostExKernel<Float,length,nDim,dim,Order,true>
230 extractGhostEx<Float,length,nDim,dim,Order,false>(
arg);
235 extractGhostExKernel<Float,length,nDim,dim,Order,false>
243 long long flops()
const {
return 0; }
244 long long bytes()
const {
return 2 * 2 * 2 *
size *
arg.order.Bytes(); }
255 template <
typename Float,
int length,
typename Order>
264 for (
int d=0;
d<nDim;
d++)
X[
d] =
E[
d] - 2*
R[
d];
267 int A0[nDim] = {
R[3],
R[3],
R[3], 0};
268 int A1[nDim] = {
X[3]+
R[3],
X[3]+
R[3],
X[3]+
R[3],
X[2]+2*
R[2]};
270 int B0[nDim] = {
R[2],
R[2], 0, 0};
271 int B1[nDim] = {
X[2]+
R[2],
X[2]+
R[2],
X[1]+2*
R[1],
X[1]+2*
R[1]};
273 int C0[nDim] = {
R[1], 0, 0, 0};
274 int C1[nDim] = {
X[1]+
R[1],
X[0]+2*
R[0],
X[0]+2*
R[0],
X[0]+2*
R[0]};
276 int fSrc[nDim][nDim] = {
277 {
E[2]*
E[1]*
E[0],
E[1]*
E[0],
E[0], 1},
278 {
E[2]*
E[1]*
E[0],
E[1]*
E[0], 1,
E[0]},
279 {
E[2]*
E[1]*
E[0],
E[0], 1,
E[1]*
E[0]},
280 {
E[1]*
E[0],
E[0], 1,
E[2]*
E[1]*
E[0]}
283 int fBuf[nDim][nDim]={
284 {
E[2]*
E[1],
E[1], 1,
E[3]*
E[2]*
E[1]},
285 {
E[2]*
E[0],
E[0], 1,
E[3]*
E[2]*
E[0]},
286 {
E[1]*
E[0],
E[0], 1,
E[3]*
E[1]*
E[0]},
287 {
E[1]*
E[0],
E[0], 1,
E[2]*
E[1]*
E[0]}
294 int localParity[nDim];
295 for (
int d=0;
d<nDim;
d++)
300 ExtractGhostExArg<Order,nDim,0> arg(order,
X,
R, surfaceCB, A0, A1, B0, B1,
301 C0,
C1, fSrc, fBuf, localParity);
305 ExtractGhostExArg<Order,nDim,1> arg(order,
X,
R, surfaceCB, A0, A1, B0, B1,
306 C0,
C1, fSrc, fBuf, localParity);
310 ExtractGhostExArg<Order,nDim,2> arg(order,
X,
R, surfaceCB, A0, A1, B0, B1,
311 C0,
C1, fSrc, fBuf, localParity);
315 ExtractGhostExArg<Order,nDim,3> arg(order,
X,
R, surfaceCB, A0, A1, B0, B1,
316 C0,
C1, fSrc, fBuf, localParity);
327 template <
typename Float>
339 dim, u.SurfaceCB(), u.X(),
R, extract, u, location);
342 extractGhostEx<Float,length>(G(u, 0, Ghost),
347 extractGhostEx<Float,length>(G(u, 0, Ghost),
351 extractGhostEx<Float,length>(G(u, 0, Ghost),
355 extractGhostEx<Float,length>(G(u, 0, Ghost),
359 extractGhostEx<Float,length>(G(u, 0, Ghost),
364 #ifdef BUILD_QDP_INTERFACE 366 dim, u.SurfaceCB(), u.X(),
R, extract, u, location);
368 errorQuda(
"QDP interface has not been built\n");
373 #ifdef BUILD_QDPJIT_INTERFACE 375 dim, u.SurfaceCB(), u.X(),
R, extract, u, location);
377 errorQuda(
"QDPJIT interface has not been built\n");
382 #ifdef BUILD_CPS_INTERFACE 384 dim, u.SurfaceCB(), u.X(),
R, extract, u, location);
386 errorQuda(
"CPS interface has not been built\n");
391 #ifdef BUILD_MILC_INTERFACE 393 dim, u.SurfaceCB(), u.X(),
R, extract, u, location);
395 errorQuda(
"MILC interface has not been built\n");
400 #ifdef BUILD_BQCD_INTERFACE 402 dim, u.SurfaceCB(), u.X(),
R, extract, u, location);
404 errorQuda(
"BQCD interface has not been built\n");
409 #ifdef BUILD_TIFR_INTERFACE 411 dim, u.SurfaceCB(), u.X(),
R, extract, u, location);
413 errorQuda(
"TIFR interface has not been built\n");
423 void **ghost,
bool extract) {
QudaVerbosity getVerbosity()
QudaLinkType LinkType() const
static __inline__ dim3 dim3 void size_t cudaStream_t int dim
const char * VolString() const
const int * SurfaceCB() const
__global__ void extractGhostExKernel(ExtractGhostExArg< Order, nDim, dim > arg)
void extractExtendedGaugeGhost(const GaugeField &u, int dim, const int *R, void **ghost, bool extract)
for(int s=0;s< param.dc.Ls;s++)
TuneParam & tuneLaunch(Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity)
void extractGhostEx(ExtractGhostExArg< Order, nDim, dim > arg)
Main header file for host and device accessors to GaugeFields.
enum QudaFieldLocation_s QudaFieldLocation
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
Accessor routine for CloverFields in native field order.
QudaReconstructType Reconstruct() const
QudaGaugeFieldOrder Order() const
__device__ __host__ void injector(Arg &arg, int dir, int a, int b, int c, int d, int g, int parity)
QudaTune getTuning()
Query whether autotuning is enabled or not. Default is enabled but can be overridden by setting QUDA_...
static __inline__ size_t size_t d
QudaPrecision Precision() const
__device__ __host__ void extractor(Arg &arg, int dir, int a, int b, int c, int d, int g, int parity)