10 template <
typename Order,
int nDim,
int dim>
22 int fBody[nDim][nDim];
24 int localParity[nDim];
27 const int *surfaceCB_,
28 const int *A0_,
const int *A1_,
const int *B0_,
const int *B1_,
29 const int *C0_,
const int *C1_,
const int fBody_[nDim][nDim],
30 const int fBuf_[nDim][nDim],
const int *localParity_)
31 : order(order), threads(0) {
33 threads = R_[dim]*(A1_[dim]-A0_[dim])*(B1_[dim]-B0_[dim])*(C1_[dim]-C0_[dim])*order.geometry;
35 for (
int d=0; d<nDim; d++) {
38 surfaceCB[d] = surfaceCB_[d];
45 for (
int e=0; e<nDim; e++) {
46 fBody[d][e] = fBody_[d][e];
47 fBuf[d][e] = fBuf_[d][e];
49 localParity[d] = localParity_[d];
55 template <
typename Float,
int length,
int dim,
typename Arg>
57 int c,
int d,
int g,
int parity) {
58 int srcIdx = (a*arg.fBody[dim][0] + b*arg.fBody[dim][1] +
59 c*arg.fBody[dim][2] + d*arg.fBody[dim][3]) >> 1;
61 int dstIdx = (a*arg.fBuf[dim][0] + b*arg.fBuf[dim][1] +
62 c*arg.fBuf[dim][2] + (d-(dir?arg.X[dim]:arg.R[dim]))*arg.fBuf[dim][3]) >> 1;
67 u = arg.order(g, srcIdx, parity);
71 arg.order.saveGhostEx(u.
data, dstIdx, srcIdx, dir, dim, g, (parity+arg.localParity[dim])&1, arg.R);
75 template <
typename Float,
int length,
int dim,
typename Arg>
77 int c,
int d,
int g,
int parity) {
78 int srcIdx = (a*arg.fBuf[dim][0] + b*arg.fBuf[dim][1] +
79 c*arg.fBuf[dim][2] + (d-dir*(arg.X[dim]+arg.R[dim]))*arg.fBuf[dim][3]) >> 1;
81 int dstIdx = (a*arg.fBody[dim][0] + b*arg.fBody[dim][1] +
82 c*arg.fBody[dim][2] + d*arg.fBody[dim][3]) >> 1;
84 int oddness = (parity+arg.localParity[dim])&1;
90 arg.order.loadGhostEx(u.
data, srcIdx, dstIdx, dir, dim, g, oddness, arg.R);
92 arg.order(g, dstIdx, parity) = u;
99 template <
typename Float,
int length,
int nDim,
int dim,
typename Order,
bool extract>
106 for (
int dir = 0; dir<2; dir++) {
108 int D0 = extract ? dir*arg.
X[dim] + (1-dir)*arg.
R[dim] : dir*(arg.
X[dim] + arg.
R[dim]);
110 for (
int d=D0; d<D0+arg.
R[dim]; d++) {
111 for (
int a=arg.
A0[dim]; a<arg.
A1[dim]; a++) {
112 for (
int b=arg.
B0[dim]; b<arg.
B1[dim]; b++) {
113 for (
int c=arg.
C0[dim]; c<arg.
C1[dim]; c++) {
114 for (
int g=0; g<arg.
order.geometry; g++) {
117 int oddness = (a+b+c+d) & 1;
119 if (extract) extractor<Float,length,dim>(
arg, dir, a, b, c, d, g,
parity);
120 else injector<Float,length,dim>(
arg, dir, a, b, c, d, g,
parity);
143 template <
typename Float,
int length,
int nDim,
int dim,
typename Order,
bool extract>
155 int dir = blockIdx.y;
160 int X = blockIdx.x * blockDim.x + threadIdx.x;
163 int dA = arg.
A1[dim]-arg.
A0[dim];
164 int dB = arg.
B1[dim]-arg.
B0[dim];
165 int dC = arg.
C1[dim]-arg.
C0[dim];
166 int D0 = extract ? dir*arg.
X[dim] + (1-dir)*arg.
R[dim] : dir*(arg.
X[dim] + arg.
R[dim]);
171 int c = arg.
C0[dim] + X - gdab*dC;
173 int b = arg.
B0[dim] + gdab - gda *dB;
175 int a = arg.
A0[dim] + gda - gd *dA;
176 int g = gd / arg.
R[dim];
177 int d = D0 + gd - g *arg.
R[dim];
180 int oddness = (a+b+c+d) & 1;
181 if (oddness == parity) {
182 if (extract) extractor<Float,length,dim>(
arg, dir, a, b, c, d, g,
parity);
183 else injector<Float,length,dim>(
arg, dir, a, b, c, d, g,
parity);
191 template <
typename Float,
int length,
int nDim,
int dim,
typename Order>
209 : arg(arg), extract(extract), meta(meta), location(location) {
210 int dA = arg.
A1[dim]-arg.
A0[dim];
211 int dB = arg.
B1[dim]-arg.
B0[dim];
212 int dC = arg.
C1[dim]-arg.
C0[dim];
213 size = arg.
R[dim]*dA*dB*dC*arg.
order.geometry;
214 writeAuxString(
"prec=%lu,stride=%d,extract=%d,dimension=%d,geometry=%d",
215 sizeof(Float),arg.
order.stride, extract, dim, arg.
order.geometry);
222 extractGhostEx<Float,length,nDim,dim,Order,true>(
arg);
227 extractGhostExKernel<Float,length,nDim,dim,Order,true>
232 extractGhostEx<Float,length,nDim,dim,Order,false>(
arg);
237 extractGhostExKernel<Float,length,nDim,dim,Order,false>
245 long long flops()
const {
return 0; }
246 long long bytes()
const {
return 2 * 2 * 2 * size * arg.
order.Bytes(); }
257 template <
typename Float,
int length,
typename Order>
266 for (
int d=0; d<nDim; d++) X[d] = E[d] - 2*R[d];
269 int A0[nDim] = {R[3], R[3], R[3], 0};
270 int A1[nDim] = {X[3]+R[3], X[3]+R[3], X[3]+R[3], X[2]+2*R[2]};
272 int B0[nDim] = {R[2], R[2], 0, 0};
273 int B1[nDim] = {X[2]+R[2], X[2]+R[2], X[1]+2*R[1], X[1]+2*R[1]};
275 int C0[nDim] = {R[1], 0, 0, 0};
276 int C1[nDim] = {X[1]+R[1], X[0]+2*R[0], X[0]+2*R[0], X[0]+2*R[0]};
278 int fSrc[nDim][nDim] = {
279 {E[2]*E[1]*E[0], E[1]*E[0], E[0], 1},
280 {E[2]*E[1]*E[0], E[1]*E[0], 1, E[0]},
281 {E[2]*E[1]*E[0], E[0], 1, E[1]*E[0]},
282 {E[1]*E[0], E[0], 1, E[2]*E[1]*E[0]}
285 int fBuf[nDim][nDim]={
286 {E[2]*E[1], E[1], 1, E[3]*E[2]*E[1]},
287 {E[2]*E[0], E[0], 1, E[3]*E[2]*E[0]},
288 {E[1]*E[0], E[0], 1, E[3]*E[1]*E[0]},
289 {E[1]*E[0], E[0], 1, E[2]*E[1]*E[0]}
296 int localParity[nDim];
297 for (
int d=0; d<nDim; d++)
298 localParity[dim] = ((X[dim] % 2 ==1) && (
commDim(dim) > 1)) ? 1 : 0;
302 ExtractGhostExArg<Order,nDim,0> arg(order, X, R, surfaceCB, A0, A1, B0, B1,
303 C0, C1, fSrc, fBuf, localParity);
307 ExtractGhostExArg<Order,nDim,1> arg(order, X, R, surfaceCB, A0, A1, B0, B1,
308 C0, C1, fSrc, fBuf, localParity);
312 ExtractGhostExArg<Order,nDim,2> arg(order, X, R, surfaceCB, A0, A1, B0, B1,
313 C0, C1, fSrc, fBuf, localParity);
317 ExtractGhostExArg<Order,nDim,3> arg(order, X, R, surfaceCB, A0, A1, B0, B1,
318 C0, C1, fSrc, fBuf, localParity);
329 template <
typename Float>
340 extractGhostEx<Float, length>(G(u, 0, Ghost), dim, u.
SurfaceCB(), u.
X(),
R, extract, u, location);
343 extractGhostEx<Float,length>(G(u, 0, Ghost),
344 dim, u.
SurfaceCB(), u.
X(),
R, extract, u, location);
347 extractGhostEx<Float,length>(G(u, 0, Ghost),
348 dim, u.
SurfaceCB(), u.
X(),
R, extract, u, location);
351 extractGhostEx<Float,length>(G(u, 0, Ghost),
352 dim, u.
SurfaceCB(), u.
X(),
R, extract, u, location);
355 extractGhostEx<Float,length>(G(u, 0, Ghost),
356 dim, u.
SurfaceCB(), u.
X(),
R, extract, u, location);
360 #ifdef BUILD_QDP_INTERFACE 362 dim, u.SurfaceCB(), u.X(),
R, extract, u, location);
364 errorQuda(
"QDP interface has not been built\n");
369 #ifdef BUILD_QDPJIT_INTERFACE 371 dim, u.SurfaceCB(), u.X(),
R, extract, u, location);
373 errorQuda(
"QDPJIT interface has not been built\n");
378 #ifdef BUILD_CPS_INTERFACE 380 dim, u.SurfaceCB(), u.X(),
R, extract, u, location);
382 errorQuda(
"CPS interface has not been built\n");
387 #ifdef BUILD_MILC_INTERFACE 389 dim, u.SurfaceCB(), u.X(),
R, extract, u, location);
391 errorQuda(
"MILC interface has not been built\n");
396 #ifdef BUILD_BQCD_INTERFACE 398 dim, u.SurfaceCB(), u.X(),
R, extract, u, location);
400 errorQuda(
"BQCD interface has not been built\n");
405 #ifdef BUILD_TIFR_INTERFACE 407 dim, u.SurfaceCB(), u.X(),
R, extract, u, location);
409 errorQuda(
"TIFR interface has not been built\n");
419 void **ghost,
bool extract) {
struct to define TIFR ordered gauge fields: [mu][parity][volumecb][col][row]
__host__ __device__ constexpr int Ncolor(int length)
Return the number of colors of the accessor based on the length of the field.
QudaVerbosity getVerbosity()
const char * VolString() const
const int * SurfaceCB() const
__global__ void extractGhostExKernel(ExtractGhostExArg< Order, nDim, dim > arg)
void extractExtendedGaugeGhost(const GaugeField &u, int dim, const int *R, void **ghost, bool extract)
TuneParam & tuneLaunch(Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity)
void extractGhostEx(ExtractGhostExArg< Order, nDim, dim > arg)
Main header file for host and device accessors to GaugeFields.
enum QudaFieldLocation_s QudaFieldLocation
static int commDim[QUDA_MAX_DIM]
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
QudaReconstructType Reconstruct() const
QudaGaugeFieldOrder Order() const
__device__ __host__ void injector(Arg &arg, int dir, int a, int b, int c, int d, int g, int parity)
QudaTune getTuning()
Query whether autotuning is enabled or not. Default is enabled but can be overridden by setting QUDA_...
QudaPrecision Precision() const
__device__ __host__ void extractor(Arg &arg, int dir, int a, int b, int c, int d, int g, int parity)