7 #if defined(GPU_GAUGE_ALG) && defined(MULTI_GPU) 12 BorderIdArg(
int X_[4],
int border_[4]) {
13 for (
int dir = 0; dir < 4; ++dir ) border[dir] = border_[dir];
14 for (
int dir = 0; dir < 4; ++dir )
X[dir] = X_[dir];
18 __global__
void ComputeBorderPointsActiveFaceIndex(BorderIdArg
arg,
int *faceindices,
int facesize,
int faceid,
int parity){
19 int idd =
blockDim.x * blockIdx.x + threadIdx.x;
20 if ( idd < facesize ) {
23 if (
idx >= facesize / 2 ) {
24 borderid =
arg.X[faceid] - 1;
28 for (
int dr = 0; dr < 4; ++dr )
X[dr] =
arg.X[dr];
35 x[2] =
za -
x[3] *
X[2];
37 xodd = (borderid +
x[2] +
x[3] +
parity) & 1;
38 x[1] = (2 *
idx + xodd) -
za *
X[1];
43 x[2] =
za -
x[3] *
X[2];
45 xodd = (borderid +
x[2] +
x[3] +
parity) & 1;
46 x[0] = (2 *
idx + xodd) -
za *
X[0];
51 x[1] =
za -
x[3] *
X[1];
53 xodd = (borderid +
x[1] +
x[3] +
parity) & 1;
54 x[0] = (2 *
idx + xodd) -
za *
X[0];
59 x[1] =
za -
x[2] *
X[1];
61 xodd = (borderid +
x[1] +
x[2] +
parity) & 1;
62 x[0] = (2 *
idx + xodd) -
za *
X[0];
65 idx = (((
x[3] *
X[2] +
x[2]) *
X[1] +
x[1]) *
X[0] +
x[0]);;
66 faceindices[idd] =
idx;
73 void PreCalculateLatticeIndices(
size_t faceVolume[4],
size_t faceVolumeCB[4],
int X[4],
int border[4], \
74 int &threads,
int *borderpoints[2]){
75 BorderIdArg
arg(
X, border);
77 for (
int dir = 0; dir < 4; ++dir )
79 thrust::device_ptr<int> array_faceT[2];
80 thrust::device_ptr<int> array_interiorT[2];
81 for (
int i = 0;
i < 2;
i++ ) {
83 cudaMemset(borderpoints[
i], 0, nlinksfaces *
sizeof(
int) );
84 array_faceT[
i] = thrust::device_pointer_cast(borderpoints[
i]);
86 dim3 nthreads(128, 1, 1);
88 for (
int dir = 0; dir < 4; ++dir ) {
90 dim3 blocks((
faceVolume[dir] + nthreads.x - 1) / nthreads.x,1,1);
91 for (
int oddbit = 0; oddbit < 2; oddbit++ )
92 ComputeBorderPointsActiveFaceIndex << < blocks, nthreads >> > (
arg, borderpoints[oddbit] +
start,
faceVolume[dir], dir, oddbit);
97 for (
int i = 0;
i < 2;
i++ ) {
100 thrust::sort(thrust::cuda::par(
alloc), array_faceT[
i], array_faceT[
i] + nlinksfaces);
101 thrust::device_ptr<int> new_end = thrust::unique(array_faceT[
i], array_faceT[
i] + nlinksfaces);
102 size[
i] = thrust::raw_pointer_cast(new_end) - thrust::raw_pointer_cast(array_faceT[
i]);
105 else errorQuda(
"BORDER: Even and Odd sizes does not match, not supported!!!!, %d:%d",
size[0],
size[1]);
108 #endif // GPU_GAUGE_ALG && MULTI_GPU
static std::map< void *, MemAlloc > alloc[N_ALLOC_TYPE]
#define pool_device_malloc(size)
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
int comm_dim_partitioned(int dim)