QUDA  1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
gauge_fix_ovr_extra.cu
Go to the documentation of this file.
1 #include <comm_quda.h>
2 #include <gauge_fix_ovr_extra.h>
3 #include <thrust_helper.cuh>
4 
5 namespace quda {
6 
7 #if defined(GPU_GAUGE_ALG) && defined(MULTI_GPU)
8 
9  struct BorderIdArg {
10  int X[4]; // grid dimensions
11  int border[4];
12  BorderIdArg(int X_[4], int border_[4]) {
13  for ( int dir = 0; dir < 4; ++dir ) border[dir] = border_[dir];
14  for ( int dir = 0; dir < 4; ++dir ) X[dir] = X_[dir];
15  }
16  };
17 
18  __global__ void ComputeBorderPointsActiveFaceIndex(BorderIdArg arg, int *faceindices, int facesize, int faceid, int parity){
19  int idd = blockDim.x * blockIdx.x + threadIdx.x;
20  if ( idd < facesize ) {
21  int borderid = 0;
22  int idx = idd;
23  if ( idx >= facesize / 2 ) {
24  borderid = arg.X[faceid] - 1;
25  idx -= facesize / 2;
26  }
27  int X[4];
28  for ( int dr = 0; dr < 4; ++dr ) X[dr] = arg.X[dr];
29  int x[4];
30  int za, xodd;
31  switch ( faceid ) {
32  case 0: //X FACE
33  za = idx / ( X[1] / 2);
34  x[3] = za / X[2];
35  x[2] = za - x[3] * X[2];
36  x[0] = borderid;
37  xodd = (borderid + x[2] + x[3] + parity) & 1;
38  x[1] = (2 * idx + xodd) - za * X[1];
39  break;
40  case 1: //Y FACE
41  za = idx / ( X[0] / 2);
42  x[3] = za / X[2];
43  x[2] = za - x[3] * X[2];
44  x[1] = borderid;
45  xodd = (borderid + x[2] + x[3] + parity) & 1;
46  x[0] = (2 * idx + xodd) - za * X[0];
47  break;
48  case 2: //Z FACE
49  za = idx / ( X[0] / 2);
50  x[3] = za / X[1];
51  x[1] = za - x[3] * X[1];
52  x[2] = borderid;
53  xodd = (borderid + x[1] + x[3] + parity) & 1;
54  x[0] = (2 * idx + xodd) - za * X[0];
55  break;
56  case 3: //T FACE
57  za = idx / ( X[0] / 2);
58  x[2] = za / X[1];
59  x[1] = za - x[2] * X[1];
60  x[3] = borderid;
61  xodd = (borderid + x[1] + x[2] + parity) & 1;
62  x[0] = (2 * idx + xodd) - za * X[0];
63  break;
64  }
65  idx = (((x[3] * X[2] + x[2]) * X[1] + x[1]) * X[0] + x[0]);;
66  faceindices[idd] = idx;
67  }
68  }
69 
73  void PreCalculateLatticeIndices(size_t faceVolume[4], size_t faceVolumeCB[4], int X[4], int border[4], \
74  int &threads, int *borderpoints[2]){
75  BorderIdArg arg(X, border);
76  int nlinksfaces = 0;
77  for ( int dir = 0; dir < 4; ++dir )
78  if ( comm_dim_partitioned(dir)) nlinksfaces += faceVolume[dir];
79  thrust::device_ptr<int> array_faceT[2];
80  thrust::device_ptr<int> array_interiorT[2];
81  for ( int i = 0; i < 2; i++ ) { //even and odd ids
82  borderpoints[i] = static_cast<int*>(pool_device_malloc(nlinksfaces * sizeof(int) ));
83  cudaMemset(borderpoints[i], 0, nlinksfaces * sizeof(int) );
84  array_faceT[i] = thrust::device_pointer_cast(borderpoints[i]);
85  }
86  dim3 nthreads(128, 1, 1);
87  int start = 0;
88  for ( int dir = 0; dir < 4; ++dir ) {
89  if ( comm_dim_partitioned(dir)) {
90  dim3 blocks((faceVolume[dir] + nthreads.x - 1) / nthreads.x,1,1);
91  for ( int oddbit = 0; oddbit < 2; oddbit++ )
92  ComputeBorderPointsActiveFaceIndex <<< blocks, nthreads >>> (arg, borderpoints[oddbit] + start, faceVolume[dir], dir, oddbit);
93  start += faceVolume[dir];
94  }
95  }
96  int size[2];
97  for ( int i = 0; i < 2; i++ ) {
98  //sort and remove duplicated lattice indices
100  thrust::sort(thrust::cuda::par(alloc), array_faceT[i], array_faceT[i] + nlinksfaces);
101  thrust::device_ptr<int> new_end = thrust::unique(array_faceT[i], array_faceT[i] + nlinksfaces);
102  size[i] = thrust::raw_pointer_cast(new_end) - thrust::raw_pointer_cast(array_faceT[i]);
103  }
104  if ( size[0] == size[1] ) threads = size[0];
105  else errorQuda("BORDER: Even and Odd sizes does not match, not supported!!!!, %d:%d",size[0],size[1]);
106  }
107 
108 #endif // GPU_GAUGE_ALG && MULTI_GPU
109 
110 }
111 
#define errorQuda(...)
Definition: util_quda.h:121
static std::map< void *, MemAlloc > alloc[N_ALLOC_TYPE]
Definition: malloc.cpp:53
#define pool_device_malloc(size)
Definition: malloc_quda.h:125
constexpr int size
int X[4]
Definition: covdev_test.cpp:70
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
int faceVolume[4]
Definition: test_util.cpp:31
QudaParity parity
Definition: covdev_test.cpp:54
int comm_dim_partitioned(int dim)