QUDA  0.9.0
index_helper.cuh
Go to the documentation of this file.
1 #pragma once
2 
3 namespace quda {
12  template <typename I, typename J, typename K>
13  static __device__ __host__ inline int linkIndexShift(const I x[], const J dx[], const K X[4]) {
14  int y[4];
15 #pragma unroll
16  for ( int i = 0; i < 4; i++ ) y[i] = (x[i] + dx[i] + X[i]) % X[i];
17  int idx = (((y[3] * X[2] + y[2]) * X[1] + y[1]) * X[0] + y[0]) >> 1;
18  return idx;
19  }
20 
30  template <typename I, typename J, typename K>
31  static __device__ __host__ inline int linkIndexShift(I y[], const I x[], const J dx[], const K X[4]) {
32 #pragma unroll
33  for ( int i = 0; i < 4; i++ ) y[i] = (x[i] + dx[i] + X[i]) % X[i];
34  int idx = (((y[3] * X[2] + y[2]) * X[1] + y[1]) * X[0] + y[0]) >> 1;
35  return idx;
36  }
37 
45  template <typename I>
46  static __device__ __host__ inline int linkIndex(const int x[], const I X[4]) {
47  int idx = (((x[3] * X[2] + x[2]) * X[1] + x[1]) * X[0] + x[0]) >> 1;
48  return idx;
49  }
50 
59  template <typename I>
60  static __device__ __host__ inline int linkIndex(int y[], const int x[], const I X[4]) {
61  int idx = (((x[3] * X[2] + x[2]) * X[1] + x[1]) * X[0] + x[0]) >> 1;
62  y[0] = x[0]; y[1] = x[1]; y[2] = x[2]; y[3] = x[3];
63  return idx;
64  }
65 
74  template <typename I>
75  static __device__ __host__ inline int linkIndexM1(const int x[], const I X[4], const int mu) {
76  int y[4];
77 #pragma unroll
78  for ( int i = 0; i < 4; i++ ) y[i] = x[i];
79  y[mu] = (y[mu] - 1 + X[mu]) % X[mu];
80  int idx = (((y[3] * X[2] + y[2]) * X[1] + y[1]) * X[0] + y[0]) >> 1;
81  return idx;
82  }
83 
92  template <typename I>
93  static __device__ __host__ inline int linkNormalIndexP1(const int x[], const I X[4], const int mu) {
94  int y[4];
95 #pragma unroll
96  for ( int i = 0; i < 4; i++ ) y[i] = x[i];
97  y[mu] = (y[mu] + 1 + X[mu]) % X[mu];
98  int idx = ((y[3] * X[2] + y[2]) * X[1] + y[1]) * X[0] + y[0];
99  return idx;
100  }
101 
110  template <typename I>
111  static __device__ __host__ inline int linkIndexP1(const int x[], const I X[4], const int mu) {
112  int y[4];
113 #pragma unroll
114  for ( int i = 0; i < 4; i++ ) y[i] = x[i];
115  y[mu] = (y[mu] + 1 + X[mu]) % X[mu];
116  int idx = (((y[3] * X[2] + y[2]) * X[1] + y[1]) * X[0] + y[0]) >> 1;
117  return idx;
118  }
119 
128  template <typename I>
129  static __device__ __host__ inline void getCoords(int x[], int cb_index, const I X[], int parity) {
130  //x[3] = cb_index/(X[2]*X[1]*X[0]/2);
131  //x[2] = (cb_index/(X[1]*X[0]/2)) % X[2];
132  //x[1] = (cb_index/(X[0]/2)) % X[1];
133  //x[0] = 2*(cb_index%(X[0]/2)) + ((x[3]+x[2]+x[1]+parity)&1);
134 
135  int za = (cb_index / (X[0] >> 1));
136  int zb = (za / X[1]);
137  x[1] = (za - zb * X[1]);
138  x[3] = (zb / X[2]);
139  x[2] = (zb - x[3] * X[2]);
140  int x1odd = (x[1] + x[2] + x[3] + parity) & 1;
141  x[0] = (2 * cb_index + x1odd - za * X[0]);
142  return;
143  }
144 
153  template <typename I, typename J>
154  static __device__ __host__ inline void getCoordsExtended(I x[], int cb_index, const J X[], int parity, const int R[]) {
155  //x[3] = cb_index/(X[2]*X[1]*X[0]/2);
156  //x[2] = (cb_index/(X[1]*X[0]/2)) % X[2];
157  //x[1] = (cb_index/(X[0]/2)) % X[1];
158  //x[0] = 2*(cb_index%(X[0]/2)) + ((x[3]+x[2]+x[1]+parity)&1);
159 
160  int za = (cb_index / (X[0] >> 1));
161  int zb = (za / X[1]);
162  x[1] = (za - zb * X[1]);
163  x[3] = (zb / X[2]);
164  x[2] = (zb - x[3] * X[2]);
165  int x1odd = (x[1] + x[2] + x[3] + parity) & 1;
166  x[0] = (2 * cb_index + x1odd - za * X[0]);
167 #pragma unroll
168  for (int d=0; d<4; d++) x[d] += R[d];
169  return;
170  }
171 
180  template <typename I>
181  static __device__ __host__ inline void getCoords5(int x[5], int cb_index, const I X[5],
182  int parity, QudaDWFPCType pc_type) {
183  //x[4] = cb_index/(X[3]*X[2]*X[1]*X[0]/2);
184  //x[3] = (cb_index/(X[2]*X[1]*X[0]/2) % X[3];
185  //x[2] = (cb_index/(X[1]*X[0]/2)) % X[2];
186  //x[1] = (cb_index/(X[0]/2)) % X[1];
187  //x[0] = 2*(cb_index%(X[0]/2)) + ((x[3]+x[2]+x[1]+parity)&1);
188 
189  int za = (cb_index / (X[0] >> 1));
190  int zb = (za / X[1]);
191  x[1] = za - zb * X[1];
192  int zc = zb / X[2];
193  x[2] = zb - zc*X[2];
194  x[4] = (zc / X[3]);
195  x[3] = zc - x[4] * X[3];
196  int x1odd = (x[1] + x[2] + x[3] + (pc_type==QUDA_5D_PC ? x[4] : 0) + parity) & 1;
197  x[0] = (2 * cb_index + x1odd) - za * X[0];
198  return;
199  }
200 
210  template <typename I>
211  static __device__ __host__ inline int getIndexFull(int cb_index, const I X[4], int parity) {
212  int za = (cb_index / (X[0] / 2));
213  int zb = (za / X[1]);
214  int x1 = za - zb * X[1];
215  int x3 = (zb / X[2]);
216  int x2 = zb - x3 * X[2];
217  int x1odd = (x1 + x2 + x3 + parity) & 1;
218  return 2 * cb_index + x1odd;
219  }
220 
229  template <int dir, typename I>
230  __device__ __host__ inline int ghostFaceIndex(const int x[], const I X[], int dim, int nFace) {
231  int index = 0;
232  switch(dim) {
233  case 0:
234  switch(dir) {
235  case 0:
236  index = (x[0]*X[4]*X[3]*X[2]*X[1] + x[4]*X[3]*X[2]*X[1] + x[3]*(X[2]*X[1])+x[2]*X[1] + x[1])>>1;
237  break;
238  case 1:
239  index = ((x[0]-X[0]+nFace)*X[4]*X[3]*X[2]*X[1] + x[4]*X[3]*X[2]*X[1] + x[3]*(X[2]*X[1]) + x[2]*X[1] + x[1])>>1;
240  break;
241  }
242  break;
243  case 1:
244  switch(dir) {
245  case 0:
246  index = (x[1]*X[4]*X[3]*X[2]*X[0] + x[4]*X[3]*X[2]*X[0] + x[3]*X[2]*X[0]+x[2]*X[0]+x[0])>>1;
247  break;
248  case 1:
249  index = ((x[1]-X[1]+nFace)*X[4]*X[3]*X[2]*X[0] +x[4]*X[3]*X[2]*X[0]+ x[3]*X[2]*X[0] + x[2]*X[0] + x[0])>>1;
250  break;
251  }
252  break;
253  case 2:
254  switch(dir) {
255  case 0:
256  index = (x[2]*X[4]*X[3]*X[1]*X[0] + x[4]*X[3]*X[1]*X[0] + x[3]*X[1]*X[0]+x[1]*X[0]+x[0])>>1;
257  break;
258  case 1:
259  index = ((x[2]-X[2]+nFace)*X[4]*X[3]*X[1]*X[0] + x[4]*X[3]*X[1]*X[0] + x[3]*X[1]*X[0] + x[1]*X[0] + x[0])>>1;
260  break;
261  }
262  break;
263  case 3:
264  switch(dir) {
265  case 0:
266  index = (x[3]*X[4]*X[2]*X[1]*X[0] + x[4]*X[2]*X[1]*X[0] + x[2]*X[1]*X[0]+x[1]*X[0]+x[0])>>1;
267  break;
268  case 1:
269  index = ((x[3]-X[3]+nFace)*X[4]*X[2]*X[1]*X[0] + x[4]*X[2]*X[1]*X[0] + x[2]*X[1]*X[0]+x[1]*X[0] + x[0])>>1;
270  break;
271  }
272  break;
273  }
274  return index;
275  }
276 
277 } // namespace quda
static __device__ __host__ int getIndexFull(int cb_index, const I X[4], int parity)
static __device__ __host__ void getCoordsExtended(I x[], int cb_index, const J X[], int parity, const int R[])
double mu
Definition: test_util.cpp:1643
static __device__ __host__ int linkIndexShift(const I x[], const J dx[], const K X[4])
static __device__ __host__ int linkIndex(const int x[], const I X[4])
static __device__ __host__ void getCoords5(int x[5], int cb_index, const I X[5], int parity, QudaDWFPCType pc_type)
static __inline__ dim3 dim3 void size_t cudaStream_t int dim
static int R[4]
__device__ __host__ int ghostFaceIndex(const int x[], const I X[], int dim, int nFace)
char * index(const char *, int)
enum QudaDWFPCType_s QudaDWFPCType
static __device__ __host__ int linkIndexM1(const int x[], const I X[4], const int mu)
static __inline__ size_t size_t d
static __device__ __host__ int linkIndexP1(const int x[], const I X[4], const int mu)
QudaParity parity
Definition: covdev_test.cpp:53
static __device__ __host__ int linkNormalIndexP1(const int x[], const I X[4], const int mu)
static __device__ __host__ void getCoords(int x[], int cb_index, const I X[], int parity)