// linkIndexShift, three-argument overload (excerpt).
// NOTE(review): the declaration of y and the return statement are on lines
// that are not visible in this excerpt.
12 template <
typename I,
typename J,
typename K>
13 static __device__ __host__
inline int linkIndexShift(
const I
x[],
const J dx[],
const K
X[4]) {
// For each of the four dimensions, shift x[i] by dx[i] and reduce modulo X[i].
// X[i] is added before the % so that, for signed integer operands, a sum
// x[i] + dx[i] as low as -X[i] still gives a non-negative remainder.
16 for (
int i = 0;
i < 4;
i++ )
y[
i] = (
x[
i] + dx[
i] +
X[
i]) %
X[
i];
// Flatten y into a linear index with y[0] varying fastest
// (strides 1, X[0], X[0]*X[1], X[0]*X[1]*X[2]), then halve it with >> 1.
// NOTE(review): the halving presumably yields a checkerboard (even/odd)
// index -- confirm against the callers.
17 int idx = (((
y[3] *
X[2] +
y[2]) *
X[1] +
y[1]) *
X[0] +
y[0]) >> 1;
// linkIndexShift, four-argument overload (excerpt): same computation as the
// three-argument form, but the shifted coordinates are written to the
// caller-supplied array y.
// NOTE(review): the return statement is on a line not visible in this excerpt.
30 template <
typename I,
typename J,
typename K>
31 static __device__ __host__
inline int linkIndexShift(I
y[],
const I
x[],
const J dx[],
const K
X[4]) {
// y[i] = (x[i] + dx[i] + X[i]) % X[i] for i = 0..3. Adding X[i] before the %
// keeps the remainder non-negative for signed operands when
// x[i] + dx[i] >= -X[i].
33 for (
int i = 0;
i < 4;
i++ )
y[
i] = (
x[
i] + dx[
i] +
X[
i]) %
X[
i];
// Linear index over y with y[0] fastest and y[3] slowest, halved by >> 1.
// NOTE(review): presumably a checkerboard index -- confirm.
34 int idx = (((
y[3] *
X[2] +
y[2]) *
X[1] +
y[1]) *
X[0] +
y[0]) >> 1;
// linkIndex, two-argument overload (excerpt).
// NOTE(review): the template header introducing I and the return statement
// are on lines not visible in this excerpt.
46 static __device__ __host__
inline int linkIndex(
const int x[],
const I
X[4]) {
// Linear index of x with x[0] varying fastest and x[3] slowest
// (strides 1, X[0], X[0]*X[1], X[0]*X[1]*X[2]), halved by >> 1.
// No wrapping or shifting is applied to x here.
47 int idx = (((
x[3] *
X[2] +
x[2]) *
X[1] +
x[1]) *
X[0] +
x[0]) >> 1;
// linkIndex, three-argument overload (excerpt): computes the same halved
// linear index as the two-argument form and also copies x into y.
// NOTE(review): the template header introducing I and the return statement
// are on lines not visible in this excerpt.
60 static __device__ __host__
inline int linkIndex(
int y[],
const int x[],
const I
X[4]) {
// Linear index of x (x[0] fastest, x[3] slowest), halved by >> 1.
61 int idx = (((
x[3] *
X[2] +
x[2]) *
X[1] +
x[1]) *
X[0] +
x[0]) >> 1;
// Copy the four coordinates unchanged into the output array y.
62 y[0] =
x[0];
y[1] =
x[1];
y[2] =
x[2];
y[3] =
x[3];
// linkIndexM1 (excerpt).
// NOTE(review): the template header, the declaration of y, any use of mu and
// the return statement are on lines not visible in this excerpt. mu is not
// referenced by the visible lines; presumably y[mu] is adjusted between the
// copy and the index computation -- confirm against the full file.
75 static __device__ __host__
inline int linkIndexM1(
const int x[],
const I
X[4],
const int mu) {
// Copy the four input coordinates into the working array y.
78 for (
int i = 0;
i < 4;
i++ )
y[
i] =
x[
i];
// Linear index of y (y[0] fastest, y[3] slowest), halved by >> 1.
80 int idx = (((
y[3] *
X[2] +
y[2]) *
X[1] +
y[1]) *
X[0] +
y[0]) >> 1;
// Fragment of a function whose signature is not visible in this excerpt.
// NOTE(review): presumably linkNormalIndexP1 -- confirm against the full file.
// Copy the four input coordinates into the working array y (declared on a
// line not shown).
96 for (
int i = 0;
i < 4;
i++ )
y[
i] =
x[
i];
// Linear index of y with y[0] fastest and y[3] slowest. Unlike the sibling
// index computations in this file, the result is NOT shifted right by 1.
98 int idx = ((
y[3] *
X[2] +
y[2]) *
X[1] +
y[1]) *
X[0] +
y[0];
// linkIndexP1 (excerpt).
// NOTE(review): the declaration of y, any use of mu and the return statement
// are on lines not visible in this excerpt. mu is not referenced by the
// visible lines; presumably y[mu] is adjusted between the copy and the index
// computation -- confirm against the full file.
110 template <
typename I>
111 static __device__ __host__
inline int linkIndexP1(
const int x[],
const I
X[4],
const int mu) {
// Copy the four input coordinates into the working array y.
114 for (
int i = 0;
i < 4;
i++ )
y[
i] =
x[
i];
// Linear index of y (y[0] fastest, y[3] slowest), halved by >> 1.
116 int idx = (((
y[3] *
X[2] +
y[2]) *
X[1] +
y[1]) *
X[0] +
y[0]) >> 1;
// getCoords (excerpt): fills x[0..3] from cb_index, the extents X and parity.
// NOTE(review): the assignments of x[1] and x[3] sit between the zb
// computation and the x[2] assignment on lines not visible in this excerpt.
128 template <
typename I>
129 static __device__ __host__
inline void getCoords(
int x[],
int cb_index,
const I
X[],
int parity) {
// za: cb_index divided by half the extent of dimension 0 (X[0] >> 1).
135 int za = (cb_index / (
X[0] >> 1));
// zb: za divided by the extent of dimension 1.
136 int zb = (
za /
X[1]);
// x[2] = zb - x[3] * X[2]; this is zb modulo X[2] if x[3] == zb / X[2]
// (that assignment is not visible here -- confirm).
139 x[2] = (
zb -
x[3] *
X[2]);
// Low bit of x[1] + x[2] + x[3] + parity; used as the 0/1 offset added to
// 2 * cb_index below.
140 int x1odd = (
x[1] +
x[2] +
x[3] +
parity) & 1;
// x[0] is what remains of 2 * cb_index + x1odd after removing za * X[0].
141 x[0] = (2 * cb_index + x1odd -
za *
X[0]);
// Fragment of a function whose signature is not visible in this excerpt.
// NOTE(review): presumably getCoordsExtended -- confirm against the full file.
// The visible lines repeat the getCoords arithmetic and then add R[d] to each
// of the four coordinates. The assignments of x[1] and x[3] are on lines not
// shown.
153 template <
typename I,
typename J>
// za: cb_index divided by half the extent of dimension 0 (X[0] >> 1).
160 int za = (cb_index / (
X[0] >> 1));
// zb: za divided by the extent of dimension 1.
161 int zb = (
za /
X[1]);
// x[2] = zb - x[3] * X[2] (x[3] is assigned on a line not visible here).
164 x[2] = (
zb -
x[3] *
X[2]);
// Low bit of x[1] + x[2] + x[3] + parity.
165 int x1odd = (
x[1] +
x[2] +
x[3] +
parity) & 1;
// x[0] is what remains of 2 * cb_index + x1odd after removing za * X[0].
166 x[0] = (2 * cb_index + x1odd -
za *
X[0]);
// Offset every coordinate by the matching entry of R.
// NOTE(review): R is presumably a per-dimension border/halo width -- confirm.
168 for (
int d=0;
d<4;
d++)
x[
d] +=
R[
d];
// getCoords5 (excerpt): five-dimensional variant of the coordinate
// reconstruction; x and X are declared with five entries.
// NOTE(review): the rest of the parameter list and the definitions of zc,
// x1odd, x[1], x[2] and x[4] are on lines not visible in this excerpt.
180 template <
typename I>
181 static __device__ __host__
inline void getCoords5(
int x[5],
int cb_index,
const I
X[5],
// za: cb_index divided by half the extent of dimension 0 (X[0] >> 1).
189 int za = (cb_index / (
X[0] >> 1));
// zb: za divided by the extent of dimension 1.
190 int zb = (
za /
X[1]);
// x[3] = zc - x[4] * X[3]; this is zc modulo X[3] if x[4] == zc / X[3]
// (neither zc nor the x[4] assignment is visible here -- confirm).
195 x[3] = zc -
x[4] *
X[3];
// x[0] is what remains of 2 * cb_index + x1odd after removing za * X[0].
197 x[0] = (2 * cb_index + x1odd) -
za *
X[0];
// Fragment of a function whose signature is not visible in this excerpt.
// NOTE(review): presumably getIndexFull -- confirm against the full file.
// The visible lines derive x1, x2, x3 from cb_index and return
// 2 * cb_index plus a 0/1 offset.
210 template <
typename I>
// za: cb_index divided by half the extent of dimension 0 (X[0] / 2).
212 int za = (cb_index / (
X[0] / 2));
// Successive divide/remainder steps peel off x1 (extent X[1]) and x2
// (extent X[2]); x3 is the remaining quotient.
213 int zb = (
za /
X[1]);
214 int x1 =
za -
zb *
X[1];
215 int x3 = (
zb /
X[2]);
216 int x2 =
zb - x3 *
X[2];
// Low bit of x1 + x2 + x3 + parity selects which of the two indices
// 2 * cb_index and 2 * cb_index + 1 is returned.
217 int x1odd = (x1 + x2 + x3 +
parity) & 1;
218 return 2 * cb_index + x1odd;
// Fragment of a function whose signature and branching are not visible in this
// excerpt.
// NOTE(review): presumably ghostFaceIndex, with each assignment below in a
// different dim/dir branch -- confirm against the full file.
// Each visible assignment flattens the five coordinates x[0..4] into an index
// and halves it with >> 1. In each expression one dimension d supplies the
// slowest-varying term and its own extent X[d] does not appear as a stride.
229 template <
int dir,
typename I>
// d = 0, using x[0] directly as the slowest term; the remaining order from
// slow to fast is x[4], x[3], x[2], x[1].
236 index = (
x[0]*
X[4]*
X[3]*
X[2]*
X[1] +
x[4]*
X[3]*
X[2]*
X[1] +
x[3]*(
X[2]*
X[1])+
x[2]*
X[1] +
x[1])>>1;
// d = 0, with the slowest term taken as x[0] - X[0] + nFace, which maps
// x[0] = X[0] - nFace to 0.
// NOTE(review): presumably used for the last nFace sites along d -- confirm.
239 index = ((
x[0]-
X[0]+nFace)*
X[4]*
X[3]*
X[2]*
X[1] +
x[4]*
X[3]*
X[2]*
X[1] +
x[3]*(
X[2]*
X[1]) +
x[2]*
X[1] +
x[1])>>1;
// d = 1: slowest term x[1] - X[1] + nFace; then x[4], x[3], x[2], x[0].
249 index = ((
x[1]-
X[1]+nFace)*
X[4]*
X[3]*
X[2]*
X[0] +
x[4]*
X[3]*
X[2]*
X[0]+
x[3]*
X[2]*
X[0] +
x[2]*
X[0] +
x[0])>>1;
// d = 2: slowest term x[2] - X[2] + nFace; then x[4], x[3], x[1], x[0].
259 index = ((
x[2]-
X[2]+nFace)*
X[4]*
X[3]*
X[1]*
X[0] +
x[4]*
X[3]*
X[1]*
X[0] +
x[3]*
X[1]*
X[0] +
x[1]*
X[0] +
x[0])>>1;
// d = 3: slowest term x[3] - X[3] + nFace; then x[4], x[2], x[1], x[0].
269 index = ((
x[3]-
X[3]+nFace)*
X[4]*
X[2]*
X[1]*
X[0] +
x[4]*
X[2]*
X[1]*
X[0] +
x[2]*
X[1]*
X[0]+
x[1]*
X[0] +
x[0])>>1;
static __device__ __host__ int getIndexFull(int cb_index, const I X[4], int parity)
static __device__ __host__ void getCoordsExtended(I x[], int cb_index, const J X[], int parity, const int R[])
static __device__ __host__ int linkIndexShift(const I x[], const J dx[], const K X[4])
static __device__ __host__ int linkIndex(const int x[], const I X[4])
static __device__ __host__ void getCoords5(int x[5], int cb_index, const I X[5], int parity, QudaDWFPCType pc_type)
static __inline__ dim3 dim3 void size_t cudaStream_t int dim
__device__ __host__ int ghostFaceIndex(const int x[], const I X[], int dim, int nFace)
char * index(const char *, int)
enum QudaDWFPCType_s QudaDWFPCType
static __device__ __host__ int linkIndexM1(const int x[], const I X[4], const int mu)
static __inline__ size_t size_t d
static __device__ __host__ int linkIndexP1(const int x[], const I X[4], const int mu)
static __device__ __host__ int linkNormalIndexP1(const int x[], const I X[4], const int mu)
static __device__ __host__ void getCoords(int x[], int cb_index, const I X[], int parity)