v0.9.0/doc/index__helper_8cuh_source.html

 #pragma once

 namespace quda {
   // Returns the linear index, shifted right by one bit, of the site obtained
   // by adding the offset dx to the coordinates x.
   //   x  : starting coordinates (entries 0..3 are read)
   //   dx : per-dimension offset (entries 0..3 are read)
   //   X  : extents of the four dimensions
   // Each shifted coordinate is reduced modulo X[i] after adding X[i] once, so
   // the wrap presumably relies on x[i] + dx[i] >= -X[i] -- TODO confirm with
   // callers. Dimension 0 runs fastest in the linearisation.
   template <typename I, typename J, typename K>
   static __device__ __host__ inline int linkIndexShift(const I x[], const J dx[], const K X[4]) {
     int shifted[4];
     #pragma unroll
     for ( int d = 0; d < 4; ++d ) {
       shifted[d] = (x[d] + dx[d] + X[d]) % X[d];
     }
     return (((shifted[3] * X[2] + shifted[2]) * X[1] + shifted[1]) * X[0] + shifted[0]) >> 1;
   }

   // Same computation as the three-argument linkIndexShift, but the shifted
   // coordinates are also handed back to the caller.
   //   y  : output, receives (x[i] + dx[i] + X[i]) % X[i] for i = 0..3
   //   x  : starting coordinates
   //   dx : per-dimension offset
   //   X  : extents of the four dimensions
   // Returns the linear index of y (dimension 0 fastest) shifted right by one.
   template <typename I, typename J, typename K>
   static __device__ __host__ inline int linkIndexShift(I y[], const I x[], const J dx[], const K X[4]) {
     #pragma unroll
     for ( int d = 0; d < 4; ++d ) {
       y[d] = (x[d] + dx[d] + X[d]) % X[d];
     }
     return (((y[3] * X[2] + y[2]) * X[1] + y[1]) * X[0] + y[0]) >> 1;
   }

   // Linearises the coordinates x over the extents X (dimension 0 fastest,
   // X[3] is not used) and returns that index shifted right by one bit.
   // No wrapping is applied; x is used as given.
   template <typename I>
   static __device__ __host__ inline int linkIndex(const int x[], const I X[4]) {
     return (((x[3] * X[2] + x[2]) * X[1] + x[1]) * X[0] + x[0]) >> 1;
   }

   // Variant of linkIndex that additionally copies the coordinates to y.
   //   y : output, receives x[0..3] unchanged
   //   x : coordinates to linearise
   //   X : extents (X[0..2] are used)
   // Returns the linear index of x (dimension 0 fastest) shifted right by one.
   template <typename I>
   static __device__ __host__ inline int linkIndex(int y[], const int x[], const I X[4]) {
     // The index is evaluated before y is written, as in the original order.
     const int halfIndex = (((x[3] * X[2] + x[2]) * X[1] + x[1]) * X[0] + x[0]) >> 1;
     for ( int d = 0; d < 4; ++d ) {
       y[d] = x[d];
     }
     return halfIndex;
   }

   // Returns the linear index, shifted right by one bit, of the neighbour of x
   // one step backwards in dimension mu.
   //   x  : coordinates of the starting site (entries 0..3 are read)
   //   X  : extents of the four dimensions
   //   mu : dimension to step in; used directly as an array subscript
   // The stepped coordinate wraps modulo X[mu].
   template <typename I>
   static __device__ __host__ inline int linkIndexM1(const int x[], const I X[4], const int mu) {
     int nbr[4] = { x[0], x[1], x[2], x[3] };
     nbr[mu] = (nbr[mu] - 1 + X[mu]) % X[mu];
     return (((nbr[3] * X[2] + nbr[2]) * X[1] + nbr[1]) * X[0] + nbr[0]) >> 1;
   }

   // Returns the full linear index (NOT shifted right, unlike linkIndexP1) of
   // the neighbour of x one step forwards in dimension mu.
   //   x  : coordinates of the starting site (entries 0..3 are read)
   //   X  : extents of the four dimensions
   //   mu : dimension to step in; used directly as an array subscript
   // The stepped coordinate wraps modulo X[mu]; dimension 0 runs fastest.
   template <typename I>
   static __device__ __host__ inline int linkNormalIndexP1(const int x[], const I X[4], const int mu) {
     int nbr[4] = { x[0], x[1], x[2], x[3] };
     nbr[mu] = (nbr[mu] + 1 + X[mu]) % X[mu];
     return ((nbr[3] * X[2] + nbr[2]) * X[1] + nbr[1]) * X[0] + nbr[0];
   }

   // Returns the linear index, shifted right by one bit, of the neighbour of x
   // one step forwards in dimension mu.
   //   x  : coordinates of the starting site (entries 0..3 are read)
   //   X  : extents of the four dimensions
   //   mu : dimension to step in; used directly as an array subscript
   // The stepped coordinate wraps modulo X[mu]; dimension 0 runs fastest.
   template <typename I>
   static __device__ __host__ inline int linkIndexP1(const int x[], const I X[4], const int mu) {
     int nbr[4] = { x[0], x[1], x[2], x[3] };
     nbr[mu] = (nbr[mu] + 1 + X[mu]) % X[mu];
     return (((nbr[3] * X[2] + nbr[2]) * X[1] + nbr[1]) * X[0] + nbr[0]) >> 1;
   }

   // Recovers four coordinates from a halved site index and a parity bit.
   //   x        : output, x[0..3] are written
   //   cb_index : halved linear index (presumably the checkerboard index -- TODO confirm)
   //   X        : extents; X[0], X[1] and X[2] are used
   //   parity   : added to x[1]+x[2]+x[3] to select the low bit of x[0]
   //
   // Reference form of the result (the code below avoids the modulo operations):
   //   x[3] = cb_index/(X[2]*X[1]*X[0]/2);
   //   x[2] = (cb_index/(X[1]*X[0]/2)) % X[2];
   //   x[1] = (cb_index/(X[0]/2)) % X[1];
   //   x[0] = 2*(cb_index%(X[0]/2)) + ((x[3]+x[2]+x[1]+parity)&1);
   template <typename I>
   static __device__ __host__ inline void getCoords(int x[], int cb_index, const I X[], int parity) {
     // Successive quotients; each remainder is formed as n - q*d instead of n % d.
     const int q0 = cb_index / (X[0] >> 1);
     const int q1 = q0 / X[1];
     x[1] = q0 - q1 * X[1];
     x[3] = q1 / X[2];
     x[2] = q1 - x[3] * X[2];

     // Low bit of x[0] is fixed by the parity of the other coordinates.
     const int lowBit = (x[1] + x[2] + x[3] + parity) & 1;
     x[0] = 2 * cb_index + lowBit - q0 * X[0];
   }

   // Recovers four coordinates from a halved site index and a parity bit, then
   // displaces every coordinate by R[d].
   //   x        : output, x[0..3] are written
   //   cb_index : halved linear index (presumably the checkerboard index -- TODO confirm)
   //   X        : extents; X[0], X[1] and X[2] are used
   //   parity   : added to x[1]+x[2]+x[3] to select the low bit of x[0]
   //   R        : per-dimension offset added after the coordinates are computed
   //
   // Reference form of the coordinates before the offset is applied:
   //   x[3] = cb_index/(X[2]*X[1]*X[0]/2);
   //   x[2] = (cb_index/(X[1]*X[0]/2)) % X[2];
   //   x[1] = (cb_index/(X[0]/2)) % X[1];
   //   x[0] = 2*(cb_index%(X[0]/2)) + ((x[3]+x[2]+x[1]+parity)&1);
   template <typename I, typename J>
   static __device__ __host__ inline void getCoordsExtended(I x[], int cb_index, const J X[], int parity, const int R[]) {
     // Successive quotients; each remainder is formed as n - q*d instead of n % d.
     const int q0 = cb_index / (X[0] >> 1);
     const int q1 = q0 / X[1];
     x[1] = q0 - q1 * X[1];
     x[3] = q1 / X[2];
     x[2] = q1 - x[3] * X[2];

     // The parity bit is evaluated on the un-displaced coordinates.
     const int lowBit = (x[1] + x[2] + x[3] + parity) & 1;
     x[0] = 2 * cb_index + lowBit - q0 * X[0];

     #pragma unroll
     for ( int d = 0; d < 4; ++d ) {
       x[d] += R[d];
     }
   }

   // Recovers five coordinates from a halved site index and a parity bit.
   //   x        : output, x[0..4] are written
   //   cb_index : halved linear index (presumably the checkerboard index -- TODO confirm)
   //   X        : extents; X[0]..X[3] are used
   //   parity   : added to the coordinate sum that selects the low bit of x[0]
   //   pc_type  : when equal to QUDA_5D_PC, x[4] also enters that coordinate sum
   //
   // Reference form of the result:
   //   x[4] = cb_index/(X[3]*X[2]*X[1]*X[0]/2);
   //   x[3] = (cb_index/(X[2]*X[1]*X[0]/2)) % X[3];
   //   x[2] = (cb_index/(X[1]*X[0]/2)) % X[2];
   //   x[1] = (cb_index/(X[0]/2)) % X[1];
   //   x[0] = 2*(cb_index%(X[0]/2)) + ((x[3]+x[2]+x[1]+parity)&1);
   // (with x[4] added inside the &1 term when pc_type == QUDA_5D_PC)
   template <typename I>
   static __device__ __host__ inline void getCoords5(int x[5], int cb_index, const I X[5],
                 int parity, QudaDWFPCType pc_type) {
     // Successive quotients; each remainder is formed as n - q*d instead of n % d.
     const int q0 = cb_index / (X[0] >> 1);
     const int q1 = q0 / X[1];
     x[1] = q0 - q1 * X[1];
     const int q2 = q1 / X[2];
     x[2] = q1 - q2 * X[2];
     x[4] = q2 / X[3];
     x[3] = q2 - x[4] * X[3];

     const int lowBit = (x[1] + x[2] + x[3] + (pc_type==QUDA_5D_PC ? x[4] : 0) + parity) & 1;
     x[0] = (2 * cb_index + lowBit) - q0 * X[0];
   }

   // Converts a halved site index plus parity into the full linear index.
   //   cb_index : halved linear index (presumably the checkerboard index -- TODO confirm)
   //   X        : extents; X[0], X[1] and X[2] are used
   //   parity   : added to the sum of coordinates 1..3 to select the low bit
   // Returns 2*cb_index plus the low bit of (c1 + c2 + c3 + parity), where
   // c1..c3 are the coordinates in dimensions 1..3 decoded from cb_index.
   template <typename I>
   static __device__ __host__ inline int getIndexFull(int cb_index, const I X[4], int parity) {
     // Successive quotients; each remainder is formed as n - q*d instead of n % d.
     const int q0 = cb_index / (X[0] / 2);
     const int q1 = q0 / X[1];
     const int c1 = q0 - q1 * X[1];
     const int c3 = q1 / X[2];
     const int c2 = q1 - c3 * X[2];
     const int lowBit = (c1 + c2 + c3 + parity) & 1;
     return 2 * cb_index + lowBit;
   }

   // Computes a linear index, shifted right by one bit, in which the coordinate
   // along `dim` is the slowest-running one, followed by x[4] and then the
   // remaining coordinates in descending dimension order.
   //   dir   : template parameter; 0 uses x[dim] directly, 1 uses
   //           x[dim] - X[dim] + nFace as the slowest coordinate
   //   x     : coordinates; entries 0..4 are read
   //   X     : extents; entries 0..4 are read
   //   dim   : dimension (0..3) whose coordinate runs slowest
   //   nFace : only used when dir == 1 (presumably the face depth -- TODO confirm)
   // Returns 0 when dim is outside 0..3 or dir is neither 0 nor 1.
   template <int dir, typename I>
   __device__ __host__ inline int ghostFaceIndex(const int x[], const I X[], int dim, int nFace) {
     int index = 0;
     if (dim == 0) {
       if (dir == 0) {
         index = (x[0]*X[4]*X[3]*X[2]*X[1] + x[4]*X[3]*X[2]*X[1] + x[3]*(X[2]*X[1]) + x[2]*X[1] + x[1]) >> 1;
       } else if (dir == 1) {
         index = ((x[0]-X[0]+nFace)*X[4]*X[3]*X[2]*X[1] + x[4]*X[3]*X[2]*X[1] + x[3]*(X[2]*X[1]) + x[2]*X[1] + x[1]) >> 1;
       }
     } else if (dim == 1) {
       if (dir == 0) {
         index = (x[1]*X[4]*X[3]*X[2]*X[0] + x[4]*X[3]*X[2]*X[0] + x[3]*X[2]*X[0] + x[2]*X[0] + x[0]) >> 1;
       } else if (dir == 1) {
         index = ((x[1]-X[1]+nFace)*X[4]*X[3]*X[2]*X[0] + x[4]*X[3]*X[2]*X[0] + x[3]*X[2]*X[0] + x[2]*X[0] + x[0]) >> 1;
       }
     } else if (dim == 2) {
       if (dir == 0) {
         index = (x[2]*X[4]*X[3]*X[1]*X[0] + x[4]*X[3]*X[1]*X[0] + x[3]*X[1]*X[0] + x[1]*X[0] + x[0]) >> 1;
       } else if (dir == 1) {
         index = ((x[2]-X[2]+nFace)*X[4]*X[3]*X[1]*X[0] + x[4]*X[3]*X[1]*X[0] + x[3]*X[1]*X[0] + x[1]*X[0] + x[0]) >> 1;
       }
     } else if (dim == 3) {
       if (dir == 0) {
         index = (x[3]*X[4]*X[2]*X[1]*X[0] + x[4]*X[2]*X[1]*X[0] + x[2]*X[1]*X[0] + x[1]*X[0] + x[0]) >> 1;
       } else if (dir == 1) {
         index = ((x[3]-X[3]+nFace)*X[4]*X[2]*X[1]*X[0] + x[4]*X[2]*X[1]*X[0] + x[2]*X[1]*X[0] + x[1]*X[0] + x[0]) >> 1;
       }
     }
     return index;
   }

 } // namespace quda
quda::getIndexFull
static __device__ __host__ int getIndexFull(int cb_index, const I X[4], int parity)
Definition: index_helper.cuh:211

quda::getCoordsExtended
static __device__ __host__ void getCoordsExtended(I x[], int cb_index, const J X[], int parity, const int R[])
Definition: index_helper.cuh:154

mu
double mu
Definition: test_util.cpp:1643

quda::linkIndexShift
static __device__ __host__ int linkIndexShift(const I x[], const J dx[], const K X[4])
Definition: index_helper.cuh:13

quda::linkIndex
static __device__ __host__ int linkIndex(const int x[], const I X[4])
Definition: index_helper.cuh:46

za
int za
Definition: staggered_dslash_core.h:369

quda::getCoords5
static __device__ __host__ void getCoords5(int x[5], int cb_index, const I X[5], int parity, QudaDWFPCType pc_type)
Definition: index_helper.cuh:181

dim
static __inline__ dim3 dim3 void size_t cudaStream_t int dim
Definition: CMakeCUDACompilerId.cpp1.ii:15687

R
static int R[4]
Definition: interface_quda.cpp:83

quda::ghostFaceIndex
__device__ __host__ int ghostFaceIndex(const int x[], const I X[], int dim, int nFace)
Definition: index_helper.cuh:230

zb
int zb
Definition: staggered_dslash_core.h:369

quda
Definition: blas_cublas.h:6

index
char * index(const char *, int)

x
p x
Definition: CMakeCUDACompilerId.cpp1.ii:3011

QudaDWFPCType
enum QudaDWFPCType_s QudaDWFPCType

quda::linkIndexM1
static __device__ __host__ int linkIndexM1(const int x[], const I X[4], const int mu)
Definition: index_helper.cuh:75

fused_exterior_ndeg_tm_dslash_cuda_gen.i
int i
start here
Definition: fused_exterior_ndeg_tm_dslash_cuda_gen.py:816

X
int X
Definition: asym_wilson_clover_dslash_dagger_fermi_core.h:394

idx
int idx
Definition: staggered_fused_exterior_dslash_core.h:355

QUDA_5D_PC
Definition: enum_quda.h:361

d
static __inline__ size_t size_t d
Definition: CMakeCUDACompilerId.cpp1.ii:3019

quda::linkIndexP1
static __device__ __host__ int linkIndexP1(const int x[], const I X[4], const int mu)
Definition: index_helper.cuh:111

parity
QudaParity parity
Definition: covdev_test.cpp:53

quda::linkNormalIndexP1
static __device__ __host__ int linkNormalIndexP1(const int x[], const I X[4], const int mu)
Definition: index_helper.cuh:93

y
int y
Definition: CMakeCUDACompilerId.cpp1.ii:2637

quda::getCoords
static __device__ __host__ void getCoords(int x[], int cb_index, const I X[], int parity)
Definition: index_helper.cuh:129