6 #if ((CUDA_VERSION >= 4010) && (__COMPUTE_CAPABILITY__ >= 200)) // NVVM compiler
8 #else // Open64 compiler
9 #define VOLATILE volatile
13 #define spinorFloat double
14 #define WRITE_SPINOR_SHARED WRITE_SPINOR_SHARED_DOUBLE2
15 #define READ_SPINOR_SHARED READ_SPINOR_SHARED_DOUBLE2
41 #define spinorFloat float
66 #endif // SPINOR_DOUBLE
109 #endif // GAUGE_DOUBLE
112 #define gT00_re (+g00_re)
113 #define gT00_im (-g00_im)
114 #define gT01_re (+g10_re)
115 #define gT01_im (-g10_im)
116 #define gT02_re (+g20_re)
117 #define gT02_im (-g20_im)
118 #define gT10_re (+g01_re)
119 #define gT10_im (-g01_im)
120 #define gT11_re (+g11_re)
121 #define gT11_im (-g11_im)
122 #define gT12_re (+g21_re)
123 #define gT12_im (-g21_im)
124 #define gT20_re (+g02_re)
125 #define gT20_im (-g02_im)
126 #define gT21_re (+g12_re)
127 #define gT21_im (-g12_im)
128 #define gT22_re (+g22_re)
129 #define gT22_im (-g22_im)
158 #define SHARED_STRIDE 16 // to avoid bank conflicts on Fermi
160 #define SHARED_STRIDE 32 // to avoid bank conflicts on Fermi
169 #if (defined MULTI_GPU) && (DD_PREC==2) // half precision
171 #endif // MULTI_GPU half precision
175 sid = blockIdx.x*blockDim.x + threadIdx.x;
190 x2 = aux1 - aux2 *
X2;
221 const int face_volume =
param.threads;
222 const int face_num = 0;
231 #if (DD_PREC==2) // half precision
236 coordsFromFaceIndex<1>(
X,
sid,
x1,
x2,
x3,
x4,
face_idx, face_volume,
dim, face_num,
param.parity,
dims);
VOLATILE spinorFloat o10_re
VOLATILE spinorFloat o11_re
VOLATILE spinorFloat o21_re
READ_SPINOR(SPINORTEX, param.sp_stride, sp_idx, sp_idx)
VOLATILE spinorFloat o11_im
VOLATILE spinorFloat o01_im
VOLATILE spinorFloat o31_re
VOLATILE spinorFloat o20_re
VOLATILE spinorFloat o12_im
__constant__ int ghostFace[QUDA_MAX_DIM+1]
VOLATILE spinorFloat o02_im
RECONSTRUCT_GAUGE_MATRIX(7)
VOLATILE spinorFloat o32_re
VOLATILE spinorFloat o00_re
VOLATILE spinorFloat o12_re
VOLATILE spinorFloat o22_re
VOLATILE spinorFloat o10_im
VOLATILE spinorFloat o22_im
WRITE_SPINOR(param.sp_stride)
__constant__ int X4X3X2X1mX3X2X1
VOLATILE spinorFloat o01_re
__constant__ int ga_stride
VOLATILE spinorFloat o30_re
VOLATILE spinorFloat o02_re
VOLATILE spinorFloat o30_im
VOLATILE spinorFloat o00_im
VOLATILE spinorFloat o21_im
VOLATILE spinorFloat o31_im
#define READ_GAUGE_MATRIX
VOLATILE spinorFloat o20_im
VOLATILE spinorFloat o32_im