3 #define DSLASH_SHARED_FLOATS_PER_THREAD 0 6 #if ((CUDA_VERSION >= 4010) && (__COMPUTE_CAPABILITY__ >= 200)) // NVVM compiler 8 #else // Open64 compiler 9 #define VOLATILE volatile 13 #define spinorFloat double 38 #define acc00_re accum0.x 39 #define acc00_im accum0.y 40 #define acc01_re accum1.x 41 #define acc01_im accum1.y 42 #define acc02_re accum2.x 43 #define acc02_im accum2.y 44 #define acc10_re accum3.x 45 #define acc10_im accum3.y 46 #define acc11_re accum4.x 47 #define acc11_im accum4.y 48 #define acc12_re accum5.x 49 #define acc12_im accum5.y 50 #define acc20_re accum6.x 51 #define acc20_im accum6.y 52 #define acc21_re accum7.x 53 #define acc21_im accum7.y 54 #define acc22_re accum8.x 55 #define acc22_im accum8.y 56 #define acc30_re accum9.x 57 #define acc30_im accum9.y 58 #define acc31_re accum10.x 59 #define acc31_im accum10.y 60 #define acc32_re accum11.x 61 #define acc32_im accum11.y 63 #define spinorFloat float 88 #define acc00_re accum0.x 89 #define acc00_im accum0.y 90 #define acc01_re accum0.z 91 #define acc01_im accum0.w 92 #define acc02_re accum1.x 93 #define acc02_im accum1.y 94 #define acc10_re accum1.z 95 #define acc10_im accum1.w 96 #define acc11_re accum2.x 97 #define acc11_im accum2.y 98 #define acc12_re accum2.z 99 #define acc12_im accum2.w 100 #define acc20_re accum3.x 101 #define acc20_im accum3.y 102 #define acc21_re accum3.z 103 #define acc21_im accum3.w 104 #define acc22_re accum4.x 105 #define acc22_im accum4.y 106 #define acc30_re accum4.z 107 #define acc30_im accum4.w 108 #define acc31_re accum5.x 109 #define acc31_im accum5.y 110 #define acc32_re accum5.z 111 #define acc32_im accum5.w 112 #endif // SPINOR_DOUBLE 155 #endif // GAUGE_DOUBLE 158 #define gT00_re (+g00_re) 159 #define gT00_im (-g00_im) 160 #define gT01_re (+g10_re) 161 #define gT01_im (-g10_im) 162 #define gT02_re (+g20_re) 163 #define gT02_im (-g20_im) 164 #define gT10_re (+g01_re) 165 #define gT10_im (-g01_im) 166 #define gT11_re (+g11_re) 167 #define gT11_im (-g11_im) 168 #define gT12_re (+g21_re) 169 #define gT12_im (-g21_im) 170 #define gT20_re (+g02_re) 171 #define gT20_im (-g02_im) 172 #define gT21_re (+g12_re) 173 #define gT21_im (-g12_im) 174 #define gT22_re (+g22_re) 175 #define gT22_im (-g22_im) 282 face_idx +
param.ghostOffset[
static_cast<int>(kernel_type)][1];
283 #if (DD_PREC==2) // half precision 284 const int sp_norm_idx =
face_idx +
param.ghostNormOffset[
static_cast<int>(kernel_type)][1];
304 #ifdef TWIST_INV_DSLASH 314 #ifdef TWIST_INV_DSLASH 335 const int sp_stride_pad =
param.dc.ghostFace[
static_cast<int>(kernel_type)];
488 face_idx +
param.ghostOffset[static_cast<int>(kernel_type)][0];
489 #if (DD_PREC==2) // half precision 490 const int sp_norm_idx =
face_idx +
param.ghostNormOffset[
static_cast<int>(kernel_type)][0];
514 #ifdef TWIST_INV_DSLASH 524 #ifdef TWIST_INV_DSLASH 545 const int sp_stride_pad =
param.dc.ghostFace[
static_cast<int>(kernel_type)];
698 face_idx +
param.ghostOffset[static_cast<int>(kernel_type)][1];
699 #if (DD_PREC==2) // half precision 700 const int sp_norm_idx =
face_idx +
param.ghostNormOffset[
static_cast<int>(kernel_type)][1];
720 #ifdef TWIST_INV_DSLASH 730 #ifdef TWIST_INV_DSLASH 751 const int sp_stride_pad =
param.dc.ghostFace[
static_cast<int>(kernel_type)];
904 face_idx +
param.ghostOffset[
static_cast<int>(kernel_type)][0];
905 #if (DD_PREC==2) // half precision 906 const int sp_norm_idx =
face_idx +
param.ghostNormOffset[
static_cast<int>(kernel_type)][0];
930 #ifdef TWIST_INV_DSLASH 940 #ifdef TWIST_INV_DSLASH 961 const int sp_stride_pad =
param.dc.ghostFace[
static_cast<int>(kernel_type)];
1114 face_idx +
param.ghostOffset[static_cast<int>(kernel_type)][1];
1115 #if (DD_PREC==2) // half precision 1116 const int sp_norm_idx =
face_idx +
param.ghostNormOffset[
static_cast<int>(kernel_type)][1];
1136 #ifdef TWIST_INV_DSLASH 1137 #ifdef SPINOR_DOUBLE 1146 #ifdef TWIST_INV_DSLASH 1167 const int sp_stride_pad =
param.dc.ghostFace[
static_cast<int>(kernel_type)];
1320 face_idx +
param.ghostOffset[
static_cast<int>(kernel_type)][0];
1321 #if (DD_PREC==2) // half precision 1322 const int sp_norm_idx =
face_idx +
param.ghostNormOffset[
static_cast<int>(kernel_type)][0];
1346 #ifdef TWIST_INV_DSLASH 1347 #ifdef SPINOR_DOUBLE 1356 #ifdef TWIST_INV_DSLASH 1377 const int sp_stride_pad =
param.dc.ghostFace[
static_cast<int>(kernel_type)];
1530 face_idx +
param.ghostOffset[static_cast<int>(kernel_type)][1];
1531 #if (DD_PREC==2) // half precision 1532 const int sp_norm_idx =
face_idx +
param.ghostNormOffset[
static_cast<int>(kernel_type)][1];
1554 #ifdef TWIST_INV_DSLASH 1555 #ifdef SPINOR_DOUBLE 1563 #ifndef TWIST_INV_DSLASH 1587 const int sp_stride_pad =
param.dc.ghostFace[
static_cast<int>(kernel_type)];
1639 #ifdef TWIST_INV_DSLASH 1640 #ifdef SPINOR_DOUBLE 1648 #ifndef TWIST_INV_DSLASH 1672 const int sp_stride_pad =
param.dc.ghostFace[
static_cast<int>(kernel_type)];
1815 face_idx +
param.ghostOffset[
static_cast<int>(kernel_type)][0];
1816 #if (DD_PREC==2) // half precision 1817 const int sp_norm_idx =
face_idx +
param.ghostNormOffset[
static_cast<int>(kernel_type)][0];
1843 #ifdef TWIST_INV_DSLASH 1844 #ifdef SPINOR_DOUBLE 1852 #ifndef TWIST_INV_DSLASH 1876 const int sp_stride_pad =
param.dc.ghostFace[
static_cast<int>(kernel_type)];
1928 #ifdef TWIST_INV_DSLASH 1929 #ifdef SPINOR_DOUBLE 1937 #ifndef TWIST_INV_DSLASH 1961 const int sp_stride_pad =
param.dc.ghostFace[
static_cast<int>(kernel_type)];
2095 switch(kernel_type) {
2109 #ifndef TWIST_INV_DSLASH 2110 #ifdef SPINOR_DOUBLE 2119 READ_ACCUM(ACCUMTEX,
param.sp_stride)
2122 #ifndef TWIST_INV_DSLASH 2180 #ifndef TWIST_INV_DSLASH
VOLATILE spinorFloat o30_re
VOLATILE spinorFloat o21_im
READ_GAUGE_MATRIX(G, GAUGE0TEX, 0, ga_idx, param.gauge_stride)
VOLATILE spinorFloat o21_re
#define APPLY_TWIST(a, reg)
READ_SPINOR_UP(SPINORTEX, param.sp_stride, sp_idx, sp_idx)
VOLATILE spinorFloat o00_re
VOLATILE spinorFloat o31_re
VOLATILE spinorFloat o01_im
VOLATILE spinorFloat o22_im
VOLATILE spinorFloat o11_re
VOLATILE spinorFloat o32_re
VOLATILE spinorFloat o12_re
VOLATILE spinorFloat o02_re
#define READ_INTERMEDIATE_SPINOR
VOLATILE spinorFloat o22_re
VOLATILE spinorFloat o10_im
VOLATILE spinorFloat o10_re
VOLATILE spinorFloat o20_re
coordsFromIndex< 4, QUDA_4D_PC, EVEN_X >(X, coord, sid, param)
#define READ_SPINOR_GHOST
VOLATILE spinorFloat o20_im
VOLATILE spinorFloat o11_im
READ_SPINOR(SPINORTEX, param.sp_stride, sp_idx, sp_idx)
VOLATILE spinorFloat o30_im
READ_SPINOR_DOWN(SPINORTEX, param.sp_stride, sp_idx, sp_idx)
VOLATILE spinorFloat o02_im
VOLATILE spinorFloat o31_im
VOLATILE spinorFloat o01_re
VOLATILE spinorFloat o00_im
VOLATILE spinorFloat o32_im
VOLATILE spinorFloat o12_im
WRITE_SPINOR(param.sp_stride)
RECONSTRUCT_GAUGE_MATRIX(0)