// Preprocessor configuration for the kernel body. This excerpt is discontinuous:
// lines between the visible directives are omitted, so the full #if/#else/#endif
// nesting cannot be reconstructed from here.
// This body declares zero shared-memory floats per thread.
3 #define DSLASH_SHARED_FLOATS_PER_THREAD 0
// VOLATILE is a qualifier macro selected by CUDA toolkit version.
// NOTE(review): the lines between the #if and the #define below are not visible,
// so it is unclear which branch of the conditional this definition sits in — confirm.
7 #if (CUDA_VERSION >= 4010)
10 #define VOLATILE volatile
// Two alternative precision setups follow: spinorFloat is defined as double with
// the *_d aliases for mdwf_b5/mdwf_c5, or as float with the *_f aliases.
// Presumably chosen by SPINOR_DOUBLE (named on the closing #endif); the opening
// directive is outside this excerpt.
14 #define spinorFloat double
40 #define mdwf_b5 mdwf_b5_d
41 #define mdwf_c5 mdwf_c5_d
43 #define spinorFloat float
69 #define mdwf_b5 mdwf_b5_f
70 #define mdwf_c5 mdwf_c5_f
71 #endif // SPINOR_DOUBLE
// SHARED_STRIDE selection by compute capability. Two groups are visible:
// 16 / 8 and 32 / 16, each keyed on __COMPUTE_CAPABILITY__ >= 200.
// NOTE(review): the #else/#endif lines and whatever selects between the two
// groups are omitted here — presumably the precision switch; confirm in the full file.
100 #if (__COMPUTE_CAPABILITY__ >= 200)
101 #define SHARED_STRIDE 16 // to avoid bank conflicts on Fermi
103 #define SHARED_STRIDE 8 // to avoid bank conflicts on G80 and GT200
106 #if (__COMPUTE_CAPABILITY__ >= 200)
107 #define SHARED_STRIDE 32 // to avoid bank conflicts on Fermi
109 #define SHARED_STRIDE 16 // to avoid bank conflicts on G80 and GT200
// Flattened thread index: combines the y and x block/thread coordinates of a
// 2D grid of 2D blocks into a single integer, with threadIdx.x varying fastest.
114 int sid = ((blockIdx.y*blockDim.y + threadIdx.y)*gridDim.x + blockIdx.x)*blockDim.x + threadIdx.x;
// X is twice sid plus a 0/1 offset taken from (boundaryCrossing + param.parity) % 2.
// boundaryCrossing is computed outside this excerpt.
126 X = 2*
sid + (boundaryCrossing +
param.parity) % 2;
// Conditional section compiled only when MDWF_mode is defined; its contents are
// omitted from this excerpt.
144 #ifdef MDWF_mode // Check whether MDWF option is enabled
148 #endif // select MDWF mode
// base_idx is sid reduced modulo Vh.
// Presumably Vh is the per-slice site count, so this drops the slice offset
// from sid — TODO confirm against the definition of Vh.
158 int base_idx =
sid%
Vh;
// Coefficient for slice s relative to slice xs ("R" variant):
//   xs >  s : -inv_d_n * kappa^(param.Ls - xs + s) * mferm
//   xs <= s :  inv_d_n * kappa^(s - xs)
// and the selected value is halved.
// NOTE(review): 2.0 is a double literal; when spinorFloat is float this division
// may be evaluated in double precision — confirm whether that is intended.
// NOTE(review): which pow overload is used (and its return type) is not
// determinable from this excerpt.
172 factorR = ( xs >
s ? -inv_d_n*
pow(kappa,
param.Ls-xs+
s)*mferm : inv_d_n*
pow(kappa,
s-xs))/2.0;
// Index for slice s: base_idx offset by s whole blocks of Vh entries.
174 sp_idx = base_idx +
s*
Vh;
// Mirror of factorR with the roles of xs and s exchanged ("L" variant):
//   xs <  s : -inv_d_n * kappa^(param.Ls - s + xs) * mferm
//   xs >= s :  inv_d_n * kappa^(xs - s)
// again halved.
203 factorL = ( xs <
s ? -inv_d_n*
pow(kappa,
param.Ls-
s+xs)*mferm : inv_d_n*
pow(kappa,xs-
s))/2.0;
// Accumulate step, closed by the DSLASH_XPAY #endif below.
// READ_ACCUM is a macro defined elsewhere; it is invoked with ACCUMTEX and
// param.sp_stride and presumably populates accum0 (and sibling values) — confirm.
235 READ_ACCUM(ACCUMTEX,
param.sp_stride)
// o00_re <- a * o00_re + accum0.x
237 o00_re = a*o00_re + accum0.x;
// The same update appears a second time; presumably one copy belongs to each
// precision branch closed by the SPINOR_DOUBLE #endif — the intervening lines
// are omitted from this excerpt.
262 o00_re = a*o00_re + accum0.x;
286 #endif // SPINOR_DOUBLE
287 #endif // DSLASH_XPAY
// Declaration fragments, listed out of their original order and without
// initialisers or terminators in this excerpt.
// The oXY_re / oXY_im names cover X in 0..3 and Y in 0..2, each with a _re and
// an _im member (24 names in total), all declared as VOLATILE spinorFloat.
// Presumably X/Y index spin/colour and _re/_im are real/imaginary parts — confirm.
VOLATILE spinorFloat o21_re
VOLATILE spinorFloat o12_im
VOLATILE spinorFloat o20_re
VOLATILE spinorFloat o32_re
VOLATILE spinorFloat o22_im
VOLATILE spinorFloat o11_im
VOLATILE spinorFloat o02_re
VOLATILE spinorFloat o12_re
// kappa: scalar of the same type, used as the base of pow() in the factor computations.
VOLATILE spinorFloat kappa
VOLATILE spinorFloat o01_re
VOLATILE spinorFloat o31_im
VOLATILE spinorFloat o10_im
VOLATILE spinorFloat o02_im
// Signature of a pow helper callable from both host and device code.
// ValueType / ExponentType are presumably template parameters; the template
// header and the function body are not visible here.
__host__ __device__ ValueType pow(ValueType x, ExponentType e)
VOLATILE spinorFloat o30_re
VOLATILE spinorFloat o01_im
VOLATILE spinorFloat o11_re
VOLATILE spinorFloat o00_re
VOLATILE spinorFloat o32_im
VOLATILE spinorFloat o31_re
VOLATILE spinorFloat o30_im
VOLATILE spinorFloat o00_im
VOLATILE spinorFloat o20_im
VOLATILE spinorFloat o21_im
VOLATILE spinorFloat o22_re
VOLATILE spinorFloat o10_re