// Dslash kernel-name/accessor selection. Based on the DD_CLOVER, DD_DAG, DD_XPAY, DD_RECON and
// DD_PREC build parameters, this block picks the DD_*_F name fragments, the gauge/spinor/accum/clover
// read-write macros and their texture (or direct-access) sources, pastes the kernel name together via
// DD_CONCAT/DD_FUNC, and finally #undef's the per-instantiation macros.
// Fix: the half-precision DIRECT_ACCESS_CLOVER branch used `#ifdef (DD_CLOVER==2)`; #ifdef accepts only
// a single identifier, so the value test must be written `#if (DD_CLOVER==2)`.
// NOTE(review): the final empty `#define DD_FUNC(n, p, r, d, x)` follows an earlier non-empty DD_FUNC
// definition with no #undef visible in this chunk - confirm it is intended.
40 #if (DD_CLOVER==0) // no clover 41 #define DD_NAME_F dslash 42 #elif (DD_CLOVER==1) // clover 44 #define DD_NAME_F cloverDslash 47 #define DSLASH_CLOVER_XPAY 48 #define DD_NAME_F asymCloverDslash 51 #if (DD_DAG==0) // no dagger 54 #define DD_DAG_F Dagger 58 #if (DD_XPAY==0) && defined(DSLASH_CLOVER_XPAY) 63 #if (DD_XPAY==0) // no xpay 66 #define DD_XPAY_F Xpay 78 #if (DD_RECON==0) // reconstruct from 8 reals 82 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_DOUBLE 83 #ifdef DIRECT_ACCESS_LINK 84 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_DOUBLE2 86 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_DOUBLE2_TEX 87 #endif // DIRECT_ACCESS_LINK 90 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_SINGLE 91 #ifdef DIRECT_ACCESS_LINK 92 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_FLOAT4 94 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_FLOAT4_TEX 95 #endif // DIRECT_ACCESS_LINK 98 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_SINGLE 99 #ifdef DIRECT_ACCESS_LINK 100 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_SHORT4 102 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_SHORT4_TEX 103 #endif // DIRECT_ACCESS_LINK 105 #elif (DD_RECON==1) // reconstruct from 12 reals 106 #define DD_RECON_F 12 109 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_DOUBLE 110 #ifdef DIRECT_ACCESS_LINK 111 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_DOUBLE2 113 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_DOUBLE2_TEX 114 #endif // DIRECT_ACCESS_LINK 117 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_SINGLE 118 #ifdef DIRECT_ACCESS_LINK 119 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_FLOAT4 121 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_FLOAT4_TEX 122 #endif // DIRECT_ACCESS_LINK 125 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_SINGLE 126 #ifdef DIRECT_ACCESS_LINK 127 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_SHORT4 129 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_SHORT4_TEX 130 #endif // DIRECT_ACCESS_LINK 132 #else // no reconstruct, 
load all components 133 #define DD_RECON_F 18 136 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_DOUBLE 137 #ifdef DIRECT_ACCESS_LINK 138 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_DOUBLE2 140 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_DOUBLE2_TEX 141 #endif // DIRECT_ACCESS_LINK 144 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_SINGLE 145 #ifdef DIRECT_ACCESS_LINK 146 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_FLOAT2 148 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_FLOAT2_TEX 149 #endif // DIRECT_ACCESS_LINK 152 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_SINGLE 153 #ifdef DIRECT_ACCESS_LINK 154 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_SHORT2 156 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_SHORT2_TEX 157 #endif //DIRECT_ACCESS_LINK 161 #if (DD_PREC==0) // double-precision fields 163 #define TPROJSCALE param.tProjScale 166 #if (defined DIRECT_ACCESS_LINK) || (defined FERMI_NO_DBLE_TEX) 167 #define GAUGE0TEX param.gauge0 168 #define GAUGE1TEX param.gauge1 170 #ifdef USE_TEXTURE_OBJECTS 171 #define GAUGE0TEX param.gauge0Tex 172 #define GAUGE1TEX param.gauge1Tex 174 #define GAUGE0TEX gauge0TexDouble2 175 #define GAUGE1TEX gauge1TexDouble2 182 #if (defined DIRECT_ACCESS_WILSON_SPINOR) || (defined FERMI_NO_DBLE_TEX) 183 #define READ_SPINOR READ_SPINOR_DOUBLE 184 #define READ_SPINOR_GHOST READ_SPINOR_GHOST_DOUBLE 185 #define READ_SPINOR_UP READ_SPINOR_DOUBLE_UP 186 #define READ_SPINOR_DOWN READ_SPINOR_DOUBLE_DOWN 187 #define SPINORTEX param.in 188 #define GHOSTSPINORTEX param.ghost 190 #define READ_SPINOR READ_SPINOR_DOUBLE_TEX 191 #define READ_SPINOR_GHOST READ_SPINOR_GHOST_DOUBLE_TEX 192 #define READ_SPINOR_UP READ_SPINOR_DOUBLE_UP_TEX 193 #define READ_SPINOR_DOWN READ_SPINOR_DOUBLE_DOWN_TEX 194 #ifdef USE_TEXTURE_OBJECTS 195 #define SPINORTEX param.inTex 196 #define GHOSTSPINORTEX param.ghostTex 198 #define SPINORTEX spinorTexDouble 199 #define GHOSTSPINORTEX ghostSpinorTexDouble 200 #endif // USE_TEXTURE_OBJECTS 202 
#if (defined DIRECT_ACCESS_WILSON_INTER) || (defined FERMI_NO_DBLE_TEX) 203 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_DOUBLE 204 #define INTERTEX param.out 206 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_DOUBLE_TEX 207 #ifdef USE_TEXTURE_OBJECTS 208 #define INTERTEX param.outTex 210 #define INTERTEX interTexDouble 213 #define WRITE_SPINOR WRITE_SPINOR_DOUBLE2_STR 214 #define SPINOR_DOUBLE 216 #if (defined DIRECT_ACCESS_WILSON_ACCUM) || (defined FERMI_NO_DBLE_TEX) 217 #define ACCUMTEX param.x 218 #define READ_ACCUM READ_ACCUM_DOUBLE 220 #ifdef USE_TEXTURE_OBJECTS 221 #define ACCUMTEX param.xTex 223 #define ACCUMTEX accumTexDouble 224 #endif // USE_TEXTURE_OBJECTS 225 #define READ_ACCUM READ_ACCUM_DOUBLE_TEX 230 #define SPINOR_HOP 12 233 #if (defined DIRECT_ACCESS_CLOVER) || (defined FERMI_NO_DBLE_TEX) 234 #define CLOVERTEX param.clover 236 #define READ_CLOVER READ_CLOVER2_DOUBLE_STR 238 #define READ_CLOVER READ_CLOVER_DOUBLE_STR 241 #ifdef USE_TEXTURE_OBJECTS 242 #define CLOVERTEX (param.cloverTex) 244 #define CLOVERTEX cloverTexDouble 248 #define READ_CLOVER READ_CLOVER2_DOUBLE_TEX 250 #define READ_CLOVER READ_CLOVER_DOUBLE_TEX 254 #define CLOVER_DOUBLE 256 #elif (DD_PREC==1) // single-precision fields 258 #define TPROJSCALE param.tProjScale_f 261 #ifdef DIRECT_ACCESS_LINK 262 #define GAUGE0TEX param.gauge0 263 #define GAUGE1TEX param.gauge1 265 #ifdef USE_TEXTURE_OBJECTS 266 #define GAUGE0TEX param.gauge0Tex 267 #define GAUGE1TEX param.gauge1Tex 269 #if (DD_RECON_F == 18) 270 #define GAUGE0TEX gauge0TexSingle2 271 #define GAUGE1TEX gauge1TexSingle2 273 #define GAUGE0TEX gauge0TexSingle4 274 #define GAUGE1TEX gauge1TexSingle4 276 #endif // USE_TEXTURE_OBJECTS 281 #ifdef DIRECT_ACCESS_WILSON_SPINOR 282 #define READ_SPINOR READ_SPINOR_SINGLE 283 #define READ_SPINOR_GHOST READ_SPINOR_GHOST_SINGLE 284 #define READ_SPINOR_UP READ_SPINOR_SINGLE_UP 285 #define READ_SPINOR_DOWN READ_SPINOR_SINGLE_DOWN 286 #define SPINORTEX param.in 287 #define GHOSTSPINORTEX 
param.ghost 289 #define READ_SPINOR READ_SPINOR_SINGLE_TEX 290 #define READ_SPINOR_GHOST READ_SPINOR_GHOST_SINGLE_TEX 291 #define READ_SPINOR_UP READ_SPINOR_SINGLE_UP_TEX 292 #define READ_SPINOR_DOWN READ_SPINOR_SINGLE_DOWN_TEX 293 #ifdef USE_TEXTURE_OBJECTS 294 #define SPINORTEX param.inTex 295 #define GHOSTSPINORTEX param.ghostTex 297 #define SPINORTEX spinorTexSingle 298 #define GHOSTSPINORTEX ghostSpinorTexSingle 299 #endif // USE_TEXTURE_OBJECTS 301 #ifdef DIRECT_ACCESS_WILSON_INTER 302 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_SINGLE 303 #define INTERTEX param.out 305 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_SINGLE_TEX 306 #ifdef USE_TEXTURE_OBJECTS 307 #define INTERTEX param.outTex 309 #define INTERTEX interTexSingle 310 #endif // USE_TEXTURE_OBJECTS 312 #define WRITE_SPINOR WRITE_SPINOR_FLOAT4_STR 314 #ifdef DIRECT_ACCESS_WILSON_ACCUM 315 #define ACCUMTEX param.x 316 #define READ_ACCUM READ_ACCUM_SINGLE 318 #ifdef USE_TEXTURE_OBJECTS 319 #define ACCUMTEX param.xTex 321 #define ACCUMTEX accumTexSingle 322 #endif // USE_TEXTURE_OBJECTS 323 #define READ_ACCUM READ_ACCUM_SINGLE_TEX 330 #ifdef DIRECT_ACCESS_CLOVER 331 #define CLOVERTEX param.clover 333 #define READ_CLOVER READ_CLOVER2_SINGLE 335 #define READ_CLOVER READ_CLOVER_SINGLE 338 #ifdef USE_TEXTURE_OBJECTS 339 #define CLOVERTEX (param.cloverTex) 341 #define CLOVERTEX cloverTexSingle 344 #define READ_CLOVER READ_CLOVER2_SINGLE_TEX 346 #define READ_CLOVER READ_CLOVER_SINGLE_TEX 351 #else // half-precision fields 353 #define TPROJSCALE param.tProjScale_f 356 #ifdef DIRECT_ACCESS_LINK 357 #define GAUGE0TEX param.gauge0 358 #define GAUGE1TEX param.gauge1 360 #ifdef USE_TEXTURE_OBJECTS 361 #define GAUGE0TEX param.gauge0Tex 362 #define GAUGE1TEX param.gauge1Tex 364 #if (DD_RECON_F == 18) 365 #define GAUGE0TEX gauge0TexHalf2 366 #define GAUGE1TEX gauge1TexHalf2 368 #define GAUGE0TEX gauge0TexHalf4 369 #define GAUGE1TEX gauge1TexHalf4 371 #endif // USE_TEXTURE_OBJECTS 376 #ifdef DIRECT_ACCESS_WILSON_SPINOR 
377 #define READ_SPINOR READ_SPINOR_HALF 378 #define READ_SPINOR_GHOST READ_SPINOR_GHOST_HALF 379 #define READ_SPINOR_UP READ_SPINOR_HALF_UP 380 #define READ_SPINOR_DOWN READ_SPINOR_HALF_DOWN 381 #define SPINORTEX param.in 382 #define GHOSTSPINORTEX param.ghost 384 #define READ_SPINOR READ_SPINOR_HALF_TEX 385 #define READ_SPINOR_GHOST READ_SPINOR_GHOST_HALF_TEX 386 #define READ_SPINOR_UP READ_SPINOR_HALF_UP_TEX 387 #define READ_SPINOR_DOWN READ_SPINOR_HALF_DOWN_TEX 388 #ifdef USE_TEXTURE_OBJECTS 389 #define SPINORTEX param.inTex 390 #define GHOSTSPINORTEX param.ghostTex 392 #define SPINORTEX spinorTexHalf 393 #define GHOSTSPINORTEX ghostSpinorTexHalf 394 #endif // USE_TEXTURE_OBJECTS 396 #ifdef DIRECT_ACCESS_WILSON_INTER 397 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_HALF 398 #define INTERTEX param.out 400 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_HALF_TEX 401 #ifdef USE_TEXTURE_OBJECTS 402 #define INTERTEX param.outTex 404 #define INTERTEX interTexHalf 405 #endif // USE_TEXTURE_OBJECTS 407 #define WRITE_SPINOR WRITE_SPINOR_SHORT4_STR 409 #ifdef DIRECT_ACCESS_WILSON_ACCUM 410 #define ACCUMTEX param.x 411 #define READ_ACCUM READ_ACCUM_HALF 413 #ifdef USE_TEXTURE_OBJECTS 414 #define ACCUMTEX param.xTex 416 #define ACCUMTEX accumTexHalf 417 #endif // USE_TEXTURE_OBJECTS 418 #define READ_ACCUM READ_ACCUM_HALF_TEX 425 #ifdef DIRECT_ACCESS_CLOVER 426 #define CLOVERTEX param.clover 427 #if (DD_CLOVER==2) 428 #define READ_CLOVER READ_CLOVER2_HALF 430 #define READ_CLOVER READ_CLOVER_HALF 432 #define CLOVERTEXNORM (param.cloverNorm) 434 #ifdef USE_TEXTURE_OBJECTS 435 #define CLOVERTEX (param.cloverTex) 436 #define CLOVERTEXNORM (param.cloverNormTex) 438 #define CLOVERTEX cloverTexHalf 439 #define CLOVERTEXNORM cloverTexNorm 442 #define READ_CLOVER READ_CLOVER2_HALF_TEX 444 #define READ_CLOVER READ_CLOVER_HALF_TEX 450 #define DD_CONCAT(n,p,r,d,x) n ## p ## r ## d ## x ## Kernel 451 #define DD_FUNC(n,p,r,d,x) DD_CONCAT(n,p,r,d,x) 455 template <KernelType kernel_type>
459 #if ((DD_CLOVER==0 && defined(GPU_WILSON_DIRAC)) || ((DD_CLOVER==1 || DD_CLOVER==2) && defined(GPU_CLOVER_DIRAC))) 461 #ifdef SHARED_WILSON_DSLASH // Fermi optimal code 463 #ifdef DSLASH_CLOVER_XPAY 481 #else // no shared-memory blocking 483 #ifdef DSLASH_CLOVER_XPAY 501 #endif // SHARED_WILSON_DSLASH 514 #if ((DD_CLOVER==0 && defined(GPU_WILSON_DIRAC)) || ((DD_CLOVER==1 || DD_CLOVER==2) && defined(GPU_CLOVER_DIRAC))) 516 #ifdef SHARED_WILSON_DSLASH // Fermi optimal code 518 #ifdef DSLASH_CLOVER_XPAY 536 #else // no shared-memory blocking 538 #ifdef DSLASH_CLOVER_XPAY 556 #endif // SHARED_WILSON_DSLASH 575 #undef READ_GAUGE_MATRIX 576 #undef RECONSTRUCT_GAUGE_MATRIX 580 #undef READ_SPINOR_GHOST 581 #undef READ_SPINOR_UP 582 #undef READ_SPINOR_DOWN 584 #undef GHOSTSPINORTEX 585 #undef READ_INTERMEDIATE_SPINOR 593 #undef DSLASH_CLOVER_XPAY
#define DD_FUNC(n, p, r, d, x)