11 #define DD_FAT_RECON 8 12 #define DD_LONG_RECON 8 19 #define DD_FNAME improvedStaggeredDslash 21 #define DD_FNAME staggeredDslash 24 #if (DD_AXPY==0) // no axpy 27 #define DD_AXPY_F Axpy 32 #define DD_FAT_RECON_F 8 33 #elif (DD_FAT_RECON==9) 34 #define DD_FAT_RECON_F 9 35 #elif (DD_FAT_RECON==12) 36 #define DD_FAT_RECON_F 12 37 #elif (DD_FAT_RECON==13) 38 #define DD_FAT_RECON_F 13 40 #define DD_FAT_RECON_F 18 43 #define READ_LONG_PHASE(phase, dir, idx, stride) // May be a problem below with redefinitions 45 #if (DD_LONG_RECON==8) // reconstruct from 8 reals 46 #define DD_LONG_RECON_F 8 48 #if (DD_PREC==0) // DOUBLE PRECISION 50 #define RECONSTRUCT_LONG_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_8_DOUBLE 53 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_8_DOUBLE 54 #elif (DD_FAT_RECON==12) 55 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_12_DOUBLE 57 #define RECONSTRUCT_FAT_GAUGE_MATRIX(dir, gauge, idx, sign) 60 #ifdef DIRECT_ACCESS_FAT_LINK 62 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_DOUBLE2(FAT, gauge, dir, idx, stride) 63 #elif (DD_FAT_RECON==12) 64 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_DOUBLE2(FAT, gauge, dir, idx, stride) 66 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_DOUBLE2(FAT, gauge, dir, idx, stride) 68 #else // texture access 70 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_DOUBLE2_TEX(FAT, gauge, dir, idx, stride) 71 #elif (DD_FAT_RECON==12) 72 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_DOUBLE2_TEX(FAT, gauge, dir, idx, stride) 74 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_DOUBLE2_TEX(FAT, gauge, dir, idx, stride) 75 #endif // DD_FAT_RECON 76 #endif // DIRECT_ACCESS_FAT_LINK 78 #ifdef DIRECT_ACCESS_LONG_LINK 79 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_DOUBLE2(LONG, gauge, dir, idx, stride) 81 #define READ_LONG_MATRIX(gauge, dir, idx, 
stride) READ_GAUGE_MATRIX_8_DOUBLE2_TEX(LONG, gauge, dir, idx, stride) 82 #endif // DIRECT_ACCESS_LONG_LINK 84 #elif (DD_PREC==1) // SINGLE PRECISION 86 #define RECONSTRUCT_LONG_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_8_SINGLE 88 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_8_SINGLE 89 #elif (DD_FAT_RECON==12) 90 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_12_SINGLE 92 #define RECONSTRUCT_FAT_GAUGE_MATRIX(dir, gauge, idx, sign) 93 #endif // DD_FAT_RECON 95 #ifdef DIRECT_ACCESS_FAT_LINK 97 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_FLOAT4(FAT, gauge, dir, idx, stride) 98 #elif (DD_FAT_RECON==12) 99 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_FLOAT4(FAT, gauge, dir, idx, stride) 101 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_FLOAT2(FAT, gauge, dir, idx, stride) 102 #endif // DD_FAT_RECON 104 #if (DD_FAT_RECON==8) 105 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_FLOAT4_TEX(FAT, gauge, dir, idx, stride) 106 #elif (DD_FAT_RECON==12) 107 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_FLOAT4_TEX(FAT, gauge, dir, idx, stride) 109 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_FLOAT2_TEX(FAT, gauge, dir, idx, stride) 110 #endif // DD_FAT_RECON 111 #endif // DIRECT_ACCESS_FAT_LINK 113 #ifdef DIRECT_ACCESS_LONG_LINK 114 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_FLOAT4(LONG, gauge, dir, idx, stride) 116 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_FLOAT4_TEX(LONG, gauge, dir, idx, stride) 117 #endif // DIRECT_ACCESS_LONG_LINK 119 #else // HALF PRECISION 121 #define RECONSTRUCT_LONG_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_8_SINGLE 122 #if (DD_FAT_RECON==8) 123 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_8_SINGLE 124 #elif (DD_FAT_RECON==12) 125 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_12_SINGLE 127 #define 
RECONSTRUCT_FAT_GAUGE_MATRIX(dir, gauge, idx, sign) 128 #endif //DD_FAT_RECON 130 #ifdef DIRECT_ACCESS_FAT_LINK 131 #if (DD_FAT_RECON==8) 132 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_SHORT4(FAT, gauge, dir, idx, stride); RESCALE4(FAT, fat_link_max); 133 #elif (DD_FAT_RECON==12) 134 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_SHORT4(FAT, gauge, dir, idx, stride); RESCALE4(FAT, fat_link_max); 135 #elif (DD_FAT_RECON==18) 136 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_SHORT2(FAT, gauge, dir, idx, stride); RESCALE2(FAT, fat_link_max); 139 #if (DD_FAT_RECON==8) 140 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_SHORT4_TEX(FAT, gauge, dir, idx, stride); RESCALE4(FAT, fat_link_max); 141 #elif (DD_FAT_RECON==12) 142 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_SHORT4_TEX(FAT, gauge, dir, idx, stride); RESCALE4(FAT, fat_link_max); 144 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_SHORT2_TEX(FAT, gauge, dir, idx, stride); RESCALE2(FAT, fat_link_max); 146 #endif // DIRECT_ACCESS_FAT_LINK 147 #ifdef DIRECT_ACCESS_LONG_LINK 148 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_SHORT4(LONG, gauge, dir, idx, stride) 150 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_SHORT4_TEX(LONG, gauge, dir, idx, stride) 151 #endif // DIRECT_ACCESS_LONG_LINK 155 #elif (DD_LONG_RECON == 9) // reconstruct from 9 reals 157 #define DD_LONG_RECON_F 9 159 #if (DD_PREC==0) // DOUBLE PRECISION 161 #define RECONSTRUCT_LONG_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_9_DOUBLE 162 #if (DD_FAT_RECON==8) 163 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_8_DOUBLE 164 #elif (DD_FAT_RECON==12) 165 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_12_DOUBLE 167 #define RECONSTRUCT_FAT_GAUGE_MATRIX(dir, gauge, idx, sign) 170 #ifdef DIRECT_ACCESS_FAT_LINK 171 #if (DD_FAT_RECON==8) 172 #define 
READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_DOUBLE2(FAT, gauge, dir, idx, stride) 173 #elif (DD_FAT_RECON==12) 174 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_DOUBLE2(FAT, gauge, dir, idx, stride) 176 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_DOUBLE2(FAT, gauge, dir, idx, stride) 178 #else // texture access 179 #if (DD_FAT_RECON==8) 180 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_DOUBLE2_TEX(FAT, gauge, dir, idx, stride) 181 #elif (DD_FAT_RECON==12) 182 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_DOUBLE2_TEX(FAT, gauge, dir, idx, stride) 184 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_DOUBLE2_TEX(FAT, gauge, dir, idx, stride) 185 #endif // DD_FAT_RECON 186 #endif // DIRECT_ACCESS_FAT_LINK 187 #undef READ_LONG_PHASE 189 #ifdef DIRECT_ACCESS_LONG_LINK 190 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_DOUBLE2(LONG, gauge, dir, idx, stride) 191 #define READ_LONG_PHASE(phase, dir, idx, stride) READ_GAUGE_PHASE_DOUBLE(PHASE, phase, dir, idx, stride); 193 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_DOUBLE2_TEX(LONG, gauge, dir, idx, stride) 194 #define READ_LONG_PHASE(phase, dir, idx, stride) READ_GAUGE_PHASE_DOUBLE_TEX(PHASE, phase, dir, idx, stride); 195 #endif // DIRECT_ACCESS_LONG_LINK 197 #elif (DD_PREC==1) // SINGLE PRECISION 199 #define RECONSTRUCT_LONG_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_9_SINGLE 200 #if (DD_FAT_RECON==8) 201 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_8_SINGLE 202 #elif (DD_FAT_RECON==12) 203 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_12_SINGLE 205 #define RECONSTRUCT_FAT_GAUGE_MATRIX(dir, gauge, idx, sign) 208 #ifdef DIRECT_ACCESS_FAT_LINK 209 #if (DD_FAT_RECON==8) 210 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_FLOAT4(FAT, gauge, dir, idx, stride) 211 #elif (DD_FAT_RECON==12) 212 #define 
READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_FLOAT4(FAT, gauge, dir, idx, stride) 214 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_FLOAT2(FAT, gauge, dir, idx, stride) 217 #if (DD_FAT_RECON==8) 218 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_FLOAT4_TEX(FAT, gauge, dir, idx, stride) 219 #elif (DD_FAT_RECON==12) 220 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_FLOAT4_TEX(FAT, gauge, dir, idx, stride) 222 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_FLOAT2_TEX(FAT, gauge, dir, idx, stride) 224 #endif // DIRECT_ACCESS_FAT_LINK 225 #undef READ_LONG_PHASE 227 #ifdef DIRECT_ACCESS_LONG_LINK 228 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_FLOAT4(LONG, gauge, dir, idx, stride) 229 #define READ_LONG_PHASE(phase, dir, idx, stride) READ_GAUGE_PHASE_FLOAT(PHASE, phase, dir, idx, stride); 231 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_FLOAT4_TEX(LONG, gauge, dir, idx, stride) 232 #define READ_LONG_PHASE(phase, dir, idx, stride) READ_GAUGE_PHASE_FLOAT_TEX(PHASE, phase, dir, idx, stride); 233 #endif // DIRECT_ACCESS_LONG_LINK 235 #else // HALF PRECISION 237 #define RECONSTRUCT_LONG_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_9_SINGLE 238 #if (DD_FAT_RECON==8) 239 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_8_SINGLE 240 #elif (DD_FAT_RECON==12) 241 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_12_SINGLE 243 #define RECONSTRUCT_FAT_GAUGE_MATRIX(dir, gauge, idx, sign) 246 #ifdef DIRECT_ACCESS_FAT_LINK 247 #if (DD_FAT_RECON==8) 248 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_SHORT4(FAT, gauge, dir, idx, stride); RESCALE4(FAT, fat_link_max); 249 #elif (DD_FAT_RECON==12) 250 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_SHORT4(FAT, gauge, dir, idx, stride); RESCALE4(FAT, fat_link_max); 251 #elif (DD_FAT_RECON==18) 252 #define 
READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_SHORT2(FAT, gauge, dir, idx, stride); RESCALE2(FAT, fat_link_max); 255 #if (DD_FAT_RECON==8) 256 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_SHORT4_TEX(FAT, gauge, dir, idx, stride); RESCALE4(FAT, fat_link_max); 257 #elif (DD_FAT_RECON==12) 258 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_SHORT4_TEX(FAT, gauge, dir, idx, stride); RESCALE4(FAT, fat_link_max); 260 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_SHORT2_TEX(FAT, gauge, dir, idx, stride); RESCALE2(FAT, fat_link_max); 262 #endif // DIRECT_ACCESS_FAT_LINK 263 #undef READ_LONG_PHASE 264 #ifdef DIRECT_ACCESS_LONG_LINK 265 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_SHORT4(LONG, gauge, dir, idx, stride) 266 #define READ_LONG_PHASE(phase, dir, idx, stride) READ_GAUGE_PHASE_SHORT(PHASE, phase, dir, idx, stride); 268 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_SHORT4_TEX(LONG, gauge, dir, idx, stride) 269 #define READ_LONG_PHASE(phase, dir, idx, stride) READ_GAUGE_PHASE_SHORT_TEX(PHASE, phase, dir, idx, stride); 270 #endif // DIRECT_ACCESS_LONG_LINK 274 #elif (DD_LONG_RECON == 12)// reconstruct from 12 reals 276 #define DD_LONG_RECON_F 12 278 #if (DD_PREC==0) // DOUBLE PRECISION 280 #define RECONSTRUCT_LONG_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_12_DOUBLE 281 #if (DD_FAT_RECON==8) 282 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_8_DOUBLE 283 #elif (DD_FAT_RECON==12) 284 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_12_DOUBLE 286 #define RECONSTRUCT_FAT_GAUGE_MATRIX(dir, gauge, idx, sign) 289 #ifdef DIRECT_ACCESS_FAT_LINK 290 #if (DD_FAT_RECON==8) 291 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_DOUBLE2(FAT, gauge, dir, idx, stride) 292 #elif (DD_FAT_RECON==12) 293 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_DOUBLE2(FAT, gauge, dir, idx, stride) 295 
#define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_DOUBLE2(FAT, gauge, dir, idx, stride) 297 #else // texture access 298 #if (DD_FAT_RECON==8) 299 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_DOUBLE2_TEX(FAT, gauge, dir, idx, stride) 300 #elif (DD_FAT_RECON==12) 301 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_DOUBLE2_TEX(FAT, gauge, dir, idx, stride) 303 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_DOUBLE2_TEX(FAT, gauge, dir, idx, stride) 304 #endif // DD_FAT_RECON 305 #endif // DIRECT_ACCESS_FAT_LINK 307 #ifdef DIRECT_ACCESS_LONG_LINK 308 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_DOUBLE2(LONG, gauge, dir, idx, stride) 310 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_DOUBLE2_TEX(LONG, gauge, dir, idx, stride) 311 #endif // DIRECT_ACCESS_LONG_LINK 313 #elif (DD_PREC==1) // SINGLE PRECISION 315 #define RECONSTRUCT_LONG_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_12_SINGLE 316 #if (DD_FAT_RECON==8) 317 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_8_SINGLE 318 #elif (DD_FAT_RECON==12) 319 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_12_SINGLE 321 #define RECONSTRUCT_FAT_GAUGE_MATRIX(dir, gauge, idx, sign) 324 #ifdef DIRECT_ACCESS_FAT_LINK 325 #if (DD_FAT_RECON==8) 326 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_FLOAT4(FAT, gauge, dir, idx, stride) 327 #elif (DD_FAT_RECON==12) 328 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_FLOAT4(FAT, gauge, dir, idx, stride) 330 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_FLOAT2(FAT, gauge, dir, idx, stride) 333 #if (DD_FAT_RECON==8) 334 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_FLOAT4_TEX(FAT, gauge, dir, idx, stride) 335 #elif (DD_FAT_RECON==12) 336 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_FLOAT4_TEX(FAT, gauge, dir, idx, 
stride) 338 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_FLOAT2_TEX(FAT, gauge, dir, idx, stride) 340 #endif // DIRECT_ACCESS_FAT_LINK 342 #ifdef DIRECT_ACCESS_LONG_LINK 343 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_FLOAT4(LONG, gauge, dir, idx, stride) 345 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_FLOAT4_TEX(LONG, gauge, dir, idx, stride) 346 #endif // DIRECT_ACCESS_LONG_LINK 348 #else // HALF PRECISION 350 #define RECONSTRUCT_LONG_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_12_SINGLE 351 #if (DD_FAT_RECON==8) 352 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_8_SINGLE 353 #elif (DD_FAT_RECON==12) 354 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_12_SINGLE 356 #define RECONSTRUCT_FAT_GAUGE_MATRIX(dir, gauge, idx, sign) 359 #ifdef DIRECT_ACCESS_FAT_LINK 360 #if (DD_FAT_RECON==8) 361 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_SHORT4(FAT, gauge, dir, idx, stride); RESCALE4(FAT, fat_link_max); 362 #elif (DD_FAT_RECON==12) 363 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_SHORT4(FAT, gauge, dir, idx, stride); RESCALE4(FAT, fat_link_max); 365 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_SHORT2(FAT, gauge, dir, idx, stride); RESCALE2(FAT, fat_link_max); 368 #if (DD_FAT_RECON==8) 369 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_SHORT4_TEX(FAT, gauge, dir, idx, stride); RESCALE4(FAT, fat_link_max); 370 #elif (DD_FAT_RECON==12) 371 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_SHORT4_TEX(FAT, gauge, dir, idx, stride); RESCALE4(FAT, fat_link_max); 373 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_SHORT2_TEX(FAT, gauge, dir, idx, stride); RESCALE2(FAT, fat_link_max); 375 #endif // DIRECT_ACCESS_FAT_LINK 377 #ifdef DIRECT_ACCESS_LONG_LINK 378 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_SHORT4(LONG, 
gauge, dir, idx, stride) 380 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_SHORT4_TEX(LONG, gauge, dir, idx, stride) 381 #endif // DIRECT_ACCESS_LONG_LINK 385 #elif (DD_LONG_RECON == 13) 386 #define DD_LONG_RECON_F 13 388 #if (DD_PREC==0) // DOUBLE PRECISION 390 #define RECONSTRUCT_LONG_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_13_DOUBLE 391 #if (DD_FAT_RECON==8) 392 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_8_DOUBLE 393 #elif (DD_FAT_RECON==12) 394 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_12_DOUBLE 396 #define RECONSTRUCT_FAT_GAUGE_MATRIX(dir, gauge, idx, sign) 399 #ifdef DIRECT_ACCESS_FAT_LINK 400 #if (DD_FAT_RECON==8) 401 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_DOUBLE2(FAT, gauge, dir, idx, stride) 402 #elif (DD_FAT_RECON==12) 403 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_DOUBLE2(FAT, gauge, dir, idx, stride) 405 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_DOUBLE2(FAT, gauge, dir, idx, stride) 407 #else // texture access 408 #if (DD_FAT_RECON==8) 409 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_DOUBLE2_TEX(FAT, gauge, dir, idx, stride) 410 #elif (DD_FAT_RECON==12) 411 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_DOUBLE2_TEX(FAT, gauge, dir, idx, stride) 413 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_DOUBLE2_TEX(FAT, gauge, dir, idx, stride) 414 #endif // DD_FAT_RECON 415 #endif // DIRECT_ACCESS_FAT_LINK 417 #undef READ_LONG_PHASE 418 #ifdef DIRECT_ACCESS_LONG_LINK 419 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_DOUBLE2(LONG, gauge, dir, idx, stride) 420 #define READ_LONG_PHASE(phase, dir, idx, stride) READ_GAUGE_PHASE_DOUBLE(PHASE, phase, dir, idx, stride); 422 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_DOUBLE2_TEX(LONG, gauge, dir, idx, stride) 423 #define READ_LONG_PHASE(phase, dir, idx, 
stride) READ_GAUGE_PHASE_DOUBLE_TEX(PHASE, phase, dir, idx, stride); 424 #endif // DIRECT_ACCESS_LONG_LINK 426 #elif (DD_PREC==1) // SINGLE PRECISION 428 #define RECONSTRUCT_LONG_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_13_SINGLE 429 #if (DD_FAT_RECON==8) 430 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_8_SINGLE 431 #elif (DD_FAT_RECON==12) 432 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_12_SINGLE 434 #define RECONSTRUCT_FAT_GAUGE_MATRIX(dir, gauge, idx, sign) 437 #ifdef DIRECT_ACCESS_FAT_LINK 438 #if (DD_FAT_RECON==8) 439 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_FLOAT4(FAT, gauge, dir, idx, stride) 440 #elif (DD_FAT_RECON==12) 441 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_FLOAT4(FAT, gauge, dir, idx, stride) 443 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_FLOAT2(FAT, gauge, dir, idx, stride) 446 #if (DD_FAT_RECON==8) 447 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_FLOAT4_TEX(FAT, gauge, dir, idx, stride) 448 #elif (DD_FAT_RECON==12) 449 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_FLOAT4_TEX(FAT, gauge, dir, idx, stride) 451 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_FLOAT2_TEX(FAT, gauge, dir, idx, stride) 453 #endif // DIRECT_ACCESS_FAT_LINK 455 #undef READ_LONG_PHASE 456 #ifdef DIRECT_ACCESS_LONG_LINK 457 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_FLOAT4(LONG, gauge, dir, idx, stride) 458 #define READ_LONG_PHASE(phase, dir, idx, stride) READ_GAUGE_PHASE_FLOAT(PHASE, phase, dir, idx, stride); 460 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_FLOAT4_TEX(LONG, gauge, dir, idx, stride) 461 #define READ_LONG_PHASE(phase, dir, idx, stride) READ_GAUGE_PHASE_FLOAT_TEX(PHASE, phase, dir, idx, stride); 462 #endif // DIRECT_ACCESS_LONG_LINK 464 #else // HALF PRECISION 466 #define RECONSTRUCT_LONG_GAUGE_MATRIX 
RECONSTRUCT_GAUGE_MATRIX_13_SINGLE 467 #if (DD_FAT_RECON==8) 468 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_8_SINGLE 469 #elif (DD_FAT_RECON==12) 470 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_12_SINGLE 472 #define RECONSTRUCT_FAT_GAUGE_MATRIX(dir, gauge, idx, sign) 475 #ifdef DIRECT_ACCESS_FAT_LINK 476 #if (DD_FAT_RECON==8) 477 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_SHORT4(FAT, gauge, dir, idx, stride); RESCALE4(FAT, fat_link_max); 478 #elif (DD_FAT_RECON==12) 479 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_SHORT4(FAT, gauge, dir, idx, stride); RESCALE4(FAT, fat_link_max); 481 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_SHORT2(FAT, gauge, dir, idx, stride); RESCALE2(FAT, fat_link_max); 484 #if (DD_FAT_RECON==8) 485 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_SHORT4_TEX(FAT, gauge, dir, idx, stride); RESCALE4(FAT, fat_link_max); 486 #elif (DD_FAT_RECON==12) 487 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_SHORT4_TEX(FAT, gauge, dir, idx, stride); RESCALE4(FAT, fat_link_max); 489 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_SHORT2_TEX(FAT, gauge, dir, idx, stride); RESCALE2(FAT, fat_link_max); 492 #undef READ_LONG_PHASE 493 #ifdef DIRECT_ACCESS_LONG_LINK 494 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_SHORT4(LONG, gauge, dir, idx, stride) 495 #define READ_LONG_PHASE(phase, dir, idx, stride) READ_GAUGE_PHASE_SHORT(PHASE, phase, dir, idx, stride); 497 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_SHORT4_TEX(LONG, gauge, dir, idx, stride) 498 #define READ_LONG_PHASE(phase, dir, idx, stride) READ_GAUGE_PHASE_SHORT_TEX(PHASE, phase, dir, idx, stride); 499 #endif // DIRECT_ACCESS_LONG_LINK 503 #else //18 reconstruct 504 #define DD_LONG_RECON_F 18 505 #define RECONSTRUCT_LONG_GAUGE_MATRIX(dir, gauge, idx, sign) 507 #if 
(DD_PREC==0) // DOUBLE PRECISION 509 #if (DD_FAT_RECON==8) 510 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_8_DOUBLE 511 #elif (DD_FAT_RECON==12) 512 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_12_DOUBLE 514 #define RECONSTRUCT_FAT_GAUGE_MATRIX(dir, gauge, idx, sign) 517 #ifdef DIRECT_ACCESS_FAT_LINK 518 #if (DD_FAT_RECON==8) 519 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_DOUBLE2(FAT, gauge, dir, idx, stride) 520 #elif (DD_FAT_RECON==12) 521 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_DOUBLE2(FAT, gauge, dir, idx, stride) 523 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_DOUBLE2(FAT, gauge, dir, idx, stride) 525 #else // texture access 526 #if (DD_FAT_RECON==8) 527 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_DOUBLE2_TEX(FAT, gauge, dir, idx, stride) 528 #elif (DD_FAT_RECON==12) 529 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_DOUBLE2_TEX(FAT, gauge, dir, idx, stride) 531 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_DOUBLE2_TEX(FAT, gauge, dir, idx, stride) 532 #endif // DD_FAT_RECON 533 #endif // DIRECT_ACCESS_FAT_LINK 535 #ifdef DIRECT_ACCESS_LONG_LINK 536 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_DOUBLE2(LONG, gauge, dir, idx, stride) 538 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_DOUBLE2_TEX(LONG, gauge, dir, idx, stride) 539 #endif // DIRECT_ACCESS_LONG_LINK 541 #elif (DD_PREC==1) // SINGLE PRECISION 543 #if (DD_FAT_RECON==8) 544 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_8_SINGLE 545 #elif (DD_FAT_RECON==12) 546 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_12_SINGLE 548 #define RECONSTRUCT_FAT_GAUGE_MATRIX(dir, gauge, idx, sign) 551 #ifdef DIRECT_ACCESS_FAT_LINK 552 #if (DD_FAT_RECON==8) 553 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_FLOAT4(FAT, gauge, dir, 
idx, stride) 554 #elif (DD_FAT_RECON==12) 555 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_FLOAT4(FAT, gauge, dir, idx, stride) 557 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_FLOAT2(FAT, gauge, dir, idx, stride) 560 #if (DD_FAT_RECON==8) 561 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_FLOAT4_TEX(FAT, gauge, dir, idx, stride) 562 #elif (DD_FAT_RECON==12) 563 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_FLOAT4_TEX(FAT, gauge, dir, idx, stride) 565 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_FLOAT2_TEX(FAT, gauge, dir, idx, stride) 567 #endif // DIRECT_ACCESS_FAT_LINK 569 #ifdef DIRECT_ACCESS_LONG_LINK 570 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_FLOAT2(LONG, gauge, dir, idx, stride) 572 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_FLOAT2_TEX(LONG, gauge, dir, idx, stride) 573 #endif // DIRECT_ACCESS_LONG_LINK 575 #else // HALF PRECISION 577 #if (DD_FAT_RECON==8) 578 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_8_SINGLE 579 #elif (DD_FAT_RECON==12) 580 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_12_SINGLE 582 #define RECONSTRUCT_FAT_GAUGE_MATRIX(dir, gauge, idx, sign) 585 #ifdef DIRECT_ACCESS_FAT_LINK 587 #if (DD_FAT_RECON==8) 588 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_SHORT4(FAT, gauge, dir, idx, stride); RESCALE4(FAT, fat_link_max); 589 #elif (DD_FAT_RECON==12) 590 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_SHORT4(FAT, gauge, dir, idx, stride); RESCALE4(FAT, fat_link_max); 592 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_SHORT2(FAT, gauge, dir, idx, stride); RESCALE2(FAT, fat_link_max); 596 #if (DD_FAT_RECON==8) 597 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_SHORT4_TEX(FAT, gauge, dir, idx, stride); RESCALE4(FAT, fat_link_max); 598 #elif 
(DD_FAT_RECON==12) 599 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_SHORT4_TEX(FAT, gauge, dir, idx, stride); RESCALE4(FAT, fat_link_max); 601 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_SHORT2_TEX(FAT, gauge, dir, idx, stride); RESCALE2(FAT, fat_link_max); 605 #ifdef DIRECT_ACCESS_LONG_LINK 606 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_SHORT2(LONG, gauge, dir, idx, stride) 608 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_SHORT2_TEX(LONG, gauge, dir, idx, stride) 609 #endif // DIRECT_ACCESS_LONG_LINK 613 #endif // DD_LONG_RECON 615 #if (DD_PREC==0) // double-precision fields 619 #if (defined DIRECT_ACCESS_FAT_LINK) || (defined FERMI_NO_DBLE_TEX) 620 #define FATLINK0TEX param.gauge0 621 #define FATLINK1TEX param.gauge1 623 #ifdef USE_TEXTURE_OBJECTS 624 #define FATLINK0TEX param.gauge0Tex 625 #define FATLINK1TEX param.gauge1Tex 627 #if (DD_IMPROVED == 1) 628 #define FATLINK0TEX fatGauge0TexDouble 629 #define FATLINK1TEX fatGauge1TexDouble 631 #define FATLINK0TEX gauge0TexDouble2 632 #define FATLINK1TEX gauge1TexDouble2 634 #endif // USE_TEXTURE_OBJECTS 637 #if (defined DIRECT_ACCESS_LONG_LINK) || (defined FERMI_NO_DBLE_TEX) 638 #define LONGLINK0TEX param.longGauge0 639 #define LONGLINK1TEX param.longGauge1 640 #define LONGPHASE0TEX param.longPhase0 641 #define LONGPHASE1TEX param.longPhase1 643 #ifdef USE_TEXTURE_OBJECTS 644 #define LONGLINK0TEX param.longGauge0Tex 645 #define LONGLINK1TEX param.longGauge1Tex 646 #define LONGPHASE0TEX param.longPhase0Tex 647 #define LONGPHASE1TEX param.longPhase1Tex 649 #define LONGLINK0TEX longGauge0TexDouble 650 #define LONGLINK1TEX longGauge1TexDouble 651 #define LONGPHASE0TEX longPhase0TexDouble 652 #define LONGPHASE1TEX longPhase1TexDouble 653 #endif // USE_TEXTURE_OBJECTS 659 #if (defined DIRECT_ACCESS_SPINOR) || (defined FERMI_NO_DBLE_TEX) 660 #define SPINORTEX param.in 661 #define GHOSTSPINORTEX param.ghost 662 #define 
READ_1ST_NBR_SPINOR READ_1ST_NBR_SPINOR_DOUBLE 663 #define READ_3RD_NBR_SPINOR READ_KS_NBR_SPINOR_DOUBLE 664 #define READ_1ST_NBR_SPINOR_GHOST READ_1ST_NBR_SPINOR_GHOST_DOUBLE 665 #define READ_3RD_NBR_SPINOR_GHOST READ_KS_NBR_SPINOR_GHOST_DOUBLE 667 #ifdef USE_TEXTURE_OBJECTS 668 #define SPINORTEX param.inTex 669 #define GHOSTSPINORTEX param.ghostTex 671 #define SPINORTEX spinorTexDouble 672 #define GHOSTSPINORTEX ghostSpinorTexDouble 673 #endif // USE_TEXTURE_OBJECTS 674 #define READ_1ST_NBR_SPINOR READ_1ST_NBR_SPINOR_DOUBLE_TEX 675 #define READ_3RD_NBR_SPINOR READ_KS_NBR_SPINOR_DOUBLE_TEX 676 #define READ_1ST_NBR_SPINOR_GHOST READ_1ST_NBR_SPINOR_GHOST_DOUBLE_TEX 677 #define READ_3RD_NBR_SPINOR_GHOST READ_KS_NBR_SPINOR_GHOST_DOUBLE_TEX 679 #if (defined DIRECT_ACCESS_INTER) || (defined FERMI_NO_DBLE_TEX) 680 #define READ_AND_SUM_SPINOR READ_AND_SUM_ST_SPINOR 681 #define INTERTEX param.out 683 #define READ_AND_SUM_SPINOR READ_AND_SUM_ST_SPINOR_DOUBLE_TEX 684 #ifdef USE_TEXTURE_OBJECTS 685 #define INTERTEX param.outTex 687 #define INTERTEX interTexDouble 690 #define WRITE_SPINOR WRITE_ST_SPINOR_DOUBLE2 691 #define SPINOR_DOUBLE 693 #if (defined DIRECT_ACCESS_ACCUM) || (defined FERMI_NO_DBLE_TEX) 694 #define ACCUMTEX param.x 695 #define READ_ACCUM READ_ST_ACCUM_DOUBLE 697 #ifdef USE_TEXTURE_OBJECTS 698 #define ACCUMTEX param.xTex 700 #define ACCUMTEX accumTexDouble 701 #endif // USE_TEXTURE_OBJECTS 702 #define READ_ACCUM READ_ST_ACCUM_DOUBLE_TEX 707 #elif (DD_PREC==1) // single-precision fields 712 #ifndef DIRECT_ACCESS_FAT_LINK 713 #ifdef USE_TEXTURE_OBJECTS 714 #define FATLINK0TEX param.gauge0Tex 715 #define FATLINK1TEX param.gauge1Tex 717 #if (DD_IMPROVED == 1) 718 #define FATLINK0TEX fatGauge0TexSingle 719 #define FATLINK1TEX fatGauge1TexSingle 721 #if (DD_FAT_RECON == 18) 722 #define FATLINK0TEX gauge0TexSingle2 723 #define FATLINK1TEX gauge1TexSingle2 725 #define FATLINK0TEX gauge0TexSingle4 726 #define FATLINK1TEX gauge1TexSingle4 728 #endif // DD_IMPROVED 731 
// Accessor aliases for the single-precision (tail) and half-precision field sections,
// followed by the five-argument kernel-name helpers.
//
// Names defined in this run, each more than once with a different replacement:
//   FATLINK{0,1}TEX, LONGLINK{0,1}TEX, LONGPHASE{0,1}TEX
//       -> param.*Tex members, named *TexSingle* / *TexHalf* identifiers, or the plain
//          param.gauge{0,1} / param.longGauge{0,1} / param.longPhase{0,1} members.
//   SPINORTEX, GHOSTSPINORTEX, INTERTEX, ACCUMTEX
//       -> param.inTex / ghostTex / outTex / xTex, named *Single2 / *Half2 identifiers,
//          or param.in / param.ghost / param.out / param.x.
//   READ_1ST_NBR_SPINOR, READ_3RD_NBR_SPINOR, their *_GHOST variants,
//   READ_AND_SUM_SPINOR, READ_ACCUM, WRITE_SPINOR
//       -> precision-suffixed reader/writer macros (…_SINGLE, …_SINGLE_TEX, …_HALF_TEX,
//          WRITE_ST_SPINOR_FLOAT2, WRITE_ST_SPINOR_SHORT2).
// The selecting tests visible here are USE_TEXTURE_OBJECTS, DIRECT_ACCESS_FAT_LINK,
// DIRECT_ACCESS_LONG_LINK, DIRECT_ACCESS_SPINOR, DIRECT_ACCESS_INTER, DIRECT_ACCESS_ACCUM,
// DD_IMPROVED == 1, DD_FAT_RECON == 18 and DD_LONG_RECON == 18.
// In the half-precision part no DIRECT_ACCESS_SPINOR or DIRECT_ACCESS_ACCUM alternative
// is visible: the spinor readers are the *_HALF_TEX ones and READ_ACCUM is
// READ_ST_ACCUM_HALF_TEX.
//
// Under GPU_STAGGERED_DIRAC, DD_CONCAT(n,p,r1,r2,x) pastes its five arguments and the
// suffix Kernel into one identifier; DD_FUNC forwards to DD_CONCAT so that its arguments
// are macro-expanded before the ## pasting happens.
//
// NOTE(review): the bare integers between directives (732, 735, …) look like line numbers
// from another copy of this file, and some are skipped (e.g. 733-734, 741, 745, 752), so
// #else / #endif lines may be missing from this text. Confirm the branch structure
// against a clean copy before changing any of these definitions.
#define FATLINK0TEX param.gauge0 732 #define FATLINK1TEX param.gauge1 735 #ifndef DIRECT_ACCESS_LONG_LINK //longlink access 736 #ifdef USE_TEXTURE_OBJECTS 737 #define LONGLINK0TEX param.longGauge0Tex 738 #define LONGLINK1TEX param.longGauge1Tex 739 #define LONGPHASE0TEX param.longPhase0Tex 740 #define LONGPHASE1TEX param.longPhase1Tex 742 #if (DD_LONG_RECON ==18) 743 #define LONGLINK0TEX longGauge0TexSingle_norecon 744 #define LONGLINK1TEX longGauge1TexSingle_norecon 746 #define LONGLINK0TEX longGauge0TexSingle 747 #define LONGLINK1TEX longGauge1TexSingle 748 #define LONGPHASE0TEX longPhase0TexSingle 749 #define LONGPHASE1TEX longPhase1TexSingle 751 #endif // USE_TEXTURE_OBJECTS 753 #define LONGLINK0TEX param.longGauge0 754 #define LONGLINK1TEX param.longGauge1 755 #define LONGPHASE0TEX param.longPhase0 756 #define LONGPHASE1TEX param.longPhase1 760 #ifndef DIRECT_ACCESS_SPINOR 761 #ifdef USE_TEXTURE_OBJECTS 762 #define SPINORTEX param.inTex 763 #define GHOSTSPINORTEX param.ghostTex 765 #define SPINORTEX spinorTexSingle2 766 #define GHOSTSPINORTEX ghostSpinorTexSingle2 767 #endif // USE_TEXTURE_OBJECTS 768 #define READ_1ST_NBR_SPINOR READ_1ST_NBR_SPINOR_SINGLE_TEX 769 #define READ_3RD_NBR_SPINOR READ_KS_NBR_SPINOR_SINGLE_TEX 770 #define READ_1ST_NBR_SPINOR_GHOST READ_1ST_NBR_SPINOR_GHOST_SINGLE_TEX 771 #define READ_3RD_NBR_SPINOR_GHOST READ_KS_NBR_SPINOR_GHOST_SINGLE_TEX 773 #define SPINORTEX param.in 774 #define GHOSTSPINORTEX param.ghost 775 #define READ_1ST_NBR_SPINOR READ_1ST_NBR_SPINOR_SINGLE 776 #define READ_3RD_NBR_SPINOR READ_KS_NBR_SPINOR_SINGLE 777 #define READ_1ST_NBR_SPINOR_GHOST READ_1ST_NBR_SPINOR_GHOST_SINGLE 778 #define READ_3RD_NBR_SPINOR_GHOST READ_KS_NBR_SPINOR_GHOST_SINGLE 780 #if (defined DIRECT_ACCESS_INTER) 781 #define READ_AND_SUM_SPINOR READ_AND_SUM_ST_SPINOR 782 #define INTERTEX param.out 784 #define READ_AND_SUM_SPINOR READ_AND_SUM_ST_SPINOR_SINGLE_TEX 785 #ifdef USE_TEXTURE_OBJECTS 786 #define INTERTEX param.outTex 788 #define INTERTEX 
interTexSingle2 789 #endif // USE_TEXTURE_OBJECTS 791 #define WRITE_SPINOR WRITE_ST_SPINOR_FLOAT2 793 #if (defined DIRECT_ACCESS_ACCUM) 794 #define ACCUMTEX param.x 795 #define READ_ACCUM READ_ST_ACCUM_SINGLE 797 #ifdef USE_TEXTURE_OBJECTS 798 #define ACCUMTEX param.xTex 800 #define ACCUMTEX accumTexSingle2 801 #endif // USE_TEXTURE_OBJECTS 802 #define READ_ACCUM READ_ST_ACCUM_SINGLE_TEX 807 #else // half-precision fields 812 #ifndef DIRECT_ACCESS_FAT_LINK 813 #ifdef USE_TEXTURE_OBJECTS 814 #define FATLINK0TEX param.gauge0Tex 815 #define FATLINK1TEX param.gauge1Tex 817 #if (DD_IMPROVED == 1) 818 #define FATLINK0TEX fatGauge0TexHalf 819 #define FATLINK1TEX fatGauge1TexHalf 821 #if (DD_FAT_RECON == 18) 822 #define FATLINK0TEX gauge0TexHalf2 823 #define FATLINK1TEX gauge1TexHalf2 825 #define FATLINK0TEX gauge0TexHalf4 826 #define FATLINK1TEX gauge1TexHalf4 828 #endif // DD_IMPROVED 829 #endif // USE_TEXTURE_OBJECTS 830 #else // DIRECT_ACCESS_FAT_LINK 831 #define FATLINK0TEX param.gauge0 832 #define FATLINK1TEX param.gauge1 835 #ifndef DIRECT_ACCESS_LONG_LINK 836 #ifdef USE_TEXTURE_OBJECTS 837 #define LONGLINK0TEX param.longGauge0Tex 838 #define LONGLINK1TEX param.longGauge1Tex 839 #define LONGPHASE0TEX param.longPhase0Tex 840 #define LONGPHASE1TEX param.longPhase1Tex 842 #if (DD_LONG_RECON ==18) 843 #define LONGLINK0TEX longGauge0TexHalf_norecon 844 #define LONGLINK1TEX longGauge1TexHalf_norecon 846 #define LONGLINK0TEX longGauge0TexHalf 847 #define LONGLINK1TEX longGauge1TexHalf 848 #define LONGPHASE0TEX longPhase0TexHalf 849 #define LONGPHASE1TEX longPhase1TexHalf 851 #endif // USE_TEXTURE_OBJECTS 852 #else // DIRECT_ACCESS_LONG_LINK 853 #define LONGLINK0TEX param.longGauge0 854 #define LONGLINK1TEX param.longGauge1 855 #define LONGPHASE0TEX param.longPhase0 856 #define LONGPHASE1TEX param.longPhase1 859 #define READ_1ST_NBR_SPINOR READ_1ST_NBR_SPINOR_HALF_TEX 860 #define READ_3RD_NBR_SPINOR READ_KS_NBR_SPINOR_HALF_TEX 861 #define READ_1ST_NBR_SPINOR_GHOST 
READ_1ST_NBR_SPINOR_GHOST_HALF_TEX 862 #define READ_3RD_NBR_SPINOR_GHOST READ_KS_NBR_SPINOR_GHOST_HALF_TEX 863 #ifdef USE_TEXTURE_OBJECTS 864 #define SPINORTEX param.inTex 865 #define GHOSTSPINORTEX param.ghostTex 867 #define SPINORTEX spinorTexHalf2 868 #define GHOSTSPINORTEX ghostSpinorTexHalf2 869 #endif // USE_TEXTURE_OBJECTS 870 #if (defined DIRECT_ACCESS_INTER) 871 #define READ_AND_SUM_SPINOR READ_AND_SUM_ST_SPINOR_HALF 872 #define INTERTEX param.out 874 #define READ_AND_SUM_SPINOR READ_AND_SUM_ST_SPINOR_HALF_TEX 875 #ifdef USE_TEXTURE_OBJECTS 876 #define INTERTEX param.outTex 878 #define INTERTEX interTexHalf2 879 #endif // USE_TEXTURE_OBJECTS 881 #define WRITE_SPINOR WRITE_ST_SPINOR_SHORT2 883 #ifdef USE_TEXTURE_OBJECTS 884 #define ACCUMTEX param.xTex 886 #define ACCUMTEX accumTexHalf2 887 #endif // USE_TEXTURE_OBJECTS 888 #define READ_ACCUM READ_ST_ACCUM_HALF_TEX 893 #ifdef GPU_STAGGERED_DIRAC 899 #define DD_CONCAT(n,p,r1,r2,x) n ## p ## r1 ## r2 ## x ## Kernel 900 #define DD_FUNC(n,p,r1,r2,x) DD_CONCAT(n,p,r1,r2,x) 902 template <KernelType kernel_type>
// Guards and name-pasting helpers around the kernel declarations.
//  - Two guards test GPU_STAGGERED_DIRAC together with DD_FAT_RECON == 18; the inline
//    comment gives the reason: improved staggered only supports no-reconstruct fat links.
//  - In the part marked "naive staggered kernel", READ_LONG_MATRIX and READ_LONG_PHASE are
//    undefined and redefined with empty replacement lists, so their uses expand to nothing
//    there.
//  - Also in that part, DD_CONCAT/DD_FUNC take four arguments (n, p, r, x) and paste them
//    with the suffix Kernel; DD_FUNC forwards to DD_CONCAT so that its arguments are
//    macro-expanded before the ## pasting happens.
//  - The DD_LONG_RECON == 18 test carries the inline note about avoiding kernel aliasing
//    over long links that do not exist in that part.
// NOTE(review): the bare integers (904, 912, 919, …) look like line numbers from another
// copy of this file and several are skipped (e.g. 905-911, 913-918), so the guarded
// kernel declarations themselves and some #else / #endif lines are not visible here.
// Confirm against a clean copy.
904 #if defined(GPU_STAGGERED_DIRAC) && DD_FAT_RECON == 18 // improved staggered only supports no reconstruct fat-links 912 #if defined(GPU_STAGGERED_DIRAC) && DD_FAT_RECON == 18 // improved staggered only supports no reconstruct fat-links 919 #else // naive staggered kernel 921 #undef READ_LONG_MATRIX 922 #define READ_LONG_MATRIX(gauge, dir, idx, stride) 924 #undef READ_LONG_PHASE 925 #define READ_LONG_PHASE(phase, dir, idx, stride) 927 #define DD_CONCAT(n,p,r,x) n ## p ## r ## x ## Kernel 928 #define DD_FUNC(n,p,r,x) DD_CONCAT(n,p,r,x) 930 #if (DD_LONG_RECON == 18) // avoid kernel aliasing over non-existant long-links 932 template <KernelType kernel_type>
// Macro cleanup followed by stepping of the reconstruct selectors.
//  - Two guards test GPU_STAGGERED_DIRAC with DD_FAT_RECON != 9 && DD_FAT_RECON != 13.
//  - The #undef run removes DD_FAT_RECON_F, DD_LONG_RECON_F, READ_GAUGE_MATRIX, the
//    RECONSTRUCT_*_GAUGE_MATRIX macros, GHOSTSPINORTEX, READ_AND_SUM_SPINOR,
//    READ_FAT_MATRIX, READ_LONG_MATRIX, READ_LONG_PHASE and the READ_*_NBR_SPINOR* readers.
//  - DD_LONG_RECON is stepped 8 -> 9 -> 12 -> 13 -> 18 by the visible #if/#elif chain. A
//    further #undef and "#define DD_LONG_RECON 8" follow, but the directive selecting
//    them is not visible.
//  - DD_FAT_RECON is stepped 8 -> 9 -> 12 -> 13 -> 18 in the same way, with the 9 and 13
//    steps marked "dummy" in the inline comments; a "#define DD_FAT_RECON 8" follows.
// NOTE(review): presumably this stepping lets the file be processed once per
// (fat, long) reconstruct combination, but the mechanism (e.g. a self-#include) is not
// visible in this text -- confirm. The bare integers look like line numbers from another
// copy of this file and many are skipped (e.g. 1018, 1020, 1035-1037, 1039-1051), so
// #else / #undef / #include lines may be missing here.
934 #if defined(GPU_STAGGERED_DIRAC) && DD_FAT_RECON != 9 && DD_FAT_RECON != 13 942 #if defined(GPU_STAGGERED_DIRAC) && DD_FAT_RECON != 9 && DD_FAT_RECON != 13 953 #endif // ! GPU_STAGGERED_DIRAC 958 #undef DD_FAT_RECON_F 959 #undef DD_LONG_RECON_F 966 #undef READ_GAUGE_MATRIX 967 #undef RECONSTRUCT_FAT_GAUGE_MATRIX 968 #undef RECONSTRUCT_LONG_GAUGE_MATRIX 976 #undef GHOSTSPINORTEX 978 #undef READ_AND_SUM_SPINOR 988 #undef READ_FAT_MATRIX 989 #undef READ_LONG_MATRIX 990 #undef READ_LONG_PHASE 991 #undef READ_1ST_NBR_SPINOR 992 #undef READ_3RD_NBR_SPINOR 993 #undef READ_1ST_NBR_SPINOR_GHOST 994 #undef READ_3RD_NBR_SPINOR_GHOST 1006 #if (DD_LONG_RECON==8) 1007 #undef DD_LONG_RECON 1008 #define DD_LONG_RECON 9 1009 #elif (DD_LONG_RECON==9) 1010 #undef DD_LONG_RECON 1011 #define DD_LONG_RECON 12 1012 #elif (DD_LONG_RECON==12) 1013 #undef DD_LONG_RECON 1014 #define DD_LONG_RECON 13 1015 #elif (DD_LONG_RECON==13) 1016 #undef DD_LONG_RECON 1017 #define DD_LONG_RECON 18 1019 #undef DD_LONG_RECON 1021 #define DD_LONG_RECON 8 1023 #if (DD_FAT_RECON==8) 1025 #define DD_FAT_RECON 9 // dummy 1026 #elif (DD_FAT_RECON==9) 1028 #define DD_FAT_RECON 12 1029 #elif (DD_FAT_RECON==12) 1031 #define DD_FAT_RECON 13 //dummy 1032 #elif (DD_FAT_RECON==13) 1034 #define DD_FAT_RECON 18 1038 #define DD_FAT_RECON 8 1052 #undef DD_LONG_RECON 1056 #endif // DD_FAT_RECON 1057 #endif // DD_LONG_RECON
// Five-parameter DD_FUNC with an empty replacement list: an invocation under this
// definition expands to no tokens, and all five arguments are discarded.
// NOTE(review): the other DD_FUNC definitions in this file take (n,p,r1,r2,x) and
// (n,p,r,x) and forward to DD_CONCAT. Which conditional branch this empty variant belongs
// to is not visible here -- confirm before relying on it.
#define DD_FUNC(n, p, r, d, x)