QUDA v0.4.0
A library for QCD on GPUs
quda/lib/wilson_dslash_def.h
Go to the documentation of this file.
00001 // wilson_dslash_def.h - Dslash kernel definitions
00002 
00003 // There are currently 72 different variants of the Wilson Dslash
00004 // kernel, each one characterized by a set of 5 options, where each
00005 // option can take one of several values (2*3*2*2*3 = 72).  This file
00006 // is structured so that the C preprocessor loops through all 72
00007 // variants (in a manner resembling a counter), sets the appropriate
00008 // macros, and defines the corresponding functions.
00009 //
00010 // As an example of the function naming conventions, consider
00011 //
00012 // cloverDslash12DaggerXpayKernel(float4* out, ...).
00013 //
00014 // This is a clover Dslash^dagger kernel where the result is
00015 // multiplied by "a" and summed with an input vector (Xpay), and the
00016 // gauge matrix is reconstructed from 12 real numbers.  More
00017 // generally, each function name is given by the concatenation of the
00018 // following 4 fields, with "Kernel" at the end:
00019 //
00020 // DD_NAME_F = dslash, cloverDslash
00021 // DD_RECON_F = 8, 12, 18
00022 // DD_DAG_F = Dagger, [blank]
00023 // DD_XPAY_F = Xpay, [blank]
00024 //
00025 // In addition, the kernels are templated on the precision of the
00026 // fields (double, single, or half).
00027 
00028 // initialize on first iteration
00029 
00030 #ifndef DD_LOOP
00031 #define DD_LOOP
00032 #define DD_DAG 0
00033 #define DD_XPAY 0
00034 #define DD_RECON 0
00035 #define DD_PREC 0
00036 #define DD_CLOVER 0
00037 #endif
00038 
00039 // set options for current iteration
00040 
00041 #if (DD_CLOVER==0) // no clover
00042 #define DD_NAME_F dslash
00043 #else              // clover
00044 #define DSLASH_CLOVER
00045 #define DD_NAME_F cloverDslash
00046 #endif
00047 
00048 #if (DD_DAG==0) // no dagger
00049 #define DD_DAG_F
00050 #else           // dagger
00051 #define DD_DAG_F Dagger
00052 #endif
00053 
00054 #if (DD_XPAY==0) // no xpay 
00055 #define DD_XPAY_F 
00056 #else            // xpay
00057 #define DD_XPAY_F Xpay
00058 #define DSLASH_XPAY
00059 #endif
00060 
00061 #if (DD_PREC == 0)
00062 #define DD_PARAM_XPAY const double2 *x, const float *xNorm, const double a,
00063 #elif (DD_PREC == 1) 
00064 #define DD_PARAM_XPAY const float4 *x, const float *xNorm, const float a,
00065 #else
00066 #define DD_PARAM_XPAY const short4 *x, const float *xNorm, const float a,
00067 #endif
00068 
00069 #if (DD_RECON==0) // reconstruct from 8 reals
00070 #define DD_RECON_F 8
00071 
00072 #if (DD_PREC==0)
00073 #define DD_PARAM_GAUGE const double2 *gauge0, const double2 *gauge1,
00074 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_DOUBLE
00075 #ifdef DIRECT_ACCESS_LINK
00076 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_DOUBLE2
00077 #else 
00078 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_DOUBLE2_TEX
00079 #endif // DIRECT_ACCESS_LINK
00080 
00081 #elif (DD_PREC==1)
00082 #define DD_PARAM_GAUGE const float4 *gauge0, const float4 *gauge1,
00083 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_SINGLE
00084 #ifdef DIRECT_ACCESS_LINK
00085 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_FLOAT4
00086 #else
00087 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_FLOAT4_TEX
00088 #endif // DIRECT_ACCESS_LINK
00089 
00090 #else
00091 #define DD_PARAM_GAUGE const short4 *gauge0, const short4* gauge1,
00092 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_SINGLE
00093 #ifdef DIRECT_ACCESS_LINK
00094 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_SHORT4
00095 #else
00096 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_SHORT4_TEX
00097 #endif // DIRECT_ACCESS_LINK
00098 #endif // DD_PREC
00099 #elif (DD_RECON==1) // reconstruct from 12 reals
00100 #define DD_RECON_F 12
00101 
00102 #if (DD_PREC==0)
00103 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_DOUBLE
00104 #ifdef DIRECT_ACCESS_LINK
00105 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_DOUBLE2
00106 #else
00107 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_DOUBLE2_TEX
00108 #endif // DIRECT_ACCESS_LINK
00109 #define DD_PARAM_GAUGE const double2 *gauge0, const double2 *gauge1,
00110 
00111 #elif (DD_PREC==1)
00112 #define DD_PARAM_GAUGE const float4 *gauge0, const float4 *gauge1,
00113 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_SINGLE
00114 #ifdef DIRECT_ACCESS_LINK
00115 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_FLOAT4
00116 #else
00117 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_FLOAT4_TEX
00118 #endif // DIRECT_ACCESS_LINK
00119 
00120 #else
00121 #define DD_PARAM_GAUGE const short4 *gauge0, const short4 *gauge1,
00122 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_SINGLE
00123 #ifdef DIRECT_ACCESS_LINK
00124 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_SHORT4
00125 #else
00126 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_SHORT4_TEX
00127 #endif // DIRECT_ACCESS_LINK
00128 #endif // DD_PREC
00129 #else               // no reconstruct, load all components
00130 #define DD_RECON_F 18
00131 #define GAUGE_FLOAT2
00132 #if (DD_PREC==0)
00133 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_DOUBLE
00134 #ifdef DIRECT_ACCESS_LINK
00135 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_DOUBLE2
00136 #else
00137 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_DOUBLE2_TEX
00138 #endif // DIRECT_ACCESS_LINK
00139 #define DD_PARAM_GAUGE const double2 *gauge0, const double2 *gauge1,
00140 
00141 #elif (DD_PREC==1)
00142 #define DD_PARAM_GAUGE const float4 *gauge0, const float4 *gauge1, // FIXME for direct reading, really float2
00143 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_SINGLE
00144 #ifdef DIRECT_ACCESS_LINK
00145 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_FLOAT2
00146 #else
00147 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_FLOAT2_TEX
00148 #endif // DIRECT_ACCESS_LINK
00149 
00150 #else
00151 #define DD_PARAM_GAUGE const short4 *gauge0, const short4 *gauge1, // FIXME for direct reading, really short2
00152 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_SINGLE
00153 #ifdef DIRECT_ACCESS_LINK
00154 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_SHORT2
00155 #else
00156 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_SHORT2_TEX
00157 #endif //DIRECT_ACCESS_LINK
00158 #endif
00159 #endif
00160 
00161 #if (DD_PREC==0) // double-precision fields
00162 
00163 #define TPROJSCALE tProjScale
00164 
00165 // double-precision gauge field
00166 #if (defined DIRECT_ACCESS_WILSON_GAUGE) || (defined FERMI_NO_DBLE_TEX)
00167 #define GAUGE0TEX gauge0
00168 #define GAUGE1TEX gauge1
00169 #else
00170 #define GAUGE0TEX gauge0TexDouble2
00171 #define GAUGE1TEX gauge1TexDouble2
00172 #endif
00173 
00174 #define GAUGE_FLOAT2
00175 
00176 // double-precision spinor fields
00177 #define DD_PARAM_OUT double2* out, float *null1,
00178 #define DD_PARAM_IN const double2* in, const float *null4,
00179 
00180 #if (defined DIRECT_ACCESS_WILSON_SPINOR) || (defined FERMI_NO_DBLE_TEX)
00181 #define READ_SPINOR READ_SPINOR_DOUBLE
00182 #define READ_SPINOR_UP READ_SPINOR_DOUBLE_UP
00183 #define READ_SPINOR_DOWN READ_SPINOR_DOUBLE_DOWN
00184 #define SPINORTEX in
00185 #else
00186 #define READ_SPINOR READ_SPINOR_DOUBLE_TEX
00187 #define READ_SPINOR_UP READ_SPINOR_DOUBLE_UP_TEX
00188 #define READ_SPINOR_DOWN READ_SPINOR_DOUBLE_DOWN_TEX
00189 #define SPINORTEX spinorTexDouble
00190 #endif
00191 #if (defined DIRECT_ACCESS_WILSON_INTER) || (defined FERMI_NO_DBLE_TEX)
00192 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_DOUBLE
00193 #define INTERTEX out
00194 #else
00195 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_DOUBLE_TEX
00196 #define INTERTEX interTexDouble
00197 #endif
00198 #define WRITE_SPINOR WRITE_SPINOR_DOUBLE2_STR
00199 #define SPINOR_DOUBLE
00200 #if (DD_XPAY==1)
00201 #if (defined DIRECT_ACCESS_WILSON_ACCUM) || (defined FERMI_NO_DBLE_TEX)
00202 #define ACCUMTEX x
00203 #define READ_ACCUM READ_ACCUM_DOUBLE
00204 #else
00205 #define ACCUMTEX accumTexDouble
00206 #define READ_ACCUM READ_ACCUM_DOUBLE_TEX
00207 #endif
00208 
00209 #endif
00210 
00211 #define SPINOR_HOP 12
00212 
00213 // double-precision clover field
00214 #if (DD_CLOVER==0)
00215 #define DD_PARAM_CLOVER
00216 #else
00217 #define DD_PARAM_CLOVER const double2 *clover, const float *null3,
00218 #endif
00219 #if (defined DIRECT_ACCESS_CLOVER) || (defined FERMI_NO_DBLE_TEX)
00220 #define CLOVERTEX clover
00221 #define READ_CLOVER READ_CLOVER_DOUBLE
00222 #else
00223 #define CLOVERTEX cloverTexDouble
00224 #define READ_CLOVER READ_CLOVER_DOUBLE_TEX
00225 #endif
00226 #define CLOVER_DOUBLE
00227 
00228 #elif (DD_PREC==1) // single-precision fields
00229 
00230 #define TPROJSCALE tProjScale_f
00231 
00232 // single-precision gauge field
00233 #ifdef DIRECT_ACCESS_LINK
00234 #define GAUGE0TEX gauge0
00235 #define GAUGE1TEX gauge1
00236 #else
00237 #if (DD_RECON_F == 18)
00238 #define GAUGE0TEX gauge0TexSingle2
00239 #define GAUGE1TEX gauge1TexSingle2
00240 #else
00241 #define GAUGE0TEX gauge0TexSingle4
00242 #define GAUGE1TEX gauge1TexSingle4
00243 #endif
00244 #endif
00245 
00246 
00247 // single-precision spinor fields
00248 #define DD_PARAM_OUT float4* out, float *null1,
00249 #define DD_PARAM_IN const float4* in, const float *null4,
00250 #ifdef DIRECT_ACCESS_WILSON_SPINOR
00251 #define READ_SPINOR READ_SPINOR_SINGLE
00252 #define READ_SPINOR_UP READ_SPINOR_SINGLE_UP
00253 #define READ_SPINOR_DOWN READ_SPINOR_SINGLE_DOWN
00254 #define SPINORTEX in
00255 #else
00256 #define READ_SPINOR READ_SPINOR_SINGLE_TEX
00257 #define READ_SPINOR_UP READ_SPINOR_SINGLE_UP_TEX
00258 #define READ_SPINOR_DOWN READ_SPINOR_SINGLE_DOWN_TEX
00259 #define SPINORTEX spinorTexSingle
00260 #endif
00261 #ifdef DIRECT_ACCESS_WILSON_INTER
00262 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_SINGLE
00263 #define INTERTEX out
00264 #else
00265 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_SINGLE_TEX
00266 #define INTERTEX interTexSingle
00267 #endif
00268 #define WRITE_SPINOR WRITE_SPINOR_FLOAT4_STR
00269 #if (DD_XPAY==1)
00270 #ifdef DIRECT_ACCESS_WILSON_ACCUM
00271 #define ACCUMTEX x
00272 #define READ_ACCUM READ_ACCUM_SINGLE
00273 #else
00274 #define ACCUMTEX accumTexSingle
00275 #define READ_ACCUM READ_ACCUM_SINGLE_TEX
00276 #endif
00277 #endif
00278 
00279 #define SPINOR_HOP 6
00280 
00281 // single-precision clover field
00282 #if (DD_CLOVER==0)
00283 #define DD_PARAM_CLOVER
00284 #else
00285 #define DD_PARAM_CLOVER const float4 *clover, const float *null3,
00286 #endif
00287 #ifdef DIRECT_ACCESS_CLOVER
00288 #define CLOVERTEX clover
00289 #define READ_CLOVER READ_CLOVER_SINGLE
00290 #else
00291 #define CLOVERTEX cloverTexSingle
00292 #define READ_CLOVER READ_CLOVER_SINGLE_TEX
00293 #endif
00294 
00295 #else             // half-precision fields
00296 
00297 #define TPROJSCALE tProjScale_f
00298 
00299 // half-precision gauge field
00300 #ifdef DIRECT_ACCESS_LINK
00301 #define GAUGE0TEX gauge0
00302 #define GAUGE1TEX gauge1
00303 #else
00304 #if (DD_RECON_F == 18)
00305 #define GAUGE0TEX gauge0TexHalf2
00306 #define GAUGE1TEX gauge1TexHalf2
00307 #else
00308 #define GAUGE0TEX gauge0TexHalf4
00309 #define GAUGE1TEX gauge1TexHalf4
00310 #endif
00311 #endif
00312 
00313 
00314 // half-precision spinor fields
00315 #ifdef DIRECT_ACCESS_WILSON_SPINOR
00316 #define READ_SPINOR READ_SPINOR_HALF
00317 #define READ_SPINOR_UP READ_SPINOR_HALF_UP
00318 #define READ_SPINOR_DOWN READ_SPINOR_HALF_DOWN
00319 #define SPINORTEX in
00320 #else
00321 #define READ_SPINOR READ_SPINOR_HALF_TEX
00322 #define READ_SPINOR_UP READ_SPINOR_HALF_UP_TEX
00323 #define READ_SPINOR_DOWN READ_SPINOR_HALF_DOWN_TEX
00324 #define SPINORTEX spinorTexHalf
00325 #endif
00326 #ifdef DIRECT_ACCESS_WILSON_INTER
00327 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_HALF
00328 #define INTERTEX out
00329 #else
00330 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_HALF_TEX
00331 #define INTERTEX interTexHalf
00332 #endif
00333 #define DD_PARAM_OUT short4* out, float *outNorm,
00334 #define DD_PARAM_IN const short4* in, const float *inNorm,
00335 #define WRITE_SPINOR WRITE_SPINOR_SHORT4_STR
00336 #if (DD_XPAY==1)
00337 #ifdef DIRECT_ACCESS_WILSON_ACCUM
00338 #define ACCUMTEX x
00339 #define READ_ACCUM READ_ACCUM_HALF
00340 #else
00341 #define ACCUMTEX accumTexHalf
00342 #define READ_ACCUM READ_ACCUM_HALF_TEX
00343 #endif
00344 #endif
00345 
00346 #define SPINOR_HOP 6
00347 
00348 // half-precision clover field
00349 #if (DD_CLOVER==0)
00350 #define DD_PARAM_CLOVER 
00351 #else
00352 #define DD_PARAM_CLOVER const short4 *clover, const float *cloverNorm,
00353 #endif
00354 #ifdef DIRECT_ACCESS_CLOVER
00355 #define CLOVERTEX clover
00356 #define READ_CLOVER READ_CLOVER_HALF
00357 #else
00358 #define CLOVERTEX cloverTexHalf
00359 #define READ_CLOVER READ_CLOVER_HALF_TEX
00360 #endif
00361 
00362 #endif
00363 
00364 // only build double precision if supported
00365 #if !(__COMPUTE_CAPABILITY__ < 130 && DD_PREC == 0) 
00366 
00367 #define DD_CONCAT(n,r,d,x) n ## r ## d ## x ## Kernel
00368 #define DD_FUNC(n,r,d,x) DD_CONCAT(n,r,d,x)
00369 
00370 #ifdef GPU_WILSON_DIRAC
00371 #define BUILD_WILSON 1
00372 #else
00373 #define BUILD_WILSON 0
00374 #endif
00375 
00376 #ifdef GPU_CLOVER_DIRAC
00377 #define BUILD_CLOVER 1
00378 #else
00379 #define BUILD_CLOVER 0
00380 #endif
00381 
00382 // define the kernel
00383 
00384 template <KernelType kernel_type>
00385 __global__ void DD_FUNC(DD_NAME_F, DD_RECON_F, DD_DAG_F, DD_XPAY_F)
00386   (DD_PARAM_OUT DD_PARAM_GAUGE DD_PARAM_CLOVER DD_PARAM_IN DD_PARAM_XPAY const DslashParam param) {
00387 
00388   // build Wilson or clover as appropriate
00389 #if ((DD_CLOVER==0 && BUILD_WILSON) || (DD_CLOVER==1 && BUILD_CLOVER))
00390 
00391 #if (__COMPUTE_CAPABILITY__ >= 200 && defined(SHARED_WILSON_DSLASH)) // Fermi optimal code
00392 
00393 #if DD_DAG
00394 #include "wilson_dslash_dagger_fermi_core.h"
00395 #else
00396 #include "wilson_dslash_fermi_core.h"
00397 #endif
00398 
00399 #elif (__COMPUTE_CAPABILITY__ >= 120) // GT200 optimal code
00400 
00401 #if DD_DAG
00402 #include "wilson_dslash_dagger_gt200_core.h"
00403 #else
00404 #include "wilson_dslash_gt200_core.h"
00405 #endif
00406 
00407 #else  // fall-back is original G80 
00408 
00409 #if DD_DAG
00410 #include "wilson_dslash_dagger_g80_core.h"
00411 #else
00412 #include "wilson_dslash_g80_core.h"
00413 #endif
00414 
00415 #endif
00416 
00417 
00418 #endif
00419 
00420 }
00421 
00422 #endif
00423 
00424 // clean up
00425 
00426 #undef DD_NAME_F
00427 #undef DD_RECON_F
00428 #undef DD_DAG_F
00429 #undef DD_XPAY_F
00430 #undef DD_PARAM_OUT
00431 #undef DD_PARAM_GAUGE
00432 #undef DD_PARAM_CLOVER
00433 #undef DD_PARAM_IN
00434 #undef DD_PARAM_XPAY
00435 #undef DD_CONCAT
00436 #undef DD_FUNC
00437 
00438 #undef DSLASH_XPAY
00439 #undef READ_GAUGE_MATRIX
00440 #undef RECONSTRUCT_GAUGE_MATRIX
00441 #undef GAUGE0TEX
00442 #undef GAUGE1TEX
00443 #undef READ_SPINOR
00444 #undef READ_SPINOR_UP
00445 #undef READ_SPINOR_DOWN
00446 #undef SPINORTEX
00447 #undef READ_INTERMEDIATE_SPINOR
00448 #undef INTERTEX
00449 #undef WRITE_SPINOR
00450 #undef READ_ACCUM
00451 #undef ACCUMTEX
00452 #undef READ_CLOVER
00453 #undef CLOVERTEX
00454 #undef DSLASH_CLOVER
00455 #undef GAUGE_FLOAT2
00456 #undef SPINOR_DOUBLE
00457 #undef CLOVER_DOUBLE
00458 #undef SPINOR_HOP
00459 
00460 #undef TPROJSCALE
00461 
00462 // prepare next set of options, or clean up after final iteration
00463 
00464 #if (DD_DAG==0)
00465 #undef DD_DAG
00466 #define DD_DAG 1
00467 #else
00468 #undef DD_DAG
00469 #define DD_DAG 0
00470 
00471 #if (DD_XPAY==0)
00472 #undef DD_XPAY
00473 #define DD_XPAY 1
00474 #else
00475 #undef DD_XPAY
00476 #define DD_XPAY 0
00477 
00478 #if (DD_RECON==0)
00479 #undef DD_RECON
00480 #define DD_RECON 1
00481 #elif (DD_RECON==1)
00482 #undef DD_RECON
00483 #define DD_RECON 2
00484 #else
00485 #undef DD_RECON
00486 #define DD_RECON 0
00487 
00488 #if (DD_PREC==0)
00489 #undef DD_PREC
00490 #define DD_PREC 1
00491 #elif (DD_PREC==1)
00492 #undef DD_PREC
00493 #define DD_PREC 2
00494 #else
00495 #undef DD_PREC
00496 #define DD_PREC 0
00497 
00498 #if (DD_CLOVER==0)
00499 #undef DD_CLOVER
00500 #define DD_CLOVER 1
00501 
00502 #else
00503 
00504 #undef DD_LOOP
00505 #undef DD_DAG
00506 #undef DD_XPAY
00507 #undef DD_RECON
00508 #undef DD_PREC
00509 #undef DD_CLOVER
00510 
00511 #endif // DD_CLOVER
00512 #endif // DD_PREC
00513 #endif // DD_RECON
00514 #endif // DD_XPAY
00515 #endif // DD_DAG
00516 
00517 #ifdef DD_LOOP
00518 #include "wilson_dslash_def.h"
00519 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines