QUDA: quda/lib/wilson_dslash

QUDA v0.3.2
A library for QCD on GPUs
00001 // wilson_dslash_def.h - Dslash kernel definitions
00002 
00003 // There are currently 72 different variants of the Wilson Dslash
00004 // kernel, each one characterized by a set of 5 options, where each
00005 // option can take one of several values (2*3*2*2*3 = 72).  This file
00006 // is structured so that the C preprocessor loops through all 72
00007 // variants (in a manner resembling a counter), sets the appropriate
00008 // macros, and defines the corresponding functions.
00009 //
00010 // As an example of the function naming conventions, consider
00011 //
00012 // cloverDslash12DaggerXpayKernel(float4* out, ...).
00013 //
00014 // This is a clover Dslash^dagger kernel where the result is
00015 // multiplied by "a" and summed with an input vector (Xpay), and the
00016 // gauge matrix is reconstructed from 12 real numbers.  More
00017 // generally, each function name is given by the concatenation of the
00018 // following 4 fields, with "Kernel" at the end:
00019 //
00020 // DD_NAME_F = dslash, cloverDslash
00021 // DD_RECON_F = 8, 12, 18
00022 // DD_DAG_F = Dagger, [blank]
00023 // DD_XPAY_F = Xpay, [blank]
00024 //
00025 // In addition, the kernels are templated on the precision of the
00026 // fields (double, single, or half).
00027 
00028 // initialize on first iteration
00029 
00030 #ifndef DD_LOOP
00031 #define DD_LOOP
00032 #define DD_DAG 0
00033 #define DD_XPAY 0
00034 #define DD_RECON 0
00035 #define DD_PREC 0
00036 #define DD_CLOVER 0
00037 #endif
00038 
00039 // set options for current iteration
00040 
00041 #if (DD_CLOVER==0) // no clover
00042 #define DD_NAME_F dslash
00043 #else              // clover
00044 #define DSLASH_CLOVER
00045 #define DD_NAME_F cloverDslash
00046 #endif
00047 
00048 #if (DD_DAG==0) // no dagger
00049 #define DD_DAG_F
00050 #else           // dagger
00051 #define DD_DAG_F Dagger
00052 #endif
00053 
00054 #if (DD_XPAY==0) // no xpay 
00055 #define DD_XPAY_F 
00056 #define DD_PARAM5 const int oddBit
00057 #else            // xpay
00058 #define DSLASH_XPAY
00059 #define DD_XPAY_F Xpay
00060 #if (DD_PREC == 0)
00061 #define DD_PARAM5 const int oddBit, const double2 *x, const float *xNorm, const double a
00062 #elif (DD_PREC == 1) 
00063 #define DD_PARAM5 const int oddBit, const float4 *x, const float *xNorm, const float a
00064 #else
00065 #define DD_PARAM5 const int oddBit, const short4 *x, const float *xNorm, const float a
00066 #endif
00067 #endif
00068 
00069 #if (DD_RECON==0) // reconstruct from 8 reals
00070 #define DD_RECON_F 8
00071 #if (DD_PREC==0)
00072 #define DD_PARAM2 const double2 *gauge0, const double2 *gauge1
00073 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_DOUBLE
00074 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_DOUBLE
00075 #elif (DD_PREC==1)
00076 #define DD_PARAM2 const float4 *gauge0, const float4 *gauge1
00077 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_SINGLE
00078 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_SINGLE
00079 #else
00080 #define DD_PARAM2 const short4 *gauge0, const short4* gauge1
00081 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_SINGLE
00082 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_HALF
00083 #endif
00084 #elif (DD_RECON==1) // reconstruct from 12 reals
00085 #define DD_RECON_F 12
00086 #if (DD_PREC==0)
00087 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_DOUBLE
00088 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_DOUBLE
00089 #define DD_PARAM2 const double2 *gauge0, const double2 *gauge1
00090 #elif (DD_PREC==1)
00091 #define DD_PARAM2 const float4 *gauge0, const float4 *gauge1
00092 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_SINGLE
00093 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_SINGLE
00094 #else
00095 #define DD_PARAM2 const short4 *gauge0, const short4 *gauge1
00096 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_SINGLE
00097 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_SINGLE
00098 #endif
00099 #else               // no reconstruct, load all components
00100 #define DD_RECON_F 18
00101 #define GAUGE_FLOAT2
00102 #if (DD_PREC==0)
00103 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_DOUBLE
00104 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_DOUBLE
00105 #define DD_PARAM2 const double2 *gauge0, const double2 *gauge1
00106 #elif (DD_PREC==1)
00107 #define DD_PARAM2 const float4 *gauge0, const float4 *gauge1 // FIXME for direct reading, really float2
00108 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_SINGLE
00109 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_SINGLE
00110 #else
00111 #define DD_PARAM2 const short4 *gauge0, const short4 *gauge1 // FIXME for direct reading, really short2
00112 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_SINGLE
00113 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_SINGLE
00114 #endif
00115 #endif
00116 
00117 #if (DD_PREC==0) // double-precision fields
00118 
00119 // double-precision gauge field
00120 #define GAUGE0TEX gauge0TexDouble2
00121 #define GAUGE1TEX gauge1TexDouble2
00122 #define GAUGE_FLOAT2
00123 
00124 // double-precision spinor fields
00125 #define DD_PARAM1 double2* out, float *null1
00126 #define DD_PARAM4 const double2* in, const float *null4
00127 #define READ_SPINOR READ_SPINOR_DOUBLE
00128 #define READ_SPINOR_UP READ_SPINOR_DOUBLE_UP
00129 #define READ_SPINOR_DOWN READ_SPINOR_DOUBLE_DOWN
00130 #define SPINORTEX spinorTexDouble
00131 #define WRITE_SPINOR WRITE_SPINOR_DOUBLE2
00132 #define SPINOR_DOUBLE
00133 #if (DD_XPAY==1)
00134 #define ACCUMTEX accumTexDouble
00135 #define READ_ACCUM READ_ACCUM_DOUBLE
00136 #endif
00137 
00138 // double-precision clover field
00139 #if (DD_CLOVER==0)
00140 #define DD_PARAM3
00141 #else
00142 #define DD_PARAM3 const double2 *clover, const float *null3,
00143 #endif
00144 #define CLOVERTEX cloverTexDouble
00145 #define READ_CLOVER READ_CLOVER_DOUBLE
00146 #define CLOVER_DOUBLE
00147 
00148 #elif (DD_PREC==1) // single-precision fields
00149 
00150 // single-precision gauge field
00151 #if (DD_RECON_F == 18)
00152 #define GAUGE0TEX gauge0TexSingle2
00153 #define GAUGE1TEX gauge1TexSingle2
00154 #else
00155 #define GAUGE0TEX gauge0TexSingle4
00156 #define GAUGE1TEX gauge1TexSingle4
00157 #endif
00158 
00159 // single-precision spinor fields
00160 #define DD_PARAM1 float4* out, float *null1
00161 #define DD_PARAM4 const float4* in, const float *null4
00162 #define READ_SPINOR READ_SPINOR_SINGLE
00163 #define READ_SPINOR_UP READ_SPINOR_SINGLE_UP
00164 #define READ_SPINOR_DOWN READ_SPINOR_SINGLE_DOWN
00165 #define SPINORTEX spinorTexSingle
00166 #define WRITE_SPINOR WRITE_SPINOR_FLOAT4
00167 #if (DD_XPAY==1)
00168 #define ACCUMTEX accumTexSingle
00169 #define READ_ACCUM READ_ACCUM_SINGLE
00170 #endif
00171 
00172 // single-precision clover field
00173 #if (DD_CLOVER==0)
00174 #define DD_PARAM3
00175 #else
00176 #define DD_PARAM3 const float4 *clover, const float *null3,
00177 #endif
00178 #define CLOVERTEX cloverTexSingle
00179 #define READ_CLOVER READ_CLOVER_SINGLE
00180 
00181 #else             // half-precision fields
00182 
00183 // half-precision gauge field
00184 #if (DD_RECON_F == 18)
00185 #define GAUGE0TEX gauge0TexHalf2
00186 #define GAUGE1TEX gauge1TexHalf2
00187 #else
00188 #define GAUGE0TEX gauge0TexHalf4
00189 #define GAUGE1TEX gauge1TexHalf4
00190 #endif
00191 
00192 // half-precision spinor fields
00193 #define READ_SPINOR READ_SPINOR_HALF
00194 #define READ_SPINOR_UP READ_SPINOR_HALF_UP
00195 #define READ_SPINOR_DOWN READ_SPINOR_HALF_DOWN
00196 #define SPINORTEX spinorTexHalf
00197 #define DD_PARAM1 short4* out, float *outNorm
00198 #define DD_PARAM4 const short4* in, const float *inNorm
00199 #define WRITE_SPINOR WRITE_SPINOR_SHORT4
00200 #if (DD_XPAY==1)
00201 #define ACCUMTEX accumTexHalf
00202 #define READ_ACCUM READ_ACCUM_HALF
00203 #endif
00204 
00205 // half-precision clover field
00206 #if (DD_CLOVER==0)
00207 #define DD_PARAM3 
00208 #else
00209 #define DD_PARAM3 const short4 *clover, const float *cloverNorm,
00210 #endif
00211 #define CLOVERTEX cloverTexHalf
00212 #define READ_CLOVER READ_CLOVER_HALF
00213 
00214 #endif
00215 
00216 // only build double precision if supported
00217 #if !(__CUDA_ARCH__ < 130 && DD_PREC == 0) 
00218 
00219 #define DD_CONCAT(n,r,d,x) n ## r ## d ## x ## Kernel
00220 #define DD_FUNC(n,r,d,x) DD_CONCAT(n,r,d,x)
00221 
00222 // define the kernel
00223 
00224 __global__ void DD_FUNC(DD_NAME_F, DD_RECON_F, DD_DAG_F, DD_XPAY_F)
00225   (DD_PARAM1, DD_PARAM2, DD_PARAM3 DD_PARAM4, DD_PARAM5) {
00226 
00227 #ifdef GPU_WILSON_DIRAC
00228 #if DD_DAG
00229 #include "wilson_dslash_dagger_core.h"
00230 #else
00231 #include "wilson_dslash_core.h"
00232 #endif
00233 #endif
00234 
00235 }
00236 
00237 #endif
00238 
00239 // clean up
00240 
00241 #undef DD_NAME_F
00242 #undef DD_RECON_F
00243 #undef DD_DAG_F
00244 #undef DD_XPAY_F
00245 #undef DD_PARAM1
00246 #undef DD_PARAM2
00247 #undef DD_PARAM3
00248 #undef DD_PARAM4
00249 #undef DD_PARAM5
00250 #undef DD_CONCAT
00251 #undef DD_FUNC
00252 
00253 #undef DSLASH_XPAY
00254 #undef READ_GAUGE_MATRIX
00255 #undef RECONSTRUCT_GAUGE_MATRIX
00256 #undef GAUGE0TEX
00257 #undef GAUGE1TEX
00258 #undef READ_SPINOR
00259 #undef READ_SPINOR_UP
00260 #undef READ_SPINOR_DOWN
00261 #undef SPINORTEX
00262 #undef WRITE_SPINOR
00263 #undef ACCUMTEX
00264 #undef READ_ACCUM
00265 #undef CLOVERTEX
00266 #undef READ_CLOVER
00267 #undef DSLASH_CLOVER
00268 #undef GAUGE_FLOAT2
00269 #undef SPINOR_DOUBLE
00270 #undef CLOVER_DOUBLE
00271 
00272 // prepare next set of options, or clean up after final iteration
00273 
00274 #if (DD_DAG==0)
00275 #undef DD_DAG
00276 #define DD_DAG 1
00277 #else
00278 #undef DD_DAG
00279 #define DD_DAG 0
00280 
00281 #if (DD_XPAY==0)
00282 #undef DD_XPAY
00283 #define DD_XPAY 1
00284 #else
00285 #undef DD_XPAY
00286 #define DD_XPAY 0
00287 
00288 #if (DD_RECON==0)
00289 #undef DD_RECON
00290 #define DD_RECON 1
00291 #elif (DD_RECON==1)
00292 #undef DD_RECON
00293 #define DD_RECON 2
00294 #else
00295 #undef DD_RECON
00296 #define DD_RECON 0
00297 
00298 #if (DD_PREC==0)
00299 #undef DD_PREC
00300 #define DD_PREC 1
00301 #elif (DD_PREC==1)
00302 #undef DD_PREC
00303 #define DD_PREC 2
00304 #else
00305 #undef DD_PREC
00306 #define DD_PREC 0
00307 
00308 #if (DD_CLOVER==0)
00309 #undef DD_CLOVER
00310 #define DD_CLOVER 1
00311 
00312 #else
00313 
00314 #undef DD_LOOP
00315 #undef DD_DAG
00316 #undef DD_XPAY
00317 #undef DD_RECON
00318 #undef DD_PREC
00319 #undef DD_CLOVER
00320 
00321 #endif // DD_CLOVER
00322 #endif // DD_PREC
00323 #endif // DD_RECON
00324 #endif // DD_XPAY
00325 #endif // DD_DAG
00326 
00327 #ifdef DD_LOOP
00328 #include "wilson_dslash_def.h"
00329 #endif
quda/lib/wilson_dslash_def.h