|
QUDA v0.3.2
A library for QCD on GPUs
|
00001 // wilson_dslash_def.h - Dslash kernel definitions 00002 00003 // There are currently 72 different variants of the Wilson Dslash 00004 // kernel, each one characterized by a set of 5 options, where each 00005 // option can take one of several values (2*3*2*2*3 = 72). This file 00006 // is structured so that the C preprocessor loops through all 72 00007 // variants (in a manner resembling a counter), sets the appropriate 00008 // macros, and defines the corresponding functions. 00009 // 00010 // As an example of the function naming conventions, consider 00011 // 00012 // cloverDslash12DaggerXpayKernel(float4* out, ...). 00013 // 00014 // This is a clover Dslash^dagger kernel where the result is 00015 // multiplied by "a" and summed with an input vector (Xpay), and the 00016 // gauge matrix is reconstructed from 12 real numbers. More 00017 // generally, each function name is given by the concatenation of the 00018 // following 4 fields, with "Kernel" at the end: 00019 // 00020 // DD_NAME_F = dslash, cloverDslash 00021 // DD_RECON_F = 8, 12, 18 00022 // DD_DAG_F = Dagger, [blank] 00023 // DD_XPAY_F = Xpay, [blank] 00024 // 00025 // In addition, the kernels are templated on the precision of the 00026 // fields (double, single, or half). 00027 00028 // initialize on first iteration 00029 00030 #ifndef DD_LOOP 00031 #define DD_LOOP 00032 #define DD_DAG 0 00033 #define DD_XPAY 0 00034 #define DD_RECON 0 00035 #define DD_PREC 0 00036 #define DD_CLOVER 0 00037 #endif 00038 00039 // set options for current iteration 00040 00041 #if (DD_CLOVER==0) // no clover 00042 #define DD_NAME_F dslash 00043 #else // clover 00044 #define DSLASH_CLOVER 00045 #define DD_NAME_F cloverDslash 00046 #endif 00047 00048 #if (DD_DAG==0) // no dagger 00049 #define DD_DAG_F 00050 #else // dagger 00051 #define DD_DAG_F Dagger 00052 #endif 00053 00054 #if (DD_XPAY==0) // no xpay 00055 #define DD_XPAY_F 00056 #define DD_PARAM5 const int oddBit 00057 #else // xpay 00058 #define DSLASH_XPAY 00059 #define DD_XPAY_F Xpay 00060 #if (DD_PREC == 0) 00061 #define DD_PARAM5 const int oddBit, const double2 *x, const float *xNorm, const double a 00062 #elif (DD_PREC == 1) 00063 #define DD_PARAM5 const int oddBit, const float4 *x, const float *xNorm, const float a 00064 #else 00065 #define DD_PARAM5 const int oddBit, const short4 *x, const float *xNorm, const float a 00066 #endif 00067 #endif 00068 00069 #if (DD_RECON==0) // reconstruct from 8 reals 00070 #define DD_RECON_F 8 00071 #if (DD_PREC==0) 00072 #define DD_PARAM2 const double2 *gauge0, const double2 *gauge1 00073 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_DOUBLE 00074 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_DOUBLE 00075 #elif (DD_PREC==1) 00076 #define DD_PARAM2 const float4 *gauge0, const float4 *gauge1 00077 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_SINGLE 00078 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_SINGLE 00079 #else 00080 #define DD_PARAM2 const short4 *gauge0, const short4* gauge1 00081 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_SINGLE 00082 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_HALF 00083 #endif 00084 #elif (DD_RECON==1) // reconstruct from 12 reals 00085 #define DD_RECON_F 12 00086 #if (DD_PREC==0) 00087 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_DOUBLE 00088 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_DOUBLE 00089 #define DD_PARAM2 const double2 *gauge0, const double2 *gauge1 00090 #elif (DD_PREC==1) 00091 #define DD_PARAM2 const float4 *gauge0, const float4 *gauge1 00092 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_SINGLE 00093 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_SINGLE 00094 #else 00095 #define DD_PARAM2 const short4 *gauge0, const short4 *gauge1 00096 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_SINGLE 00097 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_SINGLE 00098 #endif 00099 #else // no reconstruct, load all components 00100 #define DD_RECON_F 18 00101 #define GAUGE_FLOAT2 00102 #if (DD_PREC==0) 00103 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_DOUBLE 00104 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_DOUBLE 00105 #define DD_PARAM2 const double2 *gauge0, const double2 *gauge1 00106 #elif (DD_PREC==1) 00107 #define DD_PARAM2 const float4 *gauge0, const float4 *gauge1 // FIXME for direct reading, really float2 00108 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_SINGLE 00109 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_SINGLE 00110 #else 00111 #define DD_PARAM2 const short4 *gauge0, const short4 *gauge1 // FIXME for direct reading, really short2 00112 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_SINGLE 00113 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_SINGLE 00114 #endif 00115 #endif 00116 00117 #if (DD_PREC==0) // double-precision fields 00118 00119 // double-precision gauge field 00120 #define GAUGE0TEX gauge0TexDouble2 00121 #define GAUGE1TEX gauge1TexDouble2 00122 #define GAUGE_FLOAT2 00123 00124 // double-precision spinor fields 00125 #define DD_PARAM1 double2* out, float *null1 00126 #define DD_PARAM4 const double2* in, const float *null4 00127 #define READ_SPINOR READ_SPINOR_DOUBLE 00128 #define READ_SPINOR_UP READ_SPINOR_DOUBLE_UP 00129 #define READ_SPINOR_DOWN READ_SPINOR_DOUBLE_DOWN 00130 #define SPINORTEX spinorTexDouble 00131 #define WRITE_SPINOR WRITE_SPINOR_DOUBLE2 00132 #define SPINOR_DOUBLE 00133 #if (DD_XPAY==1) 00134 #define ACCUMTEX accumTexDouble 00135 #define READ_ACCUM READ_ACCUM_DOUBLE 00136 #endif 00137 00138 // double-precision clover field 00139 #if (DD_CLOVER==0) 00140 #define DD_PARAM3 00141 #else 00142 #define DD_PARAM3 const double2 *clover, const float *null3, 00143 #endif 00144 #define CLOVERTEX cloverTexDouble 00145 #define READ_CLOVER READ_CLOVER_DOUBLE 00146 #define CLOVER_DOUBLE 00147 00148 #elif (DD_PREC==1) // single-precision fields 00149 00150 // single-precision gauge field 00151 #if (DD_RECON_F == 18) 00152 #define GAUGE0TEX gauge0TexSingle2 00153 #define GAUGE1TEX gauge1TexSingle2 00154 #else 00155 #define GAUGE0TEX gauge0TexSingle4 00156 #define GAUGE1TEX gauge1TexSingle4 00157 #endif 00158 00159 // single-precision spinor fields 00160 #define DD_PARAM1 float4* out, float *null1 00161 #define DD_PARAM4 const float4* in, const float *null4 00162 #define READ_SPINOR READ_SPINOR_SINGLE 00163 #define READ_SPINOR_UP READ_SPINOR_SINGLE_UP 00164 #define READ_SPINOR_DOWN READ_SPINOR_SINGLE_DOWN 00165 #define SPINORTEX spinorTexSingle 00166 #define WRITE_SPINOR WRITE_SPINOR_FLOAT4 00167 #if (DD_XPAY==1) 00168 #define ACCUMTEX accumTexSingle 00169 #define READ_ACCUM READ_ACCUM_SINGLE 00170 #endif 00171 00172 // single-precision clover field 00173 #if (DD_CLOVER==0) 00174 #define DD_PARAM3 00175 #else 00176 #define DD_PARAM3 const float4 *clover, const float *null3, 00177 #endif 00178 #define CLOVERTEX cloverTexSingle 00179 #define READ_CLOVER READ_CLOVER_SINGLE 00180 00181 #else // half-precision fields 00182 00183 // half-precision gauge field 00184 #if (DD_RECON_F == 18) 00185 #define GAUGE0TEX gauge0TexHalf2 00186 #define GAUGE1TEX gauge1TexHalf2 00187 #else 00188 #define GAUGE0TEX gauge0TexHalf4 00189 #define GAUGE1TEX gauge1TexHalf4 00190 #endif 00191 00192 // half-precision spinor fields 00193 #define READ_SPINOR READ_SPINOR_HALF 00194 #define READ_SPINOR_UP READ_SPINOR_HALF_UP 00195 #define READ_SPINOR_DOWN READ_SPINOR_HALF_DOWN 00196 #define SPINORTEX spinorTexHalf 00197 #define DD_PARAM1 short4* out, float *outNorm 00198 #define DD_PARAM4 const short4* in, const float *inNorm 00199 #define WRITE_SPINOR WRITE_SPINOR_SHORT4 00200 #if (DD_XPAY==1) 00201 #define ACCUMTEX accumTexHalf 00202 #define READ_ACCUM READ_ACCUM_HALF 00203 #endif 00204 00205 // half-precision clover field 00206 #if (DD_CLOVER==0) 00207 #define DD_PARAM3 00208 #else 00209 #define DD_PARAM3 const short4 *clover, const float *cloverNorm, 00210 #endif 00211 #define CLOVERTEX cloverTexHalf 00212 #define READ_CLOVER READ_CLOVER_HALF 00213 00214 #endif 00215 00216 // only build double precision if supported 00217 #if !(__CUDA_ARCH__ < 130 && DD_PREC == 0) 00218 00219 #define DD_CONCAT(n,r,d,x) n ## r ## d ## x ## Kernel 00220 #define DD_FUNC(n,r,d,x) DD_CONCAT(n,r,d,x) 00221 00222 // define the kernel 00223 00224 __global__ void DD_FUNC(DD_NAME_F, DD_RECON_F, DD_DAG_F, DD_XPAY_F) 00225 (DD_PARAM1, DD_PARAM2, DD_PARAM3 DD_PARAM4, DD_PARAM5) { 00226 00227 #ifdef GPU_WILSON_DIRAC 00228 #if DD_DAG 00229 #include "wilson_dslash_dagger_core.h" 00230 #else 00231 #include "wilson_dslash_core.h" 00232 #endif 00233 #endif 00234 00235 } 00236 00237 #endif 00238 00239 // clean up 00240 00241 #undef DD_NAME_F 00242 #undef DD_RECON_F 00243 #undef DD_DAG_F 00244 #undef DD_XPAY_F 00245 #undef DD_PARAM1 00246 #undef DD_PARAM2 00247 #undef DD_PARAM3 00248 #undef DD_PARAM4 00249 #undef DD_PARAM5 00250 #undef DD_CONCAT 00251 #undef DD_FUNC 00252 00253 #undef DSLASH_XPAY 00254 #undef READ_GAUGE_MATRIX 00255 #undef RECONSTRUCT_GAUGE_MATRIX 00256 #undef GAUGE0TEX 00257 #undef GAUGE1TEX 00258 #undef READ_SPINOR 00259 #undef READ_SPINOR_UP 00260 #undef READ_SPINOR_DOWN 00261 #undef SPINORTEX 00262 #undef WRITE_SPINOR 00263 #undef ACCUMTEX 00264 #undef READ_ACCUM 00265 #undef CLOVERTEX 00266 #undef READ_CLOVER 00267 #undef DSLASH_CLOVER 00268 #undef GAUGE_FLOAT2 00269 #undef SPINOR_DOUBLE 00270 #undef CLOVER_DOUBLE 00271 00272 // prepare next set of options, or clean up after final iteration 00273 00274 #if (DD_DAG==0) 00275 #undef DD_DAG 00276 #define DD_DAG 1 00277 #else 00278 #undef DD_DAG 00279 #define DD_DAG 0 00280 00281 #if (DD_XPAY==0) 00282 #undef DD_XPAY 00283 #define DD_XPAY 1 00284 #else 00285 #undef DD_XPAY 00286 #define DD_XPAY 0 00287 00288 #if (DD_RECON==0) 00289 #undef DD_RECON 00290 #define DD_RECON 1 00291 #elif (DD_RECON==1) 00292 #undef DD_RECON 00293 #define DD_RECON 2 00294 #else 00295 #undef DD_RECON 00296 #define DD_RECON 0 00297 00298 #if (DD_PREC==0) 00299 #undef DD_PREC 00300 #define DD_PREC 1 00301 #elif (DD_PREC==1) 00302 #undef DD_PREC 00303 #define DD_PREC 2 00304 #else 00305 #undef DD_PREC 00306 #define DD_PREC 0 00307 00308 #if (DD_CLOVER==0) 00309 #undef DD_CLOVER 00310 #define DD_CLOVER 1 00311 00312 #else 00313 00314 #undef DD_LOOP 00315 #undef DD_DAG 00316 #undef DD_XPAY 00317 #undef DD_RECON 00318 #undef DD_PREC 00319 #undef DD_CLOVER 00320 00321 #endif // DD_CLOVER 00322 #endif // DD_PREC 00323 #endif // DD_RECON 00324 #endif // DD_XPAY 00325 #endif // DD_DAG 00326 00327 #ifdef DD_LOOP 00328 #include "wilson_dslash_def.h" 00329 #endif
1.7.3