// QUDA v0.4.0
// A library for QCD on GPUs
//
00001 #ifndef _HISQ_FORCE_MACROS_H_ 00002 #define _HISQ_FORCE_MACROS_H_ 00003 00004 00005 #ifndef HISQ_RECONSTRUCT_LINKS 00006 00007 #define LINK_W00_re LINK_W[0].x 00008 #define LINK_W00_im LINK_W[0].y 00009 #define LINK_W01_re LINK_W[1].x 00010 #define LINK_W01_im LINK_W[1].y 00011 #define LINK_W02_re LINK_W[2].x 00012 #define LINK_W02_im LINK_W[2].y 00013 #define LINK_W10_re LINK_W[3].x 00014 #define LINK_W10_im LINK_W[3].y 00015 #define LINK_W11_re LINK_W[4].x 00016 #define LINK_W11_im LINK_W[4].y 00017 #define LINK_W12_re LINK_W[5].x 00018 #define LINK_W12_im LINK_W[5].y 00019 #define LINK_W20_re LINK_W[6].x 00020 #define LINK_W20_im LINK_W[6].y 00021 #define LINK_W21_re LINK_W[7].x 00022 #define LINK_W21_im LINK_W[7].y 00023 #define LINK_W22_re LINK_W[8].x 00024 #define LINK_W22_im LINK_W[8].y 00025 00026 #define LINK_X00_re LINK_X[0].x 00027 #define LINK_X00_im LINK_X[0].y 00028 #define LINK_X01_re LINK_X[1].x 00029 #define LINK_X01_im LINK_X[1].y 00030 #define LINK_X02_re LINK_X[2].x 00031 #define LINK_X02_im LINK_X[2].y 00032 #define LINK_X10_re LINK_X[3].x 00033 #define LINK_X10_im LINK_X[3].y 00034 #define LINK_X11_re LINK_X[4].x 00035 #define LINK_X11_im LINK_X[4].y 00036 #define LINK_X12_re LINK_X[5].x 00037 #define LINK_X12_im LINK_X[5].y 00038 #define LINK_X20_re LINK_X[6].x 00039 #define LINK_X20_im LINK_X[6].y 00040 #define LINK_X21_re LINK_X[7].x 00041 #define LINK_X21_im LINK_X[7].y 00042 #define LINK_X22_re LINK_X[8].x 00043 #define LINK_X22_im LINK_X[8].y 00044 00045 #define LINK_Y00_re LINK_Y[0].x 00046 #define LINK_Y00_im LINK_Y[0].y 00047 #define LINK_Y01_re LINK_Y[1].x 00048 #define LINK_Y01_im LINK_Y[1].y 00049 #define LINK_Y02_re LINK_Y[2].x 00050 #define LINK_Y02_im LINK_Y[2].y 00051 #define LINK_Y10_re LINK_Y[3].x 00052 #define LINK_Y10_im LINK_Y[3].y 00053 #define LINK_Y11_re LINK_Y[4].x 00054 #define LINK_Y11_im LINK_Y[4].y 00055 #define LINK_Y12_re LINK_Y[5].x 00056 #define LINK_Y12_im LINK_Y[5].y 00057 #define LINK_Y20_re 
LINK_Y[6].x 00058 #define LINK_Y20_im LINK_Y[6].y 00059 #define LINK_Y21_re LINK_Y[7].x 00060 #define LINK_Y21_im LINK_Y[7].y 00061 #define LINK_Y22_re LINK_Y[8].x 00062 #define LINK_Y22_im LINK_Y[8].y 00063 00064 #define LINK_Z00_re LINK_Z[0].x 00065 #define LINK_Z00_im LINK_Z[0].y 00066 #define LINK_Z01_re LINK_Z[1].x 00067 #define LINK_Z01_im LINK_Z[1].y 00068 #define LINK_Z02_re LINK_Z[2].x 00069 #define LINK_Z02_im LINK_Z[2].y 00070 #define LINK_Z10_re LINK_Z[3].x 00071 #define LINK_Z10_im LINK_Z[3].y 00072 #define LINK_Z11_re LINK_Z[4].x 00073 #define LINK_Z11_im LINK_Z[4].y 00074 #define LINK_Z12_re LINK_Z[5].x 00075 #define LINK_Z12_im LINK_Z[5].y 00076 #define LINK_Z20_re LINK_Z[6].x 00077 #define LINK_Z20_im LINK_Z[6].y 00078 #define LINK_Z21_re LINK_Z[7].x 00079 #define LINK_Z21_im LINK_Z[7].y 00080 #define LINK_Z22_re LINK_Z[8].x 00081 #define LINK_Z22_im LINK_Z[8].y 00082 00083 #define ab_link00_re ab_link[0].x 00084 #define ab_link00_im ab_link[0].y 00085 #define ab_link01_re ab_link[1].x 00086 #define ab_link01_im ab_link[1].y 00087 #define ab_link02_re ab_link[2].x 00088 #define ab_link02_im ab_link[2].y 00089 #define ab_link10_re ab_link[3].x 00090 #define ab_link10_im ab_link[3].y 00091 #define ab_link11_re ab_link[4].x 00092 #define ab_link11_im ab_link[4].y 00093 #define ab_link12_re ab_link[5].x 00094 #define ab_link12_im ab_link[5].y 00095 #define ab_link20_re ab_link[6].x 00096 #define ab_link20_im ab_link[6].y 00097 #define ab_link21_re ab_link[7].x 00098 #define ab_link21_im ab_link[7].y 00099 #define ab_link22_re ab_link[8].x 00100 #define ab_link22_im ab_link[8].y 00101 00102 #define bc_link00_re bc_link[0].x 00103 #define bc_link00_im bc_link[0].y 00104 #define bc_link01_re bc_link[1].x 00105 #define bc_link01_im bc_link[1].y 00106 #define bc_link02_re bc_link[2].x 00107 #define bc_link02_im bc_link[2].y 00108 #define bc_link10_re bc_link[3].x 00109 #define bc_link10_im bc_link[3].y 00110 #define bc_link11_re bc_link[4].x 00111 #define 
bc_link11_im bc_link[4].y 00112 #define bc_link12_re bc_link[5].x 00113 #define bc_link12_im bc_link[5].y 00114 #define bc_link20_re bc_link[6].x 00115 #define bc_link20_im bc_link[6].y 00116 #define bc_link21_re bc_link[7].x 00117 #define bc_link21_im bc_link[7].y 00118 #define bc_link22_re bc_link[8].x 00119 #define bc_link22_im bc_link[8].y 00120 00121 #define ad_link00_re ad_link[0].x 00122 #define ad_link00_im ad_link[0].y 00123 #define ad_link01_re ad_link[1].x 00124 #define ad_link01_im ad_link[1].y 00125 #define ad_link02_re ad_link[2].x 00126 #define ad_link02_im ad_link[2].y 00127 #define ad_link10_re ad_link[3].x 00128 #define ad_link10_im ad_link[3].y 00129 #define ad_link11_re ad_link[4].x 00130 #define ad_link11_im ad_link[4].y 00131 #define ad_link12_re ad_link[5].x 00132 #define ad_link12_im ad_link[5].y 00133 #define ad_link20_re ad_link[6].x 00134 #define ad_link20_im ad_link[6].y 00135 #define ad_link21_re ad_link[7].x 00136 #define ad_link21_im ad_link[7].y 00137 #define ad_link22_re ad_link[8].x 00138 #define ad_link22_im ad_link[8].y 00139 00140 #define de_link00_re de_link[0].x 00141 #define de_link00_im de_link[0].y 00142 #define de_link01_re de_link[1].x 00143 #define de_link01_im de_link[1].y 00144 #define de_link02_re de_link[2].x 00145 #define de_link02_im de_link[2].y 00146 #define de_link10_re de_link[3].x 00147 #define de_link10_im de_link[3].y 00148 #define de_link11_re de_link[4].x 00149 #define de_link11_im de_link[4].y 00150 #define de_link12_re de_link[5].x 00151 #define de_link12_im de_link[5].y 00152 #define de_link20_re de_link[6].x 00153 #define de_link20_im de_link[6].y 00154 #define de_link21_re de_link[7].x 00155 #define de_link21_im de_link[7].y 00156 #define de_link22_re de_link[8].x 00157 #define de_link22_im de_link[8].y 00158 00159 #define ef_link00_re ef_link[0].x 00160 #define ef_link00_im ef_link[0].y 00161 #define ef_link01_re ef_link[1].x 00162 #define ef_link01_im ef_link[1].y 00163 #define ef_link02_re 
ef_link[2].x 00164 #define ef_link02_im ef_link[2].y 00165 #define ef_link10_re ef_link[3].x 00166 #define ef_link10_im ef_link[3].y 00167 #define ef_link11_re ef_link[4].x 00168 #define ef_link11_im ef_link[4].y 00169 #define ef_link12_re ef_link[5].x 00170 #define ef_link12_im ef_link[5].y 00171 #define ef_link20_re ef_link[6].x 00172 #define ef_link20_im ef_link[6].y 00173 #define ef_link21_re ef_link[7].x 00174 #define ef_link21_im ef_link[7].y 00175 #define ef_link22_re ef_link[8].x 00176 #define ef_link22_im ef_link[8].y 00177 00178 00179 #else // HISQ_RECONSTRUCT_LINKS 00180 00181 #define LINK_W00_re LINK_W[0].x 00182 #define LINK_W00_im LINK_W[0].y 00183 #define LINK_W01_re LINK_W[0].z 00184 #define LINK_W01_im LINK_W[0].w 00185 #define LINK_W02_re LINK_W[1].x 00186 #define LINK_W02_im LINK_W[1].y 00187 #define LINK_W10_re LINK_W[1].z 00188 #define LINK_W10_im LINK_W[1].w 00189 #define LINK_W11_re LINK_W[2].x 00190 #define LINK_W11_im LINK_W[2].y 00191 #define LINK_W12_re LINK_W[2].z 00192 #define LINK_W12_im LINK_W[2].w 00193 #define LINK_W20_re LINK_W[3].x 00194 #define LINK_W20_im LINK_W[3].y 00195 #define LINK_W21_re LINK_W[3].z 00196 #define LINK_W21_im LINK_W[3].w 00197 #define LINK_W22_re LINK_W[4].x 00198 #define LINK_W22_im LINK_W[4].y 00199 00200 00201 #define LINK_X00_re LINK_X[0].x 00202 #define LINK_X00_im LINK_X[0].y 00203 #define LINK_X01_re LINK_X[0].z 00204 #define LINK_X01_im LINK_X[0].w 00205 #define LINK_X02_re LINK_X[1].x 00206 #define LINK_X02_im LINK_X[1].y 00207 #define LINK_X10_re LINK_X[1].z 00208 #define LINK_X10_im LINK_X[1].w 00209 #define LINK_X11_re LINK_X[2].x 00210 #define LINK_X11_im LINK_X[2].y 00211 #define LINK_X12_re LINK_X[2].z 00212 #define LINK_X12_im LINK_X[2].w 00213 #define LINK_X20_re LINK_X[3].x 00214 #define LINK_X20_im LINK_X[3].y 00215 #define LINK_X21_re LINK_X[3].z 00216 #define LINK_X21_im LINK_X[3].w 00217 #define LINK_X22_re LINK_X[4].x 00218 #define LINK_X22_im LINK_X[4].y 00219 00220 00221 #define 
LINK_Y00_re LINK_Y[0].x 00222 #define LINK_Y00_im LINK_Y[0].y 00223 #define LINK_Y01_re LINK_Y[0].z 00224 #define LINK_Y01_im LINK_Y[0].w 00225 #define LINK_Y02_re LINK_Y[1].x 00226 #define LINK_Y02_im LINK_Y[1].y 00227 #define LINK_Y10_re LINK_Y[1].z 00228 #define LINK_Y10_im LINK_Y[1].w 00229 #define LINK_Y11_re LINK_Y[2].x 00230 #define LINK_Y11_im LINK_Y[2].y 00231 #define LINK_Y12_re LINK_Y[2].z 00232 #define LINK_Y12_im LINK_Y[2].w 00233 #define LINK_Y20_re LINK_Y[3].x 00234 #define LINK_Y20_im LINK_Y[3].y 00235 #define LINK_Y21_re LINK_Y[3].z 00236 #define LINK_Y21_im LINK_Y[3].w 00237 #define LINK_Y22_re LINK_Y[4].x 00238 #define LINK_Y22_im LINK_Y[4].y 00239 00240 00241 #define LINK_Z00_re LINK_Z[0].x 00242 #define LINK_Z00_im LINK_Z[0].y 00243 #define LINK_Z01_re LINK_Z[0].z 00244 #define LINK_Z01_im LINK_Z[0].w 00245 #define LINK_Z02_re LINK_Z[1].x 00246 #define LINK_Z02_im LINK_Z[1].y 00247 #define LINK_Z10_re LINK_Z[1].z 00248 #define LINK_Z10_im LINK_Z[1].w 00249 #define LINK_Z11_re LINK_Z[2].x 00250 #define LINK_Z11_im LINK_Z[2].y 00251 #define LINK_Z12_re LINK_Z[2].z 00252 #define LINK_Z12_im LINK_Z[2].w 00253 #define LINK_Z20_re LINK_Z[3].x 00254 #define LINK_Z20_im LINK_Z[3].y 00255 #define LINK_Z21_re LINK_Z[3].z 00256 #define LINK_Z21_im LINK_Z[3].w 00257 #define LINK_Z22_re LINK_Z[4].x 00258 #define LINK_Z22_im LINK_Z[4].y 00259 00260 #endif // HISQ_RECONSTRUCT_LINKS 00261 00262 #ifndef GENERIC_MATRIX_MACROS 00263 #define GENERIC_MATRIX_MACROS 00264 00265 // Color matrices stored as an array of float2 or double2 00266 00267 #define COLOR_MAT_T00_re COLOR_MAT_T[0].x 00268 #define COLOR_MAT_T00_im COLOR_MAT_T[0].y 00269 #define COLOR_MAT_T01_re COLOR_MAT_T[1].x 00270 #define COLOR_MAT_T01_im COLOR_MAT_T[1].y 00271 #define COLOR_MAT_T02_re COLOR_MAT_T[2].x 00272 #define COLOR_MAT_T02_im COLOR_MAT_T[2].y 00273 #define COLOR_MAT_T10_re COLOR_MAT_T[3].x 00274 #define COLOR_MAT_T10_im COLOR_MAT_T[3].y 00275 #define COLOR_MAT_T11_re COLOR_MAT_T[4].x 
// Element accessors for the scratch color matrices COLOR_MAT_T (remaining
// entries), COLOR_MAT_U and COLOR_MAT_V (entries 00..20).  Each name
// <array><i><j>_re / _im expands to the .x / .y component of slot 3*i + j of
// the array called <array> in the enclosing scope.
#define COLOR_MAT_T11_im COLOR_MAT_T[4].y
#define COLOR_MAT_T12_re COLOR_MAT_T[5].x
#define COLOR_MAT_T12_im COLOR_MAT_T[5].y
#define COLOR_MAT_T20_re COLOR_MAT_T[6].x
#define COLOR_MAT_T20_im COLOR_MAT_T[6].y
#define COLOR_MAT_T21_re COLOR_MAT_T[7].x
#define COLOR_MAT_T21_im COLOR_MAT_T[7].y
#define COLOR_MAT_T22_re COLOR_MAT_T[8].x
#define COLOR_MAT_T22_im COLOR_MAT_T[8].y


#define COLOR_MAT_U00_re COLOR_MAT_U[0].x
#define COLOR_MAT_U00_im COLOR_MAT_U[0].y
#define COLOR_MAT_U01_re COLOR_MAT_U[1].x
#define COLOR_MAT_U01_im COLOR_MAT_U[1].y
#define COLOR_MAT_U02_re COLOR_MAT_U[2].x
#define COLOR_MAT_U02_im COLOR_MAT_U[2].y
#define COLOR_MAT_U10_re COLOR_MAT_U[3].x
#define COLOR_MAT_U10_im COLOR_MAT_U[3].y
#define COLOR_MAT_U11_re COLOR_MAT_U[4].x
#define COLOR_MAT_U11_im COLOR_MAT_U[4].y
#define COLOR_MAT_U12_re COLOR_MAT_U[5].x
#define COLOR_MAT_U12_im COLOR_MAT_U[5].y
#define COLOR_MAT_U20_re COLOR_MAT_U[6].x
#define COLOR_MAT_U20_im COLOR_MAT_U[6].y
#define COLOR_MAT_U21_re COLOR_MAT_U[7].x
#define COLOR_MAT_U21_im COLOR_MAT_U[7].y
#define COLOR_MAT_U22_re COLOR_MAT_U[8].x
#define COLOR_MAT_U22_im COLOR_MAT_U[8].y


#define COLOR_MAT_V00_re COLOR_MAT_V[0].x
#define COLOR_MAT_V00_im COLOR_MAT_V[0].y
#define COLOR_MAT_V01_re COLOR_MAT_V[1].x
#define COLOR_MAT_V01_im COLOR_MAT_V[1].y
#define COLOR_MAT_V02_re COLOR_MAT_V[2].x
#define COLOR_MAT_V02_im COLOR_MAT_V[2].y
#define COLOR_MAT_V10_re COLOR_MAT_V[3].x
#define COLOR_MAT_V10_im COLOR_MAT_V[3].y
#define COLOR_MAT_V11_re COLOR_MAT_V[4].x
#define COLOR_MAT_V11_im COLOR_MAT_V[4].y
#define COLOR_MAT_V12_re COLOR_MAT_V[5].x
#define COLOR_MAT_V12_im COLOR_MAT_V[5].y
#define COLOR_MAT_V20_re COLOR_MAT_V[6].x
#define COLOR_MAT_V20_im COLOR_MAT_V[6].y
// Remaining COLOR_MAT_V accessors, followed by COLOR_MAT_W/X/Y/Z.  Each name
// <array><i><j>_re / _im expands to the .x / .y component of slot 3*i + j of
// the array called <array> in the enclosing scope.
#define COLOR_MAT_V21_re COLOR_MAT_V[7].x
#define COLOR_MAT_V21_im COLOR_MAT_V[7].y
#define COLOR_MAT_V22_re COLOR_MAT_V[8].x
#define COLOR_MAT_V22_im COLOR_MAT_V[8].y




#define COLOR_MAT_W00_re COLOR_MAT_W[0].x
#define COLOR_MAT_W00_im COLOR_MAT_W[0].y
#define COLOR_MAT_W01_re COLOR_MAT_W[1].x
#define COLOR_MAT_W01_im COLOR_MAT_W[1].y
#define COLOR_MAT_W02_re COLOR_MAT_W[2].x
#define COLOR_MAT_W02_im COLOR_MAT_W[2].y
#define COLOR_MAT_W10_re COLOR_MAT_W[3].x
#define COLOR_MAT_W10_im COLOR_MAT_W[3].y
#define COLOR_MAT_W11_re COLOR_MAT_W[4].x
#define COLOR_MAT_W11_im COLOR_MAT_W[4].y
#define COLOR_MAT_W12_re COLOR_MAT_W[5].x
#define COLOR_MAT_W12_im COLOR_MAT_W[5].y
#define COLOR_MAT_W20_re COLOR_MAT_W[6].x
#define COLOR_MAT_W20_im COLOR_MAT_W[6].y
#define COLOR_MAT_W21_re COLOR_MAT_W[7].x
#define COLOR_MAT_W21_im COLOR_MAT_W[7].y
#define COLOR_MAT_W22_re COLOR_MAT_W[8].x
#define COLOR_MAT_W22_im COLOR_MAT_W[8].y


#define COLOR_MAT_X00_re COLOR_MAT_X[0].x
#define COLOR_MAT_X00_im COLOR_MAT_X[0].y
#define COLOR_MAT_X01_re COLOR_MAT_X[1].x
#define COLOR_MAT_X01_im COLOR_MAT_X[1].y
#define COLOR_MAT_X02_re COLOR_MAT_X[2].x
#define COLOR_MAT_X02_im COLOR_MAT_X[2].y
#define COLOR_MAT_X10_re COLOR_MAT_X[3].x
#define COLOR_MAT_X10_im COLOR_MAT_X[3].y
#define COLOR_MAT_X11_re COLOR_MAT_X[4].x
#define COLOR_MAT_X11_im COLOR_MAT_X[4].y
#define COLOR_MAT_X12_re COLOR_MAT_X[5].x
#define COLOR_MAT_X12_im COLOR_MAT_X[5].y
#define COLOR_MAT_X20_re COLOR_MAT_X[6].x
#define COLOR_MAT_X20_im COLOR_MAT_X[6].y
#define COLOR_MAT_X21_re COLOR_MAT_X[7].x
#define COLOR_MAT_X21_im COLOR_MAT_X[7].y
#define COLOR_MAT_X22_re COLOR_MAT_X[8].x
#define COLOR_MAT_X22_im COLOR_MAT_X[8].y


#define COLOR_MAT_Y00_re COLOR_MAT_Y[0].x
#define COLOR_MAT_Y00_im COLOR_MAT_Y[0].y
#define COLOR_MAT_Y01_re COLOR_MAT_Y[1].x
#define COLOR_MAT_Y01_im COLOR_MAT_Y[1].y
#define COLOR_MAT_Y02_re COLOR_MAT_Y[2].x
#define COLOR_MAT_Y02_im COLOR_MAT_Y[2].y
#define COLOR_MAT_Y10_re COLOR_MAT_Y[3].x
#define COLOR_MAT_Y10_im COLOR_MAT_Y[3].y
#define COLOR_MAT_Y11_re COLOR_MAT_Y[4].x
#define COLOR_MAT_Y11_im COLOR_MAT_Y[4].y
#define COLOR_MAT_Y12_re COLOR_MAT_Y[5].x
#define COLOR_MAT_Y12_im COLOR_MAT_Y[5].y
#define COLOR_MAT_Y20_re COLOR_MAT_Y[6].x
#define COLOR_MAT_Y20_im COLOR_MAT_Y[6].y
#define COLOR_MAT_Y21_re COLOR_MAT_Y[7].x
#define COLOR_MAT_Y21_im COLOR_MAT_Y[7].y
#define COLOR_MAT_Y22_re COLOR_MAT_Y[8].x
#define COLOR_MAT_Y22_im COLOR_MAT_Y[8].y


#define COLOR_MAT_Z00_re COLOR_MAT_Z[0].x
#define COLOR_MAT_Z00_im COLOR_MAT_Z[0].y
#define COLOR_MAT_Z01_re COLOR_MAT_Z[1].x
#define COLOR_MAT_Z01_im COLOR_MAT_Z[1].y
#define COLOR_MAT_Z02_re COLOR_MAT_Z[2].x
#define COLOR_MAT_Z02_im COLOR_MAT_Z[2].y
#define COLOR_MAT_Z10_re COLOR_MAT_Z[3].x
#define COLOR_MAT_Z10_im COLOR_MAT_Z[3].y
#define COLOR_MAT_Z11_re COLOR_MAT_Z[4].x
#define COLOR_MAT_Z11_im COLOR_MAT_Z[4].y
#define COLOR_MAT_Z12_re COLOR_MAT_Z[5].x
#define COLOR_MAT_Z12_im COLOR_MAT_Z[5].y
#define COLOR_MAT_Z20_re COLOR_MAT_Z[6].x
#define COLOR_MAT_Z20_im COLOR_MAT_Z[6].y
#define COLOR_MAT_Z21_re COLOR_MAT_Z[7].x
#define COLOR_MAT_Z21_im COLOR_MAT_Z[7].y
#define COLOR_MAT_Z22_re COLOR_MAT_Z[8].x
#define COLOR_MAT_Z22_im COLOR_MAT_Z[8].y

// FF_RECONSTRUCT_LINK_12(var, sign)
// Writes entries 20, 21 and 22 of matrix `var` from its entries 00..12 using
// ACC_CONJ_PROD_ASSIGN / ACC_CONJ_PROD (defined elsewhere), then multiplies
// the real and imaginary parts of those three entries by `sign`.
// NOTE(review): presumably this is the conjugated cross product of the first
// two rows (12-real link reconstruction) -- confirm against ACC_CONJ_PROD.
// Expands to a plain statement sequence ending in ';' (not a single
// statement), so do not use it as the unbraced body of an if/else/loop.
#define FF_RECONSTRUCT_LINK_12(var, sign) \
  ACC_CONJ_PROD_ASSIGN(var##20, +var##01, +var##12); \
  ACC_CONJ_PROD(var##20, -var##02, +var##11); \
  ACC_CONJ_PROD_ASSIGN(var##21, +var##02, +var##10); \
  ACC_CONJ_PROD(var##21, -var##00, +var##12); \
  ACC_CONJ_PROD_ASSIGN(var##22, +var##00, +var##11); \
  ACC_CONJ_PROD(var##22, -var##01, +var##10); \
  var##20_re *= (sign); var##20_im *= (sign); \
  var##21_re *= (sign); var##21_im *= (sign); \
  var##22_re *= (sign); var##22_im *= (sign);



// HISQ_LOAD_MATRIX_18_SINGLE_TEX(gauge, dir, idx, var, stride)
// Fetches nine elements through tex1Dfetch into var[0..8].  Element k is
// read at idx + dir*stride*9 + k*stride.  dir, idx and stride are
// parenthesised so that expression arguments (e.g. `d + 1`) index correctly.
#define HISQ_LOAD_MATRIX_18_SINGLE_TEX(gauge, dir, idx, var, stride) do { \
    (var)[0] = tex1Dfetch(gauge, (idx) + (dir)*(stride)*9); \
    (var)[1] = tex1Dfetch(gauge, (idx) + (dir)*(stride)*9 + (stride)); \
    (var)[2] = tex1Dfetch(gauge, (idx) + (dir)*(stride)*9 + (stride)*2); \
    (var)[3] = tex1Dfetch(gauge, (idx) + (dir)*(stride)*9 + (stride)*3); \
    (var)[4] = tex1Dfetch(gauge, (idx) + (dir)*(stride)*9 + (stride)*4); \
    (var)[5] = tex1Dfetch(gauge, (idx) + (dir)*(stride)*9 + (stride)*5); \
    (var)[6] = tex1Dfetch(gauge, (idx) + (dir)*(stride)*9 + (stride)*6); \
    (var)[7] = tex1Dfetch(gauge, (idx) + (dir)*(stride)*9 + (stride)*7); \
    (var)[8] = tex1Dfetch(gauge, (idx) + (dir)*(stride)*9 + (stride)*8); \
  } while (0)

// HISQ_LOAD_MATRIX_12_SINGLE_TEX(gauge, dir, idx, var, stride)
// Fetches three float4 values through tex1Dfetch at
// idx + dir*stride*3 + k*stride (k = 0..2) and unpacks each into two float2
// entries, filling var[0..5].  The scratch variable has a macro-private name
// so it cannot capture a caller expression that mentions `tmp`.
#define HISQ_LOAD_MATRIX_12_SINGLE_TEX(gauge, dir, idx, var, stride) do { \
    float4 hisq_ld12_tmp_; \
    hisq_ld12_tmp_ = tex1Dfetch(gauge, (idx) + (dir)*(stride)*3); \
    (var)[0] = make_float2(hisq_ld12_tmp_.x, hisq_ld12_tmp_.y); \
    (var)[1] = make_float2(hisq_ld12_tmp_.z, hisq_ld12_tmp_.w); \
    hisq_ld12_tmp_ = tex1Dfetch(gauge, (idx) + (dir)*(stride)*3 + (stride)); \
    (var)[2] = make_float2(hisq_ld12_tmp_.x, hisq_ld12_tmp_.y); \
    (var)[3] = make_float2(hisq_ld12_tmp_.z, hisq_ld12_tmp_.w); \
    hisq_ld12_tmp_ = tex1Dfetch(gauge, (idx) + (dir)*(stride)*3 + 2*(stride)); \
    (var)[4] = make_float2(hisq_ld12_tmp_.x, hisq_ld12_tmp_.y); \
    (var)[5] = make_float2(hisq_ld12_tmp_.z, hisq_ld12_tmp_.w); \
  } while (0)

// HISQ_LOAD_MATRIX_18_DOUBLE_TEX(gauge_tex, gauge, dir, idx, var, stride)
// Same addressing as the 18-SINGLE loader (element k at
// idx + dir*stride*9 + k*stride), but each element is read with
// READ_DOUBLE2_TEXTURE(gauge_tex, gauge, index), which is defined elsewhere.
#define HISQ_LOAD_MATRIX_18_DOUBLE_TEX(gauge_tex, gauge, dir, idx, var, stride) do { \
    (var)[0] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*9); \
    (var)[1] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*9 + (stride)); \
    (var)[2] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*9 + (stride)*2); \
    (var)[3] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*9 + (stride)*3); \
    (var)[4] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*9 + (stride)*4); \
    (var)[5] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*9 + (stride)*5); \
    (var)[6] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*9 + (stride)*6); \
    (var)[7] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*9 + (stride)*7); \
    (var)[8] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*9 + (stride)*8); \
  } while (0)

// HISQ_LOAD_MATRIX_12_DOUBLE_TEX(gauge_tex, gauge, dir, idx, var, stride)
// Reads six elements with READ_DOUBLE2_TEXTURE into var[0..5]; element k is
// read at idx + dir*stride*6 + k*stride.
#define HISQ_LOAD_MATRIX_12_DOUBLE_TEX(gauge_tex, gauge, dir, idx, var, stride) do { \
    (var)[0] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*6); \
    (var)[1] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*6 + (stride)); \
    (var)[2] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*6 + (stride)*2); \
    (var)[3] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*6 + (stride)*3); \
    (var)[4] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*6 + (stride)*4); \
    (var)[5] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*6 + (stride)*5); \
  } while (0)

// FF_COMPUTE_NEW_FULL_IDX_PLUS_UPDATE(mydir, idx, new_idx)
// Steps one site forward in direction mydir (0..3) with wrap-around.
// Reads and updates the coordinate array new_x[] from the enclosing scope:
// if new_x[mydir] is at its maximum (X1m1 / X2m1 / X3m1 / X4m1) it wraps to 0
// and new_idx is idx minus the corresponding span; otherwise the coordinate
// is incremented and new_idx is idx plus the direction stride (1, X1, X2X1,
// X3X2X1).  All X* names must be in scope at the call site.  Any other value
// of mydir leaves new_idx and new_x[] untouched.
#define FF_COMPUTE_NEW_FULL_IDX_PLUS_UPDATE(mydir, idx, new_idx) do { \
    switch (mydir) { \
    case 0: \
      new_idx = ((new_x[0] == X1m1) ? (idx) - X1m1 : (idx) + 1); \
      new_x[0] = (new_x[0] == X1m1) ? 0 : new_x[0] + 1; \
      break; \
    case 1: \
      new_idx = ((new_x[1] == X2m1) ? (idx) - X2X1mX1 : (idx) + X1); \
      new_x[1] = (new_x[1] == X2m1) ? 0 : new_x[1] + 1; \
      break; \
    case 2: \
      new_idx = ((new_x[2] == X3m1) ? (idx) - X3X2X1mX2X1 : (idx) + X2X1); \
      new_x[2] = (new_x[2] == X3m1) ? 0 : new_x[2] + 1; \
      break; \
    case 3: \
      new_idx = ((new_x[3] == X4m1) ? (idx) - X4X3X2X1mX3X2X1 : (idx) + X3X2X1); \
      new_x[3] = (new_x[3] == X4m1) ? 0 : new_x[3] + 1; \
      break; \
    default: \
      break; \
    } \
  } while (0)


// FF_COMPUTE_NEW_FULL_IDX_MINUS_UPDATE(mydir, idx, new_idx)
// Mirror of the PLUS variant: steps one site backward in direction mydir.
// If new_x[mydir] is 0 it wraps to the maximum coordinate and new_idx is idx
// plus the corresponding span; otherwise the coordinate is decremented and
// new_idx is idx minus the direction stride.
#define FF_COMPUTE_NEW_FULL_IDX_MINUS_UPDATE(mydir, idx, new_idx) do { \
    switch (mydir) { \
    case 0: \
      new_idx = ((new_x[0] == 0) ? (idx) + X1m1 : (idx) - 1); \
      new_x[0] = (new_x[0] == 0) ? X1m1 : new_x[0] - 1; \
      break; \
    case 1: \
      new_idx = ((new_x[1] == 0) ? (idx) + X2X1mX1 : (idx) - X1); \
      new_x[1] = (new_x[1] == 0) ? X2m1 : new_x[1] - 1; \
      break; \
    case 2: \
      new_idx = ((new_x[2] == 0) ? (idx) + X3X2X1mX2X1 : (idx) - X2X1); \
      new_x[2] = (new_x[2] == 0) ? X3m1 : new_x[2] - 1; \
      break; \
    case 3: \
      new_idx = ((new_x[3] == 0) ? (idx) + X4X3X2X1mX3X2X1 : (idx) - X3X2X1); \
      new_x[3] = (new_x[3] == 0) ? X4m1 : new_x[3] - 1; \
      break; \
    default: \
      break; \
    } \
  } while (0)




// WRITE_MATRIX_18_SINGLE(mat, idx, var)
// Stores var[0..8] to mat[idx + k*Vh] for k = 0..8.  Vh is taken from the
// enclosing scope.
#define WRITE_MATRIX_18_SINGLE(mat, idx, var) do { \
    (mat)[(idx) + 0*Vh] = (var)[0]; \
    (mat)[(idx) + 1*Vh] = (var)[1]; \
    (mat)[(idx) + 2*Vh] = (var)[2]; \
    (mat)[(idx) + 3*Vh] = (var)[3]; \
    (mat)[(idx) + 4*Vh] = (var)[4]; \
    (mat)[(idx) + 5*Vh] = (var)[5]; \
    (mat)[(idx) + 6*Vh] = (var)[6]; \
    (mat)[(idx) + 7*Vh] = (var)[7]; \
    (mat)[(idx) + 8*Vh] = (var)[8]; \
  } while (0)


// matrix macros:

// ADJ_MAT(a, b): b = adjoint (conjugate transpose) of a.  Entry ij of b gets
// the real part of entry ji of a and the negated imaginary part.
// a and b must name different matrices: off-diagonal entries of b are
// written before the mirrored entries of a are read.
// Expands to a plain statement sequence ending in ';' (not a single
// statement), so do not use it as the unbraced body of an if/else/loop.
#define ADJ_MAT(a, b) \
  b##00_re =  a##00_re; \
  b##00_im = -a##00_im; \
  b##01_re =  a##10_re; \
  b##01_im = -a##10_im; \
  b##02_re =  a##20_re; \
  b##02_im = -a##20_im; \
  b##10_re =  a##01_re; \
  b##10_im = -a##01_im; \
  b##11_re =  a##11_re; \
  b##11_im = -a##11_im; \
  b##12_re =  a##21_re; \
  b##12_im = -a##21_im; \
  b##20_re =  a##02_re; \
  b##20_im = -a##02_im; \
  b##21_re =  a##12_re; \
  b##21_im = -a##12_im; \
  b##22_re =  a##22_re; \
  b##22_im = -a##22_im;


// ASSIGN_MAT(a, b): b = a, copied entry by entry (real and imaginary parts).
// Like ADJ_MAT, this is a plain statement sequence, not a single statement.
#define ASSIGN_MAT(a, b) \
  b##00_re = a##00_re; \
  b##00_im = a##00_im; \
  b##01_re = a##01_re; \
  b##01_im = a##01_im; \
  b##02_re = a##02_re; \
  b##02_im = a##02_im; \
  b##10_re = a##10_re; \
  b##10_im = a##10_im; \
  b##11_re = a##11_re; \
  b##11_im = a##11_im; \
  b##12_re = a##12_re; \
  b##12_im = a##12_im; \
  b##20_re = a##20_re; \
  b##20_im = a##20_im; \
  b##21_re = a##21_re; \
  b##21_im = a##21_im; \
  b##22_re = a##22_re; \
  b##22_im = a##22_im;
\ 00566 00567 00568 00569 #define MATRIX_PRODUCT(a, b, simple, c) do{ \ 00570 if(simple){ \ 00571 c##00_re = a##00_re*b##00_re - a##00_im*b##00_im + a##01_re*b##10_re - a##01_im*b##10_im + a##02_re*b##20_re - a##02_im*b##20_im; \ 00572 c##00_im = a##00_re*b##00_im + a##00_im*b##00_re + a##01_re*b##10_im + a##01_im*b##10_re + a##02_re*b##20_im + a##02_im*b##20_re; \ 00573 c##01_re = a##00_re*b##01_re - a##00_im*b##01_im + a##01_re*b##11_re - a##01_im*b##11_im + a##02_re*b##21_re - a##02_im*b##21_im; \ 00574 c##01_im = a##00_re*b##01_im + a##00_im*b##01_re + a##01_re*b##11_im + a##01_im*b##11_re + a##02_re*b##21_im + a##02_im*b##21_re; \ 00575 c##02_re = a##00_re*b##02_re - a##00_im*b##02_im + a##01_re*b##12_re - a##01_im*b##12_im + a##02_re*b##22_re - a##02_im*b##22_im; \ 00576 c##02_im = a##00_re*b##02_im + a##00_im*b##02_re + a##01_re*b##12_im + a##01_im*b##12_re + a##02_re*b##22_im + a##02_im*b##22_re; \ 00577 c##10_re = a##10_re*b##00_re - a##10_im*b##00_im + a##11_re*b##10_re - a##11_im*b##10_im + a##12_re*b##20_re - a##12_im*b##20_im; \ 00578 c##10_im = a##10_re*b##00_im + a##10_im*b##00_re + a##11_re*b##10_im + a##11_im*b##10_re + a##12_re*b##20_im + a##12_im*b##20_re; \ 00579 c##11_re = a##10_re*b##01_re - a##10_im*b##01_im + a##11_re*b##11_re - a##11_im*b##11_im + a##12_re*b##21_re - a##12_im*b##21_im; \ 00580 c##11_im = a##10_re*b##01_im + a##10_im*b##01_re + a##11_re*b##11_im + a##11_im*b##11_re + a##12_re*b##21_im + a##12_im*b##21_re; \ 00581 c##12_re = a##10_re*b##02_re - a##10_im*b##02_im + a##11_re*b##12_re - a##11_im*b##12_im + a##12_re*b##22_re - a##12_im*b##22_im; \ 00582 c##12_im = a##10_re*b##02_im + a##10_im*b##02_re + a##11_re*b##12_im + a##11_im*b##12_re + a##12_re*b##22_im + a##12_im*b##22_re; \ 00583 c##20_re = a##20_re*b##00_re - a##20_im*b##00_im + a##21_re*b##10_re - a##21_im*b##10_im + a##22_re*b##20_re - a##22_im*b##20_im; \ 00584 c##20_im = a##20_re*b##00_im + a##20_im*b##00_re + a##21_re*b##10_im + a##21_im*b##10_re + 
a##22_re*b##20_im + a##22_im*b##20_re; \ 00585 c##21_re = a##20_re*b##01_re - a##20_im*b##01_im + a##21_re*b##11_re - a##21_im*b##11_im + a##22_re*b##21_re - a##22_im*b##21_im; \ 00586 c##21_im = a##20_re*b##01_im + a##20_im*b##01_re + a##21_re*b##11_im + a##21_im*b##11_re + a##22_re*b##21_im + a##22_im*b##21_re; \ 00587 c##22_re = a##20_re*b##02_re - a##20_im*b##02_im + a##21_re*b##12_re - a##21_im*b##12_im + a##22_re*b##22_re - a##22_im*b##22_im; \ 00588 c##22_im = a##20_re*b##02_im + a##20_im*b##02_re + a##21_re*b##12_im + a##21_im*b##12_re + a##22_re*b##22_im + a##22_im*b##22_re; \ 00589 }else{ \ 00590 c##00_re = a##00_re*b##00_re + a##00_im*b##00_im + a##10_re*b##10_re + a##10_im*b##10_im + a##20_re*b##20_re + a##20_im*b##20_im; \ 00591 c##00_im = a##00_re*b##00_im - a##00_im*b##00_re + a##10_re*b##10_im - a##10_im*b##10_re + a##20_re*b##20_im - a##20_im*b##20_re; \ 00592 c##01_re = a##00_re*b##01_re + a##00_im*b##01_im + a##10_re*b##11_re + a##10_im*b##11_im + a##20_re*b##21_re + a##20_im*b##21_im; \ 00593 c##01_im = a##00_re*b##01_im - a##00_im*b##01_re + a##10_re*b##11_im - a##10_im*b##11_re + a##20_re*b##21_im - a##20_im*b##21_re; \ 00594 c##02_re = a##00_re*b##02_re + a##00_im*b##02_im + a##10_re*b##12_re + a##10_im*b##12_im + a##20_re*b##22_re + a##20_im*b##22_im; \ 00595 c##02_im = a##00_re*b##02_im - a##00_im*b##02_re + a##10_re*b##12_im - a##10_im*b##12_re + a##20_re*b##22_im - a##20_im*b##22_re; \ 00596 c##10_re = a##01_re*b##00_re + a##01_im*b##00_im + a##11_re*b##10_re + a##11_im*b##10_im + a##21_re*b##20_re + a##21_im*b##20_im; \ 00597 c##10_im = a##01_re*b##00_im - a##01_im*b##00_re + a##11_re*b##10_im - a##11_im*b##10_re + a##21_re*b##20_im - a##21_im*b##20_re; \ 00598 c##11_re = a##01_re*b##01_re + a##01_im*b##01_im + a##11_re*b##11_re + a##11_im*b##11_im + a##21_re*b##21_re + a##21_im*b##21_im; \ 00599 c##11_im = a##01_re*b##01_im - a##01_im*b##01_re + a##11_re*b##11_im - a##11_im*b##11_re + a##21_re*b##21_im - a##21_im*b##21_re; \ 00600 
c##12_re = a##01_re*b##02_re + a##01_im*b##02_im + a##11_re*b##12_re + a##11_im*b##12_im + a##21_re*b##22_re + a##21_im*b##22_im; \ 00601 c##12_im = a##01_re*b##02_im - a##01_im*b##02_re + a##11_re*b##12_im - a##11_im*b##12_re + a##21_re*b##22_im - a##21_im*b##22_re; \ 00602 c##20_re = a##02_re*b##00_re + a##02_im*b##00_im + a##12_re*b##10_re + a##12_im*b##10_im + a##22_re*b##20_re + a##22_im*b##20_im; \ 00603 c##20_im = a##02_re*b##00_im - a##02_im*b##00_re + a##12_re*b##10_im - a##12_im*b##10_re + a##22_re*b##20_im - a##22_im*b##20_re; \ 00604 c##21_re = a##02_re*b##01_re + a##02_im*b##01_im + a##12_re*b##11_re + a##12_im*b##11_im + a##22_re*b##21_re + a##22_im*b##21_im; \ 00605 c##21_im = a##02_re*b##01_im - a##02_im*b##01_re + a##12_re*b##11_im - a##12_im*b##11_re + a##22_re*b##21_im - a##22_im*b##21_re; \ 00606 c##22_re = a##02_re*b##02_re + a##02_im*b##02_im + a##12_re*b##12_re + a##12_im*b##12_im + a##22_re*b##22_re + a##22_im*b##22_im; \ 00607 c##22_im = a##02_re*b##02_im - a##02_im*b##02_re + a##12_re*b##12_im - a##12_im*b##12_re + a##22_re*b##22_im - a##22_im*b##22_re; \ 00608 } \ 00609 }while(0) 00610 00611 00612 #define MAT_MUL_MAT(a, b, c) \ 00613 c##00_re = a##00_re*b##00_re - a##00_im*b##00_im + a##01_re*b##10_re - a##01_im*b##10_im + a##02_re*b##20_re - a##02_im*b##20_im; \ 00614 c##00_im = a##00_re*b##00_im + a##00_im*b##00_re + a##01_re*b##10_im + a##01_im*b##10_re + a##02_re*b##20_im + a##02_im*b##20_re; \ 00615 c##01_re = a##00_re*b##01_re - a##00_im*b##01_im + a##01_re*b##11_re - a##01_im*b##11_im + a##02_re*b##21_re - a##02_im*b##21_im; \ 00616 c##01_im = a##00_re*b##01_im + a##00_im*b##01_re + a##01_re*b##11_im + a##01_im*b##11_re + a##02_re*b##21_im + a##02_im*b##21_re; \ 00617 c##02_re = a##00_re*b##02_re - a##00_im*b##02_im + a##01_re*b##12_re - a##01_im*b##12_im + a##02_re*b##22_re - a##02_im*b##22_im; \ 00618 c##02_im = a##00_re*b##02_im + a##00_im*b##02_re + a##01_re*b##12_im + a##01_im*b##12_re + a##02_re*b##22_im + a##02_im*b##22_re; 
\ 00619 c##10_re = a##10_re*b##00_re - a##10_im*b##00_im + a##11_re*b##10_re - a##11_im*b##10_im + a##12_re*b##20_re - a##12_im*b##20_im; \ 00620 c##10_im = a##10_re*b##00_im + a##10_im*b##00_re + a##11_re*b##10_im + a##11_im*b##10_re + a##12_re*b##20_im + a##12_im*b##20_re; \ 00621 c##11_re = a##10_re*b##01_re - a##10_im*b##01_im + a##11_re*b##11_re - a##11_im*b##11_im + a##12_re*b##21_re - a##12_im*b##21_im; \ 00622 c##11_im = a##10_re*b##01_im + a##10_im*b##01_re + a##11_re*b##11_im + a##11_im*b##11_re + a##12_re*b##21_im + a##12_im*b##21_re; \ 00623 c##12_re = a##10_re*b##02_re - a##10_im*b##02_im + a##11_re*b##12_re - a##11_im*b##12_im + a##12_re*b##22_re - a##12_im*b##22_im; \ 00624 c##12_im = a##10_re*b##02_im + a##10_im*b##02_re + a##11_re*b##12_im + a##11_im*b##12_re + a##12_re*b##22_im + a##12_im*b##22_re; \ 00625 c##20_re = a##20_re*b##00_re - a##20_im*b##00_im + a##21_re*b##10_re - a##21_im*b##10_im + a##22_re*b##20_re - a##22_im*b##20_im; \ 00626 c##20_im = a##20_re*b##00_im + a##20_im*b##00_re + a##21_re*b##10_im + a##21_im*b##10_re + a##22_re*b##20_im + a##22_im*b##20_re; \ 00627 c##21_re = a##20_re*b##01_re - a##20_im*b##01_im + a##21_re*b##11_re - a##21_im*b##11_im + a##22_re*b##21_re - a##22_im*b##21_im; \ 00628 c##21_im = a##20_re*b##01_im + a##20_im*b##01_re + a##21_re*b##11_im + a##21_im*b##11_re + a##22_re*b##21_im + a##22_im*b##21_re; \ 00629 c##22_re = a##20_re*b##02_re - a##20_im*b##02_im + a##21_re*b##12_re - a##21_im*b##12_im + a##22_re*b##22_re - a##22_im*b##22_im; \ 00630 c##22_im = a##20_re*b##02_im + a##20_im*b##02_re + a##21_re*b##12_im + a##21_im*b##12_re + a##22_re*b##22_im + a##22_im*b##22_re; 00631 00632 #define MAT_MUL_ADJ_MAT(a, b, c) \ 00633 c##00_re = a##00_re*b##00_re + a##00_im*b##00_im + a##01_re*b##01_re + a##01_im*b##01_im + a##02_re*b##02_re + a##02_im*b##02_im; \ 00634 c##00_im = - a##00_re*b##00_im + a##00_im*b##00_re - a##01_re*b##01_im + a##01_im*b##01_re - a##02_re*b##02_im + a##02_im*b##02_re; \ 00635 c##01_re = 
a##00_re*b##10_re + a##00_im*b##10_im + a##01_re*b##11_re + a##01_im*b##11_im + a##02_re*b##12_re + a##02_im*b##12_im; \ 00636 c##01_im = - a##00_re*b##10_im + a##00_im*b##10_re - a##01_re*b##11_im + a##01_im*b##11_re - a##02_re*b##12_im + a##02_im*b##12_re; \ 00637 c##02_re = a##00_re*b##20_re + a##00_im*b##20_im + a##01_re*b##21_re + a##01_im*b##21_im + a##02_re*b##22_re + a##02_im*b##22_im; \ 00638 c##02_im = - a##00_re*b##20_im + a##00_im*b##20_re - a##01_re*b##21_im + a##01_im*b##21_re - a##02_re*b##22_im + a##02_im*b##22_re; \ 00639 c##10_re = a##10_re*b##00_re + a##10_im*b##00_im + a##11_re*b##01_re + a##11_im*b##01_im + a##12_re*b##02_re + a##12_im*b##02_im; \ 00640 c##10_im = - a##10_re*b##00_im + a##10_im*b##00_re - a##11_re*b##01_im + a##11_im*b##01_re - a##12_re*b##02_im + a##12_im*b##02_re; \ 00641 c##11_re = a##10_re*b##10_re + a##10_im*b##10_im + a##11_re*b##11_re + a##11_im*b##11_im + a##12_re*b##12_re + a##12_im*b##12_im; \ 00642 c##11_im = - a##10_re*b##10_im + a##10_im*b##10_re - a##11_re*b##11_im + a##11_im*b##11_re - a##12_re*b##12_im + a##12_im*b##12_re; \ 00643 c##12_re = a##10_re*b##20_re + a##10_im*b##20_im + a##11_re*b##21_re + a##11_im*b##21_im + a##12_re*b##22_re + a##12_im*b##22_im; \ 00644 c##12_im = - a##10_re*b##20_im + a##10_im*b##20_re - a##11_re*b##21_im + a##11_im*b##21_re - a##12_re*b##22_im + a##12_im*b##22_re; \ 00645 c##20_re = a##20_re*b##00_re + a##20_im*b##00_im + a##21_re*b##01_re + a##21_im*b##01_im + a##22_re*b##02_re + a##22_im*b##02_im; \ 00646 c##20_im = - a##20_re*b##00_im + a##20_im*b##00_re - a##21_re*b##01_im + a##21_im*b##01_re - a##22_re*b##02_im + a##22_im*b##02_re; \ 00647 c##21_re = a##20_re*b##10_re + a##20_im*b##10_im + a##21_re*b##11_re + a##21_im*b##11_im + a##22_re*b##12_re + a##22_im*b##12_im; \ 00648 c##21_im = - a##20_re*b##10_im + a##20_im*b##10_re - a##21_re*b##11_im + a##21_im*b##11_re - a##22_re*b##12_im + a##22_im*b##12_re; \ 00649 c##22_re = a##20_re*b##20_re + a##20_im*b##20_im + 
a##21_re*b##21_re + a##21_im*b##21_im + a##22_re*b##22_re + a##22_im*b##22_im; \ 00650 c##22_im = - a##20_re*b##20_im + a##20_im*b##20_re - a##21_re*b##21_im + a##21_im*b##21_re - a##22_re*b##22_im + a##22_im*b##22_re; 00651 00652 #define ADJ_MAT_MUL_MAT(a, b, c) \ 00653 c##00_re = a##00_re*b##00_re + a##00_im*b##00_im + a##10_re*b##10_re + a##10_im*b##10_im + a##20_re*b##20_re + a##20_im*b##20_im; \ 00654 c##00_im = a##00_re*b##00_im - a##00_im*b##00_re + a##10_re*b##10_im - a##10_im*b##10_re + a##20_re*b##20_im - a##20_im*b##20_re; \ 00655 c##01_re = a##00_re*b##01_re + a##00_im*b##01_im + a##10_re*b##11_re + a##10_im*b##11_im + a##20_re*b##21_re + a##20_im*b##21_im; \ 00656 c##01_im = a##00_re*b##01_im - a##00_im*b##01_re + a##10_re*b##11_im - a##10_im*b##11_re + a##20_re*b##21_im - a##20_im*b##21_re; \ 00657 c##02_re = a##00_re*b##02_re + a##00_im*b##02_im + a##10_re*b##12_re + a##10_im*b##12_im + a##20_re*b##22_re + a##20_im*b##22_im; \ 00658 c##02_im = a##00_re*b##02_im - a##00_im*b##02_re + a##10_re*b##12_im - a##10_im*b##12_re + a##20_re*b##22_im - a##20_im*b##22_re; \ 00659 c##10_re = a##01_re*b##00_re + a##01_im*b##00_im + a##11_re*b##10_re + a##11_im*b##10_im + a##21_re*b##20_re + a##21_im*b##20_im; \ 00660 c##10_im = a##01_re*b##00_im - a##01_im*b##00_re + a##11_re*b##10_im - a##11_im*b##10_re + a##21_re*b##20_im - a##21_im*b##20_re; \ 00661 c##11_re = a##01_re*b##01_re + a##01_im*b##01_im + a##11_re*b##11_re + a##11_im*b##11_im + a##21_re*b##21_re + a##21_im*b##21_im; \ 00662 c##11_im = a##01_re*b##01_im - a##01_im*b##01_re + a##11_re*b##11_im - a##11_im*b##11_re + a##21_re*b##21_im - a##21_im*b##21_re; \ 00663 c##12_re = a##01_re*b##02_re + a##01_im*b##02_im + a##11_re*b##12_re + a##11_im*b##12_im + a##21_re*b##22_re + a##21_im*b##22_im; \ 00664 c##12_im = a##01_re*b##02_im - a##01_im*b##02_re + a##11_re*b##12_im - a##11_im*b##12_re + a##21_re*b##22_im - a##21_im*b##22_re; \ 00665 c##20_re = a##02_re*b##00_re + a##02_im*b##00_im + a##12_re*b##10_re + 
a##12_im*b##10_im + a##22_re*b##20_re + a##22_im*b##20_im; \ 00666 c##20_im = a##02_re*b##00_im - a##02_im*b##00_re + a##12_re*b##10_im - a##12_im*b##10_re + a##22_re*b##20_im - a##22_im*b##20_re; \ 00667 c##21_re = a##02_re*b##01_re + a##02_im*b##01_im + a##12_re*b##11_re + a##12_im*b##11_im + a##22_re*b##21_re + a##22_im*b##21_im; \ 00668 c##21_im = a##02_re*b##01_im - a##02_im*b##01_re + a##12_re*b##11_im - a##12_im*b##11_re + a##22_re*b##21_im - a##22_im*b##21_re; \ 00669 c##22_re = a##02_re*b##02_re + a##02_im*b##02_im + a##12_re*b##12_re + a##12_im*b##12_im + a##22_re*b##22_re + a##22_im*b##22_im; \ 00670 c##22_im = a##02_re*b##02_im - a##02_im*b##02_re + a##12_re*b##12_im - a##12_im*b##12_re + a##22_re*b##22_im - a##22_im*b##22_re; 00671 00672 /* ADJ_MAT_MUL_ADJ_MAT(a, b, c): stores into c the product adjoint(a) times adjoint(b) of two 3x3 complex matrices. Entries are the variables <prefix>RC_re and <prefix>RC_im; reading the first digit R as the row and the second digit C as the column, every statement computes c_ij = sum over k of conj(a_ki) times conj(b_jk), which is why each real part is re*re - im*im and each imaginary part is the negated sum of the cross terms. The arguments a, b and c are identifier prefixes joined with the ## operator, so they must be single tokens, not expressions. Entries of c are assigned one statement at a time while entries of a and b are still read by later statements, so c must not be the same prefix as a or b. NOTE(review): the expansion is a bare list of statements ending in a semicolon, with no do{...}while(0) wrapper, so it is unsafe as the body of an unbraced if/else; confirm call sites before changing this. */ #define ADJ_MAT_MUL_ADJ_MAT(a, b, c) \ 00673 c##00_re = a##00_re*b##00_re - a##00_im*b##00_im + a##10_re*b##01_re - a##10_im*b##01_im + a##20_re*b##02_re - a##20_im*b##02_im; \ 00674 c##00_im = - a##00_re*b##00_im - a##00_im*b##00_re - a##10_re*b##01_im - a##10_im*b##01_re - a##20_re*b##02_im - a##20_im*b##02_re; \ 00675 c##01_re = a##00_re*b##10_re - a##00_im*b##10_im + a##10_re*b##11_re - a##10_im*b##11_im + a##20_re*b##12_re - a##20_im*b##12_im; \ 00676 c##01_im = - a##00_re*b##10_im - a##00_im*b##10_re - a##10_re*b##11_im - a##10_im*b##11_re - a##20_re*b##12_im - a##20_im*b##12_re; \ 00677 c##02_re = a##00_re*b##20_re - a##00_im*b##20_im + a##10_re*b##21_re - a##10_im*b##21_im + a##20_re*b##22_re - a##20_im*b##22_im; \ 00678 c##02_im = - a##00_re*b##20_im - a##00_im*b##20_re - a##10_re*b##21_im - a##10_im*b##21_re - a##20_re*b##22_im - a##20_im*b##22_re; \ 00679 c##10_re = a##01_re*b##00_re - a##01_im*b##00_im + a##11_re*b##01_re - a##11_im*b##01_im + a##21_re*b##02_re - a##21_im*b##02_im; \ 00680 c##10_im = - a##01_re*b##00_im - a##01_im*b##00_re - a##11_re*b##01_im - a##11_im*b##01_re - a##21_re*b##02_im - a##21_im*b##02_re; \ 00681 c##11_re = a##01_re*b##10_re - a##01_im*b##10_im + a##11_re*b##11_re - 
a##11_im*b##11_im + a##21_re*b##12_re - a##21_im*b##12_im; \ 00682 c##11_im = - a##01_re*b##10_im - a##01_im*b##10_re - a##11_re*b##11_im - a##11_im*b##11_re - a##21_re*b##12_im - a##21_im*b##12_re; \ 00683 c##12_re = a##01_re*b##20_re - a##01_im*b##20_im + a##11_re*b##21_re - a##11_im*b##21_im + a##21_re*b##22_re - a##21_im*b##22_im; \ 00684 c##12_im = - a##01_re*b##20_im - a##01_im*b##20_re - a##11_re*b##21_im - a##11_im*b##21_re - a##21_re*b##22_im - a##21_im*b##22_re; \ 00685 c##20_re = a##02_re*b##00_re - a##02_im*b##00_im + a##12_re*b##01_re - a##12_im*b##01_im + a##22_re*b##02_re - a##22_im*b##02_im; \ 00686 c##20_im = - a##02_re*b##00_im - a##02_im*b##00_re - a##12_re*b##01_im - a##12_im*b##01_re - a##22_re*b##02_im - a##22_im*b##02_re; \ 00687 c##21_re = a##02_re*b##10_re - a##02_im*b##10_im + a##12_re*b##11_re - a##12_im*b##11_im + a##22_re*b##12_re - a##22_im*b##12_im; \ 00688 c##21_im = - a##02_re*b##10_im - a##02_im*b##10_re - a##12_re*b##11_im - a##12_im*b##11_re - a##22_re*b##12_im - a##22_im*b##12_re; \ 00689 c##22_re = a##02_re*b##20_re - a##02_im*b##20_im + a##12_re*b##21_re - a##12_im*b##21_im + a##22_re*b##22_re - a##22_im*b##22_im; \ 00690 c##22_im = - a##02_re*b##20_im - a##02_im*b##20_re - a##12_re*b##21_im - a##12_im*b##21_re - a##22_re*b##22_im - a##22_im*b##22_re; 00691 00692 // end of macros specific to hisq routines 00693 00694 00695 /* SCALAR_MULT_ADD_MATRIX(a, b, scalar, c): for each of the nine entries of a 3x3 complex matrix, stores c = a + scalar times b, applying the same scalar factor to the _re and the _im variable separately (so scalar acts as a real multiplier). The arguments a, b and c are identifier prefixes joined with the ## operator and must be single tokens. Each output entry reads only the input entries with the same index, so c may be the same prefix as a or b. The body is wrapped in do{...}while(0), so the caller supplies the trailing semicolon. NOTE(review): scalar is substituted without parentheses and is expanded 18 times; an argument such as x+y would bind as x + y*b, and an argument with side effects would be evaluated repeatedly. Pass a plain variable or an already parenthesized value. */ #define SCALAR_MULT_ADD_MATRIX(a, b, scalar, c) do{ \ 00696 c##00_re = a##00_re + scalar*b##00_re; \ 00697 c##00_im = a##00_im + scalar*b##00_im; \ 00698 c##01_re = a##01_re + scalar*b##01_re; \ 00699 c##01_im = a##01_im + scalar*b##01_im; \ 00700 c##02_re = a##02_re + scalar*b##02_re; \ 00701 c##02_im = a##02_im + scalar*b##02_im; \ 00702 c##10_re = a##10_re + scalar*b##10_re; \ 00703 c##10_im = a##10_im + scalar*b##10_im; \ 00704 c##11_re = a##11_re + scalar*b##11_re; \ 00705 c##11_im = a##11_im + scalar*b##11_im; \ 00706 c##12_re = a##12_re + scalar*b##12_re; \ 00707 c##12_im = a##12_im + scalar*b##12_im; 
\ 00708 c##20_re = a##20_re + scalar*b##20_re; \ 00709 c##20_im = a##20_im + scalar*b##20_im; \ 00710 c##21_re = a##21_re + scalar*b##21_re; \ 00711 c##21_im = a##21_im + scalar*b##21_im; \ 00712 c##22_re = a##22_re + scalar*b##22_re; \ 00713 c##22_im = a##22_im + scalar*b##22_im; \ 00714 }while(0) 00715 00716 #endif // GENERIC_MATRIX_MACROS 00717 00718 #endif // _HISQ_FORCE_MACROS_H_