QUDA v0.4.0
A library for QCD on GPUs
quda/lib/hisq_force_macros.h
Go to the documentation of this file.
00001 #ifndef _HISQ_FORCE_MACROS_H_
00002 #define _HISQ_FORCE_MACROS_H_
00003 
00004 
00005 #ifndef HISQ_RECONSTRUCT_LINKS
00006 
// LINK_W accessors (two-component element layout): matrix entry <row><col>
// maps to LINK_W[3*row + col]; _re selects .x and _im selects .y.
#define LINK_W00_re  LINK_W[0].x
#define LINK_W00_im  LINK_W[0].y
#define LINK_W01_re  LINK_W[1].x
#define LINK_W01_im  LINK_W[1].y
#define LINK_W02_re  LINK_W[2].x
#define LINK_W02_im  LINK_W[2].y
#define LINK_W10_re  LINK_W[3].x
#define LINK_W10_im  LINK_W[3].y
#define LINK_W11_re  LINK_W[4].x
#define LINK_W11_im  LINK_W[4].y
#define LINK_W12_re  LINK_W[5].x
#define LINK_W12_im  LINK_W[5].y
#define LINK_W20_re  LINK_W[6].x
#define LINK_W20_im  LINK_W[6].y
#define LINK_W21_re  LINK_W[7].x
#define LINK_W21_im  LINK_W[7].y
#define LINK_W22_re  LINK_W[8].x
#define LINK_W22_im  LINK_W[8].y
00025 
// LINK_X accessors (two-component element layout): matrix entry <row><col>
// maps to LINK_X[3*row + col]; _re selects .x and _im selects .y.
#define LINK_X00_re  LINK_X[0].x
#define LINK_X00_im  LINK_X[0].y
#define LINK_X01_re  LINK_X[1].x
#define LINK_X01_im  LINK_X[1].y
#define LINK_X02_re  LINK_X[2].x
#define LINK_X02_im  LINK_X[2].y
#define LINK_X10_re  LINK_X[3].x
#define LINK_X10_im  LINK_X[3].y
#define LINK_X11_re  LINK_X[4].x
#define LINK_X11_im  LINK_X[4].y
#define LINK_X12_re  LINK_X[5].x
#define LINK_X12_im  LINK_X[5].y
#define LINK_X20_re  LINK_X[6].x
#define LINK_X20_im  LINK_X[6].y
#define LINK_X21_re  LINK_X[7].x
#define LINK_X21_im  LINK_X[7].y
#define LINK_X22_re  LINK_X[8].x
#define LINK_X22_im  LINK_X[8].y
00044 
// LINK_Y accessors (two-component element layout): matrix entry <row><col>
// maps to LINK_Y[3*row + col]; _re selects .x and _im selects .y.
#define LINK_Y00_re  LINK_Y[0].x
#define LINK_Y00_im  LINK_Y[0].y
#define LINK_Y01_re  LINK_Y[1].x
#define LINK_Y01_im  LINK_Y[1].y
#define LINK_Y02_re  LINK_Y[2].x
#define LINK_Y02_im  LINK_Y[2].y
#define LINK_Y10_re  LINK_Y[3].x
#define LINK_Y10_im  LINK_Y[3].y
#define LINK_Y11_re  LINK_Y[4].x
#define LINK_Y11_im  LINK_Y[4].y
#define LINK_Y12_re  LINK_Y[5].x
#define LINK_Y12_im  LINK_Y[5].y
#define LINK_Y20_re  LINK_Y[6].x
#define LINK_Y20_im  LINK_Y[6].y
#define LINK_Y21_re  LINK_Y[7].x
#define LINK_Y21_im  LINK_Y[7].y
#define LINK_Y22_re  LINK_Y[8].x
#define LINK_Y22_im  LINK_Y[8].y
00063 
// LINK_Z accessors (two-component element layout): matrix entry <row><col>
// maps to LINK_Z[3*row + col]; _re selects .x and _im selects .y.
#define LINK_Z00_re  LINK_Z[0].x
#define LINK_Z00_im  LINK_Z[0].y
#define LINK_Z01_re  LINK_Z[1].x
#define LINK_Z01_im  LINK_Z[1].y
#define LINK_Z02_re  LINK_Z[2].x
#define LINK_Z02_im  LINK_Z[2].y
#define LINK_Z10_re  LINK_Z[3].x
#define LINK_Z10_im  LINK_Z[3].y
#define LINK_Z11_re  LINK_Z[4].x
#define LINK_Z11_im  LINK_Z[4].y
#define LINK_Z12_re  LINK_Z[5].x
#define LINK_Z12_im  LINK_Z[5].y
#define LINK_Z20_re  LINK_Z[6].x
#define LINK_Z20_im  LINK_Z[6].y
#define LINK_Z21_re  LINK_Z[7].x
#define LINK_Z21_im  LINK_Z[7].y
#define LINK_Z22_re  LINK_Z[8].x
#define LINK_Z22_im  LINK_Z[8].y
00082 
// ab_link accessors: matrix entry <row><col> maps to ab_link[3*row + col];
// _re selects .x and _im selects .y.
#define ab_link00_re  ab_link[0].x
#define ab_link00_im  ab_link[0].y
#define ab_link01_re  ab_link[1].x
#define ab_link01_im  ab_link[1].y
#define ab_link02_re  ab_link[2].x
#define ab_link02_im  ab_link[2].y
#define ab_link10_re  ab_link[3].x
#define ab_link10_im  ab_link[3].y
#define ab_link11_re  ab_link[4].x
#define ab_link11_im  ab_link[4].y
#define ab_link12_re  ab_link[5].x
#define ab_link12_im  ab_link[5].y
#define ab_link20_re  ab_link[6].x
#define ab_link20_im  ab_link[6].y
#define ab_link21_re  ab_link[7].x
#define ab_link21_im  ab_link[7].y
#define ab_link22_re  ab_link[8].x
#define ab_link22_im  ab_link[8].y
00101 
// bc_link accessors: matrix entry <row><col> maps to bc_link[3*row + col];
// _re selects .x and _im selects .y.
#define bc_link00_re  bc_link[0].x
#define bc_link00_im  bc_link[0].y
#define bc_link01_re  bc_link[1].x
#define bc_link01_im  bc_link[1].y
#define bc_link02_re  bc_link[2].x
#define bc_link02_im  bc_link[2].y
#define bc_link10_re  bc_link[3].x
#define bc_link10_im  bc_link[3].y
#define bc_link11_re  bc_link[4].x
#define bc_link11_im  bc_link[4].y
#define bc_link12_re  bc_link[5].x
#define bc_link12_im  bc_link[5].y
#define bc_link20_re  bc_link[6].x
#define bc_link20_im  bc_link[6].y
#define bc_link21_re  bc_link[7].x
#define bc_link21_im  bc_link[7].y
#define bc_link22_re  bc_link[8].x
#define bc_link22_im  bc_link[8].y
00120 
// ad_link accessors: matrix entry <row><col> maps to ad_link[3*row + col];
// _re selects .x and _im selects .y.
#define ad_link00_re  ad_link[0].x
#define ad_link00_im  ad_link[0].y
#define ad_link01_re  ad_link[1].x
#define ad_link01_im  ad_link[1].y
#define ad_link02_re  ad_link[2].x
#define ad_link02_im  ad_link[2].y
#define ad_link10_re  ad_link[3].x
#define ad_link10_im  ad_link[3].y
#define ad_link11_re  ad_link[4].x
#define ad_link11_im  ad_link[4].y
#define ad_link12_re  ad_link[5].x
#define ad_link12_im  ad_link[5].y
#define ad_link20_re  ad_link[6].x
#define ad_link20_im  ad_link[6].y
#define ad_link21_re  ad_link[7].x
#define ad_link21_im  ad_link[7].y
#define ad_link22_re  ad_link[8].x
#define ad_link22_im  ad_link[8].y
00139 
// de_link accessors: matrix entry <row><col> maps to de_link[3*row + col];
// _re selects .x and _im selects .y.
#define de_link00_re  de_link[0].x
#define de_link00_im  de_link[0].y
#define de_link01_re  de_link[1].x
#define de_link01_im  de_link[1].y
#define de_link02_re  de_link[2].x
#define de_link02_im  de_link[2].y
#define de_link10_re  de_link[3].x
#define de_link10_im  de_link[3].y
#define de_link11_re  de_link[4].x
#define de_link11_im  de_link[4].y
#define de_link12_re  de_link[5].x
#define de_link12_im  de_link[5].y
#define de_link20_re  de_link[6].x
#define de_link20_im  de_link[6].y
#define de_link21_re  de_link[7].x
#define de_link21_im  de_link[7].y
#define de_link22_re  de_link[8].x
#define de_link22_im  de_link[8].y
00158 
// ef_link accessors: matrix entry <row><col> maps to ef_link[3*row + col];
// _re selects .x and _im selects .y.
#define ef_link00_re  ef_link[0].x
#define ef_link00_im  ef_link[0].y
#define ef_link01_re  ef_link[1].x
#define ef_link01_im  ef_link[1].y
#define ef_link02_re  ef_link[2].x
#define ef_link02_im  ef_link[2].y
#define ef_link10_re  ef_link[3].x
#define ef_link10_im  ef_link[3].y
#define ef_link11_re  ef_link[4].x
#define ef_link11_im  ef_link[4].y
#define ef_link12_re  ef_link[5].x
#define ef_link12_im  ef_link[5].y
#define ef_link20_re  ef_link[6].x
#define ef_link20_im  ef_link[6].y
#define ef_link21_re  ef_link[7].x
#define ef_link21_im  ef_link[7].y
#define ef_link22_re  ef_link[8].x
#define ef_link22_im  ef_link[8].y
00177 
00178 
00179 #else // HISQ_RECONSTRUCT_LINKS
00180 
// LINK_W accessors (packed four-component element layout): two complex
// entries share one array element, the first in (.x, .y) and the second in
// (.z, .w), walking the matrix row by row; entry 22 sits in LINK_W[4].x/.y.
#define LINK_W00_re  LINK_W[0].x
#define LINK_W00_im  LINK_W[0].y
#define LINK_W01_re  LINK_W[0].z
#define LINK_W01_im  LINK_W[0].w
#define LINK_W02_re  LINK_W[1].x
#define LINK_W02_im  LINK_W[1].y
#define LINK_W10_re  LINK_W[1].z
#define LINK_W10_im  LINK_W[1].w
#define LINK_W11_re  LINK_W[2].x
#define LINK_W11_im  LINK_W[2].y
#define LINK_W12_re  LINK_W[2].z
#define LINK_W12_im  LINK_W[2].w
#define LINK_W20_re  LINK_W[3].x
#define LINK_W20_im  LINK_W[3].y
#define LINK_W21_re  LINK_W[3].z
#define LINK_W21_im  LINK_W[3].w
#define LINK_W22_re  LINK_W[4].x
#define LINK_W22_im  LINK_W[4].y
00199 
00200 
// LINK_X accessors (packed four-component element layout): two complex
// entries share one array element, the first in (.x, .y) and the second in
// (.z, .w), walking the matrix row by row; entry 22 sits in LINK_X[4].x/.y.
#define LINK_X00_re  LINK_X[0].x
#define LINK_X00_im  LINK_X[0].y
#define LINK_X01_re  LINK_X[0].z
#define LINK_X01_im  LINK_X[0].w
#define LINK_X02_re  LINK_X[1].x
#define LINK_X02_im  LINK_X[1].y
#define LINK_X10_re  LINK_X[1].z
#define LINK_X10_im  LINK_X[1].w
#define LINK_X11_re  LINK_X[2].x
#define LINK_X11_im  LINK_X[2].y
#define LINK_X12_re  LINK_X[2].z
#define LINK_X12_im  LINK_X[2].w
#define LINK_X20_re  LINK_X[3].x
#define LINK_X20_im  LINK_X[3].y
#define LINK_X21_re  LINK_X[3].z
#define LINK_X21_im  LINK_X[3].w
#define LINK_X22_re  LINK_X[4].x
#define LINK_X22_im  LINK_X[4].y
00219 
00220 
// LINK_Y accessors (packed four-component element layout): two complex
// entries share one array element, the first in (.x, .y) and the second in
// (.z, .w), walking the matrix row by row; entry 22 sits in LINK_Y[4].x/.y.
#define LINK_Y00_re  LINK_Y[0].x
#define LINK_Y00_im  LINK_Y[0].y
#define LINK_Y01_re  LINK_Y[0].z
#define LINK_Y01_im  LINK_Y[0].w
#define LINK_Y02_re  LINK_Y[1].x
#define LINK_Y02_im  LINK_Y[1].y
#define LINK_Y10_re  LINK_Y[1].z
#define LINK_Y10_im  LINK_Y[1].w
#define LINK_Y11_re  LINK_Y[2].x
#define LINK_Y11_im  LINK_Y[2].y
#define LINK_Y12_re  LINK_Y[2].z
#define LINK_Y12_im  LINK_Y[2].w
#define LINK_Y20_re  LINK_Y[3].x
#define LINK_Y20_im  LINK_Y[3].y
#define LINK_Y21_re  LINK_Y[3].z
#define LINK_Y21_im  LINK_Y[3].w
#define LINK_Y22_re  LINK_Y[4].x
#define LINK_Y22_im  LINK_Y[4].y
00239 
00240 
// LINK_Z accessors (packed four-component element layout): two complex
// entries share one array element, the first in (.x, .y) and the second in
// (.z, .w), walking the matrix row by row; entry 22 sits in LINK_Z[4].x/.y.
#define LINK_Z00_re  LINK_Z[0].x
#define LINK_Z00_im  LINK_Z[0].y
#define LINK_Z01_re  LINK_Z[0].z
#define LINK_Z01_im  LINK_Z[0].w
#define LINK_Z02_re  LINK_Z[1].x
#define LINK_Z02_im  LINK_Z[1].y
#define LINK_Z10_re  LINK_Z[1].z
#define LINK_Z10_im  LINK_Z[1].w
#define LINK_Z11_re  LINK_Z[2].x
#define LINK_Z11_im  LINK_Z[2].y
#define LINK_Z12_re  LINK_Z[2].z
#define LINK_Z12_im  LINK_Z[2].w
#define LINK_Z20_re  LINK_Z[3].x
#define LINK_Z20_im  LINK_Z[3].y
#define LINK_Z21_re  LINK_Z[3].z
#define LINK_Z21_im  LINK_Z[3].w
#define LINK_Z22_re  LINK_Z[4].x
#define LINK_Z22_im  LINK_Z[4].y
00259 
00260 #endif // HISQ_RECONSTRUCT_LINKS
00261 
00262 #ifndef GENERIC_MATRIX_MACROS 
00263 #define GENERIC_MATRIX_MACROS
00264 
00265 // Color matrices stored as an array of float2 or double2 
00266 
// COLOR_MAT_T accessors: entry <row><col> maps to COLOR_MAT_T[3*row + col];
// _re selects .x and _im selects .y.
#define COLOR_MAT_T00_re  COLOR_MAT_T[0].x
#define COLOR_MAT_T00_im  COLOR_MAT_T[0].y
#define COLOR_MAT_T01_re  COLOR_MAT_T[1].x
#define COLOR_MAT_T01_im  COLOR_MAT_T[1].y
#define COLOR_MAT_T02_re  COLOR_MAT_T[2].x
#define COLOR_MAT_T02_im  COLOR_MAT_T[2].y
#define COLOR_MAT_T10_re  COLOR_MAT_T[3].x
#define COLOR_MAT_T10_im  COLOR_MAT_T[3].y
#define COLOR_MAT_T11_re  COLOR_MAT_T[4].x
#define COLOR_MAT_T11_im  COLOR_MAT_T[4].y
#define COLOR_MAT_T12_re  COLOR_MAT_T[5].x
#define COLOR_MAT_T12_im  COLOR_MAT_T[5].y
#define COLOR_MAT_T20_re  COLOR_MAT_T[6].x
#define COLOR_MAT_T20_im  COLOR_MAT_T[6].y
#define COLOR_MAT_T21_re  COLOR_MAT_T[7].x
#define COLOR_MAT_T21_im  COLOR_MAT_T[7].y
#define COLOR_MAT_T22_re  COLOR_MAT_T[8].x
#define COLOR_MAT_T22_im  COLOR_MAT_T[8].y
00285 
00286 
// COLOR_MAT_U accessors: entry <row><col> maps to COLOR_MAT_U[3*row + col];
// _re selects .x and _im selects .y.
#define COLOR_MAT_U00_re  COLOR_MAT_U[0].x
#define COLOR_MAT_U00_im  COLOR_MAT_U[0].y
#define COLOR_MAT_U01_re  COLOR_MAT_U[1].x
#define COLOR_MAT_U01_im  COLOR_MAT_U[1].y
#define COLOR_MAT_U02_re  COLOR_MAT_U[2].x
#define COLOR_MAT_U02_im  COLOR_MAT_U[2].y
#define COLOR_MAT_U10_re  COLOR_MAT_U[3].x
#define COLOR_MAT_U10_im  COLOR_MAT_U[3].y
#define COLOR_MAT_U11_re  COLOR_MAT_U[4].x
#define COLOR_MAT_U11_im  COLOR_MAT_U[4].y
#define COLOR_MAT_U12_re  COLOR_MAT_U[5].x
#define COLOR_MAT_U12_im  COLOR_MAT_U[5].y
#define COLOR_MAT_U20_re  COLOR_MAT_U[6].x
#define COLOR_MAT_U20_im  COLOR_MAT_U[6].y
#define COLOR_MAT_U21_re  COLOR_MAT_U[7].x
#define COLOR_MAT_U21_im  COLOR_MAT_U[7].y
#define COLOR_MAT_U22_re  COLOR_MAT_U[8].x
#define COLOR_MAT_U22_im  COLOR_MAT_U[8].y
00305 
00306 
// COLOR_MAT_V accessors: entry <row><col> maps to COLOR_MAT_V[3*row + col];
// _re selects .x and _im selects .y.
#define COLOR_MAT_V00_re  COLOR_MAT_V[0].x
#define COLOR_MAT_V00_im  COLOR_MAT_V[0].y
#define COLOR_MAT_V01_re  COLOR_MAT_V[1].x
#define COLOR_MAT_V01_im  COLOR_MAT_V[1].y
#define COLOR_MAT_V02_re  COLOR_MAT_V[2].x
#define COLOR_MAT_V02_im  COLOR_MAT_V[2].y
#define COLOR_MAT_V10_re  COLOR_MAT_V[3].x
#define COLOR_MAT_V10_im  COLOR_MAT_V[3].y
#define COLOR_MAT_V11_re  COLOR_MAT_V[4].x
#define COLOR_MAT_V11_im  COLOR_MAT_V[4].y
#define COLOR_MAT_V12_re  COLOR_MAT_V[5].x
#define COLOR_MAT_V12_im  COLOR_MAT_V[5].y
#define COLOR_MAT_V20_re  COLOR_MAT_V[6].x
#define COLOR_MAT_V20_im  COLOR_MAT_V[6].y
#define COLOR_MAT_V21_re  COLOR_MAT_V[7].x
#define COLOR_MAT_V21_im  COLOR_MAT_V[7].y
#define COLOR_MAT_V22_re  COLOR_MAT_V[8].x
#define COLOR_MAT_V22_im  COLOR_MAT_V[8].y
00325 
00326 
00327 
00328 
// COLOR_MAT_W accessors: entry <row><col> maps to COLOR_MAT_W[3*row + col];
// _re selects .x and _im selects .y.
#define COLOR_MAT_W00_re  COLOR_MAT_W[0].x
#define COLOR_MAT_W00_im  COLOR_MAT_W[0].y
#define COLOR_MAT_W01_re  COLOR_MAT_W[1].x
#define COLOR_MAT_W01_im  COLOR_MAT_W[1].y
#define COLOR_MAT_W02_re  COLOR_MAT_W[2].x
#define COLOR_MAT_W02_im  COLOR_MAT_W[2].y
#define COLOR_MAT_W10_re  COLOR_MAT_W[3].x
#define COLOR_MAT_W10_im  COLOR_MAT_W[3].y
#define COLOR_MAT_W11_re  COLOR_MAT_W[4].x
#define COLOR_MAT_W11_im  COLOR_MAT_W[4].y
#define COLOR_MAT_W12_re  COLOR_MAT_W[5].x
#define COLOR_MAT_W12_im  COLOR_MAT_W[5].y
#define COLOR_MAT_W20_re  COLOR_MAT_W[6].x
#define COLOR_MAT_W20_im  COLOR_MAT_W[6].y
#define COLOR_MAT_W21_re  COLOR_MAT_W[7].x
#define COLOR_MAT_W21_im  COLOR_MAT_W[7].y
#define COLOR_MAT_W22_re  COLOR_MAT_W[8].x
#define COLOR_MAT_W22_im  COLOR_MAT_W[8].y
00347 
00348 
// COLOR_MAT_X accessors: entry <row><col> maps to COLOR_MAT_X[3*row + col];
// _re selects .x and _im selects .y.
#define COLOR_MAT_X00_re  COLOR_MAT_X[0].x
#define COLOR_MAT_X00_im  COLOR_MAT_X[0].y
#define COLOR_MAT_X01_re  COLOR_MAT_X[1].x
#define COLOR_MAT_X01_im  COLOR_MAT_X[1].y
#define COLOR_MAT_X02_re  COLOR_MAT_X[2].x
#define COLOR_MAT_X02_im  COLOR_MAT_X[2].y
#define COLOR_MAT_X10_re  COLOR_MAT_X[3].x
#define COLOR_MAT_X10_im  COLOR_MAT_X[3].y
#define COLOR_MAT_X11_re  COLOR_MAT_X[4].x
#define COLOR_MAT_X11_im  COLOR_MAT_X[4].y
#define COLOR_MAT_X12_re  COLOR_MAT_X[5].x
#define COLOR_MAT_X12_im  COLOR_MAT_X[5].y
#define COLOR_MAT_X20_re  COLOR_MAT_X[6].x
#define COLOR_MAT_X20_im  COLOR_MAT_X[6].y
#define COLOR_MAT_X21_re  COLOR_MAT_X[7].x
#define COLOR_MAT_X21_im  COLOR_MAT_X[7].y
#define COLOR_MAT_X22_re  COLOR_MAT_X[8].x
#define COLOR_MAT_X22_im  COLOR_MAT_X[8].y
00367 
00368 
// COLOR_MAT_Y accessors: entry <row><col> maps to COLOR_MAT_Y[3*row + col];
// _re selects .x and _im selects .y.
#define COLOR_MAT_Y00_re  COLOR_MAT_Y[0].x
#define COLOR_MAT_Y00_im  COLOR_MAT_Y[0].y
#define COLOR_MAT_Y01_re  COLOR_MAT_Y[1].x
#define COLOR_MAT_Y01_im  COLOR_MAT_Y[1].y
#define COLOR_MAT_Y02_re  COLOR_MAT_Y[2].x
#define COLOR_MAT_Y02_im  COLOR_MAT_Y[2].y
#define COLOR_MAT_Y10_re  COLOR_MAT_Y[3].x
#define COLOR_MAT_Y10_im  COLOR_MAT_Y[3].y
#define COLOR_MAT_Y11_re  COLOR_MAT_Y[4].x
#define COLOR_MAT_Y11_im  COLOR_MAT_Y[4].y
#define COLOR_MAT_Y12_re  COLOR_MAT_Y[5].x
#define COLOR_MAT_Y12_im  COLOR_MAT_Y[5].y
#define COLOR_MAT_Y20_re  COLOR_MAT_Y[6].x
#define COLOR_MAT_Y20_im  COLOR_MAT_Y[6].y
#define COLOR_MAT_Y21_re  COLOR_MAT_Y[7].x
#define COLOR_MAT_Y21_im  COLOR_MAT_Y[7].y
#define COLOR_MAT_Y22_re  COLOR_MAT_Y[8].x
#define COLOR_MAT_Y22_im  COLOR_MAT_Y[8].y
00387 
00388 
// COLOR_MAT_Z accessors: entry <row><col> maps to COLOR_MAT_Z[3*row + col];
// _re selects .x and _im selects .y.
#define COLOR_MAT_Z00_re  COLOR_MAT_Z[0].x
#define COLOR_MAT_Z00_im  COLOR_MAT_Z[0].y
#define COLOR_MAT_Z01_re  COLOR_MAT_Z[1].x
#define COLOR_MAT_Z01_im  COLOR_MAT_Z[1].y
#define COLOR_MAT_Z02_re  COLOR_MAT_Z[2].x
#define COLOR_MAT_Z02_im  COLOR_MAT_Z[2].y
#define COLOR_MAT_Z10_re  COLOR_MAT_Z[3].x
#define COLOR_MAT_Z10_im  COLOR_MAT_Z[3].y
#define COLOR_MAT_Z11_re  COLOR_MAT_Z[4].x
#define COLOR_MAT_Z11_im  COLOR_MAT_Z[4].y
#define COLOR_MAT_Z12_re  COLOR_MAT_Z[5].x
#define COLOR_MAT_Z12_im  COLOR_MAT_Z[5].y
#define COLOR_MAT_Z20_re  COLOR_MAT_Z[6].x
#define COLOR_MAT_Z20_im  COLOR_MAT_Z[6].y
#define COLOR_MAT_Z21_re  COLOR_MAT_Z[7].x
#define COLOR_MAT_Z21_im  COLOR_MAT_Z[7].y
#define COLOR_MAT_Z22_re  COLOR_MAT_Z[8].x
#define COLOR_MAT_Z22_im  COLOR_MAT_Z[8].y
00407 
// Fill row 2 of the matrix named by `var` from its rows 0 and 1. Each row-2
// entry first receives one ACC_CONJ_PROD_ASSIGN term and then one
// ACC_CONJ_PROD term (both helpers are defined outside this header), after
// which the real and imaginary parts of the whole row are multiplied by `sign`.
// NOTE: this expands to a bare statement sequence that already ends in ';',
// so it is not safe as the sole body of an unbraced if/else/for.
#define FF_RECONSTRUCT_LINK_12(var, sign) \
  ACC_CONJ_PROD_ASSIGN(var##20, +var##01, +var##12); \
  ACC_CONJ_PROD(var##20, -var##02, +var##11); \
  ACC_CONJ_PROD_ASSIGN(var##21, +var##02, +var##10); \
  ACC_CONJ_PROD(var##21, -var##00, +var##12); \
  ACC_CONJ_PROD_ASSIGN(var##22, +var##00, +var##11); \
  ACC_CONJ_PROD(var##22, -var##01, +var##10); \
  var##20_re *= (sign); \
  var##20_im *= (sign); \
  var##21_re *= (sign); \
  var##21_im *= (sign); \
  var##22_re *= (sign); \
  var##22_im *= (sign);
00417 
00418 
00419 
// Fetch the nine entries of one matrix through tex1Dfetch into var[0..8].
// Entry k is read from offset idx + dir*stride*9 + k*stride.
//   gauge  - texture handle passed straight to tex1Dfetch
//   dir    - direction index; each direction occupies 9*stride entries
//   idx    - site offset within the direction block
//   var    - destination array with at least nine elements
//   stride - distance between consecutive matrix entries of the same site
// Every argument used in arithmetic or subscripting is parenthesized so that
// expression arguments (e.g. `a ? b : c`, `x ^ 1`) keep their meaning.
// Arguments are evaluated several times; avoid side effects in them.
#define HISQ_LOAD_MATRIX_18_SINGLE_TEX(gauge, dir, idx, var, stride) do { \
    (var)[0] = tex1Dfetch(gauge, (idx) + (dir)*(stride)*9);              \
    (var)[1] = tex1Dfetch(gauge, (idx) + (dir)*(stride)*9 + (stride));   \
    (var)[2] = tex1Dfetch(gauge, (idx) + (dir)*(stride)*9 + (stride)*2); \
    (var)[3] = tex1Dfetch(gauge, (idx) + (dir)*(stride)*9 + (stride)*3); \
    (var)[4] = tex1Dfetch(gauge, (idx) + (dir)*(stride)*9 + (stride)*4); \
    (var)[5] = tex1Dfetch(gauge, (idx) + (dir)*(stride)*9 + (stride)*5); \
    (var)[6] = tex1Dfetch(gauge, (idx) + (dir)*(stride)*9 + (stride)*6); \
    (var)[7] = tex1Dfetch(gauge, (idx) + (dir)*(stride)*9 + (stride)*7); \
    (var)[8] = tex1Dfetch(gauge, (idx) + (dir)*(stride)*9 + (stride)*8); \
  } while (0)
00431 
// Fetch three float4 values through tex1Dfetch and unpack them into six
// float2 entries var[0..5]: (.x, .y) goes to the even slot and (.z, .w) to the
// following odd slot. Fetch k is read from offset idx + dir*stride*3 + k*stride.
//   gauge  - texture handle passed straight to tex1Dfetch
//   dir    - direction index; each direction occupies 3*stride float4 entries
//   idx    - site offset within the direction block
//   var    - destination float2 array with at least six elements
//   stride - distance between consecutive float4 entries of the same site
// Arithmetic/subscript arguments are parenthesized, and the scratch variable
// has a macro-specific name so it cannot capture a caller identifier `tmp`.
// Arguments are evaluated several times; avoid side effects in them.
#define HISQ_LOAD_MATRIX_12_SINGLE_TEX(gauge, dir, idx, var, stride) do {        \
    float4 hisq_ld12_tmp_;                                                      \
    hisq_ld12_tmp_ = tex1Dfetch(gauge, (idx) + (dir)*(stride)*3);               \
    (var)[0] = make_float2(hisq_ld12_tmp_.x, hisq_ld12_tmp_.y);                 \
    (var)[1] = make_float2(hisq_ld12_tmp_.z, hisq_ld12_tmp_.w);                 \
    hisq_ld12_tmp_ = tex1Dfetch(gauge, (idx) + (dir)*(stride)*3 + (stride));    \
    (var)[2] = make_float2(hisq_ld12_tmp_.x, hisq_ld12_tmp_.y);                 \
    (var)[3] = make_float2(hisq_ld12_tmp_.z, hisq_ld12_tmp_.w);                 \
    hisq_ld12_tmp_ = tex1Dfetch(gauge, (idx) + (dir)*(stride)*3 + 2*(stride));  \
    (var)[4] = make_float2(hisq_ld12_tmp_.x, hisq_ld12_tmp_.y);                 \
    (var)[5] = make_float2(hisq_ld12_tmp_.z, hisq_ld12_tmp_.w);                 \
  } while (0)
00444 
// Read the nine entries of one matrix through READ_DOUBLE2_TEXTURE into
// var[0..8]. Entry k is read from offset idx + dir*stride*9 + k*stride.
//   gauge_tex, gauge - forwarded unchanged to READ_DOUBLE2_TEXTURE
//   dir    - direction index; each direction occupies 9*stride entries
//   idx    - site offset within the direction block
//   var    - destination array with at least nine elements
//   stride - distance between consecutive matrix entries of the same site
// Arithmetic/subscript arguments are parenthesized so expression arguments
// keep their meaning. Arguments are evaluated several times.
#define HISQ_LOAD_MATRIX_18_DOUBLE_TEX(gauge_tex, gauge, dir, idx, var, stride) do { \
    (var)[0] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*9);              \
    (var)[1] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*9 + (stride));   \
    (var)[2] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*9 + (stride)*2); \
    (var)[3] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*9 + (stride)*3); \
    (var)[4] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*9 + (stride)*4); \
    (var)[5] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*9 + (stride)*5); \
    (var)[6] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*9 + (stride)*6); \
    (var)[7] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*9 + (stride)*7); \
    (var)[8] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*9 + (stride)*8); \
  } while (0)
00456 
// Read six entries through READ_DOUBLE2_TEXTURE into var[0..5].
// Entry k is read from offset idx + dir*stride*6 + k*stride.
//   gauge_tex, gauge - forwarded unchanged to READ_DOUBLE2_TEXTURE
//   dir    - direction index; each direction occupies 6*stride entries
//   idx    - site offset within the direction block
//   var    - destination array with at least six elements
//   stride - distance between consecutive entries of the same site
// Arithmetic/subscript arguments are parenthesized so expression arguments
// keep their meaning. Arguments are evaluated several times.
#define HISQ_LOAD_MATRIX_12_DOUBLE_TEX(gauge_tex, gauge, dir, idx, var, stride) do { \
    (var)[0] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*6);              \
    (var)[1] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*6 + (stride));   \
    (var)[2] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*6 + (stride)*2); \
    (var)[3] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*6 + (stride)*3); \
    (var)[4] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*6 + (stride)*4); \
    (var)[5] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*6 + (stride)*5); \
  } while (0)
00465 
// Step one site forward along direction `mydir` (0..3) with wrap-around.
// Writes the neighbour's linear index into `new_idx` and advances the
// matching coordinate in new_x[], resetting it to 0 when it was on the upper
// boundary. Relies on new_x[] and the lattice constants X1, X2X1, X3X2X1,
// X?m1 and the *m* wrap offsets being visible at the expansion site.
// `idx` is parenthesized because it sits inside a conditional expression,
// where an unparenthesized expression argument would be re-associated.
// Any `mydir` outside 0..3 leaves new_idx and new_x[] untouched.
#define FF_COMPUTE_NEW_FULL_IDX_PLUS_UPDATE(mydir, idx, new_idx) do {           \
    switch (mydir) {                                                            \
    case 0:                                                                     \
      (new_idx) = ((new_x[0] == X1m1) ? (idx) - X1m1 : (idx) + 1);              \
      new_x[0] = (new_x[0] == X1m1) ? 0 : new_x[0] + 1;                         \
      break;                                                                    \
    case 1:                                                                     \
      (new_idx) = ((new_x[1] == X2m1) ? (idx) - X2X1mX1 : (idx) + X1);          \
      new_x[1] = (new_x[1] == X2m1) ? 0 : new_x[1] + 1;                         \
      break;                                                                    \
    case 2:                                                                     \
      (new_idx) = ((new_x[2] == X3m1) ? (idx) - X3X2X1mX2X1 : (idx) + X2X1);    \
      new_x[2] = (new_x[2] == X3m1) ? 0 : new_x[2] + 1;                         \
      break;                                                                    \
    case 3:                                                                     \
      (new_idx) = ((new_x[3] == X4m1) ? (idx) - X4X3X2X1mX3X2X1 : (idx) + X3X2X1); \
      new_x[3] = (new_x[3] == X4m1) ? 0 : new_x[3] + 1;                         \
      break;                                                                    \
    default:                                                                    \
      break;                                                                    \
    }                                                                           \
  } while (0)
00486 
00487 
// Step one site backward along direction `mydir` (0..3) with wrap-around.
// Writes the neighbour's linear index into `new_idx` and decrements the
// matching coordinate in new_x[], setting it to the upper boundary (X?m1) when
// it was 0. Relies on new_x[] and the lattice constants X1, X2X1, X3X2X1,
// X?m1 and the *m* wrap offsets being visible at the expansion site.
// `idx` is parenthesized because it sits inside a conditional expression,
// where an unparenthesized expression argument would be re-associated.
// Any `mydir` outside 0..3 leaves new_idx and new_x[] untouched.
#define FF_COMPUTE_NEW_FULL_IDX_MINUS_UPDATE(mydir, idx, new_idx) do {       \
    switch (mydir) {                                                         \
    case 0:                                                                  \
      (new_idx) = ((new_x[0] == 0) ? (idx) + X1m1 : (idx) - 1);              \
      new_x[0] = (new_x[0] == 0) ? X1m1 : new_x[0] - 1;                      \
      break;                                                                 \
    case 1:                                                                  \
      (new_idx) = ((new_x[1] == 0) ? (idx) + X2X1mX1 : (idx) - X1);          \
      new_x[1] = (new_x[1] == 0) ? X2m1 : new_x[1] - 1;                      \
      break;                                                                 \
    case 2:                                                                  \
      (new_idx) = ((new_x[2] == 0) ? (idx) + X3X2X1mX2X1 : (idx) - X2X1);    \
      new_x[2] = (new_x[2] == 0) ? X3m1 : new_x[2] - 1;                      \
      break;                                                                 \
    case 3:                                                                  \
      (new_idx) = ((new_x[3] == 0) ? (idx) + X4X3X2X1mX3X2X1 : (idx) - X3X2X1); \
      new_x[3] = (new_x[3] == 0) ? X4m1 : new_x[3] - 1;                      \
      break;                                                                 \
    default:                                                                 \
      break;                                                                 \
    }                                                                        \
  } while (0)
00508 
00509 
00510 
00511 
// Store the nine entries var[0..8] into `mat`, entry k going to
// mat[idx + k*Vh]. `Vh` is taken from the scope of the expansion site.
// `mat`, `idx` and `var` are parenthesized so that expression arguments
// (pointer arithmetic, conditional expressions, ...) index as intended.
// Arguments are evaluated several times; avoid side effects in them.
#define WRITE_MATRIX_18_SINGLE(mat, idx, var) do { \
    (mat)[(idx) + 0*Vh] = (var)[0];                \
    (mat)[(idx) + 1*Vh] = (var)[1];                \
    (mat)[(idx) + 2*Vh] = (var)[2];                \
    (mat)[(idx) + 3*Vh] = (var)[3];                \
    (mat)[(idx) + 4*Vh] = (var)[4];                \
    (mat)[(idx) + 5*Vh] = (var)[5];                \
    (mat)[(idx) + 6*Vh] = (var)[6];                \
    (mat)[(idx) + 7*Vh] = (var)[7];                \
    (mat)[(idx) + 8*Vh] = (var)[8];                \
  } while (0)
00523 
00524 
00525 // matrix macros:
// b = adjoint (conjugate transpose) of a: entry b<r><c> takes the real part of
// a<c><r> and the negated imaginary part. Entries are written in row-major
// order of b, so `a` and `b` must name different matrices.
// Expands to a bare statement sequence ending in ';'.
#define ADJ_MAT(a, b) \
  b##00_re = a##00_re;  b##00_im = -a##00_im; \
  b##01_re = a##10_re;  b##01_im = -a##10_im; \
  b##02_re = a##20_re;  b##02_im = -a##20_im; \
  b##10_re = a##01_re;  b##10_im = -a##01_im; \
  b##11_re = a##11_re;  b##11_im = -a##11_im; \
  b##12_re = a##21_re;  b##12_im = -a##21_im; \
  b##20_re = a##02_re;  b##20_im = -a##02_im; \
  b##21_re = a##12_re;  b##21_im = -a##12_im; \
  b##22_re = a##22_re;  b##22_im = -a##22_im;
00545 
00546 
// Copy matrix a into matrix b entry by entry (real and imaginary parts).
// Expands to a bare statement sequence ending in ';'.
// The final line deliberately carries no line-continuation backslash, so the
// macro body ends here regardless of what follows it in the file.
#define ASSIGN_MAT(a, b) \
  b##00_re =  a##00_re; \
  b##00_im =  a##00_im; \
  b##01_re =  a##01_re; \
  b##01_im =  a##01_im; \
  b##02_re =  a##02_re; \
  b##02_im =  a##02_im; \
  b##10_re =  a##10_re; \
  b##10_im =  a##10_im; \
  b##11_re =  a##11_re; \
  b##11_im =  a##11_im; \
  b##12_re =  a##12_re; \
  b##12_im =  a##12_im; \
  b##20_re =  a##20_re; \
  b##20_im =  a##20_im; \
  b##21_re =  a##21_re; \
  b##21_im =  a##21_im; \
  b##22_re =  a##22_re; \
  b##22_im =  a##22_im;
00566 
00567 
00568 
// 3x3 complex matrix product written out entry by entry.
//   simple != 0 : c = a * b
//   simple == 0 : c = adjoint(a) * b   (conjugate of a, indexed column-first)
// `c` is written while `a` and `b` are still being read, so `c` must not name
// the same matrix as either input.
#define MATRIX_PRODUCT(a, b, simple, c) do { \
    if (simple) { \
      c##00_re = a##00_re * b##00_re - a##00_im * b##00_im + a##01_re * b##10_re - a##01_im * b##10_im + a##02_re * b##20_re - a##02_im * b##20_im; \
      c##00_im = a##00_re * b##00_im + a##00_im * b##00_re + a##01_re * b##10_im + a##01_im * b##10_re + a##02_re * b##20_im + a##02_im * b##20_re; \
      c##01_re = a##00_re * b##01_re - a##00_im * b##01_im + a##01_re * b##11_re - a##01_im * b##11_im + a##02_re * b##21_re - a##02_im * b##21_im; \
      c##01_im = a##00_re * b##01_im + a##00_im * b##01_re + a##01_re * b##11_im + a##01_im * b##11_re + a##02_re * b##21_im + a##02_im * b##21_re; \
      c##02_re = a##00_re * b##02_re - a##00_im * b##02_im + a##01_re * b##12_re - a##01_im * b##12_im + a##02_re * b##22_re - a##02_im * b##22_im; \
      c##02_im = a##00_re * b##02_im + a##00_im * b##02_re + a##01_re * b##12_im + a##01_im * b##12_re + a##02_re * b##22_im + a##02_im * b##22_re; \
      c##10_re = a##10_re * b##00_re - a##10_im * b##00_im + a##11_re * b##10_re - a##11_im * b##10_im + a##12_re * b##20_re - a##12_im * b##20_im; \
      c##10_im = a##10_re * b##00_im + a##10_im * b##00_re + a##11_re * b##10_im + a##11_im * b##10_re + a##12_re * b##20_im + a##12_im * b##20_re; \
      c##11_re = a##10_re * b##01_re - a##10_im * b##01_im + a##11_re * b##11_re - a##11_im * b##11_im + a##12_re * b##21_re - a##12_im * b##21_im; \
      c##11_im = a##10_re * b##01_im + a##10_im * b##01_re + a##11_re * b##11_im + a##11_im * b##11_re + a##12_re * b##21_im + a##12_im * b##21_re; \
      c##12_re = a##10_re * b##02_re - a##10_im * b##02_im + a##11_re * b##12_re - a##11_im * b##12_im + a##12_re * b##22_re - a##12_im * b##22_im; \
      c##12_im = a##10_re * b##02_im + a##10_im * b##02_re + a##11_re * b##12_im + a##11_im * b##12_re + a##12_re * b##22_im + a##12_im * b##22_re; \
      c##20_re = a##20_re * b##00_re - a##20_im * b##00_im + a##21_re * b##10_re - a##21_im * b##10_im + a##22_re * b##20_re - a##22_im * b##20_im; \
      c##20_im = a##20_re * b##00_im + a##20_im * b##00_re + a##21_re * b##10_im + a##21_im * b##10_re + a##22_re * b##20_im + a##22_im * b##20_re; \
      c##21_re = a##20_re * b##01_re - a##20_im * b##01_im + a##21_re * b##11_re - a##21_im * b##11_im + a##22_re * b##21_re - a##22_im * b##21_im; \
      c##21_im = a##20_re * b##01_im + a##20_im * b##01_re + a##21_re * b##11_im + a##21_im * b##11_re + a##22_re * b##21_im + a##22_im * b##21_re; \
      c##22_re = a##20_re * b##02_re - a##20_im * b##02_im + a##21_re * b##12_re - a##21_im * b##12_im + a##22_re * b##22_re - a##22_im * b##22_im; \
      c##22_im = a##20_re * b##02_im + a##20_im * b##02_re + a##21_re * b##12_im + a##21_im * b##12_re + a##22_re * b##22_im + a##22_im * b##22_re; \
    } else { \
      c##00_re = a##00_re * b##00_re + a##00_im * b##00_im + a##10_re * b##10_re + a##10_im * b##10_im + a##20_re * b##20_re + a##20_im * b##20_im; \
      c##00_im = a##00_re * b##00_im - a##00_im * b##00_re + a##10_re * b##10_im - a##10_im * b##10_re + a##20_re * b##20_im - a##20_im * b##20_re; \
      c##01_re = a##00_re * b##01_re + a##00_im * b##01_im + a##10_re * b##11_re + a##10_im * b##11_im + a##20_re * b##21_re + a##20_im * b##21_im; \
      c##01_im = a##00_re * b##01_im - a##00_im * b##01_re + a##10_re * b##11_im - a##10_im * b##11_re + a##20_re * b##21_im - a##20_im * b##21_re; \
      c##02_re = a##00_re * b##02_re + a##00_im * b##02_im + a##10_re * b##12_re + a##10_im * b##12_im + a##20_re * b##22_re + a##20_im * b##22_im; \
      c##02_im = a##00_re * b##02_im - a##00_im * b##02_re + a##10_re * b##12_im - a##10_im * b##12_re + a##20_re * b##22_im - a##20_im * b##22_re; \
      c##10_re = a##01_re * b##00_re + a##01_im * b##00_im + a##11_re * b##10_re + a##11_im * b##10_im + a##21_re * b##20_re + a##21_im * b##20_im; \
      c##10_im = a##01_re * b##00_im - a##01_im * b##00_re + a##11_re * b##10_im - a##11_im * b##10_re + a##21_re * b##20_im - a##21_im * b##20_re; \
      c##11_re = a##01_re * b##01_re + a##01_im * b##01_im + a##11_re * b##11_re + a##11_im * b##11_im + a##21_re * b##21_re + a##21_im * b##21_im; \
      c##11_im = a##01_re * b##01_im - a##01_im * b##01_re + a##11_re * b##11_im - a##11_im * b##11_re + a##21_re * b##21_im - a##21_im * b##21_re; \
      c##12_re = a##01_re * b##02_re + a##01_im * b##02_im + a##11_re * b##12_re + a##11_im * b##12_im + a##21_re * b##22_re + a##21_im * b##22_im; \
      c##12_im = a##01_re * b##02_im - a##01_im * b##02_re + a##11_re * b##12_im - a##11_im * b##12_re + a##21_re * b##22_im - a##21_im * b##22_re; \
      c##20_re = a##02_re * b##00_re + a##02_im * b##00_im + a##12_re * b##10_re + a##12_im * b##10_im + a##22_re * b##20_re + a##22_im * b##20_im; \
      c##20_im = a##02_re * b##00_im - a##02_im * b##00_re + a##12_re * b##10_im - a##12_im * b##10_re + a##22_re * b##20_im - a##22_im * b##20_re; \
      c##21_re = a##02_re * b##01_re + a##02_im * b##01_im + a##12_re * b##11_re + a##12_im * b##11_im + a##22_re * b##21_re + a##22_im * b##21_im; \
      c##21_im = a##02_re * b##01_im - a##02_im * b##01_re + a##12_re * b##11_im - a##12_im * b##11_re + a##22_re * b##21_im - a##22_im * b##21_re; \
      c##22_re = a##02_re * b##02_re + a##02_im * b##02_im + a##12_re * b##12_re + a##12_im * b##12_im + a##22_re * b##22_re + a##22_im * b##22_im; \
      c##22_im = a##02_re * b##02_im - a##02_im * b##02_re + a##12_re * b##12_im - a##12_im * b##12_re + a##22_re * b##22_im - a##22_im * b##22_re; \
    } \
  } while (0)
00610 
00611 
// c = a * b for 3x3 complex matrices, written out entry by entry.
// `c` is written while `a` and `b` are still being read, so `c` must not name
// the same matrix as either input.
// Expands to a bare statement sequence ending in ';'.
#define MAT_MUL_MAT(a, b, c) \
  c##00_re = a##00_re * b##00_re - a##00_im * b##00_im + a##01_re * b##10_re - a##01_im * b##10_im + a##02_re * b##20_re - a##02_im * b##20_im; \
  c##00_im = a##00_re * b##00_im + a##00_im * b##00_re + a##01_re * b##10_im + a##01_im * b##10_re + a##02_re * b##20_im + a##02_im * b##20_re; \
  c##01_re = a##00_re * b##01_re - a##00_im * b##01_im + a##01_re * b##11_re - a##01_im * b##11_im + a##02_re * b##21_re - a##02_im * b##21_im; \
  c##01_im = a##00_re * b##01_im + a##00_im * b##01_re + a##01_re * b##11_im + a##01_im * b##11_re + a##02_re * b##21_im + a##02_im * b##21_re; \
  c##02_re = a##00_re * b##02_re - a##00_im * b##02_im + a##01_re * b##12_re - a##01_im * b##12_im + a##02_re * b##22_re - a##02_im * b##22_im; \
  c##02_im = a##00_re * b##02_im + a##00_im * b##02_re + a##01_re * b##12_im + a##01_im * b##12_re + a##02_re * b##22_im + a##02_im * b##22_re; \
  c##10_re = a##10_re * b##00_re - a##10_im * b##00_im + a##11_re * b##10_re - a##11_im * b##10_im + a##12_re * b##20_re - a##12_im * b##20_im; \
  c##10_im = a##10_re * b##00_im + a##10_im * b##00_re + a##11_re * b##10_im + a##11_im * b##10_re + a##12_re * b##20_im + a##12_im * b##20_re; \
  c##11_re = a##10_re * b##01_re - a##10_im * b##01_im + a##11_re * b##11_re - a##11_im * b##11_im + a##12_re * b##21_re - a##12_im * b##21_im; \
  c##11_im = a##10_re * b##01_im + a##10_im * b##01_re + a##11_re * b##11_im + a##11_im * b##11_re + a##12_re * b##21_im + a##12_im * b##21_re; \
  c##12_re = a##10_re * b##02_re - a##10_im * b##02_im + a##11_re * b##12_re - a##11_im * b##12_im + a##12_re * b##22_re - a##12_im * b##22_im; \
  c##12_im = a##10_re * b##02_im + a##10_im * b##02_re + a##11_re * b##12_im + a##11_im * b##12_re + a##12_re * b##22_im + a##12_im * b##22_re; \
  c##20_re = a##20_re * b##00_re - a##20_im * b##00_im + a##21_re * b##10_re - a##21_im * b##10_im + a##22_re * b##20_re - a##22_im * b##20_im; \
  c##20_im = a##20_re * b##00_im + a##20_im * b##00_re + a##21_re * b##10_im + a##21_im * b##10_re + a##22_re * b##20_im + a##22_im * b##20_re; \
  c##21_re = a##20_re * b##01_re - a##20_im * b##01_im + a##21_re * b##11_re - a##21_im * b##11_im + a##22_re * b##21_re - a##22_im * b##21_im; \
  c##21_im = a##20_re * b##01_im + a##20_im * b##01_re + a##21_re * b##11_im + a##21_im * b##11_re + a##22_re * b##21_im + a##22_im * b##21_re; \
  c##22_re = a##20_re * b##02_re - a##20_im * b##02_im + a##21_re * b##12_re - a##21_im * b##12_im + a##22_re * b##22_re - a##22_im * b##22_im; \
  c##22_im = a##20_re * b##02_im + a##20_im * b##02_re + a##21_re * b##12_im + a##21_im * b##12_re + a##22_re * b##22_im + a##22_im * b##22_re;
00631 
// MAT_MUL_ADJ_MAT(a, b, c): c = a * adjoint(b) for 3x3 complex matrices,
// i.e. c_ij = sum_k a_ik * conj(b_jk).
//
// a, b and c are identifier prefixes, not expressions: each one is pasted
// with the suffixes 00_re, 00_im, ..., 22_re, 22_im (row index first, then
// column index) to name the 18 real components of a matrix.
//
// a, b : input matrices (read only); b is used conjugate-transposed
// c    : output matrix; every component is overwritten
//
// c must not be the same prefix as a or b: components of c are stored while
// components of a and b are still being read.
//
// The body is wrapped in do { ... } while (0) so the expansion is a single
// statement and behaves correctly under an unbraced if/else or loop. Invoke
// it like a function call, with a terminating semicolon.
#define MAT_MUL_ADJ_MAT(a, b, c) do { \
  c##00_re =    a##00_re*b##00_re + a##00_im*b##00_im + a##01_re*b##01_re + a##01_im*b##01_im + a##02_re*b##02_re + a##02_im*b##02_im; \
  c##00_im =  - a##00_re*b##00_im + a##00_im*b##00_re - a##01_re*b##01_im + a##01_im*b##01_re - a##02_re*b##02_im + a##02_im*b##02_re; \
  c##01_re =    a##00_re*b##10_re + a##00_im*b##10_im + a##01_re*b##11_re + a##01_im*b##11_im + a##02_re*b##12_re + a##02_im*b##12_im; \
  c##01_im =  - a##00_re*b##10_im + a##00_im*b##10_re - a##01_re*b##11_im + a##01_im*b##11_re - a##02_re*b##12_im + a##02_im*b##12_re; \
  c##02_re =    a##00_re*b##20_re + a##00_im*b##20_im + a##01_re*b##21_re + a##01_im*b##21_im + a##02_re*b##22_re + a##02_im*b##22_im; \
  c##02_im =  - a##00_re*b##20_im + a##00_im*b##20_re - a##01_re*b##21_im + a##01_im*b##21_re - a##02_re*b##22_im + a##02_im*b##22_re; \
  c##10_re =    a##10_re*b##00_re + a##10_im*b##00_im + a##11_re*b##01_re + a##11_im*b##01_im + a##12_re*b##02_re + a##12_im*b##02_im; \
  c##10_im =  - a##10_re*b##00_im + a##10_im*b##00_re - a##11_re*b##01_im + a##11_im*b##01_re - a##12_re*b##02_im + a##12_im*b##02_re; \
  c##11_re =    a##10_re*b##10_re + a##10_im*b##10_im + a##11_re*b##11_re + a##11_im*b##11_im + a##12_re*b##12_re + a##12_im*b##12_im; \
  c##11_im =  - a##10_re*b##10_im + a##10_im*b##10_re - a##11_re*b##11_im + a##11_im*b##11_re - a##12_re*b##12_im + a##12_im*b##12_re; \
  c##12_re =    a##10_re*b##20_re + a##10_im*b##20_im + a##11_re*b##21_re + a##11_im*b##21_im + a##12_re*b##22_re + a##12_im*b##22_im; \
  c##12_im =  - a##10_re*b##20_im + a##10_im*b##20_re - a##11_re*b##21_im + a##11_im*b##21_re - a##12_re*b##22_im + a##12_im*b##22_re; \
  c##20_re =    a##20_re*b##00_re + a##20_im*b##00_im + a##21_re*b##01_re + a##21_im*b##01_im + a##22_re*b##02_re + a##22_im*b##02_im; \
  c##20_im =  - a##20_re*b##00_im + a##20_im*b##00_re - a##21_re*b##01_im + a##21_im*b##01_re - a##22_re*b##02_im + a##22_im*b##02_re; \
  c##21_re =    a##20_re*b##10_re + a##20_im*b##10_im + a##21_re*b##11_re + a##21_im*b##11_im + a##22_re*b##12_re + a##22_im*b##12_im; \
  c##21_im =  - a##20_re*b##10_im + a##20_im*b##10_re - a##21_re*b##11_im + a##21_im*b##11_re - a##22_re*b##12_im + a##22_im*b##12_re; \
  c##22_re =    a##20_re*b##20_re + a##20_im*b##20_im + a##21_re*b##21_re + a##21_im*b##21_im + a##22_re*b##22_re + a##22_im*b##22_im; \
  c##22_im =  - a##20_re*b##20_im + a##20_im*b##20_re - a##21_re*b##21_im + a##21_im*b##21_re - a##22_re*b##22_im + a##22_im*b##22_re; \
} while (0)
00651 
// ADJ_MAT_MUL_MAT(a, b, c): c = adjoint(a) * b for 3x3 complex matrices,
// i.e. c_ij = sum_k conj(a_ki) * b_kj.
//
// a, b and c are identifier prefixes, not expressions: each one is pasted
// with the suffixes 00_re, 00_im, ..., 22_re, 22_im (row index first, then
// column index) to name the 18 real components of a matrix.
//
// a, b : input matrices (read only); a is used conjugate-transposed
// c    : output matrix; every component is overwritten
//
// c must not be the same prefix as a or b: components of c are stored while
// components of a and b are still being read.
//
// The body is wrapped in do { ... } while (0) so the expansion is a single
// statement and behaves correctly under an unbraced if/else or loop. Invoke
// it like a function call, with a terminating semicolon.
#define ADJ_MAT_MUL_MAT(a, b, c) do { \
  c##00_re = a##00_re*b##00_re + a##00_im*b##00_im + a##10_re*b##10_re + a##10_im*b##10_im + a##20_re*b##20_re + a##20_im*b##20_im; \
  c##00_im = a##00_re*b##00_im - a##00_im*b##00_re + a##10_re*b##10_im - a##10_im*b##10_re + a##20_re*b##20_im - a##20_im*b##20_re; \
  c##01_re = a##00_re*b##01_re + a##00_im*b##01_im + a##10_re*b##11_re + a##10_im*b##11_im + a##20_re*b##21_re + a##20_im*b##21_im; \
  c##01_im = a##00_re*b##01_im - a##00_im*b##01_re + a##10_re*b##11_im - a##10_im*b##11_re + a##20_re*b##21_im - a##20_im*b##21_re; \
  c##02_re = a##00_re*b##02_re + a##00_im*b##02_im + a##10_re*b##12_re + a##10_im*b##12_im + a##20_re*b##22_re + a##20_im*b##22_im; \
  c##02_im = a##00_re*b##02_im - a##00_im*b##02_re + a##10_re*b##12_im - a##10_im*b##12_re + a##20_re*b##22_im - a##20_im*b##22_re; \
  c##10_re = a##01_re*b##00_re + a##01_im*b##00_im + a##11_re*b##10_re + a##11_im*b##10_im + a##21_re*b##20_re + a##21_im*b##20_im; \
  c##10_im = a##01_re*b##00_im - a##01_im*b##00_re + a##11_re*b##10_im - a##11_im*b##10_re + a##21_re*b##20_im - a##21_im*b##20_re; \
  c##11_re = a##01_re*b##01_re + a##01_im*b##01_im + a##11_re*b##11_re + a##11_im*b##11_im + a##21_re*b##21_re + a##21_im*b##21_im; \
  c##11_im = a##01_re*b##01_im - a##01_im*b##01_re + a##11_re*b##11_im - a##11_im*b##11_re + a##21_re*b##21_im - a##21_im*b##21_re; \
  c##12_re = a##01_re*b##02_re + a##01_im*b##02_im + a##11_re*b##12_re + a##11_im*b##12_im + a##21_re*b##22_re + a##21_im*b##22_im; \
  c##12_im = a##01_re*b##02_im - a##01_im*b##02_re + a##11_re*b##12_im - a##11_im*b##12_re + a##21_re*b##22_im - a##21_im*b##22_re; \
  c##20_re = a##02_re*b##00_re + a##02_im*b##00_im + a##12_re*b##10_re + a##12_im*b##10_im + a##22_re*b##20_re + a##22_im*b##20_im; \
  c##20_im = a##02_re*b##00_im - a##02_im*b##00_re + a##12_re*b##10_im - a##12_im*b##10_re + a##22_re*b##20_im - a##22_im*b##20_re; \
  c##21_re = a##02_re*b##01_re + a##02_im*b##01_im + a##12_re*b##11_re + a##12_im*b##11_im + a##22_re*b##21_re + a##22_im*b##21_im; \
  c##21_im = a##02_re*b##01_im - a##02_im*b##01_re + a##12_re*b##11_im - a##12_im*b##11_re + a##22_re*b##21_im - a##22_im*b##21_re; \
  c##22_re = a##02_re*b##02_re + a##02_im*b##02_im + a##12_re*b##12_re + a##12_im*b##12_im + a##22_re*b##22_re + a##22_im*b##22_im; \
  c##22_im = a##02_re*b##02_im - a##02_im*b##02_re + a##12_re*b##12_im - a##12_im*b##12_re + a##22_re*b##22_im - a##22_im*b##22_re; \
} while (0)
00671 
// ADJ_MAT_MUL_ADJ_MAT(a, b, c): c = adjoint(a) * adjoint(b) for 3x3 complex
// matrices, i.e. c_ij = sum_k conj(a_ki) * conj(b_jk).
//
// a, b and c are identifier prefixes, not expressions: each one is pasted
// with the suffixes 00_re, 00_im, ..., 22_re, 22_im (row index first, then
// column index) to name the 18 real components of a matrix.
//
// a, b : input matrices (read only); both are used conjugate-transposed
// c    : output matrix; every component is overwritten
//
// c must not be the same prefix as a or b: components of c are stored while
// components of a and b are still being read.
//
// The body is wrapped in do { ... } while (0) so the expansion is a single
// statement and behaves correctly under an unbraced if/else or loop. Invoke
// it like a function call, with a terminating semicolon.
#define ADJ_MAT_MUL_ADJ_MAT(a, b, c) do { \
  c##00_re =    a##00_re*b##00_re - a##00_im*b##00_im + a##10_re*b##01_re - a##10_im*b##01_im + a##20_re*b##02_re - a##20_im*b##02_im; \
  c##00_im =  - a##00_re*b##00_im - a##00_im*b##00_re - a##10_re*b##01_im - a##10_im*b##01_re - a##20_re*b##02_im - a##20_im*b##02_re; \
  c##01_re =    a##00_re*b##10_re - a##00_im*b##10_im + a##10_re*b##11_re - a##10_im*b##11_im + a##20_re*b##12_re - a##20_im*b##12_im; \
  c##01_im =  - a##00_re*b##10_im - a##00_im*b##10_re - a##10_re*b##11_im - a##10_im*b##11_re - a##20_re*b##12_im - a##20_im*b##12_re; \
  c##02_re =    a##00_re*b##20_re - a##00_im*b##20_im + a##10_re*b##21_re - a##10_im*b##21_im + a##20_re*b##22_re - a##20_im*b##22_im; \
  c##02_im =  - a##00_re*b##20_im - a##00_im*b##20_re - a##10_re*b##21_im - a##10_im*b##21_re - a##20_re*b##22_im - a##20_im*b##22_re; \
  c##10_re =    a##01_re*b##00_re - a##01_im*b##00_im + a##11_re*b##01_re - a##11_im*b##01_im + a##21_re*b##02_re - a##21_im*b##02_im; \
  c##10_im =  - a##01_re*b##00_im - a##01_im*b##00_re - a##11_re*b##01_im - a##11_im*b##01_re - a##21_re*b##02_im - a##21_im*b##02_re; \
  c##11_re =    a##01_re*b##10_re - a##01_im*b##10_im + a##11_re*b##11_re - a##11_im*b##11_im + a##21_re*b##12_re - a##21_im*b##12_im; \
  c##11_im =  - a##01_re*b##10_im - a##01_im*b##10_re - a##11_re*b##11_im - a##11_im*b##11_re - a##21_re*b##12_im - a##21_im*b##12_re; \
  c##12_re =    a##01_re*b##20_re - a##01_im*b##20_im + a##11_re*b##21_re - a##11_im*b##21_im + a##21_re*b##22_re - a##21_im*b##22_im; \
  c##12_im =  - a##01_re*b##20_im - a##01_im*b##20_re - a##11_re*b##21_im - a##11_im*b##21_re - a##21_re*b##22_im - a##21_im*b##22_re; \
  c##20_re =    a##02_re*b##00_re - a##02_im*b##00_im + a##12_re*b##01_re - a##12_im*b##01_im + a##22_re*b##02_re - a##22_im*b##02_im; \
  c##20_im =  - a##02_re*b##00_im - a##02_im*b##00_re - a##12_re*b##01_im - a##12_im*b##01_re - a##22_re*b##02_im - a##22_im*b##02_re; \
  c##21_re =    a##02_re*b##10_re - a##02_im*b##10_im + a##12_re*b##11_re - a##12_im*b##11_im + a##22_re*b##12_re - a##22_im*b##12_im; \
  c##21_im =  - a##02_re*b##10_im - a##02_im*b##10_re - a##12_re*b##11_im - a##12_im*b##11_re - a##22_re*b##12_im - a##22_im*b##12_re; \
  c##22_re =    a##02_re*b##20_re - a##02_im*b##20_im + a##12_re*b##21_re - a##12_im*b##21_im + a##22_re*b##22_re - a##22_im*b##22_im; \
  c##22_im =  - a##02_re*b##20_im - a##02_im*b##20_re - a##12_re*b##21_im - a##12_im*b##21_re - a##22_re*b##22_im - a##22_im*b##22_re; \
} while (0)
00691 
00692   // end of macros specific to hisq routines
00693 
00694 
// SCALAR_MULT_ADD_MATRIX(a, b, scalar, c): c = a + scalar * b, applied
// component by component to 3x3 complex matrices.
//
// a, b and c are identifier prefixes, not expressions: each one is pasted
// with the suffixes 00_re, 00_im, ..., 22_re, 22_im to name the 18 real
// components of a matrix.
//
// a, b   : input matrices (read only)
// scalar : factor applied to both the real and the imaginary part of every
//          component of b. It is parenthesized in the expansion so compound
//          expressions such as x + y scale b as a whole. It is expanded once
//          per component (18 times), so it must be free of side effects.
// c      : output matrix; every component is overwritten. Each component of
//          c depends only on the same component of a and b.
//
// Wrapped in do { ... } while (0): invoke with a terminating semicolon.
#define SCALAR_MULT_ADD_MATRIX(a, b, scalar, c) do { \
  c##00_re = a##00_re + (scalar)*b##00_re; \
  c##00_im = a##00_im + (scalar)*b##00_im; \
  c##01_re = a##01_re + (scalar)*b##01_re; \
  c##01_im = a##01_im + (scalar)*b##01_im; \
  c##02_re = a##02_re + (scalar)*b##02_re; \
  c##02_im = a##02_im + (scalar)*b##02_im; \
  c##10_re = a##10_re + (scalar)*b##10_re; \
  c##10_im = a##10_im + (scalar)*b##10_im; \
  c##11_re = a##11_re + (scalar)*b##11_re; \
  c##11_im = a##11_im + (scalar)*b##11_im; \
  c##12_re = a##12_re + (scalar)*b##12_re; \
  c##12_im = a##12_im + (scalar)*b##12_im; \
  c##20_re = a##20_re + (scalar)*b##20_re; \
  c##20_im = a##20_im + (scalar)*b##20_im; \
  c##21_re = a##21_re + (scalar)*b##21_re; \
  c##21_im = a##21_im + (scalar)*b##21_im; \
  c##22_re = a##22_re + (scalar)*b##22_re; \
  c##22_im = a##22_im + (scalar)*b##22_im; \
} while (0)
00715 
00716 #endif // GENERIC_MATRIX_MACROS
00717 
00718 #endif // _HISQ_FORCE_MACROS_H_
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines