QUDA  v0.7.0
A library for QCD on GPUs
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
hisq_force_macros.h
Go to the documentation of this file.
1 #ifndef _HISQ_FORCE_MACROS_H_
2 #define _HISQ_FORCE_MACROS_H_
3 
4 
/*
 * Per-element aliases for the 3x3 complex matrices used by the HISQ force code.
 *
 * NAMEij_re / NAMEij_im name the real / imaginary part of element (i, j),
 * where i is the first digit and j is the second digit of the suffix.
 *
 * Two storage layouts are selected at compile time:
 *
 *  - HISQ_RECONSTRUCT_LINKS not defined: one complex number per array slot.
 *    Element (i, j) lives in slot 3*i + j; `.x` is the real part and `.y`
 *    the imaginary part.  Aliases exist for LINK_W/X/Y/Z and for ab_link,
 *    bc_link, ad_link, de_link and ef_link.
 *
 *  - HISQ_RECONSTRUCT_LINKS defined: two complex numbers per array slot.
 *    With k = 3*i + j, element (i, j) lives in slot k/2; even k uses
 *    `.x`/`.y`, odd k uses `.z`/`.w`.  Only LINK_W/X/Y/Z are aliased in
 *    this branch.
 *
 * The arrays are not declared here: every alias simply expands to a member
 * access on whatever object of that name is in scope where it is used.
 * NOTE(review): the member names suggest CUDA float2/double2 (first layout)
 * and float4/double4 (second layout) -- confirm against the kernels.
 */
#ifndef HISQ_RECONSTRUCT_LINKS

#define LINK_W00_re LINK_W[0].x
#define LINK_W00_im LINK_W[0].y
#define LINK_W01_re LINK_W[1].x
#define LINK_W01_im LINK_W[1].y
#define LINK_W02_re LINK_W[2].x
#define LINK_W02_im LINK_W[2].y
#define LINK_W10_re LINK_W[3].x
#define LINK_W10_im LINK_W[3].y
#define LINK_W11_re LINK_W[4].x
#define LINK_W11_im LINK_W[4].y
#define LINK_W12_re LINK_W[5].x
#define LINK_W12_im LINK_W[5].y
#define LINK_W20_re LINK_W[6].x
#define LINK_W20_im LINK_W[6].y
#define LINK_W21_re LINK_W[7].x
#define LINK_W21_im LINK_W[7].y
#define LINK_W22_re LINK_W[8].x
#define LINK_W22_im LINK_W[8].y

#define LINK_X00_re LINK_X[0].x
#define LINK_X00_im LINK_X[0].y
#define LINK_X01_re LINK_X[1].x
#define LINK_X01_im LINK_X[1].y
#define LINK_X02_re LINK_X[2].x
#define LINK_X02_im LINK_X[2].y
#define LINK_X10_re LINK_X[3].x
#define LINK_X10_im LINK_X[3].y
#define LINK_X11_re LINK_X[4].x
#define LINK_X11_im LINK_X[4].y
#define LINK_X12_re LINK_X[5].x
#define LINK_X12_im LINK_X[5].y
#define LINK_X20_re LINK_X[6].x
#define LINK_X20_im LINK_X[6].y
#define LINK_X21_re LINK_X[7].x
#define LINK_X21_im LINK_X[7].y
#define LINK_X22_re LINK_X[8].x
#define LINK_X22_im LINK_X[8].y

#define LINK_Y00_re LINK_Y[0].x
#define LINK_Y00_im LINK_Y[0].y
#define LINK_Y01_re LINK_Y[1].x
#define LINK_Y01_im LINK_Y[1].y
#define LINK_Y02_re LINK_Y[2].x
#define LINK_Y02_im LINK_Y[2].y
#define LINK_Y10_re LINK_Y[3].x
#define LINK_Y10_im LINK_Y[3].y
#define LINK_Y11_re LINK_Y[4].x
#define LINK_Y11_im LINK_Y[4].y
#define LINK_Y12_re LINK_Y[5].x
#define LINK_Y12_im LINK_Y[5].y
#define LINK_Y20_re LINK_Y[6].x
#define LINK_Y20_im LINK_Y[6].y
#define LINK_Y21_re LINK_Y[7].x
#define LINK_Y21_im LINK_Y[7].y
#define LINK_Y22_re LINK_Y[8].x
#define LINK_Y22_im LINK_Y[8].y

#define LINK_Z00_re LINK_Z[0].x
#define LINK_Z00_im LINK_Z[0].y
#define LINK_Z01_re LINK_Z[1].x
#define LINK_Z01_im LINK_Z[1].y
#define LINK_Z02_re LINK_Z[2].x
#define LINK_Z02_im LINK_Z[2].y
#define LINK_Z10_re LINK_Z[3].x
#define LINK_Z10_im LINK_Z[3].y
#define LINK_Z11_re LINK_Z[4].x
#define LINK_Z11_im LINK_Z[4].y
#define LINK_Z12_re LINK_Z[5].x
#define LINK_Z12_im LINK_Z[5].y
#define LINK_Z20_re LINK_Z[6].x
#define LINK_Z20_im LINK_Z[6].y
#define LINK_Z21_re LINK_Z[7].x
#define LINK_Z21_im LINK_Z[7].y
#define LINK_Z22_re LINK_Z[8].x
#define LINK_Z22_im LINK_Z[8].y

#define ab_link00_re ab_link[0].x
#define ab_link00_im ab_link[0].y
#define ab_link01_re ab_link[1].x
#define ab_link01_im ab_link[1].y
#define ab_link02_re ab_link[2].x
#define ab_link02_im ab_link[2].y
#define ab_link10_re ab_link[3].x
#define ab_link10_im ab_link[3].y
#define ab_link11_re ab_link[4].x
#define ab_link11_im ab_link[4].y
#define ab_link12_re ab_link[5].x
#define ab_link12_im ab_link[5].y
#define ab_link20_re ab_link[6].x
#define ab_link20_im ab_link[6].y
#define ab_link21_re ab_link[7].x
#define ab_link21_im ab_link[7].y
#define ab_link22_re ab_link[8].x
#define ab_link22_im ab_link[8].y

#define bc_link00_re bc_link[0].x
#define bc_link00_im bc_link[0].y
#define bc_link01_re bc_link[1].x
#define bc_link01_im bc_link[1].y
#define bc_link02_re bc_link[2].x
#define bc_link02_im bc_link[2].y
#define bc_link10_re bc_link[3].x
#define bc_link10_im bc_link[3].y
#define bc_link11_re bc_link[4].x
#define bc_link11_im bc_link[4].y
#define bc_link12_re bc_link[5].x
#define bc_link12_im bc_link[5].y
#define bc_link20_re bc_link[6].x
#define bc_link20_im bc_link[6].y
#define bc_link21_re bc_link[7].x
#define bc_link21_im bc_link[7].y
#define bc_link22_re bc_link[8].x
#define bc_link22_im bc_link[8].y

#define ad_link00_re ad_link[0].x
#define ad_link00_im ad_link[0].y
#define ad_link01_re ad_link[1].x
#define ad_link01_im ad_link[1].y
#define ad_link02_re ad_link[2].x
#define ad_link02_im ad_link[2].y
#define ad_link10_re ad_link[3].x
#define ad_link10_im ad_link[3].y
#define ad_link11_re ad_link[4].x
#define ad_link11_im ad_link[4].y
#define ad_link12_re ad_link[5].x
#define ad_link12_im ad_link[5].y
#define ad_link20_re ad_link[6].x
#define ad_link20_im ad_link[6].y
#define ad_link21_re ad_link[7].x
#define ad_link21_im ad_link[7].y
#define ad_link22_re ad_link[8].x
#define ad_link22_im ad_link[8].y

#define de_link00_re de_link[0].x
#define de_link00_im de_link[0].y
#define de_link01_re de_link[1].x
#define de_link01_im de_link[1].y
#define de_link02_re de_link[2].x
#define de_link02_im de_link[2].y
#define de_link10_re de_link[3].x
#define de_link10_im de_link[3].y
#define de_link11_re de_link[4].x
#define de_link11_im de_link[4].y
#define de_link12_re de_link[5].x
#define de_link12_im de_link[5].y
#define de_link20_re de_link[6].x
#define de_link20_im de_link[6].y
#define de_link21_re de_link[7].x
#define de_link21_im de_link[7].y
#define de_link22_re de_link[8].x
#define de_link22_im de_link[8].y

#define ef_link00_re ef_link[0].x
#define ef_link00_im ef_link[0].y
#define ef_link01_re ef_link[1].x
#define ef_link01_im ef_link[1].y
#define ef_link02_re ef_link[2].x
#define ef_link02_im ef_link[2].y
#define ef_link10_re ef_link[3].x
#define ef_link10_im ef_link[3].y
#define ef_link11_re ef_link[4].x
#define ef_link11_im ef_link[4].y
#define ef_link12_re ef_link[5].x
#define ef_link12_im ef_link[5].y
#define ef_link20_re ef_link[6].x
#define ef_link20_im ef_link[6].y
#define ef_link21_re ef_link[7].x
#define ef_link21_im ef_link[7].y
#define ef_link22_re ef_link[8].x
#define ef_link22_im ef_link[8].y


#else // HISQ_RECONSTRUCT_LINKS

#define LINK_W00_re LINK_W[0].x
#define LINK_W00_im LINK_W[0].y
#define LINK_W01_re LINK_W[0].z
#define LINK_W01_im LINK_W[0].w
#define LINK_W02_re LINK_W[1].x
#define LINK_W02_im LINK_W[1].y
#define LINK_W10_re LINK_W[1].z
#define LINK_W10_im LINK_W[1].w
#define LINK_W11_re LINK_W[2].x
#define LINK_W11_im LINK_W[2].y
#define LINK_W12_re LINK_W[2].z
#define LINK_W12_im LINK_W[2].w
#define LINK_W20_re LINK_W[3].x
#define LINK_W20_im LINK_W[3].y
#define LINK_W21_re LINK_W[3].z
#define LINK_W21_im LINK_W[3].w
#define LINK_W22_re LINK_W[4].x
#define LINK_W22_im LINK_W[4].y


#define LINK_X00_re LINK_X[0].x
#define LINK_X00_im LINK_X[0].y
#define LINK_X01_re LINK_X[0].z
#define LINK_X01_im LINK_X[0].w
#define LINK_X02_re LINK_X[1].x
#define LINK_X02_im LINK_X[1].y
#define LINK_X10_re LINK_X[1].z
#define LINK_X10_im LINK_X[1].w
#define LINK_X11_re LINK_X[2].x
#define LINK_X11_im LINK_X[2].y
#define LINK_X12_re LINK_X[2].z
#define LINK_X12_im LINK_X[2].w
#define LINK_X20_re LINK_X[3].x
#define LINK_X20_im LINK_X[3].y
#define LINK_X21_re LINK_X[3].z
#define LINK_X21_im LINK_X[3].w
#define LINK_X22_re LINK_X[4].x
#define LINK_X22_im LINK_X[4].y


#define LINK_Y00_re LINK_Y[0].x
#define LINK_Y00_im LINK_Y[0].y
#define LINK_Y01_re LINK_Y[0].z
#define LINK_Y01_im LINK_Y[0].w
#define LINK_Y02_re LINK_Y[1].x
#define LINK_Y02_im LINK_Y[1].y
#define LINK_Y10_re LINK_Y[1].z
#define LINK_Y10_im LINK_Y[1].w
#define LINK_Y11_re LINK_Y[2].x
#define LINK_Y11_im LINK_Y[2].y
#define LINK_Y12_re LINK_Y[2].z
#define LINK_Y12_im LINK_Y[2].w
#define LINK_Y20_re LINK_Y[3].x
#define LINK_Y20_im LINK_Y[3].y
#define LINK_Y21_re LINK_Y[3].z
#define LINK_Y21_im LINK_Y[3].w
#define LINK_Y22_re LINK_Y[4].x
#define LINK_Y22_im LINK_Y[4].y


#define LINK_Z00_re LINK_Z[0].x
#define LINK_Z00_im LINK_Z[0].y
#define LINK_Z01_re LINK_Z[0].z
#define LINK_Z01_im LINK_Z[0].w
#define LINK_Z02_re LINK_Z[1].x
#define LINK_Z02_im LINK_Z[1].y
#define LINK_Z10_re LINK_Z[1].z
#define LINK_Z10_im LINK_Z[1].w
#define LINK_Z11_re LINK_Z[2].x
#define LINK_Z11_im LINK_Z[2].y
#define LINK_Z12_re LINK_Z[2].z
#define LINK_Z12_im LINK_Z[2].w
#define LINK_Z20_re LINK_Z[3].x
#define LINK_Z20_im LINK_Z[3].y
#define LINK_Z21_re LINK_Z[3].z
#define LINK_Z21_im LINK_Z[3].w
#define LINK_Z22_re LINK_Z[4].x
#define LINK_Z22_im LINK_Z[4].y

#endif // HISQ_RECONSTRUCT_LINKS
261 
262 #ifndef GENERIC_MATRIX_MACROS
263 #define GENERIC_MATRIX_MACROS
264 
// Color matrices stored as an array of float2 or double2
//
// COLOR_MAT_<N>ij_re / COLOR_MAT_<N>ij_im (N in T, U, V, W, X, Y, Z) name the
// real / imaginary part of element (i, j) of the 3x3 matrix COLOR_MAT_<N>.
// Element (i, j) lives in array slot 3*i + j; `.x` holds the real part and
// `.y` the imaginary part.  The arrays are not declared here: each alias
// expands to a member access on whatever COLOR_MAT_<N> is in scope at the
// point of use.

#define COLOR_MAT_T00_re COLOR_MAT_T[0].x
#define COLOR_MAT_T00_im COLOR_MAT_T[0].y
#define COLOR_MAT_T01_re COLOR_MAT_T[1].x
#define COLOR_MAT_T01_im COLOR_MAT_T[1].y
#define COLOR_MAT_T02_re COLOR_MAT_T[2].x
#define COLOR_MAT_T02_im COLOR_MAT_T[2].y
#define COLOR_MAT_T10_re COLOR_MAT_T[3].x
#define COLOR_MAT_T10_im COLOR_MAT_T[3].y
#define COLOR_MAT_T11_re COLOR_MAT_T[4].x
#define COLOR_MAT_T11_im COLOR_MAT_T[4].y
#define COLOR_MAT_T12_re COLOR_MAT_T[5].x
#define COLOR_MAT_T12_im COLOR_MAT_T[5].y
#define COLOR_MAT_T20_re COLOR_MAT_T[6].x
#define COLOR_MAT_T20_im COLOR_MAT_T[6].y
#define COLOR_MAT_T21_re COLOR_MAT_T[7].x
#define COLOR_MAT_T21_im COLOR_MAT_T[7].y
#define COLOR_MAT_T22_re COLOR_MAT_T[8].x
#define COLOR_MAT_T22_im COLOR_MAT_T[8].y


#define COLOR_MAT_U00_re COLOR_MAT_U[0].x
#define COLOR_MAT_U00_im COLOR_MAT_U[0].y
#define COLOR_MAT_U01_re COLOR_MAT_U[1].x
#define COLOR_MAT_U01_im COLOR_MAT_U[1].y
#define COLOR_MAT_U02_re COLOR_MAT_U[2].x
#define COLOR_MAT_U02_im COLOR_MAT_U[2].y
#define COLOR_MAT_U10_re COLOR_MAT_U[3].x
#define COLOR_MAT_U10_im COLOR_MAT_U[3].y
#define COLOR_MAT_U11_re COLOR_MAT_U[4].x
#define COLOR_MAT_U11_im COLOR_MAT_U[4].y
#define COLOR_MAT_U12_re COLOR_MAT_U[5].x
#define COLOR_MAT_U12_im COLOR_MAT_U[5].y
#define COLOR_MAT_U20_re COLOR_MAT_U[6].x
#define COLOR_MAT_U20_im COLOR_MAT_U[6].y
#define COLOR_MAT_U21_re COLOR_MAT_U[7].x
#define COLOR_MAT_U21_im COLOR_MAT_U[7].y
#define COLOR_MAT_U22_re COLOR_MAT_U[8].x
#define COLOR_MAT_U22_im COLOR_MAT_U[8].y


#define COLOR_MAT_V00_re COLOR_MAT_V[0].x
#define COLOR_MAT_V00_im COLOR_MAT_V[0].y
#define COLOR_MAT_V01_re COLOR_MAT_V[1].x
#define COLOR_MAT_V01_im COLOR_MAT_V[1].y
#define COLOR_MAT_V02_re COLOR_MAT_V[2].x
#define COLOR_MAT_V02_im COLOR_MAT_V[2].y
#define COLOR_MAT_V10_re COLOR_MAT_V[3].x
#define COLOR_MAT_V10_im COLOR_MAT_V[3].y
#define COLOR_MAT_V11_re COLOR_MAT_V[4].x
#define COLOR_MAT_V11_im COLOR_MAT_V[4].y
#define COLOR_MAT_V12_re COLOR_MAT_V[5].x
#define COLOR_MAT_V12_im COLOR_MAT_V[5].y
#define COLOR_MAT_V20_re COLOR_MAT_V[6].x
#define COLOR_MAT_V20_im COLOR_MAT_V[6].y
#define COLOR_MAT_V21_re COLOR_MAT_V[7].x
#define COLOR_MAT_V21_im COLOR_MAT_V[7].y
#define COLOR_MAT_V22_re COLOR_MAT_V[8].x
#define COLOR_MAT_V22_im COLOR_MAT_V[8].y




#define COLOR_MAT_W00_re COLOR_MAT_W[0].x
#define COLOR_MAT_W00_im COLOR_MAT_W[0].y
#define COLOR_MAT_W01_re COLOR_MAT_W[1].x
#define COLOR_MAT_W01_im COLOR_MAT_W[1].y
#define COLOR_MAT_W02_re COLOR_MAT_W[2].x
#define COLOR_MAT_W02_im COLOR_MAT_W[2].y
#define COLOR_MAT_W10_re COLOR_MAT_W[3].x
#define COLOR_MAT_W10_im COLOR_MAT_W[3].y
#define COLOR_MAT_W11_re COLOR_MAT_W[4].x
#define COLOR_MAT_W11_im COLOR_MAT_W[4].y
#define COLOR_MAT_W12_re COLOR_MAT_W[5].x
#define COLOR_MAT_W12_im COLOR_MAT_W[5].y
#define COLOR_MAT_W20_re COLOR_MAT_W[6].x
#define COLOR_MAT_W20_im COLOR_MAT_W[6].y
#define COLOR_MAT_W21_re COLOR_MAT_W[7].x
#define COLOR_MAT_W21_im COLOR_MAT_W[7].y
#define COLOR_MAT_W22_re COLOR_MAT_W[8].x
#define COLOR_MAT_W22_im COLOR_MAT_W[8].y


#define COLOR_MAT_X00_re COLOR_MAT_X[0].x
#define COLOR_MAT_X00_im COLOR_MAT_X[0].y
#define COLOR_MAT_X01_re COLOR_MAT_X[1].x
#define COLOR_MAT_X01_im COLOR_MAT_X[1].y
#define COLOR_MAT_X02_re COLOR_MAT_X[2].x
#define COLOR_MAT_X02_im COLOR_MAT_X[2].y
#define COLOR_MAT_X10_re COLOR_MAT_X[3].x
#define COLOR_MAT_X10_im COLOR_MAT_X[3].y
#define COLOR_MAT_X11_re COLOR_MAT_X[4].x
#define COLOR_MAT_X11_im COLOR_MAT_X[4].y
#define COLOR_MAT_X12_re COLOR_MAT_X[5].x
#define COLOR_MAT_X12_im COLOR_MAT_X[5].y
#define COLOR_MAT_X20_re COLOR_MAT_X[6].x
#define COLOR_MAT_X20_im COLOR_MAT_X[6].y
#define COLOR_MAT_X21_re COLOR_MAT_X[7].x
#define COLOR_MAT_X21_im COLOR_MAT_X[7].y
#define COLOR_MAT_X22_re COLOR_MAT_X[8].x
#define COLOR_MAT_X22_im COLOR_MAT_X[8].y


#define COLOR_MAT_Y00_re COLOR_MAT_Y[0].x
#define COLOR_MAT_Y00_im COLOR_MAT_Y[0].y
#define COLOR_MAT_Y01_re COLOR_MAT_Y[1].x
#define COLOR_MAT_Y01_im COLOR_MAT_Y[1].y
#define COLOR_MAT_Y02_re COLOR_MAT_Y[2].x
#define COLOR_MAT_Y02_im COLOR_MAT_Y[2].y
#define COLOR_MAT_Y10_re COLOR_MAT_Y[3].x
#define COLOR_MAT_Y10_im COLOR_MAT_Y[3].y
#define COLOR_MAT_Y11_re COLOR_MAT_Y[4].x
#define COLOR_MAT_Y11_im COLOR_MAT_Y[4].y
#define COLOR_MAT_Y12_re COLOR_MAT_Y[5].x
#define COLOR_MAT_Y12_im COLOR_MAT_Y[5].y
#define COLOR_MAT_Y20_re COLOR_MAT_Y[6].x
#define COLOR_MAT_Y20_im COLOR_MAT_Y[6].y
#define COLOR_MAT_Y21_re COLOR_MAT_Y[7].x
#define COLOR_MAT_Y21_im COLOR_MAT_Y[7].y
#define COLOR_MAT_Y22_re COLOR_MAT_Y[8].x
#define COLOR_MAT_Y22_im COLOR_MAT_Y[8].y


#define COLOR_MAT_Z00_re COLOR_MAT_Z[0].x
#define COLOR_MAT_Z00_im COLOR_MAT_Z[0].y
#define COLOR_MAT_Z01_re COLOR_MAT_Z[1].x
#define COLOR_MAT_Z01_im COLOR_MAT_Z[1].y
#define COLOR_MAT_Z02_re COLOR_MAT_Z[2].x
#define COLOR_MAT_Z02_im COLOR_MAT_Z[2].y
#define COLOR_MAT_Z10_re COLOR_MAT_Z[3].x
#define COLOR_MAT_Z10_im COLOR_MAT_Z[3].y
#define COLOR_MAT_Z11_re COLOR_MAT_Z[4].x
#define COLOR_MAT_Z11_im COLOR_MAT_Z[4].y
#define COLOR_MAT_Z12_re COLOR_MAT_Z[5].x
#define COLOR_MAT_Z12_im COLOR_MAT_Z[5].y
#define COLOR_MAT_Z20_re COLOR_MAT_Z[6].x
#define COLOR_MAT_Z20_im COLOR_MAT_Z[6].y
#define COLOR_MAT_Z21_re COLOR_MAT_Z[7].x
#define COLOR_MAT_Z21_im COLOR_MAT_Z[7].y
#define COLOR_MAT_Z22_re COLOR_MAT_Z[8].x
#define COLOR_MAT_Z22_im COLOR_MAT_Z[8].y
407 
/*
 * FF_RECONSTRUCT_LINK_12(var, sign)
 *
 * Fills the third row of the 3x3 complex matrix `var` (elements 20, 21, 22)
 * from its first two rows and then multiplies both parts of each new element
 * by `sign`.
 *
 * Each third-row element is produced by one ACC_CONJ_PROD_ASSIGN followed by
 * one ACC_CONJ_PROD; both helpers are defined outside this block.
 * NOTE(review): the operand pattern looks like the conjugated cross product
 * of rows 0 and 1 -- confirm against the ACC_CONJ_PROD* definitions.
 *
 *   var  : matrix name prefix; element names are formed by token pasting
 *          (var##20, var##20_re, ...).
 *   sign : scalar factor; it is evaluated six times, so pass an expression
 *          without side effects.
 *
 * The macro expands to a sequence of statements and is NOT wrapped in
 * do { } while (0): do not use it as the body of an unbraced if/else/loop.
 */
#define FF_RECONSTRUCT_LINK_12(var, sign) \
  ACC_CONJ_PROD_ASSIGN(var##20, +var##01, +var##12); \
  ACC_CONJ_PROD(var##20, -var##02, +var##11); \
  ACC_CONJ_PROD_ASSIGN(var##21, +var##02, +var##10); \
  ACC_CONJ_PROD(var##21, -var##00, +var##12); \
  ACC_CONJ_PROD_ASSIGN(var##22, +var##00, +var##11); \
  ACC_CONJ_PROD(var##22, -var##01, +var##10); \
  var##20_re *= (sign); var##20_im *= (sign); \
  var##21_re *= (sign); var##21_im *= (sign); \
  var##22_re *= (sign); var##22_im *= (sign);
417 
418 
419 
/*
 * HISQ_LOAD_MATRIX_18_SINGLE_TEX(gauge, dir, idx, var, stride)
 *
 * Loads the nine entries var[0..8] with tex1Dfetch() from `gauge`.
 * Entry k is read from element  idx + dir*stride*9 + k*stride.
 *
 *   gauge  : texture handle passed straight to tex1Dfetch
 *   dir    : direction index; selects a group of 9*stride elements
 *   idx    : site offset inside that group
 *   var    : destination array with at least 9 entries
 *   stride : distance between consecutive entries of the same site
 *
 * All arguments are parenthesized in the expansion so expression arguments
 * (e.g. `mu + 1`) address the intended element.  `dir`, `idx` and `stride`
 * are evaluated several times; pass expressions without side effects.
 */
#define HISQ_LOAD_MATRIX_18_SINGLE_TEX(gauge, dir, idx, var, stride) do { \
    (var)[0] = tex1Dfetch((gauge), (idx) + (dir)*(stride)*9); \
    (var)[1] = tex1Dfetch((gauge), (idx) + (dir)*(stride)*9 + (stride)); \
    (var)[2] = tex1Dfetch((gauge), (idx) + (dir)*(stride)*9 + (stride)*2); \
    (var)[3] = tex1Dfetch((gauge), (idx) + (dir)*(stride)*9 + (stride)*3); \
    (var)[4] = tex1Dfetch((gauge), (idx) + (dir)*(stride)*9 + (stride)*4); \
    (var)[5] = tex1Dfetch((gauge), (idx) + (dir)*(stride)*9 + (stride)*5); \
    (var)[6] = tex1Dfetch((gauge), (idx) + (dir)*(stride)*9 + (stride)*6); \
    (var)[7] = tex1Dfetch((gauge), (idx) + (dir)*(stride)*9 + (stride)*7); \
    (var)[8] = tex1Dfetch((gauge), (idx) + (dir)*(stride)*9 + (stride)*8); \
  } while(0)
431 
/*
 * HISQ_LOAD_MATRIX_12_SINGLE_TEX(gauge, dir, idx, var, stride)
 *
 * Performs three float4 fetches with tex1Dfetch() at
 *   idx + dir*stride*3 + {0, 1, 2}*stride
 * and splits each into two float2 values, filling var[0..5]:
 * (x, y) goes to the even entry and (z, w) to the following odd entry.
 * Only var[0..5] are written here.
 * NOTE(review): presumably the remaining entries are reconstructed by the
 * caller -- confirm at the call sites.
 *
 *   gauge  : texture handle passed straight to tex1Dfetch
 *   dir    : direction index; selects a group of 3*stride float4 elements
 *   idx    : site offset inside that group
 *   var    : destination float2 array with at least 6 entries
 *   stride : distance between consecutive float4 elements of the same site
 *
 * Arguments are parenthesized in the expansion, and the scratch variable has
 * a macro-specific name so that an argument mentioning a caller variable
 * called `tmp` is not captured by the macro's own local.
 */
#define HISQ_LOAD_MATRIX_12_SINGLE_TEX(gauge, dir, idx, var, stride) do { \
    float4 hisq_ld12_tmp_; \
    hisq_ld12_tmp_ = tex1Dfetch((gauge), (idx) + (dir)*(stride)*3); \
    (var)[0] = make_float2(hisq_ld12_tmp_.x, hisq_ld12_tmp_.y); \
    (var)[1] = make_float2(hisq_ld12_tmp_.z, hisq_ld12_tmp_.w); \
    hisq_ld12_tmp_ = tex1Dfetch((gauge), (idx) + (dir)*(stride)*3 + (stride)); \
    (var)[2] = make_float2(hisq_ld12_tmp_.x, hisq_ld12_tmp_.y); \
    (var)[3] = make_float2(hisq_ld12_tmp_.z, hisq_ld12_tmp_.w); \
    hisq_ld12_tmp_ = tex1Dfetch((gauge), (idx) + (dir)*(stride)*3 + 2*(stride)); \
    (var)[4] = make_float2(hisq_ld12_tmp_.x, hisq_ld12_tmp_.y); \
    (var)[5] = make_float2(hisq_ld12_tmp_.z, hisq_ld12_tmp_.w); \
  } while(0)
444 
/*
 * HISQ_LOAD_MATRIX_18_DOUBLE_TEX(gauge_tex, gauge, dir, idx, var, stride)
 *
 * Loads the nine entries var[0..8] through READ_DOUBLE2_TEXTURE (defined
 * outside this block).  Entry k is read from element
 *   idx + dir*stride*9 + k*stride.
 *
 *   gauge_tex, gauge : forwarded unchanged to READ_DOUBLE2_TEXTURE
 *   dir    : direction index; selects a group of 9*stride elements
 *   idx    : site offset inside that group
 *   var    : destination array with at least 9 entries
 *   stride : distance between consecutive entries of the same site
 *
 * `dir`, `idx`, `stride` and `var` are parenthesized in the expansion so
 * expression arguments address the intended element; they are evaluated
 * several times, so pass expressions without side effects.
 */
#define HISQ_LOAD_MATRIX_18_DOUBLE_TEX(gauge_tex, gauge, dir, idx, var, stride) do { \
    (var)[0] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*9); \
    (var)[1] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*9 + (stride)); \
    (var)[2] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*9 + (stride)*2); \
    (var)[3] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*9 + (stride)*3); \
    (var)[4] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*9 + (stride)*4); \
    (var)[5] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*9 + (stride)*5); \
    (var)[6] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*9 + (stride)*6); \
    (var)[7] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*9 + (stride)*7); \
    (var)[8] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*9 + (stride)*8); \
  } while(0)
456 
/*
 * HISQ_LOAD_MATRIX_12_DOUBLE_TEX(gauge_tex, gauge, dir, idx, var, stride)
 *
 * Loads the six entries var[0..5] through READ_DOUBLE2_TEXTURE (defined
 * outside this block).  Entry k is read from element
 *   idx + dir*stride*6 + k*stride.
 * Only var[0..5] are written here.
 * NOTE(review): presumably the remaining entries are reconstructed by the
 * caller -- confirm at the call sites.
 *
 *   gauge_tex, gauge : forwarded unchanged to READ_DOUBLE2_TEXTURE
 *   dir    : direction index; selects a group of 6*stride elements
 *   idx    : site offset inside that group
 *   var    : destination array with at least 6 entries
 *   stride : distance between consecutive entries of the same site
 *
 * `dir`, `idx`, `stride` and `var` are parenthesized in the expansion so
 * expression arguments address the intended element; they are evaluated
 * several times, so pass expressions without side effects.
 */
#define HISQ_LOAD_MATRIX_12_DOUBLE_TEX(gauge_tex, gauge, dir, idx, var, stride) do { \
    (var)[0] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*6); \
    (var)[1] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*6 + (stride)); \
    (var)[2] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*6 + (stride)*2); \
    (var)[3] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*6 + (stride)*3); \
    (var)[4] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*6 + (stride)*4); \
    (var)[5] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*6 + (stride)*5); \
  } while(0)
465 
/*
 * FF_COMPUTE_NEW_FULL_IDX_PLUS_UPDATE(mydir, idx, new_idx)
 *
 * Moves one step in the positive direction `mydir` (0..3): stores the
 * neighbouring linear index in `new_idx` and updates new_x[mydir] in place.
 * `new_x` and the lattice constants are not parameters; they must be in
 * scope at the point of use.  A `mydir` outside 0..3 matches no case.
 *
 * MULTI_GPU variant
 *   Uses xcomm/ycomm/zcomm/tcomm, X1..X4, X1m1..X4m1, E1, E2E1, E3E2E1, E[].
 *   If the direction's comm flag is set, or new_x[d] != X<d>+1, the index
 *   advances by that direction's step (1, E1, E2E1, E3E2E1) and new_x[d] is
 *   incremented.  Otherwise the index moves back by X<d>m1 steps and new_x[d]
 *   becomes 2.
 *   NOTE(review): this looks like wrap-around inside an extended lattice
 *   with a 2-site border -- confirm against where E[] is set up.
 *   WARNING: ends with `if (new_x[mydir] >= E[mydir]) return;`, i.e. it can
 *   return from the ENCLOSING function, which must therefore return void.
 *
 * Single-GPU variant
 *   Uses X1, X2X1, X3X2X1, X1m1..X4m1, X2X1mX1, X3X2X1mX2X1,
 *   X4X3X2X1mX3X2X1.  If new_x[d] == X<d>m1 the coordinate is reset to 0 and
 *   the index is moved back by the matching wrap offset; otherwise the
 *   coordinate is incremented and the index advances by the direction's step.
 *
 * `idx` and `new_idx` are parenthesized in the expansion so that expression
 * arguments (including conditional expressions) are handled correctly.
 * `idx` is evaluated once per call, in the selected arm.
 */
#ifdef MULTI_GPU

#define FF_COMPUTE_NEW_FULL_IDX_PLUS_UPDATE(mydir, idx, new_idx) do { \
    switch (mydir) { \
    case 0: \
      (new_idx) = (xcomm || (new_x[0] != X1+1)) ? ((idx)+1) : ((idx)-X1m1); \
      new_x[0] = (xcomm || (new_x[0] != X1+1)) ? (new_x[0]+1) : 2; \
      break; \
    case 1: \
      (new_idx) = (ycomm || (new_x[1] != X2+1)) ? ((idx)+E1) : ((idx)-X2m1*E1); \
      new_x[1] = (ycomm || (new_x[1] != X2+1)) ? (new_x[1]+1) : 2; \
      break; \
    case 2: \
      (new_idx) = (zcomm || (new_x[2] != X3+1)) ? ((idx)+E2E1) : ((idx)-X3m1*E2E1); \
      new_x[2] = (zcomm || (new_x[2] != X3+1)) ? (new_x[2]+1) : 2; \
      break; \
    case 3: \
      (new_idx) = (tcomm || (new_x[3] != X4+1)) ? ((idx)+E3E2E1) : ((idx)-X4m1*E3E2E1); \
      new_x[3] = (tcomm || (new_x[3] != X4+1)) ? (new_x[3]+1) : 2; \
      break; \
    } \
    if (new_x[mydir] >= E[mydir]) return; \
  } while(0)



#else
#define FF_COMPUTE_NEW_FULL_IDX_PLUS_UPDATE(mydir, idx, new_idx) do { \
    switch (mydir) { \
    case 0: \
      (new_idx) = (new_x[0]==X1m1) ? ((idx)-X1m1) : ((idx)+1); \
      new_x[0] = (new_x[0]==X1m1) ? 0 : new_x[0]+1; \
      break; \
    case 1: \
      (new_idx) = (new_x[1]==X2m1) ? ((idx)-X2X1mX1) : ((idx)+X1); \
      new_x[1] = (new_x[1]==X2m1) ? 0 : new_x[1]+1; \
      break; \
    case 2: \
      (new_idx) = (new_x[2]==X3m1) ? ((idx)-X3X2X1mX2X1) : ((idx)+X2X1); \
      new_x[2] = (new_x[2]==X3m1) ? 0 : new_x[2]+1; \
      break; \
    case 3: \
      (new_idx) = (new_x[3]==X4m1) ? ((idx)-X4X3X2X1mX3X2X1) : ((idx)+X3X2X1); \
      new_x[3] = (new_x[3]==X4m1) ? 0 : new_x[3]+1; \
      break; \
    } \
  } while(0)
#endif
514 
/*
 * FF_COMPUTE_NEW_FULL_IDX_MINUS_UPDATE(mydir, idx, new_idx)
 *
 * Moves one step in the negative direction `mydir` (0..3): stores the
 * neighbouring linear index in `new_idx` and updates new_x[mydir] in place.
 * `new_x` and the lattice constants are not parameters; they must be in
 * scope at the point of use.  A `mydir` outside 0..3 matches no case.
 *
 * MULTI_GPU variant
 *   Uses xcomm/ycomm/zcomm/tcomm, X1..X4, X1m1..X4m1, E1, E2E1, E3E2E1.
 *   If the direction's comm flag is set, or new_x[d] != 2, the index moves
 *   back by that direction's step (1, E1, E2E1, E3E2E1) and new_x[d] is
 *   decremented.  Otherwise the index advances by X<d>m1 steps and new_x[d]
 *   becomes X<d>+1.
 *   NOTE(review): this looks like wrap-around inside an extended lattice
 *   with a 2-site border -- confirm against the kernel set-up code.
 *   WARNING: ends with `if (new_x[mydir] < 0) return;`, i.e. it can return
 *   from the ENCLOSING function, which must therefore return void.
 *
 * Single-GPU variant
 *   Uses X1, X2X1, X3X2X1, X1m1..X4m1, X2X1mX1, X3X2X1mX2X1,
 *   X4X3X2X1mX3X2X1.  If new_x[d] == 0 the coordinate becomes X<d>m1 and the
 *   index advances by the matching wrap offset; otherwise the coordinate is
 *   decremented and the index moves back by the direction's step.
 *
 * `idx` and `new_idx` are parenthesized in the expansion so that expression
 * arguments (including conditional expressions) are handled correctly.
 * `idx` is evaluated once per call, in the selected arm.
 */
#ifdef MULTI_GPU


#define FF_COMPUTE_NEW_FULL_IDX_MINUS_UPDATE(mydir, idx, new_idx) do { \
    switch (mydir) { \
    case 0: \
      (new_idx) = (xcomm || (new_x[0] != 2)) ? ((idx)-1) : ((idx)+X1m1); \
      new_x[0] = (xcomm || (new_x[0] != 2)) ? (new_x[0]-1) : (X1+1); \
      break; \
    case 1: \
      (new_idx) = (ycomm || (new_x[1] != 2)) ? ((idx)-E1) : ((idx)+X2m1*E1); \
      new_x[1] = (ycomm || (new_x[1] != 2)) ? (new_x[1]-1) : (X2+1); \
      break; \
    case 2: \
      (new_idx) = (zcomm || (new_x[2] != 2)) ? ((idx)-E2E1) : ((idx)+X3m1*E2E1); \
      new_x[2] = (zcomm || (new_x[2] != 2)) ? (new_x[2]-1) : (X3+1); \
      break; \
    case 3: \
      (new_idx) = (tcomm || (new_x[3] != 2)) ? ((idx)-E3E2E1) : ((idx)+X4m1*E3E2E1); \
      new_x[3] = (tcomm || (new_x[3] != 2)) ? (new_x[3]-1) : (X4+1); \
      break; \
    } \
    if (new_x[mydir] < 0) return; \
  } while(0)

#else
#define FF_COMPUTE_NEW_FULL_IDX_MINUS_UPDATE(mydir, idx, new_idx) do { \
    switch (mydir) { \
    case 0: \
      (new_idx) = (new_x[0]==0) ? ((idx)+X1m1) : ((idx)-1); \
      new_x[0] = (new_x[0]==0) ? X1m1 : new_x[0] - 1; \
      break; \
    case 1: \
      (new_idx) = (new_x[1]==0) ? ((idx)+X2X1mX1) : ((idx)-X1); \
      new_x[1] = (new_x[1]==0) ? X2m1 : new_x[1] - 1; \
      break; \
    case 2: \
      (new_idx) = (new_x[2]==0) ? ((idx)+X3X2X1mX2X1) : ((idx)-X2X1); \
      new_x[2] = (new_x[2]==0) ? X3m1 : new_x[2] - 1; \
      break; \
    case 3: \
      (new_idx) = (new_x[3]==0) ? ((idx)+X4X3X2X1mX3X2X1) : ((idx)-X3X2X1); \
      new_x[3] = (new_x[3]==0) ? X4m1 : new_x[3] - 1; \
      break; \
    } \
  } while(0)
#endif
562 
563 
// matrix macros:

/*
 * ADJ_MAT(a, b)
 *
 * Writes the conjugate transpose of the 3x3 complex matrix `a` into `b`:
 * b(i, j) = conj(a(j, i)).
 *
 *   a, b : matrix name prefixes; elements are addressed by token pasting as
 *          <prefix>ij_re / <prefix>ij_im.
 *
 * `a` and `b` must be different matrices: off-diagonal elements of `b` are
 * written before the mirrored elements of `a` are read, so ADJ_MAT(m, m)
 * does not produce the adjoint.
 *
 * The macro expands to a sequence of statements (including the final `;`)
 * and is NOT wrapped in do { } while (0): do not use it as the body of an
 * unbraced if/else/loop.
 */
#define ADJ_MAT(a, b) \
  b##00_re = a##00_re; \
  b##00_im = -a##00_im; \
  b##01_re = a##10_re; \
  b##01_im = -a##10_im; \
  b##02_re = a##20_re; \
  b##02_im = -a##20_im; \
  b##10_re = a##01_re; \
  b##10_im = -a##01_im; \
  b##11_re = a##11_re; \
  b##11_im = -a##11_im; \
  b##12_re = a##21_re; \
  b##12_im = -a##21_im; \
  b##20_re = a##02_re; \
  b##20_im = -a##02_im; \
  b##21_re = a##12_re; \
  b##21_im = -a##12_im; \
  b##22_re = a##22_re; \
  b##22_im = -a##22_im;
584 
585 
/*
 * ASSIGN_MAT(a, b)
 *
 * Copies the 3x3 complex matrix `a` into `b` element by element
 * (note the direction: source first, destination second).
 *
 *   a, b : matrix name prefixes; elements are addressed by token pasting as
 *          <prefix>ij_re / <prefix>ij_im.
 *
 * The macro expands to a sequence of statements (including the final `;`)
 * and is NOT wrapped in do { } while (0): do not use it as the body of an
 * unbraced if/else/loop.
 *
 * The last line deliberately has no trailing backslash, so whatever follows
 * the definition can never be spliced into the macro body.
 */
#define ASSIGN_MAT(a, b) \
  b##00_re = a##00_re; \
  b##00_im = a##00_im; \
  b##01_re = a##01_re; \
  b##01_im = a##01_im; \
  b##02_re = a##02_re; \
  b##02_im = a##02_im; \
  b##10_re = a##10_re; \
  b##10_im = a##10_im; \
  b##11_re = a##11_re; \
  b##11_im = a##11_im; \
  b##12_re = a##12_re; \
  b##12_im = a##12_im; \
  b##20_re = a##20_re; \
  b##20_im = a##20_im; \
  b##21_re = a##21_re; \
  b##21_im = a##21_im; \
  b##22_re = a##22_re; \
  b##22_im = a##22_im;
606 
607 
/*
 * MATRIX_PRODUCT(a, b, simple, c)
 *
 * 3x3 complex matrix product written out element by element.
 *
 *   simple != 0 :  c = a * b
 *                  c(i, j) = sum_k a(i, k) * b(k, j)
 *   simple == 0 :  c = adjoint(a) * b
 *                  c(i, j) = sum_k conj(a(k, i)) * b(k, j)
 *
 *   a, b, c : matrix name prefixes; elements are addressed by token pasting
 *             as <prefix>ij_re / <prefix>ij_im.
 *   simple  : run-time (or compile-time constant) selector, tested once.
 *
 * `c` must not be the same matrix as `a` or `b`: elements of `c` are stored
 * while `a` and `b` are still being read.
 */
#define MATRIX_PRODUCT(a, b, simple, c) do { \
    if (simple) { \
      c##00_re = a##00_re*b##00_re - a##00_im*b##00_im + a##01_re*b##10_re - a##01_im*b##10_im + a##02_re*b##20_re - a##02_im*b##20_im; \
      c##00_im = a##00_re*b##00_im + a##00_im*b##00_re + a##01_re*b##10_im + a##01_im*b##10_re + a##02_re*b##20_im + a##02_im*b##20_re; \
      c##01_re = a##00_re*b##01_re - a##00_im*b##01_im + a##01_re*b##11_re - a##01_im*b##11_im + a##02_re*b##21_re - a##02_im*b##21_im; \
      c##01_im = a##00_re*b##01_im + a##00_im*b##01_re + a##01_re*b##11_im + a##01_im*b##11_re + a##02_re*b##21_im + a##02_im*b##21_re; \
      c##02_re = a##00_re*b##02_re - a##00_im*b##02_im + a##01_re*b##12_re - a##01_im*b##12_im + a##02_re*b##22_re - a##02_im*b##22_im; \
      c##02_im = a##00_re*b##02_im + a##00_im*b##02_re + a##01_re*b##12_im + a##01_im*b##12_re + a##02_re*b##22_im + a##02_im*b##22_re; \
      c##10_re = a##10_re*b##00_re - a##10_im*b##00_im + a##11_re*b##10_re - a##11_im*b##10_im + a##12_re*b##20_re - a##12_im*b##20_im; \
      c##10_im = a##10_re*b##00_im + a##10_im*b##00_re + a##11_re*b##10_im + a##11_im*b##10_re + a##12_re*b##20_im + a##12_im*b##20_re; \
      c##11_re = a##10_re*b##01_re - a##10_im*b##01_im + a##11_re*b##11_re - a##11_im*b##11_im + a##12_re*b##21_re - a##12_im*b##21_im; \
      c##11_im = a##10_re*b##01_im + a##10_im*b##01_re + a##11_re*b##11_im + a##11_im*b##11_re + a##12_re*b##21_im + a##12_im*b##21_re; \
      c##12_re = a##10_re*b##02_re - a##10_im*b##02_im + a##11_re*b##12_re - a##11_im*b##12_im + a##12_re*b##22_re - a##12_im*b##22_im; \
      c##12_im = a##10_re*b##02_im + a##10_im*b##02_re + a##11_re*b##12_im + a##11_im*b##12_re + a##12_re*b##22_im + a##12_im*b##22_re; \
      c##20_re = a##20_re*b##00_re - a##20_im*b##00_im + a##21_re*b##10_re - a##21_im*b##10_im + a##22_re*b##20_re - a##22_im*b##20_im; \
      c##20_im = a##20_re*b##00_im + a##20_im*b##00_re + a##21_re*b##10_im + a##21_im*b##10_re + a##22_re*b##20_im + a##22_im*b##20_re; \
      c##21_re = a##20_re*b##01_re - a##20_im*b##01_im + a##21_re*b##11_re - a##21_im*b##11_im + a##22_re*b##21_re - a##22_im*b##21_im; \
      c##21_im = a##20_re*b##01_im + a##20_im*b##01_re + a##21_re*b##11_im + a##21_im*b##11_re + a##22_re*b##21_im + a##22_im*b##21_re; \
      c##22_re = a##20_re*b##02_re - a##20_im*b##02_im + a##21_re*b##12_re - a##21_im*b##12_im + a##22_re*b##22_re - a##22_im*b##22_im; \
      c##22_im = a##20_re*b##02_im + a##20_im*b##02_re + a##21_re*b##12_im + a##21_im*b##12_re + a##22_re*b##22_im + a##22_im*b##22_re; \
    } else { \
      c##00_re = a##00_re*b##00_re + a##00_im*b##00_im + a##10_re*b##10_re + a##10_im*b##10_im + a##20_re*b##20_re + a##20_im*b##20_im; \
      c##00_im = a##00_re*b##00_im - a##00_im*b##00_re + a##10_re*b##10_im - a##10_im*b##10_re + a##20_re*b##20_im - a##20_im*b##20_re; \
      c##01_re = a##00_re*b##01_re + a##00_im*b##01_im + a##10_re*b##11_re + a##10_im*b##11_im + a##20_re*b##21_re + a##20_im*b##21_im; \
      c##01_im = a##00_re*b##01_im - a##00_im*b##01_re + a##10_re*b##11_im - a##10_im*b##11_re + a##20_re*b##21_im - a##20_im*b##21_re; \
      c##02_re = a##00_re*b##02_re + a##00_im*b##02_im + a##10_re*b##12_re + a##10_im*b##12_im + a##20_re*b##22_re + a##20_im*b##22_im; \
      c##02_im = a##00_re*b##02_im - a##00_im*b##02_re + a##10_re*b##12_im - a##10_im*b##12_re + a##20_re*b##22_im - a##20_im*b##22_re; \
      c##10_re = a##01_re*b##00_re + a##01_im*b##00_im + a##11_re*b##10_re + a##11_im*b##10_im + a##21_re*b##20_re + a##21_im*b##20_im; \
      c##10_im = a##01_re*b##00_im - a##01_im*b##00_re + a##11_re*b##10_im - a##11_im*b##10_re + a##21_re*b##20_im - a##21_im*b##20_re; \
      c##11_re = a##01_re*b##01_re + a##01_im*b##01_im + a##11_re*b##11_re + a##11_im*b##11_im + a##21_re*b##21_re + a##21_im*b##21_im; \
      c##11_im = a##01_re*b##01_im - a##01_im*b##01_re + a##11_re*b##11_im - a##11_im*b##11_re + a##21_re*b##21_im - a##21_im*b##21_re; \
      c##12_re = a##01_re*b##02_re + a##01_im*b##02_im + a##11_re*b##12_re + a##11_im*b##12_im + a##21_re*b##22_re + a##21_im*b##22_im; \
      c##12_im = a##01_re*b##02_im - a##01_im*b##02_re + a##11_re*b##12_im - a##11_im*b##12_re + a##21_re*b##22_im - a##21_im*b##22_re; \
      c##20_re = a##02_re*b##00_re + a##02_im*b##00_im + a##12_re*b##10_re + a##12_im*b##10_im + a##22_re*b##20_re + a##22_im*b##20_im; \
      c##20_im = a##02_re*b##00_im - a##02_im*b##00_re + a##12_re*b##10_im - a##12_im*b##10_re + a##22_re*b##20_im - a##22_im*b##20_re; \
      c##21_re = a##02_re*b##01_re + a##02_im*b##01_im + a##12_re*b##11_re + a##12_im*b##11_im + a##22_re*b##21_re + a##22_im*b##21_im; \
      c##21_im = a##02_re*b##01_im - a##02_im*b##01_re + a##12_re*b##11_im - a##12_im*b##11_re + a##22_re*b##21_im - a##22_im*b##21_re; \
      c##22_re = a##02_re*b##02_re + a##02_im*b##02_im + a##12_re*b##12_re + a##12_im*b##12_im + a##22_re*b##22_re + a##22_im*b##22_im; \
      c##22_im = a##02_re*b##02_im - a##02_im*b##02_re + a##12_re*b##12_im - a##12_im*b##12_re + a##22_re*b##22_im - a##22_im*b##22_re; \
    } \
  } while(0)
649 
650 
/*
 * MAT_MUL_MAT(a, b, c)
 *
 * 3x3 complex matrix product  c = a * b, written out element by element:
 *   c(i, j) = sum_k a(i, k) * b(k, j)
 *
 *   a, b, c : matrix name prefixes; elements are addressed by token pasting
 *             as <prefix>ij_re / <prefix>ij_im.
 *
 * `c` must not be the same matrix as `a` or `b`: elements of `c` are stored
 * while `a` and `b` are still being read.
 *
 * The macro expands to a sequence of statements (including the final `;`)
 * and is NOT wrapped in do { } while (0): do not use it as the body of an
 * unbraced if/else/loop.
 */
#define MAT_MUL_MAT(a, b, c) \
  c##00_re = a##00_re*b##00_re - a##00_im*b##00_im + a##01_re*b##10_re - a##01_im*b##10_im + a##02_re*b##20_re - a##02_im*b##20_im; \
  c##00_im = a##00_re*b##00_im + a##00_im*b##00_re + a##01_re*b##10_im + a##01_im*b##10_re + a##02_re*b##20_im + a##02_im*b##20_re; \
  c##01_re = a##00_re*b##01_re - a##00_im*b##01_im + a##01_re*b##11_re - a##01_im*b##11_im + a##02_re*b##21_re - a##02_im*b##21_im; \
  c##01_im = a##00_re*b##01_im + a##00_im*b##01_re + a##01_re*b##11_im + a##01_im*b##11_re + a##02_re*b##21_im + a##02_im*b##21_re; \
  c##02_re = a##00_re*b##02_re - a##00_im*b##02_im + a##01_re*b##12_re - a##01_im*b##12_im + a##02_re*b##22_re - a##02_im*b##22_im; \
  c##02_im = a##00_re*b##02_im + a##00_im*b##02_re + a##01_re*b##12_im + a##01_im*b##12_re + a##02_re*b##22_im + a##02_im*b##22_re; \
  c##10_re = a##10_re*b##00_re - a##10_im*b##00_im + a##11_re*b##10_re - a##11_im*b##10_im + a##12_re*b##20_re - a##12_im*b##20_im; \
  c##10_im = a##10_re*b##00_im + a##10_im*b##00_re + a##11_re*b##10_im + a##11_im*b##10_re + a##12_re*b##20_im + a##12_im*b##20_re; \
  c##11_re = a##10_re*b##01_re - a##10_im*b##01_im + a##11_re*b##11_re - a##11_im*b##11_im + a##12_re*b##21_re - a##12_im*b##21_im; \
  c##11_im = a##10_re*b##01_im + a##10_im*b##01_re + a##11_re*b##11_im + a##11_im*b##11_re + a##12_re*b##21_im + a##12_im*b##21_re; \
  c##12_re = a##10_re*b##02_re - a##10_im*b##02_im + a##11_re*b##12_re - a##11_im*b##12_im + a##12_re*b##22_re - a##12_im*b##22_im; \
  c##12_im = a##10_re*b##02_im + a##10_im*b##02_re + a##11_re*b##12_im + a##11_im*b##12_re + a##12_re*b##22_im + a##12_im*b##22_re; \
  c##20_re = a##20_re*b##00_re - a##20_im*b##00_im + a##21_re*b##10_re - a##21_im*b##10_im + a##22_re*b##20_re - a##22_im*b##20_im; \
  c##20_im = a##20_re*b##00_im + a##20_im*b##00_re + a##21_re*b##10_im + a##21_im*b##10_re + a##22_re*b##20_im + a##22_im*b##20_re; \
  c##21_re = a##20_re*b##01_re - a##20_im*b##01_im + a##21_re*b##11_re - a##21_im*b##11_im + a##22_re*b##21_re - a##22_im*b##21_im; \
  c##21_im = a##20_re*b##01_im + a##20_im*b##01_re + a##21_re*b##11_im + a##21_im*b##11_re + a##22_re*b##21_im + a##22_im*b##21_re; \
  c##22_re = a##20_re*b##02_re - a##20_im*b##02_im + a##21_re*b##12_re - a##21_im*b##12_im + a##22_re*b##22_re - a##22_im*b##22_im; \
  c##22_im = a##20_re*b##02_im + a##20_im*b##02_re + a##21_re*b##12_im + a##21_im*b##12_re + a##22_re*b##22_im + a##22_im*b##22_re;
670 
/*
 * MAT_MUL_ADJ_MAT(a, b, c)
 *
 * 3x3 complex matrix product  c = a * adjoint(b), written out element by
 * element:
 *   c(i, j) = sum_k a(i, k) * conj(b(j, k))
 *
 *   a, b, c : matrix name prefixes; elements are addressed by token pasting
 *             as <prefix>ij_re / <prefix>ij_im.
 *
 * `c` must not be the same matrix as `a` or `b`: elements of `c` are stored
 * while `a` and `b` are still being read.
 *
 * The macro expands to a sequence of statements (including the final `;`)
 * and is NOT wrapped in do { } while (0): do not use it as the body of an
 * unbraced if/else/loop.
 */
#define MAT_MUL_ADJ_MAT(a, b, c) \
  c##00_re = a##00_re*b##00_re + a##00_im*b##00_im + a##01_re*b##01_re + a##01_im*b##01_im + a##02_re*b##02_re + a##02_im*b##02_im; \
  c##00_im = - a##00_re*b##00_im + a##00_im*b##00_re - a##01_re*b##01_im + a##01_im*b##01_re - a##02_re*b##02_im + a##02_im*b##02_re; \
  c##01_re = a##00_re*b##10_re + a##00_im*b##10_im + a##01_re*b##11_re + a##01_im*b##11_im + a##02_re*b##12_re + a##02_im*b##12_im; \
  c##01_im = - a##00_re*b##10_im + a##00_im*b##10_re - a##01_re*b##11_im + a##01_im*b##11_re - a##02_re*b##12_im + a##02_im*b##12_re; \
  c##02_re = a##00_re*b##20_re + a##00_im*b##20_im + a##01_re*b##21_re + a##01_im*b##21_im + a##02_re*b##22_re + a##02_im*b##22_im; \
  c##02_im = - a##00_re*b##20_im + a##00_im*b##20_re - a##01_re*b##21_im + a##01_im*b##21_re - a##02_re*b##22_im + a##02_im*b##22_re; \
  c##10_re = a##10_re*b##00_re + a##10_im*b##00_im + a##11_re*b##01_re + a##11_im*b##01_im + a##12_re*b##02_re + a##12_im*b##02_im; \
  c##10_im = - a##10_re*b##00_im + a##10_im*b##00_re - a##11_re*b##01_im + a##11_im*b##01_re - a##12_re*b##02_im + a##12_im*b##02_re; \
  c##11_re = a##10_re*b##10_re + a##10_im*b##10_im + a##11_re*b##11_re + a##11_im*b##11_im + a##12_re*b##12_re + a##12_im*b##12_im; \
  c##11_im = - a##10_re*b##10_im + a##10_im*b##10_re - a##11_re*b##11_im + a##11_im*b##11_re - a##12_re*b##12_im + a##12_im*b##12_re; \
  c##12_re = a##10_re*b##20_re + a##10_im*b##20_im + a##11_re*b##21_re + a##11_im*b##21_im + a##12_re*b##22_re + a##12_im*b##22_im; \
  c##12_im = - a##10_re*b##20_im + a##10_im*b##20_re - a##11_re*b##21_im + a##11_im*b##21_re - a##12_re*b##22_im + a##12_im*b##22_re; \
  c##20_re = a##20_re*b##00_re + a##20_im*b##00_im + a##21_re*b##01_re + a##21_im*b##01_im + a##22_re*b##02_re + a##22_im*b##02_im; \
  c##20_im = - a##20_re*b##00_im + a##20_im*b##00_re - a##21_re*b##01_im + a##21_im*b##01_re - a##22_re*b##02_im + a##22_im*b##02_re; \
  c##21_re = a##20_re*b##10_re + a##20_im*b##10_im + a##21_re*b##11_re + a##21_im*b##11_im + a##22_re*b##12_re + a##22_im*b##12_im; \
  c##21_im = - a##20_re*b##10_im + a##20_im*b##10_re - a##21_re*b##11_im + a##21_im*b##11_re - a##22_re*b##12_im + a##22_im*b##12_re; \
  c##22_re = a##20_re*b##20_re + a##20_im*b##20_im + a##21_re*b##21_re + a##21_im*b##21_im + a##22_re*b##22_re + a##22_im*b##22_im; \
  c##22_im = - a##20_re*b##20_im + a##20_im*b##20_re - a##21_re*b##21_im + a##21_im*b##21_re - a##22_re*b##22_im + a##22_im*b##22_re;
690 
// ADJ_MAT_MUL_MAT(a, b, c): c = adjoint(a) * b for 3x3 complex matrices.
//
// a, b and c are identifier prefixes, not expressions: element (r,col) of a
// matrix is reached through the pasted names <prefix>rc_re and <prefix>rc_im
// (r, col in 0..2), which must be assignable lvalues for c.
//
// Each output element is  c[i][j] = sum_k conj(a[k][i]) * b[k][j]:
//   real part: a_re*b_re + a_im*b_im
//   imag part: a_re*b_im - a_im*b_re
//
// The 18 assignments are grouped in one compound statement so that the macro
// acts as a single statement under an unbraced if/for/while. A plain block is
// used (rather than do{}while(0)) so call sites written with or without a
// trailing semicolon both keep compiling.
//
// c must not name the same storage as a or b: outputs are written one element
// at a time while later elements still read the inputs.
#define ADJ_MAT_MUL_MAT(a, b, c) { \
  c##00_re = a##00_re*b##00_re + a##00_im*b##00_im + a##10_re*b##10_re + a##10_im*b##10_im + a##20_re*b##20_re + a##20_im*b##20_im; \
  c##00_im = a##00_re*b##00_im - a##00_im*b##00_re + a##10_re*b##10_im - a##10_im*b##10_re + a##20_re*b##20_im - a##20_im*b##20_re; \
  c##01_re = a##00_re*b##01_re + a##00_im*b##01_im + a##10_re*b##11_re + a##10_im*b##11_im + a##20_re*b##21_re + a##20_im*b##21_im; \
  c##01_im = a##00_re*b##01_im - a##00_im*b##01_re + a##10_re*b##11_im - a##10_im*b##11_re + a##20_re*b##21_im - a##20_im*b##21_re; \
  c##02_re = a##00_re*b##02_re + a##00_im*b##02_im + a##10_re*b##12_re + a##10_im*b##12_im + a##20_re*b##22_re + a##20_im*b##22_im; \
  c##02_im = a##00_re*b##02_im - a##00_im*b##02_re + a##10_re*b##12_im - a##10_im*b##12_re + a##20_re*b##22_im - a##20_im*b##22_re; \
  c##10_re = a##01_re*b##00_re + a##01_im*b##00_im + a##11_re*b##10_re + a##11_im*b##10_im + a##21_re*b##20_re + a##21_im*b##20_im; \
  c##10_im = a##01_re*b##00_im - a##01_im*b##00_re + a##11_re*b##10_im - a##11_im*b##10_re + a##21_re*b##20_im - a##21_im*b##20_re; \
  c##11_re = a##01_re*b##01_re + a##01_im*b##01_im + a##11_re*b##11_re + a##11_im*b##11_im + a##21_re*b##21_re + a##21_im*b##21_im; \
  c##11_im = a##01_re*b##01_im - a##01_im*b##01_re + a##11_re*b##11_im - a##11_im*b##11_re + a##21_re*b##21_im - a##21_im*b##21_re; \
  c##12_re = a##01_re*b##02_re + a##01_im*b##02_im + a##11_re*b##12_re + a##11_im*b##12_im + a##21_re*b##22_re + a##21_im*b##22_im; \
  c##12_im = a##01_re*b##02_im - a##01_im*b##02_re + a##11_re*b##12_im - a##11_im*b##12_re + a##21_re*b##22_im - a##21_im*b##22_re; \
  c##20_re = a##02_re*b##00_re + a##02_im*b##00_im + a##12_re*b##10_re + a##12_im*b##10_im + a##22_re*b##20_re + a##22_im*b##20_im; \
  c##20_im = a##02_re*b##00_im - a##02_im*b##00_re + a##12_re*b##10_im - a##12_im*b##10_re + a##22_re*b##20_im - a##22_im*b##20_re; \
  c##21_re = a##02_re*b##01_re + a##02_im*b##01_im + a##12_re*b##11_re + a##12_im*b##11_im + a##22_re*b##21_re + a##22_im*b##21_im; \
  c##21_im = a##02_re*b##01_im - a##02_im*b##01_re + a##12_re*b##11_im - a##12_im*b##11_re + a##22_re*b##21_im - a##22_im*b##21_re; \
  c##22_re = a##02_re*b##02_re + a##02_im*b##02_im + a##12_re*b##12_re + a##12_im*b##12_im + a##22_re*b##22_re + a##22_im*b##22_im; \
  c##22_im = a##02_re*b##02_im - a##02_im*b##02_re + a##12_re*b##12_im - a##12_im*b##12_re + a##22_re*b##22_im - a##22_im*b##22_re; \
}
710 
// ADJ_MAT_MUL_ADJ_MAT(a, b, c): c = adjoint(a) * adjoint(b) for 3x3 complex
// matrices.
//
// a, b and c are identifier prefixes, not expressions: element (r,col) of a
// matrix is reached through the pasted names <prefix>rc_re and <prefix>rc_im
// (r, col in 0..2), which must be assignable lvalues for c.
//
// Each output element is  c[i][j] = sum_k conj(a[k][i]) * conj(b[j][k]):
//   real part:  a_re*b_re - a_im*b_im
//   imag part: -a_re*b_im - a_im*b_re
//
// The 18 assignments are grouped in one compound statement so that the macro
// acts as a single statement under an unbraced if/for/while. A plain block is
// used (rather than do{}while(0)) so call sites written with or without a
// trailing semicolon both keep compiling.
//
// c must not name the same storage as a or b: outputs are written one element
// at a time while later elements still read the inputs.
#define ADJ_MAT_MUL_ADJ_MAT(a, b, c) { \
  c##00_re = a##00_re*b##00_re - a##00_im*b##00_im + a##10_re*b##01_re - a##10_im*b##01_im + a##20_re*b##02_re - a##20_im*b##02_im; \
  c##00_im = - a##00_re*b##00_im - a##00_im*b##00_re - a##10_re*b##01_im - a##10_im*b##01_re - a##20_re*b##02_im - a##20_im*b##02_re; \
  c##01_re = a##00_re*b##10_re - a##00_im*b##10_im + a##10_re*b##11_re - a##10_im*b##11_im + a##20_re*b##12_re - a##20_im*b##12_im; \
  c##01_im = - a##00_re*b##10_im - a##00_im*b##10_re - a##10_re*b##11_im - a##10_im*b##11_re - a##20_re*b##12_im - a##20_im*b##12_re; \
  c##02_re = a##00_re*b##20_re - a##00_im*b##20_im + a##10_re*b##21_re - a##10_im*b##21_im + a##20_re*b##22_re - a##20_im*b##22_im; \
  c##02_im = - a##00_re*b##20_im - a##00_im*b##20_re - a##10_re*b##21_im - a##10_im*b##21_re - a##20_re*b##22_im - a##20_im*b##22_re; \
  c##10_re = a##01_re*b##00_re - a##01_im*b##00_im + a##11_re*b##01_re - a##11_im*b##01_im + a##21_re*b##02_re - a##21_im*b##02_im; \
  c##10_im = - a##01_re*b##00_im - a##01_im*b##00_re - a##11_re*b##01_im - a##11_im*b##01_re - a##21_re*b##02_im - a##21_im*b##02_re; \
  c##11_re = a##01_re*b##10_re - a##01_im*b##10_im + a##11_re*b##11_re - a##11_im*b##11_im + a##21_re*b##12_re - a##21_im*b##12_im; \
  c##11_im = - a##01_re*b##10_im - a##01_im*b##10_re - a##11_re*b##11_im - a##11_im*b##11_re - a##21_re*b##12_im - a##21_im*b##12_re; \
  c##12_re = a##01_re*b##20_re - a##01_im*b##20_im + a##11_re*b##21_re - a##11_im*b##21_im + a##21_re*b##22_re - a##21_im*b##22_im; \
  c##12_im = - a##01_re*b##20_im - a##01_im*b##20_re - a##11_re*b##21_im - a##11_im*b##21_re - a##21_re*b##22_im - a##21_im*b##22_re; \
  c##20_re = a##02_re*b##00_re - a##02_im*b##00_im + a##12_re*b##01_re - a##12_im*b##01_im + a##22_re*b##02_re - a##22_im*b##02_im; \
  c##20_im = - a##02_re*b##00_im - a##02_im*b##00_re - a##12_re*b##01_im - a##12_im*b##01_re - a##22_re*b##02_im - a##22_im*b##02_re; \
  c##21_re = a##02_re*b##10_re - a##02_im*b##10_im + a##12_re*b##11_re - a##12_im*b##11_im + a##22_re*b##12_re - a##22_im*b##12_im; \
  c##21_im = - a##02_re*b##10_im - a##02_im*b##10_re - a##12_re*b##11_im - a##12_im*b##11_re - a##22_re*b##12_im - a##22_im*b##12_re; \
  c##22_re = a##02_re*b##20_re - a##02_im*b##20_im + a##12_re*b##21_re - a##12_im*b##21_im + a##22_re*b##22_re - a##22_im*b##22_im; \
  c##22_im = - a##02_re*b##20_im - a##02_im*b##20_re - a##12_re*b##21_im - a##12_im*b##21_re - a##22_re*b##22_im - a##22_im*b##22_re; \
}
730 
731  // end of macros specific to hisq routines
732 
733 
// SCALAR_MULT_ADD_MATRIX(a, b, scalar, c): c = a + scalar * b, applied to the
// real and imaginary part of every element of a 3x3 matrix.
//
// a, b and c are identifier prefixes, not expressions: element (r,col) is
// reached through the pasted names <prefix>rc_re and <prefix>rc_im
// (r, col in 0..2). scalar is an ordinary expression; it multiplies the real
// and imaginary components separately.
//
// scalar is parenthesised at every use so that an argument containing a
// lower-precedence operator (e.g. `x + y`) scales b as a whole instead of
// binding only its last term to the multiplication.
// Note that scalar is expanded (and therefore evaluated) 18 times; pass an
// expression without side effects.
#define SCALAR_MULT_ADD_MATRIX(a, b, scalar, c) do{ \
  c##00_re = a##00_re + (scalar)*b##00_re; \
  c##00_im = a##00_im + (scalar)*b##00_im; \
  c##01_re = a##01_re + (scalar)*b##01_re; \
  c##01_im = a##01_im + (scalar)*b##01_im; \
  c##02_re = a##02_re + (scalar)*b##02_re; \
  c##02_im = a##02_im + (scalar)*b##02_im; \
  c##10_re = a##10_re + (scalar)*b##10_re; \
  c##10_im = a##10_im + (scalar)*b##10_im; \
  c##11_re = a##11_re + (scalar)*b##11_re; \
  c##11_im = a##11_im + (scalar)*b##11_im; \
  c##12_re = a##12_re + (scalar)*b##12_re; \
  c##12_im = a##12_im + (scalar)*b##12_im; \
  c##20_re = a##20_re + (scalar)*b##20_re; \
  c##20_im = a##20_im + (scalar)*b##20_im; \
  c##21_re = a##21_re + (scalar)*b##21_re; \
  c##21_im = a##21_im + (scalar)*b##21_im; \
  c##22_re = a##22_re + (scalar)*b##22_re; \
  c##22_im = a##22_im + (scalar)*b##22_im; \
}while(0)
754 
755 #endif // GENERIC_MATRIX_MACROS
756 
757 #endif // _HISQ_FORCE_MACROS_H_