/*
 * QUDA v0.7.0 -- A library for QCD on GPUs
 * File: quda/lib/hisq_force_macros.h
 */
#ifndef _HISQ_FORCE_MACROS_H_
#define _HISQ_FORCE_MACROS_H_

/*
 * Element accessors for 3x3 complex matrices held in small arrays.
 *
 * <NAME><r><c>_re / <NAME><r><c>_im expand to the storage location of the
 * real / imaginary part of the entry in row r, column c of array <NAME>.
 * They are plain textual aliases, so an array with the matching name and
 * member fields must be in scope wherever an accessor is used.
 */
#ifndef HISQ_RECONSTRUCT_LINKS

/*
 * Layout A: one array element per complex entry, row-major.
 * Entry (r,c) lives in element [3*r + c]; .x is the real part, .y the
 * imaginary part.
 */

/* LINK_W */
#define LINK_W00_re LINK_W[0].x
#define LINK_W00_im LINK_W[0].y
#define LINK_W01_re LINK_W[1].x
#define LINK_W01_im LINK_W[1].y
#define LINK_W02_re LINK_W[2].x
#define LINK_W02_im LINK_W[2].y
#define LINK_W10_re LINK_W[3].x
#define LINK_W10_im LINK_W[3].y
#define LINK_W11_re LINK_W[4].x
#define LINK_W11_im LINK_W[4].y
#define LINK_W12_re LINK_W[5].x
#define LINK_W12_im LINK_W[5].y
#define LINK_W20_re LINK_W[6].x
#define LINK_W20_im LINK_W[6].y
#define LINK_W21_re LINK_W[7].x
#define LINK_W21_im LINK_W[7].y
#define LINK_W22_re LINK_W[8].x
#define LINK_W22_im LINK_W[8].y

/* LINK_X */
#define LINK_X00_re LINK_X[0].x
#define LINK_X00_im LINK_X[0].y
#define LINK_X01_re LINK_X[1].x
#define LINK_X01_im LINK_X[1].y
#define LINK_X02_re LINK_X[2].x
#define LINK_X02_im LINK_X[2].y
#define LINK_X10_re LINK_X[3].x
#define LINK_X10_im LINK_X[3].y
#define LINK_X11_re LINK_X[4].x
#define LINK_X11_im LINK_X[4].y
#define LINK_X12_re LINK_X[5].x
#define LINK_X12_im LINK_X[5].y
#define LINK_X20_re LINK_X[6].x
#define LINK_X20_im LINK_X[6].y
#define LINK_X21_re LINK_X[7].x
#define LINK_X21_im LINK_X[7].y
#define LINK_X22_re LINK_X[8].x
#define LINK_X22_im LINK_X[8].y

/* LINK_Y */
#define LINK_Y00_re LINK_Y[0].x
#define LINK_Y00_im LINK_Y[0].y
#define LINK_Y01_re LINK_Y[1].x
#define LINK_Y01_im LINK_Y[1].y
#define LINK_Y02_re LINK_Y[2].x
#define LINK_Y02_im LINK_Y[2].y
#define LINK_Y10_re LINK_Y[3].x
#define LINK_Y10_im LINK_Y[3].y
#define LINK_Y11_re LINK_Y[4].x
#define LINK_Y11_im LINK_Y[4].y
#define LINK_Y12_re LINK_Y[5].x
#define LINK_Y12_im LINK_Y[5].y
#define LINK_Y20_re LINK_Y[6].x
#define LINK_Y20_im LINK_Y[6].y
#define LINK_Y21_re LINK_Y[7].x
#define LINK_Y21_im LINK_Y[7].y
#define LINK_Y22_re LINK_Y[8].x
#define LINK_Y22_im LINK_Y[8].y

/* LINK_Z */
#define LINK_Z00_re LINK_Z[0].x
#define LINK_Z00_im LINK_Z[0].y
#define LINK_Z01_re LINK_Z[1].x
#define LINK_Z01_im LINK_Z[1].y
#define LINK_Z02_re LINK_Z[2].x
#define LINK_Z02_im LINK_Z[2].y
#define LINK_Z10_re LINK_Z[3].x
#define LINK_Z10_im LINK_Z[3].y
#define LINK_Z11_re LINK_Z[4].x
#define LINK_Z11_im LINK_Z[4].y
#define LINK_Z12_re LINK_Z[5].x
#define LINK_Z12_im LINK_Z[5].y
#define LINK_Z20_re LINK_Z[6].x
#define LINK_Z20_im LINK_Z[6].y
#define LINK_Z21_re LINK_Z[7].x
#define LINK_Z21_im LINK_Z[7].y
#define LINK_Z22_re LINK_Z[8].x
#define LINK_Z22_im LINK_Z[8].y

/* ab_link */
#define ab_link00_re ab_link[0].x
#define ab_link00_im ab_link[0].y
#define ab_link01_re ab_link[1].x
#define ab_link01_im ab_link[1].y
#define ab_link02_re ab_link[2].x
#define ab_link02_im ab_link[2].y
#define ab_link10_re ab_link[3].x
#define ab_link10_im ab_link[3].y
#define ab_link11_re ab_link[4].x
#define ab_link11_im ab_link[4].y
#define ab_link12_re ab_link[5].x
#define ab_link12_im ab_link[5].y
#define ab_link20_re ab_link[6].x
#define ab_link20_im ab_link[6].y
#define ab_link21_re ab_link[7].x
#define ab_link21_im ab_link[7].y
#define ab_link22_re ab_link[8].x
#define ab_link22_im ab_link[8].y

/* bc_link */
#define bc_link00_re bc_link[0].x
#define bc_link00_im bc_link[0].y
#define bc_link01_re bc_link[1].x
#define bc_link01_im bc_link[1].y
#define bc_link02_re bc_link[2].x
#define bc_link02_im bc_link[2].y
#define bc_link10_re bc_link[3].x
#define bc_link10_im bc_link[3].y
#define bc_link11_re bc_link[4].x
#define bc_link11_im bc_link[4].y
#define bc_link12_re bc_link[5].x
#define bc_link12_im bc_link[5].y
#define bc_link20_re bc_link[6].x
#define bc_link20_im bc_link[6].y
#define bc_link21_re bc_link[7].x
#define bc_link21_im bc_link[7].y
#define bc_link22_re bc_link[8].x
#define bc_link22_im bc_link[8].y

/* ad_link */
#define ad_link00_re ad_link[0].x
#define ad_link00_im ad_link[0].y
#define ad_link01_re ad_link[1].x
#define ad_link01_im ad_link[1].y
#define ad_link02_re ad_link[2].x
#define ad_link02_im ad_link[2].y
#define ad_link10_re ad_link[3].x
#define ad_link10_im ad_link[3].y
#define ad_link11_re ad_link[4].x
#define ad_link11_im ad_link[4].y
#define ad_link12_re ad_link[5].x
#define ad_link12_im ad_link[5].y
#define ad_link20_re ad_link[6].x
#define ad_link20_im ad_link[6].y
#define ad_link21_re ad_link[7].x
#define ad_link21_im ad_link[7].y
#define ad_link22_re ad_link[8].x
#define ad_link22_im ad_link[8].y

/* de_link */
#define de_link00_re de_link[0].x
#define de_link00_im de_link[0].y
#define de_link01_re de_link[1].x
#define de_link01_im de_link[1].y
#define de_link02_re de_link[2].x
#define de_link02_im de_link[2].y
#define de_link10_re de_link[3].x
#define de_link10_im de_link[3].y
#define de_link11_re de_link[4].x
#define de_link11_im de_link[4].y
#define de_link12_re de_link[5].x
#define de_link12_im de_link[5].y
#define de_link20_re de_link[6].x
#define de_link20_im de_link[6].y
#define de_link21_re de_link[7].x
#define de_link21_im de_link[7].y
#define de_link22_re de_link[8].x
#define de_link22_im de_link[8].y

/* ef_link */
#define ef_link00_re ef_link[0].x
#define ef_link00_im ef_link[0].y
#define ef_link01_re ef_link[1].x
#define ef_link01_im ef_link[1].y
#define ef_link02_re ef_link[2].x
#define ef_link02_im ef_link[2].y
#define ef_link10_re ef_link[3].x
#define ef_link10_im ef_link[3].y
#define ef_link11_re ef_link[4].x
#define ef_link11_im ef_link[4].y
#define ef_link12_re ef_link[5].x
#define ef_link12_im ef_link[5].y
#define ef_link20_re ef_link[6].x
#define ef_link20_im ef_link[6].y
#define ef_link21_re ef_link[7].x
#define ef_link21_im ef_link[7].y
#define ef_link22_re ef_link[8].x
#define ef_link22_im ef_link[8].y

#else // HISQ_RECONSTRUCT_LINKS

/*
 * Layout B: two complex entries per array element, row-major.
 * The first entry of each pair uses (.x, .y), the second (.z, .w); entry 22
 * sits alone in element [4].
 * NOTE(review): only the LINK_W/X/Y/Z accessors exist in this branch; the
 * ab/bc/ad/de/ef_link accessors are defined only when HISQ_RECONSTRUCT_LINKS
 * is not set -- confirm that no code relies on them in this configuration.
 */

/* LINK_W */
#define LINK_W00_re LINK_W[0].x
#define LINK_W00_im LINK_W[0].y
#define LINK_W01_re LINK_W[0].z
#define LINK_W01_im LINK_W[0].w
#define LINK_W02_re LINK_W[1].x
#define LINK_W02_im LINK_W[1].y
#define LINK_W10_re LINK_W[1].z
#define LINK_W10_im LINK_W[1].w
#define LINK_W11_re LINK_W[2].x
#define LINK_W11_im LINK_W[2].y
#define LINK_W12_re LINK_W[2].z
#define LINK_W12_im LINK_W[2].w
#define LINK_W20_re LINK_W[3].x
#define LINK_W20_im LINK_W[3].y
#define LINK_W21_re LINK_W[3].z
#define LINK_W21_im LINK_W[3].w
#define LINK_W22_re LINK_W[4].x
#define LINK_W22_im LINK_W[4].y

/* LINK_X */
#define LINK_X00_re LINK_X[0].x
#define LINK_X00_im LINK_X[0].y
#define LINK_X01_re LINK_X[0].z
#define LINK_X01_im LINK_X[0].w
#define LINK_X02_re LINK_X[1].x
#define LINK_X02_im LINK_X[1].y
#define LINK_X10_re LINK_X[1].z
#define LINK_X10_im LINK_X[1].w
#define LINK_X11_re LINK_X[2].x
#define LINK_X11_im LINK_X[2].y
#define LINK_X12_re LINK_X[2].z
#define LINK_X12_im LINK_X[2].w
#define LINK_X20_re LINK_X[3].x
#define LINK_X20_im LINK_X[3].y
#define LINK_X21_re LINK_X[3].z
#define LINK_X21_im LINK_X[3].w
#define LINK_X22_re LINK_X[4].x
#define LINK_X22_im LINK_X[4].y

/* LINK_Y */
#define LINK_Y00_re LINK_Y[0].x
#define LINK_Y00_im LINK_Y[0].y
#define LINK_Y01_re LINK_Y[0].z
#define LINK_Y01_im LINK_Y[0].w
#define LINK_Y02_re LINK_Y[1].x
#define LINK_Y02_im LINK_Y[1].y
#define LINK_Y10_re LINK_Y[1].z
#define LINK_Y10_im LINK_Y[1].w
#define LINK_Y11_re LINK_Y[2].x
#define LINK_Y11_im LINK_Y[2].y
#define LINK_Y12_re LINK_Y[2].z
#define LINK_Y12_im LINK_Y[2].w
#define LINK_Y20_re LINK_Y[3].x
#define LINK_Y20_im LINK_Y[3].y
#define LINK_Y21_re LINK_Y[3].z
#define LINK_Y21_im LINK_Y[3].w
#define LINK_Y22_re LINK_Y[4].x
#define LINK_Y22_im LINK_Y[4].y

/* LINK_Z */
#define LINK_Z00_re LINK_Z[0].x
#define LINK_Z00_im LINK_Z[0].y
#define LINK_Z01_re LINK_Z[0].z
#define LINK_Z01_im LINK_Z[0].w
#define LINK_Z02_re LINK_Z[1].x
#define LINK_Z02_im LINK_Z[1].y
#define LINK_Z10_re LINK_Z[1].z
#define LINK_Z10_im LINK_Z[1].w
#define LINK_Z11_re LINK_Z[2].x
#define LINK_Z11_im LINK_Z[2].y
#define LINK_Z12_re LINK_Z[2].z
#define LINK_Z12_im LINK_Z[2].w
#define LINK_Z20_re LINK_Z[3].x
#define LINK_Z20_im LINK_Z[3].y
#define LINK_Z21_re LINK_Z[3].z
#define LINK_Z21_im LINK_Z[3].w
#define LINK_Z22_re LINK_Z[4].x
#define LINK_Z22_im LINK_Z[4].y

#endif // HISQ_RECONSTRUCT_LINKS

#ifndef GENERIC_MATRIX_MACROS
#define GENERIC_MATRIX_MACROS

// Color matrices stored as an array of float2 or double2.
//
// COLOR_MAT_<M><r><c>_re / _im alias the real / imaginary part of the entry
// in row r, column c of array COLOR_MAT_<M> (M = T, U, V, W, X, Y, Z).
// Storage is row-major with one array element per complex entry:
// entry (r,c) is element [3*r + c], .x is the real part, .y the imaginary part.

/* COLOR_MAT_T */
#define COLOR_MAT_T00_re COLOR_MAT_T[0].x
#define COLOR_MAT_T00_im COLOR_MAT_T[0].y
#define COLOR_MAT_T01_re COLOR_MAT_T[1].x
#define COLOR_MAT_T01_im COLOR_MAT_T[1].y
#define COLOR_MAT_T02_re COLOR_MAT_T[2].x
#define COLOR_MAT_T02_im COLOR_MAT_T[2].y
#define COLOR_MAT_T10_re COLOR_MAT_T[3].x
#define COLOR_MAT_T10_im COLOR_MAT_T[3].y
#define COLOR_MAT_T11_re COLOR_MAT_T[4].x
#define COLOR_MAT_T11_im COLOR_MAT_T[4].y
#define COLOR_MAT_T12_re COLOR_MAT_T[5].x
#define COLOR_MAT_T12_im COLOR_MAT_T[5].y
#define COLOR_MAT_T20_re COLOR_MAT_T[6].x
#define COLOR_MAT_T20_im COLOR_MAT_T[6].y
#define COLOR_MAT_T21_re COLOR_MAT_T[7].x
#define COLOR_MAT_T21_im COLOR_MAT_T[7].y
#define COLOR_MAT_T22_re COLOR_MAT_T[8].x
#define COLOR_MAT_T22_im COLOR_MAT_T[8].y

/* COLOR_MAT_U */
#define COLOR_MAT_U00_re COLOR_MAT_U[0].x
#define COLOR_MAT_U00_im COLOR_MAT_U[0].y
#define COLOR_MAT_U01_re COLOR_MAT_U[1].x
#define COLOR_MAT_U01_im COLOR_MAT_U[1].y
#define COLOR_MAT_U02_re COLOR_MAT_U[2].x
#define COLOR_MAT_U02_im COLOR_MAT_U[2].y
#define COLOR_MAT_U10_re COLOR_MAT_U[3].x
#define COLOR_MAT_U10_im COLOR_MAT_U[3].y
#define COLOR_MAT_U11_re COLOR_MAT_U[4].x
#define COLOR_MAT_U11_im COLOR_MAT_U[4].y
#define COLOR_MAT_U12_re COLOR_MAT_U[5].x
#define COLOR_MAT_U12_im COLOR_MAT_U[5].y
#define COLOR_MAT_U20_re COLOR_MAT_U[6].x
#define COLOR_MAT_U20_im COLOR_MAT_U[6].y
#define COLOR_MAT_U21_re COLOR_MAT_U[7].x
#define COLOR_MAT_U21_im COLOR_MAT_U[7].y
#define COLOR_MAT_U22_re COLOR_MAT_U[8].x
#define COLOR_MAT_U22_im COLOR_MAT_U[8].y

/* COLOR_MAT_V */
#define COLOR_MAT_V00_re COLOR_MAT_V[0].x
#define COLOR_MAT_V00_im COLOR_MAT_V[0].y
#define COLOR_MAT_V01_re COLOR_MAT_V[1].x
#define COLOR_MAT_V01_im COLOR_MAT_V[1].y
#define COLOR_MAT_V02_re COLOR_MAT_V[2].x
#define COLOR_MAT_V02_im COLOR_MAT_V[2].y
#define COLOR_MAT_V10_re COLOR_MAT_V[3].x
#define COLOR_MAT_V10_im COLOR_MAT_V[3].y
#define COLOR_MAT_V11_re COLOR_MAT_V[4].x
#define COLOR_MAT_V11_im COLOR_MAT_V[4].y
#define COLOR_MAT_V12_re COLOR_MAT_V[5].x
#define COLOR_MAT_V12_im COLOR_MAT_V[5].y
#define COLOR_MAT_V20_re COLOR_MAT_V[6].x
#define COLOR_MAT_V20_im COLOR_MAT_V[6].y
#define COLOR_MAT_V21_re COLOR_MAT_V[7].x
#define COLOR_MAT_V21_im COLOR_MAT_V[7].y
#define COLOR_MAT_V22_re COLOR_MAT_V[8].x
#define COLOR_MAT_V22_im COLOR_MAT_V[8].y

/* COLOR_MAT_W */
#define COLOR_MAT_W00_re COLOR_MAT_W[0].x
#define COLOR_MAT_W00_im COLOR_MAT_W[0].y
#define COLOR_MAT_W01_re COLOR_MAT_W[1].x
#define COLOR_MAT_W01_im COLOR_MAT_W[1].y
#define COLOR_MAT_W02_re COLOR_MAT_W[2].x
#define COLOR_MAT_W02_im COLOR_MAT_W[2].y
#define COLOR_MAT_W10_re COLOR_MAT_W[3].x
#define COLOR_MAT_W10_im COLOR_MAT_W[3].y
#define COLOR_MAT_W11_re COLOR_MAT_W[4].x
#define COLOR_MAT_W11_im COLOR_MAT_W[4].y
#define COLOR_MAT_W12_re COLOR_MAT_W[5].x
#define COLOR_MAT_W12_im COLOR_MAT_W[5].y
#define COLOR_MAT_W20_re COLOR_MAT_W[6].x
#define COLOR_MAT_W20_im COLOR_MAT_W[6].y
#define COLOR_MAT_W21_re COLOR_MAT_W[7].x
#define COLOR_MAT_W21_im COLOR_MAT_W[7].y
#define COLOR_MAT_W22_re COLOR_MAT_W[8].x
#define COLOR_MAT_W22_im COLOR_MAT_W[8].y

/* COLOR_MAT_X */
#define COLOR_MAT_X00_re COLOR_MAT_X[0].x
#define COLOR_MAT_X00_im COLOR_MAT_X[0].y
#define COLOR_MAT_X01_re COLOR_MAT_X[1].x
#define COLOR_MAT_X01_im COLOR_MAT_X[1].y
#define COLOR_MAT_X02_re COLOR_MAT_X[2].x
#define COLOR_MAT_X02_im COLOR_MAT_X[2].y
#define COLOR_MAT_X10_re COLOR_MAT_X[3].x
#define COLOR_MAT_X10_im COLOR_MAT_X[3].y
#define COLOR_MAT_X11_re COLOR_MAT_X[4].x
#define COLOR_MAT_X11_im COLOR_MAT_X[4].y
#define COLOR_MAT_X12_re COLOR_MAT_X[5].x
#define COLOR_MAT_X12_im COLOR_MAT_X[5].y
#define COLOR_MAT_X20_re COLOR_MAT_X[6].x
#define COLOR_MAT_X20_im COLOR_MAT_X[6].y
#define COLOR_MAT_X21_re COLOR_MAT_X[7].x
#define COLOR_MAT_X21_im COLOR_MAT_X[7].y
#define COLOR_MAT_X22_re COLOR_MAT_X[8].x
#define COLOR_MAT_X22_im COLOR_MAT_X[8].y

/* COLOR_MAT_Y */
#define COLOR_MAT_Y00_re COLOR_MAT_Y[0].x
#define COLOR_MAT_Y00_im COLOR_MAT_Y[0].y
#define COLOR_MAT_Y01_re COLOR_MAT_Y[1].x
#define COLOR_MAT_Y01_im COLOR_MAT_Y[1].y
#define COLOR_MAT_Y02_re COLOR_MAT_Y[2].x
#define COLOR_MAT_Y02_im COLOR_MAT_Y[2].y
#define COLOR_MAT_Y10_re COLOR_MAT_Y[3].x
#define COLOR_MAT_Y10_im COLOR_MAT_Y[3].y
#define COLOR_MAT_Y11_re COLOR_MAT_Y[4].x
#define COLOR_MAT_Y11_im COLOR_MAT_Y[4].y
#define COLOR_MAT_Y12_re COLOR_MAT_Y[5].x
#define COLOR_MAT_Y12_im COLOR_MAT_Y[5].y
#define COLOR_MAT_Y20_re COLOR_MAT_Y[6].x
#define COLOR_MAT_Y20_im COLOR_MAT_Y[6].y
#define COLOR_MAT_Y21_re COLOR_MAT_Y[7].x
#define COLOR_MAT_Y21_im COLOR_MAT_Y[7].y
#define COLOR_MAT_Y22_re COLOR_MAT_Y[8].x
#define COLOR_MAT_Y22_im COLOR_MAT_Y[8].y

/* COLOR_MAT_Z */
#define COLOR_MAT_Z00_re COLOR_MAT_Z[0].x
#define COLOR_MAT_Z00_im COLOR_MAT_Z[0].y
#define COLOR_MAT_Z01_re COLOR_MAT_Z[1].x
#define COLOR_MAT_Z01_im COLOR_MAT_Z[1].y
#define COLOR_MAT_Z02_re COLOR_MAT_Z[2].x
#define COLOR_MAT_Z02_im COLOR_MAT_Z[2].y
#define COLOR_MAT_Z10_re COLOR_MAT_Z[3].x
#define COLOR_MAT_Z10_im COLOR_MAT_Z[3].y
#define COLOR_MAT_Z11_re COLOR_MAT_Z[4].x
#define COLOR_MAT_Z11_im COLOR_MAT_Z[4].y
#define COLOR_MAT_Z12_re COLOR_MAT_Z[5].x
#define COLOR_MAT_Z12_im COLOR_MAT_Z[5].y
#define COLOR_MAT_Z20_re COLOR_MAT_Z[6].x
#define COLOR_MAT_Z20_im COLOR_MAT_Z[6].y
#define COLOR_MAT_Z21_re COLOR_MAT_Z[7].x
#define COLOR_MAT_Z21_im COLOR_MAT_Z[7].y
#define COLOR_MAT_Z22_re COLOR_MAT_Z[8].x
#define COLOR_MAT_Z22_im COLOR_MAT_Z[8].y

/*
 * FF_RECONSTRUCT_LINK_12(var, sign)
 *
 * Writes the third row of matrix `var` (entries 20, 21, 22) from its first two
 * rows and then multiplies the real and imaginary parts of those three entries
 * by `sign`.
 *
 * Each entry is produced by ACC_CONJ_PROD_ASSIGN followed by ACC_CONJ_PROD;
 * both macros are defined outside this header.
 * NOTE(review): presumably they assign / accumulate the conjugate of a complex
 * product, which would make the new row the conjugated cross product of rows
 * 0 and 1 -- confirm against their definitions.
 *
 * The expansion is a bare sequence of statements (no do/while wrapper), so
 * enclose the call in braces when it is the body of an if/else or a loop.
 */
#define FF_RECONSTRUCT_LINK_12(var, sign) \
  ACC_CONJ_PROD_ASSIGN(var##20, +var##01, +var##12); \
  ACC_CONJ_PROD(var##20, -var##02, +var##11); \
  ACC_CONJ_PROD_ASSIGN(var##21, +var##02, +var##10); \
  ACC_CONJ_PROD(var##21, -var##00, +var##12); \
  ACC_CONJ_PROD_ASSIGN(var##22, +var##00, +var##11); \
  ACC_CONJ_PROD(var##22, -var##01, +var##10); \
  var##20_re *= (sign); var##20_im *= (sign); \
  var##21_re *= (sign); var##21_im *= (sign); \
  var##22_re *= (sign); var##22_im *= (sign);

/*
 * HISQ_LOAD_MATRIX_18_SINGLE_TEX(gauge, dir, idx, var, stride)
 *
 * Fetches nine elements through tex1Dfetch() into var[0..8].
 * Element k is read from index  idx + dir*stride*9 + k*stride.
 *
 * Every argument is parenthesised in the expansion so that expression
 * arguments (e.g. `dir + 1`) index correctly.  `idx`, `dir` and `stride`
 * are evaluated several times; do not pass expressions with side effects.
 */
#define HISQ_LOAD_MATRIX_18_SINGLE_TEX(gauge, dir, idx, var, stride) do { \
    (var)[0] = tex1Dfetch(gauge, (idx) + (dir)*(stride)*9); \
    (var)[1] = tex1Dfetch(gauge, (idx) + (dir)*(stride)*9 + (stride)); \
    (var)[2] = tex1Dfetch(gauge, (idx) + (dir)*(stride)*9 + (stride)*2); \
    (var)[3] = tex1Dfetch(gauge, (idx) + (dir)*(stride)*9 + (stride)*3); \
    (var)[4] = tex1Dfetch(gauge, (idx) + (dir)*(stride)*9 + (stride)*4); \
    (var)[5] = tex1Dfetch(gauge, (idx) + (dir)*(stride)*9 + (stride)*5); \
    (var)[6] = tex1Dfetch(gauge, (idx) + (dir)*(stride)*9 + (stride)*6); \
    (var)[7] = tex1Dfetch(gauge, (idx) + (dir)*(stride)*9 + (stride)*7); \
    (var)[8] = tex1Dfetch(gauge, (idx) + (dir)*(stride)*9 + (stride)*8); \
  } while (0)

/*
 * HISQ_LOAD_MATRIX_12_SINGLE_TEX(gauge, dir, idx, var, stride)
 *
 * Fetches three float4 values through tex1Dfetch() and unpacks each into two
 * float2 values: (x, y) -> var[2k], (z, w) -> var[2k+1], filling var[0..5].
 * Fetch k is read from index  idx + dir*stride*3 + k*stride.
 * var[6..8] are not written by this macro.
 *
 * Every argument is parenthesised in the expansion so that expression
 * arguments index correctly; the scratch variable carries a macro-specific
 * name so it cannot shadow a caller variable called `tmp`.  `idx`, `dir`
 * and `stride` are evaluated several times.
 */
#define HISQ_LOAD_MATRIX_12_SINGLE_TEX(gauge, dir, idx, var, stride) do { \
    float4 hisq_link_pair_; \
    hisq_link_pair_ = tex1Dfetch(gauge, (idx) + (dir)*(stride)*3); \
    (var)[0] = make_float2(hisq_link_pair_.x, hisq_link_pair_.y); \
    (var)[1] = make_float2(hisq_link_pair_.z, hisq_link_pair_.w); \
    hisq_link_pair_ = tex1Dfetch(gauge, (idx) + (dir)*(stride)*3 + (stride)); \
    (var)[2] = make_float2(hisq_link_pair_.x, hisq_link_pair_.y); \
    (var)[3] = make_float2(hisq_link_pair_.z, hisq_link_pair_.w); \
    hisq_link_pair_ = tex1Dfetch(gauge, (idx) + (dir)*(stride)*3 + 2*(stride)); \
    (var)[4] = make_float2(hisq_link_pair_.x, hisq_link_pair_.y); \
    (var)[5] = make_float2(hisq_link_pair_.z, hisq_link_pair_.w); \
  } while (0)

/*
 * HISQ_LOAD_MATRIX_18_DOUBLE_TEX(gauge_tex, gauge, dir, idx, var, stride)
 *
 * Reads nine elements through READ_DOUBLE2_TEXTURE(gauge_tex, gauge, index)
 * (defined outside this header) into var[0..8].
 * Element k is read from index  idx + dir*stride*9 + k*stride.
 *
 * Every argument is parenthesised in the expansion so that expression
 * arguments index correctly.  `idx`, `dir` and `stride` are evaluated
 * several times; do not pass expressions with side effects.
 */
#define HISQ_LOAD_MATRIX_18_DOUBLE_TEX(gauge_tex, gauge, dir, idx, var, stride) do { \
    (var)[0] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*9); \
    (var)[1] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*9 + (stride)); \
    (var)[2] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*9 + (stride)*2); \
    (var)[3] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*9 + (stride)*3); \
    (var)[4] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*9 + (stride)*4); \
    (var)[5] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*9 + (stride)*5); \
    (var)[6] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*9 + (stride)*6); \
    (var)[7] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*9 + (stride)*7); \
    (var)[8] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*9 + (stride)*8); \
  } while (0)

/*
 * HISQ_LOAD_MATRIX_12_DOUBLE_TEX(gauge_tex, gauge, dir, idx, var, stride)
 *
 * Reads six elements through READ_DOUBLE2_TEXTURE(gauge_tex, gauge, index)
 * (defined outside this header) into var[0..5].
 * Element k is read from index  idx + dir*stride*6 + k*stride.
 * var[6..8] are not written by this macro.
 *
 * Every argument is parenthesised in the expansion so that expression
 * arguments index correctly.  `idx`, `dir` and `stride` are evaluated
 * several times; do not pass expressions with side effects.
 */
#define HISQ_LOAD_MATRIX_12_DOUBLE_TEX(gauge_tex, gauge, dir, idx, var, stride) do { \
    (var)[0] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*6); \
    (var)[1] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*6 + (stride)); \
    (var)[2] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*6 + (stride)*2); \
    (var)[3] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*6 + (stride)*3); \
    (var)[4] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*6 + (stride)*4); \
    (var)[5] = READ_DOUBLE2_TEXTURE(gauge_tex, gauge, (idx) + (dir)*(stride)*6 + (stride)*5); \
  } while (0)

/*
 * FF_COMPUTE_NEW_FULL_IDX_PLUS_UPDATE(mydir, idx, new_idx)
 *
 * Moves one site in the positive `mydir` direction (0..3): stores the linear
 * index of the neighbour of `idx` in `new_idx` and updates the coordinate
 * new_x[mydir] in place.  `new_x` and the lattice constants (X1, X1m1, E1,
 * xcomm, ...) are taken from the scope in which the macro is expanded.
 * A `mydir` outside 0..3 leaves `new_idx` and `new_x` untouched.
 *
 * MULTI_GPU build:
 *   - if the direction's comm flag (xcomm/ycomm/zcomm/tcomm) is set, or the
 *     coordinate is not X<d>+1, the index advances by one stride
 *     (1, E1, E2E1, E3E2E1) and the coordinate is incremented;
 *   - otherwise the index moves back by X<d>m1 strides and the coordinate is
 *     reset to 2.
 *   - WARNING: the macro then executes `return` in the enclosing function
 *     when new_x[mydir] >= E[mydir].
 *   NOTE(review): the constants 2 and X<d>+1 suggest coordinates on a lattice
 *   extended by a two-site border -- confirm with the kernel that sets new_x.
 *
 * Single-GPU build: periodic wrap; at coordinate X<d>m1 the coordinate becomes
 * 0 and the index moves back to the start of the line, otherwise both advance
 * by one site.
 *
 * The index is computed before new_x[mydir] is modified; both ternaries test
 * the same pre-update coordinate, so the statement order must be kept.
 */
#ifdef MULTI_GPU

#define FF_COMPUTE_NEW_FULL_IDX_PLUS_UPDATE(mydir, idx, new_idx) do { \
    switch (mydir) { \
    case 0: \
      new_idx = (xcomm || (new_x[0] != X1+1)) ? ((idx)+1) : ((idx)-X1m1); \
      new_x[0] = (xcomm || (new_x[0] != X1+1)) ? (new_x[0]+1) : 2; \
      break; \
    case 1: \
      new_idx = (ycomm || (new_x[1] != X2+1)) ? ((idx)+E1) : ((idx)-X2m1*E1); \
      new_x[1] = (ycomm || (new_x[1] != X2+1)) ? (new_x[1]+1) : 2; \
      break; \
    case 2: \
      new_idx = (zcomm || (new_x[2] != X3+1)) ? ((idx)+E2E1) : ((idx)-X3m1*E2E1); \
      new_x[2] = (zcomm || (new_x[2] != X3+1)) ? (new_x[2]+1) : 2; \
      break; \
    case 3: \
      new_idx = (tcomm || (new_x[3] != X4+1)) ? ((idx)+E3E2E1) : ((idx)-X4m1*E3E2E1); \
      new_x[3] = (tcomm || (new_x[3] != X4+1)) ? (new_x[3]+1) : 2; \
      break; \
    } \
    if (new_x[mydir] >= E[mydir]) return; \
  } while (0)

#else

#define FF_COMPUTE_NEW_FULL_IDX_PLUS_UPDATE(mydir, idx, new_idx) do { \
    switch (mydir) { \
    case 0: \
      new_idx = (new_x[0] == X1m1) ? ((idx)-X1m1) : ((idx)+1); \
      new_x[0] = (new_x[0] == X1m1) ? 0 : new_x[0]+1; \
      break; \
    case 1: \
      new_idx = (new_x[1] == X2m1) ? ((idx)-X2X1mX1) : ((idx)+X1); \
      new_x[1] = (new_x[1] == X2m1) ? 0 : new_x[1]+1; \
      break; \
    case 2: \
      new_idx = (new_x[2] == X3m1) ? ((idx)-X3X2X1mX2X1) : ((idx)+X2X1); \
      new_x[2] = (new_x[2] == X3m1) ? 0 : new_x[2]+1; \
      break; \
    case 3: \
      new_idx = (new_x[3] == X4m1) ? ((idx)-X4X3X2X1mX3X2X1) : ((idx)+X3X2X1); \
      new_x[3] = (new_x[3] == X4m1) ? 0 : new_x[3]+1; \
      break; \
    } \
  } while (0)

#endif

/*
 * FF_COMPUTE_NEW_FULL_IDX_MINUS_UPDATE(mydir, idx, new_idx)
 *
 * Moves one site in the negative `mydir` direction (0..3): stores the linear
 * index of the neighbour of `idx` in `new_idx` and updates the coordinate
 * new_x[mydir] in place.  `new_x` and the lattice constants (X1, X1m1, E1,
 * xcomm, ...) are taken from the scope in which the macro is expanded.
 * A `mydir` outside 0..3 leaves `new_idx` and `new_x` untouched.
 *
 * MULTI_GPU build:
 *   - if the direction's comm flag (xcomm/ycomm/zcomm/tcomm) is set, or the
 *     coordinate is not 2, the index retreats by one stride
 *     (1, E1, E2E1, E3E2E1) and the coordinate is decremented;
 *   - otherwise the index moves forward by X<d>m1 strides and the coordinate
 *     is set to X<d>+1.
 *   - WARNING: the macro then executes `return` in the enclosing function
 *     when new_x[mydir] < 0.
 *   NOTE(review): the constants 2 and X<d>+1 suggest coordinates on a lattice
 *   extended by a two-site border -- confirm with the kernel that sets new_x.
 *
 * Single-GPU build: periodic wrap; at coordinate 0 the coordinate becomes
 * X<d>m1 and the index moves to the end of the line, otherwise both retreat
 * by one site.
 *
 * The index is computed before new_x[mydir] is modified; both ternaries test
 * the same pre-update coordinate, so the statement order must be kept.
 */
#ifdef MULTI_GPU

#define FF_COMPUTE_NEW_FULL_IDX_MINUS_UPDATE(mydir, idx, new_idx) do { \
    switch (mydir) { \
    case 0: \
      new_idx = (xcomm || (new_x[0] != 2)) ? ((idx)-1) : ((idx)+X1m1); \
      new_x[0] = (xcomm || (new_x[0] != 2)) ? (new_x[0]-1) : (X1+1); \
      break; \
    case 1: \
      new_idx = (ycomm || (new_x[1] != 2)) ? ((idx)-E1) : ((idx)+X2m1*E1); \
      new_x[1] = (ycomm || (new_x[1] != 2)) ? (new_x[1]-1) : (X2+1); \
      break; \
    case 2: \
      new_idx = (zcomm || (new_x[2] != 2)) ? ((idx)-E2E1) : ((idx)+X3m1*E2E1); \
      new_x[2] = (zcomm || (new_x[2] != 2)) ? (new_x[2]-1) : (X3+1); \
      break; \
    case 3: \
      new_idx = (tcomm || (new_x[3] != 2)) ? ((idx)-E3E2E1) : ((idx)+X4m1*E3E2E1); \
      new_x[3] = (tcomm || (new_x[3] != 2)) ? (new_x[3]-1) : (X4+1); \
      break; \
    } \
    if (new_x[mydir] < 0) return; \
  } while (0)

#else

#define FF_COMPUTE_NEW_FULL_IDX_MINUS_UPDATE(mydir, idx, new_idx) do { \
    switch (mydir) { \
    case 0: \
      new_idx = (new_x[0] == 0) ? ((idx)+X1m1) : ((idx)-1); \
      new_x[0] = (new_x[0] == 0) ? X1m1 : new_x[0] - 1; \
      break; \
    case 1: \
      new_idx = (new_x[1] == 0) ? ((idx)+X2X1mX1) : ((idx)-X1); \
      new_x[1] = (new_x[1] == 0) ? X2m1 : new_x[1] - 1; \
      break; \
    case 2: \
      new_idx = (new_x[2] == 0) ? ((idx)+X3X2X1mX2X1) : ((idx)-X2X1); \
      new_x[2] = (new_x[2] == 0) ? X3m1 : new_x[2] - 1; \
      break; \
    case 3: \
      new_idx = (new_x[3] == 0) ? ((idx)+X4X3X2X1mX3X2X1) : ((idx)-X3X2X1); \
      new_x[3] = (new_x[3] == 0) ? X4m1 : new_x[3] - 1; \
      break; \
    } \
  } while (0)

#endif

// matrix macros:

/*
 * ADJ_MAT(a, b):  b = adjoint (conjugate transpose) of a.
 *
 * `a` and `b` are name prefixes; the macro reads a<r><c>_re / _im and writes
 * b<c><r>_re = a<r><c>_re, b<c><r>_im = -a<r><c>_im for all nine entries.
 *
 * `a` and `b` must name different matrices: entries are written one at a
 * time, so an in-place call would read already-overwritten off-diagonal
 * values.  The expansion is a bare statement sequence (no do/while wrapper);
 * brace it when used as the body of an if/else or a loop.
 */
#define ADJ_MAT(a, b) \
  b##00_re =  a##00_re; \
  b##00_im = -a##00_im; \
  b##01_re =  a##10_re; \
  b##01_im = -a##10_im; \
  b##02_re =  a##20_re; \
  b##02_im = -a##20_im; \
  b##10_re =  a##01_re; \
  b##10_im = -a##01_im; \
  b##11_re =  a##11_re; \
  b##11_im = -a##11_im; \
  b##12_re =  a##21_re; \
  b##12_im = -a##21_im; \
  b##20_re =  a##02_re; \
  b##20_im = -a##02_im; \
  b##21_re =  a##12_re; \
  b##21_im = -a##12_im; \
  b##22_re =  a##22_re; \
  b##22_im = -a##22_im;

/*
 * ASSIGN_MAT(a, b):  b = a, entry by entry.
 *
 * `a` and `b` are name prefixes; all nine complex entries a<r><c>_re / _im
 * are copied to b<r><c>_re / _im.
 *
 * The final statement deliberately has no trailing backslash: a continuation
 * there would splice whatever line follows the macro into its body.
 * The expansion is a bare statement sequence (no do/while wrapper); brace it
 * when used as the body of an if/else or a loop.
 */
#define ASSIGN_MAT(a, b) \
  b##00_re = a##00_re; \
  b##00_im = a##00_im; \
  b##01_re = a##01_re; \
  b##01_im = a##01_im; \
  b##02_re = a##02_re; \
  b##02_im = a##02_im; \
  b##10_re = a##10_re; \
  b##10_im = a##10_im; \
  b##11_re = a##11_re; \
  b##11_im = a##11_im; \
  b##12_re = a##12_re; \
  b##12_im = a##12_im; \
  b##20_re = a##20_re; \
  b##20_im = a##20_im; \
  b##21_re = a##21_re; \
  b##21_im = a##21_im; \
  b##22_re = a##22_re; \
  b##22_im = a##22_im;

/*
 * Private helpers: real / imaginary part of a three-term sum of complex
 * products, written as a flat, unparenthesised expression so that the
 * floating-point evaluation order is exactly left to right.
 * Arguments are name prefixes (e.g. A01); `_re` / `_im` is appended here.
 * Use only as the complete right-hand side of an assignment.
 *
 *   HISQ_DOT3_*      : x0*y0 + x1*y1 + x2*y2
 *   HISQ_ADJ_DOT3_*  : conj(x0)*y0 + conj(x1)*y1 + conj(x2)*y2
 */
#ifndef HISQ_DOT3_RE
#define HISQ_DOT3_RE(x0, y0, x1, y1, x2, y2) \
  x0##_re*y0##_re - x0##_im*y0##_im + x1##_re*y1##_re - x1##_im*y1##_im + x2##_re*y2##_re - x2##_im*y2##_im
#define HISQ_DOT3_IM(x0, y0, x1, y1, x2, y2) \
  x0##_re*y0##_im + x0##_im*y0##_re + x1##_re*y1##_im + x1##_im*y1##_re + x2##_re*y2##_im + x2##_im*y2##_re
#endif

#ifndef HISQ_ADJ_DOT3_RE
#define HISQ_ADJ_DOT3_RE(x0, y0, x1, y1, x2, y2) \
  x0##_re*y0##_re + x0##_im*y0##_im + x1##_re*y1##_re + x1##_im*y1##_im + x2##_re*y2##_re + x2##_im*y2##_im
#define HISQ_ADJ_DOT3_IM(x0, y0, x1, y1, x2, y2) \
  x0##_re*y0##_im - x0##_im*y0##_re + x1##_re*y1##_im - x1##_im*y1##_re + x2##_re*y2##_im - x2##_im*y2##_re
#endif

/*
 * MATRIX_PRODUCT(a, b, simple, c)
 *
 *   simple != 0 :  c = a * b            (c_ij = sum_k a_ik * b_kj)
 *   simple == 0 :  c = adjoint(a) * b   (c_ij = sum_k conj(a_ki) * b_kj)
 *
 * `a`, `b`, `c` are name prefixes of 3x3 complex matrices.  `c` must not be
 * the same matrix as `a` or `b`: entries of c are written while a and b are
 * still being read.
 */
#define MATRIX_PRODUCT(a, b, simple, c) do { \
    if (simple) { \
      c##00_re = HISQ_DOT3_RE(a##00, b##00, a##01, b##10, a##02, b##20); \
      c##00_im = HISQ_DOT3_IM(a##00, b##00, a##01, b##10, a##02, b##20); \
      c##01_re = HISQ_DOT3_RE(a##00, b##01, a##01, b##11, a##02, b##21); \
      c##01_im = HISQ_DOT3_IM(a##00, b##01, a##01, b##11, a##02, b##21); \
      c##02_re = HISQ_DOT3_RE(a##00, b##02, a##01, b##12, a##02, b##22); \
      c##02_im = HISQ_DOT3_IM(a##00, b##02, a##01, b##12, a##02, b##22); \
      c##10_re = HISQ_DOT3_RE(a##10, b##00, a##11, b##10, a##12, b##20); \
      c##10_im = HISQ_DOT3_IM(a##10, b##00, a##11, b##10, a##12, b##20); \
      c##11_re = HISQ_DOT3_RE(a##10, b##01, a##11, b##11, a##12, b##21); \
      c##11_im = HISQ_DOT3_IM(a##10, b##01, a##11, b##11, a##12, b##21); \
      c##12_re = HISQ_DOT3_RE(a##10, b##02, a##11, b##12, a##12, b##22); \
      c##12_im = HISQ_DOT3_IM(a##10, b##02, a##11, b##12, a##12, b##22); \
      c##20_re = HISQ_DOT3_RE(a##20, b##00, a##21, b##10, a##22, b##20); \
      c##20_im = HISQ_DOT3_IM(a##20, b##00, a##21, b##10, a##22, b##20); \
      c##21_re = HISQ_DOT3_RE(a##20, b##01, a##21, b##11, a##22, b##21); \
      c##21_im = HISQ_DOT3_IM(a##20, b##01, a##21, b##11, a##22, b##21); \
      c##22_re = HISQ_DOT3_RE(a##20, b##02, a##21, b##12, a##22, b##22); \
      c##22_im = HISQ_DOT3_IM(a##20, b##02, a##21, b##12, a##22, b##22); \
    } else { \
      c##00_re = HISQ_ADJ_DOT3_RE(a##00, b##00, a##10, b##10, a##20, b##20); \
      c##00_im = HISQ_ADJ_DOT3_IM(a##00, b##00, a##10, b##10, a##20, b##20); \
      c##01_re = HISQ_ADJ_DOT3_RE(a##00, b##01, a##10, b##11, a##20, b##21); \
      c##01_im = HISQ_ADJ_DOT3_IM(a##00, b##01, a##10, b##11, a##20, b##21); \
      c##02_re = HISQ_ADJ_DOT3_RE(a##00, b##02, a##10, b##12, a##20, b##22); \
      c##02_im = HISQ_ADJ_DOT3_IM(a##00, b##02, a##10, b##12, a##20, b##22); \
      c##10_re = HISQ_ADJ_DOT3_RE(a##01, b##00, a##11, b##10, a##21, b##20); \
      c##10_im = HISQ_ADJ_DOT3_IM(a##01, b##00, a##11, b##10, a##21, b##20); \
      c##11_re = HISQ_ADJ_DOT3_RE(a##01, b##01, a##11, b##11, a##21, b##21); \
      c##11_im = HISQ_ADJ_DOT3_IM(a##01, b##01, a##11, b##11, a##21, b##21); \
      c##12_re = HISQ_ADJ_DOT3_RE(a##01, b##02, a##11, b##12, a##21, b##22); \
      c##12_im = HISQ_ADJ_DOT3_IM(a##01, b##02, a##11, b##12, a##21, b##22); \
      c##20_re = HISQ_ADJ_DOT3_RE(a##02, b##00, a##12, b##10, a##22, b##20); \
      c##20_im = HISQ_ADJ_DOT3_IM(a##02, b##00, a##12, b##10, a##22, b##20); \
      c##21_re = HISQ_ADJ_DOT3_RE(a##02, b##01, a##12, b##11, a##22, b##21); \
      c##21_im = HISQ_ADJ_DOT3_IM(a##02, b##01, a##12, b##11, a##22, b##21); \
      c##22_re = HISQ_ADJ_DOT3_RE(a##02, b##02, a##12, b##12, a##22, b##22); \
      c##22_im = HISQ_ADJ_DOT3_IM(a##02, b##02, a##12, b##12, a##22, b##22); \
    } \
  } while (0)

/*
 * Private helpers: real / imaginary part of x0*y0 + x1*y1 + x2*y2 for complex
 * operands, written as a flat, unparenthesised expression so that the
 * floating-point evaluation order is exactly left to right.
 * Arguments are name prefixes (e.g. A01); `_re` / `_im` is appended here.
 * Use only as the complete right-hand side of an assignment.
 */
#ifndef HISQ_DOT3_RE
#define HISQ_DOT3_RE(x0, y0, x1, y1, x2, y2) \
  x0##_re*y0##_re - x0##_im*y0##_im + x1##_re*y1##_re - x1##_im*y1##_im + x2##_re*y2##_re - x2##_im*y2##_im
#define HISQ_DOT3_IM(x0, y0, x1, y1, x2, y2) \
  x0##_re*y0##_im + x0##_im*y0##_re + x1##_re*y1##_im + x1##_im*y1##_re + x2##_re*y2##_im + x2##_im*y2##_re
#endif

/*
 * MAT_MUL_MAT(a, b, c):  c = a * b   (c_ij = sum_k a_ik * b_kj)
 *
 * `a`, `b`, `c` are name prefixes of 3x3 complex matrices.  `c` must not be
 * the same matrix as `a` or `b`: entries of c are written while a and b are
 * still being read.  The expansion is a bare statement sequence (no do/while
 * wrapper); brace it when used as the body of an if/else or a loop.
 */
#define MAT_MUL_MAT(a, b, c) \
  c##00_re = HISQ_DOT3_RE(a##00, b##00, a##01, b##10, a##02, b##20); \
  c##00_im = HISQ_DOT3_IM(a##00, b##00, a##01, b##10, a##02, b##20); \
  c##01_re = HISQ_DOT3_RE(a##00, b##01, a##01, b##11, a##02, b##21); \
  c##01_im = HISQ_DOT3_IM(a##00, b##01, a##01, b##11, a##02, b##21); \
  c##02_re = HISQ_DOT3_RE(a##00, b##02, a##01, b##12, a##02, b##22); \
  c##02_im = HISQ_DOT3_IM(a##00, b##02, a##01, b##12, a##02, b##22); \
  c##10_re = HISQ_DOT3_RE(a##10, b##00, a##11, b##10, a##12, b##20); \
  c##10_im = HISQ_DOT3_IM(a##10, b##00, a##11, b##10, a##12, b##20); \
  c##11_re = HISQ_DOT3_RE(a##10, b##01, a##11, b##11, a##12, b##21); \
  c##11_im = HISQ_DOT3_IM(a##10, b##01, a##11, b##11, a##12, b##21); \
  c##12_re = HISQ_DOT3_RE(a##10, b##02, a##11, b##12, a##12, b##22); \
  c##12_im = HISQ_DOT3_IM(a##10, b##02, a##11, b##12, a##12, b##22); \
  c##20_re = HISQ_DOT3_RE(a##20, b##00, a##21, b##10, a##22, b##20); \
  c##20_im = HISQ_DOT3_IM(a##20, b##00, a##21, b##10, a##22, b##20); \
  c##21_re = HISQ_DOT3_RE(a##20, b##01, a##21, b##11, a##22, b##21); \
  c##21_im = HISQ_DOT3_IM(a##20, b##01, a##21, b##11, a##22, b##21); \
  c##22_re = HISQ_DOT3_RE(a##20, b##02, a##21, b##12, a##22, b##22); \
  c##22_im = HISQ_DOT3_IM(a##20, b##02, a##21, b##12, a##22, b##22);

/*
 * Private helpers: real / imaginary part of
 *   x0*conj(y0) + x1*conj(y1) + x2*conj(y2)
 * written as a flat, unparenthesised expression so that the floating-point
 * evaluation order is exactly left to right (the imaginary part starts with
 * a negated term).
 * Arguments are name prefixes (e.g. A01); `_re` / `_im` is appended here.
 * Use only as the complete right-hand side of an assignment.
 */
#ifndef HISQ_DOT3_ADJ_RE
#define HISQ_DOT3_ADJ_RE(x0, y0, x1, y1, x2, y2) \
  x0##_re*y0##_re + x0##_im*y0##_im + x1##_re*y1##_re + x1##_im*y1##_im + x2##_re*y2##_re + x2##_im*y2##_im
#define HISQ_DOT3_ADJ_IM(x0, y0, x1, y1, x2, y2) \
  - x0##_re*y0##_im + x0##_im*y0##_re - x1##_re*y1##_im + x1##_im*y1##_re - x2##_re*y2##_im + x2##_im*y2##_re
#endif

/*
 * MAT_MUL_ADJ_MAT(a, b, c):  c = a * adjoint(b)   (c_ij = sum_k a_ik * conj(b_jk))
 *
 * `a`, `b`, `c` are name prefixes of 3x3 complex matrices.  `c` must not be
 * the same matrix as `a` or `b`: entries of c are written while a and b are
 * still being read.  The expansion is a bare statement sequence (no do/while
 * wrapper); brace it when used as the body of an if/else or a loop.
 */
#define MAT_MUL_ADJ_MAT(a, b, c) \
  c##00_re = HISQ_DOT3_ADJ_RE(a##00, b##00, a##01, b##01, a##02, b##02); \
  c##00_im = HISQ_DOT3_ADJ_IM(a##00, b##00, a##01, b##01, a##02, b##02); \
  c##01_re = HISQ_DOT3_ADJ_RE(a##00, b##10, a##01, b##11, a##02, b##12); \
  c##01_im = HISQ_DOT3_ADJ_IM(a##00, b##10, a##01, b##11, a##02, b##12); \
  c##02_re = HISQ_DOT3_ADJ_RE(a##00, b##20, a##01, b##21, a##02, b##22); \
  c##02_im = HISQ_DOT3_ADJ_IM(a##00, b##20, a##01, b##21, a##02, b##22); \
  c##10_re = HISQ_DOT3_ADJ_RE(a##10, b##00, a##11, b##01, a##12, b##02); \
  c##10_im = HISQ_DOT3_ADJ_IM(a##10, b##00, a##11, b##01, a##12, b##02); \
  c##11_re = HISQ_DOT3_ADJ_RE(a##10, b##10, a##11, b##11, a##12, b##12); \
  c##11_im = HISQ_DOT3_ADJ_IM(a##10, b##10, a##11, b##11, a##12, b##12); \
  c##12_re = HISQ_DOT3_ADJ_RE(a##10, b##20, a##11, b##21, a##12, b##22); \
  c##12_im = HISQ_DOT3_ADJ_IM(a##10, b##20, a##11, b##21, a##12, b##22); \
  c##20_re = HISQ_DOT3_ADJ_RE(a##20, b##00, a##21, b##01, a##22, b##02); \
  c##20_im = HISQ_DOT3_ADJ_IM(a##20, b##00, a##21, b##01, a##22, b##02); \
  c##21_re = HISQ_DOT3_ADJ_RE(a##20, b##10, a##21, b##11, a##22, b##12); \
  c##21_im = HISQ_DOT3_ADJ_IM(a##20, b##10, a##21, b##11, a##22, b##12); \
  c##22_re = HISQ_DOT3_ADJ_RE(a##20, b##20, a##21, b##21, a##22, b##22); \
  c##22_im = HISQ_DOT3_ADJ_IM(a##20, b##20, a##21, b##21, a##22, b##22);

// ADJ_MAT_MUL_MAT(a, b, c):  c = adjoint(a) * b  for 3x3 complex matrices.
//
// Component-wise:  c_ij = sum_k conj(a_ki) * b_kj,   i, j, k in {0, 1, 2}
//
//   a, b : identifier prefixes of the input matrices.  Components are reached
//          purely by token pasting (a##ij_re / a##ij_im), so each argument
//          must be a bare name prefix, never an expression.
//   c    : identifier prefix of the output matrix; all 18 components are
//          overwritten.
//
// c must not name the same storage as a or b: components of c are stored
// while later statements still read a and b, so an in-place call would mix
// old and new values.
//
// The body is wrapped in do{ }while(0) so the macro expands to exactly one
// statement and is safe inside an unbraced if/else/for.  Invoke it with a
// trailing semicolon:  ADJ_MAT_MUL_MAT(A, B, C);
#define ADJ_MAT_MUL_MAT(a, b, c) do{ \
  c##00_re = a##00_re*b##00_re + a##00_im*b##00_im \
           + a##10_re*b##10_re + a##10_im*b##10_im \
           + a##20_re*b##20_re + a##20_im*b##20_im; \
  c##00_im = a##00_re*b##00_im - a##00_im*b##00_re \
           + a##10_re*b##10_im - a##10_im*b##10_re \
           + a##20_re*b##20_im - a##20_im*b##20_re; \
  c##01_re = a##00_re*b##01_re + a##00_im*b##01_im \
           + a##10_re*b##11_re + a##10_im*b##11_im \
           + a##20_re*b##21_re + a##20_im*b##21_im; \
  c##01_im = a##00_re*b##01_im - a##00_im*b##01_re \
           + a##10_re*b##11_im - a##10_im*b##11_re \
           + a##20_re*b##21_im - a##20_im*b##21_re; \
  c##02_re = a##00_re*b##02_re + a##00_im*b##02_im \
           + a##10_re*b##12_re + a##10_im*b##12_im \
           + a##20_re*b##22_re + a##20_im*b##22_im; \
  c##02_im = a##00_re*b##02_im - a##00_im*b##02_re \
           + a##10_re*b##12_im - a##10_im*b##12_re \
           + a##20_re*b##22_im - a##20_im*b##22_re; \
  c##10_re = a##01_re*b##00_re + a##01_im*b##00_im \
           + a##11_re*b##10_re + a##11_im*b##10_im \
           + a##21_re*b##20_re + a##21_im*b##20_im; \
  c##10_im = a##01_re*b##00_im - a##01_im*b##00_re \
           + a##11_re*b##10_im - a##11_im*b##10_re \
           + a##21_re*b##20_im - a##21_im*b##20_re; \
  c##11_re = a##01_re*b##01_re + a##01_im*b##01_im \
           + a##11_re*b##11_re + a##11_im*b##11_im \
           + a##21_re*b##21_re + a##21_im*b##21_im; \
  c##11_im = a##01_re*b##01_im - a##01_im*b##01_re \
           + a##11_re*b##11_im - a##11_im*b##11_re \
           + a##21_re*b##21_im - a##21_im*b##21_re; \
  c##12_re = a##01_re*b##02_re + a##01_im*b##02_im \
           + a##11_re*b##12_re + a##11_im*b##12_im \
           + a##21_re*b##22_re + a##21_im*b##22_im; \
  c##12_im = a##01_re*b##02_im - a##01_im*b##02_re \
           + a##11_re*b##12_im - a##11_im*b##12_re \
           + a##21_re*b##22_im - a##21_im*b##22_re; \
  c##20_re = a##02_re*b##00_re + a##02_im*b##00_im \
           + a##12_re*b##10_re + a##12_im*b##10_im \
           + a##22_re*b##20_re + a##22_im*b##20_im; \
  c##20_im = a##02_re*b##00_im - a##02_im*b##00_re \
           + a##12_re*b##10_im - a##12_im*b##10_re \
           + a##22_re*b##20_im - a##22_im*b##20_re; \
  c##21_re = a##02_re*b##01_re + a##02_im*b##01_im \
           + a##12_re*b##11_re + a##12_im*b##11_im \
           + a##22_re*b##21_re + a##22_im*b##21_im; \
  c##21_im = a##02_re*b##01_im - a##02_im*b##01_re \
           + a##12_re*b##11_im - a##12_im*b##11_re \
           + a##22_re*b##21_im - a##22_im*b##21_re; \
  c##22_re = a##02_re*b##02_re + a##02_im*b##02_im \
           + a##12_re*b##12_re + a##12_im*b##12_im \
           + a##22_re*b##22_re + a##22_im*b##22_im; \
  c##22_im = a##02_re*b##02_im - a##02_im*b##02_re \
           + a##12_re*b##12_im - a##12_im*b##12_re \
           + a##22_re*b##22_im - a##22_im*b##22_re; \
}while(0)
710
711
// ADJ_MAT_MUL_ADJ_MAT(a, b, c):  c = adjoint(a) * adjoint(b)  for 3x3 complex
// matrices.
//
// Component-wise:  c_ij = sum_k conj(a_ki) * conj(b_jk),   i, j, k in {0, 1, 2}
//
//   a, b : identifier prefixes of the input matrices.  Components are reached
//          purely by token pasting (a##ij_re / a##ij_im), so each argument
//          must be a bare name prefix, never an expression.
//   c    : identifier prefix of the output matrix; all 18 components are
//          overwritten.
//
// c must not name the same storage as a or b: components of c are stored
// while later statements still read a and b, so an in-place call would mix
// old and new values.
//
// The body is wrapped in do{ }while(0) so the macro expands to exactly one
// statement and is safe inside an unbraced if/else/for.  Invoke it with a
// trailing semicolon:  ADJ_MAT_MUL_ADJ_MAT(A, B, C);
#define ADJ_MAT_MUL_ADJ_MAT(a, b, c) do{ \
  c##00_re = a##00_re*b##00_re - a##00_im*b##00_im \
           + a##10_re*b##01_re - a##10_im*b##01_im \
           + a##20_re*b##02_re - a##20_im*b##02_im; \
  c##00_im = - a##00_re*b##00_im - a##00_im*b##00_re \
             - a##10_re*b##01_im - a##10_im*b##01_re \
             - a##20_re*b##02_im - a##20_im*b##02_re; \
  c##01_re = a##00_re*b##10_re - a##00_im*b##10_im \
           + a##10_re*b##11_re - a##10_im*b##11_im \
           + a##20_re*b##12_re - a##20_im*b##12_im; \
  c##01_im = - a##00_re*b##10_im - a##00_im*b##10_re \
             - a##10_re*b##11_im - a##10_im*b##11_re \
             - a##20_re*b##12_im - a##20_im*b##12_re; \
  c##02_re = a##00_re*b##20_re - a##00_im*b##20_im \
           + a##10_re*b##21_re - a##10_im*b##21_im \
           + a##20_re*b##22_re - a##20_im*b##22_im; \
  c##02_im = - a##00_re*b##20_im - a##00_im*b##20_re \
             - a##10_re*b##21_im - a##10_im*b##21_re \
             - a##20_re*b##22_im - a##20_im*b##22_re; \
  c##10_re = a##01_re*b##00_re - a##01_im*b##00_im \
           + a##11_re*b##01_re - a##11_im*b##01_im \
           + a##21_re*b##02_re - a##21_im*b##02_im; \
  c##10_im = - a##01_re*b##00_im - a##01_im*b##00_re \
             - a##11_re*b##01_im - a##11_im*b##01_re \
             - a##21_re*b##02_im - a##21_im*b##02_re; \
  c##11_re = a##01_re*b##10_re - a##01_im*b##10_im \
           + a##11_re*b##11_re - a##11_im*b##11_im \
           + a##21_re*b##12_re - a##21_im*b##12_im; \
  c##11_im = - a##01_re*b##10_im - a##01_im*b##10_re \
             - a##11_re*b##11_im - a##11_im*b##11_re \
             - a##21_re*b##12_im - a##21_im*b##12_re; \
  c##12_re = a##01_re*b##20_re - a##01_im*b##20_im \
           + a##11_re*b##21_re - a##11_im*b##21_im \
           + a##21_re*b##22_re - a##21_im*b##22_im; \
  c##12_im = - a##01_re*b##20_im - a##01_im*b##20_re \
             - a##11_re*b##21_im - a##11_im*b##21_re \
             - a##21_re*b##22_im - a##21_im*b##22_re; \
  c##20_re = a##02_re*b##00_re - a##02_im*b##00_im \
           + a##12_re*b##01_re - a##12_im*b##01_im \
           + a##22_re*b##02_re - a##22_im*b##02_im; \
  c##20_im = - a##02_re*b##00_im - a##02_im*b##00_re \
             - a##12_re*b##01_im - a##12_im*b##01_re \
             - a##22_re*b##02_im - a##22_im*b##02_re; \
  c##21_re = a##02_re*b##10_re - a##02_im*b##10_im \
           + a##12_re*b##11_re - a##12_im*b##11_im \
           + a##22_re*b##12_re - a##22_im*b##12_im; \
  c##21_im = - a##02_re*b##10_im - a##02_im*b##10_re \
             - a##12_re*b##11_im - a##12_im*b##11_re \
             - a##22_re*b##12_im - a##22_im*b##12_re; \
  c##22_re = a##02_re*b##20_re - a##02_im*b##20_im \
           + a##12_re*b##21_re - a##12_im*b##21_im \
           + a##22_re*b##22_re - a##22_im*b##22_im; \
  c##22_im = - a##02_re*b##20_im - a##02_im*b##20_re \
             - a##12_re*b##21_im - a##12_im*b##21_re \
             - a##22_re*b##22_im - a##22_im*b##22_re; \
}while(0)
730
731
// end of macros specific to hisq routines
732
733
734
// SCALAR_MULT_ADD_MATRIX(a, b, scalar, c):  c = a + scalar * b, applied
// independently to the real and imaginary part of every component of a 3x3
// complex matrix.
//
//   a, b   : identifier prefixes of the input matrices.  Components are
//            reached by token pasting (a##ij_re / a##ij_im), so each must be
//            a bare name prefix, never an expression.
//   scalar : multiplier applied to both the real and the imaginary parts.  It
//            is parenthesised in the expansion, so a compound argument such
//            as (x + y) or (-s) multiplies b as a whole.  It is evaluated once
//            per component (18 times); do not pass an expression with side
//            effects.
//   c      : identifier prefix of the output matrix.  Each component of c
//            depends only on the same component of a and b.
//
// Expands to a single do{ }while(0) statement; invoke with a trailing
// semicolon.
#define SCALAR_MULT_ADD_MATRIX(a, b, scalar, c) do{ \
  c##00_re = a##00_re + (scalar)*b##00_re; \
  c##00_im = a##00_im + (scalar)*b##00_im; \
  c##01_re = a##01_re + (scalar)*b##01_re; \
  c##01_im = a##01_im + (scalar)*b##01_im; \
  c##02_re = a##02_re + (scalar)*b##02_re; \
  c##02_im = a##02_im + (scalar)*b##02_im; \
  c##10_re = a##10_re + (scalar)*b##10_re; \
  c##10_im = a##10_im + (scalar)*b##10_im; \
  c##11_re = a##11_re + (scalar)*b##11_re; \
  c##11_im = a##11_im + (scalar)*b##11_im; \
  c##12_re = a##12_re + (scalar)*b##12_re; \
  c##12_im = a##12_im + (scalar)*b##12_im; \
  c##20_re = a##20_re + (scalar)*b##20_re; \
  c##20_im = a##20_im + (scalar)*b##20_im; \
  c##21_re = a##21_re + (scalar)*b##21_re; \
  c##21_im = a##21_im + (scalar)*b##21_im; \
  c##22_re = a##22_re + (scalar)*b##22_re; \
  c##22_im = a##22_im + (scalar)*b##22_im; \
}while(0)
754
755
#endif // GENERIC_MATRIX_MACROS
756
757
#endif // _HISQ_FORCE_MACROS_H_
Generated on Wed Feb 4 2015 17:00:11 for QUDA by Doxygen 1.8.6