1 #ifndef _TMC_GAMMA_CORE_H
2 #define _TMC_GAMMA_CORE_H
30 #ifdef USE_TEXTURE_OBJECTS
31 #define SPINORTEX param.inTex
33 #define SPINORTEX spinorTexDouble
// Named accessors for the stored entries of the "cd" table.
// Each name expands to the .x or .y component of one of C0..C17:
//   - six entries with identical index pairs (cdAB_AB) have only a real part
//     and are packed two per register in C0..C2;
//   - fifteen entries with differing index pairs have a real part in .x and
//     an imaginary part in .y of their own register, C3..C17.
// NOTE(review): two components per register suggests a double2 layout (a
// `double2 C0, ..., C17` declaration appears further down this listing) -- confirm.
// NOTE(review): the two digits of each index presumably encode (spin, color) -- confirm.
39 #define cd00_00_re C0.x
40 #define cd01_01_re C0.y
41 #define cd02_02_re C1.x
42 #define cd10_10_re C1.y
43 #define cd11_11_re C2.x
44 #define cd12_12_re C2.y
45 #define cd01_00_re C3.x
46 #define cd01_00_im C3.y
47 #define cd02_00_re C4.x
48 #define cd02_00_im C4.y
49 #define cd10_00_re C5.x
50 #define cd10_00_im C5.y
51 #define cd11_00_re C6.x
52 #define cd11_00_im C6.y
53 #define cd12_00_re C7.x
54 #define cd12_00_im C7.y
55 #define cd02_01_re C8.x
56 #define cd02_01_im C8.y
57 #define cd10_01_re C9.x
58 #define cd10_01_im C9.y
59 #define cd11_01_re C10.x
60 #define cd11_01_im C10.y
61 #define cd12_01_re C11.x
62 #define cd12_01_im C11.y
63 #define cd10_02_re C12.x
64 #define cd10_02_im C12.y
65 #define cd11_02_re C13.x
66 #define cd11_02_im C13.y
67 #define cd12_02_re C14.x
68 #define cd12_02_im C14.y
69 #define cd11_10_re C15.x
70 #define cd11_10_im C15.y
71 #define cd12_10_re C16.x
72 #define cd12_10_im C16.y
73 #define cd12_11_re C17.x
74 #define cd12_11_im C17.y
// Entries of the "cd" table with the two index pairs swapped.
// cdA_B is expressed through the stored cdB_A: same real part, negated
// imaginary part (i.e. the complex conjugate), so these need no storage.
// The parentheses keep the unary sign attached when a name is used inside a
// larger expression.
76 #define cd00_01_re (+cd01_00_re)
77 #define cd00_01_im (-cd01_00_im)
78 #define cd00_02_re (+cd02_00_re)
79 #define cd00_02_im (-cd02_00_im)
80 #define cd01_02_re (+cd02_01_re)
81 #define cd01_02_im (-cd02_01_im)
82 #define cd00_10_re (+cd10_00_re)
83 #define cd00_10_im (-cd10_00_im)
84 #define cd01_10_re (+cd10_01_re)
85 #define cd01_10_im (-cd10_01_im)
86 #define cd02_10_re (+cd10_02_re)
87 #define cd02_10_im (-cd10_02_im)
88 #define cd00_11_re (+cd11_00_re)
89 #define cd00_11_im (-cd11_00_im)
90 #define cd01_11_re (+cd11_01_re)
91 #define cd01_11_im (-cd11_01_im)
92 #define cd02_11_re (+cd11_02_re)
93 #define cd02_11_im (-cd11_02_im)
94 #define cd10_11_re (+cd11_10_re)
95 #define cd10_11_im (-cd11_10_im)
96 #define cd00_12_re (+cd12_00_re)
97 #define cd00_12_im (-cd12_00_im)
98 #define cd01_12_re (+cd12_01_re)
99 #define cd01_12_im (-cd12_01_im)
100 #define cd02_12_re (+cd12_02_re)
101 #define cd02_12_im (-cd12_02_im)
102 #define cd10_12_re (+cd12_10_re)
103 #define cd10_12_im (-cd12_10_im)
104 #define cd11_12_re (+cd12_11_re)
105 #define cd11_12_im (-cd12_11_im)
// Aliases for "cd" names whose leading index digits are 2 or 3.
// Each cd{2,3}x_{2,3}y name expands to the cd{0,1}x_{0,1}y name with both
// leading digits lowered by 2, so it resolves to the same C0..C17 components.
// NOTE(review): presumably the C registers are reloaded with a second block of
// data before these aliases are used -- confirm against the kernel bodies.
108 #define cd20_20_re cd00_00_re
109 #define cd21_20_re cd01_00_re
110 #define cd21_20_im cd01_00_im
111 #define cd22_20_re cd02_00_re
112 #define cd22_20_im cd02_00_im
113 #define cd30_20_re cd10_00_re
114 #define cd30_20_im cd10_00_im
115 #define cd31_20_re cd11_00_re
116 #define cd31_20_im cd11_00_im
117 #define cd32_20_re cd12_00_re
118 #define cd32_20_im cd12_00_im
119 #define cd20_21_re cd00_01_re
120 #define cd20_21_im cd00_01_im
121 #define cd21_21_re cd01_01_re
122 #define cd22_21_re cd02_01_re
123 #define cd22_21_im cd02_01_im
124 #define cd30_21_re cd10_01_re
125 #define cd30_21_im cd10_01_im
126 #define cd31_21_re cd11_01_re
127 #define cd31_21_im cd11_01_im
128 #define cd32_21_re cd12_01_re
129 #define cd32_21_im cd12_01_im
130 #define cd20_22_re cd00_02_re
131 #define cd20_22_im cd00_02_im
132 #define cd21_22_re cd01_02_re
133 #define cd21_22_im cd01_02_im
134 #define cd22_22_re cd02_02_re
135 #define cd30_22_re cd10_02_re
136 #define cd30_22_im cd10_02_im
137 #define cd31_22_re cd11_02_re
138 #define cd31_22_im cd11_02_im
139 #define cd32_22_re cd12_02_re
140 #define cd32_22_im cd12_02_im
141 #define cd20_30_re cd00_10_re
142 #define cd20_30_im cd00_10_im
143 #define cd21_30_re cd01_10_re
144 #define cd21_30_im cd01_10_im
145 #define cd22_30_re cd02_10_re
146 #define cd22_30_im cd02_10_im
147 #define cd30_30_re cd10_10_re
148 #define cd31_30_re cd11_10_re
149 #define cd31_30_im cd11_10_im
150 #define cd32_30_re cd12_10_re
151 #define cd32_30_im cd12_10_im
152 #define cd20_31_re cd00_11_re
153 #define cd20_31_im cd00_11_im
154 #define cd21_31_re cd01_11_re
155 #define cd21_31_im cd01_11_im
156 #define cd22_31_re cd02_11_re
157 #define cd22_31_im cd02_11_im
158 #define cd30_31_re cd10_11_re
159 #define cd30_31_im cd10_11_im
160 #define cd31_31_re cd11_11_re
161 #define cd32_31_re cd12_11_re
162 #define cd32_31_im cd12_11_im
163 #define cd20_32_re cd00_12_re
164 #define cd20_32_im cd00_12_im
165 #define cd21_32_re cd01_12_re
166 #define cd21_32_im cd01_12_im
167 #define cd22_32_re cd02_12_re
168 #define cd22_32_im cd02_12_im
169 #define cd30_32_re cd10_12_re
170 #define cd30_32_im cd10_12_im
171 #define cd31_32_re cd11_12_re
172 #define cd31_32_im cd11_12_im
173 #define cd32_32_re cd12_12_re
// Named accessors for the stored entries of the "c" table.
// Same set of entries as the cd* names, but packed four components
// (.x/.y/.z/.w) per register into C0..C8:
//   - six real-only entries with identical index pairs fill C0.x..C1.y;
//   - fifteen re/im pairs follow, two pairs per register, up to C8.w.
// NOTE(review): four components per register suggests a float4/short4-style
// layout for the non-double kernels -- confirm.
176 #define c00_00_re C0.x
177 #define c01_01_re C0.y
178 #define c02_02_re C0.z
179 #define c10_10_re C0.w
180 #define c11_11_re C1.x
181 #define c12_12_re C1.y
182 #define c01_00_re C1.z
183 #define c01_00_im C1.w
184 #define c02_00_re C2.x
185 #define c02_00_im C2.y
186 #define c10_00_re C2.z
187 #define c10_00_im C2.w
188 #define c11_00_re C3.x
189 #define c11_00_im C3.y
190 #define c12_00_re C3.z
191 #define c12_00_im C3.w
192 #define c02_01_re C4.x
193 #define c02_01_im C4.y
194 #define c10_01_re C4.z
195 #define c10_01_im C4.w
196 #define c11_01_re C5.x
197 #define c11_01_im C5.y
198 #define c12_01_re C5.z
199 #define c12_01_im C5.w
200 #define c10_02_re C6.x
201 #define c10_02_im C6.y
202 #define c11_02_re C6.z
203 #define c11_02_im C6.w
204 #define c12_02_re C7.x
205 #define c12_02_im C7.y
206 #define c11_10_re C7.z
207 #define c11_10_im C7.w
208 #define c12_10_re C8.x
209 #define c12_10_im C8.y
210 #define c12_11_re C8.z
211 #define c12_11_im C8.w
// Entries of the "c" table with the two index pairs swapped.
// cA_B reuses the stored cB_A with the real part unchanged and the imaginary
// part negated (complex conjugate); nothing extra is stored for them.
// Parenthesised so the sign survives substitution into larger expressions.
213 #define c00_01_re (+c01_00_re)
214 #define c00_01_im (-c01_00_im)
215 #define c00_02_re (+c02_00_re)
216 #define c00_02_im (-c02_00_im)
217 #define c01_02_re (+c02_01_re)
218 #define c01_02_im (-c02_01_im)
219 #define c00_10_re (+c10_00_re)
220 #define c00_10_im (-c10_00_im)
221 #define c01_10_re (+c10_01_re)
222 #define c01_10_im (-c10_01_im)
223 #define c02_10_re (+c10_02_re)
224 #define c02_10_im (-c10_02_im)
225 #define c00_11_re (+c11_00_re)
226 #define c00_11_im (-c11_00_im)
227 #define c01_11_re (+c11_01_re)
228 #define c01_11_im (-c11_01_im)
229 #define c02_11_re (+c11_02_re)
230 #define c02_11_im (-c11_02_im)
231 #define c10_11_re (+c11_10_re)
232 #define c10_11_im (-c11_10_im)
233 #define c00_12_re (+c12_00_re)
234 #define c00_12_im (-c12_00_im)
235 #define c01_12_re (+c12_01_re)
236 #define c01_12_im (-c12_01_im)
237 #define c02_12_re (+c12_02_re)
238 #define c02_12_im (-c12_02_im)
239 #define c10_12_re (+c12_10_re)
240 #define c10_12_im (-c12_10_im)
241 #define c11_12_re (+c12_11_re)
242 #define c11_12_im (-c12_11_im)
// Aliases for "c" names whose leading index digits are 2 or 3.
// Each c{2,3}x_{2,3}y name expands to the c{0,1}x_{0,1}y name with both
// leading digits lowered by 2, i.e. to the same C0..C8 components.
// NOTE(review): presumably the C registers hold a second block of data when
// these aliases are used -- confirm against the kernel bodies.
245 #define c20_20_re c00_00_re
246 #define c21_20_re c01_00_re
247 #define c21_20_im c01_00_im
248 #define c22_20_re c02_00_re
249 #define c22_20_im c02_00_im
250 #define c30_20_re c10_00_re
251 #define c30_20_im c10_00_im
252 #define c31_20_re c11_00_re
253 #define c31_20_im c11_00_im
254 #define c32_20_re c12_00_re
255 #define c32_20_im c12_00_im
256 #define c20_21_re c00_01_re
257 #define c20_21_im c00_01_im
258 #define c21_21_re c01_01_re
259 #define c22_21_re c02_01_re
260 #define c22_21_im c02_01_im
261 #define c30_21_re c10_01_re
262 #define c30_21_im c10_01_im
263 #define c31_21_re c11_01_re
264 #define c31_21_im c11_01_im
265 #define c32_21_re c12_01_re
266 #define c32_21_im c12_01_im
267 #define c20_22_re c00_02_re
268 #define c20_22_im c00_02_im
269 #define c21_22_re c01_02_re
270 #define c21_22_im c01_02_im
271 #define c22_22_re c02_02_re
272 #define c30_22_re c10_02_re
273 #define c30_22_im c10_02_im
274 #define c31_22_re c11_02_re
275 #define c31_22_im c11_02_im
276 #define c32_22_re c12_02_re
277 #define c32_22_im c12_02_im
278 #define c20_30_re c00_10_re
279 #define c20_30_im c00_10_im
280 #define c21_30_re c01_10_re
281 #define c21_30_im c01_10_im
282 #define c22_30_re c02_10_re
283 #define c22_30_im c02_10_im
284 #define c30_30_re c10_10_re
285 #define c31_30_re c11_10_re
286 #define c31_30_im c11_10_im
287 #define c32_30_re c12_10_re
288 #define c32_30_im c12_10_im
289 #define c20_31_re c00_11_re
290 #define c20_31_im c00_11_im
291 #define c21_31_re c01_11_re
292 #define c21_31_im c01_11_im
293 #define c22_31_re c02_11_re
294 #define c22_31_im c02_11_im
295 #define c30_31_re c10_11_re
296 #define c30_31_im c10_11_im
297 #define c31_31_re c11_11_re
298 #define c32_31_re c12_11_re
299 #define c32_31_im c12_11_im
300 #define c20_32_re c00_12_re
301 #define c20_32_im c00_12_im
302 #define c21_32_re c01_12_re
303 #define c21_32_im c01_12_im
304 #define c22_32_re c02_12_re
305 #define c22_32_im c02_12_im
306 #define c30_32_re c10_12_re
307 #define c30_32_im c10_12_im
308 #define c31_32_re c11_12_re
309 #define c31_32_im c11_12_im
310 #define c32_32_re c12_12_re
// Named accessors for the stored entries of the "cdinv" table.
// The slot assignment mirrors the cd* names exactly: six real-only entries
// with identical index pairs in C0..C2 (.x/.y), then fifteen re/im pairs in
// C3..C17 (.x = real, .y = imaginary).
// NOTE(review): "inv" presumably denotes the inverse-clover data (compare the
// `cloverInv` kernel parameter in this listing) -- confirm.
314 #define cdinv00_00_re C0.x
315 #define cdinv01_01_re C0.y
316 #define cdinv02_02_re C1.x
317 #define cdinv10_10_re C1.y
318 #define cdinv11_11_re C2.x
319 #define cdinv12_12_re C2.y
320 #define cdinv01_00_re C3.x
321 #define cdinv01_00_im C3.y
322 #define cdinv02_00_re C4.x
323 #define cdinv02_00_im C4.y
324 #define cdinv10_00_re C5.x
325 #define cdinv10_00_im C5.y
326 #define cdinv11_00_re C6.x
327 #define cdinv11_00_im C6.y
328 #define cdinv12_00_re C7.x
329 #define cdinv12_00_im C7.y
330 #define cdinv02_01_re C8.x
331 #define cdinv02_01_im C8.y
332 #define cdinv10_01_re C9.x
333 #define cdinv10_01_im C9.y
334 #define cdinv11_01_re C10.x
335 #define cdinv11_01_im C10.y
336 #define cdinv12_01_re C11.x
337 #define cdinv12_01_im C11.y
338 #define cdinv10_02_re C12.x
339 #define cdinv10_02_im C12.y
340 #define cdinv11_02_re C13.x
341 #define cdinv11_02_im C13.y
342 #define cdinv12_02_re C14.x
343 #define cdinv12_02_im C14.y
344 #define cdinv11_10_re C15.x
345 #define cdinv11_10_im C15.y
346 #define cdinv12_10_re C16.x
347 #define cdinv12_10_im C16.y
348 #define cdinv12_11_re C17.x
349 #define cdinv12_11_im C17.y
// Entries of the "cdinv" table with the two index pairs swapped.
// cdinvA_B is derived from the stored cdinvB_A: real part kept, imaginary
// part negated (complex conjugate). Parentheses protect the unary sign.
351 #define cdinv00_01_re (+cdinv01_00_re)
352 #define cdinv00_01_im (-cdinv01_00_im)
353 #define cdinv00_02_re (+cdinv02_00_re)
354 #define cdinv00_02_im (-cdinv02_00_im)
355 #define cdinv01_02_re (+cdinv02_01_re)
356 #define cdinv01_02_im (-cdinv02_01_im)
357 #define cdinv00_10_re (+cdinv10_00_re)
358 #define cdinv00_10_im (-cdinv10_00_im)
359 #define cdinv01_10_re (+cdinv10_01_re)
360 #define cdinv01_10_im (-cdinv10_01_im)
361 #define cdinv02_10_re (+cdinv10_02_re)
362 #define cdinv02_10_im (-cdinv10_02_im)
363 #define cdinv00_11_re (+cdinv11_00_re)
364 #define cdinv00_11_im (-cdinv11_00_im)
365 #define cdinv01_11_re (+cdinv11_01_re)
366 #define cdinv01_11_im (-cdinv11_01_im)
367 #define cdinv02_11_re (+cdinv11_02_re)
368 #define cdinv02_11_im (-cdinv11_02_im)
369 #define cdinv10_11_re (+cdinv11_10_re)
370 #define cdinv10_11_im (-cdinv11_10_im)
371 #define cdinv00_12_re (+cdinv12_00_re)
372 #define cdinv00_12_im (-cdinv12_00_im)
373 #define cdinv01_12_re (+cdinv12_01_re)
374 #define cdinv01_12_im (-cdinv12_01_im)
375 #define cdinv02_12_re (+cdinv12_02_re)
376 #define cdinv02_12_im (-cdinv12_02_im)
377 #define cdinv10_12_re (+cdinv12_10_re)
378 #define cdinv10_12_im (-cdinv12_10_im)
379 #define cdinv11_12_re (+cdinv12_11_re)
380 #define cdinv11_12_im (-cdinv12_11_im)
// Aliases for "cdinv" names whose leading index digits are 2 or 3.
// Each cdinv{2,3}x_{2,3}y name expands to the cdinv{0,1}x_{0,1}y name with
// both leading digits lowered by 2, i.e. to the same C0..C17 components.
// NOTE(review): presumably the C registers are refilled with a second block of
// data before these aliases are used -- confirm against the kernel bodies.
383 #define cdinv20_20_re cdinv00_00_re
384 #define cdinv21_20_re cdinv01_00_re
385 #define cdinv21_20_im cdinv01_00_im
386 #define cdinv22_20_re cdinv02_00_re
387 #define cdinv22_20_im cdinv02_00_im
388 #define cdinv30_20_re cdinv10_00_re
389 #define cdinv30_20_im cdinv10_00_im
390 #define cdinv31_20_re cdinv11_00_re
391 #define cdinv31_20_im cdinv11_00_im
392 #define cdinv32_20_re cdinv12_00_re
393 #define cdinv32_20_im cdinv12_00_im
394 #define cdinv20_21_re cdinv00_01_re
395 #define cdinv20_21_im cdinv00_01_im
396 #define cdinv21_21_re cdinv01_01_re
397 #define cdinv22_21_re cdinv02_01_re
398 #define cdinv22_21_im cdinv02_01_im
399 #define cdinv30_21_re cdinv10_01_re
400 #define cdinv30_21_im cdinv10_01_im
401 #define cdinv31_21_re cdinv11_01_re
402 #define cdinv31_21_im cdinv11_01_im
403 #define cdinv32_21_re cdinv12_01_re
404 #define cdinv32_21_im cdinv12_01_im
405 #define cdinv20_22_re cdinv00_02_re
406 #define cdinv20_22_im cdinv00_02_im
407 #define cdinv21_22_re cdinv01_02_re
408 #define cdinv21_22_im cdinv01_02_im
409 #define cdinv22_22_re cdinv02_02_re
410 #define cdinv30_22_re cdinv10_02_re
411 #define cdinv30_22_im cdinv10_02_im
412 #define cdinv31_22_re cdinv11_02_re
413 #define cdinv31_22_im cdinv11_02_im
414 #define cdinv32_22_re cdinv12_02_re
415 #define cdinv32_22_im cdinv12_02_im
416 #define cdinv20_30_re cdinv00_10_re
417 #define cdinv20_30_im cdinv00_10_im
418 #define cdinv21_30_re cdinv01_10_re
419 #define cdinv21_30_im cdinv01_10_im
420 #define cdinv22_30_re cdinv02_10_re
421 #define cdinv22_30_im cdinv02_10_im
422 #define cdinv30_30_re cdinv10_10_re
423 #define cdinv31_30_re cdinv11_10_re
424 #define cdinv31_30_im cdinv11_10_im
425 #define cdinv32_30_re cdinv12_10_re
426 #define cdinv32_30_im cdinv12_10_im
427 #define cdinv20_31_re cdinv00_11_re
428 #define cdinv20_31_im cdinv00_11_im
429 #define cdinv21_31_re cdinv01_11_re
430 #define cdinv21_31_im cdinv01_11_im
431 #define cdinv22_31_re cdinv02_11_re
432 #define cdinv22_31_im cdinv02_11_im
433 #define cdinv30_31_re cdinv10_11_re
434 #define cdinv30_31_im cdinv10_11_im
435 #define cdinv31_31_re cdinv11_11_re
436 #define cdinv32_31_re cdinv12_11_re
437 #define cdinv32_31_im cdinv12_11_im
438 #define cdinv20_32_re cdinv00_12_re
439 #define cdinv20_32_im cdinv00_12_im
440 #define cdinv21_32_re cdinv01_12_re
441 #define cdinv21_32_im cdinv01_12_im
442 #define cdinv22_32_re cdinv02_12_re
443 #define cdinv22_32_im cdinv02_12_im
444 #define cdinv30_32_re cdinv10_12_re
445 #define cdinv30_32_im cdinv10_12_im
446 #define cdinv31_32_re cdinv11_12_re
447 #define cdinv31_32_im cdinv11_12_im
448 #define cdinv32_32_re cdinv12_12_re
// Named accessors for the stored entries of the "cinv" table.
// The slot assignment mirrors the c* names exactly: four components
// (.x/.y/.z/.w) per register across C0..C8, six real-only entries with
// identical index pairs first (C0.x..C1.y), then fifteen re/im pairs.
// NOTE(review): "inv" presumably denotes the inverse-clover data (compare the
// `cloverInv` kernel parameter in this listing) -- confirm.
451 #define cinv00_00_re C0.x
452 #define cinv01_01_re C0.y
453 #define cinv02_02_re C0.z
454 #define cinv10_10_re C0.w
455 #define cinv11_11_re C1.x
456 #define cinv12_12_re C1.y
457 #define cinv01_00_re C1.z
458 #define cinv01_00_im C1.w
459 #define cinv02_00_re C2.x
460 #define cinv02_00_im C2.y
461 #define cinv10_00_re C2.z
462 #define cinv10_00_im C2.w
463 #define cinv11_00_re C3.x
464 #define cinv11_00_im C3.y
465 #define cinv12_00_re C3.z
466 #define cinv12_00_im C3.w
467 #define cinv02_01_re C4.x
468 #define cinv02_01_im C4.y
469 #define cinv10_01_re C4.z
470 #define cinv10_01_im C4.w
471 #define cinv11_01_re C5.x
472 #define cinv11_01_im C5.y
473 #define cinv12_01_re C5.z
474 #define cinv12_01_im C5.w
475 #define cinv10_02_re C6.x
476 #define cinv10_02_im C6.y
477 #define cinv11_02_re C6.z
478 #define cinv11_02_im C6.w
479 #define cinv12_02_re C7.x
480 #define cinv12_02_im C7.y
481 #define cinv11_10_re C7.z
482 #define cinv11_10_im C7.w
483 #define cinv12_10_re C8.x
484 #define cinv12_10_im C8.y
485 #define cinv12_11_re C8.z
486 #define cinv12_11_im C8.w
// Entries of the "cinv" table with the two index pairs swapped.
// cinvA_B is derived from the stored cinvB_A: real part kept, imaginary part
// negated (complex conjugate). Parentheses protect the unary sign.
488 #define cinv00_01_re (+cinv01_00_re)
489 #define cinv00_01_im (-cinv01_00_im)
490 #define cinv00_02_re (+cinv02_00_re)
491 #define cinv00_02_im (-cinv02_00_im)
492 #define cinv01_02_re (+cinv02_01_re)
493 #define cinv01_02_im (-cinv02_01_im)
494 #define cinv00_10_re (+cinv10_00_re)
495 #define cinv00_10_im (-cinv10_00_im)
496 #define cinv01_10_re (+cinv10_01_re)
497 #define cinv01_10_im (-cinv10_01_im)
498 #define cinv02_10_re (+cinv10_02_re)
499 #define cinv02_10_im (-cinv10_02_im)
500 #define cinv00_11_re (+cinv11_00_re)
501 #define cinv00_11_im (-cinv11_00_im)
502 #define cinv01_11_re (+cinv11_01_re)
503 #define cinv01_11_im (-cinv11_01_im)
504 #define cinv02_11_re (+cinv11_02_re)
505 #define cinv02_11_im (-cinv11_02_im)
506 #define cinv10_11_re (+cinv11_10_re)
507 #define cinv10_11_im (-cinv11_10_im)
508 #define cinv00_12_re (+cinv12_00_re)
509 #define cinv00_12_im (-cinv12_00_im)
510 #define cinv01_12_re (+cinv12_01_re)
511 #define cinv01_12_im (-cinv12_01_im)
512 #define cinv02_12_re (+cinv12_02_re)
513 #define cinv02_12_im (-cinv12_02_im)
514 #define cinv10_12_re (+cinv12_10_re)
515 #define cinv10_12_im (-cinv12_10_im)
516 #define cinv11_12_re (+cinv12_11_re)
517 #define cinv11_12_im (-cinv12_11_im)
// Aliases for "cinv" names whose leading index digits are 2 or 3.
// Each cinv{2,3}x_{2,3}y name expands to the cinv{0,1}x_{0,1}y name with both
// leading digits lowered by 2, i.e. to the same C0..C8 components.
// NOTE(review): presumably the C registers are refilled with a second block of
// data before these aliases are used -- confirm against the kernel bodies.
520 #define cinv20_20_re cinv00_00_re
521 #define cinv21_20_re cinv01_00_re
522 #define cinv21_20_im cinv01_00_im
523 #define cinv22_20_re cinv02_00_re
524 #define cinv22_20_im cinv02_00_im
525 #define cinv30_20_re cinv10_00_re
526 #define cinv30_20_im cinv10_00_im
527 #define cinv31_20_re cinv11_00_re
528 #define cinv31_20_im cinv11_00_im
529 #define cinv32_20_re cinv12_00_re
530 #define cinv32_20_im cinv12_00_im
531 #define cinv20_21_re cinv00_01_re
532 #define cinv20_21_im cinv00_01_im
533 #define cinv21_21_re cinv01_01_re
534 #define cinv22_21_re cinv02_01_re
535 #define cinv22_21_im cinv02_01_im
536 #define cinv30_21_re cinv10_01_re
537 #define cinv30_21_im cinv10_01_im
538 #define cinv31_21_re cinv11_01_re
539 #define cinv31_21_im cinv11_01_im
540 #define cinv32_21_re cinv12_01_re
541 #define cinv32_21_im cinv12_01_im
542 #define cinv20_22_re cinv00_02_re
543 #define cinv20_22_im cinv00_02_im
544 #define cinv21_22_re cinv01_02_re
545 #define cinv21_22_im cinv01_02_im
546 #define cinv22_22_re cinv02_02_re
547 #define cinv30_22_re cinv10_02_re
548 #define cinv30_22_im cinv10_02_im
549 #define cinv31_22_re cinv11_02_re
550 #define cinv31_22_im cinv11_02_im
551 #define cinv32_22_re cinv12_02_re
552 #define cinv32_22_im cinv12_02_im
553 #define cinv20_30_re cinv00_10_re
554 #define cinv20_30_im cinv00_10_im
555 #define cinv21_30_re cinv01_10_re
556 #define cinv21_30_im cinv01_10_im
557 #define cinv22_30_re cinv02_10_re
558 #define cinv22_30_im cinv02_10_im
559 #define cinv30_30_re cinv10_10_re
560 #define cinv31_30_re cinv11_10_re
561 #define cinv31_30_im cinv11_10_im
562 #define cinv32_30_re cinv12_10_re
563 #define cinv32_30_im cinv12_10_im
564 #define cinv20_31_re cinv00_11_re
565 #define cinv20_31_im cinv00_11_im
566 #define cinv21_31_re cinv01_11_re
567 #define cinv21_31_im cinv01_11_im
568 #define cinv22_31_re cinv02_11_re
569 #define cinv22_31_im cinv02_11_im
570 #define cinv30_31_re cinv10_11_re
571 #define cinv30_31_im cinv10_11_im
572 #define cinv31_31_re cinv11_11_re
573 #define cinv32_31_re cinv12_11_re
574 #define cinv32_31_im cinv12_11_im
575 #define cinv20_32_re cinv00_12_re
576 #define cinv20_32_im cinv00_12_im
577 #define cinv21_32_re cinv01_12_re
578 #define cinv21_32_im cinv01_12_im
579 #define cinv22_32_re cinv02_12_re
580 #define cinv22_32_im cinv02_12_im
581 #define cinv30_32_re cinv10_12_re
582 #define cinv30_32_im cinv10_12_im
583 #define cinv31_32_re cinv11_12_re
584 #define cinv31_32_im cinv11_12_im
585 #define cinv32_32_re cinv12_12_re
587 #if (__COMPUTE_CAPABILITY__ >= 130)
613 #define spinorFloat double
615 #if (defined DIRECT_ACCESS_CLOVER) || (defined FERMI_NO_DBLE_TEX)
616 #define TMCLOVERTEX clover
617 #define TM_INV_CLOVERTEX cloverInv
618 #define READ_CLOVER READ_CLOVER_DOUBLE_STR
619 #define ASSN_CLOVER ASSN_CLOVER_DOUBLE_STR
621 #ifdef USE_TEXTURE_OBJECTS
622 #define TMCLOVERTEX (param.cloverTex)
623 #define TM_INV_CLOVERTEX (param.cloverInvTex)
625 #define TMCLOVERTEX cloverTexDouble
626 #define TM_INV_CLOVERTEX cloverInvTexDouble
628 #define READ_CLOVER READ_CLOVER_DOUBLE_TEX
629 #define ASSN_CLOVER ASSN_CLOVER_DOUBLE_TEX
632 #define CLOVER_DOUBLE
635 const double2 *clover,
const float *cNorm,
const double2 *cloverInv,
const float *cNrm2)
637 #ifdef GPU_TWISTED_CLOVER_DIRAC
639 int sid = blockIdx.x*blockDim.x + threadIdx.x;
640 if (sid >= param.
threads)
return;
642 #ifndef FERMI_NO_DBLE_TEX
656 double2 I0 = in[sid + 0 * param.
sp_stride];
657 double2 I1 = in[sid + 1 * param.
sp_stride];
658 double2 I2 = in[sid + 2 * param.
sp_stride];
659 double2 I3 = in[sid + 3 * param.
sp_stride];
660 double2 I4 = in[sid + 4 * param.
sp_stride];
661 double2 I5 = in[sid + 5 * param.
sp_stride];
662 double2 I6 = in[sid + 6 * param.
sp_stride];
663 double2 I7 = in[sid + 7 * param.
sp_stride];
664 double2 I8 = in[sid + 8 * param.
sp_stride];
665 double2 I9 = in[sid + 9 * param.
sp_stride];
666 double2 I10 = in[sid + 10 * param.
sp_stride];
667 double2 I11 = in[sid + 11 * param.
sp_stride];
670 double2
C0,
C1,
C2,
C3,
C4,
C5,
C6,
C7,
C8, C9, C10, C11, C12, C13, C14, C15, C16, C17;
685 spinor[sid + 10 * param.
sp_stride] = I10;
686 spinor[sid + 11 * param.
sp_stride] = I11;
692 const double2 *clover,
const float *cNorm,
const double2 *cloverInv,
const float *cNrm2)
694 #ifdef GPU_TWISTED_CLOVER_DIRAC
696 int sid = blockIdx.x*blockDim.x + threadIdx.x;
697 if (sid >= param.
threads)
return;
699 #ifndef FERMI_NO_DBLE_TEX
713 double2 I0 = in[sid + 0 * param.
sp_stride];
714 double2 I1 = in[sid + 1 * param.
sp_stride];
715 double2 I2 = in[sid + 2 * param.
sp_stride];
716 double2 I3 = in[sid + 3 * param.
sp_stride];
717 double2 I4 = in[sid + 4 * param.
sp_stride];
718 double2 I5 = in[sid + 5 * param.
sp_stride];
719 double2 I6 = in[sid + 6 * param.
sp_stride];
720 double2 I7 = in[sid + 7 * param.
sp_stride];
721 double2 I8 = in[sid + 8 * param.
sp_stride];
722 double2 I9 = in[sid + 9 * param.
sp_stride];
723 double2 I10 = in[sid + 10 * param.
sp_stride];
724 double2 I11 = in[sid + 11 * param.
sp_stride];
727 double2
C0,
C1,
C2,
C3,
C4,
C5,
C6,
C7,
C8, C9, C10, C11, C12, C13, C14, C15, C16, C17;
742 spinor[sid + 10 * param.
sp_stride] = I10;
743 spinor[sid + 11 * param.
sp_stride] = I11;
749 #undef TM_INV_CLOVERTEX
780 #endif // (__COMPUTE_CAPABILITY__ >= 130)
783 #ifdef USE_TEXTURE_OBJECTS
784 #define SPINORTEX param.inTex
786 #define SPINORTEX spinorTexSingle
814 #define spinorFloat float
816 #ifdef DIRECT_ACCESS_CLOVER
817 #define TMCLOVERTEX clover
818 #define TM_INV_CLOVERTEX cloverInv
819 #define READ_CLOVER READ_CLOVER_SINGLE
820 #define ASSN_CLOVER ASSN_CLOVER_SINGLE
822 #ifdef USE_TEXTURE_OBJECTS
823 #define TMCLOVERTEX (param.cloverTex)
824 #define TM_INV_CLOVERTEX (param.cloverInvTex)
826 #define TMCLOVERTEX cloverTexSingle
827 #define TM_INV_CLOVERTEX cloverInvTexSingle
829 #define READ_CLOVER READ_CLOVER_SINGLE_TEX
830 #define ASSN_CLOVER ASSN_CLOVER_SINGLE_TEX
834 const float4 *clover,
const float *cNorm,
const float4 *cloverInv,
const float *cNrm2)
836 #ifdef GPU_TWISTED_CLOVER_DIRAC
837 int sid = blockIdx.x*blockDim.x + threadIdx.x;
838 if (sid >= param.
threads)
return;
863 const float4 *clover,
const float *cNorm,
const float4 *cloverInv,
const float *cNrm2)
865 #ifdef GPU_TWISTED_CLOVER_DIRAC
866 int sid = blockIdx.x*blockDim.x + threadIdx.x;
867 if (sid >= param.
threads)
return;
892 #undef TM_INV_CLOVERTEX
898 #ifdef USE_TEXTURE_OBJECTS
899 #define SPINORTEX param.inTex
900 #define SPINORTEXNORM param.inTexNorm
902 #define SPINORTEX spinorTexHalf
903 #define SPINORTEXNORM spinorTexHalfNorm
906 #ifdef DIRECT_ACCESS_CLOVER
907 #define CLOVERTEX clover
908 #define READ_CLOVER READ_CLOVER_HALF
909 #define ASSN_CLOVER ASSN_CLOVER_HALF
911 #ifdef USE_TEXTURE_OBJECTS
912 #define TMCLOVERTEX (param.cloverTex)
913 #define TMCLOVERTEXNORM (param.cloverNormTex)
914 #define TM_INV_CLOVERTEX (param.cloverInvTex)
915 #define TM_INV_CLOVERTEXNORM (param.cloverInvNormTex)
917 #define TMCLOVERTEX cloverTexHalf
918 #define TMCLOVERTEXNORM cloverTexNorm
919 #define TM_INV_CLOVERTEX cloverInvTexHalf
920 #define TM_INV_CLOVERTEXNORM cloverInvTexNorm
922 #define READ_CLOVER READ_CLOVER_HALF_TEX
923 #define ASSN_CLOVER ASSN_CLOVER_HALF_TEX
927 const short4 *clover,
const float *cNorm,
const short4 *cloverInv,
const float *cNrm2)
929 #ifdef GPU_TWISTED_CLOVER_DIRAC
930 int sid = blockIdx.x*blockDim.x + threadIdx.x;
931 if (sid >= param.
threads)
return;
955 float k0 = fmaxf(fabsf(I0.x), fabsf(I0.y));
956 float k1 = fmaxf(fabsf(I0.z), fabsf(I0.w));
957 float k2 = fmaxf(fabsf(I1.x), fabsf(I1.y));
958 float k3 = fmaxf(fabsf(I1.z), fabsf(I1.w));
959 float k4 = fmaxf(fabsf(I2.x), fabsf(I2.y));
960 float k5 = fmaxf(fabsf(I2.z), fabsf(I2.w));
961 float k6 = fmaxf(fabsf(I3.x), fabsf(I3.y));
962 float k7 = fmaxf(fabsf(I3.z), fabsf(I3.w));
963 float k8 = fmaxf(fabsf(I4.x), fabsf(I4.y));
964 float k9 = fmaxf(fabsf(I4.z), fabsf(I4.w));
965 float k10 = fmaxf(fabsf(I5.x), fabsf(I5.y));
966 float k11 = fmaxf(fabsf(I5.z), fabsf(I5.w));
972 k5 = fmaxf(k10, k11);
978 spinorNorm[
sid] = k0;
988 spinor[sid+0*(param.
sp_stride)] = make_short4((
short)I0.x, (short)I0.y, (
short)I0.z, (short)I0.w);
989 spinor[sid+1*(param.
sp_stride)] = make_short4((
short)I1.x, (short)I1.y, (
short)I1.z, (short)I1.w);
990 spinor[sid+2*(param.
sp_stride)] = make_short4((
short)I2.x, (short)I2.y, (
short)I2.z, (short)I2.w);
991 spinor[sid+3*(param.
sp_stride)] = make_short4((
short)I3.x, (short)I3.y, (
short)I3.z, (short)I3.w);
992 spinor[sid+4*(param.
sp_stride)] = make_short4((
short)I4.x, (short)I4.y, (
short)I4.z, (short)I4.w);
993 spinor[sid+5*(param.
sp_stride)] = make_short4((
short)I5.x, (short)I5.y, (
short)I5.z, (short)I5.w);
999 const short4 *clover,
const float *cNorm,
const short4 *cloverInv,
const float *cNrm2)
1001 #ifdef GPU_TWISTED_CLOVER_DIRAC
1002 int sid = blockIdx.x*blockDim.x + threadIdx.x;
1003 if (sid >= param.
threads)
return;
1027 float k0 = fmaxf(fabsf(I0.x), fabsf(I0.y));
1028 float k1 = fmaxf(fabsf(I0.z), fabsf(I0.w));
1029 float k2 = fmaxf(fabsf(I1.x), fabsf(I1.y));
1030 float k3 = fmaxf(fabsf(I1.z), fabsf(I1.w));
1031 float k4 = fmaxf(fabsf(I2.x), fabsf(I2.y));
1032 float k5 = fmaxf(fabsf(I2.z), fabsf(I2.w));
1033 float k6 = fmaxf(fabsf(I3.x), fabsf(I3.y));
1034 float k7 = fmaxf(fabsf(I3.z), fabsf(I3.w));
1035 float k8 = fmaxf(fabsf(I4.x), fabsf(I4.y));
1036 float k9 = fmaxf(fabsf(I4.z), fabsf(I4.w));
1037 float k10 = fmaxf(fabsf(I5.x), fabsf(I5.y));
1038 float k11 = fmaxf(fabsf(I5.z), fabsf(I5.w));
1044 k5 = fmaxf(k10, k11);
1050 spinorNorm[
sid] = k0;
1051 float scale = __fdividef(
MAX_SHORT, k0);
1060 spinor[sid+0*(param.
sp_stride)] = make_short4((
short)I0.x, (short)I0.y, (
short)I0.z, (short)I0.w);
1061 spinor[sid+1*(param.
sp_stride)] = make_short4((
short)I1.x, (short)I1.y, (
short)I1.z, (short)I1.w);
1062 spinor[sid+2*(param.
sp_stride)] = make_short4((
short)I2.x, (short)I2.y, (
short)I2.z, (short)I2.w);
1063 spinor[sid+3*(param.
sp_stride)] = make_short4((
short)I3.x, (short)I3.y, (
short)I3.z, (short)I3.w);
1064 spinor[sid+4*(param.
sp_stride)] = make_short4((
short)I4.x, (short)I4.y, (
short)I4.z, (short)I4.w);
1065 spinor[sid+5*(param.
sp_stride)] = make_short4((
short)I5.x, (short)I5.y, (
short)I5.z, (short)I5.w);
1073 #undef TMCLOVERTEXNORM
1074 #undef TM_INV_CLOVERTEX
1075 #undef TM_INV_CLOVERTEXNORM
1079 #undef SPINORTEXNORM
1107 #endif //_TMC_GAMMA_CORE_H
#define APPLY_CLOVER_TWIST(c, a, reg)
__global__ void twistCloverGamma5InvKernel(float4 *spinor, float *null, float a, const float4 *in, const float *null2, DslashParam param, const float4 *clover, const float *cNorm, const float4 *cloverInv, const float *cNrm2)
cpuColorSpinorField * spinor
__global__ void twistCloverGamma5Kernel(float4 *spinor, float *null, float a, const float4 *in, const float *null2, DslashParam param, const float4 *clover, const float *cNorm, const float4 *cloverInv, const float *cNrm2)
#define APPLY_CLOVER_TWIST_INV(c, cinv, a, reg)
__inline__ __device__ double2 fetch_double2(texture< int4, 1 > t, int i)
#define TEX1DFETCH(type, tex, idx)