QUDA  v0.7.0
A library for QCD on GPUs
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
clover_core.h
Go to the documentation of this file.
1 // *** CUDA CLOVER ***
// This file contains bare statements (including a `return`) outside any
// function, so it appears intended to be #included inside a kernel body.
2 
// NOTE(review): presumably the per-thread shared-memory budget (in floats)
// reported to the launcher; it is defined as 0 and not referenced again in
// this file -- confirm against the including code.
3 #define CLOVER_SHARED_FLOATS_PER_THREAD 0
4 
5 
// VOLATILE expands to nothing when both CUDA_VERSION >= 4010 and
// __COMPUTE_CAPABILITY__ >= 200, and to the `volatile` qualifier otherwise.
// It is #undef'd again at the end of this file.
6 #if ((CUDA_VERSION >= 4010) && (__COMPUTE_CAPABILITY__ >= 200)) // NVVM compiler
7 #define VOLATILE
8 #else // Open64 compiler
9 #define VOLATILE volatile
10 #endif
11 // input spinor
// Names the 24 real numbers of the input spinor as i<a><b>_re / i<a><b>_im,
// with a in 0..3 and b in 0..2, by aliasing members of the variables I0, I1, ...
// (NOTE(review): a is presumably the spin index and b the color index; the
// I* variables are presumably filled in by READ_SPINOR -- confirm.)
//
// SPINOR_DOUBLE defined : spinorFloat is double; one complex number per
//                         variable (.x = re, .y = im), using I0..I11.
// otherwise             : spinorFloat is float; two complex numbers per
//                         variable (.x/.y and .z/.w), using I0..I5.
12 #ifdef SPINOR_DOUBLE
13 #define spinorFloat double
14 #define i00_re I0.x
15 #define i00_im I0.y
16 #define i01_re I1.x
17 #define i01_im I1.y
18 #define i02_re I2.x
19 #define i02_im I2.y
20 #define i10_re I3.x
21 #define i10_im I3.y
22 #define i11_re I4.x
23 #define i11_im I4.y
24 #define i12_re I5.x
25 #define i12_im I5.y
26 #define i20_re I6.x
27 #define i20_im I6.y
28 #define i21_re I7.x
29 #define i21_im I7.y
30 #define i22_re I8.x
31 #define i22_im I8.y
32 #define i30_re I9.x
33 #define i30_im I9.y
34 #define i31_re I10.x
35 #define i31_im I10.y
36 #define i32_re I11.x
37 #define i32_im I11.y
38 #else
39 #define spinorFloat float
40 #define i00_re I0.x
41 #define i00_im I0.y
42 #define i01_re I0.z
43 #define i01_im I0.w
44 #define i02_re I1.x
45 #define i02_im I1.y
46 #define i10_re I1.z
47 #define i10_im I1.w
48 #define i11_re I2.x
49 #define i11_im I2.y
50 #define i12_re I2.z
51 #define i12_im I2.w
52 #define i20_re I3.x
53 #define i20_im I3.y
54 #define i21_re I3.z
55 #define i21_im I3.w
56 #define i22_re I4.x
57 #define i22_im I4.y
58 #define i30_re I4.z
59 #define i30_im I4.w
60 #define i31_re I5.x
61 #define i31_im I5.y
62 #define i32_re I5.z
63 #define i32_im I5.w
64 #endif // SPINOR_DOUBLE
65 
66 // first chiral block of inverted clover term
// Names the 36 stored real numbers of one 6x6 block as c<row>_<col>_re/_im,
// where row and col each range over 00,01,02,10,11,12:
//   - the 6 diagonal entries come first and have a real part only;
//   - the 15 entries below the diagonal follow as (re, im) pairs, ordered
//     column by column.
// Entries above the diagonal are not stored; they are derived from these by
// the conjugate aliases defined after this section.
//
// CLOVER_DOUBLE defined : two reals per variable (.x, .y), using C0..C17.
// otherwise             : four reals per variable (.x, .y, .z, .w), using C0..C8.
67 #ifdef CLOVER_DOUBLE
68 #define c00_00_re C0.x
69 #define c01_01_re C0.y
70 #define c02_02_re C1.x
71 #define c10_10_re C1.y
72 #define c11_11_re C2.x
73 #define c12_12_re C2.y
74 #define c01_00_re C3.x
75 #define c01_00_im C3.y
76 #define c02_00_re C4.x
77 #define c02_00_im C4.y
78 #define c10_00_re C5.x
79 #define c10_00_im C5.y
80 #define c11_00_re C6.x
81 #define c11_00_im C6.y
82 #define c12_00_re C7.x
83 #define c12_00_im C7.y
84 #define c02_01_re C8.x
85 #define c02_01_im C8.y
86 #define c10_01_re C9.x
87 #define c10_01_im C9.y
88 #define c11_01_re C10.x
89 #define c11_01_im C10.y
90 #define c12_01_re C11.x
91 #define c12_01_im C11.y
92 #define c10_02_re C12.x
93 #define c10_02_im C12.y
94 #define c11_02_re C13.x
95 #define c11_02_im C13.y
96 #define c12_02_re C14.x
97 #define c12_02_im C14.y
98 #define c11_10_re C15.x
99 #define c11_10_im C15.y
100 #define c12_10_re C16.x
101 #define c12_10_im C16.y
102 #define c12_11_re C17.x
103 #define c12_11_im C17.y
104 #else
105 #define c00_00_re C0.x
106 #define c01_01_re C0.y
107 #define c02_02_re C0.z
108 #define c10_10_re C0.w
109 #define c11_11_re C1.x
110 #define c12_12_re C1.y
111 #define c01_00_re C1.z
112 #define c01_00_im C1.w
113 #define c02_00_re C2.x
114 #define c02_00_im C2.y
115 #define c10_00_re C2.z
116 #define c10_00_im C2.w
117 #define c11_00_re C3.x
118 #define c11_00_im C3.y
119 #define c12_00_re C3.z
120 #define c12_00_im C3.w
121 #define c02_01_re C4.x
122 #define c02_01_im C4.y
123 #define c10_01_re C4.z
124 #define c10_01_im C4.w
125 #define c11_01_re C5.x
126 #define c11_01_im C5.y
127 #define c12_01_re C5.z
128 #define c12_01_im C5.w
129 #define c10_02_re C6.x
130 #define c10_02_im C6.y
131 #define c11_02_re C6.z
132 #define c11_02_im C6.w
133 #define c12_02_re C7.x
134 #define c12_02_im C7.y
135 #define c11_10_re C7.z
136 #define c11_10_im C7.w
137 #define c12_10_re C8.x
138 #define c12_10_im C8.y
139 #define c12_11_re C8.z
140 #define c12_11_im C8.w
141 #endif // CLOVER_DOUBLE
142 
// Entries above the diagonal of the first chiral block. Each c<r>_<c> is
// defined as the complex conjugate of the stored entry c<c>_<r> (same real
// part, negated imaginary part), i.e. the block is treated as Hermitian.
// The expansions are parenthesized so the sign stays attached when the macro
// is used inside a larger expression.
143 #define c00_01_re (+c01_00_re)
144 #define c00_01_im (-c01_00_im)
145 #define c00_02_re (+c02_00_re)
146 #define c00_02_im (-c02_00_im)
147 #define c01_02_re (+c02_01_re)
148 #define c01_02_im (-c02_01_im)
149 #define c00_10_re (+c10_00_re)
150 #define c00_10_im (-c10_00_im)
151 #define c01_10_re (+c10_01_re)
152 #define c01_10_im (-c10_01_im)
153 #define c02_10_re (+c10_02_re)
154 #define c02_10_im (-c10_02_im)
155 #define c00_11_re (+c11_00_re)
156 #define c00_11_im (-c11_00_im)
157 #define c01_11_re (+c11_01_re)
158 #define c01_11_im (-c11_01_im)
159 #define c02_11_re (+c11_02_re)
160 #define c02_11_im (-c11_02_im)
161 #define c10_11_re (+c11_10_re)
162 #define c10_11_im (-c11_10_im)
163 #define c00_12_re (+c12_00_re)
164 #define c00_12_im (-c12_00_im)
165 #define c01_12_re (+c12_01_re)
166 #define c01_12_im (-c12_01_im)
167 #define c02_12_re (+c12_02_re)
168 #define c02_12_im (-c12_02_im)
169 #define c10_12_re (+c12_10_re)
170 #define c10_12_im (-c12_10_im)
171 #define c11_12_re (+c12_11_re)
172 #define c11_12_im (-c12_11_im)
173 
174 // second chiral block of inverted clover term (reuses the first block's C variables: C0,...,C17 if CLOVER_DOUBLE, else C0,...,C8)
// Every c<r>_<c> here (r, c in 20,21,22,30,31,32) is a plain alias for the
// first-block name with 2 subtracted from the leading digit of r and of c
// (e.g. c31_20 -> c11_00). Both blocks therefore read the same C variables.
// NOTE(review): this only yields second-block values if the C variables are
// reloaded before the second block is applied; that load is not visible in
// this listing -- confirm.
175 #define c20_20_re c00_00_re
176 #define c21_20_re c01_00_re
177 #define c21_20_im c01_00_im
178 #define c22_20_re c02_00_re
179 #define c22_20_im c02_00_im
180 #define c30_20_re c10_00_re
181 #define c30_20_im c10_00_im
182 #define c31_20_re c11_00_re
183 #define c31_20_im c11_00_im
184 #define c32_20_re c12_00_re
185 #define c32_20_im c12_00_im
186 #define c20_21_re c00_01_re
187 #define c20_21_im c00_01_im
188 #define c21_21_re c01_01_re
189 #define c22_21_re c02_01_re
190 #define c22_21_im c02_01_im
191 #define c30_21_re c10_01_re
192 #define c30_21_im c10_01_im
193 #define c31_21_re c11_01_re
194 #define c31_21_im c11_01_im
195 #define c32_21_re c12_01_re
196 #define c32_21_im c12_01_im
197 #define c20_22_re c00_02_re
198 #define c20_22_im c00_02_im
199 #define c21_22_re c01_02_re
200 #define c21_22_im c01_02_im
201 #define c22_22_re c02_02_re
202 #define c30_22_re c10_02_re
203 #define c30_22_im c10_02_im
204 #define c31_22_re c11_02_re
205 #define c31_22_im c11_02_im
206 #define c32_22_re c12_02_re
207 #define c32_22_im c12_02_im
208 #define c20_30_re c00_10_re
209 #define c20_30_im c00_10_im
210 #define c21_30_re c01_10_re
211 #define c21_30_im c01_10_im
212 #define c22_30_re c02_10_re
213 #define c22_30_im c02_10_im
214 #define c30_30_re c10_10_re
215 #define c31_30_re c11_10_re
216 #define c31_30_im c11_10_im
217 #define c32_30_re c12_10_re
218 #define c32_30_im c12_10_im
219 #define c20_31_re c00_11_re
220 #define c20_31_im c00_11_im
221 #define c21_31_re c01_11_re
222 #define c21_31_im c01_11_im
223 #define c22_31_re c02_11_re
224 #define c22_31_im c02_11_im
225 #define c30_31_re c10_11_re
226 #define c30_31_im c10_11_im
227 #define c31_31_re c11_11_re
228 #define c32_31_re c12_11_re
229 #define c32_31_im c12_11_im
230 #define c20_32_re c00_12_re
231 #define c20_32_im c00_12_im
232 #define c21_32_re c01_12_re
233 #define c21_32_im c01_12_im
234 #define c22_32_re c02_12_re
235 #define c22_32_im c02_12_im
236 #define c30_32_re c10_12_re
237 #define c30_32_im c10_12_im
238 #define c31_32_re c11_12_re
239 #define c31_32_im c11_12_im
240 #define c32_32_re c12_12_re
241 
242 // output spinor
// NOTE(review): the listing jumps from line 242 to line 267 here, so the
// output-spinor declarations (the o??_re / o??_im variables written below)
// are not visible in this extract.
267 
268 #include "read_clover.h"
269 #include "io_spinor.h"
270 
// sid is this thread's flat index over a one-dimensional launch
// (block index * block size + index within the block).
271 int sid = blockIdx.x*blockDim.x + threadIdx.x;
// Threads whose index is at or beyond param.threads leave before any load or
// store below is issued.
272 if (sid >= param.threads) return;
273 
274 // read spinor from device memory
// READ_SPINOR is a macro defined outside this file; sid is passed for both of
// its last two arguments. NOTE(review): presumably this populates the I*
// variables that the i??_re / i??_im macros alias -- confirm.
275 READ_SPINOR(SPINORTEX, param.sp_stride, sid, sid);
276 {
277  // change to chiral basis
// Three sibling scopes, one per value (0, 1, 2) of the second component
// index. Each scope builds temporaries a0c, a1c, a2c, a3c from the input
// components i0c..i3c and then copies them into the outputs o0c..o3c.
// NOTE(review): only the a3c = i0c - i2c lines are visible; the listing lines
// that define a0c, a1c and a2c (279-284, 295-300, 311-316) are missing from
// this extract.
278  {
285  spinorFloat a30_re = i00_re - i20_re;
286  spinorFloat a30_im = i00_im - i20_im;
287 
288  o00_re = a00_re; o00_im = a00_im;
289  o10_re = a10_re; o10_im = a10_im;
290  o20_re = a20_re; o20_im = a20_im;
291  o30_re = a30_re; o30_im = a30_im;
292  }
293 
294  {
301  spinorFloat a31_re = i01_re - i21_re;
302  spinorFloat a31_im = i01_im - i21_im;
303 
304  o01_re = a01_re; o01_im = a01_im;
305  o11_re = a11_re; o11_im = a11_im;
306  o21_re = a21_re; o21_im = a21_im;
307  o31_re = a31_re; o31_im = a31_im;
308  }
309 
310  {
317  spinorFloat a32_re = i02_re - i22_re;
318  spinorFloat a32_im = i02_im - i22_im;
319 
320  o02_re = a02_re; o02_im = a02_im;
321  o12_re = a12_re; o12_im = a12_im;
322  o22_re = a22_re; o22_im = a22_im;
323  o32_re = a32_re; o32_im = a32_im;
324  }
325 
326  // apply first chiral block
// Accumulates, for each row r in {00,01,02,10,11,12},
//     a_r += sum over columns c of  c<r>_<c> * o_c
// i.e. a 6x6 complex matrix times the six components o00..o12, written out
// in real arithmetic. An off-diagonal term is a full complex multiply:
//     re += c_re*o_re;  re -= c_im*o_im;  im += c_re*o_im;  im += c_im*o_re;
// A diagonal term has a real coefficient only, so it needs just two updates.
// NOTE(review): listing lines 328 and 330-335 are missing from this extract;
// they presumably load the clover data and declare the accumulators
// a00..a12 -- confirm against the original file.
327  {
329 
336 
337  a00_re += c00_00_re * o00_re;
338  a00_im += c00_00_re * o00_im;
339  a00_re += c00_01_re * o01_re;
340  a00_re -= c00_01_im * o01_im;
341  a00_im += c00_01_re * o01_im;
342  a00_im += c00_01_im * o01_re;
343  a00_re += c00_02_re * o02_re;
344  a00_re -= c00_02_im * o02_im;
345  a00_im += c00_02_re * o02_im;
346  a00_im += c00_02_im * o02_re;
347  a00_re += c00_10_re * o10_re;
348  a00_re -= c00_10_im * o10_im;
349  a00_im += c00_10_re * o10_im;
350  a00_im += c00_10_im * o10_re;
351  a00_re += c00_11_re * o11_re;
352  a00_re -= c00_11_im * o11_im;
353  a00_im += c00_11_re * o11_im;
354  a00_im += c00_11_im * o11_re;
355  a00_re += c00_12_re * o12_re;
356  a00_re -= c00_12_im * o12_im;
357  a00_im += c00_12_re * o12_im;
358  a00_im += c00_12_im * o12_re;
359 
360  a01_re += c01_00_re * o00_re;
361  a01_re -= c01_00_im * o00_im;
362  a01_im += c01_00_re * o00_im;
363  a01_im += c01_00_im * o00_re;
364  a01_re += c01_01_re * o01_re;
365  a01_im += c01_01_re * o01_im;
366  a01_re += c01_02_re * o02_re;
367  a01_re -= c01_02_im * o02_im;
368  a01_im += c01_02_re * o02_im;
369  a01_im += c01_02_im * o02_re;
370  a01_re += c01_10_re * o10_re;
371  a01_re -= c01_10_im * o10_im;
372  a01_im += c01_10_re * o10_im;
373  a01_im += c01_10_im * o10_re;
374  a01_re += c01_11_re * o11_re;
375  a01_re -= c01_11_im * o11_im;
376  a01_im += c01_11_re * o11_im;
377  a01_im += c01_11_im * o11_re;
378  a01_re += c01_12_re * o12_re;
379  a01_re -= c01_12_im * o12_im;
380  a01_im += c01_12_re * o12_im;
381  a01_im += c01_12_im * o12_re;
382 
383  a02_re += c02_00_re * o00_re;
384  a02_re -= c02_00_im * o00_im;
385  a02_im += c02_00_re * o00_im;
386  a02_im += c02_00_im * o00_re;
387  a02_re += c02_01_re * o01_re;
388  a02_re -= c02_01_im * o01_im;
389  a02_im += c02_01_re * o01_im;
390  a02_im += c02_01_im * o01_re;
391  a02_re += c02_02_re * o02_re;
392  a02_im += c02_02_re * o02_im;
393  a02_re += c02_10_re * o10_re;
394  a02_re -= c02_10_im * o10_im;
395  a02_im += c02_10_re * o10_im;
396  a02_im += c02_10_im * o10_re;
397  a02_re += c02_11_re * o11_re;
398  a02_re -= c02_11_im * o11_im;
399  a02_im += c02_11_re * o11_im;
400  a02_im += c02_11_im * o11_re;
401  a02_re += c02_12_re * o12_re;
402  a02_re -= c02_12_im * o12_im;
403  a02_im += c02_12_re * o12_im;
404  a02_im += c02_12_im * o12_re;
405 
406  a10_re += c10_00_re * o00_re;
407  a10_re -= c10_00_im * o00_im;
408  a10_im += c10_00_re * o00_im;
409  a10_im += c10_00_im * o00_re;
410  a10_re += c10_01_re * o01_re;
411  a10_re -= c10_01_im * o01_im;
412  a10_im += c10_01_re * o01_im;
413  a10_im += c10_01_im * o01_re;
414  a10_re += c10_02_re * o02_re;
415  a10_re -= c10_02_im * o02_im;
416  a10_im += c10_02_re * o02_im;
417  a10_im += c10_02_im * o02_re;
418  a10_re += c10_10_re * o10_re;
419  a10_im += c10_10_re * o10_im;
420  a10_re += c10_11_re * o11_re;
421  a10_re -= c10_11_im * o11_im;
422  a10_im += c10_11_re * o11_im;
423  a10_im += c10_11_im * o11_re;
424  a10_re += c10_12_re * o12_re;
425  a10_re -= c10_12_im * o12_im;
426  a10_im += c10_12_re * o12_im;
427  a10_im += c10_12_im * o12_re;
428 
429  a11_re += c11_00_re * o00_re;
430  a11_re -= c11_00_im * o00_im;
431  a11_im += c11_00_re * o00_im;
432  a11_im += c11_00_im * o00_re;
433  a11_re += c11_01_re * o01_re;
434  a11_re -= c11_01_im * o01_im;
435  a11_im += c11_01_re * o01_im;
436  a11_im += c11_01_im * o01_re;
437  a11_re += c11_02_re * o02_re;
438  a11_re -= c11_02_im * o02_im;
439  a11_im += c11_02_re * o02_im;
440  a11_im += c11_02_im * o02_re;
441  a11_re += c11_10_re * o10_re;
442  a11_re -= c11_10_im * o10_im;
443  a11_im += c11_10_re * o10_im;
444  a11_im += c11_10_im * o10_re;
445  a11_re += c11_11_re * o11_re;
446  a11_im += c11_11_re * o11_im;
447  a11_re += c11_12_re * o12_re;
448  a11_re -= c11_12_im * o12_im;
449  a11_im += c11_12_re * o12_im;
450  a11_im += c11_12_im * o12_re;
451 
452  a12_re += c12_00_re * o00_re;
453  a12_re -= c12_00_im * o00_im;
454  a12_im += c12_00_re * o00_im;
455  a12_im += c12_00_im * o00_re;
456  a12_re += c12_01_re * o01_re;
457  a12_re -= c12_01_im * o01_im;
458  a12_im += c12_01_re * o01_im;
459  a12_im += c12_01_im * o01_re;
460  a12_re += c12_02_re * o02_re;
461  a12_re -= c12_02_im * o02_im;
462  a12_im += c12_02_re * o02_im;
463  a12_im += c12_02_im * o02_re;
464  a12_re += c12_10_re * o10_re;
465  a12_re -= c12_10_im * o10_im;
466  a12_im += c12_10_re * o10_im;
467  a12_im += c12_10_im * o10_re;
468  a12_re += c12_11_re * o11_re;
469  a12_re -= c12_11_im * o11_im;
470  a12_im += c12_11_re * o11_im;
471  a12_im += c12_11_im * o11_re;
472  a12_re += c12_12_re * o12_re;
473  a12_im += c12_12_re * o12_im;
474 
// The outputs are overwritten only now, after every row has been computed,
// because each row above reads all six of o00..o12.
475  o00_re = a00_re; o00_im = a00_im;
476  o01_re = a01_re; o01_im = a01_im;
477  o02_re = a02_re; o02_im = a02_im;
478  o10_re = a10_re; o10_im = a10_im;
479  o11_re = a11_re; o11_im = a11_im;
480  o12_re = a12_re; o12_im = a12_im;
481 
482  }
483 
484  // apply second chiral block
// Same computation as the first block, applied to the other six components:
// for each row r in {20,21,22,30,31,32},
//     a_r += sum over columns c of  c<r>_<c> * o_c
// Off-diagonal terms are full complex multiplies (four real updates);
// diagonal terms have a real coefficient only (two real updates).
// The c2x/c3x names are aliases for the first-block names, so they read the
// same C variables.
// NOTE(review): listing lines 486 and 488-490 are missing from this extract;
// they presumably reload the clover data and declare a20, a21, a22 in the
// same way as the a30..a32 declarations below -- confirm.
485  {
487 
// Accumulators start at zero.
491  spinorFloat a30_re = 0; spinorFloat a30_im = 0;
492  spinorFloat a31_re = 0; spinorFloat a31_im = 0;
493  spinorFloat a32_re = 0; spinorFloat a32_im = 0;
494 
495  a20_re += c20_20_re * o20_re;
496  a20_im += c20_20_re * o20_im;
497  a20_re += c20_21_re * o21_re;
498  a20_re -= c20_21_im * o21_im;
499  a20_im += c20_21_re * o21_im;
500  a20_im += c20_21_im * o21_re;
501  a20_re += c20_22_re * o22_re;
502  a20_re -= c20_22_im * o22_im;
503  a20_im += c20_22_re * o22_im;
504  a20_im += c20_22_im * o22_re;
505  a20_re += c20_30_re * o30_re;
506  a20_re -= c20_30_im * o30_im;
507  a20_im += c20_30_re * o30_im;
508  a20_im += c20_30_im * o30_re;
509  a20_re += c20_31_re * o31_re;
510  a20_re -= c20_31_im * o31_im;
511  a20_im += c20_31_re * o31_im;
512  a20_im += c20_31_im * o31_re;
513  a20_re += c20_32_re * o32_re;
514  a20_re -= c20_32_im * o32_im;
515  a20_im += c20_32_re * o32_im;
516  a20_im += c20_32_im * o32_re;
517 
518  a21_re += c21_20_re * o20_re;
519  a21_re -= c21_20_im * o20_im;
520  a21_im += c21_20_re * o20_im;
521  a21_im += c21_20_im * o20_re;
522  a21_re += c21_21_re * o21_re;
523  a21_im += c21_21_re * o21_im;
524  a21_re += c21_22_re * o22_re;
525  a21_re -= c21_22_im * o22_im;
526  a21_im += c21_22_re * o22_im;
527  a21_im += c21_22_im * o22_re;
528  a21_re += c21_30_re * o30_re;
529  a21_re -= c21_30_im * o30_im;
530  a21_im += c21_30_re * o30_im;
531  a21_im += c21_30_im * o30_re;
532  a21_re += c21_31_re * o31_re;
533  a21_re -= c21_31_im * o31_im;
534  a21_im += c21_31_re * o31_im;
535  a21_im += c21_31_im * o31_re;
536  a21_re += c21_32_re * o32_re;
537  a21_re -= c21_32_im * o32_im;
538  a21_im += c21_32_re * o32_im;
539  a21_im += c21_32_im * o32_re;
540 
541  a22_re += c22_20_re * o20_re;
542  a22_re -= c22_20_im * o20_im;
543  a22_im += c22_20_re * o20_im;
544  a22_im += c22_20_im * o20_re;
545  a22_re += c22_21_re * o21_re;
546  a22_re -= c22_21_im * o21_im;
547  a22_im += c22_21_re * o21_im;
548  a22_im += c22_21_im * o21_re;
549  a22_re += c22_22_re * o22_re;
550  a22_im += c22_22_re * o22_im;
551  a22_re += c22_30_re * o30_re;
552  a22_re -= c22_30_im * o30_im;
553  a22_im += c22_30_re * o30_im;
554  a22_im += c22_30_im * o30_re;
555  a22_re += c22_31_re * o31_re;
556  a22_re -= c22_31_im * o31_im;
557  a22_im += c22_31_re * o31_im;
558  a22_im += c22_31_im * o31_re;
559  a22_re += c22_32_re * o32_re;
560  a22_re -= c22_32_im * o32_im;
561  a22_im += c22_32_re * o32_im;
562  a22_im += c22_32_im * o32_re;
563 
564  a30_re += c30_20_re * o20_re;
565  a30_re -= c30_20_im * o20_im;
566  a30_im += c30_20_re * o20_im;
567  a30_im += c30_20_im * o20_re;
568  a30_re += c30_21_re * o21_re;
569  a30_re -= c30_21_im * o21_im;
570  a30_im += c30_21_re * o21_im;
571  a30_im += c30_21_im * o21_re;
572  a30_re += c30_22_re * o22_re;
573  a30_re -= c30_22_im * o22_im;
574  a30_im += c30_22_re * o22_im;
575  a30_im += c30_22_im * o22_re;
576  a30_re += c30_30_re * o30_re;
577  a30_im += c30_30_re * o30_im;
578  a30_re += c30_31_re * o31_re;
579  a30_re -= c30_31_im * o31_im;
580  a30_im += c30_31_re * o31_im;
581  a30_im += c30_31_im * o31_re;
582  a30_re += c30_32_re * o32_re;
583  a30_re -= c30_32_im * o32_im;
584  a30_im += c30_32_re * o32_im;
585  a30_im += c30_32_im * o32_re;
586 
587  a31_re += c31_20_re * o20_re;
588  a31_re -= c31_20_im * o20_im;
589  a31_im += c31_20_re * o20_im;
590  a31_im += c31_20_im * o20_re;
591  a31_re += c31_21_re * o21_re;
592  a31_re -= c31_21_im * o21_im;
593  a31_im += c31_21_re * o21_im;
594  a31_im += c31_21_im * o21_re;
595  a31_re += c31_22_re * o22_re;
596  a31_re -= c31_22_im * o22_im;
597  a31_im += c31_22_re * o22_im;
598  a31_im += c31_22_im * o22_re;
599  a31_re += c31_30_re * o30_re;
600  a31_re -= c31_30_im * o30_im;
601  a31_im += c31_30_re * o30_im;
602  a31_im += c31_30_im * o30_re;
603  a31_re += c31_31_re * o31_re;
604  a31_im += c31_31_re * o31_im;
605  a31_re += c31_32_re * o32_re;
606  a31_re -= c31_32_im * o32_im;
607  a31_im += c31_32_re * o32_im;
608  a31_im += c31_32_im * o32_re;
609 
610  a32_re += c32_20_re * o20_re;
611  a32_re -= c32_20_im * o20_im;
612  a32_im += c32_20_re * o20_im;
613  a32_im += c32_20_im * o20_re;
614  a32_re += c32_21_re * o21_re;
615  a32_re -= c32_21_im * o21_im;
616  a32_im += c32_21_re * o21_im;
617  a32_im += c32_21_im * o21_re;
618  a32_re += c32_22_re * o22_re;
619  a32_re -= c32_22_im * o22_im;
620  a32_im += c32_22_re * o22_im;
621  a32_im += c32_22_im * o22_re;
622  a32_re += c32_30_re * o30_re;
623  a32_re -= c32_30_im * o30_im;
624  a32_im += c32_30_re * o30_im;
625  a32_im += c32_30_im * o30_re;
626  a32_re += c32_31_re * o31_re;
627  a32_re -= c32_31_im * o31_im;
628  a32_im += c32_31_re * o31_im;
629  a32_im += c32_31_im * o31_re;
630  a32_re += c32_32_re * o32_re;
631  a32_im += c32_32_re * o32_im;
632 
// The outputs are overwritten only now, after every row has been computed,
// because each row above reads all six of o20..o32.
633  o20_re = a20_re; o20_im = a20_im;
634  o21_re = a21_re; o21_im = a21_im;
635  o22_re = a22_re; o22_im = a22_im;
636  o30_re = a30_re; o30_im = a30_im;
637  o31_re = a31_re; o31_im = a31_im;
638  o32_re = a32_re; o32_im = a32_im;
639 
640  }
641 
642  // change back from chiral basis
643  // (note: required factor of 1/2 is included in clover term normalization)
// Three sibling scopes, one per value c = 0, 1, 2 of the second component
// index. Within each scope the four outputs are recombined as
//     new o0c =  o1c + o3c
//     new o1c = -o0c - o2c
//     new o2c =  o1c - o3c
//     new o3c = -o0c + o2c
// (real and imaginary parts handled separately). The results go through
// temporaries first because every new value depends on old values that are
// themselves about to be overwritten.
644  {
645  spinorFloat a00_re = o10_re + o30_re;
646  spinorFloat a00_im = o10_im + o30_im;
647  spinorFloat a10_re = -o00_re - o20_re;
648  spinorFloat a10_im = -o00_im - o20_im;
649  spinorFloat a20_re = o10_re - o30_re;
650  spinorFloat a20_im = o10_im - o30_im;
651  spinorFloat a30_re = -o00_re + o20_re;
652  spinorFloat a30_im = -o00_im + o20_im;
653 
654  o00_re = a00_re; o00_im = a00_im;
655  o10_re = a10_re; o10_im = a10_im;
656  o20_re = a20_re; o20_im = a20_im;
657  o30_re = a30_re; o30_im = a30_im;
658  }
659 
660  {
661  spinorFloat a01_re = o11_re + o31_re;
662  spinorFloat a01_im = o11_im + o31_im;
663  spinorFloat a11_re = -o01_re - o21_re;
664  spinorFloat a11_im = -o01_im - o21_im;
665  spinorFloat a21_re = o11_re - o31_re;
666  spinorFloat a21_im = o11_im - o31_im;
667  spinorFloat a31_re = -o01_re + o21_re;
668  spinorFloat a31_im = -o01_im + o21_im;
669 
670  o01_re = a01_re; o01_im = a01_im;
671  o11_re = a11_re; o11_im = a11_im;
672  o21_re = a21_re; o21_im = a21_im;
673  o31_re = a31_re; o31_im = a31_im;
674  }
675 
676  {
677  spinorFloat a02_re = o12_re + o32_re;
678  spinorFloat a02_im = o12_im + o32_im;
679  spinorFloat a12_re = -o02_re - o22_re;
680  spinorFloat a12_im = -o02_im - o22_im;
681  spinorFloat a22_re = o12_re - o32_re;
682  spinorFloat a22_im = o12_im - o32_im;
683  spinorFloat a32_re = -o02_re + o22_re;
684  spinorFloat a32_im = -o02_im + o22_im;
685 
686  o02_re = a02_re; o02_im = a02_im;
687  o12_re = a12_re; o12_im = a12_im;
688  o22_re = a22_re; o22_im = a22_im;
689  o32_re = a32_re; o32_im = a32_im;
690  }
691 
// Optional final step, compiled only when DSLASH_XPAY is defined: every one
// of the 24 real output components is replaced by  a * o + acc.
// NOTE(review): neither the scalar `a` nor the acc??_re / acc??_im values are
// defined in this file; presumably `a` comes from the enclosing kernel and
// the acc values are made available by READ_ACCUM -- confirm.
692 #ifdef DSLASH_XPAY
693 
694  READ_ACCUM(ACCUMTEX, param.sp_stride)
695 
696  o00_re = a*o00_re+acc00_re;
697  o00_im = a*o00_im+acc00_im;
698  o01_re = a*o01_re+acc01_re;
699  o01_im = a*o01_im+acc01_im;
700  o02_re = a*o02_re+acc02_re;
701  o02_im = a*o02_im+acc02_im;
702  o10_re = a*o10_re+acc10_re;
703  o10_im = a*o10_im+acc10_im;
704  o11_re = a*o11_re+acc11_re;
705  o11_im = a*o11_im+acc11_im;
706  o12_re = a*o12_re+acc12_re;
707  o12_im = a*o12_im+acc12_im;
708  o20_re = a*o20_re+acc20_re;
709  o20_im = a*o20_im+acc20_im;
710  o21_re = a*o21_re+acc21_re;
711  o21_im = a*o21_im+acc21_im;
712  o22_re = a*o22_re+acc22_re;
713  o22_im = a*o22_im+acc22_im;
714  o30_re = a*o30_re+acc30_re;
715  o30_im = a*o30_im+acc30_im;
716  o31_re = a*o31_re+acc31_re;
717  o31_im = a*o31_im+acc31_im;
718  o32_re = a*o32_re+acc32_re;
719  o32_im = a*o32_im+acc32_im;
720 #endif // DSLASH_XPAY
721 }
722 
723 // write spinor field back to device memory
// WRITE_SPINOR is a macro defined outside this file; it is given only the
// stride. NOTE(review): presumably it stores the o??_re / o??_im values at
// this thread's site index -- confirm against its definition.
724 WRITE_SPINOR(param.sp_stride);
725 
726 // undefine to prevent warning when precision is changed
// Removes the macros whose expansion depends on SPINOR_DOUBLE or
// CLOVER_DOUBLE (spinorFloat, the i??_* names and the stored c??_??_* names),
// plus VOLATILE, so the file can be included again with a different
// precision selected.
// The derived aliases (upper-triangle conjugates and the c2x/c3x names) and
// CLOVER_SHARED_FLOATS_PER_THREAD are not undefined here; their replacement
// text is the same in every configuration, so defining them again with
// identical text is permitted by the preprocessor.
727 #undef spinorFloat
728 #undef i00_re
729 #undef i00_im
730 #undef i01_re
731 #undef i01_im
732 #undef i02_re
733 #undef i02_im
734 #undef i10_re
735 #undef i10_im
736 #undef i11_re
737 #undef i11_im
738 #undef i12_re
739 #undef i12_im
740 #undef i20_re
741 #undef i20_im
742 #undef i21_re
743 #undef i21_im
744 #undef i22_re
745 #undef i22_im
746 #undef i30_re
747 #undef i30_im
748 #undef i31_re
749 #undef i31_im
750 #undef i32_re
751 #undef i32_im
752 
753 #undef c00_00_re
754 #undef c01_01_re
755 #undef c02_02_re
756 #undef c10_10_re
757 #undef c11_11_re
758 #undef c12_12_re
759 #undef c01_00_re
760 #undef c01_00_im
761 #undef c02_00_re
762 #undef c02_00_im
763 #undef c10_00_re
764 #undef c10_00_im
765 #undef c11_00_re
766 #undef c11_00_im
767 #undef c12_00_re
768 #undef c12_00_im
769 #undef c02_01_re
770 #undef c02_01_im
771 #undef c10_01_re
772 #undef c10_01_im
773 #undef c11_01_re
774 #undef c11_01_im
775 #undef c12_01_re
776 #undef c12_01_im
777 #undef c10_02_re
778 #undef c10_02_im
779 #undef c11_02_re
780 #undef c11_02_im
781 #undef c12_02_re
782 #undef c12_02_im
783 #undef c11_10_re
784 #undef c11_10_im
785 #undef c12_10_re
786 #undef c12_10_im
787 #undef c12_11_re
788 #undef c12_11_im
789 
790 
791 #undef VOLATILE
#define i22_im
Definition: clover_core.h:57
#define c31_22_im
Definition: clover_core.h:205
#define c21_22_re
Definition: clover_core.h:199
VOLATILE spinorFloat o11_re
Definition: clover_core.h:251
#define c22_30_re
Definition: clover_core.h:212
#define c22_21_im
Definition: clover_core.h:190
#define a22_re
Definition: llfat_core.h:131
#define c31_32_im
Definition: clover_core.h:239
VOLATILE spinorFloat o31_re
Definition: clover_core.h:263
#define c32_31_re
Definition: clover_core.h:228
#define c31_30_im
Definition: clover_core.h:216
#define i11_im
Definition: clover_core.h:49
#define c00_02_re
Definition: clover_core.h:145
#define c11_10_im
Definition: clover_core.h:136
#define c11_00_re
Definition: clover_core.h:117
#define c20_30_im
Definition: clover_core.h:209
#define c00_00_re
Definition: clover_core.h:105
#define c12_10_re
Definition: clover_core.h:137
#define c00_10_im
Definition: clover_core.h:150
#define c21_30_re
Definition: clover_core.h:210
VOLATILE spinorFloat o21_re
Definition: clover_core.h:257
#define c21_30_im
Definition: clover_core.h:211
VOLATILE spinorFloat o20_im
Definition: clover_core.h:256
#define i00_im
Definition: clover_core.h:41
#define CLOVERTEX
Definition: clover_def.h:101
#define c31_30_re
Definition: clover_core.h:215
#define c10_00_re
Definition: clover_core.h:115
VOLATILE spinorFloat o12_re
Definition: clover_core.h:253
READ_SPINOR(SPINORTEX, param.sp_stride, sid, sid)
#define a02_im
Definition: llfat_core.h:120
#define c12_02_re
Definition: clover_core.h:133
#define c22_32_im
Definition: clover_core.h:235
#define c10_11_re
Definition: clover_core.h:161
VOLATILE spinorFloat o00_re
Definition: clover_core.h:243
#define c31_22_re
Definition: clover_core.h:204
#define c12_01_re
Definition: clover_core.h:127
#define c30_31_re
Definition: clover_core.h:225
#define i10_re
Definition: clover_core.h:46
#define VOLATILE
Definition: clover_core.h:9
VOLATILE spinorFloat o32_im
Definition: clover_core.h:266
#define c32_31_im
Definition: clover_core.h:229
#define a22_im
Definition: llfat_core.h:132
#define c02_02_re
Definition: clover_core.h:107
#define c30_22_im
Definition: clover_core.h:203
#define c31_21_im
Definition: clover_core.h:194
#define c21_31_im
Definition: clover_core.h:222
#define c02_00_re
Definition: clover_core.h:113
VOLATILE spinorFloat o01_re
Definition: clover_core.h:245
#define c11_00_im
Definition: clover_core.h:118
#define c32_32_re
Definition: clover_core.h:240
int sid
Definition: clover_core.h:271
VOLATILE spinorFloat o30_re
Definition: clover_core.h:261
#define c30_32_im
Definition: clover_core.h:237
#define c00_10_re
Definition: clover_core.h:149
#define c22_31_re
Definition: clover_core.h:223
#define c11_02_re
Definition: clover_core.h:131
#define c11_02_im
Definition: clover_core.h:132
#define a01_re
Definition: llfat_core.h:117
#define c01_01_re
Definition: clover_core.h:106
#define spinorFloat
Definition: clover_core.h:39
#define c10_00_im
Definition: clover_core.h:116
#define c12_00_re
Definition: clover_core.h:119
#define i01_re
Definition: clover_core.h:42
#define c20_20_re
Definition: clover_core.h:175
#define c21_32_re
Definition: clover_core.h:232
#define c02_11_im
Definition: clover_core.h:160
#define c02_10_re
Definition: clover_core.h:153
#define a02_re
Definition: llfat_core.h:119
#define a20_re
Definition: llfat_core.h:127
#define c10_10_re
Definition: clover_core.h:108
#define a12_im
Definition: llfat_core.h:126
#define c21_20_im
Definition: clover_core.h:177
#define c10_11_im
Definition: clover_core.h:162
#define a20_im
Definition: llfat_core.h:128
#define c12_02_im
Definition: clover_core.h:134
#define c01_12_im
Definition: clover_core.h:166
#define c12_12_re
Definition: clover_core.h:110
#define c22_30_im
Definition: clover_core.h:213
QudaGaugeParam param
Definition: pack_test.cpp:17
#define c12_11_re
Definition: clover_core.h:139
#define i12_re
Definition: clover_core.h:50
VOLATILE spinorFloat o10_re
Definition: clover_core.h:249
#define c31_20_re
Definition: clover_core.h:182
#define c10_02_im
Definition: clover_core.h:130
#define c01_02_im
Definition: clover_core.h:148
#define c22_20_im
Definition: clover_core.h:179
#define i20_im
Definition: clover_core.h:53
#define c30_31_im
Definition: clover_core.h:226
VOLATILE spinorFloat o20_re
Definition: clover_core.h:255
#define c01_10_im
Definition: clover_core.h:152
#define i11_re
Definition: clover_core.h:48
VOLATILE spinorFloat o31_im
Definition: clover_core.h:264
#define c30_22_re
Definition: clover_core.h:202
#define i02_im
Definition: clover_core.h:45
#define c00_11_re
Definition: clover_core.h:155
#define c31_32_re
Definition: clover_core.h:238
#define i02_re
Definition: clover_core.h:44
#define c01_10_re
Definition: clover_core.h:151
VOLATILE spinorFloat o11_im
Definition: clover_core.h:252
#define c32_22_re
Definition: clover_core.h:206
#define a01_im
Definition: llfat_core.h:118
#define c11_10_re
Definition: clover_core.h:135
VOLATILE spinorFloat o30_im
Definition: clover_core.h:262
#define a12_re
Definition: llfat_core.h:125
#define c02_12_re
Definition: clover_core.h:167
#define a11_re
Definition: llfat_core.h:123
#define c02_10_im
Definition: clover_core.h:154
#define c10_01_im
Definition: clover_core.h:124
VOLATILE spinorFloat o01_im
Definition: clover_core.h:246
VOLATILE spinorFloat o02_im
Definition: clover_core.h:248
#define c21_21_re
Definition: clover_core.h:188
#define c30_21_im
Definition: clover_core.h:192
#define c22_22_re
Definition: clover_core.h:201
#define c30_20_im
Definition: clover_core.h:181
#define c10_12_im
Definition: clover_core.h:170
#define c11_01_re
Definition: clover_core.h:125
#define c20_30_re
Definition: clover_core.h:208
VOLATILE spinorFloat o12_im
Definition: clover_core.h:254
#define c10_02_re
Definition: clover_core.h:129
#define i22_re
Definition: clover_core.h:56
#define c00_01_re
Definition: clover_core.h:143
#define c01_11_re
Definition: clover_core.h:157
#define c21_32_im
Definition: clover_core.h:233
#define c32_21_im
Definition: clover_core.h:196
#define c01_11_im
Definition: clover_core.h:158
#define i32_im
Definition: clover_core.h:63
#define c21_20_re
Definition: clover_core.h:176
#define c01_00_im
Definition: clover_core.h:112
#define SPINORTEX
Definition: clover_def.h:40
#define c12_00_im
Definition: clover_core.h:120
#define i30_re
Definition: clover_core.h:58
#define c21_22_im
Definition: clover_core.h:200
#define i12_im
Definition: clover_core.h:51
#define c30_30_re
Definition: clover_core.h:214
#define c02_12_im
Definition: clover_core.h:168
#define i21_re
Definition: clover_core.h:54
#define c01_00_re
Definition: clover_core.h:111
#define c02_11_re
Definition: clover_core.h:159
#define c22_32_re
Definition: clover_core.h:234
#define c02_01_re
Definition: clover_core.h:121
#define c30_21_re
Definition: clover_core.h:191
#define c22_21_re
Definition: clover_core.h:189
#define c10_01_re
Definition: clover_core.h:123
#define c32_21_re
Definition: clover_core.h:195
VOLATILE spinorFloat o22_re
Definition: clover_core.h:259
VOLATILE spinorFloat o00_im
Definition: clover_core.h:244
#define a00_re
Definition: llfat_core.h:115
#define i32_re
Definition: clover_core.h:62
#define c12_01_im
Definition: clover_core.h:128
VOLATILE spinorFloat o02_re
Definition: clover_core.h:247
#define c01_12_re
Definition: clover_core.h:165
#define c20_32_im
Definition: clover_core.h:231
#define i21_im
Definition: clover_core.h:55
#define c11_11_re
Definition: clover_core.h:109
#define c30_20_re
Definition: clover_core.h:180
#define c11_12_im
Definition: clover_core.h:172
#define c32_30_im
Definition: clover_core.h:218
#define c32_22_im
Definition: clover_core.h:207
#define c00_02_im
Definition: clover_core.h:146
#define a11_im
Definition: llfat_core.h:124
#define i00_re
Definition: clover_core.h:40
#define c31_31_re
Definition: clover_core.h:227
#define a10_re
Definition: llfat_core.h:121
#define c32_30_re
Definition: clover_core.h:217
#define c11_12_re
Definition: clover_core.h:171
#define c10_12_re
Definition: clover_core.h:169
#define c20_31_re
Definition: clover_core.h:219
#define c32_20_re
Definition: clover_core.h:184
#define c00_11_im
Definition: clover_core.h:156
#define c11_01_im
Definition: clover_core.h:126
VOLATILE spinorFloat o10_im
Definition: clover_core.h:250
#define c31_21_re
Definition: clover_core.h:193
#define c12_10_im
Definition: clover_core.h:138
#define c00_01_im
Definition: clover_core.h:144
#define c30_32_re
Definition: clover_core.h:236
VOLATILE spinorFloat o32_re
Definition: clover_core.h:265
#define c00_12_im
Definition: clover_core.h:164
#define c02_01_im
Definition: clover_core.h:122
#define a10_im
Definition: llfat_core.h:122
#define a21_re
Definition: llfat_core.h:129
#define i01_im
Definition: clover_core.h:43
#define WRITE_SPINOR
Definition: clover_def.h:48
#define c22_20_re
Definition: clover_core.h:178
#define c20_21_im
Definition: clover_core.h:187
#define i31_re
Definition: clover_core.h:60
#define c21_31_re
Definition: clover_core.h:221
#define c20_32_re
Definition: clover_core.h:230
#define i30_im
Definition: clover_core.h:59
#define i10_im
Definition: clover_core.h:47
#define c32_20_im
Definition: clover_core.h:185
#define c22_31_im
Definition: clover_core.h:224
#define c02_00_im
Definition: clover_core.h:114
#define i20_re
Definition: clover_core.h:52
#define c31_20_im
Definition: clover_core.h:183
#define a21_im
Definition: llfat_core.h:130
#define c20_31_im
Definition: clover_core.h:220
#define READ_CLOVER
Definition: clover_def.h:103
#define c01_02_re
Definition: clover_core.h:147
#define c00_12_re
Definition: clover_core.h:163
#define c20_22_re
Definition: clover_core.h:197
#define c12_11_im
Definition: clover_core.h:140
#define c20_22_im
Definition: clover_core.h:198
VOLATILE spinorFloat o21_im
Definition: clover_core.h:258
#define c20_21_re
Definition: clover_core.h:186
#define a00_im
Definition: llfat_core.h:116
VOLATILE spinorFloat o22_im
Definition: clover_core.h:260
#define i31_im
Definition: clover_core.h:61