// Compile-time configuration macros used by the kernel code below.
// NOTE(review): only part of the surrounding #if/#else/#endif structure is
// visible in this excerpt; the conditionals shown here do not pair up.
3 #define CLOVER_SHARED_FLOATS_PER_THREAD 0
// Compiler selection: the first branch applies when CUDA_VERSION >= 4010 and
// __COMPUTE_CAPABILITY__ >= 200; the #else branch covers everything else.
6 #if ((CUDA_VERSION >= 4010) && (__COMPUTE_CAPABILITY__ >= 200)) // NVVM compiler
8 #else // Open64 compiler
// In the #else (Open64) branch VOLATILE expands to the `volatile` qualifier.
9 #define VOLATILE volatile
// spinorFloat is defined once as double and once as float.
// NOTE(review): presumably selected by SPINOR_DOUBLE (named on the #endif
// below); the guarding #if/#else lines are not visible here - confirm.
13 #define spinorFloat double
39 #define spinorFloat float
64 #endif // SPINOR_DOUBLE
// Named clover elements mapped onto the two-component fields (.x/.y) of the
// variables C0..C17.
// Names whose two indices coincide (cAB_AB) have only a _re component here;
// all other names come as a _re/_im pair packed into one Cn variable.
// NOTE(review): presumably the CLOVER_DOUBLE branch of a conditional whose
// opening #if is not visible in this excerpt - confirm.
68 #define c00_00_re C0.x
69 #define c01_01_re C0.y
70 #define c02_02_re C1.x
71 #define c10_10_re C1.y
72 #define c11_11_re C2.x
73 #define c12_12_re C2.y
// Entries with distinct indices: real part in .x, imaginary part in .y.
74 #define c01_00_re C3.x
75 #define c01_00_im C3.y
76 #define c02_00_re C4.x
77 #define c02_00_im C4.y
78 #define c10_00_re C5.x
79 #define c10_00_im C5.y
80 #define c11_00_re C6.x
81 #define c11_00_im C6.y
82 #define c12_00_re C7.x
83 #define c12_00_im C7.y
84 #define c02_01_re C8.x
85 #define c02_01_im C8.y
86 #define c10_01_re C9.x
87 #define c10_01_im C9.y
88 #define c11_01_re C10.x
89 #define c11_01_im C10.y
90 #define c12_01_re C11.x
91 #define c12_01_im C11.y
92 #define c10_02_re C12.x
93 #define c10_02_im C12.y
94 #define c11_02_re C13.x
95 #define c11_02_im C13.y
96 #define c12_02_re C14.x
97 #define c12_02_im C14.y
98 #define c11_10_re C15.x
99 #define c11_10_im C15.y
100 #define c12_10_re C16.x
101 #define c12_10_im C16.y
102 #define c12_11_re C17.x
103 #define c12_11_im C17.y
// The same clover element names mapped onto the four-component fields
// (.x/.y/.z/.w) of the variables C0..C8; the element order matches the
// two-component table, only the packing differs.
// NOTE(review): presumably the alternative (#else) branch of the
// CLOVER_DOUBLE conditional closed below - the #else line is not visible.
105 #define c00_00_re C0.x
106 #define c01_01_re C0.y
107 #define c02_02_re C0.z
108 #define c10_10_re C0.w
109 #define c11_11_re C1.x
110 #define c12_12_re C1.y
// From here on each name pair (_re, _im) occupies two adjacent fields.
111 #define c01_00_re C1.z
112 #define c01_00_im C1.w
113 #define c02_00_re C2.x
114 #define c02_00_im C2.y
115 #define c10_00_re C2.z
116 #define c10_00_im C2.w
117 #define c11_00_re C3.x
118 #define c11_00_im C3.y
119 #define c12_00_re C3.z
120 #define c12_00_im C3.w
121 #define c02_01_re C4.x
122 #define c02_01_im C4.y
123 #define c10_01_re C4.z
124 #define c10_01_im C4.w
125 #define c11_01_re C5.x
126 #define c11_01_im C5.y
127 #define c12_01_re C5.z
128 #define c12_01_im C5.w
129 #define c10_02_re C6.x
130 #define c10_02_im C6.y
131 #define c11_02_re C6.z
132 #define c11_02_im C6.w
133 #define c12_02_re C7.x
134 #define c12_02_im C7.y
135 #define c11_10_re C7.z
136 #define c11_10_im C7.w
137 #define c12_10_re C8.x
138 #define c12_10_im C8.y
139 #define c12_11_re C8.z
140 #define c12_11_im C8.w
141 #endif // CLOVER_DOUBLE
// Elements derived from the stored ones by swapping the two indices:
// cAB_CD_re expands to +cCD_AB_re and cAB_CD_im to -cCD_AB_im, i.e. the
// complex conjugate of the index-swapped element. The expansions are
// parenthesised so the sign stays attached when used inside expressions.
143 #define c00_01_re (+c01_00_re)
144 #define c00_01_im (-c01_00_im)
145 #define c00_02_re (+c02_00_re)
146 #define c00_02_im (-c02_00_im)
147 #define c01_02_re (+c02_01_re)
148 #define c01_02_im (-c02_01_im)
149 #define c00_10_re (+c10_00_re)
150 #define c00_10_im (-c10_00_im)
151 #define c01_10_re (+c10_01_re)
152 #define c01_10_im (-c10_01_im)
153 #define c02_10_re (+c10_02_re)
154 #define c02_10_im (-c10_02_im)
155 #define c00_11_re (+c11_00_re)
156 #define c00_11_im (-c11_00_im)
157 #define c01_11_re (+c11_01_re)
158 #define c01_11_im (-c11_01_im)
159 #define c02_11_re (+c11_02_re)
160 #define c02_11_im (-c11_02_im)
161 #define c10_11_re (+c11_10_re)
162 #define c10_11_im (-c11_10_im)
163 #define c00_12_re (+c12_00_re)
164 #define c00_12_im (-c12_00_im)
165 #define c01_12_re (+c12_01_re)
166 #define c01_12_im (-c12_01_im)
167 #define c02_12_re (+c12_02_re)
168 #define c02_12_im (-c12_02_im)
169 #define c10_12_re (+c12_10_re)
170 #define c10_12_im (-c12_10_im)
171 #define c11_12_re (+c12_11_re)
172 #define c11_12_im (-c12_11_im)
// Aliases for names in the c2x/c3x index range: each one expands to the
// c0x/c1x name obtained by lowering the leading digit of both indices by 2
// (c2A_2B -> c0A_0B, c3A_2B -> c1A_0B, c2A_3B -> c0A_1B, c3A_3B -> c1A_1B).
// Names with coinciding indices again have only a _re component.
// NOTE(review): presumably this lets the same C0.. variables serve a second
// block of the clover term after being reloaded - confirm in the full file.
175 #define c20_20_re c00_00_re
176 #define c21_20_re c01_00_re
177 #define c21_20_im c01_00_im
178 #define c22_20_re c02_00_re
179 #define c22_20_im c02_00_im
180 #define c30_20_re c10_00_re
181 #define c30_20_im c10_00_im
182 #define c31_20_re c11_00_re
183 #define c31_20_im c11_00_im
184 #define c32_20_re c12_00_re
185 #define c32_20_im c12_00_im
186 #define c20_21_re c00_01_re
187 #define c20_21_im c00_01_im
188 #define c21_21_re c01_01_re
189 #define c22_21_re c02_01_re
190 #define c22_21_im c02_01_im
191 #define c30_21_re c10_01_re
192 #define c30_21_im c10_01_im
193 #define c31_21_re c11_01_re
194 #define c31_21_im c11_01_im
195 #define c32_21_re c12_01_re
196 #define c32_21_im c12_01_im
197 #define c20_22_re c00_02_re
198 #define c20_22_im c00_02_im
199 #define c21_22_re c01_02_re
200 #define c21_22_im c01_02_im
201 #define c22_22_re c02_02_re
202 #define c30_22_re c10_02_re
203 #define c30_22_im c10_02_im
204 #define c31_22_re c11_02_re
205 #define c31_22_im c11_02_im
206 #define c32_22_re c12_02_re
207 #define c32_22_im c12_02_im
208 #define c20_30_re c00_10_re
209 #define c20_30_im c00_10_im
210 #define c21_30_re c01_10_re
211 #define c21_30_im c01_10_im
212 #define c22_30_re c02_10_re
213 #define c22_30_im c02_10_im
214 #define c30_30_re c10_10_re
215 #define c31_30_re c11_10_re
216 #define c31_30_im c11_10_im
217 #define c32_30_re c12_10_re
218 #define c32_30_im c12_10_im
219 #define c20_31_re c00_11_re
220 #define c20_31_im c00_11_im
221 #define c21_31_re c01_11_re
222 #define c21_31_im c01_11_im
223 #define c22_31_re c02_11_re
224 #define c22_31_im c02_11_im
225 #define c30_31_re c10_11_re
226 #define c30_31_im c10_11_im
227 #define c31_31_re c11_11_re
228 #define c32_31_re c12_11_re
229 #define c32_31_im c12_11_im
230 #define c20_32_re c00_12_re
231 #define c20_32_im c00_12_im
232 #define c21_32_re c01_12_re
233 #define c21_32_im c01_12_im
234 #define c22_32_re c02_12_re
235 #define c22_32_im c02_12_im
236 #define c30_32_re c10_12_re
237 #define c30_32_im c10_12_im
238 #define c31_32_re c11_12_re
239 #define c31_32_im c11_12_im
240 #define c32_32_re c12_12_re
// Fragments of the kernel body; many intermediate lines are not visible in
// this excerpt, so the statements below are not contiguous in the full file.
// Flat per-thread index built from the x-components of the block index,
// block size and thread index.
// NOTE(review): no bounds check on sid is visible here - confirm one exists
// in the surrounding code before sid is used for addressing.
271 int sid = blockIdx.x*blockDim.x + threadIdx.x;
// Copy the a0x/a1x temporaries into the matching o0x/o1x variables
// (real and imaginary parts).
475 o00_re = a00_re; o00_im = a00_im;
476 o01_re = a01_re; o01_im = a01_im;
477 o02_re = a02_re; o02_im = a02_im;
478 o10_re = a10_re; o10_im = a10_im;
479 o11_re = a11_re; o11_im = a11_im;
480 o12_re = a12_re; o12_im = a12_im;
// Same copy for the a2x/a3x temporaries into o2x/o3x.
633 o20_re = a20_re; o20_im = a20_im;
634 o21_re = a21_re; o21_im = a21_im;
635 o22_re = a22_re; o22_im = a22_im;
636 o30_re = a30_re; o30_im = a30_im;
637 o31_re = a31_re; o31_im = a31_im;
638 o32_re = a32_re; o32_im = a32_im;
// NOTE(review): the o30/o31/o32 copies appear again below; the code between
// them is not visible, so whether the a3x values change in between is unknown.
657 o30_re = a30_re; o30_im = a30_im;
673 o31_re = a31_re; o31_im = a31_im;
689 o32_re = a32_re; o32_im = a32_im;
// Invocation of the READ_ACCUM macro (defined outside this excerpt) with
// ACCUMTEX and param.sp_stride.
// NOTE(review): presumably only compiled when DSLASH_XPAY is defined (see
// the #endif below); the opening #ifdef is not visible - confirm.
694 READ_ACCUM(ACCUMTEX,
param.sp_stride)
720 #endif // DSLASH_XPAY
// Declarations of the o* variables (real and imaginary parts for indices
// 00..32), each typed spinorFloat and qualified with the VOLATILE macro.
// NOTE(review): in this excerpt the declarations carry no terminators and
// are not in index order; treat the ordering as an artifact of the excerpt
// and check the full file for the authoritative form.
VOLATILE spinorFloat o11_re
VOLATILE spinorFloat o31_re
VOLATILE spinorFloat o21_re
VOLATILE spinorFloat o20_im
VOLATILE spinorFloat o12_re
// Invocation of the READ_SPINOR macro (defined outside this excerpt) with
// SPINORTEX, param.sp_stride and sid passed twice.
READ_SPINOR(SPINORTEX, param.sp_stride, sid, sid)
VOLATILE spinorFloat o00_re
VOLATILE spinorFloat o32_im
VOLATILE spinorFloat o01_re
VOLATILE spinorFloat o30_re
VOLATILE spinorFloat o10_re
VOLATILE spinorFloat o20_re
VOLATILE spinorFloat o31_im
VOLATILE spinorFloat o11_im
VOLATILE spinorFloat o30_im
VOLATILE spinorFloat o01_im
VOLATILE spinorFloat o02_im
VOLATILE spinorFloat o12_im
VOLATILE spinorFloat o22_re
VOLATILE spinorFloat o00_im
VOLATILE spinorFloat o02_re
VOLATILE spinorFloat o10_im
VOLATILE spinorFloat o32_re
VOLATILE spinorFloat o21_im
VOLATILE spinorFloat o22_im