QUDA  0.9.0
staggered_dslash_def.h
Go to the documentation of this file.
1 // staggered_dslash_def.h - staggered Dslash kernel definitions
2 //
3 // See comments in wilson_dslash_def.h
4 
5 // initialize on first iteration
6 
// First pass only: DD_LOOP is not defined yet, so mark the loop as started and
// seed the option macros with their initial values (no axpy, fat and long links
// both at reconstruct 8, double precision).
7 #ifndef DD_LOOP
8 #define DD_LOOP
9 
10 #define DD_AXPY 0
11 #define DD_FAT_RECON 8
12 #define DD_LONG_RECON 8
13 #define DD_PREC 0
14 #endif
15 
16 // set options for current iteration
17 
// DD_FNAME is the name stem for this iteration: improvedStaggeredDslash when
// DD_IMPROVED is 1, staggeredDslash for any other value (including undefined,
// which evaluates to 0 inside #if).
18 #if (DD_IMPROVED==1)
19 #define DD_FNAME improvedStaggeredDslash
20 #else
21 #define DD_FNAME staggeredDslash
22 #endif
23 
// DD_AXPY_F is empty when there is no axpy and expands to Axpy otherwise; the
// axpy case additionally defines the DSLASH_AXPY flag.
24 #if (DD_AXPY==0) // no axpy
25 #define DD_AXPY_F
26 #else // axpy
27 #define DD_AXPY_F Axpy
28 #define DSLASH_AXPY
29 #endif
30 
// DD_FAT_RECON_F mirrors DD_FAT_RECON for the values 8, 9, 12 and 13; every
// other value is mapped to 18.
31 #if (DD_FAT_RECON==8)
32 #define DD_FAT_RECON_F 8
33 #elif (DD_FAT_RECON==9)
34 #define DD_FAT_RECON_F 9
35 #elif (DD_FAT_RECON==12)
36 #define DD_FAT_RECON_F 12
37 #elif (DD_FAT_RECON==13)
38 #define DD_FAT_RECON_F 13
39 #else
40 #define DD_FAT_RECON_F 18
41 #endif
42 
// Default: READ_LONG_PHASE expands to nothing. The DD_LONG_RECON 9 and 13
// branches below #undef it and redefine it in terms of READ_GAUGE_PHASE_*.
43 #define READ_LONG_PHASE(phase, dir, idx, stride) // May be a problem below with redefinitions
44 
45 #if (DD_LONG_RECON==8) // reconstruct from 8 reals
46 #define DD_LONG_RECON_F 8
47 
48 #if (DD_PREC==0) // DOUBLE PRECISION
49 
// Double precision, long links reconstructed from 8 reals.
// Long links use the 8-real double reconstruction macro.
50 #define RECONSTRUCT_LONG_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_8_DOUBLE
51 
// Fat-link reconstruction follows DD_FAT_RECON; any value other than 8 or 12
// gets an empty macro, i.e. no reconstruction step.
52 #if (DD_FAT_RECON==8)
53 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_8_DOUBLE
54 #elif (DD_FAT_RECON==12)
55 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_12_DOUBLE
56 #else
57 #define RECONSTRUCT_FAT_GAUGE_MATRIX(dir, gauge, idx, sign)
58 #endif
59 
// Fat-link read: DOUBLE2 read macros for direct access, their _TEX variants
// otherwise; the 18-real read is the fallback for DD_FAT_RECON other than 8/12.
60 #ifdef DIRECT_ACCESS_FAT_LINK
61 #if (DD_FAT_RECON==8)
62 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_DOUBLE2(FAT, gauge, dir, idx, stride)
63 #elif (DD_FAT_RECON==12)
64 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_DOUBLE2(FAT, gauge, dir, idx, stride)
65 #else
66 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_DOUBLE2(FAT, gauge, dir, idx, stride)
67 #endif
68 #else // texture access
69 #if (DD_FAT_RECON==8)
70 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_DOUBLE2_TEX(FAT, gauge, dir, idx, stride)
71 #elif (DD_FAT_RECON==12)
72 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_DOUBLE2_TEX(FAT, gauge, dir, idx, stride)
73 #else
74 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_DOUBLE2_TEX(FAT, gauge, dir, idx, stride)
75 #endif // DD_FAT_RECON
76 #endif // DIRECT_ACCESS_FAT_LINK
77 
// Long-link read: always the 8-real DOUBLE2 read, direct or via texture.
78 #ifdef DIRECT_ACCESS_LONG_LINK
79 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_DOUBLE2(LONG, gauge, dir, idx, stride)
80 #else
81 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_DOUBLE2_TEX(LONG, gauge, dir, idx, stride)
82 #endif // DIRECT_ACCESS_LONG_LINK
83 
84 #elif (DD_PREC==1) // SINGLE PRECISION
85 
// Single precision, long links reconstructed from 8 reals.
// Long links use the 8-real single reconstruction macro.
86 #define RECONSTRUCT_LONG_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_8_SINGLE
// Fat-link reconstruction follows DD_FAT_RECON; any value other than 8 or 12
// gets an empty macro, i.e. no reconstruction step.
87 #if (DD_FAT_RECON==8)
88 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_8_SINGLE
89 #elif (DD_FAT_RECON==12)
90 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_12_SINGLE
91 #else
92 #define RECONSTRUCT_FAT_GAUGE_MATRIX(dir, gauge, idx, sign)
93 #endif // DD_FAT_RECON
94 
// Fat-link read: FLOAT4 read macros for reconstruct 8/12, FLOAT2 for the
// 18-real fallback; _TEX variants when DIRECT_ACCESS_FAT_LINK is not defined.
95 #ifdef DIRECT_ACCESS_FAT_LINK
96 #if (DD_FAT_RECON==8)
97 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_FLOAT4(FAT, gauge, dir, idx, stride)
98 #elif (DD_FAT_RECON==12)
99 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_FLOAT4(FAT, gauge, dir, idx, stride)
100 #else
101 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_FLOAT2(FAT, gauge, dir, idx, stride)
102 #endif // DD_FAT_RECON
103 #else
104 #if (DD_FAT_RECON==8)
105 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_FLOAT4_TEX(FAT, gauge, dir, idx, stride)
106 #elif (DD_FAT_RECON==12)
107 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_FLOAT4_TEX(FAT, gauge, dir, idx, stride)
108 #else
109 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_FLOAT2_TEX(FAT, gauge, dir, idx, stride)
110 #endif // DD_FAT_RECON
111 #endif // DIRECT_ACCESS_FAT_LINK
112 
// Long-link read: always the 8-real FLOAT4 read, direct or via texture.
113 #ifdef DIRECT_ACCESS_LONG_LINK
114 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_FLOAT4(LONG, gauge, dir, idx, stride)
115 #else
116 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_FLOAT4_TEX(LONG, gauge, dir, idx, stride)
117 #endif // DIRECT_ACCESS_LONG_LINK
118 
119 #else // HALF PRECISION
120 
// Half precision, long links reconstructed from 8 reals.
// Half precision reuses the _SINGLE reconstruction macros.
121 #define RECONSTRUCT_LONG_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_8_SINGLE
// Fat-link reconstruction follows DD_FAT_RECON; any value other than 8 or 12
// gets an empty macro, i.e. no reconstruction step.
122 #if (DD_FAT_RECON==8)
123 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_8_SINGLE
124 #elif (DD_FAT_RECON==12)
125 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_12_SINGLE
126 #else
127 #define RECONSTRUCT_FAT_GAUGE_MATRIX(dir, gauge, idx, sign)
128 #endif //DD_FAT_RECON
129 
// Fat-link read: SHORT4 (reconstruct 8/12) or SHORT2 (18-real fallback) read,
// followed by RESCALE4/RESCALE2 with fat_link_max.
// NOTE: each READ_FAT_MATRIX here expands to two statements with a trailing
// ';', so it must not be used as the unbraced body of an if/for.
// The direct-access fallback is a plain #else, exactly like the texture-access
// ladder below, so READ_FAT_MATRIX is defined for every DD_FAT_RECON value.
130 #ifdef DIRECT_ACCESS_FAT_LINK
131 #if (DD_FAT_RECON==8)
132 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_SHORT4(FAT, gauge, dir, idx, stride); RESCALE4(FAT, fat_link_max);
133 #elif (DD_FAT_RECON==12)
134 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_SHORT4(FAT, gauge, dir, idx, stride); RESCALE4(FAT, fat_link_max);
135 #else
136 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_SHORT2(FAT, gauge, dir, idx, stride); RESCALE2(FAT, fat_link_max);
137 #endif
138 #else
139 #if (DD_FAT_RECON==8)
140 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_SHORT4_TEX(FAT, gauge, dir, idx, stride); RESCALE4(FAT, fat_link_max);
141 #elif (DD_FAT_RECON==12)
142 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_SHORT4_TEX(FAT, gauge, dir, idx, stride); RESCALE4(FAT, fat_link_max);
143 #else
144 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_SHORT2_TEX(FAT, gauge, dir, idx, stride); RESCALE2(FAT, fat_link_max);
145 #endif
146 #endif // DIRECT_ACCESS_FAT_LINK
// Long-link read: 8-real SHORT4 read, direct or via texture; no RESCALE step.
147 #ifdef DIRECT_ACCESS_LONG_LINK
148 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_SHORT4(LONG, gauge, dir, idx, stride)
149 #else
150 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_SHORT4_TEX(LONG, gauge, dir, idx, stride)
151 #endif // DIRECT_ACCESS_LONG_LINK
152 
153 #endif // DD_PREC
154 
155 #elif (DD_LONG_RECON == 9) // reconstruct from 9 reals
156 
157 #define DD_LONG_RECON_F 9
158 
159 #if (DD_PREC==0) // DOUBLE PRECISION
160 
// Double precision, long links reconstructed from 9 reals: the matrix is read
// with the 8-real read macro and READ_LONG_PHASE is redefined to read a phase.
161 #define RECONSTRUCT_LONG_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_9_DOUBLE
// Fat-link reconstruction follows DD_FAT_RECON; any value other than 8 or 12
// gets an empty macro, i.e. no reconstruction step.
162 #if (DD_FAT_RECON==8)
163 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_8_DOUBLE
164 #elif (DD_FAT_RECON==12)
165 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_12_DOUBLE
166 #else
167 #define RECONSTRUCT_FAT_GAUGE_MATRIX(dir, gauge, idx, sign)
168 #endif
169 
// Fat-link read: DOUBLE2 read macros for direct access, their _TEX variants
// otherwise; the 18-real read is the fallback for DD_FAT_RECON other than 8/12.
170 #ifdef DIRECT_ACCESS_FAT_LINK
171 #if (DD_FAT_RECON==8)
172 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_DOUBLE2(FAT, gauge, dir, idx, stride)
173 #elif (DD_FAT_RECON==12)
174 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_DOUBLE2(FAT, gauge, dir, idx, stride)
175 #else
176 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_DOUBLE2(FAT, gauge, dir, idx, stride)
177 #endif
178 #else // texture access
179 #if (DD_FAT_RECON==8)
180 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_DOUBLE2_TEX(FAT, gauge, dir, idx, stride)
181 #elif (DD_FAT_RECON==12)
182 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_DOUBLE2_TEX(FAT, gauge, dir, idx, stride)
183 #else
184 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_DOUBLE2_TEX(FAT, gauge, dir, idx, stride)
185 #endif // DD_FAT_RECON
186 #endif // DIRECT_ACCESS_FAT_LINK
// Drop the empty default READ_LONG_PHASE before installing the real one.
187 #undef READ_LONG_PHASE
188 
// Long-link read (8-real DOUBLE2) plus the phase read, direct or via texture.
189 #ifdef DIRECT_ACCESS_LONG_LINK
190 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_DOUBLE2(LONG, gauge, dir, idx, stride)
191 #define READ_LONG_PHASE(phase, dir, idx, stride) READ_GAUGE_PHASE_DOUBLE(PHASE, phase, dir, idx, stride);
192 #else
193 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_DOUBLE2_TEX(LONG, gauge, dir, idx, stride)
194 #define READ_LONG_PHASE(phase, dir, idx, stride) READ_GAUGE_PHASE_DOUBLE_TEX(PHASE, phase, dir, idx, stride);
195 #endif // DIRECT_ACCESS_LONG_LINK
196 
197 #elif (DD_PREC==1) // SINGLE PRECISION
198 
// Single precision, long links reconstructed from 9 reals: the matrix is read
// with the 8-real read macro and READ_LONG_PHASE is redefined to read a phase.
199 #define RECONSTRUCT_LONG_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_9_SINGLE
// Fat-link reconstruction follows DD_FAT_RECON; any value other than 8 or 12
// gets an empty macro, i.e. no reconstruction step.
200 #if (DD_FAT_RECON==8)
201 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_8_SINGLE
202 #elif (DD_FAT_RECON==12)
203 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_12_SINGLE
204 #else
205 #define RECONSTRUCT_FAT_GAUGE_MATRIX(dir, gauge, idx, sign)
206 #endif
207 
// Fat-link read: FLOAT4 read macros for reconstruct 8/12, FLOAT2 for the
// 18-real fallback; _TEX variants when DIRECT_ACCESS_FAT_LINK is not defined.
208 #ifdef DIRECT_ACCESS_FAT_LINK
209 #if (DD_FAT_RECON==8)
210 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_FLOAT4(FAT, gauge, dir, idx, stride)
211 #elif (DD_FAT_RECON==12)
212 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_FLOAT4(FAT, gauge, dir, idx, stride)
213 #else
214 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_FLOAT2(FAT, gauge, dir, idx, stride)
215 #endif
216 #else
217 #if (DD_FAT_RECON==8)
218 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_FLOAT4_TEX(FAT, gauge, dir, idx, stride)
219 #elif (DD_FAT_RECON==12)
220 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_FLOAT4_TEX(FAT, gauge, dir, idx, stride)
221 #else
222 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_FLOAT2_TEX(FAT, gauge, dir, idx, stride)
223 #endif
224 #endif // DIRECT_ACCESS_FAT_LINK
// Drop the empty default READ_LONG_PHASE before installing the real one.
225 #undef READ_LONG_PHASE
226 
// Long-link read (8-real FLOAT4) plus the phase read, direct or via texture.
227 #ifdef DIRECT_ACCESS_LONG_LINK
228 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_FLOAT4(LONG, gauge, dir, idx, stride)
229 #define READ_LONG_PHASE(phase, dir, idx, stride) READ_GAUGE_PHASE_FLOAT(PHASE, phase, dir, idx, stride);
230 #else
231 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_FLOAT4_TEX(LONG, gauge, dir, idx, stride)
232 #define READ_LONG_PHASE(phase, dir, idx, stride) READ_GAUGE_PHASE_FLOAT_TEX(PHASE, phase, dir, idx, stride);
233 #endif // DIRECT_ACCESS_LONG_LINK
234 
235 #else // HALF PRECISION
236 
// Half precision, long links reconstructed from 9 reals: the matrix is read
// with the 8-real read macro and READ_LONG_PHASE is redefined to read a phase.
// Half precision reuses the _SINGLE reconstruction macros.
237 #define RECONSTRUCT_LONG_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_9_SINGLE
// Fat-link reconstruction follows DD_FAT_RECON; any value other than 8 or 12
// gets an empty macro, i.e. no reconstruction step.
238 #if (DD_FAT_RECON==8)
239 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_8_SINGLE
240 #elif (DD_FAT_RECON==12)
241 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_12_SINGLE
242 #else
243 #define RECONSTRUCT_FAT_GAUGE_MATRIX(dir, gauge, idx, sign)
244 #endif
245 
// Fat-link read: SHORT4 (reconstruct 8/12) or SHORT2 (18-real fallback) read,
// followed by RESCALE4/RESCALE2 with fat_link_max.
// NOTE: each READ_FAT_MATRIX here expands to two statements with a trailing
// ';', so it must not be used as the unbraced body of an if/for.
// The direct-access fallback is a plain #else, exactly like the texture-access
// ladder below, so READ_FAT_MATRIX is defined for every DD_FAT_RECON value.
246 #ifdef DIRECT_ACCESS_FAT_LINK
247 #if (DD_FAT_RECON==8)
248 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_SHORT4(FAT, gauge, dir, idx, stride); RESCALE4(FAT, fat_link_max);
249 #elif (DD_FAT_RECON==12)
250 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_SHORT4(FAT, gauge, dir, idx, stride); RESCALE4(FAT, fat_link_max);
251 #else
252 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_SHORT2(FAT, gauge, dir, idx, stride); RESCALE2(FAT, fat_link_max);
253 #endif
254 #else
255 #if (DD_FAT_RECON==8)
256 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_SHORT4_TEX(FAT, gauge, dir, idx, stride); RESCALE4(FAT, fat_link_max);
257 #elif (DD_FAT_RECON==12)
258 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_SHORT4_TEX(FAT, gauge, dir, idx, stride); RESCALE4(FAT, fat_link_max);
259 #else
260 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_SHORT2_TEX(FAT, gauge, dir, idx, stride); RESCALE2(FAT, fat_link_max);
261 #endif
262 #endif // DIRECT_ACCESS_FAT_LINK
// Drop the empty default READ_LONG_PHASE before installing the real one.
263 #undef READ_LONG_PHASE
// Long-link read (8-real SHORT4, no RESCALE step) plus the phase read.
264 #ifdef DIRECT_ACCESS_LONG_LINK
265 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_SHORT4(LONG, gauge, dir, idx, stride)
266 #define READ_LONG_PHASE(phase, dir, idx, stride) READ_GAUGE_PHASE_SHORT(PHASE, phase, dir, idx, stride);
267 #else
268 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_SHORT4_TEX(LONG, gauge, dir, idx, stride)
269 #define READ_LONG_PHASE(phase, dir, idx, stride) READ_GAUGE_PHASE_SHORT_TEX(PHASE, phase, dir, idx, stride);
270 #endif // DIRECT_ACCESS_LONG_LINK
271 
272 #endif // DD_PREC
273 
274 #elif (DD_LONG_RECON == 12)// reconstruct from 12 reals
275 
276 #define DD_LONG_RECON_F 12
277 
278 #if (DD_PREC==0) // DOUBLE PRECISION
279 
// Double precision, long links reconstructed from 12 reals.
280 #define RECONSTRUCT_LONG_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_12_DOUBLE
// Fat-link reconstruction follows DD_FAT_RECON; any value other than 8 or 12
// gets an empty macro, i.e. no reconstruction step.
281 #if (DD_FAT_RECON==8)
282 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_8_DOUBLE
283 #elif (DD_FAT_RECON==12)
284 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_12_DOUBLE
285 #else
286 #define RECONSTRUCT_FAT_GAUGE_MATRIX(dir, gauge, idx, sign)
287 #endif
288 
// Fat-link read: DOUBLE2 read macros for direct access, their _TEX variants
// otherwise; the 18-real read is the fallback for DD_FAT_RECON other than 8/12.
289 #ifdef DIRECT_ACCESS_FAT_LINK
290 #if (DD_FAT_RECON==8)
291 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_DOUBLE2(FAT, gauge, dir, idx, stride)
292 #elif (DD_FAT_RECON==12)
293 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_DOUBLE2(FAT, gauge, dir, idx, stride)
294 #else
295 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_DOUBLE2(FAT, gauge, dir, idx, stride)
296 #endif
297 #else // texture access
298 #if (DD_FAT_RECON==8)
299 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_DOUBLE2_TEX(FAT, gauge, dir, idx, stride)
300 #elif (DD_FAT_RECON==12)
301 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_DOUBLE2_TEX(FAT, gauge, dir, idx, stride)
302 #else
303 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_DOUBLE2_TEX(FAT, gauge, dir, idx, stride)
304 #endif // DD_FAT_RECON
305 #endif // DIRECT_ACCESS_FAT_LINK
306 
// Long-link read: always the 12-real DOUBLE2 read, direct or via texture.
307 #ifdef DIRECT_ACCESS_LONG_LINK
308 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_DOUBLE2(LONG, gauge, dir, idx, stride)
309 #else
310 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_DOUBLE2_TEX(LONG, gauge, dir, idx, stride)
311 #endif // DIRECT_ACCESS_LONG_LINK
312 
313 #elif (DD_PREC==1) // SINGLE PRECISION
314 
// Single precision, long links reconstructed from 12 reals.
315 #define RECONSTRUCT_LONG_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_12_SINGLE
// Fat-link reconstruction follows DD_FAT_RECON; any value other than 8 or 12
// gets an empty macro, i.e. no reconstruction step.
316 #if (DD_FAT_RECON==8)
317 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_8_SINGLE
318 #elif (DD_FAT_RECON==12)
319 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_12_SINGLE
320 #else
321 #define RECONSTRUCT_FAT_GAUGE_MATRIX(dir, gauge, idx, sign)
322 #endif
323 
// Fat-link read: FLOAT4 read macros for reconstruct 8/12, FLOAT2 for the
// 18-real fallback; _TEX variants when DIRECT_ACCESS_FAT_LINK is not defined.
324 #ifdef DIRECT_ACCESS_FAT_LINK
325 #if (DD_FAT_RECON==8)
326 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_FLOAT4(FAT, gauge, dir, idx, stride)
327 #elif (DD_FAT_RECON==12)
328 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_FLOAT4(FAT, gauge, dir, idx, stride)
329 #else
330 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_FLOAT2(FAT, gauge, dir, idx, stride)
331 #endif
332 #else
333 #if (DD_FAT_RECON==8)
334 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_FLOAT4_TEX(FAT, gauge, dir, idx, stride)
335 #elif (DD_FAT_RECON==12)
336 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_FLOAT4_TEX(FAT, gauge, dir, idx, stride)
337 #else
338 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_FLOAT2_TEX(FAT, gauge, dir, idx, stride)
339 #endif
340 #endif // DIRECT_ACCESS_FAT_LINK
341 
// Long-link read: always the 12-real FLOAT4 read, direct or via texture.
342 #ifdef DIRECT_ACCESS_LONG_LINK
343 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_FLOAT4(LONG, gauge, dir, idx, stride)
344 #else
345 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_FLOAT4_TEX(LONG, gauge, dir, idx, stride)
346 #endif // DIRECT_ACCESS_LONG_LINK
347 
348 #else // HALF PRECISION
349 
// Half precision, long links reconstructed from 12 reals.
// Half precision reuses the _SINGLE reconstruction macros.
350 #define RECONSTRUCT_LONG_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_12_SINGLE
// Fat-link reconstruction follows DD_FAT_RECON; any value other than 8 or 12
// gets an empty macro, i.e. no reconstruction step.
351 #if (DD_FAT_RECON==8)
352 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_8_SINGLE
353 #elif (DD_FAT_RECON==12)
354 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_12_SINGLE
355 #else
356 #define RECONSTRUCT_FAT_GAUGE_MATRIX(dir, gauge, idx, sign)
357 #endif
358 
// Fat-link read: SHORT4 (reconstruct 8/12) or SHORT2 (18-real fallback) read,
// followed by RESCALE4/RESCALE2 with fat_link_max.
// NOTE: each READ_FAT_MATRIX here expands to two statements with a trailing
// ';', so it must not be used as the unbraced body of an if/for.
359 #ifdef DIRECT_ACCESS_FAT_LINK
360 #if (DD_FAT_RECON==8)
361 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_SHORT4(FAT, gauge, dir, idx, stride); RESCALE4(FAT, fat_link_max);
362 #elif (DD_FAT_RECON==12)
363 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_SHORT4(FAT, gauge, dir, idx, stride); RESCALE4(FAT, fat_link_max);
364 #else
365 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_SHORT2(FAT, gauge, dir, idx, stride); RESCALE2(FAT, fat_link_max);
366 #endif
367 #else
368 #if (DD_FAT_RECON==8)
369 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_SHORT4_TEX(FAT, gauge, dir, idx, stride); RESCALE4(FAT, fat_link_max);
370 #elif (DD_FAT_RECON==12)
371 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_SHORT4_TEX(FAT, gauge, dir, idx, stride); RESCALE4(FAT, fat_link_max);
372 #else
373 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_SHORT2_TEX(FAT, gauge, dir, idx, stride); RESCALE2(FAT, fat_link_max);
374 #endif
375 #endif // DIRECT_ACCESS_FAT_LINK
376 
// Long-link read: 12-real SHORT4 read, direct or via texture; no RESCALE step.
377 #ifdef DIRECT_ACCESS_LONG_LINK
378 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_SHORT4(LONG, gauge, dir, idx, stride)
379 #else
380 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_SHORT4_TEX(LONG, gauge, dir, idx, stride)
381 #endif // DIRECT_ACCESS_LONG_LINK
382 
383 #endif // DD_PREC
384 
385 #elif (DD_LONG_RECON == 13)
386 #define DD_LONG_RECON_F 13
387 
388 #if (DD_PREC==0) // DOUBLE PRECISION
389 
// Double precision, long links reconstructed from 13 reals: the matrix is read
// with the 12-real read macro and READ_LONG_PHASE is redefined to read a phase.
390 #define RECONSTRUCT_LONG_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_13_DOUBLE
// Fat-link reconstruction follows DD_FAT_RECON; any value other than 8 or 12
// gets an empty macro, i.e. no reconstruction step.
391 #if (DD_FAT_RECON==8)
392 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_8_DOUBLE
393 #elif (DD_FAT_RECON==12)
394 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_12_DOUBLE
395 #else
396 #define RECONSTRUCT_FAT_GAUGE_MATRIX(dir, gauge, idx, sign)
397 #endif
398 
// Fat-link read: DOUBLE2 read macros for direct access, their _TEX variants
// otherwise; the 18-real read is the fallback for DD_FAT_RECON other than 8/12.
399 #ifdef DIRECT_ACCESS_FAT_LINK
400 #if (DD_FAT_RECON==8)
401 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_DOUBLE2(FAT, gauge, dir, idx, stride)
402 #elif (DD_FAT_RECON==12)
403 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_DOUBLE2(FAT, gauge, dir, idx, stride)
404 #else
405 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_DOUBLE2(FAT, gauge, dir, idx, stride)
406 #endif
407 #else // texture access
408 #if (DD_FAT_RECON==8)
409 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_DOUBLE2_TEX(FAT, gauge, dir, idx, stride)
410 #elif (DD_FAT_RECON==12)
411 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_DOUBLE2_TEX(FAT, gauge, dir, idx, stride)
412 #else
413 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_DOUBLE2_TEX(FAT, gauge, dir, idx, stride)
414 #endif // DD_FAT_RECON
415 #endif // DIRECT_ACCESS_FAT_LINK
416 
// Drop the empty default READ_LONG_PHASE before installing the real one.
417 #undef READ_LONG_PHASE
// Long-link read (12-real DOUBLE2) plus the phase read, direct or via texture.
418 #ifdef DIRECT_ACCESS_LONG_LINK
419 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_DOUBLE2(LONG, gauge, dir, idx, stride)
420 #define READ_LONG_PHASE(phase, dir, idx, stride) READ_GAUGE_PHASE_DOUBLE(PHASE, phase, dir, idx, stride);
421 #else
422 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_DOUBLE2_TEX(LONG, gauge, dir, idx, stride)
423 #define READ_LONG_PHASE(phase, dir, idx, stride) READ_GAUGE_PHASE_DOUBLE_TEX(PHASE, phase, dir, idx, stride);
424 #endif // DIRECT_ACCESS_LONG_LINK
425 
426 #elif (DD_PREC==1) // SINGLE PRECISION
427 
// Single precision, long links reconstructed from 13 reals: the matrix is read
// with the 12-real read macro and READ_LONG_PHASE is redefined to read a phase.
428 #define RECONSTRUCT_LONG_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_13_SINGLE
// Fat-link reconstruction follows DD_FAT_RECON; any value other than 8 or 12
// gets an empty macro, i.e. no reconstruction step.
429 #if (DD_FAT_RECON==8)
430 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_8_SINGLE
431 #elif (DD_FAT_RECON==12)
432 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_12_SINGLE
433 #else
434 #define RECONSTRUCT_FAT_GAUGE_MATRIX(dir, gauge, idx, sign)
435 #endif
436 
// Fat-link read: FLOAT4 read macros for reconstruct 8/12, FLOAT2 for the
// 18-real fallback; _TEX variants when DIRECT_ACCESS_FAT_LINK is not defined.
437 #ifdef DIRECT_ACCESS_FAT_LINK
438 #if (DD_FAT_RECON==8)
439 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_FLOAT4(FAT, gauge, dir, idx, stride)
440 #elif (DD_FAT_RECON==12)
441 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_FLOAT4(FAT, gauge, dir, idx, stride)
442 #else
443 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_FLOAT2(FAT, gauge, dir, idx, stride)
444 #endif
445 #else
446 #if (DD_FAT_RECON==8)
447 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_FLOAT4_TEX(FAT, gauge, dir, idx, stride)
448 #elif (DD_FAT_RECON==12)
449 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_FLOAT4_TEX(FAT, gauge, dir, idx, stride)
450 #else
451 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_FLOAT2_TEX(FAT, gauge, dir, idx, stride)
452 #endif
453 #endif // DIRECT_ACCESS_FAT_LINK
454 
// Drop the empty default READ_LONG_PHASE before installing the real one.
455 #undef READ_LONG_PHASE
// Long-link read (12-real FLOAT4) plus the phase read, direct or via texture.
456 #ifdef DIRECT_ACCESS_LONG_LINK
457 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_FLOAT4(LONG, gauge, dir, idx, stride)
458 #define READ_LONG_PHASE(phase, dir, idx, stride) READ_GAUGE_PHASE_FLOAT(PHASE, phase, dir, idx, stride);
459 #else
460 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_FLOAT4_TEX(LONG, gauge, dir, idx, stride)
461 #define READ_LONG_PHASE(phase, dir, idx, stride) READ_GAUGE_PHASE_FLOAT_TEX(PHASE, phase, dir, idx, stride);
462 #endif // DIRECT_ACCESS_LONG_LINK
463 
464 #else // HALF PRECISION
465 
// Half precision, long links reconstructed from 13 reals: the matrix is read
// with the 12-real read macro and READ_LONG_PHASE is redefined to read a phase.
// Half precision reuses the _SINGLE reconstruction macros.
466 #define RECONSTRUCT_LONG_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_13_SINGLE
// Fat-link reconstruction follows DD_FAT_RECON; any value other than 8 or 12
// gets an empty macro, i.e. no reconstruction step.
467 #if (DD_FAT_RECON==8)
468 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_8_SINGLE
469 #elif (DD_FAT_RECON==12)
470 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_12_SINGLE
471 #else
472 #define RECONSTRUCT_FAT_GAUGE_MATRIX(dir, gauge, idx, sign)
473 #endif
474 
// Fat-link read: SHORT4 (reconstruct 8/12) or SHORT2 (18-real fallback) read,
// followed by RESCALE4/RESCALE2 with fat_link_max.
// NOTE: each READ_FAT_MATRIX here expands to two statements with a trailing
// ';', so it must not be used as the unbraced body of an if/for.
475 #ifdef DIRECT_ACCESS_FAT_LINK
476 #if (DD_FAT_RECON==8)
477 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_SHORT4(FAT, gauge, dir, idx, stride); RESCALE4(FAT, fat_link_max);
478 #elif (DD_FAT_RECON==12)
479 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_SHORT4(FAT, gauge, dir, idx, stride); RESCALE4(FAT, fat_link_max);
480 #else
481 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_SHORT2(FAT, gauge, dir, idx, stride); RESCALE2(FAT, fat_link_max);
482 #endif
483 #else
484 #if (DD_FAT_RECON==8)
485 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_SHORT4_TEX(FAT, gauge, dir, idx, stride); RESCALE4(FAT, fat_link_max);
486 #elif (DD_FAT_RECON==12)
487 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_SHORT4_TEX(FAT, gauge, dir, idx, stride); RESCALE4(FAT, fat_link_max);
488 #else
489 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_SHORT2_TEX(FAT, gauge, dir, idx, stride); RESCALE2(FAT, fat_link_max);
490 #endif
491 #endif
// Drop the empty default READ_LONG_PHASE before installing the real one.
492 #undef READ_LONG_PHASE
// Long-link read (12-real SHORT4, no RESCALE step) plus the phase read.
493 #ifdef DIRECT_ACCESS_LONG_LINK
494 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_SHORT4(LONG, gauge, dir, idx, stride)
495 #define READ_LONG_PHASE(phase, dir, idx, stride) READ_GAUGE_PHASE_SHORT(PHASE, phase, dir, idx, stride);
496 #else
497 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_SHORT4_TEX(LONG, gauge, dir, idx, stride)
498 #define READ_LONG_PHASE(phase, dir, idx, stride) READ_GAUGE_PHASE_SHORT_TEX(PHASE, phase, dir, idx, stride);
499 #endif // DIRECT_ACCESS_LONG_LINK
500 
501 #endif // DD_PREC
502 
503 #else //18 reconstruct
// Fallback for every DD_LONG_RECON value not handled above: treated as 18, and
// RECONSTRUCT_LONG_GAUGE_MATRIX expands to nothing (no reconstruction step).
504 #define DD_LONG_RECON_F 18
505 #define RECONSTRUCT_LONG_GAUGE_MATRIX(dir, gauge, idx, sign)
506 
507 #if (DD_PREC==0) // DOUBLE PRECISION
508 
// Double precision, long links read with the 18-real read macro.
// Fat-link reconstruction follows DD_FAT_RECON; any value other than 8 or 12
// gets an empty macro, i.e. no reconstruction step.
509 #if (DD_FAT_RECON==8)
510 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_8_DOUBLE
511 #elif (DD_FAT_RECON==12)
512 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_12_DOUBLE
513 #else
514 #define RECONSTRUCT_FAT_GAUGE_MATRIX(dir, gauge, idx, sign)
515 #endif
516 
// Fat-link read: DOUBLE2 read macros for direct access, their _TEX variants
// otherwise; the 18-real read is the fallback for DD_FAT_RECON other than 8/12.
517 #ifdef DIRECT_ACCESS_FAT_LINK
518 #if (DD_FAT_RECON==8)
519 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_DOUBLE2(FAT, gauge, dir, idx, stride)
520 #elif (DD_FAT_RECON==12)
521 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_DOUBLE2(FAT, gauge, dir, idx, stride)
522 #else
523 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_DOUBLE2(FAT, gauge, dir, idx, stride)
524 #endif
525 #else // texture access
526 #if (DD_FAT_RECON==8)
527 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_DOUBLE2_TEX(FAT, gauge, dir, idx, stride)
528 #elif (DD_FAT_RECON==12)
529 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_DOUBLE2_TEX(FAT, gauge, dir, idx, stride)
530 #else
531 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_DOUBLE2_TEX(FAT, gauge, dir, idx, stride)
532 #endif // DD_FAT_RECON
533 #endif // DIRECT_ACCESS_FAT_LINK
534 
// Long-link read: always the 18-real DOUBLE2 read, direct or via texture.
535 #ifdef DIRECT_ACCESS_LONG_LINK
536 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_DOUBLE2(LONG, gauge, dir, idx, stride)
537 #else
538 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_DOUBLE2_TEX(LONG, gauge, dir, idx, stride)
539 #endif // DIRECT_ACCESS_LONG_LINK
540 
541 #elif (DD_PREC==1) // SINGLE PRECISION
542 
// Single precision, long links read with the 18-real read macro.
// Fat-link reconstruction follows DD_FAT_RECON; any value other than 8 or 12
// gets an empty macro, i.e. no reconstruction step.
543 #if (DD_FAT_RECON==8)
544 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_8_SINGLE
545 #elif (DD_FAT_RECON==12)
546 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_12_SINGLE
547 #else
548 #define RECONSTRUCT_FAT_GAUGE_MATRIX(dir, gauge, idx, sign)
549 #endif
550 
// Fat-link read: FLOAT4 read macros for reconstruct 8/12, FLOAT2 for the
// 18-real fallback; _TEX variants when DIRECT_ACCESS_FAT_LINK is not defined.
551 #ifdef DIRECT_ACCESS_FAT_LINK
552 #if (DD_FAT_RECON==8)
553 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_FLOAT4(FAT, gauge, dir, idx, stride)
554 #elif (DD_FAT_RECON==12)
555 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_FLOAT4(FAT, gauge, dir, idx, stride)
556 #else
557 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_FLOAT2(FAT, gauge, dir, idx, stride)
558 #endif
559 #else
560 #if (DD_FAT_RECON==8)
561 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_FLOAT4_TEX(FAT, gauge, dir, idx, stride)
562 #elif (DD_FAT_RECON==12)
563 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_FLOAT4_TEX(FAT, gauge, dir, idx, stride)
564 #else
565 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_FLOAT2_TEX(FAT, gauge, dir, idx, stride)
566 #endif
567 #endif // DIRECT_ACCESS_FAT_LINK
568 
// Long-link read: always the 18-real FLOAT2 read, direct or via texture.
569 #ifdef DIRECT_ACCESS_LONG_LINK
570 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_FLOAT2(LONG, gauge, dir, idx, stride)
571 #else
572 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_FLOAT2_TEX(LONG, gauge, dir, idx, stride)
573 #endif // DIRECT_ACCESS_LONG_LINK
574 
575 #else // HALF PRECISION
576 
// Half precision, long links read with the 18-real read macro.
// Fat-link reconstruction follows DD_FAT_RECON (using the _SINGLE macros); any
// value other than 8 or 12 gets an empty macro, i.e. no reconstruction step.
577 #if (DD_FAT_RECON==8)
578 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_8_SINGLE
579 #elif (DD_FAT_RECON==12)
580 #define RECONSTRUCT_FAT_GAUGE_MATRIX RECONSTRUCT_GAUGE_MATRIX_12_SINGLE
581 #else
582 #define RECONSTRUCT_FAT_GAUGE_MATRIX(dir, gauge, idx, sign)
583 #endif
584 
// Fat-link read: SHORT4 (reconstruct 8/12) or SHORT2 (18-real fallback) read,
// followed by RESCALE4/RESCALE2 with fat_link_max.
// NOTE: each READ_FAT_MATRIX here expands to two statements with a trailing
// ';', so it must not be used as the unbraced body of an if/for.
585 #ifdef DIRECT_ACCESS_FAT_LINK
586 
587 #if (DD_FAT_RECON==8)
588 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_SHORT4(FAT, gauge, dir, idx, stride); RESCALE4(FAT, fat_link_max);
589 #elif (DD_FAT_RECON==12)
590 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_SHORT4(FAT, gauge, dir, idx, stride); RESCALE4(FAT, fat_link_max);
591 #else
592 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_SHORT2(FAT, gauge, dir, idx, stride); RESCALE2(FAT, fat_link_max);
593 #endif
594 #else
595 
596 #if (DD_FAT_RECON==8)
597 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_8_SHORT4_TEX(FAT, gauge, dir, idx, stride); RESCALE4(FAT, fat_link_max);
598 #elif (DD_FAT_RECON==12)
599 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_12_SHORT4_TEX(FAT, gauge, dir, idx, stride); RESCALE4(FAT, fat_link_max);
600 #else
601 #define READ_FAT_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_SHORT2_TEX(FAT, gauge, dir, idx, stride); RESCALE2(FAT, fat_link_max);
602 #endif
603 #endif
604 
// Long-link read: 18-real SHORT2 read, direct or via texture; no RESCALE step.
605 #ifdef DIRECT_ACCESS_LONG_LINK
606 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_SHORT2(LONG, gauge, dir, idx, stride)
607 #else
608 #define READ_LONG_MATRIX(gauge, dir, idx, stride) READ_GAUGE_MATRIX_18_SHORT2_TEX(LONG, gauge, dir, idx, stride)
609 #endif // DIRECT_ACCESS_LONG_LINK
610 
611 #endif // DD_PREC
612 
613 #endif // DD_LONG_RECON
614 
615 #if (DD_PREC==0) // double-precision fields
616 
617 // gauge field
// Double precision: DD_PREC_F is D.
618 #define DD_PREC_F D
// FATLINK0TEX / FATLINK1TEX name the two fat-link sources:
//  - param.gauge0 / param.gauge1 for direct access or when FERMI_NO_DBLE_TEX
//    is defined;
//  - param.gauge0Tex / param.gauge1Tex with USE_TEXTURE_OBJECTS;
//  - otherwise the global fatGauge*TexDouble symbols when DD_IMPROVED is 1,
//    or gauge*TexDouble2 when it is not.
619 #if (defined DIRECT_ACCESS_FAT_LINK) || (defined FERMI_NO_DBLE_TEX)
620 #define FATLINK0TEX param.gauge0
621 #define FATLINK1TEX param.gauge1
622 #else
623 #ifdef USE_TEXTURE_OBJECTS
624 #define FATLINK0TEX param.gauge0Tex
625 #define FATLINK1TEX param.gauge1Tex
626 #else
627 #if (DD_IMPROVED == 1)
628 #define FATLINK0TEX fatGauge0TexDouble
629 #define FATLINK1TEX fatGauge1TexDouble
630 #else
631 #define FATLINK0TEX gauge0TexDouble2
632 #define FATLINK1TEX gauge1TexDouble2
633 #endif
634 #endif // USE_TEXTURE_OBJECTS
635 #endif
636 
// Double precision long-link and long-phase sources, selected the same way as
// the fat links: param.longGauge* / param.longPhase* for direct access or
// FERMI_NO_DBLE_TEX, the param.*Tex members with USE_TEXTURE_OBJECTS, and the
// global long*TexDouble symbols otherwise.
637 #if (defined DIRECT_ACCESS_LONG_LINK) || (defined FERMI_NO_DBLE_TEX)
638 #define LONGLINK0TEX param.longGauge0
639 #define LONGLINK1TEX param.longGauge1
640 #define LONGPHASE0TEX param.longPhase0
641 #define LONGPHASE1TEX param.longPhase1
642 #else
643 #ifdef USE_TEXTURE_OBJECTS
644 #define LONGLINK0TEX param.longGauge0Tex
645 #define LONGLINK1TEX param.longGauge1Tex
646 #define LONGPHASE0TEX param.longPhase0Tex
647 #define LONGPHASE1TEX param.longPhase1Tex
648 #else
649 #define LONGLINK0TEX longGauge0TexDouble
650 #define LONGLINK1TEX longGauge1TexDouble
651 #define LONGPHASE0TEX longPhase0TexDouble
652 #define LONGPHASE1TEX longPhase1TexDouble
653 #endif // USE_TEXTURE_OBJECTS
654 #endif
655 
// Flag macro set only in the double-precision section; its consumer is not
// visible in this listing.
656 #define GAUGE_DOUBLE
657 
658 // spinor fields
// Double precision input spinor and ghost spinor: source names plus the
// neighbour read macros. Direct access (or FERMI_NO_DBLE_TEX) uses param.in /
// param.ghost with the *_DOUBLE readers; otherwise the *_DOUBLE_TEX readers are
// used, with param.inTex / param.ghostTex under USE_TEXTURE_OBJECTS and the
// global *TexDouble symbols without it.
// The 3RD_NBR readers map onto the READ_KS_NBR_* macros.
659 #if (defined DIRECT_ACCESS_SPINOR) || (defined FERMI_NO_DBLE_TEX)
660 #define SPINORTEX param.in
661 #define GHOSTSPINORTEX param.ghost
662 #define READ_1ST_NBR_SPINOR READ_1ST_NBR_SPINOR_DOUBLE
663 #define READ_3RD_NBR_SPINOR READ_KS_NBR_SPINOR_DOUBLE
664 #define READ_1ST_NBR_SPINOR_GHOST READ_1ST_NBR_SPINOR_GHOST_DOUBLE
665 #define READ_3RD_NBR_SPINOR_GHOST READ_KS_NBR_SPINOR_GHOST_DOUBLE
666 #else
667 #ifdef USE_TEXTURE_OBJECTS
668 #define SPINORTEX param.inTex
669 #define GHOSTSPINORTEX param.ghostTex
670 #else
671 #define SPINORTEX spinorTexDouble
672 #define GHOSTSPINORTEX ghostSpinorTexDouble
673 #endif // USE_TEXTURE_OBJECTS
674 #define READ_1ST_NBR_SPINOR READ_1ST_NBR_SPINOR_DOUBLE_TEX
675 #define READ_3RD_NBR_SPINOR READ_KS_NBR_SPINOR_DOUBLE_TEX
676 #define READ_1ST_NBR_SPINOR_GHOST READ_1ST_NBR_SPINOR_GHOST_DOUBLE_TEX
677 #define READ_3RD_NBR_SPINOR_GHOST READ_KS_NBR_SPINOR_GHOST_DOUBLE_TEX
678 #endif
// Double precision READ_AND_SUM_SPINOR and its source INTERTEX: param.out for
// direct access (or FERMI_NO_DBLE_TEX), otherwise the _DOUBLE_TEX reader with
// param.outTex under USE_TEXTURE_OBJECTS or the global interTexDouble symbol.
679 #if (defined DIRECT_ACCESS_INTER) || (defined FERMI_NO_DBLE_TEX)
680 #define READ_AND_SUM_SPINOR READ_AND_SUM_ST_SPINOR
681 #define INTERTEX param.out
682 #else
683 #define READ_AND_SUM_SPINOR READ_AND_SUM_ST_SPINOR_DOUBLE_TEX
684 #ifdef USE_TEXTURE_OBJECTS
685 #define INTERTEX param.outTex
686 #else
687 #define INTERTEX interTexDouble
688 #endif
689 #endif
// Output writer for double precision, plus the SPINOR_DOUBLE flag macro (its
// consumer is not visible in this listing).
690 #define WRITE_SPINOR WRITE_ST_SPINOR_DOUBLE2
691 #define SPINOR_DOUBLE
// Accumulator access is only defined for the axpy variant (DD_AXPY == 1):
// param.x with READ_ST_ACCUM_DOUBLE for direct access (or FERMI_NO_DBLE_TEX),
// otherwise READ_ST_ACCUM_DOUBLE_TEX with param.xTex under USE_TEXTURE_OBJECTS
// or the global accumTexDouble symbol.
692 #if (DD_AXPY==1)
693 #if (defined DIRECT_ACCESS_ACCUM) || (defined FERMI_NO_DBLE_TEX)
694 #define ACCUMTEX param.x
695 #define READ_ACCUM READ_ST_ACCUM_DOUBLE
696 #else
697 #ifdef USE_TEXTURE_OBJECTS
698 #define ACCUMTEX param.xTex
699 #else
700 #define ACCUMTEX accumTexDouble
701 #endif // USE_TEXTURE_OBJECTS
702 #define READ_ACCUM READ_ST_ACCUM_DOUBLE_TEX
703 #endif
704 #endif // DD_AXPY
705 
706 
707 #elif (DD_PREC==1) // single-precision fields
// Single-precision branch: sets the precision tag DD_PREC_F to S and selects
// the source names and reader macros for fat links, long links, spinors,
// the intermediate field and (on the axpy pass) the accumulator.
// Unlike the double-precision branch above it, none of the conditions here
// test FERMI_NO_DBLE_TEX.
708
709 // gauge fields
710 #define DD_PREC_F S
711
// Fat links. Without DIRECT_ACCESS_FAT_LINK the names depend on
// USE_TEXTURE_OBJECTS, then on DD_IMPROVED, then on DD_FAT_RECON
// (18 selects the ...Single2 names, any other value the ...Single4 names).
712 #ifndef DIRECT_ACCESS_FAT_LINK
713 #ifdef USE_TEXTURE_OBJECTS
714 #define FATLINK0TEX param.gauge0Tex
715 #define FATLINK1TEX param.gauge1Tex
716 #else
717 #if (DD_IMPROVED == 1)
718 #define FATLINK0TEX fatGauge0TexSingle
719 #define FATLINK1TEX fatGauge1TexSingle
720 #else
721 #if (DD_FAT_RECON == 18)
722 #define FATLINK0TEX gauge0TexSingle2
723 #define FATLINK1TEX gauge1TexSingle2
724 #else
725 #define FATLINK0TEX gauge0TexSingle4
726 #define FATLINK1TEX gauge1TexSingle4
727 #endif
728 #endif // DD_IMPROVED
729 #endif // USE_TEXTURE_OBJECTS
730 #else // DIRECT_ACCESS_FAT_LINK
731 #define FATLINK0TEX param.gauge0
732 #define FATLINK1TEX param.gauge1
733 #endif
734
// Long links and long-link phases. In the DD_LONG_RECON==18 case without
// texture objects only LONGLINK0TEX/LONGLINK1TEX are defined; the
// LONGPHASE0TEX/LONGPHASE1TEX names are left undefined on that path.
735 #ifndef DIRECT_ACCESS_LONG_LINK //longlink access
736 #ifdef USE_TEXTURE_OBJECTS
737 #define LONGLINK0TEX param.longGauge0Tex
738 #define LONGLINK1TEX param.longGauge1Tex
739 #define LONGPHASE0TEX param.longPhase0Tex
740 #define LONGPHASE1TEX param.longPhase1Tex
741 #else
742 #if (DD_LONG_RECON ==18)
743 #define LONGLINK0TEX longGauge0TexSingle_norecon
744 #define LONGLINK1TEX longGauge1TexSingle_norecon
745 #else
746 #define LONGLINK0TEX longGauge0TexSingle
747 #define LONGLINK1TEX longGauge1TexSingle
748 #define LONGPHASE0TEX longPhase0TexSingle
749 #define LONGPHASE1TEX longPhase1TexSingle
750 #endif
751 #endif // USE_TEXTURE_OBJECTS
752 #else
753 #define LONGLINK0TEX param.longGauge0
754 #define LONGLINK1TEX param.longGauge1
755 #define LONGPHASE0TEX param.longPhase0
756 #define LONGPHASE1TEX param.longPhase1
757 #endif
758
759 // spinor fields
// Input and ghost spinor: the *_SINGLE_TEX readers are used unless
// DIRECT_ACCESS_SPINOR is defined, in which case the *_SINGLE readers and
// param.in / param.ghost are used.
760 #ifndef DIRECT_ACCESS_SPINOR
761 #ifdef USE_TEXTURE_OBJECTS
762 #define SPINORTEX param.inTex
763 #define GHOSTSPINORTEX param.ghostTex
764 #else
765 #define SPINORTEX spinorTexSingle2
766 #define GHOSTSPINORTEX ghostSpinorTexSingle2
767 #endif // USE_TEXTURE_OBJECTS
768 #define READ_1ST_NBR_SPINOR READ_1ST_NBR_SPINOR_SINGLE_TEX
769 #define READ_3RD_NBR_SPINOR READ_KS_NBR_SPINOR_SINGLE_TEX
770 #define READ_1ST_NBR_SPINOR_GHOST READ_1ST_NBR_SPINOR_GHOST_SINGLE_TEX
771 #define READ_3RD_NBR_SPINOR_GHOST READ_KS_NBR_SPINOR_GHOST_SINGLE_TEX
772 #else
773 #define SPINORTEX param.in
774 #define GHOSTSPINORTEX param.ghost
775 #define READ_1ST_NBR_SPINOR READ_1ST_NBR_SPINOR_SINGLE
776 #define READ_3RD_NBR_SPINOR READ_KS_NBR_SPINOR_SINGLE
777 #define READ_1ST_NBR_SPINOR_GHOST READ_1ST_NBR_SPINOR_GHOST_SINGLE
778 #define READ_3RD_NBR_SPINOR_GHOST READ_KS_NBR_SPINOR_GHOST_SINGLE
779 #endif
// Intermediate field (INTERTEX / READ_AND_SUM_SPINOR).
780 #if (defined DIRECT_ACCESS_INTER)
781 #define READ_AND_SUM_SPINOR READ_AND_SUM_ST_SPINOR
782 #define INTERTEX param.out
783 #else
784 #define READ_AND_SUM_SPINOR READ_AND_SUM_ST_SPINOR_SINGLE_TEX
785 #ifdef USE_TEXTURE_OBJECTS
786 #define INTERTEX param.outTex
787 #else
788 #define INTERTEX interTexSingle2
789 #endif // USE_TEXTURE_OBJECTS
790 #endif
791 #define WRITE_SPINOR WRITE_ST_SPINOR_FLOAT2
// Accumulator field (ACCUMTEX / READ_ACCUM): defined only on the DD_AXPY==1 pass.
792 #if (DD_AXPY==1)
793 #if (defined DIRECT_ACCESS_ACCUM)
794 #define ACCUMTEX param.x
795 #define READ_ACCUM READ_ST_ACCUM_SINGLE
796 #else
797 #ifdef USE_TEXTURE_OBJECTS
798 #define ACCUMTEX param.xTex
799 #else
800 #define ACCUMTEX accumTexSingle2
801 #endif // USE_TEXTURE_OBJECTS
802 #define READ_ACCUM READ_ST_ACCUM_SINGLE_TEX
803 #endif
804 #endif // DD_AXPY
805 
806 
807 #else // half-precision fields
// Half-precision branch (taken for any DD_PREC value other than those tested
// earlier in this chain): sets the precision tag DD_PREC_F to H and selects
// the source names and reader macros for links, spinors, the intermediate
// field and (on the axpy pass) the accumulator.
808
809 // gauge fields
810 #define DD_PREC_F H
811
// Fat links: same decision tree as the single-precision branch, with the
// ...Half / ...Half2 / ...Half4 names.
812 #ifndef DIRECT_ACCESS_FAT_LINK
813 #ifdef USE_TEXTURE_OBJECTS
814 #define FATLINK0TEX param.gauge0Tex
815 #define FATLINK1TEX param.gauge1Tex
816 #else
817 #if (DD_IMPROVED == 1)
818 #define FATLINK0TEX fatGauge0TexHalf
819 #define FATLINK1TEX fatGauge1TexHalf
820 #else
821 #if (DD_FAT_RECON == 18)
822 #define FATLINK0TEX gauge0TexHalf2
823 #define FATLINK1TEX gauge1TexHalf2
824 #else
825 #define FATLINK0TEX gauge0TexHalf4
826 #define FATLINK1TEX gauge1TexHalf4
827 #endif
828 #endif // DD_IMPROVED
829 #endif // USE_TEXTURE_OBJECTS
830 #else // DIRECT_ACCESS_FAT_LINK
831 #define FATLINK0TEX param.gauge0
832 #define FATLINK1TEX param.gauge1
833 #endif
834
// Long links and long-link phases. In the DD_LONG_RECON==18 case without
// texture objects the LONGPHASE0TEX/LONGPHASE1TEX names are left undefined.
835 #ifndef DIRECT_ACCESS_LONG_LINK
836 #ifdef USE_TEXTURE_OBJECTS
837 #define LONGLINK0TEX param.longGauge0Tex
838 #define LONGLINK1TEX param.longGauge1Tex
839 #define LONGPHASE0TEX param.longPhase0Tex
840 #define LONGPHASE1TEX param.longPhase1Tex
841 #else
842 #if (DD_LONG_RECON ==18)
843 #define LONGLINK0TEX longGauge0TexHalf_norecon
844 #define LONGLINK1TEX longGauge1TexHalf_norecon
845 #else
846 #define LONGLINK0TEX longGauge0TexHalf
847 #define LONGLINK1TEX longGauge1TexHalf
848 #define LONGPHASE0TEX longPhase0TexHalf
849 #define LONGPHASE1TEX longPhase1TexHalf
850 #endif
851 #endif // USE_TEXTURE_OBJECTS
852 #else // DIRECT_ACCESS_LONG_LINK
853 #define LONGLINK0TEX param.longGauge0
854 #define LONGLINK1TEX param.longGauge1
855 #define LONGPHASE0TEX param.longPhase0
856 #define LONGPHASE1TEX param.longPhase1
857 #endif
858
// Spinor readers: this branch has no DIRECT_ACCESS_SPINOR alternative; the
// *_HALF_TEX reader macros are selected unconditionally.
859 #define READ_1ST_NBR_SPINOR READ_1ST_NBR_SPINOR_HALF_TEX
860 #define READ_3RD_NBR_SPINOR READ_KS_NBR_SPINOR_HALF_TEX
861 #define READ_1ST_NBR_SPINOR_GHOST READ_1ST_NBR_SPINOR_GHOST_HALF_TEX
862 #define READ_3RD_NBR_SPINOR_GHOST READ_KS_NBR_SPINOR_GHOST_HALF_TEX
863 #ifdef USE_TEXTURE_OBJECTS
864 #define SPINORTEX param.inTex
865 #define GHOSTSPINORTEX param.ghostTex
866 #else
867 #define SPINORTEX spinorTexHalf2
868 #define GHOSTSPINORTEX ghostSpinorTexHalf2
869 #endif // USE_TEXTURE_OBJECTS
// Intermediate field (INTERTEX / READ_AND_SUM_SPINOR).
// NOTE(review): the direct-access reader here is READ_AND_SUM_ST_SPINOR_HALF,
// whereas the other precision branches use the unsuffixed READ_AND_SUM_ST_SPINOR
// -- presumably intentional; confirm against the macro definitions.
870 #if (defined DIRECT_ACCESS_INTER)
871 #define READ_AND_SUM_SPINOR READ_AND_SUM_ST_SPINOR_HALF
872 #define INTERTEX param.out
873 #else
874 #define READ_AND_SUM_SPINOR READ_AND_SUM_ST_SPINOR_HALF_TEX
875 #ifdef USE_TEXTURE_OBJECTS
876 #define INTERTEX param.outTex
877 #else
878 #define INTERTEX interTexHalf2
879 #endif // USE_TEXTURE_OBJECTS
880 #endif
881 #define WRITE_SPINOR WRITE_ST_SPINOR_SHORT2
// Accumulator field: defined only on the DD_AXPY==1 pass; there is no
// DIRECT_ACCESS_ACCUM alternative in this branch.
882 #if (DD_AXPY==1)
883 #ifdef USE_TEXTURE_OBJECTS
884 #define ACCUMTEX param.xTex
885 #else
886 #define ACCUMTEX accumTexHalf2
887 #endif // USE_TEXTURE_OBJECTS
888 #define READ_ACCUM READ_ST_ACCUM_HALF_TEX
889 #endif // DD_AXPY
890
// Closes the conditional chain whose #elif (DD_PREC==1) and #else arms appear above.
891 #endif
892 
// Kernel emission for the current option set. Nothing in this section is
// compiled unless GPU_STAGGERED_DIRAC is defined.
// NOTE(review): the embedded listing numbers skip 903, 911, 913, 933, 941 and
// 943, so the lines carrying the kernel signatures (and the contents of each
// MULTI_GPU specialization) appear to be missing from this rendering --
// confirm against the original header before relying on this section.
893 #ifdef GPU_STAGGERED_DIRAC
894
895 // define the kernel
896
897 #if (DD_IMPROVED==1)
898
// DD_CONCAT pastes its five arguments together and appends "Kernel".
// DD_FUNC forwards to DD_CONCAT so that macro arguments are expanded before
// the ## operator is applied.
899 #define DD_CONCAT(n,p,r1,r2,x) n ## p ## r1 ## r2 ## x ## Kernel
900 #define DD_FUNC(n,p,r1,r2,x) DD_CONCAT(n,p,r1,r2,x)
901
902 template <KernelType kernel_type>
// The core file is included only when DD_FAT_RECON is 18; for every other
// value nothing is included at this point.
904 #if defined(GPU_STAGGERED_DIRAC) && DD_FAT_RECON == 18 // improved staggered only supports no reconstruct fat-links
905  #include "staggered_dslash_core.h"
906 #endif
907 }
908
909 #ifdef MULTI_GPU
910 template <>
912 #if defined(GPU_STAGGERED_DIRAC) && DD_FAT_RECON == 18 // improved staggered only supports no reconstruct fat-links
914 #endif
915 }
916
917 #endif // MULTI_GPU
918
919 #else // naive staggered kernel
920
// In this branch the long-link reader macros are replaced by empty
// definitions, so any use of them expands to nothing.
921 #undef READ_LONG_MATRIX
922 #define READ_LONG_MATRIX(gauge, dir, idx, stride)
923
924 #undef READ_LONG_PHASE
925 #define READ_LONG_PHASE(phase, dir, idx, stride)
926
// Four-argument variants of the name-pasting macros (a single reconstruct tag).
927 #define DD_CONCAT(n,p,r,x) n ## p ## r ## x ## Kernel
928 #define DD_FUNC(n,p,r,x) DD_CONCAT(n,p,r,x)
929
// Kernels are emitted on the DD_LONG_RECON == 18 pass only.
930 #if (DD_LONG_RECON == 18) // avoid kernel aliasing over non-existent long-links
931
932 template <KernelType kernel_type>
// The core file is skipped for DD_FAT_RECON values 9 and 13.
934 #if defined(GPU_STAGGERED_DIRAC) && DD_FAT_RECON != 9 && DD_FAT_RECON != 13
935  #include "staggered_dslash_core.h"
936 #endif
937 }
938
939 #ifdef MULTI_GPU
940 template <>
942 #if defined(GPU_STAGGERED_DIRAC) && DD_FAT_RECON != 9 && DD_FAT_RECON != 13
944 #endif
945 }
946 #endif // MULTI_GPU
947
948 #endif // DD_LONG_RECON == 18
949
950
951 #endif // DD_IMPROVED
952
953 #endif // GPU_STAGGERED_DIRAC
954 
955 // clean up
// Undefine every per-iteration macro so the next pass through this file can
// define it afresh. #undef of a name that is not currently defined is a
// no-op, so listing names that a given pass never defined is harmless.
956
// Kernel-name fragments and the name-pasting helpers.
957 #undef DD_PREC_F
958 #undef DD_FAT_RECON_F
959 #undef DD_LONG_RECON_F
960 #undef DD_AXPY_F
961 #undef DD_FNAME
962 #undef DD_CONCAT
963 #undef DD_FUNC
964
// Field sources, readers, writers and feature flags.
// NOTE(review): READ_GAUGE_MATRIX, CLOVERTEX, READ_CLOVER, DSLASH_CLOVER and
// CLOVER_DOUBLE are not defined anywhere in the portion of this file visible
// here -- presumably carried over from a sibling header; confirm.
965 #undef DSLASH_AXPY
966 #undef READ_GAUGE_MATRIX
967 #undef RECONSTRUCT_FAT_GAUGE_MATRIX
968 #undef RECONSTRUCT_LONG_GAUGE_MATRIX
969 #undef FATLINK0TEX
970 #undef FATLINK1TEX
971 #undef LONGLINK0TEX
972 #undef LONGLINK1TEX
973 #undef LONGPHASE0TEX
974 #undef LONGPHASE1TEX
975 #undef SPINORTEX
976 #undef GHOSTSPINORTEX
977 #undef WRITE_SPINOR
978 #undef READ_AND_SUM_SPINOR
979 #undef INTERTEX
980 #undef ACCUMTEX
981 #undef READ_ACCUM
982 #undef CLOVERTEX
983 #undef READ_CLOVER
984 #undef DSLASH_CLOVER
985 #undef GAUGE_DOUBLE
986 #undef SPINOR_DOUBLE
987 #undef CLOVER_DOUBLE
988 #undef READ_FAT_MATRIX
989 #undef READ_LONG_MATRIX
990 #undef READ_LONG_PHASE
991 #undef READ_1ST_NBR_SPINOR
992 #undef READ_3RD_NBR_SPINOR
993 #undef READ_1ST_NBR_SPINOR_GHOST
994 #undef READ_3RD_NBR_SPINOR_GHOST
995 
996 
997 // prepare next set of options, or clean up after final iteration
998 
// Advance the option counters, then re-include this file for the next pass.
// The counters behave like nested loops, innermost first:
//   DD_AXPY        0 -> 1
//   DD_LONG_RECON  8 -> 9 -> 12 -> 13 -> 18
//   DD_FAT_RECON   8 -> 9 -> 12 -> 13 -> 18
//   DD_PREC        0 -> 1 -> 2
// Each counter steps only when the one before it wraps back to its first
// value. When DD_PREC wraps, the loop is over and the loop state is released.
999 #if (DD_AXPY==0)
1000 #undef DD_AXPY
1001 #define DD_AXPY 1
1002 #else
1003 #undef DD_AXPY
1004 #define DD_AXPY 0
1005
// DD_AXPY wrapped: step the long-link reconstruct counter.
1006 #if (DD_LONG_RECON==8)
1007 #undef DD_LONG_RECON
1008 #define DD_LONG_RECON 9
1009 #elif (DD_LONG_RECON==9)
1010 #undef DD_LONG_RECON
1011 #define DD_LONG_RECON 12
1012 #elif (DD_LONG_RECON==12)
1013 #undef DD_LONG_RECON
1014 #define DD_LONG_RECON 13
1015 #elif (DD_LONG_RECON==13)
1016 #undef DD_LONG_RECON
1017 #define DD_LONG_RECON 18
1018 #else
1019 #undef DD_LONG_RECON
1020
1021 #define DD_LONG_RECON 8
1022
// DD_LONG_RECON wrapped: step the fat-link reconstruct counter.
1023 #if (DD_FAT_RECON==8)
1024 #undef DD_FAT_RECON
1025 #define DD_FAT_RECON 9 // dummy
1026 #elif (DD_FAT_RECON==9)
1027 #undef DD_FAT_RECON
1028 #define DD_FAT_RECON 12
1029 #elif (DD_FAT_RECON==12)
1030 #undef DD_FAT_RECON
1031 #define DD_FAT_RECON 13 //dummy
1032 #elif (DD_FAT_RECON==13)
1033 #undef DD_FAT_RECON
1034 #define DD_FAT_RECON 18
1035 #else
1036 #undef DD_FAT_RECON
1037
1038 #define DD_FAT_RECON 8
1039
// DD_FAT_RECON wrapped: step the precision counter.
1040 #if (DD_PREC==0)
1041 #undef DD_PREC
1042 #define DD_PREC 1
1043 #elif (DD_PREC==1)
1044 #undef DD_PREC
1045 #define DD_PREC 2
1046 #else
1047 #undef DD_PREC
1048 #define DD_PREC 0
1049
// Final iteration: release all loop state so nothing leaks to the includer.
// DD_FAT_RECON is released here too; otherwise it would stay defined (as 8)
// after the loop has finished.
1050 #undef DD_LOOP
1051 #undef DD_AXPY
1052 #undef DD_LONG_RECON
#undef DD_FAT_RECON
1053 #undef DD_PREC
1054
1055 #endif // DD_PREC
1056 #endif // DD_FAT_RECON
1057 #endif // DD_LONG_RECON
1058 #endif // DD_AXPY
1059
// DD_LOOP is still defined unless the final iteration above just removed it;
// while it is defined, this file includes itself to process the next option set.
1060 #ifdef DD_LOOP
1061 #include "staggered_dslash_def.h"
1062 #endif
#define DD_LONG_RECON_F
#define DD_FUNC(n, p, r, d, x)
#define DD_AXPY_F
#define DD_FAT_RECON_F
QudaGaugeParam param
Definition: pack_test.cpp:17
#define DD_FNAME
#define DD_PREC_F