QUDA  0.9.0
dw_dslash5inv_def.h
Go to the documentation of this file.
1 // dw_dslash5inv_def.h - Domain Wall Dslash5inv kernel definitions
2 
3 // There are currently 36 different variants of the Domain Wall
4 // Dslash5inv kernel, each one characterized by a set of 4 options,
5 // where each option can take one of several values (3*2*2*3 = 36).
6 // This file is structured so that the C preprocessor loops through all 36
7 // variants (in a manner resembling a counter), sets the appropriate
8 // macros, and defines the corresponding functions.
9 //
10 // As an example of the function naming conventions, consider
11 //
12 // domainWallDslash5invS12DaggerXpayKernel(const DslashParam param).
13 //
14 // This is a single-precision domain wall Dslash5inv^dagger kernel where
15 // the result is multiplied by "a" and summed with an input vector (Xpay),
16 // and the gauge matrix is reconstructed from 12 real numbers. More
17 // generally, each function name is given by the concatenation of the
18 // following 5 fields, with "Kernel" at the end:
19 // DD_NAME_F = domainWallDslash5inv
20 // DD_PREC_F = D, S, H (double, single, half)
21 // DD_RECON_F = 8, 12, 18
22 // DD_DAG_F = Dagger, [blank]
23 // DD_XPAY_F = Xpay, [blank]
24 //
25 // In addition, the kernels are templated on the kernel type
26 // (KernelType kernel_type); the precision is encoded in the name.
27 
28 // initialize on first iteration
29 
// First pass only: DD_LOOP is not yet defined, so mark the loop as active
// and start every option counter at 0. Later passes (reached through the
// self-include at the bottom of this file) find DD_LOOP defined and skip
// this, keeping the counter values set by the previous pass.
30 #ifndef DD_LOOP
31 #define DD_LOOP
32 #define DD_DAG 0
33 #define DD_XPAY 0
34 #define DD_RECON 0
35 #define DD_PREC 0
36 #endif
37 
38 // set options for current iteration
39 
// Name fragments for the current variant. These are the pieces that
// DD_CONCAT pastes together (followed by "Kernel") to form the function name.

// Base name shared by every variant generated from this file.
40 #define DD_NAME_F domainWallDslash5inv
41 
// Dagger fragment: empty for DD_DAG==0, "Dagger" otherwise.
42 #if (DD_DAG==0) // no dagger
43 #define DD_DAG_F
44 #else // dagger
45 #define DD_DAG_F Dagger
46 #endif
47 
// Xpay fragment: empty for DD_XPAY==0, "Xpay" otherwise. The Xpay case also
// defines DSLASH_XPAY (presumably tested by the included core file -- confirm).
48 #if (DD_XPAY==0) // no xpay
49 #define DD_XPAY_F
50 #else
51 #define DSLASH_XPAY
52 #define DD_XPAY_F Xpay
53 #endif
54 
// Precision fragment: D for DD_PREC==0, S for DD_PREC==1, H for any other value.
55 #if (DD_PREC == 0)
56 #define DD_PREC_F D
57 #elif (DD_PREC == 1)
58 #define DD_PREC_F S
59 #else
60 #define DD_PREC_F H
61 #endif
62 
// Gauge-link reconstruction selection. DD_RECON picks the DD_RECON_F name
// fragment (0 -> 8, 1 -> 12, anything else -> 18). Within each case DD_PREC
// picks:
//   RECONSTRUCT_GAUGE_MATRIX - the reconstruct macro for that scheme, and
//   ASSN_GAUGE_MATRIX        - the link-assignment macro, using the plain
//                              variant when DIRECT_ACCESS_LINK is defined
//                              and the *_TEX variant otherwise.
// Note that in every scheme the final (#else) precision branch reuses the
// *_SINGLE reconstruct macro while selecting the SHORT assignment macro.
63 #if (DD_RECON==0) // reconstruct from 8 reals
64 #define DD_RECON_F 8
65 
66 #if (DD_PREC==0)
67 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_DOUBLE
68 #ifdef DIRECT_ACCESS_LINK
69 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_8_DOUBLE2
70 #else
71 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_8_DOUBLE2_TEX
72 #endif // DIRECT_ACCESS_LINK
73 
74 #elif (DD_PREC==1)
75 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_SINGLE
76 #ifdef DIRECT_ACCESS_LINK
77 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_8_FLOAT4
78 #else
79 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_8_FLOAT4_TEX
80 #endif // DIRECT_ACCESS_LINK
81 
82 #else
83 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_SINGLE
84 #ifdef DIRECT_ACCESS_LINK
85 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_8_SHORT4
86 #else
87 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_8_SHORT4_TEX
88 #endif // DIRECT_ACCESS_LINK
89 #endif // DD_PREC
90 #elif (DD_RECON==1) // reconstruct from 12 reals
91 #define DD_RECON_F 12
92 
93 #if (DD_PREC==0)
94 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_DOUBLE
95 #ifdef DIRECT_ACCESS_LINK
96 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_12_DOUBLE2
97 #else
98 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_12_DOUBLE2_TEX
99 #endif // DIRECT_ACCESS_LINK
100 
101 #elif (DD_PREC==1)
102 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_SINGLE
103 #ifdef DIRECT_ACCESS_LINK
104 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_12_FLOAT4
105 #else
106 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_12_FLOAT4_TEX
107 #endif // DIRECT_ACCESS_LINK
108 
109 #else
110 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_SINGLE
111 #ifdef DIRECT_ACCESS_LINK
112 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_12_SHORT4
113 #else
114 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_12_SHORT4_TEX
115 #endif // DIRECT_ACCESS_LINK
116 #endif // DD_PREC
117 #else // no reconstruct, load all components
118 #define DD_RECON_F 18
// The 18-component case always defines GAUGE_FLOAT2, whatever the precision.
119 #define GAUGE_FLOAT2
120 #if (DD_PREC==0)
121 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_DOUBLE
122 #ifdef DIRECT_ACCESS_LINK
123 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_18_DOUBLE2
124 #else
125 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_18_DOUBLE2_TEX
126 #endif // DIRECT_ACCESS_LINK
127 
128 #elif (DD_PREC==1)
129 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_SINGLE
130 #ifdef DIRECT_ACCESS_LINK
131 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_18_FLOAT2
132 #else
133 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_18_FLOAT2_TEX
134 #endif // DIRECT_ACCESS_LINK
135 
136 #else
137 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_SINGLE
138 #ifdef DIRECT_ACCESS_LINK
139 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_18_SHORT2
140 #else
141 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_18_SHORT2_TEX
142 #endif //DIRECT_ACCESS_LINK
143 #endif
144 #endif
145 
// Per-precision field access macros. For the current DD_PREC this section
// defines:
//   TPROJSCALE                 - param.tProjScale (double) or param.tProjScale_f
//   GAUGE0TEX / GAUGE1TEX      - the two gauge-field sources
//   READ_SPINOR[_UP|_DOWN], SPINORTEX       - input spinor reader and source
//   READ_INTERMEDIATE_SPINOR, INTERTEX      - reader and source for param.out
//   WRITE_SPINOR               - output spinor writer
//   READ_ACCUM, ACCUMTEX       - accumulator reader and source (DD_XPAY==1 only)
//   SPINOR_HOP                 - 12 for double, 6 for single and half
// Each source is a param.* member when the matching DIRECT_ACCESS_* macro is
// defined, a param.*Tex member when USE_TEXTURE_OBJECTS is defined, and a
// named texture reference otherwise. In the double-precision branch only,
// FERMI_NO_DBLE_TEX also selects the direct-access path.
146 #if (DD_PREC==0) // double-precision fields
147 
148 #define TPROJSCALE param.tProjScale
149 
150 // double-precision gauge field
151 #if (defined DIRECT_ACCESS_LINK) || (defined FERMI_NO_DBLE_TEX)
152 #define GAUGE0TEX param.gauge0
153 #define GAUGE1TEX param.gauge1
154 #else
155 #ifdef USE_TEXTURE_OBJECTS
156 #define GAUGE0TEX param.gauge0Tex
157 #define GAUGE1TEX param.gauge1Tex
158 #else
159 #define GAUGE0TEX gauge0TexDouble2
160 #define GAUGE1TEX gauge1TexDouble2
161 #endif // USE_TEXTURE_OBJECTS
162 #endif
163 
// Double precision always defines GAUGE_FLOAT2. For DD_RECON==2 it has already
// been defined with the same (empty) body, which is a legal redefinition.
164 #define GAUGE_FLOAT2
165 
166 // double-precision spinor fields
167 #if (defined DIRECT_ACCESS_WILSON_SPINOR) || (defined FERMI_NO_DBLE_TEX)
168 #define READ_SPINOR READ_SPINOR_DOUBLE
169 #define READ_SPINOR_UP READ_SPINOR_DOUBLE_UP
170 #define READ_SPINOR_DOWN READ_SPINOR_DOUBLE_DOWN
171 #define SPINORTEX param.in
172 #else
173 #define READ_SPINOR READ_SPINOR_DOUBLE_TEX
174 #define READ_SPINOR_UP READ_SPINOR_DOUBLE_UP_TEX
175 #define READ_SPINOR_DOWN READ_SPINOR_DOUBLE_DOWN_TEX
176 #ifdef USE_TEXTURE_OBJECTS
177 #define SPINORTEX param.inTex
178 #else
179 #define SPINORTEX spinorTexDouble
180 #endif // USE_TEXTURE_OBJECTS
181 #endif
// Intermediate spinor: read back from the output field (param.out).
182 #if (defined DIRECT_ACCESS_WILSON_INTER) || (defined FERMI_NO_DBLE_TEX)
183 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_DOUBLE
184 #define INTERTEX param.out
185 #else
186 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_DOUBLE_TEX
187 #ifdef USE_TEXTURE_OBJECTS
188 #define INTERTEX param.outTex
189 #else
190 #define INTERTEX interTexDouble
191 #endif
192 #endif
193 #define WRITE_SPINOR WRITE_SPINOR_DOUBLE2
194 #define SPINOR_DOUBLE
// Accumulator access is only set up for the Xpay variants.
195 #if (DD_XPAY==1)
196 #if (defined DIRECT_ACCESS_WILSON_ACCUM) || (defined FERMI_NO_DBLE_TEX)
197 #define ACCUMTEX param.x
198 #define READ_ACCUM READ_ACCUM_DOUBLE
199 #else
200 #ifdef USE_TEXTURE_OBJECTS
201 #define ACCUMTEX param.xTex
202 #else
203 #define ACCUMTEX accumTexDouble
204 #endif // USE_TEXTURE_OBJECTS
205 #define READ_ACCUM READ_ACCUM_DOUBLE_TEX
206 #endif
207 
208 #endif
209 
210 #define SPINOR_HOP 12
211 
212 #elif (DD_PREC==1) // single-precision fields
213 
214 #define TPROJSCALE param.tProjScale_f
215 
216 // single-precision gauge field
217 #ifdef DIRECT_ACCESS_LINK
218 #define GAUGE0TEX param.gauge0
219 #define GAUGE1TEX param.gauge1
220 #else
221 #ifdef USE_TEXTURE_OBJECTS
222 #define GAUGE0TEX param.gauge0Tex
223 #define GAUGE1TEX param.gauge1Tex
224 #else
// The texture reference name depends on the reconstruction scheme:
// the *2 reference for 18 components, the *4 reference for 8 and 12.
225 #if (DD_RECON_F == 18)
226 #define GAUGE0TEX gauge0TexSingle2
227 #define GAUGE1TEX gauge1TexSingle2
228 #else
229 #define GAUGE0TEX gauge0TexSingle4
230 #define GAUGE1TEX gauge1TexSingle4
231 #endif
232 #endif // USE_TEXTURE_OBJECTS
233 #endif
234 
235 
236 // single-precision spinor fields
237 #ifdef DIRECT_ACCESS_WILSON_SPINOR
238 #define READ_SPINOR READ_SPINOR_SINGLE
239 #define READ_SPINOR_UP READ_SPINOR_SINGLE_UP
240 #define READ_SPINOR_DOWN READ_SPINOR_SINGLE_DOWN
241 #define SPINORTEX param.in
242 #else
243 #define READ_SPINOR READ_SPINOR_SINGLE_TEX
244 #define READ_SPINOR_UP READ_SPINOR_SINGLE_UP_TEX
245 #define READ_SPINOR_DOWN READ_SPINOR_SINGLE_DOWN_TEX
246 #ifdef USE_TEXTURE_OBJECTS
247 #define SPINORTEX param.inTex
248 #else
249 #define SPINORTEX spinorTexSingle
250 #endif // USE_TEXTURE_OBJECTS
251 #endif
252 #ifdef DIRECT_ACCESS_WILSON_INTER
253 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_SINGLE
254 #define INTERTEX param.out
255 #else
256 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_SINGLE_TEX
257 #ifdef USE_TEXTURE_OBJECTS
258 #define INTERTEX param.outTex
259 #else
260 #define INTERTEX interTexSingle
261 #endif // USE_TEXTURE_OBJECTS
262 #endif
263 #define WRITE_SPINOR WRITE_SPINOR_FLOAT4
264 #if (DD_XPAY==1)
265 #ifdef DIRECT_ACCESS_WILSON_ACCUM
266 #define ACCUMTEX param.x
267 #define READ_ACCUM READ_ACCUM_SINGLE
268 #else
269 #ifdef USE_TEXTURE_OBJECTS
270 #define ACCUMTEX param.xTex
271 #else
272 #define ACCUMTEX accumTexSingle
273 #endif // USE_TEXTURE_OBJECTS
274 #define READ_ACCUM READ_ACCUM_SINGLE_TEX
275 #endif
276 #endif
277 
278 #define SPINOR_HOP 6
279 
280 #else // half-precision fields
281 
// Half precision uses the same scale member as single precision.
282 #define TPROJSCALE param.tProjScale_f
283 
284 // half-precision gauge field
285 #ifdef DIRECT_ACCESS_LINK
286 #define GAUGE0TEX param.gauge0
287 #define GAUGE1TEX param.gauge1
288 #else
289 #ifdef USE_TEXTURE_OBJECTS
290 #define GAUGE0TEX param.gauge0Tex
291 #define GAUGE1TEX param.gauge1Tex
292 #else
293 #if (DD_RECON_F == 18)
294 #define GAUGE0TEX gauge0TexHalf2
295 #define GAUGE1TEX gauge1TexHalf2
296 #else
297 #define GAUGE0TEX gauge0TexHalf4
298 #define GAUGE1TEX gauge1TexHalf4
299 #endif
300 #endif // USE_TEXTURE_OBJECTS
301 #endif
302 
303 
304 // half-precision spinor fields
305 #ifdef DIRECT_ACCESS_WILSON_SPINOR
306 #define READ_SPINOR READ_SPINOR_HALF
307 #define READ_SPINOR_UP READ_SPINOR_HALF_UP
308 #define READ_SPINOR_DOWN READ_SPINOR_HALF_DOWN
309 #define SPINORTEX param.in
310 #else
311 #define READ_SPINOR READ_SPINOR_HALF_TEX
312 #define READ_SPINOR_UP READ_SPINOR_HALF_UP_TEX
313 #define READ_SPINOR_DOWN READ_SPINOR_HALF_DOWN_TEX
314 #ifdef USE_TEXTURE_OBJECTS
315 #define SPINORTEX param.inTex
316 #else
317 #define SPINORTEX spinorTexHalf
318 #endif // USE_TEXTURE_OBJECTS
319 #endif
320 #ifdef DIRECT_ACCESS_WILSON_INTER
321 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_HALF
322 #define INTERTEX param.out
323 #else
324 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_HALF_TEX
325 #ifdef USE_TEXTURE_OBJECTS
326 #define INTERTEX param.outTex
327 #else
328 #define INTERTEX interTexHalf
329 #endif // USE_TEXTURE_OBJECTS
330 #endif
331 #define WRITE_SPINOR WRITE_SPINOR_SHORT4
332 #if (DD_XPAY==1)
333 #ifdef DIRECT_ACCESS_WILSON_ACCUM
334 #define ACCUMTEX param.x
335 #define READ_ACCUM READ_ACCUM_HALF
336 #else
337 #ifdef USE_TEXTURE_OBJECTS
338 #define ACCUMTEX param.xTex
339 #else
340 #define ACCUMTEX accumTexHalf
341 #endif // USE_TEXTURE_OBJECTS
342 #define READ_ACCUM READ_ACCUM_HALF_TEX
343 #endif
344 
345 #endif
346 
347 #define SPINOR_HOP 6
348 
349 #endif
350 
// DD_CONCAT pastes its five arguments together and appends "Kernel".
// DD_FUNC is a forwarding layer: operands of ## are not macro-expanded, so
// going through DD_FUNC first lets arguments such as DD_NAME_F or DD_RECON_F
// be replaced by their values before DD_CONCAT pastes them.
351 #define DD_CONCAT(n,p,r,d,x) n ## p ## r ## d ## x ## Kernel
352 #define DD_FUNC(n,p,r,d,x) DD_CONCAT(n,p,r,d,x)
353 
354 // define the kernel
355 
// Kernel definition for the current variant. It is templated on a KernelType
// value and takes the DslashParam structure by value. The body comes entirely
// from an included core file, and only when GPU_DOMAIN_WALL_DIRAC is defined;
// otherwise the body is empty.
// NOTE(review): the embedded numbering skips 357 and 362, so the function
// declarator line (return type and name) and the body of the "#if DD_DAG"
// branch are not present in this listing -- confirm against the original file.
356 template <KernelType kernel_type>
358  (const DslashParam param) {
359 
360 #ifdef GPU_DOMAIN_WALL_DIRAC
361 #if DD_DAG
363 #else
364 #include "dw_dslash5inv_core.h"
365 #endif
366 #endif
367 
368 }
369 
370 // clean up
371 
// Remove every per-variant macro so the next pass can define it afresh
// without a conflicting redefinition. Some of these (DSLASH_XPAY, READ_ACCUM,
// ACCUMTEX, GAUGE_FLOAT2, SPINOR_DOUBLE) are only defined for certain
// variants; #undef of a name that is not currently a macro is ignored.
372 #undef DD_NAME_F
373 #undef DD_PREC_F
374 #undef DD_RECON_F
375 #undef DD_DAG_F
376 #undef DD_XPAY_F
377 #undef DD_CONCAT
378 #undef DD_FUNC
379 
380 #undef DSLASH_XPAY
381 #undef ASSN_GAUGE_MATRIX
382 #undef RECONSTRUCT_GAUGE_MATRIX
383 #undef GAUGE0TEX
384 #undef GAUGE1TEX
385 #undef READ_SPINOR
386 #undef READ_SPINOR_UP
387 #undef READ_SPINOR_DOWN
388 #undef SPINORTEX
389 #undef READ_INTERMEDIATE_SPINOR
390 #undef INTERTEX
391 #undef READ_ACCUM
392 #undef ACCUMTEX
393 #undef WRITE_SPINOR
394 #undef GAUGE_FLOAT2
395 #undef SPINOR_DOUBLE
396 
397 #undef SPINOR_HOP
398 
399 #undef TPROJSCALE
400 
401 // prepare next set of options, or clean up after final iteration
402 
// Advance the option counter by one step. The digits, from fastest to
// slowest, are DD_DAG (0..1), DD_XPAY (0..1), DD_RECON (0..2) and
// DD_PREC (0..2). A digit that is at its maximum wraps to 0 and carries into
// the next one; this is why each later #if is nested inside the #else of the
// previous digit. When DD_PREC would carry (all digits at their maximum),
// DD_LOOP and all counters are undefined, which ends the iteration.
403 #if (DD_DAG==0)
404 #undef DD_DAG
405 #define DD_DAG 1
406 #else
407 #undef DD_DAG
408 #define DD_DAG 0
409 
410 #if (DD_XPAY==0)
411 #undef DD_XPAY
412 #define DD_XPAY 1
413 #else
414 #undef DD_XPAY
415 #define DD_XPAY 0
416 
417 #if (DD_RECON==0)
418 #undef DD_RECON
419 #define DD_RECON 1
420 #elif (DD_RECON==1)
421 #undef DD_RECON
422 #define DD_RECON 2
423 #else
424 #undef DD_RECON
425 #define DD_RECON 0
426 
427 #if (DD_PREC==0)
428 #undef DD_PREC
429 #define DD_PREC 1
430 #elif (DD_PREC==1)
431 #undef DD_PREC
432 #define DD_PREC 2
433 
434 #else
435 
// Final variant done: leave no loop state behind.
436 #undef DD_LOOP
437 #undef DD_DAG
438 #undef DD_XPAY
439 #undef DD_RECON
440 #undef DD_PREC
441 
442 #endif // DD_PREC
443 #endif // DD_RECON
444 #endif // DD_XPAY
445 #endif // DD_DAG
446 
// While DD_LOOP is still defined there are variants left to generate, so the
// file includes itself to run the next pass with the updated counters. The
// recursion stops once the counter step above has undefined DD_LOOP.
447 #ifdef DD_LOOP
448 #include "dw_dslash5inv_def.h"
449 #endif
#define DD_FUNC(n, p, r, d, x)
#define DD_NAME_F
QudaGaugeParam param
Definition: pack_test.cpp:17
#define DD_XPAY_F
#define DD_DAG_F
#define DD_PREC_F
#define DD_RECON_F