QUDA  0.9.0
tm_ndeg_dslash_def.h
Go to the documentation of this file.
1 // tm_ndeg_dslash_def.h - Twisted Mass (ndeg) Dslash kernel definitions
2 
3 // There are currently 72 different variants of the Twisted Mass
4 // Wilson Dslash kernel, each one characterized by a set of 5 options,
5 // where each option can take one of several values (2*2*2*3*3 = 72).
6 // This file is structured so that the C preprocessor loops through all 72
7 // variants (in a manner resembling a counter), sets the appropriate
8 // macros, and defines the corresponding functions.
9 //
10 // As an example of the function naming conventions, consider
11 //
12 // twistedNdegMassDslashS12DaggerXpayKernel(const DslashParam param).
13 //
14 // Assuming the fields are pasted in the order listed below, this is a
15 // single-precision twisted mass Dslash^dagger kernel (no twist) where the
16 // result is multiplied by "a" and summed with an input vector (Xpay), and the
17 // gauge matrix is reconstructed from 12 real numbers. More generally, each
18 // function name is given by the concatenation of the following 6 fields, with "Kernel" at the end:
19 //
20 // DD_NAME_F = twistedNdegMassDslash;  DD_PREC_F = D, S, H
21 // DD_RECON_F = 8, 12, 18
22 // DD_DAG_F = Dagger, [blank];  DD_TWIST_F = Twist, [blank]
23 // DD_XPAY_F = Xpay, [blank]
24 //
25 // In addition, the kernels are templated on the kernel type (KernelType);
26 // the precision of the fields (double, single, or half) is selected by DD_PREC.
27 
28 // initialize on first iteration
29 
30 #ifndef DD_LOOP // first pass only: later passes keep the counter values set at the end of this file
31 #define DD_LOOP // marks that the variant loop is in progress; undefined after the last variant
32 #define DD_DAG 0 // 0 = no dagger, otherwise dagger
33 #define DD_XPAY 0 // 0 = no xpay, otherwise xpay
34 #define DD_TWIST 0 // 0 = no twist, otherwise twist
35 #define DD_RECON 0 // 0 = reconstruct from 8 reals, 1 = from 12 reals, otherwise no reconstruct (18)
36 #define DD_PREC 0 // 0 = double, 1 = single, otherwise half
37 #endif
38 
39 // set options for current iteration
40 
41 #define DD_NAME_F twistedNdegMassDslash // name field pasted into the generated kernel name (see DD_CONCAT)
42 
43 #if (DD_DAG==0) // no dagger
44 #define DD_DAG_F
45 #else // dagger
46 #define DD_DAG_F Dagger
47 #endif
48 
49 #if (DD_XPAY==0) // no xpay
50 #define DD_XPAY_F
51 #else // xpay: besides the name field, also defines the DSLASH_XPAY flag
52 #define DSLASH_XPAY
53 #define DD_XPAY_F Xpay
54 #endif
55 
57 #if (DD_TWIST==0) // no twist
58 #define DD_TWIST_F
59 #else // twist: besides the name field, also defines the DSLASH_TWIST flag
60 #define DSLASH_TWIST
61 #define DD_TWIST_F Twist
62 #endif
63 
65 #if (DD_PREC == 0) // double precision
66 #define DD_PREC_F D
67 #elif (DD_PREC == 1) // single precision
68 #define DD_PREC_F S
69 #else // half precision
70 #define DD_PREC_F H
71 #endif
72 
73 #if (DD_RECON==0) // reconstruct from 8 reals
74 #define DD_RECON_F 8
75 
76 #if (DD_PREC==0)
77 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_DOUBLE
78 #ifdef DIRECT_ACCESS_LINK
79 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_DOUBLE2
80 #else
81 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_DOUBLE2_TEX
82 #endif // DIRECT_ACCESS_LINK
83 
84 #elif (DD_PREC==1)
85 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_SINGLE
86 #ifdef DIRECT_ACCESS_LINK
87 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_FLOAT4
88 #else
89 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_FLOAT4_TEX
90 #endif // DIRECT_ACCESS_LINK
91 
92 #else
93 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_SINGLE
94 #ifdef DIRECT_ACCESS_LINK
95 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_SHORT4
96 #else
97 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_SHORT4_TEX
98 #endif // DIRECT_ACCESS_LINK
99 #endif // DD_PREC
100 #elif (DD_RECON==1) // reconstruct from 12 reals
101 #define DD_RECON_F 12
102 
103 #if (DD_PREC==0)
104 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_DOUBLE
105 #ifdef DIRECT_ACCESS_LINK
106 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_DOUBLE2
107 #else
108 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_DOUBLE2_TEX
109 #endif // DIRECT_ACCESS_LINK
110 
111 #elif (DD_PREC==1)
112 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_SINGLE
113 #ifdef DIRECT_ACCESS_LINK
114 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_FLOAT4
115 #else
116 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_FLOAT4_TEX
117 #endif // DIRECT_ACCESS_LINK
118 
119 #else
120 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_SINGLE
121 #ifdef DIRECT_ACCESS_LINK
122 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_SHORT4
123 #else
124 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_SHORT4_TEX
125 #endif // DIRECT_ACCESS_LINK
126 #endif // DD_PREC
127 #else // no reconstruct, load all components
128 #define DD_RECON_F 18
129 #define GAUGE_FLOAT2
130 #if (DD_PREC==0)
131 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_DOUBLE
132 #ifdef DIRECT_ACCESS_LINK
133 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_DOUBLE2
134 #else
135 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_DOUBLE2_TEX
136 #endif // DIRECT_ACCESS_LINK
137 
138 #elif (DD_PREC==1)
139 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_SINGLE
140 #ifdef DIRECT_ACCESS_LINK
141 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_FLOAT2
142 #else
143 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_FLOAT2_TEX
144 #endif // DIRECT_ACCESS_LINK
145 
146 #else
147 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_SINGLE
148 #ifdef DIRECT_ACCESS_LINK
149 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_SHORT2
150 #else
151 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_SHORT2_TEX
152 #endif //DIRECT_ACCESS_LINK
153 #endif
154 #endif
155 
156 #if (DD_PREC==0) // double-precision fields
157 
158 #define TPROJSCALE param.tProjScale
159 
160 // double-precision gauge field
161 #if (defined DIRECT_ACCESS_LINK) || (defined FERMI_NO_DBLE_TEX)
162 #define GAUGE0TEX param.gauge0
163 #define GAUGE1TEX param.gauge1
164 #else
165 #ifdef USE_TEXTURE_OBJECTS
166 #define GAUGE0TEX param.gauge0Tex
167 #define GAUGE1TEX param.gauge1Tex
168 #else
169 #define GAUGE0TEX gauge0TexDouble2
170 #define GAUGE1TEX gauge1TexDouble2
171 #endif // USE_TEXTURE_OBJECTS
172 #endif
173 
174 #define GAUGE_FLOAT2
175 
176 // double-precision spinor fields
177 #if (defined DIRECT_ACCESS_WILSON_SPINOR) || (defined FERMI_NO_DBLE_TEX)
178 #define READ_SPINOR READ_SPINOR_DOUBLE
179 #define READ_SPINOR_GHOST READ_SPINOR_GHOST_DOUBLE
180 #define READ_SPINOR_UP READ_SPINOR_DOUBLE_UP
181 #define READ_SPINOR_DOWN READ_SPINOR_DOUBLE_DOWN
182 #define SPINORTEX param.in
183 #else
184 #define READ_SPINOR READ_SPINOR_DOUBLE_TEX
185 #define READ_SPINOR_GHOST READ_SPINOR_GHOST_DOUBLE_TEX
186 #define READ_SPINOR_UP READ_SPINOR_DOUBLE_UP_TEX
187 #define READ_SPINOR_DOWN READ_SPINOR_DOUBLE_DOWN_TEX
188 #ifdef USE_TEXTURE_OBJECTS
189 #define SPINORTEX param.inTex
190 #define GHOSTSPINORTEX param.ghostTex
191 #else
192 #define SPINORTEX spinorTexDouble
193 #define GHOSTSPINORTEX ghostSpinorTexDouble
194 #endif // USE_TEXTURE_OBJECTS
195 #endif
196 #if (defined DIRECT_ACCESS_WILSON_INTER) || (defined FERMI_NO_DBLE_TEX)
197 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_DOUBLE
198 #define INTERTEX param.out
199 #else
200 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_DOUBLE_TEX
201 #ifdef USE_TEXTURE_OBJECTS
202 #define INTERTEX param.outTex
203 #else
204 #define INTERTEX interTexDouble
205 #endif
206 #endif
207 #define WRITE_FLAVOR_SPINOR WRITE_FLAVOR_SPINOR_DOUBLE2
208 #define SPINOR_DOUBLE
209 #if (DD_XPAY==1)
210 #if (defined DIRECT_ACCESS_WILSON_ACCUM) || (defined FERMI_NO_DBLE_TEX)
211 #define ACCUMTEX param.x
212 #define READ_ACCUM READ_ACCUM_DOUBLE
213 #define ASSN_ACCUM ASSN_ACCUM_DOUBLE
215 #define READ_ACCUM_FLAVOR READ_ACCUM_FLAVOR_DOUBLE
216 #else
217 #ifdef USE_TEXTURE_OBJECTS
218 #define ACCUMTEX param.xTex
219 #else
220 #define ACCUMTEX accumTexDouble
221 #endif // USE_TEXTURE_OBJECTS
222 #define READ_ACCUM READ_ACCUM_DOUBLE_TEX
223 #define ASSN_ACCUM ASSN_ACCUM_DOUBLE_TEX
225 #define READ_ACCUM_FLAVOR READ_ACCUM_FLAVOR_DOUBLE_TEX
226 #endif
227 
228 #endif
229 
230 #define SPINOR_HOP 12
231 
232 #elif (DD_PREC==1) // single-precision fields
233 
234 #define TPROJSCALE param.tProjScale_f
235 
236 // single-precision gauge field
237 #ifdef DIRECT_ACCESS_LINK
238 #define GAUGE0TEX param.gauge0
239 #define GAUGE1TEX param.gauge1
240 #else
241 #ifdef USE_TEXTURE_OBJECTS
242 #define GAUGE0TEX param.gauge0Tex
243 #define GAUGE1TEX param.gauge1Tex
244 #else
245 #if (DD_RECON_F == 18)
246 #define GAUGE0TEX gauge0TexSingle2
247 #define GAUGE1TEX gauge1TexSingle2
248 #else
249 #define GAUGE0TEX gauge0TexSingle4
250 #define GAUGE1TEX gauge1TexSingle4
251 #endif
252 #endif // USE_TEXTURE_OBJECTS
253 #endif
254 
255 
256 // single-precision spinor fields
257 #ifdef DIRECT_ACCESS_WILSON_SPINOR
258 #define READ_SPINOR READ_SPINOR_SINGLE
259 #define READ_SPINOR_GHOST READ_SPINOR_GHOST_SINGLE
260 #define READ_SPINOR_UP READ_SPINOR_SINGLE_UP
261 #define READ_SPINOR_DOWN READ_SPINOR_SINGLE_DOWN
262 #define SPINORTEX param.in
263 #else
264 #define READ_SPINOR READ_SPINOR_SINGLE_TEX
265 #define READ_SPINOR_GHOST READ_SPINOR_GHOST_SINGLE_TEX
266 #define READ_SPINOR_UP READ_SPINOR_SINGLE_UP_TEX
267 #define READ_SPINOR_DOWN READ_SPINOR_SINGLE_DOWN_TEX
268 #ifdef USE_TEXTURE_OBJECTS
269 #define SPINORTEX param.inTex
270 #define GHOSTSPINORTEX param.ghostTex
271 #else
272 #define SPINORTEX spinorTexSingle
273 #define GHOSTSPINORTEX ghostSpinorTexSingle
274 #endif // USE_TEXTURE_OBJECTS
275 #endif
276 #ifdef DIRECT_ACCESS_WILSON_INTER
277 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_SINGLE
278 #define INTERTEX param.out
279 #else
280 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_SINGLE_TEX
281 #ifdef USE_TEXTURE_OBJECTS
282 #define INTERTEX param.outTex
283 #else
284 #define INTERTEX interTexSingle
285 #endif // USE_TEXTURE_OBJECTS
286 #endif
287 #define WRITE_FLAVOR_SPINOR WRITE_FLAVOR_SPINOR_FLOAT4
288 #if (DD_XPAY==1)
289 #ifdef DIRECT_ACCESS_WILSON_ACCUM
290 #define ACCUMTEX param.x
291 #define READ_ACCUM READ_ACCUM_SINGLE
292 #define ASSN_ACCUM ASSN_ACCUM_SINGLE
294 #define READ_ACCUM_FLAVOR READ_ACCUM_FLAVOR_SINGLE
295 #else
296 #ifdef USE_TEXTURE_OBJECTS
297 #define ACCUMTEX param.xTex
298 #else
299 #define ACCUMTEX accumTexSingle
300 #endif // USE_TEXTURE_OBJECTS
301 #define READ_ACCUM READ_ACCUM_SINGLE_TEX
302 #define ASSN_ACCUM ASSN_ACCUM_SINGLE_TEX
304 #define READ_ACCUM_FLAVOR READ_ACCUM_FLAVOR_SINGLE_TEX
305 #endif
306 #endif
307 
308 #define SPINOR_HOP 6
309 
310 #else // half-precision fields
311 
312 #define TPROJSCALE param.tProjScale_f
313 
314 // half-precision gauge field
315 #ifdef DIRECT_ACCESS_LINK
316 #define GAUGE0TEX param.gauge0
317 #define GAUGE1TEX param.gauge1
318 #else
319 #ifdef USE_TEXTURE_OBJECTS
320 #define GAUGE0TEX param.gauge0Tex
321 #define GAUGE1TEX param.gauge1Tex
322 #else
323 #if (DD_RECON_F == 18)
324 #define GAUGE0TEX gauge0TexHalf2
325 #define GAUGE1TEX gauge1TexHalf2
326 #else
327 #define GAUGE0TEX gauge0TexHalf4
328 #define GAUGE1TEX gauge1TexHalf4
329 #endif
330 #endif // USE_TEXTURE_OBJECTS
331 #endif
332 
333 
334 // half-precision spinor fields
335 #ifdef DIRECT_ACCESS_WILSON_SPINOR
336 #define READ_SPINOR READ_SPINOR_HALF
337 #define READ_SPINOR_GHOST READ_SPINOR_GHOST_HALF
338 #define READ_SPINOR_UP READ_SPINOR_HALF_UP
339 #define READ_SPINOR_DOWN READ_SPINOR_HALF_DOWN
340 #define SPINORTEX param.in
341 #else
342 #define READ_SPINOR READ_SPINOR_HALF_TEX
343 #define READ_SPINOR_GHOST READ_SPINOR_GHOST_HALF_TEX
344 #define READ_SPINOR_UP READ_SPINOR_HALF_UP_TEX
345 #define READ_SPINOR_DOWN READ_SPINOR_HALF_DOWN_TEX
346 #ifdef USE_TEXTURE_OBJECTS
347 #define SPINORTEX param.inTex
348 #define GHOSTSPINORTEX param.ghostTex
349 #else
350 #define SPINORTEX spinorTexHalf
351 #define GHOSTSPINORTEX ghostSpinorTexHalf
352 #endif // USE_TEXTURE_OBJECTS
353 #endif
354 #ifdef DIRECT_ACCESS_WILSON_INTER
355 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_HALF
356 #define INTERTEX param.out
357 #else
358 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_HALF_TEX
359 #ifdef USE_TEXTURE_OBJECTS
360 #define INTERTEX param.outTex
361 #else
362 #define INTERTEX interTexHalf
363 #endif // USE_TEXTURE_OBJECTS
364 #endif
365 #define WRITE_FLAVOR_SPINOR WRITE_FLAVOR_SPINOR_SHORT4
366 #if (DD_XPAY==1)
367 #ifdef DIRECT_ACCESS_WILSON_ACCUM
368 #define ACCUMTEX param.x
369 #define READ_ACCUM READ_ACCUM_HALF
370 #define ASSN_ACCUM ASSN_ACCUM_HALF
372 #define READ_ACCUM_FLAVOR READ_ACCUM_FLAVOR_HALF
373 #else
374 #ifdef USE_TEXTURE_OBJECTS
375 #define ACCUMTEX param.xTex
376 #else
377 #define ACCUMTEX accumTexHalf
378 #endif // USE_TEXTURE_OBJECTS
379 #define READ_ACCUM READ_ACCUM_HALF_TEX
380 #define ASSN_ACCUM ASSN_ACCUM_HALF_TEX
382 #define READ_ACCUM_FLAVOR READ_ACCUM_FLAVOR_HALF_TEX
383 #endif
384 
385 #endif
386 
387 #define SPINOR_HOP 6
388 
389 #endif
390 
391 #define DD_CONCAT(n,p,r,d,t,x) n ## p ## r ## d ## t ## x ## Kernel // pastes the six fields and the suffix "Kernel" into one identifier
392 #define DD_FUNC(n,p,r,d,t,x) DD_CONCAT(n,p,r,d,t,x) // extra macro level so that the DD_*_F arguments are expanded before ## pastes them
393 
394 // define the kernel
395 
396 template <KernelType kernel_type>
398  (const DslashParam param) {
399 
400 #ifdef GPU_NDEG_TWISTED_MASS_DIRAC
401 #if DD_DAG
403 #else
404 #include "tm_ndeg_dslash_core.h"
405 #endif
406 #endif
407 
408 }
409 
410 #ifdef MULTI_GPU
411 template <>
413  (const DslashParam param) {
414 
415 #ifdef GPU_NDEG_TWISTED_MASS_DIRAC
416 #if DD_DAG
418 #else
420 #endif // DD_DAG
421 
422 }
423 #endif // GPU_NDEG_TWISTED_MASS_DIRAC (was labelled MULTI_GPU) -- NOTE(review): as the visible directives nest, the closing brace above is inside this conditional while the opening brace is not, so with MULTI_GPU defined and GPU_NDEG_TWISTED_MASS_DIRAC undefined the body would be left unterminated; confirm against the complete source
424 
425 #endif // MULTI_GPU
426 
427 // clean up
428 
429 #undef DD_PREC_F
430 #undef DD_NAME_F
431 #undef DD_RECON_F
432 #undef DD_DAG_F
433 #undef DD_XPAY_F
434 #undef DD_TWIST_F
435 #undef DD_CONCAT
436 #undef DD_FUNC
437 
438 #undef DSLASH_XPAY
439 #undef DSLASH_TWIST
440 #undef READ_GAUGE_MATRIX
441 #undef RECONSTRUCT_GAUGE_MATRIX
442 #undef GAUGE0TEX
443 #undef GAUGE1TEX
444 #undef READ_SPINOR
445 #undef READ_SPINOR_GHOST
446 #undef READ_SPINOR_UP
447 #undef READ_SPINOR_DOWN
448 #undef SPINORTEX
449 #undef GHOSTSPINORTEX
450 #undef READ_INTERMEDIATE_SPINOR
451 #undef INTERTEX
452 #undef READ_ACCUM
453 #undef ASSN_ACCUM
455 #undef READ_ACCUM_FLAVOR
456 #undef ACCUMTEX
457 #undef WRITE_FLAVOR_SPINOR
458 #undef GAUGE_FLOAT2
459 #undef SPINOR_DOUBLE
460 
461 #undef SPINOR_HOP
462 
463 #undef TPROJSCALE
464 
465 // prepare next set of options, or clean up after final iteration
466 
467 #if (DD_DAG==0) // DD_DAG is the fastest-varying option of the counter
468 #undef DD_DAG
469 #define DD_DAG 1
470 #else // DD_DAG wraps back to 0 and the carry moves on to DD_XPAY
471 #undef DD_DAG
472 #define DD_DAG 0
473 
474 #if (DD_XPAY==0)
475 #undef DD_XPAY
476 #define DD_XPAY 1
477 #else // DD_XPAY wraps back to 0 and the carry moves on to DD_TWIST
478 #undef DD_XPAY
479 #define DD_XPAY 0
480 
482 #if (DD_TWIST==0)
483 #undef DD_TWIST
484 #define DD_TWIST 1
485 #else // DD_TWIST wraps back to 0 and the carry moves on to DD_RECON
486 #undef DD_TWIST
487 #define DD_TWIST 0
488 
490 #if (DD_RECON==0)
491 #undef DD_RECON
492 #define DD_RECON 1
493 #elif (DD_RECON==1)
494 #undef DD_RECON
495 #define DD_RECON 2
496 #else // DD_RECON wraps back to 0 and the carry moves on to DD_PREC
497 #undef DD_RECON
498 #define DD_RECON 0
499 
500 #if (DD_PREC==0)
501 #undef DD_PREC
502 #define DD_PREC 1
503 #elif (DD_PREC==1)
504 #undef DD_PREC
505 #define DD_PREC 2
506 
507 #else // every option has wrapped: this was the final variant
508 
509 #undef DD_LOOP // stops the self-inclusion below
510 #undef DD_DAG
511 #undef DD_XPAY
512 #undef DD_TWIST
513 #undef DD_RECON
514 #undef DD_PREC
515 
516 #endif // DD_PREC
517 #endif // DD_RECON
518 #endif // DD_TWIST
519 #endif // DD_XPAY
520 #endif // DD_DAG
521 
522 #ifdef DD_LOOP // still defined unless the final variant was just processed
523 #include "tm_ndeg_dslash_def.h" // re-include this file to process the next variant
524 #endif
#define DD_PREC_F
#define DD_XPAY_F
#define DD_FUNC(n, p, r, d, t, x)
QudaGaugeParam param
Definition: pack_test.cpp:17
#define DD_TWIST_F
#define DD_RECON_F
#define DD_DAG_F
#define DD_NAME_F