QUDA  0.9.0
tm_dslash_def.h
Go to the documentation of this file.
1 // tm_dslash_def.h - Twisted Mass Dslash kernel definitions
2 
3 // There are currently 72 different variants of the Twisted Mass Wilson
4 // Dslash kernel, each one characterized by a set of 5 options (DD_PREC,
5 // DD_RECON, DD_XPAY, DD_TWIST, DD_DAG), where each option can take one of
6 // several values (3*3*2*2*2 = 72). This file is structured so that the C
7 // preprocessor loops through all 72 variants (in a manner resembling a
8 // counter), sets the appropriate macros, and defines the corresponding functions.
9 //
10 // As an example of the function naming conventions, consider
11 //
12 // twistedMassDslashS12DaggerXpayKernel(float4* out, ...).
13 //
14 // This is a single-precision twisted mass Dslash^dagger kernel where the
15 // result is multiplied by "a" and summed with an input vector (Xpay), and
16 // the gauge matrix is reconstructed from 12 real numbers. More
17 // generally, each function name is given by the concatenation of the
18 // following 5 fields, with "Kernel" at the end:
19 //
20 // DD_NAME_F = twistedMassTwistInvDslash, twistedMassDslash, twistedMassDslashTwist
21 // DD_PREC_F = D, S, H (double, single, or half precision fields)
22 // DD_RECON_F = 8, 12, 18
23 // DD_DAG_F = Dagger, [blank]
24 // DD_XPAY_F = Xpay, [blank]
25 //
26 // In addition, the kernels are templated on KernelType.
27 
28 // initialize on first iteration
29 
30 #ifndef DD_LOOP
31 #define DD_LOOP
32 #define DD_DAG 0
33 #define DD_XPAY 0
34 #define DD_TWIST 0
36 #define DD_RECON 0
37 #define DD_PREC 0
38 #endif
39 
40 // set options for current iteration
41 
42 //#define DD_NAME_F twistedMassDslash
43 
44 #if (DD_DAG==0) // no dagger
45 #define DD_DAG_F
46 #else // dagger
47 #define DD_DAG_F Dagger
48 #endif
49 
51 #if (DD_XPAY==0) // no xpay
52 #define DD_XPAY_F
53 #elif (DD_XPAY==1)
54 #define DSLASH_XPAY
55 #define DD_XPAY_F Xpay
56 #endif
57 
58 #if (DD_PREC == 0)
59 #define DD_PREC_F D
60 #elif (DD_PREC == 1)
61 #define DD_PREC_F S
62 #else
63 #define DD_PREC_F H
64 #endif
65 
66 #if (DD_TWIST==0) // twisted input
67 #define DD_NAME_F twistedMassTwistInvDslash
68 #define TWIST_INV_DSLASH
69 #else
70 #define DD_NAME_F twistedMassDslash
71 #endif
72 
// Select the gauge-link macros for the current iteration.
// DD_RECON chooses the name fragment DD_RECON_F (8, 12 or 18) and, together
// with DD_PREC, the pair RECONSTRUCT_GAUGE_MATRIX / READ_GAUGE_MATRIX.
// When DIRECT_ACCESS_LINK is defined the reader without the _TEX suffix is
// used; otherwise the _TEX reader is used (presumably a texture fetch; the
// reader macros themselves are defined outside this file).
// In every DD_RECON branch the final #else of the DD_PREC test (half
// precision, DD_PREC_F == H) pairs the SHORT readers with the *_SINGLE
// reconstruct macro.
74 #if (DD_RECON==0) // reconstruct from 8 reals
75 #define DD_RECON_F 8
76 
77 #if (DD_PREC==0)
78 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_DOUBLE
79 #ifdef DIRECT_ACCESS_LINK
80 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_DOUBLE2
81 #else
82 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_DOUBLE2_TEX
83 #endif // DIRECT_ACCESS_LINK
84 
85 #elif (DD_PREC==1)
86 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_SINGLE
87 #ifdef DIRECT_ACCESS_LINK
88 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_FLOAT4
89 #else
90 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_FLOAT4_TEX
91 #endif // DIRECT_ACCESS_LINK
92 
93 #else
// half precision: SHORT4 reader, single-precision reconstruct macro
94 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_SINGLE
95 #ifdef DIRECT_ACCESS_LINK
96 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_SHORT4
97 #else
98 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_SHORT4_TEX
99 #endif // DIRECT_ACCESS_LINK
100 #endif // DD_PREC
101 #elif (DD_RECON==1) // reconstruct from 12 reals
102 #define DD_RECON_F 12
103 
104 #if (DD_PREC==0)
105 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_DOUBLE
106 #ifdef DIRECT_ACCESS_LINK
107 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_DOUBLE2
108 #else
109 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_DOUBLE2_TEX
110 #endif // DIRECT_ACCESS_LINK
111 
112 #elif (DD_PREC==1)
113 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_SINGLE
114 #ifdef DIRECT_ACCESS_LINK
115 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_FLOAT4
116 #else
117 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_FLOAT4_TEX
118 #endif // DIRECT_ACCESS_LINK
119 
120 #else
// half precision: SHORT4 reader, single-precision reconstruct macro
121 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_SINGLE
122 #ifdef DIRECT_ACCESS_LINK
123 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_SHORT4
124 #else
125 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_SHORT4_TEX
126 #endif // DIRECT_ACCESS_LINK
127 #endif // DD_PREC
128 #else // no reconstruct, load all components
129 #define DD_RECON_F 18
// GAUGE_FLOAT2 is defined for every precision in the 18-component case; the
// readers selected below are the two-component (DOUBLE2/FLOAT2/SHORT2) ones.
130 #define GAUGE_FLOAT2
131 #if (DD_PREC==0)
132 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_DOUBLE
133 #ifdef DIRECT_ACCESS_LINK
134 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_DOUBLE2
135 #else
136 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_DOUBLE2_TEX
137 #endif // DIRECT_ACCESS_LINK
138 
139 #elif (DD_PREC==1)
140 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_SINGLE
141 #ifdef DIRECT_ACCESS_LINK
142 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_FLOAT2
143 #else
144 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_FLOAT2_TEX
145 #endif // DIRECT_ACCESS_LINK
146 
147 #else
// half precision: SHORT2 reader, single-precision reconstruct macro
148 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_SINGLE
149 #ifdef DIRECT_ACCESS_LINK
150 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_SHORT2
151 #else
152 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_SHORT2_TEX
153 #endif //DIRECT_ACCESS_LINK
154 #endif
155 #endif
156 
157 #if (DD_PREC==0) // double-precision fields
158 
159 #define TPROJSCALE param.tProjScale
160 
161 // double-precision gauge field
162 #if (defined DIRECT_ACCESS_LINK) || (defined FERMI_NO_DBLE_TEX)
163 #define GAUGE0TEX param.gauge0
164 #define GAUGE1TEX param.gauge1
165 #else
166 #ifdef USE_TEXTURE_OBJECTS
167 #define GAUGE0TEX param.gauge0Tex
168 #define GAUGE1TEX param.gauge1Tex
169 #else
170 #define GAUGE0TEX gauge0TexDouble2
171 #define GAUGE1TEX gauge1TexDouble2
172 #endif // USE_TEXTURE_OBJECTS
173 #endif
174 
175 #define GAUGE_FLOAT2
176 
177 // double-precision spinor fields
178 #if (defined DIRECT_ACCESS_WILSON_SPINOR) || (defined FERMI_NO_DBLE_TEX)
179 #define READ_SPINOR READ_SPINOR_DOUBLE
180 #define READ_SPINOR_GHOST READ_SPINOR_GHOST_DOUBLE
181 #define READ_SPINOR_UP READ_SPINOR_DOUBLE_UP
182 #define READ_SPINOR_DOWN READ_SPINOR_DOUBLE_DOWN
183 #define SPINORTEX param.in
184 #else
185 #define READ_SPINOR READ_SPINOR_DOUBLE_TEX
186 #define READ_SPINOR_GHOST READ_SPINOR_GHOST_DOUBLE_TEX
187 #define READ_SPINOR_UP READ_SPINOR_DOUBLE_UP_TEX
188 #define READ_SPINOR_DOWN READ_SPINOR_DOUBLE_DOWN_TEX
189 #ifdef USE_TEXTURE_OBJECTS
190 #define SPINORTEX param.inTex
191 #define GHOSTSPINORTEX param.ghostTex
192 #else
193 #define SPINORTEX spinorTexDouble
194 #define GHOSTSPINORTEX ghostSpinorTexDouble
195 #endif // USE_TEXTURE_OBJECTS
196 #endif
197 #if (defined DIRECT_ACCESS_WILSON_INTER) || (defined FERMI_NO_DBLE_TEX)
198 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_DOUBLE
199 #define INTERTEX param.out
200 #else
201 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_DOUBLE_TEX
202 #ifdef USE_TEXTURE_OBJECTS
203 #define INTERTEX param.outTex
204 #else
205 #define INTERTEX interTexDouble
206 #endif
207 #endif
208 #define WRITE_SPINOR WRITE_SPINOR_DOUBLE2
209 #define SPINOR_DOUBLE
210 #if (DD_XPAY!=0)
211 #if (defined DIRECT_ACCESS_WILSON_ACCUM) || (defined FERMI_NO_DBLE_TEX)
212 #define ACCUMTEX param.x
213 #define READ_ACCUM READ_ACCUM_DOUBLE
214 #else
215 #ifdef USE_TEXTURE_OBJECTS
216 #define ACCUMTEX param.xTex
217 #else
218 #define ACCUMTEX accumTexDouble
219 #endif // USE_TEXTURE_OBJECTS
220 #define READ_ACCUM READ_ACCUM_DOUBLE_TEX
221 #endif
222 
223 #endif
224 
225 #define SPINOR_HOP 12
226 
227 #elif (DD_PREC==1) // single-precision fields
228 
229 #define TPROJSCALE param.tProjScale_f
230 
231 // single-precision gauge field
232 #ifdef DIRECT_ACCESS_LINK
233 #define GAUGE0TEX param.gauge0
234 #define GAUGE1TEX param.gauge1
235 #else
236 #ifdef USE_TEXTURE_OBJECTS
237 #define GAUGE0TEX param.gauge0Tex
238 #define GAUGE1TEX param.gauge1Tex
239 #else
240 #if (DD_RECON_F == 18)
241 #define GAUGE0TEX gauge0TexSingle2
242 #define GAUGE1TEX gauge1TexSingle2
243 #else
244 #define GAUGE0TEX gauge0TexSingle4
245 #define GAUGE1TEX gauge1TexSingle4
246 #endif
247 #endif // USE_TEXTURE_OBJECTS
248 #endif
249 
250 
251 // single-precision spinor fields
252 #ifdef DIRECT_ACCESS_WILSON_SPINOR
253 #define READ_SPINOR READ_SPINOR_SINGLE
254 #define READ_SPINOR_GHOST READ_SPINOR_GHOST_SINGLE
255 #define READ_SPINOR_UP READ_SPINOR_SINGLE_UP
256 #define READ_SPINOR_DOWN READ_SPINOR_SINGLE_DOWN
257 #define SPINORTEX param.in
258 #else
259 #define READ_SPINOR READ_SPINOR_SINGLE_TEX
260 #define READ_SPINOR_GHOST READ_SPINOR_GHOST_SINGLE_TEX
261 #define READ_SPINOR_UP READ_SPINOR_SINGLE_UP_TEX
262 #define READ_SPINOR_DOWN READ_SPINOR_SINGLE_DOWN_TEX
263 #ifdef USE_TEXTURE_OBJECTS
264 #define SPINORTEX param.inTex
265 #define GHOSTSPINORTEX param.ghostTex
266 #else
267 #define SPINORTEX spinorTexSingle
268 #define GHOSTSPINORTEX ghostSpinorTexSingle
269 #endif // USE_TEXTURE_OBJECTS
270 #endif
271 #ifdef DIRECT_ACCESS_WILSON_INTER
272 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_SINGLE
273 #define INTERTEX param.out
274 #else
275 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_SINGLE_TEX
276 #ifdef USE_TEXTURE_OBJECTS
277 #define INTERTEX param.outTex
278 #else
279 #define INTERTEX interTexSingle
280 #endif // USE_TEXTURE_OBJECTS
281 #endif
282 #define WRITE_SPINOR WRITE_SPINOR_FLOAT4
283 #if (DD_XPAY!=0)
284 #ifdef DIRECT_ACCESS_WILSON_ACCUM
285 #define ACCUMTEX param.x
286 #define READ_ACCUM READ_ACCUM_SINGLE
287 #else
288 #ifdef USE_TEXTURE_OBJECTS
289 #define ACCUMTEX param.xTex
290 #else
291 #define ACCUMTEX accumTexSingle
292 #endif // USE_TEXTURE_OBJECTS
293 #define READ_ACCUM READ_ACCUM_SINGLE_TEX
294 #endif
295 #endif
296 
297 #define SPINOR_HOP 6
298 
299 #else // half-precision fields
300 
301 #define TPROJSCALE param.tProjScale_f
302 
303 // half-precision gauge field
304 #ifdef DIRECT_ACCESS_LINK
305 #define GAUGE0TEX param.gauge0
306 #define GAUGE1TEX param.gauge1
307 #else
308 #ifdef USE_TEXTURE_OBJECTS
309 #define GAUGE0TEX param.gauge0Tex
310 #define GAUGE1TEX param.gauge1Tex
311 #else
312 #if (DD_RECON_F == 18)
313 #define GAUGE0TEX gauge0TexHalf2
314 #define GAUGE1TEX gauge1TexHalf2
315 #else
316 #define GAUGE0TEX gauge0TexHalf4
317 #define GAUGE1TEX gauge1TexHalf4
318 #endif
319 #endif // USE_TEXTURE_OBJECTS
320 #endif
321 
322 
323 // half-precision spinor fields
324 #ifdef DIRECT_ACCESS_WILSON_SPINOR
325 #define READ_SPINOR READ_SPINOR_HALF
326 #define READ_SPINOR_GHOST READ_SPINOR_GHOST_HALF
327 #define READ_SPINOR_UP READ_SPINOR_HALF_UP
328 #define READ_SPINOR_DOWN READ_SPINOR_HALF_DOWN
329 #define SPINORTEX param.in
330 #else
331 #define READ_SPINOR READ_SPINOR_HALF_TEX
332 #define READ_SPINOR_GHOST READ_SPINOR_GHOST_HALF_TEX
333 #define READ_SPINOR_UP READ_SPINOR_HALF_UP_TEX
334 #define READ_SPINOR_DOWN READ_SPINOR_HALF_DOWN_TEX
335 #ifdef USE_TEXTURE_OBJECTS
336 #define SPINORTEX param.inTex
337 #define GHOSTSPINORTEX param.ghostTex
338 #else
339 #define SPINORTEX spinorTexHalf
340 #define GHOSTSPINORTEX ghostSpinorTexHalf
341 #endif // USE_TEXTURE_OBJECTS
342 #endif
343 #ifdef DIRECT_ACCESS_WILSON_INTER
344 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_HALF
345 #define INTERTEX param.out
346 #else
347 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_HALF_TEX
348 #ifdef USE_TEXTURE_OBJECTS
349 #define INTERTEX param.outTex
350 #else
351 #define INTERTEX interTexHalf
352 #endif // USE_TEXTURE_OBJECTS
353 #endif
354 #define WRITE_SPINOR WRITE_SPINOR_SHORT4
355 #if (DD_XPAY!=0)
357 #ifdef DIRECT_ACCESS_WILSON_ACCUM
358 #define ACCUMTEX param.x
359 #define READ_ACCUM READ_ACCUM_HALF
360 #else
361 #ifdef USE_TEXTURE_OBJECTS
362 #define ACCUMTEX param.xTex
363 #else
364 #define ACCUMTEX accumTexHalf
365 #endif // USE_TEXTURE_OBJECTS
366 #define READ_ACCUM READ_ACCUM_HALF_TEX
367 #endif
368 
369 #endif
370 
371 #define SPINOR_HOP 6
372 
373 #endif
374 
// DD_CONCAT pastes its five arguments together and appends "Kernel",
// producing a single identifier of the form <n><p><r><d><x>Kernel.
// DD_FUNC is a forwarding wrapper: operands of ## are not macro-expanded, so
// calling DD_CONCAT directly with DD_NAME_F etc. would paste the macro names
// themselves. Going through DD_FUNC expands the arguments first, and
// DD_CONCAT then pastes the expanded values.
375 #define DD_CONCAT(n,p,r,d,x) n ## p ## r ## d ## x ## Kernel
376 #define DD_FUNC(n,p,r,d,x) DD_CONCAT(n,p,r,d,x)
377 
378 // define the kernel
380 template <KernelType kernel_type>
382 
383 #ifdef GPU_TWISTED_MASS_DIRAC
384 
385 #ifdef SHARED_WILSON_DSLASH // Fermi optimal code
386 
387 #if DD_DAG
389 #else
390 #include "tm_dslash_gt200_core.h"
391 #endif
392 
393 #else // no shared memory blocking
394 
395 #if DD_DAG
397 #else
398 #include "tm_dslash_gt200_core.h"
399 #endif
400 
401 #endif
402 
403 }
404 
405 #ifdef MULTI_GPU
406 template <>
408  (const DslashParam param) {
409 
410 #ifdef GPU_TWISTED_MASS_DIRAC
411 
412 #ifdef SHARED_WILSON_DSLASH // Fermi optimal code
413 
414 #if DD_DAG
416 #else
418 #endif
419 
420 #else // no shared memory blocking
421 
422 #if DD_DAG
424 #else
426 #endif
427 
428 #endif
429 
430 #endif
431 
432 }
433 #endif // MULTI_GPU
434 
435 
436 //NEW
437 #if (DD_XPAY==1) && (DD_TWIST==1)
438 #define TWIST_XPAY
439 
440 //redefine kernel name:
441 #undef DD_NAME_F
442 #define DD_NAME_F twistedMassDslashTwist
443 
444 template <KernelType kernel_type>
446 
447 #ifdef GPU_TWISTED_MASS_DIRAC
448 
449 #ifdef SHARED_WILSON_DSLASH // Fermi optimal code
450 
451 #if DD_DAG
453 #else
454 #include "tm_dslash_gt200_core.h"
455 #endif
456 
457 #else // no shared memory blocking
458 
459 #if DD_DAG
461 #else
462 #include "tm_dslash_gt200_core.h"
463 #endif
464 
465 #endif
466 
467 #endif
468 
469 }
470 
471 template <>
473  (const DslashParam param) {
474 
475 #ifdef GPU_TWISTED_MASS_DIRAC
476 
477 #ifdef SHARED_WILSON_DSLASH // Fermi optimal code
478 
479 #if DD_DAG
481 #else
483 #endif
484 
485 #else // no shared memory blocking
486 
487 #if DD_DAG
489 #else
491 #endif
492 
493 #endif
494 
495 #endif
496 
497 }
498 
499 #undef TWIST_XPAY
500 #endif //(DD_XPAY==1) && (DD_TWIST==1)
501 
502 
503 //BEGIN DUMMY KERNEL (remove it later)
504 #if (DD_XPAY==0) && (DD_TWIST==1)
505 #define TWIST_XPAY
506 
507 //redefine kernel name:
508 #undef DD_NAME_F
509 #define DD_NAME_F twistedMassDslashTwist
510 
511 template <KernelType kernel_type>
513 
514 #ifdef GPU_TWISTED_MASS_DIRAC
515 
516 #endif
517 
518 }
519 #undef TWIST_XPAY
520 #endif //(DD_XPAY==0) && (DD_TWIST==1)
521 //END DUMMY KERNEL
522 
523 #endif
524 
525 // clean up
526 
527 #undef DD_PREC_F
528 #undef DD_NAME_F
529 #undef DD_RECON_F
530 #undef DD_DAG_F
531 #undef DD_XPAY_F
532 #undef DD_CONCAT
533 #undef DD_FUNC
534 
535 #undef DSLASH_XPAY
536 
538 #undef TWIST_INV_DSLASH
539 #undef READ_GAUGE_MATRIX
541 #undef RECONSTRUCT_GAUGE_MATRIX
542 #undef GAUGE0TEX
543 #undef GAUGE1TEX
544 #undef READ_SPINOR
545 #undef READ_SPINOR_GHOST
546 #undef READ_SPINOR_UP
547 #undef READ_SPINOR_DOWN
548 #undef SPINORTEX
549 #undef GHOSTSPINORTEX
550 #undef READ_INTERMEDIATE_SPINOR
551 #undef INTERTEX
552 #undef READ_ACCUM
553 #undef ACCUMTEX
554 #undef WRITE_SPINOR
555 #undef GAUGE_FLOAT2
556 #undef SPINOR_DOUBLE
557 
558 #undef SPINOR_HOP
559 
560 #undef TPROJSCALE
561 
562 // prepare next set of options, or clean up after final iteration
// The five option macros act as digits of a counter that is advanced once
// per pass over this file. DD_DAG is the fastest-changing digit; each time a
// digit wraps back to 0 the next one is advanced, in the order
// DD_DAG -> DD_TWIST -> DD_XPAY -> DD_RECON -> DD_PREC.
// DD_DAG, DD_TWIST and DD_XPAY take the values 0,1; DD_RECON and DD_PREC take
// the values 0,1,2.
563 
564 #if (DD_DAG==0)
565 #undef DD_DAG
566 #define DD_DAG 1
567 #else
// DD_DAG wraps to 0: carry into DD_TWIST
568 #undef DD_DAG
569 #define DD_DAG 0
570 
571 #if (DD_TWIST==0)
572 #undef DD_TWIST
573 #define DD_TWIST 1
574 #else
// DD_TWIST wraps to 0: carry into DD_XPAY
575 #undef DD_TWIST
576 #define DD_TWIST 0
577 
578 #if (DD_XPAY==0)
579 #undef DD_XPAY
580 #define DD_XPAY 1
581 #else
// DD_XPAY wraps to 0: carry into DD_RECON
582 #undef DD_XPAY
583 #define DD_XPAY 0
584 
585 #if (DD_RECON==0)
586 #undef DD_RECON
587 #define DD_RECON 1
588 #elif (DD_RECON==1)
589 #undef DD_RECON
590 #define DD_RECON 2
591 #else
// DD_RECON wraps to 0: carry into DD_PREC
592 #undef DD_RECON
593 #define DD_RECON 0
594 
595 #if (DD_PREC==0)
596 #undef DD_PREC
597 #define DD_PREC 1
598 #elif (DD_PREC==1)
599 #undef DD_PREC
600 #define DD_PREC 2
601 
602 #else
603 
// Every digit has wrapped: this was the final combination. Remove the loop
// marker and all counters so the self-include below is not taken.
604 #undef DD_LOOP
605 #undef DD_DAG
606 #undef DD_TWIST
607 #undef DD_XPAY
608 #undef DD_RECON
609 #undef DD_PREC
610 
611 #endif // DD_PREC
612 #endif // DD_RECON
613 #endif // DD_XPAY
614 #endif // DD_TWIST
615 #endif // DD_DAG
616 
// While DD_LOOP is still defined, include this file again so it is processed
// for the next option combination.
617 #ifdef DD_LOOP
618 #include "tm_dslash_def.h"
619 #endif
#define DD_XPAY_F
Definition: tm_dslash_def.h:52
#define DD_FUNC(n, p, r, d, x)
#define DD_RECON_F
Definition: tm_dslash_def.h:75
QudaGaugeParam param
Definition: pack_test.cpp:17
#define DD_PREC_F
Definition: tm_dslash_def.h:59
#define DD_NAME_F
Definition: tm_dslash_def.h:67
#define DD_DAG_F
Definition: tm_dslash_def.h:45