QUDA  v0.7.0
A library for QCD on GPUs
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
tmc_dslash_def.h
Go to the documentation of this file.
1 // tmc_dslash_def.h - Twisted Mass Dslash kernel definitions
2 
3 // There are currently 36 different variants of the Twisted Mass
4 // Wilson Dslash kernel, each one characterized by a set of 5 options,
5 // where each option can take one of several values (3*2*2*3 = 36).
6 // This file is structured so that the C preprocessor loops through all 36
7 // variants (in a manner resembling a counter), sets the appropriate
8 // macros, and defines the corresponding functions.
9 //
10 // As an example of the function naming conventions, consider
11 //
12 // twistedMassDslash12DaggerXpayKernel(float4* out, ...).
13 //
14 // This is a twisted mass Dslash^ger kernel where the result is
15 // multiplied by "a" and summed with an input vector (Xpay), and the
16 // gauge matrix is reconstructed from 12 real numbers. More
17 // generally, each function name is given by the concatenation of the
18 // following 4 fields, with "Kernel" at the end:
19 //
20 // DD_NAME_F = twistedCloverDslash
21 // DD_RECON_F = 8, 12, 18
22 // DD_DAG_F = Dagger, [blank]
23 // DD_XPAY_F = Xpay, [blank]
24 //
25 // In addition, the kernels are templated on the precision of the
26 // fields (double, single, or half).
27 
28 // initialize on first iteration
29 
30 #ifndef DD_LOOP
31 #define DD_LOOP
32 #define DD_DAG 0
33 #define DD_XPAY 0
34 #define DD_TWIST 0
36 #define DD_RECON 0
37 #define DD_PREC 0
38 #endif
39 
40 // set options for current iteration
41 
42 //#define DD_NAME_F twistedCloverDslash
43 
44 #if (DD_DAG==0) // no dagger
45 #define DD_DAG_F
46 #else // dagger
47 #define DD_DAG_F Dagger
48 #endif
49 
51 #if (DD_XPAY==0) // no xpay
52 #define DD_XPAY_F
53 #elif (DD_XPAY==1)
54 #define DSLASH_XPAY
55 #define DD_XPAY_F Xpay
56 #endif
57 
58 #if (DD_TWIST==0) // twisted input
59 #define DD_NAME_F twistedCloverInvDslash
60 #define CLOVER_TWIST_INV_DSLASH
61 #else
62 #define DD_NAME_F twistedCloverDslash
63 #endif
64 
66 #if (DD_PREC == 0)
67  #define DD_PARAMCLOVER const double2 *clover, const float *null2, const double2 *cloverInv, const float *null3
68  #if (defined DIRECT_ACCESS_CLOVER) || (defined FERMI_NO_DBLE_TEX)
69  #define TMCLOVERTEX clover
70  #define TM_INV_CLOVERTEX cloverInv
71  #define READ_CLOVER READ_CLOVER_DOUBLE_STR
72  #define ASSN_CLOVER ASSN_CLOVER_DOUBLE_STR
73  #else
74  #ifdef USE_TEXTURE_OBJECTS
75  #define TMCLOVERTEX (param.cloverTex)
76  #define TM_INV_CLOVERTEX (param.cloverInvTex)
77  #else
78  #define TMCLOVERTEX cloverTexDouble
79  #define TM_INV_CLOVERTEX cloverInvTexDouble
80  #endif
81  #define READ_CLOVER READ_CLOVER_DOUBLE_TEX
82  #define ASSN_CLOVER ASSN_CLOVER_DOUBLE_TEX
83  #endif
84  #define CLOVER_DOUBLE
85 #elif (DD_PREC == 1)
86  #define DD_PARAMCLOVER const float4 *clover, const float *null2, const float4 *cloverInv, const float *null3
87  #ifdef DIRECT_ACCESS_CLOVER
88  #define TMCLOVERTEX clover
89  #define TM_INV_CLOVERTEX cloverInv
90  #define READ_CLOVER READ_CLOVER_SINGLE
91  #define ASSN_CLOVER ASSN_CLOVER_SINGLE
92  #else
93  #ifdef USE_TEXTURE_OBJECTS
94  #define TMCLOVERTEX (param.cloverTex)
95  #define TM_INV_CLOVERTEX (param.cloverInvTex)
96  #else
97  #define TMCLOVERTEX cloverTexSingle
98  #define TM_INV_CLOVERTEX cloverInvTexSingle
99  #endif
100  #define READ_CLOVER READ_CLOVER_SINGLE_TEX
101  #define ASSN_CLOVER ASSN_CLOVER_SINGLE_TEX
102  #endif
103 #else // half-precision fields
104  #define DD_PARAMCLOVER const short4 *clover, const float *cNorm, const short4 *cloverInv, const float *cNrm2
105  #ifdef DIRECT_ACCESS_CLOVER
106  #define TMCLOVERTEX clover
107  #define TM_INV_CLOVERTEX cloverInv
108  #define READ_CLOVER READ_CLOVER_HALF
109  #define ASSN_CLOVER ASSN_CLOVER_HALF
110  #else
111  #ifdef USE_TEXTURE_OBJECTS
112  #define TMCLOVERTEX (param.cloverTex)
113  #define TMCLOVERTEXNORM (param.cloverNormTex)
114  #define TM_INV_CLOVERTEX (param.cloverInvTex)
115  #define TM_INV_CLOVERTEXNORM (param.cloverInvNormTex)
116  #else
117  #define TMCLOVERTEX cloverTexHalf
118  #define TMCLOVERTEXNORM cloverTexNorm
119  #define TM_INV_CLOVERTEX cloverInvTexHalf
120  #define TM_INV_CLOVERTEXNORM cloverInvTexNorm
121  #endif
122  #define READ_CLOVER READ_CLOVER_HALF_TEX
123  #define ASSN_CLOVER ASSN_CLOVER_HALF_TEX
124  #endif
125 
126 #endif //End clover defs
127 
128 
129 #if (DD_PREC == 0)
130 #define DD_PARAM4 const double a, const double b, const double2 *x, const float *xNorm, const DslashParam param
131 #elif (DD_PREC == 1)
132 #define DD_PARAM4 const float a, const float b, const float4 *x, const float *xNorm, const DslashParam param
133 #else
134 #define DD_PARAM4 const float a, const float b, const short4 *x, const float *xNorm, const DslashParam param
135 #endif
136 
137 #if (DD_RECON==0) // reconstruct from 8 reals
138 #define DD_RECON_F 8
139 
140 #if (DD_PREC==0)
141 #define DD_PARAM2 const double2 *gauge0, const double2 *gauge1
142 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_DOUBLE
143 #ifdef DIRECT_ACCESS_LINK
144 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_DOUBLE2
145 #else
146 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_DOUBLE2_TEX
147 #endif // DIRECT_ACCESS_LINK
148 
149 #elif (DD_PREC==1)
150 #define DD_PARAM2 const float4 *gauge0, const float4 *gauge1
151 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_SINGLE
152 #ifdef DIRECT_ACCESS_LINK
153 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_FLOAT4
154 #else
155 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_FLOAT4_TEX
156 #endif // DIRECT_ACCESS_LINK
157 
158 #else
159 #define DD_PARAM2 const short4 *gauge0, const short4* gauge1
160 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_SINGLE
161 #ifdef DIRECT_ACCESS_LINK
162 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_SHORT4
163 #else
164 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_SHORT4_TEX
165 #endif // DIRECT_ACCESS_LINK
166 #endif // DD_PREC
167 #elif (DD_RECON==1) // reconstruct from 12 reals
168 #define DD_RECON_F 12
169 
170 #if (DD_PREC==0)
171 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_DOUBLE
172 #ifdef DIRECT_ACCESS_LINK
173 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_DOUBLE2
174 #else
175 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_DOUBLE2_TEX
176 #endif // DIRECT_ACCESS_LINK
177 #define DD_PARAM2 const double2 *gauge0, const double2 *gauge1
178 
179 #elif (DD_PREC==1)
180 #define DD_PARAM2 const float4 *gauge0, const float4 *gauge1
181 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_SINGLE
182 #ifdef DIRECT_ACCESS_LINK
183 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_FLOAT4
184 #else
185 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_FLOAT4_TEX
186 #endif // DIRECT_ACCESS_LINK
187 
188 #else
189 #define DD_PARAM2 const short4 *gauge0, const short4 *gauge1
190 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_SINGLE
191 #ifdef DIRECT_ACCESS_LINK
192 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_SHORT4
193 #else
194 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_SHORT4_TEX
195 #endif // DIRECT_ACCESS_LINK
196 #endif // DD_PREC
197 #else // no reconstruct, load all components
198 #define DD_RECON_F 18
199 #define GAUGE_FLOAT2
200 #if (DD_PREC==0)
201 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_DOUBLE
202 #ifdef DIRECT_ACCESS_LINK
203 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_DOUBLE2
204 #else
205 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_DOUBLE2_TEX
206 #endif // DIRECT_ACCESS_LINK
207 #define DD_PARAM2 const double2 *gauge0, const double2 *gauge1
208 
209 #elif (DD_PREC==1)
210 #define DD_PARAM2 const float4 *gauge0, const float4 *gauge1 // FIXME for direct reading, really float2
211 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_SINGLE
212 #ifdef DIRECT_ACCESS_LINK
213 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_FLOAT2
214 #else
215 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_FLOAT2_TEX
216 #endif // DIRECT_ACCESS_LINK
217 
218 #else
219 #define DD_PARAM2 const short4 *gauge0, const short4 *gauge1 // FIXME for direct reading, really short2
220 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_SINGLE
221 #ifdef DIRECT_ACCESS_LINK
222 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_SHORT2
223 #else
224 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_SHORT2_TEX
225 #endif //DIRECT_ACCESS_LINK
226 #endif
227 #endif
228 
229 #if (DD_PREC==0) // double-precision fields
230 
231 #define TPROJSCALE tProjScale
232 
233 // double-precision gauge field
234 #if (defined DIRECT_ACCESS_LINK) || (defined FERMI_NO_DBLE_TEX)
235 #define GAUGE0TEX gauge0
236 #define GAUGE1TEX gauge1
237 #else
238 #ifdef USE_TEXTURE_OBJECTS
239 #define GAUGE0TEX param.gauge0Tex
240 #define GAUGE1TEX param.gauge1Tex
241 #else
242 #define GAUGE0TEX gauge0TexDouble2
243 #define GAUGE1TEX gauge1TexDouble2
244 #endif // USE_TEXTURE_OBJECTS
245 #endif
246 
247 #define GAUGE_FLOAT2
248 
249 // double-precision spinor fields
250 #define DD_PARAM1 double2* out, float *null1
251 #define DD_PARAM3 const double2* in, const float *null4
252 #if (defined DIRECT_ACCESS_WILSON_SPINOR) || (defined FERMI_NO_DBLE_TEX)
253 #define READ_SPINOR READ_SPINOR_DOUBLE
254 #define READ_SPINOR_UP READ_SPINOR_DOUBLE_UP
255 #define READ_SPINOR_DOWN READ_SPINOR_DOUBLE_DOWN
256 #define SPINORTEX in
257 #else
258 #define READ_SPINOR READ_SPINOR_DOUBLE_TEX
259 #define READ_SPINOR_UP READ_SPINOR_DOUBLE_UP_TEX
260 #define READ_SPINOR_DOWN READ_SPINOR_DOUBLE_DOWN_TEX
261 #ifdef USE_TEXTURE_OBJECTS
262 #define SPINORTEX param.inTex
263 #else
264 #define SPINORTEX spinorTexDouble
265 #endif // USE_TEXTURE_OBJECTS
266 #endif
267 #if (defined DIRECT_ACCESS_WILSON_INTER) || (defined FERMI_NO_DBLE_TEX)
268 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_DOUBLE
269 #define INTERTEX out
270 #else
271 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_DOUBLE_TEX
272 #ifdef USE_TEXTURE_OBJECTS
273 #define INTERTEX param.outTex
274 #else
275 #define INTERTEX interTexDouble
276 #endif
277 #endif
278 #define WRITE_SPINOR WRITE_SPINOR_DOUBLE2
279 #define SPINOR_DOUBLE
280 #if (DD_XPAY!=0)
281 #if (defined DIRECT_ACCESS_WILSON_ACCUM) || (defined FERMI_NO_DBLE_TEX)
282 #define ACCUMTEX x
283 #define READ_ACCUM READ_ACCUM_DOUBLE
284 #else
285 #ifdef USE_TEXTURE_OBJECTS
286 #define ACCUMTEX param.xTex
287 #else
288 #define ACCUMTEX accumTexDouble
289 #endif // USE_TEXTURE_OBJECTS
290 #define READ_ACCUM READ_ACCUM_DOUBLE_TEX
291 #endif
292 
293 #endif
294 
295 #define SPINOR_HOP 12
296 
297 #elif (DD_PREC==1) // single-precision fields
298 
299 #define TPROJSCALE tProjScale_f
300 
301 // single-precision gauge field
302 #ifdef DIRECT_ACCESS_LINK
303 #define GAUGE0TEX gauge0
304 #define GAUGE1TEX gauge1
305 #else
306 #ifdef USE_TEXTURE_OBJECTS
307 #define GAUGE0TEX param.gauge0Tex
308 #define GAUGE1TEX param.gauge1Tex
309 #else
310 #if (DD_RECON_F == 18)
311 #define GAUGE0TEX gauge0TexSingle2
312 #define GAUGE1TEX gauge1TexSingle2
313 #else
314 #define GAUGE0TEX gauge0TexSingle4
315 #define GAUGE1TEX gauge1TexSingle4
316 #endif
317 #endif // USE_TEXTURE_OBJECTS
318 #endif
319 
320 
321 // single-precision spinor fields
322 #define DD_PARAM1 float4* out, float *null1
323 #define DD_PARAM3 const float4* in, const float *null4
324 #ifdef DIRECT_ACCESS_WILSON_SPINOR
325 #define READ_SPINOR READ_SPINOR_SINGLE
326 #define READ_SPINOR_UP READ_SPINOR_SINGLE_UP
327 #define READ_SPINOR_DOWN READ_SPINOR_SINGLE_DOWN
328 #define SPINORTEX in
329 #else
330 #define READ_SPINOR READ_SPINOR_SINGLE_TEX
331 #define READ_SPINOR_UP READ_SPINOR_SINGLE_UP_TEX
332 #define READ_SPINOR_DOWN READ_SPINOR_SINGLE_DOWN_TEX
333 #ifdef USE_TEXTURE_OBJECTS
334 #define SPINORTEX param.inTex
335 #else
336 #define SPINORTEX spinorTexSingle
337 #endif // USE_TEXTURE_OBJECTS
338 #endif
339 #ifdef DIRECT_ACCESS_WILSON_INTER
340 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_SINGLE
341 #define INTERTEX out
342 #else
343 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_SINGLE_TEX
344 #ifdef USE_TEXTURE_OBJECTS
345 #define INTERTEX param.outTex
346 #else
347 #define INTERTEX interTexSingle
348 #endif // USE_TEXTURE_OBJECTS
349 #endif
350 #define WRITE_SPINOR WRITE_SPINOR_FLOAT4
351 #if (DD_XPAY!=0)
352 #ifdef DIRECT_ACCESS_WILSON_ACCUM
353 #define ACCUMTEX x
354 #define READ_ACCUM READ_ACCUM_SINGLE
355 #else
356 #ifdef USE_TEXTURE_OBJECTS
357 #define ACCUMTEX param.xTex
358 #else
359 #define ACCUMTEX accumTexSingle
360 #endif // USE_TEXTURE_OBJECTS
361 #define READ_ACCUM READ_ACCUM_SINGLE_TEX
362 #endif
363 #endif
364 
365 #define SPINOR_HOP 6
366 
367 #else // half-precision fields
368 
369 #define TPROJSCALE tProjScale_f
370 
371 // half-precision gauge field
372 #ifdef DIRECT_ACCESS_LINK
373 #define GAUGE0TEX gauge0
374 #define GAUGE1TEX gauge1
375 #else
376 #ifdef USE_TEXTURE_OBJECTS
377 #define GAUGE0TEX param.gauge0Tex
378 #define GAUGE1TEX param.gauge1Tex
379 #else
380 #if (DD_RECON_F == 18)
381 #define GAUGE0TEX gauge0TexHalf2
382 #define GAUGE1TEX gauge1TexHalf2
383 #else
384 #define GAUGE0TEX gauge0TexHalf4
385 #define GAUGE1TEX gauge1TexHalf4
386 #endif
387 #endif // USE_TEXTURE_OBJECTS
388 #endif
389 
390 
391 // half-precision spinor fields
392 #ifdef DIRECT_ACCESS_WILSON_SPINOR
393 #define READ_SPINOR READ_SPINOR_HALF
394 #define READ_SPINOR_UP READ_SPINOR_HALF_UP
395 #define READ_SPINOR_DOWN READ_SPINOR_HALF_DOWN
396 #define SPINORTEX in
397 #else
398 #define READ_SPINOR READ_SPINOR_HALF_TEX
399 #define READ_SPINOR_UP READ_SPINOR_HALF_UP_TEX
400 #define READ_SPINOR_DOWN READ_SPINOR_HALF_DOWN_TEX
401 #ifdef USE_TEXTURE_OBJECTS
402 #define SPINORTEX param.inTex
403 #else
404 #define SPINORTEX spinorTexHalf
405 #endif // USE_TEXTURE_OBJECTS
406 #endif
407 #ifdef DIRECT_ACCESS_WILSON_INTER
408 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_HALF
409 #define INTERTEX out
410 #else
411 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_HALF_TEX
412 #ifdef USE_TEXTURE_OBJECTS
413 #define INTERTEX param.outTex
414 #else
415 #define INTERTEX interTexHalf
416 #endif // USE_TEXTURE_OBJECTS
417 #endif
418 #define DD_PARAM1 short4* out, float *outNorm
419 #define DD_PARAM3 const short4* in, const float *inNorm
420 #define WRITE_SPINOR WRITE_SPINOR_SHORT4
421 #if (DD_XPAY!=0)
423 #ifdef DIRECT_ACCESS_WILSON_ACCUM
424 #define ACCUMTEX x
425 #define READ_ACCUM READ_ACCUM_HALF
426 #else
427 #ifdef USE_TEXTURE_OBJECTS
428 #define ACCUMTEX param.xTex
429 #else
430 #define ACCUMTEX accumTexHalf
431 #endif // USE_TEXTURE_OBJECTS
432 #define READ_ACCUM READ_ACCUM_HALF_TEX
433 #endif
434 
435 #endif
436 
437 #define SPINOR_HOP 6
438 
439 #endif
440 
441 // only build double precision if supported
442 #if !(__COMPUTE_CAPABILITY__ < 130 && DD_PREC == 0)
443 
444  #define DD_CONCAT(n,r,d,x) n ## r ## d ## x ## Kernel
445  #define DD_FUNC(n,r,d,x) DD_CONCAT(n,r,d,x)
446 
447 // define the kernel
449 template <KernelType kernel_type>
450 __global__ void DD_FUNC(DD_NAME_F, DD_RECON_F, DD_DAG_F, DD_XPAY_F)
452 
453  #ifdef GPU_TWISTED_CLOVER_DIRAC
454 
455  #if (__COMPUTE_CAPABILITY__ >= 200 && defined(SHARED_WILSON_DSLASH)) // Fermi optimal code
456 
457  #if DD_DAG
459  #else
460  #include "tmc_dslash_fermi_core.h"
461  #endif
462 
463  #elif (__COMPUTE_CAPABILITY__ >= 120) // GT200 optimal code
464 
465  #if DD_DAG
467  #else
468  #include "tmc_dslash_gt200_core.h"
469  #endif
470 
471  #else // fall-back is original G80
472 
473  #if DD_DAG
475  #else
476  #include "tmc_dslash_g80_core.h"
477  #endif
478 
479  #endif
480 
481  #endif
482 
483 }
484 
485 template <>
488 
489 #ifdef GPU_TWISTED_CLOVER_DIRAC
490 
491 #if (__COMPUTE_CAPABILITY__ >= 200 && defined(SHARED_WILSON_DSLASH)) // Fermi optimal code
492 
493 #if DD_DAG
495 #else
497 #endif
498 
499 #elif (__COMPUTE_CAPABILITY__ >= 120) // GT200 optimal code
500 
501 #if DD_DAG
503 #else
505 #endif
506 
507 #else // fall-back is original G80
508 
509 #if DD_DAG
511 #else
513 #endif
514 
515 #endif
516 
517 #endif
518 
519 }
520 
521 
522 //NEW
523 #if (DD_XPAY==1) && (DD_TWIST==1)
524 #define CLOVER_TWIST_XPAY
525 
526 //redefine kernel name:
527 #undef DD_NAME_F
528 #define DD_NAME_F twistedCloverDslashTwist
529 
530 template <KernelType kernel_type>
531 __global__ void DD_FUNC(DD_NAME_F, DD_RECON_F, DD_DAG_F, DD_XPAY_F)
533 
534 #ifdef GPU_TWISTED_CLOVER_DIRAC
535 
536 #if (__COMPUTE_CAPABILITY__ >= 200 && defined(SHARED_WILSON_DSLASH)) // Fermi optimal code
537 
538 #if DD_DAG
540 #else
541 #include "tmc_dslash_fermi_core.h"
542 #endif
543 
544 #elif (__COMPUTE_CAPABILITY__ >= 120) // GT200 optimal code
545 
546 #if DD_DAG
548 #else
549 #include "tmc_dslash_gt200_core.h"
550 #endif
551 
552 #else // fall-back is original G80
553 
554 #if DD_DAG
556 #else
557 #include "tmc_dslash_g80_core.h"
558 #endif
559 
560 #endif
561 
562 #endif
563 
564 }
565 
566 template <>
569 
570 #ifdef GPU_TWISTED_CLOVER_DIRAC
571 
572 #if (__COMPUTE_CAPABILITY__ >= 200 && defined(SHARED_WILSON_DSLASH)) // Fermi optimal code
573 
574 #if DD_DAG
576 #else
578 #endif
579 
580 #elif (__COMPUTE_CAPABILITY__ >= 120) // GT200 optimal code
581 
582 #if DD_DAG
584 #else
586 #endif
587 
588 #else // fall-back is original G80
589 
590 #if DD_DAG
592 #else
594 #endif
595 
596 #endif
597 
598 #endif
599 
600 }
601 
602 #undef CLOVER_TWIST_XPAY
603 #endif //(DD_XPAY==0) && (DD_TWIST==1)
604 
605 
606 //BEGIN DUMMY KERNEL (remove it later)
607 #if (DD_XPAY==0) && (DD_TWIST==1)
608 #define CLOVER_TWIST_XPAY
609 
610 //redefine kernel name:
611 #undef DD_NAME_F
612 #define DD_NAME_F twistedCloverDslashTwist
613 
614 template <KernelType kernel_type>
615 __global__ void DD_FUNC(DD_NAME_F, DD_RECON_F, DD_DAG_F, DD_XPAY_F)
617 
618 #ifdef GPU_TWISTED_CLOVER_DIRAC
619 
620 #endif
621 
622 }
623 #undef CLOVER_TWIST_XPAY
624 #endif //(DD_XPAY==0) && (DD_TWIST==1)
625 //END DUMMY KERNEL
626 
627 #endif
628 
629 // clean up
630 
631 #undef DD_NAME_F
632 #undef DD_RECON_F
633 #undef DD_DAG_F
634 #undef DD_XPAY_F
635 #undef DD_PARAM1
636 #undef DD_PARAM2
637 #undef DD_PARAMCLOVER
638 #undef DD_PARAM3
639 #undef DD_PARAM4
640 #undef DD_CONCAT
641 #undef DD_FUNC
642 
643 #undef DSLASH_XPAY
644 
646 #undef CLOVER_TWIST_INV_DSLASH
647 #undef READ_GAUGE_MATRIX
649 #undef RECONSTRUCT_GAUGE_MATRIX
650 #undef GAUGE0TEX
651 #undef GAUGE1TEX
652 #undef READ_SPINOR
653 #undef READ_SPINOR_UP
654 #undef READ_SPINOR_DOWN
655 #undef SPINORTEX
656 #undef READ_INTERMEDIATE_SPINOR
657 #undef INTERTEX
658 #undef READ_ACCUM
659 #undef ACCUMTEX
660 #undef WRITE_SPINOR
661 #undef GAUGE_FLOAT2
662 #undef SPINOR_DOUBLE
663 
664 #undef READ_CLOVER
665 #undef ASSN_CLOVER
666 #undef TMCLOVERTEX
667 #undef TMCLOVERTEXNORM
668 #undef TM_INV_CLOVERTEX
669 #undef TM_INV_CLOVERTEXNORM
670 #undef CLOVER_DOUBLE
671 
672 #undef SPINOR_HOP
673 
674 #undef TPROJSCALE
675 
676 // prepare next set of options, or clean up after final iteration
677 
678 #if (DD_DAG==0)
679 #undef DD_DAG
680 #define DD_DAG 1
681 #else
682 #undef DD_DAG
683 #define DD_DAG 0
684 
685 #if (DD_TWIST==0)
686 #undef DD_TWIST
687 #define DD_TWIST 1
688 #else
689 #undef DD_TWIST
690 #define DD_TWIST 0
691 
692 #if (DD_XPAY==0)
693 #undef DD_XPAY
694 #define DD_XPAY 1
695 #else
696 #undef DD_XPAY
697 #define DD_XPAY 0
698 
699 #if (DD_RECON==0)
700 #undef DD_RECON
701 #define DD_RECON 1
702 #elif (DD_RECON==1)
703 #undef DD_RECON
704 #define DD_RECON 2
705 #else
706 #undef DD_RECON
707 #define DD_RECON 0
708 
709 #if (DD_PREC==0)
710 #undef DD_PREC
711 #define DD_PREC 1
712 #elif (DD_PREC==1)
713 #undef DD_PREC
714 #define DD_PREC 2
715 
716 #else
717 
718 #undef DD_LOOP
719 #undef DD_DAG
720 #undef DD_TWIST
721 #undef DD_XPAY
722 #undef DD_RECON
723 #undef DD_PREC
724 
725 #endif // DD_PREC
726 #endif // DD_RECON
727 #endif // DD_XPAY
728 #endif // DD_TWIST
729 #endif // DD_DAG
730 
731 #ifdef DD_LOOP
732 #include "tmc_dslash_def.h"
733 #endif
#define DD_DAG_F
#define DD_PARAMCLOVER
#define DD_RECON_F
#define DD_PARAM4
#define DD_FUNC(x)
Definition: clover_def.h:141
#define DD_PARAM1
#define DD_PARAM2
#define DD_XPAY_F
#define DD_NAME_F
#define DD_PARAM3