QUDA  v0.7.0
A library for QCD on GPUs
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
tm_ndeg_dslash_def.h
Go to the documentation of this file.
1 // tm_ndeg_dslash_def.h - Non-degenerate Twisted Mass Dslash kernel definitions
2 
3 // There are currently 72 different variants of the non-degenerate Twisted
4 // Mass Wilson Dslash kernel, each one characterized by a set of 5 options,
5 // where each option can take one of several values (2*2*2*3*3 = 72).
6 // This file is structured so that the C preprocessor loops through all 72
7 // variants (in a manner resembling a counter), sets the appropriate
8 // macros, and defines the corresponding functions.
9 //
10 // As an example of the function naming conventions, consider
11 //
12 // twistedNdegMassDslash12DaggerXpayKernel(float4* out, ...).
13 //
14 // This is a twisted mass Dslash^dagger kernel where the result is
15 // multiplied by "a" and summed with an input vector (Xpay), and the
16 // gauge matrix is reconstructed from 12 real numbers. More
17 // generally, each function name is given by the concatenation of the
18 // following 5 fields, with "Kernel" at the end:
19 //
20 // DD_NAME_F = twistedNdegMassDslash
21 // DD_RECON_F = 8, 12, 18
22 // DD_DAG_F = Dagger, [blank]
23 // DD_TWIST_F = Twist, [blank]
24 // DD_XPAY_F = Xpay, [blank]
25 // The precision of the fields (double, single, or half) is selected by DD_PREC and
26 // changes the kernel's parameter types, not its name; each kernel is templated on KernelType.
27 
28 // initialize on first iteration: DD_LOOP is undefined only when the loop has not
29 // started yet, so the option counters below are reset to 0 exactly once per loop
30 #ifndef DD_LOOP
31 #define DD_LOOP // marks the loop as in progress; undefined again after the final combination
32 #define DD_DAG 0 // 0 = no dagger, otherwise dagger
33 #define DD_XPAY 0 // 0 = no xpay, otherwise xpay
34 #define DD_TWIST 0 // 0 = no twist, otherwise twist
35 #define DD_RECON 0 // 0, 1, 2 = reconstruct from 8, 12, 18 reals
36 #define DD_PREC 0 // 0, 1, 2 = double, single, half precision
37 #endif
38 
39 // set options for current iteration: each DD_*_F macro is the (possibly empty)
40 // name fragment that corresponds to the current value of the matching counter
41 #define DD_NAME_F twistedNdegMassDslash
42 
43 #if (DD_DAG==0) // no dagger
44 #define DD_DAG_F
45 #else // dagger
46 #define DD_DAG_F Dagger
47 #endif
48 
49 #if (DD_XPAY==0) // no xpay
50 #define DD_XPAY_F
51 #else // xpay
52 #define DSLASH_XPAY // not tested in this file; presumably consumed by the included core header (confirm)
53 #define DD_XPAY_F Xpay
54 #endif
55 
57 #if (DD_TWIST==0) // no twist
58 #define DD_TWIST_F
59 #else // twist
60 #define DSLASH_TWIST // not tested in this file; presumably consumed by the included core header (confirm)
61 #define DD_TWIST_F Twist
62 #endif
63 // DD_PARAM4: parameter list fragment holding the scalars a, b, c, k, the field x, xNorm and the DslashParam; only the scalar and x types depend on DD_PREC
65 #if (DD_PREC == 0) // double precision: double scalars, double2 x
66 #define DD_PARAM4 const double a, const double b, const double c, const double k, const double2 *x, const float *xNorm, const DslashParam param
67 #elif (DD_PREC == 1) // single precision: float scalars, float4 x
68 #define DD_PARAM4 const float a, const float b, const float c, const float k, const float4 *x, const float *xNorm, const DslashParam param
69 #else // half precision: float scalars, short4 x
70 #define DD_PARAM4 const float a, const float b, const float c, const float k, const short4 *x, const float *xNorm, const DslashParam param
71 #endif
72 // Gauge-link options: per DD_RECON / DD_PREC pair, set the name fragment (DD_RECON_F), the gauge pointer types (DD_PARAM2) and the READ_/RECONSTRUCT_GAUGE_MATRIX macros
73 #if (DD_RECON==0) // reconstruct from 8 reals
74 #define DD_RECON_F 8
75 
76 #if (DD_PREC==0) // double precision
77 #define DD_PARAM2 const double2 *gauge0, const double2 *gauge1
78 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_DOUBLE
79 #ifdef DIRECT_ACCESS_LINK // direct access selects the non-_TEX reader
80 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_DOUBLE2
81 #else
82 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_DOUBLE2_TEX
83 #endif // DIRECT_ACCESS_LINK
84 
85 #elif (DD_PREC==1) // single precision
86 #define DD_PARAM2 const float4 *gauge0, const float4 *gauge1
87 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_SINGLE
88 #ifdef DIRECT_ACCESS_LINK
89 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_FLOAT4
90 #else
91 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_FLOAT4_TEX
92 #endif // DIRECT_ACCESS_LINK
93 
94 #else // half precision (uses the same RECONSTRUCT_MATRIX_8_SINGLE macro as single)
95 #define DD_PARAM2 const short4 *gauge0, const short4* gauge1
96 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_SINGLE
97 #ifdef DIRECT_ACCESS_LINK
98 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_SHORT4
99 #else
100 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_SHORT4_TEX
101 #endif // DIRECT_ACCESS_LINK
102 #endif // DD_PREC
103 #elif (DD_RECON==1) // reconstruct from 12 reals
104 #define DD_RECON_F 12
105 
106 #if (DD_PREC==0) // double precision
107 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_DOUBLE
108 #ifdef DIRECT_ACCESS_LINK
109 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_DOUBLE2
110 #else
111 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_DOUBLE2_TEX
112 #endif // DIRECT_ACCESS_LINK
113 #define DD_PARAM2 const double2 *gauge0, const double2 *gauge1
114 
115 #elif (DD_PREC==1) // single precision
116 #define DD_PARAM2 const float4 *gauge0, const float4 *gauge1
117 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_SINGLE
118 #ifdef DIRECT_ACCESS_LINK
119 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_FLOAT4
120 #else
121 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_FLOAT4_TEX
122 #endif // DIRECT_ACCESS_LINK
123 
124 #else // half precision (uses the same RECONSTRUCT_MATRIX_12_SINGLE macro as single)
125 #define DD_PARAM2 const short4 *gauge0, const short4 *gauge1
126 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_SINGLE
127 #ifdef DIRECT_ACCESS_LINK
128 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_SHORT4
129 #else
130 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_SHORT4_TEX
131 #endif // DIRECT_ACCESS_LINK
132 #endif // DD_PREC
133 #else // no reconstruct, load all components
134 #define DD_RECON_F 18
135 #define GAUGE_FLOAT2 // defined for every precision when no reconstruction is used
136 #if (DD_PREC==0) // double precision
137 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_DOUBLE
138 #ifdef DIRECT_ACCESS_LINK
139 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_DOUBLE2
140 #else
141 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_DOUBLE2_TEX
142 #endif // DIRECT_ACCESS_LINK
143 #define DD_PARAM2 const double2 *gauge0, const double2 *gauge1
144 
145 #elif (DD_PREC==1) // single precision
146 #define DD_PARAM2 const float4 *gauge0, const float4 *gauge1 // FIXME for direct reading, really float2
147 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_SINGLE
148 #ifdef DIRECT_ACCESS_LINK
149 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_FLOAT2
150 #else
151 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_FLOAT2_TEX
152 #endif // DIRECT_ACCESS_LINK
153 
154 #else // half precision (uses the same RECONSTRUCT_MATRIX_18_SINGLE macro as single)
155 #define DD_PARAM2 const short4 *gauge0, const short4 *gauge1 // FIXME for direct reading, really short2
156 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_SINGLE
157 #ifdef DIRECT_ACCESS_LINK
158 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_SHORT2
159 #else
160 #define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_SHORT2_TEX
161 #endif //DIRECT_ACCESS_LINK
162 #endif // DD_PREC
163 #endif // DD_RECON
164 // Precision-specific bindings: gauge/spinor/accumulator sources, spinor parameter types (DD_PARAM1, DD_PARAM3) and reader/writer macros
165 #if (DD_PREC==0) // double-precision fields
166 
167 #define TPROJSCALE tProjScale
168 
169 // double-precision gauge field
170 #if (defined DIRECT_ACCESS_LINK) || (defined FERMI_NO_DBLE_TEX) // bind to the plain gauge0/gauge1 pointer parameters
171 #define GAUGE0TEX gauge0
172 #define GAUGE1TEX gauge1
173 #else
174 #ifdef USE_TEXTURE_OBJECTS // bind to members of the DslashParam argument
175 #define GAUGE0TEX param.gauge0Tex
176 #define GAUGE1TEX param.gauge1Tex
177 #else // bind to names declared outside this file
178 #define GAUGE0TEX gauge0TexDouble2
179 #define GAUGE1TEX gauge1TexDouble2
180 #endif // USE_TEXTURE_OBJECTS
181 #endif
182 
183 #define GAUGE_FLOAT2 // always defined in double precision, whatever DD_RECON is
184 
185 // double-precision spinor fields (null1/null4 occupy the slots that are named outNorm/inNorm in half precision)
186 #define DD_PARAM1 double2* out, float *null1
187 #define DD_PARAM3 const double2* in, const float *null4
188 #if (defined DIRECT_ACCESS_WILSON_SPINOR) || (defined FERMI_NO_DBLE_TEX) // direct access binds SPINORTEX to the in pointer
189 #define READ_SPINOR READ_SPINOR_DOUBLE
190 #define READ_SPINOR_UP READ_SPINOR_DOUBLE_UP
191 #define READ_SPINOR_DOWN READ_SPINOR_DOUBLE_DOWN
192 #define SPINORTEX in
193 #else
194 #define READ_SPINOR READ_SPINOR_DOUBLE_TEX
195 #define READ_SPINOR_UP READ_SPINOR_DOUBLE_UP_TEX
196 #define READ_SPINOR_DOWN READ_SPINOR_DOUBLE_DOWN_TEX
197 #ifdef USE_TEXTURE_OBJECTS
198 #define SPINORTEX param.inTex
199 #else
200 #define SPINORTEX spinorTexDouble
201 #endif // USE_TEXTURE_OBJECTS
202 #endif
203 #if (defined DIRECT_ACCESS_WILSON_INTER) || (defined FERMI_NO_DBLE_TEX) // direct access binds INTERTEX to the out pointer
204 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_DOUBLE
205 #define INTERTEX out
206 #else
207 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_DOUBLE_TEX
208 #ifdef USE_TEXTURE_OBJECTS
209 #define INTERTEX param.outTex
210 #else
211 #define INTERTEX interTexDouble
212 #endif // USE_TEXTURE_OBJECTS
213 #endif
214 #define WRITE_FLAVOR_SPINOR WRITE_FLAVOR_SPINOR_DOUBLE2
215 #define SPINOR_DOUBLE // defined only in this (double) branch
216 #if (DD_XPAY==1) // accumulator macros are defined only for Xpay variants
217 #if (defined DIRECT_ACCESS_WILSON_ACCUM) || (defined FERMI_NO_DBLE_TEX) // direct access binds ACCUMTEX to the x pointer
218 #define ACCUMTEX x
219 #define READ_ACCUM READ_ACCUM_DOUBLE
220 #define ASSN_ACCUM ASSN_ACCUM_DOUBLE
222 #define READ_ACCUM_FLAVOR READ_ACCUM_FLAVOR_DOUBLE
223 #else
224 #ifdef USE_TEXTURE_OBJECTS
225 #define ACCUMTEX param.xTex
226 #else
227 #define ACCUMTEX accumTexDouble
228 #endif // USE_TEXTURE_OBJECTS
229 #define READ_ACCUM READ_ACCUM_DOUBLE_TEX
230 #define ASSN_ACCUM ASSN_ACCUM_DOUBLE_TEX
232 #define READ_ACCUM_FLAVOR READ_ACCUM_FLAVOR_DOUBLE_TEX
233 #endif
234 
235 #endif // DD_XPAY
236 
237 #define SPINOR_HOP 12 // 12 here versus 6 in the single and half branches; its use is not visible in this file
238 
239 #elif (DD_PREC==1) // single-precision fields: same set of macros as the double branch, bound to float4 types and *_SINGLE readers
240 
241 #define TPROJSCALE tProjScale_f
242 
243 // single-precision gauge field
244 #ifdef DIRECT_ACCESS_LINK // bind to the plain gauge0/gauge1 pointer parameters
245 #define GAUGE0TEX gauge0
246 #define GAUGE1TEX gauge1
247 #else
248 #ifdef USE_TEXTURE_OBJECTS // bind to members of the DslashParam argument
249 #define GAUGE0TEX param.gauge0Tex
250 #define GAUGE1TEX param.gauge1Tex
251 #else
252 #if (DD_RECON_F == 18) // no reconstruction uses the ...Single2 names, 8/12 use ...Single4
253 #define GAUGE0TEX gauge0TexSingle2
254 #define GAUGE1TEX gauge1TexSingle2
255 #else
256 #define GAUGE0TEX gauge0TexSingle4
257 #define GAUGE1TEX gauge1TexSingle4
258 #endif
259 #endif // USE_TEXTURE_OBJECTS
260 #endif
261 
262 
263 // single-precision spinor fields (null1/null4 occupy the slots that are named outNorm/inNorm in half precision)
264 #define DD_PARAM1 float4* out, float *null1
265 #define DD_PARAM3 const float4* in, const float *null4
266 #ifdef DIRECT_ACCESS_WILSON_SPINOR // direct access binds SPINORTEX to the in pointer
267 #define READ_SPINOR READ_SPINOR_SINGLE
268 #define READ_SPINOR_UP READ_SPINOR_SINGLE_UP
269 #define READ_SPINOR_DOWN READ_SPINOR_SINGLE_DOWN
270 #define SPINORTEX in
271 #else
272 #define READ_SPINOR READ_SPINOR_SINGLE_TEX
273 #define READ_SPINOR_UP READ_SPINOR_SINGLE_UP_TEX
274 #define READ_SPINOR_DOWN READ_SPINOR_SINGLE_DOWN_TEX
275 #ifdef USE_TEXTURE_OBJECTS
276 #define SPINORTEX param.inTex
277 #else
278 #define SPINORTEX spinorTexSingle
279 #endif // USE_TEXTURE_OBJECTS
280 #endif
281 #ifdef DIRECT_ACCESS_WILSON_INTER // direct access binds INTERTEX to the out pointer
282 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_SINGLE
283 #define INTERTEX out
284 #else
285 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_SINGLE_TEX
286 #ifdef USE_TEXTURE_OBJECTS
287 #define INTERTEX param.outTex
288 #else
289 #define INTERTEX interTexSingle
290 #endif // USE_TEXTURE_OBJECTS
291 #endif
292 #define WRITE_FLAVOR_SPINOR WRITE_FLAVOR_SPINOR_FLOAT4
293 #if (DD_XPAY==1) // accumulator macros are defined only for Xpay variants
294 #ifdef DIRECT_ACCESS_WILSON_ACCUM // direct access binds ACCUMTEX to the x pointer
295 #define ACCUMTEX x
296 #define READ_ACCUM READ_ACCUM_SINGLE
297 #define ASSN_ACCUM ASSN_ACCUM_SINGLE
299 #define READ_ACCUM_FLAVOR READ_ACCUM_FLAVOR_SINGLE
300 #else
301 #ifdef USE_TEXTURE_OBJECTS
302 #define ACCUMTEX param.xTex
303 #else
304 #define ACCUMTEX accumTexSingle
305 #endif // USE_TEXTURE_OBJECTS
306 #define READ_ACCUM READ_ACCUM_SINGLE_TEX
307 #define ASSN_ACCUM ASSN_ACCUM_SINGLE_TEX
309 #define READ_ACCUM_FLAVOR READ_ACCUM_FLAVOR_SINGLE_TEX
310 #endif
311 #endif // DD_XPAY
312 
313 #define SPINOR_HOP 6 // 6 here and in the half branch versus 12 in the double branch
314 
315 #else // half-precision fields: same set of macros again, bound to short4 types and *_HALF readers
316 
317 #define TPROJSCALE tProjScale_f
318 
319 // half-precision gauge field
320 #ifdef DIRECT_ACCESS_LINK // bind to the plain gauge0/gauge1 pointer parameters
321 #define GAUGE0TEX gauge0
322 #define GAUGE1TEX gauge1
323 #else
324 #ifdef USE_TEXTURE_OBJECTS // bind to members of the DslashParam argument
325 #define GAUGE0TEX param.gauge0Tex
326 #define GAUGE1TEX param.gauge1Tex
327 #else
328 #if (DD_RECON_F == 18) // no reconstruction uses the ...Half2 names, 8/12 use ...Half4
329 #define GAUGE0TEX gauge0TexHalf2
330 #define GAUGE1TEX gauge1TexHalf2
331 #else
332 #define GAUGE0TEX gauge0TexHalf4
333 #define GAUGE1TEX gauge1TexHalf4
334 #endif
335 #endif // USE_TEXTURE_OBJECTS
336 #endif
337 
338 
339 // half-precision spinor fields
340 #ifdef DIRECT_ACCESS_WILSON_SPINOR // direct access binds SPINORTEX to the in pointer
341 #define READ_SPINOR READ_SPINOR_HALF
342 #define READ_SPINOR_UP READ_SPINOR_HALF_UP
343 #define READ_SPINOR_DOWN READ_SPINOR_HALF_DOWN
344 #define SPINORTEX in
345 #else
346 #define READ_SPINOR READ_SPINOR_HALF_TEX
347 #define READ_SPINOR_UP READ_SPINOR_HALF_UP_TEX
348 #define READ_SPINOR_DOWN READ_SPINOR_HALF_DOWN_TEX
349 #ifdef USE_TEXTURE_OBJECTS
350 #define SPINORTEX param.inTex
351 #else
352 #define SPINORTEX spinorTexHalf
353 #endif // USE_TEXTURE_OBJECTS
354 #endif
355 #ifdef DIRECT_ACCESS_WILSON_INTER // direct access binds INTERTEX to the out pointer
356 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_HALF
357 #define INTERTEX out
358 #else
359 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_HALF_TEX
360 #ifdef USE_TEXTURE_OBJECTS
361 #define INTERTEX param.outTex
362 #else
363 #define INTERTEX interTexHalf
364 #endif // USE_TEXTURE_OBJECTS
365 #endif
366 #define DD_PARAM1 short4* out, float *outNorm
367 #define DD_PARAM3 const short4* in, const float *inNorm
368 #define WRITE_FLAVOR_SPINOR WRITE_FLAVOR_SPINOR_SHORT4
369 #if (DD_XPAY==1) // accumulator macros are defined only for Xpay variants
370 #ifdef DIRECT_ACCESS_WILSON_ACCUM // direct access binds ACCUMTEX to the x pointer
371 #define ACCUMTEX x
372 #define READ_ACCUM READ_ACCUM_HALF
373 #define ASSN_ACCUM ASSN_ACCUM_HALF
375 #define READ_ACCUM_FLAVOR READ_ACCUM_FLAVOR_HALF
376 #else
377 #ifdef USE_TEXTURE_OBJECTS
378 #define ACCUMTEX param.xTex
379 #else
380 #define ACCUMTEX accumTexHalf
381 #endif // USE_TEXTURE_OBJECTS
382 #define READ_ACCUM READ_ACCUM_HALF_TEX
383 #define ASSN_ACCUM ASSN_ACCUM_HALF_TEX
385 #define READ_ACCUM_FLAVOR READ_ACCUM_FLAVOR_HALF_TEX
386 #endif
387 
388 #endif // DD_XPAY
389 
390 #define SPINOR_HOP 6 // 6 here and in the single branch versus 12 in the double branch
391 
392 #endif // DD_PREC
393 
394 // only build double precision if supported: the kernel is skipped when DD_PREC is 0 (double) and __COMPUTE_CAPABILITY__ is below 130
395 #if !(__COMPUTE_CAPABILITY__ < 130 && DD_PREC == 0)
396 // DD_CONCAT pastes its five arguments and the suffix "Kernel" into a single identifier
397 #define DD_CONCAT(n,r,d,t,x) n ## r ## d ## t ## x ## Kernel
398 #define DD_FUNC(n,r,d,t,x) DD_CONCAT(n,r,d,t,x) // extra level so macro arguments (e.g. DD_NAME_F) are expanded before ## pastes them
399 
400 // define the kernel
401 
402 template <KernelType kernel_type>
405 
406 #ifdef GPU_NDEG_TWISTED_MASS_DIRAC
407 #if DD_DAG
409 #else
410 #include "tm_ndeg_dslash_core.h"
411 #endif
412 #endif
413 
414 }
415 
416 
417 template <>
420 
421 #ifdef GPU_NDEG_TWISTED_MASS_DIRAC
422 #if DD_DAG
424 #else
426 #endif
427 #endif
428 
429 }
430 
431 
432 #endif
433 
434 // clean up: undefine every per-iteration macro so the next pass can define it afresh.
435 // Some of these are not defined on every pass (e.g. the ACCUM macros without Xpay); #undef of an undefined name is harmless.
436 #undef DD_NAME_F
437 #undef DD_RECON_F
438 #undef DD_DAG_F
439 #undef DD_XPAY_F
440 #undef DD_TWIST_F
441 #undef DD_PARAM1
442 #undef DD_PARAM2
443 #undef DD_PARAM3
444 #undef DD_PARAM4
445 #undef DD_CONCAT
446 #undef DD_FUNC
447 
448 #undef DSLASH_XPAY
449 #undef DSLASH_TWIST
450 #undef READ_GAUGE_MATRIX
451 #undef RECONSTRUCT_GAUGE_MATRIX
452 #undef GAUGE0TEX
453 #undef GAUGE1TEX
454 #undef READ_SPINOR
455 #undef READ_SPINOR_UP
456 #undef READ_SPINOR_DOWN
457 #undef SPINORTEX
458 #undef READ_INTERMEDIATE_SPINOR
459 #undef INTERTEX
460 #undef READ_ACCUM
461 #undef ASSN_ACCUM
463 #undef READ_ACCUM_FLAVOR
464 #undef ACCUMTEX
465 #undef WRITE_FLAVOR_SPINOR
466 #undef GAUGE_FLOAT2
467 #undef SPINOR_DOUBLE
468 
469 #undef SPINOR_HOP
470 
471 #undef TPROJSCALE
472 
473 // prepare next set of options, or clean up after final iteration
474 // The options act as a counter: DD_DAG varies fastest, then DD_XPAY, DD_TWIST, DD_RECON, DD_PREC; each nested #else is a carry into the next option
475 #if (DD_DAG==0)
476 #undef DD_DAG
477 #define DD_DAG 1
478 #else // DD_DAG wraps back to 0 and carries into DD_XPAY
479 #undef DD_DAG
480 #define DD_DAG 0
481 
482 #if (DD_XPAY==0)
483 #undef DD_XPAY
484 #define DD_XPAY 1
485 #else // DD_XPAY wraps back to 0 and carries into DD_TWIST
486 #undef DD_XPAY
487 #define DD_XPAY 0
488 
490 #if (DD_TWIST==0)
491 #undef DD_TWIST
492 #define DD_TWIST 1
493 #else // DD_TWIST wraps back to 0 and carries into DD_RECON
494 #undef DD_TWIST
495 #define DD_TWIST 0
496 
498 #if (DD_RECON==0)
499 #undef DD_RECON
500 #define DD_RECON 1
501 #elif (DD_RECON==1)
502 #undef DD_RECON
503 #define DD_RECON 2
504 #else // DD_RECON wraps back to 0 and carries into DD_PREC
505 #undef DD_RECON
506 #define DD_RECON 0
507 
508 #if (DD_PREC==0)
509 #undef DD_PREC
510 #define DD_PREC 1
511 #elif (DD_PREC==1)
512 #undef DD_PREC
513 #define DD_PREC 2
514 
515 #else // every option has wrapped: the last combination is done, so drop all loop state
516 
517 #undef DD_LOOP
518 #undef DD_DAG
519 #undef DD_XPAY
520 #undef DD_TWIST
521 #undef DD_RECON
522 #undef DD_PREC
523 
524 #endif // DD_PREC
525 #endif // DD_RECON
526 #endif // DD_TWIST
527 #endif // DD_XPAY
528 #endif // DD_DAG
529 
530 #ifdef DD_LOOP // still defined, so at least one option combination remains
531 #include "tm_ndeg_dslash_def.h" // re-include this file to process the next combination
532 #endif
#define DD_PARAM1
#define DD_PARAM3
#define DD_XPAY_F
#define DD_FUNC(x)
Definition: clover_def.h:141
#define DD_PARAM2
#define DD_TWIST_F
#define DD_RECON_F
#define DD_PARAM4
#define DD_DAG_F
#define DD_NAME_F