QUDA  0.9.0
mdw_dslash4_def.h
Go to the documentation of this file.
1 // mdw_dslash4_def.h - MDWF Dslash4 kernel definitions
2 
3 // There are currently 36 different variants of the MDWF Dslash4
4 // kernel, each one characterized by a set of 4 options,
5 // where each option can take one of several values (3*2*2*3 = 36).
6 // This file is structured so that the C preprocessor loops through all 36
7 // variants (in a manner resembling a counter), sets the appropriate
8 // macros, and defines the corresponding functions.
9 //
10 // As an example of the function naming conventions, consider
11 //
12 // MDWFDslash4S12DaggerXpayKernel(const DslashParam param).
13 //
14 // This is a single-precision Dslash4^dagger kernel where the result is
15 // multiplied by "a" and summed with an input vector (Xpay), and the
16 // gauge matrix is reconstructed from 12 real numbers. More
17 // generally, each function name is given by the concatenation of the
18 // following 5 fields, with "Kernel" at the end:
19 //
20 // DD_NAME_F = MDWFDslash4
21 // DD_PREC_F = D, S, H (double, single, or half precision)
22 // DD_RECON_F = 8, 12, 18
23 // DD_DAG_F = Dagger, [blank]
24 // DD_XPAY_F = Xpay, [blank]
25 //
26 // In addition, the kernels are templated on the kernel type (KernelType).
27 
28 // initialize on first iteration
29 
30 #ifndef DD_LOOP // first pass only: start every option counter at 0
31 #define DD_LOOP
32 #define DD_DAG 0
33 #define DD_XPAY 0
34 #define DD_RECON 0
35 #define DD_PREC 0
36 #endif // DD_LOOP
37 
38 // set options for current iteration
39 
40 #define DD_NAME_F MDWFDslash4
41 
42 #if (DD_DAG==0) // no dagger
43 #define DD_DAG_F
44 #else // dagger
45 #define DD_DAG_F Dagger
46 #endif // DD_DAG
47 
48 #if (DD_XPAY==0) // no xpay
49 #define DD_XPAY_F
50 #else // xpay
51 #define DSLASH_XPAY // not read in this file; presumably consumed by the included core headers - confirm
52 #define DD_XPAY_F Xpay
53 #endif // DD_XPAY
54 
55 #if (DD_PREC == 0) // double precision
56 #define DD_PREC_F D
57 #elif (DD_PREC == 1) // single precision
58 #define DD_PREC_F S
59 #else // half precision
60 #define DD_PREC_F H
61 #endif // DD_PREC
62 
63 #if (DD_PREC == 0) // NOTE(review): this block repeats the DD_PREC_F selection directly above
64 #define DD_PREC_F D // identical redefinition, so it changes nothing
65 #elif (DD_PREC == 1)
66 #define DD_PREC_F S
67 #else
68 #define DD_PREC_F H
69 #endif // DD_PREC (duplicate block; candidate for removal)
70 
71 #if (DD_RECON==0) // reconstruct from 8 reals
72 #define DD_RECON_F 8
73 
74 #if (DD_PREC==0) // double
75 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_DOUBLE
76 #ifdef DIRECT_ACCESS_LINK
77 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_8_DOUBLE2
78 #else
79 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_8_DOUBLE2_TEX
80 #endif // DIRECT_ACCESS_LINK
81 
82 #elif (DD_PREC==1) // single
83 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_SINGLE
84 #ifdef DIRECT_ACCESS_LINK
85 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_8_FLOAT4
86 #else
87 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_8_FLOAT4_TEX
88 #endif // DIRECT_ACCESS_LINK
89 
90 #else // half: reuses the _SINGLE reconstruct macro, paired with the SHORT4 assignment macro
91 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_SINGLE
92 #ifdef DIRECT_ACCESS_LINK
93 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_8_SHORT4
94 #else
95 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_8_SHORT4_TEX
96 #endif // DIRECT_ACCESS_LINK
97 #endif // DD_PREC
98 #elif (DD_RECON==1) // reconstruct from 12 reals
99 #define DD_RECON_F 12
100 
101 #if (DD_PREC==0) // double
102 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_DOUBLE
103 #ifdef DIRECT_ACCESS_LINK
104 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_12_DOUBLE2
105 #else
106 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_12_DOUBLE2_TEX
107 #endif // DIRECT_ACCESS_LINK
108 
109 #elif (DD_PREC==1) // single
110 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_SINGLE
111 #ifdef DIRECT_ACCESS_LINK
112 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_12_FLOAT4
113 #else
114 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_12_FLOAT4_TEX
115 #endif // DIRECT_ACCESS_LINK
116 
117 #else // half: reuses the _SINGLE reconstruct macro, paired with the SHORT4 assignment macro
118 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_SINGLE
119 #ifdef DIRECT_ACCESS_LINK
120 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_12_SHORT4
121 #else
122 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_12_SHORT4_TEX
123 #endif // DIRECT_ACCESS_LINK
124 #endif // DD_PREC
125 #else // no reconstruct, load all components
126 #define DD_RECON_F 18
127 #define GAUGE_FLOAT2 // set for every precision in this branch; the double-precision field section defines it again
128 #if (DD_PREC==0) // double
129 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_DOUBLE
130 #ifdef DIRECT_ACCESS_LINK
131 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_18_DOUBLE2
132 #else
133 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_18_DOUBLE2_TEX
134 #endif // DIRECT_ACCESS_LINK
135 
136 #elif (DD_PREC==1) // single
137 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_SINGLE
138 #ifdef DIRECT_ACCESS_LINK
139 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_18_FLOAT2
140 #else
141 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_18_FLOAT2_TEX
142 #endif // DIRECT_ACCESS_LINK
143 
144 #else // half: reuses the _SINGLE reconstruct macro, paired with the SHORT2 assignment macro
145 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_SINGLE
146 #ifdef DIRECT_ACCESS_LINK
147 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_18_SHORT2
148 #else
149 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_18_SHORT2_TEX
150 #endif //DIRECT_ACCESS_LINK
151 #endif // DD_PREC
152 #endif // DD_RECON
153 
154 #if (DD_PREC==0) // double-precision fields
155 
156 #define TPROJSCALE param.tProjScale
157 
158 // double-precision gauge field
159 #if (defined DIRECT_ACCESS_LINK) || (defined FERMI_NO_DBLE_TEX)
160 #define GAUGE0TEX param.gauge0
161 #define GAUGE1TEX param.gauge1
162 #else
163 #ifdef USE_TEXTURE_OBJECTS
164 #define GAUGE0TEX param.gauge0Tex
165 #define GAUGE1TEX param.gauge1Tex
166 #else
167 #define GAUGE0TEX gauge0TexDouble2
168 #define GAUGE1TEX gauge1TexDouble2
169 #endif // USE_TEXTURE_OBJECTS
170 #endif // DIRECT_ACCESS_LINK || FERMI_NO_DBLE_TEX
171 
172 #define GAUGE_FLOAT2 // always set for double precision; the 18-real reconstruct branch sets it too
173 
174 // double-precision spinor fields
175 #if (defined DIRECT_ACCESS_WILSON_SPINOR) || (defined FERMI_NO_DBLE_TEX)
176 #define READ_SPINOR READ_SPINOR_DOUBLE
177 #define READ_SPINOR_GHOST READ_SPINOR_GHOST_DOUBLE
178 #define READ_SPINOR_UP READ_SPINOR_DOUBLE_UP
179 #define READ_SPINOR_DOWN READ_SPINOR_DOUBLE_DOWN
180 #define SPINORTEX param.in
181 #define GHOSTSPINORTEX param.ghost
182 #else
183 #define READ_SPINOR READ_SPINOR_DOUBLE_TEX
184 #define READ_SPINOR_GHOST READ_SPINOR_GHOST_DOUBLE_TEX
185 #define READ_SPINOR_UP READ_SPINOR_DOUBLE_UP_TEX
186 #define READ_SPINOR_DOWN READ_SPINOR_DOUBLE_DOWN_TEX
187 #ifdef USE_TEXTURE_OBJECTS
188 #define SPINORTEX param.inTex
189 #define GHOSTSPINORTEX param.ghostTex
190 #else
191 #define SPINORTEX spinorTexDouble
192 #define GHOSTSPINORTEX ghostSpinorTexDouble
193 #endif // USE_TEXTURE_OBJECTS
194 #endif // DIRECT_ACCESS_WILSON_SPINOR || FERMI_NO_DBLE_TEX
195 #if (defined DIRECT_ACCESS_WILSON_INTER) || (defined FERMI_NO_DBLE_TEX) // intermediate spinor is read back through param.out / param.outTex
196 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_DOUBLE
197 #define INTERTEX param.out
198 #else
199 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_DOUBLE_TEX
200 #ifdef USE_TEXTURE_OBJECTS
201 #define INTERTEX param.outTex
202 #else
203 #define INTERTEX interTexDouble
204 #endif // USE_TEXTURE_OBJECTS
205 #endif // DIRECT_ACCESS_WILSON_INTER || FERMI_NO_DBLE_TEX
206 #define WRITE_SPINOR WRITE_SPINOR_DOUBLE2
207 #define SPINOR_DOUBLE // defined only in this double-precision branch
208 #if (DD_XPAY==1) // accumulator macros exist only for the Xpay variants
209 #if (defined DIRECT_ACCESS_WILSON_ACCUM) || (defined FERMI_NO_DBLE_TEX)
210 #define ACCUMTEX param.x
211 #define READ_ACCUM READ_ACCUM_DOUBLE
212 #else
213 #ifdef USE_TEXTURE_OBJECTS
214 #define ACCUMTEX param.xTex
215 #else
216 #define ACCUMTEX accumTexDouble
217 #endif // USE_TEXTURE_OBJECTS
218 #define READ_ACCUM READ_ACCUM_DOUBLE_TEX
219 #endif // DIRECT_ACCESS_WILSON_ACCUM || FERMI_NO_DBLE_TEX
220 
221 #endif // DD_XPAY
222 
223 #define SPINOR_HOP 12
224 
225 #elif (DD_PREC==1) // single-precision fields
226 
227 #define TPROJSCALE param.tProjScale_f
228 
229 // single-precision gauge field
230 #ifdef DIRECT_ACCESS_LINK
231 #define GAUGE0TEX param.gauge0
232 #define GAUGE1TEX param.gauge1
233 #else
234 #ifdef USE_TEXTURE_OBJECTS
235 #define GAUGE0TEX param.gauge0Tex
236 #define GAUGE1TEX param.gauge1Tex
237 #else
238 #if (DD_RECON_F == 18) // no-reconstruct case binds the ...Single2 textures; 8/12 use ...Single4
239 #define GAUGE0TEX gauge0TexSingle2
240 #define GAUGE1TEX gauge1TexSingle2
241 #else
242 #define GAUGE0TEX gauge0TexSingle4
243 #define GAUGE1TEX gauge1TexSingle4
244 #endif // DD_RECON_F == 18
245 #endif // USE_TEXTURE_OBJECTS
246 #endif // DIRECT_ACCESS_LINK
247 
248 
249 // single-precision spinor fields
250 #ifdef DIRECT_ACCESS_WILSON_SPINOR
251 #define READ_SPINOR READ_SPINOR_SINGLE
252 #define READ_SPINOR_GHOST READ_SPINOR_GHOST_SINGLE
253 #define READ_SPINOR_UP READ_SPINOR_SINGLE_UP
254 #define READ_SPINOR_DOWN READ_SPINOR_SINGLE_DOWN
255 #define SPINORTEX param.in
256 #define GHOSTSPINORTEX param.ghost
257 #else
258 #define READ_SPINOR READ_SPINOR_SINGLE_TEX
259 #define READ_SPINOR_GHOST READ_SPINOR_GHOST_SINGLE_TEX
260 #define READ_SPINOR_UP READ_SPINOR_SINGLE_UP_TEX
261 #define READ_SPINOR_DOWN READ_SPINOR_SINGLE_DOWN_TEX
262 #ifdef USE_TEXTURE_OBJECTS
263 #define SPINORTEX param.inTex
264 #define GHOSTSPINORTEX param.ghostTex
265 #else
266 #define SPINORTEX spinorTexSingle
267 #define GHOSTSPINORTEX ghostSpinorTexSingle
268 #endif // USE_TEXTURE_OBJECTS
269 #endif // DIRECT_ACCESS_WILSON_SPINOR
270 #ifdef DIRECT_ACCESS_WILSON_INTER // intermediate spinor is read back through param.out / param.outTex
271 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_SINGLE
272 #define INTERTEX param.out
273 #else
274 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_SINGLE_TEX
275 #ifdef USE_TEXTURE_OBJECTS
276 #define INTERTEX param.outTex
277 #else
278 #define INTERTEX interTexSingle
279 #endif // USE_TEXTURE_OBJECTS
280 #endif // DIRECT_ACCESS_WILSON_INTER
281 #define WRITE_SPINOR WRITE_SPINOR_FLOAT4
282 #if (DD_XPAY==1) // accumulator macros exist only for the Xpay variants
283 #ifdef DIRECT_ACCESS_WILSON_ACCUM
284 #define ACCUMTEX param.x
285 #define READ_ACCUM READ_ACCUM_SINGLE
286 #else
287 #ifdef USE_TEXTURE_OBJECTS
288 #define ACCUMTEX param.xTex
289 #else
290 #define ACCUMTEX accumTexSingle
291 #endif // USE_TEXTURE_OBJECTS
292 #define READ_ACCUM READ_ACCUM_SINGLE_TEX
293 #endif // DIRECT_ACCESS_WILSON_ACCUM
294 #endif // DD_XPAY
295 
296 #define SPINOR_HOP 6
297 
298 #else // half-precision fields
299 
300 #define TPROJSCALE param.tProjScale_f // same param member as the single-precision branch
301 
302 // half-precision gauge field
303 #ifdef DIRECT_ACCESS_LINK
304 #define GAUGE0TEX param.gauge0
305 #define GAUGE1TEX param.gauge1
306 #else
307 #ifdef USE_TEXTURE_OBJECTS
308 #define GAUGE0TEX param.gauge0Tex
309 #define GAUGE1TEX param.gauge1Tex
310 #else
311 #if (DD_RECON_F == 18) // no-reconstruct case binds the ...Half2 textures; 8/12 use ...Half4
312 #define GAUGE0TEX gauge0TexHalf2
313 #define GAUGE1TEX gauge1TexHalf2
314 #else
315 #define GAUGE0TEX gauge0TexHalf4
316 #define GAUGE1TEX gauge1TexHalf4
317 #endif // DD_RECON_F == 18
318 #endif // USE_TEXTURE_OBJECTS
319 #endif // DIRECT_ACCESS_LINK
320 
321 
322 // half-precision spinor fields
323 #ifdef DIRECT_ACCESS_WILSON_SPINOR
324 #define READ_SPINOR READ_SPINOR_HALF
325 #define READ_SPINOR_GHOST READ_SPINOR_GHOST_HALF
326 #define READ_SPINOR_UP READ_SPINOR_HALF_UP
327 #define READ_SPINOR_DOWN READ_SPINOR_HALF_DOWN
328 #define SPINORTEX param.in
329 #define GHOSTSPINORTEX param.ghost
330 #else
331 #define READ_SPINOR READ_SPINOR_HALF_TEX
332 #define READ_SPINOR_GHOST READ_SPINOR_GHOST_HALF_TEX
333 #define READ_SPINOR_UP READ_SPINOR_HALF_UP_TEX
334 #define READ_SPINOR_DOWN READ_SPINOR_HALF_DOWN_TEX
335 #ifdef USE_TEXTURE_OBJECTS
336 #define SPINORTEX param.inTex
337 #define GHOSTSPINORTEX param.ghostTex
338 #else
339 #define SPINORTEX spinorTexHalf
340 #define GHOSTSPINORTEX ghostSpinorTexHalf
341 #endif // USE_TEXTURE_OBJECTS
342 #endif // DIRECT_ACCESS_WILSON_SPINOR
343 #ifdef DIRECT_ACCESS_WILSON_INTER // intermediate spinor is read back through param.out / param.outTex
344 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_HALF
345 #define INTERTEX param.out
346 #else
347 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_HALF_TEX
348 #ifdef USE_TEXTURE_OBJECTS
349 #define INTERTEX param.outTex
350 #else
351 #define INTERTEX interTexHalf
352 #endif // USE_TEXTURE_OBJECTS
353 #endif // DIRECT_ACCESS_WILSON_INTER
354 #define WRITE_SPINOR WRITE_SPINOR_SHORT4
355 #if (DD_XPAY==1) // accumulator macros exist only for the Xpay variants
356 #ifdef DIRECT_ACCESS_WILSON_ACCUM
357 #define ACCUMTEX param.x
358 #define READ_ACCUM READ_ACCUM_HALF
359 #else
360 #ifdef USE_TEXTURE_OBJECTS
361 #define ACCUMTEX param.xTex
362 #else
363 #define ACCUMTEX accumTexHalf
364 #endif // USE_TEXTURE_OBJECTS
365 #define READ_ACCUM READ_ACCUM_HALF_TEX
366 #endif // DIRECT_ACCESS_WILSON_ACCUM
367 
368 #endif // DD_XPAY
369 
370 #define SPINOR_HOP 6
371 
372 #endif // DD_PREC (closes the double / single / half field selection)
373 
374 #define DD_CONCAT(n,p,r,d,x) n ## p ## r ## d ## x ## Kernel // pastes the five fields and the suffix "Kernel" into one identifier
375 #define DD_FUNC(n,p,r,d,x) DD_CONCAT(n,p,r,d,x) // extra macro level so the arguments are expanded before ## pasting
376 
377 // define the kernel
378 
379 template <KernelType kernel_type> // NOTE(review): listing line 380 (the kernel's declarator/name) is absent from this extract
381  (const DslashParam param) {
382 
383 #ifdef GPU_DOMAIN_WALL_DIRAC // the body is empty unless this macro is defined
384 #define MDWF_mode 1 // set only around the core include; presumably read by the dw_dslash4 core headers - confirm there
385 #if DD_DAG // dagger variants include the dagger core, the others the plain core
386 #include "dw_dslash4_dagger_core.h"
387 #else
388 #include "dw_dslash4_core.h"
389 #endif // DD_DAG
390 #undef MDWF_mode
391 #endif // GPU_DOMAIN_WALL_DIRAC
392 
393 }
394 
395 #ifdef MULTI_GPU
396 template <> // NOTE(review): listing line 397 (the specialization's declarator/name) is absent from this extract
398  (const DslashParam param) {
399 
400 #ifdef GPU_DOMAIN_WALL_DIRAC
401 #define MDWF_mode 1
402 #if DD_DAG // NOTE(review): listing lines 403 and 405 are absent, so both branches appear empty here
404 #else
406 #endif // DD_DAG
407 #undef MDWF_mode
408 #endif // GPU_DOMAIN_WALL_DIRAC
409 
410 }
411 #endif // MULTI_GPU
412 
413 // clean up
414 
415 #undef DD_NAME_F
416 #undef DD_PREC_F
417 #undef DD_RECON_F
418 #undef DD_DAG_F
419 #undef DD_XPAY_F
420 #undef DD_CONCAT
421 #undef DD_FUNC
422 
423 #undef DSLASH_XPAY // only defined when DD_XPAY != 0; #undef of an undefined name is a no-op
424 #undef ASSN_GAUGE_MATRIX
425 #undef RECONSTRUCT_GAUGE_MATRIX
426 #undef GAUGE0TEX
427 #undef GAUGE1TEX
428 #undef READ_SPINOR
429 #undef READ_SPINOR_GHOST
430 #undef READ_SPINOR_UP
431 #undef READ_SPINOR_DOWN
432 #undef SPINORTEX
433 #undef GHOSTSPINORTEX
434 #undef READ_INTERMEDIATE_SPINOR
435 #undef INTERTEX
436 #undef READ_ACCUM // only defined for the Xpay variants
437 #undef ACCUMTEX // only defined for the Xpay variants
438 #undef WRITE_SPINOR
439 #undef GAUGE_FLOAT2 // only defined for 18-real reconstruct or double precision
440 #undef SPINOR_DOUBLE // only defined for double precision
441 
442 #undef SPINOR_HOP
443 
444 #undef TPROJSCALE
446 // prepare next set of options, or clean up after final iteration
447 
448 #if (DD_DAG==0) // fastest-changing option: dagger 0 -> 1
449 #undef DD_DAG
450 #define DD_DAG 1
451 #else // dagger wraps back to 0 and carries into xpay
452 #undef DD_DAG
453 #define DD_DAG 0
454 
455 #if (DD_XPAY==0) // xpay 0 -> 1
456 #undef DD_XPAY
457 #define DD_XPAY 1
458 #else // xpay wraps back to 0 and carries into recon
459 #undef DD_XPAY
460 #define DD_XPAY 0
461 
462 #if (DD_RECON==0) // recon 0 -> 1 -> 2
463 #undef DD_RECON
464 #define DD_RECON 1
465 #elif (DD_RECON==1)
466 #undef DD_RECON
467 #define DD_RECON 2
468 #else // recon wraps back to 0 and carries into precision
469 #undef DD_RECON
470 #define DD_RECON 0
471 
472 #if (DD_PREC==0) // precision 0 -> 1 -> 2
473 #undef DD_PREC
474 #define DD_PREC 1
475 #elif (DD_PREC==1)
476 #undef DD_PREC
477 #define DD_PREC 2
478 
479 #else // every counter has wrapped: all variants were emitted, so tear the loop state down
480 
481 #undef DD_LOOP
482 #undef DD_DAG
483 #undef DD_XPAY
484 #undef DD_RECON
485 #undef DD_PREC
486 
487 #endif // DD_PREC
488 #endif // DD_RECON
489 #endif // DD_XPAY
490 #endif // DD_DAG
491 
492 #ifdef DD_LOOP // still defined => variants remain, so this file includes itself for the next one
493 #include "mdw_dslash4_def.h"
494 #endif // DD_LOOP
#define DD_XPAY_F
#define DD_FUNC(n, p, r, d, x)
QudaGaugeParam param
Definition: pack_test.cpp:17
#define DD_NAME_F
#define DD_PREC_F
#define DD_DAG_F
#define DD_RECON_F