QUDA  0.9.0
dw_dslash_def.h
Go to the documentation of this file.
1 // dw_dslash_def.h - Domain Wall Dslash kernel definitions
2 
3 // There are currently 36 different variants of the Domain Wall
4 // Dslash kernel, each one characterized by a set of 4 options,
5 // where each option can take one of several values (3*2*2*3 = 36).
6 // This file is structured so that the C preprocessor loops through all 36
7 // variants (in a manner resembling a counter), sets the appropriate
8 // macros, and defines the corresponding functions.
9 //
10 // As an example of the function naming conventions, consider
11 //
12 // domainWallDslashS12DaggerXpayKernel(const DslashParam param).
13 //
14 // This is a single-precision domain wall Dslash^dagger kernel where
15 // the result is multiplied by "a" and summed with an input vector
16 // (Xpay), and the gauge matrix is reconstructed from 12 real numbers.
17 // More generally, each function name is given by the concatenation of
18 // the following 5 fields, with "Kernel" at the end:
19 //
20 // DD_NAME_F = domainWallDslash
21 // DD_PREC_F = D, S, H (double, single, or half precision fields)
22 // DD_RECON_F = 8, 12, 18
23 // DD_DAG_F = Dagger, [blank]
24 // DD_XPAY_F = Xpay, [blank]
25 //
26 // In addition, the kernels are templated on the kernel type.
27 
28 // initialize on first iteration
29 
30 #ifndef DD_LOOP // first pass only: start every option counter at 0
31 #define DD_LOOP // stays defined until the last variant has been emitted
32 #define DD_DAG 0 // 0 = no dagger, otherwise dagger
33 #define DD_XPAY 0 // 0 = no xpay, otherwise xpay
34 #define DD_RECON 0 // 0 = 8 reals, 1 = 12 reals, otherwise 18 reals
35 #define DD_PREC 0 // 0 = double, 1 = single, otherwise half
36 #endif // DD_LOOP
37 
38 // set options for current iteration
39 
40 #define DD_NAME_F domainWallDslash // leading field of the generated kernel name
41 
42 #if (DD_DAG==0) // no dagger
43 #define DD_DAG_F // empty: contributes nothing to the kernel name
44 #else // dagger
45 #define DD_DAG_F Dagger
46 #endif // DD_DAG
47 
48 #if (DD_XPAY==0) // no xpay
49 #define DD_XPAY_F // empty: contributes nothing to the kernel name
50 #else // xpay
51 #define DSLASH_XPAY // defined only for the Xpay variants; undefined again in the clean-up section
52 #define DD_XPAY_F Xpay
53 #endif // DD_XPAY
54 
55 #if (DD_PREC == 0) // double
56 #define DD_PREC_F D
57 #elif (DD_PREC == 1) // single
58 #define DD_PREC_F S
59 #else // half
60 #define DD_PREC_F H
61 #endif // DD_PREC
62 
63 #if (DD_RECON==0) // reconstruct from 8 reals
64 #define DD_RECON_F 8
65 
66 #if (DD_PREC==0) // double
67 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_DOUBLE
68 #ifdef DIRECT_ACCESS_LINK
69 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_8_DOUBLE2
70 #else // no direct access: use the _TEX variant
71 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_8_DOUBLE2_TEX
72 #endif // DIRECT_ACCESS_LINK
73 
74 #elif (DD_PREC==1) // single
75 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_SINGLE
76 #ifdef DIRECT_ACCESS_LINK
77 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_8_FLOAT4
78 #else // no direct access: use the _TEX variant
79 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_8_FLOAT4_TEX
80 #endif // DIRECT_ACCESS_LINK
81 
82 #else // half: SHORT4 assignment macro, but the SINGLE reconstruct macro
83 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_SINGLE
84 #ifdef DIRECT_ACCESS_LINK
85 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_8_SHORT4
86 #else // no direct access: use the _TEX variant
87 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_8_SHORT4_TEX
88 #endif // DIRECT_ACCESS_LINK
89 #endif // DD_PREC
90 #elif (DD_RECON==1) // reconstruct from 12 reals
91 #define DD_RECON_F 12
92 
93 #if (DD_PREC==0) // double
94 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_DOUBLE
95 #ifdef DIRECT_ACCESS_LINK
96 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_12_DOUBLE2
97 #else // no direct access: use the _TEX variant
98 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_12_DOUBLE2_TEX
99 #endif // DIRECT_ACCESS_LINK
100 
101 #elif (DD_PREC==1) // single
102 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_SINGLE
103 #ifdef DIRECT_ACCESS_LINK
104 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_12_FLOAT4
105 #else // no direct access: use the _TEX variant
106 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_12_FLOAT4_TEX
107 #endif // DIRECT_ACCESS_LINK
108 
109 #else // half: SHORT4 assignment macro, but the SINGLE reconstruct macro
110 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_SINGLE
111 #ifdef DIRECT_ACCESS_LINK
112 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_12_SHORT4
113 #else // no direct access: use the _TEX variant
114 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_12_SHORT4_TEX
115 #endif // DIRECT_ACCESS_LINK
116 #endif // DD_PREC
117 #else // no reconstruct, load all components
118 #define DD_RECON_F 18
119 #define GAUGE_FLOAT2 // always set for 18 reals; the double-precision section below sets it too
120 #if (DD_PREC==0) // double
121 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_DOUBLE
122 #ifdef DIRECT_ACCESS_LINK
123 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_18_DOUBLE2
124 #else // no direct access: use the _TEX variant
125 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_18_DOUBLE2_TEX
126 #endif // DIRECT_ACCESS_LINK
127 
128 #elif (DD_PREC==1) // single
129 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_SINGLE
130 #ifdef DIRECT_ACCESS_LINK
131 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_18_FLOAT2
132 #else // no direct access: use the _TEX variant
133 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_18_FLOAT2_TEX
134 #endif // DIRECT_ACCESS_LINK
135 
136 #else // half: SHORT2 assignment macro, but the SINGLE reconstruct macro
137 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_SINGLE
138 #ifdef DIRECT_ACCESS_LINK
139 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_18_SHORT2
140 #else // no direct access: use the _TEX variant
141 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_18_SHORT2_TEX
142 #endif //DIRECT_ACCESS_LINK
143 #endif // DD_PREC
144 #endif // DD_RECON
145 
146 #if (DD_PREC==0) // double-precision fields
147 
148 #define TPROJSCALE param.tProjScale // double uses tProjScale; single and half use tProjScale_f
149 
150 // double-precision gauge field
151 #if (defined DIRECT_ACCESS_LINK) || (defined FERMI_NO_DBLE_TEX)
152 #define GAUGE0TEX param.gauge0
153 #define GAUGE1TEX param.gauge1
154 #else // texture path
155 #ifdef USE_TEXTURE_OBJECTS
156 #define GAUGE0TEX param.gauge0Tex
157 #define GAUGE1TEX param.gauge1Tex
158 #else // named textures
159 #define GAUGE0TEX gauge0TexDouble2
160 #define GAUGE1TEX gauge1TexDouble2
161 #endif // USE_TEXTURE_OBJECTS
162 #endif // DIRECT_ACCESS_LINK || FERMI_NO_DBLE_TEX
163 
164 #define GAUGE_FLOAT2 // set for every reconstruct type in double precision
165 
166 // double-precision spinor fields
167 #if (defined DIRECT_ACCESS_WILSON_SPINOR) || (defined FERMI_NO_DBLE_TEX)
168 #define READ_SPINOR READ_SPINOR_DOUBLE
169 #define READ_SPINOR_GHOST READ_SPINOR_GHOST_DOUBLE
170 #define READ_SPINOR_UP READ_SPINOR_DOUBLE_UP
171 #define READ_SPINOR_DOWN READ_SPINOR_DOUBLE_DOWN
172 #define SPINORTEX param.in
173 #define GHOSTSPINORTEX param.ghost
174 #else // texture path
175 #define READ_SPINOR READ_SPINOR_DOUBLE_TEX
176 #define READ_SPINOR_GHOST READ_SPINOR_GHOST_DOUBLE_TEX
177 #define READ_SPINOR_UP READ_SPINOR_DOUBLE_UP_TEX
178 #define READ_SPINOR_DOWN READ_SPINOR_DOUBLE_DOWN_TEX
179 #ifdef USE_TEXTURE_OBJECTS
180 #define SPINORTEX param.inTex
181 #define GHOSTSPINORTEX param.ghostTex
182 #else // named textures
183 #define SPINORTEX spinorTexDouble
184 #define GHOSTSPINORTEX ghostSpinorTexDouble
185 #endif // USE_TEXTURE_OBJECTS
186 #endif // DIRECT_ACCESS_WILSON_SPINOR || FERMI_NO_DBLE_TEX
187 #if (defined DIRECT_ACCESS_WILSON_INTER) || (defined FERMI_NO_DBLE_TEX)
188 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_DOUBLE
189 #define INTERTEX param.out
190 #else // texture path
191 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_DOUBLE_TEX
192 #ifdef USE_TEXTURE_OBJECTS
193 #define INTERTEX param.outTex
194 #else // named textures
195 #define INTERTEX interTexDouble
196 #endif // USE_TEXTURE_OBJECTS
197 #endif // DIRECT_ACCESS_WILSON_INTER || FERMI_NO_DBLE_TEX
198 #define WRITE_SPINOR WRITE_SPINOR_DOUBLE2
199 #define SPINOR_DOUBLE // only defined in the double-precision branch
200 #if (DD_XPAY==1) // accumulator macros exist only for the Xpay variants
201 #if (defined DIRECT_ACCESS_WILSON_ACCUM) || (defined FERMI_NO_DBLE_TEX)
202 #define ACCUMTEX x // NOTE(review): bare x, unlike the param.* names used by the other direct-access macros -- confirm x is in scope in the core
203 #define READ_ACCUM READ_ACCUM_DOUBLE
204 #else // texture path
205 #ifdef USE_TEXTURE_OBJECTS
206 #define ACCUMTEX param.xTex
207 #else // named textures
208 #define ACCUMTEX accumTexDouble
209 #endif // USE_TEXTURE_OBJECTS
210 #define READ_ACCUM READ_ACCUM_DOUBLE_TEX
211 #endif // DIRECT_ACCESS_WILSON_ACCUM || FERMI_NO_DBLE_TEX
212 
213 #endif // DD_XPAY
214 
215 #define SPINOR_HOP 12 // 12 for double; 6 for single and half
216 
217 #elif (DD_PREC==1) // single-precision fields
218 
219 #define TPROJSCALE param.tProjScale_f
220 
221 // single-precision gauge field
222 #ifdef DIRECT_ACCESS_LINK
223 #define GAUGE0TEX param.gauge0
224 #define GAUGE1TEX param.gauge1
225 #else // texture path
226 #ifdef USE_TEXTURE_OBJECTS
227 #define GAUGE0TEX param.gauge0Tex
228 #define GAUGE1TEX param.gauge1Tex
229 #else // named textures: 2-component for 18 reals, 4-component otherwise
230 #if (DD_RECON_F == 18)
231 #define GAUGE0TEX gauge0TexSingle2
232 #define GAUGE1TEX gauge1TexSingle2
233 #else // 8 or 12 reals
234 #define GAUGE0TEX gauge0TexSingle4
235 #define GAUGE1TEX gauge1TexSingle4
236 #endif // DD_RECON_F == 18
237 #endif // USE_TEXTURE_OBJECTS
238 #endif // DIRECT_ACCESS_LINK
239 
240 
241 // single-precision spinor fields
242 #ifdef DIRECT_ACCESS_WILSON_SPINOR
243 #define READ_SPINOR READ_SPINOR_SINGLE
244 #define READ_SPINOR_GHOST READ_SPINOR_GHOST_SINGLE
245 #define READ_SPINOR_UP READ_SPINOR_SINGLE_UP
246 #define READ_SPINOR_DOWN READ_SPINOR_SINGLE_DOWN
247 #define SPINORTEX param.in
248 #define GHOSTSPINORTEX param.ghost
249 #else // texture path
250 #define READ_SPINOR READ_SPINOR_SINGLE_TEX
251 #define READ_SPINOR_GHOST READ_SPINOR_GHOST_SINGLE_TEX
252 #define READ_SPINOR_UP READ_SPINOR_SINGLE_UP_TEX
253 #define READ_SPINOR_DOWN READ_SPINOR_SINGLE_DOWN_TEX
254 #ifdef USE_TEXTURE_OBJECTS
255 #define SPINORTEX param.inTex
256 #define GHOSTSPINORTEX param.ghostTex
257 #else // named textures
258 #define SPINORTEX spinorTexSingle
259 #define GHOSTSPINORTEX ghostSpinorTexSingle
260 #endif // USE_TEXTURE_OBJECTS
261 #endif // DIRECT_ACCESS_WILSON_SPINOR
262 #ifdef DIRECT_ACCESS_WILSON_INTER
263 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_SINGLE
264 #define INTERTEX param.out
265 #else // texture path
266 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_SINGLE_TEX
267 #ifdef USE_TEXTURE_OBJECTS
268 #define INTERTEX param.outTex
269 #else // named textures
270 #define INTERTEX interTexSingle
271 #endif // USE_TEXTURE_OBJECTS
272 #endif // DIRECT_ACCESS_WILSON_INTER
273 #define WRITE_SPINOR WRITE_SPINOR_FLOAT4
274 #if (DD_XPAY==1) // accumulator macros exist only for the Xpay variants
275 #ifdef DIRECT_ACCESS_WILSON_ACCUM
276 #define ACCUMTEX x // NOTE(review): bare x, unlike the param.* names used by the other direct-access macros -- confirm x is in scope in the core
277 #define READ_ACCUM READ_ACCUM_SINGLE
278 #else // texture path
279 #ifdef USE_TEXTURE_OBJECTS
280 #define ACCUMTEX param.xTex
281 #else // named textures
282 #define ACCUMTEX accumTexSingle
283 #endif // USE_TEXTURE_OBJECTS
284 #define READ_ACCUM READ_ACCUM_SINGLE_TEX
285 #endif // DIRECT_ACCESS_WILSON_ACCUM
286 #endif // DD_XPAY
287 
288 #define SPINOR_HOP 6
289 
290 #else // half-precision fields
291 
292 #define TPROJSCALE param.tProjScale_f // same scale member as single precision
293 
294 // half-precision gauge field
295 #ifdef DIRECT_ACCESS_LINK
296 #define GAUGE0TEX param.gauge0
297 #define GAUGE1TEX param.gauge1
298 #else // texture path
299 #ifdef USE_TEXTURE_OBJECTS
300 #define GAUGE0TEX param.gauge0Tex
301 #define GAUGE1TEX param.gauge1Tex
302 #else // named textures: 2-component for 18 reals, 4-component otherwise
303 #if (DD_RECON_F == 18)
304 #define GAUGE0TEX gauge0TexHalf2
305 #define GAUGE1TEX gauge1TexHalf2
306 #else // 8 or 12 reals
307 #define GAUGE0TEX gauge0TexHalf4
308 #define GAUGE1TEX gauge1TexHalf4
309 #endif // DD_RECON_F == 18
310 #endif // USE_TEXTURE_OBJECTS
311 #endif // DIRECT_ACCESS_LINK
312 
313 
314 // half-precision spinor fields
315 #ifdef DIRECT_ACCESS_WILSON_SPINOR
316 #define READ_SPINOR READ_SPINOR_HALF
317 #define READ_SPINOR_GHOST READ_SPINOR_GHOST_HALF
318 #define READ_SPINOR_UP READ_SPINOR_HALF_UP
319 #define READ_SPINOR_DOWN READ_SPINOR_HALF_DOWN
320 #define SPINORTEX param.in
321 #define GHOSTSPINORTEX param.ghost
322 #else // texture path
323 #define READ_SPINOR READ_SPINOR_HALF_TEX
324 #define READ_SPINOR_GHOST READ_SPINOR_GHOST_HALF_TEX
325 #define READ_SPINOR_UP READ_SPINOR_HALF_UP_TEX
326 #define READ_SPINOR_DOWN READ_SPINOR_HALF_DOWN_TEX
327 #ifdef USE_TEXTURE_OBJECTS
328 #define SPINORTEX param.inTex
329 #define GHOSTSPINORTEX param.ghostTex
330 #else // named textures
331 #define SPINORTEX spinorTexHalf
332 #define GHOSTSPINORTEX ghostSpinorTexHalf
333 #endif // USE_TEXTURE_OBJECTS
334 #endif // DIRECT_ACCESS_WILSON_SPINOR
335 #ifdef DIRECT_ACCESS_WILSON_INTER
336 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_HALF
337 #define INTERTEX param.out
338 #else // texture path
339 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_HALF_TEX
340 #ifdef USE_TEXTURE_OBJECTS
341 #define INTERTEX param.outTex
342 #else // named textures
343 #define INTERTEX interTexHalf
344 #endif // USE_TEXTURE_OBJECTS
345 #endif // DIRECT_ACCESS_WILSON_INTER
346 #define WRITE_SPINOR WRITE_SPINOR_SHORT4
347 #if (DD_XPAY==1) // accumulator macros exist only for the Xpay variants
348 #ifdef DIRECT_ACCESS_WILSON_ACCUM
349 #define ACCUMTEX x // NOTE(review): bare x, unlike the param.* names used by the other direct-access macros -- confirm x is in scope in the core
350 #define READ_ACCUM READ_ACCUM_HALF
351 #else // texture path
352 #ifdef USE_TEXTURE_OBJECTS
353 #define ACCUMTEX param.xTex
354 #else // named textures
355 #define ACCUMTEX accumTexHalf
356 #endif // USE_TEXTURE_OBJECTS
357 #define READ_ACCUM READ_ACCUM_HALF_TEX
358 #endif // DIRECT_ACCESS_WILSON_ACCUM
359 
360 #endif // DD_XPAY
361 
362 #define SPINOR_HOP 6
363 
364 #endif // DD_PREC
365 
366 #define DD_CONCAT(n,p,r,d,x) n ## p ## r ## d ## x ## Kernel // pastes the five fields plus the literal suffix "Kernel" into one identifier
367 #define DD_FUNC(n,p,r,d,x) DD_CONCAT(n,p,r,d,x) // extra level so macro arguments are expanded before ## pastes them
368 
369 // define the kernel
370 
371 template <KernelType kernel_type>
373  (const DslashParam param) {
374 
375 #ifdef GPU_DOMAIN_WALL_DIRAC
376 #if DD_DAG
377 #include "dw_dslash_dagger_core.h"
378 #else
379 #include "dw_dslash_core.h"
380 #endif
381 #endif
382 
383 }
384 
385 #ifdef MULTI_GPU
386 template <>
388  (const DslashParam param) {
389 
390 #ifdef GPU_DOMAIN_WALL_DIRAC
391 #if DD_DAG
393 #else
395 #endif
396 #endif
397 }
398 #endif // MULTI_GPU
399 
400 // clean up
401 
402 #undef DD_NAME_F // name-fragment macros: redefined at the top of every pass
403 #undef DD_PREC_F
404 #undef DD_RECON_F
405 #undef DD_DAG_F
406 #undef DD_XPAY_F
407 #undef DD_CONCAT
408 #undef DD_FUNC
409 
410 #undef DSLASH_XPAY // only defined on Xpay passes; #undef of an undefined name is a no-op
411 #undef ASSN_GAUGE_MATRIX
412 #undef RECONSTRUCT_GAUGE_MATRIX
413 #undef GAUGE0TEX
414 #undef GAUGE1TEX
415 #undef READ_SPINOR
416 #undef READ_SPINOR_GHOST
417 #undef READ_SPINOR_UP
418 #undef READ_SPINOR_DOWN
419 #undef SPINORTEX
420 #undef GHOSTSPINORTEX
421 #undef READ_INTERMEDIATE_SPINOR
422 #undef INTERTEX
423 #undef READ_ACCUM // only defined on Xpay passes
424 #undef ACCUMTEX // only defined on Xpay passes
425 #undef WRITE_SPINOR
426 #undef GAUGE_FLOAT2 // only defined for 18 reals or double precision
427 #undef SPINOR_DOUBLE // only defined for double precision
428 
429 #undef SPINOR_HOP
430 
431 #undef TPROJSCALE
432 
433 // prepare next set of options, or clean up after final iteration
434 
435 #if (DD_DAG==0) // DD_DAG is the fastest-varying option: 0 -> 1, no carry
436 #undef DD_DAG
437 #define DD_DAG 1
438 #else // DD_DAG wraps to 0 and carries into DD_XPAY
439 #undef DD_DAG
440 #define DD_DAG 0
441 
442 #if (DD_XPAY==0) // 0 -> 1, no carry
443 #undef DD_XPAY
444 #define DD_XPAY 1
445 #else // DD_XPAY wraps to 0 and carries into DD_RECON
446 #undef DD_XPAY
447 #define DD_XPAY 0
448 
449 #if (DD_RECON==0) // 8 -> 12 reals
450 #undef DD_RECON
451 #define DD_RECON 1
452 #elif (DD_RECON==1) // 12 -> 18 reals
453 #undef DD_RECON
454 #define DD_RECON 2
455 #else // DD_RECON wraps to 0 and carries into DD_PREC
456 #undef DD_RECON
457 #define DD_RECON 0
458 
459 #if (DD_PREC==0) // double -> single
460 #undef DD_PREC
461 #define DD_PREC 1
462 #elif (DD_PREC==1) // single -> half
463 #undef DD_PREC
464 #define DD_PREC 2
465 
466 #else // every option has wrapped: all 36 variants are done
467 
468 #undef DD_LOOP // undefining DD_LOOP stops the re-include below
469 #undef DD_DAG
470 #undef DD_XPAY
471 #undef DD_RECON
472 #undef DD_PREC
473 
474 #endif // DD_PREC
475 #endif // DD_RECON
476 #endif // DD_XPAY
477 #endif // DD_DAG
478 
479 #ifdef DD_LOOP // still defined => variants remain, so this file includes itself for the next pass
480 #include "dw_dslash_def.h"
481 #endif // DD_LOOP
#define DD_NAME_F
Definition: dw_dslash_def.h:40
QudaGaugeParam param
Definition: pack_test.cpp:17
#define DD_DAG_F
Definition: dw_dslash_def.h:43
#define DD_XPAY_F
Definition: dw_dslash_def.h:49
#define DD_PREC_F
Definition: dw_dslash_def.h:56
#define DD_FUNC(n, p, r, d, x)
#define DD_RECON_F
Definition: dw_dslash_def.h:64