QUDA  0.9.0
mdw_dslash5_def.h
Go to the documentation of this file.
1 // mdw_dslash5_def.h - MDWF Dslash5 kernel definitions
2 
3 // There are currently 36 different variants of the MDWF Dslash5
4 // kernel, each one characterized by a set of 4 options (precision, reconstruct,
5 // xpay, dagger), where each option can take one of several values (3*3*2*2 = 36).
6 // This file is structured so that the C preprocessor loops through all 36
7 // variants (in a manner resembling a counter), sets the appropriate
8 // macros, and defines the corresponding functions.
9 //
10 // As an example of the function naming conventions, consider
11 //
12 // MDWFDslash5S12DaggerXpayKernel(const DslashParam param).
13 //
14 // This is a single-precision MDWF Dslash5^dagger kernel where the result is
15 // multiplied by "a" and summed with an input vector (Xpay), and the
16 // gauge matrix is reconstructed from 12 real numbers. More
17 // generally, each function name is given by the concatenation of the
18 // following 5 fields, with "Kernel" at the end:
19 //
20 // DD_NAME_F = MDWFDslash5
21 // DD_PREC_F = D, S, H (double, single, or half precision fields)
22 // DD_RECON_F = 8, 12, 18
23 // DD_DAG_F = Dagger, [blank]
24 // DD_XPAY_F = Xpay, [blank]
25 //
26 // In addition, each kernel is templated on a KernelType parameter.
27 
28 // initialize on first iteration: DD_LOOP being defined marks that the self-include
29 // loop is in progress, so the four option counters are only reset to 0 on first entry
30 #ifndef DD_LOOP
31 #define DD_LOOP
32 #define DD_DAG 0 // 0 = no dagger, otherwise dagger
33 #define DD_XPAY 0 // 0 = no xpay, otherwise xpay
34 #define DD_RECON 0 // 0 = reconstruct 8, 1 = reconstruct 12, otherwise 18
35 #define DD_PREC 0 // 0 = double, 1 = single, otherwise half
36 #endif // DD_LOOP
37 
38 // set options for current iteration
39 // (each DD_*_F macro below is one fragment of the kernel name pasted together by DD_FUNC)
40 #define DD_NAME_F MDWFDslash5
41 
42 #if (DD_DAG==0) // no dagger
43 #define DD_DAG_F // empty name fragment
44 #else // dagger
45 #define DD_DAG_F Dagger
46 #endif // DD_DAG
47 
48 #if (DD_XPAY==0) // no xpay
49 #define DD_XPAY_F // empty name fragment
50 #else // xpay
51 #define DSLASH_XPAY // not tested in this file; presumably read by the included core - TODO confirm
52 #define DD_XPAY_F Xpay
53 #endif // DD_XPAY
54 
55 #if (DD_PREC == 0) // double precision
56 #define DD_PREC_F D
57 #elif (DD_PREC == 1) // single precision
58 #define DD_PREC_F S
59 #else // half precision
60 #define DD_PREC_F H
61 #endif // DD_PREC
62 // Select the gauge-link macros for this variant: RECONSTRUCT_GAUGE_MATRIX depends on DD_RECON and DD_PREC, ASSN_GAUGE_MATRIX additionally on DIRECT_ACCESS_LINK (_TEX form when it is not defined)
63 #if (DD_RECON==0) // reconstruct from 8 reals
64 #define DD_RECON_F 8
65 
66 #if (DD_PREC==0) // double
67 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_DOUBLE
68 #ifdef DIRECT_ACCESS_LINK
69 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_8_DOUBLE2
70 #else
71 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_8_DOUBLE2_TEX
72 #endif // DIRECT_ACCESS_LINK
73 
74 #elif (DD_PREC==1) // single
75 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_SINGLE
76 #ifdef DIRECT_ACCESS_LINK
77 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_8_FLOAT4
78 #else
79 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_8_FLOAT4_TEX
80 #endif // DIRECT_ACCESS_LINK
81 
82 #else // half: reuses the single-precision reconstruct macro, only the assignment macro differs
83 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_SINGLE
84 #ifdef DIRECT_ACCESS_LINK
85 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_8_SHORT4
86 #else
87 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_8_SHORT4_TEX
88 #endif // DIRECT_ACCESS_LINK
89 #endif // DD_PREC
90 #elif (DD_RECON==1) // reconstruct from 12 reals
91 #define DD_RECON_F 12
92 
93 #if (DD_PREC==0) // double
94 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_DOUBLE
95 #ifdef DIRECT_ACCESS_LINK
96 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_12_DOUBLE2
97 #else
98 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_12_DOUBLE2_TEX
99 #endif // DIRECT_ACCESS_LINK
100 
101 #elif (DD_PREC==1) // single
102 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_SINGLE
103 #ifdef DIRECT_ACCESS_LINK
104 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_12_FLOAT4
105 #else
106 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_12_FLOAT4_TEX
107 #endif // DIRECT_ACCESS_LINK
108 
109 #else // half: reuses the single-precision reconstruct macro
110 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_SINGLE
111 #ifdef DIRECT_ACCESS_LINK
112 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_12_SHORT4
113 #else
114 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_12_SHORT4_TEX
115 #endif // DIRECT_ACCESS_LINK
116 #endif // DD_PREC
117 #else // no reconstruct, load all components
118 #define DD_RECON_F 18
119 #define GAUGE_FLOAT2 // set for every 18-component variant (the double-precision branch below also sets it)
120 #if (DD_PREC==0) // double
121 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_DOUBLE
122 #ifdef DIRECT_ACCESS_LINK
123 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_18_DOUBLE2
124 #else
125 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_18_DOUBLE2_TEX
126 #endif // DIRECT_ACCESS_LINK
127 
128 #elif (DD_PREC==1) // single
129 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_SINGLE
130 #ifdef DIRECT_ACCESS_LINK
131 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_18_FLOAT2
132 #else
133 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_18_FLOAT2_TEX
134 #endif // DIRECT_ACCESS_LINK
135 
136 #else // half: reuses the single-precision reconstruct macro
137 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_SINGLE
138 #ifdef DIRECT_ACCESS_LINK
139 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_18_SHORT2
140 #else
141 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_18_SHORT2_TEX
142 #endif // DIRECT_ACCESS_LINK
143 #endif // DD_PREC
144 #endif // DD_RECON
145 // Bind the field-access macros (gauge/spinor sources, readers, writer, SPINOR_HOP, TPROJSCALE) for the current precision; FERMI_NO_DBLE_TEX is only consulted in the double branch
146 #if (DD_PREC==0) // double-precision fields
147 
148 #define TPROJSCALE param.tProjScale
149 
150 // double-precision gauge field
151 #if (defined DIRECT_ACCESS_LINK) || (defined FERMI_NO_DBLE_TEX)
152 #define GAUGE0TEX param.gauge0
153 #define GAUGE1TEX param.gauge1
154 #else
155 #ifdef USE_TEXTURE_OBJECTS
156 #define GAUGE0TEX param.gauge0Tex
157 #define GAUGE1TEX param.gauge1Tex
158 #else
159 #define GAUGE0TEX gauge0TexDouble2
160 #define GAUGE1TEX gauge1TexDouble2
161 #endif // USE_TEXTURE_OBJECTS
162 #endif // DIRECT_ACCESS_LINK || FERMI_NO_DBLE_TEX
163 
164 #define GAUGE_FLOAT2 // always set in double precision, whatever DD_RECON is
165 
166 // double-precision spinor fields
167 #if (defined DIRECT_ACCESS_WILSON_SPINOR) || (defined FERMI_NO_DBLE_TEX)
168 #define READ_SPINOR READ_SPINOR_DOUBLE
169 #define READ_SPINOR_UP READ_SPINOR_DOUBLE_UP
170 #define READ_SPINOR_DOWN READ_SPINOR_DOUBLE_DOWN
171 #define SPINORTEX param.in
172 #define GHOSTSPINORTEX param.ghost
173 #else
174 #define READ_SPINOR READ_SPINOR_DOUBLE_TEX
175 #define READ_SPINOR_UP READ_SPINOR_DOUBLE_UP_TEX
176 #define READ_SPINOR_DOWN READ_SPINOR_DOUBLE_DOWN_TEX
177 #ifdef USE_TEXTURE_OBJECTS
178 #define SPINORTEX param.inTex
179 #define GHOSTSPINORTEX param.ghostTex
180 #else
181 #define SPINORTEX spinorTexDouble
182 #define GHOSTSPINORTEX ghostSpinorTexDouble
183 #endif // USE_TEXTURE_OBJECTS
184 #endif // DIRECT_ACCESS_WILSON_SPINOR || FERMI_NO_DBLE_TEX
185 #if (defined DIRECT_ACCESS_WILSON_INTER) || (defined FERMI_NO_DBLE_TEX)
186 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_DOUBLE
187 #define INTERTEX param.out
188 #else
189 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_DOUBLE_TEX
190 #ifdef USE_TEXTURE_OBJECTS
191 #define INTERTEX param.outTex
192 #else
193 #define INTERTEX interTexDouble
194 #endif // USE_TEXTURE_OBJECTS
195 #endif // DIRECT_ACCESS_WILSON_INTER || FERMI_NO_DBLE_TEX
196 #define WRITE_SPINOR WRITE_SPINOR_DOUBLE2
197 #define SPINOR_DOUBLE // only defined in this (double) branch
198 #if (DD_XPAY==1) // ACCUMTEX / READ_ACCUM exist only for the Xpay variants
199 #if (defined DIRECT_ACCESS_WILSON_ACCUM) || (defined FERMI_NO_DBLE_TEX)
200 #define ACCUMTEX param.x
201 #define READ_ACCUM READ_ACCUM_DOUBLE
202 #else
203 #ifdef USE_TEXTURE_OBJECTS
204 #define ACCUMTEX param.xTex
205 #else
206 #define ACCUMTEX accumTexDouble
207 #endif // USE_TEXTURE_OBJECTS
208 #define READ_ACCUM READ_ACCUM_DOUBLE_TEX
209 #endif // DIRECT_ACCESS_WILSON_ACCUM || FERMI_NO_DBLE_TEX
210 
211 #endif // DD_XPAY
212 
213 #define SPINOR_HOP 12
214 
215 #elif (DD_PREC==1) // single-precision fields
216 
217 #define TPROJSCALE param.tProjScale_f
218 
219 // single-precision gauge field
220 #ifdef DIRECT_ACCESS_LINK
221 #define GAUGE0TEX param.gauge0
222 #define GAUGE1TEX param.gauge1
223 #else
224 #ifdef USE_TEXTURE_OBJECTS
225 #define GAUGE0TEX param.gauge0Tex
226 #define GAUGE1TEX param.gauge1Tex
227 #else
228 #if (DD_RECON_F == 18)
229 #define GAUGE0TEX gauge0TexSingle2
230 #define GAUGE1TEX gauge1TexSingle2
231 #else
232 #define GAUGE0TEX gauge0TexSingle4
233 #define GAUGE1TEX gauge1TexSingle4
234 #endif // DD_RECON_F == 18
235 #endif // USE_TEXTURE_OBJECTS
236 #endif // DIRECT_ACCESS_LINK
237 
238 
239 // single-precision spinor fields
240 #ifdef DIRECT_ACCESS_WILSON_SPINOR
241 #define READ_SPINOR READ_SPINOR_SINGLE
242 #define READ_SPINOR_UP READ_SPINOR_SINGLE_UP
243 #define READ_SPINOR_DOWN READ_SPINOR_SINGLE_DOWN
244 #define SPINORTEX param.in
245 #define GHOSTSPINORTEX param.ghost
246 #else
247 #define READ_SPINOR READ_SPINOR_SINGLE_TEX
248 #define READ_SPINOR_UP READ_SPINOR_SINGLE_UP_TEX
249 #define READ_SPINOR_DOWN READ_SPINOR_SINGLE_DOWN_TEX
250 #ifdef USE_TEXTURE_OBJECTS
251 #define SPINORTEX param.inTex
252 #define GHOSTSPINORTEX param.ghostTex
253 #else
254 #define SPINORTEX spinorTexSingle
255 #define GHOSTSPINORTEX ghostSpinorTexSingle
256 #endif // USE_TEXTURE_OBJECTS
257 #endif // DIRECT_ACCESS_WILSON_SPINOR
258 #ifdef DIRECT_ACCESS_WILSON_INTER
259 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_SINGLE
260 #define INTERTEX param.out
261 #else
262 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_SINGLE_TEX
263 #ifdef USE_TEXTURE_OBJECTS
264 #define INTERTEX param.outTex
265 #else
266 #define INTERTEX interTexSingle
267 #endif // USE_TEXTURE_OBJECTS
268 #endif // DIRECT_ACCESS_WILSON_INTER
269 #define WRITE_SPINOR WRITE_SPINOR_FLOAT4
270 #if (DD_XPAY==1) // ACCUMTEX / READ_ACCUM exist only for the Xpay variants
271 #ifdef DIRECT_ACCESS_WILSON_ACCUM
272 #define ACCUMTEX param.x
273 #define READ_ACCUM READ_ACCUM_SINGLE
274 #else
275 #ifdef USE_TEXTURE_OBJECTS
276 #define ACCUMTEX param.xTex
277 #else
278 #define ACCUMTEX accumTexSingle
279 #endif // USE_TEXTURE_OBJECTS
280 #define READ_ACCUM READ_ACCUM_SINGLE_TEX
281 #endif // DIRECT_ACCESS_WILSON_ACCUM
282 #endif // DD_XPAY
283 
284 #define SPINOR_HOP 6
285 
286 #else // half-precision fields
287 
288 #define TPROJSCALE param.tProjScale_f
289 
290 // half-precision gauge field
291 #ifdef DIRECT_ACCESS_LINK
292 #define GAUGE0TEX param.gauge0
293 #define GAUGE1TEX param.gauge1
294 #else
295 #ifdef USE_TEXTURE_OBJECTS
296 #define GAUGE0TEX param.gauge0Tex
297 #define GAUGE1TEX param.gauge1Tex
298 #else
299 #if (DD_RECON_F == 18)
300 #define GAUGE0TEX gauge0TexHalf2
301 #define GAUGE1TEX gauge1TexHalf2
302 #else
303 #define GAUGE0TEX gauge0TexHalf4
304 #define GAUGE1TEX gauge1TexHalf4
305 #endif // DD_RECON_F == 18
306 #endif // USE_TEXTURE_OBJECTS
307 #endif // DIRECT_ACCESS_LINK
308 
309 
310 // half-precision spinor fields
311 #ifdef DIRECT_ACCESS_WILSON_SPINOR
312 #define READ_SPINOR READ_SPINOR_HALF
313 #define READ_SPINOR_UP READ_SPINOR_HALF_UP
314 #define READ_SPINOR_DOWN READ_SPINOR_HALF_DOWN
315 #define SPINORTEX param.in
316 #define GHOSTSPINORTEX param.ghost
317 #else
318 #define READ_SPINOR READ_SPINOR_HALF_TEX
319 #define READ_SPINOR_UP READ_SPINOR_HALF_UP_TEX
320 #define READ_SPINOR_DOWN READ_SPINOR_HALF_DOWN_TEX
321 #ifdef USE_TEXTURE_OBJECTS
322 #define SPINORTEX param.inTex
323 #define GHOSTSPINORTEX param.ghostTex
324 #else
325 #define SPINORTEX spinorTexHalf
326 #define GHOSTSPINORTEX ghostSpinorTexHalf
327 #endif // USE_TEXTURE_OBJECTS
328 #endif // DIRECT_ACCESS_WILSON_SPINOR
329 #ifdef DIRECT_ACCESS_WILSON_INTER
330 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_HALF
331 #define INTERTEX param.out
332 #else
333 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_HALF_TEX
334 #ifdef USE_TEXTURE_OBJECTS
335 #define INTERTEX param.outTex
336 #else
337 #define INTERTEX interTexHalf
338 #endif // USE_TEXTURE_OBJECTS
339 #endif // DIRECT_ACCESS_WILSON_INTER
340 #define WRITE_SPINOR WRITE_SPINOR_SHORT4
341 #if (DD_XPAY==1) // ACCUMTEX / READ_ACCUM exist only for the Xpay variants
342 #ifdef DIRECT_ACCESS_WILSON_ACCUM
343 #define ACCUMTEX param.x
344 #define READ_ACCUM READ_ACCUM_HALF
345 #else
346 #ifdef USE_TEXTURE_OBJECTS
347 #define ACCUMTEX param.xTex
348 #else
349 #define ACCUMTEX accumTexHalf
350 #endif // USE_TEXTURE_OBJECTS
351 #define READ_ACCUM READ_ACCUM_HALF_TEX
352 #endif // DIRECT_ACCESS_WILSON_ACCUM
353 
354 #endif // DD_XPAY
355 
356 #define SPINOR_HOP 6
357 
358 #endif // DD_PREC
359 // Two-level paste: DD_FUNC lets its arguments be macro-expanded first, then DD_CONCAT joins the results with ## and appends "Kernel"
360 #define DD_CONCAT(n,p,r,d,x) n ## p ## r ## d ## x ## Kernel
361 #define DD_FUNC(n,p,r,d,x) DD_CONCAT(n,p,r,d,x)
362 
363 // define the kernel: its name is pasted together from the DD_*_F fragments of the
364 // current iteration, and its body is whichever Dslash5 core file gets included below
365 template <KernelType kernel_type> // kernel_type is not used in this file; presumably read by the included core - TODO confirm
366 __global__ void DD_FUNC(DD_NAME_F, DD_PREC_F, DD_RECON_F, DD_DAG_F, DD_XPAY_F) // NOTE(review): declarator was absent from this listing and is reconstructed - confirm against the upstream header
367  (const DslashParam param) {
368 
369 #ifdef GPU_DOMAIN_WALL_DIRAC // when this is not defined the kernel body is empty
370 #define MDWF_mode 2 // meaning of the value is defined by the core file, not visible here
371 #define YPAX 1 // likewise interpreted by the core file
372 #if DD_DAG
373 #include "dw_dslash5_dagger_core.h"
374 #else
375 #include "dw_dslash5_core.h"
376 #endif // DD_DAG
377 #undef YPAX
378 #undef MDWF_mode
379 #endif // GPU_DOMAIN_WALL_DIRAC
380 
381 }
382 
383 // clean up: remove every per-variant macro so the next iteration can define it afresh
384 // (first the name fragments and the paste helpers)
385 #undef DD_NAME_F
386 #undef DD_PREC_F
387 #undef DD_RECON_F
388 #undef DD_DAG_F
389 #undef DD_XPAY_F
390 #undef DD_CONCAT
391 #undef DD_FUNC
392 // Some of the following (DSLASH_XPAY, READ_ACCUM, ACCUMTEX, GAUGE_FLOAT2, SPINOR_DOUBLE) are only defined for certain variants; #undef of an undefined macro is a no-op
393 #undef DSLASH_XPAY
394 #undef ASSN_GAUGE_MATRIX
395 #undef RECONSTRUCT_GAUGE_MATRIX
396 #undef GAUGE0TEX
397 #undef GAUGE1TEX
398 #undef READ_SPINOR
399 #undef READ_SPINOR_UP
400 #undef READ_SPINOR_DOWN
401 #undef SPINORTEX
402 #undef GHOSTSPINORTEX
403 #undef READ_INTERMEDIATE_SPINOR
404 #undef INTERTEX
405 #undef READ_ACCUM
406 #undef ACCUMTEX
407 #undef WRITE_SPINOR
408 #undef GAUGE_FLOAT2
409 #undef SPINOR_DOUBLE
410 
411 #undef SPINOR_HOP
412 
413 #undef TPROJSCALE
414 
415 // prepare next set of options, or clean up after final iteration
416 // (counter with carry: DD_DAG changes fastest, then DD_XPAY, DD_RECON, DD_PREC)
417 #if (DD_DAG==0)
418 #undef DD_DAG
419 #define DD_DAG 1
420 #else // DD_DAG wraps back to 0 and carries into DD_XPAY
421 #undef DD_DAG
422 #define DD_DAG 0
423 
424 #if (DD_XPAY==0)
425 #undef DD_XPAY
426 #define DD_XPAY 1
427 #else // DD_XPAY wraps back to 0 and carries into DD_RECON
428 #undef DD_XPAY
429 #define DD_XPAY 0
430 
431 #if (DD_RECON==0)
432 #undef DD_RECON
433 #define DD_RECON 1
434 #elif (DD_RECON==1)
435 #undef DD_RECON
436 #define DD_RECON 2
437 #else // DD_RECON wraps back to 0 and carries into DD_PREC
438 #undef DD_RECON
439 #define DD_RECON 0
440 
441 #if (DD_PREC==0)
442 #undef DD_PREC
443 #define DD_PREC 1
444 #elif (DD_PREC==1)
445 #undef DD_PREC
446 #define DD_PREC 2
447 
448 #else // every combination has been emitted: drop all loop state so the self-include below is skipped
449 
450 #undef DD_LOOP
451 #undef DD_DAG
452 #undef DD_XPAY
453 #undef DD_RECON
454 #undef DD_PREC
455 
456 #endif // DD_PREC
457 #endif // DD_RECON
458 #endif // DD_XPAY
459 #endif // DD_DAG
460 
461 #ifdef DD_LOOP // still set unless the final iteration just finished
462 #include "mdw_dslash5_def.h" // re-include this file to emit the next variant
463 #endif // DD_LOOP
#define DD_DAG_F
#define DD_RECON_F
#define DD_XPAY_F
QudaGaugeParam param
Definition: pack_test.cpp:17
#define DD_FUNC(n, p, r, d, x)
#define DD_NAME_F
#define DD_PREC_F