QUDA  0.9.0
mdw_dslash5inv_def.h
Go to the documentation of this file.
1 // mdw_dslash5inv_def.h - MDWF Dslash5inv kernel definitions
2 
3 // There are currently 36 different variants of the MDWF Dslash5inv
4 // kernel, each one characterized by a set of 4 options,
5 // where each option can take one of several values (3*2*2*3 = 36).
6 // This file is structured so that the C preprocessor loops through all 36
7 // variants (in a manner resembling a counter), sets the appropriate
8 // macros, and defines the corresponding functions.
9 //
10 // As an example of the function naming conventions, consider
11 //
12 // MDWFDslash5invS12DaggerXpayKernel(const DslashParam param).
13 //
14 // This is a single-precision MDWF Dslash5inv^dagger kernel where the
15 // result is multiplied by "a" and summed with an input vector (Xpay),
16 // and the gauge matrix is reconstructed from 12 real numbers. More
17 // generally, each function name is given by the concatenation of the
18 // following 5 fields, in this order, with "Kernel" at the end:
19 //
20 // DD_NAME_F = MDWFDslash5inv
21 // DD_PREC_F = D, S, H (double, single, or half precision)
22 // DD_RECON_F = 8, 12, 18
23 // DD_DAG_F = Dagger, [blank]
24 // DD_XPAY_F = Xpay, [blank]
25 //
26 // The kernels are templated on KernelType, not on the precision.
27 
28 // initialize on first iteration
29 
30 #ifndef DD_LOOP // first pass only: later passes arrive with DD_LOOP and the four counters already set
31 #define DD_LOOP
32 #define DD_DAG 0
33 #define DD_XPAY 0
34 #define DD_RECON 0
35 #define DD_PREC 0
36 #endif // DD_LOOP
37 
38 // set options for current iteration
39 
40 #define DD_NAME_F MDWFDslash5inv // fixed prefix of every generated kernel name
41 
42 #if (DD_DAG==0) // no dagger
43 #define DD_DAG_F
44 #else // dagger
45 #define DD_DAG_F Dagger
46 #endif // DD_DAG
47 
48 #if (DD_XPAY==0) // no xpay
49 #define DD_XPAY_F
50 #else // xpay: besides the name fragment, DSLASH_XPAY is defined while the kernel body is included
51 #define DSLASH_XPAY
52 #define DD_XPAY_F Xpay
53 #endif // DD_XPAY
54 
55 #if (DD_PREC == 0) // double precision
56 #define DD_PREC_F D
57 #elif (DD_PREC == 1) // single precision
58 #define DD_PREC_F S
59 #else // half precision (DD_PREC == 2)
60 #define DD_PREC_F H
61 #endif // DD_PREC
62 
63 #if (DD_RECON==0) // reconstruct from 8 reals
64 #define DD_RECON_F 8
65 
66 #if (DD_PREC==0) // double precision
67 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_DOUBLE
68 #ifdef DIRECT_ACCESS_LINK
69 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_8_DOUBLE2
70 #else
71 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_8_DOUBLE2_TEX
72 #endif // DIRECT_ACCESS_LINK
73 
74 #elif (DD_PREC==1) // single precision
75 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_SINGLE
76 #ifdef DIRECT_ACCESS_LINK
77 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_8_FLOAT4
78 #else
79 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_8_FLOAT4_TEX
80 #endif // DIRECT_ACCESS_LINK
81 
82 #else // half precision: same _SINGLE reconstruct macro as above, only the ASSN macro (SHORT4) differs
83 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_SINGLE
84 #ifdef DIRECT_ACCESS_LINK
85 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_8_SHORT4
86 #else
87 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_8_SHORT4_TEX
88 #endif // DIRECT_ACCESS_LINK
89 #endif // DD_PREC
90 #elif (DD_RECON==1) // reconstruct from 12 reals
91 #define DD_RECON_F 12
92 
93 #if (DD_PREC==0) // double precision
94 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_DOUBLE
95 #ifdef DIRECT_ACCESS_LINK
96 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_12_DOUBLE2
97 #else
98 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_12_DOUBLE2_TEX
99 #endif // DIRECT_ACCESS_LINK
100 
101 #elif (DD_PREC==1) // single precision
102 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_SINGLE
103 #ifdef DIRECT_ACCESS_LINK
104 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_12_FLOAT4
105 #else
106 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_12_FLOAT4_TEX
107 #endif // DIRECT_ACCESS_LINK
108 
109 #else // half precision: same _SINGLE reconstruct macro as above, only the ASSN macro (SHORT4) differs
110 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_SINGLE
111 #ifdef DIRECT_ACCESS_LINK
112 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_12_SHORT4
113 #else
114 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_12_SHORT4_TEX
115 #endif // DIRECT_ACCESS_LINK
116 #endif // DD_PREC
117 #else // no reconstruct, load all components
118 #define DD_RECON_F 18
119 #define GAUGE_FLOAT2 // also defined (identically) by the double-precision field section further down
120 #if (DD_PREC==0) // double precision
121 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_DOUBLE
122 #ifdef DIRECT_ACCESS_LINK
123 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_18_DOUBLE2
124 #else
125 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_18_DOUBLE2_TEX
126 #endif // DIRECT_ACCESS_LINK
127 
128 #elif (DD_PREC==1) // single precision
129 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_SINGLE
130 #ifdef DIRECT_ACCESS_LINK
131 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_18_FLOAT2
132 #else
133 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_18_FLOAT2_TEX
134 #endif // DIRECT_ACCESS_LINK
135 
136 #else // half precision: same _SINGLE reconstruct macro as above, only the ASSN macro (SHORT2) differs
137 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_SINGLE
138 #ifdef DIRECT_ACCESS_LINK
139 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_18_SHORT2
140 #else
141 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_18_SHORT2_TEX
142 #endif //DIRECT_ACCESS_LINK
143 #endif // DD_PREC
144 #endif // DD_RECON
145 
146 #if (DD_PREC==0) // double-precision fields
147 
148 #define TPROJSCALE param.tProjScale
149 
150 // double-precision gauge field
151 #if (defined DIRECT_ACCESS_LINK) || (defined FERMI_NO_DBLE_TEX)
152 #define GAUGE0TEX param.gauge0
153 #define GAUGE1TEX param.gauge1
154 #else
155 #ifdef USE_TEXTURE_OBJECTS
156 #define GAUGE0TEX param.gauge0Tex
157 #define GAUGE1TEX param.gauge1Tex
158 #else
159 #define GAUGE0TEX gauge0TexDouble2
160 #define GAUGE1TEX gauge1TexDouble2
161 #endif // USE_TEXTURE_OBJECTS
162 #endif // DIRECT_ACCESS_LINK || FERMI_NO_DBLE_TEX
163 
164 #define GAUGE_FLOAT2
165 
166 // double-precision spinor fields
167 #if (defined DIRECT_ACCESS_WILSON_SPINOR) || (defined FERMI_NO_DBLE_TEX)
168 #define READ_SPINOR READ_SPINOR_DOUBLE
169 #define READ_SPINOR_UP READ_SPINOR_DOUBLE_UP
170 #define READ_SPINOR_DOWN READ_SPINOR_DOUBLE_DOWN
171 #define SPINORTEX param.in
172 #define GHOSTSPINORTEX param.ghost
173 #else // NOTE: GHOSTSPINORTEX is not defined on this path
174 #define READ_SPINOR READ_SPINOR_DOUBLE_TEX
175 #define READ_SPINOR_UP READ_SPINOR_DOUBLE_UP_TEX
176 #define READ_SPINOR_DOWN READ_SPINOR_DOUBLE_DOWN_TEX
177 #ifdef USE_TEXTURE_OBJECTS
178 #define SPINORTEX param.inTex
179 #else
180 #define SPINORTEX spinorTexDouble
181 #endif // USE_TEXTURE_OBJECTS
182 #endif // DIRECT_ACCESS_WILSON_SPINOR || FERMI_NO_DBLE_TEX
183 #if (defined DIRECT_ACCESS_WILSON_INTER) || (defined FERMI_NO_DBLE_TEX)
184 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_DOUBLE
185 #define INTERTEX param.out
186 #else
187 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_DOUBLE_TEX
188 #ifdef USE_TEXTURE_OBJECTS
189 #define INTERTEX param.outTex
190 #else
191 #define INTERTEX interTexDouble
192 #endif // USE_TEXTURE_OBJECTS
193 #endif // DIRECT_ACCESS_WILSON_INTER || FERMI_NO_DBLE_TEX
194 #define WRITE_SPINOR WRITE_SPINOR_DOUBLE2
195 #define SPINOR_DOUBLE
196 #if (DD_XPAY==1) // accumulator macros exist only for the Xpay variants
197 #if (defined DIRECT_ACCESS_WILSON_ACCUM) || (defined FERMI_NO_DBLE_TEX)
198 #define ACCUMTEX param.x
199 #define READ_ACCUM READ_ACCUM_DOUBLE
200 #else
201 #ifdef USE_TEXTURE_OBJECTS
202 #define ACCUMTEX param.xTex
203 #else
204 #define ACCUMTEX accumTexDouble
205 #endif // USE_TEXTURE_OBJECTS
206 #define READ_ACCUM READ_ACCUM_DOUBLE_TEX
207 #endif // DIRECT_ACCESS_WILSON_ACCUM || FERMI_NO_DBLE_TEX
208 
209 #endif // DD_XPAY
210 
211 #define SPINOR_HOP 12
212 
213 #elif (DD_PREC==1) // single-precision fields
214 
215 #define TPROJSCALE param.tProjScale_f
216 
217 // single-precision gauge field
218 #ifdef DIRECT_ACCESS_LINK
219 #define GAUGE0TEX param.gauge0
220 #define GAUGE1TEX param.gauge1
221 #else
222 #ifdef USE_TEXTURE_OBJECTS
223 #define GAUGE0TEX param.gauge0Tex
224 #define GAUGE1TEX param.gauge1Tex
225 #else
226 #if (DD_RECON_F == 18)
227 #define GAUGE0TEX gauge0TexSingle2
228 #define GAUGE1TEX gauge1TexSingle2
229 #else // DD_RECON_F is 8 or 12
230 #define GAUGE0TEX gauge0TexSingle4
231 #define GAUGE1TEX gauge1TexSingle4
232 #endif // DD_RECON_F == 18
233 #endif // USE_TEXTURE_OBJECTS
234 #endif // DIRECT_ACCESS_LINK
235 
236 
237 // single-precision spinor fields
238 #ifdef DIRECT_ACCESS_WILSON_SPINOR
239 #define READ_SPINOR READ_SPINOR_SINGLE
240 #define READ_SPINOR_UP READ_SPINOR_SINGLE_UP
241 #define READ_SPINOR_DOWN READ_SPINOR_SINGLE_DOWN
242 #define SPINORTEX param.in
243 #define GHOSTSPINORTEX param.ghost
244 #else // NOTE: GHOSTSPINORTEX is not defined on this path
245 #define READ_SPINOR READ_SPINOR_SINGLE_TEX
246 #define READ_SPINOR_UP READ_SPINOR_SINGLE_UP_TEX
247 #define READ_SPINOR_DOWN READ_SPINOR_SINGLE_DOWN_TEX
248 #ifdef USE_TEXTURE_OBJECTS
249 #define SPINORTEX param.inTex
250 #else
251 #define SPINORTEX spinorTexSingle
252 #endif // USE_TEXTURE_OBJECTS
253 #endif // DIRECT_ACCESS_WILSON_SPINOR
254 #ifdef DIRECT_ACCESS_WILSON_INTER
255 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_SINGLE
256 #define INTERTEX param.out
257 #else
258 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_SINGLE_TEX
259 #ifdef USE_TEXTURE_OBJECTS
260 #define INTERTEX param.outTex
261 #else
262 #define INTERTEX interTexSingle
263 #endif // USE_TEXTURE_OBJECTS
264 #endif // DIRECT_ACCESS_WILSON_INTER
265 #define WRITE_SPINOR WRITE_SPINOR_FLOAT4
266 #if (DD_XPAY==1) // accumulator macros exist only for the Xpay variants
267 #ifdef DIRECT_ACCESS_WILSON_ACCUM
268 #define ACCUMTEX param.x
269 #define READ_ACCUM READ_ACCUM_SINGLE
270 #else
271 #ifdef USE_TEXTURE_OBJECTS
272 #define ACCUMTEX param.xTex
273 #else
274 #define ACCUMTEX accumTexSingle
275 #endif // USE_TEXTURE_OBJECTS
276 #define READ_ACCUM READ_ACCUM_SINGLE_TEX
277 #endif // DIRECT_ACCESS_WILSON_ACCUM
278 #endif // DD_XPAY
279 
280 #define SPINOR_HOP 6
281 
282 #else // half-precision fields
283 
284 #define TPROJSCALE param.tProjScale_f
285 
286 // half-precision gauge field
287 #ifdef DIRECT_ACCESS_LINK
288 #define GAUGE0TEX param.gauge0
289 #define GAUGE1TEX param.gauge1
290 #else
291 #ifdef USE_TEXTURE_OBJECTS
292 #define GAUGE0TEX param.gauge0Tex
293 #define GAUGE1TEX param.gauge1Tex
294 #else
295 #if (DD_RECON_F == 18)
296 #define GAUGE0TEX gauge0TexHalf2
297 #define GAUGE1TEX gauge1TexHalf2
298 #else // DD_RECON_F is 8 or 12
299 #define GAUGE0TEX gauge0TexHalf4
300 #define GAUGE1TEX gauge1TexHalf4
301 #endif // DD_RECON_F == 18
302 #endif // USE_TEXTURE_OBJECTS
303 #endif // DIRECT_ACCESS_LINK
304 
305 
306 // half-precision spinor fields
307 #ifdef DIRECT_ACCESS_WILSON_SPINOR
308 #define READ_SPINOR READ_SPINOR_HALF
309 #define READ_SPINOR_UP READ_SPINOR_HALF_UP
310 #define READ_SPINOR_DOWN READ_SPINOR_HALF_DOWN
311 #define SPINORTEX param.in
312 #define GHOSTSPINORTEX param.ghost
313 #else // NOTE: GHOSTSPINORTEX is not defined on this path
314 #define READ_SPINOR READ_SPINOR_HALF_TEX
315 #define READ_SPINOR_UP READ_SPINOR_HALF_UP_TEX
316 #define READ_SPINOR_DOWN READ_SPINOR_HALF_DOWN_TEX
317 #ifdef USE_TEXTURE_OBJECTS
318 #define SPINORTEX param.inTex
319 #else
320 #define SPINORTEX spinorTexHalf
321 #endif // USE_TEXTURE_OBJECTS
322 #endif // DIRECT_ACCESS_WILSON_SPINOR
323 #ifdef DIRECT_ACCESS_WILSON_INTER
324 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_HALF
325 #define INTERTEX param.out
326 #else
327 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_HALF_TEX
328 #ifdef USE_TEXTURE_OBJECTS
329 #define INTERTEX param.outTex
330 #else
331 #define INTERTEX interTexHalf
332 #endif // USE_TEXTURE_OBJECTS
333 #endif // DIRECT_ACCESS_WILSON_INTER
334 #define WRITE_SPINOR WRITE_SPINOR_SHORT4
335 #if (DD_XPAY==1) // accumulator macros exist only for the Xpay variants
336 #ifdef DIRECT_ACCESS_WILSON_ACCUM
337 #define ACCUMTEX param.x
338 #define READ_ACCUM READ_ACCUM_HALF
339 #else
340 #ifdef USE_TEXTURE_OBJECTS
341 #define ACCUMTEX param.xTex
342 #else
343 #define ACCUMTEX accumTexHalf
344 #endif // USE_TEXTURE_OBJECTS
345 #define READ_ACCUM READ_ACCUM_HALF_TEX
346 #endif // DIRECT_ACCESS_WILSON_ACCUM
347 
348 #endif // DD_XPAY
349 
350 #define SPINOR_HOP 6
351 
352 #endif // DD_PREC
353 
354 #define DD_CONCAT(n,p,r,d,x) n ## p ## r ## d ## x ## Kernel // pastes name, precision, recon, dagger, xpay and the "Kernel" suffix
355 #define DD_FUNC(n,p,r,d,x) DD_CONCAT(n,p,r,d,x) // extra level so the DD_*_F arguments are macro-expanded before ## is applied
356 
357 // define the kernel
358 // NOTE(review): listing lines 360 and 366 were missing from this copy and are reconstructed here; confirm against the upstream file.
359 template <KernelType kernel_type>
360 __global__ void DD_FUNC(DD_NAME_F, DD_PREC_F, DD_RECON_F, DD_DAG_F, DD_XPAY_F)
361  (const DslashParam param) {
362 
363 #ifdef GPU_DOMAIN_WALL_DIRAC // without this the kernel is still declared but has an empty body
364 #define MDWF_mode 1 // visible only while the core body is being included
365 #if DD_DAG // dagger variant
366 #include "dw_dslash5inv_dagger_core.h"
367 #else // non-dagger variant
368 #include "dw_dslash5inv_core.h"
369 #endif // DD_DAG
370 #undef MDWF_mode
371 #endif // GPU_DOMAIN_WALL_DIRAC
372 
373 }
374 
375 // clean up
376 
377 #undef DD_NAME_F // name-fragment macros and the name builder: redefined at the top of every pass
378 #undef DD_PREC_F
379 #undef DD_RECON_F
380 #undef DD_DAG_F
381 #undef DD_XPAY_F
382 #undef DD_CONCAT
383 #undef DD_FUNC
384 
385 #undef DSLASH_XPAY // per-variant macros consumed by the included kernel body
386 #undef ASSN_GAUGE_MATRIX
387 #undef RECONSTRUCT_GAUGE_MATRIX
388 #undef GAUGE0TEX
389 #undef GAUGE1TEX
390 #undef READ_SPINOR
391 #undef READ_SPINOR_UP
392 #undef READ_SPINOR_DOWN
393 #undef SPINORTEX
394 #undef READ_INTERMEDIATE_SPINOR
395 #undef INTERTEX
396 #undef READ_ACCUM
397 #undef ACCUMTEX
398 #undef WRITE_SPINOR
399 #undef GAUGE_FLOAT2
400 #undef SPINOR_DOUBLE
401 #undef GHOSTSPINORTEX // set on the DIRECT_ACCESS_WILSON_SPINOR paths; without this it outlives the final pass
402 #undef SPINOR_HOP
403 
404 #undef TPROJSCALE
405 
406 // prepare next set of options, or clean up after final iteration
407 
408 #if (DD_DAG==0) // DD_DAG is the fastest-changing digit of the counter
409 #undef DD_DAG
410 #define DD_DAG 1
411 #else // DD_DAG wraps back to 0 and the carry moves on to DD_XPAY
412 #undef DD_DAG
413 #define DD_DAG 0
414 
415 #if (DD_XPAY==0)
416 #undef DD_XPAY
417 #define DD_XPAY 1
418 #else // DD_XPAY wraps back to 0 and the carry moves on to DD_RECON
419 #undef DD_XPAY
420 #define DD_XPAY 0
421 
422 #if (DD_RECON==0)
423 #undef DD_RECON
424 #define DD_RECON 1
425 #elif (DD_RECON==1)
426 #undef DD_RECON
427 #define DD_RECON 2
428 #else // DD_RECON wraps back to 0 and the carry moves on to DD_PREC
429 #undef DD_RECON
430 #define DD_RECON 0
431 
432 #if (DD_PREC==0)
433 #undef DD_PREC
434 #define DD_PREC 1
435 #elif (DD_PREC==1)
436 #undef DD_PREC
437 #define DD_PREC 2
438 
439 #else // every digit has wrapped: all variants were emitted, so drop the loop state
440 
441 #undef DD_LOOP
442 #undef DD_DAG
443 #undef DD_XPAY
444 #undef DD_RECON
445 #undef DD_PREC
446 
447 #endif // DD_PREC
448 #endif // DD_RECON
449 #endif // DD_XPAY
450 #endif // DD_DAG
451 
452 #ifdef DD_LOOP // still defined unless the final variant was just emitted
453 #include "mdw_dslash5inv_def.h" // self-include: runs the next pass with the updated counters
454 #endif // DD_LOOP
#define DD_DAG_F
QudaGaugeParam param
Definition: pack_test.cpp:17
#define DD_PREC_F
#define DD_FUNC(n, p, r, d, x)
#define DD_RECON_F
#define DD_XPAY_F
#define DD_NAME_F