QUDA  v0.7.0
A library for QCD on GPUs
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
mdw_dslash4pre_def.h
Go to the documentation of this file.
1 // tm_dslash_def.h - Twisted Mass Dslash kernel definitions
2 
3 // There are currently 36 different variants of the Twisted Mass
4 // Wilson Dslash kernel, each one characterized by a set of 5 options,
5 // where each option can take one of several values (3*2*2*3 = 36).
6 // This file is structured so that the C preprocessor loops through all 36
7 // variants (in a manner resembling a counter), sets the appropriate
8 // macros, and defines the corresponding functions.
9 //
10 // As an example of the function naming conventions, consider
11 //
12 // twistedMassDslash12DaggerXpayKernel(float4* out, ...).
13 //
14 // This is a twisted mass Dslash^dagger kernel where the result is
15 // multiplied by "a" and summed with an input vector (Xpay), and the
16 // gauge matrix is reconstructed from 12 real numbers. More
17 // generally, each function name is given by the concatenation of the
18 // following 4 fields, with "Kernel" at the end:
19 //
20 // DD_NAME_F = twistedMassDslash
21 // DD_RECON_F = 8, 12, 18
22 // DD_DAG_F = Dagger, [blank]
23 // DD_XPAY_F = Xpay, [blank]
24 //
25 // In addition, the kernels are templated on the precision of the
26 // fields (double, single, or half).
27 
28 // initialize on first iteration
29 
30 #ifndef DD_LOOP
31 #define DD_LOOP
32 #define DD_DAG 0
33 #define DD_XPAY 0
34 #define DD_RECON 0
35 #define DD_PREC 0
36 #endif
37 
38 // set options for current iteration
39 
40 #define DD_NAME_F MDWFDslash4pre
41 
42 #if (DD_DAG==0) // no dagger
43 #define DD_DAG_F
44 #else // dagger
45 #define DD_DAG_F Dagger
46 #endif
47 
48 #if (DD_XPAY==0) // no xpay
49 #define DD_XPAY_F
50 #else
51 #define DSLASH_XPAY
52 #define DD_XPAY_F Xpay
53 #endif
54 
55 #if (DD_PREC == 0)
56 #define DD_PARAM4 const double mferm, const double2 *x, const float *xNorm, const double a, const DslashParam param
57 #elif (DD_PREC == 1)
58 #define DD_PARAM4 const float mferm, const float4 *x, const float *xNorm, const float a, const DslashParam param
59 #else
60 #define DD_PARAM4 const float mferm, const short4 *x, const float *xNorm, const float a, const DslashParam param
61 #endif
62 
63 #if (DD_RECON==0) // reconstruct from 8 reals
64 #define DD_RECON_F 8
65 
66 #if (DD_PREC==0)
67 #define DD_PARAM2 const double2 *gauge0, const double2 *gauge1
68 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_DOUBLE
69 #ifdef DIRECT_ACCESS_LINK
70 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_8_DOUBLE2
71 #else
72 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_8_DOUBLE2_TEX
73 #endif // DIRECT_ACCESS_LINK
74 
75 #elif (DD_PREC==1)
76 #define DD_PARAM2 const float4 *gauge0, const float4 *gauge1
77 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_SINGLE
78 #ifdef DIRECT_ACCESS_LINK
79 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_8_FLOAT4
80 #else
81 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_8_FLOAT4_TEX
82 #endif // DIRECT_ACCESS_LINK
83 
84 #else
85 #define DD_PARAM2 const short4 *gauge0, const short4* gauge1
86 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_SINGLE
87 #ifdef DIRECT_ACCESS_LINK
88 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_8_SHORT4
89 #else
90 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_8_SHORT4_TEX
91 #endif // DIRECT_ACCESS_LINK
92 #endif // DD_PREC
93 #elif (DD_RECON==1) // reconstruct from 12 reals
94 #define DD_RECON_F 12
95 
96 #if (DD_PREC==0)
97 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_DOUBLE
98 #ifdef DIRECT_ACCESS_LINK
99 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_12_DOUBLE2
100 #else
101 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_12_DOUBLE2_TEX
102 #endif // DIRECT_ACCESS_LINK
103 #define DD_PARAM2 const double2 *gauge0, const double2 *gauge1
104 
105 #elif (DD_PREC==1)
106 #define DD_PARAM2 const float4 *gauge0, const float4 *gauge1
107 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_SINGLE
108 #ifdef DIRECT_ACCESS_LINK
109 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_12_FLOAT4
110 #else
111 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_12_FLOAT4_TEX
112 #endif // DIRECT_ACCESS_LINK
113 
114 #else
115 #define DD_PARAM2 const short4 *gauge0, const short4 *gauge1
116 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_SINGLE
117 #ifdef DIRECT_ACCESS_LINK
118 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_12_SHORT4
119 #else
120 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_12_SHORT4_TEX
121 #endif // DIRECT_ACCESS_LINK
122 #endif // DD_PREC
123 #else // no reconstruct, load all components
124 #define DD_RECON_F 18
125 #define GAUGE_FLOAT2
126 #if (DD_PREC==0)
127 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_DOUBLE
128 #ifdef DIRECT_ACCESS_LINK
129 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_18_DOUBLE2
130 #else
131 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_18_DOUBLE2_TEX
132 #endif // DIRECT_ACCESS_LINK
133 #define DD_PARAM2 const double2 *gauge0, const double2 *gauge1
134 
135 #elif (DD_PREC==1)
136 #define DD_PARAM2 const float4 *gauge0, const float4 *gauge1 // FIXME for direct reading, really float2
137 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_SINGLE
138 #ifdef DIRECT_ACCESS_LINK
139 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_18_FLOAT2
140 #else
141 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_18_FLOAT2_TEX
142 #endif // DIRECT_ACCESS_LINK
143 
144 #else
145 #define DD_PARAM2 const short4 *gauge0, const short4 *gauge1 // FIXME for direct reading, really short2
146 #define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_SINGLE
147 #ifdef DIRECT_ACCESS_LINK
148 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_18_SHORT2
149 #else
150 #define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_18_SHORT2_TEX
151 #endif //DIRECT_ACCESS_LINK
152 #endif
153 #endif
154 
155 #if (DD_PREC==0) // double-precision fields
156 
157 #define TPROJSCALE tProjScale
158 
159 // double-precision gauge field
160 #if (defined DIRECT_ACCESS_LINK) || (defined FERMI_NO_DBLE_TEX)
161 #define GAUGE0TEX gauge0
162 #define GAUGE1TEX gauge1
163 #else
164 #ifdef USE_TEXTURE_OBJECTS
165 #define GAUGE0TEX param.gauge0Tex
166 #define GAUGE1TEX param.gauge1Tex
167 #else
168 #define GAUGE0TEX gauge0TexDouble2
169 #define GAUGE1TEX gauge1TexDouble2
170 #endif // USE_TEXTURE_OBJECTS
171 #endif
172 
173 #define GAUGE_FLOAT2
174 
175 // double-precision spinor fields
176 #define DD_PARAM1 double2* out, float *null1
177 #define DD_PARAM3 const double2* in, const float *null4
178 #if (defined DIRECT_ACCESS_WILSON_SPINOR) || (defined FERMI_NO_DBLE_TEX)
179 #define READ_SPINOR READ_SPINOR_DOUBLE
180 #define READ_SPINOR_UP READ_SPINOR_DOUBLE_UP
181 #define READ_SPINOR_DOWN READ_SPINOR_DOUBLE_DOWN
182 #define SPINORTEX in
183 #else
184 #define READ_SPINOR READ_SPINOR_DOUBLE_TEX
185 #define READ_SPINOR_UP READ_SPINOR_DOUBLE_UP_TEX
186 #define READ_SPINOR_DOWN READ_SPINOR_DOUBLE_DOWN_TEX
187 #ifdef USE_TEXTURE_OBJECTS
188 #define SPINORTEX param.inTex
189 #else
190 #define SPINORTEX spinorTexDouble
191 #endif // USE_TEXTURE_OBJECTS
192 #endif
193 #if (defined DIRECT_ACCESS_WILSON_INTER) || (defined FERMI_NO_DBLE_TEX)
194 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_DOUBLE
195 #define INTERTEX out
196 #else
197 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_DOUBLE_TEX
198 #ifdef USE_TEXTURE_OBJECTS
199 #define INTERTEX param.outTex
200 #else
201 #define INTERTEX interTexDouble
202 #endif
203 #endif
204 #define WRITE_SPINOR WRITE_SPINOR_DOUBLE2
205 #define SPINOR_DOUBLE
206 #if (DD_XPAY==1)
207 #if (defined DIRECT_ACCESS_WILSON_ACCUM) || (defined FERMI_NO_DBLE_TEX)
208 #define ACCUMTEX x
209 #define READ_ACCUM READ_ACCUM_DOUBLE
210 #else
211 #ifdef USE_TEXTURE_OBJECTS
212 #define ACCUMTEX param.xTex
213 #else
214 #define ACCUMTEX accumTexDouble
215 #endif // USE_TEXTURE_OBJECTS
216 #define READ_ACCUM READ_ACCUM_DOUBLE_TEX
217 #endif
218 
219 #endif
220 
221 #define SPINOR_HOP 12
222 
223 #elif (DD_PREC==1) // single-precision fields
224 
225 #define TPROJSCALE tProjScale_f
226 
227 // single-precision gauge field
228 #ifdef DIRECT_ACCESS_LINK
229 #define GAUGE0TEX gauge0
230 #define GAUGE1TEX gauge1
231 #else
232 #ifdef USE_TEXTURE_OBJECTS
233 #define GAUGE0TEX param.gauge0Tex
234 #define GAUGE1TEX param.gauge1Tex
235 #else
236 #if (DD_RECON_F == 18)
237 #define GAUGE0TEX gauge0TexSingle2
238 #define GAUGE1TEX gauge1TexSingle2
239 #else
240 #define GAUGE0TEX gauge0TexSingle4
241 #define GAUGE1TEX gauge1TexSingle4
242 #endif
243 #endif // USE_TEXTURE_OBJECTS
244 #endif
245 
246 
247 // single-precision spinor fields
248 #define DD_PARAM1 float4* out, float *null1
249 #define DD_PARAM3 const float4* in, const float *null4
250 #ifdef DIRECT_ACCESS_WILSON_SPINOR
251 #define READ_SPINOR READ_SPINOR_SINGLE
252 #define READ_SPINOR_UP READ_SPINOR_SINGLE_UP
253 #define READ_SPINOR_DOWN READ_SPINOR_SINGLE_DOWN
254 #define SPINORTEX in
255 #else
256 #define READ_SPINOR READ_SPINOR_SINGLE_TEX
257 #define READ_SPINOR_UP READ_SPINOR_SINGLE_UP_TEX
258 #define READ_SPINOR_DOWN READ_SPINOR_SINGLE_DOWN_TEX
259 #ifdef USE_TEXTURE_OBJECTS
260 #define SPINORTEX param.inTex
261 #else
262 #define SPINORTEX spinorTexSingle
263 #endif // USE_TEXTURE_OBJECTS
264 #endif
265 #ifdef DIRECT_ACCESS_WILSON_INTER
266 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_SINGLE
267 #define INTERTEX out
268 #else
269 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_SINGLE_TEX
270 #ifdef USE_TEXTURE_OBJECTS
271 #define INTERTEX param.outTex
272 #else
273 #define INTERTEX interTexSingle
274 #endif // USE_TEXTURE_OBJECTS
275 #endif
276 #define WRITE_SPINOR WRITE_SPINOR_FLOAT4
277 #if (DD_XPAY==1)
278 #ifdef DIRECT_ACCESS_WILSON_ACCUM
279 #define ACCUMTEX x
280 #define READ_ACCUM READ_ACCUM_SINGLE
281 #else
282 #ifdef USE_TEXTURE_OBJECTS
283 #define ACCUMTEX param.xTex
284 #else
285 #define ACCUMTEX accumTexSingle
286 #endif // USE_TEXTURE_OBJECTS
287 #define READ_ACCUM READ_ACCUM_SINGLE_TEX
288 #endif
289 #endif
290 
291 #define SPINOR_HOP 6
292 
293 #else // half-precision fields
294 
295 #define TPROJSCALE tProjScale_f
296 
297 // half-precision gauge field
298 #ifdef DIRECT_ACCESS_LINK
299 #define GAUGE0TEX gauge0
300 #define GAUGE1TEX gauge1
301 #else
302 #ifdef USE_TEXTURE_OBJECTS
303 #define GAUGE0TEX param.gauge0Tex
304 #define GAUGE1TEX param.gauge1Tex
305 #else
306 #if (DD_RECON_F == 18)
307 #define GAUGE0TEX gauge0TexHalf2
308 #define GAUGE1TEX gauge1TexHalf2
309 #else
310 #define GAUGE0TEX gauge0TexHalf4
311 #define GAUGE1TEX gauge1TexHalf4
312 #endif
313 #endif // USE_TEXTURE_OBJECTS
314 #endif
315 
316 
317 // half-precision spinor fields
318 #ifdef DIRECT_ACCESS_WILSON_SPINOR
319 #define READ_SPINOR READ_SPINOR_HALF
320 #define READ_SPINOR_UP READ_SPINOR_HALF_UP
321 #define READ_SPINOR_DOWN READ_SPINOR_HALF_DOWN
322 #define SPINORTEX in
323 #else
324 #define READ_SPINOR READ_SPINOR_HALF_TEX
325 #define READ_SPINOR_UP READ_SPINOR_HALF_UP_TEX
326 #define READ_SPINOR_DOWN READ_SPINOR_HALF_DOWN_TEX
327 #ifdef USE_TEXTURE_OBJECTS
328 #define SPINORTEX param.inTex
329 #else
330 #define SPINORTEX spinorTexHalf
331 #endif // USE_TEXTURE_OBJECTS
332 #endif
333 #ifdef DIRECT_ACCESS_WILSON_INTER
334 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_HALF
335 #define INTERTEX out
336 #else
337 #define READ_INTERMEDIATE_SPINOR READ_SPINOR_HALF_TEX
338 #ifdef USE_TEXTURE_OBJECTS
339 #define INTERTEX param.outTex
340 #else
341 #define INTERTEX interTexHalf
342 #endif // USE_TEXTURE_OBJECTS
343 #endif
344 #define DD_PARAM1 short4* out, float *outNorm
345 #define DD_PARAM3 const short4* in, const float *inNorm
346 #define WRITE_SPINOR WRITE_SPINOR_SHORT4
347 #if (DD_XPAY==1)
348 #ifdef DIRECT_ACCESS_WILSON_ACCUM
349 #define ACCUMTEX x
350 #define READ_ACCUM READ_ACCUM_HALF
351 #else
352 #ifdef USE_TEXTURE_OBJECTS
353 #define ACCUMTEX param.xTex
354 #else
355 #define ACCUMTEX accumTexHalf
356 #endif // USE_TEXTURE_OBJECTS
357 #define READ_ACCUM READ_ACCUM_HALF_TEX
358 #endif
359 
360 #endif
361 
362 #define SPINOR_HOP 6
363 
364 #endif
365 
366 // only build double precision if supported
367 #if !(__COMPUTE_CAPABILITY__ < 130 && DD_PREC == 0)
368 
369 #define DD_CONCAT(n,r,d,x) n ## r ## d ## x ## Kernel
370 #define DD_FUNC(n,r,d,x) DD_CONCAT(n,r,d,x)
371 
372 // define the kernel
373 
374 template <KernelType kernel_type>
375 __global__ void DD_FUNC(DD_NAME_F, DD_RECON_F, DD_DAG_F, DD_XPAY_F)
377 
378 #ifdef GPU_DOMAIN_WALL_DIRAC
379 #define MDWF_mode 1
380 #if DD_DAG
381 #include "dw_dslash5_dagger_core.h"
382 #else
383 #include "dw_dslash5_core.h"
384 #endif
385 #undef MDWF_mode
386 #endif
387 
388 }
389 
390 #endif
391 
392 // clean up
393 
394 #undef DD_NAME_F
395 #undef DD_RECON_F
396 #undef DD_DAG_F
397 #undef DD_XPAY_F
398 #undef DD_PARAM1
399 #undef DD_PARAM2
400 #undef DD_PARAM3
401 #undef DD_PARAM4
402 #undef DD_CONCAT
403 #undef DD_FUNC
404 
405 #undef DSLASH_XPAY
406 #undef ASSN_GAUGE_MATRIX
407 #undef RECONSTRUCT_GAUGE_MATRIX
408 #undef GAUGE0TEX
409 #undef GAUGE1TEX
410 #undef READ_SPINOR
411 #undef READ_SPINOR_UP
412 #undef READ_SPINOR_DOWN
413 #undef SPINORTEX
414 #undef READ_INTERMEDIATE_SPINOR
415 #undef INTERTEX
416 #undef READ_ACCUM
417 #undef ACCUMTEX
418 #undef WRITE_SPINOR
419 #undef GAUGE_FLOAT2
420 #undef SPINOR_DOUBLE
421 
422 #undef SPINOR_HOP
423 
424 #undef TPROJSCALE
425 
426 // prepare next set of options, or clean up after final iteration
427 
428 #if (DD_DAG==0)
429 #undef DD_DAG
430 #define DD_DAG 1
431 #else
432 #undef DD_DAG
433 #define DD_DAG 0
434 
435 #if (DD_XPAY==0)
436 #undef DD_XPAY
437 #define DD_XPAY 1
438 #else
439 #undef DD_XPAY
440 #define DD_XPAY 0
441 
442 #if (DD_RECON==0)
443 #undef DD_RECON
444 #define DD_RECON 1
445 #elif (DD_RECON==1)
446 #undef DD_RECON
447 #define DD_RECON 2
448 #else
449 #undef DD_RECON
450 #define DD_RECON 0
451 
452 #if (DD_PREC==0)
453 #undef DD_PREC
454 #define DD_PREC 1
455 #elif (DD_PREC==1)
456 #undef DD_PREC
457 #define DD_PREC 2
458 
459 #else
460 
461 #undef DD_LOOP
462 #undef DD_DAG
463 #undef DD_XPAY
464 #undef DD_RECON
465 #undef DD_PREC
466 
467 #endif // DD_PREC
468 #endif // DD_RECON
469 #endif // DD_XPAY
470 #endif // DD_DAG
471 
472 #ifdef DD_LOOP
473 #include "mdw_dslash4pre_def.h"
474 #endif
#define DD_FUNC(x)
Definition: clover_def.h:141
#define DD_DAG_F
#define DD_XPAY_F
#define DD_PARAM2
#define DD_RECON_F
#define DD_PARAM4
#define DD_PARAM1
#define DD_NAME_F
#define DD_PARAM3