QUDA
v0.7.0
A library for QCD on GPUs
Main Page
Namespaces
Classes
Files
File List
File Members
•
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Pages
quda
lib
mdw_dslash4pre_def.h
Go to the documentation of this file.
1
// tm_dslash_def.h - Twisted Mass Dslash kernel definitions
2
3
// There are currently 36 different variants of the Twisted Mass
4
// Wilson Dslash kernel, each one characterized by a set of 5 options,
5
// where each option can take one of several values (3*2*2*3 = 36).
6
// This file is structured so that the C preprocessor loops through all 36
7
// variants (in a manner resembling a counter), sets the appropriate
8
// macros, and defines the corresponding functions.
9
//
10
// As an example of the function naming conventions, consider
11
//
12
// twistedMassDslash12DaggerXpayKernel(float4* out, ...).
13
//
14
// This is a twisted mass Dslash^dagger kernel where the result is
15
// multiplied by "a" and summed with an input vector (Xpay), and the
16
// gauge matrix is reconstructed from 12 real numbers. More
17
// generally, each function name is given by the concatenation of the
18
// following 4 fields, with "Kernel" at the end:
19
//
20
// DD_NAME_F = twistedMassDslash
21
// DD_RECON_F = 8, 12, 18
22
// DD_DAG_F = Dagger, [blank]
23
// DD_XPAY_F = Xpay, [blank]
24
//
25
// In addition, the kernels are templated on the precision of the
26
// fields (double, single, or half).
27
28
// initialize on first iteration
29
30
#ifndef DD_LOOP
31
#define DD_LOOP
32
#define DD_DAG 0
33
#define DD_XPAY 0
34
#define DD_RECON 0
35
#define DD_PREC 0
36
#endif
37
38
// set options for current iteration
39
40
#define DD_NAME_F MDWFDslash4pre
41
42
#if (DD_DAG==0) // no dagger
43
#define DD_DAG_F
44
#else // dagger
45
#define DD_DAG_F Dagger
46
#endif
47
48
#if (DD_XPAY==0) // no xpay
49
#define DD_XPAY_F
50
#else
51
#define DSLASH_XPAY
52
#define DD_XPAY_F Xpay
53
#endif
54
55
#if (DD_PREC == 0)
56
#define DD_PARAM4 const double mferm, const double2 *x, const float *xNorm, const double a, const DslashParam param
57
#elif (DD_PREC == 1)
58
#define DD_PARAM4 const float mferm, const float4 *x, const float *xNorm, const float a, const DslashParam param
59
#else
60
#define DD_PARAM4 const float mferm, const short4 *x, const float *xNorm, const float a, const DslashParam param
61
#endif
62
63
#if (DD_RECON==0) // reconstruct from 8 reals
64
#define DD_RECON_F 8
65
66
#if (DD_PREC==0)
67
#define DD_PARAM2 const double2 *gauge0, const double2 *gauge1
68
#define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_DOUBLE
69
#ifdef DIRECT_ACCESS_LINK
70
#define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_8_DOUBLE2
71
#else
72
#define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_8_DOUBLE2_TEX
73
#endif // DIRECT_ACCESS_LINK
74
75
#elif (DD_PREC==1)
76
#define DD_PARAM2 const float4 *gauge0, const float4 *gauge1
77
#define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_SINGLE
78
#ifdef DIRECT_ACCESS_LINK
79
#define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_8_FLOAT4
80
#else
81
#define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_8_FLOAT4_TEX
82
#endif // DIRECT_ACCESS_LINK
83
84
#else
85
#define DD_PARAM2 const short4 *gauge0, const short4* gauge1
86
#define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_SINGLE
87
#ifdef DIRECT_ACCESS_LINK
88
#define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_8_SHORT4
89
#else
90
#define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_8_SHORT4_TEX
91
#endif // DIRECT_ACCESS_LINK
92
#endif // DD_PREC
93
#elif (DD_RECON==1) // reconstruct from 12 reals
94
#define DD_RECON_F 12
95
96
#if (DD_PREC==0)
97
#define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_DOUBLE
98
#ifdef DIRECT_ACCESS_LINK
99
#define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_12_DOUBLE2
100
#else
101
#define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_12_DOUBLE2_TEX
102
#endif // DIRECT_ACCESS_LINK
103
#define DD_PARAM2 const double2 *gauge0, const double2 *gauge1
104
105
#elif (DD_PREC==1)
106
#define DD_PARAM2 const float4 *gauge0, const float4 *gauge1
107
#define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_SINGLE
108
#ifdef DIRECT_ACCESS_LINK
109
#define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_12_FLOAT4
110
#else
111
#define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_12_FLOAT4_TEX
112
#endif // DIRECT_ACCESS_LINK
113
114
#else
115
#define DD_PARAM2 const short4 *gauge0, const short4 *gauge1
116
#define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_SINGLE
117
#ifdef DIRECT_ACCESS_LINK
118
#define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_12_SHORT4
119
#else
120
#define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_12_SHORT4_TEX
121
#endif // DIRECT_ACCESS_LINK
122
#endif // DD_PREC
123
#else // no reconstruct, load all components
124
#define DD_RECON_F 18
125
#define GAUGE_FLOAT2
126
#if (DD_PREC==0)
127
#define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_DOUBLE
128
#ifdef DIRECT_ACCESS_LINK
129
#define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_18_DOUBLE2
130
#else
131
#define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_18_DOUBLE2_TEX
132
#endif // DIRECT_ACCESS_LINK
133
#define DD_PARAM2 const double2 *gauge0, const double2 *gauge1
134
135
#elif (DD_PREC==1)
136
#define DD_PARAM2 const float4 *gauge0, const float4 *gauge1 // FIXME for direct reading, really float2
137
#define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_SINGLE
138
#ifdef DIRECT_ACCESS_LINK
139
#define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_18_FLOAT2
140
#else
141
#define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_18_FLOAT2_TEX
142
#endif // DIRECT_ACCESS_LINK
143
144
#else
145
#define DD_PARAM2 const short4 *gauge0, const short4 *gauge1 // FIXME for direct reading, really short2
146
#define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_SINGLE
147
#ifdef DIRECT_ACCESS_LINK
148
#define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_18_SHORT2
149
#else
150
#define ASSN_GAUGE_MATRIX ASSN_GAUGE_MATRIX_18_SHORT2_TEX
151
#endif //DIRECT_ACCESS_LINK
152
#endif
153
#endif
154
155
#if (DD_PREC==0) // double-precision fields
156
157
#define TPROJSCALE tProjScale
158
159
// double-precision gauge field
160
#if (defined DIRECT_ACCESS_LINK) || (defined FERMI_NO_DBLE_TEX)
161
#define GAUGE0TEX gauge0
162
#define GAUGE1TEX gauge1
163
#else
164
#ifdef USE_TEXTURE_OBJECTS
165
#define GAUGE0TEX param.gauge0Tex
166
#define GAUGE1TEX param.gauge1Tex
167
#else
168
#define GAUGE0TEX gauge0TexDouble2
169
#define GAUGE1TEX gauge1TexDouble2
170
#endif // USE_TEXTURE_OBJECTS
171
#endif
172
173
#define GAUGE_FLOAT2
174
175
// double-precision spinor fields
176
#define DD_PARAM1 double2* out, float *null1
177
#define DD_PARAM3 const double2* in, const float *null4
178
#if (defined DIRECT_ACCESS_WILSON_SPINOR) || (defined FERMI_NO_DBLE_TEX)
179
#define READ_SPINOR READ_SPINOR_DOUBLE
180
#define READ_SPINOR_UP READ_SPINOR_DOUBLE_UP
181
#define READ_SPINOR_DOWN READ_SPINOR_DOUBLE_DOWN
182
#define SPINORTEX in
183
#else
184
#define READ_SPINOR READ_SPINOR_DOUBLE_TEX
185
#define READ_SPINOR_UP READ_SPINOR_DOUBLE_UP_TEX
186
#define READ_SPINOR_DOWN READ_SPINOR_DOUBLE_DOWN_TEX
187
#ifdef USE_TEXTURE_OBJECTS
188
#define SPINORTEX param.inTex
189
#else
190
#define SPINORTEX spinorTexDouble
191
#endif // USE_TEXTURE_OBJECTS
192
#endif
193
#if (defined DIRECT_ACCESS_WILSON_INTER) || (defined FERMI_NO_DBLE_TEX)
194
#define READ_INTERMEDIATE_SPINOR READ_SPINOR_DOUBLE
195
#define INTERTEX out
196
#else
197
#define READ_INTERMEDIATE_SPINOR READ_SPINOR_DOUBLE_TEX
198
#ifdef USE_TEXTURE_OBJECTS
199
#define INTERTEX param.outTex
200
#else
201
#define INTERTEX interTexDouble
202
#endif
203
#endif
204
#define WRITE_SPINOR WRITE_SPINOR_DOUBLE2
205
#define SPINOR_DOUBLE
206
#if (DD_XPAY==1)
207
#if (defined DIRECT_ACCESS_WILSON_ACCUM) || (defined FERMI_NO_DBLE_TEX)
208
#define ACCUMTEX x
209
#define READ_ACCUM READ_ACCUM_DOUBLE
210
#else
211
#ifdef USE_TEXTURE_OBJECTS
212
#define ACCUMTEX param.xTex
213
#else
214
#define ACCUMTEX accumTexDouble
215
#endif // USE_TEXTURE_OBJECTS
216
#define READ_ACCUM READ_ACCUM_DOUBLE_TEX
217
#endif
218
219
#endif
220
221
#define SPINOR_HOP 12
222
223
#elif (DD_PREC==1) // single-precision fields
224
225
#define TPROJSCALE tProjScale_f
226
227
// single-precision gauge field
228
#ifdef DIRECT_ACCESS_LINK
229
#define GAUGE0TEX gauge0
230
#define GAUGE1TEX gauge1
231
#else
232
#ifdef USE_TEXTURE_OBJECTS
233
#define GAUGE0TEX param.gauge0Tex
234
#define GAUGE1TEX param.gauge1Tex
235
#else
236
#if (DD_RECON_F == 18)
237
#define GAUGE0TEX gauge0TexSingle2
238
#define GAUGE1TEX gauge1TexSingle2
239
#else
240
#define GAUGE0TEX gauge0TexSingle4
241
#define GAUGE1TEX gauge1TexSingle4
242
#endif
243
#endif // USE_TEXTURE_OBJECTS
244
#endif
245
246
247
// single-precision spinor fields
248
#define DD_PARAM1 float4* out, float *null1
249
#define DD_PARAM3 const float4* in, const float *null4
250
#ifdef DIRECT_ACCESS_WILSON_SPINOR
251
#define READ_SPINOR READ_SPINOR_SINGLE
252
#define READ_SPINOR_UP READ_SPINOR_SINGLE_UP
253
#define READ_SPINOR_DOWN READ_SPINOR_SINGLE_DOWN
254
#define SPINORTEX in
255
#else
256
#define READ_SPINOR READ_SPINOR_SINGLE_TEX
257
#define READ_SPINOR_UP READ_SPINOR_SINGLE_UP_TEX
258
#define READ_SPINOR_DOWN READ_SPINOR_SINGLE_DOWN_TEX
259
#ifdef USE_TEXTURE_OBJECTS
260
#define SPINORTEX param.inTex
261
#else
262
#define SPINORTEX spinorTexSingle
263
#endif // USE_TEXTURE_OBJECTS
264
#endif
265
#ifdef DIRECT_ACCESS_WILSON_INTER
266
#define READ_INTERMEDIATE_SPINOR READ_SPINOR_SINGLE
267
#define INTERTEX out
268
#else
269
#define READ_INTERMEDIATE_SPINOR READ_SPINOR_SINGLE_TEX
270
#ifdef USE_TEXTURE_OBJECTS
271
#define INTERTEX param.outTex
272
#else
273
#define INTERTEX interTexSingle
274
#endif // USE_TEXTURE_OBJECTS
275
#endif
276
#define WRITE_SPINOR WRITE_SPINOR_FLOAT4
277
#if (DD_XPAY==1)
278
#ifdef DIRECT_ACCESS_WILSON_ACCUM
279
#define ACCUMTEX x
280
#define READ_ACCUM READ_ACCUM_SINGLE
281
#else
282
#ifdef USE_TEXTURE_OBJECTS
283
#define ACCUMTEX param.xTex
284
#else
285
#define ACCUMTEX accumTexSingle
286
#endif // USE_TEXTURE_OBJECTS
287
#define READ_ACCUM READ_ACCUM_SINGLE_TEX
288
#endif
289
#endif
290
291
#define SPINOR_HOP 6
292
293
#else // half-precision fields
294
295
#define TPROJSCALE tProjScale_f
296
297
// half-precision gauge field
298
#ifdef DIRECT_ACCESS_LINK
299
#define GAUGE0TEX gauge0
300
#define GAUGE1TEX gauge1
301
#else
302
#ifdef USE_TEXTURE_OBJECTS
303
#define GAUGE0TEX param.gauge0Tex
304
#define GAUGE1TEX param.gauge1Tex
305
#else
306
#if (DD_RECON_F == 18)
307
#define GAUGE0TEX gauge0TexHalf2
308
#define GAUGE1TEX gauge1TexHalf2
309
#else
310
#define GAUGE0TEX gauge0TexHalf4
311
#define GAUGE1TEX gauge1TexHalf4
312
#endif
313
#endif // USE_TEXTURE_OBJECTS
314
#endif
315
316
317
// half-precision spinor fields
318
#ifdef DIRECT_ACCESS_WILSON_SPINOR
319
#define READ_SPINOR READ_SPINOR_HALF
320
#define READ_SPINOR_UP READ_SPINOR_HALF_UP
321
#define READ_SPINOR_DOWN READ_SPINOR_HALF_DOWN
322
#define SPINORTEX in
323
#else
324
#define READ_SPINOR READ_SPINOR_HALF_TEX
325
#define READ_SPINOR_UP READ_SPINOR_HALF_UP_TEX
326
#define READ_SPINOR_DOWN READ_SPINOR_HALF_DOWN_TEX
327
#ifdef USE_TEXTURE_OBJECTS
328
#define SPINORTEX param.inTex
329
#else
330
#define SPINORTEX spinorTexHalf
331
#endif // USE_TEXTURE_OBJECTS
332
#endif
333
#ifdef DIRECT_ACCESS_WILSON_INTER
334
#define READ_INTERMEDIATE_SPINOR READ_SPINOR_HALF
335
#define INTERTEX out
336
#else
337
#define READ_INTERMEDIATE_SPINOR READ_SPINOR_HALF_TEX
338
#ifdef USE_TEXTURE_OBJECTS
339
#define INTERTEX param.outTex
340
#else
341
#define INTERTEX interTexHalf
342
#endif // USE_TEXTURE_OBJECTS
343
#endif
344
#define DD_PARAM1 short4* out, float *outNorm
345
#define DD_PARAM3 const short4* in, const float *inNorm
346
#define WRITE_SPINOR WRITE_SPINOR_SHORT4
347
#if (DD_XPAY==1)
348
#ifdef DIRECT_ACCESS_WILSON_ACCUM
349
#define ACCUMTEX x
350
#define READ_ACCUM READ_ACCUM_HALF
351
#else
352
#ifdef USE_TEXTURE_OBJECTS
353
#define ACCUMTEX param.xTex
354
#else
355
#define ACCUMTEX accumTexHalf
356
#endif // USE_TEXTURE_OBJECTS
357
#define READ_ACCUM READ_ACCUM_HALF_TEX
358
#endif
359
360
#endif
361
362
#define SPINOR_HOP 6
363
364
#endif
365
366
// only build double precision if supported
367
#if !(__COMPUTE_CAPABILITY__ < 130 && DD_PREC == 0)
368
369
#define DD_CONCAT(n,r,d,x) n ## r ## d ## x ## Kernel
370
#define DD_FUNC(n,r,d,x) DD_CONCAT(n,r,d,x)
371
372
// define the kernel
373
374
template
<KernelType kernel_type>
375
__global__
void
DD_FUNC
(
DD_NAME_F
,
DD_RECON_F
,
DD_DAG_F
,
DD_XPAY_F
)
376
(
DD_PARAM1
,
DD_PARAM2
,
DD_PARAM3
,
DD_PARAM4
) {
377
378
#ifdef GPU_DOMAIN_WALL_DIRAC
379
#define MDWF_mode 1
380
#if DD_DAG
381
#include "
dw_dslash5_dagger_core.h
"
382
#else
383
#include "
dw_dslash5_core.h
"
384
#endif
385
#undef MDWF_mode
386
#endif
387
388
}
389
390
#endif
391
392
// clean up
393
394
#undef DD_NAME_F
395
#undef DD_RECON_F
396
#undef DD_DAG_F
397
#undef DD_XPAY_F
398
#undef DD_PARAM1
399
#undef DD_PARAM2
400
#undef DD_PARAM3
401
#undef DD_PARAM4
402
#undef DD_CONCAT
403
#undef DD_FUNC
404
405
#undef DSLASH_XPAY
406
#undef ASSN_GAUGE_MATRIX
407
#undef RECONSTRUCT_GAUGE_MATRIX
408
#undef GAUGE0TEX
409
#undef GAUGE1TEX
410
#undef READ_SPINOR
411
#undef READ_SPINOR_UP
412
#undef READ_SPINOR_DOWN
413
#undef SPINORTEX
414
#undef READ_INTERMEDIATE_SPINOR
415
#undef INTERTEX
416
#undef READ_ACCUM
417
#undef ACCUMTEX
418
#undef WRITE_SPINOR
419
#undef GAUGE_FLOAT2
420
#undef SPINOR_DOUBLE
421
422
#undef SPINOR_HOP
423
424
#undef TPROJSCALE
425
426
// prepare next set of options, or clean up after final iteration
427
428
#if (DD_DAG==0)
429
#undef DD_DAG
430
#define DD_DAG 1
431
#else
432
#undef DD_DAG
433
#define DD_DAG 0
434
435
#if (DD_XPAY==0)
436
#undef DD_XPAY
437
#define DD_XPAY 1
438
#else
439
#undef DD_XPAY
440
#define DD_XPAY 0
441
442
#if (DD_RECON==0)
443
#undef DD_RECON
444
#define DD_RECON 1
445
#elif (DD_RECON==1)
446
#undef DD_RECON
447
#define DD_RECON 2
448
#else
449
#undef DD_RECON
450
#define DD_RECON 0
451
452
#if (DD_PREC==0)
453
#undef DD_PREC
454
#define DD_PREC 1
455
#elif (DD_PREC==1)
456
#undef DD_PREC
457
#define DD_PREC 2
458
459
#else
460
461
#undef DD_LOOP
462
#undef DD_DAG
463
#undef DD_XPAY
464
#undef DD_RECON
465
#undef DD_PREC
466
467
#endif // DD_PREC
468
#endif // DD_RECON
469
#endif // DD_XPAY
470
#endif // DD_DAG
471
472
#ifdef DD_LOOP
473
#include "
mdw_dslash4pre_def.h
"
474
#endif
mdw_dslash4pre_def.h
DD_FUNC
#define DD_FUNC(x)
Definition:
clover_def.h:141
DD_DAG_F
#define DD_DAG_F
Definition:
mdw_dslash4pre_def.h:43
DD_XPAY_F
#define DD_XPAY_F
Definition:
mdw_dslash4pre_def.h:49
dw_dslash5_dagger_core.h
DD_PARAM2
#define DD_PARAM2
Definition:
mdw_dslash4pre_def.h:67
DD_RECON_F
#define DD_RECON_F
Definition:
mdw_dslash4pre_def.h:64
DD_PARAM4
#define DD_PARAM4
Definition:
mdw_dslash4pre_def.h:56
DD_PARAM1
#define DD_PARAM1
Definition:
mdw_dslash4pre_def.h:176
DD_NAME_F
#define DD_NAME_F
Definition:
mdw_dslash4pre_def.h:40
DD_PARAM3
#define DD_PARAM3
Definition:
mdw_dslash4pre_def.h:177
dw_dslash5_core.h
Generated on Wed Feb 4 2015 17:00:12 for QUDA by
1.8.6