QUDA
v0.7.0
A library for QCD on GPUs
Main Page
Namespaces
Classes
Files
File List
File Members
•
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Pages
quda
lib
tmc_dslash_def.h
Go to the documentation of this file.
1
// tmc_dslash_def.h - Twisted Mass Dslash kernel definitions
2
3
// There are currently 36 different variants of the Twisted Mass
4
// Wilson Dslash kernel, each one characterized by a set of 5 options,
5
// where each option can take one of several values (3*2*2*3 = 36).
6
// This file is structured so that the C preprocessor loops through all 36
7
// variants (in a manner resembling a counter), sets the appropriate
8
// macros, and defines the corresponding functions.
9
//
10
// As an example of the function naming conventions, consider
11
//
12
// twistedMassDslash12DaggerXpayKernel(float4* out, ...).
13
//
14
// This is a twisted mass Dslash^ger kernel where the result is
15
// multiplied by "a" and summed with an input vector (Xpay), and the
16
// gauge matrix is reconstructed from 12 real numbers. More
17
// generally, each function name is given by the concatenation of the
18
// following 4 fields, with "Kernel" at the end:
19
//
20
// DD_NAME_F = twistedCloverDslash
21
// DD_RECON_F = 8, 12, 18
22
// DD_DAG_F = Dagger, [blank]
23
// DD_XPAY_F = Xpay, [blank]
24
//
25
// In addition, the kernels are templated on the precision of the
26
// fields (double, single, or half).
27
28
// initialize on first iteration
29
30
#ifndef DD_LOOP
31
#define DD_LOOP
32
#define DD_DAG 0
33
#define DD_XPAY 0
34
#define DD_TWIST 0
36
#define DD_RECON 0
37
#define DD_PREC 0
38
#endif
39
40
// set options for current iteration
41
42
//#define DD_NAME_F twistedCloverDslash
43
44
#if (DD_DAG==0) // no dagger
45
#define DD_DAG_F
46
#else // dagger
47
#define DD_DAG_F Dagger
48
#endif
49
51
#if (DD_XPAY==0) // no xpay
52
#define DD_XPAY_F
53
#elif (DD_XPAY==1)
54
#define DSLASH_XPAY
55
#define DD_XPAY_F Xpay
56
#endif
57
58
#if (DD_TWIST==0) // twisted input
59
#define DD_NAME_F twistedCloverInvDslash
60
#define CLOVER_TWIST_INV_DSLASH
61
#else
62
#define DD_NAME_F twistedCloverDslash
63
#endif
64
66
#if (DD_PREC == 0)
67
#define DD_PARAMCLOVER const double2 *clover, const float *null2, const double2 *cloverInv, const float *null3
68
#if (defined DIRECT_ACCESS_CLOVER) || (defined FERMI_NO_DBLE_TEX)
69
#define TMCLOVERTEX clover
70
#define TM_INV_CLOVERTEX cloverInv
71
#define READ_CLOVER READ_CLOVER_DOUBLE_STR
72
#define ASSN_CLOVER ASSN_CLOVER_DOUBLE_STR
73
#else
74
#ifdef USE_TEXTURE_OBJECTS
75
#define TMCLOVERTEX (param.cloverTex)
76
#define TM_INV_CLOVERTEX (param.cloverInvTex)
77
#else
78
#define TMCLOVERTEX cloverTexDouble
79
#define TM_INV_CLOVERTEX cloverInvTexDouble
80
#endif
81
#define READ_CLOVER READ_CLOVER_DOUBLE_TEX
82
#define ASSN_CLOVER ASSN_CLOVER_DOUBLE_TEX
83
#endif
84
#define CLOVER_DOUBLE
85
#elif (DD_PREC == 1)
86
#define DD_PARAMCLOVER const float4 *clover, const float *null2, const float4 *cloverInv, const float *null3
87
#ifdef DIRECT_ACCESS_CLOVER
88
#define TMCLOVERTEX clover
89
#define TM_INV_CLOVERTEX cloverInv
90
#define READ_CLOVER READ_CLOVER_SINGLE
91
#define ASSN_CLOVER ASSN_CLOVER_SINGLE
92
#else
93
#ifdef USE_TEXTURE_OBJECTS
94
#define TMCLOVERTEX (param.cloverTex)
95
#define TM_INV_CLOVERTEX (param.cloverInvTex)
96
#else
97
#define TMCLOVERTEX cloverTexSingle
98
#define TM_INV_CLOVERTEX cloverInvTexSingle
99
#endif
100
#define READ_CLOVER READ_CLOVER_SINGLE_TEX
101
#define ASSN_CLOVER ASSN_CLOVER_SINGLE_TEX
102
#endif
103
#else // half-precision fields
104
#define DD_PARAMCLOVER const short4 *clover, const float *cNorm, const short4 *cloverInv, const float *cNrm2
105
#ifdef DIRECT_ACCESS_CLOVER
106
#define TMCLOVERTEX clover
107
#define TM_INV_CLOVERTEX cloverInv
108
#define READ_CLOVER READ_CLOVER_HALF
109
#define ASSN_CLOVER ASSN_CLOVER_HALF
110
#else
111
#ifdef USE_TEXTURE_OBJECTS
112
#define TMCLOVERTEX (param.cloverTex)
113
#define TMCLOVERTEXNORM (param.cloverNormTex)
114
#define TM_INV_CLOVERTEX (param.cloverInvTex)
115
#define TM_INV_CLOVERTEXNORM (param.cloverInvNormTex)
116
#else
117
#define TMCLOVERTEX cloverTexHalf
118
#define TMCLOVERTEXNORM cloverTexNorm
119
#define TM_INV_CLOVERTEX cloverInvTexHalf
120
#define TM_INV_CLOVERTEXNORM cloverInvTexNorm
121
#endif
122
#define READ_CLOVER READ_CLOVER_HALF_TEX
123
#define ASSN_CLOVER ASSN_CLOVER_HALF_TEX
124
#endif
125
126
#endif //End clover defs
127
128
129
#if (DD_PREC == 0)
130
#define DD_PARAM4 const double a, const double b, const double2 *x, const float *xNorm, const DslashParam param
131
#elif (DD_PREC == 1)
132
#define DD_PARAM4 const float a, const float b, const float4 *x, const float *xNorm, const DslashParam param
133
#else
134
#define DD_PARAM4 const float a, const float b, const short4 *x, const float *xNorm, const DslashParam param
135
#endif
136
137
#if (DD_RECON==0) // reconstruct from 8 reals
138
#define DD_RECON_F 8
139
140
#if (DD_PREC==0)
141
#define DD_PARAM2 const double2 *gauge0, const double2 *gauge1
142
#define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_DOUBLE
143
#ifdef DIRECT_ACCESS_LINK
144
#define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_DOUBLE2
145
#else
146
#define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_DOUBLE2_TEX
147
#endif // DIRECT_ACCESS_LINK
148
149
#elif (DD_PREC==1)
150
#define DD_PARAM2 const float4 *gauge0, const float4 *gauge1
151
#define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_SINGLE
152
#ifdef DIRECT_ACCESS_LINK
153
#define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_FLOAT4
154
#else
155
#define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_FLOAT4_TEX
156
#endif // DIRECT_ACCESS_LINK
157
158
#else
159
#define DD_PARAM2 const short4 *gauge0, const short4* gauge1
160
#define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_SINGLE
161
#ifdef DIRECT_ACCESS_LINK
162
#define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_SHORT4
163
#else
164
#define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_SHORT4_TEX
165
#endif // DIRECT_ACCESS_LINK
166
#endif // DD_PREC
167
#elif (DD_RECON==1) // reconstruct from 12 reals
168
#define DD_RECON_F 12
169
170
#if (DD_PREC==0)
171
#define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_DOUBLE
172
#ifdef DIRECT_ACCESS_LINK
173
#define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_DOUBLE2
174
#else
175
#define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_DOUBLE2_TEX
176
#endif // DIRECT_ACCESS_LINK
177
#define DD_PARAM2 const double2 *gauge0, const double2 *gauge1
178
179
#elif (DD_PREC==1)
180
#define DD_PARAM2 const float4 *gauge0, const float4 *gauge1
181
#define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_SINGLE
182
#ifdef DIRECT_ACCESS_LINK
183
#define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_FLOAT4
184
#else
185
#define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_FLOAT4_TEX
186
#endif // DIRECT_ACCESS_LINK
187
188
#else
189
#define DD_PARAM2 const short4 *gauge0, const short4 *gauge1
190
#define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_SINGLE
191
#ifdef DIRECT_ACCESS_LINK
192
#define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_SHORT4
193
#else
194
#define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_SHORT4_TEX
195
#endif // DIRECT_ACCESS_LINK
196
#endif // DD_PREC
197
#else // no reconstruct, load all components
198
#define DD_RECON_F 18
199
#define GAUGE_FLOAT2
200
#if (DD_PREC==0)
201
#define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_DOUBLE
202
#ifdef DIRECT_ACCESS_LINK
203
#define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_DOUBLE2
204
#else
205
#define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_DOUBLE2_TEX
206
#endif // DIRECT_ACCESS_LINK
207
#define DD_PARAM2 const double2 *gauge0, const double2 *gauge1
208
209
#elif (DD_PREC==1)
210
#define DD_PARAM2 const float4 *gauge0, const float4 *gauge1 // FIXME for direct reading, really float2
211
#define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_SINGLE
212
#ifdef DIRECT_ACCESS_LINK
213
#define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_FLOAT2
214
#else
215
#define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_FLOAT2_TEX
216
#endif // DIRECT_ACCESS_LINK
217
218
#else
219
#define DD_PARAM2 const short4 *gauge0, const short4 *gauge1 // FIXME for direct reading, really short2
220
#define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_SINGLE
221
#ifdef DIRECT_ACCESS_LINK
222
#define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_SHORT2
223
#else
224
#define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_SHORT2_TEX
225
#endif //DIRECT_ACCESS_LINK
226
#endif
227
#endif
228
229
#if (DD_PREC==0) // double-precision fields
230
231
#define TPROJSCALE tProjScale
232
233
// double-precision gauge field
234
#if (defined DIRECT_ACCESS_LINK) || (defined FERMI_NO_DBLE_TEX)
235
#define GAUGE0TEX gauge0
236
#define GAUGE1TEX gauge1
237
#else
238
#ifdef USE_TEXTURE_OBJECTS
239
#define GAUGE0TEX param.gauge0Tex
240
#define GAUGE1TEX param.gauge1Tex
241
#else
242
#define GAUGE0TEX gauge0TexDouble2
243
#define GAUGE1TEX gauge1TexDouble2
244
#endif // USE_TEXTURE_OBJECTS
245
#endif
246
247
#define GAUGE_FLOAT2
248
249
// double-precision spinor fields
250
#define DD_PARAM1 double2* out, float *null1
251
#define DD_PARAM3 const double2* in, const float *null4
252
#if (defined DIRECT_ACCESS_WILSON_SPINOR) || (defined FERMI_NO_DBLE_TEX)
253
#define READ_SPINOR READ_SPINOR_DOUBLE
254
#define READ_SPINOR_UP READ_SPINOR_DOUBLE_UP
255
#define READ_SPINOR_DOWN READ_SPINOR_DOUBLE_DOWN
256
#define SPINORTEX in
257
#else
258
#define READ_SPINOR READ_SPINOR_DOUBLE_TEX
259
#define READ_SPINOR_UP READ_SPINOR_DOUBLE_UP_TEX
260
#define READ_SPINOR_DOWN READ_SPINOR_DOUBLE_DOWN_TEX
261
#ifdef USE_TEXTURE_OBJECTS
262
#define SPINORTEX param.inTex
263
#else
264
#define SPINORTEX spinorTexDouble
265
#endif // USE_TEXTURE_OBJECTS
266
#endif
267
#if (defined DIRECT_ACCESS_WILSON_INTER) || (defined FERMI_NO_DBLE_TEX)
268
#define READ_INTERMEDIATE_SPINOR READ_SPINOR_DOUBLE
269
#define INTERTEX out
270
#else
271
#define READ_INTERMEDIATE_SPINOR READ_SPINOR_DOUBLE_TEX
272
#ifdef USE_TEXTURE_OBJECTS
273
#define INTERTEX param.outTex
274
#else
275
#define INTERTEX interTexDouble
276
#endif
277
#endif
278
#define WRITE_SPINOR WRITE_SPINOR_DOUBLE2
279
#define SPINOR_DOUBLE
280
#if (DD_XPAY!=0)
281
#if (defined DIRECT_ACCESS_WILSON_ACCUM) || (defined FERMI_NO_DBLE_TEX)
282
#define ACCUMTEX x
283
#define READ_ACCUM READ_ACCUM_DOUBLE
284
#else
285
#ifdef USE_TEXTURE_OBJECTS
286
#define ACCUMTEX param.xTex
287
#else
288
#define ACCUMTEX accumTexDouble
289
#endif // USE_TEXTURE_OBJECTS
290
#define READ_ACCUM READ_ACCUM_DOUBLE_TEX
291
#endif
292
293
#endif
294
295
#define SPINOR_HOP 12
296
297
#elif (DD_PREC==1) // single-precision fields
298
299
#define TPROJSCALE tProjScale_f
300
301
// single-precision gauge field
302
#ifdef DIRECT_ACCESS_LINK
303
#define GAUGE0TEX gauge0
304
#define GAUGE1TEX gauge1
305
#else
306
#ifdef USE_TEXTURE_OBJECTS
307
#define GAUGE0TEX param.gauge0Tex
308
#define GAUGE1TEX param.gauge1Tex
309
#else
310
#if (DD_RECON_F == 18)
311
#define GAUGE0TEX gauge0TexSingle2
312
#define GAUGE1TEX gauge1TexSingle2
313
#else
314
#define GAUGE0TEX gauge0TexSingle4
315
#define GAUGE1TEX gauge1TexSingle4
316
#endif
317
#endif // USE_TEXTURE_OBJECTS
318
#endif
319
320
321
// single-precision spinor fields
322
#define DD_PARAM1 float4* out, float *null1
323
#define DD_PARAM3 const float4* in, const float *null4
324
#ifdef DIRECT_ACCESS_WILSON_SPINOR
325
#define READ_SPINOR READ_SPINOR_SINGLE
326
#define READ_SPINOR_UP READ_SPINOR_SINGLE_UP
327
#define READ_SPINOR_DOWN READ_SPINOR_SINGLE_DOWN
328
#define SPINORTEX in
329
#else
330
#define READ_SPINOR READ_SPINOR_SINGLE_TEX
331
#define READ_SPINOR_UP READ_SPINOR_SINGLE_UP_TEX
332
#define READ_SPINOR_DOWN READ_SPINOR_SINGLE_DOWN_TEX
333
#ifdef USE_TEXTURE_OBJECTS
334
#define SPINORTEX param.inTex
335
#else
336
#define SPINORTEX spinorTexSingle
337
#endif // USE_TEXTURE_OBJECTS
338
#endif
339
#ifdef DIRECT_ACCESS_WILSON_INTER
340
#define READ_INTERMEDIATE_SPINOR READ_SPINOR_SINGLE
341
#define INTERTEX out
342
#else
343
#define READ_INTERMEDIATE_SPINOR READ_SPINOR_SINGLE_TEX
344
#ifdef USE_TEXTURE_OBJECTS
345
#define INTERTEX param.outTex
346
#else
347
#define INTERTEX interTexSingle
348
#endif // USE_TEXTURE_OBJECTS
349
#endif
350
#define WRITE_SPINOR WRITE_SPINOR_FLOAT4
351
#if (DD_XPAY!=0)
352
#ifdef DIRECT_ACCESS_WILSON_ACCUM
353
#define ACCUMTEX x
354
#define READ_ACCUM READ_ACCUM_SINGLE
355
#else
356
#ifdef USE_TEXTURE_OBJECTS
357
#define ACCUMTEX param.xTex
358
#else
359
#define ACCUMTEX accumTexSingle
360
#endif // USE_TEXTURE_OBJECTS
361
#define READ_ACCUM READ_ACCUM_SINGLE_TEX
362
#endif
363
#endif
364
365
#define SPINOR_HOP 6
366
367
#else // half-precision fields
368
369
#define TPROJSCALE tProjScale_f
370
371
// half-precision gauge field
372
#ifdef DIRECT_ACCESS_LINK
373
#define GAUGE0TEX gauge0
374
#define GAUGE1TEX gauge1
375
#else
376
#ifdef USE_TEXTURE_OBJECTS
377
#define GAUGE0TEX param.gauge0Tex
378
#define GAUGE1TEX param.gauge1Tex
379
#else
380
#if (DD_RECON_F == 18)
381
#define GAUGE0TEX gauge0TexHalf2
382
#define GAUGE1TEX gauge1TexHalf2
383
#else
384
#define GAUGE0TEX gauge0TexHalf4
385
#define GAUGE1TEX gauge1TexHalf4
386
#endif
387
#endif // USE_TEXTURE_OBJECTS
388
#endif
389
390
391
// half-precision spinor fields
392
#ifdef DIRECT_ACCESS_WILSON_SPINOR
393
#define READ_SPINOR READ_SPINOR_HALF
394
#define READ_SPINOR_UP READ_SPINOR_HALF_UP
395
#define READ_SPINOR_DOWN READ_SPINOR_HALF_DOWN
396
#define SPINORTEX in
397
#else
398
#define READ_SPINOR READ_SPINOR_HALF_TEX
399
#define READ_SPINOR_UP READ_SPINOR_HALF_UP_TEX
400
#define READ_SPINOR_DOWN READ_SPINOR_HALF_DOWN_TEX
401
#ifdef USE_TEXTURE_OBJECTS
402
#define SPINORTEX param.inTex
403
#else
404
#define SPINORTEX spinorTexHalf
405
#endif // USE_TEXTURE_OBJECTS
406
#endif
407
#ifdef DIRECT_ACCESS_WILSON_INTER
408
#define READ_INTERMEDIATE_SPINOR READ_SPINOR_HALF
409
#define INTERTEX out
410
#else
411
#define READ_INTERMEDIATE_SPINOR READ_SPINOR_HALF_TEX
412
#ifdef USE_TEXTURE_OBJECTS
413
#define INTERTEX param.outTex
414
#else
415
#define INTERTEX interTexHalf
416
#endif // USE_TEXTURE_OBJECTS
417
#endif
418
#define DD_PARAM1 short4* out, float *outNorm
419
#define DD_PARAM3 const short4* in, const float *inNorm
420
#define WRITE_SPINOR WRITE_SPINOR_SHORT4
421
#if (DD_XPAY!=0)
423
#ifdef DIRECT_ACCESS_WILSON_ACCUM
424
#define ACCUMTEX x
425
#define READ_ACCUM READ_ACCUM_HALF
426
#else
427
#ifdef USE_TEXTURE_OBJECTS
428
#define ACCUMTEX param.xTex
429
#else
430
#define ACCUMTEX accumTexHalf
431
#endif // USE_TEXTURE_OBJECTS
432
#define READ_ACCUM READ_ACCUM_HALF_TEX
433
#endif
434
435
#endif
436
437
#define SPINOR_HOP 6
438
439
#endif
440
441
// only build double precision if supported
442
#if !(__COMPUTE_CAPABILITY__ < 130 && DD_PREC == 0)
443
444
#define DD_CONCAT(n,r,d,x) n ## r ## d ## x ## Kernel
445
#define DD_FUNC(n,r,d,x) DD_CONCAT(n,r,d,x)
446
447
// define the kernel
449
template
<KernelType kernel_type>
450
__global__
void
DD_FUNC
(
DD_NAME_F
,
DD_RECON_F
,
DD_DAG_F
,
DD_XPAY_F
)
451
(
DD_PARAM1
,
DD_PARAM2
,
DD_PARAMCLOVER
,
DD_PARAM3
,
DD_PARAM4
) {
452
453
#ifdef GPU_TWISTED_CLOVER_DIRAC
454
455
#if (__COMPUTE_CAPABILITY__ >= 200 && defined(SHARED_WILSON_DSLASH)) // Fermi optimal code
456
457
#if DD_DAG
458
#include "
tmc_dslash_dagger_fermi_core.h
"
459
#else
460
#include "
tmc_dslash_fermi_core.h
"
461
#endif
462
463
#elif (__COMPUTE_CAPABILITY__ >= 120) // GT200 optimal code
464
465
#if DD_DAG
466
#include "
tmc_dslash_dagger_gt200_core.h
"
467
#else
468
#include "
tmc_dslash_gt200_core.h
"
469
#endif
470
471
#else // fall-back is original G80
472
473
#if DD_DAG
474
#include "
tmc_dslash_dagger_g80_core.h
"
475
#else
476
#include "
tmc_dslash_g80_core.h
"
477
#endif
478
479
#endif
480
481
#endif
482
483
}
484
485
template
<>
486
__global__
void
DD_FUNC
(
DD_NAME_F
,
DD_RECON_F
,
DD_DAG_F
,
DD_XPAY_F
)<
EXTERIOR_KERNEL_ALL
>
487
(
DD_PARAM1
,
DD_PARAM2
,
DD_PARAMCLOVER
,
DD_PARAM3
,
DD_PARAM4
) {
488
489
#ifdef GPU_TWISTED_CLOVER_DIRAC
490
491
#if (__COMPUTE_CAPABILITY__ >= 200 && defined(SHARED_WILSON_DSLASH)) // Fermi optimal code
492
493
#if DD_DAG
494
#include "
tmc_fused_exterior_dslash_dagger_fermi_core.h
"
495
#else
496
#include "
tmc_fused_exterior_dslash_fermi_core.h
"
497
#endif
498
499
#elif (__COMPUTE_CAPABILITY__ >= 120) // GT200 optimal code
500
501
#if DD_DAG
502
#include "
tmc_fused_exterior_dslash_dagger_gt200_core.h
"
503
#else
504
#include "
tmc_fused_exterior_dslash_gt200_core.h
"
505
#endif
506
507
#else // fall-back is original G80
508
509
#if DD_DAG
510
#include "
tmc_fused_exterior_dslash_dagger_g80_core.h
"
511
#else
512
#include "
tmc_fused_exterior_dslash_g80_core.h
"
513
#endif
514
515
#endif
516
517
#endif
518
519
}
520
521
522
//NEW
523
#if (DD_XPAY==1) && (DD_TWIST==1)
524
#define CLOVER_TWIST_XPAY
525
526
//redefine kernel name:
527
#undef DD_NAME_F
528
#define DD_NAME_F twistedCloverDslashTwist
529
530
template
<KernelType kernel_type>
531
__global__
void
DD_FUNC
(
DD_NAME_F
,
DD_RECON_F
,
DD_DAG_F
,
DD_XPAY_F
)
532
(
DD_PARAM1
,
DD_PARAM2
,
DD_PARAMCLOVER
,
DD_PARAM3
,
DD_PARAM4
) {
533
534
#ifdef GPU_TWISTED_CLOVER_DIRAC
535
536
#if (__COMPUTE_CAPABILITY__ >= 200 && defined(SHARED_WILSON_DSLASH)) // Fermi optimal code
537
538
#if DD_DAG
539
#include "
tmc_dslash_dagger_fermi_core.h
"
540
#else
541
#include "
tmc_dslash_fermi_core.h
"
542
#endif
543
544
#elif (__COMPUTE_CAPABILITY__ >= 120) // GT200 optimal code
545
546
#if DD_DAG
547
#include "
tmc_dslash_dagger_gt200_core.h
"
548
#else
549
#include "
tmc_dslash_gt200_core.h
"
550
#endif
551
552
#else // fall-back is original G80
553
554
#if DD_DAG
555
#include "
tmc_dslash_dagger_g80_core.h
"
556
#else
557
#include "
tmc_dslash_g80_core.h
"
558
#endif
559
560
#endif
561
562
#endif
563
564
}
565
566
template
<>
567
__global__
void
DD_FUNC
(
DD_NAME_F
,
DD_RECON_F
,
DD_DAG_F
,
DD_XPAY_F
)<
EXTERIOR_KERNEL_ALL
>
568
(
DD_PARAM1
,
DD_PARAM2
,
DD_PARAMCLOVER
,
DD_PARAM3
,
DD_PARAM4
) {
569
570
#ifdef GPU_TWISTED_CLOVER_DIRAC
571
572
#if (__COMPUTE_CAPABILITY__ >= 200 && defined(SHARED_WILSON_DSLASH)) // Fermi optimal code
573
574
#if DD_DAG
575
#include "
tmc_fused_exterior_dslash_dagger_fermi_core.h
"
576
#else
577
#include "
tmc_fused_exterior_dslash_fermi_core.h
"
578
#endif
579
580
#elif (__COMPUTE_CAPABILITY__ >= 120) // GT200 optimal code
581
582
#if DD_DAG
583
#include "
tmc_fused_exterior_dslash_dagger_gt200_core.h
"
584
#else
585
#include "
tmc_fused_exterior_dslash_gt200_core.h
"
586
#endif
587
588
#else // fall-back is original G80
589
590
#if DD_DAG
591
#include "
tmc_fused_exterior_dslash_dagger_g80_core.h
"
592
#else
593
#include "
tmc_fused_exterior_dslash_g80_core.h
"
594
#endif
595
596
#endif
597
598
#endif
599
600
}
601
602
#undef CLOVER_TWIST_XPAY
603
#endif //(DD_XPAY==0) && (DD_TWIST==1)
604
605
606
//BEGIN DUMMY KERNEL (remove it later)
607
#if (DD_XPAY==0) && (DD_TWIST==1)
608
#define CLOVER_TWIST_XPAY
609
610
//redefine kernel name:
611
#undef DD_NAME_F
612
#define DD_NAME_F twistedCloverDslashTwist
613
614
template
<KernelType kernel_type>
615
__global__
void
DD_FUNC
(
DD_NAME_F
,
DD_RECON_F
,
DD_DAG_F
,
DD_XPAY_F
)
616
(
DD_PARAM1
,
DD_PARAM2
,
DD_PARAMCLOVER
,
DD_PARAM3
,
DD_PARAM4
) {
617
618
#ifdef GPU_TWISTED_CLOVER_DIRAC
619
620
#endif
621
622
}
623
#undef CLOVER_TWIST_XPAY
624
#endif //(DD_XPAY==0) && (DD_TWIST==1)
625
//END DUMMY KERNEL
626
627
#endif
628
629
// clean up
630
631
#undef DD_NAME_F
632
#undef DD_RECON_F
633
#undef DD_DAG_F
634
#undef DD_XPAY_F
635
#undef DD_PARAM1
636
#undef DD_PARAM2
637
#undef DD_PARAMCLOVER
638
#undef DD_PARAM3
639
#undef DD_PARAM4
640
#undef DD_CONCAT
641
#undef DD_FUNC
642
643
#undef DSLASH_XPAY
644
646
#undef CLOVER_TWIST_INV_DSLASH
647
#undef READ_GAUGE_MATRIX
649
#undef RECONSTRUCT_GAUGE_MATRIX
650
#undef GAUGE0TEX
651
#undef GAUGE1TEX
652
#undef READ_SPINOR
653
#undef READ_SPINOR_UP
654
#undef READ_SPINOR_DOWN
655
#undef SPINORTEX
656
#undef READ_INTERMEDIATE_SPINOR
657
#undef INTERTEX
658
#undef READ_ACCUM
659
#undef ACCUMTEX
660
#undef WRITE_SPINOR
661
#undef GAUGE_FLOAT2
662
#undef SPINOR_DOUBLE
663
664
#undef READ_CLOVER
665
#undef ASSN_CLOVER
666
#undef TMCLOVERTEX
667
#undef TMCLOVERTEXNORM
668
#undef TM_INV_CLOVERTEX
669
#undef TM_INV_CLOVERTEXNORM
670
#undef CLOVER_DOUBLE
671
672
#undef SPINOR_HOP
673
674
#undef TPROJSCALE
675
676
// prepare next set of options, or clean up after final iteration
677
678
#if (DD_DAG==0)
679
#undef DD_DAG
680
#define DD_DAG 1
681
#else
682
#undef DD_DAG
683
#define DD_DAG 0
684
685
#if (DD_TWIST==0)
686
#undef DD_TWIST
687
#define DD_TWIST 1
688
#else
689
#undef DD_TWIST
690
#define DD_TWIST 0
691
692
#if (DD_XPAY==0)
693
#undef DD_XPAY
694
#define DD_XPAY 1
695
#else
696
#undef DD_XPAY
697
#define DD_XPAY 0
698
699
#if (DD_RECON==0)
700
#undef DD_RECON
701
#define DD_RECON 1
702
#elif (DD_RECON==1)
703
#undef DD_RECON
704
#define DD_RECON 2
705
#else
706
#undef DD_RECON
707
#define DD_RECON 0
708
709
#if (DD_PREC==0)
710
#undef DD_PREC
711
#define DD_PREC 1
712
#elif (DD_PREC==1)
713
#undef DD_PREC
714
#define DD_PREC 2
715
716
#else
717
718
#undef DD_LOOP
719
#undef DD_DAG
720
#undef DD_TWIST
721
#undef DD_XPAY
722
#undef DD_RECON
723
#undef DD_PREC
724
725
#endif // DD_PREC
726
#endif // DD_RECON
727
#endif // DD_XPAY
728
#endif // DD_TWIST
729
#endif // DD_DAG
730
731
#ifdef DD_LOOP
732
#include "
tmc_dslash_def.h
"
733
#endif
DD_DAG_F
#define DD_DAG_F
Definition:
tmc_dslash_def.h:45
tmc_dslash_def.h
DD_PARAMCLOVER
#define DD_PARAMCLOVER
Definition:
tmc_dslash_def.h:67
tmc_dslash_dagger_fermi_core.h
DD_RECON_F
#define DD_RECON_F
Definition:
tmc_dslash_def.h:138
DD_PARAM4
#define DD_PARAM4
Definition:
tmc_dslash_def.h:130
tmc_dslash_fermi_core.h
DD_FUNC
#define DD_FUNC(x)
Definition:
clover_def.h:141
tmc_fused_exterior_dslash_dagger_g80_core.h
tmc_dslash_dagger_g80_core.h
tmc_dslash_dagger_gt200_core.h
tmc_fused_exterior_dslash_fermi_core.h
EXTERIOR_KERNEL_ALL
Definition:
dslash_constants.h:3
tmc_dslash_gt200_core.h
tmc_dslash_g80_core.h
DD_PARAM1
#define DD_PARAM1
Definition:
tmc_dslash_def.h:250
tmc_fused_exterior_dslash_dagger_fermi_core.h
tmc_fused_exterior_dslash_g80_core.h
DD_PARAM2
#define DD_PARAM2
Definition:
tmc_dslash_def.h:141
tmc_fused_exterior_dslash_gt200_core.h
DD_XPAY_F
#define DD_XPAY_F
Definition:
tmc_dslash_def.h:52
DD_NAME_F
#define DD_NAME_F
Definition:
tmc_dslash_def.h:59
DD_PARAM3
#define DD_PARAM3
Definition:
tmc_dslash_def.h:251
tmc_fused_exterior_dslash_dagger_gt200_core.h
Generated on Wed Feb 4 2015 17:00:12 for QUDA by
1.8.6