QUDA
v0.7.0
A library for QCD on GPUs
Main Page
Namespaces
Classes
Files
File List
File Members
•
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Pages
quda
lib
wilson_dslash_def.h
Go to the documentation of this file.
1
// wilson_dslash_def.h - Dslash kernel definitions
2
3
// The Wilson Dslash kernel comes in many variants, each characterized by
4
// 5 options (DD_CLOVER, DD_RECON, DD_DAG, DD_XPAY, DD_PREC). DD_CLOVER is
5
// not set in this file; for a given DD_CLOVER the C preprocessor loops
6
// through up to 2*2*3*3 = 36 settings of the other four options (in a
7
// manner resembling a counter; asymmetric clover forces DD_XPAY=1), sets
8
// the appropriate macros, and defines the corresponding functions.
9
//
10
// As an example of the function naming conventions, consider
11
//
12
// cloverDslash12DaggerXpayKernel(float4* out, ...).
13
//
14
// This is a clover Dslash^dagger kernel where the result is
15
// multiplied by "a" and summed with an input vector (Xpay), and the
16
// gauge matrix is reconstructed from 12 real numbers. More
17
// generally, each function name is given by the concatenation of the
18
// following 4 fields, with "Kernel" at the end:
19
//
20
// DD_NAME_F = dslash, cloverDslash, asymCloverDslash
21
// DD_RECON_F = 8, 12, 18
22
// DD_DAG_F = Dagger, [blank]
23
// DD_XPAY_F = Xpay, [blank]
24
//
25
// In addition, each kernel is templated on the KernelType and overloaded
26
// on the precision of the fields (double, single, or half).
27
28
// initialize on first iteration
29
30
// DD_LOOP marks that an iteration sequence is in progress. It is defined
// here on the first pass and removed again at the end of the file once the
// last DD_PREC value has been processed, so the counters below are reset
// only at the very start of a sequence.
#ifndef DD_LOOP
31
#define DD_LOOP
32
// dagger selector: 0 = plain, otherwise the Dagger variant
#define DD_DAG 0
33
// xpay selector: 0 = plain, otherwise the Xpay variant
#define DD_XPAY 0
34
// gauge reconstruction selector: 0 -> 8 reals, 1 -> 12 reals, otherwise 18
#define DD_RECON 0
35
// precision selector: 0 = double, 1 = single, otherwise half
#define DD_PREC 0
36
#endif // DD_LOOP
37
38
// set options for current iteration
39
40
// Select the kernel name prefix and the clover feature macros from
// DD_CLOVER.
// NOTE(review): DD_CLOVER is never defined in this file, so it presumably
// comes from the including file; an undefined DD_CLOVER evaluates to 0 in
// the test below and therefore selects the plain dslash name.
#if (DD_CLOVER==0) // no clover
41
#define DD_NAME_F dslash
42
#elif (DD_CLOVER==1) // clover
43
#define DSLASH_CLOVER
44
#define DD_NAME_F cloverDslash
45
// every other DD_CLOVER value selects the asymmetric clover name and
// additionally defines DSLASH_CLOVER_XPAY
#else
46
#define DSLASH_CLOVER
47
#define DSLASH_CLOVER_XPAY
48
#define DD_NAME_F asymCloverDslash
49
#endif // DD_CLOVER
50
51
// name fragment for the dagger option (empty for the plain operator)
#if (DD_DAG==0) // no dagger
52
#define DD_DAG_F
53
#else // dagger
54
#define DD_DAG_F Dagger
55
#endif // DD_DAG
56
57
// DSLASH_CLOVER_XPAY implies DD_XPAY=1
// The counter value itself is overwritten here, so for asymmetric clover
// the DD_XPAY==0 state is never used to generate a kernel.
58
#if (DD_XPAY==0) && defined(DSLASH_CLOVER_XPAY)
59
#undef DD_XPAY
60
#define DD_XPAY 1
61
#endif
62
63
// name fragment for the xpay option; DSLASH_XPAY is defined only for the
// Xpay variant
#if (DD_XPAY==0) // no xpay
64
#define DD_XPAY_F
65
#else // xpay
66
#define DD_XPAY_F Xpay
67
#define DSLASH_XPAY
68
#endif // DD_XPAY
69
70
#if (DD_PREC == 0)
71
#define DD_PARAM_XPAY const double2 *x, const float *xNorm, const double a,
72
#elif (DD_PREC == 1)
73
#define DD_PARAM_XPAY const float4 *x, const float *xNorm, const float a,
74
#else
75
#define DD_PARAM_XPAY const short4 *x, const float *xNorm, const float a,
76
#endif
77
78
#if (DD_RECON==0) // reconstruct from 8 reals
79
#define DD_RECON_F 8
80
81
#if (DD_PREC==0)
82
#define DD_PARAM_GAUGE const double2 *gauge0, const double2 *gauge1,
83
#define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_DOUBLE
84
#ifdef DIRECT_ACCESS_LINK
85
#define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_DOUBLE2
86
#else
87
#define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_DOUBLE2_TEX
88
#endif // DIRECT_ACCESS_LINK
89
90
#elif (DD_PREC==1)
91
#define DD_PARAM_GAUGE const float4 *gauge0, const float4 *gauge1,
92
#define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_SINGLE
93
#ifdef DIRECT_ACCESS_LINK
94
#define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_FLOAT4
95
#else
96
#define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_FLOAT4_TEX
97
#endif // DIRECT_ACCESS_LINK
98
99
#else
100
#define DD_PARAM_GAUGE const short4 *gauge0, const short4* gauge1,
101
#define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_8_SINGLE
102
#ifdef DIRECT_ACCESS_LINK
103
#define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_SHORT4
104
#else
105
#define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_8_SHORT4_TEX
106
#endif // DIRECT_ACCESS_LINK
107
#endif // DD_PREC
108
#elif (DD_RECON==1) // reconstruct from 12 reals
109
#define DD_RECON_F 12
110
111
#if (DD_PREC==0)
112
#define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_DOUBLE
113
#ifdef DIRECT_ACCESS_LINK
114
#define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_DOUBLE2
115
#else
116
#define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_DOUBLE2_TEX
117
#endif // DIRECT_ACCESS_LINK
118
#define DD_PARAM_GAUGE const double2 *gauge0, const double2 *gauge1,
119
120
#elif (DD_PREC==1)
121
#define DD_PARAM_GAUGE const float4 *gauge0, const float4 *gauge1,
122
#define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_SINGLE
123
#ifdef DIRECT_ACCESS_LINK
124
#define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_FLOAT4
125
#else
126
#define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_FLOAT4_TEX
127
#endif // DIRECT_ACCESS_LINK
128
129
#else
130
#define DD_PARAM_GAUGE const short4 *gauge0, const short4 *gauge1,
131
#define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_12_SINGLE
132
#ifdef DIRECT_ACCESS_LINK
133
#define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_SHORT4
134
#else
135
#define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_12_SHORT4_TEX
136
#endif // DIRECT_ACCESS_LINK
137
#endif // DD_PREC
138
#else // no reconstruct, load all components
139
#define DD_RECON_F 18
140
#define GAUGE_FLOAT2
141
#if (DD_PREC==0)
142
#define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_DOUBLE
143
#ifdef DIRECT_ACCESS_LINK
144
#define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_DOUBLE2
145
#else
146
#define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_DOUBLE2_TEX
147
#endif // DIRECT_ACCESS_LINK
148
#define DD_PARAM_GAUGE const double2 *gauge0, const double2 *gauge1,
149
150
#elif (DD_PREC==1)
151
#define DD_PARAM_GAUGE const float4 *gauge0, const float4 *gauge1, // FIXME for direct reading, really float2
152
#define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_SINGLE
153
#ifdef DIRECT_ACCESS_LINK
154
#define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_FLOAT2
155
#else
156
#define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_FLOAT2_TEX
157
#endif // DIRECT_ACCESS_LINK
158
159
#else
160
#define DD_PARAM_GAUGE const short4 *gauge0, const short4 *gauge1, // FIXME for direct reading, really short2
161
#define RECONSTRUCT_GAUGE_MATRIX RECONSTRUCT_MATRIX_18_SINGLE
162
#ifdef DIRECT_ACCESS_LINK
163
#define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_SHORT2
164
#else
165
#define READ_GAUGE_MATRIX READ_GAUGE_MATRIX_18_SHORT2_TEX
166
#endif //DIRECT_ACCESS_LINK
167
#endif
168
#endif
169
170
#if (DD_PREC==0) // double-precision fields
171
172
#define TPROJSCALE tProjScale
173
174
// double-precision gauge field
175
#if (defined DIRECT_ACCESS_LINK) || (defined FERMI_NO_DBLE_TEX)
176
#define GAUGE0TEX gauge0
177
#define GAUGE1TEX gauge1
178
#else
179
#ifdef USE_TEXTURE_OBJECTS
180
#define GAUGE0TEX param.gauge0Tex
181
#define GAUGE1TEX param.gauge1Tex
182
#else
183
#define GAUGE0TEX gauge0TexDouble2
184
#define GAUGE1TEX gauge1TexDouble2
185
#endif
186
#endif
187
188
#define GAUGE_FLOAT2
189
190
// double-precision spinor fields
191
#define DD_PARAM_OUT double2* out, float *null1,
192
#define DD_PARAM_IN const double2* in, const float *null4,
193
194
#if (defined DIRECT_ACCESS_WILSON_SPINOR) || (defined FERMI_NO_DBLE_TEX)
195
#define READ_SPINOR READ_SPINOR_DOUBLE
196
#define READ_SPINOR_UP READ_SPINOR_DOUBLE_UP
197
#define READ_SPINOR_DOWN READ_SPINOR_DOUBLE_DOWN
198
#define SPINORTEX in
199
#else
200
#define READ_SPINOR READ_SPINOR_DOUBLE_TEX
201
#define READ_SPINOR_UP READ_SPINOR_DOUBLE_UP_TEX
202
#define READ_SPINOR_DOWN READ_SPINOR_DOUBLE_DOWN_TEX
203
#ifdef USE_TEXTURE_OBJECTS
204
#define SPINORTEX param.inTex
205
#else
206
#define SPINORTEX spinorTexDouble
207
#endif // USE_TEXTURE_OBJECTS
208
#endif
209
#if (defined DIRECT_ACCESS_WILSON_INTER) || (defined FERMI_NO_DBLE_TEX)
210
#define READ_INTERMEDIATE_SPINOR READ_SPINOR_DOUBLE
211
#define INTERTEX out
212
#else
213
#define READ_INTERMEDIATE_SPINOR READ_SPINOR_DOUBLE_TEX
214
#ifdef USE_TEXTURE_OBJECTS
215
#define INTERTEX param.outTex
216
#else
217
#define INTERTEX interTexDouble
218
#endif
219
#endif
220
#define WRITE_SPINOR WRITE_SPINOR_DOUBLE2_STR
221
#define SPINOR_DOUBLE
222
#if (DD_XPAY==1)
223
#if (defined DIRECT_ACCESS_WILSON_ACCUM) || (defined FERMI_NO_DBLE_TEX)
224
#define ACCUMTEX x
225
#define READ_ACCUM READ_ACCUM_DOUBLE
226
#else
227
#ifdef USE_TEXTURE_OBJECTS
228
#define ACCUMTEX param.xTex
229
#else
230
#define ACCUMTEX accumTexDouble
231
#endif // USE_TEXTURE_OBJECTS
232
#define READ_ACCUM READ_ACCUM_DOUBLE_TEX
233
#endif
234
235
#endif
236
237
#define SPINOR_HOP 12
238
239
// double-precision clover field
240
#if (DD_CLOVER==0)
241
#define DD_PARAM_CLOVER
242
#else
243
#define DD_PARAM_CLOVER const double2 *clover, const float *null3,
244
#endif
245
#if (defined DIRECT_ACCESS_CLOVER) || (defined FERMI_NO_DBLE_TEX)
246
#define CLOVERTEX clover
247
#define READ_CLOVER READ_CLOVER_DOUBLE_STR
248
#else
249
#ifdef USE_TEXTURE_OBJECTS
250
#define CLOVERTEX (param.cloverTex)
251
#else
252
#define CLOVERTEX cloverTexDouble
253
#endif
254
#define READ_CLOVER READ_CLOVER_DOUBLE_TEX
255
#endif
256
#define CLOVER_DOUBLE
257
258
#elif (DD_PREC==1) // single-precision fields
259
260
#define TPROJSCALE tProjScale_f
261
262
// single-precision gauge field
263
#ifdef DIRECT_ACCESS_LINK
264
#define GAUGE0TEX gauge0
265
#define GAUGE1TEX gauge1
266
#else
267
#ifdef USE_TEXTURE_OBJECTS
268
#define GAUGE0TEX param.gauge0Tex
269
#define GAUGE1TEX param.gauge1Tex
270
#else
271
#if (DD_RECON_F == 18)
272
#define GAUGE0TEX gauge0TexSingle2
273
#define GAUGE1TEX gauge1TexSingle2
274
#else
275
#define GAUGE0TEX gauge0TexSingle4
276
#define GAUGE1TEX gauge1TexSingle4
277
#endif
278
#endif // USE_TEXTURE_OBJECTS
279
#endif
280
281
282
// single-precision spinor fields
283
#define DD_PARAM_OUT float4* out, float *null1,
284
#define DD_PARAM_IN const float4* in, const float *null4,
285
#ifdef DIRECT_ACCESS_WILSON_SPINOR
286
#define READ_SPINOR READ_SPINOR_SINGLE
287
#define READ_SPINOR_UP READ_SPINOR_SINGLE_UP
288
#define READ_SPINOR_DOWN READ_SPINOR_SINGLE_DOWN
289
#define SPINORTEX in
290
#else
291
#define READ_SPINOR READ_SPINOR_SINGLE_TEX
292
#define READ_SPINOR_UP READ_SPINOR_SINGLE_UP_TEX
293
#define READ_SPINOR_DOWN READ_SPINOR_SINGLE_DOWN_TEX
294
#ifdef USE_TEXTURE_OBJECTS
295
#define SPINORTEX param.inTex
296
#else
297
#define SPINORTEX spinorTexSingle
298
#endif // USE_TEXTURE_OBJECTS
299
#endif
300
#ifdef DIRECT_ACCESS_WILSON_INTER
301
#define READ_INTERMEDIATE_SPINOR READ_SPINOR_SINGLE
302
#define INTERTEX out
303
#else
304
#define READ_INTERMEDIATE_SPINOR READ_SPINOR_SINGLE_TEX
305
#ifdef USE_TEXTURE_OBJECTS
306
#define INTERTEX param.outTex
307
#else
308
#define INTERTEX interTexSingle
309
#endif // USE_TEXTURE_OBJECTS
310
#endif
311
#define WRITE_SPINOR WRITE_SPINOR_FLOAT4_STR
312
#if (DD_XPAY==1)
313
#ifdef DIRECT_ACCESS_WILSON_ACCUM
314
#define ACCUMTEX x
315
#define READ_ACCUM READ_ACCUM_SINGLE
316
#else
317
#ifdef USE_TEXTURE_OBJECTS
318
#define ACCUMTEX param.xTex
319
#else
320
#define ACCUMTEX accumTexSingle
321
#endif // USE_TEXTURE_OBJECTS
322
#define READ_ACCUM READ_ACCUM_SINGLE_TEX
323
#endif
324
#endif
325
326
#define SPINOR_HOP 6
327
328
// single-precision clover field
329
#if (DD_CLOVER==0)
330
#define DD_PARAM_CLOVER
331
#else
332
#define DD_PARAM_CLOVER const float4 *clover, const float *null3,
333
#endif
334
#ifdef DIRECT_ACCESS_CLOVER
335
#define CLOVERTEX clover
336
#define READ_CLOVER READ_CLOVER_SINGLE
337
#else
338
#ifdef USE_TEXTURE_OBJECTS
339
#define CLOVERTEX (param.cloverTex)
340
#else
341
#define CLOVERTEX cloverTexSingle
342
#endif
343
#define READ_CLOVER READ_CLOVER_SINGLE_TEX
344
#endif
345
346
#else // half-precision fields
347
348
#define TPROJSCALE tProjScale_f
349
350
// half-precision gauge field
351
#ifdef DIRECT_ACCESS_LINK
352
#define GAUGE0TEX gauge0
353
#define GAUGE1TEX gauge1
354
#else
355
#ifdef USE_TEXTURE_OBJECTS
356
#define GAUGE0TEX param.gauge0Tex
357
#define GAUGE1TEX param.gauge1Tex
358
#else
359
#if (DD_RECON_F == 18)
360
#define GAUGE0TEX gauge0TexHalf2
361
#define GAUGE1TEX gauge1TexHalf2
362
#else
363
#define GAUGE0TEX gauge0TexHalf4
364
#define GAUGE1TEX gauge1TexHalf4
365
#endif
366
#endif // USE_TEXTURE_OBJECTS
367
#endif
368
369
370
// half-precision spinor fields
371
#ifdef DIRECT_ACCESS_WILSON_SPINOR
372
#define READ_SPINOR READ_SPINOR_HALF
373
#define READ_SPINOR_UP READ_SPINOR_HALF_UP
374
#define READ_SPINOR_DOWN READ_SPINOR_HALF_DOWN
375
#define SPINORTEX in
376
#else
377
#define READ_SPINOR READ_SPINOR_HALF_TEX
378
#define READ_SPINOR_UP READ_SPINOR_HALF_UP_TEX
379
#define READ_SPINOR_DOWN READ_SPINOR_HALF_DOWN_TEX
380
#ifdef USE_TEXTURE_OBJECTS
381
#define SPINORTEX param.inTex
382
#else
383
#define SPINORTEX spinorTexHalf
384
#endif // USE_TEXTURE_OBJECTS
385
#endif
386
#ifdef DIRECT_ACCESS_WILSON_INTER
387
#define READ_INTERMEDIATE_SPINOR READ_SPINOR_HALF
388
#define INTERTEX out
389
#else
390
#define READ_INTERMEDIATE_SPINOR READ_SPINOR_HALF_TEX
391
#ifdef USE_TEXTURE_OBJECTS
392
#define INTERTEX param.outTex
393
#else
394
#define INTERTEX interTexHalf
395
#endif // USE_TEXTURE_OBJECTS
396
#endif
397
#define DD_PARAM_OUT short4* out, float *outNorm,
398
#define DD_PARAM_IN const short4* in, const float *inNorm,
399
#define WRITE_SPINOR WRITE_SPINOR_SHORT4_STR
400
#if (DD_XPAY==1)
401
#ifdef DIRECT_ACCESS_WILSON_ACCUM
402
#define ACCUMTEX x
403
#define READ_ACCUM READ_ACCUM_HALF
404
#else
405
#ifdef USE_TEXTURE_OBJECTS
406
#define ACCUMTEX param.xTex
407
#else
408
#define ACCUMTEX accumTexHalf
409
#endif // USE_TEXTURE_OBJECTS
410
#define READ_ACCUM READ_ACCUM_HALF_TEX
411
#endif
412
#endif
413
414
#define SPINOR_HOP 6
415
416
// half-precision clover field
417
#if (DD_CLOVER==0)
418
#define DD_PARAM_CLOVER
419
#else
420
#define DD_PARAM_CLOVER const short4 *clover, const float *cloverNorm,
421
#endif
422
#ifdef DIRECT_ACCESS_CLOVER
423
#define CLOVERTEX clover
424
#define READ_CLOVER READ_CLOVER_HALF
425
#else
426
#ifdef USE_TEXTURE_OBJECTS
427
#define CLOVERTEX (param.cloverTex)
428
#define CLOVERTEXNORM (param.cloverNormTex)
429
#else
430
#define CLOVERTEX cloverTexHalf
431
#define CLOVERTEXNORM cloverTexNorm
432
#endif
433
#define READ_CLOVER READ_CLOVER_HALF_TEX
434
#endif
435
436
#endif
437
438
// only build double precision if supported
439
#if !(__COMPUTE_CAPABILITY__ < 130 && DD_PREC == 0)
440
441
// DD_CONCAT pastes the four name fragments together and appends Kernel.
// DD_FUNC adds one level of indirection so that DD_NAME_F, DD_RECON_F,
// DD_DAG_F and DD_XPAY_F are macro-expanded before the ## operator is
// applied; invoking DD_CONCAT directly with those macros would paste the
// macro names themselves instead of their values.
#define DD_CONCAT(n,r,d,x) n ## r ## d ## x ## Kernel
442
#define DD_FUNC(n,r,d,x) DD_CONCAT(n,r,d,x)
443
444
// define the kernel
445
446
// Generic Dslash kernel for the current option combination, templated on
// KernelType. The function name is assembled by DD_FUNC from DD_NAME_F,
// DD_RECON_F, DD_DAG_F and DD_XPAY_F; the parameter list is assembled from
// the DD_PARAM_* macros followed by a DslashParam passed by value.
// The body is not written here: it is pulled in from exactly one core
// header, chosen by __COMPUTE_CAPABILITY__ (plus SHARED_WILSON_DSLASH for
// the Fermi branch), DSLASH_CLOVER_XPAY and DD_DAG. When the outer build
// guard is false the kernel is still defined, but with an empty body.
template
<KernelType kernel_type>
447
__global__
void
DD_FUNC
(
DD_NAME_F
,
DD_RECON_F
,
DD_DAG_F
,
DD_XPAY_F
)
448
(
DD_PARAM_OUT
DD_PARAM_GAUGE
DD_PARAM_CLOVER
DD_PARAM_IN
DD_PARAM_XPAY
const
DslashParam
param
) {
449
450
// build Wilson or clover as appropriate
// NOTE(review): the name selection earlier in this file treats every
// DD_CLOVER value other than 0 and 1 as asymmetric clover, whereas this
// guard only accepts 2 - confirm no other value is ever passed in.
451
#if ((DD_CLOVER==0 && defined(GPU_WILSON_DIRAC)) || ((DD_CLOVER==1 || DD_CLOVER==2) && defined(GPU_CLOVER_DIRAC)))
452
453
#if (__COMPUTE_CAPABILITY__ >= 200 && defined(SHARED_WILSON_DSLASH)) // Fermi optimal code
454
455
#ifdef DSLASH_CLOVER_XPAY
456
457
#if DD_DAG
458
#include "
asym_wilson_clover_dslash_dagger_fermi_core.h
"
459
#else
460
#include "
asym_wilson_clover_dslash_fermi_core.h
"
461
#endif // DD_DAG
462
463
#else
464
465
#if DD_DAG
466
#include "
wilson_dslash_dagger_fermi_core.h
"
467
#else
468
#include "
wilson_dslash_fermi_core.h
"
469
#endif // DD_DAG
470
471
#endif // DSLASH_CLOVER_XPAY
472
473
#elif (__COMPUTE_CAPABILITY__ >= 120) // GT200 optimal code
474
475
#ifdef DSLASH_CLOVER_XPAY
476
477
#if DD_DAG
478
#include "
asym_wilson_clover_dslash_dagger_gt200_core.h
"
479
#else
480
#include "
asym_wilson_clover_dslash_gt200_core.h
"
481
#endif // DD_DAG
482
483
#else
484
485
#if DD_DAG
486
#include "
wilson_dslash_dagger_gt200_core.h
"
487
#else
488
#include "
wilson_dslash_gt200_core.h
"
489
#endif // DD_DAG
490
491
#endif // DSLASH_CLOVER_XPAY
492
493
#else // fall-back is original G80
494
495
#ifdef DSLASH_CLOVER_XPAY
496
497
#if DD_DAG
498
#include "
asym_wilson_clover_dslash_dagger_g80_core.h
"
499
#else
500
#include "
asym_wilson_clover_dslash_g80_core.h
"
501
#endif // DD_DAG
502
503
#else
504
505
#if DD_DAG
506
#include "
wilson_dslash_dagger_g80_core.h
"
507
#else
508
#include "
wilson_dslash_g80_core.h
"
509
#endif // DD_DAG
510
511
#endif // DSLASH_CLOVER_XPAY
512
513
514
#endif // __COMPUTE_CAPABILITY__
515
516
517
#endif // DD_CLOVER
518
519
}
520
521
522
// Explicit specialization of the kernel above for EXTERIOR_KERNEL_ALL.
// Name and parameter list are assembled from the same macros as the
// generic template. The body comes from the fused_exterior family of core
// headers, chosen by __COMPUTE_CAPABILITY__ (plus SHARED_WILSON_DSLASH for
// the Fermi branch), DSLASH_CLOVER_XPAY and DD_DAG. When the outer build
// guard is false the specialization is still defined, with an empty body.
template
<>
523
__global__
void
DD_FUNC
(
DD_NAME_F
,
DD_RECON_F
,
DD_DAG_F
,
DD_XPAY_F
)<
EXTERIOR_KERNEL_ALL
>
524
(
DD_PARAM_OUT
DD_PARAM_GAUGE
DD_PARAM_CLOVER
DD_PARAM_IN
DD_PARAM_XPAY
const
DslashParam
param
) {
525
526
// build Wilson or clover as appropriate
// NOTE(review): the name selection earlier in this file treats every
// DD_CLOVER value other than 0 and 1 as asymmetric clover, whereas this
// guard only accepts 2 - confirm no other value is ever passed in.
527
#if ((DD_CLOVER==0 && defined(GPU_WILSON_DIRAC)) || ((DD_CLOVER==1 || DD_CLOVER==2) && defined(GPU_CLOVER_DIRAC)))
528
529
#if (__COMPUTE_CAPABILITY__ >= 200 && defined(SHARED_WILSON_DSLASH)) // Fermi optimal code
530
531
#ifdef DSLASH_CLOVER_XPAY
532
533
#if DD_DAG
534
#include "
asym_wilson_clover_fused_exterior_dslash_dagger_fermi_core.h
"
535
#else
536
#include "
asym_wilson_clover_fused_exterior_dslash_fermi_core.h
"
537
#endif // DD_DAG
538
539
#else
540
541
#if DD_DAG
542
#include "
wilson_fused_exterior_dslash_dagger_fermi_core.h
"
543
#else
544
#include "
wilson_fused_exterior_dslash_fermi_core.h
"
545
#endif // DD_DAG
546
547
#endif // DSLASH_CLOVER_XPAY
548
549
#elif (__COMPUTE_CAPABILITY__ >= 120) // GT200 optimal code
550
551
#ifdef DSLASH_CLOVER_XPAY
552
553
#if DD_DAG
554
#include "
asym_wilson_clover_fused_exterior_dslash_dagger_gt200_core.h
"
555
#else
556
#include "
asym_wilson_clover_fused_exterior_dslash_gt200_core.h
"
557
#endif // DD_DAG
558
559
#else
560
561
#if DD_DAG
562
#include "
wilson_fused_exterior_dslash_dagger_gt200_core.h
"
563
#else
564
#include "
wilson_fused_exterior_dslash_gt200_core.h
"
565
#endif // DD_DAG
566
567
#endif // DSLASH_CLOVER_XPAY
568
569
#else // fall-back is original G80
570
571
#ifdef DSLASH_CLOVER_XPAY
572
573
#if DD_DAG
574
#include "
asym_wilson_clover_fused_exterior_dslash_dagger_g80_core.h
"
575
#else
576
#include "
asym_wilson_clover_fused_exterior_dslash_g80_core.h
"
577
#endif // DD_DAG
578
579
#else
580
581
#if DD_DAG
582
#include "
wilson_fused_exterior_dslash_dagger_g80_core.h
"
583
#else
584
#include "
wilson_fused_exterior_dslash_g80_core.h
"
585
#endif // DD_DAG
586
587
#endif // DSLASH_CLOVER_XPAY
588
589
590
#endif // __COMPUTE_CAPABILITY__
591
592
593
#endif // DD_CLOVER
594
595
}
596
#endif
597
598
// clean up
599
600
// Remove every macro that was set for this iteration so that the next
// pass through the file (or the including file, after the final pass)
// starts from a clean slate. The loop counters DD_DAG, DD_XPAY, DD_RECON
// and DD_PREC are deliberately left alone here; they are advanced or
// removed further down.

// name fragments, parameter-list fragments and name-building helpers
#undef DD_NAME_F
601
#undef DD_RECON_F
602
#undef DD_DAG_F
603
#undef DD_XPAY_F
604
#undef DD_PARAM_OUT
605
#undef DD_PARAM_GAUGE
606
#undef DD_PARAM_CLOVER
607
#undef DD_PARAM_IN
608
#undef DD_PARAM_XPAY
609
#undef DD_CONCAT
610
#undef DD_FUNC
611
612
// feature switches and field-access macros
#undef DSLASH_XPAY
613
#undef READ_GAUGE_MATRIX
614
#undef RECONSTRUCT_GAUGE_MATRIX
615
#undef GAUGE0TEX
616
#undef GAUGE1TEX
617
#undef READ_SPINOR
618
#undef READ_SPINOR_UP
619
#undef READ_SPINOR_DOWN
620
#undef SPINORTEX
621
#undef READ_INTERMEDIATE_SPINOR
622
#undef INTERTEX
623
#undef WRITE_SPINOR
624
#undef READ_ACCUM
625
#undef ACCUMTEX
626
#undef READ_CLOVER
627
#undef CLOVERTEX
// CLOVERTEXNORM is defined only by the half-precision clover setup when
// DIRECT_ACCESS_CLOVER is not defined; undefine it here as well so that it
// does not leak out of this file (undefining an unset macro is harmless).
#undef CLOVERTEXNORM
628
#undef DSLASH_CLOVER
629
#undef DSLASH_CLOVER_XPAY
630
#undef GAUGE_FLOAT2
631
#undef SPINOR_DOUBLE
632
#undef CLOVER_DOUBLE
633
#undef SPINOR_HOP
634
635
#undef TPROJSCALE
636
637
// prepare next set of options, or clean up after final iteration
638
639
// Advance the option counter by one step. DD_DAG is the fastest-changing
// digit; each time a digit wraps back to 0 the next one (DD_XPAY, then
// DD_RECON, then DD_PREC) is advanced. When DD_PREC would wrap, the
// sequence is complete and all loop state is removed instead.
#if (DD_DAG==0)
640
#undef DD_DAG
641
#define DD_DAG 1
642
#else
643
#undef DD_DAG
644
#define DD_DAG 0
645
646
// DD_DAG wrapped: advance DD_XPAY. If DD_XPAY was forced to 1 earlier in
// this pass (DSLASH_CLOVER_XPAY case) it is already 1 here, so control
// goes straight to the wrap branch and only Xpay kernels are generated.
#if (DD_XPAY==0)
647
#undef DD_XPAY
648
#define DD_XPAY 1
649
#else
650
#undef DD_XPAY
651
#define DD_XPAY 0
652
653
// DD_XPAY wrapped: advance DD_RECON through 0, 1, 2
#if (DD_RECON==0)
654
#undef DD_RECON
655
#define DD_RECON 1
656
#elif (DD_RECON==1)
657
#undef DD_RECON
658
#define DD_RECON 2
659
#else
660
#undef DD_RECON
661
#define DD_RECON 0
662
663
// DD_RECON wrapped: advance DD_PREC through 0, 1, 2
#if (DD_PREC==0)
664
#undef DD_PREC
665
#define DD_PREC 1
666
#elif (DD_PREC==1)
667
#undef DD_PREC
668
#define DD_PREC 2
669
#else
670
671
// final combination done: drop the sentinel and all counters, which also
// stops the self-inclusion below
#undef DD_LOOP
672
#undef DD_DAG
673
#undef DD_XPAY
674
#undef DD_RECON
675
#undef DD_PREC
676
677
#endif // DD_PREC
678
#endif // DD_RECON
679
#endif // DD_XPAY
680
#endif // DD_DAG
681
682
// While DD_LOOP is still defined there are combinations left, so this file
// includes itself again to process the next one. This is why the file has
// no include guard.
#ifdef DD_LOOP
683
#include "
wilson_dslash_def.h
"
684
#endif // DD_LOOP
DD_RECON_F
#define DD_RECON_F
Definition:
wilson_dslash_def.h:79
asym_wilson_clover_fused_exterior_dslash_dagger_g80_core.h
DD_PARAM_XPAY
#define DD_PARAM_XPAY
Definition:
wilson_dslash_def.h:71
asym_wilson_clover_dslash_dagger_g80_core.h
wilson_dslash_g80_core.h
asym_wilson_clover_fused_exterior_dslash_dagger_gt200_core.h
wilson_dslash_dagger_g80_core.h
DD_XPAY_F
#define DD_XPAY_F
Definition:
wilson_dslash_def.h:64
wilson_fused_exterior_dslash_gt200_core.h
asym_wilson_clover_fused_exterior_dslash_g80_core.h
wilson_fused_exterior_dslash_dagger_gt200_core.h
DD_PARAM_CLOVER
#define DD_PARAM_CLOVER
Definition:
wilson_dslash_def.h:241
DD_FUNC
#define DD_FUNC(x)
Definition:
clover_def.h:141
DD_DAG_F
#define DD_DAG_F
Definition:
wilson_dslash_def.h:52
wilson_dslash_gt200_core.h
wilson_dslash_dagger_fermi_core.h
param
QudaGaugeParam param
Definition:
pack_test.cpp:17
wilson_dslash_dagger_gt200_core.h
asym_wilson_clover_fused_exterior_dslash_dagger_fermi_core.h
wilson_fused_exterior_dslash_dagger_fermi_core.h
wilson_dslash_fermi_core.h
asym_wilson_clover_dslash_gt200_core.h
DD_PARAM_GAUGE
#define DD_PARAM_GAUGE
Definition:
wilson_dslash_def.h:82
asym_wilson_clover_dslash_dagger_gt200_core.h
wilson_dslash_def.h
asym_wilson_clover_dslash_g80_core.h
EXTERIOR_KERNEL_ALL
Definition:
dslash_constants.h:3
DD_PARAM_OUT
#define DD_PARAM_OUT
Definition:
wilson_dslash_def.h:191
wilson_fused_exterior_dslash_dagger_g80_core.h
wilson_fused_exterior_dslash_fermi_core.h
asym_wilson_clover_dslash_fermi_core.h
DD_NAME_F
#define DD_NAME_F
Definition:
wilson_dslash_def.h:41
asym_wilson_clover_dslash_dagger_fermi_core.h
asym_wilson_clover_fused_exterior_dslash_fermi_core.h
wilson_fused_exterior_dslash_g80_core.h
DD_PARAM_IN
#define DD_PARAM_IN
Definition:
wilson_dslash_def.h:192
DslashParam
Definition:
dslash_constants.h:10
asym_wilson_clover_fused_exterior_dslash_gt200_core.h
Generated on Wed Feb 4 2015 17:00:12 for QUDA by
1.8.6