QUDA
0.9.0
lib
dslash_core
wilson_pack_twisted_face_core.h
Go to the documentation of this file.
1
// Input spinor component aliases.
//
// Each name iSC_re / iSC_im (S = 0..3, C = 0..2) is mapped onto one field of
// the variables I0, I1, ... which are expected to be in scope wherever this
// fragment is included.  spinorFloat names the scalar type used for the
// half-spinor temporaries.
#if defined(SPINOR_DOUBLE)

// SPINOR_DOUBLE: one (re, im) pair per variable, fields .x/.y of I0..I11.
#define spinorFloat double
#define i00_re I0.x
#define i00_im I0.y
#define i01_re I1.x
#define i01_im I1.y
#define i02_re I2.x
#define i02_im I2.y
#define i10_re I3.x
#define i10_im I3.y
#define i11_re I4.x
#define i11_im I4.y
#define i12_re I5.x
#define i12_im I5.y
#define i20_re I6.x
#define i20_im I6.y
#define i21_re I7.x
#define i21_im I7.y
#define i22_re I8.x
#define i22_im I8.y
#define i30_re I9.x
#define i30_im I9.y
#define i31_re I10.x
#define i31_im I10.y
#define i32_re I11.x
#define i32_im I11.y

#else

// Otherwise: two (re, im) pairs per variable, fields .x/.y/.z/.w of I0..I5.
#define spinorFloat float
#define i00_re I0.x
#define i00_im I0.y
#define i01_re I0.z
#define i01_im I0.w
#define i02_re I1.x
#define i02_im I1.y
#define i10_re I1.z
#define i10_im I1.w
#define i11_re I2.x
#define i11_im I2.y
#define i12_re I2.z
#define i12_im I2.w
#define i20_re I3.x
#define i20_im I3.y
#define i21_re I3.z
#define i21_im I3.w
#define i22_re I4.x
#define i22_im I4.y
#define i30_re I4.z
#define i30_im I4.w
#define i31_re I5.x
#define i31_im I5.y
#define i32_re I5.z
#define i32_im I5.w

#endif // SPINOR_DOUBLE
55
56
#include "
io_spinor.h
"
57
58
if
(
face_num
) {
59
60
switch
(
dim
) {
61
case 0:
{
  // Twist coefficients are only declared when TWIST_INV_DSLASH is set;
  // SPINOR_DOUBLE selects between the param.a/b and param.a_f/b_f fields.
#ifdef TWIST_INV_DSLASH
#ifdef SPINOR_DOUBLE
  const spinorFloat a = param.a;
  const spinorFloat b = param.b;
#else
  const spinorFloat a = param.a_f;
  const spinorFloat b = param.b_f;
#endif
#endif

  // read spinor from device memory, then apply APPLY_TWIST_INV to it
  READ_SPINOR(SPINORTEX, param.sp_stride, idx, idx);
  APPLY_TWIST_INV(a, b, i);

  // project spinor into half spinors
  // (reading *_re/*_im as real/imaginary parts:
  //  a_c = i0c + i*i3c, b_c = i1c + i*i2c for c = 0..2)
  spinorFloat a0_re = i00_re - i30_im;
  spinorFloat a0_im = i00_im + i30_re;
  spinorFloat a1_re = i01_re - i31_im;
  spinorFloat a1_im = i01_im + i31_re;
  spinorFloat a2_re = i02_re - i32_im;
  spinorFloat a2_im = i02_im + i32_re;
  spinorFloat b0_re = i10_re - i20_im;
  spinorFloat b0_im = i10_im + i20_re;
  spinorFloat b1_re = i11_re - i21_im;
  spinorFloat b1_im = i11_im + i21_re;
  spinorFloat b2_re = i12_re - i22_im;
  spinorFloat b2_im = i12_im + i22_re;

  // write half spinor back to device memory
  WRITE_HALF_SPINOR(face_volume, face_idx);
  break;
}
101
case 1:
{
  // Twist coefficients are only declared when TWIST_INV_DSLASH is set;
  // SPINOR_DOUBLE selects between the param.a/b and param.a_f/b_f fields.
#ifdef TWIST_INV_DSLASH
#ifdef SPINOR_DOUBLE
  const spinorFloat a = param.a;
  const spinorFloat b = param.b;
#else
  const spinorFloat a = param.a_f;
  const spinorFloat b = param.b_f;
#endif
#endif

  // read spinor from device memory, then apply APPLY_TWIST_INV to it
  READ_SPINOR(SPINORTEX, param.sp_stride, idx, idx);
  APPLY_TWIST_INV(a, b, i);

  // project spinor into half spinors
  // (a_c = i0c + i3c, b_c = i1c - i2c for c = 0..2, component-wise)
  spinorFloat a0_re = i00_re + i30_re;
  spinorFloat a0_im = i00_im + i30_im;
  spinorFloat a1_re = i01_re + i31_re;
  spinorFloat a1_im = i01_im + i31_im;
  spinorFloat a2_re = i02_re + i32_re;
  spinorFloat a2_im = i02_im + i32_im;
  spinorFloat b0_re = i10_re - i20_re;
  spinorFloat b0_im = i10_im - i20_im;
  spinorFloat b1_re = i11_re - i21_re;
  spinorFloat b1_im = i11_im - i21_im;
  spinorFloat b2_re = i12_re - i22_re;
  spinorFloat b2_im = i12_im - i22_im;

  // write half spinor back to device memory
  WRITE_HALF_SPINOR(face_volume, face_idx);
  break;
}
141
case 2:
{
  // Twist coefficients are only declared when TWIST_INV_DSLASH is set;
  // SPINOR_DOUBLE selects between the param.a/b and param.a_f/b_f fields.
#ifdef TWIST_INV_DSLASH
#ifdef SPINOR_DOUBLE
  const spinorFloat a = param.a;
  const spinorFloat b = param.b;
#else
  const spinorFloat a = param.a_f;
  const spinorFloat b = param.b_f;
#endif
#endif

  // read spinor from device memory, then apply APPLY_TWIST_INV to it
  READ_SPINOR(SPINORTEX, param.sp_stride, idx, idx);
  APPLY_TWIST_INV(a, b, i);

  // project spinor into half spinors
  // (reading *_re/*_im as real/imaginary parts:
  //  a_c = i0c + i*i2c, b_c = i1c - i*i3c for c = 0..2)
  spinorFloat a0_re = i00_re - i20_im;
  spinorFloat a0_im = i00_im + i20_re;
  spinorFloat a1_re = i01_re - i21_im;
  spinorFloat a1_im = i01_im + i21_re;
  spinorFloat a2_re = i02_re - i22_im;
  spinorFloat a2_im = i02_im + i22_re;
  spinorFloat b0_re = i10_re + i30_im;
  spinorFloat b0_im = i10_im - i30_re;
  spinorFloat b1_re = i11_re + i31_im;
  spinorFloat b1_im = i11_im - i31_re;
  spinorFloat b2_re = i12_re + i32_im;
  spinorFloat b2_im = i12_im - i32_re;

  // write half spinor back to device memory
  WRITE_HALF_SPINOR(face_volume, face_idx);
  break;
}
181
case 3:
{
  // Twist coefficients are only declared when TWIST_INV_DSLASH is set;
  // SPINOR_DOUBLE selects between the param.a/b and param.a_f/b_f fields.
#ifdef TWIST_INV_DSLASH
#ifdef SPINOR_DOUBLE
  const spinorFloat a = param.a;
  const spinorFloat b = param.b;
#else
  const spinorFloat a = param.a_f;
  const spinorFloat b = param.b_f;
#endif
#endif

  // read spinor from device memory, then apply APPLY_TWIST_INV to it
  READ_SPINOR(SPINORTEX, param.sp_stride, idx, idx);
  APPLY_TWIST_INV(a, b, i);

  // project spinor into half spinors
  // (a_c = 2 * i0c, b_c = 2 * i1c for c = 0..2; i2c and i3c are not used)
  spinorFloat a0_re = 2 * i00_re;
  spinorFloat a0_im = 2 * i00_im;
  spinorFloat a1_re = 2 * i01_re;
  spinorFloat a1_im = 2 * i01_im;
  spinorFloat a2_re = 2 * i02_re;
  spinorFloat a2_im = 2 * i02_im;
  spinorFloat b0_re = 2 * i10_re;
  spinorFloat b0_im = 2 * i10_im;
  spinorFloat b1_re = 2 * i11_re;
  spinorFloat b1_im = 2 * i11_im;
  spinorFloat b2_re = 2 * i12_re;
  spinorFloat b2_im = 2 * i12_im;

  // write half spinor back to device memory
  WRITE_HALF_SPINOR(face_volume, face_idx);
  break;
}
221
}
222
223
}
else
{
224
225
switch
(
dim
) {
226
case 0:
{
  // Twist coefficients are only declared when TWIST_INV_DSLASH is set;
  // SPINOR_DOUBLE selects between the param.a/b and param.a_f/b_f fields.
#ifdef TWIST_INV_DSLASH
#ifdef SPINOR_DOUBLE
  const spinorFloat a = param.a;
  const spinorFloat b = param.b;
#else
  const spinorFloat a = param.a_f;
  const spinorFloat b = param.b_f;
#endif
#endif

  // read spinor from device memory, then apply APPLY_TWIST_INV to it
  READ_SPINOR(SPINORTEX, param.sp_stride, idx, idx);
  APPLY_TWIST_INV(a, b, i);

  // project spinor into half spinors
  // (reading *_re/*_im as real/imaginary parts:
  //  a_c = i0c - i*i3c, b_c = i1c - i*i2c for c = 0..2)
  spinorFloat a0_re = i00_re + i30_im;
  spinorFloat a0_im = i00_im - i30_re;
  spinorFloat a1_re = i01_re + i31_im;
  spinorFloat a1_im = i01_im - i31_re;
  spinorFloat a2_re = i02_re + i32_im;
  spinorFloat a2_im = i02_im - i32_re;
  spinorFloat b0_re = i10_re + i20_im;
  spinorFloat b0_im = i10_im - i20_re;
  spinorFloat b1_re = i11_re + i21_im;
  spinorFloat b1_im = i11_im - i21_re;
  spinorFloat b2_re = i12_re + i22_im;
  spinorFloat b2_im = i12_im - i22_re;

  // write half spinor back to device memory
  WRITE_HALF_SPINOR(face_volume, face_idx);
  break;
}
266
case 1:
{
  // Twist coefficients are only declared when TWIST_INV_DSLASH is set;
  // SPINOR_DOUBLE selects between the param.a/b and param.a_f/b_f fields.
#ifdef TWIST_INV_DSLASH
#ifdef SPINOR_DOUBLE
  const spinorFloat a = param.a;
  const spinorFloat b = param.b;
#else
  const spinorFloat a = param.a_f;
  const spinorFloat b = param.b_f;
#endif
#endif

  // read spinor from device memory, then apply APPLY_TWIST_INV to it
  READ_SPINOR(SPINORTEX, param.sp_stride, idx, idx);
  APPLY_TWIST_INV(a, b, i);

  // project spinor into half spinors
  // (a_c = i0c - i3c, b_c = i1c + i2c for c = 0..2, component-wise)
  spinorFloat a0_re = i00_re - i30_re;
  spinorFloat a0_im = i00_im - i30_im;
  spinorFloat a1_re = i01_re - i31_re;
  spinorFloat a1_im = i01_im - i31_im;
  spinorFloat a2_re = i02_re - i32_re;
  spinorFloat a2_im = i02_im - i32_im;
  spinorFloat b0_re = i10_re + i20_re;
  spinorFloat b0_im = i10_im + i20_im;
  spinorFloat b1_re = i11_re + i21_re;
  spinorFloat b1_im = i11_im + i21_im;
  spinorFloat b2_re = i12_re + i22_re;
  spinorFloat b2_im = i12_im + i22_im;

  // write half spinor back to device memory
  WRITE_HALF_SPINOR(face_volume, face_idx);
  break;
}
306
case 2:
{
  // Twist coefficients are only declared when TWIST_INV_DSLASH is set;
  // SPINOR_DOUBLE selects between the param.a/b and param.a_f/b_f fields.
#ifdef TWIST_INV_DSLASH
#ifdef SPINOR_DOUBLE
  const spinorFloat a = param.a;
  const spinorFloat b = param.b;
#else
  const spinorFloat a = param.a_f;
  const spinorFloat b = param.b_f;
#endif
#endif

  // read spinor from device memory, then apply APPLY_TWIST_INV to it
  READ_SPINOR(SPINORTEX, param.sp_stride, idx, idx);
  APPLY_TWIST_INV(a, b, i);

  // project spinor into half spinors
  // (reading *_re/*_im as real/imaginary parts:
  //  a_c = i0c - i*i2c, b_c = i1c + i*i3c for c = 0..2)
  spinorFloat a0_re = i00_re + i20_im;
  spinorFloat a0_im = i00_im - i20_re;
  spinorFloat a1_re = i01_re + i21_im;
  spinorFloat a1_im = i01_im - i21_re;
  spinorFloat a2_re = i02_re + i22_im;
  spinorFloat a2_im = i02_im - i22_re;
  spinorFloat b0_re = i10_re - i30_im;
  spinorFloat b0_im = i10_im + i30_re;
  spinorFloat b1_re = i11_re - i31_im;
  spinorFloat b1_im = i11_im + i31_re;
  spinorFloat b2_re = i12_re - i32_im;
  spinorFloat b2_im = i12_im + i32_re;

  // write half spinor back to device memory
  WRITE_HALF_SPINOR(face_volume, face_idx);
  break;
}
346
case 3:
{
  // Twist coefficients are only declared when TWIST_INV_DSLASH is set;
  // SPINOR_DOUBLE selects between the param.a/b and param.a_f/b_f fields.
#ifdef TWIST_INV_DSLASH
#ifdef SPINOR_DOUBLE
  const spinorFloat a = param.a;
  const spinorFloat b = param.b;
#else
  const spinorFloat a = param.a_f;
  const spinorFloat b = param.b_f;
#endif
#endif

  // read spinor from device memory, then apply APPLY_TWIST_INV to it
  READ_SPINOR(SPINORTEX, param.sp_stride, idx, idx);
  APPLY_TWIST_INV(a, b, i);

  // project spinor into half spinors
  // (a_c = 2 * i2c, b_c = 2 * i3c for c = 0..2; i0c and i1c are not used)
  spinorFloat a0_re = 2 * i20_re;
  spinorFloat a0_im = 2 * i20_im;
  spinorFloat a1_re = 2 * i21_re;
  spinorFloat a1_im = 2 * i21_im;
  spinorFloat a2_re = 2 * i22_re;
  spinorFloat a2_im = 2 * i22_im;
  spinorFloat b0_re = 2 * i30_re;
  spinorFloat b0_im = 2 * i30_im;
  spinorFloat b1_re = 2 * i31_re;
  spinorFloat b1_im = 2 * i31_im;
  spinorFloat b2_re = 2 * i32_re;
  spinorFloat b2_im = 2 * i32_im;

  // write half spinor back to device memory
  WRITE_HALF_SPINOR(face_volume, face_idx);
  break;
}
386
}
387
388
}
389
390
// Drop every macro this fragment set up so that it can be included again
// with a different precision without macro-redefinition warnings.
#undef spinorFloat
#undef SHARED_STRIDE

// input spinor component aliases
#undef i00_re
#undef i00_im
#undef i01_re
#undef i01_im
#undef i02_re
#undef i02_im

#undef i10_re
#undef i10_im
#undef i11_re
#undef i11_im
#undef i12_re
#undef i12_im

#undef i20_re
#undef i20_im
#undef i21_re
#undef i21_im
#undef i22_re
#undef i22_im

#undef i30_re
#undef i30_im
#undef i31_re
#undef i31_im
#undef i32_re
#undef i32_im
418
i00_re
#define i00_re
Definition:
wilson_pack_twisted_face_core.h:30
i31_re
#define i31_re
Definition:
wilson_pack_twisted_face_core.h:50
a1_re
spinorFloat a1_re
Definition:
asym_wilson_clover_dslash_dagger_fermi_core.h:929
i21_im
#define i21_im
Definition:
wilson_pack_twisted_face_core.h:45
i31_im
#define i31_im
Definition:
wilson_pack_twisted_face_core.h:51
i10_im
#define i10_im
Definition:
wilson_pack_twisted_face_core.h:37
i00_im
#define i00_im
Definition:
wilson_pack_twisted_face_core.h:31
i01_re
#define i01_re
Definition:
wilson_pack_twisted_face_core.h:32
APPLY_TWIST_INV
#define APPLY_TWIST_INV(a, b, reg)
**************************only for deg tm:*******************************
Definition:
io_spinor.h:1314
a2_im
spinorFloat a2_im
Definition:
asym_wilson_clover_dslash_dagger_fermi_core.h:930
b1_im
spinorFloat b1_im
Definition:
asym_wilson_clover_dslash_dagger_fermi_core.h:932
dim
static __inline__ dim3 dim3 void size_t cudaStream_t int dim
Definition:
CMakeCUDACompilerId.cpp1.ii:15687
i30_im
#define i30_im
Definition:
wilson_pack_twisted_face_core.h:49
i21_re
#define i21_re
Definition:
wilson_pack_twisted_face_core.h:44
i02_im
#define i02_im
Definition:
wilson_pack_twisted_face_core.h:35
i11_re
#define i11_re
Definition:
wilson_pack_twisted_face_core.h:38
i12_re
#define i12_re
Definition:
wilson_pack_twisted_face_core.h:40
b2_im
spinorFloat b2_im
Definition:
asym_wilson_clover_dslash_dagger_fermi_core.h:933
a0_im
spinorFloat a0_im
Definition:
asym_wilson_clover_dslash_dagger_fermi_core.h:928
param
QudaGaugeParam param
Definition:
pack_test.cpp:17
b
#define b
Definition:
dw_dslash4_core.h:83
i01_im
#define i01_im
Definition:
wilson_pack_twisted_face_core.h:33
i20_re
#define i20_re
Definition:
wilson_pack_twisted_face_core.h:42
b1_re
spinorFloat b1_re
Definition:
asym_wilson_clover_dslash_dagger_fermi_core.h:932
i32_im
#define i32_im
Definition:
wilson_pack_twisted_face_core.h:53
SPINORTEX
#define SPINORTEX
Definition:
contract_core.h:891
fused_exterior_ndeg_tm_dslash_cuda_gen.i
int i
start here
Definition:
fused_exterior_ndeg_tm_dslash_cuda_gen.py:816
READ_SPINOR
#define READ_SPINOR
Definition:
contract_core.h:886
a1_im
spinorFloat a1_im
Definition:
asym_wilson_clover_dslash_dagger_fermi_core.h:929
i32_re
#define i32_re
Definition:
wilson_pack_twisted_face_core.h:52
io_spinor.h
i20_im
#define i20_im
Definition:
wilson_pack_twisted_face_core.h:43
b0_im
spinorFloat b0_im
Definition:
asym_wilson_clover_dslash_dagger_fermi_core.h:931
b0_re
spinorFloat b0_re
Definition:
asym_wilson_clover_dslash_dagger_fermi_core.h:931
i12_im
#define i12_im
Definition:
wilson_pack_twisted_face_core.h:41
a2_re
spinorFloat a2_re
Definition:
asym_wilson_clover_dslash_dagger_fermi_core.h:930
idx
int idx
Definition:
staggered_fused_exterior_dslash_core.h:355
face_volume
const int face_volume
Definition:
tm_ndeg_fused_exterior_dslash_core.h:199
i22_re
#define i22_re
Definition:
wilson_pack_twisted_face_core.h:46
i22_im
#define i22_im
Definition:
wilson_pack_twisted_face_core.h:47
face_idx
int face_idx
Definition:
dw_dslash4_core.h:198
face_num
const int face_num
Definition:
dw_dslash4_core.h:223
i10_re
#define i10_re
Definition:
wilson_pack_twisted_face_core.h:36
b2_re
spinorFloat b2_re
Definition:
asym_wilson_clover_dslash_dagger_fermi_core.h:933
a
#define a
Definition:
dw_dslash4_core.h:82
a0_re
spinorFloat a0_re
Definition:
asym_wilson_clover_dslash_dagger_fermi_core.h:928
spinorFloat
#define spinorFloat
Definition:
wilson_pack_twisted_face_core.h:29
i02_re
#define i02_re
Definition:
wilson_pack_twisted_face_core.h:34
i30_re
#define i30_re
Definition:
wilson_pack_twisted_face_core.h:48
i11_im
#define i11_im
Definition:
wilson_pack_twisted_face_core.h:39
Generated by
1.8.14