QUDA
0.9.0
lib
dslash_core
wilson_pack_face_dagger_core.h
Go to the documentation of this file.
// input spinor
//
// Component accessors i<s><c>_re / i<s><c>_im (s = 0..3, c = 0..2) for the
// input spinor.  NOTE(review): s and c are presumably the spin and color
// indices -- confirm against io_spinor.h.
//
// The accessors expand to fields of the variables I0, I1, ...; those
// variables are not declared in this file (presumably the READ_SPINOR*
// macros declare them -- confirm).
#ifdef SPINOR_DOUBLE
// SPINOR_DOUBLE defined: one complex number per variable
// (.x = real part, .y = imaginary part), spread over I0 .. I11.
#define spinorFloat double
#define i00_re I0.x
#define i00_im I0.y
#define i01_re I1.x
#define i01_im I1.y
#define i02_re I2.x
#define i02_im I2.y
#define i10_re I3.x
#define i10_im I3.y
#define i11_re I4.x
#define i11_im I4.y
#define i12_re I5.x
#define i12_im I5.y
#define i20_re I6.x
#define i20_im I6.y
#define i21_re I7.x
#define i21_im I7.y
#define i22_re I8.x
#define i22_im I8.y
#define i30_re I9.x
#define i30_im I9.y
#define i31_re I10.x
#define i31_im I10.y
#define i32_re I11.x
#define i32_im I11.y
#else
// SPINOR_DOUBLE not defined: two complex numbers per variable
// (.x/.y hold the first, .z/.w the second), spread over I0 .. I5.
#define spinorFloat float
#define i00_re I0.x
#define i00_im I0.y
#define i01_re I0.z
#define i01_im I0.w
#define i02_re I1.x
#define i02_im I1.y
#define i10_re I1.z
#define i10_im I1.w
#define i11_re I2.x
#define i11_im I2.y
#define i12_re I2.z
#define i12_im I2.w
#define i20_re I3.x
#define i20_im I3.y
#define i21_re I3.z
#define i21_im I3.w
#define i22_re I4.x
#define i22_im I4.y
#define i30_re I4.z
#define i30_im I4.w
#define i31_re I5.x
#define i31_im I5.y
#define i32_re I5.z
#define i32_im I5.w
#endif // SPINOR_DOUBLE

56
#include "
io_spinor.h
"
57
58
if
(
face_num
) {
59
60
switch
(
dim
) {
61
case
0:
62
{
63
// read spinor from device memory
64
READ_SPINOR
(
SPINORTEX
,
param
.sp_stride,
idx
,
idx
);
65
66
spinorFloat
a0_re
,
a0_im
;
67
spinorFloat
a1_re
,
a1_im
;
68
spinorFloat
a2_re
,
a2_im
;
69
spinorFloat
b0_re
,
b0_im
;
70
spinorFloat
b1_re
,
b1_im
;
71
spinorFloat
b2_re
,
b2_im
;
72
73
// project spinor into half spinors
74
a0_re
= +
i00_re
+
i30_im
;
75
a0_im
= +
i00_im
-
i30_re
;
76
a1_re
= +
i01_re
+
i31_im
;
77
a1_im
= +
i01_im
-
i31_re
;
78
a2_re
= +
i02_re
+
i32_im
;
79
a2_im
= +
i02_im
-
i32_re
;
80
b0_re
= +
i10_re
+
i20_im
;
81
b0_im
= +
i10_im
-
i20_re
;
82
b1_re
= +
i11_re
+
i21_im
;
83
b1_im
= +
i11_im
-
i21_re
;
84
b2_re
= +
i12_re
+
i22_im
;
85
b2_im
= +
i12_im
-
i22_re
;
86
87
// write half spinor back to device memory
88
WRITE_HALF_SPINOR(
face_volume
,
face_idx
);
89
}
90
break
;
91
case
1:
92
{
93
// read spinor from device memory
94
READ_SPINOR
(
SPINORTEX
,
param
.sp_stride,
idx
,
idx
);
95
96
spinorFloat
a0_re
,
a0_im
;
97
spinorFloat
a1_re
,
a1_im
;
98
spinorFloat
a2_re
,
a2_im
;
99
spinorFloat
b0_re
,
b0_im
;
100
spinorFloat
b1_re
,
b1_im
;
101
spinorFloat
b2_re
,
b2_im
;
102
103
// project spinor into half spinors
104
a0_re
= +
i00_re
-
i30_re
;
105
a0_im
= +
i00_im
-
i30_im
;
106
a1_re
= +
i01_re
-
i31_re
;
107
a1_im
= +
i01_im
-
i31_im
;
108
a2_re
= +
i02_re
-
i32_re
;
109
a2_im
= +
i02_im
-
i32_im
;
110
b0_re
= +
i10_re
+
i20_re
;
111
b0_im
= +
i10_im
+
i20_im
;
112
b1_re
= +
i11_re
+
i21_re
;
113
b1_im
= +
i11_im
+
i21_im
;
114
b2_re
= +
i12_re
+
i22_re
;
115
b2_im
= +
i12_im
+
i22_im
;
116
117
// write half spinor back to device memory
118
WRITE_HALF_SPINOR(
face_volume
,
face_idx
);
119
}
120
break
;
121
case
2:
122
{
123
// read spinor from device memory
124
READ_SPINOR
(
SPINORTEX
,
param
.sp_stride,
idx
,
idx
);
125
126
spinorFloat
a0_re
,
a0_im
;
127
spinorFloat
a1_re
,
a1_im
;
128
spinorFloat
a2_re
,
a2_im
;
129
spinorFloat
b0_re
,
b0_im
;
130
spinorFloat
b1_re
,
b1_im
;
131
spinorFloat
b2_re
,
b2_im
;
132
133
// project spinor into half spinors
134
a0_re
= +
i00_re
+
i20_im
;
135
a0_im
= +
i00_im
-
i20_re
;
136
a1_re
= +
i01_re
+
i21_im
;
137
a1_im
= +
i01_im
-
i21_re
;
138
a2_re
= +
i02_re
+
i22_im
;
139
a2_im
= +
i02_im
-
i22_re
;
140
b0_re
= +
i10_re
-
i30_im
;
141
b0_im
= +
i10_im
+
i30_re
;
142
b1_re
= +
i11_re
-
i31_im
;
143
b1_im
= +
i11_im
+
i31_re
;
144
b2_re
= +
i12_re
-
i32_im
;
145
b2_im
= +
i12_im
+
i32_re
;
146
147
// write half spinor back to device memory
148
WRITE_HALF_SPINOR(
face_volume
,
face_idx
);
149
}
150
break
;
151
case
3:
152
{
153
// read spinor from device memory
154
READ_SPINOR_DOWN
(
SPINORTEX
,
param
.sp_stride,
idx
,
idx
);
155
156
spinorFloat
a0_re
,
a0_im
;
157
spinorFloat
a1_re
,
a1_im
;
158
spinorFloat
a2_re
,
a2_im
;
159
spinorFloat
b0_re
,
b0_im
;
160
spinorFloat
b1_re
,
b1_im
;
161
spinorFloat
b2_re
,
b2_im
;
162
163
// project spinor into half spinors
164
a0_re
= +2*
i20_re
;
165
a0_im
= +2*
i20_im
;
166
a1_re
= +2*
i21_re
;
167
a1_im
= +2*
i21_im
;
168
a2_re
= +2*
i22_re
;
169
a2_im
= +2*
i22_im
;
170
b0_re
= +2*
i30_re
;
171
b0_im
= +2*
i30_im
;
172
b1_re
= +2*
i31_re
;
173
b1_im
= +2*
i31_im
;
174
b2_re
= +2*
i32_re
;
175
b2_im
= +2*
i32_im
;
176
177
// write half spinor back to device memory
178
WRITE_HALF_SPINOR(
face_volume
,
face_idx
);
179
}
180
break
;
181
}
182
183
}
else
{
184
185
switch
(
dim
) {
186
case
0:
187
{
188
// read spinor from device memory
189
READ_SPINOR
(
SPINORTEX
,
param
.sp_stride,
idx
,
idx
);
190
191
spinorFloat
a0_re
,
a0_im
;
192
spinorFloat
a1_re
,
a1_im
;
193
spinorFloat
a2_re
,
a2_im
;
194
spinorFloat
b0_re
,
b0_im
;
195
spinorFloat
b1_re
,
b1_im
;
196
spinorFloat
b2_re
,
b2_im
;
197
198
// project spinor into half spinors
199
a0_re
= +
i00_re
-
i30_im
;
200
a0_im
= +
i00_im
+
i30_re
;
201
a1_re
= +
i01_re
-
i31_im
;
202
a1_im
= +
i01_im
+
i31_re
;
203
a2_re
= +
i02_re
-
i32_im
;
204
a2_im
= +
i02_im
+
i32_re
;
205
b0_re
= +
i10_re
-
i20_im
;
206
b0_im
= +
i10_im
+
i20_re
;
207
b1_re
= +
i11_re
-
i21_im
;
208
b1_im
= +
i11_im
+
i21_re
;
209
b2_re
= +
i12_re
-
i22_im
;
210
b2_im
= +
i12_im
+
i22_re
;
211
212
// write half spinor back to device memory
213
WRITE_HALF_SPINOR(
face_volume
,
face_idx
);
214
}
215
break
;
216
case
1:
217
{
218
// read spinor from device memory
219
READ_SPINOR
(
SPINORTEX
,
param
.sp_stride,
idx
,
idx
);
220
221
spinorFloat
a0_re
,
a0_im
;
222
spinorFloat
a1_re
,
a1_im
;
223
spinorFloat
a2_re
,
a2_im
;
224
spinorFloat
b0_re
,
b0_im
;
225
spinorFloat
b1_re
,
b1_im
;
226
spinorFloat
b2_re
,
b2_im
;
227
228
// project spinor into half spinors
229
a0_re
= +
i00_re
+
i30_re
;
230
a0_im
= +
i00_im
+
i30_im
;
231
a1_re
= +
i01_re
+
i31_re
;
232
a1_im
= +
i01_im
+
i31_im
;
233
a2_re
= +
i02_re
+
i32_re
;
234
a2_im
= +
i02_im
+
i32_im
;
235
b0_re
= +
i10_re
-
i20_re
;
236
b0_im
= +
i10_im
-
i20_im
;
237
b1_re
= +
i11_re
-
i21_re
;
238
b1_im
= +
i11_im
-
i21_im
;
239
b2_re
= +
i12_re
-
i22_re
;
240
b2_im
= +
i12_im
-
i22_im
;
241
242
// write half spinor back to device memory
243
WRITE_HALF_SPINOR(
face_volume
,
face_idx
);
244
}
245
break
;
246
case
2:
247
{
248
// read spinor from device memory
249
READ_SPINOR
(
SPINORTEX
,
param
.sp_stride,
idx
,
idx
);
250
251
spinorFloat
a0_re
,
a0_im
;
252
spinorFloat
a1_re
,
a1_im
;
253
spinorFloat
a2_re
,
a2_im
;
254
spinorFloat
b0_re
,
b0_im
;
255
spinorFloat
b1_re
,
b1_im
;
256
spinorFloat
b2_re
,
b2_im
;
257
258
// project spinor into half spinors
259
a0_re
= +
i00_re
-
i20_im
;
260
a0_im
= +
i00_im
+
i20_re
;
261
a1_re
= +
i01_re
-
i21_im
;
262
a1_im
= +
i01_im
+
i21_re
;
263
a2_re
= +
i02_re
-
i22_im
;
264
a2_im
= +
i02_im
+
i22_re
;
265
b0_re
= +
i10_re
+
i30_im
;
266
b0_im
= +
i10_im
-
i30_re
;
267
b1_re
= +
i11_re
+
i31_im
;
268
b1_im
= +
i11_im
-
i31_re
;
269
b2_re
= +
i12_re
+
i32_im
;
270
b2_im
= +
i12_im
-
i32_re
;
271
272
// write half spinor back to device memory
273
WRITE_HALF_SPINOR(
face_volume
,
face_idx
);
274
}
275
break
;
276
case
3:
277
{
278
// read spinor from device memory
279
READ_SPINOR_UP
(
SPINORTEX
,
param
.sp_stride,
idx
,
idx
);
280
281
spinorFloat
a0_re
,
a0_im
;
282
spinorFloat
a1_re
,
a1_im
;
283
spinorFloat
a2_re
,
a2_im
;
284
spinorFloat
b0_re
,
b0_im
;
285
spinorFloat
b1_re
,
b1_im
;
286
spinorFloat
b2_re
,
b2_im
;
287
288
// project spinor into half spinors
289
a0_re
= +2*
i00_re
;
290
a0_im
= +2*
i00_im
;
291
a1_re
= +2*
i01_re
;
292
a1_im
= +2*
i01_im
;
293
a2_re
= +2*
i02_re
;
294
a2_im
= +2*
i02_im
;
295
b0_re
= +2*
i10_re
;
296
b0_im
= +2*
i10_im
;
297
b1_re
= +2*
i11_re
;
298
b1_im
= +2*
i11_im
;
299
b2_re
= +2*
i12_re
;
300
b2_im
= +2*
i12_im
;
301
302
// write half spinor back to device memory
303
WRITE_HALF_SPINOR(
face_volume
,
face_idx
);
304
}
305
break
;
306
}
307
308
}
309
// undefine to prevent warning when precision is changed
// (presumably a macro-redefinition warning if this fragment is included
// again with a different SPINOR_DOUBLE setting -- confirm)
#undef spinorFloat
// SHARED_STRIDE is not defined anywhere in this file; #undef on a name that
// is not currently a macro is simply ignored.
#undef SHARED_STRIDE

#undef i00_re
#undef i00_im
#undef i01_re
#undef i01_im
#undef i02_re
#undef i02_im
#undef i10_re
#undef i10_im
#undef i11_re
#undef i11_im
#undef i12_re
#undef i12_im
#undef i20_re
#undef i20_im
#undef i21_re
#undef i21_im
#undef i22_re
#undef i22_im
#undef i30_re
#undef i30_im
#undef i31_re
#undef i31_im
#undef i32_re
#undef i32_im

i32_re
#define i32_re
Definition:
wilson_pack_face_dagger_core.h:52
i12_im
#define i12_im
Definition:
wilson_pack_face_dagger_core.h:41
a1_re
spinorFloat a1_re
Definition:
asym_wilson_clover_dslash_dagger_fermi_core.h:929
i00_im
#define i00_im
Definition:
wilson_pack_face_dagger_core.h:31
i22_im
#define i22_im
Definition:
wilson_pack_face_dagger_core.h:47
i20_im
#define i20_im
Definition:
wilson_pack_face_dagger_core.h:43
i00_re
#define i00_re
Definition:
wilson_pack_face_dagger_core.h:30
a2_im
spinorFloat a2_im
Definition:
asym_wilson_clover_dslash_dagger_fermi_core.h:930
b1_im
spinorFloat b1_im
Definition:
asym_wilson_clover_dslash_dagger_fermi_core.h:932
dim
static __inline__ dim3 dim3 void size_t cudaStream_t int dim
Definition:
CMakeCUDACompilerId.cpp1.ii:15687
i20_re
#define i20_re
Definition:
wilson_pack_face_dagger_core.h:42
i22_re
#define i22_re
Definition:
wilson_pack_face_dagger_core.h:46
i12_re
#define i12_re
Definition:
wilson_pack_face_dagger_core.h:40
b2_im
spinorFloat b2_im
Definition:
asym_wilson_clover_dslash_dagger_fermi_core.h:933
a0_im
spinorFloat a0_im
Definition:
asym_wilson_clover_dslash_dagger_fermi_core.h:928
i01_re
#define i01_re
Definition:
wilson_pack_face_dagger_core.h:32
param
QudaGaugeParam param
Definition:
pack_test.cpp:17
b1_re
spinorFloat b1_re
Definition:
asym_wilson_clover_dslash_dagger_fermi_core.h:932
spinorFloat
#define spinorFloat
Definition:
wilson_pack_face_dagger_core.h:29
i32_im
#define i32_im
Definition:
wilson_pack_face_dagger_core.h:53
SPINORTEX
#define SPINORTEX
Definition:
contract_core.h:891
i11_im
#define i11_im
Definition:
wilson_pack_face_dagger_core.h:39
READ_SPINOR
#define READ_SPINOR
Definition:
contract_core.h:886
a1_im
spinorFloat a1_im
Definition:
asym_wilson_clover_dslash_dagger_fermi_core.h:929
io_spinor.h
b0_im
spinorFloat b0_im
Definition:
asym_wilson_clover_dslash_dagger_fermi_core.h:931
i21_im
#define i21_im
Definition:
wilson_pack_face_dagger_core.h:45
b0_re
spinorFloat b0_re
Definition:
asym_wilson_clover_dslash_dagger_fermi_core.h:931
i31_re
#define i31_re
Definition:
wilson_pack_face_dagger_core.h:50
a2_re
spinorFloat a2_re
Definition:
asym_wilson_clover_dslash_dagger_fermi_core.h:930
i02_re
#define i02_re
Definition:
wilson_pack_face_dagger_core.h:34
idx
int idx
Definition:
staggered_fused_exterior_dslash_core.h:355
face_volume
const int face_volume
Definition:
tm_ndeg_fused_exterior_dslash_core.h:199
i30_im
#define i30_im
Definition:
wilson_pack_face_dagger_core.h:49
face_idx
int face_idx
Definition:
dw_dslash4_core.h:198
i10_re
#define i10_re
Definition:
wilson_pack_face_dagger_core.h:36
face_num
const int face_num
Definition:
dw_dslash4_core.h:223
b2_re
spinorFloat b2_re
Definition:
asym_wilson_clover_dslash_dagger_fermi_core.h:933
i10_im
#define i10_im
Definition:
wilson_pack_face_dagger_core.h:37
i02_im
#define i02_im
Definition:
wilson_pack_face_dagger_core.h:35
i11_re
#define i11_re
Definition:
wilson_pack_face_dagger_core.h:38
i30_re
#define i30_re
Definition:
wilson_pack_face_dagger_core.h:48
a0_re
spinorFloat a0_re
Definition:
asym_wilson_clover_dslash_dagger_fermi_core.h:928
READ_SPINOR_UP
#define READ_SPINOR_UP
Definition:
dw_dslash4_def.h:177
READ_SPINOR_DOWN
#define READ_SPINOR_DOWN
Definition:
dw_dslash4_def.h:178
i01_im
#define i01_im
Definition:
wilson_pack_face_dagger_core.h:33
i21_re
#define i21_re
Definition:
wilson_pack_face_dagger_core.h:44
i31_im
#define i31_im
Definition:
wilson_pack_face_dagger_core.h:51
Generated by
1.8.14