QUDA  v0.7.0
A library for QCD on GPUs
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
wilson_pack_face_dagger_core.h
Go to the documentation of this file.
// Component aliases for the input spinor: every iXY_re / iXY_im name below
// expands to one field of an I* variable (X runs 0-3, Y runs 0-2).
// NOTE(review): X/Y presumably index spin and color, and the I* variables are
// not declared in this listing -- confirm against io_spinor.h / the includer.
1 // input spinor
2 #ifdef SPINOR_DOUBLE
// Two-field layout: one complex component per variable (.x = re, .y = im),
// spread over I0..I11.
3 #define spinorFloat double
4 #define i00_re I0.x
5 #define i00_im I0.y
6 #define i01_re I1.x
7 #define i01_im I1.y
8 #define i02_re I2.x
9 #define i02_im I2.y
10 #define i10_re I3.x
11 #define i10_im I3.y
12 #define i11_re I4.x
13 #define i11_im I4.y
14 #define i12_re I5.x
15 #define i12_im I5.y
16 #define i20_re I6.x
17 #define i20_im I6.y
18 #define i21_re I7.x
19 #define i21_im I7.y
20 #define i22_re I8.x
21 #define i22_im I8.y
22 #define i30_re I9.x
23 #define i30_im I9.y
24 #define i31_re I10.x
25 #define i31_im I10.y
26 #define i32_re I11.x
27 #define i32_im I11.y
28 #else
// Four-field layout: two complex components per variable (.x/.y then .z/.w),
// spread over I0..I5.
29 #define spinorFloat float
30 #define i00_re I0.x
31 #define i00_im I0.y
32 #define i01_re I0.z
33 #define i01_im I0.w
34 #define i02_re I1.x
35 #define i02_im I1.y
36 #define i10_re I1.z
37 #define i10_im I1.w
38 #define i11_re I2.x
39 #define i11_im I2.y
40 #define i12_re I2.z
41 #define i12_im I2.w
42 #define i20_re I3.x
43 #define i20_im I3.y
44 #define i21_re I3.z
45 #define i21_im I3.w
46 #define i22_re I4.x
47 #define i22_im I4.y
48 #define i30_re I4.z
49 #define i30_im I4.w
50 #define i31_re I5.x
51 #define i31_im I5.y
52 #define i32_re I5.z
53 #define i32_im I5.w
54 #endif // SPINOR_DOUBLE
55 
56 #include "io_spinor.h"
57 
// Face-packing body: for the site selected by idx, read the input spinor,
// combine its components into two three-component half spinors (a0..a2 and
// b0..b2, each with _re/_im parts) and write them out at face_idx.
// face_num selects which of the two sign conventions is used; dim (0-3)
// selects the combination. There is no default case, so any other dim value
// reads and writes nothing.
// Notation in the comments below: iX stands for (iX0, iX1, iX2) and 1i for the
// imaginary unit.
// NOTE(review): a*/b*, idx, face_idx, face_volume, dim and face_num are not
// declared in this listing, and the listing numbering skips lines after each
// read (e.g. 65 -> 72) -- presumably supplied by the including kernel or by
// io_spinor.h; confirm there.
58 if (face_num) {
59 
60  switch(dim) {
61  case 0:
62  {
63  // read spinor from device memory
64  READ_SPINOR(SPINORTEX, param.sp_stride, idx, idx);
65 
72 
  // a = i0 - 1i*i3,  b = i1 - 1i*i2
73  // project spinor into half spinors
74  a0_re = +i00_re+i30_im;
75  a0_im = +i00_im-i30_re;
76  a1_re = +i01_re+i31_im;
77  a1_im = +i01_im-i31_re;
78  a2_re = +i02_re+i32_im;
79  a2_im = +i02_im-i32_re;
80  b0_re = +i10_re+i20_im;
81  b0_im = +i10_im-i20_re;
82  b1_re = +i11_re+i21_im;
83  b1_im = +i11_im-i21_re;
84  b2_re = +i12_re+i22_im;
85  b2_im = +i12_im-i22_re;
86 
87  // write half spinor back to device memory
88  WRITE_HALF_SPINOR(face_volume, face_idx);
89  }
90  break;
91  case 1:
92  {
93  // read spinor from device memory
94  READ_SPINOR(SPINORTEX, param.sp_stride, idx, idx);
95 
102 
  // a = i0 - i3,  b = i1 + i2
103  // project spinor into half spinors
104  a0_re = +i00_re-i30_re;
105  a0_im = +i00_im-i30_im;
106  a1_re = +i01_re-i31_re;
107  a1_im = +i01_im-i31_im;
108  a2_re = +i02_re-i32_re;
109  a2_im = +i02_im-i32_im;
110  b0_re = +i10_re+i20_re;
111  b0_im = +i10_im+i20_im;
112  b1_re = +i11_re+i21_re;
113  b1_im = +i11_im+i21_im;
114  b2_re = +i12_re+i22_re;
115  b2_im = +i12_im+i22_im;
116 
117  // write half spinor back to device memory
118  WRITE_HALF_SPINOR(face_volume, face_idx);
119  }
120  break;
121  case 2:
122  {
123  // read spinor from device memory
124  READ_SPINOR(SPINORTEX, param.sp_stride, idx, idx);
125 
132 
  // a = i0 - 1i*i2,  b = i1 + 1i*i3
133  // project spinor into half spinors
134  a0_re = +i00_re+i20_im;
135  a0_im = +i00_im-i20_re;
136  a1_re = +i01_re+i21_im;
137  a1_im = +i01_im-i21_re;
138  a2_re = +i02_re+i22_im;
139  a2_im = +i02_im-i22_re;
140  b0_re = +i10_re-i30_im;
141  b0_im = +i10_im+i30_re;
142  b1_re = +i11_re-i31_im;
143  b1_im = +i11_im+i31_re;
144  b2_re = +i12_re-i32_im;
145  b2_im = +i12_im+i32_re;
146 
147  // write half spinor back to device memory
148  WRITE_HALF_SPINOR(face_volume, face_idx);
149  }
150  break;
151  case 3:
152  {
  // Only case in this branch that uses READ_SPINOR_DOWN; the projection below
  // touches only the i2*/i3* components.
  // NOTE(review): presumably the _DOWN variant loads just those components -- confirm.
153  // read spinor from device memory
154  READ_SPINOR_DOWN(SPINORTEX, param.sp_stride, idx, idx);
155 
162 
  // a = 2*i2,  b = 2*i3
163  // project spinor into half spinors
164  a0_re = +2*i20_re;
165  a0_im = +2*i20_im;
166  a1_re = +2*i21_re;
167  a1_im = +2*i21_im;
168  a2_re = +2*i22_re;
169  a2_im = +2*i22_im;
170  b0_re = +2*i30_re;
171  b0_im = +2*i30_im;
172  b1_re = +2*i31_re;
173  b1_im = +2*i31_im;
174  b2_re = +2*i32_re;
175  b2_im = +2*i32_im;
176 
177  // write half spinor back to device memory
178  WRITE_HALF_SPINOR(face_volume, face_idx);
179  }
180  break;
181  }
182 
// face_num == 0: for dims 0-2 the same component pairs are combined as above
// with the sign of the second term flipped; dim 3 takes the i0*/i1* components
// instead of i2*/i3*.
183 } else {
184 
185  switch(dim) {
186  case 0:
187  {
188  // read spinor from device memory
189  READ_SPINOR(SPINORTEX, param.sp_stride, idx, idx);
190 
197 
  // a = i0 + 1i*i3,  b = i1 + 1i*i2
198  // project spinor into half spinors
199  a0_re = +i00_re-i30_im;
200  a0_im = +i00_im+i30_re;
201  a1_re = +i01_re-i31_im;
202  a1_im = +i01_im+i31_re;
203  a2_re = +i02_re-i32_im;
204  a2_im = +i02_im+i32_re;
205  b0_re = +i10_re-i20_im;
206  b0_im = +i10_im+i20_re;
207  b1_re = +i11_re-i21_im;
208  b1_im = +i11_im+i21_re;
209  b2_re = +i12_re-i22_im;
210  b2_im = +i12_im+i22_re;
211 
212  // write half spinor back to device memory
213  WRITE_HALF_SPINOR(face_volume, face_idx);
214  }
215  break;
216  case 1:
217  {
218  // read spinor from device memory
219  READ_SPINOR(SPINORTEX, param.sp_stride, idx, idx);
220 
227 
  // a = i0 + i3,  b = i1 - i2
228  // project spinor into half spinors
229  a0_re = +i00_re+i30_re;
230  a0_im = +i00_im+i30_im;
231  a1_re = +i01_re+i31_re;
232  a1_im = +i01_im+i31_im;
233  a2_re = +i02_re+i32_re;
234  a2_im = +i02_im+i32_im;
235  b0_re = +i10_re-i20_re;
236  b0_im = +i10_im-i20_im;
237  b1_re = +i11_re-i21_re;
238  b1_im = +i11_im-i21_im;
239  b2_re = +i12_re-i22_re;
240  b2_im = +i12_im-i22_im;
241 
242  // write half spinor back to device memory
243  WRITE_HALF_SPINOR(face_volume, face_idx);
244  }
245  break;
246  case 2:
247  {
248  // read spinor from device memory
249  READ_SPINOR(SPINORTEX, param.sp_stride, idx, idx);
250 
257 
  // a = i0 + 1i*i2,  b = i1 - 1i*i3
258  // project spinor into half spinors
259  a0_re = +i00_re-i20_im;
260  a0_im = +i00_im+i20_re;
261  a1_re = +i01_re-i21_im;
262  a1_im = +i01_im+i21_re;
263  a2_re = +i02_re-i22_im;
264  a2_im = +i02_im+i22_re;
265  b0_re = +i10_re+i30_im;
266  b0_im = +i10_im-i30_re;
267  b1_re = +i11_re+i31_im;
268  b1_im = +i11_im-i31_re;
269  b2_re = +i12_re+i32_im;
270  b2_im = +i12_im-i32_re;
271 
272  // write half spinor back to device memory
273  WRITE_HALF_SPINOR(face_volume, face_idx);
274  }
275  break;
276  case 3:
277  {
  // Only case in this branch that uses READ_SPINOR_UP; the projection below
  // touches only the i0*/i1* components.
  // NOTE(review): presumably the _UP variant loads just those components -- confirm.
278  // read spinor from device memory
279  READ_SPINOR_UP(SPINORTEX, param.sp_stride, idx, idx);
280 
287 
  // a = 2*i0,  b = 2*i1
288  // project spinor into half spinors
289  a0_re = +2*i00_re;
290  a0_im = +2*i00_im;
291  a1_re = +2*i01_re;
292  a1_im = +2*i01_im;
293  a2_re = +2*i02_re;
294  a2_im = +2*i02_im;
295  b0_re = +2*i10_re;
296  b0_im = +2*i10_im;
297  b1_re = +2*i11_re;
298  b1_im = +2*i11_im;
299  b2_re = +2*i12_re;
300  b2_im = +2*i12_im;
301 
302  // write half spinor back to device memory
303  WRITE_HALF_SPINOR(face_volume, face_idx);
304  }
305  break;
306  }
307 
308 }
309 
// Cleanup: removes spinorFloat and all 24 iXY_re / iXY_im aliases defined at
// the top of this file, so the file can be included again with a different
// SPINOR_DOUBLE setting without macro-redefinition warnings.
310 // undefine to prevent warning when precision is changed
311 #undef spinorFloat
// NOTE(review): SHARED_STRIDE is never defined in this listing -- presumably
// set by the includer or io_spinor.h; #undef of an undefined name is a no-op.
312 #undef SHARED_STRIDE
313 
314 #undef i00_re
315 #undef i00_im
316 #undef i01_re
317 #undef i01_im
318 #undef i02_re
319 #undef i02_im
320 #undef i10_re
321 #undef i10_im
322 #undef i11_re
323 #undef i11_im
324 #undef i12_re
325 #undef i12_im
326 #undef i20_re
327 #undef i20_im
328 #undef i21_re
329 #undef i21_im
330 #undef i22_re
331 #undef i22_im
332 #undef i30_re
333 #undef i30_im
334 #undef i31_re
335 #undef i31_im
336 #undef i32_re
337 #undef i32_im
338 
#define i32_re
#define i12_im
#define i00_im
#define i22_im
#define i20_im
#define i00_re
#define i20_re
#define i22_re
#define i12_re
#define i01_re
QudaGaugeParam param
Definition: pack_test.cpp:17
#define READ_SPINOR_UP
Definition: covDev.h:130
#define spinorFloat
#define i32_im
#define i11_im
#define i21_im
#define SPINORTEX
Definition: clover_def.h:40
#define i31_re
#define i02_re
#define i30_im
#define i10_re
#define i10_im
#define i02_im
#define READ_SPINOR
Definition: clover_def.h:36
#define i11_re
#define READ_SPINOR_DOWN
Definition: covDev.h:131
#define i30_re
#define i01_im
#define i21_re
#define i31_im