QUDA  v0.7.0
A library for QCD on GPUs
covDev_mu3_dagger_core.h
// *** CUDA DSLASH DAGGER ***

//#define DSLASH_SHARED_FLOATS_PER_THREAD 0


#if ((CUDA_VERSION >= 4010) && (__COMPUTE_CAPABILITY__ >= 200)) // NVVM compiler
#define VOLATILE
#else // Open64 compiler
#define VOLATILE volatile
#endif
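// Note: with the NVVM back end (CUDA >= 4.1, sm_20+) VOLATILE expands to nothing;
// the volatile qualifier appears to be needed only to keep the older Open64 front
// end from optimizing the spinor accumulators in ways that hurt register usage.
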
// input spinor
#ifdef SPINOR_DOUBLE
#define spinorFloat double
#define WRITE_SPINOR_SHARED WRITE_SPINOR_SHARED_DOUBLE2
#define READ_SPINOR_SHARED READ_SPINOR_SHARED_DOUBLE2
#define i00_re I0.x
#define i00_im I0.y
#define i01_re I1.x
#define i01_im I1.y
#define i02_re I2.x
#define i02_im I2.y
#define i10_re I3.x
#define i10_im I3.y
#define i11_re I4.x
#define i11_im I4.y
#define i12_re I5.x
#define i12_im I5.y
#define i20_re I6.x
#define i20_im I6.y
#define i21_re I7.x
#define i21_im I7.y
#define i22_re I8.x
#define i22_im I8.y
#define i30_re I9.x
#define i30_im I9.y
#define i31_re I10.x
#define i31_im I10.y
#define i32_re I11.x
#define i32_im I11.y
#else
#define spinorFloat float
#define i00_re I0.x
#define i00_im I0.y
#define i01_re I0.z
#define i01_im I0.w
#define i02_re I1.x
#define i02_im I1.y
#define i10_re I1.z
#define i10_im I1.w
#define i11_re I2.x
#define i11_im I2.y
#define i12_re I2.z
#define i12_im I2.w
#define i20_re I3.x
#define i20_im I3.y
#define i21_re I3.z
#define i21_im I3.w
#define i22_re I4.x
#define i22_im I4.y
#define i30_re I4.z
#define i30_im I4.w
#define i31_re I5.x
#define i31_im I5.y
#define i32_re I5.z
#define i32_im I5.w
#endif // SPINOR_DOUBLE

// gauge link
#ifdef GAUGE_FLOAT2
#define g00_re G0.x
#define g00_im G0.y
#define g01_re G1.x
#define g01_im G1.y
#define g02_re G2.x
#define g02_im G2.y
#define g10_re G3.x
#define g10_im G3.y
#define g11_re G4.x
#define g11_im G4.y
#define g12_re G5.x
#define g12_im G5.y
#define g20_re G6.x
#define g20_im G6.y
#define g21_re G7.x
#define g21_im G7.y
#define g22_re G8.x
#define g22_im G8.y

#else
#define g00_re G0.x
#define g00_im G0.y
#define g01_re G0.z
#define g01_im G0.w
#define g02_re G1.x
#define g02_im G1.y
#define g10_re G1.z
#define g10_im G1.w
#define g11_re G2.x
#define g11_im G2.y
#define g12_re G2.z
#define g12_im G2.w
#define g20_re G3.x
#define g20_im G3.y
#define g21_re G3.z
#define g21_im G3.w
#define g22_re G4.x
#define g22_im G4.y

#endif // GAUGE_FLOAT2

// conjugated gauge link
#define gT00_re (+g00_re)
#define gT00_im (-g00_im)
#define gT01_re (+g10_re)
#define gT01_im (-g10_im)
#define gT02_re (+g20_re)
#define gT02_im (-g20_im)
#define gT10_re (+g01_re)
#define gT10_im (-g01_im)
#define gT11_re (+g11_re)
#define gT11_im (-g11_im)
#define gT12_re (+g21_re)
#define gT12_im (-g21_im)
#define gT20_re (+g02_re)
#define gT20_im (-g02_im)
#define gT21_re (+g12_re)
#define gT21_im (-g12_im)
#define gT22_re (+g22_re)
#define gT22_im (-g22_im)

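// Note: the gT?? macros realize the Hermitian conjugate of the gauge link,
// gT[a][b] = conj(g[b][a]), purely by remapping the g?? macros above, so forming
// U^dagger costs no extra registers or arithmetic. A minimal illustrative sketch
// of the same mapping with an explicit 3x3 complex matrix (hypothetical helper,
// not part of this kernel, compiled out):
#if 0
#include <cuComplex.h>
__device__ void adjoint3x3(const cuDoubleComplex g[3][3], cuDoubleComplex gT[3][3])
{
  for (int a = 0; a < 3; a++)
    for (int b = 0; b < 3; b++)
      gT[a][b] = cuConj(g[b][a]); // transpose and complex-conjugate
}
#endif
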
// output spinor
VOLATILE spinorFloat o00_re;
VOLATILE spinorFloat o00_im;
VOLATILE spinorFloat o01_re;
VOLATILE spinorFloat o01_im;
VOLATILE spinorFloat o02_re;
VOLATILE spinorFloat o02_im;
VOLATILE spinorFloat o10_re;
VOLATILE spinorFloat o10_im;
VOLATILE spinorFloat o11_re;
VOLATILE spinorFloat o11_im;
VOLATILE spinorFloat o12_re;
VOLATILE spinorFloat o12_im;
VOLATILE spinorFloat o20_re;
VOLATILE spinorFloat o20_im;
VOLATILE spinorFloat o21_re;
VOLATILE spinorFloat o21_im;
VOLATILE spinorFloat o22_re;
VOLATILE spinorFloat o22_im;
VOLATILE spinorFloat o30_re;
VOLATILE spinorFloat o30_im;
VOLATILE spinorFloat o31_re;
VOLATILE spinorFloat o31_im;
VOLATILE spinorFloat o32_re;
VOLATILE spinorFloat o32_im;

#ifdef SPINOR_DOUBLE
#define SHARED_STRIDE 16 // to avoid bank conflicts on Fermi
#else
#define SHARED_STRIDE 32 // to avoid bank conflicts on Fermi
#endif
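// Note: SHARED_STRIDE is the number of threads whose like-named spinor components
// sit contiguously in shared memory. 16 doubles (or 32 floats) span 128 bytes,
// i.e. exactly the 32 four-byte banks of a Fermi SM, so threads of a warp touching
// the same component land in distinct banks.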

#include "read_gauge.h"
#include "io_spinor.h"

int x1, x2, x3, x4;
int X;

#if (defined MULTI_GPU) && (DD_PREC==2) // half precision
int sp_norm_idx;
#endif // MULTI_GPU half precision

int sid;

sid = blockIdx.x*blockDim.x + threadIdx.x;
if (sid >= param.threads) return;

#ifdef MULTI_GPU
int face_idx;
if (kernel_type == INTERIOR_KERNEL) { // interior kernel
#endif

  // Inline by hand for the moment and assume even dimensions
  //coordsFromIndex(X, x1, x2, x3, x4, sid, param.parity);

  X = 2*sid;
  int aux1 = X / X1;
  x1 = X - aux1 * X1;
  int aux2 = aux1 / X2;
  x2 = aux1 - aux2 * X2;
  x4 = aux2 / X3;
  x3 = aux2 - x4 * X3;
  aux1 = (param.parity + x4 + x3 + x2) & 1;
  x1 += aux1;
  X += aux1;

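  // Note: the hand-inlined block above converts the checkerboard thread index sid
  // into the full lexicographic site index X and the coordinates (x1,x2,x3,x4),
  // assuming X1 is even. A host-side reference of the same mapping (hypothetical
  // helper, compiled out):
#if 0
static void coordsFromIndexRef(int &X, int &x1, int &x2, int &x3, int &x4,
                               int sid, int parity, int X1, int X2, int X3)
{
  X  = 2*sid;               // provisional full index, parity fixed up below
  x1 = X % X1;
  x2 = (X / X1) % X2;
  x3 = (X / (X1*X2)) % X3;
  x4 =  X / (X1*X2*X3);
  int shift = (parity + x4 + x3 + x2) & 1; // checkerboard offset lives in x1
  x1 += shift;
  X  += shift;
}
#endif
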
  o00_re = 0; o00_im = 0;
  o01_re = 0; o01_im = 0;
  o02_re = 0; o02_im = 0;
  o10_re = 0; o10_im = 0;
  o11_re = 0; o11_im = 0;
  o12_re = 0; o12_im = 0;
  o20_re = 0; o20_im = 0;
  o21_re = 0; o21_im = 0;
  o22_re = 0; o22_im = 0;
  o30_re = 0; o30_im = 0;
  o31_re = 0; o31_im = 0;
  o32_re = 0; o32_im = 0;

#ifdef MULTI_GPU
} else { // exterior kernel
/*
  const int dim = static_cast<int>(kernel_type);
  const int face_volume = (param.threads >> 1); // volume of one face
  const int face_num = (sid >= face_volume);

  face_idx = sid - face_num*face_volume; // index into the respective face
*/

  const int dim = static_cast<int>(kernel_type);
  const int face_volume = param.threads; // volume of one face
  const int face_num = 0; // was 1

  face_idx = sid; // index into the respective face


  // ghostOffset is scaled to include body (includes stride) and number of FloatN arrays (SPINOR_HOP)
  // face_idx not sid since faces are spin projected and share the same volume index (modulo UP/DOWN reading)
  //sp_idx = face_idx + param.ghostOffset[dim];

#if (DD_PREC==2) // half precision
  sp_norm_idx = sid + param.ghostNormOffset[static_cast<int>(kernel_type)];
#endif

  const int dims[] = {X1, X2, X3, X4};
  coordsFromFaceIndex<1>(X, sid, x1, x2, x3, x4, face_idx, face_volume, dim, face_num, param.parity, dims);

  o00_re = 0.; o00_im = 0.;
  o01_re = 0.; o01_im = 0.;
  o02_re = 0.; o02_im = 0.;
  o10_re = 0.; o10_im = 0.;
  o11_re = 0.; o11_im = 0.;
  o12_re = 0.; o12_im = 0.;
  o20_re = 0.; o20_im = 0.;
  o21_re = 0.; o21_im = 0.;
  o22_re = 0.; o22_im = 0.;
  o30_re = 0.; o30_im = 0.;
  o31_re = 0.; o31_im = 0.;
  o32_re = 0.; o32_im = 0.;
}
#endif // MULTI_GPU


#ifdef MULTI_GPU
if ( (kernel_type == INTERIOR_KERNEL && (!param.ghostDim[3] || x4>0)) ||
     (kernel_type == EXTERIOR_KERNEL_T && x4==0) )
#endif
{
  // Projector P3-
  // 0 0 0 0
  // 0 0 0 0
  // 0 0 2 0
  // 0 0 0 2
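  // Note: the projector comment above appears to be carried over from the Wilson
  // dslash cores; the covariant-derivative accumulation below applies no spin
  // projection and simply multiplies all four spin components by U_3^dagger.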

#ifdef MULTI_GPU
  const int sp_idx = (kernel_type == INTERIOR_KERNEL) ? (x4==0 ? X+X4X3X2X1mX3X2X1 : X-X3X2X1) >> 1 :
    face_idx + param.ghostOffset[static_cast<int>(kernel_type)];
#else
  const int sp_idx = (x4==0 ? X+X4X3X2X1mX3X2X1 : X-X3X2X1) >> 1;
#endif
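  // Note: sp_idx is the checkerboard index of the backward neighbour in the t
  // direction: X-X3X2X1 steps x4 down by one, and at x4==0 the constant
  // X4X3X2X1mX3X2X1 (= X4*X3*X2*X1 - X3*X2*X1) wraps around to x4 = X4-1; the >> 1
  // converts the full lexicographic index back to an even/odd site index. In the
  // exterior pass the spinor is instead addressed inside the ghost buffer.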

#ifdef MULTI_GPU
  const int ga_idx = ((kernel_type == INTERIOR_KERNEL) ? sp_idx : Vh+face_idx);
#else
  const int ga_idx = sp_idx;
#endif

  {
  // read gauge matrix from device memory
  READ_GAUGE_MATRIX(G, GAUGE1TEX, 7, ga_idx, ga_stride);


#ifdef MULTI_GPU
  if (kernel_type == INTERIOR_KERNEL) {
#endif

  // read spinor from device memory
  READ_SPINOR(SPINORTEX, param.sp_stride, sp_idx, sp_idx);

  // Do nothing useful with the spinors
  // reconstruct gauge matrix
  RECONSTRUCT_GAUGE_MATRIX(7);

  // multiply row 0
  o00_re += gT00_re * i00_re;
  o00_re -= gT00_im * i00_im;
  o00_re += gT01_re * i01_re;
  o00_re -= gT01_im * i01_im;
  o00_re += gT02_re * i02_re;
  o00_re -= gT02_im * i02_im;
  o00_im += gT00_re * i00_im;
  o00_im += gT00_im * i00_re;
  o00_im += gT01_re * i01_im;
  o00_im += gT01_im * i01_re;
  o00_im += gT02_re * i02_im;
  o00_im += gT02_im * i02_re;
  o10_re += gT00_re * i10_re;
  o10_re -= gT00_im * i10_im;
  o10_re += gT01_re * i11_re;
  o10_re -= gT01_im * i11_im;
  o10_re += gT02_re * i12_re;
  o10_re -= gT02_im * i12_im;
  o10_im += gT00_re * i10_im;
  o10_im += gT00_im * i10_re;
  o10_im += gT01_re * i11_im;
  o10_im += gT01_im * i11_re;
  o10_im += gT02_re * i12_im;
  o10_im += gT02_im * i12_re;
  o20_re += gT00_re * i20_re;
  o20_re -= gT00_im * i20_im;
  o20_re += gT01_re * i21_re;
  o20_re -= gT01_im * i21_im;
  o20_re += gT02_re * i22_re;
  o20_re -= gT02_im * i22_im;
  o20_im += gT00_re * i20_im;
  o20_im += gT00_im * i20_re;
  o20_im += gT01_re * i21_im;
  o20_im += gT01_im * i21_re;
  o20_im += gT02_re * i22_im;
  o20_im += gT02_im * i22_re;
  o30_re += gT00_re * i30_re;
  o30_re -= gT00_im * i30_im;
  o30_re += gT01_re * i31_re;
  o30_re -= gT01_im * i31_im;
  o30_re += gT02_re * i32_re;
  o30_re -= gT02_im * i32_im;
  o30_im += gT00_re * i30_im;
  o30_im += gT00_im * i30_re;
  o30_im += gT01_re * i31_im;
  o30_im += gT01_im * i31_re;
  o30_im += gT02_re * i32_im;
  o30_im += gT02_im * i32_re;

  // multiply row 1
  o01_re += gT10_re * i00_re;
  o01_re -= gT10_im * i00_im;
  o01_re += gT11_re * i01_re;
  o01_re -= gT11_im * i01_im;
  o01_re += gT12_re * i02_re;
  o01_re -= gT12_im * i02_im;
  o01_im += gT10_re * i00_im;
  o01_im += gT10_im * i00_re;
  o01_im += gT11_re * i01_im;
  o01_im += gT11_im * i01_re;
  o01_im += gT12_re * i02_im;
  o01_im += gT12_im * i02_re;
  o11_re += gT10_re * i10_re;
  o11_re -= gT10_im * i10_im;
  o11_re += gT11_re * i11_re;
  o11_re -= gT11_im * i11_im;
  o11_re += gT12_re * i12_re;
  o11_re -= gT12_im * i12_im;
  o11_im += gT10_re * i10_im;
  o11_im += gT10_im * i10_re;
  o11_im += gT11_re * i11_im;
  o11_im += gT11_im * i11_re;
  o11_im += gT12_re * i12_im;
  o11_im += gT12_im * i12_re;
  o21_re += gT10_re * i20_re;
  o21_re -= gT10_im * i20_im;
  o21_re += gT11_re * i21_re;
  o21_re -= gT11_im * i21_im;
  o21_re += gT12_re * i22_re;
  o21_re -= gT12_im * i22_im;
  o21_im += gT10_re * i20_im;
  o21_im += gT10_im * i20_re;
  o21_im += gT11_re * i21_im;
  o21_im += gT11_im * i21_re;
  o21_im += gT12_re * i22_im;
  o21_im += gT12_im * i22_re;
  o31_re += gT10_re * i30_re;
  o31_re -= gT10_im * i30_im;
  o31_re += gT11_re * i31_re;
  o31_re -= gT11_im * i31_im;
  o31_re += gT12_re * i32_re;
  o31_re -= gT12_im * i32_im;
  o31_im += gT10_re * i30_im;
  o31_im += gT10_im * i30_re;
  o31_im += gT11_re * i31_im;
  o31_im += gT11_im * i31_re;
  o31_im += gT12_re * i32_im;
  o31_im += gT12_im * i32_re;

  // multiply row 2
  o02_re += gT20_re * i00_re;
  o02_re -= gT20_im * i00_im;
  o02_re += gT21_re * i01_re;
  o02_re -= gT21_im * i01_im;
  o02_re += gT22_re * i02_re;
  o02_re -= gT22_im * i02_im;
  o02_im += gT20_re * i00_im;
  o02_im += gT20_im * i00_re;
  o02_im += gT21_re * i01_im;
  o02_im += gT21_im * i01_re;
  o02_im += gT22_re * i02_im;
  o02_im += gT22_im * i02_re;
  o12_re += gT20_re * i10_re;
  o12_re -= gT20_im * i10_im;
  o12_re += gT21_re * i11_re;
  o12_re -= gT21_im * i11_im;
  o12_re += gT22_re * i12_re;
  o12_re -= gT22_im * i12_im;
  o12_im += gT20_re * i10_im;
  o12_im += gT20_im * i10_re;
  o12_im += gT21_re * i11_im;
  o12_im += gT21_im * i11_re;
  o12_im += gT22_re * i12_im;
  o12_im += gT22_im * i12_re;
  o22_re += gT20_re * i20_re;
  o22_re -= gT20_im * i20_im;
  o22_re += gT21_re * i21_re;
  o22_re -= gT21_im * i21_im;
  o22_re += gT22_re * i22_re;
  o22_re -= gT22_im * i22_im;
  o22_im += gT20_re * i20_im;
  o22_im += gT20_im * i20_re;
  o22_im += gT21_re * i21_im;
  o22_im += gT21_im * i21_re;
  o22_im += gT22_re * i22_im;
  o22_im += gT22_im * i22_re;
  o32_re += gT20_re * i30_re;
  o32_re -= gT20_im * i30_im;
  o32_re += gT21_re * i31_re;
  o32_re -= gT21_im * i31_im;
  o32_re += gT22_re * i32_re;
  o32_re -= gT22_im * i32_im;
  o32_im += gT20_re * i30_im;
  o32_im += gT20_im * i30_re;
  o32_im += gT21_re * i31_im;
  o32_im += gT21_im * i31_re;
  o32_im += gT22_re * i32_im;
  o32_im += gT22_im * i32_re;

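  // Note: the unrolled accumulation above (and its copy in the exterior branch
  // below) is the colour matrix-vector product o[s] += U^dagger * i[s] for each
  // spin s, written out in real arithmetic. A compact equivalent with a
  // hypothetical complex array layout (compiled out):
#if 0
  for (int s = 0; s < 4; s++)        // spin
    for (int a = 0; a < 3; a++)      // output colour
      for (int b = 0; b < 3; b++) {  // input colour, gT[a][b] = conj(g[b][a])
        o[s][a].re += gT[a][b].re * in[s][b].re - gT[a][b].im * in[s][b].im;
        o[s][a].im += gT[a][b].re * in[s][b].im + gT[a][b].im * in[s][b].re;
      }
#endif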

#ifdef MULTI_GPU
  } else {

  const int sp_stride_pad = ghostFace[static_cast<int>(kernel_type)];

  // read full spinor from device memory
  READ_SPINOR(SPINORTEX, sp_stride_pad, sp_idx, sp_norm_idx);
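  // Note: in the exterior pass the neighbour spinor lives in the ghost buffer
  // received from the adjacent GPU, so the array stride is the face volume
  // ghostFace[kernel_type] rather than param.sp_stride, and in half precision
  // the norm is fetched via sp_norm_idx.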

  // reconstruct gauge matrix
  RECONSTRUCT_GAUGE_MATRIX(7);

  // multiply row 0
  o00_re += gT00_re * i00_re;
  o00_re -= gT00_im * i00_im;
  o00_re += gT01_re * i01_re;
  o00_re -= gT01_im * i01_im;
  o00_re += gT02_re * i02_re;
  o00_re -= gT02_im * i02_im;
  o00_im += gT00_re * i00_im;
  o00_im += gT00_im * i00_re;
  o00_im += gT01_re * i01_im;
  o00_im += gT01_im * i01_re;
  o00_im += gT02_re * i02_im;
  o00_im += gT02_im * i02_re;
  o10_re += gT00_re * i10_re;
  o10_re -= gT00_im * i10_im;
  o10_re += gT01_re * i11_re;
  o10_re -= gT01_im * i11_im;
  o10_re += gT02_re * i12_re;
  o10_re -= gT02_im * i12_im;
  o10_im += gT00_re * i10_im;
  o10_im += gT00_im * i10_re;
  o10_im += gT01_re * i11_im;
  o10_im += gT01_im * i11_re;
  o10_im += gT02_re * i12_im;
  o10_im += gT02_im * i12_re;
  o20_re += gT00_re * i20_re;
  o20_re -= gT00_im * i20_im;
  o20_re += gT01_re * i21_re;
  o20_re -= gT01_im * i21_im;
  o20_re += gT02_re * i22_re;
  o20_re -= gT02_im * i22_im;
  o20_im += gT00_re * i20_im;
  o20_im += gT00_im * i20_re;
  o20_im += gT01_re * i21_im;
  o20_im += gT01_im * i21_re;
  o20_im += gT02_re * i22_im;
  o20_im += gT02_im * i22_re;
  o30_re += gT00_re * i30_re;
  o30_re -= gT00_im * i30_im;
  o30_re += gT01_re * i31_re;
  o30_re -= gT01_im * i31_im;
  o30_re += gT02_re * i32_re;
  o30_re -= gT02_im * i32_im;
  o30_im += gT00_re * i30_im;
  o30_im += gT00_im * i30_re;
  o30_im += gT01_re * i31_im;
  o30_im += gT01_im * i31_re;
  o30_im += gT02_re * i32_im;
  o30_im += gT02_im * i32_re;

  // multiply row 1
  o01_re += gT10_re * i00_re;
  o01_re -= gT10_im * i00_im;
  o01_re += gT11_re * i01_re;
  o01_re -= gT11_im * i01_im;
  o01_re += gT12_re * i02_re;
  o01_re -= gT12_im * i02_im;
  o01_im += gT10_re * i00_im;
  o01_im += gT10_im * i00_re;
  o01_im += gT11_re * i01_im;
  o01_im += gT11_im * i01_re;
  o01_im += gT12_re * i02_im;
  o01_im += gT12_im * i02_re;
  o11_re += gT10_re * i10_re;
  o11_re -= gT10_im * i10_im;
  o11_re += gT11_re * i11_re;
  o11_re -= gT11_im * i11_im;
  o11_re += gT12_re * i12_re;
  o11_re -= gT12_im * i12_im;
  o11_im += gT10_re * i10_im;
  o11_im += gT10_im * i10_re;
  o11_im += gT11_re * i11_im;
  o11_im += gT11_im * i11_re;
  o11_im += gT12_re * i12_im;
  o11_im += gT12_im * i12_re;
  o21_re += gT10_re * i20_re;
  o21_re -= gT10_im * i20_im;
  o21_re += gT11_re * i21_re;
  o21_re -= gT11_im * i21_im;
  o21_re += gT12_re * i22_re;
  o21_re -= gT12_im * i22_im;
  o21_im += gT10_re * i20_im;
  o21_im += gT10_im * i20_re;
  o21_im += gT11_re * i21_im;
  o21_im += gT11_im * i21_re;
  o21_im += gT12_re * i22_im;
  o21_im += gT12_im * i22_re;
  o31_re += gT10_re * i30_re;
  o31_re -= gT10_im * i30_im;
  o31_re += gT11_re * i31_re;
  o31_re -= gT11_im * i31_im;
  o31_re += gT12_re * i32_re;
  o31_re -= gT12_im * i32_im;
  o31_im += gT10_re * i30_im;
  o31_im += gT10_im * i30_re;
  o31_im += gT11_re * i31_im;
  o31_im += gT11_im * i31_re;
  o31_im += gT12_re * i32_im;
  o31_im += gT12_im * i32_re;

  // multiply row 2
  o02_re += gT20_re * i00_re;
  o02_re -= gT20_im * i00_im;
  o02_re += gT21_re * i01_re;
  o02_re -= gT21_im * i01_im;
  o02_re += gT22_re * i02_re;
  o02_re -= gT22_im * i02_im;
  o02_im += gT20_re * i00_im;
  o02_im += gT20_im * i00_re;
  o02_im += gT21_re * i01_im;
  o02_im += gT21_im * i01_re;
  o02_im += gT22_re * i02_im;
  o02_im += gT22_im * i02_re;
  o12_re += gT20_re * i10_re;
  o12_re -= gT20_im * i10_im;
  o12_re += gT21_re * i11_re;
  o12_re -= gT21_im * i11_im;
  o12_re += gT22_re * i12_re;
  o12_re -= gT22_im * i12_im;
  o12_im += gT20_re * i10_im;
  o12_im += gT20_im * i10_re;
  o12_im += gT21_re * i11_im;
  o12_im += gT21_im * i11_re;
  o12_im += gT22_re * i12_im;
  o12_im += gT22_im * i12_re;
  o22_re += gT20_re * i20_re;
  o22_re -= gT20_im * i20_im;
  o22_re += gT21_re * i21_re;
  o22_re -= gT21_im * i21_im;
  o22_re += gT22_re * i22_re;
  o22_re -= gT22_im * i22_im;
  o22_im += gT20_re * i20_im;
  o22_im += gT20_im * i20_re;
  o22_im += gT21_re * i21_im;
  o22_im += gT21_im * i21_re;
  o22_im += gT22_re * i22_im;
  o22_im += gT22_im * i22_re;
  o32_re += gT20_re * i30_re;
  o32_re -= gT20_im * i30_im;
  o32_re += gT21_re * i31_re;
  o32_re -= gT21_im * i31_im;
  o32_re += gT22_re * i32_re;
  o32_re -= gT22_im * i32_im;
  o32_im += gT20_re * i30_im;
  o32_im += gT20_im * i30_re;
  o32_im += gT21_re * i31_im;
  o32_im += gT21_im * i31_re;
  o32_im += gT22_re * i32_im;
  o32_im += gT22_im * i32_re;

  }
#endif // MULTI_GPU

  }
}



// write spinor field back to device memory
WRITE_SPINOR(param.sp_stride);

// undefine to prevent warning when precision is changed
#undef spinorFloat
#undef SHARED_STRIDE

#undef g00_re
#undef g00_im
#undef gT00_re
#undef gT00_im
#undef g01_re
#undef g01_im
#undef gT01_re
#undef gT01_im
#undef g02_re
#undef g02_im
#undef gT02_re
#undef gT02_im
#undef g10_re
#undef g10_im
#undef gT10_re
#undef gT10_im
#undef g11_re
#undef g11_im
#undef gT11_re
#undef gT11_im
#undef g12_re
#undef g12_im
#undef gT12_re
#undef gT12_im
#undef g20_re
#undef g20_im
#undef gT20_re
#undef gT20_im
#undef g21_re
#undef g21_im
#undef gT21_re
#undef gT21_im
#undef g22_re
#undef g22_im
#undef gT22_re
#undef gT22_im

#undef i00_re
#undef i00_im
#undef i01_re
#undef i01_im
#undef i02_re
#undef i02_im
#undef i10_re
#undef i10_im
#undef i11_re
#undef i11_im
#undef i12_re
#undef i12_im
#undef i20_re
#undef i20_im
#undef i21_re
#undef i21_im
#undef i22_re
#undef i22_im
#undef i30_re
#undef i30_im
#undef i31_re
#undef i31_im
#undef i32_re
#undef i32_im