QUDA  v0.7.0
A library for QCD on GPUs
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
covDev_mu2_dagger_core.h
Go to the documentation of this file.
1 // *** CUDA DSLASH DAGGER ***
2 
3 //#define DSLASH_SHARED_FLOATS_PER_THREAD 0
4 
5 
6 #if ((CUDA_VERSION >= 4010) && (__COMPUTE_CAPABILITY__ >= 200)) // NVVM compiler
7 #define VOLATILE
8 #else // Open64 compiler
9 #define VOLATILE volatile
10 #endif
11 // input spinor: iSC_re / iSC_im are the real / imaginary parts of spin S (0-3), colour C (0-2)
12 #ifdef SPINOR_DOUBLE
13 #define spinorFloat double
14 #define i00_re I0.x
15 #define i00_im I0.y
16 #define i01_re I1.x
17 #define i01_im I1.y
18 #define i02_re I2.x
19 #define i02_im I2.y
20 #define i10_re I3.x
21 #define i10_im I3.y
22 #define i11_re I4.x
23 #define i11_im I4.y
24 #define i12_re I5.x
25 #define i12_im I5.y
26 #define i20_re I6.x
27 #define i20_im I6.y
28 #define i21_re I7.x
29 #define i21_im I7.y
30 #define i22_re I8.x
31 #define i22_im I8.y
32 #define i30_re I9.x
33 #define i30_im I9.y
34 #define i31_re I10.x
35 #define i31_im I10.y
36 #define i32_re I11.x
37 #define i32_im I11.y
38 #else
39 #define spinorFloat float
40 #define i00_re I0.x
41 #define i00_im I0.y
42 #define i01_re I0.z
43 #define i01_im I0.w
44 #define i02_re I1.x
45 #define i02_im I1.y
46 #define i10_re I1.z
47 #define i10_im I1.w
48 #define i11_re I2.x
49 #define i11_im I2.y
50 #define i12_re I2.z
51 #define i12_im I2.w
52 #define i20_re I3.x
53 #define i20_im I3.y
54 #define i21_re I3.z
55 #define i21_im I3.w
56 #define i22_re I4.x
57 #define i22_im I4.y
58 #define i30_re I4.z
59 #define i30_im I4.w
60 #define i31_re I5.x
61 #define i31_im I5.y
62 #define i32_re I5.z
63 #define i32_im I5.w
64 #endif // SPINOR_DOUBLE
65 
66 // gauge link: gRC_re / gRC_im are the real / imaginary parts of row R, column C of the 3x3 link matrix
67 #ifdef GAUGE_FLOAT2
68 #define g00_re G0.x
69 #define g00_im G0.y
70 #define g01_re G1.x
71 #define g01_im G1.y
72 #define g02_re G2.x
73 #define g02_im G2.y
74 #define g10_re G3.x
75 #define g10_im G3.y
76 #define g11_re G4.x
77 #define g11_im G4.y
78 #define g12_re G5.x
79 #define g12_im G5.y
80 #define g20_re G6.x
81 #define g20_im G6.y
82 #define g21_re G7.x
83 #define g21_im G7.y
84 #define g22_re G8.x
85 #define g22_im G8.y
86 
87 #else
88 #define g00_re G0.x
89 #define g00_im G0.y
90 #define g01_re G0.z
91 #define g01_im G0.w
92 #define g02_re G1.x
93 #define g02_im G1.y
94 #define g10_re G1.z
95 #define g10_im G1.w
96 #define g11_re G2.x
97 #define g11_im G2.y
98 #define g12_re G2.z
99 #define g12_im G2.w
100 #define g20_re G3.x
101 #define g20_im G3.y
102 #define g21_re G3.z
103 #define g21_im G3.w
104 #define g22_re G4.x
105 #define g22_im G4.y
106 
107 #endif // GAUGE_FLOAT2
108 
109 // Hermitian conjugate of the gauge link: gT_RC = conj(g_CR)
110 #define gT00_re (+g00_re)
111 #define gT00_im (-g00_im)
112 #define gT01_re (+g10_re)
113 #define gT01_im (-g10_im)
114 #define gT02_re (+g20_re)
115 #define gT02_im (-g20_im)
116 #define gT10_re (+g01_re)
117 #define gT10_im (-g01_im)
118 #define gT11_re (+g11_re)
119 #define gT11_im (-g11_im)
120 #define gT12_re (+g21_re)
121 #define gT12_im (-g21_im)
122 #define gT20_re (+g02_re)
123 #define gT20_im (-g02_im)
124 #define gT21_re (+g12_re)
125 #define gT21_im (-g12_im)
126 #define gT22_re (+g22_re)
127 #define gT22_im (-g22_im)
128 
129 // output spinor: oSC_re / oSC_im accumulate the real / imaginary parts of spin S (0-3), colour C (0-2)
130 VOLATILE spinorFloat o00_re;
131 VOLATILE spinorFloat o00_im;
132 VOLATILE spinorFloat o01_re;
133 VOLATILE spinorFloat o01_im;
134 VOLATILE spinorFloat o02_re;
135 VOLATILE spinorFloat o02_im;
136 VOLATILE spinorFloat o10_re;
137 VOLATILE spinorFloat o10_im;
138 VOLATILE spinorFloat o11_re;
139 VOLATILE spinorFloat o11_im;
140 VOLATILE spinorFloat o12_re;
141 VOLATILE spinorFloat o12_im;
142 VOLATILE spinorFloat o20_re;
143 VOLATILE spinorFloat o20_im;
144 VOLATILE spinorFloat o21_re;
145 VOLATILE spinorFloat o21_im;
146 VOLATILE spinorFloat o22_re;
147 VOLATILE spinorFloat o22_im;
148 VOLATILE spinorFloat o30_re;
149 VOLATILE spinorFloat o30_im;
150 VOLATILE spinorFloat o31_re;
151 VOLATILE spinorFloat o31_im;
152 VOLATILE spinorFloat o32_re;
153 VOLATILE spinorFloat o32_im;
154 
155 #include "read_gauge.h"
156 #include "io_spinor.h"
157 
158 int x1, x2, x3, x4; // lattice coordinates of the site handled by this thread
159 int X; // linear site index from which x1..x4 are decoded
160 
161 #if (defined MULTI_GPU) && (DD_PREC==2) // half precision
162 int sp_norm_idx; // passed as the fourth argument of READ_SPINOR in the exterior branch
163 #endif // MULTI_GPU half precision
164 
165 int sid;
166 
167 sid = blockIdx.x*blockDim.x + threadIdx.x; // flat global thread index built from the x launch dimensions
168 if (sid >= param.threads) return; // threads with an index beyond param.threads exit immediately
169 
170 #ifdef MULTI_GPU
171 int face_idx; // face-local site index; assigned in the exterior branch
172 if (kernel_type == INTERIOR_KERNEL) { // opens the branch that is closed by "} else { // exterior kernel"
173 #endif
174 
175  // Inline by hand for the moment and assume even dimensions
176  //coordsFromIndex(X, x1, x2, x3, x4, sid, param.parity);
177 
178  X = 2*sid; // even linear index: each thread owns one site out of a pair of neighbours in x1
179  int aux1 = X / X1;
180  x1 = X - aux1 * X1; // x1 = X mod X1
181  int aux2 = aux1 / X2;
182  x2 = aux1 - aux2 * X2; // x2 = (X / X1) mod X2
183  x4 = aux2 / X3; // x4 = X / (X1*X2*X3)
184  x3 = aux2 - x4 * X3; // x3 = (X / (X1*X2)) mod X3
185  aux1 = (param.parity + x4 + x3 + x2) & 1; // 0 or 1, chosen so that x1+x2+x3+x4 has parity param.parity (X1 assumed even)
186  x1 += aux1;
187  X += aux1;
188 
189  o00_re = 0; o00_im = 0;
190  o01_re = 0; o01_im = 0;
191  o02_re = 0; o02_im = 0;
192  o10_re = 0; o10_im = 0;
193  o11_re = 0; o11_im = 0;
194  o12_re = 0; o12_im = 0;
195  o20_re = 0; o20_im = 0;
196  o21_re = 0; o21_im = 0;
197  o22_re = 0; o22_im = 0;
198  o30_re = 0; o30_im = 0;
199  o31_re = 0; o31_im = 0;
200  o32_re = 0; o32_im = 0;
201 
202 #ifdef MULTI_GPU
203 } else { // exterior kernel
204 
205  const int dim = static_cast<int>(kernel_type); // the kernel type value is used directly as the face dimension index
206  const int face_volume = param.threads; // volume of one face
207  const int face_num = 0; // NOTE(review): face selector passed to coordsFromFaceIndex; meaning of 0 is not visible here - confirm
208 
209  face_idx = sid; // index into the respective face
210 
211  // ghostOffset is scaled to include body (includes stride) and number of FloatN arrays (SPINOR_HOP)
212  // face_idx not sid since faces are spin projected and share the same volume index (modulo UP/DOWN reading)
213  //sp_idx = face_idx + param.ghostOffset[dim];
214 
215 #if (DD_PREC==2) // half precision
216  sp_norm_idx = sid + param.ghostNormOffset[static_cast<int>(kernel_type)];
217 #endif
218 
219  const int dims[] = {X1, X2, X3, X4};
220  coordsFromFaceIndex<1>(X, sid, x1, x2, x3, x4, face_idx, face_volume, dim, face_num, param.parity, dims); // X, sid and x1..x4 are passed ahead of the face inputs; presumably outputs - confirm against the helper
221 
222  o00_re = 0.; o00_im = 0.; // clear the output accumulator before the hop term is added
223  o01_re = 0.; o01_im = 0.;
224  o02_re = 0.; o02_im = 0.;
225  o10_re = 0.; o10_im = 0.;
226  o11_re = 0.; o11_im = 0.;
227  o12_re = 0.; o12_im = 0.;
228  o20_re = 0.; o20_im = 0.;
229  o21_re = 0.; o21_im = 0.;
230  o22_re = 0.; o22_im = 0.;
231  o30_re = 0.; o30_im = 0.;
232  o31_re = 0.; o31_im = 0.;
233  o32_re = 0.; o32_im = 0.;
234 }
235 #endif // MULTI_GPU
236 
237 
238 #ifdef MULTI_GPU
239 if ( (kernel_type == INTERIOR_KERNEL && (!param.ghostDim[2] || x3>0)) ||
240  (kernel_type == EXTERIOR_KERNEL_Z && x3==0) )
241 #endif
242 {
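243  // Projector P2- (reference only: no spin projection is applied below; all four spin components are multiplied by the conjugated link)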
244  // 1 0 -i 0
245  // 0 1 0 i
246  // i 0 1 0
247  // 0 -i 0 1
248 
249 #ifdef MULTI_GPU
250  const int sp_idx = (kernel_type == INTERIOR_KERNEL) ? (x3==0 ? X+X3X2X1mX2X1 : X-X2X1) >> 1 :
251  face_idx + param.ghostOffset[static_cast<int>(kernel_type)];
252 #else
253  const int sp_idx = (x3==0 ? X+X3X2X1mX2X1 : X-X2X1) >> 1;
254 #endif
255 
256 #ifdef MULTI_GPU
257  const int ga_idx = ((kernel_type == INTERIOR_KERNEL) ? sp_idx : Vh+face_idx);
258 #else
259  const int ga_idx = sp_idx;
260 #endif
261 
262  // read gauge matrix from device memory
263  READ_GAUGE_MATRIX(G, GAUGE1TEX, 5, ga_idx, ga_stride);
264 
265 
266 #ifdef MULTI_GPU
267  if (kernel_type == INTERIOR_KERNEL) {
268 #endif
269 
270  // read spinor from device memory
271  READ_SPINOR(SPINORTEX, param.sp_stride, sp_idx, sp_idx);
272 
273  // No spin projection here: the conjugated link multiplies every spin component of the input spinor
274  // reconstruct gauge matrix (same index 5 as the READ_GAUGE_MATRIX call above)
275  RECONSTRUCT_GAUGE_MATRIX(5);
276 
277  // multiply row 0
278  o00_re += gT00_re * i00_re;
279  o00_re -= gT00_im * i00_im;
280  o00_re += gT01_re * i01_re;
281  o00_re -= gT01_im * i01_im;
282  o00_re += gT02_re * i02_re;
283  o00_re -= gT02_im * i02_im;
284  o00_im += gT00_re * i00_im;
285  o00_im += gT00_im * i00_re;
286  o00_im += gT01_re * i01_im;
287  o00_im += gT01_im * i01_re;
288  o00_im += gT02_re * i02_im;
289  o00_im += gT02_im * i02_re;
290  o10_re += gT00_re * i10_re;
291  o10_re -= gT00_im * i10_im;
292  o10_re += gT01_re * i11_re;
293  o10_re -= gT01_im * i11_im;
294  o10_re += gT02_re * i12_re;
295  o10_re -= gT02_im * i12_im;
296  o10_im += gT00_re * i10_im;
297  o10_im += gT00_im * i10_re;
298  o10_im += gT01_re * i11_im;
299  o10_im += gT01_im * i11_re;
300  o10_im += gT02_re * i12_im;
301  o10_im += gT02_im * i12_re;
302  o20_re += gT00_re * i20_re;
303  o20_re -= gT00_im * i20_im;
304  o20_re += gT01_re * i21_re;
305  o20_re -= gT01_im * i21_im;
306  o20_re += gT02_re * i22_re;
307  o20_re -= gT02_im * i22_im;
308  o20_im += gT00_re * i20_im;
309  o20_im += gT00_im * i20_re;
310  o20_im += gT01_re * i21_im;
311  o20_im += gT01_im * i21_re;
312  o20_im += gT02_re * i22_im;
313  o20_im += gT02_im * i22_re;
314  o30_re += gT00_re * i30_re;
315  o30_re -= gT00_im * i30_im;
316  o30_re += gT01_re * i31_re;
317  o30_re -= gT01_im * i31_im;
318  o30_re += gT02_re * i32_re;
319  o30_re -= gT02_im * i32_im;
320  o30_im += gT00_re * i30_im;
321  o30_im += gT00_im * i30_re;
322  o30_im += gT01_re * i31_im;
323  o30_im += gT01_im * i31_re;
324  o30_im += gT02_re * i32_im;
325  o30_im += gT02_im * i32_re;
326 
327  // multiply row 1
328  o01_re += gT10_re * i00_re;
329  o01_re -= gT10_im * i00_im;
330  o01_re += gT11_re * i01_re;
331  o01_re -= gT11_im * i01_im;
332  o01_re += gT12_re * i02_re;
333  o01_re -= gT12_im * i02_im;
334  o01_im += gT10_re * i00_im;
335  o01_im += gT10_im * i00_re;
336  o01_im += gT11_re * i01_im;
337  o01_im += gT11_im * i01_re;
338  o01_im += gT12_re * i02_im;
339  o01_im += gT12_im * i02_re;
340  o11_re += gT10_re * i10_re;
341  o11_re -= gT10_im * i10_im;
342  o11_re += gT11_re * i11_re;
343  o11_re -= gT11_im * i11_im;
344  o11_re += gT12_re * i12_re;
345  o11_re -= gT12_im * i12_im;
346  o11_im += gT10_re * i10_im;
347  o11_im += gT10_im * i10_re;
348  o11_im += gT11_re * i11_im;
349  o11_im += gT11_im * i11_re;
350  o11_im += gT12_re * i12_im;
351  o11_im += gT12_im * i12_re;
352  o21_re += gT10_re * i20_re;
353  o21_re -= gT10_im * i20_im;
354  o21_re += gT11_re * i21_re;
355  o21_re -= gT11_im * i21_im;
356  o21_re += gT12_re * i22_re;
357  o21_re -= gT12_im * i22_im;
358  o21_im += gT10_re * i20_im;
359  o21_im += gT10_im * i20_re;
360  o21_im += gT11_re * i21_im;
361  o21_im += gT11_im * i21_re;
362  o21_im += gT12_re * i22_im;
363  o21_im += gT12_im * i22_re;
364  o31_re += gT10_re * i30_re;
365  o31_re -= gT10_im * i30_im;
366  o31_re += gT11_re * i31_re;
367  o31_re -= gT11_im * i31_im;
368  o31_re += gT12_re * i32_re;
369  o31_re -= gT12_im * i32_im;
370  o31_im += gT10_re * i30_im;
371  o31_im += gT10_im * i30_re;
372  o31_im += gT11_re * i31_im;
373  o31_im += gT11_im * i31_re;
374  o31_im += gT12_re * i32_im;
375  o31_im += gT12_im * i32_re;
376 
377  // multiply row 2
378  o02_re += gT20_re * i00_re;
379  o02_re -= gT20_im * i00_im;
380  o02_re += gT21_re * i01_re;
381  o02_re -= gT21_im * i01_im;
382  o02_re += gT22_re * i02_re;
383  o02_re -= gT22_im * i02_im;
384  o02_im += gT20_re * i00_im;
385  o02_im += gT20_im * i00_re;
386  o02_im += gT21_re * i01_im;
387  o02_im += gT21_im * i01_re;
388  o02_im += gT22_re * i02_im;
389  o02_im += gT22_im * i02_re;
390  o12_re += gT20_re * i10_re;
391  o12_re -= gT20_im * i10_im;
392  o12_re += gT21_re * i11_re;
393  o12_re -= gT21_im * i11_im;
394  o12_re += gT22_re * i12_re;
395  o12_re -= gT22_im * i12_im;
396  o12_im += gT20_re * i10_im;
397  o12_im += gT20_im * i10_re;
398  o12_im += gT21_re * i11_im;
399  o12_im += gT21_im * i11_re;
400  o12_im += gT22_re * i12_im;
401  o12_im += gT22_im * i12_re;
402  o22_re += gT20_re * i20_re;
403  o22_re -= gT20_im * i20_im;
404  o22_re += gT21_re * i21_re;
405  o22_re -= gT21_im * i21_im;
406  o22_re += gT22_re * i22_re;
407  o22_re -= gT22_im * i22_im;
408  o22_im += gT20_re * i20_im;
409  o22_im += gT20_im * i20_re;
410  o22_im += gT21_re * i21_im;
411  o22_im += gT21_im * i21_re;
412  o22_im += gT22_re * i22_im;
413  o22_im += gT22_im * i22_re;
414  o32_re += gT20_re * i30_re;
415  o32_re -= gT20_im * i30_im;
416  o32_re += gT21_re * i31_re;
417  o32_re -= gT21_im * i31_im;
418  o32_re += gT22_re * i32_re;
419  o32_re -= gT22_im * i32_im;
420  o32_im += gT20_re * i30_im;
421  o32_im += gT20_im * i30_re;
422  o32_im += gT21_re * i31_im;
423  o32_im += gT21_im * i31_re;
424  o32_im += gT22_re * i32_im;
425  o32_im += gT22_im * i32_re;
426 
427 
428 #ifdef MULTI_GPU
429  // exterior kernel: the spinor is read with the ghost-face stride instead of param.sp_stride
430  } else {
431 
432  const int sp_stride_pad = ghostFace[static_cast<int>(kernel_type)];
433 
434  // read full spinor from device memory
435  READ_SPINOR(SPINORTEX, sp_stride_pad, sp_idx, sp_norm_idx);
436 
437  // reconstruct gauge matrix (same index 5 as the READ_GAUGE_MATRIX call above)
438  RECONSTRUCT_GAUGE_MATRIX(5);
439 
440  // multiply row 0
441  o00_re += gT00_re * i00_re;
442  o00_re -= gT00_im * i00_im;
443  o00_re += gT01_re * i01_re;
444  o00_re -= gT01_im * i01_im;
445  o00_re += gT02_re * i02_re;
446  o00_re -= gT02_im * i02_im;
447  o00_im += gT00_re * i00_im;
448  o00_im += gT00_im * i00_re;
449  o00_im += gT01_re * i01_im;
450  o00_im += gT01_im * i01_re;
451  o00_im += gT02_re * i02_im;
452  o00_im += gT02_im * i02_re;
453  o10_re += gT00_re * i10_re;
454  o10_re -= gT00_im * i10_im;
455  o10_re += gT01_re * i11_re;
456  o10_re -= gT01_im * i11_im;
457  o10_re += gT02_re * i12_re;
458  o10_re -= gT02_im * i12_im;
459  o10_im += gT00_re * i10_im;
460  o10_im += gT00_im * i10_re;
461  o10_im += gT01_re * i11_im;
462  o10_im += gT01_im * i11_re;
463  o10_im += gT02_re * i12_im;
464  o10_im += gT02_im * i12_re;
465  o20_re += gT00_re * i20_re;
466  o20_re -= gT00_im * i20_im;
467  o20_re += gT01_re * i21_re;
468  o20_re -= gT01_im * i21_im;
469  o20_re += gT02_re * i22_re;
470  o20_re -= gT02_im * i22_im;
471  o20_im += gT00_re * i20_im;
472  o20_im += gT00_im * i20_re;
473  o20_im += gT01_re * i21_im;
474  o20_im += gT01_im * i21_re;
475  o20_im += gT02_re * i22_im;
476  o20_im += gT02_im * i22_re;
477  o30_re += gT00_re * i30_re;
478  o30_re -= gT00_im * i30_im;
479  o30_re += gT01_re * i31_re;
480  o30_re -= gT01_im * i31_im;
481  o30_re += gT02_re * i32_re;
482  o30_re -= gT02_im * i32_im;
483  o30_im += gT00_re * i30_im;
484  o30_im += gT00_im * i30_re;
485  o30_im += gT01_re * i31_im;
486  o30_im += gT01_im * i31_re;
487  o30_im += gT02_re * i32_im;
488  o30_im += gT02_im * i32_re;
489 
490  // multiply row 1
491  o01_re += gT10_re * i00_re;
492  o01_re -= gT10_im * i00_im;
493  o01_re += gT11_re * i01_re;
494  o01_re -= gT11_im * i01_im;
495  o01_re += gT12_re * i02_re;
496  o01_re -= gT12_im * i02_im;
497  o01_im += gT10_re * i00_im;
498  o01_im += gT10_im * i00_re;
499  o01_im += gT11_re * i01_im;
500  o01_im += gT11_im * i01_re;
501  o01_im += gT12_re * i02_im;
502  o01_im += gT12_im * i02_re;
503  o11_re += gT10_re * i10_re;
504  o11_re -= gT10_im * i10_im;
505  o11_re += gT11_re * i11_re;
506  o11_re -= gT11_im * i11_im;
507  o11_re += gT12_re * i12_re;
508  o11_re -= gT12_im * i12_im;
509  o11_im += gT10_re * i10_im;
510  o11_im += gT10_im * i10_re;
511  o11_im += gT11_re * i11_im;
512  o11_im += gT11_im * i11_re;
513  o11_im += gT12_re * i12_im;
514  o11_im += gT12_im * i12_re;
515  o21_re += gT10_re * i20_re;
516  o21_re -= gT10_im * i20_im;
517  o21_re += gT11_re * i21_re;
518  o21_re -= gT11_im * i21_im;
519  o21_re += gT12_re * i22_re;
520  o21_re -= gT12_im * i22_im;
521  o21_im += gT10_re * i20_im;
522  o21_im += gT10_im * i20_re;
523  o21_im += gT11_re * i21_im;
524  o21_im += gT11_im * i21_re;
525  o21_im += gT12_re * i22_im;
526  o21_im += gT12_im * i22_re;
527  o31_re += gT10_re * i30_re;
528  o31_re -= gT10_im * i30_im;
529  o31_re += gT11_re * i31_re;
530  o31_re -= gT11_im * i31_im;
531  o31_re += gT12_re * i32_re;
532  o31_re -= gT12_im * i32_im;
533  o31_im += gT10_re * i30_im;
534  o31_im += gT10_im * i30_re;
535  o31_im += gT11_re * i31_im;
536  o31_im += gT11_im * i31_re;
537  o31_im += gT12_re * i32_im;
538  o31_im += gT12_im * i32_re;
539 
540  // multiply row 2
541  o02_re += gT20_re * i00_re;
542  o02_re -= gT20_im * i00_im;
543  o02_re += gT21_re * i01_re;
544  o02_re -= gT21_im * i01_im;
545  o02_re += gT22_re * i02_re;
546  o02_re -= gT22_im * i02_im;
547  o02_im += gT20_re * i00_im;
548  o02_im += gT20_im * i00_re;
549  o02_im += gT21_re * i01_im;
550  o02_im += gT21_im * i01_re;
551  o02_im += gT22_re * i02_im;
552  o02_im += gT22_im * i02_re;
553  o12_re += gT20_re * i10_re;
554  o12_re -= gT20_im * i10_im;
555  o12_re += gT21_re * i11_re;
556  o12_re -= gT21_im * i11_im;
557  o12_re += gT22_re * i12_re;
558  o12_re -= gT22_im * i12_im;
559  o12_im += gT20_re * i10_im;
560  o12_im += gT20_im * i10_re;
561  o12_im += gT21_re * i11_im;
562  o12_im += gT21_im * i11_re;
563  o12_im += gT22_re * i12_im;
564  o12_im += gT22_im * i12_re;
565  o22_re += gT20_re * i20_re;
566  o22_re -= gT20_im * i20_im;
567  o22_re += gT21_re * i21_re;
568  o22_re -= gT21_im * i21_im;
569  o22_re += gT22_re * i22_re;
570  o22_re -= gT22_im * i22_im;
571  o22_im += gT20_re * i20_im;
572  o22_im += gT20_im * i20_re;
573  o22_im += gT21_re * i21_im;
574  o22_im += gT21_im * i21_re;
575  o22_im += gT22_re * i22_im;
576  o22_im += gT22_im * i22_re;
577  o32_re += gT20_re * i30_re;
578  o32_re -= gT20_im * i30_im;
579  o32_re += gT21_re * i31_re;
580  o32_re -= gT21_im * i31_im;
581  o32_re += gT22_re * i32_re;
582  o32_re -= gT22_im * i32_im;
583  o32_im += gT20_re * i30_im;
584  o32_im += gT20_im * i30_re;
585  o32_im += gT21_re * i31_im;
586  o32_im += gT21_im * i31_re;
587  o32_im += gT22_re * i32_im;
588  o32_im += gT22_im * i32_re;
589 
590  }
591 #endif // MULTI_GPU
592 
593 }
594 
595 
596 
597 // write spinor field back to device memory
598 WRITE_SPINOR(param.sp_stride);
599 
600 // undefine to prevent warning when precision is changed
601 #undef spinorFloat
602 #undef SHARED_STRIDE
603 
604 #undef g00_re
605 #undef g00_im
606 #undef gT00_re
607 #undef gT00_im
608 #undef g01_re
609 #undef g01_im
610 #undef gT01_re
611 #undef gT01_im
612 #undef g02_re
613 #undef g02_im
614 #undef gT02_re
615 #undef gT02_im
616 #undef g10_re
617 #undef g10_im
618 #undef gT10_re
619 #undef gT10_im
620 #undef g11_re
621 #undef g11_im
622 #undef gT11_re
623 #undef gT11_im
624 #undef g12_re
625 #undef g12_im
626 #undef gT12_re
627 #undef gT12_im
628 #undef g20_re
629 #undef g20_im
630 #undef gT20_re
631 #undef gT20_im
632 #undef g21_re
633 #undef g21_im
634 #undef gT21_re
635 #undef gT21_im
636 #undef g22_re
637 #undef g22_im
638 #undef gT22_re
639 #undef gT22_im
640 
641 #undef i00_re
642 #undef i00_im
643 #undef i01_re
644 #undef i01_im
645 #undef i02_re
646 #undef i02_im
647 #undef i10_re
648 #undef i10_im
649 #undef i11_re
650 #undef i11_im
651 #undef i12_re
652 #undef i12_im
653 #undef i20_re
654 #undef i20_im
655 #undef i21_re
656 #undef i21_im
657 #undef i22_re
658 #undef i22_im
659 #undef i30_re
660 #undef i30_im
661 #undef i31_re
662 #undef i31_im
663 #undef i32_re
664 #undef i32_im
665 
666 
#define gT20_re
__constant__ int Vh
VOLATILE spinorFloat o02_im
#define i11_re
#define i02_im
__constant__ int X2
VOLATILE spinorFloat o00_im
VOLATILE spinorFloat o11_re
#define gT21_im
#define i32_re
__constant__ int X3X2X1mX2X1
__constant__ int X1
int sp_idx
RECONSTRUCT_GAUGE_MATRIX(5)
#define gT10_im
VOLATILE spinorFloat o31_re
VOLATILE spinorFloat o22_im
READ_SPINOR(SPINORTEX, param.sp_stride, sp_idx, sp_idx)
#define i30_im
#define gT12_im
VOLATILE spinorFloat o10_im
WRITE_SPINOR(param.sp_stride)
#define gT21_re
#define gT02_re
#define i32_im
const int ga_idx
#define gT02_im
VOLATILE spinorFloat o01_re
#define i01_re
#define i22_im
#define gT12_re
#define gT11_re
QudaGaugeParam param
Definition: pack_test.cpp:17
#define i02_re
__constant__ int ghostFace[QUDA_MAX_DIM+1]
#define i12_im
VOLATILE spinorFloat o21_im
VOLATILE spinorFloat o01_im
VOLATILE spinorFloat o31_im
#define i12_re
#define i31_im
#define i21_im
#define i30_re
#define VOLATILE
#define SPINORTEX
Definition: clover_def.h:40
#define i22_re
VOLATILE spinorFloat o12_re
#define spinorFloat
#define gT22_im
#define gT10_re
#define i20_im
#define i21_re
VOLATILE spinorFloat o30_im
__constant__ int ga_stride
VOLATILE spinorFloat o10_re
VOLATILE spinorFloat o32_im
#define gT01_re
#define i10_re
__constant__ int X3
VOLATILE spinorFloat o32_re
VOLATILE spinorFloat o21_re
#define i00_im
#define i20_re
VOLATILE spinorFloat o20_im
VOLATILE spinorFloat o00_re
READ_GAUGE_MATRIX(G, GAUGE1TEX, 5, ga_idx, ga_stride)
VOLATILE spinorFloat o11_im
#define GAUGE1TEX
Definition: covDev.h:113
VOLATILE spinorFloat o30_re
#define i11_im
VOLATILE spinorFloat o22_re
#define gT11_im
#define gT20_im
#define i10_im
VOLATILE spinorFloat o02_re
#define gT22_re
#define i31_re
KernelType kernel_type
#define gT01_im
#define gT00_re
#define gT00_im
#define i01_im
__constant__ int X4
#define i00_re
__constant__ int X2X1
VOLATILE spinorFloat o20_re
VOLATILE spinorFloat o12_im