QUDA  v0.7.0
A library for QCD on GPUs
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
covDev_mu2_core.h
Go to the documentation of this file.
1 // *** CUDA DSLASH ***
2 // This section first selects the VOLATILE qualifier used in front of the spinorFloat accumulator declarations.
3 //#define DSLASH_SHARED_FLOATS_PER_THREAD 0
4 
5 // VOLATILE expands to nothing on the first branch and to `volatile` on the second.
6 #if ((CUDA_VERSION >= 4010) && (__COMPUTE_CAPABILITY__ >= 200)) // NVVM compiler
7 #define VOLATILE
8 #else // Open64 compiler
9 #define VOLATILE volatile
10 #endif
11 // input spinor: names iAB_re / iAB_im (A = 0..3, B = 0..2) are mapped onto fields of the I* vectors
12 #ifdef SPINOR_DOUBLE
13 #define spinorFloat double
14 #define i00_re I0.x // one complex component per vector here: .x real part, .y imaginary part, I0..I11
15 #define i00_im I0.y
16 #define i01_re I1.x
17 #define i01_im I1.y
18 #define i02_re I2.x
19 #define i02_im I2.y
20 #define i10_re I3.x
21 #define i10_im I3.y
22 #define i11_re I4.x
23 #define i11_im I4.y
24 #define i12_re I5.x
25 #define i12_im I5.y
26 #define i20_re I6.x
27 #define i20_im I6.y
28 #define i21_re I7.x
29 #define i21_im I7.y
30 #define i22_re I8.x
31 #define i22_im I8.y
32 #define i30_re I9.x
33 #define i30_im I9.y
34 #define i31_re I10.x
35 #define i31_im I10.y
36 #define i32_re I11.x
37 #define i32_im I11.y
38 #else // SPINOR_DOUBLE not defined: two complex components per vector (.x/.y and .z/.w), I0..I5
39 #define spinorFloat float
40 #define i00_re I0.x
41 #define i00_im I0.y
42 #define i01_re I0.z
43 #define i01_im I0.w
44 #define i02_re I1.x
45 #define i02_im I1.y
46 #define i10_re I1.z
47 #define i10_im I1.w
48 #define i11_re I2.x
49 #define i11_im I2.y
50 #define i12_re I2.z
51 #define i12_im I2.w
52 #define i20_re I3.x
53 #define i20_im I3.y
54 #define i21_re I3.z
55 #define i21_im I3.w
56 #define i22_re I4.x
57 #define i22_im I4.y
58 #define i30_re I4.z
59 #define i30_im I4.w
60 #define i31_re I5.x
61 #define i31_im I5.y
62 #define i32_re I5.z
63 #define i32_im I5.w
64 #endif // SPINOR_DOUBLE
65 
66 // gauge link: names gRC_re / gRC_im (R, C = 0..2) are mapped onto fields of the G* vectors
67 #ifdef GAUGE_FLOAT2
68 #define g00_re G0.x // one complex entry per vector here: .x real part, .y imaginary part, G0..G8
69 #define g00_im G0.y
70 #define g01_re G1.x
71 #define g01_im G1.y
72 #define g02_re G2.x
73 #define g02_im G2.y
74 #define g10_re G3.x
75 #define g10_im G3.y
76 #define g11_re G4.x
77 #define g11_im G4.y
78 #define g12_re G5.x
79 #define g12_im G5.y
80 #define g20_re G6.x
81 #define g20_im G6.y
82 #define g21_re G7.x
83 #define g21_im G7.y
84 #define g22_re G8.x
85 #define g22_im G8.y
86 
87 #else // GAUGE_FLOAT2 not defined: two complex entries per vector (.x/.y and .z/.w), G0..G4; only .x/.y of G4 are named
88 #define g00_re G0.x
89 #define g00_im G0.y
90 #define g01_re G0.z
91 #define g01_im G0.w
92 #define g02_re G1.x
93 #define g02_im G1.y
94 #define g10_re G1.z
95 #define g10_im G1.w
96 #define g11_re G2.x
97 #define g11_im G2.y
98 #define g12_re G2.z
99 #define g12_im G2.w
100 #define g20_re G3.x
101 #define g20_im G3.y
102 #define g21_re G3.z
103 #define g21_im G3.w
104 #define g22_re G4.x
105 #define g22_im G4.y
106 
107 #endif // GAUGE_FLOAT2
108 
109 // conjugated gauge link: gTab is the complex conjugate of gba (indices swapped, imaginary part negated)
110 #define gT00_re (+g00_re)
111 #define gT00_im (-g00_im)
112 #define gT01_re (+g10_re)
113 #define gT01_im (-g10_im)
114 #define gT02_re (+g20_re)
115 #define gT02_im (-g20_im)
116 #define gT10_re (+g01_re)
117 #define gT10_im (-g01_im)
118 #define gT11_re (+g11_re)
119 #define gT11_im (-g11_im)
120 #define gT12_re (+g21_re)
121 #define gT12_im (-g21_im)
122 #define gT20_re (+g02_re)
123 #define gT20_im (-g02_im)
124 #define gT21_re (+g12_re)
125 #define gT21_im (-g12_im)
126 #define gT22_re (+g22_re)
127 #define gT22_im (-g22_im)
128 
129 // output spinor: one real and one imaginary accumulator per component; zeroed below and then accumulated by the gauge multiply
130 VOLATILE spinorFloat o00_re;
131 VOLATILE spinorFloat o00_im;
132 VOLATILE spinorFloat o01_re;
133 VOLATILE spinorFloat o01_im;
134 VOLATILE spinorFloat o02_re;
135 VOLATILE spinorFloat o02_im;
136 VOLATILE spinorFloat o10_re;
137 VOLATILE spinorFloat o10_im;
138 VOLATILE spinorFloat o11_re;
139 VOLATILE spinorFloat o11_im;
140 VOLATILE spinorFloat o12_re;
141 VOLATILE spinorFloat o12_im;
142 VOLATILE spinorFloat o20_re;
143 VOLATILE spinorFloat o20_im;
144 VOLATILE spinorFloat o21_re;
145 VOLATILE spinorFloat o21_im;
146 VOLATILE spinorFloat o22_re;
147 VOLATILE spinorFloat o22_im;
148 VOLATILE spinorFloat o30_re;
149 VOLATILE spinorFloat o30_im;
150 VOLATILE spinorFloat o31_re;
151 VOLATILE spinorFloat o31_im;
152 VOLATILE spinorFloat o32_re;
153 VOLATILE spinorFloat o32_im;
154 
155 #include "read_gauge.h"
156 #include "io_spinor.h"
157 
158 int x1, x2, x3, x4; // coordinates of the site handled by this thread
159 int X; // linear site index; decomposed below as x1 + X1*(x2 + X2*(x3 + X3*x4))
160 
161 #if (defined MULTI_GPU) && (DD_PREC==2) // half precision
162 int sp_norm_idx;
163 #endif // MULTI_GPU half precision
164 
165 int sid;
166 
167 sid = blockIdx.x*blockDim.x + threadIdx.x;
168 if (sid >= param.threads) return; // surplus threads past param.threads do no work
169 
170 #ifdef MULTI_GPU
171 int face_idx;
172 if (kernel_type == INTERIOR_KERNEL) {
173 #endif
174 
175  // Inline by hand for the moment and assume even dimensions
176  //coordsFromIndex(X, x1, x2, x3, x4, sid, param.parity);
177 
178  X = 2*sid;
179  int aux1 = X / X1;
180  x1 = X - aux1 * X1;
181  int aux2 = aux1 / X2;
182  x2 = aux1 - aux2 * X2;
183  x4 = aux2 / X3;
184  x3 = aux2 - x4 * X3;
185  aux1 = (param.parity + x4 + x3 + x2) & 1; // 0 or 1: shift along x1 chosen from param.parity and the other three coordinates
186  x1 += aux1;
187  X += aux1;
188 
189  o00_re = 0; o00_im = 0;
190  o01_re = 0; o01_im = 0;
191  o02_re = 0; o02_im = 0;
192  o10_re = 0; o10_im = 0;
193  o11_re = 0; o11_im = 0;
194  o12_re = 0; o12_im = 0;
195  o20_re = 0; o20_im = 0;
196  o21_re = 0; o21_im = 0;
197  o22_re = 0; o22_im = 0;
198  o30_re = 0; o30_im = 0;
199  o31_re = 0; o31_im = 0;
200  o32_re = 0; o32_im = 0;
201 
202 #ifdef MULTI_GPU
203 } else { // exterior kernel
204 
205  const int dim = static_cast<int>(kernel_type);
206  const int face_volume = param.threads; // volume of one face
207  const int face_num = 1;
208 
209  face_idx = sid; // index into the respective face
210 
211  // ghostOffset is scaled to include body (includes stride) and number of FloatN arrays (SPINOR_HOP)
212  // face_idx not sid since faces are spin projected and share the same volume index (modulo UP/DOWN reading)
213  //sp_idx = face_idx + param.ghostOffset[dim];
214 
215 #if (DD_PREC==2) // half precision
216  sp_norm_idx = sid + param.ghostNormOffset[static_cast<int>(kernel_type)];
217 #endif
218 
219  const int dims[] = {X1, X2, X3, X4};
220  coordsFromFaceIndex<1>(X, sid, x1, x2, x3, x4, face_idx, face_volume, dim, face_num, param.parity, dims);
221 
222  o00_re = 0.; o00_im = 0.;
223  o01_re = 0.; o01_im = 0.;
224  o02_re = 0.; o02_im = 0.;
225  o10_re = 0.; o10_im = 0.;
226  o11_re = 0.; o11_im = 0.;
227  o12_re = 0.; o12_im = 0.;
228  o20_re = 0.; o20_im = 0.;
229  o21_re = 0.; o21_im = 0.;
230  o22_re = 0.; o22_im = 0.;
231  o30_re = 0.; o30_im = 0.;
232  o31_re = 0.; o31_im = 0.;
233  o32_re = 0.; o32_im = 0.;
234 }
235 #endif // MULTI_GPU
236 
237 
238 #ifdef MULTI_GPU
239 if ( (kernel_type == INTERIOR_KERNEL && (!param.ghostDim[2] || x3<X3m1)) ||
240  (kernel_type == EXTERIOR_KERNEL_Z && x3==X3m1) )
241 #endif
242 {
243  // Projector P2-
244  // 1 0 -i 0
245  // 0 1 0 i
246  // i 0 1 0
247  // 0 -i 0 1
248  // NOTE(review): the multiply below applies the link to all four spinor rows separately and never combines them, so the projector comment above looks inherited - confirm
249 #ifdef MULTI_GPU
250  const int sp_idx = (kernel_type == INTERIOR_KERNEL) ? (x3==X3m1 ? X-X3X2X1mX2X1 : X+X2X1) >> 1 :
251  face_idx + param.ghostOffset[static_cast<int>(kernel_type)];
252 #else
253  const int sp_idx = (x3==X3m1 ? X-X3X2X1mX2X1 : X+X2X1) >> 1; // site one step up in x3 (offset X2X1), with a separate offset when x3==X3m1; halved to index the spinor array
254 #endif
255 
256  const int ga_idx = sid; // the link is read at this thread's own site index
257 
258  // read gauge matrix from device memory
259  READ_GAUGE_MATRIX(G, GAUGE0TEX, 4, ga_idx, ga_stride);
260 
261 
262 #ifdef MULTI_GPU
263  if (kernel_type == INTERIOR_KERNEL) {
264 #endif
265 
266  // read spinor from device memory
267  READ_SPINOR(SPINORTEX, param.sp_stride, sp_idx, sp_idx);
268 
269  // no spin projection: the components are used exactly as read
270  // reconstruct gauge matrix
271  RECONSTRUCT_GAUGE_MATRIX(4);
272 
273  // multiply row 0: for A = 0..3, oA0 += g00*iA0 + g01*iA1 + g02*iA2, with each complex product expanded into real and imaginary parts
274  o00_re += g00_re * i00_re;
275  o00_re -= g00_im * i00_im;
276  o00_re += g01_re * i01_re;
277  o00_re -= g01_im * i01_im;
278  o00_re += g02_re * i02_re;
279  o00_re -= g02_im * i02_im;
280  o00_im += g00_re * i00_im;
281  o00_im += g00_im * i00_re;
282  o00_im += g01_re * i01_im;
283  o00_im += g01_im * i01_re;
284  o00_im += g02_re * i02_im;
285  o00_im += g02_im * i02_re;
286  o10_re += g00_re * i10_re;
287  o10_re -= g00_im * i10_im;
288  o10_re += g01_re * i11_re;
289  o10_re -= g01_im * i11_im;
290  o10_re += g02_re * i12_re;
291  o10_re -= g02_im * i12_im;
292  o10_im += g00_re * i10_im;
293  o10_im += g00_im * i10_re;
294  o10_im += g01_re * i11_im;
295  o10_im += g01_im * i11_re;
296  o10_im += g02_re * i12_im;
297  o10_im += g02_im * i12_re;
298  o20_re += g00_re * i20_re;
299  o20_re -= g00_im * i20_im;
300  o20_re += g01_re * i21_re;
301  o20_re -= g01_im * i21_im;
302  o20_re += g02_re * i22_re;
303  o20_re -= g02_im * i22_im;
304  o20_im += g00_re * i20_im;
305  o20_im += g00_im * i20_re;
306  o20_im += g01_re * i21_im;
307  o20_im += g01_im * i21_re;
308  o20_im += g02_re * i22_im;
309  o20_im += g02_im * i22_re;
310  o30_re += g00_re * i30_re;
311  o30_re -= g00_im * i30_im;
312  o30_re += g01_re * i31_re;
313  o30_re -= g01_im * i31_im;
314  o30_re += g02_re * i32_re;
315  o30_re -= g02_im * i32_im;
316  o30_im += g00_re * i30_im;
317  o30_im += g00_im * i30_re;
318  o30_im += g01_re * i31_im;
319  o30_im += g01_im * i31_re;
320  o30_im += g02_re * i32_im;
321  o30_im += g02_im * i32_re;
322 
323  // multiply row 1: for A = 0..3, oA1 += g10*iA0 + g11*iA1 + g12*iA2, with each complex product expanded into real and imaginary parts
324  o01_re += g10_re * i00_re;
325  o01_re -= g10_im * i00_im;
326  o01_re += g11_re * i01_re;
327  o01_re -= g11_im * i01_im;
328  o01_re += g12_re * i02_re;
329  o01_re -= g12_im * i02_im;
330  o01_im += g10_re * i00_im;
331  o01_im += g10_im * i00_re;
332  o01_im += g11_re * i01_im;
333  o01_im += g11_im * i01_re;
334  o01_im += g12_re * i02_im;
335  o01_im += g12_im * i02_re;
336  o11_re += g10_re * i10_re;
337  o11_re -= g10_im * i10_im;
338  o11_re += g11_re * i11_re;
339  o11_re -= g11_im * i11_im;
340  o11_re += g12_re * i12_re;
341  o11_re -= g12_im * i12_im;
342  o11_im += g10_re * i10_im;
343  o11_im += g10_im * i10_re;
344  o11_im += g11_re * i11_im;
345  o11_im += g11_im * i11_re;
346  o11_im += g12_re * i12_im;
347  o11_im += g12_im * i12_re;
348  o21_re += g10_re * i20_re;
349  o21_re -= g10_im * i20_im;
350  o21_re += g11_re * i21_re;
351  o21_re -= g11_im * i21_im;
352  o21_re += g12_re * i22_re;
353  o21_re -= g12_im * i22_im;
354  o21_im += g10_re * i20_im;
355  o21_im += g10_im * i20_re;
356  o21_im += g11_re * i21_im;
357  o21_im += g11_im * i21_re;
358  o21_im += g12_re * i22_im;
359  o21_im += g12_im * i22_re;
360  o31_re += g10_re * i30_re;
361  o31_re -= g10_im * i30_im;
362  o31_re += g11_re * i31_re;
363  o31_re -= g11_im * i31_im;
364  o31_re += g12_re * i32_re;
365  o31_re -= g12_im * i32_im;
366  o31_im += g10_re * i30_im;
367  o31_im += g10_im * i30_re;
368  o31_im += g11_re * i31_im;
369  o31_im += g11_im * i31_re;
370  o31_im += g12_re * i32_im;
371  o31_im += g12_im * i32_re;
372 
373  // multiply row 2: for A = 0..3, oA2 += g20*iA0 + g21*iA1 + g22*iA2, with each complex product expanded into real and imaginary parts
374  o02_re += g20_re * i00_re;
375  o02_re -= g20_im * i00_im;
376  o02_re += g21_re * i01_re;
377  o02_re -= g21_im * i01_im;
378  o02_re += g22_re * i02_re;
379  o02_re -= g22_im * i02_im;
380  o02_im += g20_re * i00_im;
381  o02_im += g20_im * i00_re;
382  o02_im += g21_re * i01_im;
383  o02_im += g21_im * i01_re;
384  o02_im += g22_re * i02_im;
385  o02_im += g22_im * i02_re;
386  o12_re += g20_re * i10_re;
387  o12_re -= g20_im * i10_im;
388  o12_re += g21_re * i11_re;
389  o12_re -= g21_im * i11_im;
390  o12_re += g22_re * i12_re;
391  o12_re -= g22_im * i12_im;
392  o12_im += g20_re * i10_im;
393  o12_im += g20_im * i10_re;
394  o12_im += g21_re * i11_im;
395  o12_im += g21_im * i11_re;
396  o12_im += g22_re * i12_im;
397  o12_im += g22_im * i12_re;
398  o22_re += g20_re * i20_re;
399  o22_re -= g20_im * i20_im;
400  o22_re += g21_re * i21_re;
401  o22_re -= g21_im * i21_im;
402  o22_re += g22_re * i22_re;
403  o22_re -= g22_im * i22_im;
404  o22_im += g20_re * i20_im;
405  o22_im += g20_im * i20_re;
406  o22_im += g21_re * i21_im;
407  o22_im += g21_im * i21_re;
408  o22_im += g22_re * i22_im;
409  o22_im += g22_im * i22_re;
410  o32_re += g20_re * i30_re;
411  o32_re -= g20_im * i30_im;
412  o32_re += g21_re * i31_re;
413  o32_re -= g21_im * i31_im;
414  o32_re += g22_re * i32_re;
415  o32_re -= g22_im * i32_im;
416  o32_im += g20_re * i30_im;
417  o32_im += g20_im * i30_re;
418  o32_im += g21_re * i31_im;
419  o32_im += g21_im * i31_re;
420  o32_im += g22_re * i32_im;
421  o32_im += g22_im * i32_re;
422 
423 
424 #ifdef MULTI_GPU
425  // non-interior kernel: the spinor is read with the ghostFace stride instead of param.sp_stride
426  } else {
427 
428  const int sp_stride_pad = ghostFace[static_cast<int>(kernel_type)];
429 
430  // read full spinor from device memory
431  READ_SPINOR(SPINORTEX, sp_stride_pad, sp_idx /*+ (SPINOR_HOP)*sp_stride_pad*/, sp_norm_idx);
432 
433  // reconstruct gauge matrix
434  RECONSTRUCT_GAUGE_MATRIX(4);
435 
436  // multiply row 0: for A = 0..3, oA0 += g00*iA0 + g01*iA1 + g02*iA2 (same statements as the interior branch, applied to the spinor read with the ghostFace stride)
437  o00_re += g00_re * i00_re;
438  o00_re -= g00_im * i00_im;
439  o00_re += g01_re * i01_re;
440  o00_re -= g01_im * i01_im;
441  o00_re += g02_re * i02_re;
442  o00_re -= g02_im * i02_im;
443  o00_im += g00_re * i00_im;
444  o00_im += g00_im * i00_re;
445  o00_im += g01_re * i01_im;
446  o00_im += g01_im * i01_re;
447  o00_im += g02_re * i02_im;
448  o00_im += g02_im * i02_re;
449  o10_re += g00_re * i10_re;
450  o10_re -= g00_im * i10_im;
451  o10_re += g01_re * i11_re;
452  o10_re -= g01_im * i11_im;
453  o10_re += g02_re * i12_re;
454  o10_re -= g02_im * i12_im;
455  o10_im += g00_re * i10_im;
456  o10_im += g00_im * i10_re;
457  o10_im += g01_re * i11_im;
458  o10_im += g01_im * i11_re;
459  o10_im += g02_re * i12_im;
460  o10_im += g02_im * i12_re;
461  o20_re += g00_re * i20_re;
462  o20_re -= g00_im * i20_im;
463  o20_re += g01_re * i21_re;
464  o20_re -= g01_im * i21_im;
465  o20_re += g02_re * i22_re;
466  o20_re -= g02_im * i22_im;
467  o20_im += g00_re * i20_im;
468  o20_im += g00_im * i20_re;
469  o20_im += g01_re * i21_im;
470  o20_im += g01_im * i21_re;
471  o20_im += g02_re * i22_im;
472  o20_im += g02_im * i22_re;
473  o30_re += g00_re * i30_re;
474  o30_re -= g00_im * i30_im;
475  o30_re += g01_re * i31_re;
476  o30_re -= g01_im * i31_im;
477  o30_re += g02_re * i32_re;
478  o30_re -= g02_im * i32_im;
479  o30_im += g00_re * i30_im;
480  o30_im += g00_im * i30_re;
481  o30_im += g01_re * i31_im;
482  o30_im += g01_im * i31_re;
483  o30_im += g02_re * i32_im;
484  o30_im += g02_im * i32_re;
485 
486  // multiply row 1: for A = 0..3, oA1 += g10*iA0 + g11*iA1 + g12*iA2 (same statements as the interior branch)
487  o01_re += g10_re * i00_re;
488  o01_re -= g10_im * i00_im;
489  o01_re += g11_re * i01_re;
490  o01_re -= g11_im * i01_im;
491  o01_re += g12_re * i02_re;
492  o01_re -= g12_im * i02_im;
493  o01_im += g10_re * i00_im;
494  o01_im += g10_im * i00_re;
495  o01_im += g11_re * i01_im;
496  o01_im += g11_im * i01_re;
497  o01_im += g12_re * i02_im;
498  o01_im += g12_im * i02_re;
499  o11_re += g10_re * i10_re;
500  o11_re -= g10_im * i10_im;
501  o11_re += g11_re * i11_re;
502  o11_re -= g11_im * i11_im;
503  o11_re += g12_re * i12_re;
504  o11_re -= g12_im * i12_im;
505  o11_im += g10_re * i10_im;
506  o11_im += g10_im * i10_re;
507  o11_im += g11_re * i11_im;
508  o11_im += g11_im * i11_re;
509  o11_im += g12_re * i12_im;
510  o11_im += g12_im * i12_re;
511  o21_re += g10_re * i20_re;
512  o21_re -= g10_im * i20_im;
513  o21_re += g11_re * i21_re;
514  o21_re -= g11_im * i21_im;
515  o21_re += g12_re * i22_re;
516  o21_re -= g12_im * i22_im;
517  o21_im += g10_re * i20_im;
518  o21_im += g10_im * i20_re;
519  o21_im += g11_re * i21_im;
520  o21_im += g11_im * i21_re;
521  o21_im += g12_re * i22_im;
522  o21_im += g12_im * i22_re;
523  o31_re += g10_re * i30_re;
524  o31_re -= g10_im * i30_im;
525  o31_re += g11_re * i31_re;
526  o31_re -= g11_im * i31_im;
527  o31_re += g12_re * i32_re;
528  o31_re -= g12_im * i32_im;
529  o31_im += g10_re * i30_im;
530  o31_im += g10_im * i30_re;
531  o31_im += g11_re * i31_im;
532  o31_im += g11_im * i31_re;
533  o31_im += g12_re * i32_im;
534  o31_im += g12_im * i32_re;
535 
536  // multiply row 2: for A = 0..3, oA2 += g20*iA0 + g21*iA1 + g22*iA2 (same statements as the interior branch)
537  o02_re += g20_re * i00_re;
538  o02_re -= g20_im * i00_im;
539  o02_re += g21_re * i01_re;
540  o02_re -= g21_im * i01_im;
541  o02_re += g22_re * i02_re;
542  o02_re -= g22_im * i02_im;
543  o02_im += g20_re * i00_im;
544  o02_im += g20_im * i00_re;
545  o02_im += g21_re * i01_im;
546  o02_im += g21_im * i01_re;
547  o02_im += g22_re * i02_im;
548  o02_im += g22_im * i02_re;
549  o12_re += g20_re * i10_re;
550  o12_re -= g20_im * i10_im;
551  o12_re += g21_re * i11_re;
552  o12_re -= g21_im * i11_im;
553  o12_re += g22_re * i12_re;
554  o12_re -= g22_im * i12_im;
555  o12_im += g20_re * i10_im;
556  o12_im += g20_im * i10_re;
557  o12_im += g21_re * i11_im;
558  o12_im += g21_im * i11_re;
559  o12_im += g22_re * i12_im;
560  o12_im += g22_im * i12_re;
561  o22_re += g20_re * i20_re;
562  o22_re -= g20_im * i20_im;
563  o22_re += g21_re * i21_re;
564  o22_re -= g21_im * i21_im;
565  o22_re += g22_re * i22_re;
566  o22_re -= g22_im * i22_im;
567  o22_im += g20_re * i20_im;
568  o22_im += g20_im * i20_re;
569  o22_im += g21_re * i21_im;
570  o22_im += g21_im * i21_re;
571  o22_im += g22_re * i22_im;
572  o22_im += g22_im * i22_re;
573  o32_re += g20_re * i30_re;
574  o32_re -= g20_im * i30_im;
575  o32_re += g21_re * i31_re;
576  o32_re -= g21_im * i31_im;
577  o32_re += g22_re * i32_re;
578  o32_re -= g22_im * i32_im;
579  o32_im += g20_re * i30_im;
580  o32_im += g20_im * i30_re;
581  o32_im += g21_re * i31_im;
582  o32_im += g21_im * i31_re;
583  o32_im += g22_re * i32_im;
584  o32_im += g22_im * i32_re;
585 
586  } // end of the non-interior branch
587 #endif // MULTI_GPU
588 
589 } // end of the hop block
590 
591 
592 
593 // write spinor field back to device memory; this sits outside the hop block, so it also runs when that block was skipped and the accumulators still hold their initial zeros
594 WRITE_SPINOR(param.sp_stride);
595 
596 // undefine to prevent warning when precision is changed
597 #undef spinorFloat
598 #undef SHARED_STRIDE
599 // NOTE(review): SHARED_STRIDE is not defined in the visible part of this file, and VOLATILE is not undefined here - confirm both are intended
600 #undef g00_re
601 #undef g00_im
602 #undef gT00_re
603 #undef gT00_im
604 #undef g01_re
605 #undef g01_im
606 #undef gT01_re
607 #undef gT01_im
608 #undef g02_re
609 #undef g02_im
610 #undef gT02_re
611 #undef gT02_im
612 #undef g10_re
613 #undef g10_im
614 #undef gT10_re
615 #undef gT10_im
616 #undef g11_re
617 #undef g11_im
618 #undef gT11_re
619 #undef gT11_im
620 #undef g12_re
621 #undef g12_im
622 #undef gT12_re
623 #undef gT12_im
624 #undef g20_re
625 #undef g20_im
626 #undef gT20_re
627 #undef gT20_im
628 #undef g21_re
629 #undef g21_im
630 #undef gT21_re
631 #undef gT21_im
632 #undef g22_re
633 #undef g22_im
634 #undef gT22_re
635 #undef gT22_im
636 // input spinor component names
637 #undef i00_re
638 #undef i00_im
639 #undef i01_re
640 #undef i01_im
641 #undef i02_re
642 #undef i02_im
643 #undef i10_re
644 #undef i10_im
645 #undef i11_re
646 #undef i11_im
647 #undef i12_re
648 #undef i12_im
649 #undef i20_re
650 #undef i20_im
651 #undef i21_re
652 #undef i21_im
653 #undef i22_re
654 #undef i22_im
655 #undef i30_re
656 #undef i30_im
657 #undef i31_re
658 #undef i31_im
659 #undef i32_re
660 #undef i32_im
661 
662 
VOLATILE spinorFloat o12_im
#define i21_im
#define i21_re
int x1
VOLATILE spinorFloat o22_re
#define g20_im
#define i01_im
__constant__ int X2
VOLATILE spinorFloat o01_re
VOLATILE spinorFloat o12_re
VOLATILE spinorFloat o20_im
#define g12_im
int aux1
VOLATILE spinorFloat o30_re
const int ga_idx
#define i20_im
int x3
#define g22_im
#define i10_re
#define g01_im
VOLATILE spinorFloat o10_re
__constant__ int X3X2X1mX2X1
VOLATILE spinorFloat o30_im
__constant__ int X1
#define i30_im
#define i00_re
int sp_idx
#define i00_im
#define i11_im
#define g21_im
#define g00_re
#define i10_im
VOLATILE spinorFloat o11_im
#define g20_re
VOLATILE spinorFloat o20_re
#define i32_re
VOLATILE spinorFloat o02_im
int x2
VOLATILE spinorFloat o00_re
#define g21_re
#define g00_im
QudaGaugeParam param
Definition: pack_test.cpp:17
__constant__ int ghostFace[QUDA_MAX_DIM+1]
#define i31_im
RECONSTRUCT_GAUGE_MATRIX(4)
#define g10_im
#define i02_re
#define i32_im
#define GAUGE0TEX
Definition: covDev.h:112
VOLATILE spinorFloat o10_im
VOLATILE spinorFloat o31_re
int sid
#define i11_re
#define i22_im
#define g02_re
#define SPINORTEX
Definition: clover_def.h:40
VOLATILE spinorFloat o32_re
#define g11_im
#define i22_re
#define i20_re
#define VOLATILE
VOLATILE spinorFloat o32_im
VOLATILE spinorFloat o21_re
READ_GAUGE_MATRIX(G, GAUGE0TEX, 4, ga_idx, ga_stride)
#define i12_im
__constant__ int ga_stride
VOLATILE spinorFloat o11_re
#define i30_re
#define g11_re
int aux2
__constant__ int X3
#define g12_re
VOLATILE spinorFloat o00_im
#define spinorFloat
int X
#define i02_im
VOLATILE spinorFloat o02_re
VOLATILE spinorFloat o21_im
WRITE_SPINOR(param.sp_stride)
VOLATILE spinorFloat o22_im
#define i01_re
VOLATILE spinorFloat o01_im
#define i12_re
#define g02_im
#define i31_re
#define g22_re
VOLATILE spinorFloat o31_im
int x4
READ_SPINOR(SPINORTEX, param.sp_stride, sp_idx, sp_idx)
KernelType kernel_type
#define g01_re
__constant__ int X4
__constant__ int X3m1
#define g10_re
__constant__ int X2X1