QUDA  v0.7.0
A library for QCD on GPUs
covDev_mu3_core.h
1 // *** CUDA DSLASH ***
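//
// Summary (added for readability): this core file implements one forward
// covariant-derivative hop in the mu = 3 (time) direction. For every even/odd
// lattice site x it computes out(x) = U_3(x) * in(x + 3hat), i.e. the
// neighbouring spinor is parallel transported by the local gauge link, with
// no spin projection applied.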
2 
3 //#define DSLASH_SHARED_FLOATS_PER_THREAD 0
4 
5 
6 #if ((CUDA_VERSION >= 4010) && (__COMPUTE_CAPABILITY__ >= 200)) // NVVM compiler
7 #define VOLATILE
8 #else // Open64 compiler
9 #define VOLATILE volatile
10 #endif
11 // input spinor
12 #ifdef SPINOR_DOUBLE
13 #define spinorFloat double
14 #define WRITE_SPINOR_SHARED WRITE_SPINOR_SHARED_DOUBLE2
15 #define READ_SPINOR_SHARED READ_SPINOR_SHARED_DOUBLE2
16 #define i00_re I0.x
17 #define i00_im I0.y
18 #define i01_re I1.x
19 #define i01_im I1.y
20 #define i02_re I2.x
21 #define i02_im I2.y
22 #define i10_re I3.x
23 #define i10_im I3.y
24 #define i11_re I4.x
25 #define i11_im I4.y
26 #define i12_re I5.x
27 #define i12_im I5.y
28 #define i20_re I6.x
29 #define i20_im I6.y
30 #define i21_re I7.x
31 #define i21_im I7.y
32 #define i22_re I8.x
33 #define i22_im I8.y
34 #define i30_re I9.x
35 #define i30_im I9.y
36 #define i31_re I10.x
37 #define i31_im I10.y
38 #define i32_re I11.x
39 #define i32_im I11.y
40 #else
41 #define spinorFloat float
42 #define i00_re I0.x
43 #define i00_im I0.y
44 #define i01_re I0.z
45 #define i01_im I0.w
46 #define i02_re I1.x
47 #define i02_im I1.y
48 #define i10_re I1.z
49 #define i10_im I1.w
50 #define i11_re I2.x
51 #define i11_im I2.y
52 #define i12_re I2.z
53 #define i12_im I2.w
54 #define i20_re I3.x
55 #define i20_im I3.y
56 #define i21_re I3.z
57 #define i21_im I3.w
58 #define i22_re I4.x
59 #define i22_im I4.y
60 #define i30_re I4.z
61 #define i30_im I4.w
62 #define i31_re I5.x
63 #define i31_im I5.y
64 #define i32_re I5.z
65 #define i32_im I5.w
66 #endif // SPINOR_DOUBLE
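// Note (added): in double precision the 24 real spinor components arrive in
// twelve double2 registers I0..I11; in single precision they arrive packed in
// six float4 registers I0..I5. The i##_re / i##_im macros above simply name
// the corresponding vector fields.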
67 
68 // gauge link
69 #ifdef GAUGE_FLOAT2
70 #define g00_re G0.x
71 #define g00_im G0.y
72 #define g01_re G1.x
73 #define g01_im G1.y
74 #define g02_re G2.x
75 #define g02_im G2.y
76 #define g10_re G3.x
77 #define g10_im G3.y
78 #define g11_re G4.x
79 #define g11_im G4.y
80 #define g12_re G5.x
81 #define g12_im G5.y
82 #define g20_re G6.x
83 #define g20_im G6.y
84 #define g21_re G7.x
85 #define g21_im G7.y
86 #define g22_re G8.x
87 #define g22_im G8.y
88 
89 #else
90 #define g00_re G0.x
91 #define g00_im G0.y
92 #define g01_re G0.z
93 #define g01_im G0.w
94 #define g02_re G1.x
95 #define g02_im G1.y
96 #define g10_re G1.z
97 #define g10_im G1.w
98 #define g11_re G2.x
99 #define g11_im G2.y
100 #define g12_re G2.z
101 #define g12_im G2.w
102 #define g20_re G3.x
103 #define g20_im G3.y
104 #define g21_re G3.z
105 #define g21_im G3.w
106 #define g22_re G4.x
107 #define g22_im G4.y
108 
109 #endif // GAUGE_FLOAT2
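// Note (added): with GAUGE_FLOAT2 the link occupies nine two-component
// registers G0..G8 (all 18 reals loaded explicitly); otherwise it is spread
// over five four-component registers G0..G4. Depending on the reconstruction
// type, READ_GAUGE_MATRIX may load only part of the link, with the remaining
// elements rebuilt by RECONSTRUCT_GAUGE_MATRIX further down.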
110 
111 // conjugated gauge link
112 #define gT00_re (+g00_re)
113 #define gT00_im (-g00_im)
114 #define gT01_re (+g10_re)
115 #define gT01_im (-g10_im)
116 #define gT02_re (+g20_re)
117 #define gT02_im (-g20_im)
118 #define gT10_re (+g01_re)
119 #define gT10_im (-g01_im)
120 #define gT11_re (+g11_re)
121 #define gT11_im (-g11_im)
122 #define gT12_re (+g21_re)
123 #define gT12_im (-g21_im)
124 #define gT20_re (+g02_re)
125 #define gT20_im (-g02_im)
126 #define gT21_re (+g12_re)
127 #define gT21_im (-g12_im)
128 #define gT22_re (+g22_re)
129 #define gT22_im (-g22_im)
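// Note (added): the gT## macros are the Hermitian conjugate of the link
// (transpose with negated imaginary parts), as needed for a backward or
// daggered hop; they do not appear to be used in the body of this particular
// kernel.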
130 
131 // output spinor
132 VOLATILE spinorFloat o00_re;
133 VOLATILE spinorFloat o00_im;
134 VOLATILE spinorFloat o01_re;
135 VOLATILE spinorFloat o01_im;
136 VOLATILE spinorFloat o02_re;
137 VOLATILE spinorFloat o02_im;
138 VOLATILE spinorFloat o10_re;
139 VOLATILE spinorFloat o10_im;
140 VOLATILE spinorFloat o11_re;
141 VOLATILE spinorFloat o11_im;
142 VOLATILE spinorFloat o12_re;
143 VOLATILE spinorFloat o12_im;
144 VOLATILE spinorFloat o20_re;
145 VOLATILE spinorFloat o20_im;
146 VOLATILE spinorFloat o21_re;
147 VOLATILE spinorFloat o21_im;
148 VOLATILE spinorFloat o22_re;
149 VOLATILE spinorFloat o22_im;
150 VOLATILE spinorFloat o30_re;
151 VOLATILE spinorFloat o30_im;
152 VOLATILE spinorFloat o31_re;
153 VOLATILE spinorFloat o31_im;
154 VOLATILE spinorFloat o32_re;
155 VOLATILE spinorFloat o32_im;
156 
157 #ifdef SPINOR_DOUBLE
158 #define SHARED_STRIDE 16 // to avoid bank conflicts on Fermi
159 #else
160 #define SHARED_STRIDE 32 // to avoid bank conflicts on Fermi
161 #endif
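// Note (added): 32 floats or 16 doubles span 128 bytes, one full pass over
// Fermi's 32 four-byte shared-memory banks, so all components belonging to one
// thread map to the same bank(s) and threads within a warp do not collide.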
162 
163 #include "read_gauge.h"
164 #include "io_spinor.h"
165 
166 int x1, x2, x3, x4;
167 int X;
168 
169 #if (defined MULTI_GPU) && (DD_PREC==2) // half precision
170 int sp_norm_idx;
171 #endif // MULTI_GPU half precision
172 
173 int sid;
174 
175 sid = blockIdx.x*blockDim.x + threadIdx.x;
176 if (sid >= param.threads) return;
177 
178 #ifdef MULTI_GPU
179 int face_idx;
180 if (kernel_type == INTERIOR_KERNEL) {
181 #endif
182 
183  // Inline by hand for the moment and assume even dimensions
184  //coordsFromIndex(X, x1, x2, x3, x4, sid, param.parity);
185 
186  X = 2*sid;
187  int aux1 = X / X1;
188  x1 = X - aux1 * X1;
189  int aux2 = aux1 / X2;
190  x2 = aux1 - aux2 * X2;
191  x4 = aux2 / X3;
192  x3 = aux2 - x4 * X3;
193  aux1 = (param.parity + x4 + x3 + x2) & 1;
194  x1 += aux1;
195  X += aux1;
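 // Note (added): sid is the checkerboarded (even/odd) site index. X = 2*sid is
 // a provisional full-lattice index, which the divisions above unpack into the
 // coordinates (x1, x2, x3, x4); aux1 then shifts x1 (and X) by one site where
 // necessary so that (x1 + x2 + x3 + x4) has the parity requested in
 // param.parity.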
196 
197  o00_re = 0; o00_im = 0;
198  o01_re = 0; o01_im = 0;
199  o02_re = 0; o02_im = 0;
200  o10_re = 0; o10_im = 0;
201  o11_re = 0; o11_im = 0;
202  o12_re = 0; o12_im = 0;
203  o20_re = 0; o20_im = 0;
204  o21_re = 0; o21_im = 0;
205  o22_re = 0; o22_im = 0;
206  o30_re = 0; o30_im = 0;
207  o31_re = 0; o31_im = 0;
208  o32_re = 0; o32_im = 0;
209 
210 #ifdef MULTI_GPU
211 } else { // exterior kernel
212 /*
213  const int dim = static_cast<int>(kernel_type);
214  const int face_volume = (param.threads >> 1); // volume of one face
215  const int face_num = (sid >= face_volume);
216 
217  face_idx = sid - face_num*face_volume; // index into the respective face
218 */
219  const int dim = static_cast<int>(kernel_type);
220  const int face_volume = param.threads; // volume of one face
221  const int face_num = 1; // was 1
222 
223  face_idx = sid; // index into the respective face
224 
225  // ghostOffset is scaled to include body (includes stride) and number of FloatN arrays (SPINOR_HOP)
226  // face_idx not sid since faces are spin projected and share the same volume index (modulo UP/DOWN reading)
227  //sp_idx = face_idx + param.ghostOffset[dim];
228 
229 #if (DD_PREC==2) // half precision
230  sp_norm_idx = sid + param.ghostNormOffset[static_cast<int>(kernel_type)];
231 #endif
232 
233  const int dims[] = {X1, X2, X3, X4};
234  coordsFromFaceIndex<1>(X, sid, x1, x2, x3, x4, face_idx, face_volume, dim, face_num, param.parity, dims);
235 
236  o00_re = 0.; o00_im = 0.;
237  o01_re = 0.; o01_im = 0.;
238  o02_re = 0.; o02_im = 0.;
239  o10_re = 0.; o10_im = 0.;
240  o11_re = 0.; o11_im = 0.;
241  o12_re = 0.; o12_im = 0.;
242  o20_re = 0.; o20_im = 0.;
243  o21_re = 0.; o21_im = 0.;
244  o22_re = 0.; o22_im = 0.;
245  o30_re = 0.; o30_im = 0.;
246  o31_re = 0.; o31_im = 0.;
247  o32_re = 0.; o32_im = 0.;
248 }
249 #endif // MULTI_GPU
250 
251 
252 #ifdef MULTI_GPU
253 if ( (kernel_type == INTERIOR_KERNEL && (!param.ghostDim[3] || x4<X4m1)) ||
254      (kernel_type == EXTERIOR_KERNEL_T && x4==X4m1) )
255 #endif
256 {
257  // Projector P3-
258  // 0 0 0 0
259  // 0 0 0 0
260  // 0 0 2 0
261  // 0 0 0 2
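 // Note (added): the projector written above is not actually applied in this
 // file; all four spin components are multiplied by the gauge link below, with
 // no projection and no factor of 2, as expected for a plain covariant
 // displacement.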
262 
263 #ifdef MULTI_GPU
264  const int sp_idx = (kernel_type == INTERIOR_KERNEL) ? (x4==X4m1 ? X-X4X3X2X1mX3X2X1 : X+X3X2X1) >> 1 :
265  face_idx + param.ghostOffset[static_cast<int>(kernel_type)];
266 #else
267  const int sp_idx = (x4==X4m1 ? X-X4X3X2X1mX3X2X1 : X+X3X2X1) >> 1;
268 #endif
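 // Note (added): sp_idx is the checkerboard index of the forward neighbour in
 // the time direction: X + X3X2X1 advances x4 by one, the X4X3X2X1mX3X2X1
 // branch wraps x4 = X4-1 back to x4 = 0, and the >> 1 converts the
 // full-lattice index into an even/odd index. In the multi-GPU exterior kernel
 // it instead points into the ghost-zone buffer at param.ghostOffset.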
269 
270  const int ga_idx = sid;
271 
272  {
273  // read gauge matrix from device memory
274  READ_GAUGE_MATRIX(G, GAUGE0TEX, 6, ga_idx, ga_stride);
275 
276 
277 #ifdef MULTI_GPU
278  if (kernel_type == INTERIOR_KERNEL) {
279 #endif
280 
281  // read spinor from device memory
282  READ_SPINOR(SPINORTEX, param.sp_stride, sp_idx, sp_idx);
283 
284  // No spin projection is applied to the spinor (plain parallel transport)
285  // reconstruct gauge matrix
286  RECONSTRUCT_GAUGE_MATRIX(6);
287 
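 // Note (added): the block below is a plain SU(3) matrix-vector product in
 // explicit complex arithmetic, applied independently to each spin component
 // s = 0..3:  o[s][c] = sum_{c'} g[c][c'] * i[s][c'], with every complex
 // multiply expanded into four real multiply-adds.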
288  // multiply row 0
289  o00_re += g00_re * i00_re;
290  o00_re -= g00_im * i00_im;
291  o00_re += g01_re * i01_re;
292  o00_re -= g01_im * i01_im;
293  o00_re += g02_re * i02_re;
294  o00_re -= g02_im * i02_im;
295  o00_im += g00_re * i00_im;
296  o00_im += g00_im * i00_re;
297  o00_im += g01_re * i01_im;
298  o00_im += g01_im * i01_re;
299  o00_im += g02_re * i02_im;
300  o00_im += g02_im * i02_re;
301  o10_re += g00_re * i10_re;
302  o10_re -= g00_im * i10_im;
303  o10_re += g01_re * i11_re;
304  o10_re -= g01_im * i11_im;
305  o10_re += g02_re * i12_re;
306  o10_re -= g02_im * i12_im;
307  o10_im += g00_re * i10_im;
308  o10_im += g00_im * i10_re;
309  o10_im += g01_re * i11_im;
310  o10_im += g01_im * i11_re;
311  o10_im += g02_re * i12_im;
312  o10_im += g02_im * i12_re;
313  o20_re += g00_re * i20_re;
314  o20_re -= g00_im * i20_im;
315  o20_re += g01_re * i21_re;
316  o20_re -= g01_im * i21_im;
317  o20_re += g02_re * i22_re;
318  o20_re -= g02_im * i22_im;
319  o20_im += g00_re * i20_im;
320  o20_im += g00_im * i20_re;
321  o20_im += g01_re * i21_im;
322  o20_im += g01_im * i21_re;
323  o20_im += g02_re * i22_im;
324  o20_im += g02_im * i22_re;
325  o30_re += g00_re * i30_re;
326  o30_re -= g00_im * i30_im;
327  o30_re += g01_re * i31_re;
328  o30_re -= g01_im * i31_im;
329  o30_re += g02_re * i32_re;
330  o30_re -= g02_im * i32_im;
331  o30_im += g00_re * i30_im;
332  o30_im += g00_im * i30_re;
333  o30_im += g01_re * i31_im;
334  o30_im += g01_im * i31_re;
335  o30_im += g02_re * i32_im;
336  o30_im += g02_im * i32_re;
337 
338  // multiply row 1
339  o01_re += g10_re * i00_re;
340  o01_re -= g10_im * i00_im;
341  o01_re += g11_re * i01_re;
342  o01_re -= g11_im * i01_im;
343  o01_re += g12_re * i02_re;
344  o01_re -= g12_im * i02_im;
345  o01_im += g10_re * i00_im;
346  o01_im += g10_im * i00_re;
347  o01_im += g11_re * i01_im;
348  o01_im += g11_im * i01_re;
349  o01_im += g12_re * i02_im;
350  o01_im += g12_im * i02_re;
351  o11_re += g10_re * i10_re;
352  o11_re -= g10_im * i10_im;
353  o11_re += g11_re * i11_re;
354  o11_re -= g11_im * i11_im;
355  o11_re += g12_re * i12_re;
356  o11_re -= g12_im * i12_im;
357  o11_im += g10_re * i10_im;
358  o11_im += g10_im * i10_re;
359  o11_im += g11_re * i11_im;
360  o11_im += g11_im * i11_re;
361  o11_im += g12_re * i12_im;
362  o11_im += g12_im * i12_re;
363  o21_re += g10_re * i20_re;
364  o21_re -= g10_im * i20_im;
365  o21_re += g11_re * i21_re;
366  o21_re -= g11_im * i21_im;
367  o21_re += g12_re * i22_re;
368  o21_re -= g12_im * i22_im;
369  o21_im += g10_re * i20_im;
370  o21_im += g10_im * i20_re;
371  o21_im += g11_re * i21_im;
372  o21_im += g11_im * i21_re;
373  o21_im += g12_re * i22_im;
374  o21_im += g12_im * i22_re;
375  o31_re += g10_re * i30_re;
376  o31_re -= g10_im * i30_im;
377  o31_re += g11_re * i31_re;
378  o31_re -= g11_im * i31_im;
379  o31_re += g12_re * i32_re;
380  o31_re -= g12_im * i32_im;
381  o31_im += g10_re * i30_im;
382  o31_im += g10_im * i30_re;
383  o31_im += g11_re * i31_im;
384  o31_im += g11_im * i31_re;
385  o31_im += g12_re * i32_im;
386  o31_im += g12_im * i32_re;
387 
388  // multiply row 2
389  o02_re += g20_re * i00_re;
390  o02_re -= g20_im * i00_im;
391  o02_re += g21_re * i01_re;
392  o02_re -= g21_im * i01_im;
393  o02_re += g22_re * i02_re;
394  o02_re -= g22_im * i02_im;
395  o02_im += g20_re * i00_im;
396  o02_im += g20_im * i00_re;
397  o02_im += g21_re * i01_im;
398  o02_im += g21_im * i01_re;
399  o02_im += g22_re * i02_im;
400  o02_im += g22_im * i02_re;
401  o12_re += g20_re * i10_re;
402  o12_re -= g20_im * i10_im;
403  o12_re += g21_re * i11_re;
404  o12_re -= g21_im * i11_im;
405  o12_re += g22_re * i12_re;
406  o12_re -= g22_im * i12_im;
407  o12_im += g20_re * i10_im;
408  o12_im += g20_im * i10_re;
409  o12_im += g21_re * i11_im;
410  o12_im += g21_im * i11_re;
411  o12_im += g22_re * i12_im;
412  o12_im += g22_im * i12_re;
413  o22_re += g20_re * i20_re;
414  o22_re -= g20_im * i20_im;
415  o22_re += g21_re * i21_re;
416  o22_re -= g21_im * i21_im;
417  o22_re += g22_re * i22_re;
418  o22_re -= g22_im * i22_im;
419  o22_im += g20_re * i20_im;
420  o22_im += g20_im * i20_re;
421  o22_im += g21_re * i21_im;
422  o22_im += g21_im * i21_re;
423  o22_im += g22_re * i22_im;
424  o22_im += g22_im * i22_re;
425  o32_re += g20_re * i30_re;
426  o32_re -= g20_im * i30_im;
427  o32_re += g21_re * i31_re;
428  o32_re -= g21_im * i31_im;
429  o32_re += g22_re * i32_re;
430  o32_re -= g22_im * i32_im;
431  o32_im += g20_re * i30_im;
432  o32_im += g20_im * i30_re;
433  o32_im += g21_re * i31_im;
434  o32_im += g21_im * i31_re;
435  o32_im += g22_re * i32_im;
436  o32_im += g22_im * i32_re;
437 
438 
439 #ifdef MULTI_GPU
440 
441  } else { // exterior kernel
442 
443  const int sp_stride_pad = ghostFace[static_cast<int>(kernel_type)];
444 
445  // read full spinor from device memory
446  READ_SPINOR(SPINORTEX, sp_stride_pad, sp_idx /*+ (SPINOR_HOP)*sp_stride_pad*/, sp_norm_idx);
447 
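 // Note (added): in this exterior-kernel branch the neighbour spinor comes from
 // the ghost-zone buffer, so the array stride is the face volume
 // ghostFace[kernel_type] rather than param.sp_stride, and sp_idx was offset by
 // param.ghostOffset above. The same link multiplication as in the interior
 // branch then follows.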
448  // reconstruct gauge matrix
449  RECONSTRUCT_GAUGE_MATRIX(6);
450 
451  // multiply row 0
452  o00_re += g00_re * i00_re;
453  o00_re -= g00_im * i00_im;
454  o00_re += g01_re * i01_re;
455  o00_re -= g01_im * i01_im;
456  o00_re += g02_re * i02_re;
457  o00_re -= g02_im * i02_im;
458  o00_im += g00_re * i00_im;
459  o00_im += g00_im * i00_re;
460  o00_im += g01_re * i01_im;
461  o00_im += g01_im * i01_re;
462  o00_im += g02_re * i02_im;
463  o00_im += g02_im * i02_re;
464  o10_re += g00_re * i10_re;
465  o10_re -= g00_im * i10_im;
466  o10_re += g01_re * i11_re;
467  o10_re -= g01_im * i11_im;
468  o10_re += g02_re * i12_re;
469  o10_re -= g02_im * i12_im;
470  o10_im += g00_re * i10_im;
471  o10_im += g00_im * i10_re;
472  o10_im += g01_re * i11_im;
473  o10_im += g01_im * i11_re;
474  o10_im += g02_re * i12_im;
475  o10_im += g02_im * i12_re;
476  o20_re += g00_re * i20_re;
477  o20_re -= g00_im * i20_im;
478  o20_re += g01_re * i21_re;
479  o20_re -= g01_im * i21_im;
480  o20_re += g02_re * i22_re;
481  o20_re -= g02_im * i22_im;
482  o20_im += g00_re * i20_im;
483  o20_im += g00_im * i20_re;
484  o20_im += g01_re * i21_im;
485  o20_im += g01_im * i21_re;
486  o20_im += g02_re * i22_im;
487  o20_im += g02_im * i22_re;
488  o30_re += g00_re * i30_re;
489  o30_re -= g00_im * i30_im;
490  o30_re += g01_re * i31_re;
491  o30_re -= g01_im * i31_im;
492  o30_re += g02_re * i32_re;
493  o30_re -= g02_im * i32_im;
494  o30_im += g00_re * i30_im;
495  o30_im += g00_im * i30_re;
496  o30_im += g01_re * i31_im;
497  o30_im += g01_im * i31_re;
498  o30_im += g02_re * i32_im;
499  o30_im += g02_im * i32_re;
500 
501  // multiply row 1
502  o01_re += g10_re * i00_re;
503  o01_re -= g10_im * i00_im;
504  o01_re += g11_re * i01_re;
505  o01_re -= g11_im * i01_im;
506  o01_re += g12_re * i02_re;
507  o01_re -= g12_im * i02_im;
508  o01_im += g10_re * i00_im;
509  o01_im += g10_im * i00_re;
510  o01_im += g11_re * i01_im;
511  o01_im += g11_im * i01_re;
512  o01_im += g12_re * i02_im;
513  o01_im += g12_im * i02_re;
514  o11_re += g10_re * i10_re;
515  o11_re -= g10_im * i10_im;
516  o11_re += g11_re * i11_re;
517  o11_re -= g11_im * i11_im;
518  o11_re += g12_re * i12_re;
519  o11_re -= g12_im * i12_im;
520  o11_im += g10_re * i10_im;
521  o11_im += g10_im * i10_re;
522  o11_im += g11_re * i11_im;
523  o11_im += g11_im * i11_re;
524  o11_im += g12_re * i12_im;
525  o11_im += g12_im * i12_re;
526  o21_re += g10_re * i20_re;
527  o21_re -= g10_im * i20_im;
528  o21_re += g11_re * i21_re;
529  o21_re -= g11_im * i21_im;
530  o21_re += g12_re * i22_re;
531  o21_re -= g12_im * i22_im;
532  o21_im += g10_re * i20_im;
533  o21_im += g10_im * i20_re;
534  o21_im += g11_re * i21_im;
535  o21_im += g11_im * i21_re;
536  o21_im += g12_re * i22_im;
537  o21_im += g12_im * i22_re;
538  o31_re += g10_re * i30_re;
539  o31_re -= g10_im * i30_im;
540  o31_re += g11_re * i31_re;
541  o31_re -= g11_im * i31_im;
542  o31_re += g12_re * i32_re;
543  o31_re -= g12_im * i32_im;
544  o31_im += g10_re * i30_im;
545  o31_im += g10_im * i30_re;
546  o31_im += g11_re * i31_im;
547  o31_im += g11_im * i31_re;
548  o31_im += g12_re * i32_im;
549  o31_im += g12_im * i32_re;
550 
551  // multiply row 2
552  o02_re += g20_re * i00_re;
553  o02_re -= g20_im * i00_im;
554  o02_re += g21_re * i01_re;
555  o02_re -= g21_im * i01_im;
556  o02_re += g22_re * i02_re;
557  o02_re -= g22_im * i02_im;
558  o02_im += g20_re * i00_im;
559  o02_im += g20_im * i00_re;
560  o02_im += g21_re * i01_im;
561  o02_im += g21_im * i01_re;
562  o02_im += g22_re * i02_im;
563  o02_im += g22_im * i02_re;
564  o12_re += g20_re * i10_re;
565  o12_re -= g20_im * i10_im;
566  o12_re += g21_re * i11_re;
567  o12_re -= g21_im * i11_im;
568  o12_re += g22_re * i12_re;
569  o12_re -= g22_im * i12_im;
570  o12_im += g20_re * i10_im;
571  o12_im += g20_im * i10_re;
572  o12_im += g21_re * i11_im;
573  o12_im += g21_im * i11_re;
574  o12_im += g22_re * i12_im;
575  o12_im += g22_im * i12_re;
576  o22_re += g20_re * i20_re;
577  o22_re -= g20_im * i20_im;
578  o22_re += g21_re * i21_re;
579  o22_re -= g21_im * i21_im;
580  o22_re += g22_re * i22_re;
581  o22_re -= g22_im * i22_im;
582  o22_im += g20_re * i20_im;
583  o22_im += g20_im * i20_re;
584  o22_im += g21_re * i21_im;
585  o22_im += g21_im * i21_re;
586  o22_im += g22_re * i22_im;
587  o22_im += g22_im * i22_re;
588  o32_re += g20_re * i30_re;
589  o32_re -= g20_im * i30_im;
590  o32_re += g21_re * i31_re;
591  o32_re -= g21_im * i31_im;
592  o32_re += g22_re * i32_re;
593  o32_re -= g22_im * i32_im;
594  o32_im += g20_re * i30_im;
595  o32_im += g20_im * i30_re;
596  o32_im += g21_re * i31_im;
597  o32_im += g21_im * i31_re;
598  o32_im += g22_re * i32_im;
599  o32_im += g22_im * i32_re;
600 
601  }
602 #endif // MULTI_GPU
603 
604  }
605 }
606 
607 
608 
609 // write spinor field back to device memory
610 WRITE_SPINOR(param.sp_stride);
611 
612 // undefine to prevent warning when precision is changed
613 #undef spinorFloat
614 #undef SHARED_STRIDE
615 
616 #undef g00_re
617 #undef g00_im
618 #undef gT00_re
619 #undef gT00_im
620 #undef g01_re
621 #undef g01_im
622 #undef gT01_re
623 #undef gT01_im
624 #undef g02_re
625 #undef g02_im
626 #undef gT02_re
627 #undef gT02_im
628 #undef g10_re
629 #undef g10_im
630 #undef gT10_re
631 #undef gT10_im
632 #undef g11_re
633 #undef g11_im
634 #undef gT11_re
635 #undef gT11_im
636 #undef g12_re
637 #undef g12_im
638 #undef gT12_re
639 #undef gT12_im
640 #undef g20_re
641 #undef g20_im
642 #undef gT20_re
643 #undef gT20_im
644 #undef g21_re
645 #undef g21_im
646 #undef gT21_re
647 #undef gT21_im
648 #undef g22_re
649 #undef g22_im
650 #undef gT22_re
651 #undef gT22_im
652 
653 #undef i00_re
654 #undef i00_im
655 #undef i01_re
656 #undef i01_im
657 #undef i02_re
658 #undef i02_im
659 #undef i10_re
660 #undef i10_im
661 #undef i11_re
662 #undef i11_im
663 #undef i12_re
664 #undef i12_im
665 #undef i20_re
666 #undef i20_im
667 #undef i21_re
668 #undef i21_im
669 #undef i22_re
670 #undef i22_im
671 #undef i30_re
672 #undef i30_im
673 #undef i31_re
674 #undef i31_im
675 #undef i32_re
676 #undef i32_im
677 
678 
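// How a core file like this is consumed (added, illustrative sketch only; the
// wrapper name and parameter type are assumptions, not taken from the QUDA
// source): the including translation unit selects the precision and gauge
// layout (SPINOR_DOUBLE, GAUGE_FLOAT2), defines the I/O macros (READ_SPINOR,
// READ_GAUGE_MATRIX, WRITE_SPINOR, ...) and the kernel parameters, and then
// #includes this header inside the kernel body, roughly:
//
//   __global__ void covDevKernelMu3(const DslashParam param)
//   {
//   #include "covDev_mu3_core.h"   // expands to everything listed above
//   }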