QUDA  v0.5.0
A library for QCD on GPUs
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
dslash_util.h
Go to the documentation of this file.
1 #ifndef _DSLASH_UTIL_H
2 #define _DSLASH_UTIL_H
3 
4 #include <test_util.h>
5 
6 template <typename Float>
7 static inline void sum(Float *dst, Float *a, Float *b, int cnt) {
8  for (int i = 0; i < cnt; i++)
9  dst[i] = a[i] + b[i];
10 }
11 
12 template <typename Float>
13 static inline void sub(Float *dst, Float *a, Float *b, int cnt) {
14  for (int i = 0; i < cnt; i++)
15  dst[i] = a[i] - b[i];
16 }
17 
18 template <typename Float>
19 static inline void ax(Float *dst, Float a, Float *x, int cnt) {
20  for (int i = 0; i < cnt; i++)
21  dst[i] = a * x[i];
22 }
23 
24 // performs the operation y[i] = x[i] + a*y[i]
25 template <typename Float>
26 static inline void xpay(Float *x, Float a, Float *y, int len) {
27  for (int i=0; i<len; i++) y[i] = x[i] + a*y[i];
28 }
29 // performs the operation y[i] = a*x[i] - y[i]
30 template <typename Float>
31 static inline void axmy(Float *x, Float a, Float *y, int len) {
32  for (int i=0; i<len; i++) y[i] = a*x[i] - y[i];
33 }
34 
35 template <typename Float>
36 static double norm2(Float *v, int len) {
37  double sum=0.0;
38  for (int i=0; i<len; i++) sum += v[i]*v[i];
39  return sum;
40 }
41 
42 template <typename Float>
43 static inline void negx(Float *x, int len) {
44  for (int i=0; i<len; i++) x[i] = -x[i];
45 }
46 
47 template <typename sFloat, typename gFloat>
48 static inline void dot(sFloat* res, gFloat* a, sFloat* b) {
49  res[0] = res[1] = 0;
50  for (int m = 0; m < 3; m++) {
51  sFloat a_re = a[2*m+0];
52  sFloat a_im = a[2*m+1];
53  sFloat b_re = b[2*m+0];
54  sFloat b_im = b[2*m+1];
55  res[0] += a_re * b_re - a_im * b_im;
56  res[1] += a_re * b_im + a_im * b_re;
57  }
58 }
59 
60 template <typename Float>
61 static inline void su3Transpose(Float *res, Float *mat) {
62  for (int m = 0; m < 3; m++) {
63  for (int n = 0; n < 3; n++) {
64  res[m*(3*2) + n*(2) + 0] = + mat[n*(3*2) + m*(2) + 0];
65  res[m*(3*2) + n*(2) + 1] = - mat[n*(3*2) + m*(2) + 1];
66  }
67  }
68 }
69 
70 
71 template <typename sFloat, typename gFloat>
72 static inline void su3Mul(sFloat *res, gFloat *mat, sFloat *vec) {
73  for (int n = 0; n < 3; n++) dot(&res[n*(2)], &mat[n*(3*2)], vec);
74 }
75 
76 template <typename sFloat, typename gFloat>
77 static inline void su3Tmul(sFloat *res, gFloat *mat, sFloat *vec) {
78  gFloat matT[3*3*2];
79  su3Transpose(matT, mat);
80  su3Mul(res, matT, vec);
81 }
82 
83 
84 // i represents a "half index" into an even or odd "half lattice".
85 // when oddBit={0,1} the half lattice is {even,odd}.
86 //
87 // the displacements, such as dx, refer to the full lattice coordinates.
88 //
89 // neighborIndex() takes a "half index", displaces it, and returns the
90 // new "half index", which can be an index into either the even or odd lattices.
91 // displacements of magnitude one always interchange odd and even lattices.
92 //
93 
94 
95 template <typename Float>
96 static inline Float *gaugeLink(int i, int dir, int oddBit, Float **gaugeEven, Float **gaugeOdd, int nbr_distance) {
97  Float **gaugeField;
98  int j;
99  int d = nbr_distance;
100  if (dir % 2 == 0) {
101  j = i;
102  gaugeField = (oddBit ? gaugeOdd : gaugeEven);
103  }
104  else {
105  switch (dir) {
106  case 1: j = neighborIndex(i, oddBit, 0, 0, 0, -d); break;
107  case 3: j = neighborIndex(i, oddBit, 0, 0, -d, 0); break;
108  case 5: j = neighborIndex(i, oddBit, 0, -d, 0, 0); break;
109  case 7: j = neighborIndex(i, oddBit, -d, 0, 0, 0); break;
110  default: j = -1; break;
111  }
112  gaugeField = (oddBit ? gaugeEven : gaugeOdd);
113  }
114 
115  return &gaugeField[dir/2][j*(3*3*2)];
116 }
117 
118 template <typename Float>
119 static inline Float *spinorNeighbor(int i, int dir, int oddBit, Float *spinorField, int neighbor_distance)
120 {
121  int j;
122  int nb = neighbor_distance;
123  switch (dir) {
124  case 0: j = neighborIndex(i, oddBit, 0, 0, 0, +nb); break;
125  case 1: j = neighborIndex(i, oddBit, 0, 0, 0, -nb); break;
126  case 2: j = neighborIndex(i, oddBit, 0, 0, +nb, 0); break;
127  case 3: j = neighborIndex(i, oddBit, 0, 0, -nb, 0); break;
128  case 4: j = neighborIndex(i, oddBit, 0, +nb, 0, 0); break;
129  case 5: j = neighborIndex(i, oddBit, 0, -nb, 0, 0); break;
130  case 6: j = neighborIndex(i, oddBit, +nb, 0, 0, 0); break;
131  case 7: j = neighborIndex(i, oddBit, -nb, 0, 0, 0); break;
132  default: j = -1; break;
133  }
134 
135  return &spinorField[j*(mySpinorSiteSize)];
136 }
137 
138 
139 #ifdef MULTI_GPU
140 
141 static inline int
142 x4_mg(int i, int oddBit)
143 {
144  int Y = fullLatticeIndex(i, oddBit);
145  int x4 = Y/(Z[2]*Z[1]*Z[0]);
146  return x4;
147 }
148 
149 template <typename Float>
150 static inline Float *gaugeLink_mg4dir(int i, int dir, int oddBit, Float **gaugeEven, Float **gaugeOdd,
151  Float** ghostGaugeEven, Float** ghostGaugeOdd, int n_ghost_faces, int nbr_distance) {
152  Float **gaugeField;
153  int j;
154  int d = nbr_distance;
155  if (dir % 2 == 0) {
156  j = i;
157  gaugeField = (oddBit ? gaugeOdd : gaugeEven);
158  }
159  else {
160 
161  int Y = fullLatticeIndex(i, oddBit);
162  int x4 = Y/(Z[2]*Z[1]*Z[0]);
163  int x3 = (Y/(Z[1]*Z[0])) % Z[2];
164  int x2 = (Y/Z[0]) % Z[1];
165  int x1 = Y % Z[0];
166  int X1= Z[0];
167  int X2= Z[1];
168  int X3= Z[2];
169  int X4= Z[3];
170  Float* ghostGaugeField;
171 
172  switch (dir) {
173  case 1:
174  { //-X direction
175  int new_x1 = (x1 - d + X1 )% X1;
176  if (x1 -d < 0){
177  ghostGaugeField = (oddBit?ghostGaugeEven[0]: ghostGaugeOdd[0]);
178  int offset = (n_ghost_faces + x1 -d)*X4*X3*X2/2 + (x4*X3*X2 + x3*X2+x2)/2;
179  return &ghostGaugeField[offset*(3*3*2)];
180  }
181  j = (x4*X3*X2*X1 + x3*X2*X1 + x2*X1 + new_x1) / 2;
182  break;
183  }
184  case 3:
185  { //-Y direction
186  int new_x2 = (x2 - d + X2 )% X2;
187  if (x2 -d < 0){
188  ghostGaugeField = (oddBit?ghostGaugeEven[1]: ghostGaugeOdd[1]);
189  int offset = (n_ghost_faces + x2 -d)*X4*X3*X1/2 + (x4*X3*X1 + x3*X1+x1)/2;
190  return &ghostGaugeField[offset*(3*3*2)];
191  }
192  j = (x4*X3*X2*X1 + x3*X2*X1 + new_x2*X1 + x1) / 2;
193  break;
194 
195  }
196  case 5:
197  { //-Z direction
198  int new_x3 = (x3 - d + X3 )% X3;
199  if (x3 -d < 0){
200  ghostGaugeField = (oddBit?ghostGaugeEven[2]: ghostGaugeOdd[2]);
201  int offset = (n_ghost_faces + x3 -d)*X4*X2*X1/2 + (x4*X2*X1 + x2*X1+x1)/2;
202  return &ghostGaugeField[offset*(3*3*2)];
203  }
204  j = (x4*X3*X2*X1 + new_x3*X2*X1 + x2*X1 + x1) / 2;
205  break;
206  }
207  case 7:
208  { //-T direction
209  int new_x4 = (x4 - d + X4)% X4;
210  if (x4 -d < 0){
211  ghostGaugeField = (oddBit?ghostGaugeEven[3]: ghostGaugeOdd[3]);
212  int offset = (n_ghost_faces + x4 -d)*X1*X2*X3/2 + (x3*X2*X1 + x2*X1+x1)/2;
213  return &ghostGaugeField[offset*(3*3*2)];
214  }
215  j = (new_x4*(X3*X2*X1) + x3*(X2*X1) + x2*(X1) + x1) / 2;
216  break;
217  }//7
218 
219  default: j = -1; printf("ERROR: wrong dir \n"); exit(1);
220  }
221  gaugeField = (oddBit ? gaugeEven : gaugeOdd);
222 
223  }
224 
225  return &gaugeField[dir/2][j*(3*3*2)];
226 }
227 
228 template <typename Float>
229 static inline Float *spinorNeighbor_mg4dir(int i, int dir, int oddBit, Float *spinorField, Float** fwd_nbr_spinor,
230  Float** back_nbr_spinor, int neighbor_distance, int nFace)
231 {
232  int j;
233  int nb = neighbor_distance;
234  int Y = fullLatticeIndex(i, oddBit);
235  int x4 = Y/(Z[2]*Z[1]*Z[0]);
236  int x3 = (Y/(Z[1]*Z[0])) % Z[2];
237  int x2 = (Y/Z[0]) % Z[1];
238  int x1 = Y % Z[0];
239  int X1= Z[0];
240  int X2= Z[1];
241  int X3= Z[2];
242  int X4= Z[3];
243 
244  switch (dir) {
245  case 0://+X
246  {
247  int new_x1 = (x1 + nb)% X1;
248  if(x1+nb >=X1){
249  int offset = ( x1 + nb -X1)*X4*X3*X2/2+(x4*X3*X2 + x3*X2+x2)/2;
250  return fwd_nbr_spinor[0] + offset*mySpinorSiteSize;
251  }
252  j = (x4*X3*X2*X1 + x3*X2*X1 + x2*X1 + new_x1) / 2;
253  break;
254 
255  }
256  case 1://-X
257  {
258  int new_x1 = (x1 - nb + X1)% X1;
259  if(x1 - nb < 0){
260  int offset = ( x1+nFace- nb)*X4*X3*X2/2+(x4*X3*X2 + x3*X2+x2)/2;
261  return back_nbr_spinor[0] + offset*mySpinorSiteSize;
262  }
263  j = (x4*X3*X2*X1 + x3*X2*X1 + x2*X1 + new_x1) / 2;
264  break;
265  }
266  case 2://+Y
267  {
268  int new_x2 = (x2 + nb)% X2;
269  if(x2+nb >=X2){
270  int offset = ( x2 + nb -X2)*X4*X3*X1/2+(x4*X3*X1 + x3*X1+x1)/2;
271  return fwd_nbr_spinor[1] + offset*mySpinorSiteSize;
272  }
273  j = (x4*X3*X2*X1 + x3*X2*X1 + new_x2*X1 + x1) / 2;
274  break;
275  }
276  case 3:// -Y
277  {
278  int new_x2 = (x2 - nb + X2)% X2;
279  if(x2 - nb < 0){
280  int offset = ( x2 + nFace -nb)*X4*X3*X1/2+(x4*X3*X1 + x3*X1+x1)/2;
281  return back_nbr_spinor[1] + offset*mySpinorSiteSize;
282  }
283  j = (x4*X3*X2*X1 + x3*X2*X1 + new_x2*X1 + x1) / 2;
284  break;
285  }
286  case 4://+Z
287  {
288  int new_x3 = (x3 + nb)% X3;
289  if(x3+nb >=X3){
290  int offset = ( x3 + nb -X3)*X4*X2*X1/2+(x4*X2*X1 + x2*X1+x1)/2;
291  return fwd_nbr_spinor[2] + offset*mySpinorSiteSize;
292  }
293  j = (x4*X3*X2*X1 + new_x3*X2*X1 + x2*X1 + x1) / 2;
294  break;
295  }
296  case 5://-Z
297  {
298  int new_x3 = (x3 - nb + X3)% X3;
299  if(x3 - nb < 0){
300  int offset = ( x3 + nFace -nb)*X4*X2*X1/2+(x4*X2*X1 + x2*X1+x1)/2;
301  return back_nbr_spinor[2] + offset*mySpinorSiteSize;
302  }
303  j = (x4*X3*X2*X1 + new_x3*X2*X1 + x2*X1 + x1) / 2;
304  break;
305  }
306  case 6://+T
307  {
308  j = neighborIndex_mg(i, oddBit, +nb, 0, 0, 0);
309  int x4 = x4_mg(i, oddBit);
310  if ( (x4 + nb) >= Z[3]){
311  int offset = (x4+nb - Z[3])*Vsh_t;
312  return &fwd_nbr_spinor[3][(offset+j)*mySpinorSiteSize];
313  }
314  break;
315  }
316  case 7://-T
317  {
318  j = neighborIndex_mg(i, oddBit, -nb, 0, 0, 0);
319  int x4 = x4_mg(i, oddBit);
320  if ( (x4 - nb) < 0){
321  int offset = ( x4 - nb +nFace)*Vsh_t;
322  return &back_nbr_spinor[3][(offset+j)*mySpinorSiteSize];
323  }
324  break;
325  }
326  default: j = -1; printf("ERROR: wrong dir\n"); exit(1);
327  }
328 
329  return &spinorField[j*(mySpinorSiteSize)];
330 }
331 
332 #endif // MULTI_GPU
333 
334 #endif // _DSLASH_UTIL_H