QUDA  v1.1.0
A library for QCD on GPUs
dslash_reference.h
Go to the documentation of this file.
1 #pragma once
2 
3 #include <host_utils.h>
4 #include <comm_quda.h>
5 
6 template <typename Float>
7 static inline void sum(Float *dst, Float *a, Float *b, int cnt) {
8  for (int i = 0; i < cnt; i++)
9  dst[i] = a[i] + b[i];
10 }
11 
12 template <typename Float>
13 static inline void sub(Float *dst, Float *a, Float *b, int cnt) {
14  for (int i = 0; i < cnt; i++)
15  dst[i] = a[i] - b[i];
16 }
17 
18 template <typename Float>
19 static inline void ax(Float *dst, Float a, Float *x, int cnt) {
20  for (int i = 0; i < cnt; i++)
21  dst[i] = a * x[i];
22 }
23 
24 // performs the operation y[i] = a*x[i] + y[i]
25 template <typename Float>
26 static inline void axpy(Float a, Float *x, Float *y, int len) {
27  for (int i=0; i<len; i++) y[i] = a*x[i] + y[i];
28 }
29 
30 // performs the operation y[i] = a*x[i] + b*y[i]
31 template <typename Float>
32 static inline void axpby(Float a, Float *x, Float b, Float *y, int len) {
33  for (int i=0; i<len; i++) y[i] = a*x[i] + b*y[i];
34 }
35 
36 // performs the operation y[i] = a*x[i] - y[i]
37 template <typename Float>
38 static inline void axmy(Float *x, Float a, Float *y, int len) {
39  for (int i=0; i<len; i++) y[i] = a*x[i] - y[i];
40 }
41 
42 template <typename Float>
43 static double norm2(Float *v, int len) {
44  double sum=0.0;
45  for (int i=0; i<len; i++) sum += v[i]*v[i];
46  return sum;
47 }
48 
49 template <typename Float>
50 static inline void negx(Float *x, int len) {
51  for (int i=0; i<len; i++) x[i] = -x[i];
52 }
53 
54 template <typename sFloat, typename gFloat>
55 static inline void dot(sFloat* res, gFloat* a, sFloat* b) {
56  res[0] = res[1] = 0;
57  for (int m = 0; m < 3; m++) {
58  sFloat a_re = a[2*m+0];
59  sFloat a_im = a[2*m+1];
60  sFloat b_re = b[2*m+0];
61  sFloat b_im = b[2*m+1];
62  res[0] += a_re * b_re - a_im * b_im;
63  res[1] += a_re * b_im + a_im * b_re;
64  }
65 }
66 
67 template <typename Float>
68 static inline void su3Transpose(Float *res, Float *mat) {
69  for (int m = 0; m < 3; m++) {
70  for (int n = 0; n < 3; n++) {
71  res[m*(3*2) + n*(2) + 0] = + mat[n*(3*2) + m*(2) + 0];
72  res[m*(3*2) + n*(2) + 1] = - mat[n*(3*2) + m*(2) + 1];
73  }
74  }
75 }
76 
77 
78 template <typename sFloat, typename gFloat>
79 static inline void su3Mul(sFloat *res, gFloat *mat, sFloat *vec) {
80  for (int n = 0; n < 3; n++) dot(&res[n*(2)], &mat[n*(3*2)], vec);
81 }
82 
83 template <typename sFloat, typename gFloat>
84 static inline void su3Tmul(sFloat *res, gFloat *mat, sFloat *vec) {
85  gFloat matT[3*3*2];
86  su3Transpose(matT, mat);
87  su3Mul(res, matT, vec);
88 }
89 void verifyInversion(void *spinorOut, void *spinorIn, void *spinorCheck, QudaGaugeParam &gauge_param,
90  QudaInvertParam &inv_param, void **gauge, void *clover, void *clover_inv);
91 
92 void verifyInversion(void *spinorOut, void **spinorOutMulti, void *spinorIn, void *spinorCheck,
93  QudaGaugeParam &gauge_param, QudaInvertParam &inv_param, void **gauge, void *clover,
94  void *clover_inv);
95 
96 void verifyDomainWallTypeInversion(void *spinorOut, void **spinorOutMulti, void *spinorIn, void *spinorCheck,
97  QudaGaugeParam &gauge_param, QudaInvertParam &inv_param, void **gauge, void *clover,
98  void *clover_inv);
99 
100 void verifyWilsonTypeInversion(void *spinorOut, void **spinorOutMulti, void *spinorIn, void *spinorCheck,
101  QudaGaugeParam &gauge_param, QudaInvertParam &inv_param, void **gauge, void *clover,
102  void *clover_inv);
103 
105  quda::ColorSpinorField *out, double mass, void *qdp_fatlink[], void *qdp_longlink[],
106  void **ghost_fatlink, void **ghost_longlink, QudaGaugeParam &gauge_param,
107  QudaInvertParam &inv_param, int shift);
108 
109 // i represents a "half index" into an even or odd "half lattice".
110 // when oddBit={0,1} the half lattice is {even,odd}.
111 //
112 // the displacements, such as dx, refer to the full lattice coordinates.
113 //
114 // neighborIndex() takes a "half index", displaces it, and returns the
115 // new "half index", which can be an index into either the even or odd lattices.
116 // displacements of magnitude one always interchange odd and even lattices.
117 //
118 
119 
120 template <typename Float>
121 static inline Float *gaugeLink(int i, int dir, int oddBit, Float **gaugeEven, Float **gaugeOdd, int nbr_distance) {
122  Float **gaugeField;
123  int j;
124  int d = nbr_distance;
125  if (dir % 2 == 0) {
126  j = i;
127  gaugeField = (oddBit ? gaugeOdd : gaugeEven);
128  }
129  else {
130  switch (dir) {
131  case 1: j = neighborIndex(i, oddBit, 0, 0, 0, -d); break;
132  case 3: j = neighborIndex(i, oddBit, 0, 0, -d, 0); break;
133  case 5: j = neighborIndex(i, oddBit, 0, -d, 0, 0); break;
134  case 7: j = neighborIndex(i, oddBit, -d, 0, 0, 0); break;
135  default: j = -1; break;
136  }
137  gaugeField = (oddBit ? gaugeEven : gaugeOdd);
138  }
139 
140  return &gaugeField[dir/2][j*(3*3*2)];
141 }
142 
143 template <typename Float>
144 static inline Float *spinorNeighbor(int i, int dir, int oddBit, Float *spinorField, int neighbor_distance,
145  int site_size = 24)
146 {
147  int j;
148  int nb = neighbor_distance;
149  switch (dir) {
150  case 0: j = neighborIndex(i, oddBit, 0, 0, 0, +nb); break;
151  case 1: j = neighborIndex(i, oddBit, 0, 0, 0, -nb); break;
152  case 2: j = neighborIndex(i, oddBit, 0, 0, +nb, 0); break;
153  case 3: j = neighborIndex(i, oddBit, 0, 0, -nb, 0); break;
154  case 4: j = neighborIndex(i, oddBit, 0, +nb, 0, 0); break;
155  case 5: j = neighborIndex(i, oddBit, 0, -nb, 0, 0); break;
156  case 6: j = neighborIndex(i, oddBit, +nb, 0, 0, 0); break;
157  case 7: j = neighborIndex(i, oddBit, -nb, 0, 0, 0); break;
158  default: j = -1; break;
159  }
160 
161  return &spinorField[j * site_size];
162 }
163 
164 
165 // i represents a "half index" into an even or odd "half lattice".
166 // when oddBit={0,1} the half lattice is {even,odd}.
167 //
168 // the displacements, such as dx, refer to the full lattice coordinates.
169 //
170 // neighborIndex() takes a "half index", displaces it, and returns the
171 // new "half index", which can be an index into either the even or odd lattices.
172 // displacements of magnitude one always interchange odd and even lattices.
173 //
174 //
175 template <QudaPCType type> int neighborIndex_5d(int i, int oddBit, int dxs, int dx4, int dx3, int dx2, int dx1)
176 {
177  // fullLatticeIndex was modified for fullLatticeIndex_4d. It is in util_quda.cpp.
178  // This code bit may not properly perform 5dPC.
179  int X = type == QUDA_5D_PC ? fullLatticeIndex_5d(i, oddBit) : fullLatticeIndex_5d_4dpc(i, oddBit);
180  // Checked that this matches code in dslash_core_ante.h.
181  int xs = X/(Z[3]*Z[2]*Z[1]*Z[0]);
182  int x4 = (X/(Z[2]*Z[1]*Z[0])) % Z[3];
183  int x3 = (X/(Z[1]*Z[0])) % Z[2];
184  int x2 = (X/Z[0]) % Z[1];
185  int x1 = X % Z[0];
186  // Displace and project back into domain 0,...,Ls-1.
187  // Note that we add Ls to avoid the negative problem
188  // of the C % operator.
189  xs = (xs+dxs+Ls) % Ls;
190  // Etc.
191  x4 = (x4+dx4+Z[3]) % Z[3];
192  x3 = (x3+dx3+Z[2]) % Z[2];
193  x2 = (x2+dx2+Z[1]) % Z[1];
194  x1 = (x1+dx1+Z[0]) % Z[0];
195  // Return linear half index. Remember that integer division
196  // rounds down.
197  return (xs*(Z[3]*Z[2]*Z[1]*Z[0]) + x4*(Z[2]*Z[1]*Z[0]) + x3*(Z[1]*Z[0]) + x2*(Z[0]) + x1) / 2;
198 }
199 
200 template <QudaPCType type, typename Float>
201 Float *spinorNeighbor_5d(int i, int dir, int oddBit, Float *spinorField, int neighbor_distance = 1, int site_size = 24)
202 {
203  int nb = neighbor_distance;
204  int j;
205  switch (dir) {
206  case 0: j = neighborIndex_5d<type>(i, oddBit, 0, 0, 0, 0, +nb); break;
207  case 1: j = neighborIndex_5d<type>(i, oddBit, 0, 0, 0, 0, -nb); break;
208  case 2: j = neighborIndex_5d<type>(i, oddBit, 0, 0, 0, +nb, 0); break;
209  case 3: j = neighborIndex_5d<type>(i, oddBit, 0, 0, 0, -nb, 0); break;
210  case 4: j = neighborIndex_5d<type>(i, oddBit, 0, 0, +nb, 0, 0); break;
211  case 5: j = neighborIndex_5d<type>(i, oddBit, 0, 0, -nb, 0, 0); break;
212  case 6: j = neighborIndex_5d<type>(i, oddBit, 0, +nb, 0, 0, 0); break;
213  case 7: j = neighborIndex_5d<type>(i, oddBit, 0, -nb, 0, 0, 0); break;
214  case 8: j = neighborIndex_5d<type>(i, oddBit, +nb, 0, 0, 0, 0); break;
215  case 9: j = neighborIndex_5d<type>(i, oddBit, -nb, 0, 0, 0, 0); break;
216  default: j = -1; break;
217  }
218  return &spinorField[j * site_size];
219 }
220 
221 #ifdef MULTI_GPU
222 inline int x4_mg(int i, int oddBit)
223 {
224  int Y = fullLatticeIndex(i, oddBit);
225  int x4 = Y/(Z[2]*Z[1]*Z[0]);
226  return x4;
227 }
228 
229 template <typename Float>
230 static inline Float *gaugeLink_mg4dir(int i, int dir, int oddBit, Float **gaugeEven, Float **gaugeOdd,
231  Float** ghostGaugeEven, Float** ghostGaugeOdd, int n_ghost_faces, int nbr_distance) {
232  Float **gaugeField;
233  int j;
234  int d = nbr_distance;
235  if (dir % 2 == 0) {
236  j = i;
237  gaugeField = (oddBit ? gaugeOdd : gaugeEven);
238  }
239  else {
240 
241  int Y = fullLatticeIndex(i, oddBit);
242  int x4 = Y/(Z[2]*Z[1]*Z[0]);
243  int x3 = (Y/(Z[1]*Z[0])) % Z[2];
244  int x2 = (Y/Z[0]) % Z[1];
245  int x1 = Y % Z[0];
246  int X1= Z[0];
247  int X2= Z[1];
248  int X3= Z[2];
249  int X4= Z[3];
250  Float* ghostGaugeField;
251 
252  switch (dir) {
253  case 1:
254  { //-X direction
255  int new_x1 = (x1 - d + X1 )% X1;
256  if (x1 -d < 0 && comm_dim_partitioned(0)){
257  ghostGaugeField = (oddBit?ghostGaugeEven[0]: ghostGaugeOdd[0]);
258  int offset = (n_ghost_faces + x1 -d)*X4*X3*X2/2 + (x4*X3*X2 + x3*X2+x2)/2;
259  return &ghostGaugeField[offset*(3*3*2)];
260  }
261  j = (x4*X3*X2*X1 + x3*X2*X1 + x2*X1 + new_x1) / 2;
262  break;
263  }
264  case 3:
265  { //-Y direction
266  int new_x2 = (x2 - d + X2 )% X2;
267  if (x2 -d < 0 && comm_dim_partitioned(1)){
268  ghostGaugeField = (oddBit?ghostGaugeEven[1]: ghostGaugeOdd[1]);
269  int offset = (n_ghost_faces + x2 -d)*X4*X3*X1/2 + (x4*X3*X1 + x3*X1+x1)/2;
270  return &ghostGaugeField[offset*(3*3*2)];
271  }
272  j = (x4*X3*X2*X1 + x3*X2*X1 + new_x2*X1 + x1) / 2;
273  break;
274 
275  }
276  case 5:
277  { //-Z direction
278  int new_x3 = (x3 - d + X3 )% X3;
279  if (x3 -d < 0 && comm_dim_partitioned(2)){
280  ghostGaugeField = (oddBit?ghostGaugeEven[2]: ghostGaugeOdd[2]);
281  int offset = (n_ghost_faces + x3 -d)*X4*X2*X1/2 + (x4*X2*X1 + x2*X1+x1)/2;
282  return &ghostGaugeField[offset*(3*3*2)];
283  }
284  j = (x4*X3*X2*X1 + new_x3*X2*X1 + x2*X1 + x1) / 2;
285  break;
286  }
287  case 7:
288  { //-T direction
289  int new_x4 = (x4 - d + X4)% X4;
290  if (x4 -d < 0 && comm_dim_partitioned(3)){
291  ghostGaugeField = (oddBit?ghostGaugeEven[3]: ghostGaugeOdd[3]);
292  int offset = (n_ghost_faces + x4 -d)*X1*X2*X3/2 + (x3*X2*X1 + x2*X1+x1)/2;
293  return &ghostGaugeField[offset*(3*3*2)];
294  }
295  j = (new_x4*(X3*X2*X1) + x3*(X2*X1) + x2*(X1) + x1) / 2;
296  break;
297  }//7
298 
299  default: j = -1; printf("ERROR: wrong dir \n"); exit(1);
300  }
301  gaugeField = (oddBit ? gaugeEven : gaugeOdd);
302 
303  }
304 
305  return &gaugeField[dir/2][j*(3*3*2)];
306 }
307 
308 template <typename Float>
309 static inline Float *spinorNeighbor_mg4dir(int i, int dir, int oddBit, Float *spinorField, Float **fwd_nbr_spinor,
310  Float **back_nbr_spinor, int neighbor_distance, int nFace, int site_size = 24)
311 {
312  int j;
313  int nb = neighbor_distance;
314  int Y = fullLatticeIndex(i, oddBit);
315  int x4 = Y/(Z[2]*Z[1]*Z[0]);
316  int x3 = (Y/(Z[1]*Z[0])) % Z[2];
317  int x2 = (Y/Z[0]) % Z[1];
318  int x1 = Y % Z[0];
319  int X1= Z[0];
320  int X2= Z[1];
321  int X3= Z[2];
322  int X4= Z[3];
323 
324  switch (dir) {
325  case 0://+X
326  {
327  int new_x1 = (x1 + nb)% X1;
328  if(x1+nb >=X1 && comm_dim_partitioned(0) ){
329  int offset = ( x1 + nb -X1)*X4*X3*X2/2+(x4*X3*X2 + x3*X2+x2)/2;
330  return fwd_nbr_spinor[0] + offset * site_size;
331  }
332  j = (x4*X3*X2*X1 + x3*X2*X1 + x2*X1 + new_x1) / 2;
333  break;
334  }
335  case 1://-X
336  {
337  int new_x1 = (x1 - nb + X1)% X1;
338  if(x1 - nb < 0 && comm_dim_partitioned(0)){
339  int offset = ( x1+nFace- nb)*X4*X3*X2/2+(x4*X3*X2 + x3*X2+x2)/2;
340  return back_nbr_spinor[0] + offset * site_size;
341  }
342  j = (x4*X3*X2*X1 + x3*X2*X1 + x2*X1 + new_x1) / 2;
343  break;
344  }
345  case 2://+Y
346  {
347  int new_x2 = (x2 + nb)% X2;
348  if(x2+nb >=X2 && comm_dim_partitioned(1)){
349  int offset = ( x2 + nb -X2)*X4*X3*X1/2+(x4*X3*X1 + x3*X1+x1)/2;
350  return fwd_nbr_spinor[1] + offset * site_size;
351  }
352  j = (x4*X3*X2*X1 + x3*X2*X1 + new_x2*X1 + x1) / 2;
353  break;
354  }
355  case 3:// -Y
356  {
357  int new_x2 = (x2 - nb + X2)% X2;
358  if(x2 - nb < 0 && comm_dim_partitioned(1)){
359  int offset = ( x2 + nFace -nb)*X4*X3*X1/2+(x4*X3*X1 + x3*X1+x1)/2;
360  return back_nbr_spinor[1] + offset * site_size;
361  }
362  j = (x4*X3*X2*X1 + x3*X2*X1 + new_x2*X1 + x1) / 2;
363  break;
364  }
365  case 4://+Z
366  {
367  int new_x3 = (x3 + nb)% X3;
368  if(x3+nb >=X3 && comm_dim_partitioned(2)){
369  int offset = ( x3 + nb -X3)*X4*X2*X1/2+(x4*X2*X1 + x2*X1+x1)/2;
370  return fwd_nbr_spinor[2] + offset * site_size;
371  }
372  j = (x4*X3*X2*X1 + new_x3*X2*X1 + x2*X1 + x1) / 2;
373  break;
374  }
375  case 5://-Z
376  {
377  int new_x3 = (x3 - nb + X3)% X3;
378  if(x3 - nb < 0 && comm_dim_partitioned(2)){
379  int offset = ( x3 + nFace -nb)*X4*X2*X1/2+(x4*X2*X1 + x2*X1+x1)/2;
380  return back_nbr_spinor[2] + offset * site_size;
381  }
382  j = (x4*X3*X2*X1 + new_x3*X2*X1 + x2*X1 + x1) / 2;
383  break;
384  }
385  case 6://+T
386  {
387  j = neighborIndex_mg(i, oddBit, +nb, 0, 0, 0);
388  int x4 = x4_mg(i, oddBit);
389  if ( (x4 + nb) >= Z[3] && comm_dim_partitioned(3)){
390  int offset = (x4+nb - Z[3])*Vsh_t;
391  return &fwd_nbr_spinor[3][(offset + j) * site_size];
392  }
393  break;
394  }
395  case 7://-T
396  {
397  j = neighborIndex_mg(i, oddBit, -nb, 0, 0, 0);
398  int x4 = x4_mg(i, oddBit);
399  if ( (x4 - nb) < 0 && comm_dim_partitioned(3)){
400  int offset = ( x4 - nb +nFace)*Vsh_t;
401  return &back_nbr_spinor[3][(offset + j) * site_size];
402  }
403  break;
404  }
405  default: j = -1; printf("ERROR: wrong dir\n"); exit(1);
406  }
407 
408  return &spinorField[j * site_size];
409 }
410 
411 template <QudaPCType type> int neighborIndex_5d_mgpu(int i, int oddBit, int dxs, int dx4, int dx3, int dx2, int dx1)
412 {
413  int ret;
414 
415  int Y = (type == QUDA_5D_PC) ? fullLatticeIndex_5d(i, oddBit) : fullLatticeIndex_5d_4dpc(i, oddBit);
416 
417  int xs = Y/(Z[3]*Z[2]*Z[1]*Z[0]);
418  int x4 = (Y/(Z[2]*Z[1]*Z[0])) % Z[3];
419  int x3 = (Y/(Z[1]*Z[0])) % Z[2];
420  int x2 = (Y/Z[0]) % Z[1];
421  int x1 = Y % Z[0];
422  int ghost_x4 = x4+ dx4;
423 
424  xs = (xs+dxs+Ls) % Ls;
425  x4 = (x4+dx4+Z[3]) % Z[3];
426  x3 = (x3+dx3+Z[2]) % Z[2];
427  x2 = (x2+dx2+Z[1]) % Z[1];
428  x1 = (x1+dx1+Z[0]) % Z[0];
429 
430  if ( (ghost_x4 >= 0 && ghost_x4) < Z[3] || !comm_dim_partitioned(3)){
431  ret = (xs*Z[3]*Z[2]*Z[1]*Z[0] + x4*Z[2]*Z[1]*Z[0] + x3*Z[1]*Z[0] + x2*Z[0] + x1) >> 1;
432  }else{
433  ret = (xs*Z[2]*Z[1]*Z[0] + x3*Z[1]*Z[0] + x2*Z[0] + x1) >> 1;
434  }
435 
436  return ret;
437 }
438 
439 template <QudaPCType type> int x4_5d_mgpu(int i, int oddBit)
440 {
441  int Y = (type == QUDA_5D_PC) ? fullLatticeIndex_5d(i, oddBit) : fullLatticeIndex_5d_4dpc(i, oddBit);
442  return (Y/(Z[2]*Z[1]*Z[0])) % Z[3];
443 }
444 
445 template <QudaPCType type, typename Float>
446 Float *spinorNeighbor_5d_mgpu(int i, int dir, int oddBit, Float *spinorField, Float **fwd_nbr_spinor,
447  Float **back_nbr_spinor, int neighbor_distance, int nFace, int site_size = 24)
448 {
449  int j;
450  int nb = neighbor_distance;
451  int Y = (type == QUDA_5D_PC) ? fullLatticeIndex_5d(i, oddBit) : fullLatticeIndex_5d_4dpc(i, oddBit);
452 
453  int xs = Y/(Z[3]*Z[2]*Z[1]*Z[0]);
454  int x4 = (Y/(Z[2]*Z[1]*Z[0])) % Z[3];
455  int x3 = (Y/(Z[1]*Z[0])) % Z[2];
456  int x2 = (Y/Z[0]) % Z[1];
457  int x1 = Y % Z[0];
458 
459  int X1= Z[0];
460  int X2= Z[1];
461  int X3= Z[2];
462  int X4= Z[3];
463  switch (dir) {
464  case 0://+X
465  {
466  int new_x1 = (x1 + nb)% X1;
467  if(x1+nb >=X1 && comm_dim_partitioned(0)) {
468  int offset = ((x1 + nb -X1)*Ls*X4*X3*X2+xs*X4*X3*X2+x4*X3*X2 + x3*X2+x2) >> 1;
469  return fwd_nbr_spinor[0] + offset * site_size;
470  }
471  j = (xs*X4*X3*X2*X1 + x4*X3*X2*X1 + x3*X2*X1 + x2*X1 + new_x1) >> 1;
472  break;
473  }
474  case 1://-X
475  {
476  int new_x1 = (x1 - nb + X1)% X1;
477  if(x1 - nb < 0 && comm_dim_partitioned(0)) {
478  int offset = (( x1+nFace- nb)*Ls*X4*X3*X2 + xs*X4*X3*X2 + x4*X3*X2 + x3*X2 + x2) >> 1;
479  return back_nbr_spinor[0] + offset * site_size;
480  }
481  j = (xs*X4*X3*X2*X1 + x4*X3*X2*X1 + x3*X2*X1 + x2*X1 + new_x1) >> 1;
482  break;
483  }
484  case 2://+Y
485  {
486  int new_x2 = (x2 + nb)% X2;
487  if(x2+nb >=X2 && comm_dim_partitioned(1)) {
488  int offset = (( x2 + nb -X2)*Ls*X4*X3*X1+xs*X4*X3*X1+x4*X3*X1 + x3*X1+x1) >> 1;
489  return fwd_nbr_spinor[1] + offset * site_size;
490  }
491  j = (xs*X4*X3*X2*X1 + x4*X3*X2*X1 + x3*X2*X1 + new_x2*X1 + x1) >> 1;
492  break;
493  }
494  case 3:// -Y
495  {
496  int new_x2 = (x2 - nb + X2)% X2;
497  if(x2 - nb < 0 && comm_dim_partitioned(1)) {
498  int offset = (( x2 + nFace -nb)*Ls*X4*X3*X1+xs*X4*X3*X1+ x4*X3*X1 + x3*X1+x1) >> 1;
499  return back_nbr_spinor[1] + offset * site_size;
500  }
501  j = (xs*X4*X3*X2*X1 + x4*X3*X2*X1 + x3*X2*X1 + new_x2*X1 + x1) >> 1;
502  break;
503  }
504  case 4://+Z
505  {
506  int new_x3 = (x3 + nb)% X3;
507  if(x3+nb >=X3 && comm_dim_partitioned(2)) {
508  int offset = (( x3 + nb -X3)*Ls*X4*X2*X1+xs*X4*X2*X1+x4*X2*X1 + x2*X1+x1) >> 1;
509  return fwd_nbr_spinor[2] + offset * site_size;
510  }
511  j = (xs*X4*X3*X2*X1 + x4*X3*X2*X1 + new_x3*X2*X1 + x2*X1 + x1) >> 1;
512  break;
513  }
514  case 5://-Z
515  {
516  int new_x3 = (x3 - nb + X3)% X3;
517  if(x3 - nb < 0 && comm_dim_partitioned(2)){
518  int offset = (( x3 + nFace -nb)*Ls*X4*X2*X1+xs*X4*X2*X1+x4*X2*X1+x2*X1+x1) >> 1;
519  return back_nbr_spinor[2] + offset * site_size;
520  }
521  j = (xs*X4*X3*X2*X1 + x4*X3*X2*X1 + new_x3*X2*X1 + x2*X1 + x1) >> 1;
522  break;
523  }
524  case 6://+T
525  {
526  int x4 = x4_5d_mgpu<type>(i, oddBit);
527  if ( (x4 + nb) >= Z[3] && comm_dim_partitioned(3)) {
528  int offset = ((x4 + nb - Z[3])*Ls*X3*X2*X1+xs*X3*X2*X1+x3*X2*X1+x2*X1+x1) >> 1;
529  return fwd_nbr_spinor[3] + offset * site_size;
530  }
531  j = neighborIndex_5d_mgpu<type>(i, oddBit, 0, +nb, 0, 0, 0);
532  break;
533  }
534  case 7://-T
535  {
536  int x4 = x4_5d_mgpu<type>(i, oddBit);
537  if ( (x4 - nb) < 0 && comm_dim_partitioned(3)) {
538  int offset = (( x4 - nb +nFace)*Ls*X3*X2*X1+xs*X3*X2*X1+x3*X2*X1+x2*X1+x1) >> 1;
539  return back_nbr_spinor[3] + offset * site_size;
540  }
541  j = neighborIndex_5d_mgpu<type>(i, oddBit, 0, -nb, 0, 0, 0);
542  break;
543  }
544  default: j = -1; printf("ERROR: wrong dir\n"); exit(1);
545  }
546 
547  return &spinorField[j * site_size];
548 }
549 
550 
551 #endif // MULTI_GPU
int comm_dim_partitioned(int dim)
double mass
int Z[4]
Definition: host_utils.cpp:36
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
cudaColorSpinorField * tmp
Definition: covdev_test.cpp:34
cpuColorSpinorField * spinorOut
Definition: covdev_test.cpp:31
QudaGaugeParam gauge_param
Definition: covdev_test.cpp:26
QudaInvertParam inv_param
Definition: covdev_test.cpp:27
Float * spinorNeighbor_5d(int i, int dir, int oddBit, Float *spinorField, int neighbor_distance=1, int site_size=24)
void verifyInversion(void *spinorOut, void *spinorIn, void *spinorCheck, QudaGaugeParam &gauge_param, QudaInvertParam &inv_param, void **gauge, void *clover, void *clover_inv)
void verifyStaggeredInversion(quda::ColorSpinorField *tmp, quda::ColorSpinorField *ref, quda::ColorSpinorField *in, quda::ColorSpinorField *out, double mass, void *qdp_fatlink[], void *qdp_longlink[], void **ghost_fatlink, void **ghost_longlink, QudaGaugeParam &gauge_param, QudaInvertParam &inv_param, int shift)
int neighborIndex_5d(int i, int oddBit, int dxs, int dx4, int dx3, int dx2, int dx1)
void verifyDomainWallTypeInversion(void *spinorOut, void **spinorOutMulti, void *spinorIn, void *spinorCheck, QudaGaugeParam &gauge_param, QudaInvertParam &inv_param, void **gauge, void *clover, void *clover_inv)
void verifyWilsonTypeInversion(void *spinorOut, void **spinorOutMulti, void *spinorIn, void *spinorCheck, QudaGaugeParam &gauge_param, QudaInvertParam &inv_param, void **gauge, void *clover, void *clover_inv)
@ QUDA_5D_PC
Definition: enum_quda.h:397
int fullLatticeIndex_5d(int i, int oddBit)
Definition: host_utils.cpp:940
int Vsh_t
Definition: host_utils.cpp:40
int Ls
Definition: host_utils.cpp:48
int fullLatticeIndex(int dim[4], int index, int oddBit)
Definition: host_utils.cpp:591
int neighborIndex(int i, int oddBit, int dx4, int dx3, int dx2, int dx1)
Definition: host_utils.cpp:423
int neighborIndex_mg(int i, int oddBit, int dx4, int dx3, int dx2, int dx1)
Definition: host_utils.cpp:457
int fullLatticeIndex_5d_4dpc(int i, int oddBit)
Definition: host_utils.cpp:947
void ax(double a, ColorSpinorField &x)
double norm2(const ColorSpinorField &a)
void axpy(double a, ColorSpinorField &x, ColorSpinorField &y)
Definition: blas_quda.h:43
void axpby(double a, ColorSpinorField &x, double b, ColorSpinorField &y)
Definition: blas_quda.h:44
FloatingPoint< float > Float
__host__ __device__ T sum(const array< T, s > &a)
Definition: utility.h:76