QUDA  v1.1.0
A library for QCD on GPUs
wilson_dslash_reference.cpp
Go to the documentation of this file.
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <math.h>
4 
5 #include <util_quda.h>
6 
7 #include <host_utils.h>
9 
10 #include <gauge_field.h>
11 #include <color_spinor_field.h>
12 
13 #include <dslash_reference.h>
14 #include <string.h>
15 
16 using namespace quda;
17 
18 static const double projector[8][4][4][2] = {
19  {
20  {{1,0}, {0,0}, {0,0}, {0,-1}},
21  {{0,0}, {1,0}, {0,-1}, {0,0}},
22  {{0,0}, {0,1}, {1,0}, {0,0}},
23  {{0,1}, {0,0}, {0,0}, {1,0}}
24  },
25  {
26  {{1,0}, {0,0}, {0,0}, {0,1}},
27  {{0,0}, {1,0}, {0,1}, {0,0}},
28  {{0,0}, {0,-1}, {1,0}, {0,0}},
29  {{0,-1}, {0,0}, {0,0}, {1,0}}
30  },
31  {
32  {{1,0}, {0,0}, {0,0}, {1,0}},
33  {{0,0}, {1,0}, {-1,0}, {0,0}},
34  {{0,0}, {-1,0}, {1,0}, {0,0}},
35  {{1,0}, {0,0}, {0,0}, {1,0}}
36  },
37  {
38  {{1,0}, {0,0}, {0,0}, {-1,0}},
39  {{0,0}, {1,0}, {1,0}, {0,0}},
40  {{0,0}, {1,0}, {1,0}, {0,0}},
41  {{-1,0}, {0,0}, {0,0}, {1,0}}
42  },
43  {
44  {{1,0}, {0,0}, {0,-1}, {0,0}},
45  {{0,0}, {1,0}, {0,0}, {0,1}},
46  {{0,1}, {0,0}, {1,0}, {0,0}},
47  {{0,0}, {0,-1}, {0,0}, {1,0}}
48  },
49  {
50  {{1,0}, {0,0}, {0,1}, {0,0}},
51  {{0,0}, {1,0}, {0,0}, {0,-1}},
52  {{0,-1}, {0,0}, {1,0}, {0,0}},
53  {{0,0}, {0,1}, {0,0}, {1,0}}
54  },
55  {
56  {{1,0}, {0,0}, {-1,0}, {0,0}},
57  {{0,0}, {1,0}, {0,0}, {-1,0}},
58  {{-1,0}, {0,0}, {1,0}, {0,0}},
59  {{0,0}, {-1,0}, {0,0}, {1,0}}
60  },
61  {
62  {{1,0}, {0,0}, {1,0}, {0,0}},
63  {{0,0}, {1,0}, {0,0}, {1,0}},
64  {{1,0}, {0,0}, {1,0}, {0,0}},
65  {{0,0}, {1,0}, {0,0}, {1,0}}
66  }
67 };
68 
69 
70 // todo pass projector
71 template <typename Float>
72 void multiplySpinorByDiracProjector(Float *res, int projIdx, Float *spinorIn) {
73  for (int i=0; i<4*3*2; i++) res[i] = 0.0;
74 
75  for (int s = 0; s < 4; s++) {
76  for (int t = 0; t < 4; t++) {
77  Float projRe = projector[projIdx][s][t][0];
78  Float projIm = projector[projIdx][s][t][1];
79 
80  for (int m = 0; m < 3; m++) {
81  Float spinorRe = spinorIn[t*(3*2) + m*(2) + 0];
82  Float spinorIm = spinorIn[t*(3*2) + m*(2) + 1];
83  res[s*(3*2) + m*(2) + 0] += projRe*spinorRe - projIm*spinorIm;
84  res[s*(3*2) + m*(2) + 1] += projRe*spinorIm + projIm*spinorRe;
85  }
86  }
87  }
88 }
89 
90 
91 //
92 // dslashReference()
93 //
94 // if oddBit is zero: calculate odd parity spinor elements (using even parity spinor)
95 // if oddBit is one: calculate even parity spinor elements
96 //
97 // if daggerBit is zero: perform ordinary dslash operator
98 // if daggerBit is one: perform hermitian conjugate of dslash
99 //
100 
101 #ifndef MULTI_GPU
102 
103 template <typename sFloat, typename gFloat>
104 void dslashReference(sFloat *res, gFloat **gaugeFull, sFloat *spinorField, int oddBit, int daggerBit)
105 {
106  for (int i = 0; i < Vh * spinor_site_size; i++) res[i] = 0.0;
107 
108  gFloat *gaugeEven[4], *gaugeOdd[4];
109  for (int dir = 0; dir < 4; dir++) {
110  gaugeEven[dir] = gaugeFull[dir];
111  gaugeOdd[dir] = gaugeFull[dir] + Vh * gauge_site_size;
112  }
113 
114  for (int i = 0; i < Vh; i++) {
115  for (int dir = 0; dir < 8; dir++) {
116  gFloat *gauge = gaugeLink(i, dir, oddBit, gaugeEven, gaugeOdd, 1);
117  sFloat *spinor = spinorNeighbor(i, dir, oddBit, spinorField, 1);
118 
119  sFloat projectedSpinor[spinor_site_size], gaugedSpinor[spinor_site_size];
120  int projIdx = 2*(dir/2)+(dir+daggerBit)%2;
121  multiplySpinorByDiracProjector(projectedSpinor, projIdx, spinor);
122 
123  for (int s = 0; s < 4; s++) {
124  if (dir % 2 == 0) su3Mul(&gaugedSpinor[s*(3*2)], gauge, &projectedSpinor[s*(3*2)]);
125  else su3Tmul(&gaugedSpinor[s*(3*2)], gauge, &projectedSpinor[s*(3*2)]);
126  }
127 
128  sum(&res[i * spinor_site_size], &res[i * spinor_site_size], gaugedSpinor, spinor_site_size);
129  }
130  }
131 }
132 
133 #else
134 
135 template <typename sFloat, typename gFloat>
136 void dslashReference(sFloat *res, gFloat **gaugeFull, gFloat **ghostGauge, sFloat *spinorField, sFloat **fwdSpinor,
137  sFloat **backSpinor, int oddBit, int daggerBit)
138 {
139  for (int i = 0; i < Vh * spinor_site_size; i++) res[i] = 0.0;
140 
141  gFloat *gaugeEven[4], *gaugeOdd[4];
142  gFloat *ghostGaugeEven[4], *ghostGaugeOdd[4];
143  for (int dir = 0; dir < 4; dir++) {
144  gaugeEven[dir] = gaugeFull[dir];
145  gaugeOdd[dir] = gaugeFull[dir] + Vh * gauge_site_size;
146 
147  ghostGaugeEven[dir] = ghostGauge[dir];
148  ghostGaugeOdd[dir] = ghostGauge[dir] + (faceVolume[dir] / 2) * gauge_site_size;
149  }
150 
151  for (int i = 0; i < Vh; i++) {
152 
153  for (int dir = 0; dir < 8; dir++) {
154  gFloat *gauge = gaugeLink_mg4dir(i, dir, oddBit, gaugeEven, gaugeOdd, ghostGaugeEven, ghostGaugeOdd, 1, 1);
155  sFloat *spinor = spinorNeighbor_mg4dir(i, dir, oddBit, spinorField, fwdSpinor, backSpinor, 1, 1);
156 
157  sFloat projectedSpinor[spinor_site_size], gaugedSpinor[spinor_site_size];
158  int projIdx = 2*(dir/2)+(dir+daggerBit)%2;
159  multiplySpinorByDiracProjector(projectedSpinor, projIdx, spinor);
160 
161  for (int s = 0; s < 4; s++) {
162  if (dir % 2 == 0) su3Mul(&gaugedSpinor[s*(3*2)], gauge, &projectedSpinor[s*(3*2)]);
163  else su3Tmul(&gaugedSpinor[s*(3*2)], gauge, &projectedSpinor[s*(3*2)]);
164  }
165 
166  sum(&res[i * spinor_site_size], &res[i * spinor_site_size], gaugedSpinor, spinor_site_size);
167  }
168 
169  }
170 }
171 
172 #endif
173 
174 // this actually applies the preconditioned dslash, e.g., D_ee^{-1} D_eo or D_oo^{-1} D_oe
175 void wil_dslash(void *out, void **gauge, void *in, int oddBit, int daggerBit,
177 
178 #ifndef MULTI_GPU
179  if (precision == QUDA_DOUBLE_PRECISION)
180  dslashReference((double*)out, (double**)gauge, (double*)in, oddBit, daggerBit);
181  else
182  dslashReference((float*)out, (float**)gauge, (float*)in, oddBit, daggerBit);
183 #else
184 
185  GaugeFieldParam gauge_field_param(gauge, gauge_param);
186  gauge_field_param.ghostExchange = QUDA_GHOST_EXCHANGE_PAD;
187  cpuGaugeField cpu(gauge_field_param);
188  void **ghostGauge = (void**)cpu.Ghost();
189 
190  // Get spinor ghost fields
191  // First wrap the input spinor into a ColorSpinorField
193  csParam.v = in;
194  csParam.nColor = 3;
195  csParam.nSpin = 4;
196  csParam.nDim = 4;
197  for (int d=0; d<4; d++) csParam.x[d] = Z[d];
198  csParam.setPrecision(precision);
199  csParam.pad = 0;
201  csParam.x[0] /= 2;
206 
207  cpuColorSpinorField inField(csParam);
208 
209  { // Now do the exchange
210  QudaParity otherParity = QUDA_INVALID_PARITY;
211  if (oddBit == QUDA_EVEN_PARITY) otherParity = QUDA_ODD_PARITY;
212  else if (oddBit == QUDA_ODD_PARITY) otherParity = QUDA_EVEN_PARITY;
213  else errorQuda("ERROR: full parity not supported in function %s", __FUNCTION__);
214  const int nFace = 1;
215 
216  inField.exchangeGhost(otherParity, nFace, daggerBit);
217  }
218  void** fwd_nbr_spinor = inField.fwdGhostFaceBuffer;
219  void** back_nbr_spinor = inField.backGhostFaceBuffer;
220 
221  if (precision == QUDA_DOUBLE_PRECISION) {
222  dslashReference((double*)out, (double**)gauge, (double**)ghostGauge, (double*)in,
223  (double**)fwd_nbr_spinor, (double**)back_nbr_spinor, oddBit, daggerBit);
224  } else{
225  dslashReference((float*)out, (float**)gauge, (float**)ghostGauge, (float*)in,
226  (float**)fwd_nbr_spinor, (float**)back_nbr_spinor, oddBit, daggerBit);
227  }
228 
229 #endif
230 
231 }
232 
233 // applies b*(1 + i*a*gamma_5)
234 template <typename sFloat>
235 void twistGamma5(sFloat *out, sFloat *in, const int dagger, const sFloat kappa, const sFloat mu,
236  const QudaTwistFlavorType flavor, const int V, QudaTwistGamma5Type twist) {
237 
238  sFloat a=0.0,b=0.0;
239  if (twist == QUDA_TWIST_GAMMA5_DIRECT) { // applying the twist
240  a = 2.0 * kappa * mu * flavor; // mu already includes the flavor
241  b = 1.0;
242  } else if (twist == QUDA_TWIST_GAMMA5_INVERSE) { // applying the inverse twist
243  a = -2.0 * kappa * mu * flavor;
244  b = 1.0 / (1.0 + a*a);
245  } else {
246  printf("Twist type %d not defined\n", twist);
247  exit(0);
248  }
249 
250  if (dagger) a *= -1.0;
251 
252  for(int i = 0; i < V; i++) {
253  sFloat tmp[24];
254  for(int s = 0; s < 4; s++)
255  for(int c = 0; c < 3; c++) {
256  sFloat a5 = ((s / 2) ? -1.0 : +1.0) * a;
257  tmp[s * 6 + c * 2 + 0] = b* (in[i * 24 + s * 6 + c * 2 + 0] - a5*in[i * 24 + s * 6 + c * 2 + 1]);
258  tmp[s * 6 + c * 2 + 1] = b* (in[i * 24 + s * 6 + c * 2 + 1] + a5*in[i * 24 + s * 6 + c * 2 + 0]);
259  }
260 
261  for (int j=0; j<24; j++) out[i*24+j] = tmp[j];
262  }
263 
264 }
265 
266 void twist_gamma5(void *out, void *in, int daggerBit, double kappa, double mu, QudaTwistFlavorType flavor,
267  int V, QudaTwistGamma5Type twist, QudaPrecision precision) {
268 
269  if (precision == QUDA_DOUBLE_PRECISION) {
270  twistGamma5((double*)out, (double*)in, daggerBit, kappa, mu, flavor, V, twist);
271  } else {
272  twistGamma5((float*)out, (float*)in, daggerBit, (float)kappa, (float)mu, flavor, V, twist);
273  }
274 }
275 
276 
277 void tm_dslash(void *res, void **gaugeFull, void *spinorField, double kappa, double mu,
278  QudaTwistFlavorType flavor, int oddBit, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision,
280 {
281 
282  if (daggerBit && (matpc_type == QUDA_MATPC_EVEN_EVEN || matpc_type == QUDA_MATPC_ODD_ODD))
283  twist_gamma5(spinorField, spinorField, daggerBit, kappa, mu, flavor, Vh, QUDA_TWIST_GAMMA5_INVERSE, precision);
284 
285  wil_dslash(res, gaugeFull, spinorField, oddBit, daggerBit, precision, gauge_param);
286 
287  if (!daggerBit || (daggerBit && (matpc_type == QUDA_MATPC_EVEN_EVEN_ASYMMETRIC || matpc_type == QUDA_MATPC_ODD_ODD_ASYMMETRIC))) {
288  twist_gamma5(res, res, daggerBit, kappa, mu, flavor, Vh, QUDA_TWIST_GAMMA5_INVERSE, precision);
289  } else {
290  twist_gamma5(spinorField, spinorField, daggerBit, kappa, mu, flavor, Vh, QUDA_TWIST_GAMMA5_DIRECT, precision);
291  }
292 }
293 
294 void wil_mat(void *out, void **gauge, void *in, double kappa, int dagger_bit, QudaPrecision precision,
296 
297  void *inEven = in;
298  void *inOdd = (char *)in + Vh * spinor_site_size * precision;
299  void *outEven = out;
300  void *outOdd = (char *)out + Vh * spinor_site_size * precision;
301 
302  wil_dslash(outOdd, gauge, inEven, 1, dagger_bit, precision, gauge_param);
303  wil_dslash(outEven, gauge, inOdd, 0, dagger_bit, precision, gauge_param);
304 
305  // lastly apply the kappa term
306  xpay(in, -kappa, out, V * spinor_site_size, precision);
307 }
308 
309 void tm_mat(void *out, void **gauge, void *in, double kappa, double mu,
310  QudaTwistFlavorType flavor, int dagger_bit, QudaPrecision precision,
312 
313  void *inEven = in;
314  void *inOdd = (char *)in + Vh * spinor_site_size * precision;
315  void *outEven = out;
316  void *outOdd = (char *)out + Vh * spinor_site_size * precision;
317  void *tmp = malloc(V * spinor_site_size * precision);
318 
319  wil_dslash(outOdd, gauge, inEven, 1, dagger_bit, precision, gauge_param);
320  wil_dslash(outEven, gauge, inOdd, 0, dagger_bit, precision, gauge_param);
321 
322  // apply the twist term to the full lattice
323  twist_gamma5(tmp, in, dagger_bit, kappa, mu, flavor, V, QUDA_TWIST_GAMMA5_DIRECT, precision);
324 
325  // combine
326  xpay(tmp, -kappa, (double *)out, V * spinor_site_size, precision);
327 
328  free(tmp);
329 }
330 
331 // Apply the even-odd preconditioned Dirac operator
332 void wil_matpc(void *outEven, void **gauge, void *inEven, double kappa,
333  QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision,
335 
336  void *tmp = malloc(Vh * spinor_site_size * precision);
337 
338  // FIXME: remove once reference clover is finished
339  // full dslash operator
341  wil_dslash(tmp, gauge, inEven, 1, daggerBit, precision, gauge_param);
342  wil_dslash(outEven, gauge, tmp, 0, daggerBit, precision, gauge_param);
343  } else {
344  wil_dslash(tmp, gauge, inEven, 0, daggerBit, precision, gauge_param);
345  wil_dslash(outEven, gauge, tmp, 1, daggerBit, precision, gauge_param);
346  }
347 
348  // lastly apply the kappa term
349  double kappa2 = -kappa*kappa;
350  xpay(inEven, kappa2, outEven, Vh * spinor_site_size, precision);
351 
352  free(tmp);
353 }
354 
355 // Apply the even-odd preconditioned Dirac operator
356 void tm_matpc(void *outEven, void **gauge, void *inEven, double kappa, double mu, QudaTwistFlavorType flavor,
357  QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param) {
358 
359  void *tmp = malloc(Vh * spinor_site_size * precision);
360 
362  wil_dslash(tmp, gauge, inEven, 1, daggerBit, precision, gauge_param);
363  twist_gamma5(tmp, tmp, daggerBit, kappa, mu, flavor, Vh, QUDA_TWIST_GAMMA5_INVERSE, precision);
364  wil_dslash(outEven, gauge, tmp, 0, daggerBit, precision, gauge_param);
365  twist_gamma5(tmp, inEven, daggerBit, kappa, mu, flavor, Vh, QUDA_TWIST_GAMMA5_DIRECT, precision);
367  wil_dslash(tmp, gauge, inEven, 0, daggerBit, precision, gauge_param);
368  twist_gamma5(tmp, tmp, daggerBit, kappa, mu, flavor, Vh, QUDA_TWIST_GAMMA5_INVERSE, precision);
369  wil_dslash(outEven, gauge, tmp, 1, daggerBit, precision, gauge_param);
370  twist_gamma5(tmp, inEven, daggerBit, kappa, mu, flavor, Vh, QUDA_TWIST_GAMMA5_DIRECT, precision);
371  } else if (!daggerBit) {
373  wil_dslash(tmp, gauge, inEven, 1, daggerBit, precision, gauge_param);
374  twist_gamma5(tmp, tmp, daggerBit, kappa, mu, flavor, Vh, QUDA_TWIST_GAMMA5_INVERSE, precision);
375  wil_dslash(outEven, gauge, tmp, 0, daggerBit, precision, gauge_param);
376  twist_gamma5(outEven, outEven, daggerBit, kappa, mu, flavor, Vh, QUDA_TWIST_GAMMA5_INVERSE, precision);
377  } else if (matpc_type == QUDA_MATPC_ODD_ODD) {
378  wil_dslash(tmp, gauge, inEven, 0, daggerBit, precision, gauge_param);
379  twist_gamma5(tmp, tmp, daggerBit, kappa, mu, flavor, Vh, QUDA_TWIST_GAMMA5_INVERSE, precision);
380  wil_dslash(outEven, gauge, tmp, 1, daggerBit, precision, gauge_param);
381  twist_gamma5(outEven, outEven, daggerBit, kappa, mu, flavor, Vh, QUDA_TWIST_GAMMA5_INVERSE, precision);
382  }
383  } else {
385  twist_gamma5(inEven, inEven, daggerBit, kappa, mu, flavor, Vh, QUDA_TWIST_GAMMA5_INVERSE, precision);
386  wil_dslash(tmp, gauge, inEven, 1, daggerBit, precision, gauge_param);
387  twist_gamma5(tmp, tmp, daggerBit, kappa, mu, flavor, Vh, QUDA_TWIST_GAMMA5_INVERSE, precision);
388  wil_dslash(outEven, gauge, tmp, 0, daggerBit, precision, gauge_param);
389  twist_gamma5(inEven, inEven, daggerBit, kappa, mu, flavor, Vh, QUDA_TWIST_GAMMA5_DIRECT, precision);
390  } else if (matpc_type == QUDA_MATPC_ODD_ODD) {
391  twist_gamma5(inEven, inEven, daggerBit, kappa, mu, flavor, Vh, QUDA_TWIST_GAMMA5_INVERSE, precision);
392  wil_dslash(tmp, gauge, inEven, 0, daggerBit, precision, gauge_param);
393  twist_gamma5(tmp, tmp, daggerBit, kappa, mu, flavor, Vh, QUDA_TWIST_GAMMA5_INVERSE, precision);
394  wil_dslash(outEven, gauge, tmp, 1, daggerBit, precision, gauge_param);
395  twist_gamma5(inEven, inEven, daggerBit, kappa, mu, flavor, Vh, QUDA_TWIST_GAMMA5_DIRECT, precision); // undo
396  }
397  }
398  // lastly apply the kappa term
399  double kappa2 = -kappa*kappa;
401  xpay(inEven, kappa2, outEven, Vh * spinor_site_size, precision);
402  } else {
403  xpay(tmp, kappa2, outEven, Vh * spinor_site_size, precision);
404  }
405 
406  free(tmp);
407 }
408 
409 
410 //----- for non-degenerate dslash only----
411 template <typename sFloat>
412 void ndegTwistGamma5(sFloat *out1, sFloat *out2, sFloat *in1, sFloat *in2, const int dagger, const sFloat kappa, const sFloat mu,
413  const sFloat epsilon, const int V, QudaTwistGamma5Type twist) {
414 
415  sFloat a=0.0, b=0.0, d=0.0;
416  if (twist == QUDA_TWIST_GAMMA5_DIRECT) { // applying the twist
417  a = 2.0 * kappa * mu;
418  b = -2.0 * kappa * epsilon;
419  d = 1.0;
420  } else if (twist == QUDA_TWIST_GAMMA5_INVERSE) { // applying the inverse twist
421  a = -2.0 * kappa * mu;
422  b = 2.0 * kappa * epsilon;
423  d = 1.0 / (1.0 + a*a - b*b);
424  } else {
425  printf("Twist type %d not defined\n", twist);
426  exit(0);
427  }
428 
429  if (dagger) a *= -1.0;
430 
431  for(int i = 0; i < V; i++) {
432  sFloat tmp1[24];
433  sFloat tmp2[24];
434  for(int s = 0; s < 4; s++)
435  for(int c = 0; c < 3; c++) {
436  sFloat a5 = ((s / 2) ? -1.0 : +1.0) * a;
437  tmp1[s * 6 + c * 2 + 0] = d
438  * (in1[i * 24 + s * 6 + c * 2 + 0] - a5 * in1[i * 24 + s * 6 + c * 2 + 1]
439  + b * in2[i * 24 + s * 6 + c * 2 + 0]);
440  tmp1[s * 6 + c * 2 + 1] = d
441  * (in1[i * 24 + s * 6 + c * 2 + 1] + a5 * in1[i * 24 + s * 6 + c * 2 + 0]
442  + b * in2[i * 24 + s * 6 + c * 2 + 1]);
443  tmp2[s * 6 + c * 2 + 0] = d
444  * (in2[i * 24 + s * 6 + c * 2 + 0] + a5 * in2[i * 24 + s * 6 + c * 2 + 1]
445  + b * in1[i * 24 + s * 6 + c * 2 + 0]);
446  tmp2[s * 6 + c * 2 + 1] = d
447  * (in2[i * 24 + s * 6 + c * 2 + 1] - a5 * in2[i * 24 + s * 6 + c * 2 + 0]
448  + b * in1[i * 24 + s * 6 + c * 2 + 1]);
449  }
450  for (int j=0; j<24; j++) out1[i*24+j] = tmp1[j], out2[i*24+j] = tmp2[j];
451  }
452 
453 }
454 
455 void ndeg_twist_gamma5(void *outf1, void *outf2, void *inf1, void *inf2, const int dagger, const double kappa, const double mu,
456  const double epsilon, const int Vf, QudaTwistGamma5Type twist, QudaPrecision precision)
457 {
458  if (precision == QUDA_DOUBLE_PRECISION)
459  {
460  ndegTwistGamma5((double*)outf1, (double*)outf2, (double*)inf1, (double*)inf2, dagger, kappa, mu, epsilon, Vf, twist);
461  }
462  else //single precision dslash
463  {
464  ndegTwistGamma5((float*)outf1, (float*)outf2, (float*)inf1, (float*)inf2, dagger, (float)kappa, (float)mu, (float)epsilon, Vf, twist);
465  }
466 }
467 
468 void tm_ndeg_dslash(void *res1, void *res2, void **gauge, void *spinorField1, void *spinorField2, double kappa, double mu,
469  double epsilon, int oddBit, int daggerBit, QudaMatPCType matpc_type, QudaPrecision precision, QudaGaugeParam &gauge_param)
470 {
471  if (daggerBit && (matpc_type == QUDA_MATPC_EVEN_EVEN || matpc_type == QUDA_MATPC_ODD_ODD))
472  ndeg_twist_gamma5(spinorField1, spinorField2, spinorField1, spinorField2, daggerBit, kappa, mu, epsilon, Vh,
473  QUDA_TWIST_GAMMA5_INVERSE, precision);
474 
475  wil_dslash(res1, gauge, spinorField1, oddBit, daggerBit, precision, gauge_param);
476  wil_dslash(res2, gauge, spinorField2, oddBit, daggerBit, precision, gauge_param);
477 
479  ndeg_twist_gamma5(res1, res2, res1, res2, daggerBit, kappa, mu, epsilon, Vh, QUDA_TWIST_GAMMA5_INVERSE, precision);
480  }
481 }
482 
483 
484 void tm_ndeg_matpc(void *outEven1, void *outEven2, void **gauge, void *inEven1, void *inEven2, double kappa, double mu, double epsilon,
485  QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param) {
486 
487  void *tmp1 = malloc(Vh * spinor_site_size * precision);
488  void *tmp2 = malloc(Vh * spinor_site_size * precision);
489 
490  if (!daggerBit) {
492  wil_dslash(tmp1, gauge, inEven1, 1, daggerBit, precision, gauge_param);
493  wil_dslash(tmp2, gauge, inEven2, 1, daggerBit, precision, gauge_param);
494  ndeg_twist_gamma5(tmp1, tmp2, tmp1, tmp2, daggerBit, kappa, mu, epsilon, Vh, QUDA_TWIST_GAMMA5_INVERSE, precision);
495  wil_dslash(outEven1, gauge, tmp1, 0, daggerBit, precision, gauge_param);
496  wil_dslash(outEven2, gauge, tmp2, 0, daggerBit, precision, gauge_param);
497  ndeg_twist_gamma5(outEven1, outEven2, outEven1, outEven2, daggerBit, kappa, mu, epsilon, Vh, QUDA_TWIST_GAMMA5_INVERSE, precision);
498  } else if (matpc_type == QUDA_MATPC_ODD_ODD) {
499  wil_dslash(tmp1, gauge, inEven1, 0, daggerBit, precision, gauge_param);
500  wil_dslash(tmp2, gauge, inEven2, 0, daggerBit, precision, gauge_param);
501  ndeg_twist_gamma5(tmp1, tmp2, tmp1, tmp2, daggerBit, kappa, mu, epsilon, Vh, QUDA_TWIST_GAMMA5_INVERSE, precision);
502  wil_dslash(outEven1, gauge, tmp1, 1, daggerBit, precision, gauge_param);
503  wil_dslash(outEven2, gauge, tmp2, 1, daggerBit, precision, gauge_param);
504  ndeg_twist_gamma5(outEven1, outEven2, outEven1, outEven2, daggerBit, kappa, mu, epsilon, Vh, QUDA_TWIST_GAMMA5_INVERSE, precision);
505  }
506  } else {
509  tmp1, tmp2, inEven1, inEven2, daggerBit, kappa, mu, epsilon, Vh, QUDA_TWIST_GAMMA5_INVERSE, precision);
510  wil_dslash(outEven1, gauge, tmp1, 1, daggerBit, precision, gauge_param);
511  wil_dslash(outEven2, gauge, tmp2, 1, daggerBit, precision, gauge_param);
512  ndeg_twist_gamma5(tmp1, tmp2, outEven1, outEven2, daggerBit, kappa, mu, epsilon, Vh, QUDA_TWIST_GAMMA5_INVERSE, precision);
513  wil_dslash(outEven1, gauge, tmp1, 0, daggerBit, precision, gauge_param);
514  wil_dslash(outEven2, gauge, tmp2, 0, daggerBit, precision, gauge_param);
515  } else if (matpc_type == QUDA_MATPC_ODD_ODD) {
517  tmp1, tmp2, inEven1, inEven2, daggerBit, kappa, mu, epsilon, Vh, QUDA_TWIST_GAMMA5_INVERSE, precision);
518  wil_dslash(outEven1, gauge, tmp1, 0, daggerBit, precision, gauge_param);
519  wil_dslash(outEven2, gauge, tmp2, 0, daggerBit, precision, gauge_param);
520  ndeg_twist_gamma5(tmp1, tmp2, outEven1, outEven2, daggerBit, kappa, mu, epsilon, Vh, QUDA_TWIST_GAMMA5_INVERSE, precision);
521  wil_dslash(outEven1, gauge, tmp1, 1, daggerBit, precision, gauge_param);
522  wil_dslash(outEven2, gauge, tmp2, 1, daggerBit, precision, gauge_param);
523  }
524  }
525 
527  wil_dslash(tmp1, gauge, inEven1, 1, daggerBit, precision, gauge_param);
528  wil_dslash(tmp2, gauge, inEven2, 1, daggerBit, precision, gauge_param);
529  ndeg_twist_gamma5(tmp1, tmp2, tmp1, tmp2, daggerBit, kappa, mu, epsilon, Vh, QUDA_TWIST_GAMMA5_INVERSE, precision);
530  wil_dslash(outEven1, gauge, tmp1, 0, daggerBit, precision, gauge_param);
531  wil_dslash(outEven2, gauge, tmp2, 0, daggerBit, precision, gauge_param);
533  wil_dslash(tmp1, gauge, inEven1, 0, daggerBit, precision, gauge_param);
534  wil_dslash(tmp2, gauge, inEven2, 0, daggerBit, precision, gauge_param);
535  ndeg_twist_gamma5(tmp1, tmp2, tmp1, tmp2, daggerBit, kappa, mu, epsilon, Vh, QUDA_TWIST_GAMMA5_INVERSE, precision);
536  wil_dslash(outEven1, gauge, tmp1, 1, daggerBit, precision, gauge_param);
537  wil_dslash(outEven2, gauge, tmp2, 1, daggerBit, precision, gauge_param);
538  }
539 
540  // lastly apply the kappa term
541  double kappa2 = -kappa*kappa;
543  ndeg_twist_gamma5(inEven1, inEven2, inEven1, inEven2, daggerBit, kappa, mu, epsilon, Vh, QUDA_TWIST_GAMMA5_DIRECT, precision);
544  }
545 
546  xpay(inEven1, kappa2, outEven1, Vh * spinor_site_size, precision);
547  xpay(inEven2, kappa2, outEven2, Vh * spinor_site_size, precision);
548 
549  free(tmp1);
550  free(tmp2);
551 }
552 
553 
554 void tm_ndeg_mat(void *evenOut, void* oddOut, void **gauge, void *evenIn, void *oddIn, double kappa, double mu, double epsilon, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
555 {
556  //V-4d volume and Vh=V/2
557  void *inEven1 = evenIn;
558  void *inEven2 = (char *)evenIn + precision * Vh * spinor_site_size;
559 
560  void *inOdd1 = oddIn;
561  void *inOdd2 = (char *)oddIn + precision * Vh * spinor_site_size;
562 
563  void *outEven1 = evenOut;
564  void *outEven2 = (char *)evenOut + precision * Vh * spinor_site_size;
565 
566  void *outOdd1 = oddOut;
567  void *outOdd2 = (char *)oddOut + precision * Vh * spinor_site_size;
568 
569  void *tmpEven1 = malloc(Vh * spinor_site_size * precision);
570  void *tmpEven2 = malloc(Vh * spinor_site_size * precision);
571 
572  void *tmpOdd1 = malloc(Vh * spinor_site_size * precision);
573  void *tmpOdd2 = malloc(Vh * spinor_site_size * precision);
574 
575  // full dslash operator:
576  wil_dslash(outOdd1, gauge, inEven1, 1, daggerBit, precision, gauge_param);
577  wil_dslash(outOdd2, gauge, inEven2, 1, daggerBit, precision, gauge_param);
578 
579  wil_dslash(outEven1, gauge, inOdd1, 0, daggerBit, precision, gauge_param);
580  wil_dslash(outEven2, gauge, inOdd2, 0, daggerBit, precision, gauge_param);
581 
582  // apply the twist term
583  ndeg_twist_gamma5(tmpEven1, tmpEven2, inEven1, inEven2, daggerBit, kappa, mu, epsilon, Vh, QUDA_TWIST_GAMMA5_DIRECT, precision);
584  ndeg_twist_gamma5(tmpOdd1, tmpOdd2, inOdd1, inOdd2, daggerBit, kappa, mu, epsilon, Vh, QUDA_TWIST_GAMMA5_DIRECT, precision);
585  // combine
586  xpay(tmpOdd1, -kappa, outOdd1, Vh * spinor_site_size, precision);
587  xpay(tmpOdd2, -kappa, outOdd2, Vh * spinor_site_size, precision);
588 
589  xpay(tmpEven1, -kappa, outEven1, Vh * spinor_site_size, precision);
590  xpay(tmpEven2, -kappa, outEven2, Vh * spinor_site_size, precision);
591 
592  free(tmpOdd1);
593  free(tmpOdd2);
594  //
595  free(tmpEven1);
596  free(tmpEven2);
597 }
598 
599 //End of nondeg TM
void setPrecision(QudaPrecision precision, QudaPrecision ghost_precision=QUDA_INVALID_PRECISION, bool force_native=false)
const void ** Ghost() const
Definition: gauge_field.h:368
void exchangeGhost(QudaParity parity, int nFace, int dagger, const MemoryLocation *pack_destination=nullptr, const MemoryLocation *halo_location=nullptr, bool gdr_send=false, bool gdr_recv=false, QudaPrecision ghost_precision=QUDA_INVALID_PRECISION) const
This is a unified ghost exchange function for doing a complete halo exchange regardless of the type o...
static void * fwdGhostFaceBuffer[QUDA_MAX_DIM]
static void * backGhostFaceBuffer[QUDA_MAX_DIM]
double kappa
double epsilon
double mu
QudaMatPCType matpc_type
bool dagger
int Vh
Definition: host_utils.cpp:38
int Z[4]
Definition: host_utils.cpp:36
int V
Definition: host_utils.cpp:37
cudaColorSpinorField * tmp
Definition: covdev_test.cpp:34
cpuColorSpinorField * spinor
Definition: covdev_test.cpp:31
QudaGaugeParam gauge_param
Definition: covdev_test.cpp:26
const double projector[10][4][4][2]
enum QudaPrecision_s QudaPrecision
enum QudaTwistFlavorType_s QudaTwistFlavorType
@ QUDA_PARITY_SITE_SUBSET
Definition: enum_quda.h:332
@ QUDA_DEGRAND_ROSSI_GAMMA_BASIS
Definition: enum_quda.h:368
enum QudaTwistGamma5Type_s QudaTwistGamma5Type
@ QUDA_EVEN_PARITY
Definition: enum_quda.h:284
@ QUDA_ODD_PARITY
Definition: enum_quda.h:284
@ QUDA_INVALID_PARITY
Definition: enum_quda.h:284
@ QUDA_GHOST_EXCHANGE_PAD
Definition: enum_quda.h:509
@ QUDA_MATPC_ODD_ODD_ASYMMETRIC
Definition: enum_quda.h:219
@ QUDA_MATPC_EVEN_EVEN_ASYMMETRIC
Definition: enum_quda.h:218
@ QUDA_MATPC_ODD_ODD
Definition: enum_quda.h:217
@ QUDA_MATPC_EVEN_EVEN
Definition: enum_quda.h:216
enum QudaMatPCType_s QudaMatPCType
@ QUDA_TWIST_GAMMA5_INVERSE
Definition: enum_quda.h:424
@ QUDA_TWIST_GAMMA5_DIRECT
Definition: enum_quda.h:423
@ QUDA_EVEN_ODD_SITE_ORDER
Definition: enum_quda.h:340
@ QUDA_DOUBLE_PRECISION
Definition: enum_quda.h:65
@ QUDA_SPACE_SPIN_COLOR_FIELD_ORDER
Definition: enum_quda.h:351
@ QUDA_REFERENCE_FIELD_CREATE
Definition: enum_quda.h:363
enum QudaParity_s QudaParity
#define gauge_site_size
Definition: face_gauge.cpp:34
int faceVolume[4]
Definition: host_utils.cpp:41
#define spinor_site_size
Definition: host_utils.h:9
void xpay(ColorSpinorField &x, double a, ColorSpinorField &y)
Definition: blas_quda.h:45
FloatingPoint< float > Float
__host__ __device__ T sum(const array< T, s > &a)
Definition: utility.h:76
ColorSpinorParam csParam
Definition: pack_test.cpp:25
QudaGhostExchange ghostExchange
Definition: lattice_field.h:77
int x[QUDA_MAX_DIM]
Definition: lattice_field.h:68
QudaSiteSubset siteSubset
Definition: lattice_field.h:72
#define errorQuda(...)
Definition: util_quda.h:120
void multiplySpinorByDiracProjector(Float *res, int projIdx, Float *spinorIn)
void dslashReference(sFloat *res, gFloat **gaugeFull, sFloat *spinorField, int oddBit, int daggerBit)
void ndeg_twist_gamma5(void *outf1, void *outf2, void *inf1, void *inf2, const int dagger, const double kappa, const double mu, const double epsilon, const int Vf, QudaTwistGamma5Type twist, QudaPrecision precision)
void wil_mat(void *out, void **gauge, void *in, double kappa, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param)
void twistGamma5(sFloat *out, sFloat *in, const int dagger, const sFloat kappa, const sFloat mu, const QudaTwistFlavorType flavor, const int V, QudaTwistGamma5Type twist)
void ndegTwistGamma5(sFloat *out1, sFloat *out2, sFloat *in1, sFloat *in2, const int dagger, const sFloat kappa, const sFloat mu, const sFloat epsilon, const int V, QudaTwistGamma5Type twist)
void tm_dslash(void *res, void **gaugeFull, void *spinorField, double kappa, double mu, QudaTwistFlavorType flavor, int oddBit, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
void tm_mat(void *out, void **gauge, void *in, double kappa, double mu, QudaTwistFlavorType flavor, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param)
void tm_ndeg_mat(void *evenOut, void *oddOut, void **gauge, void *evenIn, void *oddIn, double kappa, double mu, double epsilon, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
void wil_matpc(void *outEven, void **gauge, void *inEven, double kappa, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
void wil_dslash(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
void tm_ndeg_matpc(void *outEven1, void *outEven2, void **gauge, void *inEven1, void *inEven2, double kappa, double mu, double epsilon, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
void twist_gamma5(void *out, void *in, int daggerBit, double kappa, double mu, QudaTwistFlavorType flavor, int V, QudaTwistGamma5Type twist, QudaPrecision precision)
void tm_matpc(void *outEven, void **gauge, void *inEven, double kappa, double mu, QudaTwistFlavorType flavor, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
void tm_ndeg_dslash(void *res1, void *res2, void **gauge, void *spinorField1, void *spinorField2, double kappa, double mu, double epsilon, int oddBit, int daggerBit, QudaMatPCType matpc_type, QudaPrecision precision, QudaGaugeParam &gauge_param)