QUDA  v0.7.0
A library for QCD on GPUs
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
wilson_dslash_reference.cpp
Go to the documentation of this file.
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <math.h>
4 
5 #include <util_quda.h>
6 
7 #include <test_util.h>
8 #include <blas_reference.h>
10 
11 #include <gauge_field.h>
12 #include <color_spinor_field.h>
13 #include <face_quda.h>
14 
15 #include <dslash_util.h>
16 #include <string.h>
17 
18 using namespace quda;
19 
20 static const double projector[8][4][4][2] = {
21  {
22  {{1,0}, {0,0}, {0,0}, {0,-1}},
23  {{0,0}, {1,0}, {0,-1}, {0,0}},
24  {{0,0}, {0,1}, {1,0}, {0,0}},
25  {{0,1}, {0,0}, {0,0}, {1,0}}
26  },
27  {
28  {{1,0}, {0,0}, {0,0}, {0,1}},
29  {{0,0}, {1,0}, {0,1}, {0,0}},
30  {{0,0}, {0,-1}, {1,0}, {0,0}},
31  {{0,-1}, {0,0}, {0,0}, {1,0}}
32  },
33  {
34  {{1,0}, {0,0}, {0,0}, {1,0}},
35  {{0,0}, {1,0}, {-1,0}, {0,0}},
36  {{0,0}, {-1,0}, {1,0}, {0,0}},
37  {{1,0}, {0,0}, {0,0}, {1,0}}
38  },
39  {
40  {{1,0}, {0,0}, {0,0}, {-1,0}},
41  {{0,0}, {1,0}, {1,0}, {0,0}},
42  {{0,0}, {1,0}, {1,0}, {0,0}},
43  {{-1,0}, {0,0}, {0,0}, {1,0}}
44  },
45  {
46  {{1,0}, {0,0}, {0,-1}, {0,0}},
47  {{0,0}, {1,0}, {0,0}, {0,1}},
48  {{0,1}, {0,0}, {1,0}, {0,0}},
49  {{0,0}, {0,-1}, {0,0}, {1,0}}
50  },
51  {
52  {{1,0}, {0,0}, {0,1}, {0,0}},
53  {{0,0}, {1,0}, {0,0}, {0,-1}},
54  {{0,-1}, {0,0}, {1,0}, {0,0}},
55  {{0,0}, {0,1}, {0,0}, {1,0}}
56  },
57  {
58  {{1,0}, {0,0}, {-1,0}, {0,0}},
59  {{0,0}, {1,0}, {0,0}, {-1,0}},
60  {{-1,0}, {0,0}, {1,0}, {0,0}},
61  {{0,0}, {-1,0}, {0,0}, {1,0}}
62  },
63  {
64  {{1,0}, {0,0}, {1,0}, {0,0}},
65  {{0,0}, {1,0}, {0,0}, {1,0}},
66  {{1,0}, {0,0}, {1,0}, {0,0}},
67  {{0,0}, {1,0}, {0,0}, {1,0}}
68  }
69 };
70 
71 
72 // todo pass projector
73 template <typename Float>
74 void multiplySpinorByDiracProjector(Float *res, int projIdx, Float *spinorIn) {
75  for (int i=0; i<4*3*2; i++) res[i] = 0.0;
76 
77  for (int s = 0; s < 4; s++) {
78  for (int t = 0; t < 4; t++) {
79  Float projRe = projector[projIdx][s][t][0];
80  Float projIm = projector[projIdx][s][t][1];
81 
82  for (int m = 0; m < 3; m++) {
83  Float spinorRe = spinorIn[t*(3*2) + m*(2) + 0];
84  Float spinorIm = spinorIn[t*(3*2) + m*(2) + 1];
85  res[s*(3*2) + m*(2) + 0] += projRe*spinorRe - projIm*spinorIm;
86  res[s*(3*2) + m*(2) + 1] += projRe*spinorIm + projIm*spinorRe;
87  }
88  }
89  }
90 }
91 
92 
93 //
94 // dslashReference()
95 //
96 // if oddBit is zero: calculate odd parity spinor elements (using even parity spinor)
97 // if oddBit is one: calculate even parity spinor elements
98 //
99 // if daggerBit is zero: perform ordinary dslash operator
100 // if daggerBit is one: perform hermitian conjugate of dslash
101 //
102 
103 #ifndef MULTI_GPU
104 
105 template <typename sFloat, typename gFloat>
106 void dslashReference(sFloat *res, gFloat **gaugeFull, sFloat *spinorField, int oddBit, int daggerBit) {
107  for (int i=0; i<Vh*mySpinorSiteSize; i++) res[i] = 0.0;
108 
109  gFloat *gaugeEven[4], *gaugeOdd[4];
110  for (int dir = 0; dir < 4; dir++) {
111  gaugeEven[dir] = gaugeFull[dir];
112  gaugeOdd[dir] = gaugeFull[dir]+Vh*gaugeSiteSize;
113  }
114 
115  for (int i = 0; i < Vh; i++) {
116  for (int dir = 0; dir < 8; dir++) {
117  gFloat *gauge = gaugeLink(i, dir, oddBit, gaugeEven, gaugeOdd, 1);
118  sFloat *spinor = spinorNeighbor(i, dir, oddBit, spinorField, 1);
119 
120  sFloat projectedSpinor[4*3*2], gaugedSpinor[4*3*2];
121  int projIdx = 2*(dir/2)+(dir+daggerBit)%2;
122  multiplySpinorByDiracProjector(projectedSpinor, projIdx, spinor);
123 
124  for (int s = 0; s < 4; s++) {
125  if (dir % 2 == 0) su3Mul(&gaugedSpinor[s*(3*2)], gauge, &projectedSpinor[s*(3*2)]);
126  else su3Tmul(&gaugedSpinor[s*(3*2)], gauge, &projectedSpinor[s*(3*2)]);
127  }
128 
129  sum(&res[i*(4*3*2)], &res[i*(4*3*2)], gaugedSpinor, 4*3*2);
130  }
131  }
132 }
133 
134 #else
135 
136 template <typename sFloat, typename gFloat>
137 void dslashReference(sFloat *res, gFloat **gaugeFull, gFloat **ghostGauge, sFloat *spinorField,
138  sFloat **fwdSpinor, sFloat **backSpinor, int oddBit, int daggerBit) {
139  for (int i=0; i<Vh*mySpinorSiteSize; i++) res[i] = 0.0;
140 
141  gFloat *gaugeEven[4], *gaugeOdd[4];
142  gFloat *ghostGaugeEven[4], *ghostGaugeOdd[4];
143  for (int dir = 0; dir < 4; dir++) {
144  gaugeEven[dir] = gaugeFull[dir];
145  gaugeOdd[dir] = gaugeFull[dir]+Vh*gaugeSiteSize;
146 
147  ghostGaugeEven[dir] = ghostGauge[dir];
148  ghostGaugeOdd[dir] = ghostGauge[dir] + (faceVolume[dir]/2)*gaugeSiteSize;
149  }
150 
151  for (int i = 0; i < Vh; i++) {
152 
153  for (int dir = 0; dir < 8; dir++) {
154  gFloat *gauge = gaugeLink_mg4dir(i, dir, oddBit, gaugeEven, gaugeOdd, ghostGaugeEven, ghostGaugeOdd, 1, 1);
155  sFloat *spinor = spinorNeighbor_mg4dir(i, dir, oddBit, spinorField, fwdSpinor, backSpinor, 1, 1);
156 
157  sFloat projectedSpinor[mySpinorSiteSize], gaugedSpinor[mySpinorSiteSize];
158  int projIdx = 2*(dir/2)+(dir+daggerBit)%2;
159  multiplySpinorByDiracProjector(projectedSpinor, projIdx, spinor);
160 
161  for (int s = 0; s < 4; s++) {
162  if (dir % 2 == 0) su3Mul(&gaugedSpinor[s*(3*2)], gauge, &projectedSpinor[s*(3*2)]);
163  else su3Tmul(&gaugedSpinor[s*(3*2)], gauge, &projectedSpinor[s*(3*2)]);
164  }
165 
166  sum(&res[i*(4*3*2)], &res[i*(4*3*2)], gaugedSpinor, 4*3*2);
167  }
168 
169  }
170 }
171 
172 #endif
173 
174 // this actually applies the preconditioned dslash, e.g., D_ee^{-1} D_eo or D_oo^{-1} D_oe
175 void wil_dslash(void *out, void **gauge, void *in, int oddBit, int daggerBit,
177 
178 #ifndef MULTI_GPU
179  if (precision == QUDA_DOUBLE_PRECISION)
180  dslashReference((double*)out, (double**)gauge, (double*)in, oddBit, daggerBit);
181  else
182  dslashReference((float*)out, (float**)gauge, (float*)in, oddBit, daggerBit);
183 #else
184 
185  GaugeFieldParam gauge_field_param(gauge, gauge_param);
186  cpuGaugeField cpu(gauge_field_param);
187  void **ghostGauge = (void**)cpu.Ghost();
188 
189  // Get spinor ghost fields
190  // First wrap the input spinor into a ColorSpinorField
192  csParam.v = in;
193  csParam.nColor = 3;
194  csParam.nSpin = 4;
195  csParam.nDim = 4;
196  for (int d=0; d<4; d++) csParam.x[d] = Z[d];
197  csParam.precision = precision;
198  csParam.pad = 0;
200  csParam.x[0] /= 2;
205 
206  cpuColorSpinorField inField(csParam);
207 
208  { // Now do the exchange
209  QudaParity otherParity = QUDA_INVALID_PARITY;
210  if (oddBit == QUDA_EVEN_PARITY) otherParity = QUDA_ODD_PARITY;
211  else if (oddBit == QUDA_ODD_PARITY) otherParity = QUDA_EVEN_PARITY;
212  else errorQuda("ERROR: full parity not supported in function %s", __FUNCTION__);
213 
214  int nFace = 1;
215  FaceBuffer faceBuf(Z, 4, mySpinorSiteSize, nFace, precision);
216  faceBuf.exchangeCpuSpinor(inField, otherParity, daggerBit);
217  }
218  void** fwd_nbr_spinor = inField.fwdGhostFaceBuffer;
219  void** back_nbr_spinor = inField.backGhostFaceBuffer;
220 
221  if (precision == QUDA_DOUBLE_PRECISION) {
222  dslashReference((double*)out, (double**)gauge, (double**)ghostGauge, (double*)in,
223  (double**)fwd_nbr_spinor, (double**)back_nbr_spinor, oddBit, daggerBit);
224  } else{
225  dslashReference((float*)out, (float**)gauge, (float**)ghostGauge, (float*)in,
226  (float**)fwd_nbr_spinor, (float**)back_nbr_spinor, oddBit, daggerBit);
227  }
228 
229 #endif
230 
231 }
232 
233 // applies b*(1 + i*a*gamma_5)
234 template <typename sFloat>
235 void twistGamma5(sFloat *out, sFloat *in, const int dagger, const sFloat kappa, const sFloat mu,
236  const QudaTwistFlavorType flavor, const int V, QudaTwistGamma5Type twist) {
237 
238  sFloat a=0.0,b=0.0;
239  if (twist == QUDA_TWIST_GAMMA5_DIRECT) { // applying the twist
240  a = 2.0 * kappa * mu * flavor; // mu already includes the flavor
241  b = 1.0;
242  } else if (twist == QUDA_TWIST_GAMMA5_INVERSE) { // applying the inverse twist
243  a = -2.0 * kappa * mu * flavor;
244  b = 1.0 / (1.0 + a*a);
245  } else {
246  printf("Twist type %d not defined\n", twist);
247  exit(0);
248  }
249 
250  if (dagger) a *= -1.0;
251 
252  for(int i = 0; i < V; i++) {
253  sFloat tmp[24];
254  for(int s = 0; s < 4; s++)
255  for(int c = 0; c < 3; c++) {
256  sFloat a5 = ((s / 2) ? -1.0 : +1.0) * a;
257  tmp[s * 6 + c * 2 + 0] = b* (in[i * 24 + s * 6 + c * 2 + 0] - a5*in[i * 24 + s * 6 + c * 2 + 1]);
258  tmp[s * 6 + c * 2 + 1] = b* (in[i * 24 + s * 6 + c * 2 + 1] + a5*in[i * 24 + s * 6 + c * 2 + 0]);
259  }
260 
261  for (int j=0; j<24; j++) out[i*24+j] = tmp[j];
262  }
263 
264 }
265 
266 void twist_gamma5(void *out, void *in, int daggerBit, double kappa, double mu, QudaTwistFlavorType flavor,
267  int V, QudaTwistGamma5Type twist, QudaPrecision precision) {
268 
269  if (precision == QUDA_DOUBLE_PRECISION) {
270  twistGamma5((double*)out, (double*)in, daggerBit, kappa, mu, flavor, V, twist);
271  } else {
272  twistGamma5((float*)out, (float*)in, daggerBit, (float)kappa, (float)mu, flavor, V, twist);
273  }
274 }
275 
276 
277 void tm_dslash(void *res, void **gaugeFull, void *spinorField, double kappa, double mu,
278  QudaTwistFlavorType flavor, int oddBit, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision,
280 {
281 
282  if (daggerBit && (matpc_type == QUDA_MATPC_EVEN_EVEN || matpc_type == QUDA_MATPC_ODD_ODD)) twist_gamma5(spinorField, spinorField, daggerBit, kappa, mu,
283  flavor, Vh, QUDA_TWIST_GAMMA5_INVERSE, precision);
284 
285  wil_dslash(res, gaugeFull, spinorField, oddBit, daggerBit, precision, gauge_param);
286 
287  if (!daggerBit || (daggerBit && (matpc_type == QUDA_MATPC_EVEN_EVEN_ASYMMETRIC || matpc_type == QUDA_MATPC_ODD_ODD_ASYMMETRIC))) {
288  twist_gamma5(res, res, daggerBit, kappa, mu, flavor,
289  Vh, QUDA_TWIST_GAMMA5_INVERSE, precision);
290  } else {
291  twist_gamma5(spinorField, spinorField, daggerBit, kappa, mu, flavor,
292  Vh, QUDA_TWIST_GAMMA5_DIRECT, precision);
293  }
294 }
295 
296 void wil_mat(void *out, void **gauge, void *in, double kappa, int dagger_bit, QudaPrecision precision,
298 
299  void *inEven = in;
300  void *inOdd = (char*)in + Vh*spinorSiteSize*precision;
301  void *outEven = out;
302  void *outOdd = (char*)out + Vh*spinorSiteSize*precision;
303 
304  wil_dslash(outOdd, gauge, inEven, 1, dagger_bit, precision, gauge_param);
305  wil_dslash(outEven, gauge, inOdd, 0, dagger_bit, precision, gauge_param);
306 
307  // lastly apply the kappa term
308  if (precision == QUDA_DOUBLE_PRECISION) xpay((double*)in, -kappa, (double*)out, V*spinorSiteSize);
309  else xpay((float*)in, -(float)kappa, (float*)out, V*spinorSiteSize);
310 }
311 
312 void tm_mat(void *out, void **gauge, void *in, double kappa, double mu,
313  QudaTwistFlavorType flavor, int dagger_bit, QudaPrecision precision,
315 
316  void *inEven = in;
317  void *inOdd = (char*)in + Vh*spinorSiteSize*precision;
318  void *outEven = out;
319  void *outOdd = (char*)out + Vh*spinorSiteSize*precision;
320  void *tmp = malloc(V*spinorSiteSize*precision);
321 
322  wil_dslash(outOdd, gauge, inEven, 1, dagger_bit, precision, gauge_param);
323  wil_dslash(outEven, gauge, inOdd, 0, dagger_bit, precision, gauge_param);
324 
325  // apply the twist term to the full lattice
326  twist_gamma5(tmp, in, dagger_bit, kappa, mu, flavor, V, QUDA_TWIST_GAMMA5_DIRECT, precision);
327 
328  // combine
329  if (precision == QUDA_DOUBLE_PRECISION) xpay((double*)tmp, -kappa, (double*)out, V*spinorSiteSize);
330  else xpay((float*)tmp, -(float)kappa, (float*)out, V*spinorSiteSize);
331 
332  free(tmp);
333 }
334 
335 // Apply the even-odd preconditioned Dirac operator
336 void wil_matpc(void *outEven, void **gauge, void *inEven, double kappa,
337  QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision,
339 
340  void *tmp = malloc(Vh*spinorSiteSize*precision);
341 
342  // FIXME: remove once reference clover is finished
343  // full dslash operator
344  if (matpc_type == QUDA_MATPC_EVEN_EVEN || matpc_type == QUDA_MATPC_EVEN_EVEN_ASYMMETRIC) {
345  wil_dslash(tmp, gauge, inEven, 1, daggerBit, precision, gauge_param);
346  wil_dslash(outEven, gauge, tmp, 0, daggerBit, precision, gauge_param);
347  } else {
348  wil_dslash(tmp, gauge, inEven, 0, daggerBit, precision, gauge_param);
349  wil_dslash(outEven, gauge, tmp, 1, daggerBit, precision, gauge_param);
350  }
351 
352  // lastly apply the kappa term
353  double kappa2 = -kappa*kappa;
354  if (precision == QUDA_DOUBLE_PRECISION) xpay((double*)inEven, kappa2, (double*)outEven, Vh*spinorSiteSize);
355  else xpay((float*)inEven, (float)kappa2, (float*)outEven, Vh*spinorSiteSize);
356 
357  free(tmp);
358 }
359 
360 // Apply the even-odd preconditioned Dirac operator
361 void tm_matpc(void *outEven, void **gauge, void *inEven, double kappa, double mu, QudaTwistFlavorType flavor,
362  QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param) {
363 
364  void *tmp = malloc(Vh*spinorSiteSize*precision);
365 
366  if (matpc_type == QUDA_MATPC_EVEN_EVEN_ASYMMETRIC) {
367  wil_dslash(tmp, gauge, inEven, 1, daggerBit, precision, gauge_param);
368  twist_gamma5(tmp, tmp, daggerBit, kappa, mu, flavor, Vh, QUDA_TWIST_GAMMA5_INVERSE, precision);
369  wil_dslash(outEven, gauge, tmp, 0, daggerBit, precision, gauge_param);
370  twist_gamma5(tmp, inEven, daggerBit, kappa, mu, flavor, Vh, QUDA_TWIST_GAMMA5_DIRECT, precision);
371  } else if (matpc_type == QUDA_MATPC_ODD_ODD_ASYMMETRIC) {
372  wil_dslash(tmp, gauge, inEven, 0, daggerBit, precision, gauge_param);
373  twist_gamma5(tmp, tmp, daggerBit, kappa, mu, flavor, Vh, QUDA_TWIST_GAMMA5_INVERSE, precision);
374  wil_dslash(outEven, gauge, tmp, 1, daggerBit, precision, gauge_param);
375  twist_gamma5(tmp, inEven, daggerBit, kappa, mu, flavor, Vh, QUDA_TWIST_GAMMA5_DIRECT, precision);
376  } else if (!daggerBit) {
377  if (matpc_type == QUDA_MATPC_EVEN_EVEN) {
378  wil_dslash(tmp, gauge, inEven, 1, daggerBit, precision, gauge_param);
379  twist_gamma5(tmp, tmp, daggerBit, kappa, mu, flavor, Vh, QUDA_TWIST_GAMMA5_INVERSE, precision);
380  wil_dslash(outEven, gauge, tmp, 0, daggerBit, precision, gauge_param);
381  twist_gamma5(outEven, outEven, daggerBit, kappa, mu, flavor, Vh, QUDA_TWIST_GAMMA5_INVERSE, precision);
382  } else if (matpc_type == QUDA_MATPC_ODD_ODD) {
383  wil_dslash(tmp, gauge, inEven, 0, daggerBit, precision, gauge_param);
384  twist_gamma5(tmp, tmp, daggerBit, kappa, mu, flavor, Vh, QUDA_TWIST_GAMMA5_INVERSE, precision);
385  wil_dslash(outEven, gauge, tmp, 1, daggerBit, precision, gauge_param);
386  twist_gamma5(outEven, outEven, daggerBit, kappa, mu, flavor, Vh, QUDA_TWIST_GAMMA5_INVERSE, precision);
387  }
388  } else {
389  if (matpc_type == QUDA_MATPC_EVEN_EVEN) {
390  twist_gamma5(inEven, inEven, daggerBit, kappa, mu, flavor, Vh, QUDA_TWIST_GAMMA5_INVERSE, precision);
391  wil_dslash(tmp, gauge, inEven, 1, daggerBit, precision, gauge_param);
392  twist_gamma5(tmp, tmp, daggerBit, kappa, mu, flavor, Vh, QUDA_TWIST_GAMMA5_INVERSE, precision);
393  wil_dslash(outEven, gauge, tmp, 0, daggerBit, precision, gauge_param);
394  twist_gamma5(inEven, inEven, daggerBit, kappa, mu, flavor, Vh, QUDA_TWIST_GAMMA5_DIRECT, precision);
395  } else if (matpc_type == QUDA_MATPC_ODD_ODD) {
396  twist_gamma5(inEven, inEven, daggerBit, kappa, mu, flavor, Vh, QUDA_TWIST_GAMMA5_INVERSE, precision);
397  wil_dslash(tmp, gauge, inEven, 0, daggerBit, precision, gauge_param);
398  twist_gamma5(tmp, tmp, daggerBit, kappa, mu, flavor, Vh, QUDA_TWIST_GAMMA5_INVERSE, precision);
399  wil_dslash(outEven, gauge, tmp, 1, daggerBit, precision, gauge_param);
400  twist_gamma5(inEven, inEven, daggerBit, kappa, mu, flavor, Vh, QUDA_TWIST_GAMMA5_DIRECT, precision); // undo
401  }
402  }
403  // lastly apply the kappa term
404  double kappa2 = -kappa*kappa;
405  if (matpc_type == QUDA_MATPC_EVEN_EVEN || matpc_type == QUDA_MATPC_ODD_ODD) {
406  if (precision == QUDA_DOUBLE_PRECISION) xpay((double*)inEven, kappa2, (double*)outEven, Vh*spinorSiteSize);
407  else xpay((float*)inEven, (float)kappa2, (float*)outEven, Vh*spinorSiteSize);
408  } else {
409  if (precision == QUDA_DOUBLE_PRECISION) xpay((double*)tmp, kappa2, (double*)outEven, Vh*spinorSiteSize);
410  else xpay((float*)tmp, (float)kappa2, (float*)outEven, Vh*spinorSiteSize);
411  }
412 
413  free(tmp);
414 }
415 
416 
417 //----- for non-degenerate dslash only----
418 template <typename sFloat>
419 void ndegTwistGamma5(sFloat *out1, sFloat *out2, sFloat *in1, sFloat *in2, const int dagger, const sFloat kappa, const sFloat mu,
420  const sFloat epsilon, const int V, QudaTwistGamma5Type twist) {
421 
422  sFloat a=0.0, b=0.0, d=0.0;
423  if (twist == QUDA_TWIST_GAMMA5_DIRECT) { // applying the twist
424  a = 2.0 * kappa * mu;
425  b = -2.0 * kappa * epsilon;
426  d = 1.0;
427  } else if (twist == QUDA_TWIST_GAMMA5_INVERSE) { // applying the inverse twist
428  a = -2.0 * kappa * mu;
429  b = 2.0 * kappa * epsilon;
430  d = 1.0 / (1.0 + a*a - b*b);
431  } else {
432  printf("Twist type %d not defined\n", twist);
433  exit(0);
434  }
435 
436  if (dagger) a *= -1.0;
437 
438  for(int i = 0; i < V; i++) {
439  sFloat tmp1[24];
440  sFloat tmp2[24];
441  for(int s = 0; s < 4; s++)
442  for(int c = 0; c < 3; c++) {
443  sFloat a5 = ((s / 2) ? -1.0 : +1.0) * a;
444  tmp1[s * 6 + c * 2 + 0] = d* (in1[i * 24 + s * 6 + c * 2 + 0] - a5*in1[i * 24 + s * 6 + c * 2 + 1] + b*in2[i * 24 + s * 6 + c * 2 + 0]);
445  tmp1[s * 6 + c * 2 + 1] = d* (in1[i * 24 + s * 6 + c * 2 + 1] + a5*in1[i * 24 + s * 6 + c * 2 + 0] + b*in2[i * 24 + s * 6 + c * 2 + 1]);
446  tmp2[s * 6 + c * 2 + 0] = d* (in2[i * 24 + s * 6 + c * 2 + 0] + a5*in2[i * 24 + s * 6 + c * 2 + 1] + b*in1[i * 24 + s * 6 + c * 2 + 0]);
447  tmp2[s * 6 + c * 2 + 1] = d* (in2[i * 24 + s * 6 + c * 2 + 1] - a5*in2[i * 24 + s * 6 + c * 2 + 0] + b*in1[i * 24 + s * 6 + c * 2 + 1]);
448  }
449  for (int j=0; j<24; j++) out1[i*24+j] = tmp1[j], out2[i*24+j] = tmp2[j];
450  }
451 
452 }
453 
454 void ndeg_twist_gamma5(void *outf1, void *outf2, void *inf1, void *inf2, const int dagger, const double kappa, const double mu,
455  const double epsilon, const int Vf, QudaTwistGamma5Type twist, QudaPrecision precision)
456 {
457  if (precision == QUDA_DOUBLE_PRECISION)
458  {
459  ndegTwistGamma5((double*)outf1, (double*)outf2, (double*)inf1, (double*)inf2, dagger, kappa, mu, epsilon, Vf, twist);
460  }
461  else //single precision dslash
462  {
463  ndegTwistGamma5((float*)outf1, (float*)outf2, (float*)inf1, (float*)inf2, dagger, (float)kappa, (float)mu, (float)epsilon, Vf, twist);
464  }
465 }
466 
467 void tm_ndeg_dslash(void *res1, void *res2, void **gauge, void *spinorField1, void *spinorField2, double kappa, double mu,
468  double epsilon, int oddBit, int daggerBit, QudaMatPCType matpc_type, QudaPrecision precision, QudaGaugeParam &gauge_param)
469 {
470  if (daggerBit && (matpc_type == QUDA_MATPC_EVEN_EVEN || matpc_type == QUDA_MATPC_ODD_ODD))
471  ndeg_twist_gamma5(spinorField1, spinorField2, spinorField1, spinorField2, daggerBit, kappa, mu, epsilon, Vh, QUDA_TWIST_GAMMA5_INVERSE, precision);
472 
473  wil_dslash(res1, gauge, spinorField1, oddBit, daggerBit, precision, gauge_param);
474  wil_dslash(res2, gauge, spinorField2, oddBit, daggerBit, precision, gauge_param);
475 
476  if (!daggerBit || (matpc_type == QUDA_MATPC_EVEN_EVEN_ASYMMETRIC || matpc_type == QUDA_MATPC_ODD_ODD_ASYMMETRIC)) {
477  ndeg_twist_gamma5(res1, res2, res1, res2, daggerBit, kappa, mu, epsilon, Vh, QUDA_TWIST_GAMMA5_INVERSE, precision);
478  }
479 }
480 
481 
482 void tm_ndeg_matpc(void *outEven1, void *outEven2, void **gauge, void *inEven1, void *inEven2, double kappa, double mu, double epsilon,
483  QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param) {
484 
485  void *tmp1 = malloc(Vh*spinorSiteSize*precision);
486  void *tmp2 = malloc(Vh*spinorSiteSize*precision);
487 
488  if (!daggerBit) {
489  if (matpc_type == QUDA_MATPC_EVEN_EVEN) {
490  wil_dslash(tmp1, gauge, inEven1, 1, daggerBit, precision, gauge_param);
491  wil_dslash(tmp2, gauge, inEven2, 1, daggerBit, precision, gauge_param);
492  ndeg_twist_gamma5(tmp1, tmp2, tmp1, tmp2, daggerBit, kappa, mu, epsilon, Vh, QUDA_TWIST_GAMMA5_INVERSE, precision);
493  wil_dslash(outEven1, gauge, tmp1, 0, daggerBit, precision, gauge_param);
494  wil_dslash(outEven2, gauge, tmp2, 0, daggerBit, precision, gauge_param);
495  ndeg_twist_gamma5(outEven1, outEven2, outEven1, outEven2, daggerBit, kappa, mu, epsilon, Vh, QUDA_TWIST_GAMMA5_INVERSE, precision);
496  } else if (matpc_type == QUDA_MATPC_ODD_ODD) {
497  wil_dslash(tmp1, gauge, inEven1, 0, daggerBit, precision, gauge_param);
498  wil_dslash(tmp2, gauge, inEven2, 0, daggerBit, precision, gauge_param);
499  ndeg_twist_gamma5(tmp1, tmp2, tmp1, tmp2, daggerBit, kappa, mu, epsilon, Vh, QUDA_TWIST_GAMMA5_INVERSE, precision);
500  wil_dslash(outEven1, gauge, tmp1, 1, daggerBit, precision, gauge_param);
501  wil_dslash(outEven2, gauge, tmp2, 1, daggerBit, precision, gauge_param);
502  ndeg_twist_gamma5(outEven1, outEven2, outEven1, outEven2, daggerBit, kappa, mu, epsilon, Vh, QUDA_TWIST_GAMMA5_INVERSE, precision);
503  }
504  } else {
505  if (matpc_type == QUDA_MATPC_EVEN_EVEN) {
506  ndeg_twist_gamma5(tmp1, tmp2, inEven1, inEven2, daggerBit, kappa, mu, epsilon, Vh, QUDA_TWIST_GAMMA5_INVERSE, precision);
507  wil_dslash(outEven1, gauge, tmp1, 1, daggerBit, precision, gauge_param);
508  wil_dslash(outEven2, gauge, tmp2, 1, daggerBit, precision, gauge_param);
509  ndeg_twist_gamma5(tmp1, tmp2, outEven1, outEven2, daggerBit, kappa, mu, epsilon, Vh, QUDA_TWIST_GAMMA5_INVERSE, precision);
510  wil_dslash(outEven1, gauge, tmp1, 0, daggerBit, precision, gauge_param);
511  wil_dslash(outEven2, gauge, tmp2, 0, daggerBit, precision, gauge_param);
512  } else if (matpc_type == QUDA_MATPC_ODD_ODD) {
513  ndeg_twist_gamma5(tmp1, tmp2, inEven1, inEven2, daggerBit, kappa, mu, epsilon, Vh, QUDA_TWIST_GAMMA5_INVERSE, precision);
514  wil_dslash(outEven1, gauge, tmp1, 0, daggerBit, precision, gauge_param);
515  wil_dslash(outEven2, gauge, tmp2, 0, daggerBit, precision, gauge_param);
516  ndeg_twist_gamma5(tmp1, tmp2, outEven1, outEven2, daggerBit, kappa, mu, epsilon, Vh, QUDA_TWIST_GAMMA5_INVERSE, precision);
517  wil_dslash(outEven1, gauge, tmp1, 1, daggerBit, precision, gauge_param);
518  wil_dslash(outEven2, gauge, tmp2, 1, daggerBit, precision, gauge_param);
519  }
520  }
521 
522  if (matpc_type == QUDA_MATPC_EVEN_EVEN_ASYMMETRIC) {
523  wil_dslash(tmp1, gauge, inEven1, 1, daggerBit, precision, gauge_param);
524  wil_dslash(tmp2, gauge, inEven2, 1, daggerBit, precision, gauge_param);
525  ndeg_twist_gamma5(tmp1, tmp2, tmp1, tmp2, daggerBit, kappa, mu, epsilon, Vh, QUDA_TWIST_GAMMA5_INVERSE, precision);
526  wil_dslash(outEven1, gauge, tmp1, 0, daggerBit, precision, gauge_param);
527  wil_dslash(outEven2, gauge, tmp2, 0, daggerBit, precision, gauge_param);
528  } else if (matpc_type == QUDA_MATPC_ODD_ODD_ASYMMETRIC) {
529  wil_dslash(tmp1, gauge, inEven1, 0, daggerBit, precision, gauge_param);
530  wil_dslash(tmp2, gauge, inEven2, 0, daggerBit, precision, gauge_param);
531  ndeg_twist_gamma5(tmp1, tmp2, tmp1, tmp2, daggerBit, kappa, mu, epsilon, Vh, QUDA_TWIST_GAMMA5_INVERSE, precision);
532  wil_dslash(outEven1, gauge, tmp1, 1, daggerBit, precision, gauge_param);
533  wil_dslash(outEven2, gauge, tmp2, 1, daggerBit, precision, gauge_param);
534  }
535 
536  // lastly apply the kappa term
537  double kappa2 = -kappa*kappa;
538  if (matpc_type == QUDA_MATPC_EVEN_EVEN_ASYMMETRIC || matpc_type == QUDA_MATPC_ODD_ODD_ASYMMETRIC) {
539  ndeg_twist_gamma5(inEven1, inEven2, inEven1, inEven2, daggerBit, kappa, mu, epsilon, Vh, QUDA_TWIST_GAMMA5_DIRECT, precision);
540  }
541 
542  if (precision == QUDA_DOUBLE_PRECISION){
543  xpay((double*)inEven1, kappa2, (double*)outEven1, Vh*spinorSiteSize);
544  xpay((double*)inEven2, kappa2, (double*)outEven2, Vh*spinorSiteSize);
545  }
546  else{
547  xpay((float*)inEven1, (float)kappa2, (float*)outEven1, Vh*spinorSiteSize);
548  xpay((float*)inEven2, (float)kappa2, (float*)outEven2, Vh*spinorSiteSize);
549  }
550 
551  free(tmp1);
552  free(tmp2);
553 }
554 
555 
556 void tm_ndeg_mat(void *evenOut, void* oddOut, void **gauge, void *evenIn, void *oddIn, double kappa, double mu, double epsilon, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
557 {
558  //V-4d volume and Vh=V/2
559  void *inEven1 = evenIn;
560  void *inEven2 = (char*)evenIn + precision*Vh*spinorSiteSize;
561 
562  void *inOdd1 = oddIn;
563  void *inOdd2 = (char*)oddIn + precision*Vh*spinorSiteSize;
564 
565  void *outEven1 = evenOut;
566  void *outEven2 = (char*)evenOut + precision*Vh*spinorSiteSize;
567 
568  void *outOdd1 = oddOut;
569  void *outOdd2 = (char*)oddOut + precision*Vh*spinorSiteSize;
570 
571  void *tmpEven1 = malloc(Vh*spinorSiteSize*precision);
572  void *tmpEven2 = malloc(Vh*spinorSiteSize*precision);
573 
574  void *tmpOdd1 = malloc(Vh*spinorSiteSize*precision);
575  void *tmpOdd2 = malloc(Vh*spinorSiteSize*precision);
576 
577  // full dslash operator:
578  wil_dslash(outOdd1, gauge, inEven1, 1, daggerBit, precision, gauge_param);
579  wil_dslash(outOdd2, gauge, inEven2, 1, daggerBit, precision, gauge_param);
580 
581  wil_dslash(outEven1, gauge, inOdd1, 0, daggerBit, precision, gauge_param);
582  wil_dslash(outEven2, gauge, inOdd2, 0, daggerBit, precision, gauge_param);
583 
584  // apply the twist term
585  ndeg_twist_gamma5(tmpEven1, tmpEven2, inEven1, inEven2, daggerBit, kappa, mu, epsilon, Vh, QUDA_TWIST_GAMMA5_DIRECT, precision);
586  ndeg_twist_gamma5(tmpOdd1, tmpOdd2, inOdd1, inOdd2, daggerBit, kappa, mu, epsilon, Vh, QUDA_TWIST_GAMMA5_DIRECT, precision);
587  // combine
588  if (precision == QUDA_DOUBLE_PRECISION){
589  xpay((double*)tmpOdd1, -kappa, (double*)outOdd1, Vh*spinorSiteSize);
590  xpay((double*)tmpOdd2, -kappa, (double*)outOdd2, Vh*spinorSiteSize);
591 
592  xpay((double*)tmpEven1, -kappa, (double*)outEven1, Vh*spinorSiteSize);
593  xpay((double*)tmpEven2, -kappa, (double*)outEven2, Vh*spinorSiteSize);
594  }
595  else{
596  xpay((float*)tmpOdd1, (float)(-kappa), (float*)outOdd1, Vh*spinorSiteSize);
597  xpay((float*)tmpOdd2, (float)(-kappa), (float*)outOdd2, Vh*spinorSiteSize);
598 
599  xpay((float*)tmpEven1, (float)(-kappa), (float*)outEven1, Vh*spinorSiteSize);
600  xpay((float*)tmpEven2, (float)(-kappa), (float*)outEven2, Vh*spinorSiteSize);
601  }
602 
603  free(tmpOdd1);
604  free(tmpOdd2);
605  //
606  free(tmpEven1);
607  free(tmpEven2);
608 }
609 
610 //End of nondeg TM
QudaGaugeParam gauge_param
Definition: dslash_test.cpp:37
__constant__ int Vh
const void ** Ghost() const
Definition: gauge_field.h:209
enum QudaPrecision_s QudaPrecision
int V
Definition: test_util.cpp:29
void tm_mat(void *out, void **gauge, void *in, double kappa, double mu, QudaTwistFlavorType flavor, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param)
void tm_dslash(void *res, void **gaugeFull, void *spinorField, double kappa, double mu, QudaTwistFlavorType flavor, int oddBit, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
#define errorQuda(...)
Definition: util_quda.h:73
__global__ void const RealA *const const RealA *const const RealA *const const RealB *const const RealB *const int int mu
cudaColorSpinorField * tmp1
Definition: dslash_test.cpp:41
void tm_ndeg_mat(void *evenOut, void *oddOut, void **gauge, void *evenIn, void *oddIn, double kappa, double mu, double epsilon, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
QudaPrecision precision
Definition: lattice_field.h:41
#define gaugeSiteSize
cpuColorSpinorField * spinor
Definition: dslash_test.cpp:40
void tm_matpc(void *outEven, void **gauge, void *inEven, double kappa, double mu, QudaTwistFlavorType flavor, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
void twist_gamma5(void *out, void *in, int daggerBit, double kappa, double mu, QudaTwistFlavorType flavor, int V, QudaTwistGamma5Type twist, QudaPrecision precision)
#define spinorSiteSize
QudaSiteSubset siteSubset
Definition: lattice_field.h:42
QudaDagType dagger
Definition: test_util.cpp:1558
int x[QUDA_MAX_DIM]
Definition: lattice_field.h:38
cudaColorSpinorField * tmp2
Definition: dslash_test.cpp:41
cudaColorSpinorField * tmp
VOLATILE spinorFloat kappa
FloatingPoint< float > Float
Definition: gtest.h:7350
ColorSpinorParam csParam
Definition: pack_test.cpp:24
cpuColorSpinorField * in
void exchangeCpuSpinor(quda::cpuColorSpinorField &in, int parity, int dagger)
#define mySpinorSiteSize
enum QudaMatPCType_s QudaMatPCType
void multiplySpinorByDiracProjector(Float *res, int projIdx, Float *spinorIn)
void ndegTwistGamma5(sFloat *out1, sFloat *out2, sFloat *in1, sFloat *in2, const int dagger, const sFloat kappa, const sFloat mu, const sFloat epsilon, const int V, QudaTwistGamma5Type twist)
void tm_ndeg_matpc(void *outEven1, void *outEven2, void **gauge, void *inEven1, void *inEven2, double kappa, double mu, double epsilon, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
enum QudaParity_s QudaParity
void tm_ndeg_dslash(void *res1, void *res2, void **gauge, void *spinorField1, void *spinorField2, double kappa, double mu, double epsilon, int oddBit, int daggerBit, QudaMatPCType matpc_type, QudaPrecision precision, QudaGaugeParam &gauge_param)
void ndeg_twist_gamma5(void *outf1, void *outf2, void *inf1, void *inf2, const int dagger, const double kappa, const double mu, const double epsilon, const int Vf, QudaTwistGamma5Type twist, QudaPrecision precision)
void dslashReference(sFloat *res, gFloat **gaugeFull, sFloat *spinorField, int oddBit, int daggerBit)
void wil_mat(void *out, void **gauge, void *in, double kappa, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param)
cpuColorSpinorField * out
int Z[4]
Definition: test_util.cpp:28
const double projector[10][4][4][2]
QudaMatPCType matpc_type
Definition: test_util.cpp:1573
void wil_matpc(void *outEven, void **gauge, void *inEven, double kappa, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
enum QudaTwistGamma5Type_s QudaTwistGamma5Type
void wil_dslash(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
VOLATILE spinorFloat * s
void * gauge[4]
Definition: su3_test.cpp:15
int oddBit
void twistGamma5(sFloat *out, sFloat *in, const int dagger, const sFloat kappa, const sFloat mu, const QudaTwistFlavorType flavor, const int V, QudaTwistGamma5Type twist)
enum QudaTwistFlavorType_s QudaTwistFlavorType