20 static const double projector[8][4][4][2] = {
22 {{1,0}, {0,0}, {0,0}, {0,-1}},
23 {{0,0}, {1,0}, {0,-1}, {0,0}},
24 {{0,0}, {0,1}, {1,0}, {0,0}},
25 {{0,1}, {0,0}, {0,0}, {1,0}}
28 {{1,0}, {0,0}, {0,0}, {0,1}},
29 {{0,0}, {1,0}, {0,1}, {0,0}},
30 {{0,0}, {0,-1}, {1,0}, {0,0}},
31 {{0,-1}, {0,0}, {0,0}, {1,0}}
34 {{1,0}, {0,0}, {0,0}, {1,0}},
35 {{0,0}, {1,0}, {-1,0}, {0,0}},
36 {{0,0}, {-1,0}, {1,0}, {0,0}},
37 {{1,0}, {0,0}, {0,0}, {1,0}}
40 {{1,0}, {0,0}, {0,0}, {-1,0}},
41 {{0,0}, {1,0}, {1,0}, {0,0}},
42 {{0,0}, {1,0}, {1,0}, {0,0}},
43 {{-1,0}, {0,0}, {0,0}, {1,0}}
46 {{1,0}, {0,0}, {0,-1}, {0,0}},
47 {{0,0}, {1,0}, {0,0}, {0,1}},
48 {{0,1}, {0,0}, {1,0}, {0,0}},
49 {{0,0}, {0,-1}, {0,0}, {1,0}}
52 {{1,0}, {0,0}, {0,1}, {0,0}},
53 {{0,0}, {1,0}, {0,0}, {0,-1}},
54 {{0,-1}, {0,0}, {1,0}, {0,0}},
55 {{0,0}, {0,1}, {0,0}, {1,0}}
58 {{1,0}, {0,0}, {-1,0}, {0,0}},
59 {{0,0}, {1,0}, {0,0}, {-1,0}},
60 {{-1,0}, {0,0}, {1,0}, {0,0}},
61 {{0,0}, {-1,0}, {0,0}, {1,0}}
64 {{1,0}, {0,0}, {1,0}, {0,0}},
65 {{0,0}, {1,0}, {0,0}, {1,0}},
66 {{1,0}, {0,0}, {1,0}, {0,0}},
67 {{0,0}, {1,0}, {0,0}, {1,0}}
73 template <
typename Float>
75 for (
int i=0; i<4*3*2; i++) res[i] = 0.0;
77 for (
int s = 0;
s < 4;
s++) {
78 for (
int t = 0; t < 4; t++) {
82 for (
int m = 0; m < 3; m++) {
83 Float spinorRe = spinorIn[t*(3*2) + m*(2) + 0];
84 Float spinorIm = spinorIn[t*(3*2) + m*(2) + 1];
85 res[
s*(3*2) + m*(2) + 0] += projRe*spinorRe - projIm*spinorIm;
86 res[
s*(3*2) + m*(2) + 1] += projRe*spinorIm + projIm*spinorRe;
105 template <
typename sFloat,
typename gFloat>
106 void dslashReference(sFloat *res, gFloat **gaugeFull, sFloat *spinorField,
int oddBit,
int daggerBit) {
109 gFloat *gaugeEven[4], *gaugeOdd[4];
111 gaugeEven[
dir] = gaugeFull[
dir];
115 for (
int i = 0; i <
Vh; i++) {
117 gFloat *
gauge = gaugeLink(i,
dir, oddBit, gaugeEven, gaugeOdd, 1);
118 sFloat *
spinor = spinorNeighbor(i,
dir, oddBit, spinorField, 1);
120 sFloat projectedSpinor[4*3*2], gaugedSpinor[4*3*2];
121 int projIdx = 2*(
dir/2)+(
dir+daggerBit)%2;
124 for (
int s = 0;
s < 4;
s++) {
125 if (
dir % 2 == 0) su3Mul(&gaugedSpinor[
s*(3*2)], gauge, &projectedSpinor[
s*(3*2)]);
126 else su3Tmul(&gaugedSpinor[
s*(3*2)], gauge, &projectedSpinor[
s*(3*2)]);
129 sum(&res[i*(4*3*2)], &res[i*(4*3*2)], gaugedSpinor, 4*3*2);
136 template <
typename sFloat,
typename gFloat>
137 void dslashReference(sFloat *res, gFloat **gaugeFull, gFloat **ghostGauge, sFloat *spinorField,
138 sFloat **fwdSpinor, sFloat **backSpinor,
int oddBit,
int daggerBit) {
141 gFloat *gaugeEven[4], *gaugeOdd[4];
142 gFloat *ghostGaugeEven[4], *ghostGaugeOdd[4];
144 gaugeEven[
dir] = gaugeFull[
dir];
147 ghostGaugeEven[
dir] = ghostGauge[
dir];
151 for (
int i = 0; i <
Vh; i++) {
154 gFloat *
gauge = gaugeLink_mg4dir(i,
dir, oddBit, gaugeEven, gaugeOdd, ghostGaugeEven, ghostGaugeOdd, 1, 1);
155 sFloat *
spinor = spinorNeighbor_mg4dir(i,
dir, oddBit, spinorField, fwdSpinor, backSpinor, 1, 1);
158 int projIdx = 2*(
dir/2)+(
dir+daggerBit)%2;
161 for (
int s = 0;
s < 4;
s++) {
162 if (
dir % 2 == 0) su3Mul(&gaugedSpinor[
s*(3*2)], gauge, &projectedSpinor[
s*(3*2)]);
163 else su3Tmul(&gaugedSpinor[
s*(3*2)], gauge, &projectedSpinor[
s*(3*2)]);
166 sum(&res[i*(4*3*2)], &res[i*(4*3*2)], gaugedSpinor, 4*3*2);
180 dslashReference((
double*)out, (
double**)gauge, (
double*)in, oddBit, daggerBit);
182 dslashReference((
float*)out, (
float**)gauge, (
float*)in, oddBit, daggerBit);
188 void **ghostGauge = (
void**)cpu.
Ghost();
197 for (
int d=0; d<4; d++) csParam.
x[d] =
Z[d];
213 else errorQuda(
"ERROR: full parity not supported in function %s", __FUNCTION__);
216 FaceBuffer faceBuf(
Z, 4, mySpinorSiteSize, nFace, precision);
219 void** fwd_nbr_spinor = inField.fwdGhostFaceBuffer;
220 void** back_nbr_spinor = inField.backGhostFaceBuffer;
223 dslashReference((
double*)out, (
double**)gauge, (
double**)ghostGauge, (
double*)in,
224 (
double**)fwd_nbr_spinor, (
double**)back_nbr_spinor, oddBit, daggerBit);
226 dslashReference((
float*)out, (
float**)gauge, (
float**)ghostGauge, (
float*)in,
227 (
float**)fwd_nbr_spinor, (
float**)back_nbr_spinor, oddBit, daggerBit);
235 template <
typename sFloat>
241 a = 2.0 * kappa * mu * flavor;
244 a = -2.0 * kappa * mu * flavor;
245 b = 1.0 / (1.0 + a*a);
247 printf(
"Twist type %d not defined\n", twist);
251 if (dagger) a *= -1.0;
253 for(
int i = 0; i <
V; i++) {
255 for(
int s = 0;
s < 4;
s++)
256 for(
int c = 0; c < 3; c++) {
257 sFloat a5 = ((
s / 2) ? -1.0 : +1.0) * a;
258 tmp[
s * 6 + c * 2 + 0] = b* (in[i * 24 +
s * 6 + c * 2 + 0] - a5*in[i * 24 +
s * 6 + c * 2 + 1]);
259 tmp[
s * 6 + c * 2 + 1] = b* (in[i * 24 +
s * 6 + c * 2 + 1] + a5*in[i * 24 +
s * 6 + c * 2 + 0]);
262 for (
int j=0; j<24; j++) out[i*24+j] = tmp[j];
271 twistGamma5((
double*)out, (
double*)in, daggerBit, kappa, mu, flavor, V, twist);
273 twistGamma5((
float*)out, (
float*)in, daggerBit, (
float)kappa, (
float)mu, flavor, V, twist);
283 if (daggerBit)
twist_gamma5(spinorField, spinorField, daggerBit, kappa, mu,
286 wil_dslash(res, gaugeFull, spinorField, oddBit, daggerBit, precision, gauge_param);
292 twist_gamma5(spinorField, spinorField, daggerBit, kappa, mu, flavor,
306 wil_dslash(outOdd, gauge, inEven, 1, dagger_bit, precision, gauge_param);
307 wil_dslash(outEven, gauge, inOdd, 0, dagger_bit, precision, gauge_param);
311 else xpay((
float*)in, -(
float)kappa, (
float*)out,
V*spinorSiteSize);
324 wil_dslash(outOdd, gauge, inEven, 1, dagger_bit, precision, gauge_param);
325 wil_dslash(outEven, gauge, inOdd, 0, dagger_bit, precision, gauge_param);
332 else xpay((
float*)tmp, -(
float)kappa, (
float*)out,
V*spinorSiteSize);
347 wil_dslash(tmp, gauge, inEven, 1, daggerBit, precision, gauge_param);
348 wil_dslash(outEven, gauge, tmp, 0, daggerBit, precision, gauge_param);
350 wil_dslash(tmp, gauge, inEven, 0, daggerBit, precision, gauge_param);
351 wil_dslash(outEven, gauge, tmp, 1, daggerBit, precision, gauge_param);
355 double kappa2 = -kappa*
kappa;
357 else xpay((
float*)inEven, (
float)kappa2, (
float*)outEven, Vh*spinorSiteSize);
369 wil_dslash(tmp, gauge, inEven, 1, daggerBit, precision, gauge_param);
371 wil_dslash(outEven, gauge, tmp, 0, daggerBit, precision, gauge_param);
374 wil_dslash(tmp, gauge, inEven, 0, daggerBit, precision, gauge_param);
376 wil_dslash(outEven, gauge, tmp, 1, daggerBit, precision, gauge_param);
378 }
else if (!daggerBit) {
380 wil_dslash(tmp, gauge, inEven, 1, daggerBit, precision, gauge_param);
382 wil_dslash(outEven, gauge, tmp, 0, daggerBit, precision, gauge_param);
385 wil_dslash(tmp, gauge, inEven, 0, daggerBit, precision, gauge_param);
387 wil_dslash(outEven, gauge, tmp, 1, daggerBit, precision, gauge_param);
393 wil_dslash(tmp, gauge, inEven, 1, daggerBit, precision, gauge_param);
395 wil_dslash(outEven, gauge, tmp, 0, daggerBit, precision, gauge_param);
399 wil_dslash(tmp, gauge, inEven, 0, daggerBit, precision, gauge_param);
401 wil_dslash(outEven, gauge, tmp, 1, daggerBit, precision, gauge_param);
406 double kappa2 = -kappa*
kappa;
409 else xpay((
float*)inEven, (
float)kappa2, (
float*)outEven, Vh*spinorSiteSize);
412 else xpay((
float*)tmp, (
float)kappa2, (
float*)outEven, Vh*spinorSiteSize);
420 template <
typename sFloat>
424 sFloat a=0.0, b=0.0, d=0.0;
426 a = 2.0 * kappa *
mu;
427 b = -2.0 * kappa * epsilon;
430 a = -2.0 * kappa *
mu;
431 b = 2.0 * kappa * epsilon;
432 d = 1.0 / (1.0 + a*a - b*b);
434 printf(
"Twist type %d not defined\n", twist);
438 if (dagger) a *= -1.0;
440 for(
int i = 0; i <
V; i++) {
443 for(
int s = 0;
s < 4;
s++)
444 for(
int c = 0; c < 3; c++) {
445 sFloat a5 = ((
s / 2) ? -1.0 : +1.0) * a;
446 tmp1[
s * 6 + c * 2 + 0] = d* (in1[i * 24 +
s * 6 + c * 2 + 0] - a5*in1[i * 24 +
s * 6 + c * 2 + 1] + b*in2[i * 24 +
s * 6 + c * 2 + 0]);
447 tmp1[
s * 6 + c * 2 + 1] = d* (in1[i * 24 +
s * 6 + c * 2 + 1] + a5*in1[i * 24 +
s * 6 + c * 2 + 0] + b*in2[i * 24 +
s * 6 + c * 2 + 1]);
448 tmp2[
s * 6 + c * 2 + 0] = d* (in2[i * 24 +
s * 6 + c * 2 + 0] + a5*in2[i * 24 +
s * 6 + c * 2 + 1] + b*in1[i * 24 +
s * 6 + c * 2 + 0]);
449 tmp2[
s * 6 + c * 2 + 1] = d* (in2[i * 24 +
s * 6 + c * 2 + 1] - a5*in2[i * 24 +
s * 6 + c * 2 + 0] + b*in1[i * 24 +
s * 6 + c * 2 + 1]);
451 for (
int j=0; j<24; j++) out1[i*24+j] = tmp1[j], out2[i*24+j] = tmp2[j];
461 ndegTwistGamma5((
double*)outf1, (
double*)outf2, (
double*)inf1, (
double*)inf2, dagger, kappa, mu, epsilon, Vf, twist);
465 ndegTwistGamma5((
float*)outf1, (
float*)outf2, (
float*)inf1, (
float*)inf2, dagger, (
float)kappa, (
float)mu, (
float)epsilon, Vf, twist);
469 void tm_ndeg_dslash(
void *res1,
void *res2,
void **gauge,
void *spinorField1,
void *spinorField2,
double kappa,
double mu,
473 ndeg_twist_gamma5(spinorField1, spinorField2, spinorField1, spinorField2, daggerBit, kappa, mu, epsilon, Vh,
QUDA_TWIST_GAMMA5_INVERSE, precision);
475 wil_dslash(res1, gauge, spinorField1, oddBit, daggerBit, precision, gauge_param);
476 wil_dslash(res2, gauge, spinorField2, oddBit, daggerBit, precision, gauge_param);
479 ndeg_twist_gamma5(res1, res2, res1, res2, daggerBit, kappa, mu, epsilon, Vh,
QUDA_TWIST_GAMMA5_INVERSE, precision);
484 void tm_ndeg_matpc(
void *outEven1,
void *outEven2,
void **gauge,
void *inEven1,
void *inEven2,
double kappa,
double mu,
double epsilon,
492 wil_dslash(tmp1, gauge, inEven1, 1, daggerBit, precision, gauge_param);
493 wil_dslash(tmp2, gauge, inEven2, 1, daggerBit, precision, gauge_param);
494 ndeg_twist_gamma5(tmp1, tmp2, tmp1, tmp2, daggerBit, kappa, mu, epsilon, Vh,
QUDA_TWIST_GAMMA5_INVERSE, precision);
495 wil_dslash(outEven1, gauge, tmp1, 0, daggerBit, precision, gauge_param);
496 wil_dslash(outEven2, gauge, tmp2, 0, daggerBit, precision, gauge_param);
497 ndeg_twist_gamma5(outEven1, outEven2, outEven1, outEven2, daggerBit, kappa, mu, epsilon, Vh,
QUDA_TWIST_GAMMA5_INVERSE, precision);
499 wil_dslash(tmp1, gauge, inEven1, 0, daggerBit, precision, gauge_param);
500 wil_dslash(tmp2, gauge, inEven2, 0, daggerBit, precision, gauge_param);
501 ndeg_twist_gamma5(tmp1, tmp2, tmp1, tmp2, daggerBit, kappa, mu, epsilon, Vh,
QUDA_TWIST_GAMMA5_INVERSE, precision);
502 wil_dslash(outEven1, gauge, tmp1, 1, daggerBit, precision, gauge_param);
503 wil_dslash(outEven2, gauge, tmp2, 1, daggerBit, precision, gauge_param);
504 ndeg_twist_gamma5(outEven1, outEven2, outEven1, outEven2, daggerBit, kappa, mu, epsilon, Vh,
QUDA_TWIST_GAMMA5_INVERSE, precision);
508 ndeg_twist_gamma5(tmp1, tmp2, inEven1, inEven2, daggerBit, kappa, mu, epsilon, Vh,
QUDA_TWIST_GAMMA5_INVERSE, precision);
509 wil_dslash(outEven1, gauge, tmp1, 1, daggerBit, precision, gauge_param);
510 wil_dslash(outEven2, gauge, tmp2, 1, daggerBit, precision, gauge_param);
511 ndeg_twist_gamma5(tmp1, tmp2, outEven1, outEven2, daggerBit, kappa, mu, epsilon, Vh,
QUDA_TWIST_GAMMA5_INVERSE, precision);
512 wil_dslash(outEven1, gauge, tmp1, 0, daggerBit, precision, gauge_param);
513 wil_dslash(outEven2, gauge, tmp2, 0, daggerBit, precision, gauge_param);
515 ndeg_twist_gamma5(tmp1, tmp2, inEven1, inEven2, daggerBit, kappa, mu, epsilon, Vh,
QUDA_TWIST_GAMMA5_INVERSE, precision);
516 wil_dslash(outEven1, gauge, tmp1, 0, daggerBit, precision, gauge_param);
517 wil_dslash(outEven2, gauge, tmp2, 0, daggerBit, precision, gauge_param);
518 ndeg_twist_gamma5(tmp1, tmp2, outEven1, outEven2, daggerBit, kappa, mu, epsilon, Vh,
QUDA_TWIST_GAMMA5_INVERSE, precision);
519 wil_dslash(outEven1, gauge, tmp1, 1, daggerBit, precision, gauge_param);
520 wil_dslash(outEven2, gauge, tmp2, 1, daggerBit, precision, gauge_param);
525 wil_dslash(tmp1, gauge, inEven1, 1, daggerBit, precision, gauge_param);
526 wil_dslash(tmp2, gauge, inEven2, 1, daggerBit, precision, gauge_param);
527 ndeg_twist_gamma5(tmp1, tmp2, tmp1, tmp2, daggerBit, kappa, mu, epsilon, Vh,
QUDA_TWIST_GAMMA5_INVERSE, precision);
528 wil_dslash(outEven1, gauge, tmp1, 0, daggerBit, precision, gauge_param);
529 wil_dslash(outEven2, gauge, tmp2, 0, daggerBit, precision, gauge_param);
531 wil_dslash(tmp1, gauge, inEven1, 0, daggerBit, precision, gauge_param);
532 wil_dslash(tmp2, gauge, inEven2, 0, daggerBit, precision, gauge_param);
533 ndeg_twist_gamma5(tmp1, tmp2, tmp1, tmp2, daggerBit, kappa, mu, epsilon, Vh,
QUDA_TWIST_GAMMA5_INVERSE, precision);
534 wil_dslash(outEven1, gauge, tmp1, 1, daggerBit, precision, gauge_param);
535 wil_dslash(outEven2, gauge, tmp2, 1, daggerBit, precision, gauge_param);
539 double kappa2 = -kappa*
kappa;
541 ndeg_twist_gamma5(inEven1, inEven2, inEven1, inEven2, daggerBit, kappa, mu, epsilon, Vh,
QUDA_TWIST_GAMMA5_DIRECT, precision);
546 xpay((
double*)inEven2, kappa2, (
double*)outEven2, Vh*spinorSiteSize);
550 xpay((
float*)inEven2, (
float)kappa2, (
float*)outEven2, Vh*spinorSiteSize);
558 void tm_ndeg_mat(
void *evenOut,
void* oddOut,
void **gauge,
void *evenIn,
void *oddIn,
double kappa,
double mu,
double epsilon,
int daggerBit,
QudaPrecision precision,
QudaGaugeParam &
gauge_param)
561 void *inEven1 = evenIn;
564 void *inOdd1 = oddIn;
567 void *outEven1 = evenOut;
570 void *outOdd1 = oddOut;
580 wil_dslash(outOdd1, gauge, inEven1, 1, daggerBit, precision, gauge_param);
581 wil_dslash(outOdd2, gauge, inEven2, 1, daggerBit, precision, gauge_param);
583 wil_dslash(outEven1, gauge, inOdd1, 0, daggerBit, precision, gauge_param);
584 wil_dslash(outEven2, gauge, inOdd2, 0, daggerBit, precision, gauge_param);
587 ndeg_twist_gamma5(tmpEven1, tmpEven2, inEven1, inEven2, daggerBit, kappa, mu, epsilon, Vh,
QUDA_TWIST_GAMMA5_DIRECT, precision);
588 ndeg_twist_gamma5(tmpOdd1, tmpOdd2, inOdd1, inOdd2, daggerBit, kappa, mu, epsilon, Vh,
QUDA_TWIST_GAMMA5_DIRECT, precision);
592 xpay((
double*)tmpOdd2, -kappa, (
double*)outOdd2, Vh*spinorSiteSize);
594 xpay((
double*)tmpEven1, -kappa, (
double*)outEven1, Vh*spinorSiteSize);
595 xpay((
double*)tmpEven2, -kappa, (
double*)outEven2, Vh*spinorSiteSize);
599 xpay((
float*)tmpOdd2, (
float)(-kappa), (
float*)outOdd2, Vh*spinorSiteSize);
601 xpay((
float*)tmpEven1, (
float)(-kappa), (
float*)outEven1, Vh*spinorSiteSize);
602 xpay((
float*)tmpEven2, (
float)(-kappa), (
float*)outEven2, Vh*spinorSiteSize);