22 {{1,0}, {0,0}, {0,0}, {0,-1}},
23 {{0,0}, {1,0}, {0,-1}, {0,0}},
24 {{0,0}, {0,1}, {1,0}, {0,0}},
25 {{0,1}, {0,0}, {0,0}, {1,0}}
28 {{1,0}, {0,0}, {0,0}, {0,1}},
29 {{0,0}, {1,0}, {0,1}, {0,0}},
30 {{0,0}, {0,-1}, {1,0}, {0,0}},
31 {{0,-1}, {0,0}, {0,0}, {1,0}}
34 {{1,0}, {0,0}, {0,0}, {1,0}},
35 {{0,0}, {1,0}, {-1,0}, {0,0}},
36 {{0,0}, {-1,0}, {1,0}, {0,0}},
37 {{1,0}, {0,0}, {0,0}, {1,0}}
40 {{1,0}, {0,0}, {0,0}, {-1,0}},
41 {{0,0}, {1,0}, {1,0}, {0,0}},
42 {{0,0}, {1,0}, {1,0}, {0,0}},
43 {{-1,0}, {0,0}, {0,0}, {1,0}}
46 {{1,0}, {0,0}, {0,-1}, {0,0}},
47 {{0,0}, {1,0}, {0,0}, {0,1}},
48 {{0,1}, {0,0}, {1,0}, {0,0}},
49 {{0,0}, {0,-1}, {0,0}, {1,0}}
52 {{1,0}, {0,0}, {0,1}, {0,0}},
53 {{0,0}, {1,0}, {0,0}, {0,-1}},
54 {{0,-1}, {0,0}, {1,0}, {0,0}},
55 {{0,0}, {0,1}, {0,0}, {1,0}}
58 {{1,0}, {0,0}, {-1,0}, {0,0}},
59 {{0,0}, {1,0}, {0,0}, {-1,0}},
60 {{-1,0}, {0,0}, {1,0}, {0,0}},
61 {{0,0}, {-1,0}, {0,0}, {1,0}}
64 {{1,0}, {0,0}, {1,0}, {0,0}},
65 {{0,0}, {1,0}, {0,0}, {1,0}},
66 {{1,0}, {0,0}, {1,0}, {0,0}},
67 {{0,0}, {1,0}, {0,0}, {1,0}}
73 template <
typename Float>
75 for (
int i=0; i<4*3*2; i++) res[i] = 0.0;
77 for (
int s = 0;
s < 4;
s++) {
78 for (
int t = 0; t < 4; t++) {
82 for (
int m = 0; m < 3; m++) {
83 Float spinorRe = spinorIn[t*(3*2) + m*(2) + 0];
84 Float spinorIm = spinorIn[t*(3*2) + m*(2) + 1];
85 res[
s*(3*2) + m*(2) + 0] += projRe*spinorRe - projIm*spinorIm;
86 res[
s*(3*2) + m*(2) + 1] += projRe*spinorIm + projIm*spinorRe;
105 template <
typename sFloat,
typename gFloat>
106 void dslashReference(sFloat *res, gFloat **gaugeFull, sFloat *spinorField,
int oddBit,
int daggerBit) {
109 gFloat *gaugeEven[4], *gaugeOdd[4];
110 for (
int dir = 0; dir < 4; dir++) {
111 gaugeEven[dir] = gaugeFull[dir];
115 for (
int i = 0; i <
Vh; i++) {
116 for (
int dir = 0; dir < 8; dir++) {
117 gFloat *gauge =
gaugeLink(i, dir, oddBit, gaugeEven, gaugeOdd, 1);
120 sFloat projectedSpinor[4*3*2], gaugedSpinor[4*3*2];
121 int projIdx = 2*(dir/2)+(dir+daggerBit)%2;
124 for (
int s = 0;
s < 4;
s++) {
125 if (dir % 2 == 0)
su3Mul(&gaugedSpinor[
s*(3*2)], gauge, &projectedSpinor[
s*(3*2)]);
126 else su3Tmul(&gaugedSpinor[
s*(3*2)], gauge, &projectedSpinor[
s*(3*2)]);
129 sum(&res[i*(4*3*2)], &res[i*(4*3*2)], gaugedSpinor, 4*3*2);
136 template <
typename sFloat,
typename gFloat>
137 void dslashReference(sFloat *res, gFloat **gaugeFull, gFloat **ghostGauge, sFloat *spinorField,
138 sFloat **fwdSpinor, sFloat **backSpinor,
int oddBit,
int daggerBit) {
141 gFloat *gaugeEven[4], *gaugeOdd[4];
142 gFloat *ghostGaugeEven[4], *ghostGaugeOdd[4];
143 for (
int dir = 0; dir < 4; dir++) {
144 gaugeEven[dir] = gaugeFull[dir];
147 ghostGaugeEven[dir] = ghostGauge[dir];
148 ghostGaugeOdd[dir] = ghostGauge[dir] + (
faceVolume[dir]/2)*gaugeSiteSize;
151 for (
int i = 0; i <
Vh; i++) {
153 for (
int dir = 0; dir < 8; dir++) {
154 gFloat *gauge = gaugeLink_mg4dir(i, dir, oddBit, gaugeEven, gaugeOdd, ghostGaugeEven, ghostGaugeOdd, 1, 1);
155 sFloat *
spinor = spinorNeighbor_mg4dir(i, dir, oddBit, spinorField, fwdSpinor, backSpinor, 1, 1);
158 int projIdx = 2*(dir/2)+(dir+daggerBit)%2;
161 for (
int s = 0;
s < 4;
s++) {
162 if (dir % 2 == 0)
su3Mul(&gaugedSpinor[
s*(3*2)], gauge, &projectedSpinor[
s*(3*2)]);
163 else su3Tmul(&gaugedSpinor[
s*(3*2)], gauge, &projectedSpinor[
s*(3*2)]);
166 sum(&res[i*(4*3*2)], &res[i*(4*3*2)], gaugedSpinor, 4*3*2);
180 dslashReference((
double*)out, (
double**)gauge, (
double*)in, oddBit, daggerBit);
182 dslashReference((
float*)out, (
float**)gauge, (
float*)in, oddBit, daggerBit);
188 void **ghostGauge = (
void**)cpu.
Ghost();
197 for (
int d=0; d<4; d++) csParam.
x[d] =
Z[d];
213 else errorQuda(
"ERROR: full parity not supported in function %s", __FUNCTION__);
222 dslashReference((
double*)out, (
double**)gauge, (
double**)ghostGauge, (
double*)in,
223 (
double**)fwd_nbr_spinor, (
double**)back_nbr_spinor, oddBit, daggerBit);
225 dslashReference((
float*)out, (
float**)gauge, (
float**)ghostGauge, (
float*)in,
226 (
float**)fwd_nbr_spinor, (
float**)back_nbr_spinor, oddBit, daggerBit);
234 template <
typename sFloat>
240 a = 2.0 * kappa * mu * flavor;
243 a = -2.0 * kappa * mu * flavor;
244 b = 1.0 / (1.0 + a*a);
246 printf(
"Twist type %d not defined\n", twist);
250 if (dagger) a *= -1.0;
252 for(
int i = 0; i <
V; i++) {
254 for(
int s = 0;
s < 4;
s++)
255 for(
int c = 0; c < 3; c++) {
256 sFloat a5 = ((
s / 2) ? -1.0 : +1.0) * a;
257 tmp[
s * 6 + c * 2 + 0] = b* (in[i * 24 +
s * 6 + c * 2 + 0] - a5*in[i * 24 +
s * 6 + c * 2 + 1]);
258 tmp[
s * 6 + c * 2 + 1] = b* (in[i * 24 +
s * 6 + c * 2 + 1] + a5*in[i * 24 +
s * 6 + c * 2 + 0]);
261 for (
int j=0; j<24; j++) out[i*24+j] = tmp[j];
270 twistGamma5((
double*)out, (
double*)in, daggerBit, kappa, mu, flavor, V, twist);
272 twistGamma5((
float*)out, (
float*)in, daggerBit, (
float)kappa, (
float)mu, flavor, V, twist);
285 wil_dslash(res, gaugeFull, spinorField, oddBit, daggerBit, precision, gauge_param);
302 wil_dslash(outOdd, gauge, inEven, 1, dagger_bit, precision, gauge_param);
303 wil_dslash(outEven, gauge, inOdd, 0, dagger_bit, precision, gauge_param);
319 wil_dslash(outOdd, gauge, inEven, 1, dagger_bit, precision, gauge_param);
320 wil_dslash(outEven, gauge, inOdd, 0, dagger_bit, precision, gauge_param);
341 wil_dslash(tmp, gauge, inEven, 1, daggerBit, precision, gauge_param);
342 wil_dslash(outEven, gauge, tmp, 0, daggerBit, precision, gauge_param);
344 wil_dslash(tmp, gauge, inEven, 0, daggerBit, precision, gauge_param);
345 wil_dslash(outEven, gauge, tmp, 1, daggerBit, precision, gauge_param);
349 double kappa2 = -kappa*
kappa;
362 wil_dslash(tmp, gauge, inEven, 1, daggerBit, precision, gauge_param);
364 wil_dslash(outEven, gauge, tmp, 0, daggerBit, precision, gauge_param);
367 wil_dslash(tmp, gauge, inEven, 0, daggerBit, precision, gauge_param);
369 wil_dslash(outEven, gauge, tmp, 1, daggerBit, precision, gauge_param);
371 }
else if (!daggerBit) {
373 wil_dslash(tmp, gauge, inEven, 1, daggerBit, precision, gauge_param);
375 wil_dslash(outEven, gauge, tmp, 0, daggerBit, precision, gauge_param);
378 wil_dslash(tmp, gauge, inEven, 0, daggerBit, precision, gauge_param);
380 wil_dslash(outEven, gauge, tmp, 1, daggerBit, precision, gauge_param);
386 wil_dslash(tmp, gauge, inEven, 1, daggerBit, precision, gauge_param);
388 wil_dslash(outEven, gauge, tmp, 0, daggerBit, precision, gauge_param);
392 wil_dslash(tmp, gauge, inEven, 0, daggerBit, precision, gauge_param);
394 wil_dslash(outEven, gauge, tmp, 1, daggerBit, precision, gauge_param);
399 double kappa2 = -kappa*
kappa;
411 template <
typename sFloat>
415 sFloat a=0.0, b=0.0, d=0.0;
417 a = 2.0 * kappa *
mu;
421 a = -2.0 * kappa *
mu;
423 d = 1.0 / (1.0 + a*a - b*b);
425 printf(
"Twist type %d not defined\n", twist);
429 if (dagger) a *= -1.0;
431 for(
int i = 0; i <
V; i++) {
434 for(
int s = 0;
s < 4;
s++)
435 for(
int c = 0; c < 3; c++) {
436 sFloat a5 = ((
s / 2) ? -1.0 : +1.0) * a;
437 tmp1[
s * 6 + c * 2 + 0] = d
438 * (in1[i * 24 +
s * 6 + c * 2 + 0] - a5 * in1[i * 24 +
s * 6 + c * 2 + 1]
439 + b * in2[i * 24 +
s * 6 + c * 2 + 0]);
440 tmp1[
s * 6 + c * 2 + 1] = d
441 * (in1[i * 24 +
s * 6 + c * 2 + 1] + a5 * in1[i * 24 +
s * 6 + c * 2 + 0]
442 + b * in2[i * 24 +
s * 6 + c * 2 + 1]);
443 tmp2[
s * 6 + c * 2 + 0] = d
444 * (in2[i * 24 +
s * 6 + c * 2 + 0] + a5 * in2[i * 24 +
s * 6 + c * 2 + 1]
445 + b * in1[i * 24 +
s * 6 + c * 2 + 0]);
446 tmp2[
s * 6 + c * 2 + 1] = d
447 * (in2[i * 24 +
s * 6 + c * 2 + 1] - a5 * in2[i * 24 +
s * 6 + c * 2 + 0]
448 + b * in1[i * 24 +
s * 6 + c * 2 + 1]);
450 for (
int j=0; j<24; j++) out1[i*24+j] = tmp1[j], out2[i*24+j] = tmp2[j];
460 ndegTwistGamma5((
double*)outf1, (
double*)outf2, (
double*)inf1, (
double*)inf2, dagger, kappa, mu, epsilon, Vf, twist);
464 ndegTwistGamma5((
float*)outf1, (
float*)outf2, (
float*)inf1, (
float*)inf2, dagger, (
float)kappa, (
float)mu, (
float)epsilon, Vf, twist);
468 void tm_ndeg_dslash(
void *res1,
void *res2,
void **gauge,
void *spinorField1,
void *spinorField2,
double kappa,
double mu,
472 ndeg_twist_gamma5(spinorField1, spinorField2, spinorField1, spinorField2, daggerBit, kappa, mu, epsilon,
Vh,
475 wil_dslash(res1, gauge, spinorField1, oddBit, daggerBit, precision, gauge_param);
476 wil_dslash(res2, gauge, spinorField2, oddBit, daggerBit, precision, gauge_param);
479 ndeg_twist_gamma5(res1, res2, res1, res2, daggerBit, kappa, mu, epsilon,
Vh,
QUDA_TWIST_GAMMA5_INVERSE, precision);
492 wil_dslash(tmp1, gauge, inEven1, 1, daggerBit, precision, gauge_param);
493 wil_dslash(tmp2, gauge, inEven2, 1, daggerBit, precision, gauge_param);
494 ndeg_twist_gamma5(tmp1, tmp2, tmp1, tmp2, daggerBit, kappa, mu, epsilon,
Vh,
QUDA_TWIST_GAMMA5_INVERSE, precision);
495 wil_dslash(outEven1, gauge, tmp1, 0, daggerBit, precision, gauge_param);
496 wil_dslash(outEven2, gauge, tmp2, 0, daggerBit, precision, gauge_param);
497 ndeg_twist_gamma5(outEven1, outEven2, outEven1, outEven2, daggerBit, kappa, mu, epsilon,
Vh,
QUDA_TWIST_GAMMA5_INVERSE, precision);
499 wil_dslash(tmp1, gauge, inEven1, 0, daggerBit, precision, gauge_param);
500 wil_dslash(tmp2, gauge, inEven2, 0, daggerBit, precision, gauge_param);
501 ndeg_twist_gamma5(tmp1, tmp2, tmp1, tmp2, daggerBit, kappa, mu, epsilon,
Vh,
QUDA_TWIST_GAMMA5_INVERSE, precision);
502 wil_dslash(outEven1, gauge, tmp1, 1, daggerBit, precision, gauge_param);
503 wil_dslash(outEven2, gauge, tmp2, 1, daggerBit, precision, gauge_param);
504 ndeg_twist_gamma5(outEven1, outEven2, outEven1, outEven2, daggerBit, kappa, mu, epsilon,
Vh,
QUDA_TWIST_GAMMA5_INVERSE, precision);
509 tmp1, tmp2, inEven1, inEven2, daggerBit, kappa, mu, epsilon,
Vh,
QUDA_TWIST_GAMMA5_INVERSE, precision);
510 wil_dslash(outEven1, gauge, tmp1, 1, daggerBit, precision, gauge_param);
511 wil_dslash(outEven2, gauge, tmp2, 1, daggerBit, precision, gauge_param);
512 ndeg_twist_gamma5(tmp1, tmp2, outEven1, outEven2, daggerBit, kappa, mu, epsilon,
Vh,
QUDA_TWIST_GAMMA5_INVERSE, precision);
513 wil_dslash(outEven1, gauge, tmp1, 0, daggerBit, precision, gauge_param);
514 wil_dslash(outEven2, gauge, tmp2, 0, daggerBit, precision, gauge_param);
517 tmp1, tmp2, inEven1, inEven2, daggerBit, kappa, mu, epsilon,
Vh,
QUDA_TWIST_GAMMA5_INVERSE, precision);
518 wil_dslash(outEven1, gauge, tmp1, 0, daggerBit, precision, gauge_param);
519 wil_dslash(outEven2, gauge, tmp2, 0, daggerBit, precision, gauge_param);
520 ndeg_twist_gamma5(tmp1, tmp2, outEven1, outEven2, daggerBit, kappa, mu, epsilon,
Vh,
QUDA_TWIST_GAMMA5_INVERSE, precision);
521 wil_dslash(outEven1, gauge, tmp1, 1, daggerBit, precision, gauge_param);
522 wil_dslash(outEven2, gauge, tmp2, 1, daggerBit, precision, gauge_param);
527 wil_dslash(tmp1, gauge, inEven1, 1, daggerBit, precision, gauge_param);
528 wil_dslash(tmp2, gauge, inEven2, 1, daggerBit, precision, gauge_param);
529 ndeg_twist_gamma5(tmp1, tmp2, tmp1, tmp2, daggerBit, kappa, mu, epsilon,
Vh,
QUDA_TWIST_GAMMA5_INVERSE, precision);
530 wil_dslash(outEven1, gauge, tmp1, 0, daggerBit, precision, gauge_param);
531 wil_dslash(outEven2, gauge, tmp2, 0, daggerBit, precision, gauge_param);
533 wil_dslash(tmp1, gauge, inEven1, 0, daggerBit, precision, gauge_param);
534 wil_dslash(tmp2, gauge, inEven2, 0, daggerBit, precision, gauge_param);
535 ndeg_twist_gamma5(tmp1, tmp2, tmp1, tmp2, daggerBit, kappa, mu, epsilon,
Vh,
QUDA_TWIST_GAMMA5_INVERSE, precision);
536 wil_dslash(outEven1, gauge, tmp1, 1, daggerBit, precision, gauge_param);
537 wil_dslash(outEven2, gauge, tmp2, 1, daggerBit, precision, gauge_param);
541 double kappa2 = -kappa*
kappa;
543 ndeg_twist_gamma5(inEven1, inEven2, inEven1, inEven2, daggerBit, kappa, mu, epsilon,
Vh,
QUDA_TWIST_GAMMA5_DIRECT, precision);
547 xpay(inEven2, kappa2, outEven2,
Vh*spinorSiteSize, precision);
554 void tm_ndeg_mat(
void *evenOut,
void* oddOut,
void **gauge,
void *evenIn,
void *oddIn,
double kappa,
double mu,
double epsilon,
int daggerBit,
QudaPrecision precision,
QudaGaugeParam &
gauge_param)
557 void *inEven1 = evenIn;
560 void *inOdd1 = oddIn;
563 void *outEven1 = evenOut;
566 void *outOdd1 = oddOut;
576 wil_dslash(outOdd1, gauge, inEven1, 1, daggerBit, precision, gauge_param);
577 wil_dslash(outOdd2, gauge, inEven2, 1, daggerBit, precision, gauge_param);
579 wil_dslash(outEven1, gauge, inOdd1, 0, daggerBit, precision, gauge_param);
580 wil_dslash(outEven2, gauge, inOdd2, 0, daggerBit, precision, gauge_param);
583 ndeg_twist_gamma5(tmpEven1, tmpEven2, inEven1, inEven2, daggerBit, kappa, mu, epsilon,
Vh,
QUDA_TWIST_GAMMA5_DIRECT, precision);
584 ndeg_twist_gamma5(tmpOdd1, tmpOdd2, inOdd1, inOdd2, daggerBit, kappa, mu, epsilon,
Vh,
QUDA_TWIST_GAMMA5_DIRECT, precision);
587 xpay(tmpOdd2, -kappa, outOdd2,
Vh*spinorSiteSize, precision);
589 xpay(tmpEven1, -kappa, outEven1,
Vh*spinorSiteSize, precision);
590 xpay(tmpEven2, -kappa, outEven2,
Vh*spinorSiteSize, precision);
cudaColorSpinorField * tmp2
QudaGhostExchange ghostExchange
void setPrecision(QudaPrecision precision, QudaPrecision ghost_precision=QUDA_INVALID_PRECISION, bool force_native=false)
cudaColorSpinorField * tmp1
enum QudaPrecision_s QudaPrecision
void tm_mat(void *out, void **gauge, void *in, double kappa, double mu, QudaTwistFlavorType flavor, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param)
void tm_dslash(void *res, void **gaugeFull, void *spinorField, double kappa, double mu, QudaTwistFlavorType flavor, int oddBit, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
cudaColorSpinorField * tmp
void tm_ndeg_mat(void *evenOut, void *oddOut, void **gauge, void *evenIn, void *oddIn, double kappa, double mu, double epsilon, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
QudaGaugeParam gauge_param
void tm_matpc(void *outEven, void **gauge, void *inEven, double kappa, double mu, QudaTwistFlavorType flavor, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
void twist_gamma5(void *out, void *in, int daggerBit, double kappa, double mu, QudaTwistFlavorType flavor, int V, QudaTwistGamma5Type twist, QudaPrecision precision)
__host__ __device__ void sum(double &a, double &b)
QudaSiteSubset siteSubset
void xpay(ColorSpinorField &x, double a, ColorSpinorField &y)
static Float * spinorNeighbor(int i, int dir, int oddBit, Float *spinorField, int neighbor_distance)
QudaFieldOrder fieldOrder
enum QudaMatPCType_s QudaMatPCType
QudaGammaBasis gammaBasis
void multiplySpinorByDiracProjector(Float *res, int projIdx, Float *spinorIn)
static const double projector[8][4][4][2]
void ndegTwistGamma5(sFloat *out1, sFloat *out2, sFloat *in1, sFloat *in2, const int dagger, const sFloat kappa, const sFloat mu, const sFloat epsilon, const int V, QudaTwistGamma5Type twist)
static void * backGhostFaceBuffer[QUDA_MAX_DIM]
const void ** Ghost() const
void tm_ndeg_matpc(void *outEven1, void *outEven2, void **gauge, void *inEven1, void *inEven2, double kappa, double mu, double epsilon, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
enum QudaParity_s QudaParity
void exchangeGhost(QudaParity parity, int nFace, int dagger, const MemoryLocation *pack_destination=nullptr, const MemoryLocation *halo_location=nullptr, bool gdr_send=false, bool gdr_recv=false, QudaPrecision ghost_precision=QUDA_INVALID_PRECISION) const
This is a unified ghost exchange function for doing a complete halo exchange regardless of the type o...
static void * fwdGhostFaceBuffer[QUDA_MAX_DIM]
void tm_ndeg_dslash(void *res1, void *res2, void **gauge, void *spinorField1, void *spinorField2, double kappa, double mu, double epsilon, int oddBit, int daggerBit, QudaMatPCType matpc_type, QudaPrecision precision, QudaGaugeParam &gauge_param)
void ndeg_twist_gamma5(void *outf1, void *outf2, void *inf1, void *inf2, const int dagger, const double kappa, const double mu, const double epsilon, const int Vf, QudaTwistGamma5Type twist, QudaPrecision precision)
void dslashReference(sFloat *res, gFloat **gaugeFull, sFloat *spinorField, int oddBit, int daggerBit)
static Float * gaugeLink(int i, int dir, int oddBit, Float **gaugeEven, Float **gaugeOdd, int nbr_distance)
void wil_mat(void *out, void **gauge, void *in, double kappa, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param)
cpuColorSpinorField * out
void wil_matpc(void *outEven, void **gauge, void *inEven, double kappa, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
static void su3Mul(sFloat *res, gFloat *mat, sFloat *vec)
enum QudaTwistGamma5Type_s QudaTwistGamma5Type
void wil_dslash(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
static void su3Tmul(sFloat *res, gFloat *mat, sFloat *vec)
cpuColorSpinorField * spinor
void twistGamma5(sFloat *out, sFloat *in, const int dagger, const sFloat kappa, const sFloat mu, const QudaTwistFlavorType flavor, const int V, QudaTwistGamma5Type twist)
enum QudaTwistFlavorType_s QudaTwistFlavorType