30 if (i < 0 || i >= (
Z[0]*
Z[1]*
Z[2]*
Z[3]/2))
31 {
printf(
"i out of range in neighborIndex_4d\n");
exit(-1); }
36 int x4 =
X/(
Z[2]*
Z[1]*
Z[0]);
37 int x3 = (
X/(
Z[1]*
Z[0])) %
Z[2];
38 int x2 = (
X/
Z[0]) %
Z[1];
41 x4 = (x4+dx4+
Z[3]) %
Z[3];
42 x3 = (x3+dx3+
Z[2]) %
Z[2];
43 x2 = (x2+dx2+
Z[1]) %
Z[1];
44 x1 = (x1+dx1+
Z[0]) %
Z[0];
46 return (x4*(
Z[2]*
Z[1]*
Z[0]) + x3*(
Z[1]*
Z[0]) + x2*(
Z[0]) + x1) / 2;
56 template <
typename Float>
66 gaugeField = (oddBit ? gaugeOdd : gaugeEven);
75 default: j = -1;
break;
77 gaugeField = (oddBit ? gaugeEven : gaugeOdd);
80 return &gaugeField[dir/2][j*(3*3*2)];
87 template <
typename Float>
88 Float *
gaugeLink_mgpu(
int i,
int dir,
int oddBit, Float **gaugeEven, Float **gaugeOdd, Float** ghostGaugeEven, Float** ghostGaugeOdd,
int n_ghost_faces,
int nbr_distance) {
94 gaugeField = (oddBit ? gaugeOdd : gaugeEven);
99 int x4 = Y/(
Z[2]*
Z[1]*
Z[0]);
100 int x3 = (Y/(
Z[1]*
Z[0])) %
Z[2];
101 int x2 = (Y/
Z[0]) %
Z[1];
107 Float* ghostGaugeField;
112 int new_x1 = (x1 -
d + X1 )% X1;
114 ghostGaugeField = (oddBit?ghostGaugeEven[0]: ghostGaugeOdd[0]);
115 int offset = (n_ghost_faces + x1 -
d)*X4*X3*X2/2 + (x4*X3*X2 + x3*X2+x2)/2;
116 return &ghostGaugeField[
offset*(3*3*2)];
118 j = (x4*X3*X2*X1 + x3*X2*X1 + x2*X1 + new_x1) / 2;
123 int new_x2 = (x2 -
d + X2 )% X2;
125 ghostGaugeField = (oddBit?ghostGaugeEven[1]: ghostGaugeOdd[1]);
126 int offset = (n_ghost_faces + x2 -
d)*X4*X3*X1/2 + (x4*X3*X1 + x3*X1+x1)/2;
127 return &ghostGaugeField[
offset*(3*3*2)];
129 j = (x4*X3*X2*X1 + x3*X2*X1 + new_x2*X1 + x1) / 2;
135 int new_x3 = (x3 -
d + X3 )% X3;
137 ghostGaugeField = (oddBit?ghostGaugeEven[2]: ghostGaugeOdd[2]);
138 int offset = (n_ghost_faces + x3 -
d)*X4*X2*X1/2 + (x4*X2*X1 + x2*X1+x1)/2;
139 return &ghostGaugeField[
offset*(3*3*2)];
141 j = (x4*X3*X2*X1 + new_x3*X2*X1 + x2*X1 + x1) / 2;
146 int new_x4 = (x4 -
d + X4)% X4;
148 ghostGaugeField = (oddBit?ghostGaugeEven[3]: ghostGaugeOdd[3]);
149 int offset = (n_ghost_faces + x4 -
d)*X1*X2*X3/2 + (x3*X2*X1 + x2*X1+x1)/2;
150 return &ghostGaugeField[
offset*(3*3*2)];
152 j = (new_x4*(X3*X2*X1) + x3*(X2*X1) + x2*(X1) + x1) / 2;
156 default: j = -1;
printf(
"ERROR: wrong dir \n");
exit(1);
158 gaugeField = (oddBit ? gaugeEven : gaugeOdd);
162 return &gaugeField[dir/2][j*(3*3*2)];
171 {{1,0}, {0,0}, {0,0}, {0,-1}},
172 {{0,0}, {1,0}, {0,-1}, {0,0}},
173 {{0,0}, {0,1}, {1,0}, {0,0}},
174 {{0,1}, {0,0}, {0,0}, {1,0}}
177 {{1,0}, {0,0}, {0,0}, {0,1}},
178 {{0,0}, {1,0}, {0,1}, {0,0}},
179 {{0,0}, {0,-1}, {1,0}, {0,0}},
180 {{0,-1}, {0,0}, {0,0}, {1,0}}
183 {{1,0}, {0,0}, {0,0}, {1,0}},
184 {{0,0}, {1,0}, {-1,0}, {0,0}},
185 {{0,0}, {-1,0}, {1,0}, {0,0}},
186 {{1,0}, {0,0}, {0,0}, {1,0}}
189 {{1,0}, {0,0}, {0,0}, {-1,0}},
190 {{0,0}, {1,0}, {1,0}, {0,0}},
191 {{0,0}, {1,0}, {1,0}, {0,0}},
192 {{-1,0}, {0,0}, {0,0}, {1,0}}
195 {{1,0}, {0,0}, {0,-1}, {0,0}},
196 {{0,0}, {1,0}, {0,0}, {0,1}},
197 {{0,1}, {0,0}, {1,0}, {0,0}},
198 {{0,0}, {0,-1}, {0,0}, {1,0}}
201 {{1,0}, {0,0}, {0,1}, {0,0}},
202 {{0,0}, {1,0}, {0,0}, {0,-1}},
203 {{0,-1}, {0,0}, {1,0}, {0,0}},
204 {{0,0}, {0,1}, {0,0}, {1,0}}
207 {{1,0}, {0,0}, {-1,0}, {0,0}},
208 {{0,0}, {1,0}, {0,0}, {-1,0}},
209 {{-1,0}, {0,0}, {1,0}, {0,0}},
210 {{0,0}, {-1,0}, {0,0}, {1,0}}
213 {{1,0}, {0,0}, {1,0}, {0,0}},
214 {{0,0}, {1,0}, {0,0}, {1,0}},
215 {{1,0}, {0,0}, {1,0}, {0,0}},
216 {{0,0}, {1,0}, {0,0}, {1,0}}
220 {{0,0}, {0,0}, {0,0}, {0,0}},
221 {{0,0}, {0,0}, {0,0}, {0,0}},
222 {{0,0}, {0,0}, {2,0}, {0,0}},
223 {{0,0}, {0,0}, {0,0}, {2,0}}
227 {{2,0}, {0,0}, {0,0}, {0,0}},
228 {{0,0}, {2,0}, {0,0}, {0,0}},
229 {{0,0}, {0,0}, {0,0}, {0,0}},
230 {{0,0}, {0,0}, {0,0}, {0,0}}
236 template <
typename Float>
238 for (
int i=0;
i<4*3*2;
i++) res[
i] = 0.0;
240 for (
int s = 0;
s < 4;
s++) {
241 for (
int t = 0;
t < 4;
t++) {
245 for (
int m = 0; m < 3; m++) {
246 Float spinorRe = spinorIn[
t*(3*2) + m*(2) + 0];
247 Float spinorIm = spinorIn[
t*(3*2) + m*(2) + 1];
248 res[
s*(3*2) + m*(2) + 0] += projRe*spinorRe - projIm*spinorIm;
249 res[
s*(3*2) + m*(2) + 1] += projRe*spinorIm + projIm*spinorRe;
270 template <QudaDWFPCType type,
typename sFloat,
typename gFloat>
272 int oddBit,
int daggerBit) {
276 for (
int i=0;
i<
V5h*4*3*2;
i++) res[
i] = 0.0;
279 gFloat *gaugeEven[4], *gaugeOdd[4];
282 for (
int dir = 0; dir < 4; dir++) {
283 gaugeEven[dir] = gaugeFull[dir];
289 for (
int xs=0;xs<
Ls;xs++) {
290 for (
int gge_idx = 0; gge_idx <
Vh; gge_idx++) {
291 for (
int dir = 0; dir < 8; dir++) {
298 gaugeOddBit = (xs%2 == 0 || type ==
QUDA_4D_PC) ? oddBit : (oddBit+1) % 2;
299 gFloat *gauge =
gaugeLink_sgpu(gge_idx, dir, gaugeOddBit, gaugeEven, gaugeOdd);
303 sFloat *
spinor = spinorNeighbor_5d<type>(
sp_idx, dir, oddBit, spinorField);
304 sFloat projectedSpinor[4*3*2], gaugedSpinor[4*3*2];
305 int projIdx = 2*(dir/2)+(dir+daggerBit)%2;
308 for (
int s = 0;
s < 4;
s++) {
310 su3Mul(&gaugedSpinor[
s*(3*2)], gauge, &projectedSpinor[
s*(3*2)]);
312 std::cout <<
"spinor:" << std::endl;
314 std::cout <<
"gauge:" << std::endl;
317 su3Tmul(&gaugedSpinor[
s*(3*2)], gauge, &projectedSpinor[
s*(3*2)]);
328 template <QudaDWFPCType type,
typename sFloat,
typename gFloat>
329 void dslashReference_4d_mgpu(sFloat *res, gFloat **gaugeFull, gFloat **ghostGauge, sFloat *spinorField,
330 sFloat **fwdSpinor, sFloat **backSpinor,
int oddBit,
int daggerBit)
335 gFloat *gaugeEven[4], *gaugeOdd[4];
336 gFloat *ghostGaugeEven[4], *ghostGaugeOdd[4];
338 for (
int dir = 0; dir < 4; dir++)
340 gaugeEven[dir] = gaugeFull[dir];
343 ghostGaugeEven[dir] = ghostGauge[dir];
346 for (
int xs=0;xs<
Ls;xs++)
349 for (
int i = 0;
i <
Vh;
i++)
352 for (
int dir = 0; dir < 8; dir++)
354 int gaugeOddBit = (xs%2 == 0 || type ==
QUDA_4D_PC) ? oddBit : (oddBit + 1) % 2;
356 gFloat *gauge =
gaugeLink_mgpu(
i, dir, gaugeOddBit, gaugeEven, gaugeOdd, ghostGaugeEven, ghostGaugeOdd, 1, 1);
357 sFloat *
spinor = spinorNeighbor_5d_mgpu<type>(
sp_idx, dir, oddBit, spinorField, fwdSpinor, backSpinor, 1, 1);
360 int projIdx = 2*(dir/2)+(dir+daggerBit)%2;
363 for (
int s = 0;
s < 4;
s++)
365 if (dir % 2 == 0)
su3Mul(&gaugedSpinor[
s*(3*2)], gauge, &projectedSpinor[
s*(3*2)]);
366 else su3Tmul(&gaugedSpinor[
s*(3*2)], gauge, &projectedSpinor[
s*(3*2)]);
376 template <QudaDWFPCType type,
bool zero_initialize=false,
typename sFloat>
378 int oddBit,
int daggerBit, sFloat
mferm) {
379 for (
int i = 0;
i <
V5h;
i++) {
380 if (zero_initialize)
for(
int one_site = 0 ; one_site < 24 ; one_site++)
381 res[
i*(4*3*2)+one_site] = 0.0;
382 for (
int dir = 8; dir < 10; dir++) {
386 sFloat *
spinor = spinorNeighbor_5d<type>(
i, dir, oddBit, spinorField);
387 sFloat projectedSpinor[4*3*2];
388 int projIdx = 2*(dir/2)+(dir+daggerBit)%2;
392 int xs =
X/(
Z[3]*
Z[2]*
Z[1]*
Z[0]);
394 if ( (xs == 0 && dir == 9) || (xs ==
Ls-1 && dir == 8) ) {
395 ax(projectedSpinor,(sFloat)(-
mferm),projectedSpinor,4*3*2);
397 sum(&res[
i*(4*3*2)], &res[
i*(4*3*2)], projectedSpinor, 4*3*2);
403 template <
typename sFloat>
405 int oddBit,
int daggerBit, sFloat
mferm,
double *
kappa) {
406 double *inv_Ftr = (
double*)
malloc(
Ls*
sizeof(sFloat));
407 double *Ftr = (
double*)
malloc(
Ls*
sizeof(sFloat));
408 for(
int xs = 0 ; xs <
Ls ; xs++)
412 for (
int i = 0;
i <
Vh;
i++) {
413 memcpy(&res[24*(
i+
Vh*xs)], &spinorField[24*(
i+
Vh*xs)], 24*
sizeof(sFloat));
419 for (
int i = 0;
i <
Vh;
i++) {
420 ax(&res[12+24*(
i+
Vh*(
Ls-1))],(sFloat)(inv_Ftr[0]), &spinorField[12+24*(
i+
Vh*(
Ls-1))], 12);
424 for(
int xs = 0 ; xs <=
Ls-2 ; ++xs)
426 for (
int i = 0;
i <
Vh;
i++) {
427 axpy((sFloat)(2.0*
kappa[xs]), &res[24*(
i+
Vh*xs)], &res[24*(
i+
Vh*(xs+1))], 12);
428 axpy((sFloat)Ftr[xs], &res[12+24*(
i+
Vh*xs)], &res[12+24*(
i+
Vh*(
Ls-1))], 12);
430 for (
int tmp_s = 0 ; tmp_s <
Ls ; tmp_s++)
431 Ftr[tmp_s] *= 2.0*
kappa[tmp_s];
433 for(
int xs = 0 ; xs <
Ls ; xs++)
438 for(
int xs =
Ls-2 ; xs >=0 ; --xs)
440 for (
int i = 0;
i <
Vh;
i++) {
441 axpy((sFloat)Ftr[xs], &res[24*(
i+
Vh*(
Ls-1))], &res[24*(
i+
Vh*xs)], 12);
442 axpy((sFloat)(2.0*
kappa[xs]), &res[12+24*(
i+
Vh*(xs+1))], &res[12+24*(
i+
Vh*xs)], 12);
444 for (
int tmp_s = 0 ; tmp_s <
Ls ; tmp_s++)
445 Ftr[tmp_s] /= 2.0*
kappa[tmp_s];
448 for (
int i = 0;
i <
Vh;
i++) {
449 ax(&res[24*(
i+
Vh*(
Ls-1))], (sFloat)(inv_Ftr[
Ls-1]), &res[24*(
i+
Vh*(
Ls-1))], 12);
455 for (
int i = 0;
i <
Vh;
i++) {
456 ax(&res[24*(
i+
Vh*(
Ls-1))],(sFloat)(inv_Ftr[0]), &spinorField[24*(
i+
Vh*(
Ls-1))], 12);
460 for(
int xs = 0 ; xs <=
Ls-2 ; ++xs)
462 for (
int i = 0;
i <
Vh;
i++) {
463 axpy((sFloat)Ftr[xs], &res[24*(
i+
Vh*xs)], &res[24*(
i+
Vh*(
Ls-1))], 12);
464 axpy((sFloat)(2.0*
kappa[xs]), &res[12+24*(
i+
Vh*xs)], &res[12+24*(
i+
Vh*(xs+1))], 12);
466 for (
int tmp_s = 0 ; tmp_s <
Ls ; tmp_s++)
467 Ftr[tmp_s] *= 2.0*
kappa[tmp_s];
469 for(
int xs = 0 ; xs <
Ls ; xs++)
474 for(
int xs =
Ls-2 ; xs >=0 ; --xs)
476 for (
int i = 0;
i <
Vh;
i++) {
477 axpy((sFloat)(2.0*
kappa[xs]), &res[24*(
i+
Vh*(xs+1))], &res[24*(
i+
Vh*xs)], 12);
478 axpy((sFloat)Ftr[xs], &res[12+24*(
i+
Vh*(
Ls-1))], &res[12+24*(
i+
Vh*xs)], 12);
480 for (
int tmp_s = 0 ; tmp_s <
Ls ; tmp_s++)
481 Ftr[tmp_s] /= 2.0*
kappa[tmp_s];
484 for (
int i = 0;
i <
Vh;
i++) {
485 ax(&res[12+24*(
i+
Vh*(
Ls-1))], (sFloat)(inv_Ftr[
Ls-1]), &res[12+24*(
i+
Vh*(
Ls-1))], 12);
498 dslashReference_4d_sgpu<QUDA_5D_PC>((
double*)
out, (
double**)gauge, (
double*)
in, oddBit, daggerBit);
499 dslashReference_5th<QUDA_5D_PC>((
double*)
out, (
double*)
in, oddBit, daggerBit,
mferm);
501 dslashReference_4d_sgpu<QUDA_5D_PC>((
float*)
out, (
float**)gauge, (
float*)
in, oddBit, daggerBit);
502 dslashReference_5th<QUDA_5D_PC>((
float*)
out, (
float*)
in, oddBit, daggerBit, (
float)
mferm);
509 void **ghostGauge = (
void**)cpu.
Ghost();
536 else errorQuda(
"ERROR: full parity not supported in function %s", __FUNCTION__);
539 inField.exchangeGhost(otherParity, nFace, daggerBit);
541 void** fwd_nbr_spinor = inField.fwdGhostFaceBuffer;
542 void** back_nbr_spinor = inField.backGhostFaceBuffer;
545 dslashReference_4d_mgpu<QUDA_5D_PC>((
double*)
out, (
double**)gauge, (
double**)ghostGauge, (
double*)
in,(
double**)fwd_nbr_spinor, (
double**)back_nbr_spinor, oddBit, daggerBit);
547 dslashReference_5th<QUDA_5D_PC>((
double*)
out, (
double*)
in, oddBit, daggerBit,
mferm);
549 dslashReference_4d_mgpu<QUDA_5D_PC>((
float*)
out, (
float**)gauge, (
float**)ghostGauge, (
float*)
in,
550 (
float**)fwd_nbr_spinor, (
float**)back_nbr_spinor, oddBit, daggerBit);
551 dslashReference_5th<QUDA_5D_PC>((
float*)
out, (
float*)
in, oddBit, daggerBit, (
float)
mferm);
562 dslashReference_4d_sgpu<QUDA_4D_PC>((
double*)
out, (
double**)gauge, (
double*)
in, oddBit, daggerBit);
564 dslashReference_4d_sgpu<QUDA_4D_PC>((
float*)
out, (
float**)gauge, (
float*)
in, oddBit, daggerBit);
571 void **ghostGauge = (
void**)cpu.
Ghost();
598 else errorQuda(
"ERROR: full parity not supported in function %s", __FUNCTION__);
601 inField.exchangeGhost(otherParity, nFace, daggerBit);
603 void** fwd_nbr_spinor = inField.fwdGhostFaceBuffer;
604 void** back_nbr_spinor = inField.backGhostFaceBuffer;
606 dslashReference_4d_mgpu<QUDA_4D_PC>((
double*)
out, (
double**)gauge, (
double**)ghostGauge, (
double*)
in,(
double**)fwd_nbr_spinor, (
double**)back_nbr_spinor, oddBit, daggerBit);
608 dslashReference_4d_mgpu<QUDA_4D_PC>((
float*)
out, (
float**)gauge, (
float**)ghostGauge, (
float*)
in,
609 (
float**)fwd_nbr_spinor, (
float**)back_nbr_spinor, oddBit, daggerBit);
619 if (zero_initialize) dslashReference_5th<QUDA_4D_PC, true>((
double*)
out, (
double*)
in, oddBit, daggerBit,
mferm);
620 else dslashReference_5th<QUDA_4D_PC, false>((
double*)
out, (
double*)
in, oddBit, daggerBit,
mferm);
622 if (zero_initialize) dslashReference_5th<QUDA_4D_PC, true>((
float*)
out, (
float*)
in, oddBit, daggerBit, (
float)
mferm);
623 else dslashReference_5th<QUDA_4D_PC, false>((
float*)
out, (
float*)
in, oddBit, daggerBit, (
float)
mferm);
639 if (zero_initialize) dslashReference_5th<QUDA_4D_PC,true>((
double*)
out, (
double*)
in, oddBit, daggerBit,
mferm);
640 else dslashReference_5th<QUDA_4D_PC,false>((
double*)
out, (
double*)
in, oddBit, daggerBit,
mferm);
642 if (zero_initialize) dslashReference_5th<QUDA_4D_PC,true>((
float*)
out, (
float*)
in, oddBit, daggerBit, (
float)
mferm);
643 else dslashReference_5th<QUDA_4D_PC,false>((
float*)
out, (
float*)
in, oddBit, daggerBit, (
float)
mferm);
645 for(
int xs = 0 ; xs <
Ls ; xs++) {
650 void mdw_dslash_4_pre(
void *
out,
void **gauge,
void *
in,
int oddBit,
int daggerBit,
QudaPrecision precision,
QudaGaugeParam &
gauge_param,
double mferm,
double *b5,
double *c5,
bool zero_initialize)
653 if (zero_initialize) dslashReference_5th<QUDA_4D_PC, true>((
double*)
out, (
double*)
in, oddBit, daggerBit,
mferm);
654 else dslashReference_5th<QUDA_4D_PC, false>((
double*)
out, (
double*)
in, oddBit, daggerBit,
mferm);
655 for(
int xs = 0 ; xs <
Ls ; xs++)
660 if (zero_initialize) dslashReference_5th<QUDA_4D_PC, true>((
float*)
out, (
float*)
in, oddBit, daggerBit, (
float)
mferm);
661 else dslashReference_5th<QUDA_4D_PC,false>((
float*)
out, (
float*)
in, oddBit, daggerBit, (
float)
mferm);
662 for(
int xs = 0 ; xs <
Ls ; xs++)
701 void mdw_mat(
void *
out,
void **gauge,
void *
in,
double *kappa_b,
double *kappa_c,
int dagger,
QudaPrecision precision,
QudaGaugeParam &
gauge_param,
double mferm,
double *b5,
double *c5) {
706 for(
int xs = 0; xs <
Ls ; xs++)
kappa5[xs] = 0.5*kappa_b[xs]/kappa_c[xs];
713 mdw_dslash_4_pre(
tmp, gauge, inEven, 0,
dagger, precision,
gauge_param,
mferm, b5, c5,
true);
717 for(
int xs = 0 ; xs <
Ls ; xs++) {
722 mdw_dslash_4_pre(
tmp, gauge, inOdd, 1,
dagger, precision,
gauge_param,
mferm, b5, c5,
true);
726 for(
int xs = 0 ; xs <
Ls ; xs++) {
741 dagger_bit = (dagger_bit == 1) ? 0 : 1;
771 for(
int xs = 0; xs <
Ls ; xs++)
775 double *output = (
double*)
out;
784 if (symmetric && !dagger_bit) {
790 }
else if (symmetric && dagger_bit) {
808 void mdw_matpc(
void *
out,
void **gauge,
void *
in,
double *kappa_b,
double *kappa_c,
QudaMatPCType matpc_type,
int dagger,
QudaPrecision precision,
QudaGaugeParam &
gauge_param,
double mferm,
double *b5,
double *c5)
812 double *kappa2 = (
double*)
malloc(
Ls*
sizeof(
double));
813 double *kappa_mdwf = (
double*)
malloc(
Ls*
sizeof(
double));
814 for(
int xs = 0; xs <
Ls ; xs++)
816 kappa5[xs] = 0.5*kappa_b[xs]/kappa_c[xs];
817 kappa2[xs] = -kappa_b[xs]*kappa_b[xs];
818 kappa_mdwf[xs] = -
kappa5[xs];
825 if (symmetric && !
dagger) {
826 mdw_dslash_4_pre(
tmp, gauge,
in,
parity[1],
dagger, precision,
gauge_param,
mferm, b5, c5,
true);
829 mdw_dslash_4_pre(
out, gauge,
tmp,
parity[0],
dagger, precision,
gauge_param,
mferm, b5, c5,
true);
832 for(
int xs = 0 ; xs <
Ls ; xs++) {
836 }
else if (symmetric &&
dagger) {
839 mdw_dslash_4_pre(
tmp, gauge,
out,
parity[0],
dagger, precision,
gauge_param,
mferm, b5, c5,
true);
842 mdw_dslash_4_pre(
out, gauge,
tmp,
parity[1],
dagger, precision,
gauge_param,
mferm, b5, c5,
true);
843 for(
int xs = 0 ; xs <
Ls ; xs++) {
847 }
else if (!symmetric && !
dagger) {
848 mdw_dslash_4_pre(
out, gauge,
in,
parity[1],
dagger, precision,
gauge_param,
mferm, b5, c5,
true);
851 mdw_dslash_4_pre(
tmp, gauge,
out,
parity[0],
dagger, precision,
gauge_param,
mferm, b5, c5,
true);
854 for(
int xs = 0 ; xs <
Ls ; xs++) {
858 }
else if (!symmetric &&
dagger) {
860 mdw_dslash_4_pre(
tmp, gauge,
out,
parity[1],
dagger, precision,
gauge_param,
mferm, b5, c5,
true);
863 mdw_dslash_4_pre(
out, gauge,
tmp,
parity[0],
dagger, precision,
gauge_param,
mferm, b5, c5,
true);
865 for(
int xs = 0 ; xs <
Ls ; xs++) {
932 void matpc(
void *outEven,
void **gauge,
void *inEven,
double kappa,
QudaGhostExchange ghostExchange
void dw_4d_matpc(void *out, void **gauge, void *in, double kappa, QudaMatPCType matpc_type, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
void dw_dslash_5_4d(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, bool zero_initialize)
void xpay(ColorSpinorField &x, const double &a, ColorSpinorField &y)
enum QudaPrecision_s QudaPrecision
void dslashReference_5th(sFloat *res, sFloat *spinorField, int oddBit, int daggerBit, sFloat mferm)
void printSpinorElement(void *spinor, int X, QudaPrecision precision)
cudaColorSpinorField * tmp
int fullLatticeIndex_5d_4dpc(int i, int oddBit)
void dw_dslash(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
QudaGaugeParam gauge_param
void mdw_mat(void *out, void **gauge, void *in, double *kappa_b, double *kappa_c, int dagger, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double *b5, double *c5)
void dw_mat(void *out, void **gauge, void *in, double kappa, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
QudaSiteSubset siteSubset
void exit(int) __attribute__((noreturn))
static void axpby(Float a, Float *x, Float b, Float *y, int len)
int fullLatticeIndex(int dim[4], int index, int oddBit)
void matpc(void *outEven, void **gauge, void *inEven, double kappa, QudaMatPCType matpc_type, int dagger_bit, QudaPrecision sPrecision, QudaPrecision gPrecision, double mferm)
void dw_matdagmat(void *out, void **gauge, void *in, double kappa, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
void dw_matpc(void *out, void **gauge, void *in, double kappa, QudaMatPCType matpc_type, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
void ax(const double &a, GaugeField &u)
Scale the gauge field by the scalar a.
void * malloc(size_t __size) __attribute__((__warn_unused_result__)) __attribute__((alloc_size(1)))
int printf(const char *,...) __attribute__((__format__(__printf__
void mdw_dslash_5(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double *kappa, bool zero_initialize)
VOLATILE spinorFloat kappa
QudaFieldOrder fieldOrder
__host__ __device__ void sum(double &a, double &b)
void mdw_dslash_4_pre(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double *b5, double *c5, bool zero_initialize)
void dw_4d_mat(void *out, void **gauge, void *in, double kappa, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
Float * gaugeLink_sgpu(int i, int dir, int oddBit, Float **gaugeEven, Float **gaugeOdd)
enum QudaMatPCType_s QudaMatPCType
QudaGammaBasis gammaBasis
void mdw_matpc(void *out, void **gauge, void *in, double *kappa_b, double *kappa_c, QudaMatPCType matpc_type, int dagger, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double *b5, double *c5)
void dslashReference_4d_sgpu(sFloat *res, gFloat **gaugeFull, sFloat *spinorField, int oddBit, int daggerBit)
__host__ __device__ ValueType pow(ValueType x, ExponentType e)
const void ** Ghost() const
enum QudaParity_s QudaParity
void matpcdagmatpc(void *out, void **gauge, void *in, double kappa, QudaPrecision sPrecision, QudaPrecision gPrecision, double mferm, QudaMatPCType matpc_type)
void * memcpy(void *__dst, const void *__src, size_t __n)
void dslashReference_5th_inv(sFloat *res, sFloat *spinorField, int oddBit, int daggerBit, sFloat mferm, double *kappa)
int fullLatticeIndex_5d(int i, int oddBit)
cpuColorSpinorField * out
Main header file for the QUDA library.
int fullLatticeIndex_4d(int i, int oddBit)
const double projector[10][4][4][2]
void dslash_4_4d(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
void matdagmat(void *out, void **gauge, void *in, double kappa, QudaPrecision sPrecision, QudaPrecision gPrecision, double mferm)
static void su3Mul(sFloat *res, gFloat *mat, sFloat *vec)
__device__ void axpy(real a, const real *x, Link &y)
static void su3Tmul(sFloat *res, gFloat *mat, sFloat *vec)
Float * gaugeLink_mgpu(int i, int dir, int oddBit, Float **gaugeEven, Float **gaugeOdd, Float **ghostGaugeEven, Float **ghostGaugeOdd, int n_ghost_faces, int nbr_distance)
static __inline__ size_t size_t d
int neighborIndex_4d(int i, int oddBit, int dx4, int dx3, int dx2, int dx1)
void dslash_5_inv(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double *kappa)
void multiplySpinorByDiracProjector5(Float *res, int projIdx, Float *spinorIn)
cpuColorSpinorField * spinor
int comm_dim_partitioned(int dim)