16 extern void *
memset(
void *
s,
int c,
size_t n);
29 template<
typename Float>
34 for (i = 0;i < 3; i++){
36 printf(
"(%10f,%10f) \t", link[i*3*2 + j*2], link[i*3*2 + j*2 + 1]);
45 template <
typename sFloat,
typename gFloat>
47 int oddBit,
int daggerBit)
49 for (
int i=0; i<
Vh*1*3*2; i++) res[i] = 0.0;
51 gFloat *fatlinkEven[4], *fatlinkOdd[4];
52 gFloat *longlinkEven[4], *longlinkOdd[4];
55 fatlinkEven[
dir] = fatlink[
dir];
57 longlinkEven[
dir] =longlink[
dir];
61 for (
int i = 0; i <
Vh; i++) {
64 gFloat* fatlnk = gaugeLink(i,
dir, oddBit, fatlinkEven, fatlinkOdd, 1);
65 gFloat* longlnk = gaugeLink(i,
dir, oddBit, longlinkEven, longlinkOdd, 3);
67 sFloat *first_neighbor_spinor = spinorNeighbor(i,
dir, oddBit, spinorField, 1);
68 sFloat *third_neighbor_spinor = spinorNeighbor(i,
dir, oddBit, spinorField, 3);
74 su3Mul(gaugedSpinor, fatlnk, first_neighbor_spinor);
75 sum(&res[i*mySpinorSiteSize], &res[i*mySpinorSiteSize], gaugedSpinor, mySpinorSiteSize);
76 su3Mul(gaugedSpinor, longlnk, third_neighbor_spinor);
77 sum(&res[i*mySpinorSiteSize], &res[i*mySpinorSiteSize], gaugedSpinor, mySpinorSiteSize);
79 su3Tmul(gaugedSpinor, fatlnk, first_neighbor_spinor);
80 sub(&res[i*mySpinorSiteSize], &res[i*mySpinorSiteSize], gaugedSpinor, mySpinorSiteSize);
82 su3Tmul(gaugedSpinor, longlnk, third_neighbor_spinor);
83 sub(&res[i*mySpinorSiteSize], &res[i*mySpinorSiteSize], gaugedSpinor, mySpinorSiteSize);
87 negx(&res[i*mySpinorSiteSize], mySpinorSiteSize);
101 dslashReference((
double*)res, (
double**)fatlink, (
double**)longlink, (
double*)spinorField, oddBit, daggerBit);
103 dslashReference((
double*)res, (
float**)fatlink, (
float**)longlink, (
double*)spinorField, oddBit, daggerBit);
108 dslashReference((
float*)res, (
double**)fatlink, (
double**)longlink, (
float*)spinorField, oddBit, daggerBit);
110 dslashReference((
float*)res, (
float**)fatlink, (
float**)longlink, (
float*)spinorField, oddBit, daggerBit);
118 template <
typename sFloat,
typename gFloat>
123 sFloat *outEven =
out;
131 xpay(in, -kappa, out,
V*mySpinorSiteSize);
142 Mat((
double*)out, (
double**)fatlink, (
double**)longlink, (
double*)in, (
double)kappa, dagger_bit);
144 Mat((
double*)out, (
float**)fatlink, (
float**)longlink, (
double*)in, (
double)kappa, dagger_bit);
148 Mat((
float*)out, (
double**)fatlink, (
double**)longlink, (
float*)in, (
float)kappa, dagger_bit);
150 Mat((
float*)out, (
float**)fatlink, (
float**)longlink, (
float*)in, (
float)kappa, dagger_bit);
157 template <
typename sFloat,
typename gFloat>
162 sFloat msq_x4 = mass*mass*4;
168 sFloat *outEven =
out;
179 sFloat *outOdd =
out;
189 fprintf(stderr,
"ERROR: invalid parity in %s,line %d\n", __FUNCTION__, __LINE__);
204 Matdagmat((
double*)out, (
double**)fatlink, (
double**)longlink, (
double*)in, (
double)mass, dagger_bit, (
double*)tmp, parity);
206 Matdagmat((
double*)out, (
float**)fatlink, (
float**)longlink, (
double*)in, (
double)mass, dagger_bit, (
double*) tmp, parity);
210 Matdagmat((
float*)out, (
double**)fatlink, (
double**)longlink, (
float*)in, (
float)mass, dagger_bit, (
float*)tmp, parity);
212 Matdagmat((
float*)out, (
float**)fatlink, (
float**)longlink, (
float*)in, (
float)mass, dagger_bit, (
float*)tmp, parity);
222 template <
typename sFloat,
typename gFloat>
223 static void MatPC(sFloat *outEven, gFloat **
fatlink, gFloat**
longlink, sFloat *inEven, sFloat
kappa,
241 sFloat kappa2 = -kappa*
kappa;
249 staggered_matpc(
void *outEven,
void **fatlink,
void**longlink,
void *inEven,
double kappa,
255 MatPC((
double*)outEven, (
double**)fatlink, (
double**)longlink, (
double*)inEven, (
double)kappa, dagger_bit, matpc_type);
258 MatPC((
double*)outEven, (
double**)fatlink, (
double**)longlink, (
double*)inEven, (
double)kappa, dagger_bit, matpc_type);
262 MatPC((
float*)outEven, (
double**)fatlink, (
double**)longlink, (
float*)inEven, (
float)kappa, dagger_bit, matpc_type);
264 MatPC((
float*)outEven, (
float**)fatlink, (
float**)longlink, (
float*)inEven, (
float)kappa, dagger_bit, matpc_type);
271 template <
typename sFloat,
typename gFloat>
272 void dslashReference_mg4dir(sFloat *res, gFloat **fatlink, gFloat** longlink,
273 gFloat** ghostFatlink, gFloat** ghostLonglink,
274 sFloat *spinorField, sFloat** fwd_nbr_spinor,
275 sFloat** back_nbr_spinor,
int oddBit,
int daggerBit)
277 for (
int i=0; i<
Vh*1*3*2; i++) res[i] = 0.0;
280 gFloat *fatlinkEven[4], *fatlinkOdd[4];
281 gFloat *longlinkEven[4], *longlinkOdd[4];
282 gFloat *ghostFatlinkEven[4], *ghostFatlinkOdd[4];
283 gFloat *ghostLonglinkEven[4], *ghostLonglinkOdd[4];
286 fatlinkEven[
dir] = fatlink[
dir];
288 longlinkEven[
dir] =longlink[
dir];
291 ghostFatlinkEven[
dir] = ghostFatlink[
dir];
293 ghostLonglinkEven[
dir] = ghostLonglink[
dir];
297 for (
int i = 0; i <
Vh; i++) {
300 gFloat* fatlnk = gaugeLink_mg4dir(i,
dir, oddBit, fatlinkEven, fatlinkOdd, ghostFatlinkEven, ghostFatlinkOdd, 1, 1);
301 gFloat* longlnk = gaugeLink_mg4dir(i,
dir, oddBit, longlinkEven, longlinkOdd, ghostLonglinkEven, ghostLonglinkOdd, 3, 3);
303 sFloat *first_neighbor_spinor = spinorNeighbor_mg4dir(i,
dir, oddBit, spinorField, fwd_nbr_spinor, back_nbr_spinor, 1, 3);
304 sFloat *third_neighbor_spinor = spinorNeighbor_mg4dir(i,
dir, oddBit, spinorField, fwd_nbr_spinor, back_nbr_spinor, 3, 3);
310 su3Mul(gaugedSpinor, fatlnk, first_neighbor_spinor);
311 sum(&res[i*mySpinorSiteSize], &res[i*mySpinorSiteSize], gaugedSpinor, mySpinorSiteSize);
312 su3Mul(gaugedSpinor, longlnk, third_neighbor_spinor);
313 sum(&res[i*mySpinorSiteSize], &res[i*mySpinorSiteSize], gaugedSpinor, mySpinorSiteSize);
316 su3Tmul(gaugedSpinor, fatlnk, first_neighbor_spinor);
317 sub(&res[i*mySpinorSiteSize], &res[i*mySpinorSiteSize], gaugedSpinor, mySpinorSiteSize);
319 su3Tmul(gaugedSpinor, longlnk, third_neighbor_spinor);
320 sub(&res[i*mySpinorSiteSize], &res[i*mySpinorSiteSize], gaugedSpinor, mySpinorSiteSize);
326 negx(&res[i*mySpinorSiteSize], mySpinorSiteSize);
345 errorQuda(
"ERROR: full parity not supported in function %s", __FUNCTION__);
351 faceBuf.exchangeCpuSpinor(*in, otherparity, daggerBit);
358 dslashReference_mg4dir((
double*)out->
V(), (
double**)fatlink, (
double**)
longlink,
359 (
double**)ghost_fatlink, (
double**)ghost_longlink, (
double*)in->
V(),
360 (
double**)fwd_nbr_spinor, (
double**)back_nbr_spinor, oddBit, daggerBit);
362 dslashReference_mg4dir((
double*)out->
V(), (
float**)fatlink, (
float**)
longlink, (
float**)ghost_fatlink, (
float**)ghost_longlink,
363 (
double*)in->V(), (
double**)fwd_nbr_spinor, (
double**)back_nbr_spinor, oddBit, daggerBit);
368 dslashReference_mg4dir((
float*)out->
V(), (
double**)fatlink, (
double**)
longlink, (
double**)ghost_fatlink, (
double**)ghost_longlink,
369 (
float*)in->V(), (
float**)fwd_nbr_spinor, (
float**)back_nbr_spinor, oddBit, daggerBit);
371 dslashReference_mg4dir((
float*)out->
V(), (
float**)fatlink, (
float**)
longlink, (
float**)ghost_fatlink, (
float**)ghost_longlink,
372 (
float*)in->V(), (
float**)fwd_nbr_spinor, (
float**)back_nbr_spinor, oddBit, daggerBit);
385 if (sPrecision != gPrecision){
386 errorQuda(
"Spinor precision and gPrecison is not the same");
395 errorQuda(
"ERROR: full parity not supported in function %s\n", __FUNCTION__);
399 in, otherparity, dagger_bit, sPrecision, gPrecision);
402 tmp, parity, dagger_bit, sPrecision, gPrecision);
404 double msq_x4 = mass*mass*4;
406 axmy((
double*)in->V(), (double)msq_x4, (
double*)out->V(), Vh*
mySpinorSiteSize);
408 axmy((
float*)in->V(), (float)msq_x4, (
float*)out->V(), Vh*
mySpinorSiteSize);