16 extern void *
memset(
void *
s,
int c,
size_t n);
29 template<
typename Float>
34 for (i = 0;i < 3; i++){
36 printf(
"(%10f,%10f) \t", link[i*3*2 + j*2], link[i*3*2 + j*2 + 1]);
45 template <
typename sFloat,
typename gFloat>
49 for (
int i=0; i<
Vh*1*3*2; i++) res[i] = 0.0;
51 gFloat *fatlinkEven[4], *fatlinkOdd[4];
52 gFloat *longlinkEven[4], *longlinkOdd[4];
54 for (
int dir = 0; dir < 4; dir++) {
55 fatlinkEven[dir] = fatlink[dir];
57 longlinkEven[dir] =longlink[dir];
61 for (
int i = 0; i <
Vh; i++) {
63 for (
int dir = 0; dir < 8; dir++) {
64 gFloat* fatlnk = gaugeLink(i, dir, oddBit, fatlinkEven, fatlinkOdd, 1);
65 gFloat* longlnk = gaugeLink(i, dir, oddBit, longlinkEven, longlinkOdd, 3);
67 sFloat *first_neighbor_spinor = spinorNeighbor(i, dir, oddBit, spinorField, 1);
68 sFloat *third_neighbor_spinor = spinorNeighbor(i, dir, oddBit, spinorField, 3);
74 su3Mul(gaugedSpinor, fatlnk, first_neighbor_spinor);
75 sum(&res[i*mySpinorSiteSize], &res[i*mySpinorSiteSize], gaugedSpinor, mySpinorSiteSize);
76 su3Mul(gaugedSpinor, longlnk, third_neighbor_spinor);
77 sum(&res[i*mySpinorSiteSize], &res[i*mySpinorSiteSize], gaugedSpinor, mySpinorSiteSize);
79 su3Tmul(gaugedSpinor, fatlnk, first_neighbor_spinor);
80 sub(&res[i*mySpinorSiteSize], &res[i*mySpinorSiteSize], gaugedSpinor, mySpinorSiteSize);
82 su3Tmul(gaugedSpinor, longlnk, third_neighbor_spinor);
83 sub(&res[i*mySpinorSiteSize], &res[i*mySpinorSiteSize], gaugedSpinor, mySpinorSiteSize);
87 negx(&res[i*mySpinorSiteSize], mySpinorSiteSize);
101 dslashReference((
double*)res, (
double**)fatlink, (
double**)longlink, (
double*)spinorField, oddBit, daggerBit);
103 dslashReference((
double*)res, (
float**)fatlink, (
float**)longlink, (
double*)spinorField, oddBit, daggerBit);
108 dslashReference((
float*)res, (
double**)fatlink, (
double**)longlink, (
float*)spinorField, oddBit, daggerBit);
110 dslashReference((
float*)res, (
float**)fatlink, (
float**)longlink, (
float*)spinorField, oddBit, daggerBit);
118 template <
typename sFloat,
typename gFloat>
123 sFloat *outEven =
out;
131 xpay(in, -kappa, out,
V*mySpinorSiteSize);
142 Mat((
double*)out, (
double**)fatlink, (
double**)longlink, (
double*)in, (
double)kappa, dagger_bit);
144 Mat((
double*)out, (
float**)fatlink, (
float**)longlink, (
double*)in, (
double)kappa, dagger_bit);
148 Mat((
float*)out, (
double**)fatlink, (
double**)longlink, (
float*)in, (
float)kappa, dagger_bit);
150 Mat((
float*)out, (
float**)fatlink, (
float**)longlink, (
float*)in, (
float)kappa, dagger_bit);
157 template <
typename sFloat,
typename gFloat>
162 sFloat msq_x4 = mass*mass*4;
168 sFloat *outEven =
out;
179 sFloat *outOdd =
out;
189 fprintf(stderr,
"ERROR: invalid parity in %s,line %d\n", __FUNCTION__, __LINE__);
204 Matdagmat((
double*)out, (
double**)fatlink, (
double**)longlink, (
double*)in, (
double)mass, dagger_bit, (
double*)tmp, parity);
206 Matdagmat((
double*)out, (
float**)fatlink, (
float**)longlink, (
double*)in, (
double)mass, dagger_bit, (
double*) tmp, parity);
210 Matdagmat((
float*)out, (
double**)fatlink, (
double**)longlink, (
float*)in, (
float)mass, dagger_bit, (
float*)tmp, parity);
212 Matdagmat((
float*)out, (
float**)fatlink, (
float**)longlink, (
float*)in, (
float)mass, dagger_bit, (
float*)tmp, parity);
222 template <
typename sFloat,
typename gFloat>
223 static void MatPC(sFloat *outEven, gFloat **
fatlink, gFloat**
longlink, sFloat *inEven, sFloat
kappa,
241 sFloat kappa2 = -kappa*
kappa;
249 staggered_matpc(
void *outEven,
void **fatlink,
void**longlink,
void *inEven,
double kappa,
255 MatPC((
double*)outEven, (
double**)fatlink, (
double**)longlink, (
double*)inEven, (
double)kappa, dagger_bit, matpc_type);
258 MatPC((
double*)outEven, (
double**)fatlink, (
double**)longlink, (
double*)inEven, (
double)kappa, dagger_bit, matpc_type);
262 MatPC((
float*)outEven, (
double**)fatlink, (
double**)longlink, (
float*)inEven, (
float)kappa, dagger_bit, matpc_type);
264 MatPC((
float*)outEven, (
float**)fatlink, (
float**)longlink, (
float*)inEven, (
float)kappa, dagger_bit, matpc_type);
271 template <
typename sFloat,
typename gFloat>
272 void dslashReference_mg4dir(sFloat *res, gFloat **fatlink, gFloat** longlink,
273 gFloat** ghostFatlink, gFloat** ghostLonglink,
274 sFloat *spinorField, sFloat** fwd_nbr_spinor,
275 sFloat** back_nbr_spinor,
int oddBit,
int daggerBit)
277 for (
int i=0; i<
Vh*1*3*2; i++) res[i] = 0.0;
280 gFloat *fatlinkEven[4], *fatlinkOdd[4];
281 gFloat *longlinkEven[4], *longlinkOdd[4];
282 gFloat *ghostFatlinkEven[4], *ghostFatlinkOdd[4];
283 gFloat *ghostLonglinkEven[4], *ghostLonglinkOdd[4];
285 for (
int dir = 0; dir < 4; dir++) {
286 fatlinkEven[dir] = fatlink[dir];
288 longlinkEven[dir] =longlink[dir];
291 ghostFatlinkEven[dir] = ghostFatlink[dir];
292 ghostFatlinkOdd[dir] = ghostFatlink[dir] + Vsh[dir]*
gaugeSiteSize;
293 ghostLonglinkEven[dir] = ghostLonglink[dir];
294 ghostLonglinkOdd[dir] = ghostLonglink[dir] + 3*Vsh[dir]*
gaugeSiteSize;
297 for (
int i = 0; i <
Vh; i++) {
299 for (
int dir = 0; dir < 8; dir++) {
300 gFloat* fatlnk = gaugeLink_mg4dir(i, dir, oddBit, fatlinkEven, fatlinkOdd, ghostFatlinkEven, ghostFatlinkOdd, 1, 1);
301 gFloat* longlnk = gaugeLink_mg4dir(i, dir, oddBit, longlinkEven, longlinkOdd, ghostLonglinkEven, ghostLonglinkOdd, 3, 3);
303 sFloat *first_neighbor_spinor = spinorNeighbor_mg4dir(i, dir, oddBit, spinorField, fwd_nbr_spinor, back_nbr_spinor, 1, 3);
304 sFloat *third_neighbor_spinor = spinorNeighbor_mg4dir(i, dir, oddBit, spinorField, fwd_nbr_spinor, back_nbr_spinor, 3, 3);
310 su3Mul(gaugedSpinor, fatlnk, first_neighbor_spinor);
311 sum(&res[i*mySpinorSiteSize], &res[i*mySpinorSiteSize], gaugedSpinor, mySpinorSiteSize);
312 su3Mul(gaugedSpinor, longlnk, third_neighbor_spinor);
313 sum(&res[i*mySpinorSiteSize], &res[i*mySpinorSiteSize], gaugedSpinor, mySpinorSiteSize);
316 su3Tmul(gaugedSpinor, fatlnk, first_neighbor_spinor);
317 sub(&res[i*mySpinorSiteSize], &res[i*mySpinorSiteSize], gaugedSpinor, mySpinorSiteSize);
319 su3Tmul(gaugedSpinor, longlnk, third_neighbor_spinor);
320 sub(&res[i*mySpinorSiteSize], &res[i*mySpinorSiteSize], gaugedSpinor, mySpinorSiteSize);
326 negx(&res[i*mySpinorSiteSize], mySpinorSiteSize);
345 errorQuda(
"ERROR: full parity not supported in function %s", __FUNCTION__);
351 faceBuf.exchangeCpuSpinor(*in, otherparity, daggerBit);
358 dslashReference_mg4dir((
double*)out->
V(), (
double**)fatlink, (
double**)
longlink,
359 (
double**)ghost_fatlink, (
double**)ghost_longlink, (
double*)in->
V(),
360 (
double**)fwd_nbr_spinor, (
double**)back_nbr_spinor, oddBit, daggerBit);
362 dslashReference_mg4dir((
double*)out->
V(), (
float**)fatlink, (
float**)
longlink, (
float**)ghost_fatlink, (
float**)ghost_longlink,
363 (
double*)in->V(), (
double**)fwd_nbr_spinor, (
double**)back_nbr_spinor, oddBit, daggerBit);
368 dslashReference_mg4dir((
float*)out->
V(), (
double**)fatlink, (
double**)
longlink, (
double**)ghost_fatlink, (
double**)ghost_longlink,
369 (
float*)in->V(), (
float**)fwd_nbr_spinor, (
float**)back_nbr_spinor, oddBit, daggerBit);
371 dslashReference_mg4dir((
float*)out->
V(), (
float**)fatlink, (
float**)
longlink, (
float**)ghost_fatlink, (
float**)ghost_longlink,
372 (
float*)in->V(), (
float**)fwd_nbr_spinor, (
float**)back_nbr_spinor, oddBit, daggerBit);
385 if (sPrecision != gPrecision){
386 errorQuda(
"Spinor precision and gPrecison is not the same");
395 errorQuda(
"ERROR: full parity not supported in function %s\n", __FUNCTION__);
399 in, otherparity, dagger_bit, sPrecision, gPrecision);
402 tmp, parity, dagger_bit, sPrecision, gPrecision);
404 double msq_x4 = mass*mass*4;
406 axmy((
double*)in->V(), (double)msq_x4, (
double*)out->V(), Vh*
mySpinorSiteSize);
408 axmy((
float*)in->V(), (float)msq_x4, (
float*)out->V(), Vh*
mySpinorSiteSize);
void display_link_internal(Float *link)
enum QudaPrecision_s QudaPrecision
void matdagmat_mg4dir(cpuColorSpinorField *out, void **fatlink, void **longlink, void **ghost_fatlink, void **ghost_longlink, cpuColorSpinorField *in, double mass, int dagger_bit, QudaPrecision sPrecision, QudaPrecision gPrecision, cpuColorSpinorField *tmp, QudaParity parity)
void staggered_matpc(void *outEven, void **fatlink, void **longlink, void *inEven, double kappa, QudaMatPCType matpc_type, int dagger_bit, QudaPrecision sPrecision, QudaPrecision gPrecision)
void staggered_dslash(void *res, void **fatlink, void **longlink, void *spinorField, int oddBit, int daggerBit, QudaPrecision sPrecision, QudaPrecision gPrecision)
void mat(void *out, void **fatlink, void **longlink, void *in, double kappa, int dagger_bit, QudaPrecision sPrecision, QudaPrecision gPrecision)
cudaColorSpinorField * tmp
void matdagmat(void *out, void **fatlink, void **longlink, void *in, double mass, int dagger_bit, QudaPrecision sPrecision, QudaPrecision gPrecision, void *tmp, QudaParity parity)
VOLATILE spinorFloat kappa
FloatingPoint< float > Float
enum QudaMatPCType_s QudaMatPCType
static void * backGhostFaceBuffer[QUDA_MAX_DIM]
enum QudaParity_s QudaParity
static void * fwdGhostFaceBuffer[QUDA_MAX_DIM]
cpuColorSpinorField * out
void * memset(void *s, int c, size_t n)
Main header file for the QUDA library.
void staggered_dslash_mg4dir(cpuColorSpinorField *out, void **fatlink, void **longlink, void **ghost_fatlink, void **ghost_longlink, cpuColorSpinorField *in, int oddBit, int daggerBit, QudaPrecision sPrecision, QudaPrecision gPrecision)
void dslashReference(sFloat *res, gFloat **fatlink, gFloat **longlink, sFloat *spinorField, int oddBit, int daggerBit)
void Matdagmat(sFloat *out, gFloat **fatlink, gFloat **longlink, sFloat *in, sFloat mass, int daggerBit, sFloat *tmp, QudaParity parity)
void Mat(sFloat *out, gFloat **fatlink, gFloat **longlink, sFloat *in, sFloat kappa, int daggerBit)