16 extern void *
memset(
void *
s,
int c,
size_t n);
29 template<
typename Float>
34 for (i = 0;i < 3; i++){
36 printf(
"(%10f,%10f) \t", link[i*3*2 + j*2], link[i*3*2 + j*2 + 1]);
44 template <
typename sFloat,
typename gFloat>
46 sFloat *spinorField, sFloat **fwd_nbr_spinor, sFloat **back_nbr_spinor,
int oddBit,
int daggerBit,
int nSrc,
51 gFloat *fatlinkEven[4], *fatlinkOdd[4];
52 gFloat *longlinkEven[4], *longlinkOdd[4];
55 gFloat *ghostFatlinkEven[4], *ghostFatlinkOdd[4];
56 gFloat *ghostLonglinkEven[4], *ghostLonglinkOdd[4];
59 for (
int dir = 0; dir < 4; dir++) {
60 fatlinkEven[dir] = fatlink[dir];
62 longlinkEven[dir] =longlink[dir];
66 ghostFatlinkEven[dir] = ghostFatlink[dir];
67 ghostFatlinkOdd[dir] = ghostFatlink[dir] + (
faceVolume[dir]/2)*gaugeSiteSize;
68 ghostLonglinkEven[dir] = ghostLonglink[dir];
69 ghostLonglinkOdd[dir] = ghostLonglink[dir] + 3*(
faceVolume[dir]/2)*gaugeSiteSize;
73 for (
int xs=0; xs<nSrc; xs++) {
75 for (
int i = 0; i <
Vh; i++) {
79 for (
int dir = 0; dir < 8; dir++) {
82 gFloat* fatlnk = gaugeLink_mg4dir(i, dir, oddBit, fatlinkEven, fatlinkOdd, ghostFatlinkEven, ghostFatlinkOdd, 1, 1);
84 gaugeLink_mg4dir(i, dir, oddBit, longlinkEven, longlinkOdd, ghostLonglinkEven, ghostLonglinkOdd, 3, 3) :
86 sFloat *first_neighbor_spinor = spinorNeighbor_5d_mgpu<QUDA_4D_PC>(
87 sid, dir, oddBit, spinorField, fwd_nbr_spinor, back_nbr_spinor, 1, nFace,
mySpinorSiteSize);
89 spinorNeighbor_5d_mgpu<QUDA_4D_PC>(
90 sid, dir, oddBit, spinorField, fwd_nbr_spinor, back_nbr_spinor, 3, nFace,
mySpinorSiteSize) :
93 gFloat *fatlnk =
gaugeLink(i, dir, oddBit, fatlinkEven, fatlinkOdd, 1);
96 sFloat *first_neighbor_spinor = spinorNeighbor_5d<QUDA_4D_PC>(sid, dir, oddBit, spinorField, 1,
mySpinorSiteSize);
98 spinorNeighbor_5d<QUDA_4D_PC>(sid, dir, oddBit, spinorField, 3,
mySpinorSiteSize) :
104 su3Mul(gaugedSpinor, fatlnk, first_neighbor_spinor);
108 su3Mul(gaugedSpinor, longlnk, third_neighbor_spinor);
112 su3Tmul(gaugedSpinor, fatlnk, first_neighbor_spinor);
120 su3Tmul(gaugedSpinor, longlnk, third_neighbor_spinor);
136 const int nSrc = in->
X(4);
144 errorQuda(
"ERROR: full parity not supported in function %s", __FUNCTION__);
156 (
double **)
ghost_longlink, (
double *)in->
V(), (
double **)fwd_nbr_spinor, (
double **)back_nbr_spinor, oddBit,
160 (
float **)
ghost_longlink, (
double *)in->
V(), (
double **)fwd_nbr_spinor, (
double **)back_nbr_spinor, oddBit,
166 (
double **)
ghost_longlink, (
float *)in->
V(), (
float **)fwd_nbr_spinor, (
float **)back_nbr_spinor, oddBit,
170 (
float **)
ghost_longlink, (
float *)in->
V(), (
float **)fwd_nbr_spinor, (
float **)back_nbr_spinor, oddBit,
181 if (sPrecision != gPrecision){
182 errorQuda(
"Spinor precision and gPrecison is not the same");
191 errorQuda(
"ERROR: full parity not supported in function %s\n", __FUNCTION__);
194 staggered_dslash(tmp, fatlink, longlink, ghost_fatlink, ghost_longlink, in, otherparity, dagger_bit, sPrecision,
195 gPrecision, dslash_type);
197 staggered_dslash(out, fatlink, longlink, ghost_fatlink, ghost_longlink, tmp, parity, dagger_bit, sPrecision,
198 gPrecision, dslash_type);
200 double msq_x4 = mass*mass*4;
QudaDslashType dslash_type
void display_link_internal(Float *link)
static void sum(Float *dst, Float *a, Float *b, int cnt)
enum QudaPrecision_s QudaPrecision
static void sub(Float *dst, Float *a, Float *b, int cnt)
cudaColorSpinorField * tmp
static void axmy(Float *x, Float a, Float *y, int len)
void staggered_dslash(cpuColorSpinorField *out, void **fatlink, void **longlink, void **ghost_fatlink, void **ghost_longlink, cpuColorSpinorField *in, int oddBit, int daggerBit, QudaPrecision sPrecision, QudaPrecision gPrecision, QudaDslashType dslash_type)
static void * backGhostFaceBuffer[QUDA_MAX_DIM]
enum QudaParity_s QudaParity
void exchangeGhost(QudaParity parity, int nFace, int dagger, const MemoryLocation *pack_destination=nullptr, const MemoryLocation *halo_location=nullptr, bool gdr_send=false, bool gdr_recv=false, QudaPrecision ghost_precision=QUDA_INVALID_PRECISION) const
This is a unified ghost exchange function for doing a complete halo exchange regardless of the type o...
static void * fwdGhostFaceBuffer[QUDA_MAX_DIM]
static Float * gaugeLink(int i, int dir, int oddBit, Float **gaugeEven, Float **gaugeOdd, int nbr_distance)
cpuColorSpinorField * out
void * memset(void *s, int c, size_t n)
Main header file for the QUDA library.
void matdagmat(cpuColorSpinorField *out, void **fatlink, void **longlink, void **ghost_fatlink, void **ghost_longlink, cpuColorSpinorField *in, double mass, int dagger_bit, QudaPrecision sPrecision, QudaPrecision gPrecision, cpuColorSpinorField *tmp, QudaParity parity, QudaDslashType dslash_type)
enum QudaDslashType_s QudaDslashType
static void su3Mul(sFloat *res, gFloat *mat, sFloat *vec)
static void su3Tmul(sFloat *res, gFloat *mat, sFloat *vec)
void dslashReference(sFloat *res, gFloat **fatlink, gFloat **longlink, gFloat **ghostFatlink, gFloat **ghostLonglink, sFloat *spinorField, sFloat **fwd_nbr_spinor, sFloat **back_nbr_spinor, int oddBit, int daggerBit, int nSrc, QudaDslashType dslash_type)
static void negx(Float *x, int len)