15 extern void *
memset(
void *s,
int c,
size_t n);
23 for (i = 0; i < 3; i++) {
24 for (j = 0; j < 3; j++) { printf(
"(%10f,%10f) \t", link[i * 3 * 2 + j * 2], link[i * 3 * 2 + j * 2 + 1]); }
37 template <
typename sFloat,
typename gFloat>
39 gFloat **ghostLonglink, sFloat *spinorField, sFloat **fwd_nbr_spinor,
44 gFloat *fatlinkEven[4], *fatlinkOdd[4];
45 gFloat *longlinkEven[4], *longlinkOdd[4];
48 gFloat *ghostFatlinkEven[4], *ghostFatlinkOdd[4];
49 gFloat *ghostLonglinkEven[4], *ghostLonglinkOdd[4];
52 for (
int dir = 0; dir < 4; dir++) {
53 fatlinkEven[dir] = fatlink[dir];
55 longlinkEven[dir] = longlink[dir];
59 ghostFatlinkEven[dir] = ghostFatlink[dir];
61 ghostLonglinkEven[dir] = ghostLonglink[dir];
66 for (
int xs = 0; xs < nSrc; xs++) {
68 for (
int i = 0; i <
Vh; i++) {
69 int sid = i + xs *
Vh;
72 for (
int dir = 0; dir < 8; dir++) {
76 = gaugeLink_mg4dir(i, dir, oddBit, fatlinkEven, fatlinkOdd, ghostFatlinkEven, ghostFatlinkOdd, 1, 1);
78 gaugeLink_mg4dir(i, dir, oddBit, longlinkEven, longlinkOdd, ghostLonglinkEven, ghostLonglinkOdd, 3, 3) :
80 sFloat *first_neighbor_spinor = spinorNeighbor_5d_mgpu<QUDA_4D_PC>(
83 spinorNeighbor_5d_mgpu<QUDA_4D_PC>(sid, dir, oddBit, spinorField, fwd_nbr_spinor, back_nbr_spinor, 3, nFace,
87 gFloat *fatlnk = gaugeLink(i, dir, oddBit, fatlinkEven, fatlinkOdd, 1);
90 sFloat *first_neighbor_spinor
99 su3Mul(gaugedSpinor, fatlnk, first_neighbor_spinor);
103 su3Mul(gaugedSpinor, longlnk, third_neighbor_spinor);
107 su3Tmul(gaugedSpinor, fatlnk, first_neighbor_spinor);
115 su3Tmul(gaugedSpinor, longlnk, third_neighbor_spinor);
130 const int nSrc = in->
X(4);
138 errorQuda(
"ERROR: full parity not supported in function %s", __FUNCTION__);
150 (
double **)ghost_longlink, (
double *)in->
V(), (
double **)fwd_nbr_spinor,
151 (
double **)back_nbr_spinor, oddBit, daggerBit, nSrc,
dslash_type);
154 (
float **)ghost_longlink, (
double *)in->
V(), (
double **)fwd_nbr_spinor,
155 (
double **)back_nbr_spinor, oddBit, daggerBit, nSrc,
dslash_type);
160 (
double **)ghost_longlink, (
float *)in->
V(), (
float **)fwd_nbr_spinor,
161 (
float **)back_nbr_spinor, oddBit, daggerBit, nSrc,
dslash_type);
164 (
float **)ghost_longlink, (
float *)in->
V(), (
float **)fwd_nbr_spinor,
165 (
float **)back_nbr_spinor, oddBit, daggerBit, nSrc,
dslash_type);
176 if (sPrecision != gPrecision) {
errorQuda(
"Spinor precision and gPrecison is not the same"); }
184 errorQuda(
"ERROR: full parity not supported in function %s\n", __FUNCTION__);
187 staggeredDslash(
tmp, fatlink, longlink, ghost_fatlink, ghost_longlink, in, otherparity, dagger_bit, sPrecision,
virtual void exchangeGhost(QudaParity parity, int nFace, int dagger, const MemoryLocation *pack_destination=nullptr, const MemoryLocation *halo_location=nullptr, bool gdr_send=false, bool gdr_recv=false, QudaPrecision ghost_precision=QUDA_INVALID_PRECISION) const =0
QudaDslashType dslash_type
cudaColorSpinorField * tmp
enum QudaPrecision_s QudaPrecision
enum QudaDslashType_s QudaDslashType
enum QudaParity_s QudaParity
#define stag_spinor_site_size
FloatingPoint< float > Float
__host__ __device__ T sum(const array< T, s > &a)
Main header file for the QUDA library.
void staggeredDslashReference(sFloat *res, gFloat **fatlink, gFloat **longlink, gFloat **ghostFatlink, gFloat **ghostLonglink, sFloat *spinorField, sFloat **fwd_nbr_spinor, sFloat **back_nbr_spinor, int oddBit, int daggerBit, int nSrc, QudaDslashType dslash_type)
void display_link_internal(Float *link)
void staggeredMatDagMat(ColorSpinorField *out, void **fatlink, void **longlink, void **ghost_fatlink, void **ghost_longlink, ColorSpinorField *in, double mass, int dagger_bit, QudaPrecision sPrecision, QudaPrecision gPrecision, ColorSpinorField *tmp, QudaParity parity, QudaDslashType dslash_type)
void staggeredDslash(ColorSpinorField *out, void **fatlink, void **longlink, void **ghost_fatlink, void **ghost_longlink, ColorSpinorField *in, int oddBit, int daggerBit, QudaPrecision sPrecision, QudaPrecision gPrecision, QudaDslashType dslash_type)
void * memset(void *s, int c, size_t n)