16 extern void *
memset(
void *
s,
int c,
size_t n);
29 template<
typename Float>
34 for (
i = 0;
i < 3;
i++){
36 printf(
"(%10f,%10f) \t", link[
i*3*2 + j*2], link[
i*3*2 + j*2 + 1]);
45 template <
typename sFloat,
typename gFloat>
47 int oddBit,
int daggerBit)
53 gFloat *fatlinkEven[4], *fatlinkOdd[4];
54 gFloat *longlinkEven[4], *longlinkOdd[4];
56 for (
int dir = 0; dir < 4; dir++) {
57 fatlinkEven[dir] =
fatlink[dir];
63 for (
int xs=0; xs<nSrc; xs++) {
65 for (
int i = 0;
i <
Vh;
i++) {
69 for (
int dir = 0; dir < 8; dir++) {
70 gFloat* fatlnk =
gaugeLink(
i, dir, oddBit, fatlinkEven, fatlinkOdd, 1);
71 gFloat* longlnk =
gaugeLink(
i, dir, oddBit, longlinkEven, longlinkOdd, 3);
73 sFloat *first_neighbor_spinor = spinorNeighbor_5d<QUDA_4D_PC>(
sid, dir, oddBit, spinorField, 1,
mySpinorSiteSize);
74 sFloat *third_neighbor_spinor = spinorNeighbor_5d<QUDA_4D_PC>(
sid, dir, oddBit, spinorField, 3,
mySpinorSiteSize);
79 su3Mul(gaugedSpinor, fatlnk, first_neighbor_spinor);
81 su3Mul(gaugedSpinor, longlnk, third_neighbor_spinor);
84 su3Tmul(gaugedSpinor, fatlnk, first_neighbor_spinor);
86 su3Tmul(gaugedSpinor, longlnk, third_neighbor_spinor);
122 template <
typename sFloat,
typename gFloat>
127 sFloat *outEven =
out;
161 template <
typename sFloat,
typename gFloat>
172 sFloat *outEven =
out;
183 sFloat *outOdd =
out;
193 fprintf(stderr,
"ERROR: invalid parity in %s,line %d\n", __FUNCTION__, __LINE__);
226 template <
typename sFloat,
typename gFloat>
271 template <
typename sFloat,
typename gFloat>
272 void dslashReference_mg4dir(sFloat *res, gFloat **
fatlink, gFloat**
longlink,
273 gFloat** ghostFatlink, gFloat** ghostLonglink,
274 sFloat *spinorField, sFloat** fwd_nbr_spinor,
275 sFloat** back_nbr_spinor,
int oddBit,
int daggerBit,
int nSrc)
279 gFloat *fatlinkEven[4], *fatlinkOdd[4];
280 gFloat *longlinkEven[4], *longlinkOdd[4];
281 gFloat *ghostFatlinkEven[4], *ghostFatlinkOdd[4];
282 gFloat *ghostLonglinkEven[4], *ghostLonglinkOdd[4];
284 for (
int dir = 0; dir < 4; dir++) {
285 fatlinkEven[dir] =
fatlink[dir];
290 ghostFatlinkEven[dir] = ghostFatlink[dir];
292 ghostLonglinkEven[dir] = ghostLonglink[dir];
296 for (
int xs=0; xs<nSrc; xs++) {
298 for (
int i = 0;
i <
Vh;
i++) {
302 for (
int dir = 0; dir < 8; dir++) {
303 gFloat* fatlnk = gaugeLink_mg4dir(
i, dir, oddBit, fatlinkEven, fatlinkOdd, ghostFatlinkEven, ghostFatlinkOdd, 1, 1);
304 gFloat* longlnk = gaugeLink_mg4dir(
i, dir, oddBit, longlinkEven, longlinkOdd, ghostLonglinkEven, ghostLonglinkOdd, 3, 3);
306 sFloat *first_neighbor_spinor = spinorNeighbor_5d_mgpu<QUDA_4D_PC>(
sid, dir, oddBit, spinorField, fwd_nbr_spinor, back_nbr_spinor, 1, 3,
mySpinorSiteSize);
307 sFloat *third_neighbor_spinor = spinorNeighbor_5d_mgpu<QUDA_4D_PC>(
sid, dir, oddBit, spinorField, fwd_nbr_spinor, back_nbr_spinor, 3, 3,
mySpinorSiteSize);
312 su3Mul(gaugedSpinor, fatlnk, first_neighbor_spinor);
314 su3Mul(gaugedSpinor, longlnk, third_neighbor_spinor);
317 su3Tmul(gaugedSpinor, fatlnk, first_neighbor_spinor);
319 su3Tmul(gaugedSpinor, longlnk, third_neighbor_spinor);
336 const int nSrc =
in->
X(4);
344 errorQuda(
"ERROR: full parity not supported in function %s", __FUNCTION__);
355 dslashReference_mg4dir((
double*)
out->
V(), (
double**)
fatlink, (
double**)
longlink, (
double**)ghost_fatlink, (
double**)ghost_longlink,
356 (
double*)
in->
V(), (
double**)fwd_nbr_spinor, (
double**)back_nbr_spinor, oddBit, daggerBit, nSrc);
358 dslashReference_mg4dir((
double*)
out->
V(), (
float**)
fatlink, (
float**)
longlink, (
float**)ghost_fatlink, (
float**)ghost_longlink,
359 (
double*)
in->
V(), (
double**)fwd_nbr_spinor, (
double**)back_nbr_spinor, oddBit, daggerBit, nSrc);
363 dslashReference_mg4dir((
float*)
out->
V(), (
double**)
fatlink, (
double**)
longlink, (
double**)ghost_fatlink, (
double**)ghost_longlink,
364 (
float*)
in->
V(), (
float**)fwd_nbr_spinor, (
float**)back_nbr_spinor, oddBit, daggerBit, nSrc);
366 dslashReference_mg4dir((
float*)
out->
V(), (
float**)
fatlink, (
float**)
longlink, (
float**)ghost_fatlink, (
float**)ghost_longlink,
367 (
float*)
in->
V(), (
float**)fwd_nbr_spinor, (
float**)back_nbr_spinor, oddBit, daggerBit, nSrc);
379 if (sPrecision != gPrecision){
380 errorQuda(
"Spinor precision and gPrecison is not the same");
389 errorQuda(
"ERROR: full parity not supported in function %s\n", __FUNCTION__);
393 in, otherparity, dagger_bit, sPrecision, gPrecision);
396 tmp,
parity, dagger_bit, sPrecision, gPrecision);
void display_link_internal(Float *link)
void xpay(ColorSpinorField &x, const double &a, ColorSpinorField &y)
enum QudaPrecision_s QudaPrecision
static void MatPC(sFloat *outEven, gFloat **fatlink, gFloat **longlink, sFloat *inEven, int dagger, QudaMatPCType matpc_type)
void staggered_matpc(void *outEven, void **fatlink, void **longlink, void *inEven, double kappa, QudaMatPCType matpc_type, int dagger_bit, QudaPrecision sPrecision, QudaPrecision gPrecision)
static void sub(Float *dst, Float *a, Float *b, int cnt)
void staggered_dslash(void *res, void **fatlink, void **longlink, void *spinorField, int oddBit, int daggerBit, QudaPrecision sPrecision, QudaPrecision gPrecision)
cudaColorSpinorField * tmp
void mat(void *out, void **fatlink, void **longlink, void *in, double kappa, int dagger_bit, QudaPrecision sPrecision, QudaPrecision gPrecision)
static void axmy(Float *x, Float a, Float *y, int len)
void matdagmat_mg4dir(cpuColorSpinorField *out, void **link, void **ghostLink, cpuColorSpinorField *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision, cpuColorSpinorField *tmp, QudaParity parity)
void exchangeGhost(QudaParity parity, int nFace, int dagger, const MemoryLocation *pack_destination=nullptr, const MemoryLocation *halo_location=nullptr, bool gdr_send=false, bool gdr_recv=false) const
This is a unified ghost exchange function for doing a complete halo exchange regardless of the type o...
void * malloc(size_t __size) __attribute__((__warn_unused_result__)) __attribute__((alloc_size(1)))
void matdagmat(void *out, void **fatlink, void **longlink, void *in, double mass, int dagger_bit, QudaPrecision sPrecision, QudaPrecision gPrecision, void *tmp, QudaParity parity)
int printf(const char *,...) __attribute__((__format__(__printf__
VOLATILE spinorFloat kappa
__host__ __device__ void sum(double &a, double &b)
enum QudaMatPCType_s QudaMatPCType
static void * backGhostFaceBuffer[QUDA_MAX_DIM]
enum QudaParity_s QudaParity
int fprintf(FILE *, const char *,...) __attribute__((__format__(__printf__
static void * fwdGhostFaceBuffer[QUDA_MAX_DIM]
static Float * gaugeLink(int i, int dir, int oddBit, Float **gaugeEven, Float **gaugeOdd, int nbr_distance)
cpuColorSpinorField * out
void * memset(void *s, int c, size_t n)
Main header file for the QUDA library.
void staggered_dslash_mg4dir(cpuColorSpinorField *out, void **fatlink, void **longlink, void **ghost_fatlink, void **ghost_longlink, cpuColorSpinorField *in, int oddBit, int daggerBit, QudaPrecision sPrecision, QudaPrecision gPrecision)
static void su3Mul(sFloat *res, gFloat *mat, sFloat *vec)
static void su3Tmul(sFloat *res, gFloat *mat, sFloat *vec)
void dslashReference(sFloat *res, gFloat **fatlink, gFloat **longlink, sFloat *spinorField, int oddBit, int daggerBit)
void Matdagmat(sFloat *out, gFloat **fatlink, gFloat **longlink, sFloat *in, sFloat mass, int daggerBit, sFloat *tmp, QudaParity parity)
void Mat(sFloat *out, gFloat **fatlink, gFloat **longlink, sFloat *in, sFloat kappa, int daggerBit)
static void negx(Float *x, int len)