26 template <
typename su3_matrix,
typename Real>
28 void **fatlink, Real coef,
int use_staple)
51 for (i = 0; i <
V; i++) {
90 for (i = 0; i <
V; i++) {
96 if (nbr_idx >=
V || nbr_idx < 0) {
97 fprintf(stderr,
"ERROR: invliad nbr_idx(%d), line=%d\n", nbr_idx, __LINE__);
104 B = mulink + nbr_idx;
106 B = mulink + nbr_idx;
109 memset(dx, 0,
sizeof(dx));
117 if (staple != NULL) {
137 template <
typename su3_matrix,
typename Float>
141 if (staple == NULL) {
142 fprintf(stderr,
"Error: malloc failed for staple in function %s\n", __FUNCTION__);
147 if (tempmat1 == NULL) {
148 fprintf(stderr,
"ERROR: malloc failed for tempmat1 in function %s\n", __FUNCTION__);
153 Float one_link = (act_path_coeff[0] - 6.0 * act_path_coeff[5]);
155 for (
int dir =
XUP; dir <=
TUP; dir++) {
158 for (
int i = 0; i <
V; i++) {
164 for (
int dir =
XUP; dir <=
TUP; dir++) {
165 for (
int nu =
XUP; nu <=
TUP; nu++) {
174 for (
int rho =
XUP; rho <=
TUP; rho++) {
175 if ((rho != dir) && (rho != nu)) {
178 for (
int sig =
XUP; sig <=
TUP; sig++) {
179 if ((sig != dir) && (sig != nu) && (sig != rho)) {
181 act_path_coeff[4], 1);
216 fprintf(stderr,
"ERROR: unsupported precision(%d)\n",
prec);
225 template <
typename su3_matrix,
typename Real>
228 su3_matrix **ghost_sitelink_diag,
void **fatlink, Real coef,
int use_staple)
261 for (i = 0; i <
V; i++) {
271 int sid = half_index;
273 int x1h = sid - za * X1h;
275 int x2 = za - zb * X2;
277 int x3 = zb - x4 * X3;
278 int x1odd = (x2 + x3 + x4 + oddBit) & 1;
279 int x1 = 2 * x1h + x1odd;
280 int x[4] = {x1, x2, x3, x4};
281 int space_con[4] = {(x4 * X3X2 + x3 * X2 + x2) / 2, (x4 * X3X1 + x3 * X1 + x1) / 2, (x4 * X2X1 + x2 * X1 + x1) / 2,
282 (x3 * X2X1 + x2 * X1 + x1) / 2};
287 memset(dx, 0,
sizeof(dx));
293 if (x[nu] + dx[nu] >=
Z[nu]) {
294 B = ghost_mulink[nu] + Vs[nu] + (1 - oddBit) * Vsh[nu] + space_con[nu];
297 B = mulink + nbr_idx;
300 if (x[nu] + dx[nu] >=
Z[nu]) {
301 B = ghost_sitelink[nu] + 4 * Vs[nu] +
mu * Vs[nu] + (1 - oddBit) * Vsh[nu] + space_con[nu];
304 B = sitelink[
mu] + nbr_idx;
310 memset(dx, 0,
sizeof(dx));
313 C = ghost_sitelink[
mu] + 4 * Vs[
mu] + nu * Vs[
mu] + (1 - oddBit) * Vsh[
mu] + space_con[
mu];
316 C = sitelink[nu] + nbr_idx;
321 if (staple != NULL) {
338 for (i = 0; i <
V; i++) {
347 int sid = half_index;
349 int x1h = sid - za * X1h;
351 int x2 = za - zb * X2;
353 int x3 = zb - x4 * X3;
354 int x1odd = (x2 + x3 + x4 + oddBit) & 1;
355 int x1 = 2 * x1h + x1odd;
356 int x[4] = {x1, x2, x3, x4};
357 int space_con[4] = {(x4 * X3X2 + x3 * X2 + x2) / 2, (x4 * X3X1 + x3 * X1 + x1) / 2, (x4 * X2X1 + x2 * X1 + x1) / 2,
358 (x3 * X2X1 + x2 * X1 + x1) / 2};
366 memset(dx, 0,
sizeof(dx));
370 if (x[nu] + dx[nu] < 0) {
371 A = ghost_sitelink[nu] + nu * Vs[nu] + (1 - oddBit) * Vsh[nu] + space_con[nu];
374 A = sitelink[nu] + nbr_idx;
380 if (x[nu] + dx[nu] < 0) {
381 B = ghost_mulink[nu] + (1 - oddBit) * Vsh[nu] + space_con[nu];
383 B = mulink + nbr_idx;
386 if (x[nu] + dx[nu] < 0) {
387 B = ghost_sitelink[nu] +
mu * Vs[nu] + (1 - oddBit) * Vsh[nu] + space_con[nu];
390 B = sitelink[
mu] + nbr_idx;
397 memset(dx, 0,
sizeof(dx));
403 int new_x1, new_x2, new_x3, new_x4;
404 new_x1 = (x[0] + dx[0] +
Z[0]) %
Z[0];
405 new_x2 = (x[1] + dx[1] +
Z[1]) %
Z[1];
406 new_x3 = (x[2] + dx[2] +
Z[2]) %
Z[2];
407 new_x4 = (x[3] + dx[3] +
Z[3]) %
Z[3];
408 int new_x[4] = {new_x1, new_x2, new_x3, new_x4};
409 space_con[0] = (new_x4 * X3X2 + new_x3 * X2 + new_x2) / 2;
410 space_con[1] = (new_x4 * X3X1 + new_x3 * X1 + new_x1) / 2;
411 space_con[2] = (new_x4 * X2X1 + new_x2 * X1 + new_x1) / 2;
412 space_con[3] = (new_x3 * X2X1 + new_x2 * X1 + new_x1) / 2;
414 if ((x[nu] + dx[nu]) < 0 && (x[
mu] + dx[
mu] >=
Z[
mu])) {
418 for (dir1 = 0; dir1 < 4; dir1++) {
419 if (dir1 != nu && dir1 !=
mu) {
break; }
421 for (dir2 = 0; dir2 < 4; dir2++) {
422 if (dir2 != nu && dir2 !=
mu && dir2 != dir1) {
break; }
424 C = ghost_sitelink_diag[nu * 4 +
mu] + oddBit *
Z[dir1] *
Z[dir2] / 2 + (new_x[dir2] *
Z[dir1] + new_x[dir1]) / 2;
425 }
else if (x[nu] + dx[nu] < 0) {
426 C = ghost_sitelink[nu] + nu * Vs[nu] + oddBit * Vsh[nu] + space_con[nu];
427 }
else if (x[
mu] + dx[
mu] >=
Z[
mu]) {
428 C = ghost_sitelink[
mu] + 4 * Vs[
mu] + nu * Vs[
mu] + oddBit * Vsh[
mu] + space_con[
mu];
430 C = sitelink[nu] + nbr_idx;
435 if (staple != NULL) {
446 template <
typename su3_matrix,
typename Float>
448 Float *act_path_coeff)
451 if (
sizeof(
Float) == 4) {
458 if (staple == NULL) {
459 fprintf(stderr,
"Error: malloc failed for staple in function %s\n", __FUNCTION__);
466 for (
int i = 0; i < 4; i++) {
468 if (ghost_staple[i] == NULL) {
469 fprintf(stderr,
"Error: malloc failed for ghost staple in function %s\n", __FUNCTION__);
474 if (ghost_staple1[i] == NULL) {
475 fprintf(stderr,
"Error: malloc failed for ghost staple1 in function %s\n", __FUNCTION__);
481 if (tempmat1 == NULL) {
482 fprintf(stderr,
"ERROR: malloc failed for tempmat1 in function %s\n", __FUNCTION__);
487 Float one_link = (act_path_coeff[0] - 6.0 * act_path_coeff[5]);
489 for (
int dir =
XUP; dir <=
TUP; dir++) {
492 for (
int i = 0; i <
V; i++) {
498 for (
int dir =
XUP; dir <=
TUP; dir++) {
499 for (
int nu =
XUP; nu <=
TUP; nu++) {
501 llfat_compute_gen_staple_field_mg(staple, dir, nu, sitelink[dir], (
su3_matrix **)NULL, sitelink, ghost_sitelink,
502 ghost_sitelink_diag, fatlink, act_path_coeff[2], 0);
508 llfat_compute_gen_staple_field_mg((
su3_matrix *)NULL, dir, nu, staple, ghost_staple, sitelink, ghost_sitelink,
509 ghost_sitelink_diag, fatlink, act_path_coeff[5], 1);
511 for (
int rho =
XUP; rho <=
TUP; rho++) {
512 if ((rho != dir) && (rho != nu)) {
513 llfat_compute_gen_staple_field_mg(tempmat1, dir, rho, staple, ghost_staple, sitelink, ghost_sitelink,
514 ghost_sitelink_diag, fatlink, act_path_coeff[3], 1);
518 for (
int sig =
XUP; sig <=
TUP; sig++) {
519 if ((sig != dir) && (sig != nu) && (sig != rho)) {
521 llfat_compute_gen_staple_field_mg((
su3_matrix *)NULL, dir, sig, tempmat1, ghost_staple1, sitelink,
522 ghost_sitelink, ghost_sitelink_diag, fatlink, act_path_coeff[4], 1);
534 for (
int i = 0; i < 4; i++) {
535 free(ghost_staple[i]);
536 free(ghost_staple1[i]);
541 void llfat_reference_mg(
void **fatlink,
void **sitelink,
void **ghost_sitelink,
void **ghost_sitelink_diag,
566 fprintf(stderr,
"ERROR: unsupported precision(%d)\n",
prec);
void * memset(void *s, int c, size_t n)
enum QudaPrecision_s QudaPrecision
void exchange_cpu_staple(int *X, void *staple, void **ghost_staple, QudaPrecision gPrecision)
int neighborIndexFullLattice(int i, int dx4, int dx3, int dx2, int dx1)
int neighborIndexFullLattice_mg(int i, int dx4, int dx3, int dx2, int dx1)
void llfat_compute_gen_staple_field(su3_matrix *staple, int mu, int nu, su3_matrix *mulink, su3_matrix **sitelink, void **fatlink, Real coef, int use_staple)
void llfat_reference(void **fatlink, void **sitelink, QudaPrecision prec, void *act_path_coeff)
void llfat_cpu(void **fatlink, su3_matrix **sitelink, Float *act_path_coeff)
void llfat_mult_su3_nn(su3_matrix *a, su3_matrix *b, su3_matrix *c)
void llfat_scalar_mult_su3_matrix(su3_matrix *a, Real s, su3_matrix *b)
void llfat_mult_su3_an(su3_matrix *a, su3_matrix *b, su3_matrix *c)
void llfat_scalar_mult_add_su3_matrix(su3_matrix *a, su3_matrix *b, Real s, su3_matrix *c)
void llfat_add_su3_matrix(su3_matrix *a, su3_matrix *b, su3_matrix *c)
void llfat_reference_mg(void **fatlink, void **sitelink, void **ghost_sitelink, void **ghost_sitelink_diag, QudaPrecision prec, void *act_path_coeff)
void llfat_mult_su3_na(su3_matrix *a, su3_matrix *b, su3_matrix *c)
FloatingPoint< float > Float
Main header file for the QUDA library.