20 template<
typename real>
struct su3_matrix { std::complex<real>
e[3][3]; };
21 template<
typename real>
struct su3_vector { std::complex<real>
e[3]; };
26 template<
typename su3_matrix,
typename Real>
32 for(
i=0;
i<3;
i++)
for(j=0;j<3;j++){
33 b->e[
i][j] =
s*
a->e[
i][j];
39 template<
typename su3_matrix,
typename Real>
44 for(
i=0;
i<3;
i++)
for(j=0;j<3;j++){
45 c->e[
i][j] =
a->e[
i][j] +
s*
b->e[
i][j];
50 template <
typename su3_matrix>
55 typename std::remove_reference<decltype(
a->e[0][0])>::type
x,
y;
56 for(
i=0;
i<3;
i++)
for(j=0;j<3;j++){
66 template <
typename su3_matrix>
71 typename std::remove_reference<decltype(
a->e[0][0])>::type
x,
y;
72 for(
i=0;
i<3;
i++)
for(j=0;j<3;j++){
75 y =
a->e[
i][k] *
b->e[k][j];
82 template<
typename su3_matrix>
87 typename std::remove_reference<decltype(
a->e[0][0])>::type
x,
y;
88 for(
i=0;
i<3;
i++)
for(j=0;j<3;j++){
102 template<
typename su3_matrix>
107 for(
i=0;
i<3;
i++)
for(j=0;j<3;j++){
108 c->e[
i][j] =
a->e[
i][j] +
b->e[
i][j];
114 template<
typename su3_matrix,
typename Real>
146 memset(dx, 0,
sizeof(dx));
151 B = mulink + nbr_idx;
153 B = mulink + nbr_idx;
156 memset(dx, 0,
sizeof(dx));
183 memset(dx, 0,
sizeof(dx));
186 if (nbr_idx >=
V || nbr_idx <0){
187 fprintf(stderr,
"ERROR: invliad nbr_idx(%d), line=%d\n", nbr_idx, __LINE__);
194 B = mulink + nbr_idx;
196 B = mulink + nbr_idx;
199 memset(dx, 0,
sizeof(dx));
230 template <
typename su3_matrix,
typename Float>
236 fprintf(stderr,
"Error: malloc failed for staple in function %s\n", __FUNCTION__);
241 if(tempmat1 == NULL){
242 fprintf(stderr,
"ERROR: malloc failed for tempmat1 in function %s\n", __FUNCTION__);
247 Float one_link = (act_path_coeff[0] - 6.0*act_path_coeff[5]);
250 for (
int dir=
XUP; dir<=
TUP; dir++){
253 for(
int i=0;
i <
V;
i ++){
262 for (
int dir=
XUP; dir<=
TUP; dir++){
263 for(
int nu=
XUP; nu<=
TUP; nu++){
272 for(
int rho=
XUP; rho<=
TUP; rho++) {
273 if((rho!=dir)&&(rho!=nu)){
276 for(
int sig=
XUP; sig<=
TUP; sig++){
277 if((sig!=dir)&&(sig!=nu)&&(sig!=rho)){
298 template<
typename su3_matrix,
typename Float>
300 Float* act_path_coeff)
304 for(
int dir=
XUP; dir<=
TUP; ++dir){
305 int dx[4] = {0,0,0,0};
306 for(
int i=0;
i<
V; ++
i){
321 template<
typename su3_matrix,
typename Float>
325 for(
int dir=0; dir<4; ++dir)
E[dir] =
Z[dir]+4;
328 const int extended_volume =
E[3]*
E[2]*
E[1]*
E[0];
331 for(
int t=0;
t<
Z[3]; ++
t){
332 for(
int z=0;
z<
Z[2]; ++
z){
333 for(
int y=0;
y<
Z[1]; ++
y){
334 for(
int x=0;
x<
Z[0]; ++
x){
335 const int oddBit = (
x+
y+
z+
t)&1;
336 int little_index = ((((
t*
Z[2] +
z)*
Z[1] +
y)*
Z[0] +
x)/2) + oddBit*
Vh;
337 int large_index = (((((
t+2)*
E[2] + (
z+2))*
E[1] + (
y+2))*
E[0] +
x+2)/2) + oddBit*(extended_volume/2);
340 for(
int dir=
XUP; dir<=
TUP; ++dir){
341 int dx[4] = {0,0,0,0};
372 fprintf(stderr,
"ERROR: unsupported precision(%d)\n",
prec);
405 fprintf(stderr,
"ERROR: unsupported precision(%d)\n",
prec);
417 template<
typename su3_matrix,
typename Real>
419 llfat_compute_gen_staple_field_mg(
su3_matrix *staple,
int mu,
int nu,
471 int x1h =
sid -
za*X1h;
476 int x1odd = (x2 + x3 + x4 + oddBit) & 1;
477 int x1 = 2*x1h + x1odd;
478 int x[4] = {x1,x2,x3,x4};
480 (x4*X3X2+x3*X2+x2)/2,
481 (x4*X3X1+x3*X1+x1)/2,
482 (x4*X2X1+x2*X1+x1)/2,
489 memset(dx, 0,
sizeof(dx));
495 if (
x[nu] + dx[nu] >=
Z[nu]){
496 B = ghost_mulink[nu] +
Vs[nu] + (1-oddBit)*
Vsh[nu] + space_con[nu];
499 B = mulink + nbr_idx;
502 if(
x[nu]+dx[nu] >=
Z[nu]){
503 B = ghost_sitelink[nu] + 4*
Vs[nu] +
mu*
Vs[nu] + (1-oddBit)*
Vsh[nu] + space_con[nu];
506 B = sitelink[
mu] + nbr_idx;
513 memset(dx, 0,
sizeof(dx));
516 C = ghost_sitelink[
mu] + 4*
Vs[
mu] + nu*
Vs[
mu] + (1-oddBit)*
Vsh[
mu] + space_con[
mu];
519 C = sitelink[nu] + nbr_idx;
552 int x1h =
sid -
za*X1h;
557 int x1odd = (x2 + x3 + x4 + oddBit) & 1;
558 int x1 = 2*x1h + x1odd;
559 int x[4] = {x1,x2,x3,x4};
561 (x4*X3X2+x3*X2+x2)/2,
562 (x4*X3X1+x3*X1+x1)/2,
563 (x4*X2X1+x2*X1+x1)/2,
573 memset(dx, 0,
sizeof(dx));
577 if(
x[nu] + dx[nu] < 0){
578 A = ghost_sitelink[nu] + nu*
Vs[nu] + (1-oddBit)*
Vsh[nu] + space_con[nu];
581 A = sitelink[nu] + nbr_idx;
587 if (
x[nu] + dx[nu] < 0){
588 B = ghost_mulink[nu] + (1-oddBit)*
Vsh[nu] + space_con[nu];
590 B = mulink + nbr_idx;
593 if(
x[nu] + dx[nu] < 0){
594 B = ghost_sitelink[nu] +
mu*
Vs[nu] + (1-oddBit)*
Vsh[nu] + space_con[nu];
597 B = sitelink[
mu] + nbr_idx;
604 memset(dx, 0,
sizeof(dx));
610 int new_x1, new_x2, new_x3, new_x4;
611 new_x1 = (
x[0] + dx[0] +
Z[0])%
Z[0];
612 new_x2 = (
x[1] + dx[1] +
Z[1])%
Z[1];
613 new_x3 = (
x[2] + dx[2] +
Z[2])%
Z[2];
614 new_x4 = (
x[3] + dx[3] +
Z[3])%
Z[3];
615 int new_x[4] = {new_x1, new_x2, new_x3, new_x4};
616 space_con[0] = (new_x4*X3X2 + new_x3*X2 + new_x2)/2;
617 space_con[1] = (new_x4*X3X1 + new_x3*X1 + new_x1)/2;
618 space_con[2] = (new_x4*X2X1 + new_x2*X1 + new_x1)/2;
619 space_con[3] = (new_x3*X2X1 + new_x2*X1 + new_x1)/2;
621 if( (
x[nu] + dx[nu]) < 0 && (
x[
mu] + dx[
mu] >=
Z[
mu])){
625 for(dir1=0; dir1 < 4; dir1 ++){
626 if(dir1 != nu && dir1 !=
mu){
630 for(dir2=0; dir2 < 4; dir2 ++){
631 if(dir2 != nu && dir2 !=
mu && dir2 != dir1){
635 C = ghost_sitelink_diag[nu*4+
mu] + oddBit*
Z[dir1]*
Z[dir2]/2 + (new_x[dir2]*
Z[dir1]+new_x[dir1])/2;
636 }
else if (
x[nu] + dx[nu] < 0){
637 C = ghost_sitelink[nu] + nu*
Vs[nu] + oddBit*
Vsh[nu]+ space_con[nu];
641 C = sitelink[nu] + nbr_idx;
658 template <
typename su3_matrix,
typename Float>
660 su3_matrix** ghost_sitelink_diag, Float* act_path_coeff)
663 if (
sizeof(Float) == 4){
671 fprintf(stderr,
"Error: malloc failed for staple in function %s\n", __FUNCTION__);
679 for(
int i=0;
i < 4;
i++){
681 if (ghost_staple[
i] == NULL){
682 fprintf(stderr,
"Error: malloc failed for ghost staple in function %s\n", __FUNCTION__);
687 if (ghost_staple1[
i] == NULL){
688 fprintf(stderr,
"Error: malloc failed for ghost staple1 in function %s\n", __FUNCTION__);
694 if(tempmat1 == NULL){
695 fprintf(stderr,
"ERROR: malloc failed for tempmat1 in function %s\n", __FUNCTION__);
700 Float one_link = (act_path_coeff[0] - 6.0*act_path_coeff[5]);
703 for (
int dir=
XUP; dir<=
TUP; dir++){
706 for(
int i=0;
i <
V;
i ++){
712 for (
int dir=
XUP; dir<=
TUP; dir++){
713 for(
int nu=
XUP; nu<=
TUP; nu++){
715 llfat_compute_gen_staple_field_mg(staple,dir,nu,
717 sitelink, ghost_sitelink, ghost_sitelink_diag,
718 fatlink, act_path_coeff[2], 0);
724 llfat_compute_gen_staple_field_mg((
su3_matrix*)NULL,dir,nu,
726 sitelink, ghost_sitelink, ghost_sitelink_diag,
729 for(
int rho=
XUP; rho<=
TUP; rho++) {
730 if((rho!=dir)&&(rho!=nu)){
731 llfat_compute_gen_staple_field_mg( tempmat1, dir, rho,
733 sitelink, ghost_sitelink, ghost_sitelink_diag,
734 fatlink, act_path_coeff[3], 1);
739 for(
int sig=
XUP; sig<=
TUP; sig++){
740 if((sig!=dir)&&(sig!=nu)&&(sig!=rho)){
742 llfat_compute_gen_staple_field_mg((
su3_matrix*)NULL,dir,sig,
743 tempmat1, ghost_staple1,
744 sitelink, ghost_sitelink, ghost_sitelink_diag,
745 fatlink, act_path_coeff[4], 1);
760 for(
int i=0;
i < 4;
i++){
761 free(ghost_staple[
i]);
762 free(ghost_staple1[
i]);
797 fprintf(stderr,
"ERROR: unsupported precision(%d)\n",
prec);
enum QudaPrecision_s QudaPrecision
Matrix< N, std::complex< T > > conj(const Matrix< N, std::complex< T > > &mat)
void llfat_mult_su3_nn(su3_matrix *a, su3_matrix *b, su3_matrix *c)
int neighborIndexFullLattice(int i, int dx4, int dx3, int dx2, int dx1)
std::complex< real > e[3]
void llfat_add_su3_matrix(su3_matrix *a, su3_matrix *b, su3_matrix *c)
void exit(int) __attribute__((noreturn))
void llfat_compute_gen_staple_field(su3_matrix *staple, int mu, int nu, su3_matrix *mulink, su3_matrix **sitelink, void **fatlink, Real coef, int use_staple)
void llfat_scalar_mult_su3_matrix(su3_matrix *a, Real s, su3_matrix *b)
void * malloc(size_t __size) __attribute__((__warn_unused_result__)) __attribute__((alloc_size(1)))
void llfat_scalar_mult_add_su3_matrix(su3_matrix *a, su3_matrix *b, Real s, su3_matrix *c)
void exchange_cpu_staple(int *X, void *staple, void **ghost_staple, QudaPrecision gPrecision)
void llfat_mult_su3_na(su3_matrix *a, su3_matrix *b, su3_matrix *c)
int fprintf(FILE *, const char *,...) __attribute__((__format__(__printf__
std::complex< real > e[3][3]
void computeLongLinkCPU(void **longlink, su3_matrix **sitelink, Float *act_path_coeff)
void * memset(void *__b, int __c, size_t __len)
int neighborIndexFullLattice_mg(int i, int dx4, int dx3, int dx2, int dx1)
void llfat_cpu(void **fatlink, su3_matrix **sitelink, Float *act_path_coeff)
Main header file for the QUDA library.
void llfat_reference_mg(void **fatlink, void **sitelink, void **ghost_sitelink, void **ghost_sitelink_diag, QudaPrecision prec, void *act_path_coeff)
void llfat_mult_su3_an(su3_matrix *a, su3_matrix *b, su3_matrix *c)
void llfat_reference(void **fatlink, void **sitelink, QudaPrecision prec, void *act_path_coeff)