36 #define CADD(a,b,c) { (c).real = (a).real + (b).real; \
37 (c).imag = (a).imag + (b).imag; }
38 #define CMUL(a,b,c) { (c).real = (a).real*(b).real - (a).imag*(b).imag; \
39 (c).imag = (a).real*(b).imag + (a).imag*(b).real; }
40 #define CSUM(a,b) { (a).real += (b).real; (a).imag += (b).imag; }
43 #define CMULJ_(a,b,c) { (c).real = (a).real*(b).real + (a).imag*(b).imag; \
44 (c).imag = (a).real*(b).imag - (a).imag*(b).real; }
47 #define CMUL_J(a,b,c) { (c).real = (a).real*(b).real + (a).imag*(b).imag; \
48 (c).imag = (a).imag*(b).real - (a).real*(b).imag; }
53 template<
typename su3_matrix,
typename Real>
59 for(i=0;i<3;i++)
for(j=0;j<3;j++){
60 b->e[i][j].real = s*a->e[i][j].real;
61 b->e[i][j].imag = s*a->e[i][j].imag;
67 template<
typename su3_matrix,
typename Real>
72 for(i=0;i<3;i++)
for(j=0;j<3;j++){
73 c->e[i][j].real = a->e[i][j].real + s*b->e[i][j].real;
74 c->e[i][j].imag = a->e[i][j].imag + s*b->e[i][j].imag;
79 template <
typename su3_matrix>
84 typeof(a->e[0][0])
x,y;
85 for(i=0;i<3;i++)
for(j=0;j<3;j++){
88 CMUL_J( a->e[i][k] , b->e[j][k] , y );
95 template <
typename su3_matrix>
100 typeof(a->e[0][0])
x,y;
101 for(i=0;i<3;i++)
for(j=0;j<3;j++){
104 CMUL( a->e[i][k] , b->e[k][j] , y );
111 template<
typename su3_matrix>
116 typeof(a->e[0][0])
x,y;
117 for(i=0;i<3;i++)
for(j=0;j<3;j++){
120 CMULJ_( a->e[k][i] , b->e[k][j], y );
131 template<
typename su3_matrix>
136 for(i=0;i<3;i++)
for(j=0;j<3;j++){
137 CADD( a->e[i][j], b->e[i][j], c->e[i][j] );
143 template<
typename su3_matrix,
typename Real>
146 su3_matrix* mulink, su3_matrix** sitelink,
void**
fatlink, Real coef,
149 su3_matrix tmat1,tmat2;
172 fat1 = ((su3_matrix*)fatlink[mu]) + i;
173 su3_matrix* A = sitelink[nu] + i;
175 memset(dx, 0,
sizeof(dx));
180 B = mulink + nbr_idx;
182 B = mulink + nbr_idx;
185 memset(dx, 0,
sizeof(dx));
188 su3_matrix* C = sitelink[nu] + nbr_idx;
211 fat1 = ((su3_matrix*)fatlink[mu]) + i;
212 memset(dx, 0,
sizeof(dx));
215 if (nbr_idx >= V || nbr_idx <0){
216 fprintf(stderr,
"ERROR: invliad nbr_idx(%d), line=%d\n", nbr_idx, __LINE__);
219 su3_matrix* A = sitelink[nu] + nbr_idx;
223 B = mulink + nbr_idx;
225 B = mulink + nbr_idx;
228 memset(dx, 0,
sizeof(dx));
231 su3_matrix* C = sitelink[nu] + nbr_idx;
259 template <
typename su3_matrix,
typename Float>
263 su3_matrix* staple = (su3_matrix *)malloc(
V*
sizeof(su3_matrix));
265 fprintf(stderr,
"Error: malloc failed for staple in function %s\n", __FUNCTION__);
269 su3_matrix* tempmat1 = (su3_matrix *)malloc(
V*
sizeof(su3_matrix));
270 if(tempmat1 == NULL){
271 fprintf(stderr,
"ERROR: malloc failed for tempmat1 in function %s\n", __FUNCTION__);
276 Float one_link = (act_path_coeff[0] - 6.0*act_path_coeff[5]);
282 for(
int i=0;i <
V;i ++){
283 su3_matrix* fat1 = ((su3_matrix*)fatlink[
dir]) + i;
292 for(
int nu=
XUP; nu<=
TUP; nu++){
301 for(
int rho=
XUP; rho<=
TUP; rho++) {
302 if((rho!=dir)&&(rho!=nu)){
352 fprintf(stderr,
"ERROR: unsupported precision(%d)\n", prec);
364 template<
typename su3_matrix,
typename Real>
366 llfat_compute_gen_staple_field_mg(su3_matrix *staple,
int mu,
int nu,
367 su3_matrix* mulink, su3_matrix** ghost_mulink,
368 su3_matrix** sitelink, su3_matrix** ghost_sitelink, su3_matrix** ghost_sitelink_diag,
372 su3_matrix tmat1,tmat2;
423 int x1odd = (x2 + x3 + x4 + oddBit) & 1;
427 (x4*X3X2+x3*X2+
x2)/2,
428 (x4*X3X1+x3*X1+x1)/2,
429 (x4*X2X1+x2*X1+
x1)/2,
433 fat1 = ((su3_matrix*)fatlink[mu]) + i;
434 su3_matrix* A = sitelink[nu] + i;
436 memset(dx, 0,
sizeof(dx));
442 if (x[nu] + dx[nu] >=
Z[nu]){
443 B = ghost_mulink[nu] +
Vs[nu] + (1-oddBit)*
Vsh[nu] + space_con[nu];
446 B = mulink + nbr_idx;
449 if(x[nu]+dx[nu] >=
Z[nu]){
450 B = ghost_sitelink[nu] + 4*
Vs[nu] + mu*
Vs[nu] + (1-oddBit)*
Vsh[nu] + space_con[nu];
453 B = sitelink[
mu] + nbr_idx;
460 memset(dx, 0,
sizeof(dx));
462 if(x[mu] + dx[mu] >=
Z[mu]){
463 C = ghost_sitelink[
mu] + 4*
Vs[
mu] + nu*
Vs[
mu] + (1-oddBit)*
Vsh[mu] + space_con[mu];
466 C = sitelink[nu] + nbr_idx;
499 int x1h = sid - za*
X1h;
504 int x1odd = (x2 + x3 + x4 + oddBit) & 1;
505 int x1 = 2*x1h +
x1odd;
508 (x4*X3X2+x3*X2+
x2)/2,
509 (x4*X3X1+x3*X1+x1)/2,
510 (x4*X2X1+x2*X1+
x1)/2,
516 fat1 = ((su3_matrix*)fatlink[mu]) + i;
520 memset(dx, 0,
sizeof(dx));
524 if(x[nu] + dx[nu] < 0){
525 A = ghost_sitelink[nu] + nu*
Vs[nu] + (1-oddBit)*
Vsh[nu] + space_con[nu];
528 A = sitelink[nu] + nbr_idx;
534 if (x[nu] + dx[nu] < 0){
535 B = ghost_mulink[nu] + (1-oddBit)*
Vsh[nu] + space_con[nu];
537 B = mulink + nbr_idx;
540 if(x[nu] + dx[nu] < 0){
541 B = ghost_sitelink[nu] + mu*
Vs[nu] + (1-oddBit)*
Vsh[nu] + space_con[nu];
544 B = sitelink[
mu] + nbr_idx;
551 memset(dx, 0,
sizeof(dx));
557 int new_x1, new_x2, new_x3, new_x4;
558 new_x1 = (x[0] + dx[0] +
Z[0])%
Z[0];
559 new_x2 = (x[1] + dx[1] +
Z[1])%
Z[1];
560 new_x3 = (x[2] + dx[2] +
Z[2])%
Z[2];
561 new_x4 = (x[3] + dx[3] +
Z[3])%
Z[3];
562 int new_x[4] = {new_x1, new_x2, new_x3, new_x4};
563 space_con[0] = (new_x4*X3X2 + new_x3*X2 + new_x2)/2;
564 space_con[1] = (new_x4*X3X1 + new_x3*X1 + new_x1)/2;
565 space_con[2] = (new_x4*X2X1 + new_x2*X1 + new_x1)/2;
566 space_con[3] = (new_x3*X2X1 + new_x2*X1 + new_x1)/2;
568 if( (x[nu] + dx[nu]) < 0 && (x[
mu] + dx[
mu] >=
Z[
mu])){
572 for(dir1=0; dir1 < 4; dir1 ++){
573 if(dir1 != nu && dir1 != mu){
577 for(dir2=0; dir2 < 4; dir2 ++){
578 if(dir2 != nu && dir2 != mu && dir2 != dir1){
582 C = ghost_sitelink_diag[nu*4+
mu] + oddBit*
Z[dir1]*
Z[dir2]/2 + (new_x[dir2]*
Z[dir1]+new_x[dir1])/2;
583 }
else if (x[nu] + dx[nu] < 0){
584 C = ghost_sitelink[nu] + nu*
Vs[nu] + oddBit*
Vsh[nu]+ space_con[nu];
585 }
else if (x[mu] + dx[mu] >=
Z[mu]){
588 C = sitelink[nu] + nbr_idx;
605 template <
typename su3_matrix,
typename Float>
606 void llfat_cpu_mg(
void** fatlink, su3_matrix** sitelink, su3_matrix** ghost_sitelink,
607 su3_matrix** ghost_sitelink_diag,
Float* act_path_coeff)
610 if (
sizeof(
Float) == 4){
616 su3_matrix* staple = (su3_matrix *)malloc(V*
sizeof(su3_matrix));
618 fprintf(stderr,
"Error: malloc failed for staple in function %s\n", __FUNCTION__);
623 su3_matrix* ghost_staple[4];
624 su3_matrix* ghost_staple1[4];
626 for(
int i=0;i < 4;i++){
627 ghost_staple[i] = (su3_matrix*)malloc(2*
Vs[i]*
sizeof(su3_matrix));
628 if (ghost_staple[i] == NULL){
629 fprintf(stderr,
"Error: malloc failed for ghost staple in function %s\n", __FUNCTION__);
633 ghost_staple1[i] = (su3_matrix*)malloc(2*
Vs[i]*
sizeof(su3_matrix));
634 if (ghost_staple1[i] == NULL){
635 fprintf(stderr,
"Error: malloc failed for ghost staple1 in function %s\n", __FUNCTION__);
640 su3_matrix* tempmat1 = (su3_matrix *)malloc(V*
sizeof(su3_matrix));
641 if(tempmat1 == NULL){
642 fprintf(stderr,
"ERROR: malloc failed for tempmat1 in function %s\n", __FUNCTION__);
647 Float one_link = (act_path_coeff[0] - 6.0*act_path_coeff[5]);
653 for(
int i=0;i <
V;i ++){
654 su3_matrix* fat1 = ((su3_matrix*)fatlink[
dir]) + i;
660 for(
int nu=
XUP; nu<=
TUP; nu++){
662 llfat_compute_gen_staple_field_mg(staple,
dir,nu,
663 sitelink[
dir], (su3_matrix**)NULL,
664 sitelink, ghost_sitelink, ghost_sitelink_diag,
665 fatlink, act_path_coeff[2], 0);
671 llfat_compute_gen_staple_field_mg((su3_matrix*)NULL,dir,nu,
673 sitelink, ghost_sitelink, ghost_sitelink_diag,
674 fatlink, act_path_coeff[5],1);
676 for(
int rho=
XUP; rho<=
TUP; rho++) {
677 if((rho!=dir)&&(rho!=nu)){
678 llfat_compute_gen_staple_field_mg( tempmat1, dir, rho,
680 sitelink, ghost_sitelink, ghost_sitelink_diag,
681 fatlink, act_path_coeff[3], 1);
689 llfat_compute_gen_staple_field_mg((su3_matrix*)NULL,dir,
sig,
690 tempmat1, ghost_staple1,
691 sitelink, ghost_sitelink, ghost_sitelink_diag,
692 fatlink, act_path_coeff[4], 1);
707 for(
int i=0;i < 4;i++){
708 free(ghost_staple[i]);
709 free(ghost_staple1[i]);
719 void** ghost_sitelink_diag,
QudaPrecision prec,
void* act_path_coeff)
735 (
dsu3_matrix**)ghost_sitelink_diag, (
double*) act_path_coeff);
740 (
fsu3_matrix**)ghost_sitelink_diag, (
float*) act_path_coeff);
744 fprintf(stderr,
"ERROR: unsupported precision(%d)\n", prec);