17 #define CADD(a,b,c) { (c).real = (a).real + (b).real; \
18 (c).imag = (a).imag + (b).imag; }
19 #define CMUL(a,b,c) { (c).real = (a).real*(b).real - (a).imag*(b).imag; \
20 (c).imag = (a).real*(b).imag + (a).imag*(b).real; }
21 #define CSUM(a,b) { (a).real += (b).real; (a).imag += (b).imag; }
24 #define CMULJ_(a,b,c) { (c).real = (a).real*(b).real + (a).imag*(b).imag; \
25 (c).imag = (a).real*(b).imag - (a).imag*(b).real; }
28 #define CMUL_J(a,b,c) { (c).real = (a).real*(b).real + (a).imag*(b).imag; \
29 (c).imag = (a).imag*(b).real - (a).real*(b).imag; }
31 #define CONJG(a,b) { (b).real = (a).real; (b).imag = -(a).imag; }
51 float m00im,m11im,m22im;
57 double m00im,m11im,m22im;
65 template<
typename su3_matrix>
69 return (p + 4*idx + dir);
74 errorQuda(
"get_su3_matrix: unsupported ordering scheme!\n");
79 template<
typename su3_matrix>
84 for(i=0;i<3;i++)
for(j=0;j<3;j++){
85 CONJG( a->
e[j][i], b->
e[i][j] );
89 template<
typename su3_matrix>
95 for(i=0;i<3;i++)
for(j=0;j<3;j++){
102 template<
typename su3_matrix,
typename anti_hermitmat>
106 auto temp = (m3->
e[0][0].imag + m3->
e[1][1].imag + m3->
e[2][2].imag)*0.33333333333333333;
107 ah3->m00im = m3->
e[0][0].imag - temp;
108 ah3->m11im = m3->
e[1][1].imag - temp;
109 ah3->m22im = m3->
e[2][2].imag - temp;
110 ah3->m01.real = (m3->
e[0][1].real - m3->
e[1][0].real)*0.5;
111 ah3->m02.real = (m3->
e[0][2].real - m3->
e[2][0].real)*0.5;
112 ah3->m12.real = (m3->
e[1][2].real - m3->
e[2][1].real)*0.5;
113 ah3->m01.imag = (m3->
e[0][1].imag + m3->
e[1][0].imag)*0.5;
114 ah3->m02.imag = (m3->
e[0][2].imag + m3->
e[2][0].imag)*0.5;
115 ah3->m12.imag = (m3->
e[1][2].imag + m3->
e[2][1].imag)*0.5;
119 template <
typename anti_hermitmat,
typename su3_matrix>
121 uncompress_anti_hermitian(anti_hermitmat *mat_antihermit,
124 typename std::remove_reference<decltype(mat_antihermit->m00im)>::type temp1;
125 mat_su3->
e[0][0].imag=mat_antihermit->m00im;
126 mat_su3->
e[0][0].real=0.;
127 mat_su3->
e[1][1].imag=mat_antihermit->m11im;
128 mat_su3->
e[1][1].real=0.;
129 mat_su3->
e[2][2].imag=mat_antihermit->m22im;
130 mat_su3->
e[2][2].real=0.;
131 mat_su3->
e[0][1].imag=mat_antihermit->m01.imag;
132 temp1=mat_antihermit->m01.real;
133 mat_su3->
e[0][1].real=temp1;
134 mat_su3->
e[1][0].real= -temp1;
135 mat_su3->
e[1][0].imag=mat_antihermit->m01.imag;
136 mat_su3->
e[0][2].imag=mat_antihermit->m02.imag;
137 temp1=mat_antihermit->m02.real;
138 mat_su3->
e[0][2].real=temp1;
139 mat_su3->
e[2][0].real= -temp1;
140 mat_su3->
e[2][0].imag=mat_antihermit->m02.imag;
141 mat_su3->
e[1][2].imag=mat_antihermit->m12.imag;
142 temp1=mat_antihermit->m12.real;
143 mat_su3->
e[1][2].real=temp1;
144 mat_su3->
e[2][1].real= -temp1;
145 mat_su3->
e[2][1].imag=mat_antihermit->m12.imag;
148 template <
typename su3_matrix,
typename Float>
155 c->
e[i][j].real = a->
e[i][j].real - s*b->
e[i][j].real;
156 c->
e[i][j].imag = a->
e[i][j].imag - s*b->
e[i][j].imag;
161 template <
typename su3_matrix,
typename Float>
168 c->
e[i][j].real = a->
e[i][j].real + s*b->
e[i][j].real;
169 c->
e[i][j].imag = a->
e[i][j].imag + s*b->
e[i][j].imag;
173 template <
typename su3_matrix,
typename Float>
180 a->
e[i][j].real = a->
e[i][j].real * s;
181 a->
e[i][j].imag = a->
e[i][j].imag * s;
186 template<
typename su3_matrix,
typename su3_vector>
191 typename std::remove_reference<decltype(a->
e[0][0])>::type x,y;
195 CMUL( a->
e[i][j] , b->c[j] , y );
201 template<
typename su3_matrix,
typename su3_vector>
206 typename std::remove_reference<decltype(a->
e[0][0])>::type x,y,z;
211 CMUL( z , b->c[j], y );
218 template<
typename su3_vector,
typename su3_matrix>
223 for(i=0;i<3;i++)
for(j=0;j<3;j++){
224 CMUL_J( a->c[i], b->c[j], c->
e[i][j] );
228 template<
typename su3_vector,
typename Real>
235 c->c[i].real = a->c[i].real + s*b->c[i].real;
236 c->c[i].imag = a->c[i].imag + s*b->c[i].imag;
242 template <
typename su3_matrix>
249 printf(
"(%f %f)\t", a->
e[i][j].real, a->
e[i][j].imag);
259 template<
typename su3_matrix>
263 typename std::remove_reference<decltype(c->
e[0][0])>::type x;
264 for(
int i=0; i<3; i++){
265 for(
int j=0; j<3; j++){
266 c->
e[i][j].real = 0.;
267 c->
e[i][j].imag = 0.;
268 for(
int k=0; k<3; k++){
269 CMUL(a->
e[i][k],b->
e[k][j],x);
270 c->
e[i][j].real += x.real;
271 c->
e[i][j].imag += x.imag;
279 template<
typename su3_matrix>
283 typename std::remove_reference<decltype(c->
e[0][0])>::type x;
284 for(
int i=0; i<3; i++){
285 for(
int j=0; j<3; j++){
286 c->
e[i][j].real = 0.;
287 c->
e[i][j].imag = 0.;
288 for(
int k=0; k<3; k++){
290 c->
e[i][j].real += x.real;
291 c->
e[i][j].imag += x.imag;
300 template<
typename su3_matrix>
304 typename std::remove_reference<decltype(c->
e[0][0])>::type x;
305 for(
int i=0; i<3; i++){
306 for(
int j=0; j<3; j++){
307 c->
e[i][j].real = 0.; c->
e[i][j].imag = 0.;
308 for(
int k=0; k<3; k++){
310 c->
e[i][j].real += x.real;
311 c->
e[i][j].imag += x.imag;
318 template<
typename su3_matrix>
324 matrix_mult_na(&a_adjoint, b, c);
327 template <
typename su3_matrix,
typename anti_hermitmat,
typename Float>
329 update_mom(anti_hermitmat* momentum,
int dir,
su3_matrix* sitelink,
340 anti_hermitmat* mom = momentum + 4*i+dir;
342 mult_su3_na(lnk, stp, &tmat1);
343 uncompress_anti_hermitian(mom, &tmat2);
345 scalar_mult_sub_su3_matrix(&tmat2, &tmat1, eb3, &tmat3);
353 template<
typename half_wilson_vector,
typename su3_matrix>
355 u_shift_hw(half_wilson_vector *src, half_wilson_vector *dest,
int dir,
su3_matrix* sitelink )
360 dx[3]=dx[2]=dx[1]=dx[0]=0;
366 half_wilson_vector* hw = src + nbr_idx;
368 mult_su3_mat_vec(link, &hw->h[0], &dest[i].h[0]);
369 mult_su3_mat_vec(link, &hw->h[1], &dest[i].h[1]);
375 half_wilson_vector* hw = src + nbr_idx;
377 mult_adj_su3_mat_vec(link, &hw->h[0], &dest[i].h[0]);
378 mult_adj_su3_mat_vec(link, &hw->h[1], &dest[i].h[1]);
385 template<
typename half_wilson_vector,
typename su3_matrix>
387 shifted_outer_prod(half_wilson_vector *src,
su3_matrix* dest,
int dir)
393 dx[3]=dx[2]=dx[1]=dx[0]=0;
401 half_wilson_vector* hw = src + nbr_idx;
402 su3_projector( &src[i].h[0], &(hw->h[0]), &dest[i]);
407 template<
typename half_wilson_vector,
typename su3_matrix>
409 forward_shifted_outer_prod(half_wilson_vector *src,
su3_matrix* dest,
int dir)
415 dx[3]=dx[2]=dx[1]=dx[0]=0;
423 half_wilson_vector* hw = src + nbr_idx;
425 su3_projector( &(hw->h[0]), &src[i].h[0], &dest[i]);
435 template <
typename half_wilson_vector,
typename su3_matrix>
440 for(
int i=0; i<
V; ++i){
441 for(
int dir=0; dir<4; ++dir){
442 dx[3]=dx[2]=dx[1]=dx[0]=0;
445 half_wilson_vector* hw = src + nbr_idx;
447 su3_projector( &(hw->h[0]), &src[i].h[0], p);
454 template <
typename half_wilson_vector,
typename su3_matrix>
459 for(
int i=0; i<
V; ++i){
460 for(
int dir=0; dir<4; ++dir){
461 dx[3]=dx[2]=dx[1]=dx[0]=0;
464 half_wilson_vector* hw = src + nbr_idx;
466 su3_projector( &(hw->h[0]), &src[i].h[0], p);
497 template<
typename half_wilson_vector,
typename su3_matrix>
498 static void shiftedOuterProduct(half_wilson_vector *src,
su3_matrix* dest){
499 for(
int dir=0; dir<4; dir++){
500 shifted_outer_prod(src, &dest[dir*
V],
OPP_DIR(dir));
504 template<
typename half_wilson_vector,
typename su3_matrix>
505 static void forwardShiftedOuterProduct(half_wilson_vector *src,
su3_matrix* dest){
506 for(
int dir=0; dir<4; dir++){
507 forward_shifted_outer_prod(src, &dest[dir*
V], dir);
525 template<
typename su3_matrix>
531 dx[3]=dx[2]=dx[1]=dx[0]=0;
539 matrix_mult_nn(link,
mat, &dest[i]);
547 matrix_mult_an(link,
mat, &dest[i]);
559 template <
typename half_wilson_vector,
560 typename anti_hermitmat,
typename Real>
562 add_3f_force_to_mom(half_wilson_vector *back, half_wilson_vector *forw,
563 int dir, Real coeff[2], anti_hermitmat* momentum)
572 my_coeff[0] = -coeff[0];
573 my_coeff[1] = -coeff[1];
576 my_coeff[0] = coeff[0];
577 my_coeff[1] = coeff[1];
582 tmp_coeff[0] = my_coeff[0];
583 tmp_coeff[1] = my_coeff[1];
585 tmp_coeff[0] = -my_coeff[0];
586 tmp_coeff[1] = -my_coeff[1] ;
589 if (
sizeof(Real) ==
sizeof(
float)){
592 anti_hermitmat* mom = momentum+ 4* i + mydir;
593 uncompress_anti_hermitian(mom, &mom_matrix);
594 su3_projector( &back[i].h[0], &forw[i].h[0], &tmat);
595 scalar_mult_add_su3_matrix(&mom_matrix, &tmat, tmp_coeff[0], &mom_matrix );
600 anti_hermitmat* mom = momentum+ 4* i + mydir;
601 uncompress_anti_hermitian(mom, &mom_matrix);
602 su3_projector( &back[i].h[0], &forw[i].h[0], &tmat);
603 scalar_mult_add_su3_matrix(&mom_matrix, &tmat, tmp_coeff[0], &mom_matrix );
610 template<
typename Real,
typename half_wilson_vector,
typename anti_hermitmat>
612 side_link_3f_force(
int mu,
int nu, Real coeff[2], half_wilson_vector *Path ,
613 half_wilson_vector *Path_nu, half_wilson_vector *Path_mu,
614 half_wilson_vector *Path_numu, anti_hermitmat* mom)
618 m_coeff[0] = -coeff[0] ;
619 m_coeff[1] = -coeff[1] ;
623 add_3f_force_to_mom(Path_numu, Path,
mu, coeff, mom) ;
625 add_3f_force_to_mom(Path,Path_numu,
OPP_DIR(
mu),m_coeff, mom);
630 add_3f_force_to_mom(Path_nu, Path_mu,
mu, m_coeff, mom);
632 add_3f_force_to_mom(Path_mu, Path_nu,
OPP_DIR(
mu), coeff, mom) ;
637 template<
typename Real,
typename su3_matrix,
typename anti_hermitmat>
640 int dir, Real coeff, anti_hermitmat* momentum)
657 if(i<
Vh){ tmp_coeff = my_coeff; }
658 else{ tmp_coeff = -my_coeff; }
663 anti_hermitmat* mom = momentum + 4*i + mydir;
664 uncompress_anti_hermitian(mom, &mom_matrix);
665 matrix_mult_na(&back[i], &forw[i], &tmat);
666 scalar_mult_add_su3_matrix(&mom_matrix, &tmat, tmp_coeff, &mom_matrix);
674 template<
typename Real,
typename su3_matrix,
typename anti_hermitmat>
676 side_link_force(
int mu,
int nu, Real coeff,
su3_matrix *Path,
685 add_force_to_momentum(Path_numu, Path,
mu, coeff, mom);
689 add_force_to_momentum(Path, Path_numu,
OPP_DIR(
mu), m_coeff, mom);
695 add_force_to_momentum(Path_nu, Path_mu,
mu, m_coeff, mom);
698 add_force_to_momentum(Path_mu, Path_nu,
OPP_DIR(
mu), coeff, mom);
708 #define Pmu tempmat[0]
709 #define Pnumu tempmat[1]
710 #define Prhonumu tempmat[2]
711 #define P7 tempmat[3]
712 #define P7rho tempmat[4]
713 #define P5 tempmat[5]
714 #define P3 tempmat[6]
715 #define P5nu tempmat[3]
716 #define P3mu tempmat[3]
717 #define Popmu tempmat[4]
718 #define Pmumumu tempmat[4]
720 #define Qmu tempmat[7]
721 #define Qnumu tempmat[8]
722 #define Qrhonumu tempmat[2]
728 template<
typename su3_matrix>
731 for(
int i=0; i<3; i++){
732 for(
int j=0; j<3; j++){
741 template<
typename su3_matrix>
742 static void set_identity(
su3_matrix* matrices,
int num_dirs){
743 for(
int i=0; i<
V*num_dirs; i++){
744 set_identity_matrix(&matrices[i]);
749 template <
typename Real,
typename su3_matrix,
typename anti_hermitmat>
755 int mu, nu, rho, sig;
757 Real OneLink, Lepage, FiveSt, ThreeSt, SevenSt;
759 Real mLepage, mFiveSt, mThreeSt, mSevenSt;
762 int sites_on_node =
V;
766 ferm_epsilon = 2.0*weight*eps;
767 OneLink = act_path_coeff[0]*ferm_epsilon ;
769 ThreeSt = act_path_coeff[2]*ferm_epsilon ; mThreeSt = -ThreeSt;
770 FiveSt = act_path_coeff[3]*ferm_epsilon ; mFiveSt = -FiveSt;
771 SevenSt = act_path_coeff[4]*ferm_epsilon ; mSevenSt = -SevenSt;
772 Lepage = act_path_coeff[5]*ferm_epsilon ; mLepage = -Lepage;
799 printf(
"Calling modified hisq\n");
803 for(sig=0; sig < 8; sig++){
807 u_shift_mat(&temp_xx[
OPP_DIR(sig)*
V],
Pmu, sig, sitelink);
808 add_force_to_momentum(
Pmu,
id, sig, OneLink, mom);
814 for(
mu = 0;
mu < 8;
mu++){
831 u_shift_mat(
Pmu,
P3, sig, sitelink);
834 add_force_to_momentum(
P3,
Qmu, sig, mThreeSt, mom);
838 for(nu=0; nu < 8; nu++){
839 if (nu == sig || nu ==
OPP_DIR(sig)
859 u_shift_mat(
Pnumu,
P5, sig, sitelink);
861 add_force_to_momentum(
P5,
Qnumu, sig, FiveSt, mom);
863 for(rho =0; rho < 8; rho++){
864 if (rho == sig || rho ==
OPP_DIR(sig)
866 || rho == nu || rho ==
OPP_DIR(nu)){
880 add_force_to_momentum(
P7,
Qrhonumu, sig, mSevenSt, mom) ;
882 u_shift_mat(
P7,
P7rho, rho, sitelink);
884 if(FiveSt != 0)coeff = SevenSt/FiveSt ;
else coeff = 0;
886 scalar_mult_add_su3_matrix(&
P5[i], &
P7rho[i], coeff, &
P5[i]);
891 u_shift_mat(
P5,
P5nu, nu, sitelink);
894 if(ThreeSt != 0)coeff = FiveSt/ThreeSt;
else coeff = 0;
897 scalar_mult_add_su3_matrix(&
P3[i], &
P5nu[i], coeff, &
P3[i]);
905 u_shift_mat(
Pnumu,
P5, sig, sitelink);
907 add_force_to_momentum(
P5,
Qnumu, sig, Lepage, mom);
910 u_shift_mat(
P5,
P5nu,
mu, sitelink);
914 if(ThreeSt != 0)coeff = Lepage/ThreeSt;
else coeff = 0;
917 scalar_mult_add_su3_matrix(&
P3[i], &
P5nu[i], coeff, &
P3[i]);
921 u_shift_mat(
P3,
P3mu,
mu, sitelink);
923 side_link_force(
mu, sig, ThreeSt,
id,
P3,
Qmu,
P3mu, mom);
935 template <
typename Real,
typename su3_matrix,
typename anti_hermitmat,
typename half_wilson_vector>
937 half_wilson_vector* temp_x, Real* act_path_coeff,
941 int mu, nu, rho, sig;
943 Real OneLink, Lepage, FiveSt, ThreeSt, SevenSt;
945 Real mLepage, mFiveSt, mThreeSt, mSevenSt;
948 int sites_on_node =
V;
952 ferm_epsilon = 2.0*weight*eps;
953 OneLink = act_path_coeff[0]*ferm_epsilon ;
955 ThreeSt = act_path_coeff[2]*ferm_epsilon ; mThreeSt = -ThreeSt;
956 FiveSt = act_path_coeff[3]*ferm_epsilon ; mFiveSt = -FiveSt;
957 SevenSt = act_path_coeff[4]*ferm_epsilon ; mSevenSt = -SevenSt;
958 Lepage = act_path_coeff[5]*ferm_epsilon ; mLepage = -Lepage;
981 printf(
"Calling hisq reference routine\n");
982 for(sig=0; sig < 8; sig++){
983 shifted_outer_prod(temp_x, temp_mat,
OPP_DIR(sig));
988 u_shift_mat(temp_mat,
Pmu, sig, sitelink);
989 add_force_to_momentum(
Pmu,
id, sig, OneLink, mom);
992 for(
mu = 0;
mu < 8;
mu++){
1009 u_shift_mat(
Pmu,
P3, sig, sitelink);
1012 add_force_to_momentum(
P3,
Qmu, sig, mThreeSt, mom);
1015 for(nu=0; nu < 8; nu++){
1016 if (nu == sig || nu ==
OPP_DIR(sig)
1036 u_shift_mat(
Pnumu,
P5, sig, sitelink);
1038 add_force_to_momentum(
P5,
Qnumu, sig, FiveSt, mom);
1041 for(rho =0; rho < 8; rho++){
1042 if (rho == sig || rho ==
OPP_DIR(sig)
1044 || rho == nu || rho ==
OPP_DIR(nu)){
1058 add_force_to_momentum(
P7,
Qrhonumu, sig, mSevenSt, mom) ;
1061 u_shift_mat(
P7,
P7rho, rho, sitelink);
1063 if(FiveSt != 0)coeff = SevenSt/FiveSt ;
else coeff = 0;
1065 scalar_mult_add_su3_matrix(&
P5[i], &
P7rho[i], coeff, &
P5[i]);
1070 u_shift_mat(
P5,
P5nu, nu, sitelink);
1073 if(ThreeSt != 0)coeff = FiveSt/ThreeSt;
else coeff = 0;
1076 scalar_mult_add_su3_matrix(&
P3[i], &
P5nu[i], coeff, &
P3[i]);
1084 u_shift_mat(
Pnumu,
P5, sig, sitelink);
1086 add_force_to_momentum(
P5,
Qnumu, sig, Lepage, mom);
1089 u_shift_mat(
P5,
P5nu,
mu, sitelink);
1093 if(ThreeSt != 0)coeff = Lepage/ThreeSt;
else coeff = 0;
1096 scalar_mult_add_su3_matrix(&
P3[i], &
P5nu[i], coeff, &
P3[i]);
1100 u_shift_mat(
P3,
P3mu,
mu, sitelink);
1102 side_link_force(
mu, sig, ThreeSt,
id,
P3,
Qmu,
P3mu, mom);
1139 void* act_path_coeff,
void* temp_x,
1140 void* sitelink,
void* mom)
1151 void* act_path_coeff,
void* temp_x,
1152 void* sitelink,
void* mom)
1163 void* act_path_coeff,
void* temp_xx,
1164 void* sitelink,
void* mom)
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
enum QudaPrecision_s QudaPrecision
void su3_adjoint(su3_matrix *a, su3_matrix *b)
int neighborIndexFullLattice(int i, int dx4, int dx3, int dx2, int dx1)
void make_anti_hermitian(su3_matrix *m3, anti_hermitmat *ah3)
void print_su3_matrix(su3_matrix *a)
void color_matrix_hisq_force_reference(float eps, float weight, void *act_path_coeff, void *temp_xx, void *sitelink, void *mom)
void halfwilson_hisq_force_reference(float eps, float weight, void *act_path_coeff, void *temp_x, void *sitelink, void *mom)
void computeHisqOuterProduct(void *src, void *dest, QudaPrecision precision)
su3_matrix * get_su3_matrix(int gauge_order, su3_matrix *p, int idx, int dir)
void do_halfwilson_hisq_force_reference(Real eps, Real weight, half_wilson_vector *temp_x, Real *act_path_coeff, su3_matrix *sitelink, anti_hermitmat *mom)
void computeLinkOrderedOuterProduct(void *src, void *dst, QudaPrecision precision, int gauge_order)
void do_color_matrix_hisq_force_reference(Real eps, Real weight, su3_matrix *temp_xx, Real *act_path_coeff, su3_matrix *sitelink, anti_hermitmat *mom)
QudaGaugeFieldOrder gauge_order
#define GOES_BACKWARDS(dir)
#define GOES_FORWARDS(dir)
FloatingPoint< float > Float
Main header file for the QUDA library.
std::complex< real > e[3][3]