17 #define CADD(a,b,c) { (c).real = (a).real + (b).real; \ 18 (c).imag = (a).imag + (b).imag; } 19 #define CMUL(a,b,c) { (c).real = (a).real*(b).real - (a).imag*(b).imag; \ 20 (c).imag = (a).real*(b).imag + (a).imag*(b).real; } 21 #define CSUM(a,b) { (a).real += (b).real; (a).imag += (b).imag; } 24 #define CMULJ_(a,b,c) { (c).real = (a).real*(b).real + (a).imag*(b).imag; \ 25 (c).imag = (a).real*(b).imag - (a).imag*(b).real; } 28 #define CMUL_J(a,b,c) { (c).real = (a).real*(b).real + (a).imag*(b).imag; \ 29 (c).imag = (a).imag*(b).real - (a).real*(b).imag; } 31 #define CONJG(a,b) { (b).real = (a).real; (b).imag = -(a).imag; } 51 float m00im,m11im,m22im;
57 double m00im,m11im,m22im;
65 template<
typename su3_matrix>
69 return (p + 4*idx + dir);
74 errorQuda(
"get_su3_matrix: unsupported ordering scheme!\n");
79 template<
typename su3_matrix>
84 for(i=0;i<3;i++)
for(j=0;j<3;j++){
85 CONJG( a->
e[j][i], b->
e[i][j] );
89 template<
typename su3_matrix>
95 for(i=0;i<3;i++)
for(j=0;j<3;j++){
102 template<
typename su3_matrix,
typename anti_hermitmat>
106 auto temp = (m3->
e[0][0].imag + m3->
e[1][1].imag + m3->
e[2][2].imag)*0.33333333333333333;
107 ah3->m00im = m3->
e[0][0].imag - temp;
108 ah3->m11im = m3->
e[1][1].imag - temp;
109 ah3->m22im = m3->
e[2][2].imag - temp;
110 ah3->m01.real = (m3->
e[0][1].real - m3->
e[1][0].real)*0.5;
111 ah3->m02.real = (m3->
e[0][2].real - m3->
e[2][0].real)*0.5;
112 ah3->m12.real = (m3->
e[1][2].real - m3->
e[2][1].real)*0.5;
113 ah3->m01.imag = (m3->
e[0][1].imag + m3->
e[1][0].imag)*0.5;
114 ah3->m02.imag = (m3->
e[0][2].imag + m3->
e[2][0].imag)*0.5;
115 ah3->m12.imag = (m3->
e[1][2].imag + m3->
e[2][1].imag)*0.5;
119 template <
typename anti_hermitmat,
typename su3_matrix>
124 typename std::remove_reference<decltype(mat_antihermit->m00im)>::type temp1;
125 mat_su3->
e[0][0].imag=mat_antihermit->m00im;
126 mat_su3->
e[0][0].real=0.;
127 mat_su3->
e[1][1].imag=mat_antihermit->m11im;
128 mat_su3->
e[1][1].real=0.;
129 mat_su3->
e[2][2].imag=mat_antihermit->m22im;
130 mat_su3->
e[2][2].real=0.;
131 mat_su3->
e[0][1].imag=mat_antihermit->m01.imag;
132 temp1=mat_antihermit->m01.real;
133 mat_su3->
e[0][1].real=temp1;
134 mat_su3->
e[1][0].real= -temp1;
135 mat_su3->
e[1][0].imag=mat_antihermit->m01.imag;
136 mat_su3->
e[0][2].imag=mat_antihermit->m02.imag;
137 temp1=mat_antihermit->m02.real;
138 mat_su3->
e[0][2].real=temp1;
139 mat_su3->
e[2][0].real= -temp1;
140 mat_su3->
e[2][0].imag=mat_antihermit->m02.imag;
141 mat_su3->
e[1][2].imag=mat_antihermit->m12.imag;
142 temp1=mat_antihermit->m12.real;
143 mat_su3->
e[1][2].real=temp1;
144 mat_su3->
e[2][1].real= -temp1;
145 mat_su3->
e[2][1].imag=mat_antihermit->m12.imag;
148 template <
typename su3_matrix,
typename Float>
155 c->
e[i][j].real = a->
e[i][j].real - s*b->
e[i][j].real;
156 c->
e[i][j].imag = a->
e[i][j].imag - s*b->
e[i][j].imag;
161 template <
typename su3_matrix,
typename Float>
168 c->
e[i][j].real = a->
e[i][j].real + s*b->
e[i][j].real;
169 c->
e[i][j].imag = a->
e[i][j].imag + s*b->
e[i][j].imag;
173 template <
typename su3_matrix,
typename Float>
180 a->
e[i][j].real = a->
e[i][j].real *
s;
181 a->
e[i][j].imag = a->
e[i][j].imag *
s;
186 template<
typename su3_matrix,
typename su3_vector>
191 typename std::remove_reference<decltype(a->
e[0][0])>::type x,y;
195 CMUL( a->
e[i][j] , b->c[j] , y );
201 template<
typename su3_matrix,
typename su3_vector>
206 typename std::remove_reference<decltype(a->
e[0][0])>::type x,y,z;
211 CMUL( z , b->c[j], y );
218 template<
typename su3_vector,
typename su3_matrix>
223 for(i=0;i<3;i++)
for(j=0;j<3;j++){
224 CMUL_J( a->c[i], b->c[j], c->
e[i][j] );
228 template<
typename su3_vector,
typename Real>
235 c->c[i].real = a->c[i].real + s*b->c[i].real;
236 c->c[i].imag = a->c[i].imag + s*b->c[i].imag;
242 template <
typename su3_matrix>
249 printf(
"(%f %f)\t", a->
e[i][j].real, a->
e[i][j].imag);
259 template<
typename su3_matrix>
263 typename std::remove_reference<decltype(c->
e[0][0])>::type x;
264 for(
int i=0; i<3; i++){
265 for(
int j=0; j<3; j++){
266 c->
e[i][j].real = 0.;
267 c->
e[i][j].imag = 0.;
268 for(
int k=0; k<3; k++){
269 CMUL(a->
e[i][k],b->
e[k][j],x);
270 c->
e[i][j].real += x.real;
271 c->
e[i][j].imag += x.imag;
279 template<
typename su3_matrix>
283 typename std::remove_reference<decltype(c->
e[0][0])>::type x;
284 for(
int i=0; i<3; i++){
285 for(
int j=0; j<3; j++){
286 c->
e[i][j].real = 0.;
287 c->
e[i][j].imag = 0.;
288 for(
int k=0; k<3; k++){
290 c->
e[i][j].real += x.real;
291 c->
e[i][j].imag += x.imag;
300 template<
typename su3_matrix>
304 typename std::remove_reference<decltype(c->
e[0][0])>::type x;
305 for(
int i=0; i<3; i++){
306 for(
int j=0; j<3; j++){
307 c->
e[i][j].real = 0.; c->
e[i][j].imag = 0.;
308 for(
int k=0; k<3; k++){
310 c->
e[i][j].real += x.real;
311 c->
e[i][j].imag += x.imag;
318 template<
typename su3_matrix>
327 template <
typename su3_matrix,
typename anti_hermitmat,
typename Float>
340 anti_hermitmat* mom = momentum + 4*i+dir;
353 template<
typename half_wilson_vector,
typename su3_matrix>
360 dx[3]=dx[2]=dx[1]=dx[0]=0;
366 half_wilson_vector*
hw = src + nbr_idx;
375 half_wilson_vector*
hw = src + nbr_idx;
385 template<
typename half_wilson_vector,
typename su3_matrix>
393 dx[3]=dx[2]=dx[1]=dx[0]=0;
401 half_wilson_vector*
hw = src + nbr_idx;
407 template<
typename half_wilson_vector,
typename su3_matrix>
415 dx[3]=dx[2]=dx[1]=dx[0]=0;
423 half_wilson_vector*
hw = src + nbr_idx;
435 template <
typename half_wilson_vector,
typename su3_matrix>
440 for(
int i=0; i<
V; ++i){
441 for(
int dir=0; dir<4; ++dir){
442 dx[3]=dx[2]=dx[1]=dx[0]=0;
445 half_wilson_vector*
hw = src + nbr_idx;
454 template <
typename half_wilson_vector,
typename su3_matrix>
459 for(
int i=0; i<
V; ++i){
460 for(
int dir=0; dir<4; ++dir){
461 dx[3]=dx[2]=dx[1]=dx[0]=0;
464 half_wilson_vector*
hw = src + nbr_idx;
497 template<
typename half_wilson_vector,
typename su3_matrix>
499 for(
int dir=0; dir<4; dir++){
504 template<
typename half_wilson_vector,
typename su3_matrix>
506 for(
int dir=0; dir<4; dir++){
525 template<
typename su3_matrix>
531 dx[3]=dx[2]=dx[1]=dx[0]=0;
559 template <
typename half_wilson_vector,
560 typename anti_hermitmat,
typename Real>
563 int dir, Real coeff[2], anti_hermitmat* momentum)
572 my_coeff[0] = -coeff[0];
573 my_coeff[1] = -coeff[1];
576 my_coeff[0] = coeff[0];
577 my_coeff[1] = coeff[1];
582 tmp_coeff[0] = my_coeff[0];
583 tmp_coeff[1] = my_coeff[1];
585 tmp_coeff[0] = -my_coeff[0];
586 tmp_coeff[1] = -my_coeff[1] ;
589 if (
sizeof(Real) ==
sizeof(
float)){
592 anti_hermitmat* mom = momentum+ 4* i + mydir;
600 anti_hermitmat* mom = momentum+ 4* i + mydir;
610 template<
typename Real,
typename half_wilson_vector,
typename anti_hermitmat>
613 half_wilson_vector *Path_nu, half_wilson_vector *Path_mu,
614 half_wilson_vector *Path_numu, anti_hermitmat* mom)
618 m_coeff[0] = -coeff[0] ;
619 m_coeff[1] = -coeff[1] ;
637 template<
typename Real,
typename su3_matrix,
typename anti_hermitmat>
640 int dir, Real coeff, anti_hermitmat* momentum)
657 if(i<
Vh){ tmp_coeff = my_coeff; }
658 else{ tmp_coeff = -my_coeff; }
663 anti_hermitmat* mom = momentum + 4*i + mydir;
674 template<
typename Real,
typename su3_matrix,
typename anti_hermitmat>
708 #define Pmu tempmat[0] 709 #define Pnumu tempmat[1] 710 #define Prhonumu tempmat[2] 711 #define P7 tempmat[3] 712 #define P7rho tempmat[4] 713 #define P5 tempmat[5] 714 #define P3 tempmat[6] 715 #define P5nu tempmat[3] 716 #define P3mu tempmat[3] 717 #define Popmu tempmat[4] 718 #define Pmumumu tempmat[4] 720 #define Qmu tempmat[7] 721 #define Qnumu tempmat[8] 722 #define Qrhonumu tempmat[2] // same as Prhonumu 728 template<
typename su3_matrix>
731 for(
int i=0; i<3; i++){
732 for(
int j=0; j<3; j++){
741 template<
typename su3_matrix>
743 for(
int i=0; i<
V*num_dirs; i++){
749 template <
typename Real,
typename su3_matrix,
typename anti_hermitmat>
755 int mu, nu, rho, sig;
757 Real OneLink, Lepage, FiveSt, ThreeSt, SevenSt;
759 Real mLepage, mFiveSt, mThreeSt, mSevenSt;
766 ferm_epsilon = 2.0*weight*eps;
767 OneLink = act_path_coeff[0]*ferm_epsilon ;
769 ThreeSt = act_path_coeff[2]*ferm_epsilon ; mThreeSt = -ThreeSt;
770 FiveSt = act_path_coeff[3]*ferm_epsilon ; mFiveSt = -FiveSt;
771 SevenSt = act_path_coeff[4]*ferm_epsilon ; mSevenSt = -SevenSt;
772 Lepage = act_path_coeff[5]*ferm_epsilon ; mLepage = -Lepage;
778 for(mu=0; mu<9; mu++){
799 printf(
"Calling modified hisq\n");
803 for(sig=0; sig < 8; sig++){
814 for(mu = 0; mu < 8; mu++){
815 if ( (mu == sig) || (mu ==
OPP_DIR(sig))){
838 for(nu=0; nu < 8; nu++){
839 if (nu == sig || nu ==
OPP_DIR(sig)
840 || nu == mu || nu ==
OPP_DIR(mu)){
863 for(rho =0; rho < 8; rho++){
864 if (rho == sig || rho ==
OPP_DIR(sig)
865 || rho == mu || rho ==
OPP_DIR(mu)
866 || rho == nu || rho ==
OPP_DIR(nu)){
884 if(FiveSt != 0)coeff = SevenSt/FiveSt ;
else coeff = 0;
894 if(ThreeSt != 0)coeff = FiveSt/ThreeSt;
else coeff = 0;
914 if(ThreeSt != 0)coeff = Lepage/ThreeSt;
else coeff = 0;
935 template <
typename Real,
typename su3_matrix,
typename anti_hermitmat,
typename half_wilson_vector>
937 half_wilson_vector* temp_x, Real* act_path_coeff,
941 int mu, nu, rho, sig;
943 Real OneLink, Lepage, FiveSt, ThreeSt, SevenSt;
945 Real mLepage, mFiveSt, mThreeSt, mSevenSt;
952 ferm_epsilon = 2.0*weight*eps;
953 OneLink = act_path_coeff[0]*ferm_epsilon ;
955 ThreeSt = act_path_coeff[2]*ferm_epsilon ; mThreeSt = -ThreeSt;
956 FiveSt = act_path_coeff[3]*ferm_epsilon ; mFiveSt = -FiveSt;
957 SevenSt = act_path_coeff[4]*ferm_epsilon ; mSevenSt = -SevenSt;
958 Lepage = act_path_coeff[5]*ferm_epsilon ; mLepage = -Lepage;
965 for(mu=0; mu<9; mu++){
981 printf(
"Calling hisq reference routine\n");
982 for(sig=0; sig < 8; sig++){
992 for(mu = 0; mu < 8; mu++){
993 if ( (mu == sig) || (mu ==
OPP_DIR(sig))){
1015 for(nu=0; nu < 8; nu++){
1016 if (nu == sig || nu ==
OPP_DIR(sig)
1017 || nu == mu || nu ==
OPP_DIR(mu)){
1041 for(rho =0; rho < 8; rho++){
1042 if (rho == sig || rho ==
OPP_DIR(sig)
1043 || rho == mu || rho ==
OPP_DIR(mu)
1044 || rho == nu || rho ==
OPP_DIR(nu)){
1063 if(FiveSt != 0)coeff = SevenSt/FiveSt ;
else coeff = 0;
1073 if(ThreeSt != 0)coeff = FiveSt/ThreeSt;
else coeff = 0;
1093 if(ThreeSt != 0)coeff = Lepage/ThreeSt;
else coeff = 0;
1106 for(mu=0; mu<9; mu++){
1139 void* act_path_coeff,
void* temp_x,
1140 void* sitelink,
void* mom)
1151 void* act_path_coeff,
void* temp_x,
1152 void* sitelink,
void* mom)
1163 void* act_path_coeff,
void* temp_xx,
1164 void* sitelink,
void* mom)
static void scalar_mult_add_su3_vector(su3_vector *a, su3_vector *b, Real s, su3_vector *c)
static void add_force_to_momentum(su3_matrix *back, su3_matrix *forw, int dir, Real coeff, anti_hermitmat *momentum)
static void set_identity(su3_matrix *matrices, int num_dirs)
enum QudaPrecision_s QudaPrecision
static void forward_shifted_outer_prod(half_wilson_vector *src, su3_matrix *dest, int dir)
static void scale_su3_matrix(su3_matrix *a, Float s)
static void matrix_mult_na(su3_matrix *a, su3_matrix *b, su3_matrix *c)
int neighborIndexFullLattice(int i, int dx4, int dx3, int dx2, int dx1)
static void set_identity_matrix(su3_matrix *mat)
void halfwilson_hisq_force_reference(float eps, float weight, void *act_path_coeff, void *temp_x, void *sitelink, void *mom)
static void matrix_mult_an(su3_matrix *a, su3_matrix *b, su3_matrix *c)
#define GOES_FORWARDS(dir)
static void add_3f_force_to_mom(half_wilson_vector *back, half_wilson_vector *forw, int dir, Real coeff[2], anti_hermitmat *momentum)
static void matrix_mult_aa(su3_matrix *a, su3_matrix *b, su3_matrix *c)
static void u_shift_hw(half_wilson_vector *src, half_wilson_vector *dest, int dir, su3_matrix *sitelink)
static void su3_adjoint(su3_matrix *a, su3_matrix *b)
static void mult_su3_mat_vec(su3_matrix *a, su3_vector *b, su3_vector *c)
static void print_su3_matrix(su3_matrix *a)
static void matrix_mult_nn(su3_matrix *a, su3_matrix *b, su3_matrix *c)
static void scalar_mult_add_su3_matrix(su3_matrix *a, su3_matrix *b, Float s, su3_matrix *c)
static void computeLinkOrderedOuterProduct(half_wilson_vector *src, su3_matrix *dest, int gauge_order)
static void make_anti_hermitian(su3_matrix *m3, anti_hermitmat *ah3)
static void shiftedOuterProduct(half_wilson_vector *src, su3_matrix *dest)
std::complex< real > e[3][3]
void do_color_matrix_hisq_force_reference(Real eps, Real weight, su3_matrix *temp_xx, Real *act_path_coeff, su3_matrix *sitelink, anti_hermitmat *mom)
static void mult_adj_su3_mat_vec(su3_matrix *a, su3_vector *b, su3_vector *c)
void computeHisqOuterProduct(void *src, void *dest, QudaPrecision precision)
static void scalar_mult_sub_su3_matrix(su3_matrix *a, su3_matrix *b, Float s, su3_matrix *c)
static void update_mom(anti_hermitmat *momentum, int dir, su3_matrix *sitelink, su3_matrix *staple, Float eb3)
QudaGaugeFieldOrder gauge_order
Main header file for the QUDA library.
static void u_shift_mat(su3_matrix *src, su3_matrix *dest, int dir, su3_matrix *sitelink)
static void mult_su3_na(su3_matrix *a, su3_matrix *b, su3_matrix *c)
static void shifted_outer_prod(half_wilson_vector *src, su3_matrix *dest, int dir)
static void adjoint_su3_matrix(su3_matrix *a)
#define GOES_BACKWARDS(dir)
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
void do_halfwilson_hisq_force_reference(Real eps, Real weight, half_wilson_vector *temp_x, Real *act_path_coeff, su3_matrix *sitelink, anti_hermitmat *mom)
static void su3_projector(su3_vector *a, su3_vector *b, su3_matrix *c)
void color_matrix_hisq_force_reference(float eps, float weight, void *act_path_coeff, void *temp_xx, void *sitelink, void *mom)
static void side_link_3f_force(int mu, int nu, Real coeff[2], half_wilson_vector *Path, half_wilson_vector *Path_nu, half_wilson_vector *Path_mu, half_wilson_vector *Path_numu, anti_hermitmat *mom)
static void side_link_force(int mu, int nu, Real coeff, su3_matrix *Path, su3_matrix *Path_nu, su3_matrix *Path_mu, su3_matrix *Path_numu, anti_hermitmat *mom)
static void forwardShiftedOuterProduct(half_wilson_vector *src, su3_matrix *dest)
su3_matrix * get_su3_matrix(int gauge_order, su3_matrix *p, int idx, int dir)
static void uncompress_anti_hermitian(anti_hermitmat *mat_antihermit, su3_matrix *mat_su3)