17 #define CADD(a,b,c) { (c).real = (a).real + (b).real; \ 18 (c).imag = (a).imag + (b).imag; } 19 #define CMUL(a,b,c) { (c).real = (a).real*(b).real - (a).imag*(b).imag; \ 20 (c).imag = (a).real*(b).imag + (a).imag*(b).real; } 21 #define CSUM(a,b) { (a).real += (b).real; (a).imag += (b).imag; } 24 #define CMULJ_(a,b,c) { (c).real = (a).real*(b).real + (a).imag*(b).imag; \ 25 (c).imag = (a).real*(b).imag - (a).imag*(b).real; } 28 #define CMUL_J(a,b,c) { (c).real = (a).real*(b).real + (a).imag*(b).imag; \ 29 (c).imag = (a).imag*(b).real - (a).real*(b).imag; } 31 #define CONJG(a,b) { (b).real = (a).real; (b).imag = -(a).imag; } 51 float m00im,m11im,m22im;
57 double m00im,m11im,m22im;
65 template<
typename su3_matrix>
69 return (
p + 4*
idx + dir);
74 errorQuda(
"get_su3_matrix: unsupported ordering scheme!\n");
79 template<
typename su3_matrix>
84 for(
i=0;
i<3;
i++)
for(j=0;j<3;j++){
89 template<
typename su3_matrix>
95 for(
i=0;
i<3;
i++)
for(j=0;j<3;j++){
102 template<
typename su3_matrix,
typename anti_hermitmat>
106 auto temp = (m3->
e[0][0].imag + m3->
e[1][1].imag + m3->
e[2][2].imag)*0.33333333333333333;
107 ah3->m00im = m3->
e[0][0].imag - temp;
108 ah3->m11im = m3->
e[1][1].imag - temp;
109 ah3->m22im = m3->
e[2][2].imag - temp;
110 ah3->m01.real = (m3->
e[0][1].real - m3->
e[1][0].real)*0.5;
111 ah3->m02.real = (m3->
e[0][2].real - m3->
e[2][0].real)*0.5;
112 ah3->m12.real = (m3->
e[1][2].real - m3->
e[2][1].real)*0.5;
113 ah3->m01.imag = (m3->
e[0][1].imag + m3->
e[1][0].imag)*0.5;
114 ah3->m02.imag = (m3->
e[0][2].imag + m3->
e[2][0].imag)*0.5;
115 ah3->m12.imag = (m3->
e[1][2].imag + m3->
e[2][1].imag)*0.5;
119 template <
typename anti_hermitmat,
typename su3_matrix>
124 typename std::remove_reference<decltype(mat_antihermit->m00im)>::type temp1;
125 mat_su3->
e[0][0].imag=mat_antihermit->m00im;
126 mat_su3->
e[0][0].real=0.;
127 mat_su3->
e[1][1].imag=mat_antihermit->m11im;
128 mat_su3->
e[1][1].real=0.;
129 mat_su3->
e[2][2].imag=mat_antihermit->m22im;
130 mat_su3->
e[2][2].real=0.;
131 mat_su3->
e[0][1].imag=mat_antihermit->m01.imag;
132 temp1=mat_antihermit->m01.real;
133 mat_su3->
e[0][1].real=temp1;
134 mat_su3->
e[1][0].real= -temp1;
135 mat_su3->
e[1][0].imag=mat_antihermit->m01.imag;
136 mat_su3->
e[0][2].imag=mat_antihermit->m02.imag;
137 temp1=mat_antihermit->m02.real;
138 mat_su3->
e[0][2].real=temp1;
139 mat_su3->
e[2][0].real= -temp1;
140 mat_su3->
e[2][0].imag=mat_antihermit->m02.imag;
141 mat_su3->
e[1][2].imag=mat_antihermit->m12.imag;
142 temp1=mat_antihermit->m12.real;
143 mat_su3->
e[1][2].real=temp1;
144 mat_su3->
e[2][1].real= -temp1;
145 mat_su3->
e[2][1].imag=mat_antihermit->m12.imag;
148 template <
typename su3_matrix,
typename Float>
155 c->e[
i][j].real =
a->e[
i][j].real -
s*
b->e[
i][j].real;
156 c->e[
i][j].imag =
a->e[
i][j].imag -
s*
b->e[
i][j].imag;
161 template <
typename su3_matrix,
typename Float>
168 c->e[
i][j].real =
a->e[
i][j].real +
s*
b->e[
i][j].real;
169 c->e[
i][j].imag =
a->e[
i][j].imag +
s*
b->e[
i][j].imag;
173 template <
typename su3_matrix,
typename Float>
180 a->e[
i][j].real =
a->e[
i][j].real *
s;
181 a->e[
i][j].imag =
a->e[
i][j].imag *
s;
186 template<
typename su3_matrix,
typename su3_vector>
191 typename std::remove_reference<decltype(
a->e[0][0])>::type
x,
y;
201 template<
typename su3_matrix,
typename su3_vector>
206 typename std::remove_reference<decltype(
a->e[0][0])>::type
x,
y,
z;
218 template<
typename su3_vector,
typename su3_matrix>
223 for(
i=0;
i<3;
i++)
for(j=0;j<3;j++){
228 template<
typename su3_vector,
typename Real>
235 c->c[
i].real =
a->c[
i].real +
s*
b->c[
i].real;
236 c->c[
i].imag =
a->c[
i].imag +
s*
b->c[
i].imag;
242 template <
typename su3_matrix>
249 printf(
"(%f %f)\t",
a->e[
i][j].real,
a->e[
i][j].imag);
259 template<
typename su3_matrix>
263 typename std::remove_reference<decltype(
c->e[0][0])>::type
x;
264 for(
int i=0;
i<3;
i++){
265 for(
int j=0; j<3; j++){
266 c->e[
i][j].real = 0.;
267 c->e[
i][j].imag = 0.;
268 for(
int k=0; k<3; k++){
270 c->e[
i][j].real +=
x.real;
271 c->e[
i][j].imag +=
x.imag;
279 template<
typename su3_matrix>
283 typename std::remove_reference<decltype(
c->e[0][0])>::type
x;
284 for(
int i=0;
i<3;
i++){
285 for(
int j=0; j<3; j++){
286 c->e[
i][j].real = 0.;
287 c->e[
i][j].imag = 0.;
288 for(
int k=0; k<3; k++){
290 c->e[
i][j].real +=
x.real;
291 c->e[
i][j].imag +=
x.imag;
300 template<
typename su3_matrix>
304 typename std::remove_reference<decltype(
c->e[0][0])>::type
x;
305 for(
int i=0;
i<3;
i++){
306 for(
int j=0; j<3; j++){
307 c->e[
i][j].real = 0.;
c->e[
i][j].imag = 0.;
308 for(
int k=0; k<3; k++){
310 c->e[
i][j].real +=
x.real;
311 c->e[
i][j].imag +=
x.imag;
318 template<
typename su3_matrix>
327 template <
typename su3_matrix,
typename anti_hermitmat,
typename Float>
340 anti_hermitmat* mom = momentum + 4*
i+dir;
353 template<
typename half_wilson_vector,
typename su3_matrix>
360 dx[3]=dx[2]=dx[1]=dx[0]=0;
366 half_wilson_vector*
hw =
src + nbr_idx;
375 half_wilson_vector*
hw =
src + nbr_idx;
385 template<
typename half_wilson_vector,
typename su3_matrix>
393 dx[3]=dx[2]=dx[1]=dx[0]=0;
401 half_wilson_vector*
hw =
src + nbr_idx;
407 template<
typename half_wilson_vector,
typename su3_matrix>
415 dx[3]=dx[2]=dx[1]=dx[0]=0;
423 half_wilson_vector*
hw =
src + nbr_idx;
435 template <
typename half_wilson_vector,
typename su3_matrix>
440 for(
int i=0;
i<
V; ++
i){
441 for(
int dir=0; dir<4; ++dir){
442 dx[3]=dx[2]=dx[1]=dx[0]=0;
445 half_wilson_vector*
hw =
src + nbr_idx;
454 template <
typename half_wilson_vector,
typename su3_matrix>
459 for(
int i=0;
i<
V; ++
i){
460 for(
int dir=0; dir<4; ++dir){
461 dx[3]=dx[2]=dx[1]=dx[0]=0;
464 half_wilson_vector*
hw =
src + nbr_idx;
497 template<
typename half_wilson_vector,
typename su3_matrix>
499 for(
int dir=0; dir<4; dir++){
504 template<
typename half_wilson_vector,
typename su3_matrix>
506 for(
int dir=0; dir<4; dir++){
525 template<
typename su3_matrix>
531 dx[3]=dx[2]=dx[1]=dx[0]=0;
559 template <
typename half_wilson_vector,
560 typename anti_hermitmat,
typename Real>
563 int dir, Real
coeff[2], anti_hermitmat* momentum)
572 my_coeff[0] = -
coeff[0];
573 my_coeff[1] = -
coeff[1];
576 my_coeff[0] =
coeff[0];
577 my_coeff[1] =
coeff[1];
582 tmp_coeff[0] = my_coeff[0];
583 tmp_coeff[1] = my_coeff[1];
585 tmp_coeff[0] = -my_coeff[0];
586 tmp_coeff[1] = -my_coeff[1] ;
589 if (
sizeof(Real) ==
sizeof(
float)){
592 anti_hermitmat* mom = momentum+ 4*
i + mydir;
600 anti_hermitmat* mom = momentum+ 4*
i + mydir;
610 template<
typename Real,
typename half_wilson_vector,
typename anti_hermitmat>
613 half_wilson_vector *Path_nu, half_wilson_vector *Path_mu,
614 half_wilson_vector *Path_numu, anti_hermitmat* mom)
618 m_coeff[0] = -
coeff[0] ;
619 m_coeff[1] = -
coeff[1] ;
637 template<
typename Real,
typename su3_matrix,
typename anti_hermitmat>
640 int dir, Real
coeff, anti_hermitmat* momentum)
657 if(
i<
Vh){ tmp_coeff = my_coeff; }
658 else{ tmp_coeff = -my_coeff; }
663 anti_hermitmat* mom = momentum + 4*
i + mydir;
674 template<
typename Real,
typename su3_matrix,
typename anti_hermitmat>
708 #define Pmu tempmat[0] 709 #define Pnumu tempmat[1] 710 #define Prhonumu tempmat[2] 711 #define P7 tempmat[3] 712 #define P7rho tempmat[4] 713 #define P5 tempmat[5] 714 #define P3 tempmat[6] 715 #define P5nu tempmat[3] 716 #define P3mu tempmat[3] 717 #define Popmu tempmat[4] 718 #define Pmumumu tempmat[4] 720 #define Qmu tempmat[7] 721 #define Qnumu tempmat[8] 722 #define Qrhonumu tempmat[2] // same as Prhonumu 728 template<
typename su3_matrix>
731 for(
int i=0;
i<3;
i++){
732 for(
int j=0; j<3; j++){
741 template<
typename su3_matrix>
743 for(
int i=0;
i<
V*num_dirs;
i++){
749 template <
typename Real,
typename su3_matrix,
typename anti_hermitmat>
755 int mu, nu, rho, sig;
757 Real OneLink, Lepage, FiveSt, ThreeSt, SevenSt;
759 Real mLepage, mFiveSt, mThreeSt, mSevenSt;
766 ferm_epsilon = 2.0*weight*eps;
767 OneLink = act_path_coeff[0]*ferm_epsilon ;
769 ThreeSt = act_path_coeff[2]*ferm_epsilon ; mThreeSt = -ThreeSt;
770 FiveSt = act_path_coeff[3]*ferm_epsilon ; mFiveSt = -FiveSt;
771 SevenSt = act_path_coeff[4]*ferm_epsilon ; mSevenSt = -SevenSt;
772 Lepage = act_path_coeff[5]*ferm_epsilon ; mLepage = -Lepage;
799 printf(
"Calling modified hisq\n");
803 for(sig=0; sig < 8; sig++){
814 for(
mu = 0;
mu < 8;
mu++){
838 for(nu=0; nu < 8; nu++){
839 if (nu == sig || nu ==
OPP_DIR(sig)
863 for(rho =0; rho < 8; rho++){
864 if (rho == sig || rho ==
OPP_DIR(sig)
866 || rho == nu || rho ==
OPP_DIR(nu)){
884 if(FiveSt != 0)
coeff = SevenSt/FiveSt ;
else coeff = 0;
894 if(ThreeSt != 0)
coeff = FiveSt/ThreeSt;
else coeff = 0;
914 if(ThreeSt != 0)
coeff = Lepage/ThreeSt;
else coeff = 0;
935 template <
typename Real,
typename su3_matrix,
typename anti_hermitmat,
typename half_wilson_vector>
937 half_wilson_vector* temp_x, Real* act_path_coeff,
941 int mu, nu, rho, sig;
943 Real OneLink, Lepage, FiveSt, ThreeSt, SevenSt;
945 Real mLepage, mFiveSt, mThreeSt, mSevenSt;
952 ferm_epsilon = 2.0*weight*eps;
953 OneLink = act_path_coeff[0]*ferm_epsilon ;
955 ThreeSt = act_path_coeff[2]*ferm_epsilon ; mThreeSt = -ThreeSt;
956 FiveSt = act_path_coeff[3]*ferm_epsilon ; mFiveSt = -FiveSt;
957 SevenSt = act_path_coeff[4]*ferm_epsilon ; mSevenSt = -SevenSt;
958 Lepage = act_path_coeff[5]*ferm_epsilon ; mLepage = -Lepage;
981 printf(
"Calling hisq reference routine\n");
982 for(sig=0; sig < 8; sig++){
992 for(
mu = 0;
mu < 8;
mu++){
1015 for(nu=0; nu < 8; nu++){
1016 if (nu == sig || nu ==
OPP_DIR(sig)
1041 for(rho =0; rho < 8; rho++){
1042 if (rho == sig || rho ==
OPP_DIR(sig)
1044 || rho == nu || rho ==
OPP_DIR(nu)){
1063 if(FiveSt != 0)
coeff = SevenSt/FiveSt ;
else coeff = 0;
1073 if(ThreeSt != 0)
coeff = FiveSt/ThreeSt;
else coeff = 0;
1093 if(ThreeSt != 0)
coeff = Lepage/ThreeSt;
else coeff = 0;
1139 void* act_path_coeff,
void* temp_x,
1140 void* sitelink,
void* mom)
1151 void* act_path_coeff,
void* temp_x,
1152 void* sitelink,
void* mom)
1163 void* act_path_coeff,
void* temp_xx,
1164 void* sitelink,
void* mom)
static void scalar_mult_add_su3_vector(su3_vector *a, su3_vector *b, Real s, su3_vector *c)
static void add_force_to_momentum(su3_matrix *back, su3_matrix *forw, int dir, Real coeff, anti_hermitmat *momentum)
static void set_identity(su3_matrix *matrices, int num_dirs)
enum QudaPrecision_s QudaPrecision
static void forward_shifted_outer_prod(half_wilson_vector *src, su3_matrix *dest, int dir)
static void scale_su3_matrix(su3_matrix *a, Float s)
static void matrix_mult_na(su3_matrix *a, su3_matrix *b, su3_matrix *c)
int neighborIndexFullLattice(int i, int dx4, int dx3, int dx2, int dx1)
static void set_identity_matrix(su3_matrix *mat)
void halfwilson_hisq_force_reference(float eps, float weight, void *act_path_coeff, void *temp_x, void *sitelink, void *mom)
static void matrix_mult_an(su3_matrix *a, su3_matrix *b, su3_matrix *c)
#define GOES_FORWARDS(dir)
static void add_3f_force_to_mom(half_wilson_vector *back, half_wilson_vector *forw, int dir, Real coeff[2], anti_hermitmat *momentum)
static void matrix_mult_aa(su3_matrix *a, su3_matrix *b, su3_matrix *c)
void * malloc(size_t __size) __attribute__((__warn_unused_result__)) __attribute__((alloc_size(1)))
static void u_shift_hw(half_wilson_vector *src, half_wilson_vector *dest, int dir, su3_matrix *sitelink)
int printf(const char *,...) __attribute__((__format__(__printf__
static void su3_adjoint(su3_matrix *a, su3_matrix *b)
static void mult_su3_mat_vec(su3_matrix *a, su3_vector *b, su3_vector *c)
static void print_su3_matrix(su3_matrix *a)
static void matrix_mult_nn(su3_matrix *a, su3_matrix *b, su3_matrix *c)
static void scalar_mult_add_su3_matrix(su3_matrix *a, su3_matrix *b, Float s, su3_matrix *c)
def id
projector matrices ######################################################################## ...
static __inline__ size_t p
static void computeLinkOrderedOuterProduct(half_wilson_vector *src, su3_matrix *dest, int gauge_order)
static void make_anti_hermitian(su3_matrix *m3, anti_hermitmat *ah3)
static void shiftedOuterProduct(half_wilson_vector *src, su3_matrix *dest)
static __inline__ size_t h
std::complex< real > e[3][3]
void do_color_matrix_hisq_force_reference(Real eps, Real weight, su3_matrix *temp_xx, Real *act_path_coeff, su3_matrix *sitelink, anti_hermitmat *mom)
static void mult_adj_su3_mat_vec(su3_matrix *a, su3_vector *b, su3_vector *c)
void computeHisqOuterProduct(void *src, void *dest, QudaPrecision precision)
static void scalar_mult_sub_su3_matrix(su3_matrix *a, su3_matrix *b, Float s, su3_matrix *c)
static void update_mom(anti_hermitmat *momentum, int dir, su3_matrix *sitelink, su3_matrix *staple, Float eb3)
QudaGaugeFieldOrder gauge_order
Main header file for the QUDA library.
static void u_shift_mat(su3_matrix *src, su3_matrix *dest, int dir, su3_matrix *sitelink)
static void mult_su3_na(su3_matrix *a, su3_matrix *b, su3_matrix *c)
static void shifted_outer_prod(half_wilson_vector *src, su3_matrix *dest, int dir)
static void adjoint_su3_matrix(su3_matrix *a)
#define GOES_BACKWARDS(dir)
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
void do_halfwilson_hisq_force_reference(Real eps, Real weight, half_wilson_vector *temp_x, Real *act_path_coeff, su3_matrix *sitelink, anti_hermitmat *mom)
static void su3_projector(su3_vector *a, su3_vector *b, su3_matrix *c)
void color_matrix_hisq_force_reference(float eps, float weight, void *act_path_coeff, void *temp_xx, void *sitelink, void *mom)
static void side_link_3f_force(int mu, int nu, Real coeff[2], half_wilson_vector *Path, half_wilson_vector *Path_nu, half_wilson_vector *Path_mu, half_wilson_vector *Path_numu, anti_hermitmat *mom)
static void side_link_force(int mu, int nu, Real coeff, su3_matrix *Path, su3_matrix *Path_nu, su3_matrix *Path_mu, su3_matrix *Path_numu, anti_hermitmat *mom)
static void forwardShiftedOuterProduct(half_wilson_vector *src, su3_matrix *dest)
su3_matrix * get_su3_matrix(int gauge_order, su3_matrix *p, int idx, int dir)
static void uncompress_anti_hermitian(anti_hermitmat *mat_antihermit, su3_matrix *mat_su3)