QUDA v0.4.0
A library for QCD on GPUs
|
00001 #include <stdio.h> 00002 #include <stdlib.h> 00003 #include <math.h> 00004 #include <string.h> 00005 00006 #include "quda.h" 00007 #include "test_util.h" 00008 #include "misc.h" 00009 #include "hisq_force_reference.h" 00010 00011 extern int Z[4]; 00012 extern int V; 00013 extern int Vh; 00014 00015 00016 #define CADD(a,b,c) { (c).real = (a).real + (b).real; \ 00017 (c).imag = (a).imag + (b).imag; } 00018 #define CMUL(a,b,c) { (c).real = (a).real*(b).real - (a).imag*(b).imag; \ 00019 (c).imag = (a).real*(b).imag + (a).imag*(b).real; } 00020 #define CSUM(a,b) { (a).real += (b).real; (a).imag += (b).imag; } 00021 00022 /* c = a* * b */ 00023 #define CMULJ_(a,b,c) { (c).real = (a).real*(b).real + (a).imag*(b).imag; \ 00024 (c).imag = (a).real*(b).imag - (a).imag*(b).real; } 00025 00026 /* c = a * b* */ 00027 #define CMUL_J(a,b,c) { (c).real = (a).real*(b).real + (a).imag*(b).imag; \ 00028 (c).imag = (a).imag*(b).real - (a).real*(b).imag; } 00029 00030 #define CONJG(a,b) { (b).real = (a).real; (b).imag = -(a).imag; } 00031 00032 typedef struct { 00033 float real; 00034 float imag; 00035 } fcomplex; 00036 00037 /* specific for double complex */ 00038 typedef struct { 00039 double real; 00040 double imag; 00041 } dcomplex; 00042 00043 typedef struct { fcomplex e[3][3]; } fsu3_matrix; 00044 typedef struct { fcomplex c[3]; } fsu3_vector; 00045 typedef struct { dcomplex e[3][3]; } dsu3_matrix; 00046 typedef struct { dcomplex c[3]; } dsu3_vector; 00047 00048 typedef struct { 00049 fcomplex m01,m02,m12; 00050 float m00im,m11im,m22im; 00051 float space; 00052 } fanti_hermitmat; 00053 00054 typedef struct { 00055 dcomplex m01,m02,m12; 00056 double m00im,m11im,m22im; 00057 double space; 00058 } danti_hermitmat; 00059 00060 typedef struct { fsu3_vector h[2]; } fhalf_wilson_vector; 00061 typedef struct { dsu3_vector h[2]; } dhalf_wilson_vector; 00062 00063 00064 template<typename su3_matrix> 00065 static void 00066 su3_adjoint( su3_matrix *a, su3_matrix *b ) 00067 { 00068 int i,j; 00069 for(i=0;i<3;i++)for(j=0;j<3;j++){ 00070 CONJG( a->e[j][i], b->e[i][j] ); 00071 } 00072 } 00073 00074 00075 template<typename su3_matrix, typename anti_hermitmat> 00076 static void 00077 make_anti_hermitian( su3_matrix *m3, anti_hermitmat *ah3 ) 00078 { 00079 00080 typeof(ah3->m00im) temp = 00081 (m3->e[0][0].imag + m3->e[1][1].imag + m3->e[2][2].imag)*0.33333333333333333; 00082 ah3->m00im = m3->e[0][0].imag - temp; 00083 ah3->m11im = m3->e[1][1].imag - temp; 00084 ah3->m22im = m3->e[2][2].imag - temp; 00085 ah3->m01.real = (m3->e[0][1].real - m3->e[1][0].real)*0.5; 00086 ah3->m02.real = (m3->e[0][2].real - m3->e[2][0].real)*0.5; 00087 ah3->m12.real = (m3->e[1][2].real - m3->e[2][1].real)*0.5; 00088 ah3->m01.imag = (m3->e[0][1].imag + m3->e[1][0].imag)*0.5; 00089 ah3->m02.imag = (m3->e[0][2].imag + m3->e[2][0].imag)*0.5; 00090 ah3->m12.imag = (m3->e[1][2].imag + m3->e[2][1].imag)*0.5; 00091 00092 } 00093 00094 template <typename anti_hermitmat, typename su3_matrix> 00095 static void 00096 uncompress_anti_hermitian(anti_hermitmat *mat_antihermit, 00097 su3_matrix *mat_su3 ) 00098 { 00099 typeof(mat_antihermit->m00im) temp1; 00100 mat_su3->e[0][0].imag=mat_antihermit->m00im; 00101 mat_su3->e[0][0].real=0.; 00102 mat_su3->e[1][1].imag=mat_antihermit->m11im; 00103 mat_su3->e[1][1].real=0.; 00104 mat_su3->e[2][2].imag=mat_antihermit->m22im; 00105 mat_su3->e[2][2].real=0.; 00106 mat_su3->e[0][1].imag=mat_antihermit->m01.imag; 00107 temp1=mat_antihermit->m01.real; 00108 mat_su3->e[0][1].real=temp1; 00109 mat_su3->e[1][0].real= -temp1; 00110 mat_su3->e[1][0].imag=mat_antihermit->m01.imag; 00111 mat_su3->e[0][2].imag=mat_antihermit->m02.imag; 00112 temp1=mat_antihermit->m02.real; 00113 mat_su3->e[0][2].real=temp1; 00114 mat_su3->e[2][0].real= -temp1; 00115 mat_su3->e[2][0].imag=mat_antihermit->m02.imag; 00116 mat_su3->e[1][2].imag=mat_antihermit->m12.imag; 00117 temp1=mat_antihermit->m12.real; 00118 mat_su3->e[1][2].real=temp1; 00119 mat_su3->e[2][1].real= -temp1; 00120 mat_su3->e[2][1].imag=mat_antihermit->m12.imag; 00121 } 00122 00123 template <typename su3_matrix, typename Float> 00124 static void 00125 scalar_mult_sub_su3_matrix(su3_matrix *a,su3_matrix *b, Float s, su3_matrix *c) 00126 { 00127 int i,j; 00128 for(i=0;i<3;i++){ 00129 for(j=0;j<3;j++){ 00130 c->e[i][j].real = a->e[i][j].real - s*b->e[i][j].real; 00131 c->e[i][j].imag = a->e[i][j].imag - s*b->e[i][j].imag; 00132 } 00133 } 00134 } 00135 00136 template <typename su3_matrix, typename Float> 00137 static void 00138 scalar_mult_add_su3_matrix(su3_matrix *a,su3_matrix *b, Float s, su3_matrix *c) 00139 { 00140 int i,j; 00141 for(i=0;i<3;i++){ 00142 for(j=0;j<3;j++){ 00143 c->e[i][j].real = a->e[i][j].real + s*b->e[i][j].real; 00144 c->e[i][j].imag = a->e[i][j].imag + s*b->e[i][j].imag; 00145 } 00146 } 00147 } 00148 00149 00150 template<typename su3_matrix, typename su3_vector> 00151 static void 00152 mult_su3_mat_vec( su3_matrix *a, su3_vector *b, su3_vector *c ) 00153 { 00154 int i,j; 00155 typeof(a->e[0][0]) x,y; 00156 for(i=0;i<3;i++){ 00157 x.real=x.imag=0.0; 00158 for(j=0;j<3;j++){ 00159 CMUL( a->e[i][j] , b->c[j] , y ); 00160 CSUM( x , y ); 00161 } 00162 c->c[i]=x; 00163 } 00164 } 00165 template<typename su3_matrix, typename su3_vector> 00166 static void 00167 mult_adj_su3_mat_vec( su3_matrix *a, su3_vector *b, su3_vector *c ) 00168 { 00169 int i,j; 00170 typeof(a->e[0][0]) x,y,z; 00171 for(i=0;i<3;i++){ 00172 x.real=x.imag=0.0; 00173 for(j=0;j<3;j++){ 00174 CONJG( a->e[j][i], z ); 00175 CMUL( z , b->c[j], y ); 00176 CSUM( x , y ); 00177 } 00178 c->c[i] = x; 00179 } 00180 } 00181 00182 template<typename su3_vector, typename su3_matrix> 00183 static void 00184 su3_projector( su3_vector *a, su3_vector *b, su3_matrix *c ) 00185 { 00186 int i,j; 00187 for(i=0;i<3;i++)for(j=0;j<3;j++){ 00188 CMUL_J( a->c[i], b->c[j], c->e[i][j] ); 00189 } 00190 } 00191 00192 template<typename su3_vector, typename Real> 00193 static void 00194 scalar_mult_add_su3_vector(su3_vector *a, su3_vector *b, Real s, 00195 su3_vector *c) 00196 { 00197 int i; 00198 for(i=0;i<3;i++){ 00199 c->c[i].real = a->c[i].real + s*b->c[i].real; 00200 c->c[i].imag = a->c[i].imag + s*b->c[i].imag; 00201 } 00202 } 00203 00204 00205 00206 template < typename su3_matrix> 00207 static void 00208 print_su3_matrix(su3_matrix *a) 00209 { 00210 int i, j; 00211 for(i=0;i < 3; i++){ 00212 for(j=0;j < 3;j++){ 00213 printf("(%f %f)\t", a->e[i][j].real, a->e[i][j].imag); 00214 } 00215 printf("\n"); 00216 } 00217 00218 } 00219 00220 00221 00222 // Add a matrix multiplication function 00223 template<typename su3_matrix> 00224 static void 00225 matrix_mult_nn(su3_matrix* a, su3_matrix* b, su3_matrix* c){ 00226 // c = a*b 00227 typeof(c->e[0][0]) x; 00228 for(int i=0; i<3; i++){ 00229 for(int j=0; j<3; j++){ 00230 c->e[i][j].real = 0.; 00231 c->e[i][j].imag = 0.; 00232 for(int k=0; k<3; k++){ 00233 CMUL(a->e[i][k],b->e[k][j],x); 00234 c->e[i][j].real += x.real; 00235 c->e[i][j].imag += x.imag; 00236 } 00237 } 00238 } 00239 return; 00240 } 00241 00242 00243 template<typename su3_matrix> 00244 static void 00245 matrix_mult_an(su3_matrix* a, su3_matrix* b, su3_matrix* c){ 00246 // c = (a^{\dagger})*b 00247 typeof(c->e[0][0]) x; 00248 for(int i=0; i<3; i++){ 00249 for(int j=0; j<3; j++){ 00250 c->e[i][j].real = 0.; 00251 c->e[i][j].imag = 0.; 00252 for(int k=0; k<3; k++){ 00253 CMULJ_(a->e[k][i],b->e[k][j],x); 00254 c->e[i][j].real += x.real; 00255 c->e[i][j].imag += x.imag; 00256 } 00257 } 00258 } 00259 return; 00260 } 00261 00262 00263 00264 template<typename su3_matrix> 00265 static void 00266 matrix_mult_na(su3_matrix* a, su3_matrix* b, su3_matrix* c){ 00267 // c = a*b^{\dagger} 00268 typeof(c->e[0][0]) x; 00269 for(int i=0; i<3; i++){ 00270 for(int j=0; j<3; j++){ 00271 c->e[i][j].real = 0.; c->e[i][j].imag = 0.; 00272 for(int k=0; k<3; k++){ 00273 CMUL_J(a->e[i][k],b->e[j][k],x); 00274 c->e[i][j].real += x.real; 00275 c->e[i][j].imag += x.imag; 00276 } 00277 } 00278 } 00279 return; 00280 } 00281 00282 00283 00284 00285 template <typename su3_matrix, typename anti_hermitmat, typename Float> 00286 static void 00287 update_mom(anti_hermitmat* momentum, int dir, su3_matrix* sitelink, 00288 su3_matrix* staple, Float eb3) 00289 { 00290 int i; 00291 for(i=0;i <V; i++){ 00292 su3_matrix tmat1; 00293 su3_matrix tmat2; 00294 su3_matrix tmat3; 00295 00296 su3_matrix* lnk = sitelink + 4*i+dir; 00297 su3_matrix* stp = staple + i; 00298 anti_hermitmat* mom = momentum + 4*i+dir; 00299 00300 mult_su3_na(lnk, stp, &tmat1); 00301 uncompress_anti_hermitian(mom, &tmat2); 00302 00303 scalar_mult_sub_su3_matrix(&tmat2, &tmat1, eb3, &tmat3); 00304 make_anti_hermitian(&tmat3, mom); 00305 00306 } 00307 00308 } 00309 00310 00311 template<typename half_wilson_vector, typename su3_matrix> 00312 static void 00313 u_shift_hw(half_wilson_vector *src, half_wilson_vector *dest, int dir, su3_matrix* sitelink ) 00314 { 00315 int i ; 00316 int dx[4]; 00317 00318 dx[3]=dx[2]=dx[1]=dx[0]=0; 00319 00320 if(GOES_FORWARDS(dir)){ 00321 dx[dir]=1; 00322 for(i=0;i < V; i++){ 00323 int nbr_idx = neighborIndexFullLattice(i, dx[3], dx[2], dx[1], dx[0]); 00324 half_wilson_vector* hw = src + nbr_idx; 00325 su3_matrix* link = sitelink + i*4 + dir; 00326 mult_su3_mat_vec(link, &hw->h[0], &dest[i].h[0]); 00327 mult_su3_mat_vec(link, &hw->h[1], &dest[i].h[1]); 00328 } 00329 }else{ 00330 dx[OPP_DIR(dir)]=-1; 00331 for(i=0;i < V; i++){ 00332 int nbr_idx = neighborIndexFullLattice(i, dx[3], dx[2], dx[1], dx[0]); 00333 half_wilson_vector* hw = src + nbr_idx; 00334 su3_matrix* link = sitelink + nbr_idx*4 + OPP_DIR(dir); 00335 mult_adj_su3_mat_vec(link, &hw->h[0], &dest[i].h[0]); 00336 mult_adj_su3_mat_vec(link, &hw->h[1], &dest[i].h[1]); 00337 } 00338 } 00339 } 00340 00341 00342 00343 template<typename half_wilson_vector, typename su3_matrix> 00344 static void 00345 shifted_outer_prod(half_wilson_vector *src, su3_matrix* dest, int dir) 00346 { 00347 00348 int i; 00349 int dx[4]; 00350 00351 dx[3]=dx[2]=dx[1]=dx[0]=0; 00352 00353 if(GOES_FORWARDS(dir)){ 00354 dx[dir]=1; 00355 }else{ dx[OPP_DIR(dir)]=-1; } 00356 00357 for(i=0;i < V; i++){ 00358 int nbr_idx = neighborIndexFullLattice(i, dx[3], dx[2], dx[1], dx[0]); 00359 half_wilson_vector* hw = src + nbr_idx; 00360 su3_projector( &src[i].h[0], &(hw->h[0]), &dest[i]); 00361 } 00362 } 00363 00364 00365 template<typename half_wilson_vector, typename su3_matrix> 00366 static void 00367 forward_shifted_outer_prod(half_wilson_vector *src, su3_matrix* dest, int dir) 00368 { 00369 00370 int i; 00371 int dx[4]; 00372 00373 dx[3]=dx[2]=dx[1]=dx[0]=0; 00374 00375 if(GOES_FORWARDS(dir)){ 00376 dx[dir]=1; 00377 }else{ dx[OPP_DIR(dir)]=-1; } 00378 00379 for(i=0;i < V; i++){ 00380 int nbr_idx = neighborIndexFullLattice(i, dx[3], dx[2], dx[1], dx[0]); 00381 half_wilson_vector* hw = src + nbr_idx; 00382 //su3_projector( &src[i].h[0], &(hw->h[0]), &dest[i]); 00383 su3_projector( &(hw->h[0]), &src[i].h[0], &dest[i]); 00384 } 00385 00386 return; 00387 } 00388 00389 00390 00391 template <typename half_wilson_vector, typename su3_matrix> 00392 static void 00393 computeLinkOrderedOuterProduct(half_wilson_vector *src, su3_matrix* dest) 00394 { 00395 int dx[4]; 00396 for(int i=0; i<V; ++i){ 00397 for(int dir=0; dir<4; ++dir){ 00398 dx[3]=dx[2]=dx[1]=dx[0]=0; 00399 dx[dir] = 1; 00400 int nbr_idx = neighborIndexFullLattice(i, dx[3], dx[2], dx[1], dx[0]); 00401 half_wilson_vector* hw = src + nbr_idx; 00402 su3_projector( &(hw->h[0]), &src[i].h[0], &dest[i*4 + dir]); 00403 } // dir 00404 } // i 00405 return; 00406 } 00407 00408 00409 template <typename half_wilson_vector, typename su3_matrix> 00410 static void 00411 computeLinkOrderedOuterProduct(half_wilson_vector *src, su3_matrix* dest, size_t nhops) 00412 { 00413 int dx[4]; 00414 for(int i=0; i<V; ++i){ 00415 for(int dir=0; dir<4; ++dir){ 00416 dx[3]=dx[2]=dx[1]=dx[0]=0; 00417 dx[dir] = nhops; 00418 int nbr_idx = neighborIndexFullLattice(i, dx[3], dx[2], dx[1], dx[0]); 00419 half_wilson_vector* hw = src + nbr_idx; 00420 su3_projector( &(hw->h[0]), &src[i].h[0], &dest[i*4 + dir]); 00421 } // dir 00422 } // i 00423 return; 00424 } 00425 00426 00427 00428 00429 void computeLinkOrderedOuterProduct(void *src, void *dst, QudaPrecision precision) 00430 { 00431 if(precision == QUDA_SINGLE_PRECISION){ 00432 computeLinkOrderedOuterProduct((fhalf_wilson_vector*)src,(fsu3_matrix*)dst); 00433 }else{ 00434 computeLinkOrderedOuterProduct((dhalf_wilson_vector*)src,(dsu3_matrix*)dst); 00435 } 00436 return; 00437 } 00438 00439 void computeLinkOrderedOuterProduct(void *src, void *dst, QudaPrecision precision, size_t nhops) 00440 { 00441 if(precision == QUDA_SINGLE_PRECISION){ 00442 computeLinkOrderedOuterProduct((fhalf_wilson_vector*)src,(fsu3_matrix*)dst, nhops); 00443 }else{ 00444 computeLinkOrderedOuterProduct((dhalf_wilson_vector*)src,(dsu3_matrix*)dst, nhops); 00445 } 00446 return; 00447 } 00448 00449 00450 00451 template<typename half_wilson_vector, typename su3_matrix> 00452 static void shiftedOuterProduct(half_wilson_vector *src, su3_matrix* dest){ 00453 for(int dir=0; dir<4; dir++){ 00454 shifted_outer_prod(src, &dest[dir*V], OPP_DIR(dir)); 00455 } 00456 } 00457 00458 template<typename half_wilson_vector, typename su3_matrix> 00459 static void forwardShiftedOuterProduct(half_wilson_vector *src, su3_matrix* dest){ 00460 for(int dir=0; dir<4; dir++){ 00461 forward_shifted_outer_prod(src, &dest[dir*V], dir); 00462 } 00463 } 00464 00465 00466 void computeHisqOuterProduct(void* src, void* dest, QudaPrecision precision){ 00467 if(precision == QUDA_SINGLE_PRECISION){ 00468 forwardShiftedOuterProduct((fhalf_wilson_vector*)src,(fsu3_matrix*)dest); 00469 }else{ 00470 forwardShiftedOuterProduct((dhalf_wilson_vector*)src,(dsu3_matrix*)dest); 00471 } 00472 } 00473 00474 00475 // Note that the hisq routines do not involve half-wilson vectors, 00476 // but instead require colour matrix shifts 00477 00478 00479 template<typename su3_matrix> 00480 static void 00481 u_shift_mat(su3_matrix *src, su3_matrix *dest, int dir, su3_matrix* sitelink) 00482 { 00483 int i; 00484 int dx[4]; 00485 dx[3]=dx[2]=dx[1]=dx[0]=0; 00486 00487 if(GOES_FORWARDS(dir)){ 00488 dx[dir]=1; 00489 for(i=0; i<V; i++){ 00490 int nbr_idx = neighborIndexFullLattice(i, dx[3], dx[2], dx[1], dx[0]); 00491 su3_matrix* mat = src+nbr_idx; // No need for a factor of 4 here, the colour matrices do not have a Lorentz index 00492 su3_matrix* link = sitelink + i*4 + dir; 00493 matrix_mult_nn(link, mat, &dest[i]); 00494 } 00495 }else{ 00496 dx[OPP_DIR(dir)]=-1; 00497 for(i=0; i<V; i++){ 00498 int nbr_idx = neighborIndexFullLattice(i, dx[3], dx[2], dx[1], dx[0]); 00499 su3_matrix* mat = src+nbr_idx; // No need for a factor of 4 here, the colour matrices do not have a Lorentz index 00500 su3_matrix* link = sitelink + nbr_idx*4 + OPP_DIR(dir); 00501 matrix_mult_an(link, mat, &dest[i]); 00502 } 00503 } 00504 return; 00505 } 00506 00507 00508 00509 00510 00511 00512 00513 template <typename half_wilson_vector, 00514 typename anti_hermitmat, typename Real> 00515 static void 00516 add_3f_force_to_mom(half_wilson_vector *back, half_wilson_vector *forw, 00517 int dir, Real coeff[2], anti_hermitmat* momentum) 00518 { 00519 Real my_coeff[2] ; 00520 Real tmp_coeff[2] ; 00521 int mydir; 00522 int i; 00523 00524 if(GOES_BACKWARDS(dir)){ 00525 mydir = OPP_DIR(dir); 00526 my_coeff[0] = -coeff[0]; 00527 my_coeff[1] = -coeff[1]; 00528 }else{ 00529 mydir = dir; 00530 my_coeff[0] = coeff[0]; 00531 my_coeff[1] = coeff[1]; 00532 } 00533 00534 for(i=0;i < V;i++){ 00535 if (i < Vh){ 00536 tmp_coeff[0] = my_coeff[0]; 00537 tmp_coeff[1] = my_coeff[1]; 00538 }else{ 00539 tmp_coeff[0] = -my_coeff[0]; 00540 tmp_coeff[1] = -my_coeff[1] ; 00541 } 00542 00543 if (sizeof(Real) == sizeof(float)){ 00544 fsu3_matrix tmat; 00545 fsu3_matrix mom_matrix; 00546 anti_hermitmat* mom = momentum+ 4* i + mydir; 00547 uncompress_anti_hermitian(mom, &mom_matrix); 00548 su3_projector( &back[i].h[0], &forw[i].h[0], &tmat); 00549 scalar_mult_add_su3_matrix(&mom_matrix, &tmat, tmp_coeff[0], &mom_matrix ); 00550 make_anti_hermitian(&mom_matrix, mom); 00551 }else{ 00552 dsu3_matrix tmat; 00553 dsu3_matrix mom_matrix; 00554 anti_hermitmat* mom = momentum+ 4* i + mydir; 00555 uncompress_anti_hermitian(mom, &mom_matrix); 00556 su3_projector( &back[i].h[0], &forw[i].h[0], &tmat); 00557 scalar_mult_add_su3_matrix(&mom_matrix, &tmat, tmp_coeff[0], &mom_matrix ); 00558 make_anti_hermitian(&mom_matrix, mom); 00559 } 00560 } 00561 } 00562 00563 00564 template<typename Real, typename half_wilson_vector, typename anti_hermitmat> 00565 static void 00566 side_link_3f_force(int mu, int nu, Real coeff[2], half_wilson_vector *Path , 00567 half_wilson_vector *Path_nu, half_wilson_vector *Path_mu, 00568 half_wilson_vector *Path_numu, anti_hermitmat* mom) 00569 { 00570 Real m_coeff[2] ; 00571 00572 m_coeff[0] = -coeff[0] ; 00573 m_coeff[1] = -coeff[1] ; 00574 00575 if(GOES_FORWARDS(mu)){ 00576 if(GOES_FORWARDS(nu)){ 00577 add_3f_force_to_mom(Path_numu, Path, mu, coeff, mom) ; 00578 }else{ 00579 add_3f_force_to_mom(Path,Path_numu,OPP_DIR(mu),m_coeff, mom); 00580 } 00581 } 00582 else{ 00583 if(GOES_FORWARDS(nu)) 00584 add_3f_force_to_mom(Path_nu, Path_mu, mu, m_coeff, mom); 00585 else 00586 add_3f_force_to_mom(Path_mu, Path_nu, OPP_DIR(mu), coeff, mom) ; 00587 } 00588 } 00589 00590 00591 template<typename Real, typename su3_matrix, typename anti_hermitmat> 00592 static void 00593 add_force_to_momentum(su3_matrix *back, su3_matrix *forw, 00594 int dir, Real coeff, anti_hermitmat* momentum) 00595 { 00596 Real my_coeff; 00597 Real tmp_coeff; 00598 int mydir; 00599 int i; 00600 00601 if(GOES_BACKWARDS(dir)){ 00602 mydir = OPP_DIR(dir); 00603 my_coeff = -coeff; 00604 }else{ 00605 mydir = dir; 00606 my_coeff = coeff; 00607 } 00608 00609 00610 for(i=0; i<V; i++){ 00611 if(i<Vh){ tmp_coeff = my_coeff; } 00612 else{ tmp_coeff = -my_coeff; } 00613 00614 00615 su3_matrix tmat; 00616 su3_matrix mom_matrix; 00617 anti_hermitmat* mom = momentum + 4*i + mydir; 00618 uncompress_anti_hermitian(mom, &mom_matrix); 00619 matrix_mult_na(&back[i], &forw[i], &tmat); 00620 scalar_mult_add_su3_matrix(&mom_matrix, &tmat, tmp_coeff, &mom_matrix); 00621 00622 make_anti_hermitian(&mom_matrix, mom); 00623 } 00624 return; 00625 } 00626 00627 00628 template<typename Real, typename su3_matrix, typename anti_hermitmat> 00629 static void 00630 side_link_force(int mu, int nu, Real coeff, su3_matrix *Path, 00631 su3_matrix *Path_nu, su3_matrix *Path_mu, 00632 su3_matrix *Path_numu, anti_hermitmat* mom) 00633 { 00634 Real m_coeff; 00635 m_coeff = -coeff; 00636 00637 if(GOES_FORWARDS(mu)){ 00638 if(GOES_FORWARDS(nu)){ 00639 add_force_to_momentum(Path_numu, Path, mu, coeff, mom); 00640 // In the example below: 00641 // add_force_to_momentum(P7rho, Qnumu, rho, coeff, mom) 00642 }else{ 00643 add_force_to_momentum(Path, Path_numu, OPP_DIR(mu), m_coeff, mom); 00644 // add_force_to_momentum(Qnumu, P7rho, -Qnumu, rho, m_coeff, mom) 00645 } 00646 } 00647 else { // if (GOES_BACKWARDS(mu)) 00648 if(GOES_FORWARDS(nu)){ 00649 add_force_to_momentum(Path_nu, Path_mu, mu, m_coeff, mom); 00650 // add_force_to_momentum(P7, Qrhonumu, rho, m_coeff, mom) 00651 }else{ 00652 add_force_to_momentum(Path_mu, Path_nu, OPP_DIR(mu), coeff, mom); 00653 // add_force_to_momentum(Qrhonumu, P7, rho, coeff, mom) 00654 } 00655 } 00656 return; 00657 } 00658 00659 00660 00661 00662 #define Pmu tempmat[0] 00663 #define Pnumu tempmat[1] 00664 #define Prhonumu tempmat[2] 00665 #define P7 tempmat[3] 00666 #define P7rho tempmat[4] 00667 #define P5 tempmat[5] 00668 #define P3 tempmat[6] 00669 #define P5nu tempmat[3] 00670 #define P3mu tempmat[3] 00671 #define Popmu tempmat[4] 00672 #define Pmumumu tempmat[4] 00673 00674 #define Qmu tempmat[7] 00675 #define Qnumu tempmat[8] 00676 #define Qrhonumu tempmat[2] // same as Prhonumu 00677 00678 00679 00680 00681 00682 template<typename su3_matrix> 00683 static void set_identity_matrix(su3_matrix* mat) 00684 { 00685 for(int i=0; i<3; i++){ 00686 for(int j=0; j<3; j++){ 00687 mat->e[i][j].real=0; 00688 mat->e[i][j].imag=0; 00689 } 00690 mat->e[i][i].real=1; 00691 } 00692 } 00693 00694 00695 template<typename su3_matrix> 00696 static void set_identity(su3_matrix* matrices, int num_dirs){ 00697 for(int i=0; i<V*num_dirs; i++){ 00698 set_identity_matrix(&matrices[i]); 00699 } 00700 } 00701 00702 00703 template <typename Real, typename su3_matrix, typename anti_hermitmat> 00704 void do_color_matrix_hisq_force_reference(Real eps, Real weight, 00705 su3_matrix* temp_xx, Real* act_path_coeff, 00706 su3_matrix* sitelink, anti_hermitmat* mom) 00707 { 00708 int i; 00709 int mu, nu, rho, sig; 00710 Real coeff; 00711 Real OneLink, Lepage, FiveSt, ThreeSt, SevenSt; 00712 // Real Naik; 00713 Real mLepage, mFiveSt, mThreeSt, mSevenSt; 00714 00715 su3_matrix* tempmat[9]; 00716 int sites_on_node = V; 00717 //int DirectLinks[8] ; 00718 00719 Real ferm_epsilon; 00720 ferm_epsilon = 2.0*weight*eps; 00721 OneLink = act_path_coeff[0]*ferm_epsilon ; 00722 // Naik = act_path_coeff[1]*ferm_epsilon ; mNaik = -Naik; 00723 ThreeSt = act_path_coeff[2]*ferm_epsilon ; mThreeSt = -ThreeSt; 00724 FiveSt = act_path_coeff[3]*ferm_epsilon ; mFiveSt = -FiveSt; 00725 SevenSt = act_path_coeff[4]*ferm_epsilon ; mSevenSt = -SevenSt; 00726 Lepage = act_path_coeff[5]*ferm_epsilon ; mLepage = -Lepage; 00727 00728 // for(mu=0;mu<8;mu++){ 00729 // DirectLinks[mu] = 0 ; 00730 // } 00731 00732 for(mu=0; mu<9; mu++){ 00733 tempmat[mu] = (su3_matrix *)malloc( sites_on_node*sizeof(su3_matrix) ); 00734 } 00735 00736 00737 su3_matrix* id; 00738 id = (su3_matrix *)malloc(sites_on_node*sizeof(su3_matrix) ); 00739 //su3_matrix* id4; 00740 //id4 = (su3_matrix *)malloc(sites_on_node*4*sizeof(su3_matrix) ); 00741 00742 // su3_matrix* temp_mat; 00743 // temp_mat = (su3_matrix *)malloc(sites_on_node*sizeof(su3_matrix) ); 00744 00745 00746 // su3_matrix* temp_xx; 00747 // temp_xx = (su3_matrix *)malloc(sites_on_node*8*sizeof(su3_matrix) ); 00748 // shiftedOuterProduct(temp_x, temp_xx); 00749 00750 // initialise id so that it is the identity matrix on each lattice site 00751 set_identity(id,1); 00752 00753 printf("Calling modified hisq\n"); 00754 for(sig=0; sig < 8; sig++){ 00755 // One-link term - don't have the savings here that we get when working with the 00756 // half-wilson vectors 00757 if(GOES_FORWARDS(sig)){ 00758 u_shift_mat(&temp_xx[OPP_DIR(sig)*V], Pmu, sig, sitelink); 00759 add_force_to_momentum(Pmu, id, sig, OneLink, mom); // I could optimise functions which 00760 // involve id 00761 } 00762 00763 for(mu = 0; mu < 8; mu++){ 00764 if ( (mu == sig) || (mu == OPP_DIR(sig))){ 00765 continue; 00766 } 00767 // 3 link path 00768 // sig 00769 // A _______ 00770 // | | 00771 // mu /|\ \|/ 00772 // | | 00773 // 00774 // 00775 u_shift_mat(&temp_xx[OPP_DIR(sig)*V], Pmu, OPP_DIR(mu), sitelink); // temp_xx[sig] stores |X(x)><X(x-sig)| 00776 u_shift_mat(id, Qmu, OPP_DIR(mu), sitelink); // This returns the path less the outer-product of quark fields at the end 00777 // Qmu = U[mu] 00778 00779 00780 u_shift_mat(Pmu, P3, sig, sitelink); // P3 is U[sig](X)U[-mu](X+sig) temp_xx 00781 if (GOES_FORWARDS(sig)){ 00782 // add contribution from middle link 00783 add_force_to_momentum(P3, Qmu, sig, mThreeSt, mom); 00784 // matrix_mult_na(P3[x],Qmu[x],tmp); 00785 // mom[sig][x] += mThreeSt*tmp; 00786 } 00787 for(nu=0; nu < 8; nu++){ 00788 if (nu == sig || nu == OPP_DIR(sig) 00789 || nu == mu || nu == OPP_DIR(mu)){ 00790 continue; 00791 } 00792 00793 // 5 link path 00794 // 00795 // sig 00796 // A ________ 00797 // | | 00798 // /|\ \|/ 00799 // | | 00800 // \ \ 00801 // \ \ 00802 00803 u_shift_mat(Pmu, Pnumu, OPP_DIR(nu), sitelink); 00804 u_shift_mat(Qmu, Qnumu, OPP_DIR(nu), sitelink); 00805 00806 u_shift_mat(Pnumu, P5, sig, sitelink); 00807 if (GOES_FORWARDS(sig)){ 00808 add_force_to_momentum(P5, Qnumu, sig, FiveSt, mom); 00809 } // seems to do what I think it should 00810 for(rho =0; rho < 8; rho++){ 00811 if (rho == sig || rho == OPP_DIR(sig) 00812 || rho == mu || rho == OPP_DIR(mu) 00813 || rho == nu || rho == OPP_DIR(nu)){ 00814 continue; 00815 } 00816 00817 //7 link path 00818 u_shift_mat(Pnumu, Prhonumu, OPP_DIR(rho), sitelink); 00819 u_shift_mat(Prhonumu, P7, sig, sitelink); 00820 00821 // Prhonumu = tempmat[2] is not needed again 00822 // => store Qrhonumu in the same memory 00823 u_shift_mat(Qnumu, Qrhonumu, OPP_DIR(rho), sitelink); 00824 00825 00826 if(GOES_FORWARDS(sig)){ 00827 add_force_to_momentum(P7, Qrhonumu, sig, mSevenSt, mom) ; 00828 } 00829 u_shift_mat(P7, P7rho, rho, sitelink); 00830 side_link_force(rho, sig, SevenSt, Qnumu, P7, Qrhonumu, P7rho, mom); 00831 if(FiveSt != 0)coeff = SevenSt/FiveSt ; else coeff = 0; 00832 for(i=0; i<V; i++){ 00833 scalar_mult_add_su3_matrix(&P5[i], &P7rho[i], coeff, &P5[i]); 00834 } // end loop over volume 00835 } // end loop over rho 00836 00837 00838 u_shift_mat(P5, P5nu, nu, sitelink); 00839 side_link_force(nu,sig,mFiveSt,Qmu,P5,Qnumu,P5nu,mom); // I believe this should do what I want it to 00840 // check this! 00841 if(ThreeSt != 0)coeff = FiveSt/ThreeSt; else coeff = 0; 00842 00843 for(i=0; i<V; i++){ 00844 scalar_mult_add_su3_matrix(&P3[i], &P5nu[i], coeff, &P3[i]); 00845 } // end loop over volume 00846 } // end loop over nu 00847 00848 // Lepage term 00849 u_shift_mat(Pmu, Pnumu, OPP_DIR(mu), sitelink); 00850 u_shift_mat(Qmu, Qnumu, OPP_DIR(mu), sitelink); 00851 00852 u_shift_mat(Pnumu, P5, sig, sitelink); 00853 if(GOES_FORWARDS(sig)){ 00854 add_force_to_momentum(P5, Qnumu, sig, Lepage, mom); 00855 } 00856 00857 u_shift_mat(P5, P5nu, mu, sitelink); 00858 side_link_force(mu, sig, mLepage, Qmu, P5, Qnumu, P5nu, mom); 00859 00860 00861 if(ThreeSt != 0)coeff = Lepage/ThreeSt; else coeff = 0; 00862 00863 for(i=0; i<V; i++){ 00864 scalar_mult_add_su3_matrix(&P3[i], &P5nu[i], coeff, &P3[i]); 00865 } 00866 00867 if(GOES_FORWARDS(mu)){ 00868 u_shift_mat(P3, P3mu, mu, sitelink); 00869 } 00870 side_link_force(mu, sig, ThreeSt, id, P3, Qmu, P3mu, mom); 00871 } // end loop over mu 00872 } // end loop over sig 00873 } // modified hisq_force_reference 00874 00875 00876 00877 00878 00879 00880 // This version of the test routine uses 00881 // half-wilson vectors instead of color matrices. 00882 template <typename Real, typename su3_matrix, typename anti_hermitmat, typename half_wilson_vector> 00883 void do_halfwilson_hisq_force_reference(Real eps, Real weight, 00884 half_wilson_vector* temp_x, Real* act_path_coeff, 00885 su3_matrix* sitelink, anti_hermitmat* mom) 00886 { 00887 int i; 00888 int mu, nu, rho, sig; 00889 Real coeff; 00890 Real OneLink, Lepage, FiveSt, ThreeSt, SevenSt; 00891 //Real Naik; 00892 Real mLepage, mFiveSt, mThreeSt, mSevenSt; 00893 00894 su3_matrix* tempmat[9]; 00895 int sites_on_node = V; 00896 // int DirectLinks[8] ; 00897 00898 Real ferm_epsilon; 00899 ferm_epsilon = 2.0*weight*eps; 00900 OneLink = act_path_coeff[0]*ferm_epsilon ; 00901 // Naik = act_path_coeff[1]*ferm_epsilon ; 00902 ThreeSt = act_path_coeff[2]*ferm_epsilon ; mThreeSt = -ThreeSt; 00903 FiveSt = act_path_coeff[3]*ferm_epsilon ; mFiveSt = -FiveSt; 00904 SevenSt = act_path_coeff[4]*ferm_epsilon ; mSevenSt = -SevenSt; 00905 Lepage = act_path_coeff[5]*ferm_epsilon ; mLepage = -Lepage; 00906 00907 00908 // for(mu=0;mu<8;mu++){ 00909 // DirectLinks[mu] = 0 ; 00910 // } 00911 00912 for(mu=0; mu<9; mu++){ 00913 tempmat[mu] = (su3_matrix *)malloc( sites_on_node*sizeof(su3_matrix) ); 00914 } 00915 00916 00917 su3_matrix* id; 00918 id = (su3_matrix *)malloc(sites_on_node*sizeof(su3_matrix) ); 00919 su3_matrix* id4; 00920 id4 = (su3_matrix *)malloc(sites_on_node*4*sizeof(su3_matrix) ); 00921 00922 su3_matrix* temp_mat; 00923 temp_mat = (su3_matrix *)malloc(sites_on_node*sizeof(su3_matrix) ); 00924 00925 // initialise id so that it is the identity matrix on each lattice site 00926 set_identity(id,1); 00927 00928 printf("Calling hisq reference routine\n"); 00929 for(sig=0; sig < 8; sig++){ 00930 shifted_outer_prod(temp_x, temp_mat, OPP_DIR(sig)); 00931 00932 // One-link term - don't have the savings here that we get when working with the 00933 // half-wilson vectors 00934 if(GOES_FORWARDS(sig)){ 00935 u_shift_mat(temp_mat, Pmu, sig, sitelink); 00936 add_force_to_momentum(Pmu, id, sig, OneLink, mom); 00937 } 00938 00939 for(mu = 0; mu < 8; mu++){ 00940 if ( (mu == sig) || (mu == OPP_DIR(sig))){ 00941 continue; 00942 } 00943 // 3 link path 00944 // sig 00945 // A _______ 00946 // | | 00947 // mu /|\ \|/ 00948 // | | 00949 // 00950 // 00951 u_shift_mat(temp_mat, Pmu, OPP_DIR(mu), sitelink); // temp_xx[sig] stores |X(x)><X(x-sig)| 00952 u_shift_mat(id, Qmu, OPP_DIR(mu), sitelink); // This returns the path less the outer-product of quark fields at the end 00953 // Qmu = U[mu] 00954 00955 00956 u_shift_mat(Pmu, P3, sig, sitelink); // P3 is U[sig](X)U[-mu](X+sig) temp_xx 00957 if (GOES_FORWARDS(sig)){ 00958 // add contribution from middle link 00959 add_force_to_momentum(P3, Qmu, sig, mThreeSt, mom); // matrix_mult_na(P3[x],Qmu[x],tmp); 00960 // mom[sig][x] += mThreeSt*tmp; 00961 } 00962 for(nu=0; nu < 8; nu++){ 00963 if (nu == sig || nu == OPP_DIR(sig) 00964 || nu == mu || nu == OPP_DIR(mu)){ 00965 continue; 00966 } 00967 00968 // 5 link path 00969 // 00970 // sig 00971 // A ________ 00972 // | | 00973 // /|\ \|/ 00974 // | | 00975 // \ \ 00976 // \ \ 00977 00978 u_shift_mat(Pmu, Pnumu, OPP_DIR(nu), sitelink); 00979 u_shift_mat(Qmu, Qnumu, OPP_DIR(nu), sitelink); 00980 00981 u_shift_mat(Pnumu, P5, sig, sitelink); 00982 if (GOES_FORWARDS(sig)){ 00983 add_force_to_momentum(P5, Qnumu, sig, FiveSt, mom); 00984 } // seems to do what I think it should 00985 00986 for(rho =0; rho < 8; rho++){ 00987 if (rho == sig || rho == OPP_DIR(sig) 00988 || rho == mu || rho == OPP_DIR(mu) 00989 || rho == nu || rho == OPP_DIR(nu)){ 00990 continue; 00991 } 00992 00993 //7 link path 00994 u_shift_mat(Pnumu, Prhonumu, OPP_DIR(rho), sitelink); 00995 u_shift_mat(Prhonumu, P7, sig, sitelink); 00996 00997 // Prhonumu = tempmat[2] is not needed again 00998 // => store Qrhonumu in the same memory 00999 u_shift_mat(Qnumu, Qrhonumu, OPP_DIR(rho), sitelink); 01000 01001 01002 if(GOES_FORWARDS(sig)){ 01003 add_force_to_momentum(P7, Qrhonumu, sig, mSevenSt, mom) ; 01004 } 01005 01006 u_shift_mat(P7, P7rho, rho, sitelink); 01007 side_link_force(rho, sig, SevenSt, Qnumu, P7, Qrhonumu, P7rho, mom); 01008 if(FiveSt != 0)coeff = SevenSt/FiveSt ; else coeff = 0; 01009 for(i=0; i<V; i++){ 01010 scalar_mult_add_su3_matrix(&P5[i], &P7rho[i], coeff, &P5[i]); 01011 } // end loop over volume 01012 } // end loop over rho 01013 01014 01015 u_shift_mat(P5, P5nu, nu, sitelink); 01016 side_link_force(nu,sig,mFiveSt,Qmu,P5,Qnumu,P5nu,mom); // I believe this should do what I want it to 01017 // check this! 01018 if(ThreeSt != 0)coeff = FiveSt/ThreeSt; else coeff = 0; 01019 01020 for(i=0; i<V; i++){ 01021 scalar_mult_add_su3_matrix(&P3[i], &P5nu[i], coeff, &P3[i]); 01022 } // end loop over volume 01023 } // end loop over nu 01024 01025 // Lepage term 01026 u_shift_mat(Pmu, Pnumu, OPP_DIR(mu), sitelink); 01027 u_shift_mat(Qmu, Qnumu, OPP_DIR(mu), sitelink); 01028 01029 u_shift_mat(Pnumu, P5, sig, sitelink); 01030 if(GOES_FORWARDS(sig)){ 01031 add_force_to_momentum(P5, Qnumu, sig, Lepage, mom); 01032 } 01033 01034 u_shift_mat(P5, P5nu, mu, sitelink); 01035 side_link_force(mu, sig, mLepage, Qmu, P5, Qnumu, P5nu, mom); 01036 01037 01038 if(ThreeSt != 0)coeff = Lepage/ThreeSt; else coeff = 0; 01039 01040 for(i=0; i<V; i++){ 01041 scalar_mult_add_su3_matrix(&P3[i], &P5nu[i], coeff, &P3[i]); 01042 } 01043 01044 if(GOES_FORWARDS(mu)){ 01045 u_shift_mat(P3, P3mu, mu, sitelink); 01046 } 01047 side_link_force(mu, sig, ThreeSt, id, P3, Qmu, P3mu, mom); 01048 } // end loop over mu 01049 } // end loop over sig 01050 01051 for(mu=0; mu<9; mu++){ 01052 free(tempmat[mu]); 01053 } 01054 01055 free(id); 01056 free(id4); 01057 free(temp_mat); 01058 } 01059 01060 #undef Pmu 01061 #undef Pnumu 01062 #undef Prhonumu 01063 #undef P3 01064 #undef P3mu 01065 #undef P5 01066 #undef P5nu 01067 #undef P7 01068 #undef P7rho 01069 #undef P7rhonu 01070 01071 #undef Popmu 01072 #undef Pmumumu 01073 01074 #undef Qmu 01075 #undef Qnumu 01076 #undef Qrhonumu 01077 01078 01079 01080 static 01081 void set_identity(fsu3_matrix *sitelink){ 01082 int tot = V*4; 01083 for(int i=0; i<tot; i++){ // loop over sites and directions 01084 for(int a=0; a<3; a++){ 01085 for(int b=0; b<3; b++){ 01086 sitelink[i].e[a][b].real = sitelink[i].e[a][b].imag = 0.; 01087 } 01088 } 01089 for(int a=0; a<3; a++){ // set the diagonal elements to unity 01090 sitelink[i].e[a][a].real = 1.; 01091 } 01092 } 01093 return; 01094 } 01095 01096 01097 01098 01099 void halfwilson_hisq_force_reference(float eps, float weight, 01100 void* act_path_coeff, void* temp_x, 01101 void* sitelink, void* mom) 01102 { 01103 do_halfwilson_hisq_force_reference((float)eps, (float)weight, 01104 (fhalf_wilson_vector*) temp_x, (float*)act_path_coeff, 01105 (fsu3_matrix*)sitelink, (fanti_hermitmat*)mom); 01106 return; 01107 } 01108 01109 01110 01111 void halfwilson_hisq_force_reference(double eps, double weight, 01112 void* act_path_coeff, void* temp_x, 01113 void* sitelink, void* mom) 01114 { 01115 do_halfwilson_hisq_force_reference((double)eps, (double)weight, 01116 (dhalf_wilson_vector*) temp_x, (double*)act_path_coeff, 01117 (dsu3_matrix*)sitelink, (danti_hermitmat*)mom); 01118 return; 01119 } 01120 01121 01122 01123 void color_matrix_hisq_force_reference(float eps, float weight, 01124 void* act_path_coeff, void* temp_xx, 01125 void* sitelink, void* mom) 01126 { 01127 do_color_matrix_hisq_force_reference((float)eps, (float)weight, 01128 (fsu3_matrix*) temp_xx, (float*)act_path_coeff, 01129 (fsu3_matrix*)sitelink, (fanti_hermitmat*)mom); 01130 return; 01131 }