16 #define CADD(a,b,c) { (c).real = (a).real + (b).real; \
17 (c).imag = (a).imag + (b).imag; }
18 #define CMUL(a,b,c) { (c).real = (a).real*(b).real - (a).imag*(b).imag; \
19 (c).imag = (a).real*(b).imag + (a).imag*(b).real; }
20 #define CSUM(a,b) { (a).real += (b).real; (a).imag += (b).imag; }
23 #define CMULJ_(a,b,c) { (c).real = (a).real*(b).real + (a).imag*(b).imag; \
24 (c).imag = (a).real*(b).imag - (a).imag*(b).real; }
27 #define CMUL_J(a,b,c) { (c).real = (a).real*(b).real + (a).imag*(b).imag; \
28 (c).imag = (a).imag*(b).real - (a).real*(b).imag; }
30 #define CONJG(a,b) { (b).real = (a).real; (b).imag = -(a).imag; }
64 template<
typename su3_matrix>
69 for(i=0;i<3;i++)
for(j=0;j<3;j++){
70 CONJG( a->e[j][i], b->e[i][j] );
75 template<
typename su3_matrix,
typename anti_hermitmat>
80 typeof(ah3->m00im) temp =
81 (m3->e[0][0].imag + m3->e[1][1].imag + m3->e[2][2].imag)*0.33333333333333333;
82 ah3->m00im = m3->e[0][0].imag - temp;
83 ah3->m11im = m3->e[1][1].imag - temp;
84 ah3->m22im = m3->e[2][2].imag - temp;
85 ah3->m01.real = (m3->e[0][1].real - m3->e[1][0].real)*0.5;
86 ah3->m02.real = (m3->e[0][2].real - m3->e[2][0].real)*0.5;
87 ah3->m12.real = (m3->e[1][2].real - m3->e[2][1].real)*0.5;
88 ah3->m01.imag = (m3->e[0][1].imag + m3->e[1][0].imag)*0.5;
89 ah3->m02.imag = (m3->e[0][2].imag + m3->e[2][0].imag)*0.5;
90 ah3->m12.imag = (m3->e[1][2].imag + m3->e[2][1].imag)*0.5;
94 template <typename anti_hermitmat, typename su3_matrix>
96 uncompress_anti_hermitian(anti_hermitmat *mat_antihermit,
99 typeof(mat_antihermit->m00im) temp1;
100 mat_su3->e[0][0].imag=mat_antihermit->m00im;
101 mat_su3->e[0][0].real=0.;
102 mat_su3->e[1][1].imag=mat_antihermit->m11im;
103 mat_su3->e[1][1].real=0.;
104 mat_su3->e[2][2].imag=mat_antihermit->m22im;
105 mat_su3->e[2][2].real=0.;
106 mat_su3->e[0][1].imag=mat_antihermit->m01.imag;
107 temp1=mat_antihermit->m01.real;
108 mat_su3->e[0][1].real=temp1;
109 mat_su3->e[1][0].real= -temp1;
110 mat_su3->e[1][0].imag=mat_antihermit->m01.imag;
111 mat_su3->e[0][2].imag=mat_antihermit->m02.imag;
112 temp1=mat_antihermit->m02.real;
113 mat_su3->e[0][2].real=temp1;
114 mat_su3->e[2][0].real= -temp1;
115 mat_su3->e[2][0].imag=mat_antihermit->m02.imag;
116 mat_su3->e[1][2].imag=mat_antihermit->m12.imag;
117 temp1=mat_antihermit->m12.real;
118 mat_su3->e[1][2].real=temp1;
119 mat_su3->e[2][1].real= -temp1;
120 mat_su3->e[2][1].imag=mat_antihermit->m12.imag;
123 template <typename su3_matrix, typename
Float>
125 scalar_mult_sub_su3_matrix(su3_matrix *a,su3_matrix *b,
Float s, su3_matrix *c)
130 c->e[i][j].real = a->e[i][j].real - s*b->e[i][j].real;
131 c->e[i][j].imag = a->e[i][j].imag - s*b->e[i][j].imag;
136 template <
typename su3_matrix,
typename Float>
138 scalar_mult_add_su3_matrix(su3_matrix *a,su3_matrix *b,
Float s, su3_matrix *c)
143 c->e[i][j].real = a->e[i][j].real + s*b->e[i][j].real;
144 c->e[i][j].imag = a->e[i][j].imag + s*b->e[i][j].imag;
150 template<
typename su3_matrix,
typename su3_vector>
152 mult_su3_mat_vec( su3_matrix *a, su3_vector *b, su3_vector *c )
155 typeof(a->e[0][0])
x,
y;
159 CMUL( a->e[i][j] , b->c[j] ,
y );
165 template<
typename su3_matrix,
typename su3_vector>
167 mult_adj_su3_mat_vec( su3_matrix *a, su3_vector *b, su3_vector *c )
170 typeof(a->e[0][0])
x,
y,z;
174 CONJG( a->e[j][i], z );
175 CMUL( z , b->c[j],
y );
182 template<
typename su3_vector,
typename su3_matrix>
184 su3_projector( su3_vector *a, su3_vector *b, su3_matrix *c )
187 for(i=0;i<3;i++)
for(j=0;j<3;j++){
188 CMUL_J( a->c[i], b->c[j], c->e[i][j] );
192 template<
typename su3_vector,
typename Real>
194 scalar_mult_add_su3_vector(su3_vector *a, su3_vector *b, Real s,
199 c->c[i].real = a->c[i].real + s*b->c[i].real;
200 c->c[i].imag = a->c[i].imag + s*b->c[i].imag;
206 template <
typename su3_matrix>
213 printf(
"(%f %f)\t", a->e[i][j].real, a->e[i][j].imag);
223 template <
typename su3_matrix,
typename anti_hermitmat,
typename Float>
225 update_mom(anti_hermitmat* momentum,
int dir, su3_matrix* sitelink,
226 su3_matrix* staple,
Float eb3)
234 su3_matrix* lnk = sitelink + 4*i+dir;
235 su3_matrix* stp = staple + i;
236 anti_hermitmat* mom = momentum + 4*i+dir;
238 mult_su3_na(lnk, stp, &tmat1);
239 uncompress_anti_hermitian(mom, &tmat2);
241 scalar_mult_sub_su3_matrix(&tmat2, &tmat1, eb3, &tmat3);
249 template<
typename half_wilson_vector,
typename su3_matrix>
251 u_shift_hw(half_wilson_vector *src, half_wilson_vector *dest,
int dir, su3_matrix* sitelink )
256 dx[3]=dx[2]=dx[1]=dx[0]=0;
262 half_wilson_vector* hw = src + nbr_idx;
263 su3_matrix* link = sitelink + i*4 + dir;
264 mult_su3_mat_vec(link, &hw->h[0], &dest[i].h[0]);
265 mult_su3_mat_vec(link, &hw->h[1], &dest[i].h[1]);
271 half_wilson_vector* hw = src + nbr_idx;
272 su3_matrix* link = sitelink + nbr_idx*4 +
OPP_DIR(dir);
273 mult_adj_su3_mat_vec(link, &hw->h[0], &dest[i].h[0]);
274 mult_adj_su3_mat_vec(link, &hw->h[1], &dest[i].h[1]);
281 template <
class su3_matrix,
typename half_wilson_vector,
282 typename anti_hermitmat,
typename Real>
284 add_3f_force_to_mom(half_wilson_vector *back, half_wilson_vector *forw,
285 int dir, Real
coeff[2], anti_hermitmat* momentum)
294 my_coeff[0] = -coeff[0];
295 my_coeff[1] = -coeff[1];
298 my_coeff[0] = coeff[0];
299 my_coeff[1] = coeff[1];
304 tmp_coeff[0] = my_coeff[0];
305 tmp_coeff[1] = my_coeff[1];
307 tmp_coeff[0] = -my_coeff[0];
308 tmp_coeff[1] = -my_coeff[1] ;
312 su3_matrix mom_matrix;
313 anti_hermitmat* mom = momentum+ 4* i + mydir;
314 uncompress_anti_hermitian(mom, &mom_matrix);
315 su3_projector( &back[i].h[0], &forw[i].h[0], &tmat);
316 scalar_mult_add_su3_matrix(&mom_matrix, &tmat, tmp_coeff[0], &mom_matrix );
317 su3_projector( &back[i].h[1], &forw[i].h[1], &tmat);
318 scalar_mult_add_su3_matrix(&mom_matrix, &tmat, tmp_coeff[1], &mom_matrix );
323 template<
class su3_matrix,
typename Real,
typename half_wilson_vector,
typename anti_hermitmat>
325 side_link_3f_force(
int mu,
int nu, Real coeff[2], half_wilson_vector *Path ,
326 half_wilson_vector *Path_nu, half_wilson_vector *Path_mu,
327 half_wilson_vector *Path_numu, anti_hermitmat* mom)
332 m_coeff[0] = -coeff[0] ;
333 m_coeff[1] = -coeff[1] ;
337 add_3f_force_to_mom<su3_matrix>(Path_numu, Path,
mu,
coeff, mom) ;
339 add_3f_force_to_mom<su3_matrix>(Path,Path_numu,
OPP_DIR(mu),m_coeff, mom);
344 add_3f_force_to_mom<su3_matrix>(Path_nu, Path_mu, mu, m_coeff, mom);
346 add_3f_force_to_mom<su3_matrix>(Path_mu, Path_nu,
OPP_DIR(mu),
coeff, mom) ;
352 #define Pmu tempvec[0]
353 #define Pnumu tempvec[1]
354 #define Prhonumu tempvec[2]
355 #define P7 tempvec[3]
356 #define P7rho tempvec[4]
357 #define P7rhonu tempvec[5]
358 #define P5 tempvec[6]
359 #define P3 tempvec[7]
360 #define P5nu tempvec[3]
361 #define P3mu tempvec[3]
362 #define Popmu tempvec[4]
363 #define Pmumumu tempvec[4]
367 template <
typename Real,
typename half_wilson_vector,
typename anti_hermitmat,
370 do_fermion_force_reference(Real eps, Real weight1, Real weight2,
371 half_wilson_vector* temp_x, Real* act_path_coeff,
372 su3_matrix* sitelink, anti_hermitmat* mom)
375 int mu, nu, rho,
sig;
377 Real OneLink[2], Lepage[2], Naik[2], FiveSt[2], ThreeSt[2], SevenSt[2] ;
378 Real mNaik[2], mLepage[2], mFiveSt[2], mThreeSt[2], mSevenSt[2];
379 half_wilson_vector *tempvec[8] ;
380 int sites_on_node =
V;
384 ferm_epsilon = 2.0*weight1*eps;
385 OneLink[0] = act_path_coeff[0]*ferm_epsilon ;
386 Naik[0] = act_path_coeff[1]*ferm_epsilon ; mNaik[0] = -Naik[0];
387 ThreeSt[0] = act_path_coeff[2]*ferm_epsilon ; mThreeSt[0] = -ThreeSt[0];
388 FiveSt[0] = act_path_coeff[3]*ferm_epsilon ; mFiveSt[0] = -FiveSt[0];
389 SevenSt[0] = act_path_coeff[4]*ferm_epsilon ; mSevenSt[0] = -SevenSt[0];
390 Lepage[0] = act_path_coeff[5]*ferm_epsilon ; mLepage[0] = -Lepage[0];
392 ferm_epsilon = 2.0*weight2*eps;
393 OneLink[1] = act_path_coeff[0]*ferm_epsilon ;
394 Naik[1] = act_path_coeff[1]*ferm_epsilon ; mNaik[1] = -Naik[1];
395 ThreeSt[1] = act_path_coeff[2]*ferm_epsilon ; mThreeSt[1] = -ThreeSt[1];
396 FiveSt[1] = act_path_coeff[3]*ferm_epsilon ; mFiveSt[1] = -FiveSt[1];
397 SevenSt[1] = act_path_coeff[4]*ferm_epsilon ; mSevenSt[1] = -SevenSt[1];
398 Lepage[1] = act_path_coeff[5]*ferm_epsilon ; mLepage[1] = -Lepage[1];
401 DirectLinks[
mu] = 0 ;
405 tempvec[
mu] = (half_wilson_vector *)malloc( sites_on_node*
sizeof(half_wilson_vector) );
408 for(sig=0; sig < 8; sig++){
409 for(mu = 0; mu < 8; mu++){
410 if ( (mu == sig) || (mu ==
OPP_DIR(sig))){
415 u_shift_hw(temp_x,
Pmu,
OPP_DIR(mu), sitelink);
416 u_shift_hw(
Pmu,
P3, sig, sitelink);
418 add_3f_force_to_mom<su3_matrix>(
P3,
Pmu,
sig, mThreeSt, mom);
420 for(nu=0; nu < 8; nu++){
421 if (nu == sig || nu ==
OPP_DIR(sig)
422 || nu == mu || nu ==
OPP_DIR(mu)){
428 u_shift_hw(
Pnumu,
P5, sig, sitelink);
430 add_3f_force_to_mom<su3_matrix>(
P5,
Pnumu,
sig, FiveSt, mom);
434 for(rho =0; rho < 8; rho++){
435 if (rho == sig || rho ==
OPP_DIR(sig)
436 || rho == mu || rho ==
OPP_DIR(mu)
437 || rho == nu || rho ==
OPP_DIR(nu)){
445 add_3f_force_to_mom<su3_matrix>(
P7,
Prhonumu,
sig, mSevenSt, mom) ;
447 u_shift_hw(
P7,
P7rho, rho, sitelink);
449 if(FiveSt[0] != 0)coeff[0] = SevenSt[0]/FiveSt[0] ;
else coeff[0] = 0;
450 if(FiveSt[1] != 0)coeff[1] = SevenSt[1]/FiveSt[1] ;
else coeff[1] = 0;
452 scalar_mult_add_su3_vector(&(
P5[i].h[0]),&(P7rho[i].h[0]), coeff[0],
454 scalar_mult_add_su3_vector(&(
P5[i].h[1]),&(P7rho[i].h[1]), coeff[1],
460 u_shift_hw(
P5,
P5nu, nu, sitelink);
462 if(ThreeSt[0] != 0)coeff[0] = FiveSt[0]/ThreeSt[0] ;
else coeff[0] = 0;
463 if(ThreeSt[1] != 0)coeff[1] = FiveSt[1]/ThreeSt[1] ;
else coeff[1] = 0;
464 for(i=0; i <
V; i++){
465 scalar_mult_add_su3_vector(&(
P3[i].h[0]),&(P5nu[i].h[0]),coeff[0],&(
P3[i].h[0]));
466 scalar_mult_add_su3_vector(&(
P3[i].h[1]),&(P5nu[i].h[1]),coeff[1],&(
P3[i].h[1]));
473 u_shift_hw(
Pnumu,
P5, sig, sitelink);
475 add_3f_force_to_mom<su3_matrix>(
P5,
Pnumu,
sig, Lepage, mom);
478 u_shift_hw(
P5,
P5nu, mu, sitelink);
480 if(ThreeSt[0] != 0) coeff[0] = Lepage[0]/ThreeSt[0] ;
else coeff[0] = 0;
481 if(ThreeSt[1] != 0) coeff[1] = Lepage[1]/ThreeSt[1] ;
else coeff[1] = 0;
483 scalar_mult_add_su3_vector(&(
P3[i].h[0]),&(P5nu[i].h[0]),coeff[0],&(
P3[i].h[0]));
484 scalar_mult_add_su3_vector(&(
P3[i].h[1]),&(P5nu[i].h[1]),coeff[1],&(
P3[i].h[1]));
488 u_shift_hw(
P3,
P3mu, mu, sitelink);
490 side_link_3f_force<su3_matrix>(
mu,
sig, ThreeSt, temp_x,
P3,
Pmu,
P3mu, mom);
493 if( (!DirectLinks[mu]) ){
497 add_3f_force_to_mom<su3_matrix>(
Pmu, temp_x,
OPP_DIR(mu), OneLink, mom);
498 u_shift_hw(temp_x,
Popmu, mu, sitelink);
501 add_3f_force_to_mom<su3_matrix>(
Pmumumu, temp_x,
OPP_DIR(mu), Naik, mom);
503 u_shift_hw(temp_x,
Popmu, mu, sitelink);
504 add_3f_force_to_mom<su3_matrix>(
Popmu,
Pnumu,
mu, Naik, mom);
533 void* act_path_coeff,
void* temp_x,
void* sitelink,
void* mom)
535 do_fermion_force_reference((
float)eps, (
float)weight1, (
float)weight2,
544 void* act_path_coeff,
void* temp_x,
void* sitelink,
void* mom)
546 do_fermion_force_reference((
double)eps, (
double)weight1, (
double)weight2,
void print_su3_matrix(su3_matrix *a)
__global__ void const RealA *const const RealA *const const RealA *const const RealB *const const RealB *const int int mu
void make_anti_hermitian(su3_matrix *m3, anti_hermitmat *ah3)
int neighborIndexFullLattice(int i, int dx4, int dx3, int dx2, int dx1)
void fermion_force_reference(float eps, float weight1, float weight2, void *act_path_coeff, void *temp_x, void *sitelink, void *mom)
void su3_adjoint(su3_matrix *a, su3_matrix *b)
FloatingPoint< float > Float
for(int s=0;s< param.Ls;s++)
__constant__ double coeff
#define GOES_BACKWARDS(dir)
Main header file for the QUDA library.
#define GOES_FORWARDS(dir)
__global__ void const RealA *const const RealA *const const RealA *const const RealB *const const RealB *const int sig