18 #define CADD(a,b,c) { (c).real = (a).real + (b).real; \
19 (c).imag = (a).imag + (b).imag; }
20 #define CMUL(a,b,c) { (c).real = (a).real*(b).real - (a).imag*(b).imag; \
21 (c).imag = (a).real*(b).imag + (a).imag*(b).real; }
22 #define CSUM(a,b) { (a).real += (b).real; (a).imag += (b).imag; }
25 #define CMULJ_(a,b,c) { (c).real = (a).real*(b).real + (a).imag*(b).imag; \
26 (c).imag = (a).real*(b).imag - (a).imag*(b).real; }
29 #define CMUL_J(a,b,c) { (c).real = (a).real*(b).real + (a).imag*(b).imag; \
30 (c).imag = (a).imag*(b).real - (a).real*(b).imag; }
32 #define CONJG(a,b) { (b).real = (a).real; (b).imag = -(a).imag; }
52 float m00im,m11im,m22im;
58 double m00im,m11im,m22im;
64 template<
typename su3_matrix>
68 for(i=0;i<3;i++)
for(j=0;j<3;j++){
69 CONJG( a->e[j][i], b->e[i][j] );
74 template<
typename su3_matrix,
typename anti_hermitmat>
79 typeof(ah3->m00im) temp =
80 (m3->e[0][0].imag + m3->e[1][1].imag + m3->e[2][2].imag)*0.33333333333333333;
81 ah3->m00im = m3->e[0][0].imag - temp;
82 ah3->m11im = m3->e[1][1].imag - temp;
83 ah3->m22im = m3->e[2][2].imag - temp;
84 ah3->m01.real = (m3->e[0][1].real - m3->e[1][0].real)*0.5;
85 ah3->m02.real = (m3->e[0][2].real - m3->e[2][0].real)*0.5;
86 ah3->m12.real = (m3->e[1][2].real - m3->e[2][1].real)*0.5;
87 ah3->m01.imag = (m3->e[0][1].imag + m3->e[1][0].imag)*0.5;
88 ah3->m02.imag = (m3->e[0][2].imag + m3->e[2][0].imag)*0.5;
89 ah3->m12.imag = (m3->e[1][2].imag + m3->e[2][1].imag)*0.5;
93 template <
typename anti_hermitmat,
typename su3_matrix>
95 uncompress_anti_hermitian(anti_hermitmat *mat_antihermit,
98 typeof(mat_antihermit->m00im) temp1;
99 mat_su3->e[0][0].imag=mat_antihermit->m00im;
100 mat_su3->e[0][0].real=0.;
101 mat_su3->e[1][1].imag=mat_antihermit->m11im;
102 mat_su3->e[1][1].real=0.;
103 mat_su3->e[2][2].imag=mat_antihermit->m22im;
104 mat_su3->e[2][2].real=0.;
105 mat_su3->e[0][1].imag=mat_antihermit->m01.imag;
106 temp1=mat_antihermit->m01.real;
107 mat_su3->e[0][1].real=temp1;
108 mat_su3->e[1][0].real= -temp1;
109 mat_su3->e[1][0].imag=mat_antihermit->m01.imag;
110 mat_su3->e[0][2].imag=mat_antihermit->m02.imag;
111 temp1=mat_antihermit->m02.real;
112 mat_su3->e[0][2].real=temp1;
113 mat_su3->e[2][0].real= -temp1;
114 mat_su3->e[2][0].imag=mat_antihermit->m02.imag;
115 mat_su3->e[1][2].imag=mat_antihermit->m12.imag;
116 temp1=mat_antihermit->m12.real;
117 mat_su3->e[1][2].real=temp1;
118 mat_su3->e[2][1].real= -temp1;
119 mat_su3->e[2][1].imag=mat_antihermit->m12.imag;
122 template <typename su3_matrix, typename
Float>
124 scalar_mult_sub_su3_matrix(su3_matrix *a,su3_matrix *b,
Float s, su3_matrix *c)
129 c->e[i][j].real = a->e[i][j].real - s*b->e[i][j].real;
130 c->e[i][j].imag = a->e[i][j].imag - s*b->e[i][j].imag;
135 template <
typename su3_matrix,
typename Float>
137 scalar_mult_add_su3_matrix(su3_matrix *a,su3_matrix *b,
Float s, su3_matrix *c)
142 c->e[i][j].real = a->e[i][j].real + s*b->e[i][j].real;
143 c->e[i][j].imag = a->e[i][j].imag + s*b->e[i][j].imag;
148 template <
typename su3_matrix>
150 mult_su3_nn(su3_matrix* a, su3_matrix* b, su3_matrix* c)
153 typeof(a->e[0][0])
x,
y;
158 CMUL( a->e[i][k] , b->e[k][j] ,
y );
165 template<
typename su3_matrix>
167 mult_su3_an( su3_matrix *a, su3_matrix *b, su3_matrix *c )
170 typeof(a->e[0][0])
x,
y;
175 CMULJ_( a->e[k][i] , b->e[k][j],
y );
183 template<
typename su3_matrix>
185 mult_su3_na( su3_matrix *a, su3_matrix *b, su3_matrix *c )
188 typeof(a->e[0][0])
x,
y;
193 CMUL_J( a->e[i][k] , b->e[j][k] ,
y );
201 template <
typename su3_matrix>
208 printf(
"(%f %f)\t", a->e[i][j].real, a->e[i][j].imag);
231 int za = half_idx/
X1h;
232 int x1h = half_idx - za*
X1h;
246 int nbr_half_idx = ( (x4+2)*(
E[2]*
E[1]*
E[0]) + (x3+2)*(E[1]*E[0]) + (x2+2)*(E[0]) + (x1+2)) / 2;
248 x4 = (x4+dx4+
Z[3]) %
Z[3];
249 x3 = (x3+dx3+
Z[2]) %
Z[2];
250 x2 = (x2+dx2+
Z[1]) %
Z[1];
251 x1 = (x1+dx1+
Z[0]) %
Z[0];
253 int nbr_half_idx = (x4*(
Z[2]*
Z[1]*
Z[0]) + x3*(
Z[1]*
Z[0]) + x2*(Z[0]) + x1) / 2;
256 int oddBitChanged = (dx4+dx3+dx2+dx1)%2;
260 int ret = nbr_half_idx;
263 ret =
Vh_ex + nbr_half_idx;
265 ret =
Vh + nbr_half_idx;
275 template<
typename su3_matrix,
typename Float>
277 compute_path_product(su3_matrix* staple, su3_matrix** sitelink, su3_matrix** sitelink_ex_2d,
278 int* path,
int len,
Float loop_coeff,
int dir)
282 su3_matrix prev_matrix, curr_matrix, tmat;
287 memset(&curr_matrix, 0,
sizeof(curr_matrix));
289 curr_matrix.e[0][0].real = 1.0;
290 curr_matrix.e[1][1].real = 1.0;
291 curr_matrix.e[2][2].real = 1.0;
294 for(j=0; j < len;j++){
297 prev_matrix = curr_matrix;
308 su3_matrix* lnk = sitelink_ex_2d[lnkdir] + nbr_idx;
310 su3_matrix* lnk = sitelink[lnkdir]+ nbr_idx;
313 mult_su3_nn(&prev_matrix, lnk, &curr_matrix);
315 mult_su3_na(&prev_matrix, lnk, &curr_matrix);
329 scalar_mult_add_su3_matrix(staple + i , &tmat, loop_coeff, staple+i);
337 template <
typename su3_matrix,
typename anti_hermitmat,
typename Float>
339 update_mom(anti_hermitmat* momentum,
int dir, su3_matrix** sitelink,
340 su3_matrix* staple,
Float eb3)
348 su3_matrix* lnk = sitelink[dir] + i;
349 su3_matrix* stp = staple + i;
350 anti_hermitmat* mom = momentum + 4*i+dir;
352 mult_su3_na(lnk, stp, &tmat1);
353 uncompress_anti_hermitian(mom, &tmat2);
355 scalar_mult_sub_su3_matrix(&tmat2, &tmat1, eb3, &tmat3);
370 int **path_dir,
int*
length,
void* loop_coeff,
int num_paths)
379 fprintf(stderr,
"ERROR: malloc failed for staple in functon %s\n", __FUNCTION__);
385 for(i=0;i < num_paths; i++){
387 double* my_loop_coeff = (
double*)loop_coeff;
389 path_dir[i], length[i], my_loop_coeff[i], dir);
391 float* my_loop_coeff = (
float*)loop_coeff;
393 path_dir[i], length[i], my_loop_coeff[i], dir);
409 int ***path_dir,
int*
length,
void* loop_coeff,
int num_paths)
411 for(
int dir =0; dir < 4; dir++){
413 length, loop_coeff, num_paths);
void gauge_force_reference_dir(void *refMom, int dir, double eb3, void **sitelink, void **sitelink_ex_2d, QudaPrecision prec, int **path_dir, int *length, void *loop_coeff, int num_paths)
enum QudaPrecision_s QudaPrecision
void print_su3_matrix(su3_matrix *a)
int gf_neighborIndexFullLattice(int i, int dx4, int dx3, int dx2, int dx1)
void make_anti_hermitian(su3_matrix *m3, anti_hermitmat *ah3)
int neighborIndexFullLattice(int i, int dx4, int dx3, int dx2, int dx1)
void gauge_force_reference(void *refMom, double eb3, void **sitelink, void **sitelink_ex_2d, QudaPrecision prec, int ***path_dir, int *length, void *loop_coeff, int num_paths)
void su3_adjoint(su3_matrix *a, su3_matrix *b)
FloatingPoint< float > Float
for(int s=0;s< param.Ls;s++)
void * memset(void *s, int c, size_t n)
Main header file for the QUDA library.
#define GOES_FORWARDS(dir)