19 #define CADD(a, b, c) \
21 (c).real = (a).real + (b).real; \
22 (c).imag = (a).imag + (b).imag; \
24 #define CMUL(a, b, c) \
26 (c).real = (a).real * (b).real - (a).imag * (b).imag; \
27 (c).imag = (a).real * (b).imag + (a).imag * (b).real; \
31 (a).real += (b).real; \
32 (a).imag += (b).imag; \
36 #define CMULJ_(a, b, c) \
38 (c).real = (a).real * (b).real + (a).imag * (b).imag; \
39 (c).imag = (a).real * (b).imag - (a).imag * (b).real; \
43 #define CMUL_J(a, b, c) \
45 (c).real = (a).real * (b).real + (a).imag * (b).imag; \
46 (c).imag = (a).imag * (b).real - (a).real * (b).imag; \
51 (b).real = (a).real; \
52 (b).imag = -(a).imag; \
95 for (
int i = 0; i < 3; i++) {
96 for (
int j = 0; j < 3; j++) {
CONJG(a->
e[j][i], b->
e[i][j]); }
102 auto temp = (m3->
e[0][0].imag + m3->
e[1][1].imag + m3->
e[2][2].imag) * 0.33333333333333333;
103 ah3->m00im = m3->
e[0][0].imag - temp;
104 ah3->m11im = m3->
e[1][1].imag - temp;
105 ah3->m22im = m3->
e[2][2].imag - temp;
106 ah3->m01.real = (m3->
e[0][1].real - m3->
e[1][0].real) * 0.5;
107 ah3->m02.real = (m3->
e[0][2].real - m3->
e[2][0].real) * 0.5;
108 ah3->m12.real = (m3->
e[1][2].real - m3->
e[2][1].real) * 0.5;
109 ah3->m01.imag = (m3->
e[0][1].imag + m3->
e[1][0].imag) * 0.5;
110 ah3->m02.imag = (m3->
e[0][2].imag + m3->
e[2][0].imag) * 0.5;
111 ah3->m12.imag = (m3->
e[1][2].imag + m3->
e[2][1].imag) * 0.5;
114 template <
typename anti_hermitmat,
typename su3_matrix>
115 static void uncompress_anti_hermitian(anti_hermitmat *mat_antihermit,
su3_matrix *mat_su3)
117 typename std::remove_reference<decltype(mat_antihermit->m00im)>::type temp1;
118 mat_su3->
e[0][0].imag = mat_antihermit->m00im;
119 mat_su3->
e[0][0].real = 0.;
120 mat_su3->
e[1][1].imag = mat_antihermit->m11im;
121 mat_su3->
e[1][1].real = 0.;
122 mat_su3->
e[2][2].imag = mat_antihermit->m22im;
123 mat_su3->
e[2][2].real = 0.;
124 mat_su3->
e[0][1].imag = mat_antihermit->m01.imag;
125 temp1 = mat_antihermit->m01.real;
126 mat_su3->
e[0][1].real = temp1;
127 mat_su3->
e[1][0].real = -temp1;
128 mat_su3->
e[1][0].imag = mat_antihermit->m01.imag;
129 mat_su3->
e[0][2].imag = mat_antihermit->m02.imag;
130 temp1 = mat_antihermit->m02.real;
131 mat_su3->
e[0][2].real = temp1;
132 mat_su3->
e[2][0].real = -temp1;
133 mat_su3->
e[2][0].imag = mat_antihermit->m02.imag;
134 mat_su3->
e[1][2].imag = mat_antihermit->m12.imag;
135 temp1 = mat_antihermit->m12.real;
136 mat_su3->
e[1][2].real = temp1;
137 mat_su3->
e[2][1].real = -temp1;
138 mat_su3->
e[2][1].imag = mat_antihermit->m12.imag;
141 template <
typename su3_matrix,
typename Float>
144 for (
int i = 0; i < 3; i++) {
145 for (
int j = 0; j < 3; j++) {
146 c->
e[i][j].real = a->
e[i][j].real - s * b->
e[i][j].real;
147 c->
e[i][j].imag = a->
e[i][j].imag - s * b->
e[i][j].imag;
152 template <
typename su3_matrix,
typename Float>
155 for (
int i = 0; i < 3; i++) {
156 for (
int j = 0; j < 3; j++) {
157 c->
e[i][j].real = a->
e[i][j].real + s * b->
e[i][j].real;
158 c->
e[i][j].imag = a->
e[i][j].imag + s * b->
e[i][j].imag;
165 typename std::remove_reference<decltype(a->
e[0][0])>::type x, y;
166 for (
int i = 0; i < 3; i++) {
167 for (
int j = 0; j < 3; j++) {
168 x.real = x.imag = 0.0;
169 for (
int k = 0; k < 3; k++) {
170 CMUL(a->
e[i][k], b->
e[k][j], y);
180 typename std::remove_reference<decltype(a->
e[0][0])>::type x, y;
181 for (
int i = 0; i < 3; i++) {
182 for (
int j = 0; j < 3; j++) {
183 x.real = x.imag = 0.0;
184 for (
int k = 0; k < 3; k++) {
195 typename std::remove_reference<decltype(a->
e[0][0])>::type x, y;
196 for (
int i = 0; i < 3; i++) {
197 for (
int j = 0; j < 3; j++) {
198 x.real = x.imag = 0.0;
199 for (
int k = 0; k < 3; k++) {
210 for (
int i = 0; i < 3; i++) {
211 for (
int j = 0; j < 3; j++) { printf(
"(%f %f)\t", a->
e[i][j].real, a->
e[i][j].imag); }
230 int za = half_idx / X1h;
231 int x1h = half_idx - za * X1h;
233 int x2 = za - zb * X2;
235 int x3 = zb - x4 * X3;
236 int x1odd = (x2 + x3 + x4 + oddBit) & 1;
237 int x1 = 2 * x1h + x1odd;
245 int nbr_half_idx = ((x4 + 2) * (
E[2] *
E[1] *
E[0]) + (x3 + 2) * (
E[1] *
E[0]) + (x2 + 2) * (
E[0]) + (x1 + 2)) / 2;
247 x4 = (x4 + dx4 +
Z[3]) %
Z[3];
248 x3 = (x3 + dx3 +
Z[2]) %
Z[2];
249 x2 = (x2 + dx2 +
Z[1]) %
Z[1];
250 x1 = (x1 + dx1 +
Z[0]) %
Z[0];
252 int nbr_half_idx = (x4 * (
Z[2] *
Z[1] *
Z[0]) + x3 * (
Z[1] *
Z[0]) + x2 * (
Z[0]) + x1) / 2;
255 int oddBitChanged = (dx4 + dx3 + dx2 + dx1) % 2;
256 if (oddBitChanged) { oddBit = 1 - oddBit; }
257 int ret = nbr_half_idx;
260 ret =
Vh_ex + nbr_half_idx;
262 ret =
Vh + nbr_half_idx;
270 template <
typename su3_matrix,
typename Float>
272 int len,
Float loop_coeff,
int dir)
277 for (
int i = 0; i <
V; i++) {
278 memset(dx, 0,
sizeof(dx));
279 memset(&curr_matrix, 0,
sizeof(curr_matrix));
281 curr_matrix.
e[0][0].real = 1.0;
282 curr_matrix.
e[1][1].real = 1.0;
283 curr_matrix.
e[2][2].real = 1.0;
286 for (
int j = 0; j < len; j++) {
289 prev_matrix = curr_matrix;
300 su3_matrix *lnk = sitelink_ex_2d[lnkdir] + nbr_idx;
305 mult_su3_nn(&prev_matrix, lnk, &curr_matrix);
307 mult_su3_na(&prev_matrix, lnk, &curr_matrix);
319 scalar_mult_add_su3_matrix(staple + i, &tmat, loop_coeff, staple + i);
323 template <
typename su3_matrix,
typename anti_hermitmat,
typename Float>
326 for (
int i = 0; i <
V; i++) {
333 anti_hermitmat *mom = momentum + 4 * i + dir;
335 mult_su3_na(lnk, stp, &tmat1);
336 uncompress_anti_hermitian(mom, &tmat2);
338 scalar_mult_sub_su3_matrix(&tmat2, &tmat1, eb3, &tmat3);
353 if (staple == NULL) {
354 fprintf(stderr,
"ERROR: malloc failed for staple in functon %s\n", __FUNCTION__);
360 for (
int i = 0; i < num_paths; i++) {
362 double *my_loop_coeff = (
double *)loop_coeff;
364 length[i], my_loop_coeff[i], dir);
366 float *my_loop_coeff = (
float *)loop_coeff;
368 length[i], my_loop_coeff[i], dir);
382 void *loop_coeff,
int num_paths)
385 int R[] = {2, 2, 2, 2};
393 for (
int dir = 0; dir < 4; dir++) {
395 loop_coeff, num_paths);
void * memset(void *s, int c, size_t n)
enum QudaPrecision_s QudaPrecision
void su3_adjoint(su3_matrix *a, su3_matrix *b)
int neighborIndexFullLattice(int i, int dx4, int dx3, int dx2, int dx1)
void gauge_force_reference(void *refMom, double eb3, void **sitelink, QudaPrecision prec, int ***path_dir, int *length, void *loop_coeff, int num_paths)
void gauge_force_reference_dir(void *refMom, int dir, double eb3, void **sitelink, void **sitelink_ex_2d, QudaPrecision prec, int **path_dir, int *length, void *loop_coeff, int num_paths)
void make_anti_hermitian(su3_matrix *m3, anti_hermitmat *ah3)
int gf_neighborIndexFullLattice(int i, int dx4, int dx3, int dx2, int dx1)
void print_su3_matrix(su3_matrix *a)
#define GOES_FORWARDS(dir)
cudaGaugeField * createExtendedGauge(cudaGaugeField &in, const int *R, TimeProfile &profile, bool redundant_comms=false, QudaReconstructType recon=QUDA_RECONSTRUCT_INVALID)
FloatingPoint< float > Float
Main header file for the QUDA library.
QudaGaugeParam newQudaGaugeParam(void)
QudaGaugeFieldOrder gauge_order
std::complex< real > e[3][3]
void setGaugeParam(QudaGaugeParam &gauge_param)