QUDA  1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
gauge_force_reference.cpp
Go to the documentation of this file.
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <math.h>
4 #include <string.h>
5 #include <type_traits>
6 
7 #include "quda.h"
8 #include "test_util.h"
9 #include "misc.h"
10 #include "gauge_force_reference.h"
11 
12 extern int Z[4];
13 extern int V;
14 extern int Vh;
15 extern int Vh_ex;
16 extern int E[4];
17 
18 
19 #define CADD(a,b,c) { (c).real = (a).real + (b).real; \
20  (c).imag = (a).imag + (b).imag; }
21 #define CMUL(a,b,c) { (c).real = (a).real*(b).real - (a).imag*(b).imag; \
22  (c).imag = (a).real*(b).imag + (a).imag*(b).real; }
23 #define CSUM(a,b) { (a).real += (b).real; (a).imag += (b).imag; }
24 
25 /* c = a* * b */
26 #define CMULJ_(a,b,c) { (c).real = (a).real*(b).real + (a).imag*(b).imag; \
27  (c).imag = (a).real*(b).imag - (a).imag*(b).real; }
28 
29 /* c = a * b* */
30 #define CMUL_J(a,b,c) { (c).real = (a).real*(b).real + (a).imag*(b).imag; \
31  (c).imag = (a).imag*(b).real - (a).real*(b).imag; }
32 
33 #define CONJG(a,b) { (b).real = (a).real; (b).imag = -(a).imag; }
34 
35 typedef struct {
36  float real;
37  float imag;
38 } fcomplex;
39 
40 /* specific for double complex */
41 typedef struct {
42  double real;
43  double imag;
44 } dcomplex;
45 
46 typedef struct { fcomplex e[3][3]; } fsu3_matrix;
47 typedef struct { fcomplex c[3]; } fsu3_vector;
48 typedef struct { dcomplex e[3][3]; } dsu3_matrix;
49 typedef struct { dcomplex c[3]; } dsu3_vector;
50 
51 typedef struct {
52  fcomplex m01,m02,m12;
53  float m00im,m11im,m22im;
54  float space;
56 
57 typedef struct {
58  dcomplex m01,m02,m12;
59  double m00im,m11im,m22im;
60  double space;
62 
63 extern int neighborIndexFullLattice(int i, int dx4, int dx3, int dx2, int dx1);
64 
65 template<typename su3_matrix>
67 {
68  int i,j;
69  for(i=0;i<3;i++)for(j=0;j<3;j++){
70  CONJG( a->e[j][i], b->e[i][j] );
71  }
72 }
73 
74 
75 template<typename su3_matrix, typename anti_hermitmat>
76 void
77 make_anti_hermitian( su3_matrix *m3, anti_hermitmat *ah3 )
78 {
79  auto temp =
80  (m3->e[0][0].imag + m3->e[1][1].imag + m3->e[2][2].imag)*0.33333333333333333;
81  ah3->m00im = m3->e[0][0].imag - temp;
82  ah3->m11im = m3->e[1][1].imag - temp;
83  ah3->m22im = m3->e[2][2].imag - temp;
84  ah3->m01.real = (m3->e[0][1].real - m3->e[1][0].real)*0.5;
85  ah3->m02.real = (m3->e[0][2].real - m3->e[2][0].real)*0.5;
86  ah3->m12.real = (m3->e[1][2].real - m3->e[2][1].real)*0.5;
87  ah3->m01.imag = (m3->e[0][1].imag + m3->e[1][0].imag)*0.5;
88  ah3->m02.imag = (m3->e[0][2].imag + m3->e[2][0].imag)*0.5;
89  ah3->m12.imag = (m3->e[1][2].imag + m3->e[2][1].imag)*0.5;
90 
91 }
92 
93 template <typename anti_hermitmat, typename su3_matrix>
94 static void
95 uncompress_anti_hermitian(anti_hermitmat *mat_antihermit,
96  su3_matrix *mat_su3 )
97 {
98  typename std::remove_reference<decltype(mat_antihermit->m00im)>::type temp1;
99  mat_su3->e[0][0].imag=mat_antihermit->m00im;
100  mat_su3->e[0][0].real=0.;
101  mat_su3->e[1][1].imag=mat_antihermit->m11im;
102  mat_su3->e[1][1].real=0.;
103  mat_su3->e[2][2].imag=mat_antihermit->m22im;
104  mat_su3->e[2][2].real=0.;
105  mat_su3->e[0][1].imag=mat_antihermit->m01.imag;
106  temp1=mat_antihermit->m01.real;
107  mat_su3->e[0][1].real=temp1;
108  mat_su3->e[1][0].real= -temp1;
109  mat_su3->e[1][0].imag=mat_antihermit->m01.imag;
110  mat_su3->e[0][2].imag=mat_antihermit->m02.imag;
111  temp1=mat_antihermit->m02.real;
112  mat_su3->e[0][2].real=temp1;
113  mat_su3->e[2][0].real= -temp1;
114  mat_su3->e[2][0].imag=mat_antihermit->m02.imag;
115  mat_su3->e[1][2].imag=mat_antihermit->m12.imag;
116  temp1=mat_antihermit->m12.real;
117  mat_su3->e[1][2].real=temp1;
118  mat_su3->e[2][1].real= -temp1;
119  mat_su3->e[2][1].imag=mat_antihermit->m12.imag;
120 }
121 
122 template <typename su3_matrix, typename Float>
123 static void
125 {
126  int i,j;
127  for(i=0;i<3;i++){
128  for(j=0;j<3;j++){
129  c->e[i][j].real = a->e[i][j].real - s*b->e[i][j].real;
130  c->e[i][j].imag = a->e[i][j].imag - s*b->e[i][j].imag;
131  }
132  }
133 }
134 
135 template <typename su3_matrix, typename Float>
136 static void
138 {
139  int i,j;
140  for(i=0;i<3;i++){
141  for(j=0;j<3;j++){
142  c->e[i][j].real = a->e[i][j].real + s*b->e[i][j].real;
143  c->e[i][j].imag = a->e[i][j].imag + s*b->e[i][j].imag;
144  }
145  }
146 }
147 
148 template <typename su3_matrix>
149 static void
151 {
152  int i,j,k;
153  typename std::remove_reference<decltype(a->e[0][0])>::type x,y;
154  for(i=0;i<3;i++){
155  for(j=0;j<3;j++){
156  x.real=x.imag=0.0;
157  for(k=0;k<3;k++){
158  CMUL( a->e[i][k] , b->e[k][j] , y );
159  CSUM( x , y );
160  }
161  c->e[i][j] = x;
162  }
163  }
164 }
165 template<typename su3_matrix>
166 static void
168 {
169  int i,j,k;
170  typename std::remove_reference<decltype(a->e[0][0])>::type x,y;
171  for(i=0;i<3;i++){
172  for(j=0;j<3;j++){
173  x.real=x.imag=0.0;
174  for(k=0;k<3;k++){
175  CMULJ_( a->e[k][i] , b->e[k][j], y );
176  CSUM( x , y );
177  }
178  c->e[i][j] = x;
179  }
180  }
181 }
182 
183 template<typename su3_matrix>
184 static void
186 {
187  int i,j,k;
188  typename std::remove_reference<decltype(a->e[0][0])>::type x,y;
189  for(i=0;i<3;i++){
190  for(j=0;j<3;j++){
191  x.real=x.imag=0.0;
192  for(k=0;k<3;k++){
193  CMUL_J( a->e[i][k] , b->e[j][k] , y );
194  CSUM( x , y );
195  }
196  c->e[i][j] = x;
197  }
198  }
199 }
200 
201 template < typename su3_matrix>
202 void
204 {
205  int i, j;
206  for(i=0;i < 3; i++){
207  for(j=0;j < 3;j++){
208  printf("(%f %f)\t", a->e[i][j].real, a->e[i][j].imag);
209  }
210  printf("\n");
211  }
212 
213 }
214 
215 
216 int
217 gf_neighborIndexFullLattice(int i, int dx4, int dx3, int dx2, int dx1)
218 {
219  int oddBit = 0;
220  int half_idx = i;
221  if (i >= Vh){
222  oddBit =1;
223  half_idx = i - Vh;
224  }
225  int X1 = Z[0];
226  int X2 = Z[1];
227  int X3 = Z[2];
228  //int X4 = Z[3];
229  int X1h =X1/2;
230 
231  int za = half_idx/X1h;
232  int x1h = half_idx - za*X1h;
233  int zb = za/X2;
234  int x2 = za - zb*X2;
235  int x4 = zb/X3;
236  int x3 = zb - x4*X3;
237  int x1odd = (x2 + x3 + x4 + oddBit) & 1;
238  int x1 = 2*x1h + x1odd;
239 
240 #ifdef MULTI_GPU
241  x4 = x4+dx4;
242  x3 = x3+dx3;
243  x2 = x2+dx2;
244  x1 = x1+dx1;
245 
246  int nbr_half_idx = ( (x4+2)*(E[2]*E[1]*E[0]) + (x3+2)*(E[1]*E[0]) + (x2+2)*(E[0]) + (x1+2)) / 2;
247 #else
248  x4 = (x4+dx4+Z[3]) % Z[3];
249  x3 = (x3+dx3+Z[2]) % Z[2];
250  x2 = (x2+dx2+Z[1]) % Z[1];
251  x1 = (x1+dx1+Z[0]) % Z[0];
252 
253  int nbr_half_idx = (x4*(Z[2]*Z[1]*Z[0]) + x3*(Z[1]*Z[0]) + x2*(Z[0]) + x1) / 2;
254 #endif
255 
256  int oddBitChanged = (dx4+dx3+dx2+dx1)%2;
257  if (oddBitChanged){
258  oddBit = 1 - oddBit;
259  }
260  int ret = nbr_half_idx;
261  if (oddBit){
262 #ifdef MULTI_GPU
263  ret = Vh_ex + nbr_half_idx;
264 #else
265  ret = Vh + nbr_half_idx;
266 #endif
267  }
268 
269  return ret;
270 }
271 
272 
273 
274 //this functon compute one path for all lattice sites
275 template<typename su3_matrix, typename Float>
276 static void
277 compute_path_product(su3_matrix* staple, su3_matrix** sitelink, su3_matrix** sitelink_ex_2d,
278  int* path, int len, Float loop_coeff, int dir)
279 {
280  int i, j;
281 
282  su3_matrix prev_matrix, curr_matrix, tmat;
283  int dx[4];
284 
285  for(i=0;i<V;i++){
286  memset(dx,0, sizeof(dx));
287  memset(&curr_matrix, 0, sizeof(curr_matrix));
288 
289  curr_matrix.e[0][0].real = 1.0;
290  curr_matrix.e[1][1].real = 1.0;
291  curr_matrix.e[2][2].real = 1.0;
292 
293  dx[dir] =1;
294  for(j=0; j < len;j++){
295  int lnkdir;
296 
297  prev_matrix = curr_matrix;
298  if (GOES_FORWARDS(path[j])){
299  //dx[path[j]] +=1;
300  lnkdir = path[j];
301  }else{
302  dx[OPP_DIR(path[j])] -=1;
303  lnkdir=OPP_DIR(path[j]);
304  }
305 
306  int nbr_idx = gf_neighborIndexFullLattice(i, dx[3], dx[2], dx[1], dx[0]);
307 #ifdef MULTI_GPU
308  su3_matrix* lnk = sitelink_ex_2d[lnkdir] + nbr_idx;
309 #else
310  su3_matrix* lnk = sitelink[lnkdir]+ nbr_idx;
311 #endif
312  if (GOES_FORWARDS(path[j])){
313  mult_su3_nn(&prev_matrix, lnk, &curr_matrix);
314  }else{
315  mult_su3_na(&prev_matrix, lnk, &curr_matrix);
316  }
317 
318  if (GOES_FORWARDS(path[j])){
319  dx[path[j]] +=1;
320  }else{
321  //we already substract one in the code above
322  }
323 
324 
325  }//j
326 
327  su3_adjoint(&curr_matrix, &tmat );
328 
329  scalar_mult_add_su3_matrix(staple + i , &tmat, loop_coeff, staple+i);
330 
331  }//i
332 
333  return;
334 }
335 
336 
337 template <typename su3_matrix, typename anti_hermitmat, typename Float>
338 static void
339 update_mom(anti_hermitmat* momentum, int dir, su3_matrix** sitelink,
340  su3_matrix* staple, Float eb3)
341 {
342  int i;
343  for(i=0;i <V; i++){
344  su3_matrix tmat1;
345  su3_matrix tmat2;
346  su3_matrix tmat3;
347 
348  su3_matrix* lnk = sitelink[dir] + i;
349  su3_matrix* stp = staple + i;
350  anti_hermitmat* mom = momentum + 4*i+dir;
351 
352  mult_su3_na(lnk, stp, &tmat1);
353  uncompress_anti_hermitian(mom, &tmat2);
354 
355  scalar_mult_sub_su3_matrix(&tmat2, &tmat1, eb3, &tmat3);
356  make_anti_hermitian(&tmat3, mom);
357 
358  }
359 
360 }
361 
362 
363 
364 /* This function only computes one direction @dir
365  *
366  */
367 
368 void
369 gauge_force_reference_dir(void* refMom, int dir, double eb3, void** sitelink, void** sitelink_ex_2d, QudaPrecision prec,
370  int **path_dir, int* length, void* loop_coeff, int num_paths)
371 {
372  int i;
373 
374  void* staple;
375  int gSize = prec;
376 
377  staple = malloc(V* gaugeSiteSize* gSize);
378  if (staple == NULL){
379  fprintf(stderr, "ERROR: malloc failed for staple in functon %s\n", __FUNCTION__);
380  exit(1);
381  }
382 
383  memset(staple, 0, V*gaugeSiteSize* gSize);
384 
385  for(i=0;i < num_paths; i++){
386  if (prec == QUDA_DOUBLE_PRECISION){
387  double* my_loop_coeff = (double*)loop_coeff;
388  compute_path_product((dsu3_matrix*)staple, (dsu3_matrix**)sitelink, (dsu3_matrix**)sitelink_ex_2d,
389  path_dir[i], length[i], my_loop_coeff[i], dir);
390  }else{
391  float* my_loop_coeff = (float*)loop_coeff;
392  compute_path_product((fsu3_matrix*)staple, (fsu3_matrix**)sitelink, (fsu3_matrix**)sitelink_ex_2d,
393  path_dir[i], length[i], my_loop_coeff[i], dir);
394  }
395  }
396 
397  if (prec == QUDA_DOUBLE_PRECISION){
398  update_mom((danti_hermitmat*) refMom, dir, (dsu3_matrix**)sitelink, (dsu3_matrix*)staple, (double)eb3);
399  }else{
400  update_mom((fanti_hermitmat*)refMom, dir, (fsu3_matrix**)sitelink, (fsu3_matrix*)staple, (float)eb3);
401  }
402 
403  free(staple);
404 }
405 
406 
407 void
408 gauge_force_reference(void* refMom, double eb3, void** sitelink, void** sitelink_ex_2d, QudaPrecision prec,
409  int ***path_dir, int* length, void* loop_coeff, int num_paths)
410 {
411  for(int dir =0; dir < 4; dir++){
412  gauge_force_reference_dir(refMom, dir, eb3, sitelink, sitelink_ex_2d, prec, path_dir[dir],
413  length, loop_coeff, num_paths);
414 
415  }
416 
417 }
void gauge_force_reference_dir(void *refMom, int dir, double eb3, void **sitelink, void **sitelink_ex_2d, QudaPrecision prec, int **path_dir, int *length, void *loop_coeff, int num_paths)
static size_t gSize
int Vh
Definition: test_util.cpp:28
enum QudaPrecision_s QudaPrecision
#define CSUM(a, b)
static int X2
Definition: face_gauge.cpp:42
void print_su3_matrix(su3_matrix *a)
int gf_neighborIndexFullLattice(int i, int dx4, int dx3, int dx2, int dx1)
void make_anti_hermitian(su3_matrix *m3, anti_hermitmat *ah3)
#define CMUL_J(a, b, c)
int neighborIndexFullLattice(int i, int dx4, int dx3, int dx2, int dx1)
Definition: test_util.cpp:556
int Z[4]
Definition: test_util.cpp:26
int E[4]
Definition: test_util.cpp:35
int length[]
#define GOES_FORWARDS(dir)
Definition: misc.h:86
static void scalar_mult_add_su3_matrix(su3_matrix *a, su3_matrix *b, Float s, su3_matrix *c)
void gauge_force_reference(void *refMom, double eb3, void **sitelink, void **sitelink_ex_2d, QudaPrecision prec, int ***path_dir, int *length, void *loop_coeff, int num_paths)
void su3_adjoint(su3_matrix *a, su3_matrix *b)
cpuGaugeField * refMom
int V
Definition: test_util.cpp:27
static void uncompress_anti_hermitian(anti_hermitmat *mat_antihermit, su3_matrix *mat_su3)
int Vh_ex
Definition: test_util.cpp:36
static void compute_path_product(su3_matrix *staple, su3_matrix **sitelink, su3_matrix **sitelink_ex_2d, int *path, int len, Float loop_coeff, int dir)
static void mult_su3_nn(su3_matrix *a, su3_matrix *b, su3_matrix *c)
static void update_mom(anti_hermitmat *momentum, int dir, su3_matrix **sitelink, su3_matrix *staple, Float eb3)
static void scalar_mult_sub_su3_matrix(su3_matrix *a, su3_matrix *b, Float s, su3_matrix *c)
static int X3
Definition: face_gauge.cpp:42
static int X1
Definition: face_gauge.cpp:42
std::complex< real > e[3][3]
void * memset(void *s, int c, size_t n)
#define OPP_DIR(dir)
Definition: misc.h:85
#define CONJG(a, b)
Main header file for the QUDA library.
#define CMUL(a, b, c)
__shared__ float s[]
static void mult_su3_an(su3_matrix *a, su3_matrix *b, su3_matrix *c)
static void mult_su3_na(su3_matrix *a, su3_matrix *b, su3_matrix *c)
#define CMULJ_(a, b, c)
QudaPrecision prec
Definition: test_util.cpp:1608
#define gaugeSiteSize
Definition: face_gauge.cpp:34