QUDA  v1.1.0
A library for QCD on GPUs
hisq_force_reference.cpp
Go to the documentation of this file.
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <math.h>
4 #include <string.h>
5 #include <type_traits>
6 
7 #include <quda.h>
8 #include <host_utils.h>
9 #include <misc.h>
10 #include <hisq_force_reference.h>
11 
12 extern int Z[4];
13 extern int V;
14 extern int Vh;
15 
16 
17 #define CADD(a,b,c) { (c).real = (a).real + (b).real; \
18  (c).imag = (a).imag + (b).imag; }
19 #define CMUL(a,b,c) { (c).real = (a).real*(b).real - (a).imag*(b).imag; \
20  (c).imag = (a).real*(b).imag + (a).imag*(b).real; }
21 #define CSUM(a,b) { (a).real += (b).real; (a).imag += (b).imag; }
22 
23 /* c = a* * b */
24 #define CMULJ_(a,b,c) { (c).real = (a).real*(b).real + (a).imag*(b).imag; \
25  (c).imag = (a).real*(b).imag - (a).imag*(b).real; }
26 
27 /* c = a * b* */
28 #define CMUL_J(a,b,c) { (c).real = (a).real*(b).real + (a).imag*(b).imag; \
29  (c).imag = (a).imag*(b).real - (a).real*(b).imag; }
30 
31 #define CONJG(a,b) { (b).real = (a).real; (b).imag = -(a).imag; }
32 
33 typedef struct {
34  float real;
35  float imag;
36 } fcomplex;
37 
38 /* specific for double complex */
39 typedef struct {
40  double real;
41  double imag;
42 } dcomplex;
43 
44 typedef struct { fcomplex e[3][3]; } fsu3_matrix;
45 typedef struct { fcomplex c[3]; } fsu3_vector;
46 typedef struct { dcomplex e[3][3]; } dsu3_matrix;
47 typedef struct { dcomplex c[3]; } dsu3_vector;
48 
49 typedef struct {
50  fcomplex m01,m02,m12;
51  float m00im,m11im,m22im;
52  float space;
54 
55 typedef struct {
56  dcomplex m01,m02,m12;
57  double m00im,m11im,m22im;
58  double space;
60 
61 typedef struct { fsu3_vector h[2]; } fhalf_wilson_vector;
62 typedef struct { dsu3_vector h[2]; } dhalf_wilson_vector;
63 
64 
65 template<typename su3_matrix>
66 su3_matrix* get_su3_matrix(int gauge_order, su3_matrix* p, int idx, int dir)
67 {
69  return (p + 4*idx + dir);
70  }else if(gauge_order == QUDA_QDP_GAUGE_ORDER){ // This is nasty!
71  su3_matrix* data = ((su3_matrix**)p)[dir];
72  return data + idx;
73  }else{
74  errorQuda("get_su3_matrix: unsupported ordering scheme!\n");
75  }
76  return NULL;
77 }
78 
79 template<typename su3_matrix>
80 static void
82 {
83  int i,j;
84  for(i=0;i<3;i++)for(j=0;j<3;j++){
85  CONJG( a->e[j][i], b->e[i][j] );
86  }
87 }
88 
89 template<typename su3_matrix>
90 static void
91 adjoint_su3_matrix( su3_matrix *a)
92 {
93  su3_matrix b;
94  int i,j;
95  for(i=0;i<3;i++)for(j=0;j<3;j++){
96  CONJG( a->e[j][i], b.e[i][j] );
97  }
98 
99  *a = b;
100 }
101 
102 template<typename su3_matrix, typename anti_hermitmat>
103 static void
104 make_anti_hermitian( su3_matrix *m3, anti_hermitmat *ah3 )
105 {
106  auto temp = (m3->e[0][0].imag + m3->e[1][1].imag + m3->e[2][2].imag)*0.33333333333333333;
107  ah3->m00im = m3->e[0][0].imag - temp;
108  ah3->m11im = m3->e[1][1].imag - temp;
109  ah3->m22im = m3->e[2][2].imag - temp;
110  ah3->m01.real = (m3->e[0][1].real - m3->e[1][0].real)*0.5;
111  ah3->m02.real = (m3->e[0][2].real - m3->e[2][0].real)*0.5;
112  ah3->m12.real = (m3->e[1][2].real - m3->e[2][1].real)*0.5;
113  ah3->m01.imag = (m3->e[0][1].imag + m3->e[1][0].imag)*0.5;
114  ah3->m02.imag = (m3->e[0][2].imag + m3->e[2][0].imag)*0.5;
115  ah3->m12.imag = (m3->e[1][2].imag + m3->e[2][1].imag)*0.5;
116 
117 }
118 
119 template <typename anti_hermitmat, typename su3_matrix>
120 static void
121 uncompress_anti_hermitian(anti_hermitmat *mat_antihermit,
122  su3_matrix *mat_su3 )
123 {
124  typename std::remove_reference<decltype(mat_antihermit->m00im)>::type temp1;
125  mat_su3->e[0][0].imag=mat_antihermit->m00im;
126  mat_su3->e[0][0].real=0.;
127  mat_su3->e[1][1].imag=mat_antihermit->m11im;
128  mat_su3->e[1][1].real=0.;
129  mat_su3->e[2][2].imag=mat_antihermit->m22im;
130  mat_su3->e[2][2].real=0.;
131  mat_su3->e[0][1].imag=mat_antihermit->m01.imag;
132  temp1=mat_antihermit->m01.real;
133  mat_su3->e[0][1].real=temp1;
134  mat_su3->e[1][0].real= -temp1;
135  mat_su3->e[1][0].imag=mat_antihermit->m01.imag;
136  mat_su3->e[0][2].imag=mat_antihermit->m02.imag;
137  temp1=mat_antihermit->m02.real;
138  mat_su3->e[0][2].real=temp1;
139  mat_su3->e[2][0].real= -temp1;
140  mat_su3->e[2][0].imag=mat_antihermit->m02.imag;
141  mat_su3->e[1][2].imag=mat_antihermit->m12.imag;
142  temp1=mat_antihermit->m12.real;
143  mat_su3->e[1][2].real=temp1;
144  mat_su3->e[2][1].real= -temp1;
145  mat_su3->e[2][1].imag=mat_antihermit->m12.imag;
146 }
147 
148 template <typename su3_matrix, typename Float>
149 static void
150 scalar_mult_sub_su3_matrix(su3_matrix *a,su3_matrix *b, Float s, su3_matrix *c)
151 {
152  int i,j;
153  for(i=0;i<3;i++){
154  for(j=0;j<3;j++){
155  c->e[i][j].real = a->e[i][j].real - s*b->e[i][j].real;
156  c->e[i][j].imag = a->e[i][j].imag - s*b->e[i][j].imag;
157  }
158  }
159 }
160 
161 template <typename su3_matrix, typename Float>
162 static void
163 scalar_mult_add_su3_matrix(su3_matrix *a,su3_matrix *b, Float s, su3_matrix *c)
164 {
165  int i,j;
166  for(i=0;i<3;i++){
167  for(j=0;j<3;j++){
168  c->e[i][j].real = a->e[i][j].real + s*b->e[i][j].real;
169  c->e[i][j].imag = a->e[i][j].imag + s*b->e[i][j].imag;
170  }
171  }
172 }
173 template <typename su3_matrix, typename Float>
174 static void
175 scale_su3_matrix(su3_matrix *a, Float s)
176 {
177  int i,j;
178  for(i=0;i<3;i++){
179  for(j=0;j<3;j++){
180  a->e[i][j].real = a->e[i][j].real * s;
181  a->e[i][j].imag = a->e[i][j].imag * s;
182  }
183  }
184 }
185 
186 template<typename su3_matrix, typename su3_vector>
187 static void
188 mult_su3_mat_vec( su3_matrix *a, su3_vector *b, su3_vector *c )
189 {
190  int i,j;
191  typename std::remove_reference<decltype(a->e[0][0])>::type x,y;
192  for(i=0;i<3;i++){
193  x.real=x.imag=0.0;
194  for(j=0;j<3;j++){
195  CMUL( a->e[i][j] , b->c[j] , y );
196  CSUM( x , y );
197  }
198  c->c[i]=x;
199  }
200 }
201 template<typename su3_matrix, typename su3_vector>
202 static void
203 mult_adj_su3_mat_vec( su3_matrix *a, su3_vector *b, su3_vector *c )
204 {
205  int i,j;
206  typename std::remove_reference<decltype(a->e[0][0])>::type x,y,z;
207  for(i=0;i<3;i++){
208  x.real=x.imag=0.0;
209  for(j=0;j<3;j++){
210  CONJG( a->e[j][i], z );
211  CMUL( z , b->c[j], y );
212  CSUM( x , y );
213  }
214  c->c[i] = x;
215  }
216 }
217 
218 template<typename su3_vector, typename su3_matrix>
219 static void
220 su3_projector( su3_vector *a, su3_vector *b, su3_matrix *c )
221 {
222  int i,j;
223  for(i=0;i<3;i++)for(j=0;j<3;j++){
224  CMUL_J( a->c[i], b->c[j], c->e[i][j] );
225  }
226 }
227 
228 template<typename su3_vector, typename Real>
229 static void
230 scalar_mult_add_su3_vector(su3_vector *a, su3_vector *b, Real s,
231  su3_vector *c)
232 {
233  int i;
234  for(i=0;i<3;i++){
235  c->c[i].real = a->c[i].real + s*b->c[i].real;
236  c->c[i].imag = a->c[i].imag + s*b->c[i].imag;
237  }
238 }
239 
240 
241 
242 template < typename su3_matrix>
243 static void
245 {
246  int i, j;
247  for(i=0;i < 3; i++){
248  for(j=0;j < 3;j++){
249  printf("(%f %f)\t", a->e[i][j].real, a->e[i][j].imag);
250  }
251  printf("\n");
252  }
253 
254 }
255 
256 
257 
258 // Add a matrix multiplication function
259 template<typename su3_matrix>
260 static void
261 matrix_mult_nn(su3_matrix* a, su3_matrix* b, su3_matrix* c){
262  // c = a*b
263  typename std::remove_reference<decltype(c->e[0][0])>::type x;
264  for(int i=0; i<3; i++){
265  for(int j=0; j<3; j++){
266  c->e[i][j].real = 0.;
267  c->e[i][j].imag = 0.;
268  for(int k=0; k<3; k++){
269  CMUL(a->e[i][k],b->e[k][j],x);
270  c->e[i][j].real += x.real;
271  c->e[i][j].imag += x.imag;
272  }
273  }
274  }
275  return;
276 }
277 
278 
279 template<typename su3_matrix>
280 static void
281 matrix_mult_an(su3_matrix* a, su3_matrix* b, su3_matrix* c){
282  // c = (a^{\dagger})*b
283  typename std::remove_reference<decltype(c->e[0][0])>::type x;
284  for(int i=0; i<3; i++){
285  for(int j=0; j<3; j++){
286  c->e[i][j].real = 0.;
287  c->e[i][j].imag = 0.;
288  for(int k=0; k<3; k++){
289  CMULJ_(a->e[k][i],b->e[k][j],x);
290  c->e[i][j].real += x.real;
291  c->e[i][j].imag += x.imag;
292  }
293  }
294  }
295  return;
296 }
297 
298 
299 
300 template<typename su3_matrix>
301 static void
302 matrix_mult_na(su3_matrix* a, su3_matrix* b, su3_matrix* c){
303  // c = a*b^{\dagger}
304  typename std::remove_reference<decltype(c->e[0][0])>::type x;
305  for(int i=0; i<3; i++){
306  for(int j=0; j<3; j++){
307  c->e[i][j].real = 0.; c->e[i][j].imag = 0.;
308  for(int k=0; k<3; k++){
309  CMUL_J(a->e[i][k],b->e[j][k],x);
310  c->e[i][j].real += x.real;
311  c->e[i][j].imag += x.imag;
312  }
313  }
314  }
315  return;
316 }
317 
318 template<typename su3_matrix>
319 static void
320 matrix_mult_aa(su3_matrix* a, su3_matrix* b, su3_matrix* c){
321 
322  su3_matrix a_adjoint;
323  su3_adjoint(a, &a_adjoint);
324  matrix_mult_na(&a_adjoint, b, c);
325 }
326 
327 template <typename su3_matrix, typename anti_hermitmat, typename Float>
328 static void
329 update_mom(anti_hermitmat* momentum, int dir, su3_matrix* sitelink,
330  su3_matrix* staple, Float eb3)
331 {
332  int i;
333  for(i=0;i <V; i++){
334  su3_matrix tmat1;
335  su3_matrix tmat2;
336  su3_matrix tmat3;
337 
338  su3_matrix* lnk = sitelink + 4*i+dir;
339  su3_matrix* stp = staple + i;
340  anti_hermitmat* mom = momentum + 4*i+dir;
341 
342  mult_su3_na(lnk, stp, &tmat1);
343  uncompress_anti_hermitian(mom, &tmat2);
344 
345  scalar_mult_sub_su3_matrix(&tmat2, &tmat1, eb3, &tmat3);
346  make_anti_hermitian(&tmat3, mom);
347 
348  }
349 
350 }
351 
352 
353 template<typename half_wilson_vector, typename su3_matrix>
354 static void
355 u_shift_hw(half_wilson_vector *src, half_wilson_vector *dest, int dir, su3_matrix* sitelink )
356 {
357  int i ;
358  int dx[4];
359 
360  dx[3]=dx[2]=dx[1]=dx[0]=0;
361 
362  if(GOES_FORWARDS(dir)){
363  dx[dir]=1;
364  for(i=0;i < V; i++){
365  int nbr_idx = neighborIndexFullLattice(i, dx[3], dx[2], dx[1], dx[0]);
366  half_wilson_vector* hw = src + nbr_idx;
367  su3_matrix* link = sitelink + i*4 + dir;
368  mult_su3_mat_vec(link, &hw->h[0], &dest[i].h[0]);
369  mult_su3_mat_vec(link, &hw->h[1], &dest[i].h[1]);
370  }
371  }else{
372  dx[OPP_DIR(dir)]=-1;
373  for(i=0;i < V; i++){
374  int nbr_idx = neighborIndexFullLattice(i, dx[3], dx[2], dx[1], dx[0]);
375  half_wilson_vector* hw = src + nbr_idx;
376  su3_matrix* link = sitelink + nbr_idx*4 + OPP_DIR(dir);
377  mult_adj_su3_mat_vec(link, &hw->h[0], &dest[i].h[0]);
378  mult_adj_su3_mat_vec(link, &hw->h[1], &dest[i].h[1]);
379  }
380  }
381 }
382 
383 
384 
385 template<typename half_wilson_vector, typename su3_matrix>
386 static void
387 shifted_outer_prod(half_wilson_vector *src, su3_matrix* dest, int dir)
388 {
389 
390  int i;
391  int dx[4];
392 
393  dx[3]=dx[2]=dx[1]=dx[0]=0;
394 
395  if(GOES_FORWARDS(dir)){
396  dx[dir]=1;
397  }else{ dx[OPP_DIR(dir)]=-1; }
398 
399  for(i=0;i < V; i++){
400  int nbr_idx = neighborIndexFullLattice(i, dx[3], dx[2], dx[1], dx[0]);
401  half_wilson_vector* hw = src + nbr_idx;
402  su3_projector( &src[i].h[0], &(hw->h[0]), &dest[i]);
403  }
404 }
405 
406 
407 template<typename half_wilson_vector, typename su3_matrix>
408 static void
409 forward_shifted_outer_prod(half_wilson_vector *src, su3_matrix* dest, int dir)
410 {
411 
412  int i;
413  int dx[4];
414 
415  dx[3]=dx[2]=dx[1]=dx[0]=0;
416 
417  if(GOES_FORWARDS(dir)){
418  dx[dir]=1;
419  }else{ dx[OPP_DIR(dir)]=-1; }
420 
421  for(i=0;i < V; i++){
422  int nbr_idx = neighborIndexFullLattice(i, dx[3], dx[2], dx[1], dx[0]);
423  half_wilson_vector* hw = src + nbr_idx;
424  //su3_projector( &src[i].h[0], &(hw->h[0]), &dest[i]);
425  su3_projector( &(hw->h[0]), &src[i].h[0], &dest[i]);
426  }
427 
428  return;
429 }
430 
431 
432 
433 
434 
435 template <typename half_wilson_vector, typename su3_matrix>
436 static void
437 computeLinkOrderedOuterProduct(half_wilson_vector *src, su3_matrix* dest, int gauge_order)
438 {
439  int dx[4];
440  for(int i=0; i<V; ++i){
441  for(int dir=0; dir<4; ++dir){
442  dx[3]=dx[2]=dx[1]=dx[0]=0;
443  dx[dir] = 1;
444  int nbr_idx = neighborIndexFullLattice(i, dx[3], dx[2], dx[1], dx[0]);
445  half_wilson_vector* hw = src + nbr_idx;
446  su3_matrix* p = get_su3_matrix(gauge_order, dest, i, dir);
447  su3_projector( &(hw->h[0]), &src[i].h[0], p);
448  } // dir
449  } // i
450  return;
451 }
452 
453 
454 template <typename half_wilson_vector, typename su3_matrix>
455 static void
456 computeLinkOrderedOuterProduct(half_wilson_vector *src, su3_matrix* dest, size_t nhops, int gauge_order)
457 {
458  int dx[4];
459  for(int i=0; i<V; ++i){
460  for(int dir=0; dir<4; ++dir){
461  dx[3]=dx[2]=dx[1]=dx[0]=0;
462  dx[dir] = nhops;
463  int nbr_idx = neighborIndexFullLattice(i, dx[3], dx[2], dx[1], dx[0]);
464  half_wilson_vector* hw = src + nbr_idx;
465  su3_matrix* p = get_su3_matrix(gauge_order, dest, i, dir);
466  su3_projector( &(hw->h[0]), &src[i].h[0], p);
467  } // dir
468  } // i
469  return;
470 }
471 
472 
473 
474 
475 void computeLinkOrderedOuterProduct(void *src, void *dst, QudaPrecision precision, int gauge_order)
476 {
477  if(precision == QUDA_SINGLE_PRECISION){
479  }else{
481  }
482  return;
483 }
484 
485 void computeLinkOrderedOuterProduct(void *src, void *dst, QudaPrecision precision, size_t nhops, int gauge_order)
486 {
487  if(precision == QUDA_SINGLE_PRECISION){
489  }else{
491  }
492  return;
493 }
494 
495 
496 
497 template<typename half_wilson_vector, typename su3_matrix>
498 static void shiftedOuterProduct(half_wilson_vector *src, su3_matrix* dest){
499  for(int dir=0; dir<4; dir++){
500  shifted_outer_prod(src, &dest[dir*V], OPP_DIR(dir));
501  }
502 }
503 
504 template<typename half_wilson_vector, typename su3_matrix>
505 static void forwardShiftedOuterProduct(half_wilson_vector *src, su3_matrix* dest){
506  for(int dir=0; dir<4; dir++){
507  forward_shifted_outer_prod(src, &dest[dir*V], dir);
508  }
509 }
510 
511 
512 void computeHisqOuterProduct(void* src, void* dest, QudaPrecision precision){
513  if(precision == QUDA_SINGLE_PRECISION){
514  forwardShiftedOuterProduct((fhalf_wilson_vector*)src,(fsu3_matrix*)dest);
515  }else{
516  forwardShiftedOuterProduct((dhalf_wilson_vector*)src,(dsu3_matrix*)dest);
517  }
518 }
519 
520 
521 // Note that the hisq routines do not involve half-wilson vectors,
522 // but instead require colour matrix shifts
523 
524 
525 template<typename su3_matrix>
526 static void
527 u_shift_mat(su3_matrix *src, su3_matrix *dest, int dir, su3_matrix* sitelink)
528 {
529  int i;
530  int dx[4];
531  dx[3]=dx[2]=dx[1]=dx[0]=0;
532 
533  if(GOES_FORWARDS(dir)){
534  dx[dir]=1;
535  for(i=0; i<V; i++){
536  int nbr_idx = neighborIndexFullLattice(i, dx[3], dx[2], dx[1], dx[0]);
537  su3_matrix* mat = src+nbr_idx; // No need for a factor of 4 here, the colour matrices do not have a Lorentz index
538  su3_matrix* link = sitelink + i*4 + dir;
539  matrix_mult_nn(link, mat, &dest[i]);
540  }
541  }else{
542  dx[OPP_DIR(dir)]=-1;
543  for(i=0; i<V; i++){
544  int nbr_idx = neighborIndexFullLattice(i, dx[3], dx[2], dx[1], dx[0]);
545  su3_matrix* mat = src+nbr_idx; // No need for a factor of 4 here, the colour matrices do not have a Lorentz index
546  su3_matrix* link = sitelink + nbr_idx*4 + OPP_DIR(dir);
547  matrix_mult_an(link, mat, &dest[i]);
548  }
549  }
550  return;
551 }
552 
553 
554 
555 
556 
557 
558 
559 template <typename half_wilson_vector,
560  typename anti_hermitmat, typename Real>
561 static void
562 add_3f_force_to_mom(half_wilson_vector *back, half_wilson_vector *forw,
563  int dir, Real coeff[2], anti_hermitmat* momentum)
564 {
565  Real my_coeff[2] ;
566  Real tmp_coeff[2] ;
567  int mydir;
568  int i;
569 
570  if(GOES_BACKWARDS(dir)){
571  mydir = OPP_DIR(dir);
572  my_coeff[0] = -coeff[0];
573  my_coeff[1] = -coeff[1];
574  }else{
575  mydir = dir;
576  my_coeff[0] = coeff[0];
577  my_coeff[1] = coeff[1];
578  }
579 
580  for(i=0;i < V;i++){
581  if (i < Vh){
582  tmp_coeff[0] = my_coeff[0];
583  tmp_coeff[1] = my_coeff[1];
584  }else{
585  tmp_coeff[0] = -my_coeff[0];
586  tmp_coeff[1] = -my_coeff[1] ;
587  }
588 
589  if (sizeof(Real) == sizeof(float)){
590  fsu3_matrix tmat;
591  fsu3_matrix mom_matrix;
592  anti_hermitmat* mom = momentum+ 4* i + mydir;
593  uncompress_anti_hermitian(mom, &mom_matrix);
594  su3_projector( &back[i].h[0], &forw[i].h[0], &tmat);
595  scalar_mult_add_su3_matrix(&mom_matrix, &tmat, tmp_coeff[0], &mom_matrix );
596  make_anti_hermitian(&mom_matrix, mom);
597  }else{
598  dsu3_matrix tmat;
599  dsu3_matrix mom_matrix;
600  anti_hermitmat* mom = momentum+ 4* i + mydir;
601  uncompress_anti_hermitian(mom, &mom_matrix);
602  su3_projector( &back[i].h[0], &forw[i].h[0], &tmat);
603  scalar_mult_add_su3_matrix(&mom_matrix, &tmat, tmp_coeff[0], &mom_matrix );
604  make_anti_hermitian(&mom_matrix, mom);
605  }
606  }
607 }
608 
609 
610 template<typename Real, typename half_wilson_vector, typename anti_hermitmat>
611  static void
612 side_link_3f_force(int mu, int nu, Real coeff[2], half_wilson_vector *Path ,
613  half_wilson_vector *Path_nu, half_wilson_vector *Path_mu,
614  half_wilson_vector *Path_numu, anti_hermitmat* mom)
615 {
616  Real m_coeff[2] ;
617 
618  m_coeff[0] = -coeff[0] ;
619  m_coeff[1] = -coeff[1] ;
620 
621  if(GOES_FORWARDS(mu)){
622  if(GOES_FORWARDS(nu)){
623  add_3f_force_to_mom(Path_numu, Path, mu, coeff, mom) ;
624  }else{
625  add_3f_force_to_mom(Path,Path_numu,OPP_DIR(mu),m_coeff, mom);
626  }
627  }
628  else{
629  if(GOES_FORWARDS(nu))
630  add_3f_force_to_mom(Path_nu, Path_mu, mu, m_coeff, mom);
631  else
632  add_3f_force_to_mom(Path_mu, Path_nu, OPP_DIR(mu), coeff, mom) ;
633  }
634 }
635 
636 
637 template<typename Real, typename su3_matrix, typename anti_hermitmat>
638  static void
639 add_force_to_momentum(su3_matrix *back, su3_matrix *forw,
640  int dir, Real coeff, anti_hermitmat* momentum)
641 {
642  Real my_coeff;
643  Real tmp_coeff;
644  int mydir;
645  int i;
646 
647  if(GOES_BACKWARDS(dir)){
648  mydir = OPP_DIR(dir);
649  my_coeff = -coeff;
650  }else{
651  mydir = dir;
652  my_coeff = coeff;
653  }
654 
655 
656  for(i=0; i<V; i++){
657  if(i<Vh){ tmp_coeff = my_coeff; }
658  else{ tmp_coeff = -my_coeff; }
659 
660 
661  su3_matrix tmat;
662  su3_matrix mom_matrix;
663  anti_hermitmat* mom = momentum + 4*i + mydir;
664  uncompress_anti_hermitian(mom, &mom_matrix);
665  matrix_mult_na(&back[i], &forw[i], &tmat);
666  scalar_mult_add_su3_matrix(&mom_matrix, &tmat, tmp_coeff, &mom_matrix);
667 
668  make_anti_hermitian(&mom_matrix, mom);
669  }
670  return;
671 }
672 
673 
674 template<typename Real, typename su3_matrix, typename anti_hermitmat>
675 static void
676 side_link_force(int mu, int nu, Real coeff, su3_matrix *Path,
677  su3_matrix *Path_nu, su3_matrix *Path_mu,
678  su3_matrix *Path_numu, anti_hermitmat* mom)
679 {
680  Real m_coeff;
681  m_coeff = -coeff;
682 
683  if(GOES_FORWARDS(mu)){
684  if(GOES_FORWARDS(nu)){
685  add_force_to_momentum(Path_numu, Path, mu, coeff, mom);
686  // In the example below:
687  // add_force_to_momentum(P7rho, Qnumu, rho, coeff, mom)
688  }else{
689  add_force_to_momentum(Path, Path_numu, OPP_DIR(mu), m_coeff, mom);
690  // add_force_to_momentum(Qnumu, P7rho, -Qnumu, rho, m_coeff, mom)
691  }
692  }
693  else { // if (GOES_BACKWARDS(mu))
694  if(GOES_FORWARDS(nu)){
695  add_force_to_momentum(Path_nu, Path_mu, mu, m_coeff, mom);
696  // add_force_to_momentum(P7, Qrhonumu, rho, m_coeff, mom)
697  }else{
698  add_force_to_momentum(Path_mu, Path_nu, OPP_DIR(mu), coeff, mom);
699  // add_force_to_momentum(Qrhonumu, P7, rho, coeff, mom)
700  }
701  }
702  return;
703 }
704 
705 
706 
707 
708 #define Pmu tempmat[0]
709 #define Pnumu tempmat[1]
710 #define Prhonumu tempmat[2]
711 #define P7 tempmat[3]
712 #define P7rho tempmat[4]
713 #define P5 tempmat[5]
714 #define P3 tempmat[6]
715 #define P5nu tempmat[3]
716 #define P3mu tempmat[3]
717 #define Popmu tempmat[4]
718 #define Pmumumu tempmat[4]
719 
720 #define Qmu tempmat[7]
721 #define Qnumu tempmat[8]
722 #define Qrhonumu tempmat[2] // same as Prhonumu
723 
724 
725 
726 
727 
728 template<typename su3_matrix>
729 static void set_identity_matrix(su3_matrix* mat)
730 {
731  for(int i=0; i<3; i++){
732  for(int j=0; j<3; j++){
733  mat->e[i][j].real=0;
734  mat->e[i][j].imag=0;
735  }
736  mat->e[i][i].real=1;
737  }
738 }
739 
740 
741 template<typename su3_matrix>
742 static void set_identity(su3_matrix* matrices, int num_dirs){
743  for(int i=0; i<V*num_dirs; i++){
744  set_identity_matrix(&matrices[i]);
745  }
746 }
747 
748 
749 template <typename Real, typename su3_matrix, typename anti_hermitmat>
750 void do_color_matrix_hisq_force_reference(Real eps, Real weight,
751  su3_matrix* temp_xx, Real* act_path_coeff,
752  su3_matrix* sitelink, anti_hermitmat* mom)
753 {
754  int i;
755  int mu, nu, rho, sig;
756  Real coeff;
757  Real OneLink, Lepage, FiveSt, ThreeSt, SevenSt;
758 // Real Naik;
759  Real mLepage, mFiveSt, mThreeSt, mSevenSt;
760 
761  su3_matrix* tempmat[9];
762  int sites_on_node = V;
763  //int DirectLinks[8] ;
764 
765  Real ferm_epsilon;
766  ferm_epsilon = 2.0*weight*eps;
767  OneLink = act_path_coeff[0]*ferm_epsilon ;
768 // Naik = act_path_coeff[1]*ferm_epsilon ; mNaik = -Naik;
769  ThreeSt = act_path_coeff[2]*ferm_epsilon ; mThreeSt = -ThreeSt;
770  FiveSt = act_path_coeff[3]*ferm_epsilon ; mFiveSt = -FiveSt;
771  SevenSt = act_path_coeff[4]*ferm_epsilon ; mSevenSt = -SevenSt;
772  Lepage = act_path_coeff[5]*ferm_epsilon ; mLepage = -Lepage;
773 
774 // for(mu=0;mu<8;mu++){
775 // DirectLinks[mu] = 0 ;
776 // }
777 
778  for(mu=0; mu<9; mu++){
779  tempmat[mu] = (su3_matrix *)malloc( sites_on_node*sizeof(su3_matrix) );
780  }
781 
782 
783  su3_matrix* id;
784  id = (su3_matrix *)malloc(sites_on_node*sizeof(su3_matrix) );
785  //su3_matrix* id4;
786  //id4 = (su3_matrix *)malloc(sites_on_node*4*sizeof(su3_matrix) );
787 
788  // su3_matrix* temp_mat;
789 // temp_mat = (su3_matrix *)malloc(sites_on_node*sizeof(su3_matrix) );
790 
791 
792  // su3_matrix* temp_xx;
793  // temp_xx = (su3_matrix *)malloc(sites_on_node*8*sizeof(su3_matrix) );
794  // shiftedOuterProduct(temp_x, temp_xx);
795 
796  // initialise id so that it is the identity matrix on each lattice site
797  set_identity(id,1);
798 
799  printf("Calling modified hisq\n");
800  return;
801 
802 
803  for(sig=0; sig < 8; sig++){
804  // One-link term - don't have the savings here that we get when working with the
805  // half-wilson vectors
806  if(GOES_FORWARDS(sig)){
807  u_shift_mat(&temp_xx[OPP_DIR(sig)*V], Pmu, sig, sitelink);
808  add_force_to_momentum(Pmu, id, sig, OneLink, mom); // I could optimise functions which
809  // involve id
810  }
811 
812 
813 
814  for(mu = 0; mu < 8; mu++){
815  if ( (mu == sig) || (mu == OPP_DIR(sig))){
816  continue;
817  }
818  // 3 link path
819  // sig
820  // A _______
821  // | |
822  // mu /|\ \|/
823  // | |
824  //
825  //
826  u_shift_mat(&temp_xx[OPP_DIR(sig)*V], Pmu, OPP_DIR(mu), sitelink); // temp_xx[sig] stores |X(x)><X(x-sig)|
827  u_shift_mat(id, Qmu, OPP_DIR(mu), sitelink); // This returns the path less the outer-product of quark fields at the end
828  // Qmu = U[mu]
829 
830 
831  u_shift_mat(Pmu, P3, sig, sitelink); // P3 is U[sig](X)U[-mu](X+sig) temp_xx
832  if (GOES_FORWARDS(sig)){
833  // add contribution from middle link
834  add_force_to_momentum(P3, Qmu, sig, mThreeSt, mom);
835  // matrix_mult_na(P3[x],Qmu[x],tmp);
836  // mom[sig][x] += mThreeSt*tmp;
837  }
838  for(nu=0; nu < 8; nu++){
839  if (nu == sig || nu == OPP_DIR(sig)
840  || nu == mu || nu == OPP_DIR(mu)){
841  continue;
842  }
843 
844  /*
845  5 link path
846 
847  sig
848  A ________
849  | |
850  /|\ \|/
851  | |
852  \ \
853  \ \
854  */
855 
856  u_shift_mat(Pmu, Pnumu, OPP_DIR(nu), sitelink);
857  u_shift_mat(Qmu, Qnumu, OPP_DIR(nu), sitelink);
858 
859  u_shift_mat(Pnumu, P5, sig, sitelink);
860  if (GOES_FORWARDS(sig)){
861  add_force_to_momentum(P5, Qnumu, sig, FiveSt, mom);
862  } // seems to do what I think it should
863  for(rho =0; rho < 8; rho++){
864  if (rho == sig || rho == OPP_DIR(sig)
865  || rho == mu || rho == OPP_DIR(mu)
866  || rho == nu || rho == OPP_DIR(nu)){
867  continue;
868  }
869 
870  //7 link path
871  u_shift_mat(Pnumu, Prhonumu, OPP_DIR(rho), sitelink);
872  u_shift_mat(Prhonumu, P7, sig, sitelink);
873 
874  // Prhonumu = tempmat[2] is not needed again
875  // => store Qrhonumu in the same memory
876  u_shift_mat(Qnumu, Qrhonumu, OPP_DIR(rho), sitelink);
877 
878 
879  if(GOES_FORWARDS(sig)){
880  add_force_to_momentum(P7, Qrhonumu, sig, mSevenSt, mom) ;
881  }
882  u_shift_mat(P7, P7rho, rho, sitelink);
883  side_link_force(rho, sig, SevenSt, Qnumu, P7, Qrhonumu, P7rho, mom);
884  if(FiveSt != 0)coeff = SevenSt/FiveSt ; else coeff = 0;
885  for(i=0; i<V; i++){
886  scalar_mult_add_su3_matrix(&P5[i], &P7rho[i], coeff, &P5[i]);
887  } // end loop over volume
888  } // end loop over rho
889 
890 
891  u_shift_mat(P5, P5nu, nu, sitelink);
892  side_link_force(nu,sig,mFiveSt,Qmu,P5,Qnumu,P5nu,mom); // I believe this should do what I want it to
893  // check this!
894  if(ThreeSt != 0)coeff = FiveSt/ThreeSt; else coeff = 0;
895 
896  for(i=0; i<V; i++){
897  scalar_mult_add_su3_matrix(&P3[i], &P5nu[i], coeff, &P3[i]);
898  } // end loop over volume
899  } // end loop over nu
900 
901  // Lepage term
902  u_shift_mat(Pmu, Pnumu, OPP_DIR(mu), sitelink);
903  u_shift_mat(Qmu, Qnumu, OPP_DIR(mu), sitelink);
904 
905  u_shift_mat(Pnumu, P5, sig, sitelink);
906  if(GOES_FORWARDS(sig)){
907  add_force_to_momentum(P5, Qnumu, sig, Lepage, mom);
908  }
909 
910  u_shift_mat(P5, P5nu, mu, sitelink);
911  side_link_force(mu, sig, mLepage, Qmu, P5, Qnumu, P5nu, mom);
912 
913 
914  if(ThreeSt != 0)coeff = Lepage/ThreeSt; else coeff = 0;
915 
916  for(i=0; i<V; i++){
917  scalar_mult_add_su3_matrix(&P3[i], &P5nu[i], coeff, &P3[i]);
918  }
919 
920  if(GOES_FORWARDS(mu)){
921  u_shift_mat(P3, P3mu, mu, sitelink);
922  }
923  side_link_force(mu, sig, ThreeSt, id, P3, Qmu, P3mu, mom);
924  } // end loop over mu
925  } // end loop over sig
926 } // modified hisq_force_reference
927 
928 
929 
930 
931 
932 
933 // This version of the test routine uses
934 // half-wilson vectors instead of color matrices.
935 template <typename Real, typename su3_matrix, typename anti_hermitmat, typename half_wilson_vector>
936 void do_halfwilson_hisq_force_reference(Real eps, Real weight,
937  half_wilson_vector* temp_x, Real* act_path_coeff,
938  su3_matrix* sitelink, anti_hermitmat* mom)
939 {
940  int i;
941  int mu, nu, rho, sig;
942  Real coeff;
943  Real OneLink, Lepage, FiveSt, ThreeSt, SevenSt;
944  //Real Naik;
945  Real mLepage, mFiveSt, mThreeSt, mSevenSt;
946 
947  su3_matrix* tempmat[9];
948  int sites_on_node = V;
949 // int DirectLinks[8] ;
950 
951  Real ferm_epsilon;
952  ferm_epsilon = 2.0*weight*eps;
953  OneLink = act_path_coeff[0]*ferm_epsilon ;
954 // Naik = act_path_coeff[1]*ferm_epsilon ;
955  ThreeSt = act_path_coeff[2]*ferm_epsilon ; mThreeSt = -ThreeSt;
956  FiveSt = act_path_coeff[3]*ferm_epsilon ; mFiveSt = -FiveSt;
957  SevenSt = act_path_coeff[4]*ferm_epsilon ; mSevenSt = -SevenSt;
958  Lepage = act_path_coeff[5]*ferm_epsilon ; mLepage = -Lepage;
959 
960 
961 // for(mu=0;mu<8;mu++){
962 // DirectLinks[mu] = 0 ;
963 // }
964 
965  for(mu=0; mu<9; mu++){
966  tempmat[mu] = (su3_matrix *)malloc( sites_on_node*sizeof(su3_matrix) );
967  }
968 
969 
970  su3_matrix* id;
971  id = (su3_matrix *)malloc(sites_on_node*sizeof(su3_matrix) );
972  su3_matrix* id4;
973  id4 = (su3_matrix *)malloc(sites_on_node*4*sizeof(su3_matrix) );
974 
975  su3_matrix* temp_mat;
976  temp_mat = (su3_matrix *)malloc(sites_on_node*sizeof(su3_matrix) );
977 
978  // initialise id so that it is the identity matrix on each lattice site
979  set_identity(id,1);
980 
981  printf("Calling hisq reference routine\n");
982  for(sig=0; sig < 8; sig++){
983  shifted_outer_prod(temp_x, temp_mat, OPP_DIR(sig));
984 
985  // One-link term - don't have the savings here that we get when working with the
986  // half-wilson vectors
987  if(GOES_FORWARDS(sig)){
988  u_shift_mat(temp_mat, Pmu, sig, sitelink);
989  add_force_to_momentum(Pmu, id, sig, OneLink, mom);
990  }
991 
992  for(mu = 0; mu < 8; mu++){
993  if ( (mu == sig) || (mu == OPP_DIR(sig))){
994  continue;
995  }
996  // 3 link path
997  // sig
998  // A _______
999  // | |
1000  // mu /|\ \|/
1001  // | |
1002  //
1003  //
1004  u_shift_mat(temp_mat, Pmu, OPP_DIR(mu), sitelink); // temp_xx[sig] stores |X(x)><X(x-sig)|
1005  u_shift_mat(id, Qmu, OPP_DIR(mu), sitelink); // This returns the path less the outer-product of quark fields at the end
1006  // Qmu = U[mu]
1007 
1008 
1009  u_shift_mat(Pmu, P3, sig, sitelink); // P3 is U[sig](X)U[-mu](X+sig) temp_xx
1010  if (GOES_FORWARDS(sig)){
1011  // add contribution from middle link
1012  add_force_to_momentum(P3, Qmu, sig, mThreeSt, mom); // matrix_mult_na(P3[x],Qmu[x],tmp);
1013  // mom[sig][x] += mThreeSt*tmp;
1014  }
1015  for(nu=0; nu < 8; nu++){
1016  if (nu == sig || nu == OPP_DIR(sig)
1017  || nu == mu || nu == OPP_DIR(mu)){
1018  continue;
1019  }
1020 
1021  /*
1022  5 link path
1023 
1024  sig
1025  A ________
1026  | |
1027  /|\ \|/
1028  | |
1029  \ \
1030  \ \
1031  */
1032 
1033  u_shift_mat(Pmu, Pnumu, OPP_DIR(nu), sitelink);
1034  u_shift_mat(Qmu, Qnumu, OPP_DIR(nu), sitelink);
1035 
1036  u_shift_mat(Pnumu, P5, sig, sitelink);
1037  if (GOES_FORWARDS(sig)){
1038  add_force_to_momentum(P5, Qnumu, sig, FiveSt, mom);
1039  } // seems to do what I think it should
1040 
1041  for(rho =0; rho < 8; rho++){
1042  if (rho == sig || rho == OPP_DIR(sig)
1043  || rho == mu || rho == OPP_DIR(mu)
1044  || rho == nu || rho == OPP_DIR(nu)){
1045  continue;
1046  }
1047 
1048  //7 link path
1049  u_shift_mat(Pnumu, Prhonumu, OPP_DIR(rho), sitelink);
1050  u_shift_mat(Prhonumu, P7, sig, sitelink);
1051 
1052  // Prhonumu = tempmat[2] is not needed again
1053  // => store Qrhonumu in the same memory
1054  u_shift_mat(Qnumu, Qrhonumu, OPP_DIR(rho), sitelink);
1055 
1056 
1057  if(GOES_FORWARDS(sig)){
1058  add_force_to_momentum(P7, Qrhonumu, sig, mSevenSt, mom) ;
1059  }
1060 
1061  u_shift_mat(P7, P7rho, rho, sitelink);
1062  side_link_force(rho, sig, SevenSt, Qnumu, P7, Qrhonumu, P7rho, mom);
1063  if(FiveSt != 0)coeff = SevenSt/FiveSt ; else coeff = 0;
1064  for(i=0; i<V; i++){
1065  scalar_mult_add_su3_matrix(&P5[i], &P7rho[i], coeff, &P5[i]);
1066  } // end loop over volume
1067  } // end loop over rho
1068 
1069 
1070  u_shift_mat(P5, P5nu, nu, sitelink);
1071  side_link_force(nu,sig,mFiveSt,Qmu,P5,Qnumu,P5nu,mom); // I believe this should do what I want it to
1072  // check this!
1073  if(ThreeSt != 0)coeff = FiveSt/ThreeSt; else coeff = 0;
1074 
1075  for(i=0; i<V; i++){
1076  scalar_mult_add_su3_matrix(&P3[i], &P5nu[i], coeff, &P3[i]);
1077  } // end loop over volume
1078  } // end loop over nu
1079 
1080  // Lepage term
1081  u_shift_mat(Pmu, Pnumu, OPP_DIR(mu), sitelink);
1082  u_shift_mat(Qmu, Qnumu, OPP_DIR(mu), sitelink);
1083 
1084  u_shift_mat(Pnumu, P5, sig, sitelink);
1085  if(GOES_FORWARDS(sig)){
1086  add_force_to_momentum(P5, Qnumu, sig, Lepage, mom);
1087  }
1088 
1089  u_shift_mat(P5, P5nu, mu, sitelink);
1090  side_link_force(mu, sig, mLepage, Qmu, P5, Qnumu, P5nu, mom);
1091 
1092 
1093  if(ThreeSt != 0)coeff = Lepage/ThreeSt; else coeff = 0;
1094 
1095  for(i=0; i<V; i++){
1096  scalar_mult_add_su3_matrix(&P3[i], &P5nu[i], coeff, &P3[i]);
1097  }
1098 
1099  if(GOES_FORWARDS(mu)){
1100  u_shift_mat(P3, P3mu, mu, sitelink);
1101  }
1102  side_link_force(mu, sig, ThreeSt, id, P3, Qmu, P3mu, mom);
1103  } // end loop over mu
1104  } // end loop over sig
1105 
1106  for(mu=0; mu<9; mu++){
1107  free(tempmat[mu]);
1108  }
1109 
1110  free(id);
1111  free(id4);
1112  free(temp_mat);
1113 }
1114 
1115 #undef Pmu
1116 #undef Pnumu
1117 #undef Prhonumu
1118 #undef P3
1119 #undef P3mu
1120 #undef P5
1121 #undef P5nu
1122 #undef P7
1123 #undef P7rho
1124 #undef P7rhonu
1125 
1126 #undef Popmu
1127 #undef Pmumumu
1128 
1129 #undef Qmu
1130 #undef Qnumu
1131 #undef Qrhonumu
1132 
1133 
1134 
1135 
1136 
1137 
1138 void halfwilson_hisq_force_reference(float eps, float weight,
1139  void* act_path_coeff, void* temp_x,
1140  void* sitelink, void* mom)
1141 {
1142  do_halfwilson_hisq_force_reference((float)eps, (float)weight,
1143  (fhalf_wilson_vector*) temp_x, (float*)act_path_coeff,
1144  (fsu3_matrix*)sitelink, (fanti_hermitmat*)mom);
1145  return;
1146 }
1147 
1148 
1149 
1150 void halfwilson_hisq_force_reference(double eps, double weight,
1151  void* act_path_coeff, void* temp_x,
1152  void* sitelink, void* mom)
1153 {
1154  do_halfwilson_hisq_force_reference((double)eps, (double)weight,
1155  (dhalf_wilson_vector*) temp_x, (double*)act_path_coeff,
1156  (dsu3_matrix*)sitelink, (danti_hermitmat*)mom);
1157  return;
1158 }
1159 
1160 
1161 
1162 void color_matrix_hisq_force_reference(float eps, float weight,
1163  void* act_path_coeff, void* temp_xx,
1164  void* sitelink, void* mom)
1165 {
1166  do_color_matrix_hisq_force_reference((float)eps, (float)weight,
1167  (fsu3_matrix*) temp_xx, (float*)act_path_coeff,
1168  (fsu3_matrix*)sitelink, (fanti_hermitmat*)mom);
1169  return;
1170 }
1171 
1172 
1173 
double mu
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
enum QudaPrecision_s QudaPrecision
@ QUDA_SINGLE_PRECISION
Definition: enum_quda.h:64
@ QUDA_QDP_GAUGE_ORDER
Definition: enum_quda.h:44
@ QUDA_MILC_GAUGE_ORDER
Definition: enum_quda.h:47
void su3_adjoint(su3_matrix *a, su3_matrix *b)
int neighborIndexFullLattice(int i, int dx4, int dx3, int dx2, int dx1)
Definition: host_utils.cpp:490
void make_anti_hermitian(su3_matrix *m3, anti_hermitmat *ah3)
void print_su3_matrix(su3_matrix *a)
#define P7
#define P3
#define Qmu
int Vh
Definition: host_utils.cpp:38
#define CSUM(a, b)
void color_matrix_hisq_force_reference(float eps, float weight, void *act_path_coeff, void *temp_xx, void *sitelink, void *mom)
#define P5
void halfwilson_hisq_force_reference(float eps, float weight, void *act_path_coeff, void *temp_x, void *sitelink, void *mom)
#define P5nu
int Z[4]
Definition: host_utils.cpp:36
void computeHisqOuterProduct(void *src, void *dest, QudaPrecision precision)
#define CMUL(a, b, c)
su3_matrix * get_su3_matrix(int gauge_order, su3_matrix *p, int idx, int dir)
#define Pnumu
void do_halfwilson_hisq_force_reference(Real eps, Real weight, half_wilson_vector *temp_x, Real *act_path_coeff, su3_matrix *sitelink, anti_hermitmat *mom)
void computeLinkOrderedOuterProduct(void *src, void *dst, QudaPrecision precision, int gauge_order)
#define P3mu
#define Pmu
int V
Definition: host_utils.cpp:37
#define CONJG(a, b)
#define Qnumu
#define P7rho
#define Prhonumu
#define CMULJ_(a, b, c)
#define CMUL_J(a, b, c)
#define Qrhonumu
void do_color_matrix_hisq_force_reference(Real eps, Real weight, su3_matrix *temp_xx, Real *act_path_coeff, su3_matrix *sitelink, anti_hermitmat *mom)
QudaGaugeFieldOrder gauge_order
#define GOES_BACKWARDS(dir)
Definition: misc.h:35
#define OPP_DIR(dir)
Definition: misc.h:33
#define GOES_FORWARDS(dir)
Definition: misc.h:34
FloatingPoint< float > Float
Main header file for the QUDA library.
std::complex< real > e[3][3]
Definition: llfat_utils.h:4
#define errorQuda(...)
Definition: util_quda.h:120