/*
 * QUDA v0.3.2
 * A library for QCD on GPUs
 */
00001 #include <stdio.h> 00002 #include <stdlib.h> 00003 #include <math.h> 00004 #include <string.h> 00005 00006 #include "quda.h" 00007 #include "test_util.h" 00008 #include "misc.h" 00009 #include "gauge_force_reference.h" 00010 00011 extern int Z[4]; 00012 extern int V; 00013 extern int Vh; 00014 00015 00016 00017 #define CADD(a,b,c) { (c).real = (a).real + (b).real; \ 00018 (c).imag = (a).imag + (b).imag; } 00019 #define CMUL(a,b,c) { (c).real = (a).real*(b).real - (a).imag*(b).imag; \ 00020 (c).imag = (a).real*(b).imag + (a).imag*(b).real; } 00021 #define CSUM(a,b) { (a).real += (b).real; (a).imag += (b).imag; } 00022 00023 /* c = a* * b */ 00024 #define CMULJ_(a,b,c) { (c).real = (a).real*(b).real + (a).imag*(b).imag; \ 00025 (c).imag = (a).real*(b).imag - (a).imag*(b).real; } 00026 00027 /* c = a * b* */ 00028 #define CMUL_J(a,b,c) { (c).real = (a).real*(b).real + (a).imag*(b).imag; \ 00029 (c).imag = (a).imag*(b).real - (a).real*(b).imag; } 00030 00031 #define CONJG(a,b) { (b).real = (a).real; (b).imag = -(a).imag; } 00032 00033 typedef struct { 00034 float real; 00035 float imag; 00036 } fcomplex; 00037 00038 /* specific for double complex */ 00039 typedef struct { 00040 double real; 00041 double imag; 00042 } dcomplex; 00043 00044 typedef struct { fcomplex e[3][3]; } fsu3_matrix; 00045 typedef struct { fcomplex c[3]; } fsu3_vector; 00046 typedef struct { dcomplex e[3][3]; } dsu3_matrix; 00047 typedef struct { dcomplex c[3]; } dsu3_vector; 00048 00049 typedef struct { 00050 fcomplex m01,m02,m12; 00051 float m00im,m11im,m22im; 00052 float space; 00053 } fanti_hermitmat; 00054 00055 typedef struct { 00056 dcomplex m01,m02,m12; 00057 double m00im,m11im,m22im; 00058 double space; 00059 } danti_hermitmat; 00060 00061 extern int neighborIndexFullLattice(int i, int dx4, int dx3, int dx2, int dx1); 00062 00063 template<typename su3_matrix> 00064 void su3_adjoint( su3_matrix *a, su3_matrix *b ) 00065 { 00066 int i,j; 00067 for(i=0;i<3;i++)for(j=0;j<3;j++){ 00068 
CONJG( a->e[j][i], b->e[i][j] ); 00069 } 00070 } 00071 00072 00073 template<typename su3_matrix, typename anti_hermitmat> 00074 void 00075 make_anti_hermitian( su3_matrix *m3, anti_hermitmat *ah3 ) 00076 { 00077 00078 typeof(ah3->m00im) temp = 00079 (m3->e[0][0].imag + m3->e[1][1].imag + m3->e[2][2].imag)*0.33333333333333333; 00080 ah3->m00im = m3->e[0][0].imag - temp; 00081 ah3->m11im = m3->e[1][1].imag - temp; 00082 ah3->m22im = m3->e[2][2].imag - temp; 00083 ah3->m01.real = (m3->e[0][1].real - m3->e[1][0].real)*0.5; 00084 ah3->m02.real = (m3->e[0][2].real - m3->e[2][0].real)*0.5; 00085 ah3->m12.real = (m3->e[1][2].real - m3->e[2][1].real)*0.5; 00086 ah3->m01.imag = (m3->e[0][1].imag + m3->e[1][0].imag)*0.5; 00087 ah3->m02.imag = (m3->e[0][2].imag + m3->e[2][0].imag)*0.5; 00088 ah3->m12.imag = (m3->e[1][2].imag + m3->e[2][1].imag)*0.5; 00089 00090 } 00091 00092 template <typename anti_hermitmat, typename su3_matrix> 00093 static void 00094 uncompress_anti_hermitian(anti_hermitmat *mat_antihermit, 00095 su3_matrix *mat_su3 ) 00096 { 00097 typeof(mat_antihermit->m00im) temp1; 00098 mat_su3->e[0][0].imag=mat_antihermit->m00im; 00099 mat_su3->e[0][0].real=0.; 00100 mat_su3->e[1][1].imag=mat_antihermit->m11im; 00101 mat_su3->e[1][1].real=0.; 00102 mat_su3->e[2][2].imag=mat_antihermit->m22im; 00103 mat_su3->e[2][2].real=0.; 00104 mat_su3->e[0][1].imag=mat_antihermit->m01.imag; 00105 temp1=mat_antihermit->m01.real; 00106 mat_su3->e[0][1].real=temp1; 00107 mat_su3->e[1][0].real= -temp1; 00108 mat_su3->e[1][0].imag=mat_antihermit->m01.imag; 00109 mat_su3->e[0][2].imag=mat_antihermit->m02.imag; 00110 temp1=mat_antihermit->m02.real; 00111 mat_su3->e[0][2].real=temp1; 00112 mat_su3->e[2][0].real= -temp1; 00113 mat_su3->e[2][0].imag=mat_antihermit->m02.imag; 00114 mat_su3->e[1][2].imag=mat_antihermit->m12.imag; 00115 temp1=mat_antihermit->m12.real; 00116 mat_su3->e[1][2].real=temp1; 00117 mat_su3->e[2][1].real= -temp1; 00118 mat_su3->e[2][1].imag=mat_antihermit->m12.imag; 
00119 } 00120 00121 template <typename su3_matrix, typename Float> 00122 static void 00123 scalar_mult_sub_su3_matrix(su3_matrix *a,su3_matrix *b, Float s, su3_matrix *c) 00124 { 00125 int i,j; 00126 for(i=0;i<3;i++){ 00127 for(j=0;j<3;j++){ 00128 c->e[i][j].real = a->e[i][j].real - s*b->e[i][j].real; 00129 c->e[i][j].imag = a->e[i][j].imag - s*b->e[i][j].imag; 00130 } 00131 } 00132 } 00133 00134 template <typename su3_matrix, typename Float> 00135 static void 00136 scalar_mult_add_su3_matrix(su3_matrix *a,su3_matrix *b, Float s, su3_matrix *c) 00137 { 00138 int i,j; 00139 for(i=0;i<3;i++){ 00140 for(j=0;j<3;j++){ 00141 c->e[i][j].real = a->e[i][j].real + s*b->e[i][j].real; 00142 c->e[i][j].imag = a->e[i][j].imag + s*b->e[i][j].imag; 00143 } 00144 } 00145 } 00146 00147 template <typename su3_matrix> 00148 static void 00149 mult_su3_nn(su3_matrix* a, su3_matrix* b, su3_matrix* c) 00150 { 00151 int i,j,k; 00152 typeof(a->e[0][0]) x,y; 00153 for(i=0;i<3;i++){ 00154 for(j=0;j<3;j++){ 00155 x.real=x.imag=0.0; 00156 for(k=0;k<3;k++){ 00157 CMUL( a->e[i][k] , b->e[k][j] , y ); 00158 CSUM( x , y ); 00159 } 00160 c->e[i][j] = x; 00161 } 00162 } 00163 } 00164 template<typename su3_matrix> 00165 static void 00166 mult_su3_an( su3_matrix *a, su3_matrix *b, su3_matrix *c ) 00167 { 00168 int i,j,k; 00169 typeof(a->e[0][0]) x,y; 00170 for(i=0;i<3;i++){ 00171 for(j=0;j<3;j++){ 00172 x.real=x.imag=0.0; 00173 for(k=0;k<3;k++){ 00174 CMULJ_( a->e[k][i] , b->e[k][j], y ); 00175 CSUM( x , y ); 00176 } 00177 c->e[i][j] = x; 00178 } 00179 } 00180 } 00181 00182 template<typename su3_matrix> 00183 static void 00184 mult_su3_na( su3_matrix *a, su3_matrix *b, su3_matrix *c ) 00185 { 00186 int i,j,k; 00187 typeof(a->e[0][0]) x,y; 00188 for(i=0;i<3;i++){ 00189 for(j=0;j<3;j++){ 00190 x.real=x.imag=0.0; 00191 for(k=0;k<3;k++){ 00192 CMUL_J( a->e[i][k] , b->e[j][k] , y ); 00193 CSUM( x , y ); 00194 } 00195 c->e[i][j] = x; 00196 } 00197 } 00198 } 00199 00200 template < typename su3_matrix> 00201 
void 00202 print_su3_matrix(su3_matrix *a) 00203 { 00204 int i, j; 00205 for(i=0;i < 3; i++){ 00206 for(j=0;j < 3;j++){ 00207 printf("(%f %f)\t", a->e[i][j].real, a->e[i][j].imag); 00208 } 00209 printf("\n"); 00210 } 00211 00212 } 00213 00214 00215 //this functon compute one path for all lattice sites 00216 template<typename su3_matrix, typename Float> 00217 static void 00218 compute_path_product(su3_matrix* staple, su3_matrix* sitelink, int* path, int len, Float loop_coeff, int dir) 00219 { 00220 int i, j; 00221 00222 su3_matrix prev_matrix, curr_matrix, tmat; 00223 int dx[4]; 00224 00225 for(i=0;i<V;i++){ 00226 memset(dx,0, sizeof(dx)); 00227 memset(&curr_matrix, 0, sizeof(curr_matrix)); 00228 00229 curr_matrix.e[0][0].real = 1.0; 00230 curr_matrix.e[1][1].real = 1.0; 00231 curr_matrix.e[2][2].real = 1.0; 00232 00233 dx[dir] =1; 00234 for(j=0; j < len;j++){ 00235 int lnkdir; 00236 00237 prev_matrix = curr_matrix; 00238 if (GOES_FORWARDS(path[j])){ 00239 //dx[path[j]] +=1; 00240 lnkdir = path[j]; 00241 }else{ 00242 dx[OPP_DIR(path[j])] -=1; 00243 lnkdir=OPP_DIR(path[j]); 00244 } 00245 00246 int nbr_idx = neighborIndexFullLattice(i, dx[3], dx[2], dx[1], dx[0]); 00247 00248 su3_matrix* lnk = sitelink + nbr_idx*4 + lnkdir; 00249 if (GOES_FORWARDS(path[j])){ 00250 mult_su3_nn(&prev_matrix, lnk, &curr_matrix); 00251 }else{ 00252 mult_su3_na(&prev_matrix, lnk, &curr_matrix); 00253 } 00254 00255 if (GOES_FORWARDS(path[j])){ 00256 dx[path[j]] +=1; 00257 }else{ 00258 //dx[OPP_DIR(path[j])] -=1; 00259 } 00260 00261 00262 }//j 00263 00264 su3_adjoint(&curr_matrix, &tmat ); 00265 00266 scalar_mult_add_su3_matrix(staple + i , &tmat, loop_coeff, staple+i); 00267 00268 }//i 00269 00270 return; 00271 } 00272 00273 00274 template <typename su3_matrix, typename anti_hermitmat, typename Float> 00275 static void 00276 update_mom(anti_hermitmat* momentum, int dir, su3_matrix* sitelink, 00277 su3_matrix* staple, Float eb3) 00278 { 00279 int i; 00280 for(i=0;i <V; i++){ 00281 su3_matrix 
tmat1; 00282 su3_matrix tmat2; 00283 su3_matrix tmat3; 00284 00285 su3_matrix* lnk = sitelink + 4*i+dir; 00286 su3_matrix* stp = staple + i; 00287 anti_hermitmat* mom = momentum + 4*i+dir; 00288 00289 mult_su3_na(lnk, stp, &tmat1); 00290 uncompress_anti_hermitian(mom, &tmat2); 00291 00292 scalar_mult_sub_su3_matrix(&tmat2, &tmat1, eb3, &tmat3); 00293 make_anti_hermitian(&tmat3, mom); 00294 00295 } 00296 00297 } 00298 00299 00300 00301 /* This function only computes one direction @dir 00302 * 00303 */ 00304 00305 void 00306 gauge_force_reference(void* refMom, int dir, double eb3, void* sitelink, QudaPrecision prec, 00307 int **path_dir, int* length, void* loop_coeff, int num_paths) 00308 { 00309 int i; 00310 00311 void* staple; 00312 int gSize; 00313 if (prec == QUDA_DOUBLE_PRECISION){ 00314 gSize= sizeof(double); 00315 }else{ 00316 gSize= sizeof(float); 00317 } 00318 00319 staple = malloc(V* gaugeSiteSize* gSize); 00320 if (staple == NULL){ 00321 fprintf(stderr, "ERROR: malloc failed for staple in functon %s\n", __FUNCTION__); 00322 exit(1); 00323 } 00324 00325 memset(staple, 0, V*gaugeSiteSize* gSize); 00326 00327 for(i=0;i < num_paths; i++){ 00328 if (prec == QUDA_DOUBLE_PRECISION){ 00329 double* my_loop_coeff = (double*)loop_coeff; 00330 compute_path_product((dsu3_matrix*)staple, (dsu3_matrix*)sitelink, path_dir[i], length[i], my_loop_coeff[i], dir); 00331 }else{ 00332 float* my_loop_coeff = (float*)loop_coeff; 00333 compute_path_product((fsu3_matrix*)staple, (fsu3_matrix*)sitelink, path_dir[i], length[i], my_loop_coeff[i], dir); 00334 } 00335 } 00336 00337 if (prec == QUDA_DOUBLE_PRECISION){ 00338 update_mom((danti_hermitmat*) refMom, dir, (dsu3_matrix*)sitelink, (dsu3_matrix*)staple, (double)eb3); 00339 }else{ 00340 update_mom((fanti_hermitmat*)refMom, dir, (fsu3_matrix*)sitelink, (fsu3_matrix*)staple, (float)eb3); 00341 } 00342 00343 } 00344 00345
1.7.3