QUDA v0.3.2
A library for QCD on GPUs

quda/tests/fermion_force_test.c

Go to the documentation of this file.
00001 #include <stdio.h>
00002 #include <stdlib.h>
00003 #include <string.h>
00004 
00005 #include <quda.h>
00006 #include "test_util.h"
00007 #include "gauge_quda.h"
00008 #include "misc.h"
00009 #include "fermion_force_reference.h"
00010 #include "fermion_force_quda.h"
00011 #include "hw_quda.h"
00012 #include <sys/time.h>
00013 
00014 int device = 0;
00015 static FullGauge cudaSiteLink;
00016 static FullMom cudaMom;
00017 static FullHw cudaHw;
00018 static QudaGaugeParam gaugeParam;
00019 static void* siteLink;
00020 static void* mom;
00021 static void* refMom;
00022 static void* hw; //the array of half_wilson_vector
00023 static int X[4];
00024 
00025 int verify_results = 0;
00026 
00027 extern void initDslashCuda(FullGauge gauge);
00028 
00029 static int sdim= 8;
00030 
00031 int ODD_BIT = 1;
00032 static int tdim = 8;
00033 
00034 QudaReconstructType link_recon = QUDA_RECONSTRUCT_12;
00035 QudaPrecision link_prec = QUDA_SINGLE_PRECISION;
00036 QudaPrecision hw_prec = QUDA_SINGLE_PRECISION;
00037 QudaPrecision mom_prec = QUDA_SINGLE_PRECISION;
00038 
00039 QudaPrecision cpu_hw_prec = QUDA_SINGLE_PRECISION;
00040 
00041 typedef struct {
00042   double real;
00043   double imag;
00044 } dcomplex;
00045 
00046 typedef struct { dcomplex e[3][3]; } dsu3_matrix;
00047 
00048 
00049 int Z[4];
00050 int V;
00051 int Vh;
00052 void
00053 setDims(int *X) {
00054   V = 1;
00055   for (int d=0; d< 4; d++) {
00056     V *= X[d];
00057     Z[d] = X[d];
00058   }
00059   Vh = V/2;
00060 }
00061 
00062 static void
00063 fermion_force_init()
00064 { 
00065   initQuda(device);
00066   //cudaSetDevice(dev); CUERR;
00067     
00068   X[0] = gaugeParam.X[0] = sdim;
00069   X[1] = gaugeParam.X[1] = sdim;
00070   X[2] = gaugeParam.X[2] = sdim;
00071   X[3] = gaugeParam.X[3] = tdim;
00072     
00073   setDims(gaugeParam.X);
00074     
00075   gaugeParam.cpu_prec = link_prec;
00076   gaugeParam.cuda_prec = link_prec;
00077   gaugeParam.reconstruct = link_recon;
00078     
00079     
00080   size_t gSize = (gaugeParam.cpu_prec == QUDA_DOUBLE_PRECISION) ? sizeof(double) : sizeof(float);
00081     
00082   siteLink = malloc(4*V*gaugeSiteSize* gSize);
00083   if (siteLink == NULL){
00084     fprintf(stderr, "ERROR: malloc failed for sitelink\n");
00085     exit(1);
00086   }
00087   createSiteLinkCPU(siteLink, gaugeParam.cpu_prec,1);
00088 
00089 #if 1
00090   site_link_sanity_check(siteLink, V, gaugeParam.cpu_prec, &gaugeParam);
00091 #endif
00092 
00093   mom = malloc(4*V*momSiteSize*gSize);
00094   if (mom == NULL){
00095     fprintf(stderr, "ERROR: malloc failed for mom\n");
00096     exit(1);
00097   }
00098   createMomCPU(mom,mom_prec);    
00099   memset(mom, 0, 4*V*momSiteSize*gSize);
00100 
00101   refMom = malloc(4*V*momSiteSize*gSize);
00102   if (refMom == NULL){
00103     fprintf(stderr, "ERROR: malloc failed for refMom\n");
00104     exit(1);
00105   }    
00106   memcpy(refMom, mom, 4*V*momSiteSize*gSize);
00107     
00108     
00109   hw = malloc(4*V*hwSiteSize*gSize);
00110   if (hw == NULL){
00111     fprintf(stderr, "ERROR: malloc failed for hw\n");
00112     exit(1);    
00113   }
00114   createHwCPU(hw, hw_prec);
00115     
00116     
00117   createLinkQuda(&cudaSiteLink, &gaugeParam);
00118   createMomQuda(&cudaMom, &gaugeParam);    
00119   cudaHw = createHwQuda(X, hw_prec);
00120     
00121   return;
00122 }
00123 
00124 static void 
00125 fermion_force_end() 
00126 {
00127   free(siteLink);
00128   free(mom);
00129   free(refMom);
00130   free(hw);
00131     
00132   freeLinkQuda(&cudaSiteLink);
00133   freeMomQuda(&cudaMom);
00134 }
00135 
00136 
00137 static void 
00138 fermion_force_test(void) 
00139 {
00140  
00141   fermion_force_init();
00142   initDslashConstants(cudaSiteLink, Vh, Vh);
00143   fermion_force_init_cuda(&gaugeParam);
00144 
00145     
00146   float eps= 0.02;
00147   float weight1 =1.0;
00148   float weight2 =1.0;
00149   float act_path_coeff[6];
00150     
00151   act_path_coeff[0] = 0.625000;
00152   act_path_coeff[1] = -0.058479;
00153   act_path_coeff[2] = -0.087719;
00154   act_path_coeff[3] = 0.030778;
00155   act_path_coeff[4] = -0.007200;
00156   act_path_coeff[5] = -0.123113;        
00157     
00158   loadMomToGPU(cudaMom, mom, &gaugeParam);
00159   loadLinkToGPU(cudaSiteLink, siteLink, &gaugeParam);
00160   loadHwToGPU(cudaHw, hw, cpu_hw_prec);
00161 
00162     
00163   if (verify_results){  
00164     fermion_force_reference(eps, weight1, weight2, act_path_coeff, hw, siteLink, refMom);
00165   }
00166     
00167     
00168   /*
00169    * The flops number comes from CPU implementation in MILC
00170    * function eo_fermion_force_twoterms_field(), fermion_force_asqtad.c
00171    *
00172    */
00173   int flops = 433968;
00174 
00175   struct timeval t0, t1;
00176   gettimeofday(&t0, NULL);
00177     
00178   fermion_force_cuda(eps, weight1, weight2, act_path_coeff, cudaHw, cudaSiteLink, cudaMom, &gaugeParam);
00179   cudaThreadSynchronize();
00180   gettimeofday(&t1, NULL);
00181   double secs = t1.tv_sec - t0.tv_sec + 0.000001*(t1.tv_usec - t0.tv_usec);
00182     
00183   storeMomToCPU(mom, cudaMom, &gaugeParam);
00184     
00185   int res;
00186   res = compare_floats(mom, refMom, 4*V*momSiteSize, 1e-5, gaugeParam.cpu_prec);
00187     
00188   strong_check_mom(mom, refMom, 4*V, gaugeParam.cpu_prec);
00189     
00190   printf("Test %s\n",(1 == res) ? "PASSED" : "FAILED");     
00191     
00192   int volume = gaugeParam.X[0]*gaugeParam.X[1]*gaugeParam.X[2]*gaugeParam.X[3];
00193   double perf = 1.0* flops*volume/(secs*1024*1024*1024);
00194   printf("gpu time =%.2f ms, flops= %.2f Gflops\n", secs*1000, perf);
00195     
00196   fermion_force_end();
00197 
00198   if (res == 0){//failed
00199     printf("\n");
00200     printf("Warning: you test failed. \n");
00201     printf("        Did you use --verify?\n");
00202     printf("        Did you check the GPU health by running cuda memtest?\n");
00203   }
00204 
00205     
00206 }            
00207 
00208 
00209 static void
00210 display_test_info()
00211 {
00212   printf("running the following fermion force computation test:\n");
00213     
00214   printf("link_precision           link_reconstruct           S_dimension         T_dimension\n");
00215   printf("%s                       %s                         %d                  %d \n", 
00216          get_prec_str(link_prec),
00217          get_recon_str(link_recon), 
00218          sdim, tdim);
00219   return ;
00220     
00221 }
00222 
00223 static void
00224 usage(char** argv )
00225 {
00226   printf("Usage: %s <args>\n", argv[0]);
00227   printf("  --device <dev_id>               Set which device to run on\n");
00228   printf("  --gprec <double/single/half>    Link precision\n"); 
00229   printf("  --recon <8/12>                  Link reconstruction type\n"); 
00230   printf("  --sdim <n>                      Set spacial dimention\n");
00231   printf("  --tdim                          Set T dimention size(default 24)\n"); 
00232   printf("  --sdim                          Set spalce dimention size(default 16)\n"); 
00233   printf("  --verify                        Verify the GPU results using CPU results\n");
00234   printf("  --help                          Print out this message\n"); 
00235   exit(1);
00236   return ;
00237 }
00238 
00239 int 
00240 main(int argc, char **argv) 
00241 {
00242   int i;
00243   for (i =1;i < argc; i++){
00244         
00245     if( strcmp(argv[i], "--help")== 0){
00246       usage(argv);
00247     }
00248         
00249     if( strcmp(argv[i], "--gprec") == 0){
00250       if (i+1 >= argc){
00251         usage(argv);
00252       }     
00253       link_prec =  get_prec(argv[i+1]);
00254       i++;
00255       continue;     
00256     }
00257         
00258     if( strcmp(argv[i], "--recon") == 0){
00259       if (i+1 >= argc){
00260         usage(argv);
00261       }     
00262       link_recon =  get_recon(argv[i+1]);
00263       i++;
00264       continue;     
00265     }
00266         
00267     if( strcmp(argv[i], "--tdim") == 0){
00268       if (i+1 >= argc){
00269         usage(argv);
00270       }     
00271       tdim =  atoi(argv[i+1]);
00272       if (tdim < 0 || tdim > 128){
00273         fprintf(stderr, "Error: invalid t dimention\n");
00274         exit(1);
00275       }
00276       i++;
00277       continue;     
00278     }
00279     if( strcmp(argv[i], "--sdim") == 0){
00280       if (i+1 >= argc){
00281         usage(argv);
00282       }     
00283       sdim =  atoi(argv[i+1]);
00284       if (sdim < 0 || sdim > 128){
00285         fprintf(stderr, "Error: invalid space dimention\n");
00286         exit(1);
00287       }
00288       i++;
00289       continue;     
00290     }
00291     if( strcmp(argv[i], "--device") == 0){
00292         if (i+1 >= argc){
00293                 usage(argv);
00294             }
00295             device =  atoi(argv[i+1]);
00296             if (device < 0){
00297                 fprintf(stderr, "Error: invalid device number(%d)\n", device);
00298                 exit(1);
00299             }
00300             i++;
00301             continue;
00302     }
00303 
00304     if( strcmp(argv[i], "--verify") == 0){
00305       verify_results=1;
00306       continue;     
00307     }   
00308     fprintf(stderr, "ERROR: Invalid option:%s\n", argv[i]);
00309     usage(argv);
00310   }
00311     
00312   display_test_info();
00313     
00314   fermion_force_test();
00315     
00316     
00317   return 0;
00318 }
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Friends Defines