QUDA v0.4.0
A library for QCD on GPUs
quda/tests/fermion_force_test.cpp
Go to the documentation of this file.
00001 #include <stdio.h>
00002 #include <stdlib.h>
00003 #include <string.h>
00004 
00005 #include <quda.h>
00006 #include "test_util.h"
00007 #include "gauge_field.h"
00008 #include "fat_force_quda.h"
00009 #include "misc.h"
00010 #include "fermion_force_reference.h"
00011 #include "fermion_force_quda.h"
00012 #include "hw_quda.h"
00013 #include <sys/time.h>
00014 
00015 extern void usage(char** argv);
00016 extern int device;
00017 cudaGaugeField *cudaGauge = NULL;
00018 cpuGaugeField *cpuGauge = NULL;
00019 
00020 cudaGaugeField *cudaMom = NULL;
00021 cpuGaugeField *cpuMom = NULL;
00022 cpuGaugeField *refMom = NULL;
00023 
00024 static FullHw cudaHw;
00025 static QudaGaugeParam gaugeParam;
00026 static void* hw; //the array of half_wilson_vector
00027 
00028 extern int gridsize_from_cmdline[];
00029 
00030 int verify_results = 0;
00031 
00032 int ODD_BIT = 1;
00033 extern int xdim, ydim, zdim, tdim;
00034 
00035 extern QudaReconstructType link_recon;
00036 QudaPrecision link_prec = QUDA_SINGLE_PRECISION;
00037 extern QudaPrecision prec;
00038 QudaPrecision hw_prec = QUDA_SINGLE_PRECISION;
00039 QudaPrecision mom_prec = QUDA_SINGLE_PRECISION;
00040 
00041 QudaPrecision cpu_hw_prec = QUDA_SINGLE_PRECISION;
00042 
00043 int Z[4];
00044 int V;
00045 int Vh;
00046 void
00047 setDims(int *X) {
00048   V = 1;
00049   for (int d=0; d< 4; d++) {
00050     V *= X[d];
00051     Z[d] = X[d];
00052   }
00053   Vh = V/2;
00054 }
00055 
00056 extern void initCommonConstants(const LatticeField &gauge);
00057 
00058 static void
00059 fermion_force_init()
00060 { 
00061   initQuda(device);
00062   //cudaSetDevice(dev); CUERR;
00063     
00064   gaugeParam.X[0] = xdim;
00065   gaugeParam.X[1] = ydim;
00066   gaugeParam.X[2] = zdim;
00067   gaugeParam.X[3] = tdim;
00068   setDims(gaugeParam.X);
00069 
00070   gaugeParam.cpu_prec = link_prec;
00071   gaugeParam.cuda_prec = link_prec;
00072   gaugeParam.reconstruct = link_recon;
00073     
00074   gaugeParam.gauge_order = QUDA_MILC_GAUGE_ORDER;
00075 
00076   GaugeFieldParam gParam(0, gaugeParam);
00077   gParam.create = QUDA_NULL_FIELD_CREATE;
00078 
00079   cpuGauge = new cpuGaugeField(gParam);
00080     
00081   // this is a hack to have site link generated in 2d 
00082   // then copied to 1d array in "MILC" format
00083   void* siteLink_2d[4];
00084   for(int i=0;i < 4;i++){
00085     siteLink_2d[i] = malloc(cpuGauge->Volume()*gaugeSiteSize*gaugeParam.cpu_prec);
00086     if (siteLink_2d[i] == NULL){
00087       errorQuda("ERROR: malloc failed for siteLink_2d\n");
00088     }
00089   }
00090     
00091   // fills the gauge field with random numbers
00092   createSiteLinkCPU(siteLink_2d, gaugeParam.cpu_prec, 1);
00093 
00094   //copy the 2d sitelink to 1d milc format 
00095   for(int dir=0;dir < 4; dir++){
00096     for(int i=0;i < cpuGauge->Volume(); i++){
00097       char* src =  ((char*)siteLink_2d[dir]) + i * gaugeSiteSize* gaugeParam.cpu_prec;
00098       char* dst =  ((char*)cpuGauge->Gauge_p()) + (4*i+dir)*gaugeSiteSize*gaugeParam.cpu_prec ;
00099       memcpy(dst, src, gaugeSiteSize*gaugeParam.cpu_prec);
00100     }
00101   }
00102 
00103   for(int i=0;i < 4;i++){
00104     free(siteLink_2d[i]);
00105   }
00106   
00107 #if 0
00108   site_link_sanity_check(siteLink, V, gaugeParam.cpu_prec, &gaugeParam);
00109 #endif
00110 
00111   //gaugeParam.site_ga_pad = gaugeParam.ga_pad = 0;
00112   //gaugeParam.reconstruct = link_recon;
00113 
00114   gParam.precision = gaugeParam.cuda_prec;
00115   gParam.reconstruct = link_recon;
00116   cudaGauge = new cudaGaugeField(gParam);
00117 
00118   gParam.reconstruct = QUDA_RECONSTRUCT_10;
00119   gParam.precision = gaugeParam.cpu_prec;
00120   cpuMom = new cpuGaugeField(gParam);
00121   refMom = new cpuGaugeField(gParam);
00122 
00123   createMomCPU(cpuMom->Gauge_p(), mom_prec);    
00124   //memset(cpuMom->Gauge_p(), 0, 4*cpuMom->Volume()*momSiteSize*gaugeParam.cpu_prec);
00125 
00126   memcpy(refMom->Gauge_p(), cpuMom->Gauge_p(), 4*cpuMom->Volume()*momSiteSize*gaugeParam.cpu_prec);
00127     
00128   gParam.precision = gaugeParam.cuda_prec;
00129   cudaMom = new cudaGaugeField(gParam);
00130     
00131   hw = malloc(4*cpuGauge->Volume()*hwSiteSize*gaugeParam.cpu_prec);
00132   if (hw == NULL){
00133     fprintf(stderr, "ERROR: malloc failed for hw\n");
00134     exit(1);    
00135   }
00136   createHwCPU(hw, hw_prec);
00137 
00138   cudaHw = createHwQuda(gaugeParam.X, hw_prec);
00139     
00140   return;
00141 }
00142 
00143 static void 
00144 fermion_force_end() 
00145 {
00146   delete cudaMom;
00147   delete cudaGauge;
00148   
00149   delete cpuGauge;
00150   delete cpuMom;
00151   delete refMom;
00152 
00153   freeHwQuda(cudaHw);
00154   free(hw);
00155   
00156   endQuda();
00157 }
00158 
00159 
00160 static int 
00161 fermion_force_test(void) 
00162 {
00163  
00164   fermion_force_init();
00165   initCommonConstants(*cudaGauge);
00166   fermion_force_init_cuda(&gaugeParam);
00167 
00168     
00169   float eps= 0.02;
00170   float weight1 =1.0;
00171   float weight2 =1.0;
00172   float act_path_coeff[6];
00173     
00174   act_path_coeff[0] = 0.625000;
00175   act_path_coeff[1] = -0.058479;
00176   act_path_coeff[2] = -0.087719;
00177   act_path_coeff[3] = 0.030778;
00178   act_path_coeff[4] = -0.007200;
00179   act_path_coeff[5] = -0.123113;        
00180     
00181   // download the momentum field to the GPU
00182   cudaMom->loadCPUField(*cpuMom, QUDA_CPU_FIELD_LOCATION);
00183 
00184   // download the gauge field to the GPU
00185   cudaGauge->loadCPUField(*cpuGauge, QUDA_CPU_FIELD_LOCATION);
00186 
00187   loadHwToGPU(cudaHw, hw, cpu_hw_prec);
00188 
00189     
00190   if (verify_results){  
00191     fermion_force_reference(eps, weight1, weight2, act_path_coeff, hw, cpuGauge->Gauge_p(), refMom->Gauge_p());
00192   }
00193     
00194     
00195   /*
00196    * The flops number comes from CPU implementation in MILC
00197    * function eo_fermion_force_twoterms_field(), fermion_force_asqtad.c
00198    *
00199    */
00200   int flops = 433968;
00201 
00202   struct timeval t0, t1;
00203   cudaThreadSynchronize();    
00204 
00205   gettimeofday(&t0, NULL);
00206   fermion_force_cuda(eps, weight1, weight2, act_path_coeff, cudaHw, *cudaGauge, *cudaMom, &gaugeParam);
00207   cudaThreadSynchronize();
00208   gettimeofday(&t1, NULL);
00209   double secs = t1.tv_sec - t0.tv_sec + 0.000001*(t1.tv_usec - t0.tv_usec);
00210     
00211   // copy the new momentum back on the CPU
00212   cudaMom->saveCPUField(*cpuMom, QUDA_CPU_FIELD_LOCATION);
00213 
00214   int res;
00215   res = compare_floats(cpuMom->Gauge_p(), refMom->Gauge_p(), 4*cpuMom->Volume()*momSiteSize, 1e-5, gaugeParam.cpu_prec);
00216     
00217   int accuracy_level;
00218   accuracy_level =  strong_check_mom(cpuMom->Gauge_p(), refMom->Gauge_p(), 4*cpuMom->Volume(), gaugeParam.cpu_prec);
00219   
00220   printf("Test %s\n",(1 == res) ? "PASSED" : "FAILED");     
00221     
00222   int volume = gaugeParam.X[0]*gaugeParam.X[1]*gaugeParam.X[2]*gaugeParam.X[3];
00223   double perf = 1.0* flops*volume/(secs*1024*1024*1024);
00224   printf("GPU runtime =%.2f ms, kernel performance= %.2f GFLOPS\n", secs*1000, perf);
00225     
00226   fermion_force_end();
00227 
00228   if (res == 0){//failed
00229     printf("\n");
00230     printf("Warning: you test failed. \n");
00231     printf("        Did you use --verify?\n");
00232     printf("        Did you check the GPU health by running cuda memtest?\n");
00233   }
00234   
00235   return accuracy_level;
00236 }            
00237 
00238 
00239 static void
00240 display_test_info()
00241 {
00242   printf("running the following fermion force computation test:\n");
00243     
00244   printf("link_precision           link_reconstruct           space_dim(x/y/z)         T_dimension\n");
00245   printf("%s                       %s                         %d/%d/%d                  %d \n", 
00246          get_prec_str(link_prec),
00247          get_recon_str(link_recon), 
00248          xdim, ydim, zdim, tdim);
00249   return ;
00250     
00251 }
00252 
00253 void
00254 usage_extra(char** argv )
00255 {
00256   printf("Extra options: \n");
00257   printf("    --verify                                  # Verify the GPU results using CPU results\n");
00258   return ;
00259 }
00260 
00261 int 
00262 main(int argc, char **argv) 
00263 {
00264   int i;
00265   for (i =1;i < argc; i++){     
00266     if(process_command_line_option(argc, argv, &i) == 0){
00267       continue;
00268     }
00269         
00270     if( strcmp(argv[i], "--verify") == 0){
00271       verify_results=1;
00272       continue;     
00273     }   
00274     fprintf(stderr, "ERROR: Invalid option:%s\n", argv[i]);
00275     usage(argv);
00276   }
00277 
00278 #ifdef MULTI_GPU
00279     initCommsQuda(argc, argv, gridsize_from_cmdline, 4);
00280 #endif
00281 
00282   link_prec = prec;
00283 
00284   display_test_info();
00285   
00286   int accuracy_level = fermion_force_test();
00287   printfQuda("accuracy_level=%d\n", accuracy_level);
00288     
00289 
00290 #ifdef MULTI_GPU
00291     endCommsQuda();
00292 #endif
00293     
00294     int ret;
00295     if(accuracy_level >=3 ){
00296       ret = 0;
00297     }else{
00298       ret = 1; //we delclare the test failed
00299     }
00300 
00301     
00302   return ret;
00303 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines