QUDA v0.4.0
A library for QCD on GPUs
quda/tests/hisq_unitarize_force_test.cpp
Go to the documentation of this file.
00001 #include <cstdio>
00002 #include <cstdlib>
00003 #include <cstring>
00004 
00005 #include <quda.h>
00006 #include "test_util.h"
00007 #include "gauge_field.h"
00008 #include "fat_force_quda.h"
00009 #include "misc.h"
00010 #include "hisq_force_reference.h"
00011 #include "hisq_force_quda.h"
00012 #include "hisq_force_utils.h"
00013 #include "hw_quda.h"
00014 #include <sys/time.h>
00015 
00016 using namespace hisq::fermion_force;
00017 extern void usage(char** argv);
00018 static int device = 0;
00019 cudaGaugeField *cudaGauge = NULL;
00020 cpuGaugeField  *cpuGauge  = NULL;
00021 
00022 cudaGaugeField *cudaForce = NULL;
00023 cpuGaugeField  *cpuForce = NULL;
00024 
00025 cpuGaugeField *cpuReference = NULL;
00026 
00027 static FullHw cudaHw;
00028 static QudaGaugeParam gaugeParam;
00029 static void* hw; // the array of half_wilson_vector
00030 
00031 
00032 cpuGaugeField *cpuOprod = NULL;
00033 cudaGaugeField *cudaOprod = NULL;
00034 
00035 
00036 int verify_results = 0;
00037 int ODD_BIT = 1;
00038 extern int xdim, ydim, zdim, tdim;
00039 
00040 extern QudaReconstructType link_recon;
00041 extern QudaPrecision prec;
00042 QudaPrecision link_prec = QUDA_SINGLE_PRECISION;
00043 QudaPrecision hw_prec = QUDA_SINGLE_PRECISION;
00044 QudaPrecision cpu_hw_prec = QUDA_SINGLE_PRECISION;
00045 QudaPrecision mom_prec = QUDA_SINGLE_PRECISION;
00046 
00047 void setPrecision(QudaPrecision precision)
00048 {
00049   link_prec   = precision;
00050   hw_prec     = precision;
00051   cpu_hw_prec = precision;
00052   mom_prec    = precision;
00053   
00054   return;
00055 }
00056 
00057 
00058 int Z[4];
00059 int V;
00060 int Vh;
00061 
00062 
00063 void
00064 setDims(int *X){
00065   V = 1;
00066   for(int dir=0; dir<4; ++dir){
00067     V *= X[dir];
00068     Z[dir] = X[dir];
00069   }
00070   Vh = V/2;
00071   return;
00072 }
00073 
00074 
00075 void initDslashConstants(const cudaGaugeField &gauge, const int sp_stride);
00076 
00077 
00078 // allocate memory
00079 // set the layout, etc.
00080 static void
00081 hisq_force_init()
00082 {
00083   initQuda(device);
00084 
00085   gaugeParam.X[0] = xdim;
00086   gaugeParam.X[1] = ydim;
00087   gaugeParam.X[2] = zdim;
00088   gaugeParam.X[3] = tdim;
00089 
00090   setDims(gaugeParam.X);
00091 
00092   gaugeParam.cpu_prec = link_prec;
00093   gaugeParam.cuda_prec = link_prec;
00094   gaugeParam.reconstruct = link_recon;
00095 
00096   gaugeParam.gauge_order = QUDA_MILC_GAUGE_ORDER;
00097 
00098   GaugeFieldParam gParam(0, gaugeParam);
00099   gParam.create = QUDA_NULL_FIELD_CREATE;
00100 
00101   cpuGauge = new cpuGaugeField(gParam);
00102 
00103   // this is a hack to get the gauge field to appear as a void** rather than void*
00104   void* siteLink_2d[4];
00105     for(int i=0;i < 4;i++){
00106        siteLink_2d[i] = ((char*)cpuGauge->Gauge_p()) + i*cpuGauge->Volume()* gaugeSiteSize* gaugeParam.cpu_prec;
00107      }
00108 
00109   // fills the gauge field with random numbers
00110   createSiteLinkCPU(siteLink_2d, gaugeParam.cpu_prec, 0);
00111 
00112   gParam.precision = gaugeParam.cuda_prec;
00113   gParam.reconstruct = QUDA_RECONSTRUCT_NO;
00114   cudaGauge = new cudaGaugeField(gParam);
00115 
00116   // create the force matrix
00117   // cannot reconstruct, since the force matrix is not in SU(3)
00118   gParam.precision = gaugeParam.cpu_prec;
00119   gParam.reconstruct = QUDA_RECONSTRUCT_NO;
00120   cpuForce = new cpuGaugeField(gParam); 
00121   memset(cpuForce->Gauge_p(), 0, 4*cpuForce->Volume()*gaugeSiteSize*gaugeParam.cpu_prec);
00122 
00123   cpuReference = new cpuGaugeField(gParam);
00124   memset(cpuReference->Gauge_p(), 0, 4*cpuReference->Volume()*gaugeSiteSize*gaugeParam.cpu_prec);
00125   
00126 
00127 
00128   gParam.precision = gaugeParam.cuda_prec;
00129   gParam.reconstruct = QUDA_RECONSTRUCT_NO;
00130   cudaForce = new cudaGaugeField(gParam); 
00131   cudaMemset((void**)(cudaForce->Gauge_p()), 0, cudaForce->Bytes());
00132 
00133   cudaForce->loadCPUField(*cpuForce, QUDA_CPU_FIELD_LOCATION);
00134   
00135 
00136 
00137   gParam.precision = gaugeParam.cuda_prec;
00138 
00139   hw = malloc(4*cpuGauge->Volume()*hwSiteSize*gaugeParam.cpu_prec);
00140   if (hw == NULL){
00141     fprintf(stderr, "ERROR: malloc failed for hw\n");
00142     exit(1);
00143   }
00144 
00145   createHwCPU(hw, hw_prec);
00146   cudaHw = createHwQuda(gaugeParam.X, hw_prec);
00147 
00148 
00149 
00150   gParam.reconstruct = QUDA_RECONSTRUCT_NO;
00151   gParam.precision = gaugeParam.cpu_prec;
00152   cpuOprod = new cpuGaugeField(gParam);
00153   computeLinkOrderedOuterProduct(hw, cpuOprod->Gauge_p(), hw_prec);
00154 
00155 
00156   gParam.precision = hw_prec;
00157   cudaOprod = new cudaGaugeField(gParam);
00158 
00159   checkCudaError();     
00160   
00161   return;
00162 }
00163 
00164 
00165 static void 
00166 hisq_force_end()
00167 {
00168   delete cudaForce;
00169   delete cudaOprod;
00170   delete cudaGauge;
00171   freeHwQuda(cudaHw);
00172   
00173   delete cpuGauge;
00174   delete cpuForce;
00175   delete cpuOprod;
00176   delete cpuReference;
00177   free(hw);
00178 
00179   endQuda();
00180   return;
00181 }
00182 
00183 static void 
00184 hisq_force_test()
00185 {
00186   hisq_force_init();
00187 
00188   initDslashConstants(*cudaGauge, cudaGauge->VolumeCB());
00189   hisqForceInitCuda(&gaugeParam);
00190 
00191   float act_path_coeff[6];
00192   act_path_coeff[0] = 0.625000;
00193   // act_path_coeff[1] = -0.058479;
00194   act_path_coeff[1] = 0.0; // set Naik term to zero, temporarily
00195   act_path_coeff[2] = -0.087719;
00196   act_path_coeff[3] = 0.030778;
00197   act_path_coeff[4] = -0.007200;
00198   act_path_coeff[5] = -0.123113;
00199 
00200   double d_act_path_coeff[6];
00201   for(int i=0; i<6; ++i){
00202     d_act_path_coeff[i] = act_path_coeff[i];
00203   }
00204 
00205 
00206   // copy the gauge field to the GPU
00207   cudaGauge->loadCPUField(*cpuGauge, QUDA_CPU_FIELD_LOCATION);
00208 
00209   // copy the outer product field to the GPU
00210   cudaOprod->loadCPUField(*cpuOprod, QUDA_CPU_FIELD_LOCATION);
00211 
00212   loadHwToGPU(cudaHw, hw, cpu_hw_prec);
00213 
00214 
00215   const double unitarize_eps = 1e-5;
00216   const double hisq_force_filter = 5e-5;
00217   const double max_det_error = 1e-12;
00218   const bool allow_svd = true;
00219   const bool svd_only = false;
00220   const double svd_rel_err = 1e-8;
00221   const double svd_abs_err = 1e-8;
00222 
00223   setUnitarizeForceConstants(unitarize_eps, hisq_force_filter, max_det_error, allow_svd, svd_only, svd_rel_err, svd_abs_err);
00224   
00225   // First of all we fatten the links on the GPU
00226   hisqStaplesForceCuda(d_act_path_coeff, gaugeParam, *cudaOprod, *cudaGauge, cudaForce);
00227 
00228   cudaThreadSynchronize();
00229 
00230 
00231   checkCudaError();
00232   cudaForce->saveCPUField(*cpuForce, QUDA_CPU_FIELD_LOCATION);
00233 
00234 
00235   int* unitarization_failed_dev; 
00236   cudaMalloc((void**)&unitarization_failed_dev, sizeof(int));
00237 
00238   unitarizeForceCuda(gaugeParam, *cudaForce, *cudaGauge, cudaOprod, unitarization_failed_dev); // output is written to cudaOprod.
00239   if(verify_results){
00240     unitarizeForceCPU(gaugeParam, *cpuForce, *cpuGauge, cpuOprod);
00241   }
00242   cudaThreadSynchronize();
00243   checkCudaError();
00244   cudaFree(unitarization_failed_dev);
00245 
00246   cudaOprod->saveCPUField(*cpuReference, QUDA_CPU_FIELD_LOCATION); 
00247 
00248   int res;
00249   res = compare_floats(cpuOprod->Gauge_p(), cpuOprod->Gauge_p(), 4*cpuReference->Volume()*gaugeSiteSize, 1e-5, gaugeParam.cpu_prec);
00250 
00251   printf("Test %s\n",(1 == res) ? "PASSED" : "FAILED");
00252 
00253   hisq_force_end();
00254 }
00255 
00256 
00257 static void
00258 display_test_info()
00259 {
00260   printf("running the following fermion force computation test:\n");
00261     
00262   printf("link_precision           link_reconstruct           space_dim(x/y/z)         T_dimension\n");
00263   printf("%s                       %s                         %d/%d/%d                  %d \n", 
00264          get_prec_str(link_prec),
00265          get_recon_str(link_recon), 
00266          xdim, ydim, zdim, tdim);
00267   return ;
00268     
00269 }
00270 
00271 void
00272 usage_extra(char** argv )
00273 {
00274   printf("Extra options: \n");
00275   printf("    --verify                                  # Verify the GPU results using CPU results\n");
00276   return ;
00277 }
00278 
00279 int 
00280 main(int argc, char **argv) 
00281 {
00282   int i;
00283   for (i =1;i < argc; i++){
00284         
00285     if(process_command_line_option(argc, argv, &i) == 0){
00286       continue;
00287     }  
00288 
00289     if( strcmp(argv[i], "--verify") == 0){
00290       verify_results=1;
00291       continue;     
00292     }   
00293     fprintf(stderr, "ERROR: Invalid option:%s\n", argv[i]);
00294     usage(argv);
00295   }
00296 
00297 #ifdef MULTI_GPU
00298     initCommsQuda(argc, argv, gridsize_from_cmdline, 4);
00299 #endif
00300 
00301   setPrecision(prec);
00302 
00303   display_test_info();
00304     
00305   hisq_force_test();
00306 
00307 
00308 #ifdef MULTI_GPU
00309     endCommsQuda();
00310 #endif
00311     
00312     
00313   return EXIT_SUCCESS;
00314 }
00315 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines