QUDA: quda/tests/unitarize_link_test.cpp Source File

QUDA v0.4.0
A library for QCD on GPUs
00001 #include <stdio.h>
00002 #include <stdlib.h>
00003 #include <string.h>
00004 #include <sys/time.h>
00005 #include <cuda.h>
00006 #include <cuda_runtime.h>
00007 
00008 #include "quda.h"
00009 #include "gauge_field.h"
00010 #include "test_util.h"
00011 #include "llfat_reference.h"
00012 #include "misc.h"
00013 #include "util_quda.h"
00014 #include "llfat_quda.h"
00015 #include "fat_force_quda.h"
00016 #include "hisq_links_quda.h"
00017 #include "dslash_quda.h"
00018 #include "hisq_force_quda.h"
00019 
00020 #ifdef MULTI_GPU
00021 #include "face_quda.h"
00022 #include "comm_quda.h"
00023 #endif
00024 
00025 #define TDIFF(a,b) (b.tv_sec - a.tv_sec + 0.000001*(b.tv_usec - a.tv_usec))
00026 
00027 extern void usage(char** argv);
00028 static int verify_results = 0;
00029 
00030 extern int device;
00031 int Z[4];
00032 int V;
00033 int Vh;
00034 int Vs[4];
00035 int Vsh[4];
00036 
00037 static int V_ex;
00038 static int Vh_ex;
00039 
00040 
00041 static double unitarize_eps  = 1e-6;
00042 static bool reunit_allow_svd = true;
00043 static bool reunit_svd_only  = false;
00044 static double svd_rel_error  = 1e-4;
00045 static double svd_abs_error  = 1e-5;
00046 static double max_allowed_error = 1e-12;
00047 static bool check_unitarization = true;
00048 
00049 
00050 extern int xdim, ydim, zdim, tdim;
00051 extern int gridsize_from_cmdline[];
00052 
00053 extern QudaReconstructType link_recon;
00054 extern QudaPrecision prec;
00055 static QudaPrecision cpu_prec = QUDA_DOUBLE_PRECISION;
00056 static QudaGaugeFieldOrder gauge_order = QUDA_QDP_GAUGE_ORDER;
00057 
00058 static size_t gSize;
00059 
00060 void
00061 setDims(const int *X) {
00062   V = 1;
00063   for (int dir=0; dir<4; ++dir) {
00064     V *= X[dir];
00065     Z[dir] = X[dir];
00066   }
00067   Vh = V/2;
00068 
00069   Vs[0] =  X[1]*X[2]*X[3];
00070   Vs[1] =  X[0]*X[2]*X[3];
00071   Vs[2] =  X[0]*X[1]*X[3];
00072   Vs[3] =  X[0]*X[1]*X[2];
00073   for(int dir=0; dir<4; ++dir) Vsh[dir] = Vs[dir]/2;
00074   V_ex = 1;
00075   for (int d=0; d< 4; d++) {
00076     V_ex *= X[d]+4;
00077   }
00078   Vh_ex = V_ex/2;
00079   return;
00080 }
00081 
00082 static int
00083 unitarize_link_test()
00084 {
00085 
00086   QudaGaugeParam qudaGaugeParam = newQudaGaugeParam();
00087 
00088   initQuda(0);
00089 
00090   cpu_prec = prec;
00091   gSize = cpu_prec;  
00092   qudaGaugeParam.anisotropy = 1.0;
00093   
00094   qudaGaugeParam.X[0] = xdim;
00095   qudaGaugeParam.X[1] = ydim;
00096   qudaGaugeParam.X[2] = zdim;
00097   qudaGaugeParam.X[3] = tdim;
00098 
00099   setDims(qudaGaugeParam.X);
00100   
00101   QudaPrecision link_prec = QUDA_SINGLE_PRECISION;
00102   QudaReconstructType link_recon = QUDA_RECONSTRUCT_NO;
00103 
00104   qudaGaugeParam.cpu_prec  = link_prec;
00105   qudaGaugeParam.cuda_prec = link_prec;
00106   qudaGaugeParam.reconstruct = link_recon;
00107   qudaGaugeParam.type = QUDA_WILSON_LINKS;
00108 
00109 
00110   hisq::fermion_force::hisqForceInitCuda(&qudaGaugeParam);
00111   
00112   qudaGaugeParam.t_boundary        = QUDA_PERIODIC_T;
00113   qudaGaugeParam.anisotropy        = 1.0;
00114   qudaGaugeParam.cuda_prec_sloppy   = prec;
00115   qudaGaugeParam.reconstruct_sloppy = QUDA_RECONSTRUCT_NO;
00116   qudaGaugeParam.gauge_fix         = QUDA_GAUGE_FIXED_NO;
00117   qudaGaugeParam.ga_pad            = 0;
00118   qudaGaugeParam.packed_size       = 0;
00119   qudaGaugeParam.gaugeGiB          = 0;
00120   qudaGaugeParam.flag              = false;
00121 
00122    
00123   qudaGaugeParam.cpu_prec = cpu_prec;
00124   qudaGaugeParam.cuda_prec = prec;
00125   qudaGaugeParam.gauge_order = gauge_order;
00126   qudaGaugeParam.type=QUDA_WILSON_LINKS;
00127   qudaGaugeParam.reconstruct = link_recon;
00128   qudaGaugeParam.flag = QUDA_FAT_PRESERVE_CPU_GAUGE
00129     | QUDA_FAT_PRESERVE_GPU_GAUGE
00130     | QUDA_FAT_PRESERVE_COMM_MEM;
00131 
00132   setFatLinkPadding(QUDA_COMPUTE_FAT_STANDARD, &qudaGaugeParam);
00133  
00134   GaugeFieldParam gParam(0, qudaGaugeParam);
00135   gParam.pad = 0;
00136   gParam.create    = QUDA_REFERENCE_FIELD_CREATE;
00137   gParam.link_type = QUDA_WILSON_LINKS;
00138   gParam.order     = QUDA_MILC_GAUGE_ORDER;
00139   cpuGaugeField *cpuOutLink  = new cpuGaugeField(gParam);
00140 
00141   gParam.pad         = 0;
00142   gParam.create      = QUDA_NULL_FIELD_CREATE;
00143   gParam.link_type   = QUDA_WILSON_LINKS;
00144   gParam.order       = QUDA_QDP_GAUGE_ORDER;
00145   gParam.reconstruct = QUDA_RECONSTRUCT_NO;
00146   cudaGaugeField *cudaFatLink = new cudaGaugeField(gParam);
00147   cudaGaugeField *cudaULink   = new cudaGaugeField(gParam);  
00148 
00149   initCommonConstants(*cudaFatLink);
00150 
00151   void* fatlink = (void*)malloc(4*V*gaugeSiteSize*gSize);
00152   if(fatlink == NULL){
00153     errorQuda("ERROR: allocating fatlink failed\n");
00154   }
00155   
00156   void* sitelink[4];
00157   for(int i=0;i < 4;i++){
00158     cudaMallocHost((void**)&sitelink[i], V*gaugeSiteSize*gSize);
00159     if(sitelink[i] == NULL){
00160       errorQuda("ERROR; allocate sitelink[%d] failed\n", i);
00161     }
00162   }
00163   
00164   createSiteLinkCPU(sitelink, qudaGaugeParam.cpu_prec, 1);
00165 
00166   double act_path_coeff[6];
00167   act_path_coeff[0] = 0.625000;
00168   act_path_coeff[1] = -0.058479;
00169   act_path_coeff[2] = -0.087719;
00170   act_path_coeff[3] = 0.030778;
00171   act_path_coeff[4] = -0.007200;
00172   act_path_coeff[5] = -0.123113;
00173 
00174 
00175   //only record the last call's performance
00176   //the first one is for creating the cpu/cuda data structures
00177   
00178   if(gauge_order == QUDA_QDP_GAUGE_ORDER){
00179     computeFatLinkQuda(fatlink, sitelink, act_path_coeff, &qudaGaugeParam,
00180                            QUDA_COMPUTE_FAT_STANDARD);
00181   } // gauge order is QDP_GAUGE_ORDER
00182 
00183   cpuOutLink->setGauge((void**)fatlink);
00184   cudaFatLink->loadCPUField(*cpuOutLink, QUDA_CPU_FIELD_LOCATION);
00185  
00186 
00187  
00188   hisq::setUnitarizeLinksConstants(unitarize_eps,
00189                                    max_allowed_error,
00190                                    reunit_allow_svd,
00191                                    reunit_svd_only,
00192                                    svd_rel_error,
00193                                    svd_abs_error);
00194  
00195   hisq::setUnitarizeLinksPadding(0,0);
00196 
00197   int* num_failures_dev;
00198   if(cudaMalloc(&num_failures_dev, sizeof(int)) != cudaSuccess){
00199         errorQuda("cudaMallo failed for num_failures_dev\n");
00200   }
00201   cudaMemset(num_failures_dev, 0, sizeof(int));
00202 
00203   struct timeval t0, t1;
00204 
00205   gettimeofday(&t0,NULL);
00206   hisq::unitarizeLinksCuda(qudaGaugeParam,*cudaFatLink, cudaULink, num_failures_dev);
00207   cudaThreadSynchronize();
00208   gettimeofday(&t1,NULL);
00209 
00210   int num_failures=0;
00211   cudaMemcpy(&num_failures, num_failures_dev, sizeof(int), cudaMemcpyDeviceToHost);
00212 
00213  delete cudaFatLink;
00214  delete cudaULink;
00215  for(int dir=0; dir<4; ++dir) cudaFreeHost(sitelink[dir]);
00216   cudaFree(num_failures_dev); 
00217 #ifdef MULTI_GPU
00218   exchange_llfat_cleanup();
00219 #endif
00220   endQuda();
00221    
00222   printfQuda("Unitarization time: %g ms\n", TDIFF(t0,t1)*1000); 
00223   return num_failures;
00224 }
00225 
00226 static void
00227 display_test_info()
00228 {
00229   printfQuda("running the following test:\n");
00230     
00231   printfQuda("link_precision           link_reconstruct           space_dimension        T_dimension       algorithm     max allowed error\n");
00232   printfQuda("%s                       %s                         %d/%d/%d/                  %d               %s             %g \n", 
00233              get_prec_str(prec),
00234              get_recon_str(link_recon), 
00235              xdim, ydim, zdim, tdim, 
00236              get_unitarization_str(reunit_svd_only),
00237              max_allowed_error);
00238 
00239 #ifdef MULTI_GPU
00240   printfQuda("Grid partition info:     X  Y  Z  T\n");
00241   printfQuda("                         %d  %d  %d  %d\n",
00242              commDimPartitioned(0),
00243              commDimPartitioned(1),
00244              commDimPartitioned(2),
00245              commDimPartitioned(3));
00246 #endif
00247 
00248   return ;
00249   
00250 }
00251 
00252 
00253 int
00254 main(int argc, char **argv)
00255 {
00256   //default to 18 reconstruct, 8^3 x 8
00257   link_recon = QUDA_RECONSTRUCT_NO;
00258   xdim=ydim=zdim=tdim=8;
00259   cpu_prec = prec = QUDA_DOUBLE_PRECISION;
00260 
00261   int i;
00262   for (i=1; i<argc; i++){
00263     if(process_command_line_option(argc, argv, &i) == 0){
00264       continue;
00265     }
00266     
00267     fprintf(stderr, "ERROR: Invalid option:%s\n", argv[i]);
00268     usage(argv);
00269   }
00270 
00271   initCommsQuda(argc, argv, gridsize_from_cmdline, 4);
00272 
00273   display_test_info();
00274   int num_failures = unitarize_link_test();
00275   printfQuda("Number of failures = %d\n", num_failures);
00276   if(num_failures > 0){
00277     printfQuda("Failure rate = %lf%\n", num_failures/(4.0*V));
00278     printfQuda("You may want to increase your error tolerance or vary the unitarization parameters\n");
00279   }
00280   endCommsQuda();
00281 
00282   return EXIT_SUCCESS;
00283 }
00284 
00285