QUDA v0.4.0
A library for QCD on GPUs
|
00001 #include <cstdio> 00002 #include <cstdlib> 00003 #include <cstring> 00004 00005 #include <quda.h> 00006 #include "test_util.h" 00007 #include "gauge_field.h" 00008 #include "fat_force_quda.h" 00009 #include "misc.h" 00010 #include "hisq_force_reference.h" 00011 #include "hisq_force_quda.h" 00012 #include "hisq_force_utils.h" 00013 #include "hw_quda.h" 00014 #include <sys/time.h> 00015 00016 using namespace hisq::fermion_force; 00017 extern void usage(char** argv); 00018 static int device = 0; 00019 cudaGaugeField *cudaGauge = NULL; 00020 cpuGaugeField *cpuGauge = NULL; 00021 00022 cudaGaugeField *cudaForce = NULL; 00023 cpuGaugeField *cpuForce = NULL; 00024 00025 cpuGaugeField *cpuReference = NULL; 00026 00027 static FullHw cudaHw; 00028 static QudaGaugeParam gaugeParam; 00029 static void* hw; // the array of half_wilson_vector 00030 00031 00032 cpuGaugeField *cpuOprod = NULL; 00033 cudaGaugeField *cudaOprod = NULL; 00034 00035 00036 int verify_results = 0; 00037 int ODD_BIT = 1; 00038 extern int xdim, ydim, zdim, tdim; 00039 00040 extern QudaReconstructType link_recon; 00041 extern QudaPrecision prec; 00042 QudaPrecision link_prec = QUDA_SINGLE_PRECISION; 00043 QudaPrecision hw_prec = QUDA_SINGLE_PRECISION; 00044 QudaPrecision cpu_hw_prec = QUDA_SINGLE_PRECISION; 00045 QudaPrecision mom_prec = QUDA_SINGLE_PRECISION; 00046 00047 void setPrecision(QudaPrecision precision) 00048 { 00049 link_prec = precision; 00050 hw_prec = precision; 00051 cpu_hw_prec = precision; 00052 mom_prec = precision; 00053 00054 return; 00055 } 00056 00057 00058 int Z[4]; 00059 int V; 00060 int Vh; 00061 00062 00063 void 00064 setDims(int *X){ 00065 V = 1; 00066 for(int dir=0; dir<4; ++dir){ 00067 V *= X[dir]; 00068 Z[dir] = X[dir]; 00069 } 00070 Vh = V/2; 00071 return; 00072 } 00073 00074 00075 void initDslashConstants(const cudaGaugeField &gauge, const int sp_stride); 00076 00077 00078 // allocate memory 00079 // set the layout, etc. 00080 static void 00081 hisq_force_init() 00082 { 00083 initQuda(device); 00084 00085 gaugeParam.X[0] = xdim; 00086 gaugeParam.X[1] = ydim; 00087 gaugeParam.X[2] = zdim; 00088 gaugeParam.X[3] = tdim; 00089 00090 setDims(gaugeParam.X); 00091 00092 gaugeParam.cpu_prec = link_prec; 00093 gaugeParam.cuda_prec = link_prec; 00094 gaugeParam.reconstruct = link_recon; 00095 00096 gaugeParam.gauge_order = QUDA_MILC_GAUGE_ORDER; 00097 00098 GaugeFieldParam gParam(0, gaugeParam); 00099 gParam.create = QUDA_NULL_FIELD_CREATE; 00100 00101 cpuGauge = new cpuGaugeField(gParam); 00102 00103 // this is a hack to get the gauge field to appear as a void** rather than void* 00104 void* siteLink_2d[4]; 00105 for(int i=0;i < 4;i++){ 00106 siteLink_2d[i] = ((char*)cpuGauge->Gauge_p()) + i*cpuGauge->Volume()* gaugeSiteSize* gaugeParam.cpu_prec; 00107 } 00108 00109 // fills the gauge field with random numbers 00110 createSiteLinkCPU(siteLink_2d, gaugeParam.cpu_prec, 0); 00111 00112 gParam.precision = gaugeParam.cuda_prec; 00113 gParam.reconstruct = QUDA_RECONSTRUCT_NO; 00114 cudaGauge = new cudaGaugeField(gParam); 00115 00116 // create the force matrix 00117 // cannot reconstruct, since the force matrix is not in SU(3) 00118 gParam.precision = gaugeParam.cpu_prec; 00119 gParam.reconstruct = QUDA_RECONSTRUCT_NO; 00120 cpuForce = new cpuGaugeField(gParam); 00121 memset(cpuForce->Gauge_p(), 0, 4*cpuForce->Volume()*gaugeSiteSize*gaugeParam.cpu_prec); 00122 00123 cpuReference = new cpuGaugeField(gParam); 00124 memset(cpuReference->Gauge_p(), 0, 4*cpuReference->Volume()*gaugeSiteSize*gaugeParam.cpu_prec); 00125 00126 00127 00128 gParam.precision = gaugeParam.cuda_prec; 00129 gParam.reconstruct = QUDA_RECONSTRUCT_NO; 00130 cudaForce = new cudaGaugeField(gParam); 00131 cudaMemset((void**)(cudaForce->Gauge_p()), 0, cudaForce->Bytes()); 00132 00133 cudaForce->loadCPUField(*cpuForce, QUDA_CPU_FIELD_LOCATION); 00134 00135 00136 00137 gParam.precision = gaugeParam.cuda_prec; 00138 00139 hw = malloc(4*cpuGauge->Volume()*hwSiteSize*gaugeParam.cpu_prec); 00140 if (hw == NULL){ 00141 fprintf(stderr, "ERROR: malloc failed for hw\n"); 00142 exit(1); 00143 } 00144 00145 createHwCPU(hw, hw_prec); 00146 cudaHw = createHwQuda(gaugeParam.X, hw_prec); 00147 00148 00149 00150 gParam.reconstruct = QUDA_RECONSTRUCT_NO; 00151 gParam.precision = gaugeParam.cpu_prec; 00152 cpuOprod = new cpuGaugeField(gParam); 00153 computeLinkOrderedOuterProduct(hw, cpuOprod->Gauge_p(), hw_prec); 00154 00155 00156 gParam.precision = hw_prec; 00157 cudaOprod = new cudaGaugeField(gParam); 00158 00159 checkCudaError(); 00160 00161 return; 00162 } 00163 00164 00165 static void 00166 hisq_force_end() 00167 { 00168 delete cudaForce; 00169 delete cudaOprod; 00170 delete cudaGauge; 00171 freeHwQuda(cudaHw); 00172 00173 delete cpuGauge; 00174 delete cpuForce; 00175 delete cpuOprod; 00176 delete cpuReference; 00177 free(hw); 00178 00179 endQuda(); 00180 return; 00181 } 00182 00183 static void 00184 hisq_force_test() 00185 { 00186 hisq_force_init(); 00187 00188 initDslashConstants(*cudaGauge, cudaGauge->VolumeCB()); 00189 hisqForceInitCuda(&gaugeParam); 00190 00191 float act_path_coeff[6]; 00192 act_path_coeff[0] = 0.625000; 00193 // act_path_coeff[1] = -0.058479; 00194 act_path_coeff[1] = 0.0; // set Naik term to zero, temporarily 00195 act_path_coeff[2] = -0.087719; 00196 act_path_coeff[3] = 0.030778; 00197 act_path_coeff[4] = -0.007200; 00198 act_path_coeff[5] = -0.123113; 00199 00200 double d_act_path_coeff[6]; 00201 for(int i=0; i<6; ++i){ 00202 d_act_path_coeff[i] = act_path_coeff[i]; 00203 } 00204 00205 00206 // copy the gauge field to the GPU 00207 cudaGauge->loadCPUField(*cpuGauge, QUDA_CPU_FIELD_LOCATION); 00208 00209 // copy the outer product field to the GPU 00210 cudaOprod->loadCPUField(*cpuOprod, QUDA_CPU_FIELD_LOCATION); 00211 00212 loadHwToGPU(cudaHw, hw, cpu_hw_prec); 00213 00214 00215 const double unitarize_eps = 1e-5; 00216 const double hisq_force_filter = 5e-5; 00217 const double max_det_error = 1e-12; 00218 const bool allow_svd = true; 00219 const bool svd_only = false; 00220 const double svd_rel_err = 1e-8; 00221 const double svd_abs_err = 1e-8; 00222 00223 setUnitarizeForceConstants(unitarize_eps, hisq_force_filter, max_det_error, allow_svd, svd_only, svd_rel_err, svd_abs_err); 00224 00225 // First of all we fatten the links on the GPU 00226 hisqStaplesForceCuda(d_act_path_coeff, gaugeParam, *cudaOprod, *cudaGauge, cudaForce); 00227 00228 cudaThreadSynchronize(); 00229 00230 00231 checkCudaError(); 00232 cudaForce->saveCPUField(*cpuForce, QUDA_CPU_FIELD_LOCATION); 00233 00234 00235 int* unitarization_failed_dev; 00236 cudaMalloc((void**)&unitarization_failed_dev, sizeof(int)); 00237 00238 unitarizeForceCuda(gaugeParam, *cudaForce, *cudaGauge, cudaOprod, unitarization_failed_dev); // output is written to cudaOprod. 00239 if(verify_results){ 00240 unitarizeForceCPU(gaugeParam, *cpuForce, *cpuGauge, cpuOprod); 00241 } 00242 cudaThreadSynchronize(); 00243 checkCudaError(); 00244 cudaFree(unitarization_failed_dev); 00245 00246 cudaOprod->saveCPUField(*cpuReference, QUDA_CPU_FIELD_LOCATION); 00247 00248 int res; 00249 res = compare_floats(cpuOprod->Gauge_p(), cpuOprod->Gauge_p(), 4*cpuReference->Volume()*gaugeSiteSize, 1e-5, gaugeParam.cpu_prec); 00250 00251 printf("Test %s\n",(1 == res) ? "PASSED" : "FAILED"); 00252 00253 hisq_force_end(); 00254 } 00255 00256 00257 static void 00258 display_test_info() 00259 { 00260 printf("running the following fermion force computation test:\n"); 00261 00262 printf("link_precision link_reconstruct space_dim(x/y/z) T_dimension\n"); 00263 printf("%s %s %d/%d/%d %d \n", 00264 get_prec_str(link_prec), 00265 get_recon_str(link_recon), 00266 xdim, ydim, zdim, tdim); 00267 return ; 00268 00269 } 00270 00271 void 00272 usage_extra(char** argv ) 00273 { 00274 printf("Extra options: \n"); 00275 printf(" --verify # Verify the GPU results using CPU results\n"); 00276 return ; 00277 } 00278 00279 int 00280 main(int argc, char **argv) 00281 { 00282 int i; 00283 for (i =1;i < argc; i++){ 00284 00285 if(process_command_line_option(argc, argv, &i) == 0){ 00286 continue; 00287 } 00288 00289 if( strcmp(argv[i], "--verify") == 0){ 00290 verify_results=1; 00291 continue; 00292 } 00293 fprintf(stderr, "ERROR: Invalid option:%s\n", argv[i]); 00294 usage(argv); 00295 } 00296 00297 #ifdef MULTI_GPU 00298 initCommsQuda(argc, argv, gridsize_from_cmdline, 4); 00299 #endif 00300 00301 setPrecision(prec); 00302 00303 display_test_info(); 00304 00305 hisq_force_test(); 00306 00307 00308 #ifdef MULTI_GPU 00309 endCommsQuda(); 00310 #endif 00311 00312 00313 return EXIT_SUCCESS; 00314 } 00315