|
QUDA v0.3.2
A library for QCD on GPUs
|
00001 #include <stdio.h> 00002 #include <stdlib.h> 00003 #include <string.h> 00004 00005 #include <quda.h> 00006 #include "test_util.h" 00007 #include "gauge_quda.h" 00008 #include "misc.h" 00009 #include "fermion_force_reference.h" 00010 #include "fermion_force_quda.h" 00011 #include "hw_quda.h" 00012 #include <sys/time.h> 00013 00014 int device = 0; 00015 static FullGauge cudaSiteLink; 00016 static FullMom cudaMom; 00017 static FullHw cudaHw; 00018 static QudaGaugeParam gaugeParam; 00019 static void* siteLink; 00020 static void* mom; 00021 static void* refMom; 00022 static void* hw; //the array of half_wilson_vector 00023 static int X[4]; 00024 00025 int verify_results = 0; 00026 00027 extern void initDslashCuda(FullGauge gauge); 00028 00029 static int sdim= 8; 00030 00031 int ODD_BIT = 1; 00032 static int tdim = 8; 00033 00034 QudaReconstructType link_recon = QUDA_RECONSTRUCT_12; 00035 QudaPrecision link_prec = QUDA_SINGLE_PRECISION; 00036 QudaPrecision hw_prec = QUDA_SINGLE_PRECISION; 00037 QudaPrecision mom_prec = QUDA_SINGLE_PRECISION; 00038 00039 QudaPrecision cpu_hw_prec = QUDA_SINGLE_PRECISION; 00040 00041 typedef struct { 00042 double real; 00043 double imag; 00044 } dcomplex; 00045 00046 typedef struct { dcomplex e[3][3]; } dsu3_matrix; 00047 00048 00049 int Z[4]; 00050 int V; 00051 int Vh; 00052 void 00053 setDims(int *X) { 00054 V = 1; 00055 for (int d=0; d< 4; d++) { 00056 V *= X[d]; 00057 Z[d] = X[d]; 00058 } 00059 Vh = V/2; 00060 } 00061 00062 static void 00063 fermion_force_init() 00064 { 00065 initQuda(device); 00066 //cudaSetDevice(dev); CUERR; 00067 00068 X[0] = gaugeParam.X[0] = sdim; 00069 X[1] = gaugeParam.X[1] = sdim; 00070 X[2] = gaugeParam.X[2] = sdim; 00071 X[3] = gaugeParam.X[3] = tdim; 00072 00073 setDims(gaugeParam.X); 00074 00075 gaugeParam.cpu_prec = link_prec; 00076 gaugeParam.cuda_prec = link_prec; 00077 gaugeParam.reconstruct = link_recon; 00078 00079 00080 size_t gSize = (gaugeParam.cpu_prec == QUDA_DOUBLE_PRECISION) ? sizeof(double) : sizeof(float); 00081 00082 siteLink = malloc(4*V*gaugeSiteSize* gSize); 00083 if (siteLink == NULL){ 00084 fprintf(stderr, "ERROR: malloc failed for sitelink\n"); 00085 exit(1); 00086 } 00087 createSiteLinkCPU(siteLink, gaugeParam.cpu_prec,1); 00088 00089 #if 1 00090 site_link_sanity_check(siteLink, V, gaugeParam.cpu_prec, &gaugeParam); 00091 #endif 00092 00093 mom = malloc(4*V*momSiteSize*gSize); 00094 if (mom == NULL){ 00095 fprintf(stderr, "ERROR: malloc failed for mom\n"); 00096 exit(1); 00097 } 00098 createMomCPU(mom,mom_prec); 00099 memset(mom, 0, 4*V*momSiteSize*gSize); 00100 00101 refMom = malloc(4*V*momSiteSize*gSize); 00102 if (refMom == NULL){ 00103 fprintf(stderr, "ERROR: malloc failed for refMom\n"); 00104 exit(1); 00105 } 00106 memcpy(refMom, mom, 4*V*momSiteSize*gSize); 00107 00108 00109 hw = malloc(4*V*hwSiteSize*gSize); 00110 if (hw == NULL){ 00111 fprintf(stderr, "ERROR: malloc failed for hw\n"); 00112 exit(1); 00113 } 00114 createHwCPU(hw, hw_prec); 00115 00116 00117 createLinkQuda(&cudaSiteLink, &gaugeParam); 00118 createMomQuda(&cudaMom, &gaugeParam); 00119 cudaHw = createHwQuda(X, hw_prec); 00120 00121 return; 00122 } 00123 00124 static void 00125 fermion_force_end() 00126 { 00127 free(siteLink); 00128 free(mom); 00129 free(refMom); 00130 free(hw); 00131 00132 freeLinkQuda(&cudaSiteLink); 00133 freeMomQuda(&cudaMom); 00134 } 00135 00136 00137 static void 00138 fermion_force_test(void) 00139 { 00140 00141 fermion_force_init(); 00142 initDslashConstants(cudaSiteLink, Vh, Vh); 00143 fermion_force_init_cuda(&gaugeParam); 00144 00145 00146 float eps= 0.02; 00147 float weight1 =1.0; 00148 float weight2 =1.0; 00149 float act_path_coeff[6]; 00150 00151 act_path_coeff[0] = 0.625000; 00152 act_path_coeff[1] = -0.058479; 00153 act_path_coeff[2] = -0.087719; 00154 act_path_coeff[3] = 0.030778; 00155 act_path_coeff[4] = -0.007200; 00156 act_path_coeff[5] = -0.123113; 00157 00158 loadMomToGPU(cudaMom, mom, &gaugeParam); 00159 loadLinkToGPU(cudaSiteLink, siteLink, &gaugeParam); 00160 loadHwToGPU(cudaHw, hw, cpu_hw_prec); 00161 00162 00163 if (verify_results){ 00164 fermion_force_reference(eps, weight1, weight2, act_path_coeff, hw, siteLink, refMom); 00165 } 00166 00167 00168 /* 00169 * The flops number comes from CPU implementation in MILC 00170 * function eo_fermion_force_twoterms_field(), fermion_force_asqtad.c 00171 * 00172 */ 00173 int flops = 433968; 00174 00175 struct timeval t0, t1; 00176 gettimeofday(&t0, NULL); 00177 00178 fermion_force_cuda(eps, weight1, weight2, act_path_coeff, cudaHw, cudaSiteLink, cudaMom, &gaugeParam); 00179 cudaThreadSynchronize(); 00180 gettimeofday(&t1, NULL); 00181 double secs = t1.tv_sec - t0.tv_sec + 0.000001*(t1.tv_usec - t0.tv_usec); 00182 00183 storeMomToCPU(mom, cudaMom, &gaugeParam); 00184 00185 int res; 00186 res = compare_floats(mom, refMom, 4*V*momSiteSize, 1e-5, gaugeParam.cpu_prec); 00187 00188 strong_check_mom(mom, refMom, 4*V, gaugeParam.cpu_prec); 00189 00190 printf("Test %s\n",(1 == res) ? "PASSED" : "FAILED"); 00191 00192 int volume = gaugeParam.X[0]*gaugeParam.X[1]*gaugeParam.X[2]*gaugeParam.X[3]; 00193 double perf = 1.0* flops*volume/(secs*1024*1024*1024); 00194 printf("gpu time =%.2f ms, flops= %.2f Gflops\n", secs*1000, perf); 00195 00196 fermion_force_end(); 00197 00198 if (res == 0){//failed 00199 printf("\n"); 00200 printf("Warning: you test failed. \n"); 00201 printf(" Did you use --verify?\n"); 00202 printf(" Did you check the GPU health by running cuda memtest?\n"); 00203 } 00204 00205 00206 } 00207 00208 00209 static void 00210 display_test_info() 00211 { 00212 printf("running the following fermion force computation test:\n"); 00213 00214 printf("link_precision link_reconstruct S_dimension T_dimension\n"); 00215 printf("%s %s %d %d \n", 00216 get_prec_str(link_prec), 00217 get_recon_str(link_recon), 00218 sdim, tdim); 00219 return ; 00220 00221 } 00222 00223 static void 00224 usage(char** argv ) 00225 { 00226 printf("Usage: %s <args>\n", argv[0]); 00227 printf(" --device <dev_id> Set which device to run on\n"); 00228 printf(" --gprec <double/single/half> Link precision\n"); 00229 printf(" --recon <8/12> Link reconstruction type\n"); 00230 printf(" --sdim <n> Set spacial dimention\n"); 00231 printf(" --tdim Set T dimention size(default 24)\n"); 00232 printf(" --sdim Set spalce dimention size(default 16)\n"); 00233 printf(" --verify Verify the GPU results using CPU results\n"); 00234 printf(" --help Print out this message\n"); 00235 exit(1); 00236 return ; 00237 } 00238 00239 int 00240 main(int argc, char **argv) 00241 { 00242 int i; 00243 for (i =1;i < argc; i++){ 00244 00245 if( strcmp(argv[i], "--help")== 0){ 00246 usage(argv); 00247 } 00248 00249 if( strcmp(argv[i], "--gprec") == 0){ 00250 if (i+1 >= argc){ 00251 usage(argv); 00252 } 00253 link_prec = get_prec(argv[i+1]); 00254 i++; 00255 continue; 00256 } 00257 00258 if( strcmp(argv[i], "--recon") == 0){ 00259 if (i+1 >= argc){ 00260 usage(argv); 00261 } 00262 link_recon = get_recon(argv[i+1]); 00263 i++; 00264 continue; 00265 } 00266 00267 if( strcmp(argv[i], "--tdim") == 0){ 00268 if (i+1 >= argc){ 00269 usage(argv); 00270 } 00271 tdim = atoi(argv[i+1]); 00272 if (tdim < 0 || tdim > 128){ 00273 fprintf(stderr, "Error: invalid t dimention\n"); 00274 exit(1); 00275 } 00276 i++; 00277 continue; 00278 } 00279 if( strcmp(argv[i], "--sdim") == 0){ 00280 if (i+1 >= argc){ 00281 usage(argv); 00282 } 00283 sdim = atoi(argv[i+1]); 00284 if (sdim < 0 || sdim > 128){ 00285 fprintf(stderr, "Error: invalid space dimention\n"); 00286 exit(1); 00287 } 00288 i++; 00289 continue; 00290 } 00291 if( strcmp(argv[i], "--device") == 0){ 00292 if (i+1 >= argc){ 00293 usage(argv); 00294 } 00295 device = atoi(argv[i+1]); 00296 if (device < 0){ 00297 fprintf(stderr, "Error: invalid device number(%d)\n", device); 00298 exit(1); 00299 } 00300 i++; 00301 continue; 00302 } 00303 00304 if( strcmp(argv[i], "--verify") == 0){ 00305 verify_results=1; 00306 continue; 00307 } 00308 fprintf(stderr, "ERROR: Invalid option:%s\n", argv[i]); 00309 usage(argv); 00310 } 00311 00312 display_test_info(); 00313 00314 fermion_force_test(); 00315 00316 00317 return 0; 00318 }
1.7.3