|
QUDA v0.3.2
A library for QCD on GPUs
|
00001 #include <stdio.h> 00002 #include <stdlib.h> 00003 #include <string.h> 00004 #include <sys/time.h> 00005 00006 #include <quda.h> 00007 #include <gauge_quda.h> 00008 #include <llfat_quda.h> 00009 00010 #include <test_util.h> 00011 #include <llfat_reference.h> 00012 #include "misc.h" 00013 00014 FullGauge cudaSiteLink; 00015 FullGauge cudaFatLink; 00016 FullStaple cudaStaple; 00017 FullStaple cudaStaple1; 00018 QudaGaugeParam gaugeParam; 00019 void *fatLink, *siteLink, *refLink; 00020 int verify_results = 0; 00021 00022 extern void initDslashCuda(FullGauge gauge); 00023 00024 #define DIM 24 00025 00026 int device = 0; 00027 int ODD_BIT = 1; 00028 int tdim = 16; 00029 int sdim = 16; 00030 int Z[4]; 00031 int V; 00032 int Vh; 00033 00034 00035 QudaReconstructType link_recon = QUDA_RECONSTRUCT_NO; 00036 QudaPrecision link_prec = QUDA_DOUBLE_PRECISION; 00037 QudaPrecision cpu_link_prec = QUDA_DOUBLE_PRECISION; 00038 00039 typedef struct { 00040 double real; 00041 double imag; 00042 } dcomplex; 00043 00044 typedef struct { dcomplex e[3][3]; } dsu3_matrix; 00045 00046 00047 00048 void 00049 setDims(int *X) { 00050 V = 1; 00051 for (int d=0; d< 4; d++) { 00052 V *= X[d]; 00053 Z[d] = X[d]; 00054 } 00055 Vh = V/2; 00056 } 00057 00058 static void 00059 llfat_init(void) 00060 { 00061 initQuda(device); 00062 //cudaSetDevice(dev); CUERR; 00063 00064 gaugeParam.X[0] = sdim; 00065 gaugeParam.X[1] = sdim; 00066 gaugeParam.X[2] = sdim; 00067 gaugeParam.X[3] = tdim; 00068 00069 setDims(gaugeParam.X); 00070 00071 gaugeParam.cpu_prec = cpu_link_prec; 00072 gaugeParam.cuda_prec = link_prec; 00073 00074 size_t gSize = (gaugeParam.cpu_prec == QUDA_DOUBLE_PRECISION) ? sizeof(double) : sizeof(float); 00075 00076 fatLink = malloc(4*V*gaugeSiteSize* gSize); 00077 if (fatLink == NULL){ 00078 fprintf(stderr, "ERROR: malloc failed for fatLink\n"); 00079 exit(1); 00080 } 00081 siteLink = malloc(4*V*gaugeSiteSize* gSize); 00082 if (siteLink == NULL){ 00083 fprintf(stderr, "ERROR: malloc failed for sitelink\n"); 00084 exit(1); 00085 } 00086 00087 refLink = malloc(4*V*gaugeSiteSize* gSize); 00088 if (refLink == NULL){ 00089 fprintf(stderr, "ERROR: malloc failed for refLink\n"); 00090 exit(1); 00091 } 00092 00093 00094 createSiteLinkCPU(siteLink, gaugeParam.cpu_prec, 1); 00095 00096 #if 1 00097 site_link_sanity_check(siteLink, V, gaugeParam.cpu_prec, &gaugeParam); 00098 #endif 00099 00100 gaugeParam.reconstruct = link_recon; 00101 createLinkQuda(&cudaSiteLink, &gaugeParam); 00102 loadLinkToGPU(cudaSiteLink, siteLink, &gaugeParam); 00103 00104 createStapleQuda(&cudaStaple, &gaugeParam); 00105 createStapleQuda(&cudaStaple1, &gaugeParam); 00106 00107 gaugeParam.reconstruct = QUDA_RECONSTRUCT_NO; 00108 createLinkQuda(&cudaFatLink, &gaugeParam); 00109 00110 initDslashConstants(cudaSiteLink, 0, 0); 00111 00112 return; 00113 } 00114 00115 void 00116 llfat_end() 00117 { 00118 free(fatLink); 00119 free(siteLink); 00120 free(refLink); 00121 00122 freeLinkQuda(&cudaSiteLink); 00123 freeLinkQuda(&cudaFatLink); 00124 freeStapleQuda(&cudaStaple); 00125 freeStapleQuda(&cudaStaple1); 00126 } 00127 00128 00129 00130 static void 00131 llfat_test(void) 00132 { 00133 llfat_init(); 00134 00135 00136 float act_path_coeff_1[6]; 00137 double act_path_coeff_2[6]; 00138 00139 for(int i=0;i < 6;i++){ 00140 act_path_coeff_1[i]= 0.1*i; 00141 act_path_coeff_2[i]= 0.1*i; 00142 } 00143 00144 00145 00146 00147 void* act_path_coeff; 00148 if(gaugeParam.cpu_prec == QUDA_DOUBLE_PRECISION){ 00149 act_path_coeff = act_path_coeff_2; 00150 }else{ 00151 act_path_coeff = act_path_coeff_1; 00152 } 00153 if (verify_results){ 00154 llfat_reference(refLink, siteLink, gaugeParam.cpu_prec, act_path_coeff); 00155 } 00156 00157 llfat_init_cuda(&gaugeParam); 00158 00159 //The number comes from CPU implementation in MILC, fermion_links_helpers.c 00160 int flops= 61632; 00161 00162 struct timeval t0, t1; 00163 gettimeofday(&t0, NULL); 00164 llfat_cuda(fatLink, siteLink, cudaFatLink, cudaSiteLink, cudaStaple, cudaStaple1, &gaugeParam, act_path_coeff_2); 00165 cudaThreadSynchronize(); 00166 gettimeofday(&t1, NULL); 00167 double secs = t1.tv_sec - t0.tv_sec + 0.000001*(t1.tv_usec - t0.tv_usec); 00168 00169 storeLinkToCPU(fatLink, &cudaFatLink, &gaugeParam); 00170 int res; 00171 res = compare_floats(fatLink, refLink, 4*V*gaugeSiteSize, 1e-3, gaugeParam.cpu_prec); 00172 00173 strong_check_link(fatLink, refLink, 4*V, gaugeParam.cpu_prec); 00174 00175 00176 printf("Test %s\n",(1 == res) ? "PASSED" : "FAILED"); 00177 int volume = gaugeParam.X[0]*gaugeParam.X[1]*gaugeParam.X[2]*gaugeParam.X[3]; 00178 double perf = 1.0* flops*volume/(secs*1024*1024*1024); 00179 printf("gpu time =%.2f ms, flops= %.2f Gflops\n", secs*1000, perf); 00180 00181 llfat_end(); 00182 00183 if (res == 0){//failed 00184 printf("\n"); 00185 printf("Warning: your test failed. \n"); 00186 printf(" Did you use --verify?\n"); 00187 printf(" Did you check the GPU health by running cuda memtest?\n"); 00188 } 00189 } 00190 00191 00192 static void 00193 display_test_info() 00194 { 00195 printf("running the following test:\n"); 00196 00197 printf("link_precision link_reconstruct T_dimension\n"); 00198 printf("%s %s %d \n", 00199 get_prec_str(link_prec), 00200 get_recon_str(link_recon), 00201 tdim); 00202 return ; 00203 00204 } 00205 00206 static void 00207 usage(char** argv ) 00208 { 00209 printf("Usage: %s <args>\n", argv[0]); 00210 printf(" --device <dev_id> Set which device to run on\n"); 00211 printf(" --gprec <double/single/half> Link precision\n"); 00212 printf(" --recon <8/12> Link reconstruction type\n"); 00213 printf(" --sdim <n> Set spacial dimention\n"); 00214 printf(" --tdim <n> Set T dimention size(default 24)\n"); 00215 printf(" --verify Verify the GPU results using CPU results\n"); 00216 printf(" --help Print out this message\n"); 00217 exit(1); 00218 return ; 00219 } 00220 00221 int 00222 main(int argc, char **argv) 00223 { 00224 int i; 00225 for (i =1;i < argc; i++){ 00226 00227 if( strcmp(argv[i], "--help")== 0){ 00228 usage(argv); 00229 } 00230 00231 if( strcmp(argv[i], "--prec") == 0){ 00232 if (i+1 >= argc){ 00233 usage(argv); 00234 } 00235 link_prec = get_prec(argv[i+1]); 00236 i++; 00237 continue; 00238 } 00239 00240 if( strcmp(argv[i], "--cpu_prec") == 0){ 00241 if (i+1 >= argc){ 00242 usage(argv); 00243 } 00244 cpu_link_prec = get_prec(argv[i+1]); 00245 i++; 00246 continue; 00247 } 00248 00249 if( strcmp(argv[i], "--recon") == 0){ 00250 if (i+1 >= argc){ 00251 usage(argv); 00252 } 00253 link_recon = get_recon(argv[i+1]); 00254 i++; 00255 continue; 00256 } 00257 00258 if( strcmp(argv[i], "--tdim") == 0){ 00259 if (i+1 >= argc){ 00260 usage(argv); 00261 } 00262 tdim = atoi(argv[i+1]); 00263 if (tdim < 0 || tdim > 128){ 00264 fprintf(stderr, "Error: invalid t dimention\n"); 00265 exit(1); 00266 } 00267 i++; 00268 continue; 00269 } 00270 00271 if( strcmp(argv[i], "--sdim") == 0){ 00272 if (i+1 >= argc){ 00273 usage(argv); 00274 } 00275 sdim = atoi(argv[i+1]); 00276 if (sdim < 0 || sdim > 128){ 00277 fprintf(stderr, "Error: invalid space dimention\n"); 00278 exit(1); 00279 } 00280 i++; 00281 continue; 00282 } 00283 00284 if( strcmp(argv[i], "--verify") == 0){ 00285 verify_results=1; 00286 continue; 00287 } 00288 00289 if( strcmp(argv[i], "--device") == 0){ 00290 if (i+1 >= argc){ 00291 usage(argv); 00292 } 00293 device = atoi(argv[i+1]); 00294 if (device < 0){ 00295 fprintf(stderr, "Error: invalid device number(%d)\n", device); 00296 exit(1); 00297 } 00298 i++; 00299 continue; 00300 } 00301 00302 fprintf(stderr, "ERROR: Invalid option:%s\n", argv[i]); 00303 usage(argv); 00304 } 00305 00306 display_test_info(); 00307 00308 llfat_test(); 00309 00310 00311 return 0; 00312 }
1.7.3