|
QUDA v0.3.2
A library for QCD on GPUs
|
00001 #include <stdlib.h> 00002 #include <stdio.h> 00003 #include <time.h> 00004 #include <math.h> 00005 00006 #include <test_util.h> 00007 #include <blas_reference.h> 00008 #include <wilson_dslash_reference.h> 00009 00010 // In a typical application, quda.h is the only QUDA header required. 00011 #include <quda.h> 00012 00013 int main(int argc, char **argv) 00014 { 00015 // set QUDA parameters 00016 00017 int device = 0; // CUDA device number 00018 00019 QudaPrecision cpu_prec = QUDA_DOUBLE_PRECISION; 00020 QudaPrecision cuda_prec = QUDA_SINGLE_PRECISION; 00021 QudaPrecision cuda_prec_sloppy = QUDA_HALF_PRECISION; 00022 00023 QudaGaugeParam gauge_param = newQudaGaugeParam(); 00024 QudaInvertParam inv_param = newQudaInvertParam(); 00025 00026 gauge_param.X[0] = 24; 00027 gauge_param.X[1] = 24; 00028 gauge_param.X[2] = 24; 00029 gauge_param.X[3] = 24; 00030 00031 gauge_param.anisotropy = 1.0; 00032 gauge_param.type = QUDA_WILSON_LINKS; 00033 gauge_param.gauge_order = QUDA_QDP_GAUGE_ORDER; 00034 gauge_param.t_boundary = QUDA_ANTI_PERIODIC_T; 00035 00036 gauge_param.cpu_prec = cpu_prec; 00037 gauge_param.cuda_prec = cuda_prec; 00038 gauge_param.reconstruct = QUDA_RECONSTRUCT_12; 00039 gauge_param.cuda_prec_sloppy = cuda_prec_sloppy; 00040 gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_12; 00041 gauge_param.gauge_fix = QUDA_GAUGE_FIXED_NO; 00042 00043 int clover_yes = 0; // 0 for plain Wilson, 1 for clover 00044 00045 if (clover_yes) { 00046 inv_param.dslash_type = QUDA_CLOVER_WILSON_DSLASH; 00047 } else { 00048 inv_param.dslash_type = QUDA_WILSON_DSLASH; 00049 } 00050 inv_param.inv_type = QUDA_BICGSTAB_INVERTER; 00051 00052 double mass = -0.9; 00053 inv_param.kappa = 1.0 / (2.0*(1 + 3/gauge_param.anisotropy + mass)); 00054 inv_param.tol = 5e-8; 00055 inv_param.maxiter = 1000; 00056 inv_param.reliable_delta = 0.1; 00057 00058 inv_param.solution_type = QUDA_MAT_SOLUTION; 00059 inv_param.solve_type = QUDA_DIRECT_PC_SOLVE; 00060 inv_param.matpc_type = QUDA_MATPC_EVEN_EVEN; 00061 inv_param.dagger = QUDA_DAG_NO; 00062 inv_param.mass_normalization = QUDA_MASS_NORMALIZATION; 00063 00064 inv_param.cpu_prec = cpu_prec; 00065 inv_param.cuda_prec = cuda_prec; 00066 inv_param.cuda_prec_sloppy = cuda_prec_sloppy; 00067 inv_param.preserve_source = QUDA_PRESERVE_SOURCE_NO; 00068 inv_param.dirac_order = QUDA_DIRAC_ORDER; 00069 00070 gauge_param.ga_pad = 0; // 24*24*24; 00071 inv_param.sp_pad = 0; // 24*24*24; 00072 inv_param.cl_pad = 0; // 24*24*24; 00073 00074 if (clover_yes) { 00075 inv_param.clover_cpu_prec = cpu_prec; 00076 inv_param.clover_cuda_prec = cuda_prec; 00077 inv_param.clover_cuda_prec_sloppy = cuda_prec_sloppy; 00078 inv_param.clover_order = QUDA_PACKED_CLOVER_ORDER; 00079 } 00080 inv_param.verbosity = QUDA_VERBOSE; 00081 00082 // Everything between here and the call to initQuda() is application-specific. 00083 00084 // set parameters for the reference Dslash, and prepare fields to be loaded 00085 setDims(gauge_param.X); 00086 00087 size_t gSize = (gauge_param.cpu_prec == QUDA_DOUBLE_PRECISION) ? sizeof(double) : sizeof(float); 00088 size_t sSize = (inv_param.cpu_prec == QUDA_DOUBLE_PRECISION) ? sizeof(double) : sizeof(float); 00089 00090 void *gauge[4], *clover_inv, *clover; 00091 00092 for (int dir = 0; dir < 4; dir++) { 00093 gauge[dir] = malloc(V*gaugeSiteSize*gSize); 00094 } 00095 construct_gauge_field(gauge, 1, gauge_param.cpu_prec, &gauge_param); 00096 00097 if (clover_yes) { 00098 double norm = 0.0; // clover components are random numbers in the range (-norm, norm) 00099 double diag = 1.0; // constant added to the diagonal 00100 00101 size_t cSize = (inv_param.clover_cpu_prec == QUDA_DOUBLE_PRECISION) ? sizeof(double) : sizeof(float); 00102 clover_inv = malloc(V*cloverSiteSize*cSize); 00103 construct_clover_field(clover_inv, norm, diag, inv_param.clover_cpu_prec); 00104 00105 // The uninverted clover term is only needed when solving the unpreconditioned 00106 // system or when using "asymmetric" even/odd preconditioning. 00107 int preconditioned = (inv_param.solve_type == QUDA_DIRECT_PC_SOLVE || 00108 inv_param.solve_type == QUDA_NORMEQ_PC_SOLVE); 00109 int asymmetric = preconditioned && 00110 (inv_param.matpc_type == QUDA_MATPC_EVEN_EVEN_ASYMMETRIC || 00111 inv_param.matpc_type == QUDA_MATPC_ODD_ODD_ASYMMETRIC); 00112 if (!preconditioned) { 00113 clover = clover_inv; 00114 clover_inv = NULL; 00115 } else if (asymmetric) { // fake it by using the same random matrix 00116 clover = clover_inv; // for both clover and clover_inv 00117 } else { 00118 clover = NULL; 00119 } 00120 } 00121 00122 void *spinorIn = malloc(V*spinorSiteSize*sSize); 00123 void *spinorOut = malloc(V*spinorSiteSize*sSize); 00124 void *spinorCheck = malloc(V*spinorSiteSize*sSize); 00125 00126 // create a point source at 0 00127 if (inv_param.cpu_prec == QUDA_SINGLE_PRECISION) *((float*)spinorIn) = 1.0; 00128 else *((double*)spinorIn) = 1.0; 00129 00130 // start the timer 00131 double time0 = -((double)clock()); 00132 00133 // initialize the QUDA library 00134 initQuda(device); 00135 00136 // load the gauge field 00137 loadGaugeQuda((void*)gauge, &gauge_param); 00138 00139 // load the clover term, if desired 00140 if (clover_yes) loadCloverQuda(clover, clover_inv, &inv_param); 00141 00142 // perform the inversion 00143 invertQuda(spinorOut, spinorIn, &inv_param); 00144 00145 // stop the timer 00146 time0 += clock(); 00147 time0 /= CLOCKS_PER_SEC; 00148 00149 printf("Device memory used:\n Spinor: %f GiB\n Gauge: %f GiB\n", 00150 inv_param.spinorGiB, gauge_param.gaugeGiB); 00151 if (clover_yes) printf(" Clover: %f GiB\n", inv_param.cloverGiB); 00152 printf("\nDone: %i iter / %g secs = %g Gflops, total time = %g secs\n", 00153 inv_param.iter, inv_param.secs, inv_param.gflops/inv_param.secs, time0); 00154 00155 if (inv_param.solution_type == QUDA_MAT_SOLUTION) { 00156 mat(spinorCheck, gauge, spinorOut, inv_param.kappa, 0, 00157 inv_param.cpu_prec, gauge_param.cpu_prec); 00158 if (inv_param.mass_normalization == QUDA_MASS_NORMALIZATION) 00159 ax(0.5/inv_param.kappa, spinorCheck, V*spinorSiteSize, inv_param.cpu_prec); 00160 } else if(inv_param.solution_type == QUDA_MATPC_SOLUTION) { 00161 matpc(spinorCheck, gauge, spinorOut, inv_param.kappa, inv_param.matpc_type, 0, 00162 inv_param.cpu_prec, gauge_param.cpu_prec); 00163 if (inv_param.mass_normalization == QUDA_MASS_NORMALIZATION) 00164 ax(0.25/(inv_param.kappa*inv_param.kappa), spinorCheck, V*spinorSiteSize, inv_param.cpu_prec); 00165 } 00166 00167 00168 mxpy(spinorIn, spinorCheck, V*spinorSiteSize, inv_param.cpu_prec); 00169 double nrm2 = norm_2(spinorCheck, V*spinorSiteSize, inv_param.cpu_prec); 00170 double src2 = norm_2(spinorIn, V*spinorSiteSize, inv_param.cpu_prec); 00171 printf("Relative residual: requested = %g, actual = %g\n", inv_param.tol, sqrt(nrm2/src2)); 00172 00173 // finalize the QUDA library 00174 endQuda(); 00175 00176 return 0; 00177 }
1.7.3