QUDA: quda/tests/wilson_invert

QUDA v0.3.2
A library for QCD on GPUs
00001 #include <stdlib.h>
00002 #include <stdio.h>
00003 #include <time.h>
00004 #include <math.h>
00005 
00006 #include <test_util.h>
00007 #include <blas_reference.h>
00008 #include <wilson_dslash_reference.h>
00009 
00010 // In a typical application, quda.h is the only QUDA header required.
00011 #include <quda.h>
00012 
00013 int main(int argc, char **argv)
00014 {
00015   // set QUDA parameters
00016 
00017   int device = 0; // CUDA device number
00018 
00019   QudaPrecision cpu_prec = QUDA_DOUBLE_PRECISION;
00020   QudaPrecision cuda_prec = QUDA_SINGLE_PRECISION;
00021   QudaPrecision cuda_prec_sloppy = QUDA_HALF_PRECISION;
00022 
00023   QudaGaugeParam gauge_param = newQudaGaugeParam();
00024   QudaInvertParam inv_param = newQudaInvertParam();
00025  
00026   gauge_param.X[0] = 24;
00027   gauge_param.X[1] = 24;
00028   gauge_param.X[2] = 24;
00029   gauge_param.X[3] = 24;
00030 
00031   gauge_param.anisotropy = 1.0;
00032   gauge_param.type = QUDA_WILSON_LINKS;
00033   gauge_param.gauge_order = QUDA_QDP_GAUGE_ORDER;
00034   gauge_param.t_boundary = QUDA_ANTI_PERIODIC_T;
00035   
00036   gauge_param.cpu_prec = cpu_prec;
00037   gauge_param.cuda_prec = cuda_prec;
00038   gauge_param.reconstruct = QUDA_RECONSTRUCT_12;
00039   gauge_param.cuda_prec_sloppy = cuda_prec_sloppy;
00040   gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_12;
00041   gauge_param.gauge_fix = QUDA_GAUGE_FIXED_NO;
00042 
00043   int clover_yes = 0; // 0 for plain Wilson, 1 for clover
00044   
00045   if (clover_yes) {
00046     inv_param.dslash_type = QUDA_CLOVER_WILSON_DSLASH;
00047   } else {
00048     inv_param.dslash_type = QUDA_WILSON_DSLASH;
00049   }
00050   inv_param.inv_type = QUDA_BICGSTAB_INVERTER;
00051 
00052   double mass = -0.9;
00053   inv_param.kappa = 1.0 / (2.0*(1 + 3/gauge_param.anisotropy + mass));
00054   inv_param.tol = 5e-8;
00055   inv_param.maxiter = 1000;
00056   inv_param.reliable_delta = 0.1;
00057 
00058   inv_param.solution_type = QUDA_MAT_SOLUTION;
00059   inv_param.solve_type = QUDA_DIRECT_PC_SOLVE;
00060   inv_param.matpc_type = QUDA_MATPC_EVEN_EVEN;
00061   inv_param.dagger = QUDA_DAG_NO;
00062   inv_param.mass_normalization = QUDA_MASS_NORMALIZATION;
00063 
00064   inv_param.cpu_prec = cpu_prec;
00065   inv_param.cuda_prec = cuda_prec;
00066   inv_param.cuda_prec_sloppy = cuda_prec_sloppy;
00067   inv_param.preserve_source = QUDA_PRESERVE_SOURCE_NO;
00068   inv_param.dirac_order = QUDA_DIRAC_ORDER;
00069 
00070   gauge_param.ga_pad = 0; // 24*24*24;
00071   inv_param.sp_pad = 0;   // 24*24*24;
00072   inv_param.cl_pad = 0;   // 24*24*24;
00073 
00074   if (clover_yes) {
00075     inv_param.clover_cpu_prec = cpu_prec;
00076     inv_param.clover_cuda_prec = cuda_prec;
00077     inv_param.clover_cuda_prec_sloppy = cuda_prec_sloppy;
00078     inv_param.clover_order = QUDA_PACKED_CLOVER_ORDER;
00079   }
00080   inv_param.verbosity = QUDA_VERBOSE;
00081 
00082   // Everything between here and the call to initQuda() is application-specific.
00083 
00084   // set parameters for the reference Dslash, and prepare fields to be loaded
00085   setDims(gauge_param.X);
00086 
00087   size_t gSize = (gauge_param.cpu_prec == QUDA_DOUBLE_PRECISION) ? sizeof(double) : sizeof(float);
00088   size_t sSize = (inv_param.cpu_prec == QUDA_DOUBLE_PRECISION) ? sizeof(double) : sizeof(float);
00089 
00090   void *gauge[4], *clover_inv, *clover;
00091 
00092   for (int dir = 0; dir < 4; dir++) {
00093     gauge[dir] = malloc(V*gaugeSiteSize*gSize);
00094   }
00095   construct_gauge_field(gauge, 1, gauge_param.cpu_prec, &gauge_param);
00096 
00097   if (clover_yes) {
00098     double norm = 0.0; // clover components are random numbers in the range (-norm, norm)
00099     double diag = 1.0; // constant added to the diagonal
00100 
00101     size_t cSize = (inv_param.clover_cpu_prec == QUDA_DOUBLE_PRECISION) ? sizeof(double) : sizeof(float);
00102     clover_inv = malloc(V*cloverSiteSize*cSize);
00103     construct_clover_field(clover_inv, norm, diag, inv_param.clover_cpu_prec);
00104 
00105     // The uninverted clover term is only needed when solving the unpreconditioned
00106     // system or when using "asymmetric" even/odd preconditioning.
00107     int preconditioned = (inv_param.solve_type == QUDA_DIRECT_PC_SOLVE ||
00108                           inv_param.solve_type == QUDA_NORMEQ_PC_SOLVE);
00109     int asymmetric = preconditioned &&
00110                          (inv_param.matpc_type == QUDA_MATPC_EVEN_EVEN_ASYMMETRIC ||
00111                           inv_param.matpc_type == QUDA_MATPC_ODD_ODD_ASYMMETRIC);
00112     if (!preconditioned) {
00113       clover = clover_inv;
00114       clover_inv = NULL;
00115     } else if (asymmetric) { // fake it by using the same random matrix
00116       clover = clover_inv;   // for both clover and clover_inv
00117     } else {
00118       clover = NULL;
00119     }
00120   }
00121 
00122   void *spinorIn = malloc(V*spinorSiteSize*sSize);
00123   void *spinorOut = malloc(V*spinorSiteSize*sSize);
00124   void *spinorCheck = malloc(V*spinorSiteSize*sSize);
00125 
00126   // create a point source at 0
00127   if (inv_param.cpu_prec == QUDA_SINGLE_PRECISION) *((float*)spinorIn) = 1.0;
00128   else *((double*)spinorIn) = 1.0;
00129 
00130   // start the timer
00131   double time0 = -((double)clock());
00132 
00133   // initialize the QUDA library
00134   initQuda(device);
00135 
00136   // load the gauge field
00137   loadGaugeQuda((void*)gauge, &gauge_param);
00138 
00139   // load the clover term, if desired
00140   if (clover_yes) loadCloverQuda(clover, clover_inv, &inv_param);
00141   
00142   // perform the inversion
00143   invertQuda(spinorOut, spinorIn, &inv_param);
00144 
00145   // stop the timer
00146   time0 += clock();
00147   time0 /= CLOCKS_PER_SEC;
00148 
00149   printf("Device memory used:\n   Spinor: %f GiB\n    Gauge: %f GiB\n", 
00150          inv_param.spinorGiB, gauge_param.gaugeGiB);
00151   if (clover_yes) printf("   Clover: %f GiB\n", inv_param.cloverGiB);
00152   printf("\nDone: %i iter / %g secs = %g Gflops, total time = %g secs\n", 
00153          inv_param.iter, inv_param.secs, inv_param.gflops/inv_param.secs, time0);
00154 
00155   if (inv_param.solution_type == QUDA_MAT_SOLUTION) { 
00156     mat(spinorCheck, gauge, spinorOut, inv_param.kappa, 0, 
00157         inv_param.cpu_prec, gauge_param.cpu_prec); 
00158     if (inv_param.mass_normalization == QUDA_MASS_NORMALIZATION)
00159       ax(0.5/inv_param.kappa, spinorCheck, V*spinorSiteSize, inv_param.cpu_prec);
00160   } else if(inv_param.solution_type == QUDA_MATPC_SOLUTION) {   
00161     matpc(spinorCheck, gauge, spinorOut, inv_param.kappa, inv_param.matpc_type, 0, 
00162           inv_param.cpu_prec, gauge_param.cpu_prec);
00163     if (inv_param.mass_normalization == QUDA_MASS_NORMALIZATION)
00164       ax(0.25/(inv_param.kappa*inv_param.kappa), spinorCheck, V*spinorSiteSize, inv_param.cpu_prec);
00165   }
00166 
00167 
00168   mxpy(spinorIn, spinorCheck, V*spinorSiteSize, inv_param.cpu_prec);
00169   double nrm2 = norm_2(spinorCheck, V*spinorSiteSize, inv_param.cpu_prec);
00170   double src2 = norm_2(spinorIn, V*spinorSiteSize, inv_param.cpu_prec);
00171   printf("Relative residual: requested = %g, actual = %g\n", inv_param.tol, sqrt(nrm2/src2));
00172 
00173   // finalize the QUDA library
00174   endQuda();
00175 
00176   return 0;
00177 }
quda/tests/wilson_invert_test.c