QUDA v0.3.2
A library for QCD on GPUs

quda/tests/twisted_mass_dslash_test.cpp

Go to the documentation of this file.
00001 #include <iostream>
00002 #include <stdio.h>
00003 #include <stdlib.h>
00004 
00005 #include <quda.h>
00006 #include <quda_internal.h>
00007 #include <dirac_quda.h>
00008 #include <dslash_quda.h>
00009 #include <invert_quda.h>
00010 #include <util_quda.h>
00011 #include <blas_quda.h>
00012 
00013 #include <test_util.h>
00014 #include <twisted_mass_dslash_reference.h>
00015 
00016 // What test are we doing (0 = dslash, 1 = MatPC, 2 = Mat)
00017 const int test_type = 1;
00018 
00019 const QudaParity parity = QUDA_EVEN_PARITY; // even or odd?
00020 const QudaDagType dagger = QUDA_DAG_NO;     // apply Dslash or Dslash dagger?
00021 const int transfer = 0; // include transfer time in the benchmark?
00022 
00023 const int loops = 100;
00024 
00025 QudaPrecision cpu_prec = QUDA_DOUBLE_PRECISION;
00026 QudaPrecision cuda_prec = QUDA_SINGLE_PRECISION;
00027 
00028 QudaGaugeParam gauge_param;
00029 QudaInvertParam inv_param;
00030 
00031 FullGauge gauge;
00032 
00033 cpuColorSpinorField *spinor, *spinorOut, *spinorRef;
00034 cudaColorSpinorField *cudaSpinor, *cudaSpinorOut, *tmp1=0, *tmp2=0;
00035 
00036 void *hostGauge[4];
00037 
00038 Dirac *dirac;
00039 
00040 void init() {
00041 
00042   gauge_param = newQudaGaugeParam();
00043   inv_param = newQudaInvertParam();
00044 
00045   gauge_param.X[0] = 24;
00046   gauge_param.X[1] = 24;
00047   gauge_param.X[2] = 24;
00048   gauge_param.X[3] = 24;
00049   setDims(gauge_param.X);
00050 
00051   gauge_param.anisotropy = 2.3;
00052 
00053   gauge_param.type = QUDA_WILSON_LINKS;
00054   gauge_param.gauge_order = QUDA_QDP_GAUGE_ORDER;
00055   gauge_param.t_boundary = QUDA_ANTI_PERIODIC_T;
00056 
00057   gauge_param.cpu_prec = cpu_prec;
00058   gauge_param.cuda_prec = cuda_prec;
00059   gauge_param.reconstruct = QUDA_RECONSTRUCT_12;
00060   gauge_param.reconstruct_sloppy = gauge_param.reconstruct;
00061   gauge_param.cuda_prec_sloppy = gauge_param.cuda_prec;
00062   gauge_param.gauge_fix = QUDA_GAUGE_FIXED_NO;
00063   gauge_param.type = QUDA_WILSON_LINKS;
00064 
00065   inv_param.kappa = 0.1;
00066   inv_param.mu = 0.01;
00067   inv_param.twist_flavor = QUDA_TWIST_MINUS;
00068 
00069   inv_param.matpc_type = QUDA_MATPC_EVEN_EVEN;
00070   inv_param.dagger = dagger;
00071 
00072   inv_param.cpu_prec = cpu_prec;
00073   inv_param.cuda_prec = cuda_prec;
00074 
00075   gauge_param.ga_pad = 0;
00076   inv_param.sp_pad = 0;
00077   inv_param.cl_pad = 0;
00078 
00079   //gauge_param.ga_pad = 24*24*24;
00080   //inv_param.sp_pad = 24*24*24;
00081   //inv_param.cl_pad = 24*24*24;
00082 
00083   inv_param.dirac_order = QUDA_DIRAC_ORDER;
00084 
00085   if (test_type == 2) {
00086     inv_param.solution_type = QUDA_MAT_SOLUTION;
00087   } else {
00088     inv_param.solution_type = QUDA_MATPC_SOLUTION;
00089   }
00090 
00091   inv_param.dslash_type = QUDA_TWISTED_MASS_DSLASH;
00092 
00093   inv_param.verbosity = QUDA_VERBOSE;
00094 
00095   // construct input fields
00096   for (int dir = 0; dir < 4; dir++) hostGauge[dir] = malloc(V*gaugeSiteSize*gauge_param.cpu_prec);
00097 
00098   ColorSpinorParam csParam;
00099   
00100   csParam.fieldLocation = QUDA_CPU_FIELD_LOCATION;
00101   csParam.nColor = 3;
00102   csParam.nSpin = 4;
00103   csParam.twistFlavor = inv_param.twist_flavor;
00104   csParam.nDim = 4;
00105   for (int d=0; d<4; d++) csParam.x[d] = gauge_param.X[d];
00106   csParam.precision = inv_param.cpu_prec;
00107   csParam.pad = 0;
00108   if (test_type < 2) {
00109     csParam.siteSubset = QUDA_PARITY_SITE_SUBSET;
00110     csParam.x[0] /= 2;
00111   } else {
00112     csParam.siteSubset = QUDA_FULL_SITE_SUBSET;
00113   }    
00114   csParam.siteOrder = QUDA_EVEN_ODD_SITE_ORDER;
00115   csParam.fieldOrder = QUDA_SPACE_SPIN_COLOR_FIELD_ORDER;
00116   csParam.gammaBasis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS;
00117   csParam.create = QUDA_ZERO_FIELD_CREATE;
00118   
00119   spinor = new cpuColorSpinorField(csParam);
00120   spinorOut = new cpuColorSpinorField(csParam);
00121   spinorRef = new cpuColorSpinorField(csParam);
00122 
00123   csParam.siteSubset = QUDA_FULL_SITE_SUBSET;
00124   csParam.x[0] = gauge_param.X[0];
00125   
00126   printfQuda("Randomizing fields... ");
00127 
00128   construct_gauge_field(hostGauge, 1, gauge_param.cpu_prec, &gauge_param);
00129   spinor->Source(QUDA_RANDOM_SOURCE);
00130 
00131   printfQuda("done.\n"); fflush(stdout);
00132   
00133   int dev = 0;
00134   initQuda(dev);
00135 
00136   printfQuda("Sending gauge field to GPU\n");
00137 
00138   loadGaugeQuda(hostGauge, &gauge_param);
00139   gauge = cudaGaugePrecise;
00140 
00141   if (!transfer) {
00142     csParam.fieldLocation = QUDA_CUDA_FIELD_LOCATION;
00143     csParam.gammaBasis = QUDA_UKQCD_GAMMA_BASIS;
00144     csParam.pad = inv_param.sp_pad;
00145     csParam.precision = inv_param.cuda_prec;
00146     if (csParam.precision == QUDA_DOUBLE_PRECISION ) {
00147       csParam.fieldOrder = QUDA_FLOAT2_FIELD_ORDER;
00148     } else {
00149       /* Single and half */
00150       csParam.fieldOrder = QUDA_FLOAT4_FIELD_ORDER;
00151     }
00152  
00153     if (test_type < 2) {
00154       csParam.siteSubset = QUDA_PARITY_SITE_SUBSET;
00155       csParam.x[0] /= 2;
00156     }
00157 
00158     printfQuda("Creating cudaSpinor\n");
00159     cudaSpinor = new cudaColorSpinorField(csParam);
00160     printfQuda("Creating cudaSpinorOut\n");
00161     cudaSpinorOut = new cudaColorSpinorField(csParam);
00162 
00163     if (test_type == 2) csParam.x[0] /= 2;
00164 
00165     csParam.siteSubset = QUDA_PARITY_SITE_SUBSET;
00166     tmp1 = new cudaColorSpinorField(csParam);
00167     tmp2 = new cudaColorSpinorField(csParam);
00168 
00169     printfQuda("Sending spinor field to GPU\n");
00170     *cudaSpinor = *spinor;
00171 
00172     std::cout << "Source: CPU = " << norm2(*spinor) << ", CUDA = " << 
00173       norm2(*cudaSpinor) << std::endl;
00174 
00175     bool pc = (test_type != 2);
00176     DiracParam diracParam;
00177     setDiracParam(diracParam, &inv_param, pc);
00178     diracParam.verbose = QUDA_VERBOSE;
00179     diracParam.tmp1 = tmp1;
00180     diracParam.tmp2 = tmp2;
00181     
00182     dirac = Dirac::create(diracParam);
00183   } else {
00184     std::cout << "Source: CPU = " << norm2(*spinor) << std::endl;
00185   }
00186     
00187 }
00188 
00189 void end() {
00190   if (!transfer) {
00191     delete dirac;
00192     delete cudaSpinor;
00193     delete cudaSpinorOut;
00194     delete tmp1;
00195     delete tmp2;
00196   }
00197 
00198   // release memory
00199   delete spinor;
00200   delete spinorOut;
00201   delete spinorRef;
00202 
00203   for (int dir = 0; dir < 4; dir++) free(hostGauge[dir]);
00204   endQuda();
00205 }
00206 
00207 // execute kernel
00208 double dslashCUDA() {
00209 
00210   printfQuda("Executing %d kernel loops...\n", loops);
00211   fflush(stdout);
00212   stopwatchStart();
00213   for (int i = 0; i < loops; i++) {
00214     switch (test_type) {
00215     case 0:
00216       if (transfer) {
00217         dslashQuda(spinorOut->v, spinor->v, &inv_param, parity);
00218       } else {
00219         dirac->Dslash(*cudaSpinorOut, *cudaSpinor, parity);
00220       }
00221       break;
00222     case 1:
00223     case 2:
00224       if (transfer) {
00225         MatQuda(spinorOut->v, spinor->v, &inv_param);
00226       } else {
00227         dirac->M(*cudaSpinorOut, *cudaSpinor);
00228       }
00229       break;
00230     }
00231   }
00232     
00233   // check for errors
00234   cudaError_t stat = cudaGetLastError();
00235   if (stat != cudaSuccess)
00236     printf("with ERROR: %s\n", cudaGetErrorString(stat));
00237 
00238   cudaThreadSynchronize();
00239   double secs = stopwatchReadSeconds();
00240   printf("done.\n\n");
00241 
00242   return secs;
00243 }
00244 
00245 void dslashRef() {
00246 
00247   // compare to dslash reference implementation
00248   printf("Calculating reference implementation...");
00249   fflush(stdout);
00250   switch (test_type) {
00251   case 0:
00252     dslash(spinorRef->v, hostGauge, spinor->v, inv_param.kappa, inv_param.mu, inv_param.twist_flavor,
00253            parity, dagger, inv_param.cpu_prec, gauge_param.cpu_prec);
00254     break;
00255   case 1:    
00256     matpc(spinorRef->v, hostGauge, spinor->v, inv_param.kappa, inv_param.mu, inv_param.twist_flavor,
00257           inv_param.matpc_type, dagger, inv_param.cpu_prec, gauge_param.cpu_prec);
00258     break;
00259   case 2:
00260     mat(spinorRef->v, hostGauge, spinor->v, inv_param.kappa, inv_param.mu, inv_param.twist_flavor,
00261         dagger, inv_param.cpu_prec, gauge_param.cpu_prec);
00262     break;
00263   default:
00264     printf("Test type not defined\n");
00265     exit(-1);
00266   }
00267 
00268   printf("done.\n");
00269     
00270 }
00271 
00272 int main(int argc, char **argv)
00273 {
00274   init();
00275 
00276   float spinorGiB = (float)Vh*spinorSiteSize*sizeof(inv_param.cpu_prec) / (1 << 30);
00277   float sharedKB = 0;//(float)dslashCudaSharedBytes(inv_param.cuda_prec) / (1 << 10);
00278   printf("\nSpinor mem: %.3f GiB\n", spinorGiB);
00279   printf("Gauge mem: %.3f GiB\n", gauge_param.gaugeGiB);
00280   printf("Shared mem: %.3f KB\n", sharedKB);
00281   
00282   int attempts = 1;
00283   dslashRef();
00284   for (int i=0; i<attempts; i++) {
00285     
00286     double secs = dslashCUDA();
00287 
00288     if (!transfer) *spinorOut = *cudaSpinorOut;
00289 
00290     // print timing information
00291     printf("%fms per loop\n", 1000*secs);
00292     
00293     unsigned long long flops = 0;
00294     if (!transfer) flops = dirac->Flops();
00295     int floats = test_type ? 2*(7*24+8*gauge_param.packed_size+24)+24 : 7*24+8*gauge_param.packed_size+24;
00296 
00297     printf("GFLOPS = %f\n", 1.0e-9*flops/secs);
00298     printf("GiB/s = %f\n\n", Vh*floats*sizeof(float)/((secs/loops)*(1<<30)));
00299     
00300     if (!transfer) {
00301       std::cout << "Results: CPU = " << norm2(*spinorRef) << ", CUDA = " << norm2(*cudaSpinorOut) << 
00302         ", CPU-CUDA = " << norm2(*spinorOut) << std::endl;
00303     } else {
00304       std::cout << "Result: CPU = " << norm2(*spinorRef) << ", CPU-CUDA = " << norm2(*spinorOut) << std::endl;
00305     }
00306     
00307     cpuColorSpinorField::Compare(*spinorRef, *spinorOut);
00308   }    
00309   end();
00310 }
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Friends Defines