QUDA: quda/tests/staggered_invert

QUDA v0.4.0
A library for QCD on GPUs
00001 #include <stdlib.h>
00002 #include <stdio.h>
00003 #include <time.h>
00004 #include <math.h>
00005 
00006 #include <test_util.h>
00007 #include <blas_reference.h>
00008 #include <staggered_dslash_reference.h>
00009 #include <quda.h>
00010 #include <string.h>
00011 #include <face_quda.h>
00012 #include "misc.h"
00013 #include <gauge_field.h>
00014 
00015 #ifdef MULTI_GPU
00016 #include <face_quda.h>
00017 #endif
00018 
00019 #define MAX(a,b) ((a)>(b)?(a):(b))
00020 #define mySpinorSiteSize 6
00021 
00022 extern void usage(char** argv);
00023 void *fatlink[4];
00024 void *longlink[4];  
00025 
00026 #ifdef MULTI_GPU
00027 void** ghost_fatlink, **ghost_longlink;
00028 #endif
00029 
00030 extern int device;
00031 extern bool tune;
00032 
00033 extern QudaReconstructType link_recon;
00034 extern QudaPrecision prec;
00035 QudaPrecision cpu_prec = QUDA_DOUBLE_PRECISION;
00036 
00037 extern QudaReconstructType link_recon_sloppy;
00038 extern QudaPrecision  prec_sloppy;
00039 cpuColorSpinorField* in;
00040 cpuColorSpinorField* out;
00041 cpuColorSpinorField* ref;
00042 cpuColorSpinorField* tmp;
00043 
00044 cpuGaugeField *cpuFat = NULL;
00045 cpuGaugeField *cpuLong = NULL;
00046 
00047 static double tol = 1e-6;
00048 
00049 static int testtype = 0;
00050 extern int xdim;
00051 extern int ydim;
00052 extern int zdim;
00053 extern int tdim;
00054 extern bool kernelPackT;
00055 extern int gridsize_from_cmdline[];
00056 
00057 
00058 static void end();
00059 
00060 extern int Z[4];
00061 extern int V;
00062 extern int Vh;
00063 static int Vs_x, Vs_y, Vs_z, Vs_t;
00064 extern int Vsh_x, Vsh_y, Vsh_z, Vsh_t;
00065 static int Vsh[4];
00066 
00067 
00068 template<typename Float>
00069 void constructSpinorField(Float *res) {
00070   for(int i = 0; i < Vh; i++) {
00071     for (int s = 0; s < 1; s++) {
00072       for (int m = 0; m < 3; m++) {
00073         res[i*(1*3*2) + s*(3*2) + m*(2) + 0] = rand() / (Float)RAND_MAX;
00074         res[i*(1*3*2) + s*(3*2) + m*(2) + 1] = rand() / (Float)RAND_MAX;
00075       }
00076     }
00077   }
00078 }
00079 
00080 void
00081 setDimConstants(int *X)
00082 {
00083   V = 1;
00084   for (int d=0; d< 4; d++) {
00085     V *= X[d];
00086     Z[d] = X[d];
00087   }
00088   Vh = V/2;
00089 
00090   Vs_x = X[1]*X[2]*X[3];
00091   Vs_y = X[0]*X[2]*X[3];
00092   Vs_z = X[0]*X[1]*X[3];
00093   Vs_t = X[0]*X[1]*X[2];
00094 
00095 
00096   Vsh_x = Vs_x/2;
00097   Vsh_y = Vs_y/2;
00098   Vsh_z = Vs_z/2;
00099   Vsh_t = Vs_t/2;
00100 
00101   Vsh[0] = Vsh_x;
00102   Vsh[1] = Vsh_y;
00103   Vsh[2] = Vsh_z;
00104   Vsh[3] = Vsh_t;
00105 }
00106 
00107 static void
00108 set_params(QudaGaugeParam* gaugeParam, QudaInvertParam* inv_param,
00109            int X1, int  X2, int X3, int X4,
00110            QudaPrecision cpu_prec, QudaPrecision prec, QudaPrecision prec_sloppy,
00111            QudaReconstructType link_recon, QudaReconstructType link_recon_sloppy,
00112            double mass, double tol, int maxiter, double reliable_delta,
00113            double tadpole_coeff
00114            )
00115 {
00116   gaugeParam->X[0] = X1;
00117   gaugeParam->X[1] = X2;
00118   gaugeParam->X[2] = X3;
00119   gaugeParam->X[3] = X4;
00120 
00121   gaugeParam->cpu_prec = cpu_prec;    
00122   gaugeParam->cuda_prec = prec;
00123   gaugeParam->reconstruct = link_recon;  
00124   gaugeParam->cuda_prec_sloppy = prec_sloppy;
00125   gaugeParam->reconstruct_sloppy = link_recon_sloppy;
00126   gaugeParam->gauge_fix = QUDA_GAUGE_FIXED_NO;
00127   gaugeParam->anisotropy = 1.0;
00128   gaugeParam->tadpole_coeff = tadpole_coeff;
00129   gaugeParam->t_boundary = QUDA_ANTI_PERIODIC_T;
00130   gaugeParam->gauge_order = QUDA_QDP_GAUGE_ORDER;
00131   gaugeParam->ga_pad = X1*X2*X3/2;
00132 
00133   inv_param->verbosity = QUDA_VERBOSE;
00134   inv_param->mass = mass;
00135 
00136   // outer solver parameters
00137   inv_param->inv_type = QUDA_CG_INVERTER;
00138   inv_param->tol = tol;
00139   inv_param->maxiter = 10000;
00140   inv_param->reliable_delta = 1e-1; // ignored by multi-shift solver
00141 
00142   //inv_param->inv_type = QUDA_GCR_INVERTER;
00143   //inv_param->gcrNkrylov = 10;
00144 
00145   // domain decomposition preconditioner parameters
00146   //inv_param->inv_type_precondition = QUDA_MR_INVERTER;
00147   //inv_param->tol_precondition = 1e-1;
00148   //inv_param->maxiter_precondition = 100;
00149   //inv_param->verbosity_precondition = QUDA_SILENT;
00150   //inv_param->prec_precondition = prec_sloppy;
00151 
00152   inv_param->solution_type = QUDA_MATPCDAG_MATPC_SOLUTION;
00153   inv_param->solve_type = QUDA_NORMEQ_PC_SOLVE;
00154   inv_param->matpc_type = QUDA_MATPC_EVEN_EVEN;
00155   inv_param->dagger = QUDA_DAG_NO;
00156   inv_param->mass_normalization = QUDA_MASS_NORMALIZATION;
00157 
00158   inv_param->cpu_prec = cpu_prec;
00159   inv_param->cuda_prec = prec; 
00160   inv_param->cuda_prec_sloppy = prec_sloppy;
00161   inv_param->preserve_source = QUDA_PRESERVE_SOURCE_YES;
00162   inv_param->gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS; // this is meaningless, but must be thus set
00163   inv_param->dirac_order = QUDA_DIRAC_ORDER;
00164   inv_param->dslash_type = QUDA_ASQTAD_DSLASH;
00165   inv_param->tune = tune ? QUDA_TUNE_YES : QUDA_TUNE_NO;
00166   inv_param->sp_pad = X1*X2*X3/2;
00167   inv_param->use_init_guess = QUDA_USE_INIT_GUESS_YES;
00168 
00169 }
00170 
00171 int
00172 invert_test(void)
00173 {
00174   QudaGaugeParam gaugeParam = newQudaGaugeParam();
00175   QudaInvertParam inv_param = newQudaInvertParam();
00176 
00177   double mass = 0.1;
00178 
00179   set_params(&gaugeParam, &inv_param,
00180              xdim, ydim, zdim, tdim,
00181              cpu_prec, prec, prec_sloppy,
00182              link_recon, link_recon_sloppy, mass, tol, 500, 1e-3,
00183              0.8);
00184   
00185   // this must be before the FaceBuffer is created (this is because it allocates pinned memory - FIXME)
00186   initQuda(device);
00187 
00188   setDims(gaugeParam.X);
00189   setDimConstants(gaugeParam.X);
00190 
00191   size_t gSize = (gaugeParam.cpu_prec == QUDA_DOUBLE_PRECISION) ? sizeof(double) : sizeof(float);
00192   for (int dir = 0; dir < 4; dir++) {
00193     fatlink[dir] = malloc(V*gaugeSiteSize*gSize);
00194     longlink[dir] = malloc(V*gaugeSiteSize*gSize);
00195   }
00196   
00197   construct_fat_long_gauge_field(fatlink, longlink, 1, gaugeParam.cpu_prec, &gaugeParam);
00198     
00199   for (int dir = 0; dir < 4; dir++) {
00200     for(int i = 0;i < V*gaugeSiteSize;i++){
00201       if (gaugeParam.cpu_prec == QUDA_DOUBLE_PRECISION){
00202         ((double*)fatlink[dir])[i] = 0.5 *rand()/RAND_MAX;
00203       }else{
00204         ((float*)fatlink[dir])[i] = 0.5* rand()/RAND_MAX;
00205       }
00206     }
00207   }  
00208  
00209   ColorSpinorParam csParam;
00210   csParam.fieldLocation = QUDA_CPU_FIELD_LOCATION;
00211   csParam.nColor=3;
00212   csParam.nSpin=1;
00213   csParam.nDim=4;
00214   for(int d = 0; d < 4; d++) {
00215     csParam.x[d] = gaugeParam.X[d];
00216   }
00217   csParam.x[0] /= 2;
00218   
00219   csParam.precision = inv_param.cpu_prec;
00220   csParam.pad = 0;
00221   csParam.siteSubset = QUDA_PARITY_SITE_SUBSET;
00222   csParam.siteOrder = QUDA_EVEN_ODD_SITE_ORDER;
00223   csParam.fieldOrder  = QUDA_SPACE_SPIN_COLOR_FIELD_ORDER;
00224   csParam.gammaBasis = inv_param.gamma_basis;
00225   csParam.create = QUDA_ZERO_FIELD_CREATE;  
00226   in = new cpuColorSpinorField(csParam);  
00227   out = new cpuColorSpinorField(csParam);  
00228   ref = new cpuColorSpinorField(csParam);  
00229   tmp = new cpuColorSpinorField(csParam);  
00230   
00231   if (inv_param.cpu_prec == QUDA_SINGLE_PRECISION){
00232     constructSpinorField((float*)in->V());    
00233   }else{
00234     constructSpinorField((double*)in->V());
00235   }
00236 
00237   int tmp_value = MAX(ydim*zdim*tdim/2, xdim*zdim*tdim/2);
00238    tmp_value = MAX(tmp_value, xdim*ydim*tdim/2);
00239    tmp_value = MAX(tmp_value, xdim*ydim*zdim/2);
00240 
00241   int fat_pad = tmp_value;
00242   int link_pad =  3*tmp_value;
00243 
00244 #ifdef MULTI_GPU
00245   gaugeParam.type = QUDA_ASQTAD_FAT_LINKS;
00246   gaugeParam.reconstruct = QUDA_RECONSTRUCT_NO;
00247   GaugeFieldParam cpuFatParam(fatlink, gaugeParam);
00248   cpuFat = new cpuGaugeField(cpuFatParam);
00249   cpuFat->exchangeGhost();
00250   ghost_fatlink = (void**)cpuFat->Ghost();
00251   
00252   gaugeParam.type = QUDA_ASQTAD_LONG_LINKS;
00253   GaugeFieldParam cpuLongParam(longlink, gaugeParam);
00254   cpuLong = new cpuGaugeField(cpuLongParam);
00255   cpuLong->exchangeGhost();
00256   ghost_longlink = (void**)cpuLong->Ghost();
00257 #endif
00258   
00259   if(testtype == 6){    
00260     record_gauge(gaugeParam.X, fatlink, fat_pad,
00261                  longlink, link_pad,
00262                  link_recon, link_recon_sloppy,
00263                  &gaugeParam);        
00264    }else{ 
00265     
00266 #ifdef MULTI_GPU
00267  
00268 
00269     gaugeParam.type = QUDA_ASQTAD_FAT_LINKS;
00270     gaugeParam.ga_pad = fat_pad;
00271     gaugeParam.reconstruct= gaugeParam.reconstruct_sloppy = QUDA_RECONSTRUCT_NO;
00272     loadGaugeQuda(fatlink, &gaugeParam);
00273     
00274     gaugeParam.type = QUDA_ASQTAD_LONG_LINKS;
00275     gaugeParam.ga_pad = link_pad;
00276     gaugeParam.reconstruct= link_recon;
00277     gaugeParam.reconstruct_sloppy = link_recon_sloppy;
00278     loadGaugeQuda(longlink, &gaugeParam);
00279 #else
00280     gaugeParam.type = QUDA_ASQTAD_FAT_LINKS;
00281     gaugeParam.reconstruct = gaugeParam.reconstruct_sloppy = QUDA_RECONSTRUCT_NO;
00282     loadGaugeQuda(fatlink, &gaugeParam);
00283     
00284     gaugeParam.type = QUDA_ASQTAD_LONG_LINKS;
00285     gaugeParam.reconstruct = link_recon;
00286     gaugeParam.reconstruct_sloppy = link_recon_sloppy;
00287     loadGaugeQuda(longlink, &gaugeParam);
00288 #endif
00289   }
00290 
00291   
00292   double time0 = -((double)clock()); // Start the timer
00293   
00294   double nrm2=0;
00295   double src2=0;
00296   int ret = 0;
00297 
00298   switch(testtype){
00299   case 0: //even
00300     inv_param.matpc_type = QUDA_MATPC_EVEN_EVEN;
00301     
00302     invertQuda(out->V(), in->V(), &inv_param);
00303     
00304     time0 += clock(); 
00305     time0 /= CLOCKS_PER_SEC;
00306 
00307 #ifdef MULTI_GPU    
00308     matdagmat_mg4dir(ref, fatlink, longlink, ghost_fatlink, ghost_longlink, 
00309                      out, mass, 0, inv_param.cpu_prec, gaugeParam.cpu_prec, tmp, QUDA_EVEN_PARITY);
00310 #else
00311     matdagmat(ref->V(), fatlink, longlink, out->V(), mass, 0, inv_param.cpu_prec, gaugeParam.cpu_prec, tmp->V(), QUDA_EVEN_PARITY);
00312 #endif
00313     
00314     mxpy(in->V(), ref->V(), Vh*mySpinorSiteSize, inv_param.cpu_prec);
00315     nrm2 = norm_2(ref->V(), Vh*mySpinorSiteSize, inv_param.cpu_prec);
00316     src2 = norm_2(in->V(), Vh*mySpinorSiteSize, inv_param.cpu_prec);
00317 
00318     break;
00319 
00320   case 1: //odd
00321         
00322     inv_param.matpc_type = QUDA_MATPC_ODD_ODD;
00323     invertQuda(out->V(), in->V(), &inv_param);  
00324     time0 += clock(); // stop the timer
00325     time0 /= CLOCKS_PER_SEC;
00326     
00327 #ifdef MULTI_GPU
00328     matdagmat_mg4dir(ref, fatlink, longlink, ghost_fatlink, ghost_longlink, 
00329                      out, mass, 0, inv_param.cpu_prec, gaugeParam.cpu_prec, tmp, QUDA_ODD_PARITY);
00330 #else
00331     matdagmat(ref->V(), fatlink, longlink, out->V(), mass, 0, inv_param.cpu_prec, gaugeParam.cpu_prec, tmp->V(), QUDA_ODD_PARITY);      
00332 #endif
00333     mxpy(in->V(), ref->V(), Vh*mySpinorSiteSize, inv_param.cpu_prec);
00334     nrm2 = norm_2(ref->V(), Vh*mySpinorSiteSize, inv_param.cpu_prec);
00335     src2 = norm_2(in->V(), Vh*mySpinorSiteSize, inv_param.cpu_prec);
00336         
00337     break;
00338     
00339   case 2: //full spinor
00340 
00341     errorQuda("full spinor not supported\n");
00342     break;
00343     
00344   case 3: //multi mass CG, even
00345   case 4:
00346   case 5:
00347   case 6:
00348 
00349 #define NUM_OFFSETS 7
00350         
00351     double masses[NUM_OFFSETS] ={5.05, 1.23, 2.64, 2.33, 2.70, 2.77, 2.81};
00352     double offsets[NUM_OFFSETS];        
00353     int num_offsets =NUM_OFFSETS;
00354     void* outArray[NUM_OFFSETS];
00355     int len;
00356     
00357     cpuColorSpinorField* spinorOutArray[NUM_OFFSETS];
00358     spinorOutArray[0] = out;    
00359     for(int i=1;i < num_offsets; i++){
00360       spinorOutArray[i] = new cpuColorSpinorField(csParam);       
00361     }
00362     
00363     for(int i=0;i < num_offsets; i++){
00364       outArray[i] = spinorOutArray[i]->V();
00365     }
00366 
00367     for (int i=0; i< num_offsets;i++){
00368       offsets[i] = 4*masses[i]*masses[i];
00369     }
00370     
00371     len=Vh;
00372 
00373     if (testtype == 3 || testtype == 6){
00374       inv_param.matpc_type = QUDA_MATPC_EVEN_EVEN;      
00375     } else if (testtype == 4){
00376       inv_param.matpc_type = QUDA_MATPC_ODD_ODD;      
00377     }else { //testtype ==5
00378       errorQuda("test 5 not supported\n");
00379     }
00380     
00381     double residue_sq;
00382     if (testtype == 6){
00383       invertMultiShiftQudaMixed(outArray, in->V(), &inv_param, offsets, num_offsets, &residue_sq);
00384     }else{      
00385       invertMultiShiftQuda(outArray, in->V(), &inv_param, offsets, num_offsets, &residue_sq);   
00386     }
00387     cudaThreadSynchronize();
00388     printfQuda("Final residue squred =%g\n", residue_sq);
00389     time0 += clock(); // stop the timer
00390     time0 /= CLOCKS_PER_SEC;
00391     
00392     printfQuda("done: total time = %g secs, %i iter / %g secs = %g gflops, \n", 
00393                time0, inv_param.iter, inv_param.secs,
00394                inv_param.gflops/inv_param.secs);
00395     
00396     
00397     printfQuda("checking the solution\n");
00398     QudaParity parity = QUDA_INVALID_PARITY;
00399     if (inv_param.solve_type == QUDA_NORMEQ_SOLVE){
00400       //parity = QUDA_EVENODD_PARITY;
00401       errorQuda("full parity not supported\n");
00402     }else if (inv_param.matpc_type == QUDA_MATPC_EVEN_EVEN){
00403       parity = QUDA_EVEN_PARITY;
00404     }else if (inv_param.matpc_type == QUDA_MATPC_ODD_ODD){
00405       parity = QUDA_ODD_PARITY;
00406     }else{
00407       errorQuda("ERROR: invalid spinor parity \n");
00408       exit(1);
00409     }
00410     for(int i=0;i < num_offsets;i++){
00411       printfQuda("%dth solution: mass=%f, ", i, masses[i]);
00412 #ifdef MULTI_GPU
00413       matdagmat_mg4dir(ref, fatlink, longlink, ghost_fatlink, ghost_longlink, 
00414                        spinorOutArray[i], masses[i], 0, inv_param.cpu_prec, 
00415                        gaugeParam.cpu_prec, tmp, parity);
00416 #else
00417       matdagmat(ref->V(), fatlink, longlink, outArray[i], masses[i], 0, inv_param.cpu_prec, gaugeParam.cpu_prec, tmp->V(), parity);
00418 #endif
00419       mxpy(in->V(), ref->V(), len*mySpinorSiteSize, inv_param.cpu_prec);
00420       double nrm2 = norm_2(ref->V(), len*mySpinorSiteSize, inv_param.cpu_prec);
00421       double src2 = norm_2(in->V(), len*mySpinorSiteSize, inv_param.cpu_prec);
00422       
00423       printfQuda("relative residual, requested = %g, actual = %g\n", inv_param.tol, sqrt(nrm2/src2));
00424 
00425       //emperical, if the cpu residue is more than 1 order the target accuracy, the it fails to converge
00426       if (sqrt(nrm2/src2) > 10*inv_param.tol){
00427         ret |=1;
00428       }
00429     }
00430 
00431     if (ret ==1){
00432       errorQuda("Converge failed!\n");
00433     }
00434 
00435     for(int i=1; i < num_offsets;i++){
00436       delete spinorOutArray[i];
00437     }
00438 
00439     
00440   }//switch
00441     
00442 
00443   if (testtype <=2){
00444 
00445     printfQuda("Relative residual, requested = %g, actual = %g\n", inv_param.tol, sqrt(nrm2/src2));
00446         
00447     printfQuda("done: total time = %g secs, %i iter / %g secs = %g gflops, \n", 
00448                time0, inv_param.iter, inv_param.secs,
00449                inv_param.gflops/inv_param.secs);
00450     
00451     //emperical, if the cpu residue is more than 2 order the target accuracy, the it fails to converge
00452     if (sqrt(nrm2/src2) > 100*inv_param.tol){
00453       ret = 1;
00454       errorQuda("Convergence failed!\n");
00455     }
00456   }
00457 
00458   end();
00459   return ret;
00460 }
00461 
00462 
00463 
00464 static void
00465 end(void) 
00466 {
00467   for(int i=0;i < 4;i++){
00468     free(fatlink[i]);
00469     free(longlink[i]);
00470   }
00471 
00472   delete in;
00473   delete out;
00474   delete ref;
00475   delete tmp;
00476   
00477   if (cpuFat) delete cpuFat;
00478   if (cpuLong) delete cpuLong;
00479 
00480   endQuda();
00481 }
00482 
00483 
00484 void
00485 display_test_info()
00486 {
00487   printfQuda("running the following test:\n");
00488     
00489   printfQuda("prec    sloppy_prec    link_recon  sloppy_link_recon test_type  S_dimension T_dimension\n");
00490   printfQuda("%s   %s             %s            %s            %s         %d/%d/%d          %d \n",
00491          get_prec_str(prec),get_prec_str(prec_sloppy),
00492          get_recon_str(link_recon), 
00493              get_recon_str(link_recon_sloppy), get_test_type(testtype), xdim, ydim, zdim, tdim);     
00494 
00495   printfQuda("Grid partition info:     X  Y  Z  T\n"); 
00496   printfQuda("                         %d  %d  %d  %d\n", 
00497              commDimPartitioned(0),
00498              commDimPartitioned(1),
00499              commDimPartitioned(2),
00500              commDimPartitioned(3)); 
00501   
00502   return ;
00503   
00504 }
00505 
00506 void
00507 usage_extra(char** argv )
00508 {
00509   printfQuda("Extra options:\n");
00510   printfQuda("    --tol  <resid_tol>                       # Set residual tolerance\n");
00511   printfQuda("    --test <0/1>                             # Test method\n");
00512   printfQuda("                                                0: Even even spinor CG inverter\n");
00513   printfQuda("                                                1: Odd odd spinor CG inverter\n");
00514   printfQuda("                                                3: Even even spinor multishift CG inverter\n");
00515   printfQuda("                                                4: Odd odd spinor multishift CG inverter\n");
00516   printfQuda("                                                6: Even even spinor mixed precision multishift CG inverter\n");
00517   printfQuda("    --cpu_prec <double/single/half>          # Set CPU precision\n");
00518   
00519   return ;
00520 }
00521 int main(int argc, char** argv)
00522 {
00523 
00524   int i;
00525   for (i =1;i < argc; i++){
00526 
00527     if(process_command_line_option(argc, argv, &i) == 0){
00528       continue;
00529     }   
00530     
00531 
00532     if( strcmp(argv[i], "--tol") == 0){
00533       float tmpf;
00534       if (i+1 >= argc){
00535         usage(argv);
00536       }
00537       sscanf(argv[i+1], "%f", &tmpf);
00538       if (tmpf <= 0){
00539         printf("ERROR: invalid tol(%f)\n", tmpf);
00540         usage(argv);
00541       }
00542       tol = tmpf;
00543       i++;
00544       continue;
00545     }
00546     
00547     
00548     if( strcmp(argv[i], "--test") == 0){
00549       if (i+1 >= argc){
00550         usage(argv);
00551       }     
00552       testtype = atoi(argv[i+1]);
00553       i++;
00554       continue;     
00555     }
00556     
00557     if( strcmp(argv[i], "--cpu_prec") == 0){
00558       if (i+1 >= argc){
00559         usage(argv);
00560       }
00561       cpu_prec= get_prec(argv[i+1]);
00562       i++;
00563       continue;
00564     }
00565    
00566         
00567     printf("ERROR: Invalid option:%s\n", argv[i]);
00568     usage(argv);
00569   }
00570 
00571 
00572   if (prec_sloppy == QUDA_INVALID_PRECISION){
00573     prec_sloppy = prec;
00574   }
00575   if (link_recon_sloppy == QUDA_RECONSTRUCT_INVALID){
00576     link_recon_sloppy = link_recon;
00577   }
00578   
00579 
00580   initCommsQuda(argc, argv, gridsize_from_cmdline, 4);
00581   display_test_info();
00582   
00583   int ret = invert_test();
00584 
00585   endCommsQuda();
00586 
00587   return ret;
00588 }