quda-ref/v1.0.0/multigrid_8cpp_source.html

 #include <multigrid.h>
 #include <qio_field.h>
 #include <string.h>

 #include <eigensolve_quda.h>

 namespace quda
 {

   using namespace blas;

   // File-scope debug switch, disabled by default.
   // NOTE(review): nothing in the portion of the file reviewed here reads this flag;
   // presumably code further down uses it to enable extra checks — confirm before removing.
   static bool debug = false;

   /**
      Construct one level of the multigrid hierarchy.

      The constructor allocates the residual vector (and, for a preconditioned smoother
      solve, the parity-sized source vector b_tilde), seeds a random-number generator,
      obtains the near-null vectors param.B for this level (random + setup, loaded from
      file, eigensolver, or free-field, depending on param.mg_global), and finally calls
      reset(), which builds the transfer operator, the coarse operators, the smoothers and
      the next-coarser level.

      @param param          Parameters of this level; kept by reference
      @param profile_global Profile handed on to the coarser levels

      NOTE(review): the base class is initialised with the member `profile` before that
      member itself has been constructed — this is only safe if Solver merely stores the
      reference; confirm against the Solver constructor.
   */
   MG::MG(MGParam &param, TimeProfile &profile_global) :
     Solver(param, profile),
     param(param),
     transfer(0),
     resetTransfer(false),
     presmoother(nullptr),
     postsmoother(nullptr),
     profile_global(profile_global),
     profile("MG level " + std::to_string(param.level), false),
     coarse(nullptr),
     coarse_solver(nullptr),
     param_coarse(nullptr),
     param_presmooth(nullptr),
     param_postsmooth(nullptr),
     param_coarse_solver(nullptr),
     r(nullptr),
     r_coarse(nullptr),
     x_coarse(nullptr),
     tmp_coarse(nullptr),
     tmp2_coarse(nullptr),
     diracResidual(param.matResidual->Expose()),
     diracSmoother(param.matSmooth->Expose()),
     diracSmootherSloppy(param.matSmoothSloppy->Expose()),
     diracCoarseResidual(nullptr),
     diracCoarseSmoother(nullptr),
     diracCoarseSmootherSloppy(nullptr),
     matCoarseResidual(nullptr),
     matCoarseSmoother(nullptr),
     matCoarseSmootherSloppy(nullptr),
     rng(nullptr)
   {
     // b_tilde and B_coarse are not covered by the initializer list and are only assigned
     // on some paths (b_tilde below for preconditioned smoothing, B_coarse in reset()).
     // Null them first so that later null checks (e.g. in the destructor) never inspect
     // an indeterminate pointer.  Done in the body to stay independent of declaration order.
     b_tilde = nullptr;
     B_coarse = nullptr;

     sprintf(prefix, "MG level %d (%s): ", param.level, param.location == QUDA_CUDA_FIELD_LOCATION ? "GPU" : "CPU");
     pushLevel(param.level);

     if (param.level >= QUDA_MAX_MG_LEVEL)
       errorQuda("Level=%d is greater than limit of multigrid recursion depth", param.level);

     if (param.coarse_grid_solution_type == QUDA_MATPC_SOLUTION && param.smoother_solve_type != QUDA_DIRECT_PC_SOLVE)
       errorQuda("Cannot use preconditioned coarse grid solution without preconditioned smoother solve");

     // allocating vectors
     {
       // create residual vectors, using the first null vector as the template
       ColorSpinorParam csParam(*(param.B[0]));
       csParam.create = QUDA_NULL_FIELD_CREATE;
       csParam.location = param.location;
       csParam.setPrecision(param.mg_global.invert_param->cuda_prec_sloppy, QUDA_INVALID_PRECISION,
                            csParam.location == QUDA_CUDA_FIELD_LOCATION ? true : false);
       if (csParam.location==QUDA_CUDA_FIELD_LOCATION) {
         csParam.gammaBasis = param.level > 0 ? QUDA_DEGRAND_ROSSI_GAMMA_BASIS: QUDA_UKQCD_GAMMA_BASIS;
       }
       if (param.B[0]->Nspin() == 1) csParam.gammaBasis = param.B[0]->GammaBasis(); // hack for staggered to avoid unnecessary basis checks
       r = ColorSpinorField::Create(csParam);

       // if we're using preconditioning then allocate storage for the preconditioned source vector
       if (param.smoother_solve_type == QUDA_DIRECT_PC_SOLVE) {
         // single-parity field: halve the first dimension of the full field
         csParam.x[0] /= 2;
         csParam.siteSubset = QUDA_PARITY_SITE_SUBSET;
         b_tilde = ColorSpinorField::Create(csParam);
       }
     }

     // fixed seed (1234) so the random vectors are reproducible from run to run
     rng = new RNG(*param.B[0], 1234);
     rng->Init();

     // null vectors are only needed on levels that have a coarser level below them
     if (param.level < param.Nlevel-1) {
       if (param.mg_global.compute_null_vector == QUDA_COMPUTE_NULL_VECTOR_YES) {
         if (param.mg_global.generate_all_levels == QUDA_BOOLEAN_TRUE || param.level == 0) {

           // Initializing to random vectors
           for(int i=0; i<(int)param.B.size(); i++) {
             spinorNoise(*r, *rng, QUDA_NOISE_UNIFORM);
             *param.B[i] = *r;
             param.evals.push_back(0.0);
           }
         }
         if (param.mg_global.num_setup_iter[param.level] > 0) {
           if (strcmp(param.mg_global.vec_infile[param.level], "")
               != 0) { // only load if infile is defined and not computing
             loadVectors(param.B);
           } else if (param.mg_global.use_eig_solver[param.level]) {
             generateEigenVectors(); // Run the eigensolver
           } else {
             generateNullVectors(param.B);
           }
         }
       } else if (strcmp(param.mg_global.vec_infile[param.level], "")
                  != 0) { // only load if infile is defined and not computing
         // NOTE(review): this branch tests vec_infile but *generates* vectors, while the
         // next branch loads without testing vec_infile — this looks inconsistent with the
         // trailing comments; behaviour is left untouched pending confirmation.
         if ( param.mg_global.num_setup_iter[param.level] > 0 ) generateNullVectors(param.B);
       } else if (param.mg_global.vec_load[param.level] == QUDA_BOOLEAN_TRUE) { // only conditional load of null vectors

         loadVectors(param.B);
       } else { // generate free field vectors
         buildFreeVectors(param.B);
       }
     }

     // in case of iterative setup with MG the coarse level may be already built
     if (!transfer) reset();

     popLevel(param.level);
   }

   /**
      (Re)build everything on this level that depends on the operators or the null vectors:
      smoothers, transfer operator, coarse Dirac operators, the coarser MG level and the
      coarse-solver wrapper.  On first use (transfer == nullptr) the objects are created;
      on later calls they are reset in place.

      @param refresh If true, the null vectors are refreshed (when
                     setup_maxiter_refresh is non-zero for this level) and the transfer
                     operator is reset; the flag is forwarded to the coarser level.
   */
   void MG::reset(bool refresh) {
     pushLevel(param.level);

     if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("%s level %d\n", transfer ? "Resetting" : "Creating", param.level);

     // tear down solver objects first; they are rebuilt below once the operators are current
     destroySmoother();
     destroyCoarseSolver();

     // reset the Dirac operator pointers since these may have changed
     diracResidual = param.matResidual->Expose();
     diracSmoother = param.matSmooth->Expose();
     diracSmootherSloppy = param.matSmoothSloppy->Expose();

     // Refresh the null-space vectors if we need to
     if (refresh && param.level < param.Nlevel-1) {
       if (param.mg_global.setup_maxiter_refresh[param.level]) generateNullVectors(param.B, refresh);
     }

     // if not on the coarsest level, update next
     if (param.level < param.Nlevel-1) {

       if (transfer) {
         // restoring FULL parity in Transfer changed at the end of this procedure
         transfer->setSiteSubset(QUDA_FULL_SITE_SUBSET, QUDA_INVALID_PARITY);
         if (resetTransfer || refresh) {
           transfer->reset();
           resetTransfer = false;
         }
       } else {
         // create transfer operator
         if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Creating transfer operator\n");
         transfer = new Transfer(param.B, param.Nvec, param.NblockOrtho, param.geoBlockSize, param.spinBlockSize,
                                 param.mg_global.precision_null[param.level], profile);
         // write the block size actually used back into the global parameters
         // NOTE(review): the loop bound is QUDA_MAX_MG_LEVEL although the index runs over
         // block-size dimensions — presumably a dimension count was intended; confirm.
         for (int i=0; i<QUDA_MAX_MG_LEVEL; i++) param.mg_global.geo_block_size[param.level][i] = param.geoBlockSize[i];

         // create first coarse temporary vector
         tmp_coarse = param.B[0]->CreateCoarse(param.geoBlockSize, param.spinBlockSize, param.Nvec, r->Precision(), param.mg_global.location[param.level+1]);

         // create second coarse temporary vector
         tmp2_coarse = param.B[0]->CreateCoarse(param.geoBlockSize, param.spinBlockSize, param.Nvec, r->Precision(),
                                                param.mg_global.location[param.level + 1]);

         // create coarse residual vector
         r_coarse = param.B[0]->CreateCoarse(param.geoBlockSize, param.spinBlockSize, param.Nvec, r->Precision(), param.mg_global.location[param.level+1]);

         // create coarse solution vector
         x_coarse = param.B[0]->CreateCoarse(param.geoBlockSize, param.spinBlockSize, param.Nvec, r->Precision(), param.mg_global.location[param.level+1]);

         // null-vector storage for the next level; sized for whichever of this level's
         // and the next level's vector count is larger
         B_coarse = new std::vector<ColorSpinorField*>();
         int nVec_coarse = std::max(param.Nvec, param.mg_global.n_vec[param.level + 1]);
         B_coarse->resize(nVec_coarse);

         // only have single precision B vectors on the coarse grid
         QudaPrecision B_coarse_precision = std::max(param.mg_global.precision_null[param.level+1], QUDA_SINGLE_PRECISION);
         for (int i=0; i<nVec_coarse; i++)
           (*B_coarse)[i] = param.B[0]->CreateCoarse(param.geoBlockSize, param.spinBlockSize, param.Nvec, B_coarse_precision, param.mg_global.setup_location[param.level+1]);

         // if we're not generating on all levels then we need to propagate the vectors down
         if (param.mg_global.generate_all_levels == QUDA_BOOLEAN_FALSE) {
           if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Restricting null space vectors\n");
           for (int i=0; i<param.Nvec; i++) {
             zero(*(*B_coarse)[i]);
             transfer->R(*(*B_coarse)[i], *(param.B[i]));
           }
         }
         if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Transfer operator done\n");
       }

       // (re)build the coarse operators from the current transfer operator
       createCoarseDirac();
     }

     // delay allocating smoother until after coarse-links have been created
     createSmoother();

     if (param.level < param.Nlevel-1) {
       // creating or resetting the coarse level
       if (coarse) {
         // existing coarse level: refresh its parameters and operator pointers, then recurse
         coarse->param.updateInvertParam(*param.mg_global.invert_param);
         coarse->param.delta = 1e-20;
         coarse->param.precision = param.mg_global.invert_param->cuda_prec_precondition;
         coarse->param.matResidual = matCoarseResidual;
         coarse->param.matSmooth = matCoarseSmoother;
         coarse->param.matSmoothSloppy = matCoarseSmootherSloppy;
         coarse->reset(refresh);
       } else {
         // create the next multigrid level
         param_coarse = new MGParam(param, *B_coarse, param.evals, matCoarseResidual, matCoarseSmoother,
                                    matCoarseSmootherSloppy, param.level + 1);
         param_coarse->fine = this;
         param_coarse->delta = 1e-20;
         param_coarse->precision = param.mg_global.invert_param->cuda_prec_precondition;

         // constructing the coarse level recursively builds all levels below it
         coarse = new MG(*param_coarse, profile_global);
       }
       setOutputPrefix(prefix); // restore since we just popped back from coarse grid

       createCoarseSolver();

       if (param.level == param.Nlevel - 2 && param.mg_global.use_eig_solver[param.level + 1]) {
         // if we are deflating the coarsest grid, then run a dummy solve
         // so that the deflation is done during the setup
         spinorNoise(*r_coarse, *coarse->rng, QUDA_NOISE_UNIFORM);
         param_coarse_solver->maxiter = 1; // do a single iteration on the dummy solve
         (*coarse_solver)(*x_coarse, *r_coarse);
         // put back the configured iteration limit after the dummy solve
         param_coarse_solver->maxiter = param.mg_global.coarse_solver_maxiter[param.level + 1];
       }
     }

     if (param.level == 0) {
       // now we can run the verification if requested
       if (param.mg_global.run_verify) verify();
     }

     if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Setup of level %d done\n", param.level);

     popLevel(param.level);
   }

   /**
      Enter the logging context of a level: post a trace marker, then push that level's
      verbosity and this object's output prefix.  Every call is expected to be paired
      with popLevel().

      @param level Index into param.mg_global.verbosity
   */
   void MG::pushLevel(int level) const
   {
     postTrace();

     const auto level_verbosity = param.mg_global.verbosity[level];
     pushVerbosity(level_verbosity);
     pushOutputPrefix(prefix);
   }

   /**
      Leave the logging context entered by pushLevel(): pop the verbosity and the output
      prefix, then post a trace marker.

      @param level Not used by this function; kept so calls mirror pushLevel()
   */
   void MG::popLevel(int level) const
   {
     (void)level; // intentionally unused

     popVerbosity();
     popOutputPrefix();

     postTrace();
   }

   /**
      Free the pre- and post-smoother and their parameter objects, leaving all four
      pointers null so the function can be called repeatedly.
   */
   void MG::destroySmoother()
   {
     pushLevel(param.level);

     // delete on a null pointer is a no-op, so no guards are required

     // pre-smoother and its parameters
     delete presmoother;
     presmoother = nullptr;
     delete param_presmooth;
     param_presmooth = nullptr;

     // post-smoother and its parameters
     delete postsmoother;
     postsmoother = nullptr;
     delete param_postsmooth;
     param_postsmooth = nullptr;

     popLevel(param.level);
   }

   void MG::createSmoother() {
     pushLevel(param.level);

     // create the smoother for this level
     if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Creating smoother\n");
     destroySmoother();
     param_presmooth = new SolverParam(param);

     param_presmooth->is_preconditioner = false;
     param_presmooth->preserve_source = QUDA_PRESERVE_SOURCE_NO;
     param_presmooth->return_residual = true; // pre-smoother returns the residual vector for subsequent coarsening
     param_presmooth->use_init_guess = QUDA_USE_INIT_GUESS_NO;

     param_presmooth->precision = param.mg_global.invert_param->cuda_prec_sloppy;
     param_presmooth->precision_sloppy = (param.level == 0) ? param.mg_global.invert_param->cuda_prec_precondition : param.mg_global.invert_param->cuda_prec_sloppy;
     param_presmooth->precision_precondition = (param.level == 0) ? param.mg_global.invert_param->cuda_prec_precondition : param.mg_global.invert_param->cuda_prec_sloppy;

     param_presmooth->inv_type = param.smoother;
     param_presmooth->inv_type_precondition = QUDA_INVALID_INVERTER;
     param_presmooth->residual_type = (param_presmooth->inv_type == QUDA_MR_INVERTER) ? QUDA_INVALID_RESIDUAL : QUDA_L2_RELATIVE_RESIDUAL;
     param_presmooth->Nsteps = param.mg_global.smoother_schwarz_cycle[param.level];
     param_presmooth->maxiter = (param.level < param.Nlevel-1) ? param.nu_pre : param.nu_pre + param.nu_post;

     param_presmooth->Nkrylov = param_presmooth->maxiter;
     param_presmooth->pipeline = param_presmooth->maxiter;
     param_presmooth->tol = param.smoother_tol;
     param_presmooth->global_reduction = param.global_reduction;

     param_presmooth->sloppy_converge = true; // this means we don't check the true residual before declaring convergence

     param_presmooth->schwarz_type = param.mg_global.smoother_schwarz_type[param.level];
     // inner solver should recompute the true residual after each cycle if using Schwarz preconditioning
     param_presmooth->compute_true_res = (param_presmooth->schwarz_type != QUDA_INVALID_SCHWARZ) ? true : false;

     presmoother = ( (param.level < param.Nlevel-1 || param_presmooth->schwarz_type != QUDA_INVALID_SCHWARZ) &&
                     param_presmooth->inv_type != QUDA_INVALID_INVERTER && param_presmooth->maxiter > 0) ?
       Solver::create(*param_presmooth, *param.matSmooth, *param.matSmoothSloppy, *param.matSmoothSloppy, profile) : nullptr;

     if (param.level < param.Nlevel-1) { //Create the post smoother
       param_postsmooth = new SolverParam(*param_presmooth);
       param_postsmooth->return_residual = false;  // post smoother does not need to return the residual vector
       param_postsmooth->use_init_guess = QUDA_USE_INIT_GUESS_YES;

       param_postsmooth->maxiter = param.nu_post;
       param_postsmooth->Nkrylov = param_postsmooth->maxiter;
       param_postsmooth->pipeline = param_postsmooth->maxiter;

       // we never need to compute the true residual for a post smoother
       param_postsmooth->compute_true_res = false;

       postsmoother = (param_postsmooth->inv_type != QUDA_INVALID_INVERTER && param_postsmooth->maxiter > 0) ?
   Solver::create(*param_postsmooth, *param.matSmooth, *param.matSmoothSloppy, *param.matSmoothSloppy, profile) : nullptr;
     }
     if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Smoother done\n");

     popLevel(param.level);
   }

   /**
      Build the coarse-grid operators for the next level from the current transfer
      operator: the residual operator (diracCoarseResidual), the smoother operator and
      its sloppy variant, plus the DiracM wrappers around each.  Previously created
      objects are deleted and replaced.
   */
   void MG::createCoarseDirac() {
     pushLevel(param.level);

     if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Creating coarse Dirac operator\n");
     // check if we are coarsening the preconditioned system then
     bool preconditioned_coarsen = (param.coarse_grid_solution_type == QUDA_MATPC_SOLUTION && param.smoother_solve_type == QUDA_DIRECT_PC_SOLVE);
     QudaMatPCType matpc_type = param.mg_global.invert_param->matpc_type;

     // create coarse grid operator
     DiracParam diracParam;
     diracParam.transfer = transfer;

     // coarsen the smoother operator if the preconditioned system is coarsened, else the residual operator
     diracParam.dirac = preconditioned_coarsen ? const_cast<Dirac*>(diracSmoother) : const_cast<Dirac*>(diracResidual);
     diracParam.kappa = diracParam.dirac->Kappa();
     diracParam.mu = diracParam.dirac->Mu();
     // difference between the mu factors configured for the next level and for this level
     diracParam.mu_factor = param.mg_global.mu_factor[param.level+1]-param.mg_global.mu_factor[param.level];

     // Need to figure out if we need to force bi-directional build. If any previous level (incl this one) was
     // preconditioned, we have to force bi-directional builds.
     diracParam.need_bidirectional = QUDA_BOOLEAN_FALSE;
     for (int i = 0; i <= param.level; i++) {
       if (param.mg_global.coarse_grid_solution_type[i] == QUDA_MATPC_SOLUTION
           && param.mg_global.smoother_solve_type[i] == QUDA_DIRECT_PC_SOLVE) {
         diracParam.need_bidirectional = QUDA_BOOLEAN_TRUE;
       }
     }

     diracParam.dagger = QUDA_DAG_NO;
     diracParam.matpcType = matpc_type;
     diracParam.type = QUDA_COARSE_DIRAC;
     diracParam.tmp1 = tmp_coarse;
     diracParam.tmp2 = tmp2_coarse;
     diracParam.halo_precision = param.mg_global.precision_null[param.level];
     constexpr int MAX_BLOCK_FLOAT_NC=32; // FIXME this is the maximum number of colors for which we support block-float format
     // beyond that limit fall back to a single-precision halo
     if (param.Nvec > MAX_BLOCK_FLOAT_NC) diracParam.halo_precision = QUDA_SINGLE_PRECISION;

     // (re)create the coarse residual operator; the two flags select the setup
     // location (GPU if setup_location is CUDA) and the minimise-memory setup option
     if (diracCoarseResidual) delete diracCoarseResidual;
     diracCoarseResidual = new DiracCoarse(diracParam, param.setup_location == QUDA_CUDA_FIELD_LOCATION ? true : false,
                                           param.mg_global.setup_minimize_memory == QUDA_BOOLEAN_TRUE ? true : false);

     // create smoothing operators
     diracParam.dirac = const_cast<Dirac*>(param.matSmooth->Expose());
     diracParam.halo_precision = param.mg_global.smoother_halo_precision[param.level+1];

     if (diracCoarseSmoother) delete diracCoarseSmoother;
     if (diracCoarseSmootherSloppy) delete diracCoarseSmootherSloppy;
     if (param.mg_global.smoother_solve_type[param.level+1] == QUDA_DIRECT_PC_SOLVE) {
       // preconditioned smoother on the coarse level: operators act on a single parity,
       // so the temporaries are the even halves of the coarse temporaries
       diracParam.type = QUDA_COARSEPC_DIRAC;
       diracParam.tmp1 = &(tmp_coarse->Even());
       diracParam.tmp2 = &(tmp2_coarse->Even());
       diracCoarseSmoother = new DiracCoarsePC(static_cast<DiracCoarse&>(*diracCoarseResidual), diracParam);
       {
         // the sloppy smoother switches off communication in all four directions when a Schwarz type is set
         bool schwarz = param.mg_global.smoother_schwarz_type[param.level+1] != QUDA_INVALID_SCHWARZ;
         for (int i=0; i<4; i++) diracParam.commDim[i] = schwarz ? 0 : 1;
       }
       diracCoarseSmootherSloppy = new DiracCoarsePC(static_cast<DiracCoarse&>(*diracCoarseSmoother),diracParam);
     } else {
       // unpreconditioned smoother on the coarse level: full-field operators
       diracParam.type = QUDA_COARSE_DIRAC;
       diracParam.tmp1 = tmp_coarse;
       diracParam.tmp2 = tmp2_coarse;
       diracCoarseSmoother = new DiracCoarse(static_cast<DiracCoarse&>(*diracCoarseResidual), diracParam);
       {
         // the sloppy smoother switches off communication in all four directions when a Schwarz type is set
         bool schwarz = param.mg_global.smoother_schwarz_type[param.level+1] != QUDA_INVALID_SCHWARZ;
         for (int i=0; i<4; i++) diracParam.commDim[i] = schwarz ? 0 : 1;
       }
       diracCoarseSmootherSloppy = new DiracCoarse(static_cast<DiracCoarse&>(*diracCoarseSmoother),diracParam);
     }

     // replace the matrix wrappers so they refer to the freshly created operators
     if (matCoarseResidual) delete matCoarseResidual;
     if (matCoarseSmoother) delete matCoarseSmoother;
     if (matCoarseSmootherSloppy) delete matCoarseSmootherSloppy;
     matCoarseResidual = new DiracM(*diracCoarseResidual);
     matCoarseSmoother = new DiracM(*diracCoarseSmoother);
     matCoarseSmootherSloppy = new DiracM(*diracCoarseSmootherSloppy);

     if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Coarse Dirac operator done\n");

     popLevel(param.level);
   }

   /**
      Free the coarse-solver wrapper and its parameters, if this level owns them.

      - Coarsest level: no coarse solver is ever created there (createCoarseSolver() is
        only invoked for levels with a coarser level below), so there is nothing to do.
        reset() nevertheless calls this function on every level; without this case a
        V-cycle configured on the coarsest level fell through to the "not supported"
        error even though the cycle type is valid.
      - V-cycle above the second-to-coarsest level: coarse_solver is just an alias of
        coarse, which is owned and released separately, so nothing is deleted here.
      - Recursive cycle, or second-to-coarsest level: the wrapper and its parameters were
        allocated by createCoarseSolver() and are deleted and nulled.
      - Anything else is an unsupported cycle type.
   */
   void MG::destroyCoarseSolver() {
     pushLevel(param.level);

     if (param.level >= param.Nlevel-1) {
       // coarsest level: no coarse solver exists, regardless of cycle type
     } else if (param.cycle_type == QUDA_MG_CYCLE_VCYCLE && param.level < param.Nlevel-2) {
       // nothing to do: coarse_solver aliases coarse
     } else if (param.cycle_type == QUDA_MG_CYCLE_RECURSIVE || param.level == param.Nlevel-2) {
       if (coarse_solver) {
         delete coarse_solver;
         coarse_solver = nullptr;
       }
       if (param_coarse_solver) {
         delete param_coarse_solver;
         param_coarse_solver = nullptr;
       }
     } else {
       errorQuda("Multigrid cycle type %d not supported", param.cycle_type);
     }

     popLevel(param.level);
   }

   /**
      Create the solver used for the coarse-grid correction of this level.

      For a V-cycle above the second-to-coarsest level the coarser MG object itself is
      used.  For a recursive cycle, or on the second-to-coarsest level, a Krylov solver is
      created for the coarse operator (optionally preconditioned by the coarser MG level
      and optionally deflated on the coarsest level) and wrapped in a PreconditionedSolver.
      Any previously created coarse solver is destroyed first.
   */
   void MG::createCoarseSolver() {
     pushLevel(param.level);

     if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Creating coarse solver wrapper\n");
     destroyCoarseSolver();
     if (param.cycle_type == QUDA_MG_CYCLE_VCYCLE && param.level < param.Nlevel-2) {
       // V-cycle and the next level is not the coarsest: the coarse MG level is applied
       // directly as the coarse solver (coarse_solver is an alias, not a separate object)
       coarse_solver = coarse;
       if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Assigned coarse solver to coarse MG operator\n");
     } else if (param.cycle_type == QUDA_MG_CYCLE_RECURSIVE || param.level == param.Nlevel-2) {

       // start from a copy of this level's parameters and override what differs
       param_coarse_solver = new SolverParam(param);
       param_coarse_solver->inv_type = param.mg_global.coarse_solver[param.level + 1];
       param_coarse_solver->is_preconditioner = false;
       param_coarse_solver->sloppy_converge = true; // this means we don't check the true residual before declaring convergence

       param_coarse_solver->preserve_source = QUDA_PRESERVE_SOURCE_NO;  // the source vector need not be preserved
       param_coarse_solver->return_residual = false; // coarse solver does not need to return the residual vector

       param_coarse_solver->use_init_guess = QUDA_USE_INIT_GUESS_NO;
       // Coarse level deflation is triggered if the eig param structure exists
       // on the coarsest level, and we are on the next to coarsest level.
       if (param.mg_global.use_eig_solver[param.Nlevel - 1] && (param.level == param.Nlevel - 2)) {
         param_coarse_solver->eig_param = *param.mg_global.eig_param[param.Nlevel - 1];
         param_coarse_solver->deflate = QUDA_BOOLEAN_TRUE;
         // Due to coherence between these levels, an initial guess
         // might be beneficial.
         if (param.mg_global.coarse_guess == QUDA_BOOLEAN_TRUE) {
           param_coarse_solver->use_init_guess = QUDA_USE_INIT_GUESS_YES;
         }

         // Deflation on the coarse grid is supported only for the six solvers listed below
         if (param_coarse_solver->inv_type != QUDA_CA_CGNR_INVERTER && param_coarse_solver->inv_type != QUDA_CGNR_INVERTER
             && param_coarse_solver->inv_type != QUDA_CA_CGNE_INVERTER
             && param_coarse_solver->inv_type != QUDA_CGNE_INVERTER && param_coarse_solver->inv_type != QUDA_CA_GCR_INVERTER
             && param_coarse_solver->inv_type != QUDA_GCR_INVERTER) {
           errorQuda("Coarse grid deflation not supported with coarse solver %d", param_coarse_solver->inv_type);
         }

         // Derive the deflation-space input file name "<infile>_level_<L>_defl_<n_vec>"
         // NOTE(review): strcpy is unbounded — the destination buffer size is not visible
         // here; confirm the composed name always fits.
         if (strcmp(param_coarse_solver->eig_param.vec_infile, "") == 0 && // check that input file not already set
             param.mg_global.vec_load[param.level + 1] == QUDA_BOOLEAN_TRUE
             && (strcmp(param.mg_global.vec_infile[param.level + 1], "") != 0)) {
           std::string vec_infile(param.mg_global.vec_infile[param.level + 1]);
           vec_infile += "_level_";
           vec_infile += std::to_string(param.level + 1);
           vec_infile += "_defl_";
           vec_infile += std::to_string(param.mg_global.n_vec[param.level + 1]);
           strcpy(param_coarse_solver->eig_param.vec_infile, vec_infile.c_str());
         }

         // Same naming scheme for the deflation-space output file (same strcpy caveat)
         if (strcmp(param_coarse_solver->eig_param.vec_outfile, "") == 0 && // check that output file not already set
             param.mg_global.vec_store[param.level + 1] == QUDA_BOOLEAN_TRUE
             && (strcmp(param.mg_global.vec_outfile[param.level + 1], "") != 0)) {
           std::string vec_outfile(param.mg_global.vec_outfile[param.level + 1]);
           vec_outfile += "_level_";
           vec_outfile += std::to_string(param.level + 1);
           vec_outfile += "_defl_";
           vec_outfile += std::to_string(param.mg_global.n_vec[param.level + 1]);
           strcpy(param_coarse_solver->eig_param.vec_outfile, vec_outfile.c_str());
         }
       }

       param_coarse_solver->tol = param.mg_global.coarse_solver_tol[param.level+1];
       param_coarse_solver->global_reduction = true;
       param_coarse_solver->compute_true_res = false;
       param_coarse_solver->delta = 1e-8;
       param_coarse_solver->pipeline = 8;

       param_coarse_solver->maxiter = param.mg_global.coarse_solver_maxiter[param.level+1];
       // Krylov space size is capped at 20
       param_coarse_solver->Nkrylov = param_coarse_solver->maxiter < 20 ? param_coarse_solver->maxiter : 20;
       // communication-avoiding solvers take their basis settings (and Krylov size) from the CA parameters
       if (param_coarse_solver->inv_type == QUDA_CA_CG_INVERTER ||
           param_coarse_solver->inv_type == QUDA_CA_CGNE_INVERTER ||
           param_coarse_solver->inv_type == QUDA_CA_CGNR_INVERTER ||
           param_coarse_solver->inv_type == QUDA_CA_GCR_INVERTER) {
         param_coarse_solver->ca_basis = param.mg_global.coarse_solver_ca_basis[param.level+1];
         param_coarse_solver->ca_lambda_min = param.mg_global.coarse_solver_ca_lambda_min[param.level+1];
         param_coarse_solver->ca_lambda_max = param.mg_global.coarse_solver_ca_lambda_max[param.level+1];
         param_coarse_solver->Nkrylov = param.mg_global.coarse_solver_ca_basis_size[param.level+1];
       }
       // precondition with the coarser MG level unless that level is the coarsest one and has no pre-smoother
       param_coarse_solver->inv_type_precondition = (param.level<param.Nlevel-2 || coarse->presmoother) ? QUDA_MG_INVERTER : QUDA_INVALID_INVERTER;
       param_coarse_solver->preconditioner = (param.level<param.Nlevel-2 || coarse->presmoother) ? coarse : nullptr;
       param_coarse_solver->mg_instance = true;
       param_coarse_solver->verbosity_precondition = param.mg_global.verbosity[param.level+1];

       // preconditioned solver wrapper is uniform precision
       param_coarse_solver->precision = r_coarse->Precision();
       param_coarse_solver->precision_sloppy = param_coarse_solver->precision;
       param_coarse_solver->precision_precondition = param_coarse_solver->precision_sloppy;

       // solve with the smoother operator for a preconditioned coarse solution, otherwise with
       // the residual operator; the same operator is passed for all three matrix arguments
       if (param.mg_global.coarse_grid_solution_type[param.level+1] == QUDA_MATPC_SOLUTION) {
   Solver *solver = Solver::create(*param_coarse_solver, *matCoarseSmoother, *matCoarseSmoother, *matCoarseSmoother, profile);
         sprintf(coarse_prefix, "MG level %d (%s): ", param.level + 1,
                 param.mg_global.location[param.level + 1] == QUDA_CUDA_FIELD_LOCATION ? "GPU" : "CPU");
         coarse_solver = new PreconditionedSolver(*solver, *matCoarseSmoother->Expose(), *param_coarse_solver, profile, coarse_prefix);
       } else {
   Solver *solver = Solver::create(*param_coarse_solver, *matCoarseResidual, *matCoarseResidual, *matCoarseResidual, profile);
         sprintf(coarse_prefix, "MG level %d (%s): ", param.level + 1,
                 param.mg_global.location[param.level + 1] == QUDA_CUDA_FIELD_LOCATION ? "GPU" : "CPU");
         coarse_solver = new PreconditionedSolver(*solver, *matCoarseResidual->Expose(), *param_coarse_solver, profile, coarse_prefix);
       }

       if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Assigned coarse solver to preconditioned GCR solver\n");
     } else {
       errorQuda("Multigrid cycle type %d not supported", param.cycle_type);
     }
     if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Coarse solver wrapper done\n");

     popLevel(param.level);
   }

   /**
      Release everything owned by this level, including (recursively) the coarser level.

      Fixes relative to the previous version:
      - The coarse-solver wrapper is owned by this level when the cycle is recursive or
        when this is the second-to-coarsest level (the same rule destroyCoarseSolver()
        applies).  The old test compared against Nlevel-1 inside a branch that already
        requires level < Nlevel-1, so it could never be true and the V-cycle wrapper and
        its parameters were leaked.
      - b_tilde is only assigned for a preconditioned smoother solve, so that condition
        is now evaluated before the pointer is inspected.
      - The coarse null vectors are released by iterating the vector itself instead of
        recomputing its length from parameters.
   */
   MG::~MG()
   {
     pushLevel(param.level);

     if (param.level < param.Nlevel - 1) {
       if (coarse) delete coarse;
       // otherwise (V-cycle above the second-to-coarsest level) coarse_solver is an
       // alias of coarse and must not be deleted a second time
       if (param.level == param.Nlevel-2 || param.cycle_type == QUDA_MG_CYCLE_RECURSIVE) {
         if (coarse_solver) delete coarse_solver;
         if (param_coarse_solver) delete param_coarse_solver;
       }

       if (B_coarse) {
         for (auto coarse_vec : *B_coarse)
           if (coarse_vec) delete coarse_vec;
         delete B_coarse;
       }
       if (transfer) delete transfer;
       // matrix wrappers are released before the operators they refer to
       if (matCoarseSmootherSloppy) delete matCoarseSmootherSloppy;
       if (diracCoarseSmootherSloppy) delete diracCoarseSmootherSloppy;
       if (matCoarseSmoother) delete matCoarseSmoother;
       if (diracCoarseSmoother) delete diracCoarseSmoother;
       if (matCoarseResidual) delete matCoarseResidual;
       if (diracCoarseResidual) delete diracCoarseResidual;
       if (postsmoother) delete postsmoother;
       if (param_postsmooth) delete param_postsmooth;
     }

     if (rng) {
       rng->Release();
       delete rng;
     }

     if (presmoother) delete presmoother;
     if (param_presmooth) delete param_presmooth;

     // check the allocation condition first: b_tilde is untouched otherwise
     if (param.smoother_solve_type == QUDA_DIRECT_PC_SOLVE && b_tilde) delete b_tilde;
     if (r) delete r;
     if (r_coarse) delete r_coarse;
     if (x_coarse) delete x_coarse;
     if (tmp_coarse) delete tmp_coarse;
     if (tmp2_coarse) delete tmp2_coarse;

     if (param_coarse) delete param_coarse;

     if (getVerbosity() >= QUDA_VERBOSE) profile.Print();

     popLevel(param.level);
   }

   // FIXME need to make this more robust (implement Solver::flops() for all solvers)
   double MG::flops() const {
     double flops = 0;

     if (param_coarse_solver) {
       flops += param_coarse_solver->gflops * 1e9;
       param_coarse_solver->gflops = 0;
     } else if (param.level < param.Nlevel-1) {
       flops += coarse->flops();
     }

     if (param_presmooth) {
       flops += param_presmooth->gflops * 1e9;
       param_presmooth->gflops = 0;
     }

     if (param_postsmooth) {
       flops += param_postsmooth->gflops * 1e9;
       param_postsmooth->gflops = 0;
     }

     if (transfer) {
       flops += transfer->flops();
     }

     return flops;
   }

   void MG::verify() {
     pushLevel(param.level);

     // temporary fields used for verification
     ColorSpinorParam csParam(*r);
     csParam.create = QUDA_NULL_FIELD_CREATE;
     ColorSpinorField *tmp1 = ColorSpinorField::Create(csParam);
     ColorSpinorField *tmp2 = ColorSpinorField::Create(csParam);
     double deviation;

     QudaPrecision prec = (param.mg_global.precision_null[param.level] < csParam.Precision()) ?
       param.mg_global.precision_null[param.level] :
       csParam.Precision();

     // may want to revisit this---these were relaxed for cases where ghost_precision < precision
     // these were set while hacking in tests of quarter precision ghosts
     double tol = (prec == QUDA_QUARTER_PRECISION || prec == QUDA_HALF_PRECISION) ? 5e-2 : prec == QUDA_SINGLE_PRECISION ? 1e-3 : 1e-8;

     if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Checking 0 = (1 - P P^\\dagger) v_k for %d vectors\n", param.Nvec);

     for (int i=0; i<param.Nvec; i++) {
       // as well as copying to the correct location this also changes basis if necessary
       *tmp1 = *param.B[i];

       transfer->R(*r_coarse, *tmp1);
       transfer->P(*tmp2, *r_coarse);
       deviation = sqrt(xmyNorm(*tmp1, *tmp2) / norm2(*tmp1));

       if (getVerbosity() >= QUDA_VERBOSE)
         printfQuda("Vector %d: norms v_k = %e P^\\dagger v_k = %e P P^\\dagger v_k = %e, L2 relative deviation = %e\n",
                    i, norm2(*tmp1), norm2(*r_coarse), norm2(*tmp2), deviation);
       if (deviation > tol) errorQuda("L2 relative deviation for k=%d failed, %e > %e", i, deviation, tol);
     }

     if (param.mg_global.run_oblique_proj_check) {

       sprintf(prefix, "MG level %d (%s): Null vector Oblique Projections : ", param.level + 1,
               param.location == QUDA_CUDA_FIELD_LOCATION ? "GPU" : "CPU");
       setOutputPrefix(prefix);

       // Oblique projections
       if (getVerbosity() >= QUDA_SUMMARIZE)
         printfQuda("Checking 1 > || (1 - DP(P^dagDP)P^dag) v_k || / || v_k || for %d vectors\n", param.Nvec);

       for (int i = 0; i < param.Nvec; i++) {
         transfer->R(*r_coarse, *(param.B[i]));
         (*coarse_solver)(*x_coarse, *r_coarse); // this needs to be an exact solve to pass
         setOutputPrefix(prefix);                // restore prefix after return from coarse grid
         transfer->P(*tmp2, *x_coarse);
         (*param.matResidual)(*tmp1, *tmp2);
         *tmp2 = *(param.B[i]);
         if (getVerbosity() >= QUDA_SUMMARIZE) {
           printfQuda("Vector %d: norms %e %e\n", i, norm2(*param.B[i]), norm2(*tmp1));
           printfQuda("relative residual = %e\n", sqrt(xmyNorm(*tmp2, *tmp1) / norm2(*param.B[i])));
         }
       }
       sprintf(prefix, "MG level %d (%s): ", param.level + 1, param.location == QUDA_CUDA_FIELD_LOCATION ? "GPU" : "CPU");
       setOutputPrefix(prefix);
     }
 #if 0

     if (getVerbosity() >= QUDA_SUMMARIZE)
       printfQuda("Checking 1 > || (1 - D P (P^\\dagger D P) P^\\dagger v_k || / || v_k || for %d vectors\n",
      param.Nvec);

     for (int i=0; i<param.Nvec; i++) {
       transfer->R(*r_coarse, *(param.B[i]));
       (*coarse)(*x_coarse, *r_coarse); // this needs to be an exact solve to pass
       setOutputPrefix(prefix); // restore output prefix
       transfer->P(*tmp2, *x_coarse);
       param.matResidual(*tmp1,*tmp2);
       *tmp2 = *(param.B[i]);
       if (getVerbosity() >= QUDA_VERBOSE) {
   printfQuda("Vector %d: norms %e %e ", i, norm2(*param.B[i]), norm2(*tmp1));
   printfQuda("relative residual = %e\n", sqrt(xmyNorm(*tmp2, *tmp1) / norm2(*param.B[i])) );
       }
     }
 #endif

     if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Checking 0 = (1 - P^\\dagger P) eta_c\n");

     spinorNoise(*x_coarse, *coarse->rng, QUDA_NOISE_UNIFORM);

     transfer->P(*tmp2, *x_coarse);
     transfer->R(*r_coarse, *tmp2);
     if (getVerbosity() >= QUDA_VERBOSE)
       printfQuda("L2 norms %e %e (fine tmp %e) ", norm2(*x_coarse), norm2(*r_coarse), norm2(*tmp2));

     deviation = sqrt( xmyNorm(*x_coarse, *r_coarse) / norm2(*x_coarse) );
     if (getVerbosity() >= QUDA_VERBOSE) printfQuda("relative deviation = %e\n", deviation);
     if (deviation > tol ) errorQuda("L2 relative deviation = %e > %e failed", deviation, tol);
     if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Checking 0 = (D_c - P^\\dagger D P) (native coarse operator to emulated operator)\n");

     ColorSpinorField *tmp_coarse = param.B[0]->CreateCoarse(param.geoBlockSize, param.spinBlockSize, param.Nvec, r->Precision(), param.mg_global.location[param.level+1]);
     zero(*tmp_coarse);
     zero(*r_coarse);

     spinorNoise(*tmp_coarse, *coarse->rng, QUDA_NOISE_UNIFORM);
     transfer->P(*tmp1, *tmp_coarse);

     if (param.coarse_grid_solution_type == QUDA_MATPC_SOLUTION && param.smoother_solve_type == QUDA_DIRECT_PC_SOLVE) {
       double kappa = diracResidual->Kappa();
       if (param.level==0) {
   diracSmoother->DslashXpay(tmp2->Even(), tmp1->Odd(), QUDA_EVEN_PARITY, tmp1->Even(), -kappa);
   diracSmoother->DslashXpay(tmp2->Odd(), tmp1->Even(), QUDA_ODD_PARITY, tmp1->Odd(), -kappa);
       } else { // this is a hack since the coarse Dslash doesn't properly use the same xpay conventions yet
   diracSmoother->DslashXpay(tmp2->Even(), tmp1->Odd(), QUDA_EVEN_PARITY, tmp1->Even(), 1.0);
   diracSmoother->DslashXpay(tmp2->Odd(), tmp1->Even(), QUDA_ODD_PARITY, tmp1->Odd(), 1.0);
       }
     } else {
       (*param.matResidual)(*tmp2,*tmp1);
     }

     transfer->R(*x_coarse, *tmp2);
     (*param_coarse->matResidual)(*r_coarse, *tmp_coarse);

 #if 0 // enable to print out emulated and actual coarse-grid operator vectors for debugging
     setOutputPrefix("");

     for (int i=0; i<comm_rank(); i++) { // this ensures that we print each rank in order
       if (i==comm_rank()) {
         if (getVerbosity() >= QUDA_VERBOSE) printfQuda("emulated\n");
         for (int x=0; x<x_coarse->Volume(); x++) tmp1->PrintVector(x);

         if (getVerbosity() >= QUDA_VERBOSE) printfQuda("actual\n");
         for (int x=0; x<r_coarse->Volume(); x++) tmp2->PrintVector(x);
       }
       comm_barrier();
     }
     setOutputPrefix(prefix);
 #endif

     double r_nrm = norm2(*r_coarse);
     deviation = sqrt( xmyNorm(*x_coarse, *r_coarse) / norm2(*x_coarse) );

     if (diracResidual->Mu() != 0.0) {
       // When the mu is shifted on the coarse level; we can compute exactly the error we introduce in the check:
       //  it is given by 2*kappa*delta_mu || tmp_coarse ||; where tmp_coarse is the random vector generated for the test
       double delta_factor = param.mg_global.mu_factor[param.level+1] - param.mg_global.mu_factor[param.level];
       if(fabs(delta_factor) > tol ) {
   double delta_a = delta_factor * 2.0 * diracResidual->Kappa() *
     diracResidual->Mu() * transfer->Vectors().TwistFlavor();
   deviation -= fabs(delta_a) * sqrt( norm2(*tmp_coarse) / norm2(*x_coarse) );
   deviation = fabs(deviation);
       }
     }
     if (getVerbosity() >= QUDA_VERBOSE)
       printfQuda("L2 norms: Emulated = %e, Native = %e, relative deviation = %e\n", norm2(*x_coarse), r_nrm, deviation);
     if (deviation > tol) errorQuda("failed, deviation = %e (tol=%e)", deviation, tol);

     // check the preconditioned operator construction on the lower level if applicable
     bool coarse_was_preconditioned = (param.mg_global.coarse_grid_solution_type[param.level + 1] == QUDA_MATPC_SOLUTION
                                       && param.mg_global.smoother_solve_type[param.level + 1] == QUDA_DIRECT_PC_SOLVE);
     if (coarse_was_preconditioned) {
       // check eo
       if (getVerbosity() >= QUDA_SUMMARIZE)
         printfQuda("Checking Deo of preconditioned operator 0 = \\hat{D}_c - A^{-1} D_c\n");
       static_cast<DiracCoarse *>(diracCoarseResidual)->Dslash(r_coarse->Even(), tmp_coarse->Odd(), QUDA_EVEN_PARITY);
       static_cast<DiracCoarse *>(diracCoarseResidual)->CloverInv(x_coarse->Even(), r_coarse->Even(), QUDA_EVEN_PARITY);
       static_cast<DiracCoarsePC *>(diracCoarseSmoother)->Dslash(r_coarse->Even(), tmp_coarse->Odd(), QUDA_EVEN_PARITY);
       r_nrm = norm2(r_coarse->Even());
       deviation = sqrt(xmyNorm(x_coarse->Even(), r_coarse->Even()) / norm2(x_coarse->Even()));
       if (getVerbosity() >= QUDA_VERBOSE)
         printfQuda("L2 norms: Emulated = %e, Native = %e, relative deviation = %e\n", norm2(x_coarse->Even()), r_nrm,
                    deviation);
       if (deviation > tol) errorQuda("failed, deviation = %e (tol=%e)", deviation, tol);

       // check Doe
       if (getVerbosity() >= QUDA_SUMMARIZE)
         printfQuda("Checking Doe of preconditioned operator 0 = \\hat{D}_c - A^{-1} D_c\n");
       static_cast<DiracCoarse *>(diracCoarseResidual)->Dslash(r_coarse->Odd(), tmp_coarse->Even(), QUDA_ODD_PARITY);
       static_cast<DiracCoarse *>(diracCoarseResidual)->CloverInv(x_coarse->Odd(), r_coarse->Odd(), QUDA_ODD_PARITY);
       static_cast<DiracCoarsePC *>(diracCoarseSmoother)->Dslash(r_coarse->Odd(), tmp_coarse->Even(), QUDA_ODD_PARITY);
       r_nrm = norm2(r_coarse->Odd());
       deviation = sqrt(xmyNorm(x_coarse->Odd(), r_coarse->Odd()) / norm2(x_coarse->Odd()));
       if (getVerbosity() >= QUDA_VERBOSE)
         printfQuda("L2 norms: Emulated = %e, Native = %e, relative deviation = %e\n", norm2(x_coarse->Odd()), r_nrm,
                    deviation);
       if (deviation > tol) errorQuda("failed, deviation = %e (tol=%e)", deviation, tol);
     }

     // here we check that the Hermitian conjugate operator is working
     // as expected for both the smoother and residual Dirac operators
     if (param.coarse_grid_solution_type == QUDA_MATPC_SOLUTION && param.smoother_solve_type == QUDA_DIRECT_PC_SOLVE) {
       if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Checking normality of preconditioned operator\n");
       diracSmoother->MdagM(tmp2->Even(), tmp1->Odd());
       Complex dot = cDotProduct(tmp2->Even(),tmp1->Odd());
       double deviation = std::fabs(dot.imag()) / std::fabs(dot.real());
       if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Smoother normal operator test (eta^dag M^dag M eta): real=%e imag=%e, relative imaginary deviation=%e\n",
                  real(dot), imag(dot), deviation);
       if (deviation > tol) errorQuda("failed, deviation = %e (tol=%e)", deviation, tol);
     }

     { // normal operator check for residual operator
       if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Checking normality of residual operator\n");
       diracResidual->MdagM(*tmp2, *tmp1);
       Complex dot = cDotProduct(*tmp1,*tmp2);
       double deviation = std::fabs(dot.imag()) / std::fabs(dot.real());
       if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Normal operator test (eta^dag M^dag M eta): real=%e imag=%e, relative imaginary deviation=%e\n",
                  real(dot), imag(dot), deviation);
       if (deviation > tol) errorQuda("failed, deviation = %e (tol=%e)", deviation, tol);
     }

     if (param.mg_global.run_low_mode_check) {

       sprintf(prefix, "MG level %d (%s): eigenvector overlap : ", param.level + 1,
               param.location == QUDA_CUDA_FIELD_LOCATION ? "GPU" : "CPU");
       setOutputPrefix(prefix);

       // Reuse the space for the Null vectors. By this point,
       // the coarse grid has already been constructed.
       generateEigenVectors();

       for (int i = 0; i < param.Nvec; i++) {

         // Restrict Evec, place result in r_coarse
         transfer->R(*r_coarse, *param.B[i]);
         // Prolong r_coarse, place result in tmp2
         transfer->P(*tmp2, *r_coarse);

         printfQuda("Vector %d: norms v_k = %e P^dag v_k = %e PP^dag v_k = %e\n", i, norm2(*param.B[i]),
                    norm2(*r_coarse), norm2(*tmp2));

         // Compare v_k and PP^dag v_k.
         deviation = sqrt(xmyNorm(*param.B[i], *tmp2) / norm2(*param.B[i]));
         printfQuda("L2 relative deviation = %e\n", deviation);

         if (param.mg_global.run_oblique_proj_check) {

           sprintf(prefix, "MG level %d (%s): eigenvector Oblique Projections : ", param.level + 1,
                   param.location == QUDA_CUDA_FIELD_LOCATION ? "GPU" : "CPU");
           setOutputPrefix(prefix);

           // Oblique projections
           if (getVerbosity() >= QUDA_SUMMARIZE)
             printfQuda("Checking 1 > || (1 - DP(P^dagDP)P^dag) v_k || / || v_k || for vector %d\n", i);

           transfer->R(*r_coarse, *param.B[i]);
           (*coarse_solver)(*x_coarse, *r_coarse); // this needs to be an exact solve to pass
           setOutputPrefix(prefix);                // restore prefix after return from coarse grid
           transfer->P(*tmp2, *x_coarse);
           (*param.matResidual)(*tmp1, *tmp2);

           if (getVerbosity() >= QUDA_SUMMARIZE) {
             printfQuda("Vector %d: norms v_k %e DP(P^dagDP)P^dag v_k %e\n", i, norm2(*param.B[i]), norm2(*tmp1));
             printfQuda("L2 relative deviation = %e\n", sqrt(xmyNorm(*param.B[i], *tmp1) / norm2(*param.B[i])));
           }
         }

         sprintf(prefix, "MG level %d (%s): ", param.level + 1,
                 param.location == QUDA_CUDA_FIELD_LOCATION ? "GPU" : "CPU");
         setOutputPrefix(prefix);
       }
     }

     delete tmp1;
     delete tmp2;
     delete tmp_coarse;

     if (param.level < param.Nlevel - 2) coarse->verify();

     popLevel(param.level);
   }

   // Apply this multigrid level to the right-hand side b, writing the result into x.
   //
   // On every level except the last one (param.level < param.Nlevel - 1) the sequence is:
   // pre-smoothing, restriction of the residual, a call to coarse_solver, prolongation of
   // the coarse result (added into the solution) and post-smoothing. On the last level
   // only the presmoother is applied between prepare() and reconstruct().
   //
   // x  output field; also handed to diracSmoother->prepare()/reconstruct()
   // b  right-hand side; its SiteSubset() decides the outer solution type
   void MG::operator()(ColorSpinorField &x, ColorSpinorField &b) {
     pushOutputPrefix(prefix);

     // NOTE(review): transfer is dereferenced here unconditionally, while the restriction
     // further down is guarded by `if (transfer)` - confirm transfer cannot be null here.
     if (param.level < param.Nlevel - 1) { // set parity for the solver in the transfer operator
       QudaSiteSubset site_subset
         = param.coarse_grid_solution_type == QUDA_MATPC_SOLUTION ? QUDA_PARITY_SITE_SUBSET : QUDA_FULL_SITE_SUBSET;
       QudaMatPCType matpc_type = param.mg_global.invert_param->matpc_type;
       QudaParity parity = (matpc_type == QUDA_MATPC_EVEN_EVEN || matpc_type == QUDA_MATPC_EVEN_EVEN_ASYMMETRIC) ?
         QUDA_EVEN_PARITY :
         QUDA_ODD_PARITY;
       transfer->setSiteSubset(site_subset, parity); // use this to force location of transfer
     }

     // if input vector is single parity then we must be solving the
     // preconditioned system in general this can only happen on the
     // top level
     QudaSolutionType outer_solution_type = b.SiteSubset() == QUDA_FULL_SITE_SUBSET ? QUDA_MAT_SOLUTION : QUDA_MATPC_SOLUTION;
     QudaSolutionType inner_solution_type = param.coarse_grid_solution_type;

     if (debug) printfQuda("outer_solution_type = %d, inner_solution_type = %d\n", outer_solution_type, inner_solution_type);

     // reject the combinations this routine does not handle
     if ( outer_solution_type == QUDA_MATPC_SOLUTION && inner_solution_type == QUDA_MAT_SOLUTION)
       errorQuda("Unsupported solution type combination");

     if ( inner_solution_type == QUDA_MATPC_SOLUTION && param.smoother_solve_type != QUDA_DIRECT_PC_SOLVE)
       errorQuda("For this coarse grid solution type, a preconditioned smoother is required");

     if ( debug ) printfQuda("entering V-cycle with x2=%e, r2=%e\n", norm2(x), norm2(b));

     if (param.level < param.Nlevel-1) {
       //transfer->setTransferGPU(false); // use this to force location of transfer (need to check if still works for multi-level)

       // do the pre smoothing
       if ( debug ) printfQuda("pre-smoothing b2=%e\n", norm2(b));

       // in/out are filled in by diracSmoother->prepare() below
       ColorSpinorField *out=nullptr, *in=nullptr;

       // residual aliases *r when b is a full-lattice field, and r->Even() otherwise
       ColorSpinorField &residual = b.SiteSubset() == QUDA_FULL_SITE_SUBSET ? *r : r->Even();

       // FIXME only need to make a copy if not preconditioning
       residual = b; // copy source vector since we will overwrite source with iterated residual

       diracSmoother->prepare(in, out, x, residual, outer_solution_type);

       // b_tilde holds either a copy of preconditioned source or a pointer to original source
       if (param.smoother_solve_type == QUDA_DIRECT_PC_SOLVE) *b_tilde = *in;
       else b_tilde = &b;
       // without a presmoother the smoother output is simply zeroed
       if (presmoother) (*presmoother)(*out, *in); else zero(*out);
       // the coarse correction is later added into `solution`: x itself when inner and
       // outer solution types agree, x.Even() otherwise
       ColorSpinorField &solution = inner_solution_type == outer_solution_type ? x : x.Even();
       diracSmoother->reconstruct(solution, b, inner_solution_type);

       // if using preconditioned smoother then need to reconstruct full residual
       // FIXME extend this check for precision, Schwarz, etc.
       bool use_solver_residual =
   ( (param.smoother_solve_type == QUDA_DIRECT_PC_SOLVE && inner_solution_type == QUDA_MATPC_SOLUTION) ||
     (param.smoother_solve_type == QUDA_DIRECT_SOLVE && inner_solution_type == QUDA_MAT_SOLUTION) )
   ? true : false;
       // FIXME this is currently borked if inner solver is preconditioned
       double r2 = 0.0; // only given a meaningful value when debug is set
       if (use_solver_residual) {
   // nothing is recomputed here: *r is used as left by the smoother
   if (debug) r2 = norm2(*r);
       } else {
   // apply matResidual to x, then combine with b; presumably both branches leave
   // *r = b - M x - TODO confirm against the blas xmyNorm/axpby conventions
   (*param.matResidual)(*r, x);
   if (debug) r2 = xmyNorm(b, *r);
   else axpby(1.0, b, -1.0, *r);
       }

       // We need this to ensure that the coarse level has been created.
       // e.g. in case of iterative setup with MG we use just pre- and post-smoothing at the first iteration.
       if (transfer) {
         // restrict to the coarse grid
         transfer->R(*r_coarse, residual);
         if ( debug ) printfQuda("after pre-smoothing x2 = %e, r2 = %e, r_coarse2 = %e\n", norm2(x), r2, norm2(*r_coarse));

         // recurse to the next lower level
         (*coarse_solver)(*x_coarse, *r_coarse);
         if (debug) printfQuda("after coarse solve x_coarse2 = %e r_coarse2 = %e\n", norm2(*x_coarse), norm2(*r_coarse));

         // prolongate back to this grid
         ColorSpinorField &x_coarse_2_fine = inner_solution_type == QUDA_MAT_SOLUTION ? *r : r->Even(); // define according to inner solution type
         transfer->P(x_coarse_2_fine, *x_coarse); // repurpose residual storage
         xpy(x_coarse_2_fine, solution); // sum to solution FIXME - sum should be done inside the transfer operator
         if ( debug ) {
           // at this point *r holds the prolongated correction, not a residual
           printfQuda("Prolongated coarse solution y2 = %e\n", norm2(*r));
           printfQuda("after coarse-grid correction x2 = %e, r2 = %e\n", norm2(x), norm2(*r));
         }
       }

       // do the post smoothing
       //residual = outer_solution_type == QUDA_MAT_SOLUTION ? *r : r->Even(); // refine for outer solution type
       // choose the post-smoother right-hand side: the saved b_tilde for a preconditioned
       // smoother, otherwise a fresh copy of b placed in r
       if (param.smoother_solve_type == QUDA_DIRECT_PC_SOLVE) {
   in = b_tilde;
       } else { // this incurs unnecessary copying
   *r = b;
   in = r;
       }

       // we should keep a copy of the prepared right hand side as we've already destroyed it
       //dirac.prepare(in, out, solution, residual, inner_solution_type);

       // `out` still points at the field set up by prepare() above
       if (postsmoother) (*postsmoother)(*out, *in); // for a preconditioned inner solve, in should be the original prepared rhs

       diracSmoother->reconstruct(x, b, outer_solution_type);

     } else { // do the coarse grid solve

       // last level: only the presmoother (if any) is applied
       ColorSpinorField *out=nullptr, *in=nullptr;
       diracSmoother->prepare(in, out, x, b, outer_solution_type);
       if (presmoother) (*presmoother)(*out, *in);
       diracSmoother->reconstruct(x, b, outer_solution_type);
     }

     // debug only: recompute the residual with matResidual and report its norm
     if ( debug ) {
       (*param.matResidual)(*r, x);
       double r2 = xmyNorm(b, *r);
       printfQuda("leaving V-cycle with x2=%e, r2=%e\n", norm2(x), r2);
     }

     popOutputPrefix();
   }

   // supports separate reading or single file read
   // Load the vectors B for this level through EigenSolver::loadVectors.
   // The file name is "<vec_infile[level]>_level_<level>_nvec_<n_vec[level]>".
   void MG::loadVectors(std::vector<ColorSpinorField *> &B)
   {
     // switch the global profiler from the INIT category to the IO category
     profile_global.TPSTOP(QUDA_PROFILE_INIT);
     profile_global.TPSTART(QUDA_PROFILE_IO);
     pushLevel(param.level);

     const int level = param.level;
     std::string filename = std::string(param.mg_global.vec_infile[level]) + "_level_" + std::to_string(level)
       + "_nvec_" + std::to_string(param.mg_global.n_vec[level]);
     EigenSolver::loadVectors(B, filename);

     popLevel(param.level);
     // and switch the profiler back to INIT
     profile_global.TPSTOP(QUDA_PROFILE_IO);
     profile_global.TPSTART(QUDA_PROFILE_INIT);
   }

   // Save the vectors B for this level through EigenSolver::saveVectors.
   // The file name is "<vec_outfile[level]>_level_<level>_nvec_<n_vec[level]>".
   void MG::saveVectors(const std::vector<ColorSpinorField *> &B) const
   {
     // switch the global profiler from the INIT category to the IO category
     profile_global.TPSTOP(QUDA_PROFILE_INIT);
     profile_global.TPSTART(QUDA_PROFILE_IO);
     pushLevel(param.level);

     const int level = param.level;
     std::string filename = std::string(param.mg_global.vec_outfile[level]) + "_level_" + std::to_string(level)
       + "_nvec_" + std::to_string(param.mg_global.n_vec[level]);
     EigenSolver::saveVectors(B, filename);

     popLevel(param.level);
     // and switch the profiler back to INIT
     profile_global.TPSTOP(QUDA_PROFILE_IO);
     profile_global.TPSTART(QUDA_PROFILE_INIT);
   }

   // Save this level's param.B and then recurse into the coarse level
   // for as long as param.level < param.Nlevel - 2.
   void MG::dumpNullVectors() const
   {
     saveVectors(param.B);

     const bool recurse = param.level < param.Nlevel - 2;
     if (recurse) { coarse->dumpNullVectors(); }
   }

   // Generate (or refresh) the null-space vectors B for this level.
   //
   // For each of param.mg_global.num_setup_iter[level] iterations this optionally
   // orthonormalizes B, runs the configured setup solver once per vector, writes the
   // result back into B, optionally orthonormalizes again and, when the setup solver
   // is QUDA_MG_INVERTER, calls reset() and updates the coarse level.
   //
   // B        vectors used as solver input and overwritten with the solver output
   // refresh  selects setup_maxiter_refresh instead of setup_maxiter as iteration cap
   void MG::generateNullVectors(std::vector<ColorSpinorField *> &B, bool refresh)
   {
     pushLevel(param.level);

     SolverParam solverParam(param); // Set solver field parameters:
     // set null-space generation options - need to expose these
     solverParam.maxiter
       = refresh ? param.mg_global.setup_maxiter_refresh[param.level] : param.mg_global.setup_maxiter[param.level];
     solverParam.tol = param.mg_global.setup_tol[param.level];
     solverParam.use_init_guess = QUDA_USE_INIT_GUESS_YES;
     solverParam.delta = 1e-1;
     solverParam.inv_type = param.mg_global.setup_inv_type[param.level];
     // Hard coded for now...
     if (solverParam.inv_type == QUDA_CA_CG_INVERTER || solverParam.inv_type == QUDA_CA_CGNE_INVERTER
         || solverParam.inv_type == QUDA_CA_CGNR_INVERTER || solverParam.inv_type == QUDA_CA_GCR_INVERTER) {
       solverParam.ca_basis = param.mg_global.setup_ca_basis[param.level];
       solverParam.ca_lambda_min = param.mg_global.setup_ca_lambda_min[param.level];
       solverParam.ca_lambda_max = param.mg_global.setup_ca_lambda_max[param.level];
       solverParam.Nkrylov = param.mg_global.setup_ca_basis_size[param.level];
     } else {
       solverParam.Nkrylov = 4;
     }
     solverParam.pipeline
       = (solverParam.inv_type == QUDA_BICGSTAB_INVERTER ? 0 : 4); // FIXME: pipeline != 0 breaks BICGSTAB
     solverParam.precision = r->Precision();

     if (param.level == 0) { // this enables half precision on the fine grid only if set
       solverParam.precision_sloppy = param.mg_global.invert_param->cuda_prec_precondition;
       solverParam.precision_precondition = param.mg_global.invert_param->cuda_prec_precondition;
     } else {
       solverParam.precision_precondition = solverParam.precision;
     }
     solverParam.residual_type = static_cast<QudaResidualType>(QUDA_L2_RELATIVE_RESIDUAL);
     solverParam.compute_null_vector = QUDA_COMPUTE_NULL_VECTOR_YES;

     // work vectors for the setup solves, created from the parameters of B[0]
     ColorSpinorParam csParam(*B[0]);                            // Create spinor field parameters:
     csParam.setPrecision(r->Precision(), r->Precision(), true); // ensure native ordering
     csParam.location = QUDA_CUDA_FIELD_LOCATION; // hard code to GPU location for null-space generation for now
     csParam.gammaBasis = QUDA_UKQCD_GAMMA_BASIS;
     csParam.create = QUDA_ZERO_FIELD_CREATE;
     ColorSpinorField *b = static_cast<ColorSpinorField *>(new cudaColorSpinorField(csParam));
     ColorSpinorField *x = static_cast<ColorSpinorField *>(new cudaColorSpinorField(csParam));

     csParam.create = QUDA_NULL_FIELD_CREATE;

     // if we not using GCR/MG smoother then we need to switch off Schwarz since regular Krylov solvers do not support it
     bool schwarz_reset = solverParam.inv_type != QUDA_MG_INVERTER
       && param.mg_global.smoother_schwarz_type[param.level] != QUDA_INVALID_SCHWARZ;
     if (schwarz_reset) {
       // the message now ends in a newline so it does not run into the next output line
       if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Disabling Schwarz for null-space finding\n");
       int commDim[QUDA_MAX_DIM];
       for (int i = 0; i < QUDA_MAX_DIM; i++) commDim[i] = 1;
       diracSmootherSloppy->setCommDim(commDim);
     }

     // if quarter precision halo, promote for null-space finding to half precision
     QudaPrecision halo_precision = diracSmootherSloppy->HaloPrecision();
     if (halo_precision == QUDA_QUARTER_PRECISION) diracSmootherSloppy->setHaloPrecision(QUDA_HALF_PRECISION);

     // CG and CA-CG are run on the MdagM operator built from the smoother Dirac operators;
     // evaluated once here, before the MG branch temporarily rewrites solverParam.inv_type
     const bool use_normal_op
       = (solverParam.inv_type == QUDA_CG_INVERTER || solverParam.inv_type == QUDA_CA_CG_INVERTER);

     Solver *solve;
     DiracMdagM *mdagm = use_normal_op ? new DiracMdagM(*diracSmoother) : nullptr;
     DiracMdagM *mdagmSloppy = use_normal_op ? new DiracMdagM(*diracSmootherSloppy) : nullptr;
     if (use_normal_op) {
       solve = Solver::create(solverParam, *mdagm, *mdagmSloppy, *mdagmSloppy, profile);
     } else if (solverParam.inv_type == QUDA_MG_INVERTER) {
       // in case MG has not been created, we create the Smoother
       if (!transfer) createSmoother();

       // run GCR with the MG as a preconditioner
       solverParam.inv_type_precondition = QUDA_MG_INVERTER;
       solverParam.schwarz_type = QUDA_ADDITIVE_SCHWARZ;
       solverParam.precondition_cycle = 1;
       solverParam.tol_precondition = 1e-1;
       solverParam.maxiter_precondition = 1;
       solverParam.omega = 1.0;
       solverParam.verbosity_precondition = param.mg_global.verbosity[param.level+1];
       solverParam.precision_sloppy = solverParam.precision;
       solverParam.compute_true_res = 0;
       solverParam.preconditioner = this;

       // inv_type is switched to GCR only while the solver is created, then restored so
       // the QUDA_MG_INVERTER test at the end of each setup iteration still fires
       solverParam.inv_type = QUDA_GCR_INVERTER;
       solve = Solver::create(solverParam, *param.matSmooth, *param.matSmooth, *param.matSmoothSloppy, profile);
       solverParam.inv_type = QUDA_MG_INVERTER;
     } else {
       solve = Solver::create(solverParam, *param.matSmooth, *param.matSmoothSloppy, *param.matSmoothSloppy, profile);
     }

     for (int si = 0; si < param.mg_global.num_setup_iter[param.level]; si++) {
       if (getVerbosity() >= QUDA_VERBOSE)
         printfQuda("Running vectors setup on level %d iter %d of %d\n", param.level, si + 1,
                    param.mg_global.num_setup_iter[param.level]);

       // global orthonormalization of the initial null-space vectors
       // NOTE(review): this pass compares nrm2 against 1e-16 while the post-solve pass
       // below compares sqrt(nrm2); both are kept as found - confirm which is intended.
       if (param.mg_global.pre_orthonormalize) {
         for (int i = 0; i < (int)B.size(); i++) {
           for (int j = 0; j < i; j++) {
             Complex alpha = cDotProduct(*B[j], *B[i]); // <j,i>
             caxpy(-alpha, *B[j], *B[i]);               // i-<j,i>j
           }
           double nrm2 = norm2(*B[i]);
           if (nrm2 > 1e-16) ax(1.0 / sqrt(nrm2), *B[i]); // i/<i,i>
           else errorQuda("\nCannot normalize %d vector\n", i); // i is an int, so %d rather than %u
         }
       }

       // launch solver for each source
       for (int i = 0; i < (int)B.size(); i++) {
         if (param.mg_global.setup_type == QUDA_TEST_VECTOR_SETUP) { // DDalphaAMG test vector idea
           *b = *B[i];  // inverting against the vector
           zero(*x);    // with zero initial guess
         } else {
           // otherwise the vector is the initial guess and the right-hand side is zero
           *x = *B[i];
           zero(*b);
         }

         if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Initial guess = %g\n", norm2(*x));
         if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Initial rhs = %g\n", norm2(*b));

         ColorSpinorField *out = nullptr, *in = nullptr;
         diracSmoother->prepare(in, out, *x, *b, QUDA_MAT_SOLUTION);
         (*solve)(*out, *in);
         diracSmoother->reconstruct(*x, *b, QUDA_MAT_SOLUTION);

         if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Solution = %g\n", norm2(*x));
         *B[i] = *x;
       }

       // global orthonormalization of the generated null-space vectors
       if (param.mg_global.post_orthonormalize) {
         for (int i = 0; i < (int)B.size(); i++) {
           for (int j = 0; j < i; j++) {
             Complex alpha = cDotProduct(*B[j], *B[i]); // <j,i>
             caxpy(-alpha, *B[j], *B[i]);               // i-<j,i>j
           }
           double nrm2 = norm2(*B[i]);
           if (sqrt(nrm2) > 1e-16) ax(1.0 / sqrt(nrm2), *B[i]); // i/<i,i>
           else errorQuda("\nCannot normalize %d vector (nrm=%e)\n", i, sqrt(nrm2));
         }
       }

       if (solverParam.inv_type == QUDA_MG_INVERTER) {

         if (transfer) {
           resetTransfer = true;
           reset();
           if (param.level < param.Nlevel - 2) {
             if (param.mg_global.generate_all_levels == QUDA_BOOLEAN_TRUE) {
               coarse->generateNullVectors(*B_coarse, refresh);
             } else {
               if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Restricting null space vectors\n");
               for (int i = 0; i < param.Nvec; i++) {
                 zero(*(*B_coarse)[i]);
                 transfer->R(*(*B_coarse)[i], *(param.B[i]));
               }
               // rebuild the transfer operator in the coarse level
               coarse->resetTransfer = true;
               coarse->reset();
             }
           }
         } else {
           reset();
         }
       }
     }

     // deleting a null pointer is a no-op, so no guards are needed for the MdagM wrappers
     delete solve;
     delete mdagm;
     delete mdagmSloppy;

     diracSmootherSloppy->setHaloPrecision(halo_precision); // restore halo precision

     delete x;
     delete b;

     // reenable Schwarz
     if (schwarz_reset) {
       if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Reenabling Schwarz for null-space finding\n");
       int commDim[QUDA_MAX_DIM];
       for (int i = 0; i < QUDA_MAX_DIM; i++) commDim[i] = 0;
       diracSmootherSloppy->setCommDim(commDim);
     }

     if (param.mg_global.vec_store[param.level] == QUDA_BOOLEAN_TRUE) { // conditional store of null vectors
       saveVectors(B);
     }

     popLevel(param.level);
   }

   // generate a full span of free vectors.
   // FIXME: Assumes fine level is SU(3).
   // Fill B with a fixed set of "free field" vectors chosen from the colour/spin
   // layout of B[0], then optionally normalize each vector.
   //
   //   Ncolor == 3, Nspin == 4 : requires 6 vectors, built from constant sources
   //   Ncolor == 3, Nspin == 1 : requires 24 vectors, built from corner sources
   //   otherwise, Nspin 2 or 1 : requires Ncolor vectors, built from constant sources
   //
   // Any other combination, a wrong vector count, or an empty B ends in errorQuda.
   void MG::buildFreeVectors(std::vector<ColorSpinorField *> &B)
   {
     pushLevel(param.level);

     // B[0] is dereferenced below to read the layout, so reject an empty set up front
     if (B.empty()) errorQuda("\nError in MG::buildFreeVectors: no vectors supplied");

     const int Nvec = B.size();

     // Given the number of colors and spins, figure out if the number
     // of vectors in 'B' makes sense.
     const int Ncolor = B[0]->Ncolor();
     const int Nspin = B[0]->Nspin();

     // Zero every vector in B and return a newly allocated, zero-created scratch
     // field built from the parameters of B[0]. The caller deletes it.
     auto resetAndMakeScratch = [&B]() -> ColorSpinorField * {
       for (auto *v : B) zero(*v);
       ColorSpinorParam csParam(*B[0]);
       csParam.create = QUDA_ZERO_FIELD_CREATE;
       return ColorSpinorField::Create(csParam);
     };

     if (Ncolor == 3) { // fine level
       if (Nspin == 4) { // Wilson or Twisted Mass (singlet)
         // There needs to be 6 null vectors -> 12 after chirality.
         if (Nvec != 6) errorQuda("\nError in MG::buildFreeVectors: Wilson-type fermions require Nvec = 6");

         if (getVerbosity() >= QUDA_VERBOSE)
           printfQuda("Building %d free field vectors for Wilson-type fermions\n", Nvec);

         ColorSpinorField *tmp = resetAndMakeScratch();

         // one vector per (color, s) with s in {0, 1}: the sum of the sources at spin s and s + 2
         int counter = 0;
         for (int c = 0; c < Ncolor; c++) {
           for (int s = 0; s < 2; s++) {
             tmp->Source(QUDA_CONSTANT_SOURCE, 1, s, c);
             xpy(*tmp, *B[counter]);
             tmp->Source(QUDA_CONSTANT_SOURCE, 1, s + 2, c);
             xpy(*tmp, *B[counter]);
             counter++;
           }
         }

         delete tmp;
       } else if (Nspin == 1) { // Staggered
         // There needs to be 24 null vectors -> 48 after chirality.
         if (Nvec != 24) errorQuda("\nError in MG::buildFreeVectors: Staggered-type fermions require Nvec = 24\n");

         if (getVerbosity() >= QUDA_VERBOSE)
           printfQuda("Building %d free field vectors for Staggered-type fermions\n", Nvec);

         ColorSpinorField *tmp = resetAndMakeScratch();

         // Build free null vectors.
         for (int c = 0; c < Ncolor; c++) {
           // Need to pair an even+odd corner together
           // since they'll get split up.
           // Corners 2k and 2k+1 (0000/0001, 0010/0011, ..., 1110/1111) go into B[8 * c + k].
           for (int pair = 0; pair < 8; pair++) {
             tmp->Source(QUDA_CORNER_SOURCE, 1, 2 * pair, c);
             xpy(*tmp, *B[8 * c + pair]);
             tmp->Source(QUDA_CORNER_SOURCE, 1, 2 * pair + 1, c);
             xpy(*tmp, *B[8 * c + pair]);
           }
         }

         delete tmp;
       } else {
         errorQuda("\nError in MG::buildFreeVectors: Unsupported combo of Nc %d, Nspin %d", Ncolor, Nspin);
       }
     } else { // coarse level
       if (Nspin == 2) {
         // There needs to be Ncolor null vectors.
         if (Nvec != Ncolor) errorQuda("\nError in MG::buildFreeVectors: Coarse fermions require Nvec = Ncolor");

         if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Building %d free field vectors for Coarse fermions\n", Ncolor);

         ColorSpinorField *tmp = resetAndMakeScratch();

         // one vector per color: the sum of the sources at spin 0 and spin 1
         for (int c = 0; c < Ncolor; c++) {
           tmp->Source(QUDA_CONSTANT_SOURCE, 1, 0, c);
           xpy(*tmp, *B[c]);
           tmp->Source(QUDA_CONSTANT_SOURCE, 1, 1, c);
           xpy(*tmp, *B[c]);
         }

         delete tmp;
       } else if (Nspin == 1) {
         // There needs to be Ncolor null vectors.
         if (Nvec != Ncolor) errorQuda("\nError in MG::buildFreeVectors: Coarse fermions require Nvec = Ncolor");

         if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Building %d free field vectors for Coarse fermions\n", Ncolor);

         ColorSpinorField *tmp = resetAndMakeScratch();

         // one vector per color from the single spin component
         for (int c = 0; c < Ncolor; c++) {
           tmp->Source(QUDA_CONSTANT_SOURCE, 1, 0, c);
           xpy(*tmp, *B[c]);
         }

         delete tmp;
       } else {
         errorQuda("\nError in MG::buildFreeVectors: Unexpected Nspin = %d for coarse fermions", Nspin);
       }
     }

     // normalization of the generated vectors (each vector is only rescaled here;
     // no projection against the other vectors is performed)
     if (param.mg_global.post_orthonormalize) {
       for (int i = 0; i < Nvec; i++) {
         double nrm2 = norm2(*B[i]);
         if (nrm2 > 1e-16) ax(1.0 / sqrt(nrm2), *B[i]); // i/<i,i>
         else errorQuda("\nCannot normalize %d vector\n", i); // i is an int, so %d rather than %u
       }
     }

     if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Done building free vectors\n");

     popLevel(param.level);
   }

   void MG::generateEigenVectors()
   {
     pushLevel(param.level);

     // Extract eigensolver params
     int nConv = param.mg_global.eig_param[param.level]->nConv;
     bool dagger = param.mg_global.eig_param[param.level]->use_dagger;
     bool normop = param.mg_global.eig_param[param.level]->use_norm_op;

     // Dummy array to keep the eigensolver happy.
     std::vector<Complex> evals(nConv, 0.0);

     std::vector<ColorSpinorField *> B_evecs;
     ColorSpinorParam csParam(*param.B[0]);
     csParam.gammaBasis = QUDA_UKQCD_GAMMA_BASIS;
     csParam.create = QUDA_ZERO_FIELD_CREATE;
     // This is the vector precision used by matResidual
     csParam.setPrecision(param.mg_global.invert_param->cuda_prec_sloppy, QUDA_INVALID_PRECISION, true);

     for (int i = 0; i < nConv; i++) B_evecs.push_back(ColorSpinorField::Create(csParam));

     // before entering the eigen solver, lets free the B vectors to save some memory
     ColorSpinorParam bParam(*param.B[0]);
     for (int i = 0; i < (int)param.B.size(); i++) delete param.B[i];

     EigenSolver *eig_solve;
     if (!normop && !dagger) {
       DiracM *mat = new DiracM(*diracResidual);
       eig_solve = EigenSolver::create(param.mg_global.eig_param[param.level], *mat, profile);
       (*eig_solve)(B_evecs, evals);
       delete eig_solve;
       delete mat;
     } else if (!normop && dagger) {
       DiracMdag *mat = new DiracMdag(*diracResidual);
       eig_solve = EigenSolver::create(param.mg_global.eig_param[param.level], *mat, profile);
       (*eig_solve)(B_evecs, evals);
       delete eig_solve;
       delete mat;
     } else if (normop && !dagger) {
       DiracMdagM *mat = new DiracMdagM(*diracResidual);
       eig_solve = EigenSolver::create(param.mg_global.eig_param[param.level], *mat, profile);
       (*eig_solve)(B_evecs, evals);
       delete eig_solve;
       delete mat;
     } else if (normop && dagger) {
       DiracMMdag *mat = new DiracMMdag(*diracResidual);
       eig_solve = EigenSolver::create(param.mg_global.eig_param[param.level], *mat, profile);
       (*eig_solve)(B_evecs, evals);
       delete eig_solve;
       delete mat;
     }

     // now reallocate the B vectors copy in e-vectors
     for (int i = 0; i < (int)param.B.size(); i++) {
       param.B[i] = ColorSpinorField::Create(bParam);
       *param.B[i] = *B_evecs[i];
     }

     // Local clean-up
     for (auto b : B_evecs) { delete b; }

     // only save if outfile is defined
     if (strcmp(param.mg_global.vec_outfile[param.level], "") != 0) { saveVectors(param.B); }

     popLevel(param.level);
   }

 } // namespace quda
tmp2
cudaColorSpinorField * tmp2
Definition: dslash_ctest.cpp:40

quda::DiracCoarsePC
Definition: dirac_quda.h:972

quda::SolverParam::ca_basis
QudaCABasis ca_basis
Definition: invert_quda.h:208

quda::SolverParam::mg_instance
bool mg_instance
whether to use a global or local (node) reduction for this solver
Definition: invert_quda.h:248

quda::SolverParam::compute_true_res
bool compute_true_res
Definition: invert_quda.h:124

quda::SolverParam::global_reduction
bool global_reduction
whether to use a global or local (node) reduction for this solver
Definition: invert_quda.h:243

QUDA_VERBOSE
Definition: enum_quda.h:265

comm_rank
int comm_rank(void)
Definition: comm_mpi.cpp:82

quda::SolverParam::return_residual
bool return_residual
Definition: invert_quda.h:158

quda::SolverParam::schwarz_type
QudaSchwarzType schwarz_type
Definition: invert_quda.h:217

quda::RNG::Init
void Init()
Initialize CURAND RNG states.
Definition: random.cu:122

quda::MGParam::matSmoothSloppy
DiracMatrix * matSmoothSloppy
Definition: multigrid.h:84

quda::SolverParam::maxiter_precondition
int maxiter_precondition
Definition: invert_quda.h:202

quda::MGParam::global_reduction
QudaBoolean global_reduction
Definition: multigrid.h:75

postTrace
#define postTrace()
Definition: tune_quda.h:591

QUDA_CGNR_INVERTER
Definition: enum_quda.h:120

quda::MG::dumpNullVectors
void dumpNullVectors() const
Dump the null-space vectors to disk. Will recurse dumping all levels.
Definition: multigrid.cpp:1045

quda::MGParam::mg_global
QudaMultigridParam & mg_global
Definition: multigrid.h:30

QUDA_GCR_INVERTER
Definition: enum_quda.h:104

quda::ColorSpinorParam::setPrecision
void setPrecision(QudaPrecision precision, QudaPrecision ghost_precision=QUDA_INVALID_PRECISION, bool force_native=false)
Definition: color_spinor_field.h:231

quda::SolverParam::verbosity_precondition
QudaVerbosity verbosity_precondition
Definition: invert_quda.h:239

quda::MG::param_coarse_solver
SolverParam * param_coarse_solver
Definition: multigrid.h:220

tmp1
cudaColorSpinorField * tmp1
Definition: dslash_ctest.cpp:40

QudaEigParam_s::use_dagger
QudaBoolean use_dagger
Definition: quda.h:401

QUDA_MAT_SOLUTION
Definition: enum_quda.h:151

quda::Dirac::Kappa
double Kappa() const
Definition: dirac_quda.h:173

quda::MGParam::B
std::vector< ColorSpinorField * > & B
Definition: multigrid.h:57

QudaMultigridParam_s::eig_param
QudaEigParam * eig_param[QUDA_MAX_MG_LEVEL]
Definition: quda.h:480

QudaPrecision
enum QudaPrecision_s QudaPrecision

QUDA_INVALID_PARITY
Definition: enum_quda.h:289

quda::DiracParam::need_bidirectional
bool need_bidirectional
Definition: dirac_quda.h:51

quda::SolverParam::delta
double delta
Definition: invert_quda.h:70

QudaMultigridParam_s::setup_ca_lambda_max
double setup_ca_lambda_max[QUDA_MAX_MG_LEVEL]
Definition: quda.h:528

quda::PreconditionedSolver
Definition: invert_quda.h:1067

quda::ColorSpinorField
Definition: color_spinor_field.h:311

quda::MG::param_coarse
MGParam * param_coarse
Definition: multigrid.h:211

quda::MGParam::location
QudaFieldLocation location
Definition: multigrid.h:97

quda::MG::generateNullVectors
void generateNullVectors(std::vector< ColorSpinorField *> &B, bool refresh=false)
Generate the null-space vectors.
Definition: multigrid.cpp:1051

quda::ColorSpinorField::Source
virtual void Source(const QudaSourceType sourceType, const int st=0, const int s=0, const int c=0)=0

quda::MG::verify
void verify()
Definition: multigrid.cpp:627

quda::MG::profile_global
TimeProfile & profile_global
Definition: multigrid.h:193

QudaResidualType
enum QudaResidualType_s QudaResidualType

quda::Transfer
Definition: transfer.h:29

quda::SolverParam::ca_lambda_min
double ca_lambda_min
Definition: invert_quda.h:211

quda::SolverParam::inv_type
QudaInverterType inv_type
Definition: invert_quda.h:22

quda::MG::diracResidual
const Dirac * diracResidual
Definition: multigrid.h:244

getVerbosity
QudaVerbosity getVerbosity()
Definition: util_quda.cpp:21

kappa
double kappa
Definition: test_util.cpp:1647

QUDA_INVALID_PRECISION
Definition: enum_quda.h:63

quda::MGParam::setup_location
QudaFieldLocation setup_location
Definition: multigrid.h:100

QudaMultigridParam_s::use_eig_solver
QudaBoolean use_eig_solver[QUDA_MAX_MG_LEVEL]
Definition: quda.h:604

quda::MG::pushLevel
void pushLevel(int level) const
Helper function called on entry to each MG function.
Definition: multigrid.cpp:235

quda::MG::createSmoother
void createSmoother()
Create the smoothers.
Definition: multigrid.cpp:276

QUDA_TEST_VECTOR_SETUP
Definition: enum_quda.h:447

quda::MG::b_tilde
ColorSpinorField * b_tilde
Definition: multigrid.h:229

quda::DiracM
Definition: dirac_quda.h:1141

errorQuda
#define errorQuda(...)
Definition: util_quda.h:121

quda::MG::destroyCoarseSolver
void destroyCoarseSolver()
Destroy the solver wrapper.
Definition: multigrid.cpp:415

quda::MG::generateEigenVectors
void generateEigenVectors()
Generate lowest eigenvectors.
Definition: multigrid.cpp:1418

quda::Transfer::R
void R(ColorSpinorField &out, const ColorSpinorField &in) const
Definition: transfer.cpp:344

QUDA_CUDA_FIELD_LOCATION
Definition: enum_quda.h:326

QudaMultigridParam_s::smoother_solve_type
QudaSolveType smoother_solve_type[QUDA_MAX_MG_LEVEL]
Definition: quda.h:589

quda::MG::destroySmoother
void destroySmoother()
Destroy the smoothers.
Definition: multigrid.cpp:249

QUDA_QUARTER_PRECISION
Definition: enum_quda.h:59

QUDA_ODD_PARITY
Definition: enum_quda.h:288

QudaMultigridParam_s::setup_type
QudaSetupType setup_type
Definition: quda.h:531

QUDA_MR_INVERTER
Definition: enum_quda.h:105

QUDA_HALF_PRECISION
Definition: enum_quda.h:60

QudaMultigridParam_s::setup_tol
double setup_tol[QUDA_MAX_MG_LEVEL]
Definition: quda.h:510

quda::sqrt
__host__ __device__ ValueType sqrt(ValueType x)
Definition: complex_quda.h:120

quda::blas::cDotProduct
Complex cDotProduct(ColorSpinorField &, ColorSpinorField &)
Definition: reduce_quda.cu:764

QudaMultigridParam_s::num_setup_iter
int num_setup_iter[QUDA_MAX_MG_LEVEL]
Definition: quda.h:507

setOutputPrefix
void setOutputPrefix(const char *prefix)
Definition: util_quda.cpp:69

QUDA_INVALID_SCHWARZ
Definition: enum_quda.h:183

quda::MG::diracCoarseSmoother
Dirac * diracCoarseSmoother
Definition: multigrid.h:256

quda::DiracParam::transfer
Transfer * transfer
Definition: dirac_quda.h:49

QUDA_SUMMARIZE
Definition: enum_quda.h:264

QUDA_NOISE_UNIFORM
Definition: enum_quda.h:384

quda::MG::matCoarseSmootherSloppy
DiracMatrix * matCoarseSmootherSloppy
Definition: multigrid.h:268

quda::MG::param_presmooth
SolverParam * param_presmooth
Definition: multigrid.h:214

quda::MGParam
Definition: multigrid.h:26

tmp
cudaColorSpinorField * tmp
Definition: covdev_test.cpp:44

QUDA_FULL_SITE_SUBSET
Definition: enum_quda.h:333

quda::ColorSpinorField::Even
const ColorSpinorField & Even() const
Definition: color_spinor_field.cpp:608

QudaMultigridParam_s::coarse_solver_tol
double coarse_solver_tol[QUDA_MAX_MG_LEVEL]
Definition: quda.h:543

quda::ColorSpinorField::Odd
const ColorSpinorField & Odd() const
Definition: color_spinor_field.cpp:616

quda::ColorSpinorField::Create
static ColorSpinorField * Create(const ColorSpinorParam &param)
Definition: color_spinor_field.cpp:752

quda::EigenSolver::loadVectors
static void loadVectors(std::vector< ColorSpinorField *> &eig_vecs, std::string file)
Load vectors from file.
Definition: eigensolve_quda.cpp:362

QudaMultigridParam_s::pre_orthonormalize
QudaBoolean pre_orthonormalize
Definition: quda.h:534

QUDA_DIRECT_SOLVE
Definition: enum_quda.h:161

QudaMultigridParam_s::smoother_schwarz_type
QudaSchwarzType smoother_schwarz_type[QUDA_MAX_MG_LEVEL]
Definition: quda.h:579

quda::MG::r
ColorSpinorField * r
Definition: multigrid.h:226

QUDA_NULL_FIELD_CREATE
Definition: enum_quda.h:359

QudaMultigridParam_s::vec_load
QudaBoolean vec_load[QUDA_MAX_MG_LEVEL]
Definition: quda.h:627

quda::MG::param
MGParam & param
Definition: multigrid.h:181

quda::Transfer::P
void P(ColorSpinorField &out, const ColorSpinorField &in) const
Definition: transfer.cpp:305

std
STL namespace.

quda::cudaColorSpinorField
Definition: color_spinor_field.h:575

QudaMultigridParam_s::setup_ca_lambda_min
double setup_ca_lambda_min[QUDA_MAX_MG_LEVEL]
Definition: quda.h:525

quda::Dirac::HaloPrecision
QudaPrecision HaloPrecision() const
Definition: dirac_quda.h:200

quda::MGParam::Nlevel
int Nlevel
Definition: multigrid.h:36

QUDA_CG_INVERTER
Definition: enum_quda.h:102

quda::blas::xmyNorm
double xmyNorm(ColorSpinorField &x, ColorSpinorField &y)
Definition: blas_quda.h:75

QudaMultigridParam_s::smoother_halo_precision
QudaPrecision smoother_halo_precision[QUDA_MAX_MG_LEVEL]
Definition: quda.h:576

quda::MG::loadVectors
void loadVectors(std::vector< ColorSpinorField *> &B)
Load the null space vectors in from file.
Definition: multigrid.cpp:1013

quda::SolverParam::inv_type_precondition
QudaInverterType inv_type_precondition
Definition: invert_quda.h:28

quda::DiracParam::dirac
Dirac * dirac
Definition: dirac_quda.h:50

quda::Dirac::setHaloPrecision
void setHaloPrecision(QudaPrecision halo_precision_) const
Definition: dirac_quda.h:201

quda::SolverParam::preserve_source
QudaPreserveSource preserve_source
Definition: invert_quda.h:154

QUDA_MG_INVERTER
Definition: enum_quda.h:117

quda::ColorSpinorField::Volume
int Volume() const
Definition: color_spinor_field.h:415

quda::DiracCoarse
Definition: dirac_quda.h:809

quda::MG::reset
void reset(bool refresh=false)
This method resets the solver, e.g., when a parameter has changed such as the mass.
Definition: multigrid.cpp:117

quda::DiracParam::mu_factor
double mu_factor
Definition: dirac_quda.h:38

quda::MGParam::spinBlockSize
int spinBlockSize
Definition: multigrid.h:42

QUDA_UKQCD_GAMMA_BASIS
Definition: enum_quda.h:368

quda::Dirac::Mu
virtual double Mu() const
Definition: dirac_quda.h:174

quda::Transfer::setSiteSubset
void setSiteSubset(QudaSiteSubset site_subset, QudaParity parity)
Sets whether the transfer operator is to act on full fields or single parity fields, and if single-parity which parity.
Definition: transfer.cpp:227

quda::SolverParam::omega
double omega
Definition: invert_quda.h:205

QudaMultigridParam_s::setup_minimize_memory
QudaBoolean setup_minimize_memory
Definition: quda.h:609

quda::Dirac::MdagM
virtual void MdagM(ColorSpinorField &out, const ColorSpinorField &in) const =0

QudaMultigridParam_s::mu_factor
double mu_factor[QUDA_MAX_MG_LEVEL]
Definition: quda.h:648

Nspin
int Nspin
Definition: blas_test.cu:45

quda::MGParam::smoother_solve_type
QudaSolveType smoother_solve_type
Definition: multigrid.h:94

QudaMultigridParam_s::coarse_guess
QudaBoolean coarse_guess
Definition: quda.h:639

quda
Definition: blas_cublas.h:5

quda::LatticeFieldParam::siteSubset
QudaSiteSubset siteSubset
Definition: lattice_field.h:71

QUDA_MG_CYCLE_VCYCLE
Definition: enum_quda.h:173

QudaMultigridParam_s::setup_ca_basis
QudaCABasis setup_ca_basis[QUDA_MAX_MG_LEVEL]
Definition: quda.h:519

xD
ColorSpinorField * xD
Definition: blas_test.cu:41

axpby
static void axpby(Float a, Float *x, Float b, Float *y, int len)
Definition: dslash_util.h:33

QUDA_CONSTANT_SOURCE
Definition: enum_quda.h:376

quda::norm2
double norm2(const CloverField &a, bool inverse=false)
Definition: clover_field.cpp:470

quda::MG::presmoother
Solver * presmoother
Definition: multigrid.h:190

quda::MG::diracSmootherSloppy
const Dirac * diracSmootherSloppy
Definition: multigrid.h:250

quda::MG::operator()
void operator()(ColorSpinorField &out, ColorSpinorField &in)
Definition: multigrid.cpp:891

QudaMultigridParam_s::n_vec
int n_vec[QUDA_MAX_MG_LEVEL]
Definition: quda.h:492

quda::MG::popLevel
void popLevel(int level) const
Helper function called on exit to each MG member function.
Definition: multigrid.cpp:242

param
QudaGaugeParam param
Definition: pack_test.cpp:17

quda::Dslash
Definition: dslash.h:12

QudaEigParam_s::nConv
int nConv
Definition: quda.h:420

QudaMultigridParam_s::setup_location
QudaFieldLocation setup_location[QUDA_MAX_MG_LEVEL]
Definition: quda.h:601

quda::EigenSolver::saveVectors
static void saveVectors(const std::vector< ColorSpinorField *> &eig_vecs, std::string file)
Save vectors to file.
Definition: eigensolve_quda.cpp:416

QUDA_MATPC_SOLUTION
Definition: enum_quda.h:153

quda::MG::matCoarseSmoother
DiracMatrix * matCoarseSmoother
Definition: multigrid.h:265

quda::SolverParam::Nsteps
int Nsteps
Definition: invert_quda.h:190

quda::SolverParam::pipeline
int pipeline
Definition: invert_quda.h:112

quda::debug
static bool debug
Definition: multigrid.cpp:12

quda::LatticeFieldParam::x
int x[QUDA_MAX_DIM]
Definition: lattice_field.h:67

quda::MG::saveVectors
void saveVectors(const std::vector< ColorSpinorField *> &B) const
Save the null space vectors to file.
Definition: multigrid.cpp:1029

QUDA_MATPC_EVEN_EVEN_ASYMMETRIC
Definition: enum_quda.h:212

QUDA_CA_GCR_INVERTER
Definition: enum_quda.h:127

quda::DiracParam
Definition: dirac_quda.h:19

multigrid.h

quda::SolverParam::compute_null_vector
QudaComputeNullVector compute_null_vector
Definition: invert_quda.h:67

quda::SolverParam::sloppy_converge
bool sloppy_converge
Definition: invert_quda.h:127

quda::DiracParam::mu
double mu
Definition: dirac_quda.h:37

quda::MGParam::nu_pre
int nu_pre
Definition: multigrid.h:63

QudaMultigridParam_s::coarse_solver_maxiter
int coarse_solver_maxiter[QUDA_MAX_MG_LEVEL]
Definition: quda.h:546

quda::MG::tmp_coarse
ColorSpinorField * tmp_coarse
Definition: multigrid.h:238

quda::ax
void ax(const double &a, GaugeField &u)
Scale the gauge field by the scalar a.
Definition: gauge_field.cpp:349

quda::EigenSolver
Definition: eigensolve_quda.h:11

tol
double tol
Definition: test_util.cpp:1656

quda::caxpy
__device__ __host__ void caxpy(const complex< Float > &a, const complex< Float > &x, complex< Float > &y)
Definition: coarse_op_kernel.cuh:117

QUDA_USE_INIT_GUESS_YES
Definition: enum_quda.h:429

quda::ColorSpinorParam::location
QudaFieldLocation location
Definition: color_spinor_field.h:83

quda::MG::buildFreeVectors
void buildFreeVectors(std::vector< ColorSpinorField *> &B)
Build free-field null-space vectors.
Definition: multigrid.cpp:1241

QudaMultigridParam_s::post_orthonormalize
QudaBoolean post_orthonormalize
Definition: quda.h:537

quda::SolverParam::gflops
double gflops
Definition: invert_quda.h:223

quda::EigenSolver::create
static EigenSolver * create(QudaEigParam *eig_param, const DiracMatrix &mat, TimeProfile &profile)
Creates the eigensolver using the parameters given and the matrix.
Definition: eigensolve_quda.cpp:109

quda::DiracMdag
Definition: dirac_quda.h:1258

quda::SolverParam::residual_type
QudaResidualType residual_type
Definition: invert_quda.h:49

quda::MG::param_postsmooth
SolverParam * param_postsmooth
Definition: multigrid.h:217

pushOutputPrefix
void pushOutputPrefix(const char *prefix)
Push a new output prefix onto the stack.
Definition: util_quda.cpp:105

quda::SolverParam::updateInvertParam
void updateInvertParam(QudaInvertParam &param, int offset=-1)
Definition: invert_quda.h:428

QUDA_PRESERVE_SOURCE_NO
Definition: enum_quda.h:236

QUDA_CA_CGNR_INVERTER
Definition: enum_quda.h:126

quda::MG::prefix
char prefix[128]
Definition: multigrid.h:199

QudaInvertParam_s::cuda_prec_sloppy
QudaPrecision cuda_prec_sloppy
Definition: quda.h:215

quda::RNG::Release
void Release()
Release Device memory for CURAND RNG states.
Definition: random.cu:145

QudaMultigridParam_s::setup_maxiter
int setup_maxiter[QUDA_MAX_MG_LEVEL]
Definition: quda.h:513

QUDA_ADDITIVE_SCHWARZ
Definition: enum_quda.h:181

quda::MG::diracSmoother
const Dirac * diracSmoother
Definition: multigrid.h:247

quda::SolverParam::maxiter
int maxiter
Definition: invert_quda.h:136

csParam
ColorSpinorParam csParam
Definition: pack_test.cpp:24

quda::Dirac::prepare
virtual void prepare(ColorSpinorField *&src, ColorSpinorField *&sol, ColorSpinorField &x, ColorSpinorField &b, const QudaSolutionType) const =0

QUDA_INVALID_RESIDUAL
Definition: enum_quda.h:190

quda::DiracParam::commDim
int commDim[QUDA_MAX_DIM]
Definition: dirac_quda.h:44

quda::MGParam::evals
std::vector< Complex > evals
Definition: multigrid.h:60

quda::DiracParam::dagger
QudaDagType dagger
Definition: dirac_quda.h:30

quda::MGParam::smoother
QudaInverterType smoother
Definition: multigrid.h:87

in
cpuColorSpinorField * in
Definition: staggered_invert_test.cpp:98

quda::Dirac::DslashXpay
virtual void DslashXpay(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity, const ColorSpinorField &x, const double &k) const =0

QUDA_BOOLEAN_TRUE
Definition: enum_quda.h:453

quda::Solver::create
static Solver * create(SolverParam &param, DiracMatrix &mat, DiracMatrix &matSloppy, DiracMatrix &matPrecon, TimeProfile &profile)
Definition: solver.cpp:33

QUDA_BOOLEAN_FALSE
Definition: enum_quda.h:452

quda::TimeProfile::Print
void Print()
Definition: timer.cpp:7

quda::ColorSpinorField::SiteSubset
QudaSiteSubset SiteSubset() const
Definition: color_spinor_field.h:481

quda::RNG
Class declaration to initialize and hold CURAND RNG states.
Definition: random_quda.h:23

QudaMatPCType
enum QudaMatPCType_s QudaMatPCType

quda::ColorSpinorParam::gammaBasis
QudaGammaBasis gammaBasis
Definition: color_spinor_field.h:94

quda::MGParam::matSmooth
DiracMatrix * matSmooth
Definition: multigrid.h:81

quda::MGParam::geoBlockSize
int geoBlockSize[QUDA_MAX_DIM]
Definition: multigrid.h:39

quda::MG::diracCoarseResidual
Dirac * diracCoarseResidual
Definition: multigrid.h:253

QudaSolutionType
enum QudaSolutionType_s QudaSolutionType

QUDA_PARITY_SITE_SUBSET
Definition: enum_quda.h:332

quda::SolverParam::is_preconditioner
bool is_preconditioner
whether the solver is acting as a preconditioner for another solver
Definition: invert_quda.h:241

QudaEigParam_s::use_norm_op
QudaBoolean use_norm_op
Definition: quda.h:402

quda::DiracParam::type
QudaDiracType type
Definition: dirac_quda.h:22

QudaMultigridParam_s::precision_null
QudaPrecision precision_null[QUDA_MAX_MG_LEVEL]
Definition: quda.h:495

popOutputPrefix
void popOutputPrefix()
Pop the output prefix restoring the prior one on the stack.
Definition: util_quda.cpp:121

QudaMultigridParam_s::verbosity
QudaVerbosity verbosity[QUDA_MAX_MG_LEVEL]
Definition: quda.h:501

QUDA_CGNE_INVERTER
Definition: enum_quda.h:119

QudaMultigridParam_s::coarse_solver
QudaInverterType coarse_solver[QUDA_MAX_MG_LEVEL]
Definition: quda.h:540

quda::DiracParam::matpcType
QudaMatPCType matpcType
Definition: dirac_quda.h:29

quda::Complex
std::complex< double > Complex
Definition: quda_internal.h:46

quda::Solver::eig_solve
EigenSolver * eig_solve
Definition: invert_quda.h:545

QudaMultigridParam_s::run_verify
QudaBoolean run_verify
Definition: quda.h:618

quda::MG::B_coarse
std::vector< ColorSpinorField * > * B_coarse
Definition: multigrid.h:223

QudaParity
enum QudaParity_s QudaParity

matpc_type
QudaMatPCType matpc_type
Definition: test_util.cpp:1662

quda::MG::coarse_prefix
char coarse_prefix[128]
Definition: multigrid.h:202

quda::SolverParam::eig_param
QudaEigParam eig_param
Definition: invert_quda.h:55

quda::MG::tmp2_coarse
ColorSpinorField * tmp2_coarse
Definition: multigrid.h:241

quda::MG::r_coarse
ColorSpinorField * r_coarse
Definition: multigrid.h:232

coarse_solver
QudaInverterType coarse_solver[QUDA_MAX_MG_LEVEL]
Definition: test_util.cpp:1691

QUDA_CORNER_SOURCE
Definition: enum_quda.h:378

MAX_BLOCK_FLOAT_NC
#define MAX_BLOCK_FLOAT_NC
Definition: color_spinor_pack.cuh:42

QUDA_MG_CYCLE_RECURSIVE
Definition: enum_quda.h:176

quda::MG::resetTransfer
bool resetTransfer
Definition: multigrid.h:187

quda::SolverParam::tol_precondition
double tol_precondition
Definition: invert_quda.h:199

quda::MG::matCoarseResidual
DiracMatrix * matCoarseResidual
Definition: multigrid.h:262

QudaMultigridParam_s::vec_store
QudaBoolean vec_store[QUDA_MAX_MG_LEVEL]
Definition: quda.h:633

quda::DiracParam::halo_precision
QudaPrecision halo_precision
Definition: dirac_quda.h:46

QUDA_BICGSTAB_INVERTER
Definition: enum_quda.h:103

quda::DiracParam::kappa
double kappa
Definition: dirac_quda.h:23

pushVerbosity
void pushVerbosity(QudaVerbosity verbosity)
Push a new verbosity onto the stack.
Definition: util_quda.cpp:83

quda::SolverParam::precision_precondition
QudaPrecision precision_precondition
Definition: invert_quda.h:151

QudaMultigridParam_s::run_oblique_proj_check
QudaBoolean run_oblique_proj_check
Definition: quda.h:624

quda::MG::diracCoarseSmootherSloppy
Dirac * diracCoarseSmootherSloppy
Definition: multigrid.h:259

quda::Dirac::setCommDim
void setCommDim(const int commDim_[QUDA_MAX_DIM]) const
Enable / disable communications for the Dirac operator.
Definition: dirac_quda.h:145

QudaMultigridParam_s::location
QudaFieldLocation location[QUDA_MAX_MG_LEVEL]
Definition: quda.h:598

QudaSiteSubset
enum QudaSiteSubset_s QudaSiteSubset

quda::SolverParam::Nkrylov
int Nkrylov
Definition: invert_quda.h:193

quda::SolverParam::precision
QudaPrecision precision
Definition: invert_quda.h:142

quda::QUDA_PROFILE_INIT
Definition: timer.h:106

QudaMultigridParam_s::smoother_schwarz_cycle
int smoother_schwarz_cycle[QUDA_MAX_MG_LEVEL]
Definition: quda.h:582

QudaMultigridParam_s::vec_outfile
char vec_outfile[QUDA_MAX_MG_LEVEL][256]
Definition: quda.h:636

QudaMultigridParam_s::run_low_mode_check
QudaBoolean run_low_mode_check
Definition: quda.h:621

quda::Solver
Definition: invert_quda.h:460

quda::ColorSpinorParam
Definition: color_spinor_field.h:80

QudaEigParam_s::vec_outfile
char vec_outfile[256]
Definition: quda.h:462

quda::MGParam::smoother_tol
double smoother_tol
Definition: multigrid.h:69

QUDA_USE_INIT_GUESS_NO
Definition: enum_quda.h:428

quda::MGParam::Nvec
int Nvec
Definition: multigrid.h:45

QudaMultigridParam_s::coarse_solver_ca_lambda_max
double coarse_solver_ca_lambda_max[QUDA_MAX_MG_LEVEL]
Definition: quda.h:558

quda::MGParam::nu_post
int nu_post
Definition: multigrid.h:66

quda::commDim
static int commDim[QUDA_MAX_DIM]
Definition: dslash_pack.cuh:9

out
cpuColorSpinorField * out
Definition: staggered_invert_test.cpp:99

QudaInvertParam_s::cuda_prec_precondition
QudaPrecision cuda_prec_precondition
Definition: quda.h:217

QudaMultigridParam_s::coarse_grid_solution_type
QudaSolutionType coarse_grid_solution_type[QUDA_MAX_MG_LEVEL]
Definition: quda.h:586

quda::MG::profile
TimeProfile profile
Definition: multigrid.h:196

Ncolor
int Ncolor
Definition: blas_test.cu:46

quda::MG::~MG
virtual ~MG()
Definition: multigrid.cpp:546

quda::MG::flops
double flops() const
Return the total flops done on this and all coarser levels.
Definition: multigrid.cpp:597

QUDA_INVALID_INVERTER
Definition: enum_quda.h:128

quda::DiracMdagM
Definition: dirac_quda.h:1181

QUDA_SINGLE_PRECISION
Definition: enum_quda.h:61

QudaMultigridParam_s::geo_block_size
int geo_block_size[QUDA_MAX_MG_LEVEL][QUDA_MAX_DIM]
Definition: quda.h:486

QudaMultigridParam_s::generate_all_levels
QudaBoolean generate_all_levels
Definition: quda.h:615

quda::MG::rng
RNG * rng
Definition: multigrid.h:271

quda::DiracMMdag
Definition: dirac_quda.h:1221

quda::SolverParam::preconditioner
void * preconditioner
Definition: invert_quda.h:33

QUDA_MAX_MG_LEVEL
#define QUDA_MAX_MG_LEVEL
Maximum number of multi-grid levels. This number may be increased if needed.
Definition: quda_constants.h:56

quda::s
__shared__ float s[]

quda::MGParam::fine
MG * fine
Definition: multigrid.h:54

quda::LatticeFieldParam::Precision
QudaPrecision Precision() const
Definition: lattice_field.h:58

QUDA_DEGRAND_ROSSI_GAMMA_BASIS
Definition: enum_quda.h:367

QUDA_COARSEPC_DIRAC
Definition: enum_quda.h:316

quda::MGParam::NblockOrtho
int NblockOrtho
Definition: multigrid.h:48

QudaMultigridParam_s::coarse_solver_ca_basis_size
int coarse_solver_ca_basis_size[QUDA_MAX_MG_LEVEL]
Definition: quda.h:552

QUDA_CA_CG_INVERTER
Definition: enum_quda.h:124

QudaMultigridParam_s::coarse_solver_ca_basis
QudaCABasis coarse_solver_ca_basis[QUDA_MAX_MG_LEVEL]
Definition: quda.h:549

printfQuda
#define printfQuda(...)
Definition: util_quda.h:115

QUDA_DAG_NO
Definition: enum_quda.h:218

quda::MG::createCoarseDirac
void createCoarseDirac()
Create the coarse dirac operator.
Definition: multigrid.cpp:334

QudaMultigridParam_s::vec_infile
char vec_infile[QUDA_MAX_MG_LEVEL][256]
Definition: quda.h:630

qio_field.h

quda::blas::xpy
void xpy(ColorSpinorField &x, ColorSpinorField &y)
Definition: blas_quda.h:33

quda::ColorSpinorField::TwistFlavor
QudaTwistFlavorType TwistFlavor() const
Definition: color_spinor_field.h:408

quda::MG::transfer
Transfer * transfer
Definition: multigrid.h:184

transfer
int transfer
Definition: covdev_test.cpp:55

quda::TimeProfile
Definition: timer.h:171

QudaMultigridParam_s::setup_maxiter_refresh
int setup_maxiter_refresh[QUDA_MAX_MG_LEVEL]
Definition: quda.h:516

quda::SolverParam::precondition_cycle
int precondition_cycle
Definition: invert_quda.h:196

quda::SolverParam::deflate
bool deflate
Definition: invert_quda.h:52

quda::QUDA_PROFILE_IO
Definition: timer.h:112

QUDA_DIRECT_PC_SOLVE
Definition: enum_quda.h:163

quda::DiracParam::tmp2
ColorSpinorField * tmp2
Definition: dirac_quda.h:42

quda::ColorSpinorField::PrintVector
virtual void PrintVector(unsigned int x) const =0

quda::MG::postsmoother
Solver * postsmoother
Definition: multigrid.h:190

quda::SolverParam::use_init_guess
QudaUseInitGuess use_init_guess
Definition: invert_quda.h:64

QUDA_EVEN_PARITY
Definition: enum_quda.h:287

popVerbosity
void popVerbosity()
Pop the verbosity restoring the prior one on the stack.
Definition: util_quda.cpp:94

quda::SolverParam::precision_sloppy
QudaPrecision precision_sloppy
Definition: invert_quda.h:145

QUDA_MAX_DIM
#define QUDA_MAX_DIM
Maximum number of dimensions supported by QUDA. In practice, no routines make use of more than 5...
Definition: quda_constants.h:17

QUDA_ZERO_FIELD_CREATE
Definition: enum_quda.h:360

QUDA_CA_CGNE_INVERTER
Definition: enum_quda.h:125

QUDA_COMPUTE_NULL_VECTOR_YES
Definition: enum_quda.h:441

quda::MG::MG
MG(MGParam &param, TimeProfile &profile)
Definition: multigrid.cpp:14

quda::SolverParam::ca_lambda_max
double ca_lambda_max
Definition: invert_quda.h:214

quda::ColorSpinorParam::create
QudaFieldCreate create
Definition: color_spinor_field.h:95

quda::SolverParam
Definition: invert_quda.h:17

QudaMultigridParam_s::compute_null_vector
QudaComputeNullVector compute_null_vector
Definition: quda.h:612

quda::MG::createCoarseSolver
void createCoarseSolver()
Create the solver wrapper.
Definition: multigrid.cpp:436

mat
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
Definition: covdev_reference.cpp:112

quda::Dirac::reconstruct
virtual void reconstruct(ColorSpinorField &x, const ColorSpinorField &b, const QudaSolutionType) const =0

quda::LatticeField::Precision
QudaPrecision Precision() const
Definition: lattice_field.h:546

quda::MGParam::coarse_grid_solution_type
QudaSolutionType coarse_grid_solution_type
Definition: multigrid.h:91

QUDA_MATPC_EVEN_EVEN
Definition: enum_quda.h:210

dagger
QudaDagType dagger
Definition: test_util.cpp:1620

quda::MG::coarse
MG * coarse
Definition: multigrid.h:205

QudaMultigridParam_s::setup_ca_basis_size
int setup_ca_basis_size[QUDA_MAX_MG_LEVEL]
Definition: quda.h:522

QudaMultigridParam_s::coarse_solver_ca_lambda_min
double coarse_solver_ca_lambda_min[QUDA_MAX_MG_LEVEL]
Definition: quda.h:555

parity
QudaParity parity
Definition: covdev_test.cpp:54

quda::Transfer::flops
double flops() const
Definition: transfer.cpp:387

prec
QudaPrecision prec
Definition: test_util.cpp:1608

quda::Transfer::Vectors
const ColorSpinorField & Vectors(QudaFieldLocation location=QUDA_INVALID_FIELD_LOCATION) const
Definition: transfer.h:205

quda::MG::coarse_solver
Solver * coarse_solver
Definition: multigrid.h:208

quda::MGParam::cycle_type
QudaMultigridCycleType cycle_type
Definition: multigrid.h:72

quda::zero
__device__ __host__ void zero(vector_type< scalar, n > &v)
Definition: cub_helper.cuh:54

QudaMultigridParam_s::setup_inv_type
QudaInverterType setup_inv_type[QUDA_MAX_MG_LEVEL]
Definition: quda.h:504

quda::SolverParam::tol
double tol
Definition: invert_quda.h:115

QUDA_COARSE_DIRAC
Definition: enum_quda.h:315

QudaInvertParam_s::matpc_type
QudaMatPCType matpc_type
Definition: quda.h:206

QudaEigParam_s::vec_infile
char vec_infile[256]
Definition: quda.h:459

quda::DiracMatrix::Expose
const Dirac * Expose() const
Definition: dirac_quda.h:1135

quda::DiracParam::tmp1
ColorSpinorField * tmp1
Definition: dirac_quda.h:41

quda::MGParam::matResidual
DiracMatrix * matResidual
Definition: multigrid.h:78

QUDA_L2_RELATIVE_RESIDUAL
Definition: enum_quda.h:187

quda::spinorNoise
void spinorNoise(ColorSpinorField &src, RNG &randstates, QudaNoiseType type)
Generate a random noise spinor. This variant allows the user to manage the RNG state.
Definition: spinor_noise.cu:122

quda::Dirac
Definition: dirac_quda.h:106

quda::MG::x_coarse
ColorSpinorField * x_coarse
Definition: multigrid.h:235

eigensolve_quda.h

QudaMultigridParam_s::invert_param
QudaInvertParam * invert_param
Definition: quda.h:478

dot
static void dot(sFloat *res, gFloat *a, sFloat *b)
Definition: dslash_util.h:56

comm_barrier
void comm_barrier(void)
Definition: comm_mpi.cpp:326

quda::MGParam::level
int level
Definition: multigrid.h:33

quda::Transfer::reset
void reset()
for resetting the Transfer when the null vectors have changed
Definition: transfer.cpp:182