34 std::array<int, 4>
dim = {24, 24, 24, 24};
143 #if (CUDA_VERSION >= 10010 && __COMPUTE_CAPABILITY__ >= 700)
280 CLI::TransformPairs<QudaBLASDataType> blas_dt_map {
304 CLI::TransformPairs<QudaTwistFlavorType> twist_flavor_type_map {{
"singlet",
QUDA_TWIST_SINGLET},
345 CLI::TransformPairs<QudaSolutionType> solution_type_map {{
"mat",
QUDA_MAT_SOLUTION},
360 CLI::TransformPairs<QudaTboundary> fermion_t_boundary_map {{
"periodic",
QUDA_PERIODIC_T},
363 CLI::TransformPairs<QudaSolveType> solve_type_map {
367 CLI::TransformPairs<QudaEigSpectrumType> seig_pectrum_map {
376 CLI::TransformPairs<QudaVerbosity> verbosity_map {
388 CLI::TransformPairs<QudaReconstructType> reconstruct_type_map {{
"18",
QUDA_RECONSTRUCT_NO},
394 CLI::TransformPairs<QudaEigSpectrumType> eig_spectrum_map {
409 auto quda_app = std::make_shared<QUDAApp>(app_description, app_name);
410 quda_app->option_defaults()->always_capture_default();
412 quda_app->add_option(
"--alternative-reliable",
alternative_reliable,
"use alternative reliable updates");
413 quda_app->add_option(
"--anisotropy",
anisotropy,
"Temporal anisotropy factor (default 1.0)");
415 quda_app->add_option(
"--ca-basis-type",
ca_basis,
"The basis to use for CA-CG (default power)")
416 ->transform(CLI::QUDACheckedTransformer(ca_basis_map));
417 quda_app->add_option(
418 "--cheby-basis-eig-max",
419 ca_lambda_max,
"Conservative estimate of largest eigenvalue for Chebyshev basis CA-CG (default is to guess with power iterations)");
421 "Conservative estimate of smallest eigenvalue for Chebyshev basis CA-CG (default 0)");
422 quda_app->add_option(
"--clover-csw",
clover_csw,
"Clover Csw coefficient 1.0")->capture_default_str();
423 quda_app->add_option(
"--clover-coeff",
clover_coeff,
"The overall clover coefficient, kappa * Csw. (default 0.0. Will be inferred from clover-csw (default 1.0) and kappa. "
424 "If the user populates this value with anything other than 0.0, the passed value will override the inferred value)")->capture_default_str();
427 "Compute the clover field or use random numbers (default false)");
429 "Compute the fat/long field or use random numbers (default false)");
433 "Whether to leave spin elemental open, or use a gamma basis and contract on "
434 "spin (default open)")
435 ->transform(CLI::QUDACheckedTransformer(contract_type_map));
439 "Whether to use single(S), double(D), and/or complex(C/Z) data types (default C)")
440 ->transform(CLI::QUDACheckedTransformer(blas_dt_map));
443 ->add_option(
"--blas-data-order",
blas_data_order,
"Whether data is in row major or column major order (default row)")
444 ->transform(CLI::QUDACheckedTransformer(blas_data_order_map));
449 "Whether to leave the A GEMM matrix as is (N), to transpose (T) or transpose conjugate (C) (default N) ")
450 ->transform(CLI::QUDACheckedTransformer(blas_op_map));
455 "Whether to leave the B GEMM matrix as is (N), to transpose (T) or transpose conjugate (C) (default N) ")
456 ->transform(CLI::QUDACheckedTransformer(blas_op_map));
458 quda_app->add_option(
"--blas-alpha",
blas_alpha_re_im,
"Set the complex value of alpha for GEMM (default {1.0,0.0}")
461 quda_app->add_option(
"--blas-beta",
blas_beta_re_im,
"Set the complex value of beta for GEMM (default {1.0,0.0}")
465 ->add_option(
"--blas-mnk",
blas_mnk,
"Set the dimensions of the A, B, and C matrices GEMM (default 128 128 128)")
470 "Set the leading dimensions A, B, and C matrices GEMM (default 128 128 128) ")
473 quda_app->add_option(
"--blas-offsets",
blas_offsets,
"Set the offsets for matrices A, B, and C (default 0 0 0)")
476 quda_app->add_option(
"--blas-strides",
blas_strides,
"Set the strides for matrices A, B, and C (default 1 1 1)")
479 quda_app->add_option(
"--blas-batch",
blas_batch,
"Set the number of batches for GEMM (default 16)");
481 quda_app->add_flag(
"--dagger",
dagger,
"Set the dagger to 1 (default 0)");
482 quda_app->add_option(
"--device",
device_ordinal,
"Set the CUDA device to use (default 0, single GPU only)")
485 quda_app->add_option(
"--dslash-type",
dslash_type,
"Set the dslash type")
486 ->transform(CLI::QUDACheckedTransformer(dslash_type_map));
488 quda_app->add_option(
"--epsilon",
epsilon,
"Twisted-Mass flavor twist of Dirac operator (default 0.01)");
489 quda_app->add_option(
"--epsilon-naik",
eps_naik,
"Epsilon factor on Naik term (default 0.0, suggested non-zero -0.1)");
493 "Set the twisted mass flavor type (singlet (default), deg-doublet, nondeg-doublet)")
494 ->transform(CLI::QUDACheckedTransformer(twist_flavor_type_map));
497 "Width of the Gaussian noise used for random gauge field contruction (default 0.2)");
499 quda_app->add_option(
"--heatbath-beta",
heatbath_beta_value,
"Beta value used in heatbath test (default 6.2)");
501 "Whether to use a cold or hot start in heatbath test (default false)");
503 "Number of heatbath hits per heatbath step (default 5)");
505 "Number of overrelaxation hits per heatbath step (default 5)");
507 "Number of measurement steps in heatbath test (default 10)");
509 "Number of warmup steps in heatbath test (default 10)");
511 quda_app->add_option(
"--inv-type",
inv_type,
"The type of solver to use (default cg)")
512 ->transform(CLI::QUDACheckedTransformer(inverter_type_map));
513 quda_app->add_option(
"--inv-deflate",
inv_deflate,
"Deflate the inverter using the eigensolver");
514 quda_app->add_option(
"--inv-multigrid",
inv_multigrid,
"Precondition the inverter using multigrid");
515 quda_app->add_option(
"--kappa",
kappa,
"Kappa of Dirac operator (default 0.12195122... [equiv to mass])");
516 quda_app->add_option(
518 "Restrict laplace operator to omit the t dimension (n=3), or include all dims (n=4) (default 4)");
519 quda_app->add_option(
"--load-gauge",
latfile,
"Load gauge field \" file \" for the test (requires QIO)");
520 quda_app->add_option(
"--Lsdim",
Lsdim,
"Set Ls dimension size(default 16)");
521 quda_app->add_option(
"--mass",
mass,
"Mass of Dirac operator (default 0.1)");
523 quda_app->add_option(
"--mass-normalization",
normalization,
"Mass normalization (kappa (default) / mass / asym-mass)")
524 ->transform(CLI::QUDACheckedTransformer(mass_normalization_map));
527 ->add_option(
"--matpc",
matpc_type,
"Matrix preconditioning type (even-even, odd-odd, even-even-asym, odd-odd-asym)")
528 ->transform(CLI::QUDACheckedTransformer(matpc_type_map));
529 quda_app->add_option(
"--msrc",
Msrc,
530 "Used for testing non-square block blas routines where nsrc defines the other dimension");
531 quda_app->add_option(
"--mu",
mu,
"Twisted-Mass chiral twist of Dirac operator (default 0.1)");
532 quda_app->add_option(
"--m5",
m5,
"Mass of shift of five-dimensional Dirac operators (default -1.5)");
533 quda_app->add_option(
"--b5",
b5,
"Mobius b5 parameter (default 1.5)");
534 quda_app->add_option(
"--c5",
c5,
"Mobius c5 parameter (default 0.5)");
535 quda_app->add_option(
537 "Whether to do a multi-shift solver test or not. Default is 1 (single mass)"
538 "If a value N > 1 is passed, heavier masses will be constructed and the multi-shift solver will be called");
539 quda_app->add_option(
"--ngcrkrylov",
gcrNkrylov,
540 "The number of inner iterations to use for GCR, BiCGstab-l, CA-CG (default 10)");
541 quda_app->add_option(
"--niter",
niter,
"The number of iterations to perform (default 100)");
543 "Use the native or generic BLAS LAPACK implementation (default true)");
545 "The number of iterations to perform for any preconditioner (default 10)");
546 quda_app->add_option(
"--nsrc",
Nsrc,
547 "How many spinors to apply the dslash to simultaneusly (experimental for staggered only)");
549 quda_app->add_option(
"--pipeline",
pipeline,
550 "The pipeline length for fused operations in GCR, BiCGstab-l (default 0, no pipelining)");
554 CLI::QUDACheckedTransformer prec_transform(precision_map);
555 quda_app->add_option(
"--prec",
prec,
"Precision in GPU")->transform(prec_transform);
556 quda_app->add_option(
"--prec-precondition",
prec_precondition,
"Preconditioner precision in GPU")->transform(prec_transform);
558 quda_app->add_option(
"--prec-eigensolver",
prec_eigensolver,
"Eigensolver precision in GPU")->transform(prec_transform);
561 ->transform(prec_transform);
563 quda_app->add_option(
"--prec-ritz",
prec_ritz,
"Eigenvector precision in GPU")->transform(prec_transform);
565 quda_app->add_option(
"--prec-sloppy",
prec_sloppy,
"Sloppy precision in GPU")->transform(prec_transform);
567 quda_app->add_option(
"--prec-null",
prec_null,
"Precison TODO")->transform(prec_transform);
569 quda_app->add_option(
"--precon-type",
precon_type,
"The type of solver to use (default none (=unspecified)).")
570 ->transform(CLI::QUDACheckedTransformer(inverter_type_map));
573 "The type of Schwarz preconditioning to use (default=invalid)")
574 ->transform(CLI::QUDACheckedTransformer(schwarz_type_map));
576 "The number of Schwarz cycles to apply per smoother application (default=1)");
578 CLI::TransformPairs<int> rank_order_map {{
"col", 0}, {
"row", 1}};
581 "Set the [t][z][y][x] rank order as either column major (t fastest, default) or row major (x fastest)")
582 ->transform(CLI::QUDACheckedTransformer(rank_order_map));
584 quda_app->add_option(
"--recon",
link_recon,
"Link reconstruction type")
585 ->transform(CLI::QUDACheckedTransformer(reconstruct_type_map));
586 quda_app->add_option(
"--recon-precondition",
link_recon_precondition,
"Preconditioner link reconstruction type")
587 ->transform(CLI::QUDACheckedTransformer(reconstruct_type_map));
588 quda_app->add_option(
"--recon-eigensolver",
link_recon_eigensolver,
"Eigensolver link reconstruction type")
589 ->transform(CLI::QUDACheckedTransformer(reconstruct_type_map));
590 quda_app->add_option(
"--recon-sloppy",
link_recon_sloppy,
"Sloppy link reconstruction type")
591 ->transform(CLI::QUDACheckedTransformer(reconstruct_type_map));
593 quda_app->add_option(
"--reliable-delta",
reliable_delta,
"Set reliable update delta factor");
595 "Save gauge field \" file \" for the test (requires QIO, heatbath test only)");
598 "The pipeline length for fused solution accumulation (default 0, no pipelining)");
603 "The solution we desire (mat (default), mat-dag-mat, mat-pc, mat-pc-dag-mat-pc (default for multi-shift))")
604 ->transform(CLI::QUDACheckedTransformer(solution_type_map));
608 "The fermoinic temporal boundary conditions (anti-periodic (default), periodic")
609 ->transform(CLI::QUDACheckedTransformer(fermion_t_boundary_map));
613 "The type of solve to do (direct, direct-pc, normop, normop-pc, normerr, normerr-pc)")
614 ->transform(CLI::QUDACheckedTransformer(solve_type_map));
616 ->add_option(
"--solver-ext-lib-type",
solver_ext_lib,
"Set external library for the solvers (default Eigen library)")
617 ->transform(CLI::QUDACheckedTransformer(extlib_map));
620 "Tadpole coefficient for HISQ fermions (default 1.0, recommended [Plaq]^1/4)");
622 quda_app->add_option(
"--tol",
tol,
"Set L2 residual tolerance");
623 quda_app->add_option(
"--tolhq",
tol_hq,
"Set heavy-quark residual tolerance");
624 quda_app->add_option(
"--tol-precondition",
tol_precondition,
"Set L2 residual tolerance for preconditioner");
625 quda_app->add_option(
627 "Generate a unit valued gauge field in the tests. If false, a random gauge is generated (default false)");
629 quda_app->add_option(
"--verbosity",
verbosity,
"The the verbosity on the top level of QUDA( default summarize)")
630 ->transform(CLI::QUDACheckedTransformer(verbosity_map));
631 quda_app->add_option(
"--verify",
verify_results,
"Verify the GPU results using CPU results (default true)");
634 auto dimopt = quda_app->add_option(
"--dim",
dim,
"Set space-time dimension (X Y Z T)")->check(
CLI::Range(1, 512));
635 auto sdimopt = quda_app
638 [](CLI::results_t res) {
639 return CLI::detail::lexical_cast(res[0],
xdim) && CLI::detail::lexical_cast(res[0],
ydim)
640 && CLI::detail::lexical_cast(res[0],
zdim);
642 "Set space dimension(X/Y/Z) size")
646 quda_app->add_option(
"--xdim",
xdim,
"Set X dimension size(default 24)")
650 quda_app->add_option(
"--ydim",
ydim,
"Set X dimension size(default 24)")
654 quda_app->add_option(
"--zdim",
zdim,
"Set X dimension size(default 24)")
658 quda_app->add_option(
"--tdim",
tdim,
"Set T dimension size(default 24)")->check(
CLI::Range(1, 512))->excludes(dimopt);
662 quda_app->add_option(
664 [](CLI::results_t res) {
666 auto retval = CLI::detail::lexical_cast(res[0], p);
667 for (
int j = 0; j < 4; j++) {
672 "Set the communication topology (X=1, Y=2, Z=4, T=8, and combinations of these)");
676 ->add_option(
"--gridsize",
gridsize_from_cmdline,
"Set the grid size in all four dimension (default 1 1 1 1)")
678 quda_app->add_option(
"--xgridsize",
grid_x,
"Set grid size in X dimension (default 1)")->excludes(gridsizeopt);
679 quda_app->add_option(
"--ygridsize",
grid_y,
"Set grid size in Y dimension (default 1)")->excludes(gridsizeopt);
680 quda_app->add_option(
"--zgridsize",
grid_z,
"Set grid size in Z dimension (default 1)")->excludes(gridsizeopt);
681 quda_app->add_option(
"--tgridsize",
grid_t,
"Set grid size in T dimension (default 1)")->excludes(gridsizeopt);
689 CLI::QUDACheckedTransformer prec_transform(precision_map);
691 auto opgroup = quda_app->add_option_group(
"Eigensolver",
"Options controlling eigensolver");
693 opgroup->add_option(
"--eig-amax",
eig_amax,
"The maximum in the polynomial acceleration")->check(CLI::PositiveNumber);
694 opgroup->add_option(
"--eig-amin",
eig_amin,
"The minimum in the polynomial acceleration")->check(CLI::PositiveNumber);
696 opgroup->add_option(
"--eig-ARPACK-logfile",
eig_arpack_logfile,
"The filename storing the log from arpack");
698 "Cross check the device data against ARPACK (requires ARPACK, default false)");
700 "Use Eigen to eigensolve the upper Hessenberg in IRAM, else use QUDA's QR code. (default true)");
702 "Solve the MdagM problem, use to compute SVD of M (default false)");
705 "Solve the gamma5 OP problem. Solve for OP then multiply by gamma_5 (default false)");
707 opgroup->add_option(
"--eig-max-restarts",
eig_max_restarts,
"Perform n iterations of the restart in the eigensolver");
708 opgroup->add_option(
"--eig-block-size",
eig_block_size,
"The block size to use in the block variant eigensolver");
711 "The number of converged eigenpairs that will be used in the deflation routines (default eig_n_conv)");
712 opgroup->add_option(
"--eig-n-conv",
eig_n_conv,
"The number of converged eigenvalues requested (default eig_n_ev)");
713 opgroup->add_option(
"--eig-n-ev",
eig_n_ev,
"The size of eigenvector search space in the eigensolver");
714 opgroup->add_option(
"--eig-n-kr",
eig_n_kr,
"The size of the Krylov subspace to use in the eigensolver");
716 "The maximum number of extra eigenvectors the solver may allocate to perform a Ritz rotation.");
717 opgroup->add_option(
"--eig-poly-deg",
eig_poly_deg,
"TODO");
719 "--eig-require-convergence",
720 eig_require_convergence,
"If true, the solver will error out if convergence is not attained. If false, a warning will be given (default true)");
721 opgroup->add_option(
"--eig-save-vec",
eig_vec_outfile,
"Save eigenvectors to <file> (requires QIO)");
722 opgroup->add_option(
"--eig-load-vec",
eig_vec_infile,
"Load eigenvectors to <file> (requires QIO)")
723 ->check(CLI::ExistingFile);
726 "If saving eigenvectors, use this precision to save. No-op if eig-save-prec is greater than or equal "
727 "to precision of eigensolver (default = double)")
728 ->transform(prec_transform);
732 "Whether to inflate single-parity eigenvectors onto dual parity full fields for file I/O (default = false)");
736 "The spectrum part to be calulated. S=smallest L=largest R=real M=modulus I=imaginary")
737 ->transform(CLI::QUDACheckedTransformer(eig_spectrum_map));
738 opgroup->add_option(
"--eig-tol",
eig_tol,
"The tolerance to use in the eigensolver (default 1e-6)");
739 opgroup->add_option(
"--eig-qr-tol",
eig_qr_tol,
"The tolerance to use in the qr (default 1e-11)");
741 opgroup->add_option(
"--eig-type",
eig_type,
"The type of eigensolver to use (default trlm)")
742 ->transform(CLI::QUDACheckedTransformer(eig_type_map));
745 "Solve the Mdag problem instead of M (MMdag if eig-use-normop == true) (default false)");
747 "Solve the MdagM problem instead of M (MMdag if eig-use-dagger == true) (default false)");
748 opgroup->add_option(
"--eig-use-poly-acc",
eig_use_poly_acc,
"Use Chebyshev polynomial acceleration in the eigensolver");
753 auto opgroup = quda_app->add_option_group(
"Deflation",
"Options controlling deflation");
757 "Set maximum number of cycles needed to compute eigenvectors(default 1)")
758 ->check(CLI::PositiveNumber);
761 "--df-eigcg-max-restarts",
762 eigcg_max_restarts,
"Set how many iterative refinement cycles will be solved with eigCG within a single physical right hand site solve (default 4)")
763 ->check(CLI::PositiveNumber);
766 "Set external library for the deflation methods (default Eigen library)");
768 "Set memory location for the ritz vectors (default cuda memory location)");
770 "Set maximum number of the initCG restarts in the deflation stage (default 3)");
771 opgroup->add_option(
"--df-max-search-dim",
max_search_dim,
"Set the size of eigenvector search space (default 64)");
773 "Set memory type for the ritz vectors (default device memory type)");
774 opgroup->add_option(
"--df-n-ev",
n_ev,
"Set number of eigenvectors computed within a single solve cycle (default 8)");
775 opgroup->add_option(
"--df-tol-eigenval",
eigenval_tol,
"Set maximum eigenvalue residual norm (default 1e-1)");
776 opgroup->add_option(
"--df-tol-inc",
inc_tol,
777 "Set tolerance for the subsequent restarts in the initCG solver (default 1e-2)");
778 opgroup->add_option(
"--df-tol-restart",
tol_restart,
779 "Set tolerance for the first restart in the initCG solver(default 5e-5)");
784 auto opgroup = quda_app->add_option_group(
"MultiGrid",
"Options controlling deflation");
788 auto solve_type_transform = CLI::QUDACheckedTransformer(solve_type_map);
790 CLI::QUDACheckedTransformer prec_transform(precision_map);
792 quda_app->add_mgoption(
794 "Set the geometric block size for the each multigrid levels transfer operator (default 4 4 4 4)");
795 quda_app->add_mgoption(opgroup,
"--mg-coarse-solve-type",
coarse_solve_type, solve_type_transform,
796 "The type of solve to do on each level (direct, direct-pc) (default = solve_type)");
798 auto solver_trans = CLI::QUDACheckedTransformer(inverter_type_map);
799 quda_app->add_mgoption(opgroup,
"--mg-coarse-solver",
coarse_solver, solver_trans,
800 "The solver to wrap the V cycle on each level (default gcr, only for levels 1+)");
803 "The basis size to use for CA-CG setup of multigrid (default 4)");
806 CLI::QUDACheckedTransformer(ca_basis_map),
807 "The basis to use for CA-CG setup of multigrid(default power)");
810 "Conservative estimate of largest eigenvalue for Chebyshev basis CA-CG in setup of multigrid "
811 "(default is to guess with power iterations)");
812 quda_app->add_mgoption(
814 "Conservative estimate of smallest eigenvalue for Chebyshev basis CA-CG in setup of multigrid (default 0)");
815 quda_app->add_mgoption(opgroup,
"--mg-coarse-solver-maxiter",
coarse_solver_maxiter, CLI::PositiveNumber,
816 "The coarse solver maxiter for each level (default 100)");
817 quda_app->add_mgoption(opgroup,
"--mg-coarse-solver-tol",
coarse_solver_tol, CLI::PositiveNumber,
818 "The coarse solver tolerance for each level (default 0.25, only for levels 1+)");
819 quda_app->add_mgoption(opgroup,
"--mg-eig",
mg_eig, CLI::Validator(),
820 "Use the eigensolver on this level (default false)");
821 quda_app->add_mgoption(opgroup,
"--mg-eig-amax",
mg_eig_amax, CLI::PositiveNumber,
822 "The maximum in the polynomial acceleration (default 4.0)");
823 quda_app->add_mgoption(opgroup,
"--mg-eig-amin",
mg_eig_amin, CLI::PositiveNumber,
824 "The minimum in the polynomial acceleration (default 0.1)");
825 quda_app->add_mgoption(
827 "Perform a convergence check every nth restart/iteration (only used in Implicit Restart types)");
829 "If deflating on the coarse grid, optionally use an initial guess (default = false)");
831 "If the multigrid operator is updated, preserve generated deflation space (default = false)");
832 quda_app->add_mgoption(opgroup,
"--mg-eig-max-restarts",
mg_eig_max_restarts, CLI::PositiveNumber,
833 "Perform a maximun of n restarts in eigensolver (default 100)");
834 quda_app->add_mgoption(
836 "Use Eigen to eigensolve the upper Hessenberg in IRAM, else use QUDA's QR code. (default true)");
837 quda_app->add_mgoption(opgroup,
"--mg-eig-block-size",
mg_eig_block_size, CLI::Validator(),
838 "The block size to use in the block variant eigensolver");
839 quda_app->add_mgoption(opgroup,
"--mg-eig-n-ev",
mg_eig_n_ev, CLI::Validator(),
840 "The size of eigenvector search space in the eigensolver");
841 quda_app->add_mgoption(opgroup,
"--mg-eig-n-kr",
mg_eig_n_kr, CLI::Validator(),
842 "The size of the Krylov subspace to use in the eigensolver");
843 quda_app->add_mgoption(opgroup,
"--mg-eig-n-ev-deflate",
mg_eig_n_ev_deflate, CLI::Validator(),
844 "The number of converged eigenpairs that will be used in the deflation routines");
845 quda_app->add_mgoption(
847 "The maximum number of extra eigenvectors the solver may allocate to perform a Ritz rotation.");
848 quda_app->add_mgoption(opgroup,
"--mg-eig-poly-deg",
mg_eig_poly_deg, CLI::PositiveNumber,
849 "Set the degree of the Chebyshev polynomial (default 100)");
850 quda_app->add_mgoption(
852 CLI::Validator(),
"If true, the solver will error out if convergence is not attained. If false, a warning will be given (default true)");
854 quda_app->add_mgoption(
855 opgroup,
"--mg-eig-spectrum",
mg_eig_spectrum, CLI::QUDACheckedTransformer(eig_spectrum_map),
856 "The spectrum part to be calulated. S=smallest L=largest R=real M=modulus I=imaginary (default SR)");
857 quda_app->add_mgoption(opgroup,
"--mg-eig-tol",
mg_eig_tol, CLI::PositiveNumber,
858 "The tolerance to use in the eigensolver (default 1e-6)");
859 quda_app->add_mgoption(opgroup,
"--mg-eig-qr-tol",
mg_eig_qr_tol, CLI::PositiveNumber,
860 "The tolerance to use in the QR (default 1e-11)");
862 quda_app->add_mgoption(opgroup,
"--mg-eig-type",
mg_eig_type, CLI::QUDACheckedTransformer(eig_type_map),
863 "The type of eigensolver to use (default trlm)");
864 quda_app->add_mgoption(opgroup,
"--mg-eig-use-dagger",
mg_eig_use_dagger, CLI::Validator(),
865 "Solve the MMdag problem instead of M (MMdag if eig-use-normop == true) (default false)");
866 quda_app->add_mgoption(opgroup,
"--mg-eig-use-normop",
mg_eig_use_normop, CLI::Validator(),
867 "Solve the MdagM problem instead of M (MMdag if eig-use-dagger == true) (default false)");
868 quda_app->add_mgoption(opgroup,
"--mg-eig-use-poly-acc",
mg_eig_use_poly_acc, CLI::Validator(),
869 "Use Chebyshev polynomial acceleration in the eigensolver (default true)");
871 "--mg-generate-all-levels",
872 generate_all_levels,
"true=generate null-space on all levels, false=generate on level 0 and create other levels from that (default true)");
874 "Utilize thin updates for multigrid evolution tests (default false)");
876 "Generate the null-space vector dynamically (default true, if set false and mg-load-vec isn't "
877 "set, creates free-field null vectors)");
878 opgroup->add_option(
"--mg-levels",
mg_levels,
"The number of multigrid levels to do (default 2)");
881 quda_app->add_mgoption(opgroup,
"--mg-load-vec",
mg_vec_infile, CLI::Validator(),
882 "Load the vectors <file> for the multigrid_test (requires QIO)");
883 quda_app->add_mgoption(opgroup,
"--mg-save-vec",
mg_vec_outfile, CLI::Validator(),
884 "Save the generated null-space vectors <file> from the multigrid_test (requires QIO)");
888 "If saving eigenvectors, use this precision to save. No-op if mg-eig-save-prec is greater than or "
889 "equal to precision of eigensolver (default = double)")
890 ->transform(prec_transform);
894 "Measure how well the null vector subspace overlaps with the low eigenmode subspace (default false)");
895 quda_app->add_mgoption(opgroup,
"--mg-mu-factor",
mu_factor, CLI::Validator(),
896 "Set the multiplicative factor for the twisted mass mu parameter on each level (default 1)");
897 quda_app->add_mgoption(opgroup,
"--mg-n-block-ortho",
n_block_ortho, CLI::PositiveNumber,
898 "The number of times to run Gram-Schmidt during block orthonormalization (default 1)");
899 quda_app->add_mgoption(opgroup,
"--mg-nu-post",
nu_post, CLI::PositiveNumber,
900 "The number of post-smoother applications to do at a given multigrid level (default 2)");
901 quda_app->add_mgoption(opgroup,
"--mg-nu-pre",
nu_pre, CLI::PositiveNumber,
902 "The number of pre-smoother applications to do at a given multigrid level (default 2)");
903 quda_app->add_mgoption(opgroup,
"--mg-nvec",
nvec, CLI::PositiveNumber,
904 "Number of null-space vectors to define the multigrid transfer operator on a given level");
906 "Measure how well the null vector subspace adjusts the low eigenmode subspace (default false)");
907 opgroup->add_option(
"--mg-omega",
omega,
908 "The over/under relaxation factor for the smoother of multigrid (default 0.85)");
910 "If orthonormalize the vector after inverting in the setup of multigrid (default true)");
912 "If orthonormalize the vector before inverting in the setup of multigrid (default false)");
915 ->add_mgoption(opgroup,
"--mg-schwarz-type",
mg_schwarz_type, CLI::Validator(),
916 "The type of preconditioning to use (requires MR smoother and GCR setup solver) (default=invalid)")
917 ->transform(CLI::QUDACheckedTransformer(schwarz_type_map));
918 quda_app->add_mgoption(opgroup,
"--mg-schwarz-cycle",
mg_schwarz_cycle, CLI::PositiveNumber,
919 "The number of Schwarz cycles to apply per smoother application (default=1)");
920 quda_app->add_mgoption(opgroup,
"--mg-setup-ca-basis-size",
setup_ca_basis_size, CLI::PositiveNumber,
921 "The basis size to use for CA-CG setup of multigrid (default 4)");
922 quda_app->add_mgoption(opgroup,
"--mg-setup-ca-basis-type",
setup_ca_basis, CLI::QUDACheckedTransformer(ca_basis_map),
923 "The basis to use for CA-CG setup of multigrid(default power)");
924 quda_app->add_mgoption(opgroup,
"--mg-setup-cheby-basis-eig-max",
setup_ca_lambda_max, CLI::PositiveNumber,
925 "Conservative estimate of largest eigenvalue for Chebyshev basis CA-CG in setup of multigrid "
926 "(default is to guess with power iterations)");
927 quda_app->add_mgoption(
929 "Conservative estimate of smallest eigenvalue for Chebyshev basis CA-CG in setup of multigrid (default 0)");
930 quda_app->add_mgoption(opgroup,
"--mg-setup-inv",
setup_inv, solver_trans,
931 "The inverter to use for the setup of multigrid (default bicgstab)");
932 quda_app->add_mgoption(opgroup,
"--mg-setup-iters",
num_setup_iter, CLI::PositiveNumber,
933 "The number of setup iterations to use for the multigrid (default 1)");
935 quda_app->add_mgoption(
936 opgroup,
"--mg-setup-maxiter",
setup_maxiter, CLI::Validator(),
937 "The maximum number of solver iterations to use when relaxing on a null space vector (default 500)");
938 quda_app->add_mgoption(
940 "The maximum number of solver iterations to use when refreshing the pre-existing null space vectors (default 100)");
941 quda_app->add_mgoption(opgroup,
"--mg-setup-tol",
setup_tol, CLI::Validator(),
942 "The tolerance to use for the setup of multigrid (default 5e-6)");
944 opgroup->add_option(
"--mg-setup-type",
setup_type,
"The type of setup to use for the multigrid (default null)")
945 ->transform(CLI::QUDACheckedTransformer(setup_type_map));
950 "--mg-staggered-coarsen-type",
951 staggered_transfer_type,
"The type of coarsening to use for the top level staggered operator (aggregate, kd-coarse (default), kd-optimized)")
952 ->transform(CLI::QUDACheckedTransformer(transfer_type_map));
954 quda_app->add_mgoption(opgroup,
"--mg-smoother",
smoother_type, solver_trans,
955 "The smoother to use for multigrid (default mr)");
959 "The smoother halo precision (applies to all levels - defaults to null_precision)")
960 ->transform(prec_transform);
962 quda_app->add_mgoption(opgroup,
"--mg-smoother-solve-type",
smoother_solve_type, solve_type_transform,
963 "The type of solve to do in smoother (direct, direct-pc (default) )");
964 quda_app->add_mgoption(opgroup,
"--mg-smoother-tol",
smoother_tol, CLI::Validator(),
965 "The smoother tolerance to use for each multigrid (default 0.25)");
967 quda_app->add_mgoption(opgroup,
"--mg-verbosity",
mg_verbosity, CLI::QUDACheckedTransformer(verbosity_map),
968 "The verbosity to use on each level of the multigrid (default summarize)");
972 "Use tensor-core to accelerate multigrid (default = true on Volta or later with CUDA >=10.1, otherwise false)");
977 auto opgroup = quda_app->add_option_group(
"EOFA",
"Options controlling EOFA parameteres");
979 CLI::TransformPairs<int> eofa_pm_map {{
"plus", 1}, {
"minus", 0}};
980 opgroup->add_option(
"--eofa-pm",
eofa_pm,
"Set to evalute \"plus\" or \"minus\" EOFA operator (default plus)")
981 ->transform(CLI::QUDACheckedTransformer(eofa_pm_map));
982 opgroup->add_option(
"--eofa-shift",
eofa_shift,
"Set the shift for the EOFA operator (default -0.12345)");
983 opgroup->add_option(
"--eofa-mq1",
eofa_mq1,
"Set mq1 for EOFA operator (default 1.0)");
984 opgroup->add_option(
"--eofa-mq2",
eofa_mq1,
"Set mq2 for EOFA operator (default 0.085)");
985 opgroup->add_option(
"--eofa-mq3",
eofa_mq1,
"Set mq3 for EOFA operator (default 1.0)");
992 auto opgroup = quda_app->add_option_group(
"SU(3)",
"Options controlling SU(3) tests");
993 opgroup->add_option(
"--su3-ape-rho",
ape_smear_rho,
"rho coefficient for APE smearing (default 0.6)");
996 "rho coefficient for Stout and Over-Improved Stout smearing (default 0.08)");
999 "epsilon coefficient for Over-Improved Stout smearing (default -0.25)");
1001 opgroup->add_option(
"--su3-smear-steps",
smear_steps,
"The number of smearing steps to perform (default 50)");
1003 opgroup->add_option(
"--su3-wflow-epsilon",
wflow_epsilon,
"The step size in the Runge-Kutta integrator (default 0.01)");
1005 opgroup->add_option(
"--su3-wflow-steps",
wflow_steps,
1006 "The number of steps in the Runge-Kutta integrator (default 100)");
1008 opgroup->add_option(
"--su3-wflow-type",
wflow_type,
"The type of action to use in the wilson flow (default wilson)")
1009 ->transform(CLI::QUDACheckedTransformer(wflow_type_map));
1013 "Measure the field energy and topological charge every Nth step (default 5) ");
1019 = quda_app->add_option_group(
"Communication",
"Options controlling communication (split grid) parameteres");
1020 opgroup->add_option(
"--grid-partition",
grid_partition,
"Set the grid partition (default 1 1 1 1)")->expected(4);
QudaTransferType staggered_transfer_type
double stout_smear_epsilon
QudaInverterType precon_type
QudaPrecision prec_refinement_sloppy
quda::mgarray< int > num_setup_iter
bool eig_io_parity_inflate
quda::mgarray< int > mg_eig_poly_deg
QudaInverterType inv_type
quda::mgarray< QudaEigType > mg_eig_type
QudaReconstructType link_recon_sloppy
bool mg_eig_preserve_deflation
quda::mgarray< int > mg_eig_check_interval
quda::mgarray< char[256]> mg_vec_outfile
std::shared_ptr< QUDAApp > make_app(std::string app_description, std::string app_name)
quda::mgarray< double > setup_ca_lambda_max
QudaExtLibType solver_ext_lib
QudaReconstructType link_recon
quda::mgarray< int > mg_eig_n_ev_deflate
quda::mgarray< bool > mg_eig
QudaPrecision eig_save_prec
quda::mgarray< QudaInverterType > coarse_solver
QudaReconstructType link_recon_precondition
quda::mgarray< QudaCABasis > coarse_solver_ca_basis
quda::mgarray< int > n_block_ortho
void add_multigrid_option_group(std::shared_ptr< QUDAApp > quda_app)
QudaTwistFlavorType twist_flavor
std::array< int, 4 > grid_partition
QudaBLASOperation blas_trans_b
quda::mgarray< int > coarse_solver_maxiter
quda::mgarray< char[256]> mg_vec_infile
quda::mgarray< int > nu_post
quda::mgarray< int > nu_pre
quda::mgarray< int > setup_ca_basis_size
quda::mgarray< int > mg_eig_n_kr
quda::mgarray< int > coarse_solver_ca_basis_size
char eig_vec_outfile[256]
quda::mgarray< double > mg_eig_amin
quda::mgarray< QudaVerbosity > mg_verbosity
quda::mgarray< double > setup_ca_lambda_min
quda::mgarray< int > setup_maxiter
quda::mgarray< int > nvec
std::array< double, 2 > blas_alpha_re_im
void add_eofa_option_group(std::shared_ptr< QUDAApp > quda_app)
QudaBLASDataOrder blas_data_order
quda::mgarray< QudaCABasis > setup_ca_basis
quda::mgarray< QudaSchwarzType > mg_schwarz_type
QudaMemoryType mem_type_ritz
quda::mgarray< QudaEigSpectrumType > mg_eig_spectrum
quda::mgarray< int > mg_eig_max_restarts
QudaSolutionType solution_type
int heatbath_num_heatbath_per_step
QudaDslashType dslash_type
QudaTboundary fermion_t_boundary
quda::mgarray< bool > mg_eig_use_dagger
std::array< int, 3 > blas_leading_dims
quda::mgarray< double > mu_factor
QudaExtLibType deflation_ext_lib
quda::mgarray< double > coarse_solver_ca_lambda_max
bool alternative_reliable
int solution_accumulator_pipeline
std::array< int, 4 > dim_partitioned
std::array< int, 3 > blas_strides
quda::mgarray< int > mg_eig_n_ev
quda::mgarray< double > mg_eig_amax
void add_eigen_option_group(std::shared_ptr< QUDAApp > quda_app)
quda::mgarray< QudaInverterType > setup_inv
bool mg_evolve_thin_updates
std::array< double, 2 > blas_beta_re_im
quda::mgarray< double > setup_tol
QudaFieldLocation location_ritz
quda::mgarray< double > coarse_solver_ca_lambda_min
QudaEigSpectrumType eig_spectrum
quda::mgarray< bool > mg_eig_use_poly_acc
quda::mgarray< double > mg_eig_qr_tol
QudaPrecision prec_eigensolver
quda::mgarray< QudaSolveType > coarse_solve_type
std::array< int, 3 > blas_offsets
quda::mgarray< double > mg_eig_tol
QudaSchwarzType precon_schwarz_type
void add_deflation_option_group(std::shared_ptr< QUDAApp > quda_app)
quda::mgarray< int > mg_eig_batched_rotate
QudaPrecision prec_precondition
quda::mgarray< bool > mg_eig_use_normop
quda::mgarray< bool > mg_eig_use_eigen_qr
QudaReconstructType link_recon_eigensolver
quda::mgarray< QudaPrecision > mg_eig_save_prec
char eig_arpack_logfile[256]
void add_su3_option_group(std::shared_ptr< QUDAApp > quda_app)
quda::mgarray< double > smoother_tol
QudaContractType contract_type
double heatbath_beta_value
int heatbath_num_overrelax_per_step
quda::mgarray< QudaSolveType > smoother_solve_type
bool eig_require_convergence
quda::mgarray< bool > mg_eig_require_convergence
quda::mgarray< int > mg_eig_block_size
quda::mgarray< double > coarse_solver_tol
QudaPrecision smoother_halo_prec
std::array< int, 3 > blas_mnk
quda::mgarray< QudaInverterType > smoother_type
quda::mgarray< int > setup_maxiter_refresh
void add_comms_option_group(std::shared_ptr< QUDAApp > quda_app)
QudaBLASOperation blas_trans_a
quda::mgarray< int > mg_schwarz_cycle
std::array< int, 4 > gridsize_from_cmdline
int heatbath_warmup_steps
QudaMassNormalization normalization
QudaBLASDataType blas_data_type
quda::mgarray< std::array< int, 4 > > geo_block_size
QudaPrecision prec_sloppy
enum QudaSolveType_s QudaSolveType
enum QudaWFlowType_s QudaWFlowType
enum QudaBLASOperation_s QudaBLASOperation
enum QudaPrecision_s QudaPrecision
@ QUDA_TWISTED_CLOVER_DSLASH
@ QUDA_CLOVER_WILSON_DSLASH
@ QUDA_TWISTED_MASS_DSLASH
@ QUDA_DOMAIN_WALL_DSLASH
@ QUDA_MOBIUS_DWF_EOFA_DSLASH
@ QUDA_CLOVER_HASENBUSCH_TWIST_DSLASH
@ QUDA_DOMAIN_WALL_4D_DSLASH
@ QUDA_CUDA_FIELD_LOCATION
@ QUDA_CPU_FIELD_LOCATION
enum QudaTwistFlavorType_s QudaTwistFlavorType
@ QUDA_KAPPA_NORMALIZATION
@ QUDA_ASYMMETRIC_MASS_NORMALIZATION
@ QUDA_MASS_NORMALIZATION
enum QudaTransferType_s QudaTransferType
enum QudaBLASDataOrder_s QudaBLASDataOrder
enum QudaTboundary_s QudaTboundary
@ QUDA_RECONSTRUCT_INVALID
enum QudaDslashType_s QudaDslashType
@ QUDA_TRANSFER_COARSE_KD
@ QUDA_TRANSFER_AGGREGATE
@ QUDA_TRANSFER_OPTIMIZED_KD
enum QudaSolutionType_s QudaSolutionType
enum QudaEigSpectrumType_s QudaEigSpectrumType
enum QudaInverterType_s QudaInverterType
@ QUDA_EIG_BLK_IR_ARNOLDI
@ QUDA_EIG_BLK_TR_LANCZOS
enum QudaFieldLocation_s QudaFieldLocation
enum QudaMassNormalization_s QudaMassNormalization
enum QudaBLASDataType_s QudaBLASDataType
enum QudaExtLibType_s QudaExtLibType
enum QudaEigType_s QudaEigType
@ QUDA_MATPC_ODD_ODD_ASYMMETRIC
@ QUDA_MATPC_EVEN_EVEN_ASYMMETRIC
enum QudaMatPCType_s QudaMatPCType
enum QudaSetupType_s QudaSetupType
@ QUDA_BLAS_DATAORDER_COL
@ QUDA_BLAS_DATAORDER_ROW
@ QUDA_GMRESDR_PROJ_INVERTER
@ QUDA_INC_EIGCG_INVERTER
@ QUDA_BICGSTABL_INVERTER
@ QUDA_GMRESDR_SH_INVERTER
@ QUDA_MPBICGSTAB_INVERTER
enum QudaMemoryType_s QudaMemoryType
enum QudaReconstructType_s QudaReconstructType
@ QUDA_MATPC_DAG_SOLUTION
@ QUDA_MATDAG_MAT_SOLUTION
@ QUDA_MATPCDAG_MATPC_SOLUTION
@ QUDA_MULTIPLICATIVE_SCHWARZ
enum QudaCABasis_s QudaCABasis
enum QudaContractType_s QudaContractType
enum QudaSchwarzType_s QudaSchwarzType
enum QudaVerbosity_s QudaVerbosity
@ QUDA_TWIST_NONDEG_DOUBLET
@ QUDA_CONTRACT_TYPE_OPEN
@ QUDA_WFLOW_TYPE_SYMANZIK
std::array< T, QUDA_MAX_MG_LEVEL > mgarray
internal::ParamGenerator< T > Range(T start, T end, IncrementT step)