19 #define MAX(a,b) ((a)>(b)?(a):(b)) 78 extern void usage(
char** );
87 printfQuda(
"prec prec_sloppy multishift matpc_type recon recon_sloppy S_dimension T_dimension Ls_dimension dslash_type normalization\n");
88 printfQuda(
"%6s %6s %d %12s %2s %2s %3d/%3d/%3d %3d %2d %14s %8s\n",
107 int main(
int argc,
char **argv)
114 for (
int i = 1; i < argc; i++){
118 printfQuda(
"ERROR: Invalid option:%s\n", argv[i]);
157 gauge_param.
X[0] =
xdim;
158 gauge_param.
X[1] =
ydim;
159 gauge_param.
X[2] =
zdim;
160 gauge_param.
X[3] =
tdim;
199 kappa5 = 0.5/(5 + inv_param.
m5);
201 for(
int k = 0; k <
Lsdim; k++)
205 inv_param.
b_5[k] = 1.452;
206 inv_param.
c_5[k] = 0.452;
212 double offset[12] = {0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0.11, 0.12};
235 errorQuda(
"qudaInvert: requesting zero residual\n");
266 inv_param.
omega = 1.0;
285 int x_face_size = gauge_param.
X[1]*gauge_param.
X[2]*gauge_param.
X[3]/2;
286 int y_face_size = gauge_param.
X[0]*gauge_param.
X[2]*gauge_param.
X[3]/2;
287 int z_face_size = gauge_param.
X[0]*gauge_param.
X[1]*gauge_param.
X[3]/2;
288 int t_face_size = gauge_param.
X[0]*gauge_param.
X[1]*gauge_param.
X[2]/2;
289 int pad_size =
MAX(x_face_size, y_face_size);
290 pad_size =
MAX(pad_size, z_face_size);
291 pad_size =
MAX(pad_size, t_face_size);
292 gauge_param.
ga_pad = pad_size;
324 void *gauge[4], *clover=0, *clover_inv=0;
326 for (
int dir = 0; dir < 4; dir++) {
361 void *
spinorOut = NULL, **spinorOutMulti = NULL;
363 spinorOutMulti = (
void**)malloc(inv_param.
num_offset*
sizeof(
void *));
379 printfQuda(
"Computed plaquette is %e (spatial = %e, temporal = %e)\n", plaq[0], plaq[1], plaq[2]);
385 double *time =
new double[
Nsrc];
386 double *gflops =
new double[
Nsrc];
390 for (
int i = 0; i <
Nsrc; i++) {
400 time[i] = inv_param.
secs;
401 gflops[i] = inv_param.
gflops / inv_param.
secs;
409 auto mean_time = 0.0;
410 auto mean_time2 = 0.0;
411 auto mean_gflops = 0.0;
412 auto mean_gflops2 = 0.0;
413 for (
int i = 0; i <
Nsrc; i++) {
414 mean_time += time[i];
415 mean_time2 += time[i] * time[i];
416 mean_gflops += gflops[i];
417 mean_gflops2 += gflops[i] * gflops[i];
422 auto stddev_time = Nsrc > 1 ?
sqrt((Nsrc / ((
double)Nsrc - 1.0)) * (mean_time2 - mean_time * mean_time)) : std::numeric_limits<double>::infinity();
424 mean_gflops2 /=
Nsrc;
425 auto stddev_gflops = Nsrc > 1 ?
sqrt((Nsrc / ((
double)Nsrc - 1.0)) * (mean_gflops2 - mean_gflops * mean_gflops)) : std::numeric_limits<double>::infinity();
426 printfQuda(
"%d solves, with mean solve time %g (stddev = %g), mean GFLOPS %g (stddev = %g)\n", Nsrc, mean_time,
427 stddev_time, mean_gflops, stddev_gflops);
434 errorQuda(
"Mass normalization not supported for multi-shift solver in invert_test");
446 void *out0 = spinorCheck;
447 void *out1 = (
char*)out0 + tm_offset*cpu_prec;
450 void *
tmp1 = (
char*)tmp0 + tm_offset*cpu_prec;
452 void *in0 = spinorOutMulti[i];
453 void *in1 = (
char*)in0 + tm_offset*cpu_prec;
455 tm_ndeg_matpc(tmp0, tmp1, gauge, in0, in1, inv_param.
kappa, inv_param.
mu, inv_param.
epsilon, inv_param.
matpc_type, 0, inv_param.
cpu_prec, gauge_param);
456 tm_ndeg_matpc(out0, out1, gauge, tmp0, tmp1, inv_param.
kappa, inv_param.
mu, inv_param.
epsilon, inv_param.
matpc_type, 1, inv_param.
cpu_prec, gauge_param);
465 errorQuda(
"Twisted mass solution type not supported");
466 tmc_matpc(spinorTmp, gauge, spinorOutMulti[i], clover, clover_inv, inv_param.
kappa, inv_param.
mu,
468 tmc_matpc(spinorCheck, gauge, spinorTmp, clover, clover_inv, inv_param.
kappa, inv_param.
mu,
481 printfQuda(
"Domain wall not supported for multi-shift\n");
485 axpy(inv_param.
offset[i], spinorOutMulti[i], spinorCheck,
Vh*spinorSiteSize, inv_param.
cpu_prec);
486 mxpy(spinorIn, spinorCheck,
Vh*spinorSiteSize, inv_param.
cpu_prec);
489 double l2r =
sqrt(nrm2 / src2);
491 printfQuda(
"Shift %d residuals: (L2 relative) tol %g, QUDA = %g, host = %g; (heavy-quark) tol %g, QUDA = %g\n",
506 void *evenOut = spinorCheck;
507 void *oddOut = (
char*)evenOut + tm_offset*cpu_prec;
510 void *oddIn = (
char*)evenIn + tm_offset*cpu_prec;
512 tm_ndeg_mat(evenOut, oddOut, gauge, evenIn, oddIn, inv_param.
kappa, inv_param.
mu, inv_param.
epsilon, 0, inv_param.
cpu_prec, gauge_param);
526 double _Complex *kappa_b = (
double _Complex *)malloc(
Lsdim *
sizeof(
double _Complex));
527 double _Complex *kappa_c = (
double _Complex *)malloc(
Lsdim *
sizeof(
double _Complex));
528 for(
int xs = 0 ; xs <
Lsdim ; xs++)
530 kappa_b[xs] = 1.0/(2*(inv_param.
b_5[xs]*(4.0 + inv_param.
m5) + 1.0));
531 kappa_c[xs] = 1.0/(2*(inv_param.
c_5[xs]*(4.0 + inv_param.
m5) - 1.0));
533 mdw_mat(spinorCheck, gauge, spinorOut, kappa_b, kappa_c, inv_param.
dagger, inv_param.
cpu_prec, gauge_param, inv_param.
mass, inv_param.
b_5, inv_param.
c_5);
556 void *out0 = spinorCheck;
557 void *out1 = (
char*)out0 + tm_offset*cpu_prec;
560 void *in1 = (
char*)in0 + tm_offset*cpu_prec;
562 tm_ndeg_matpc(out0, out1, gauge, in0, in1, inv_param.
kappa, inv_param.
mu, inv_param.
epsilon, inv_param.
matpc_type, 0, inv_param.
cpu_prec, gauge_param);
569 errorQuda(
"Twisted mass solution type not supported");
570 tmc_matpc(spinorCheck, gauge, spinorOut, clover, clover_inv, inv_param.
kappa, inv_param.
mu,
583 double _Complex *kappa_b = (
double _Complex *)malloc(
Lsdim *
sizeof(
double _Complex));
584 double _Complex *kappa_c = (
double _Complex *)malloc(
Lsdim *
sizeof(
double _Complex));
585 for(
int xs = 0 ; xs <
Lsdim ; xs++)
587 kappa_b[xs] = 1.0/(2*(inv_param.
b_5[xs]*(4.0 + inv_param.
m5) + 1.0));
588 kappa_c[xs] = 1.0/(2*(inv_param.
c_5[xs]*(4.0 + inv_param.
m5) - 1.0));
590 mdw_matpc(spinorCheck, gauge, spinorOut, kappa_b, kappa_c, inv_param.
matpc_type, 0, inv_param.
cpu_prec, gauge_param, inv_param.
mass, inv_param.
b_5, inv_param.
c_5);
617 void *out0 = spinorCheck;
618 void *out1 = (
char*)out0 + tm_offset*cpu_prec;
621 void *
tmp1 = (
char*)tmp0 + tm_offset*cpu_prec;
624 void *in1 = (
char*)in0 + tm_offset*cpu_prec;
626 tm_ndeg_matpc(tmp0, tmp1, gauge, in0, in1, inv_param.
kappa, inv_param.
mu, inv_param.
epsilon, inv_param.
matpc_type, 0, inv_param.
cpu_prec, gauge_param);
627 tm_ndeg_matpc(out0, out1, gauge, tmp0, tmp1, inv_param.
kappa, inv_param.
mu, inv_param.
epsilon, inv_param.
matpc_type, 1, inv_param.
cpu_prec, gauge_param);
636 errorQuda(
"Twisted mass solution type not supported");
637 tmc_matpc(spinorTmp, gauge, spinorOut, clover, clover_inv, inv_param.
kappa, inv_param.
mu,
639 tmc_matpc(spinorCheck, gauge, spinorTmp, clover, clover_inv, inv_param.
kappa, inv_param.
mu,
658 double _Complex *kappa_b = (
double _Complex *)malloc(
Lsdim *
sizeof(
double _Complex));
659 double _Complex *kappa_c = (
double _Complex *)malloc(
Lsdim *
sizeof(
double _Complex));
660 for(
int xs = 0 ; xs <
Lsdim ; xs++)
662 kappa_b[xs] = 1.0/(2*(inv_param.
b_5[xs]*(4.0 + inv_param.
m5) + 1.0));
663 kappa_c[xs] = 1.0/(2*(inv_param.
c_5[xs]*(4.0 + inv_param.
m5) - 1.0));
665 mdw_matpc(spinorTmp, gauge, spinorOut, kappa_b, kappa_c, inv_param.
matpc_type, 0, inv_param.
cpu_prec, gauge_param, inv_param.
mass, inv_param.
b_5, inv_param.
c_5);
666 mdw_matpc(spinorCheck, gauge, spinorTmp, kappa_b, kappa_c, inv_param.
matpc_type, 1, inv_param.
cpu_prec, gauge_param, inv_param.
mass, inv_param.
b_5, inv_param.
c_5);
674 errorQuda(
"Mass normalization not implemented");
683 double nrm2 =
norm_2(spinorCheck, vol*spinorSiteSize*inv_param.
Ls, inv_param.
cpu_prec);
684 double src2 =
norm_2(spinorIn, vol*spinorSiteSize*inv_param.
Ls, inv_param.
cpu_prec);
685 double l2r =
sqrt(nrm2 / src2);
687 printfQuda(
"Residuals: (L2 relative) tol %g, QUDA = %g, host = %g; (heavy-quark) tol %g, QUDA = %g\n",
701 if (clover) free(clover);
702 if (clover_inv) free(clover_inv);
705 for (
int dir = 0; dir<4; dir++) free(gauge[dir]);
int dimPartitioned(int dim)
void ax(double a, ColorSpinorField &x)
QudaDiracFieldOrder dirac_order
QudaMassNormalization mass_normalization
double tol_hq_offset[QUDA_MAX_MULTI_SHIFT]
enum QudaMassNormalization_s QudaMassNormalization
QudaReconstructType reconstruct_sloppy
void freeCloverQuda(void)
QudaTwistFlavorType twist_flavor
void mdw_matpc(void *out, void **gauge, void *in, double _Complex *kappa_b, double _Complex *kappa_c, QudaMatPCType matpc_type, int dagger, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double _Complex *b5, double _Complex *c5)
cudaColorSpinorField * tmp1
void dw_4d_matpc(void *out, void **gauge, void *in, double kappa, QudaMatPCType matpc_type, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
void invertMultiShiftQuda(void **_hp_x, void *_hp_b, QudaInvertParam *param)
void construct_gauge_field(void **gauge, int type, QudaPrecision precision, QudaGaugeParam *param)
QudaVerbosity verbosity_precondition
enum QudaPrecision_s QudaPrecision
double_complex c_5[QUDA_MAX_DWF_LS]
void mdw_mat(void *out, void **gauge, void *in, double _Complex *kappa_b, double _Complex *kappa_c, int dagger, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double _Complex *b5, double _Complex *c5)
int main(int argc, char **argv)
void dw_setDims(int *X, const int L5)
QudaSchwarzType schwarz_type
__host__ __device__ ValueType norm(const complex< ValueType > &z)
Returns the magnitude of z squared.
void tm_mat(void *out, void **gauge, void *in, double kappa, double mu, QudaTwistFlavorType flavor, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param)
QudaInverterType inv_type_precondition
void invertQuda(void *h_x, void *h_b, QudaInvertParam *param)
QudaDslashType dslash_type
QudaReconstructType reconstruct_precondition
QudaInverterType inv_type
int return_clover_inverse
enum QudaSolveType_s QudaSolveType
__host__ __device__ ValueType sqrt(ValueType x)
void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param)
QudaVerbosity mg_verbosity[QUDA_MAX_MG_LEVEL]
QudaPrecision & cuda_prec
int process_command_line_option(int argc, char **argv, int *idx)
QudaReconstructType link_recon_precondition
void tm_ndeg_mat(void *evenOut, void *oddOut, void **gauge, void *evenIn, void *oddIn, double kappa, double mu, double epsilon, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
void plaqQuda(double plaq[3])
void clover_matpc(void *out, void **gauge, void *clover, void *clover_inv, void *in, double kappa, QudaMatPCType matpc_type, int dagger, QudaPrecision precision, QudaGaugeParam &gauge_param)
QudaPrecision & cuda_prec_refinement_sloppy
const char * get_matpc_str(QudaMatPCType type)
QudaGaugeParam gauge_param
QudaPrecision cuda_prec_refinement_sloppy
QudaPrecision clover_cuda_prec_refinement_sloppy
QudaGaugeFieldOrder gauge_order
void dw_mat(void *out, void **gauge, void *in, double kappa, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
void tmc_mat(void *out, void **gauge, void *clover, void *in, double kappa, double mu, QudaTwistFlavorType flavor, int dagger, QudaPrecision precision, QudaGaugeParam &gauge_param)
QudaReconstructType link_recon
void tm_matpc(void *outEven, void **gauge, void *inEven, double kappa, double mu, QudaTwistFlavorType flavor, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
const char * get_prec_str(QudaPrecision prec)
void loadCloverQuda(void *h_clover, void *h_clovinv, QudaInvertParam *inv_param)
void construct_spinor_source(void *v, int nSpin, int nColor, QudaPrecision precision, const int *const x, quda::RNG &rng)
QudaPrecision & cuda_prec_precondition
bool alternative_reliable
QudaPrecision clover_cuda_prec_sloppy
QudaFieldLocation input_location
double true_res_hq_offset[QUDA_MAX_MULTI_SHIFT]
int solution_accumulator_pipeline
double_complex b_5[QUDA_MAX_DWF_LS]
QudaSolutionType solution_type
int use_alternative_reliable
QudaSolverNormalization solver_normalization
QudaPrecision clover_cuda_prec
QudaPrecision & cuda_prec_sloppy
void initQuda(int device)
void dw_matpc(void *out, void **gauge, void *in, double kappa, QudaMatPCType matpc_type, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
QudaSolutionType solution_type
QudaFieldLocation output_location
QudaPrecision prec_refinement_sloppy
QudaPrecision clover_cuda_prec_precondition
QudaInvertParam inv_param
QudaPrecision cuda_prec_sloppy
QudaMassNormalization normalization
void setSpinorSiteSize(int n)
const char * get_mass_normalization_str(QudaMassNormalization type)
int solution_accumulator_pipeline
double tol_offset[QUDA_MAX_MULTI_SHIFT]
double true_res_offset[QUDA_MAX_MULTI_SHIFT]
void axpy(double a, ColorSpinorField &x, ColorSpinorField &y)
QudaInvertParam newQudaInvertParam(void)
const char * get_recon_str(QudaReconstructType recon)
QudaPrecision cuda_prec_precondition
QudaCloverFieldOrder clover_order
Class declaration to initialize and hold CURAND RNG states.
void dw_4d_mat(void *out, void **gauge, void *in, double kappa, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
enum QudaMatPCType_s QudaMatPCType
cpuColorSpinorField * spinorOut
enum QudaSolutionType_s QudaSolutionType
QudaGammaBasis gamma_basis
QudaPrecision cuda_prec_sloppy
const char * get_dslash_str(QudaDslashType type)
void clover_mat(void *out, void **gauge, void *clover, void *in, double kappa, int dagger, QudaPrecision precision, QudaGaugeParam &gauge_param)
double offset[QUDA_MAX_MULTI_SHIFT]
int use_sloppy_partial_accumulator
void tm_ndeg_matpc(void *outEven1, void *outEven2, void **gauge, void *inEven1, void *inEven2, double kappa, double mu, double epsilon, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
QudaReconstructType reconstruct
QudaPrecision cuda_prec_refinement_sloppy
double norm_2(void *v, int len, QudaPrecision precision)
int compute_clover_inverse
void construct_clover_field(void *clover, double norm, double diag, QudaPrecision precision)
void wil_mat(void *out, void **gauge, void *in, double kappa, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param)
QudaInverterType inv_type
QudaPrecision cuda_prec_precondition
enum QudaReconstructType_s QudaReconstructType
Main header file for the QUDA library.
enum QudaCABasis_s QudaCABasis
#define QUDA_MAX_MG_LEVEL
Maximum number of multi-grid levels. This number may be increased if needed.
void tmc_matpc(void *out, void **gauge, void *in, void *clover, void *cInv, double kappa, double mu, QudaTwistFlavorType flavor, QudaMatPCType matpc_type, int dagger, QudaPrecision precision, QudaGaugeParam &gauge_param)
QudaTwistFlavorType twist_flavor
enum QudaDslashType_s QudaDslashType
void mxpy(ColorSpinorField &x, ColorSpinorField &y)
QudaResidualType residual_type
void wil_matpc(void *outEven, void **gauge, void *inEven, double kappa, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
enum QudaVerbosity_s QudaVerbosity
QudaDslashType dslash_type
QudaReconstructType link_recon_sloppy
cpuColorSpinorField * spinorTmp
QudaPrecision prec_sloppy
QudaInverterType precon_type
void initComms(int argc, char **argv, int *const commDims)
void read_gauge_field(const char *filename, void *gauge[], QudaPrecision prec, const int *X, int argc, char *argv[])
int gridsize_from_cmdline[]
QudaPrecision clover_cpu_prec
QudaPrecision prec_precondition
enum QudaInverterType_s QudaInverterType
QudaReconstructType reconstruct_refinement_sloppy
QudaGaugeParam newQudaGaugeParam(void)
QudaPreserveSource preserve_source
enum QudaTwistFlavorType_s QudaTwistFlavorType