24 #define MAX(a,b) ((a)>(b)?(a):(b)) 84 void init(
int argc,
char **argv) {
97 errorQuda(
"Asqtad not supported. Please try staggered_dslash_test instead");
137 for(
int k = 0; k <
Lsdim; k++)
157 errorQuda(
"Gauge and spinor CPU precisions must match");
164 #ifndef MULTI_GPU // free parameter for single GPU 166 #else // must be this one c/b face for multi gpu 171 int pad_size =
MAX(x_face_size, y_face_size);
172 pad_size =
MAX(pad_size, z_face_size);
173 pad_size =
MAX(pad_size, t_face_size);
337 else printfQuda(
"Sending clover field to GPU\n");
391 printfQuda(
"Source: CPU = %e, CUDA = %e\n", cpu_norm, cuda_norm);
456 DslashTime() : event_time(0.0), cpu_time(0.0), cpu_min(DBL_MAX), cpu_max(0.0) {}
466 cudaEventCreate(&
start);
467 cudaEventCreate(&
end);
470 cudaEventRecord(
start, 0);
474 gettimeofday(&tstart, NULL);
613 gettimeofday(&tstop, NULL);
616 double elapsed = ds + 0.000001*dus;
626 cudaEventRecord(
end, 0);
627 cudaEventSynchronize(
end);
629 cudaEventElapsedTime(&runTime,
start,
end);
630 cudaEventDestroy(
start);
631 cudaEventDestroy(
end);
636 cudaError_t stat = cudaGetLastError();
637 if (stat != cudaSuccess)
638 printfQuda(
"with ERROR: %s\n", cudaGetErrorString(stat));
646 printfQuda(
"Calculating reference implementation...");
706 tm_dslash(
spinorRef->
V(),
hostGauge,
spinor->
V(),
inv_param.
kappa,
inv_param.
mu,
inv_param.
twist_flavor,
parity,
inv_param.
matpc_type,
dagger,
inv_param.
cpu_prec,
gauge_param);
712 void *ref2 = (
char*)ref1 + tm_offset*
cpu_prec;
715 void *flv2 = (
char*)flv1 + tm_offset*
cpu_prec;
729 void *ref2 = (
char*)ref1 + tm_offset*
cpu_prec;
732 void *flv2 = (
char*)flv1 + tm_offset*
cpu_prec;
734 tm_ndeg_matpc(ref1, ref2,
hostGauge, flv1, flv2,
inv_param.
kappa,
inv_param.
mu,
inv_param.
epsilon,
inv_param.
matpc_type,
dagger,
inv_param.
cpu_prec,
gauge_param);
745 void *oddOut = (
char*)evenOut + tm_offset*
cpu_prec;
748 void *oddIn = (
char*)evenIn + tm_offset*
cpu_prec;
750 tm_ndeg_mat(evenOut, oddOut,
hostGauge, evenIn, oddIn,
inv_param.
kappa,
inv_param.
mu,
inv_param.
epsilon,
dagger,
inv_param.
cpu_prec,
gauge_param);
765 void *ref2 = (
char*)ref1 + tm_offset*
cpu_prec;
768 void *flv2 = (
char*)flv1 + tm_offset*
cpu_prec;
773 tm_ndeg_matpc(
tmp1,
tmp2,
hostGauge, flv1, flv2,
inv_param.
kappa,
inv_param.
mu,
inv_param.
epsilon,
inv_param.
matpc_type,
dagger,
inv_param.
cpu_prec,
gauge_param);
774 tm_ndeg_matpc(ref1, ref2,
hostGauge,
tmp1,
tmp2,
inv_param.
kappa,
inv_param.
mu,
inv_param.
epsilon,
inv_param.
matpc_type,
not_dagger,
inv_param.
cpu_prec,
gauge_param);
789 void *oddOut = (
char*)evenOut + tm_offset*
cpu_prec;
792 void *oddIn = (
char*)evenIn + tm_offset*
cpu_prec;
795 void *oddTmp = (
char*)evenTmp + tm_offset*
cpu_prec;
797 tm_ndeg_mat(evenTmp, oddTmp,
hostGauge, evenIn, oddIn,
inv_param.
kappa,
inv_param.
mu,
inv_param.
epsilon,
dagger,
inv_param.
cpu_prec,
gauge_param);
798 tm_ndeg_mat(evenOut, oddOut,
hostGauge, evenTmp, oddTmp,
inv_param.
kappa,
inv_param.
mu,
inv_param.
epsilon,
not_dagger,
inv_param.
cpu_prec,
gauge_param);
809 tmc_dslash(
spinorRef->
V(),
hostGauge,
spinor->
V(),
hostClover,
hostCloverInv,
inv_param.
kappa,
inv_param.
mu,
inv_param.
twist_flavor,
parity,
inv_param.
matpc_type,
dagger,
inv_param.
cpu_prec,
gauge_param);
815 tmc_matpc(
spinorRef->
V(),
hostGauge,
spinor->
V(),
hostClover,
hostCloverInv,
inv_param.
kappa,
inv_param.
mu,
inv_param.
twist_flavor,
inv_param.
matpc_type,
dagger,
inv_param.
cpu_prec,
gauge_param);
864 printf(
"Test type not supported for domain wall\n");
868 double *kappa_5 = (
double*)
malloc(
Ls*
sizeof(
double));
869 for(
int xs = 0; xs <
Ls ; xs++)
890 printf(
"Test type not supported for domain wall\n");
895 double *kappa_b, *kappa_c, *kappa_5, *kappa_mdwf;
899 kappa_mdwf = (
double*)
malloc(
Lsdim*
sizeof(
double));
900 for(
int xs = 0 ; xs <
Lsdim ; xs++)
904 kappa_5[xs] = 0.5*kappa_b[xs]/kappa_c[xs];
905 kappa_mdwf[xs] = -kappa_5[xs];
915 mdw_dslash_4_pre(
spinorRef->
V(),
hostGauge,
spinor->
V(),
parity,
dagger,
gauge_param.
cpu_prec,
gauge_param,
inv_param.
mass,
inv_param.
b_5,
inv_param.
c_5,
true);
921 mdw_matpc(
spinorRef->
V(),
hostGauge,
spinor->
V(), kappa_b, kappa_c,
inv_param.
matpc_type,
dagger,
gauge_param.
cpu_prec,
gauge_param,
inv_param.
mass,
inv_param.
b_5,
inv_param.
c_5);
924 mdw_matpc(
spinorTmp->
V(),
hostGauge,
spinor->
V(), kappa_b, kappa_c,
inv_param.
matpc_type,
dagger,
gauge_param.
cpu_prec,
gauge_param,
inv_param.
mass,
inv_param.
b_5,
inv_param.
c_5);
925 mdw_matpc(
spinorRef->
V(),
hostGauge,
spinorTmp->
V(), kappa_b, kappa_c,
inv_param.
matpc_type,
not_dagger,
gauge_param.
cpu_prec,
gauge_param,
inv_param.
mass,
inv_param.
b_5,
inv_param.
c_5);
929 printf(
"Test type not supported for domain wall\n");
949 printfQuda(
"prec recon test_type matpc_type dagger S_dim T_dimension Ls_dimension dslash_type niter\n");
950 printfQuda(
"%6s %2s %d %12s %d %3d/%3d/%3d %3d %2d %14s %d\n",
965 extern void usage(
char**);
971 ASSERT_LE(deviation,
tol) <<
"CPU and CUDA implementations do not agree";
974 int main(
int argc,
char **argv)
977 ::testing::InitGoogleTest(&argc, argv);
980 for (
int i =1;
i < argc;
i++) {
985 fprintf(stderr,
"ERROR: Invalid option:%s\n", argv[
i]);
996 printfQuda(
"\nSpinor mem: %.3f GiB\n", spinorGiB);
1001 for (
int i=0;
i<attempts;
i++) {
1017 unsigned long long flops = 0;
1021 printfQuda(
"Effective halo bi-directional bandwidth (GB/s) GPU = %f ( CPU = %f, min = %f , max = %f ) for aggregate message size %lu bytes\n",
1030 printfQuda(
"Results: CPU = %f, CUDA=%f, CPU-CUDA = %f\n", norm2_cpu, norm2_cuda, norm2_cpu_cuda);
1032 printfQuda(
"Result: CPU = %f, CPU-QUDA = %f\n", norm2_cpu, norm2_cpu_cuda);
1036 test_rc = RUN_ALL_TESTS();
QudaGaugeParam gauge_param
cudaColorSpinorField * cudaSpinorOut
int dimPartitioned(int dim)
QudaDiracFieldOrder dirac_order
QudaReconstructType reconstruct_sloppy
double b_5[QUDA_MAX_DWF_LS]
void dw_4d_matpc(void *out, void **gauge, void *in, double kappa, QudaMatPCType matpc_type, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
void dw_dslash_5_4d(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, bool zero_initialize)
void construct_gauge_field(void **gauge, int type, QudaPrecision precision, QudaGaugeParam *param)
cpuColorSpinorField * spinorRef
enum QudaPrecision_s QudaPrecision
void Dslash5(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
void Dslash5(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
void dw_setDims(int *X, const int L5)
cpuColorSpinorField * spinorTmp
__host__ __device__ ValueType norm(const complex< ValueType > &z)
Returns the magnitude of z squared.
void tm_mat(void *out, void **gauge, void *in, double kappa, double mu, QudaTwistFlavorType flavor, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param)
void tm_dslash(void *res, void **gaugeFull, void *spinorField, double kappa, double mu, QudaTwistFlavorType flavor, int oddBit, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
void MatDagMatQuda(void *h_out, void *h_in, QudaInvertParam *inv_param)
double norm2(const ColorSpinorField &a)
QudaDslashType dslash_type
double c_5[QUDA_MAX_DWF_LS]
int return_clover_inverse
void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param)
void tmc_dslash(void *out, void **gauge, void *in, void *clover, void *cInv, double kappa, double mu, QudaTwistFlavorType flavor, int parity, QudaMatPCType matpc_type, int dagger, QudaPrecision precision, QudaGaugeParam ¶m)
DiracMobiusPC * dirac_mdwf
int process_command_line_option(int argc, char **argv, int *idx)
void Source(const QudaSourceType sourceType, const int st=0, const int s=0, const int c=0)
cudaColorSpinorField * tmp1
void tm_ndeg_mat(void *evenOut, void *oddOut, void **gauge, void *evenIn, void *oddIn, double kappa, double mu, double epsilon, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
void clover_matpc(void *out, void **gauge, void *clover, void *clover_inv, void *in, double kappa, QudaMatPCType matpc_type, int dagger, QudaPrecision precision, QudaGaugeParam &gauge_param)
const char * get_matpc_str(QudaMatPCType type)
void dw_dslash(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
QudaGaugeFieldOrder gauge_order
void dw_mat(void *out, void **gauge, void *in, double kappa, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
void tmc_mat(void *out, void **gauge, void *clover, void *in, double kappa, double mu, QudaTwistFlavorType flavor, int dagger, QudaPrecision precision, QudaGaugeParam &gauge_param)
cpuColorSpinorField * spinor
void tm_matpc(void *outEven, void **gauge, void *inEven, double kappa, double mu, QudaTwistFlavorType flavor, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
const char * get_prec_str(QudaPrecision prec)
void loadCloverQuda(void *h_clover, void *h_clovinv, QudaInvertParam *inv_param)
void M(ColorSpinorField &out, const ColorSpinorField &in) const
unsigned long long Flops() const
virtual void MdagM(ColorSpinorField &out, const ColorSpinorField &in) const =0
QudaSiteSubset siteSubset
void exit(int) __attribute__((noreturn))
QudaPrecision clover_cuda_prec_sloppy
void dslashQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaParity parity)
QudaFieldLocation input_location
int gridsize_from_cmdline[]
__darwin_suseconds_t tv_usec
void setDiracParam(DiracParam &diracParam, QudaInvertParam *inv_param, bool pc)
QudaSolutionType solution_type
else return(__swbuf(_c, _p))
int main(int argc, char **argv)
void dslashQuda_4dpc(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaParity parity, int test_type)
int strcmp(const char *__s1, const char *__s2)
QudaPrecision clover_cuda_prec
void Dslash4(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
void dw_matdagmat(void *out, void **gauge, void *in, double kappa, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
DslashTime dslashCUDA(int niter)
void initQuda(int device)
void dw_matpc(void *out, void **gauge, void *in, double kappa, QudaMatPCType matpc_type, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
cudaColorSpinorField * tmp2
QudaFieldLocation output_location
void * malloc(size_t __size) __attribute__((__warn_unused_result__)) __attribute__((alloc_size(1)))
QudaPrecision clover_cuda_prec_precondition
int printf(const char *,...) __attribute__((__format__(__printf__
void mdw_dslash_5(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double *kappa, bool zero_initialize)
QudaFieldOrder fieldOrder
void Dslash5inv(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity, const double &kappa5) const
void setSpinorSiteSize(int n)
QudaInvertParam newQudaInvertParam(void)
void mdw_dslash_4_pre(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double *b5, double *c5, bool zero_initialize)
const char * get_recon_str(QudaReconstructType recon)
QudaCloverFieldOrder clover_order
enum QudaMatPCType_s QudaMatPCType
QudaGammaBasis gammaBasis
DiracDomainWall4DPC * dirac_4dpc
void mdw_matpc(void *out, void **gauge, void *in, double *kappa_b, double *kappa_c, QudaMatPCType matpc_type, int dagger, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double *b5, double *c5)
__host__ __device__ ValueType pow(ValueType x, ExponentType e)
QudaGammaBasis gamma_basis
QudaDslashType dslash_type
QudaPrecision cuda_prec_sloppy
void Dslash4pre(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
const char * get_dslash_str(QudaDslashType type)
void clover_mat(void *out, void **gauge, void *clover, void *in, double kappa, int dagger, QudaPrecision precision, QudaGaugeParam &gauge_param)
void tm_ndeg_matpc(void *outEven1, void *outEven2, void **gauge, void *inEven1, void *inEven2, double kappa, double mu, double epsilon, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
enum QudaDagType_s QudaDagType
enum QudaParity_s QudaParity
QudaReconstructType reconstruct
int fprintf(FILE *, const char *,...) __attribute__((__format__(__printf__
QudaTwistFlavorType twist_flavor
void * memcpy(void *__dst, const void *__src, size_t __n)
static int Compare(const cpuColorSpinorField &a, const cpuColorSpinorField &b, const int resolution=1)
void clover_dslash(void *out, void **gauge, void *clover, void *in, int parity, int dagger, QudaPrecision precision, QudaGaugeParam ¶m)
int compute_clover_inverse
void Dslash4(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
void tm_ndeg_dslash(void *res1, void *res2, void **gauge, void *spinorField1, void *spinorField2, double kappa, double mu, double epsilon, int oddBit, int daggerBit, QudaMatPCType matpc_type, QudaPrecision precision, QudaGaugeParam &gauge_param)
void construct_clover_field(void *clover, double norm, double diag, QudaPrecision precision)
virtual void M(ColorSpinorField &out, const ColorSpinorField &in) const =0
QudaInvertParam inv_param
void wil_mat(void *out, void **gauge, void *in, double kappa, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param)
enum QudaReconstructType_s QudaReconstructType
Main header file for the QUDA library.
cpuColorSpinorField * spinorOut
void tmc_matpc(void *out, void **gauge, void *in, void *clover, void *cInv, double kappa, double mu, QudaTwistFlavorType flavor, QudaMatPCType matpc_type, int dagger, QudaPrecision precision, QudaGaugeParam &gauge_param)
void MatQuda(void *h_out, void *h_in, QudaInvertParam *inv_param)
QudaTwistFlavorType twist_flavor
void dslash_4_4d(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
QudaTwistFlavorType twistFlavor
void MdagM(ColorSpinorField &out, const ColorSpinorField &in) const
enum QudaDslashType_s QudaDslashType
void setKernelPackT(bool pack)
QudaReconstructType link_recon
void wil_matpc(void *outEven, void **gauge, void *inEven, double kappa, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
void init(int argc, char **argv)
enum QudaVerbosity_s QudaVerbosity
void wil_dslash(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
void MdagM(ColorSpinorField &out, const ColorSpinorField &in) const
static Dirac * create(const DiracParam ¶m)
void Dslash5inv(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
void read_gauge_field(const char *filename, void *gauge[], QudaPrecision prec, const int *X, int argc, char *argv[])
static __inline__ size_t size_t d
virtual void Dslash(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const =0
size_t GhostBytes() const
QudaPrecision clover_cpu_prec
cudaColorSpinorField * cudaSpinor
void M(ColorSpinorField &out, const ColorSpinorField &in) const
void initComms(int argc, char **argv, const int *commDims)
void setVerbosity(const QudaVerbosity verbosity)
void dslashQuda_mdwf(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaParity parity, int test_type)
void dslash_5_inv(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double *kappa)
QudaGaugeParam newQudaGaugeParam(void)
enum QudaTwistFlavorType_s QudaTwistFlavorType