24 #define MAX(a,b) ((a)>(b)?(a):(b)) 79 extern void usage(
char**);
83 const char *
prec_str[] = {
"half",
"single",
"double"};
103 errorQuda(
"Asqtad not supported. Please try staggered_dslash_test instead");
143 for(
int k = 0; k <
Lsdim; k++)
163 errorQuda(
"Gauge and spinor CPU precisions must match");
170 #ifndef MULTI_GPU // free parameter for single GPU 172 #else // must be this one c/b face for multi gpu 177 int pad_size =
MAX(x_face_size, y_face_size);
178 pad_size =
MAX(pad_size, z_face_size);
179 pad_size =
MAX(pad_size, t_face_size);
345 else printfQuda(
"Sending clover field to GPU\n");
466 DslashTime() : event_time(0.0), cpu_time(0.0), cpu_min(DBL_MAX), cpu_max(0.0) {}
476 cudaEventCreate(&
start);
477 cudaEventCreate(&
end);
480 cudaEventRecord(
start, 0);
484 gettimeofday(&tstart, NULL);
623 gettimeofday(&tstop, NULL);
626 double elapsed = ds + 0.000001*dus;
636 cudaEventRecord(
end, 0);
637 cudaEventSynchronize(
end);
639 cudaEventElapsedTime(&runTime,
start,
end);
640 cudaEventDestroy(
start);
641 cudaEventDestroy(
end);
646 cudaError_t stat = cudaGetLastError();
647 if (stat != cudaSuccess)
648 printfQuda(
"with ERROR: %s\n", cudaGetErrorString(stat));
716 tm_dslash(
spinorRef->
V(),
hostGauge,
spinor->
V(),
inv_param.
kappa,
inv_param.
mu,
inv_param.
twist_flavor,
parity,
inv_param.
matpc_type,
dagger,
inv_param.
cpu_prec,
gauge_param);
722 void *ref2 = (
char*)ref1 + tm_offset*
cpu_prec;
725 void *flv2 = (
char*)flv1 + tm_offset*
cpu_prec;
739 void *ref2 = (
char*)ref1 + tm_offset*
cpu_prec;
742 void *flv2 = (
char*)flv1 + tm_offset*
cpu_prec;
744 tm_ndeg_matpc(ref1, ref2,
hostGauge, flv1, flv2,
inv_param.
kappa,
inv_param.
mu,
inv_param.
epsilon,
inv_param.
matpc_type,
dagger,
inv_param.
cpu_prec,
gauge_param);
755 void *oddOut = (
char*)evenOut + tm_offset*
cpu_prec;
758 void *oddIn = (
char*)evenIn + tm_offset*
cpu_prec;
760 tm_ndeg_mat(evenOut, oddOut,
hostGauge, evenIn, oddIn,
inv_param.
kappa,
inv_param.
mu,
inv_param.
epsilon,
dagger,
inv_param.
cpu_prec,
gauge_param);
775 void *ref2 = (
char*)ref1 + tm_offset*
cpu_prec;
778 void *flv2 = (
char*)flv1 + tm_offset*
cpu_prec;
783 tm_ndeg_matpc(
tmp1,
tmp2,
hostGauge, flv1, flv2,
inv_param.
kappa,
inv_param.
mu,
inv_param.
epsilon,
inv_param.
matpc_type,
dagger,
inv_param.
cpu_prec,
gauge_param);
784 tm_ndeg_matpc(ref1, ref2,
hostGauge,
tmp1,
tmp2,
inv_param.
kappa,
inv_param.
mu,
inv_param.
epsilon,
inv_param.
matpc_type,
not_dagger,
inv_param.
cpu_prec,
gauge_param);
799 void *oddOut = (
char*)evenOut + tm_offset*
cpu_prec;
802 void *oddIn = (
char*)evenIn + tm_offset*
cpu_prec;
805 void *oddTmp = (
char*)evenTmp + tm_offset*
cpu_prec;
807 tm_ndeg_mat(evenTmp, oddTmp,
hostGauge, evenIn, oddIn,
inv_param.
kappa,
inv_param.
mu,
inv_param.
epsilon,
dagger,
inv_param.
cpu_prec,
gauge_param);
808 tm_ndeg_mat(evenOut, oddOut,
hostGauge, evenTmp, oddTmp,
inv_param.
kappa,
inv_param.
mu,
inv_param.
epsilon,
not_dagger,
inv_param.
cpu_prec,
gauge_param);
819 tmc_dslash(
spinorRef->
V(),
hostGauge,
spinor->
V(),
hostClover,
hostCloverInv,
inv_param.
kappa,
inv_param.
mu,
inv_param.
twist_flavor,
parity,
inv_param.
matpc_type,
dagger,
inv_param.
cpu_prec,
gauge_param);
825 tmc_matpc(
spinorRef->
V(),
hostGauge,
spinor->
V(),
hostClover,
hostCloverInv,
inv_param.
kappa,
inv_param.
mu,
inv_param.
twist_flavor,
inv_param.
matpc_type,
dagger,
inv_param.
cpu_prec,
gauge_param);
874 printf(
"Test type not supported for domain wall\n");
878 double *kappa_5 = (
double*)
malloc(
Ls*
sizeof(
double));
879 for(
int xs = 0; xs <
Ls ; xs++)
900 printf(
"Test type not supported for domain wall\n");
905 double *kappa_b, *kappa_c, *kappa_5, *kappa_mdwf;
909 kappa_mdwf = (
double*)
malloc(
Lsdim*
sizeof(
double));
910 for(
int xs = 0 ; xs <
Lsdim ; xs++)
914 kappa_5[xs] = 0.5*kappa_b[xs]/kappa_c[xs];
915 kappa_mdwf[xs] = -kappa_5[xs];
925 mdw_dslash_4_pre(
spinorRef->
V(),
hostGauge,
spinor->
V(),
parity,
dagger,
gauge_param.
cpu_prec,
gauge_param,
inv_param.
mass,
inv_param.
b_5,
inv_param.
c_5,
true);
931 mdw_matpc(
spinorRef->
V(),
hostGauge,
spinor->
V(), kappa_b, kappa_c,
inv_param.
matpc_type,
dagger,
gauge_param.
cpu_prec,
gauge_param,
inv_param.
mass,
inv_param.
b_5,
inv_param.
c_5);
934 mdw_matpc(
spinorTmp->
V(),
hostGauge,
spinor->
V(), kappa_b, kappa_c,
inv_param.
matpc_type,
dagger,
gauge_param.
cpu_prec,
gauge_param,
inv_param.
mass,
inv_param.
b_5,
inv_param.
c_5);
935 mdw_matpc(
spinorRef->
V(),
hostGauge,
spinorTmp->
V(), kappa_b, kappa_c,
inv_param.
matpc_type,
not_dagger,
gauge_param.
cpu_prec,
gauge_param,
inv_param.
mass,
inv_param.
b_5,
inv_param.
c_5);
939 printf(
"Test type not supported for domain wall\n");
960 printfQuda(
"prec recon test_type matpc_type dagger S_dim T_dimension Ls_dimension dslash_type niter\n");
961 printfQuda(
"%6s %2s %d %12s %d %3d/%3d/%3d %3d %2d %14s %d\n",
978 using ::testing::TestWithParam;
979 using ::testing::Bool;
980 using ::testing::Values;
981 using ::testing::Range;
982 using ::testing::Combine;
984 class DslashTest :
public ::testing::TestWithParam<::testing::tuple<int, int, int>> {
986 ::testing::tuple<int, int, int>
param;
991 int prec = ::testing::get<0>(GetParam());
995 int value = ::testing::get<2>(GetParam());
996 for(
int j=0; j < 4;j++){
997 if (
value & (1 << j)){
1036 printfQuda(
"Results: CPU = %f, CUDA=%f, CPU-CUDA = %f\n", norm2_cpu, norm2_cuda, norm2_cpu_cuda);
1038 printfQuda(
"Result: CPU = %f, CPU-QUDA = %f\n", norm2_cpu, norm2_cpu_cuda);
1043 ASSERT_LE(deviation,
tol) <<
"CPU and CUDA implementations do not agree";
1053 printfQuda(
"%fus per kernel call\n", 1e6*dslash_time.event_time /
niter);
1055 unsigned long long flops = 0;
1057 double gflops=1.0e-9*
flops/dslash_time.event_time;
1059 RecordProperty(
"Gflops", std::to_string(gflops));
1062 RecordProperty(
"Halo_bidirectitonal_BW_CPU_min", 1.0
e-9*2*
cudaSpinor->
GhostBytes()/dslash_time.cpu_max);
1063 RecordProperty(
"Halo_bidirectitonal_BW_CPU_max", 1.0
e-9*2*
cudaSpinor->
GhostBytes()/dslash_time.cpu_min);
1066 printfQuda(
"Effective halo bi-directional bandwidth (GB/s) GPU = %f ( CPU = %f, min = %f , max = %f ) for aggregate message size %lu bytes\n",
1075 ::testing::InitGoogleTest(&argc, argv);
1078 for (
int i =1;
i < argc;
i++) {
1083 fprintf(stderr,
"ERROR: Invalid option:%s\n", argv[
i]);
1089 test_rc = RUN_ALL_TESTS();
1096 const int prec = ::testing::get<0>(
param.param);
1097 const int recon = ::testing::get<1>(
param.param);
1098 const int part = ::testing::get<2>(
param.param);
1099 std::stringstream ss;
1103 ss <<
"_r" << recon;
1104 ss <<
"_partition" << part;
cudaColorSpinorField * tmp2
QudaDiracFieldOrder dirac_order
QudaReconstructType reconstruct_sloppy
int main(int argc, char **argv)
cudaColorSpinorField * tmp1
double b_5[QUDA_MAX_DWF_LS]
void dw_4d_matpc(void *out, void **gauge, void *in, double kappa, QudaMatPCType matpc_type, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
void dw_dslash_5_4d(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, bool zero_initialize)
void construct_gauge_field(void **gauge, int type, QudaPrecision precision, QudaGaugeParam *param)
int gridsize_from_cmdline[]
enum QudaPrecision_s QudaPrecision
DiracMobiusPC * dirac_mdwf
void Dslash5(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
void Dslash5(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
void dw_setDims(int *X, const int L5)
cpuColorSpinorField * spinorRef
__host__ __device__ ValueType norm(const complex< ValueType > &z)
Returns the magnitude of z squared.
void tm_mat(void *out, void **gauge, void *in, double kappa, double mu, QudaTwistFlavorType flavor, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param)
void tm_dslash(void *res, void **gaugeFull, void *spinorField, double kappa, double mu, QudaTwistFlavorType flavor, int oddBit, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
void MatDagMatQuda(void *h_out, void *h_in, QudaInvertParam *inv_param)
double norm2(const ColorSpinorField &a)
QudaDslashType dslash_type
double c_5[QUDA_MAX_DWF_LS]
int return_clover_inverse
void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param)
void tmc_dslash(void *out, void **gauge, void *in, void *clover, void *cInv, double kappa, double mu, QudaTwistFlavorType flavor, int parity, QudaMatPCType matpc_type, int dagger, QudaPrecision precision, QudaGaugeParam ¶m)
void commDimPartitionedSet(int dir)
int process_command_line_option(int argc, char **argv, int *idx)
void Source(const QudaSourceType sourceType, const int st=0, const int s=0, const int c=0)
void tm_ndeg_mat(void *evenOut, void *oddOut, void **gauge, void *evenIn, void *oddIn, double kappa, double mu, double epsilon, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
void clover_matpc(void *out, void **gauge, void *clover, void *clover_inv, void *in, double kappa, QudaMatPCType matpc_type, int dagger, QudaPrecision precision, QudaGaugeParam &gauge_param)
const char * get_matpc_str(QudaMatPCType type)
void dw_dslash(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
QudaGaugeParam gauge_param
INSTANTIATE_TEST_CASE_P(QUDA, DslashTest, Combine(Range(0, 3), ::testing::Values(QUDA_RECONSTRUCT_NO, QUDA_RECONSTRUCT_12, QUDA_RECONSTRUCT_8), ::testing::Values(0)), getdslashtestname)
QudaGaugeFieldOrder gauge_order
void dw_mat(void *out, void **gauge, void *in, double kappa, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
void tmc_mat(void *out, void **gauge, void *clover, void *in, double kappa, double mu, QudaTwistFlavorType flavor, int dagger, QudaPrecision precision, QudaGaugeParam &gauge_param)
void tm_matpc(void *outEven, void **gauge, void *inEven, double kappa, double mu, QudaTwistFlavorType flavor, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
const char * get_prec_str(QudaPrecision prec)
void loadCloverQuda(void *h_clover, void *h_clovinv, QudaInvertParam *inv_param)
void M(ColorSpinorField &out, const ColorSpinorField &in) const
unsigned long long Flops() const
QudaTwistFlavorType twist_flavor
QudaDslashType dslash_type
virtual void MdagM(ColorSpinorField &out, const ColorSpinorField &in) const =0
QudaSiteSubset siteSubset
void exit(int) __attribute__((noreturn))
QudaPrecision clover_cuda_prec_sloppy
void dslashQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaParity parity)
QudaFieldLocation input_location
__darwin_suseconds_t tv_usec
void setDiracParam(DiracParam &diracParam, QudaInvertParam *inv_param, bool pc)
QudaSolutionType solution_type
else return(__swbuf(_c, _p))
void dslashQuda_4dpc(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaParity parity, int test_type)
QudaPrecision clover_cuda_prec
void Dslash4(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
void dw_matdagmat(void *out, void **gauge, void *in, double kappa, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
void initQuda(int device)
void dw_matpc(void *out, void **gauge, void *in, double kappa, QudaMatPCType matpc_type, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
QudaFieldLocation output_location
void * malloc(size_t __size) __attribute__((__warn_unused_result__)) __attribute__((alloc_size(1)))
QudaPrecision clover_cuda_prec_precondition
double benchmark(int kernel, const int niter)
int printf(const char *,...) __attribute__((__format__(__printf__
void mdw_dslash_5(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double *kappa, bool zero_initialize)
QudaFieldOrder fieldOrder
QudaReconstructType link_recon
void Dslash5inv(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity, const double &kappa5) const
void setSpinorSiteSize(int n)
QudaInvertParam newQudaInvertParam(void)
void mdw_dslash_4_pre(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double *b5, double *c5, bool zero_initialize)
const char * get_recon_str(QudaReconstructType recon)
QudaCloverFieldOrder clover_order
enum QudaMatPCType_s QudaMatPCType
QudaGammaBasis gammaBasis
void mdw_matpc(void *out, void **gauge, void *in, double *kappa_b, double *kappa_c, QudaMatPCType matpc_type, int dagger, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double *b5, double *c5)
__host__ __device__ ValueType pow(ValueType x, ExponentType e)
QudaGammaBasis gamma_basis
QudaPrecision cuda_prec_sloppy
void Dslash4pre(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
const char * get_dslash_str(QudaDslashType type)
void display_test_info(int precision, QudaReconstructType link_recon)
std::string getdslashtestname(testing::TestParamInfo<::testing::tuple< int, int, int >> param)
void clover_mat(void *out, void **gauge, void *clover, void *in, double kappa, int dagger, QudaPrecision precision, QudaGaugeParam &gauge_param)
void tm_ndeg_matpc(void *outEven1, void *outEven2, void **gauge, void *inEven1, void *inEven2, double kappa, double mu, double epsilon, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
enum QudaDagType_s QudaDagType
enum QudaParity_s QudaParity
QudaReconstructType reconstruct
int fprintf(FILE *, const char *,...) __attribute__((__format__(__printf__
void * memcpy(void *__dst, const void *__src, size_t __n)
static int Compare(const cpuColorSpinorField &a, const cpuColorSpinorField &b, const int resolution=1)
void clover_dslash(void *out, void **gauge, void *clover, void *in, int parity, int dagger, QudaPrecision precision, QudaGaugeParam ¶m)
int compute_clover_inverse
void Dslash4(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
::testing::tuple< int, int, int > param
void tm_ndeg_dslash(void *res1, void *res2, void **gauge, void *spinorField1, void *spinorField2, double kappa, double mu, double epsilon, int oddBit, int daggerBit, QudaMatPCType matpc_type, QudaPrecision precision, QudaGaugeParam &gauge_param)
void construct_clover_field(void *clover, double norm, double diag, QudaPrecision precision)
virtual void M(ColorSpinorField &out, const ColorSpinorField &in) const =0
void wil_mat(void *out, void **gauge, void *in, double kappa, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param)
DslashTime dslashCUDA(int niter)
enum QudaReconstructType_s QudaReconstructType
void commDimPartitionedReset()
Reset the comm dim partioned array to zero,.
Main header file for the QUDA library.
void tmc_matpc(void *out, void **gauge, void *in, void *clover, void *cInv, double kappa, double mu, QudaTwistFlavorType flavor, QudaMatPCType matpc_type, int dagger, QudaPrecision precision, QudaGaugeParam &gauge_param)
void MatQuda(void *h_out, void *h_in, QudaInvertParam *inv_param)
QudaTwistFlavorType twist_flavor
void dslash_4_4d(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
QudaTwistFlavorType twistFlavor
void MdagM(ColorSpinorField &out, const ColorSpinorField &in) const
enum QudaDslashType_s QudaDslashType
void setKernelPackT(bool pack)
static void SetUpTestCase()
QudaInvertParam inv_param
void wil_matpc(void *outEven, void **gauge, void *inEven, double kappa, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
static void TearDownTestCase()
enum QudaVerbosity_s QudaVerbosity
void wil_dslash(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
void MdagM(ColorSpinorField &out, const ColorSpinorField &in) const
cpuColorSpinorField * spinorTmp
static Dirac * create(const DiracParam ¶m)
cudaColorSpinorField * cudaSpinorOut
void Dslash5inv(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
static __inline__ size_t size_t d
virtual void Dslash(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const =0
cudaColorSpinorField * cudaSpinor
size_t GhostBytes() const
cpuColorSpinorField * spinor
QudaPrecision clover_cpu_prec
void M(ColorSpinorField &out, const ColorSpinorField &in) const
void initComms(int argc, char **argv, const int *commDims)
void init(int precision, QudaReconstructType link_recon)
void setVerbosity(const QudaVerbosity verbosity)
void dslashQuda_mdwf(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaParity parity, int test_type)
void dslash_5_inv(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double *kappa)
void updateR()
update the radius for halos.
QudaGaugeParam newQudaGaugeParam(void)
cpuColorSpinorField * spinorOut
enum QudaTwistFlavorType_s QudaTwistFlavorType
DiracDomainWall4DPC * dirac_4dpc
TEST_P(DslashTest, verify)