25 #define MAX(a,b) ((a)>(b)?(a):(b))
79 void init(
int argc,
char **argv) {
92 errorQuda(
"Asqtad not supported. Please try staggered_dslash_test instead");
135 for(
int k = 0; k <
Lsdim; k++)
152 errorQuda(
"Gauge and spinor CPU precisions must match");
159 #ifndef MULTI_GPU // free parameter for single GPU
161 #else // must be this one c/b face for multi gpu
166 int pad_size =
MAX(x_face_size, y_face_size);
167 pad_size =
MAX(pad_size, z_face_size);
168 pad_size =
MAX(pad_size, t_face_size);
397 printfQuda(
"Source: CPU = %e, CUDA = %e\n", cpu_norm, cuda_norm);
447 for (
int dir = 0; dir < 4; dir++) free(
hostGauge[dir]);
459 cudaEvent_t start,
end;
460 cudaEventCreate(&start);
461 cudaEventCreate(&end);
462 cudaEventRecord(start, 0);
464 for (
int i = 0; i <
niter; i++) {
599 cudaEventRecord(end, 0);
600 cudaEventSynchronize(end);
602 cudaEventElapsedTime(&runTime, start, end);
603 cudaEventDestroy(start);
604 cudaEventDestroy(end);
606 double secs = runTime / 1000;
609 cudaError_t stat = cudaGetLastError();
610 if (stat != cudaSuccess)
611 printfQuda(
"with ERROR: %s\n", cudaGetErrorString(stat));
619 printfQuda(
"Calculating reference implementation...");
653 tm_dslash(
spinorRef->
V(),
hostGauge,
spinor->
V(),
inv_param.
kappa,
inv_param.
mu,
inv_param.
twist_flavor,
parity,
inv_param.
matpc_type,
dagger,
inv_param.
cpu_prec,
gauge_param);
659 void *ref2 =
cpu_prec ==
sizeof(double) ? (
void*)((
double*)ref1 + tm_offset): (
void*)((
float*)ref1 + tm_offset);
662 void *flv2 =
cpu_prec ==
sizeof(double) ? (
void*)((
double*)flv1 + tm_offset): (
void*)((
float*)flv1 + tm_offset);
676 void *ref2 =
cpu_prec ==
sizeof(double) ? (
void*)((
double*)ref1 + tm_offset): (
void*)((
float*)ref1 + tm_offset);
679 void *flv2 =
cpu_prec ==
sizeof(double) ? (
void*)((
double*)flv1 + tm_offset): (
void*)((
float*)flv1 + tm_offset);
681 tm_ndeg_matpc(ref1, ref2,
hostGauge, flv1, flv2,
inv_param.
kappa,
inv_param.
mu,
inv_param.
epsilon,
inv_param.
matpc_type,
dagger,
inv_param.
cpu_prec,
gauge_param);
692 void *oddOut =
cpu_prec ==
sizeof(double) ? (
void*)((
double*)evenOut + tm_offset): (
void*)((
float*)evenOut + tm_offset);
695 void *oddIn =
cpu_prec ==
sizeof(double) ? (
void*)((
double*)evenIn + tm_offset): (
void*)((
float*)evenIn + tm_offset);
697 tm_ndeg_mat(evenOut, oddOut,
hostGauge, evenIn, oddIn,
inv_param.
kappa,
inv_param.
mu,
inv_param.
epsilon,
dagger,
inv_param.
cpu_prec,
gauge_param);
712 void *ref2 =
cpu_prec ==
sizeof(double) ? (
void*)((
double*)ref1 + tm_offset): (
void*)((
float*)ref1 + tm_offset);
715 void *flv2 =
cpu_prec ==
sizeof(double) ? (
void*)((
double*)flv1 + tm_offset): (
void*)((
float*)flv1 + tm_offset);
718 void *
tmp2 =
cpu_prec ==
sizeof(double) ? (
void*)((
double*)tmp1 + tm_offset): (
void*)((
float*)tmp1 + tm_offset);
720 tm_ndeg_matpc(tmp1, tmp2,
hostGauge, flv1, flv2,
inv_param.
kappa,
inv_param.
mu,
inv_param.
epsilon,
inv_param.
matpc_type,
dagger,
inv_param.
cpu_prec,
gauge_param);
721 tm_ndeg_matpc(ref1, ref2,
hostGauge, tmp1, tmp2,
inv_param.
kappa,
inv_param.
mu,
inv_param.
epsilon,
inv_param.
matpc_type,
not_dagger,
inv_param.
cpu_prec,
gauge_param);
736 void *oddOut =
cpu_prec ==
sizeof(double) ? (
void*)((
double*)evenOut + tm_offset): (
void*)((
float*)evenOut + tm_offset);
739 void *oddIn =
cpu_prec ==
sizeof(double) ? (
void*)((
double*)evenIn + tm_offset): (
void*)((
float*)evenIn + tm_offset);
742 void *oddTmp =
cpu_prec ==
sizeof(double) ? (
void*)((
double*)evenTmp + tm_offset): (
void*)((
float*)evenTmp + tm_offset);
744 tm_ndeg_mat(evenTmp, oddTmp,
hostGauge, evenIn, oddIn,
inv_param.
kappa,
inv_param.
mu,
inv_param.
epsilon,
dagger,
inv_param.
cpu_prec,
gauge_param);
745 tm_ndeg_mat(evenOut, oddOut,
hostGauge, evenTmp, oddTmp,
inv_param.
kappa,
inv_param.
mu,
inv_param.
epsilon,
not_dagger,
inv_param.
cpu_prec,
gauge_param);
771 printf(
"Test type not supported for domain wall\n");
775 double *kappa_5 = (
double*)malloc(
Ls*
sizeof(
double));
797 printf(
"Test type not supported for domain wall\n");
802 double *kappa_b, *kappa_c, *kappa_5, *kappa_mdwf;
803 kappa_b = (
double*)malloc(
Lsdim*
sizeof(
double));
804 kappa_c = (
double*)malloc(
Lsdim*
sizeof(
double));
805 kappa_5 = (
double*)malloc(
Lsdim*
sizeof(
double));
806 kappa_mdwf = (
double*)malloc(
Lsdim*
sizeof(
double));
811 kappa_5[
xs] = 0.5*kappa_b[
xs]/kappa_c[
xs];
812 kappa_mdwf[
xs] = -kappa_5[
xs];
828 mdw_matpc(
spinorRef->
V(),
hostGauge,
spinor->
V(), kappa_b, kappa_c,
inv_param.
matpc_type,
dagger,
gauge_param.
cpu_prec,
gauge_param,
inv_param.
mass,
inv_param.
b_5,
inv_param.
c_5);
831 mdw_matpc(
spinorTmp->
V(),
hostGauge,
spinor->
V(), kappa_b, kappa_c,
inv_param.
matpc_type,
dagger,
gauge_param.
cpu_prec,
gauge_param,
inv_param.
mass,
inv_param.
b_5,
inv_param.
c_5);
832 mdw_matpc(
spinorRef->
V(),
hostGauge,
spinorTmp->
V(), kappa_b, kappa_c,
inv_param.
matpc_type,
not_dagger,
gauge_param.
cpu_prec,
gauge_param,
inv_param.
mass,
inv_param.
b_5,
inv_param.
c_5);
836 printf(
"Test type not supported for domain wall\n");
856 printfQuda(
"prec recon test_type matpc_type dagger S_dim T_dimension Ls_dimension dslash_type niter\n");
857 printfQuda(
"%6s %2s %d %12s %d %3d/%3d/%3d %3d %2d %14s %d\n",
872 extern void usage(
char**);
878 ASSERT_LE(deviation, tol) <<
"CPU and CUDA implementations do not agree";
881 int main(
int argc,
char **argv)
884 for (
int i =1;i < argc; i++){
889 fprintf(stderr,
"ERROR: Invalid option:%s\n", argv[i]);
900 printfQuda(
"\nSpinor mem: %.3f GiB\n", spinorGiB);
922 unsigned long long flops = 0;
924 int spinor_floats =
test_type ? 2*(7*24+24)+24 : 7*24+24;
926 spinor_floats +=
test_type ? 2*(7*2 + 2) + 2 : 7*2 + 2;
931 printfQuda(
"GFLOPS = %f\n", 1.0e-9*flops/secs);
939 printfQuda(
"Results: CPU = %f, CUDA=%f, CPU-CUDA = %f\n", norm2_cpu, norm2_cuda, norm2_cpu_cuda);
941 printfQuda(
"Result: CPU = %f, CPU-QUDA = %f\n", norm2_cpu, norm2_cpu_cuda);
QudaGaugeParam gauge_param
cudaColorSpinorField * cudaSpinorOut
void Dslash5(cudaColorSpinorField &out, const cudaColorSpinorField &in, const QudaParity parity) const
int dimPartitioned(int dim)
QudaDiracFieldOrder dirac_order
QudaReconstructType reconstruct_sloppy
void MdagM(cudaColorSpinorField &out, const cudaColorSpinorField &in) const
cudaColorSpinorField * tmp1
void dw_dslash_5_4d(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
double b_5[QUDA_MAX_DWF_LS]
void dw_4d_matpc(void *out, void **gauge, void *in, double kappa, QudaMatPCType matpc_type, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
void construct_gauge_field(void **gauge, int type, QudaPrecision precision, QudaGaugeParam *param)
cpuColorSpinorField * spinorRef
enum QudaPrecision_s QudaPrecision
#define ASSERT_LE(val1, val2)
void dw_setDims(int *X, const int L5)
cpuColorSpinorField * spinorTmp
__host__ __device__ ValueType norm(const complex< ValueType > &z)
Returns the magnitude of z squared.
void tm_mat(void *out, void **gauge, void *in, double kappa, double mu, QudaTwistFlavorType flavor, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param)
void Dslash5inv(cudaColorSpinorField &out, const cudaColorSpinorField &in, const QudaParity parity, const double &k) const
void tm_dslash(void *res, void **gaugeFull, void *spinorField, double kappa, double mu, QudaTwistFlavorType flavor, int oddBit, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
void mdw_matpc(void *out, void **gauge, void *in, double *kappa_b, double *kappa_c, QudaMatPCType matpc_type, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double *b5, double *c5)
void MatDagMatQuda(void *h_out, void *h_in, QudaInvertParam *inv_param)
void MdagM(cudaColorSpinorField &out, const cudaColorSpinorField &in) const
QudaDslashType dslash_type
void M(cudaColorSpinorField &out, const cudaColorSpinorField &in) const
double c_5[QUDA_MAX_DWF_LS]
void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param)
int process_command_line_option(int argc, char **argv, int *idx)
void Source(const QudaSourceType sourceType, const int st=0, const int s=0, const int c=0)
cudaColorSpinorField * tmp1
void tm_ndeg_mat(void *evenOut, void *oddOut, void **gauge, void *evenIn, void *oddIn, double kappa, double mu, double epsilon, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
DiracMobiusDomainWallPC * dirac_mdwf
const char * get_matpc_str(QudaMatPCType type)
void Dslash5inv(cudaColorSpinorField &out, const cudaColorSpinorField &in, const QudaParity parity, const double &k) const
void dw_dslash(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
QudaGaugeFieldOrder gauge_order
void dw_mat(void *out, void **gauge, void *in, double kappa, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
double dslashCUDA(int niter)
cpuColorSpinorField * spinor
void tm_matpc(void *outEven, void **gauge, void *inEven, double kappa, double mu, QudaTwistFlavorType flavor, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
const char * get_prec_str(QudaPrecision prec)
void loadCloverQuda(void *h_clover, void *h_clovinv, QudaInvertParam *inv_param)
QudaSiteSubset siteSubset
QudaPrecision clover_cuda_prec_sloppy
void dslashQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaParity parity)
QudaFieldLocation input_location
int gridsize_from_cmdline[]
void setDiracParam(DiracParam &diracParam, QudaInvertParam *inv_param, bool pc)
QudaSolutionType solution_type
int main(int argc, char **argv)
void dslashQuda_4dpc(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaParity parity, int test_type)
virtual void Dslash(cudaColorSpinorField &out, const cudaColorSpinorField &in, const QudaParity parity) const =0
QudaPrecision clover_cuda_prec
void dw_matdagmat(void *out, void **gauge, void *in, double kappa, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
void setTuning(QudaTune tune)
void initQuda(int device)
void dw_matpc(void *out, void **gauge, void *in, double kappa, QudaMatPCType matpc_type, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
cudaColorSpinorField * tmp2
void Dslash4(cudaColorSpinorField &out, const cudaColorSpinorField &in, const QudaParity parity) const
void Dslash5(cudaColorSpinorField &out, const cudaColorSpinorField &in, const QudaParity parity) const
void M(cudaColorSpinorField &out, const cudaColorSpinorField &in) const
QudaFieldLocation output_location
QudaFieldOrder fieldOrder
unsigned long long Flops() const
void setSpinorSiteSize(int n)
QudaInvertParam newQudaInvertParam(void)
const char * get_recon_str(QudaReconstructType recon)
QudaCloverFieldOrder clover_order
GTEST_API_ void InitGoogleTest(int *argc, char **argv)
enum QudaMatPCType_s QudaMatPCType
QudaGammaBasis gammaBasis
DiracDomainWall4DPC * dirac_4dpc
virtual void MdagM(cudaColorSpinorField &out, const cudaColorSpinorField &in) const =0
cudaColorSpinorField * tmp2
__host__ __device__ ValueType pow(ValueType x, ExponentType e)
QudaGammaBasis gamma_basis
QudaDslashType dslash_type
QudaPrecision cuda_prec_sloppy
const char * get_dslash_str(QudaDslashType type)
void tm_ndeg_matpc(void *outEven1, void *outEven2, void **gauge, void *inEven1, void *inEven2, double kappa, double mu, double epsilon, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
enum QudaDagType_s QudaDagType
enum QudaParity_s QudaParity
QudaReconstructType reconstruct
void read_gauge_field(char *filename, void *gauge[], QudaPrecision precision, int *X, int argc, char *argv[])
void mdw_dslash_5(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double *kappa)
QudaTwistFlavorType twist_flavor
static int Compare(const cpuColorSpinorField &a, const cpuColorSpinorField &b, const int resolution=1)
void Dslash4(cudaColorSpinorField &out, const cudaColorSpinorField &in, const QudaParity parity) const
void tm_ndeg_dslash(void *res1, void *res2, void **gauge, void *spinorField1, void *spinorField2, double kappa, double mu, double epsilon, int oddBit, int daggerBit, QudaMatPCType matpc_type, QudaPrecision precision, QudaGaugeParam &gauge_param)
void Dslash4pre(cudaColorSpinorField &out, const cudaColorSpinorField &in, const QudaParity parity) const
void construct_clover_field(void *clover, double norm, double diag, QudaPrecision precision)
QudaInvertParam inv_param
void wil_mat(void *out, void **gauge, void *in, double kappa, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param)
void mdw_dslash_4_pre(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double *b5, double *c5)
enum QudaReconstructType_s QudaReconstructType
Main header file for the QUDA library.
cpuColorSpinorField * spinorOut
virtual void M(cudaColorSpinorField &out, const cudaColorSpinorField &in) const =0
void MatQuda(void *h_out, void *h_in, QudaInvertParam *inv_param)
QudaTwistFlavorType twist_flavor
void dslash_4_4d(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
QudaTwistFlavorType twistFlavor
int RUN_ALL_TESTS() GTEST_MUST_USE_RESULT_
enum QudaDslashType_s QudaDslashType
void setKernelPackT(bool pack)
QudaReconstructType link_recon
void wil_matpc(void *outEven, void **gauge, void *inEven, double kappa, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
void init(int argc, char **argv)
void wil_dslash(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
static Dirac * create(const DiracParam &param)
double norm2(const ColorSpinorField &)
QudaPrecision clover_cpu_prec
cudaColorSpinorField * cudaSpinor
void initComms(int argc, char **argv, const int *commDims)
void setVerbosity(const QudaVerbosity verbosity)
void dslashQuda_mdwf(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaParity parity, int test_type)
void dslash_5_inv(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double *kappa)
QudaGaugeParam newQudaGaugeParam(void)
enum QudaTwistFlavorType_s QudaTwistFlavorType