25 #define MAX(a,b) ((a)>(b)?(a):(b)) 26 #define staggeredSpinorSiteSize 6 29 extern void usage(
char** argv );
50 void **ghost_fatlink, **ghost_longlink;
75 const char *
prec_str[] = {
"half",
"single",
"double"};
134 int tmpint =
MAX(
X[1]*
X[2]*
X[3],
X[0]*
X[2]*
X[3]);
135 tmpint =
MAX(tmpint,
X[0]*
X[1]*
X[3]);
136 tmpint =
MAX(tmpint,
X[0]*
X[1]*
X[2]);
146 for(
int d = 0;
d < 4;
d++) {
181 for (
int dir = 0; dir < 4; dir++) {
186 errorQuda(
"ERROR: malloc failed for fatlink/longlink");
206 int x_face_size =
X[1]*
X[2]*
X[3]/2;
207 int y_face_size =
X[0]*
X[2]*
X[3]/2;
208 int z_face_size =
X[0]*
X[1]*
X[3]/2;
209 int t_face_size =
X[0]*
X[1]*
X[2]/2;
210 int pad_size =
MAX(x_face_size, y_face_size);
211 pad_size =
MAX(pad_size, z_face_size);
212 pad_size =
MAX(pad_size, t_face_size);
262 cudaDeviceSynchronize();
291 for (
int dir = 0; dir < 4; dir++) {
322 DslashTime() : event_time(0.0), cpu_time(0.0), cpu_min(DBL_MAX), cpu_max(0.0) {}
331 cudaEventCreate(&
start);
332 cudaEventRecord(
start, 0);
333 cudaEventSynchronize(
start);
336 cudaEventRecord(
start, 0);
340 gettimeofday(&tstart, NULL);
358 errorQuda(
"Staggered operator acting on full-site not supported");
366 gettimeofday(&tstop, NULL);
369 double elapsed = ds + 0.000001*dus;
379 cudaEventCreate(&
end);
380 cudaEventRecord(
end, 0);
381 cudaEventSynchronize(
end);
383 cudaEventElapsedTime(&runTime,
start,
end);
384 cudaEventDestroy(
start);
385 cudaEventDestroy(
end);
390 cudaError_t stat = cudaGetLastError();
391 if (stat != cudaSuccess)
392 errorQuda(
"with ERROR: %s\n", cudaGetErrorString(stat));
438 printfQuda(
"prec recon test_type dagger S_dim T_dimension\n");
453 using ::testing::TestWithParam;
454 using ::testing::Bool;
455 using ::testing::Values;
456 using ::testing::Range;
457 using ::testing::Combine;
469 using ::testing::TestWithParam;
470 using ::testing::Bool;
471 using ::testing::Values;
472 using ::testing::Range;
473 using ::testing::Combine;
477 ::testing::tuple<int, int, int>
param;
482 int prec = ::testing::get<0>(GetParam());
486 int value = ::testing::get<2>(GetParam());
487 for(
int j=0; j < 4;j++){
488 if (
value & (1 << j)){
528 printfQuda(
"Results: CPU=%f, CUDA=%f, CPU-CUDA=%f\n", spinor_ref_norm2, cuda_spinor_out_norm2,
531 printfQuda(
"Result: CPU=%f , CPU-CUDA=%f", spinor_ref_norm2, spinor_out_norm2);
537 ASSERT_LE(deviation,
tol) <<
"CPU and CUDA implementations do not agree";
558 RecordProperty(
"Gflops", std::to_string(gflops));
566 printfQuda(
"Effective halo bi-directional bandwidth (GB/s) GPU = %f ( CPU = %f, min = %f , max = %f ) for aggregate message size %lu bytes\n",
573 int main(
int argc,
char **argv)
576 ::testing::InitGoogleTest(&argc, argv);
577 for (
int i=1 ;
i < argc;
i++){
583 fprintf(stderr,
"ERROR: Invalid option:%s\n", argv[
i]);
591 int test_rc = RUN_ALL_TESTS();
599 const int prec = ::testing::get<0>(
param.param);
600 const int recon = ::testing::get<1>(
param.param);
601 const int part = ::testing::get<2>(
param.param);
602 std::stringstream ss;
606 ss <<
"_partition" << part;
INSTANTIATE_TEST_CASE_P(QUDA, StaggeredDslashTest, Combine(Range(0, 3), ::testing::Values(QUDA_RECONSTRUCT_NO, QUDA_RECONSTRUCT_12, QUDA_RECONSTRUCT_8), ::testing::Values(0)), getstaggereddslashtestname)
QudaDiracFieldOrder dirac_order
QudaReconstructType reconstruct_sloppy
void usage_extra(char **argv)
cpuColorSpinorField * spinorOut
QudaGhostExchange ghostExchange
virtual ~StaggeredDslashTest()
void dw_setDims(int *X, const int L5)
cpuColorSpinorField * tmpCpu
QudaInvertParam inv_param
double norm2(const ColorSpinorField &a)
QudaDslashType dslash_type
QudaDslashType dslash_type
void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param)
void commDimPartitionedSet(int dir)
void staggered_dslash(void *res, void **fatlink, void **longlink, void *spinorField, int oddBit, int daggerBit, QudaPrecision sPrecision, QudaPrecision gPrecision)
static void SetUpTestCase()
int process_command_line_option(int argc, char **argv, int *idx)
void Source(const QudaSourceType sourceType, const int st=0, const int s=0, const int c=0)
std::string getstaggereddslashtestname(testing::TestParamInfo<::testing::tuple< int, int, int >> param)
void matdagmat_mg4dir(cpuColorSpinorField *out, void **link, void **ghostLink, cpuColorSpinorField *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision, cpuColorSpinorField *tmp, QudaParity parity)
QudaGaugeFieldOrder gauge_order
DslashTime dslashCUDA(int niter)
const char * get_prec_str(QudaPrecision prec)
unsigned long long Flops() const
cudaColorSpinorField * cudaSpinor
virtual void MdagM(ColorSpinorField &out, const ColorSpinorField &in) const =0
QudaSiteSubset siteSubset
QudaFieldLocation input_location
cpuColorSpinorField * spinor
__darwin_suseconds_t tv_usec
void setDiracParam(DiracParam &diracParam, QudaInvertParam *inv_param, bool pc)
QudaSolutionType solution_type
else return(__swbuf(_c, _p))
void initQuda(int device)
QudaFieldLocation output_location
void * malloc(size_t __size) __attribute__((__warn_unused_result__)) __attribute__((alloc_size(1)))
double benchmark(int kernel, const int niter)
QudaFieldOrder fieldOrder
QudaReconstructType link_recon
void setSpinorSiteSize(int n)
QudaInvertParam newQudaInvertParam(void)
const char * get_recon_str(QudaReconstructType recon)
QudaGaugeParam gaugeParam
QudaGammaBasis gammaBasis
void init(int precision, QudaReconstructType link_recon)
::testing::tuple< int, int, int > param
__host__ __device__ ValueType pow(ValueType x, ExponentType e)
void matdagmat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision, void *tmp, QudaParity parity)
QudaGammaBasis gamma_basis
QudaPrecision cuda_prec_sloppy
const void ** Ghost() const
enum QudaDagType_s QudaDagType
enum QudaParity_s QudaParity
QudaReconstructType reconstruct
void staggeredDslashRef()
int fprintf(FILE *, const char *,...) __attribute__((__format__(__printf__
static int Compare(const cpuColorSpinorField &a, const cpuColorSpinorField &b, const int resolution=1)
void construct_fat_long_gauge_field(void **fatlink, void **longlink, int type, QudaPrecision precision, QudaGaugeParam *param, QudaDslashType dslash_type)
virtual void M(ColorSpinorField &out, const ColorSpinorField &in) const =0
void display_test_info(int precision, QudaReconstructType link_recon)
enum QudaReconstructType_s QudaReconstructType
void commDimPartitionedReset()
Reset the comm dim partioned array to zero,.
Main header file for the QUDA library.
int main(int argc, char **argv)
void staggered_dslash_mg4dir(cpuColorSpinorField *out, void **fatlink, void **longlink, void **ghost_fatlink, void **ghost_longlink, cpuColorSpinorField *in, int oddBit, int daggerBit, QudaPrecision sPrecision, QudaPrecision gPrecision)
enum QudaDslashType_s QudaDslashType
TEST_P(StaggeredDslashTest, verify)
cudaColorSpinorField * cudaSpinorOut
void setKernelPackT(bool pack)
static Dirac * create(const DiracParam ¶m)
cpuColorSpinorField * spinorRef
cudaColorSpinorField * tmp
static __inline__ size_t size_t d
virtual void Dslash(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const =0
size_t GhostBytes() const
static void TearDownTestCase()
void initComms(int argc, char **argv, const int *commDims)
void setVerbosity(const QudaVerbosity verbosity)
void updateR()
update the radius for halos.
QudaGaugeParam newQudaGaugeParam(void)
int gridsize_from_cmdline[]