34 extern void usage(
char** );
42 std::vector<cpuColorSpinorField*>
xmH;
43 std::vector<cpuColorSpinorField*>
ymH;
44 std::vector<cpuColorSpinorField*>
zmH;
50 param.precision = precision;
62 printfQuda(
"S_dimension T_dimension Nspin Ncolor\n");
83 if (
Nspin == 2 && precision == 0) {
86 }
else if (
Nspin == 2 && kernel == 1) {
89 }
else if (
Ncolor != 3 && (kernel == 31 || kernel == 32)) {
92 }
else if ((
Nprec < 3) && (kernel == 0)) {
193 errorQuda(
"Precision option not defined");
204 param.is_composite =
true;
205 param.is_component =
false;
217 param.is_composite =
false;
218 param.is_component =
false;
219 param.composite_dim = 1;
231 bool flag = !(
param.nSpin == 2 &&
293 cudaEventCreate(&
start);
294 cudaEventCreate(&
end);
295 cudaEventRecord(
start, 0);
472 errorQuda(
"Undefined blas kernel %d\n", kernel);
476 cudaEventRecord(
end, 0);
477 cudaEventSynchronize(
end);
479 cudaEventElapsedTime(&runTime,
start,
end);
480 cudaEventDestroy(
start);
481 cudaEventDestroy(
end);
486 double secs = runTime / 1000;
490 #define ERROR(a) fabs(blas::norm2(*a##D) - blas::norm2(*a##H)) / blas::norm2(*a##H) 494 double a = M_PI,
b = M_PI*
exp(1.0),
c =
sqrt(M_PI);
831 for(
int j=0; j <
Msrc; j++){
885 for (
int i = 0;
i <
Nsrc;
i++) {
886 for (
int j = 0; j <
Nsrc; j++) {
899 for (
int i = 0;
i <
Nsrc;
i++) {
900 for (
int j = 0; j <
Msrc; j++) {
909 errorQuda(
"Undefined blas kernel %d\n", kernel);
919 const char *
prec_str[] = {
"half",
"single",
"double"};
955 "caxpbypzYmbwcDotProductUYNormY",
956 "HeavyQuarkResidualNorm",
957 "xpyHeavyQuarkResidualNorm",
965 "cDotProductNorm_block",
970 int main(
int argc,
char** argv)
973 ::testing::InitGoogleTest(&argc, argv);
979 for (
int i = 1;
i < argc;
i++){
1010 result = RUN_ALL_TESTS();
1021 using ::testing::TestWithParam;
1022 using ::testing::Bool;
1023 using ::testing::Values;
1024 using ::testing::Range;
1025 using ::testing::Combine;
1027 class BlasTest :
public ::testing::TestWithParam<::testing::tuple<int, int>> {
1043 int prec = ::testing::get<0>(GetParam());
1044 int kernel = ::testing::get<1>(GetParam());
1051 tol = (kernel < 2) ? 1
e-4 :
tol;
1052 EXPECT_LE(deviation,
tol) <<
"CPU and CUDA implementations do not agree";
1056 int prec = ::testing::get<0>(GetParam());
1057 int kernel = ::testing::get<1>(GetParam());
1069 RecordProperty(
"Gflops", std::to_string(gflops));
1070 RecordProperty(
"GBs", std::to_string(gbytes));
1071 printfQuda(
"%-31s: Gflop/s = %6.1f, GB/s = %6.1f\n",
names[kernel], gflops, gbytes);
1076 int prec = ::testing::get<0>(
param.param);
1077 int kernel = ::testing::get<1>(
param.param);
1078 std::string str(
names[kernel]);
1079 str += std::string(
"_");
QudaDslashType dslash_type
int dimPartitioned(int dim)
void xpay(ColorSpinorField &x, const double &a, ColorSpinorField &y)
enum QudaPrecision_s QudaPrecision
double3 cDotProductNormA(ColorSpinorField &a, ColorSpinorField &b)
double caxpyNorm(const Complex &a, ColorSpinorField &x, ColorSpinorField &y)
__host__ __device__ ValueType exp(ValueType x)
double norm2(const ColorSpinorField &a)
QudaInverterType inv_type
enum QudaSolveType_s QudaSolveType
__host__ __device__ ValueType sqrt(ValueType x)
Complex cDotProduct(ColorSpinorField &, ColorSpinorField &)
std::complex< double > Complex
int process_command_line_option(int argc, char **argv, int *idx)
::testing::tuple< int, int > param
double3 xpyHeavyQuarkResidualNorm(ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &r)
std::vector< cpuColorSpinorField * > ymH
double axpyNorm(const double &a, ColorSpinorField &x, ColorSpinorField &y)
CompositeColorSpinorField & Components()
bool skip_kernel(int precision, int kernel)
std::vector< cpuColorSpinorField * > xmH
double reDotProduct(ColorSpinorField &x, ColorSpinorField &y)
void copy(ColorSpinorField &dst, const ColorSpinorField &src)
void ax(const double &a, ColorSpinorField &x)
ColorSpinorField & Component(const int idx) const
double xmyNorm(ColorSpinorField &x, ColorSpinorField &y)
void caxpyBzpx(const Complex &, ColorSpinorField &, ColorSpinorField &, const Complex &, ColorSpinorField &)
void caxpyBxpz(const Complex &, ColorSpinorField &, ColorSpinorField &, const Complex &, ColorSpinorField &)
void cabxpyAx(const double &a, const Complex &b, ColorSpinorField &x, ColorSpinorField &y)
void initQuda(int device)
double cabxpyAxNorm(const double &a, const Complex &b, ColorSpinorField &x, ColorSpinorField &y)
double benchmark(int kernel, const int niter)
void initFields(int prec)
void axpyZpbx(const double &a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, const double &b)
void setSpinorSiteSize(int n)
void caxpbypzYmbw(const Complex &, ColorSpinorField &, const Complex &, ColorSpinorField &, ColorSpinorField &, ColorSpinorField &)
void tripleCGUpdate(const double &alpha, const double &beta, ColorSpinorField &q, ColorSpinorField &r, ColorSpinorField &x, ColorSpinorField &p)
double3 HeavyQuarkResidualNorm(ColorSpinorField &x, ColorSpinorField &r)
double caxpyXmazNormX(const Complex &a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
Complex caxpyDotzy(const Complex &a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
void caxpy(const Complex &a, ColorSpinorField &x, ColorSpinorField &y)
static __inline__ size_t h
void caxpbypczpw(const Complex &, ColorSpinorField &, const Complex &, ColorSpinorField &, const Complex &, ColorSpinorField &, ColorSpinorField &)
void axpy(const double &a, ColorSpinorField &x, ColorSpinorField &y)
void axpby(const double &a, ColorSpinorField &x, const double &b, ColorSpinorField &y)
int abs(int) __attribute__((const))
double3 caxpbypzYmbwcDotProductUYNormY(const Complex &a, ColorSpinorField &x, const Complex &b, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &u)
void caxpbypz(const Complex &, ColorSpinorField &, const Complex &, ColorSpinorField &, ColorSpinorField &)
double axpyReDot(const double &a, ColorSpinorField &x, ColorSpinorField &y)
void axpyBzpcx(const double &a, ColorSpinorField &x, ColorSpinorField &y, const double &b, ColorSpinorField &z, const double &c)
void caxpyXmaz(const Complex &a, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
Complex xpaycDotzy(ColorSpinorField &x, const double &a, ColorSpinorField &y, ColorSpinorField &z)
void setPrec(ColorSpinorParam ¶m, const QudaPrecision precision)
INSTANTIATE_TEST_CASE_P(QUDA, BlasTest, Combine(Range(0, 3), Range(0, Nkernels)), getblasname)
void xpy(ColorSpinorField &x, ColorSpinorField &y)
std::string getblasname(testing::TestParamInfo<::testing::tuple< int, int >> param)
enum QudaDslashType_s QudaDslashType
void caxpby(const Complex &a, ColorSpinorField &x, const Complex &b, ColorSpinorField &y)
void mxpy(ColorSpinorField &x, ColorSpinorField &y)
__host__ __device__ ValueType abs(ValueType x)
void cxpaypbz(ColorSpinorField &, const Complex &b, ColorSpinorField &y, const Complex &c, ColorSpinorField &z)
std::vector< cpuColorSpinorField * > zmH
double3 cDotProductNormB(ColorSpinorField &a, ColorSpinorField &b)
static __inline__ size_t size_t d
void initComms(int argc, char **argv, const int *commDims)
int gridsize_from_cmdline[]
void setVerbosity(const QudaVerbosity verbosity)
enum QudaInverterType_s QudaInverterType
int main(int argc, char **argv)
double3 tripleCGReduction(ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z)
cudaEvent_t cudaEvent_t end