// Forward declaration of usage(), which is defined in another translation
// unit. It takes a single char** argument and returns nothing.
// NOTE(review): presumably receives argv so it can print command-line help —
// confirm against the definition.
27 extern void usage(
char** );
29 #if (__COMPUTE_CAPABILITY__ >= 200)
31 #else // exclude Heavy Quark Norm if on Tesla architecture
85 else param.
x[0] =
xdim;
192 cudaEvent_t start,
end;
193 cudaEventCreate(&start);
194 cudaEventCreate(&end);
195 cudaEventRecord(start, 0);
197 for (
int i=0; i <
niter; ++i) {
333 errorQuda(
"Undefined blas kernel %d\n", kernel);
337 cudaEventRecord(end, 0);
338 cudaEventSynchronize(end);
340 cudaEventElapsedTime(&runTime, start, end);
341 cudaEventDestroy(start);
342 cudaEventDestroy(end);
344 double secs = runTime / 1000;
// ERROR(a): relative deviation between two results for the field named `a`.
// The argument is token-pasted, so ERROR(y) reads the pointers `yD` and `yH`
// and evaluates |norm2(*yD) - norm2(*yH)| / norm2(*yH).
// NOTE(review): the D/H suffixes presumably denote device and host copies —
// confirm where the variables are declared.
// NOTE(review): the expansion is not wrapped in parentheses. The visible uses
// (`ERROR(y) + ...`) are unaffected, but placing it to the right of `/` or `*`
// would associate incorrectly. norm2(*a##H) is also evaluated twice, and the
// result is inf/NaN if it is zero.
348 #define ERROR(a) fabs(norm2(*a##D) - norm2(*a##H)) / norm2(*a##H)
352 double a = 1.5, b = 2.5, c = 3.5;
525 error =
ERROR(y) + fabs(d-h)/fabs(h);}
533 error =
ERROR(y) + fabs(d-h)/fabs(h);}
541 error =
ERROR(y) + fabs(d-h)/fabs(h);}
584 error =
ERROR(y) + abs(d-h)/abs(h);}
593 error = fabs(d.x - h.x) / fabs(h.x) + fabs(d.y - h.y) / fabs(h.y) + fabs(d.z - h.z) / fabs(h.z); }
601 error = fabs(d.x - h.x) / fabs(h.x) + fabs(d.y - h.y) / fabs(h.y) + fabs(d.z - h.z) / fabs(h.z); }
612 error =
ERROR(z) +
ERROR(y) + fabs(d.x - h.x) / fabs(h.x) +
613 fabs(d.y - h.y) / fabs(h.y) + fabs(d.z - h.z) / fabs(h.z); }
621 error = fabs(d.x - h.x) / fabs(h.x) +
622 fabs(d.y - h.y) / fabs(h.y) + fabs(d.z - h.z) / fabs(h.z); }
626 errorQuda(
"Undefined blas kernel %d\n", kernel);
632 int main(
int argc,
char** argv)
634 for (
int i = 1; i < argc; i++){
638 printfQuda(
"ERROR: Invalid option:%s\n", argv[i]);
678 "caxpbypzYmbwcDotProductWYNormY",
679 "HeavyQuarkResidualNorm"
682 char *prec_str[] = {
"half",
"single",
"double"};
685 #if (__COMPUTE_CAPABILITY__ >= 130)
696 printfQuda(
"\nBenchmarking %s precision with %d iterations...\n\n", prec_str[
prec],
niter);
699 for (
int kernel = 0; kernel <
Nkernels; kernel++) {
701 if ((Nprec < 3) && (kernel == 0))
continue;
715 printfQuda(
"%-31s: Gflop/s = %6.1f, GB/s = %6.1f\n", names[kernel], gflops, gbytes);
728 for (
int kernel = 0; kernel <
Nkernels; kernel++) {
730 if ((Nprec < 3) && (kernel == 0))
continue;
731 double error =
test(kernel);
732 printfQuda(
"%-35s error = %e, \n", names[kernel], error);