12 double accounted = 0.0;
15 printfQuda(
" %20s = %f secs (%6.3g%%), with %8d calls at %e us per call\n",
22 if (accounted > 0.0) {
24 printfQuda(
" total accounted = %f secs (%6.3g%%)\n",
26 printfQuda(
" total missing = %f secs (%6.3g%%)\n",
31 warningQuda(
"Accounted time %f secs in %s is greater than total time %f secs",
37 std::string
TimeProfile::pname[] = {
"download",
"upload",
"init",
"preamble",
"compute",
38 "comms",
"epilogue",
"free",
"file i/o",
"dummy",
"pack kernel",
39 "dslash kernel",
"gather",
"scatter",
40 "kernel launch",
"event record",
41 "event query",
"stream wait event",
"set func attribute",
42 "event synchronize",
"stream synchronize",
"device synchronize",
43 "memcpy d2d async",
"memcpy d2h async",
"memcpy2d d2h async",
44 "memcpy h2d async",
"comms start",
"comms query",
"constant",
"total" };
47 const uint32_t TimeProfile::nvtx_colors[] = { 0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff, 0x0000ffff, 0x00ff0000, 0x00ffffff };
48 const int TimeProfile::nvtx_num_colors =
sizeof(nvtx_colors)/
sizeof(
uint32_t);
57 printfQuda(
"\n %20s Total time = %g secs\n",
"QUDA",
61 double accounted = 0.0;
62 bool print_timer =
true;
65 if (print_timer)
printfQuda(
" %20s = %f secs (%6.3g%%), with %8d calls at %e us per call\n",
72 if (accounted > 0.0) {
74 printfQuda(
" total accounted = %f secs (%6.3g%%)\n",
76 printfQuda(
" total missing = %f secs (%6.3g%%)\n",
81 warningQuda(
"Accounted time %f secs in %s is greater than total time %f secs\n",
static std::string pname[]
static int global_total_level[QUDA_PROFILE_COUNT]
Timer profile[QUDA_PROFILE_COUNT]
static bool global_switchOff[QUDA_PROFILE_COUNT]
static Timer global_profile[QUDA_PROFILE_COUNT]
static void PrintGlobal()
__device__ unsigned int count[QUDA_MAX_MULTI_REDUCE]