12 double accounted = 0.0;
14 if (profile[i].count > 0) {
15 printfQuda(
" %20s = %9.3f secs (%7.3f%%),\t with %8d calls at %6.3e us per call\n",
16 (
const char *)&pname[i][0], profile[i].time, 100 * profile[i].time / profile[
QUDA_PROFILE_TOTAL].time,
17 profile[i].count, 1e6 * profile[i].time / profile[i].count);
18 accounted += profile[i].
time;
21 if (accounted > 0.0) {
23 printfQuda(
" total accounted = %9.3f secs (%7.3f%%)\n", accounted,
25 printfQuda(
" total missing = %9.3f secs (%7.3f%%)\n", missing,
30 warningQuda(
"Accounted time %9.3f secs in %s is greater than total time %9.3f secs", accounted,
69 "memcpy default async",
// Table of seven 32-bit color constants defined for TimeProfile. Every entry
// has 0xff in its top byte.
// NOTE(review): presumably packed ARGB values used to color NVTX ranges
// (green, blue, yellow, magenta, cyan, red, white under that reading) --
// confirm against the code that consumes this table.
76 const uint32_t TimeProfile::nvtx_colors[] = { 0xff00ff00, 0xff0000ff, 0xffffff00, 0xffff00ff, 0xff00ffff, 0xffff0000, 0xffffffff };
// Number of entries in nvtx_colors, derived from the array's byte size divided
// by the element size so it follows the initializer list automatically.
77 const int TimeProfile::nvtx_num_colors =
sizeof(nvtx_colors)/
sizeof(uint32_t);
89 double accounted = 0.0;
90 bool print_timer =
true;
92 if (global_profile[i].count > 0) {
94 printfQuda(
" %20s = %9.3f secs (%7.3f%%),\t with %8d calls at %6.3e us per call\n",
95 (
const char *)&pname[i][0], global_profile[i].time,
96 100 * global_profile[i].time / global_profile[
QUDA_PROFILE_TOTAL].time, global_profile[i].count,
97 1e6 * global_profile[i].time / global_profile[i].count);
98 accounted += global_profile[i].
time;
101 if (accounted > 0.0) {
103 printfQuda(
" total accounted = %9.3f secs (%7.3f%%)\n", accounted,
105 printfQuda(
" total missing = %9.3f secs (%7.3f%%)\n", missing,
110 warningQuda(
"Accounted time %9.3f secs in %s is greater than total time %9.3f secs\n", accounted,
"QUDA",
static void PrintGlobal()
@ QUDA_PROFILE_LOWER_LEVEL