QUDA  v1.1.0
A library for QCD on GPUs
timer.cpp
Go to the documentation of this file.
1 #include <quda_internal.h>
2 #include <timer.h>
3 
4 namespace quda {
5 
8  if (profile[QUDA_PROFILE_TOTAL].time > 0.0) {
9  printfQuda("\n %20s Total time = %9.3f secs\n", fname.c_str(), profile[QUDA_PROFILE_TOTAL].time);
10  }
11 
12  double accounted = 0.0;
13  for (int i=0; i<QUDA_PROFILE_COUNT-1; i++) {
14  if (profile[i].count > 0) {
15  printfQuda(" %20s = %9.3f secs (%7.3f%%),\t with %8d calls at %6.3e us per call\n",
16  (const char *)&pname[i][0], profile[i].time, 100 * profile[i].time / profile[QUDA_PROFILE_TOTAL].time,
17  profile[i].count, 1e6 * profile[i].time / profile[i].count);
18  accounted += profile[i].time;
19  }
20  }
21  if (accounted > 0.0) {
22  double missing = profile[QUDA_PROFILE_TOTAL].time - accounted;
23  printfQuda(" total accounted = %9.3f secs (%7.3f%%)\n", accounted,
24  100 * accounted / profile[QUDA_PROFILE_TOTAL].time);
25  printfQuda(" total missing = %9.3f secs (%7.3f%%)\n", missing,
26  100 * missing / profile[QUDA_PROFILE_TOTAL].time);
27  }
28 
29  if (accounted > profile[QUDA_PROFILE_TOTAL].time) {
30  warningQuda("Accounted time %9.3f secs in %s is greater than total time %9.3f secs", accounted,
31  (const char *)&fname[0], profile[QUDA_PROFILE_TOTAL].time);
32  }
33 
34  }
35 
36  std::string TimeProfile::pname[] = {"download",
37  "upload",
38  "init",
39  "preamble",
40  "compute",
41  "comms",
42  "epilogue",
43  "free",
44  "file i/o",
45  "chronology",
46  "eigen",
47  "eigenLU",
48  "eigenEV",
49  "eigenQR",
50  "arpack",
51  "host compute",
52  "dummy",
53  "pack kernel",
54  "dslash kernel",
55  "gather",
56  "scatter",
57  "kernel launch",
58  "event record",
59  "event query",
60  "stream wait event",
61  "set func attribute",
62  "event synchronize",
63  "stream synchronize",
64  "device synchronize",
65  "memcpy d2d async",
66  "memcpy d2h async",
67  "memcpy2d d2h async",
68  "memcpy h2d async",
69  "memcpy default async",
70  "comms start",
71  "comms query",
72  "constant",
73  "total"};
74 
#ifdef INTERFACE_NVTX
// Colour table and its length, only compiled when INTERFACE_NVTX is defined.
// NOTE(review): the values look like 32-bit ARGB colours -- confirm against the NVTX call sites.
const uint32_t TimeProfile::nvtx_colors[] = {
  0xff00ff00,
  0xff0000ff,
  0xffffff00,
  0xffff00ff,
  0xff00ffff,
  0xffff0000,
  0xffffffff
};
// Number of entries in nvtx_colors, derived from the array itself.
const int TimeProfile::nvtx_num_colors = sizeof(nvtx_colors) / sizeof(nvtx_colors[0]);
#endif
79 
80  Timer TimeProfile::global_profile[QUDA_PROFILE_COUNT];
81  bool TimeProfile::global_switchOff[QUDA_PROFILE_COUNT] = {};
82  int TimeProfile::global_total_level[QUDA_PROFILE_COUNT] = {};
83 
85  if (global_profile[QUDA_PROFILE_TOTAL].time > 0.0) {
86  printfQuda("\n %20s Total time = %9.3f secs\n", "QUDA", global_profile[QUDA_PROFILE_TOTAL].time);
87  }
88 
89  double accounted = 0.0;
90  bool print_timer = true; // whether to print that timer
91  for (int i=0; i<QUDA_PROFILE_LOWER_LEVEL; i++) { // we do not want to print detailed lower level timers
92  if (global_profile[i].count > 0) {
93  if (print_timer)
94  printfQuda(" %20s = %9.3f secs (%7.3f%%),\t with %8d calls at %6.3e us per call\n",
95  (const char *)&pname[i][0], global_profile[i].time,
96  100 * global_profile[i].time / global_profile[QUDA_PROFILE_TOTAL].time, global_profile[i].count,
97  1e6 * global_profile[i].time / global_profile[i].count);
98  accounted += global_profile[i].time;
99  }
100  }
101  if (accounted > 0.0) {
102  double missing = global_profile[QUDA_PROFILE_TOTAL].time - accounted;
103  printfQuda(" total accounted = %9.3f secs (%7.3f%%)\n", accounted,
104  100 * accounted / global_profile[QUDA_PROFILE_TOTAL].time);
105  printfQuda(" total missing = %9.3f secs (%7.3f%%)\n", missing,
106  100 * missing / global_profile[QUDA_PROFILE_TOTAL].time);
107  }
108 
109  if (accounted > global_profile[QUDA_PROFILE_TOTAL].time) {
110  warningQuda("Accounted time %9.3f secs in %s is greater than total time %9.3f secs\n", accounted, "QUDA",
111  global_profile[QUDA_PROFILE_TOTAL].time);
112  }
113  }
114 
115 }
void Print()
Definition: timer.cpp:7
static void PrintGlobal()
Definition: timer.cpp:84
@ QUDA_PROFILE_COUNT
Definition: timer.h:150
@ QUDA_PROFILE_TOTAL
Definition: timer.h:149
@ QUDA_PROFILE_LOWER_LEVEL
Definition: timer.h:122
::std::string string
Definition: gtest-port.h:891
double time
Definition: timer.h:44
#define printfQuda(...)
Definition: util_quda.h:114
#define warningQuda(...)
Definition: util_quda.h:132