QUDA  1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
timer.cpp
Go to the documentation of this file.
1 #include <quda_internal.h>
2 #include <timer.h>
3 
4 namespace quda {
5 
8  if (profile[QUDA_PROFILE_TOTAL].time > 0.0) {
9  printfQuda("\n %20s Total time = %g secs\n", fname.c_str(),
11  }
12 
13  double accounted = 0.0;
14  for (int i=0; i<QUDA_PROFILE_COUNT-1; i++) {
15  if (profile[i].count > 0) {
16  printfQuda(" %20s = %f secs (%6.3g%%), with %8d calls at %e us per call\n",
17  (const char*)&pname[i][0], profile[i].time,
18  100*profile[i].time/profile[QUDA_PROFILE_TOTAL].time,
19  profile[i].count, 1e6*profile[i].time/profile[i].count);
20  accounted += profile[i].time;
21  }
22  }
23  if (accounted > 0.0) {
24  double missing = profile[QUDA_PROFILE_TOTAL].time - accounted;
25  printfQuda(" total accounted = %f secs (%6.3g%%)\n",
26  accounted, 100*accounted/profile[QUDA_PROFILE_TOTAL].time);
27  printfQuda(" total missing = %f secs (%6.3g%%)\n",
28  missing, 100*missing/profile[QUDA_PROFILE_TOTAL].time);
29  }
30 
31  if (accounted > profile[QUDA_PROFILE_TOTAL].time) {
32  warningQuda("Accounted time %f secs in %s is greater than total time %f secs",
33  accounted, (const char*)&fname[0], profile[QUDA_PROFILE_TOTAL].time);
34  }
35 
36  }
37 
38  std::string TimeProfile::pname[] = {"download",
39  "upload",
40  "init",
41  "preamble",
42  "compute",
43  "comms",
44  "epilogue",
45  "free",
46  "file i/o",
47  "chronology",
48  "eigen",
49  "arpack",
50  "dummy",
51  "pack kernel",
52  "dslash kernel",
53  "gather",
54  "scatter",
55  "kernel launch",
56  "event record",
57  "event query",
58  "stream wait event",
59  "set func attribute",
60  "event synchronize",
61  "stream synchronize",
62  "device synchronize",
63  "memcpy d2d async",
64  "memcpy d2h async",
65  "memcpy2d d2h async",
66  "memcpy h2d async",
67  "comms start",
68  "comms query",
69  "constant",
70  "total"};
71 
#ifdef INTERFACE_NVTX
// Colour table compiled in only when INTERFACE_NVTX is defined.
// NOTE(review): values look like 32-bit packed colours (presumably ARGB
// with full alpha) -- confirm against the NVTX call site that consumes them.
const uint32_t TimeProfile::nvtx_colors[] = {
  0xff00ff00,
  0xff0000ff,
  0xffffff00,
  0xffff00ff,
  0xff00ffff,
  0xffff0000,
  0xffffffff
};

// Number of entries in nvtx_colors, derived from the array itself so it
// tracks any change to the table above.
const int TimeProfile::nvtx_num_colors = sizeof(nvtx_colors) / sizeof(nvtx_colors[0]);
#endif
76 
80 
82  if (global_profile[QUDA_PROFILE_TOTAL].time > 0.0) {
83  printfQuda("\n %20s Total time = %g secs\n", "QUDA",
85  }
86 
87  double accounted = 0.0;
88  bool print_timer = true; // whether to print that timer
89  for (int i=0; i<QUDA_PROFILE_LOWER_LEVEL; i++) { // we do not want to print detailed lower level timers
90  if (global_profile[i].count > 0) {
91  if (print_timer) printfQuda(" %20s = %f secs (%6.3g%%), with %8d calls at %e us per call\n",
92  (const char*)&pname[i][0], global_profile[i].time,
94  global_profile[i].count, 1e6*global_profile[i].time/global_profile[i].count);
95  accounted += global_profile[i].time;
96  }
97  }
98  if (accounted > 0.0) {
99  double missing = global_profile[QUDA_PROFILE_TOTAL].time - accounted;
100  printfQuda(" total accounted = %f secs (%6.3g%%)\n",
101  accounted, 100*accounted/global_profile[QUDA_PROFILE_TOTAL].time);
102  printfQuda(" total missing = %f secs (%6.3g%%)\n",
103  missing, 100*missing/global_profile[QUDA_PROFILE_TOTAL].time);
104  }
105 
106  if (accounted > global_profile[QUDA_PROFILE_TOTAL].time) {
107  warningQuda("Accounted time %f secs in %s is greater than total time %f secs\n",
108  accounted, "QUDA", global_profile[QUDA_PROFILE_TOTAL].time);
109  }
110 
111  }
112 
113 }
static std::string pname[]
Definition: timer.h:178
static int global_total_level[QUDA_PROFILE_COUNT]
Definition: timer.h:186
Timer profile[QUDA_PROFILE_COUNT]
Definition: timer.h:177
void Print()
Definition: timer.cpp:7
#define warningQuda(...)
Definition: util_quda.h:133
static bool global_switchOff[QUDA_PROFILE_COUNT]
Definition: timer.h:185
double time
Definition: timer.h:44
static Timer global_profile[QUDA_PROFILE_COUNT]
Definition: timer.h:184
#define printfQuda(...)
Definition: util_quda.h:115
std::string fname
Definition: timer.h:172
static void PrintGlobal()
Definition: timer.cpp:81
__device__ unsigned int count[QUDA_MAX_MULTI_REDUCE]
Definition: cub_helper.cuh:90