QUDA  0.9.0
timer.cpp
Go to the documentation of this file.
1 #include <quda_internal.h>
2 
3 namespace quda {
4 
7  if (profile[QUDA_PROFILE_TOTAL].time > 0.0) {
8  printfQuda("\n %20s Total time = %g secs\n", fname.c_str(),
10  }
11 
12  double accounted = 0.0;
13  for (int i=0; i<QUDA_PROFILE_COUNT-1; i++) {
14  if (profile[i].count > 0) {
15  printfQuda(" %20s = %f secs (%6.3g%%), with %8d calls at %e us per call\n",
16  (const char*)&pname[i][0], profile[i].time,
19  accounted += profile[i].time;
20  }
21  }
22  if (accounted > 0.0) {
23  double missing = profile[QUDA_PROFILE_TOTAL].time - accounted;
24  printfQuda(" total accounted = %f secs (%6.3g%%)\n",
25  accounted, 100*accounted/profile[QUDA_PROFILE_TOTAL].time);
26  printfQuda(" total missing = %f secs (%6.3g%%)\n",
27  missing, 100*missing/profile[QUDA_PROFILE_TOTAL].time);
28  }
29 
30  if (accounted > profile[QUDA_PROFILE_TOTAL].time) {
31  warningQuda("Accounted time %f secs in %s is greater than total time %f secs",
32  accounted, (const char*)&fname[0], profile[QUDA_PROFILE_TOTAL].time);
33  }
34 
35  }
36 
// Human-readable labels for the profile timers (30 entries, ending in "total").
// The printing loops in this file read pname[i] next to profile[i] /
// global_profile[i], so entry i is the label printed for timer i.
// NOTE(review): the order presumably has to mirror the profile enum declared
// elsewhere (with "total" lining up with QUDA_PROFILE_TOTAL) and the entry
// count presumably equals QUDA_PROFILE_COUNT -- confirm against the header
// before adding, removing or reordering labels.
37  std::string TimeProfile::pname[] = { "download", "upload", "init", "preamble", "compute",
38  "comms", "epilogue", "free", "file i/o", "dummy", "pack kernel",
39  "dslash kernel", "gather", "scatter",
40  "kernel launch", "event record",
41  "event query", "stream wait event", "set func attribute",
42  "event synchronize", "stream synchronize", "device synchronize",
43  "memcpy d2d async", "memcpy d2h async", "memcpy2d d2h async",
44  "memcpy h2d async", "comms start", "comms query", "constant", "total" };
45 
// The two definitions below are compiled only when INTERFACE_NVTX is defined.
46 #ifdef INTERFACE_NVTX
// Table of seven 32-bit color constants.
// NOTE(review): presumably ARGB values used to color NVTX ranges per timer --
// the use site is not visible here; confirm against the header.
47  const uint32_t TimeProfile::nvtx_colors[] = { 0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff, 0x0000ffff, 0x00ff0000, 0x00ffffff };
// Element count of nvtx_colors, derived from the array size so it follows the
// table automatically when colors are added or removed.
48  const int TimeProfile::nvtx_num_colors = sizeof(nvtx_colors)/sizeof(uint32_t);
49 #endif
50 
54 
57  printfQuda("\n %20s Total time = %g secs\n", "QUDA",
59  }
60 
61  double accounted = 0.0;
62  bool print_timer = true; // whether to print that timer
63  for (int i=0; i<QUDA_PROFILE_LOWER_LEVEL; i++) { // we do not want to print detailed lower level timers
64  if (global_profile[i].count > 0) {
65  if (print_timer) printfQuda(" %20s = %f secs (%6.3g%%), with %8d calls at %e us per call\n",
66  (const char*)&pname[i][0], global_profile[i].time,
69  accounted += global_profile[i].time;
70  }
71  }
72  if (accounted > 0.0) {
73  double missing = global_profile[QUDA_PROFILE_TOTAL].time - accounted;
74  printfQuda(" total accounted = %f secs (%6.3g%%)\n",
75  accounted, 100*accounted/global_profile[QUDA_PROFILE_TOTAL].time);
76  printfQuda(" total missing = %f secs (%6.3g%%)\n",
77  missing, 100*missing/global_profile[QUDA_PROFILE_TOTAL].time);
78  }
79 
80  if (accounted > global_profile[QUDA_PROFILE_TOTAL].time) {
81  warningQuda("Accounted time %f secs in %s is greater than total time %f secs\n",
82  accounted, "QUDA", global_profile[QUDA_PROFILE_TOTAL].time);
83  }
84 
85  }
86 
87 }
static std::string pname[]
static int global_total_level[QUDA_PROFILE_COUNT]
time_t time(time_t *)
Timer profile[QUDA_PROFILE_COUNT]
void Print()
Definition: timer.cpp:6
#define warningQuda(...)
Definition: util_quda.h:101
static bool global_switchOff[QUDA_PROFILE_COUNT]
static Timer global_profile[QUDA_PROFILE_COUNT]
unsigned int uint32_t
#define printfQuda(...)
Definition: util_quda.h:84
std::string fname
static void PrintGlobal()
Definition: timer.cpp:55
__device__ unsigned int count[QUDA_MAX_MULTI_REDUCE]
Definition: cub_helper.cuh:118