OpenNN
Open-source neural networks library
Loading...
Searching...
No Matches
profiler.h
Go to the documentation of this file.
1#pragma once
2
#include <algorithm>
#include <chrono>
#include <iomanip>
#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>
9
10#ifdef OPENNN_HAS_CUDA
11#include <cuda_runtime.h>
12#endif
13
14namespace opennn {
15
/// Accumulates timings per named section and renders them as a text table.
struct Stats
{
    /// Running totals for one section key.
    struct Entry
    {
        double total_ms = 0.0; ///< Sum of every duration passed to add() for this key.
        long calls = 0;        ///< Number of add() calls recorded for this key.
    };

    /// Totals per section, keyed by section name.
    std::map<std::string, Entry> entries;

    /// Adds one measurement of `ms` milliseconds to section `key`,
    /// creating the section on first use.
    void add(const std::string& key, double ms)
    {
        Entry& entry = entries[key];
        entry.total_ms += ms;
        entry.calls += 1;
    }

    /// Drops every recorded section.
    void clear()
    {
        entries.clear();
    }

    /// Writes the table to `os`, sections ordered by descending total time.
    ///
    /// @param os       Destination stream; its format flags and precision are
    ///                 restored before returning.
    /// @param title    Text printed after the "[PROFILE] " tag.
    /// @param total_ms When greater than zero, a percentage column is added
    ///                 showing each section's share of this value.
    void print(std::ostream& os, const std::string& title, double total_ms = 0.0) const
    {
        using Row = std::map<std::string, Entry>::value_type;

        // Sort pointers into the map rather than copies of every key string.
        std::vector<const Row*> rows;
        rows.reserve(entries.size());
        for (const Row& row : entries)
            rows.push_back(&row);

        // Stable sort: sections with equal totals keep the map's key order,
        // so the output is deterministic.
        std::stable_sort(rows.begin(), rows.end(),
            [](const Row* a, const Row* b) { return a->second.total_ms > b->second.total_ms; });

        // left/right, fixed and setprecision are sticky; put the caller's
        // settings back on every exit path.
        struct FormatGuard
        {
            std::ostream& stream;
            std::ios_base::fmtflags flags;
            std::streamsize precision;
            ~FormatGuard()
            {
                stream.flags(flags);
                stream.precision(precision);
            }
        };
        const FormatGuard restore_format{os, os.flags(), os.precision()};

        const bool show_percent = total_ms > 0.0;

        os << "\n[PROFILE] " << title << "\n";
        os << " " << std::left << std::setw(48) << "section"
           << std::right << std::setw(12) << "total_ms"
           << std::setw(10) << "calls"
           << std::setw(12) << "ms/call";
        if (show_percent)
            os << std::setw(8) << "%";
        os << "\n";

        os << std::fixed;
        for (const Row* row : rows)
        {
            const std::string& key = row->first;
            const Entry& entry = row->second;

            // An entry inserted directly into `entries` may have zero calls;
            // report 0 instead of dividing by zero.
            const double ms_per_call =
                entry.calls > 0 ? entry.total_ms / static_cast<double>(entry.calls) : 0.0;

            os << " " << std::left << std::setw(48) << key
               << std::right << std::setw(12) << std::setprecision(2) << entry.total_ms
               << std::setw(10) << entry.calls
               << std::setw(12) << std::setprecision(3) << ms_per_call;
            if (show_percent)
                os << std::setw(7) << std::setprecision(1) << (entry.total_ms / total_ms * 100.0) << "%";
            os << "\n";
        }
        os << "\n";
    }
};
61
63{
64 static Stats stats;
65 return stats;
66}
67
/// Accessor for the profiling on/off switch.
/// Returns a mutable reference to a function-local flag that starts out
/// false; assign through the reference (`enabled() = true;`) to toggle it.
inline bool& enabled()
{
    static bool profiling_on{false};
    return profiling_on;
}
73
75{
76 string key_;
77 chrono::steady_clock::time_point t0_;
78 bool sync_gpu_;
79public:
80 ScopedTimer(string key, bool sync_gpu = true)
81 : key_(move(key)), sync_gpu_(sync_gpu)
82 {
83 if (!enabled()) return;
84#ifdef OPENNN_HAS_CUDA
85 if (sync_gpu_) cudaDeviceSynchronize();
86#endif
87 t0_ = chrono::steady_clock::now();
88 }
89
91 {
92 if (!enabled()) return;
93#ifdef OPENNN_HAS_CUDA
94 if (sync_gpu_) cudaDeviceSynchronize();
95#endif
96 const auto end_time = chrono::steady_clock::now();
97 const double elapsed_ms = chrono::duration<double, milli>(end_time - t0_).count();
98 global_stats().add(key_, elapsed_ms);
99 }
100};
101
102} // namespace opennn
103
// Two-level token pasting: the outer macro lets arguments such as __LINE__ be
// macro-expanded before the inner macro joins them with `##`.
#define OPENNN_PROFILE_CAT_INNER(a, b) a##b
#define OPENNN_PROFILE_CAT(a, b) OPENNN_PROFILE_CAT_INNER(a, b)

// Declares a ::opennn::ScopedTimer local whose name is made unique per source
// line. The ternary short-circuits: when profiling is disabled, the `name`
// expression is not evaluated and only an empty string is constructed.
// std::string is fully qualified so the macro also compiles in translation
// units that have no `using namespace std`.
#define PROFILE_SCOPE_IMPL(name, sync) \
    ::opennn::ScopedTimer OPENNN_PROFILE_CAT(_profile_, __LINE__)( \
        ::opennn::enabled() ? ::std::string(name) : ::std::string{}, sync)

// Time the enclosing scope; in CUDA builds the timer synchronizes with the device.
#define PROFILE_SCOPE(name) PROFILE_SCOPE_IMPL(name, true)
// Time the enclosing scope without any device synchronization.
#define PROFILE_SCOPE_HOST(name) PROFILE_SCOPE_IMPL(name, false)
~ScopedTimer()
Definition profiler.h:90
ScopedTimer(string key, bool sync_gpu=true)
Definition profiler.h:80
Definition adaptive_moment_estimation.h:14
bool & enabled()
Definition profiler.h:68
Stats & global_stats()
Definition profiler.h:62
Definition profiler.h:18
double total_ms
Definition profiler.h:18
long calls
Definition profiler.h:18
Definition profiler.h:17
void add(const string &key, double ms)
Definition profiler.h:22
void print(ostream &os, const string &title, double total_ms=0.0) const
Definition profiler.h:34
void clear()
Definition profiler.h:29
map< string, Entry > entries
Definition profiler.h:20