OpenNN
Open-source neural networks library
Loading...
Searching...
No Matches
profiler.h
Go to the documentation of this file.
1#pragma once
2
#include <algorithm>
#include <chrono>
#include <iomanip>
#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>
9
10#ifdef OPENNN_HAS_CUDA
11#include <cuda_runtime.h>
12#endif
13
15
/// Accumulates elapsed time and sample counts per named section.
struct Stats
{
    /// Sum of all recorded samples for each section key, in milliseconds.
    std::map<std::string, double> times_ms;
    /// Number of samples recorded for each section key.
    std::map<std::string, long> counts;

    /// Records one sample of `ms` milliseconds under `key`.
    /// The entry is created (starting from zero) the first time a key is seen.
    void add(const std::string& key, double ms)
    {
        times_ms[key] += ms;
        counts[key] += 1;
    }

    /// Removes every recorded section.
    void clear()
    {
        times_ms.clear();
        counts.clear();
    }

    /// Writes a table of all sections to `os`, largest total time first.
    ///
    /// @param os        Destination stream. Its format flags and precision are
    ///                  restored before this function returns or throws.
    /// @param title     Text printed after the "[PROFILE] " tag.
    /// @param total_ms  When greater than zero, an extra column shows each
    ///                  section's total as a percentage of this value.
    /// @throws std::out_of_range if a key in `times_ms` has no entry in `counts`
    ///         (possible only when the public maps were modified directly).
    void print(std::ostream& os, const std::string& title, double total_ms = 0.0) const
    {
        // std::left/std::right, std::fixed and std::setprecision are sticky;
        // this guard puts the caller's settings back on every exit path.
        struct FormatGuard
        {
            std::ostream& stream;
            std::ios_base::fmtflags flags;
            std::streamsize precision;

            explicit FormatGuard(std::ostream& s)
                : stream(s), flags(s.flags()), precision(s.precision()) {}
            ~FormatGuard()
            {
                stream.flags(flags);
                stream.precision(precision);
            }
            FormatGuard(const FormatGuard&) = delete;
            FormatGuard& operator=(const FormatGuard&) = delete;
        };
        const FormatGuard guard(os);

        // The copy starts in key order (map iteration order); a stable sort
        // keeps that order among sections with equal totals.
        std::vector<std::pair<std::string, double>> sorted(times_ms.begin(), times_ms.end());
        std::stable_sort(sorted.begin(), sorted.end(),
                         [](const auto& a, const auto& b) { return a.second > b.second; });

        const bool show_percent = total_ms > 0.0;

        os << "\n[PROFILE] " << title << "\n";
        os << " " << std::left << std::setw(48) << "section"
           << std::right << std::setw(12) << "total_ms"
           << std::setw(10) << "calls"
           << std::setw(12) << "ms/call";
        if (show_percent) os << std::setw(8) << "%";
        os << "\n";

        for (const auto& [key, total] : sorted)
        {
            const long call_count = counts.at(key);
            os << " " << std::left << std::setw(48) << key
               << std::right << std::setw(12) << std::fixed << std::setprecision(2) << total
               << std::setw(10) << call_count
               << std::setw(12) << std::fixed << std::setprecision(3) << (total / double(call_count));
            if (show_percent)
                os << std::setw(7) << std::fixed << std::setprecision(1) << (total / total_ms * 100.0) << "%";
            os << "\n";
        }
        os << "\n";
    }
};
61
63{
64 static Stats stats;
65 return stats;
66}
67
/// Returns a mutable reference to the profiling on/off flag.
/// The flag is a function-local static that starts out false; assign through
/// the returned reference to change it.
inline bool& enabled()
{
    static bool profiling_on{false};
    bool& flag = profiling_on;
    return flag;
}
73
75{
76 std::string key_;
77 std::chrono::steady_clock::time_point t0_;
78 bool sync_gpu_;
79public:
80 ScopedTimer(std::string key, bool sync_gpu = true)
81 : key_(move(key)), sync_gpu_(sync_gpu)
82 {
83 if (!enabled()) return;
84#ifdef OPENNN_HAS_CUDA
85 if (sync_gpu_) cudaDeviceSynchronize();
86#endif
87 t0_ = std::chrono::steady_clock::now();
88 }
89
91 {
92 if (!enabled()) return;
93#ifdef OPENNN_HAS_CUDA
94 if (sync_gpu_) cudaDeviceSynchronize();
95#endif
96 const auto end_time = std::chrono::steady_clock::now();
97 const double elapsed_ms = std::chrono::duration<double, std::milli>(end_time - t0_).count();
98 global_stats().add(key_, elapsed_ms);
99 }
100};
101
102} // namespace opennn::profiler
103
// Two-level token pasting: the outer macro lets its arguments (such as
// __LINE__) expand before the inner macro joins them with ##.
#define OPENNN_PROFILE_CAT_INNER(lhs, rhs) lhs##rhs
#define OPENNN_PROFILE_CAT(lhs, rhs) OPENNN_PROFILE_CAT_INNER(lhs, rhs)

// Declares a ScopedTimer variable named _profile_<line number> for section
// `name`, passing true as the sync_gpu argument.
#define PROFILE_SCOPE(name) ::opennn::profiler::ScopedTimer OPENNN_PROFILE_CAT(_profile_, __LINE__)(name, true)
// Same as PROFILE_SCOPE, but passes false as the sync_gpu argument.
#define PROFILE_SCOPE_HOST(name) ::opennn::profiler::ScopedTimer OPENNN_PROFILE_CAT(_profile_, __LINE__)(name, false)
ScopedTimer(std::string key, bool sync_gpu=true)
Definition profiler.h:80
~ScopedTimer()
Definition profiler.h:90
Definition profiler.h:14
Stats & global_stats()
Definition profiler.h:62
bool & enabled()
Definition profiler.h:68
Definition profiler.h:17
void print(std::ostream &os, const std::string &title, double total_ms=0.0) const
Definition profiler.h:33
void clear()
Definition profiler.h:27
std::map< std::string, long > counts
Definition profiler.h:19
void add(const std::string &key, double ms)
Definition profiler.h:21
std::map< std::string, double > times_ms
Definition profiler.h:18