Itoyori  v0.0.1
profiler.hpp
Go to the documentation of this file.
1 #pragma once
2 
3 #include <limits>
4 #include <tuple>
5 #include <memory>
6 
7 #include "ityr/common/util.hpp"
10 #include "ityr/common/topology.hpp"
12 #include "ityr/common/options.hpp"
13 
14 #if __has_include(<mlog/mlog.h>)
15 #include <mlog/mlog.h>
16 #else
17 #define MLOG_BEGIN(...) ((void*)0)
18 #define MLOG_END(...)
19 #define MLOG_READ_ARG(buf, type) ((type){})
21 using mlog_data_t = void*;
22 inline void mlog_die() {
23  die("Trace profiler cannot be used because MassiveLogger is not loaded.");
24 }
25 inline void mlog_init(void**, int, size_t) { mlog_die(); }
26 inline void mlog_flush_all(void**, FILE*) { mlog_die(); }
27 inline void mlog_clear_all(void**) { mlog_die(); }
28 }
29 #endif
30 
31 namespace ityr::common::profiler {
32 
33 struct mode_disabled {
34  using interval_begin_data = void*;
35 };
36 
37 struct mode_stats {
39 };
40 
41 struct mode_trace {
42  using interval_begin_data = void*;
43 };
44 
46  bool enabled;
51 };
52 
53 class event {
54 public:
56  : state_(state) {}
57 
58  virtual ~event() = default;
59 
62 
64  return t;
65  }
66 
68  do_acc(t - ibd);
69  }
70 
72  auto ibd = MLOG_BEGIN(&state_.trace_md, 0, t);
73  return ibd;
74  }
75 
77  MLOG_END(&state_.trace_md, 0, ibd, trace_decoder_base, this, t);
78  }
79 
80  static void* trace_decoder_base(FILE* stream, int, int, void* buf0, void* buf1) {
81  event* e = MLOG_READ_ARG(&buf1, event*);
82  return e->trace_decoder(stream, buf0, buf1);
83  }
84 
85  virtual void* trace_decoder(FILE* stream, void* buf0, void* buf1) {
86  auto t0 = MLOG_READ_ARG(&buf0, wallclock::wallclock_t);
87  auto t1 = MLOG_READ_ARG(&buf1, wallclock::wallclock_t);
88 
89  do_acc(t1 - t0);
90 
91  auto rank = topology::my_rank();
92  fprintf(stream, "%d,%lu,%d,%lu,%s\n", rank, t0, rank, t1, str().c_str());
93  return buf1;
94  }
95 
96  virtual std::string str() const { return "UNKNOWN"; }
97 
98  virtual void clear() {
99  sum_time_ = 0;
100  max_time_ = 0;
101  count_ = 0;
102  }
103 
104  virtual void print_stats() {
105  auto t_total = state_.t_end - state_.t_begin;
106  if (state_.output_per_rank) {
107  using msg_t = std::tuple<wallclock::wallclock_t, wallclock::wallclock_t, wallclock::wallclock_t, counter_t>;
108 
109  if (topology::my_rank() == 0) {
111  for (topology::rank_t i = 1; i < topology::n_ranks(); i++) {
113 
114  auto [s, m, t, c] = mpi_recv_value<msg_t>(i, 0, topology::mpicomm());
115  print_stats_per_rank(i, s, m, t, c);
116  }
117  } else {
118  for (topology::rank_t i = 1; i < topology::n_ranks(); i++) {
119  // Insert a barrier to each iteration so that too many send requests do not go to rank 0 at the same time
121 
122  if (i == topology::my_rank()) {
123  mpi_send_value(std::make_tuple(sum_time_, max_time_, t_total, count_), 0, 0, topology::mpicomm());
124  }
125  }
126  }
127  } else {
128  auto sum_time_all = mpi_reduce_value(sum_time_, 0, topology::mpicomm());
129  auto max_time_all = mpi_reduce_value(max_time_, 0, topology::mpicomm(), MPI_MAX);
130  auto t_total_all = mpi_reduce_value(t_total, 0, topology::mpicomm());
131  auto count_all = mpi_reduce_value(count_, 0, topology::mpicomm());
132  if (topology::my_rank() == 0) {
133  print_stats_sum(sum_time_all, max_time_all, t_total_all, count_all);
134  }
135  }
136  }
137 
138 protected:
140  sum_time_ += t;
142  count_++;
143  }
144 
145  using counter_t = uint64_t;
146 
148  wallclock::wallclock_t sum_time,
149  wallclock::wallclock_t max_time,
150  wallclock::wallclock_t t_total,
151  counter_t count) const {
152  printf(" %-22s (rank %3d) : %10.6f %% ( %15ld ns / %15ld ns ) count: %10ld ave: %8ld ns max: %8ld ns\n",
153  str().c_str(), rank,
154  (double)sum_time / t_total * 100, sum_time, t_total,
155  count, count == 0 ? 0 : (sum_time / count), max_time);
156  }
157 
159  wallclock::wallclock_t max_time,
160  wallclock::wallclock_t t_total,
161  counter_t count) const {
162  printf(" %-22s : %10.6f %% ( %15ld ns / %15ld ns ) count: %10ld ave: %8ld ns max: %8ld ns\n",
163  str().c_str(),
164  (double)sum_time / t_total * 100, sum_time, t_total,
165  count, count == 0 ? 0 : (sum_time / count), max_time);
166  }
167 
172 };
173 
174 template <typename Mode>
175 class profiler {
176 public:
179  if constexpr (std::is_same_v<Mode, mode_trace>) {
180  mlog_init(&state_.trace_md, 1, 1 << 20);
181  trace_out_file_ = {std::fopen(trace_out_filename().c_str(), "w"), &std::fclose};
182  }
183  }
184 
185  void add(event* e) {
186  events_.push_back(e);
187  }
188 
189  void begin() {
190  state_.enabled = true;
191  state_.t_begin = wallclock::gettime_ns();
192  }
193 
194  void end() {
195  state_.enabled = false;
196  state_.t_end = wallclock::gettime_ns();
197  if (last_phase_ != nullptr) {
198  last_phase_->interval_end(Mode{}, state_.t_end, phase_ibd_);
199  last_phase_ = nullptr;
200  }
201  }
202 
203  void flush() {
204  if constexpr (std::is_same_v<Mode, mode_trace>) {
205  mlog_flush_all(&state_.trace_md, trace_out_file_.get());
206  }
207  for (auto&& e : events_) {
208  e->print_stats();
209  }
210  if (topology::my_rank() == 0) {
211  printf("\n");
212  fflush(stdout);
213  }
214  for (auto&& e : events_) {
215  e->clear();
216  }
217  if constexpr (std::is_same_v<Mode, mode_trace>) {
218  mlog_clear_all(&state_.trace_md);
219  }
220  }
221 
222  profiler_state& get_state() { return state_; }
223 
224  template <typename PhaseFrom, typename PhaseTo>
225  void switch_phase() {
226  if (state_.enabled) {
227  auto t = wallclock::gettime_ns();
228  auto& phase_from = singleton<PhaseFrom>::get();
229  auto& phase_to = singleton<PhaseTo>::get();
230 
231  if (last_phase_ == nullptr) {
232  phase_ibd_ = phase_from.interval_begin(Mode{}, state_.t_begin);
233  } else {
234  ITYR_CHECK(last_phase_ == &phase_from);
235  }
236 
237  phase_from.interval_end(Mode{}, t, phase_ibd_);
238  phase_ibd_ = phase_to.interval_begin(Mode{}, t);
239  last_phase_ = &phase_to;
240  }
241  }
242 
243 private:
244  static std::string trace_out_filename() {
245  std::stringstream ss;
246  ss << "ityr_log_" << topology::my_rank() << ".ignore";
247  return ss.str();
248  }
249 
250  std::vector<event*> events_;
251  profiler_state state_;
252  event* last_phase_ = nullptr;
253  typename Mode::interval_begin_data phase_ibd_;
254  std::unique_ptr<FILE, int (*)(FILE*)> trace_out_file_ = {nullptr, nullptr};
255 };
256 
258 
260 
262 
263 template <typename Event>
265 public:
266  template <typename... Args>
268  : init_(instance::get().get_state(), std::forward<Args>(args)...) {
269  if (init_.should_finalize()) {
271  }
272  }
273 private:
275 };
276 
277 template <typename Event, typename... Args>
279  if constexpr (!std::is_same_v<mode, mode_disabled>) {
280  auto t = wallclock::gettime_ns();
281  return singleton<Event>::get().interval_begin(mode{}, t, std::forward<Args>(args)...);
282  } else {
283  return {};
284  }
285 }
286 
287 template <typename Event, typename... Args>
288 inline void interval_end(interval_begin_data ibd, Args&&... args) {
289  if constexpr (!std::is_same_v<mode, mode_disabled>) {
290  auto& state = instance::get().get_state();
291  if (state.enabled) {
292  auto t = wallclock::gettime_ns();
293  singleton<Event>::get().interval_end(mode{}, t, ibd, std::forward<Args>(args)...);
294  }
295  }
296 }
297 
298 template <typename Event>
300 public:
301  template <typename... Args>
302  interval_scope(Args&&... args) {
303  ibd_ = interval_begin<Event>(std::forward<Args>(args)...);
304  }
306  interval_end<Event>(ibd_);
307  }
308 
309  interval_scope(const interval_scope&) = delete;
311 
314 
315 private:
316  interval_begin_data ibd_;
317 };
318 
319 #define ITYR_PROFILER_RECORD(event, ...) \
320  ityr::common::profiler::interval_scope<event> ITYR_ANON_VAR {__VA_ARGS__};
321 
322 template <typename PhaseFrom, typename PhaseTo>
323 inline void switch_phase() {
324  if constexpr (!std::is_same_v<mode, mode_disabled>) {
325  instance::get().switch_phase<PhaseFrom, PhaseTo>();
326  }
327 }
328 
329 inline void begin() {
331  instance::get().begin();
332 }
333 
334 inline void end() {
335  instance::get().end();
336 }
337 
338 inline void flush() {
339  if constexpr (!std::is_same_v<mode, mode_disabled>) {
340  instance::get().flush();
341  }
342 }
343 
344 }
static value_type value()
Definition: options.hpp:62
Definition: profiler.hpp:264
event_initializer(Args &&... args)
Definition: profiler.hpp:267
Definition: profiler.hpp:53
mode_stats::interval_begin_data interval_begin(mode_stats, wallclock::wallclock_t t)
Definition: profiler.hpp:63
event(profiler_state &state)
Definition: profiler.hpp:55
virtual void print_stats_sum(wallclock::wallclock_t sum_time, wallclock::wallclock_t max_time, wallclock::wallclock_t t_total, counter_t count) const
Definition: profiler.hpp:158
virtual std::string str() const
Definition: profiler.hpp:96
void interval_end(mode_disabled, wallclock::wallclock_t, mode_disabled::interval_begin_data)
Definition: profiler.hpp:61
virtual void clear()
Definition: profiler.hpp:98
profiler_state & state_
Definition: profiler.hpp:168
mode_trace::interval_begin_data interval_begin(mode_trace, wallclock::wallclock_t t)
Definition: profiler.hpp:71
virtual void print_stats()
Definition: profiler.hpp:104
void interval_end(mode_trace, wallclock::wallclock_t t, mode_trace::interval_begin_data ibd)
Definition: profiler.hpp:76
void interval_end(mode_stats, wallclock::wallclock_t t, mode_stats::interval_begin_data ibd)
Definition: profiler.hpp:67
uint64_t counter_t
Definition: profiler.hpp:145
mode_stats::interval_begin_data interval_begin(mode_disabled, wallclock::wallclock_t)
Definition: profiler.hpp:60
static void * trace_decoder_base(FILE *stream, int, int, void *buf0, void *buf1)
Definition: profiler.hpp:80
virtual void print_stats_per_rank(topology::rank_t rank, wallclock::wallclock_t sum_time, wallclock::wallclock_t max_time, wallclock::wallclock_t t_total, counter_t count) const
Definition: profiler.hpp:147
wallclock::wallclock_t sum_time_
Definition: profiler.hpp:169
counter_t count_
Definition: profiler.hpp:171
void do_acc(wallclock::wallclock_t t)
Definition: profiler.hpp:139
wallclock::wallclock_t max_time_
Definition: profiler.hpp:170
virtual void * trace_decoder(FILE *stream, void *buf0, void *buf1)
Definition: profiler.hpp:85
Definition: profiler.hpp:299
interval_scope & operator=(interval_scope &&)=delete
interval_scope & operator=(const interval_scope &)=delete
interval_scope(interval_scope &&)=delete
~interval_scope()
Definition: profiler.hpp:305
interval_scope(const interval_scope &)=delete
interval_scope(Args &&... args)
Definition: profiler.hpp:302
Definition: profiler.hpp:175
void begin()
Definition: profiler.hpp:189
profiler()
Definition: profiler.hpp:177
profiler_state & get_state()
Definition: profiler.hpp:222
void add(event *e)
Definition: profiler.hpp:185
void flush()
Definition: profiler.hpp:203
void end()
Definition: profiler.hpp:194
void switch_phase()
Definition: profiler.hpp:225
Definition: util.hpp:207
Definition: util.hpp:176
static auto & get()
Definition: util.hpp:180
#define ITYR_PROFILER_MODE
#define ITYR_CONCAT(x, y)
Definition: util.hpp:20
#define ITYR_CHECK(cond)
Definition: util.hpp:48
Definition: profiler.hpp:20
void switch_phase()
Definition: profiler.hpp:323
mode::interval_begin_data interval_begin_data
Definition: profiler.hpp:261
void end()
Definition: profiler.hpp:334
ITYR_CONCAT(mode_, ITYR_PROFILER_MODE) mode
Definition: profiler.hpp:257
void * mlog_data_t
Definition: profiler.hpp:21
void mlog_die()
Definition: profiler.hpp:22
void interval_end(interval_begin_data ibd, Args &&... args)
Definition: profiler.hpp:288
void begin()
Definition: profiler.hpp:329
void mlog_init(void **, int, size_t)
Definition: profiler.hpp:25
interval_begin_data interval_begin(Args &&... args)
Definition: profiler.hpp:278
void flush()
Definition: profiler.hpp:338
void mlog_flush_all(void **, FILE *)
Definition: profiler.hpp:26
void mlog_clear_all(void **)
Definition: profiler.hpp:27
rank_t n_ranks()
Definition: topology.hpp:208
int rank_t
Definition: topology.hpp:12
MPI_Comm mpicomm()
Definition: topology.hpp:206
rank_t my_rank()
Definition: topology.hpp:207
uint64_t wallclock_t
Definition: wallclock.hpp:13
wallclock_t gettime_ns()
Definition: wallclock.hpp:88
va_list args
Definition: util.hpp:76
fprintf(stderr, "\x1b[31m%s\x1b[39m\n", msg)
fflush(stderr)
T mpi_reduce_value(const T &value, int root_rank, MPI_Comm comm, MPI_Op op=MPI_SUM)
Definition: mpi_util.hpp:170
void mpi_barrier(MPI_Comm comm)
Definition: mpi_util.hpp:42
void mpi_send_value(const T &value, int target_rank, int tag, MPI_Comm comm)
Definition: mpi_util.hpp:84
void get(global_ptr< ConstT > from_ptr, T *to_ptr, std::size_t count)
Definition: ori.hpp:80
monoid< T, max_functor<>, lowest< T > > max
Definition: reducer.hpp:104
#define MLOG_BEGIN(...)
Definition: profiler.hpp:17
#define MLOG_READ_ARG(buf, type)
Definition: profiler.hpp:19
#define MLOG_END(...)
Definition: profiler.hpp:18
Definition: profiler.hpp:33
void * interval_begin_data
Definition: profiler.hpp:34
Definition: profiler.hpp:37
wallclock::wallclock_t interval_begin_data
Definition: profiler.hpp:38
Definition: profiler.hpp:41
void * interval_begin_data
Definition: profiler.hpp:42
Definition: profiler.hpp:45
bool output_per_rank
Definition: profiler.hpp:49
bool enabled
Definition: profiler.hpp:46
wallclock::wallclock_t t_end
Definition: profiler.hpp:48
wallclock::wallclock_t t_begin
Definition: profiler.hpp:47
mlog_data_t trace_md
Definition: profiler.hpp:50