Itoyori  v0.0.1
wallclock.hpp
Go to the documentation of this file.
1 #pragma once
2 
3 #include <limits>
4 
5 #include "ityr/common/util.hpp"
9 #include "ityr/common/logger.hpp"
10 
12 
// Timestamp type returned by gettime_ns(); an unsigned 64-bit count
// (nanoseconds, going by the `_ns` naming used throughout this file).
13 using wallclock_t = uint64_t;
14 
// Clock whose readings are the local clock_gettime_ns() value shifted by a
// per-process offset. The offset is computed in do_sync() against the clock of
// inter-node rank 0 and then shared with every process on the same node.
//
// NOTE(review): this is a numbered listing with gaps (the leading numbers are
// listing line numbers, and some source lines are not shown, e.g. the
// constructor signature and the signature of the accessor that returns
// `clock_gettime_ns() - offset_`). Comments below hedge wherever they depend on
// a line that is not visible.
15 class global_clock {
16 public:
    // Constructor (signature line not shown): takes the number of round trips
    // from global_clock_sync_round_trips_option and synchronizes immediately.
18  : n_sync_round_trips_(global_clock_sync_round_trips_option::value()) {
19  sync();
20  }
21 
    // Recomputes offset_ via do_sync() and logs the new offset and the elapsed
    // time. `t0`/`t1` are defined on lines not shown here; presumably they are
    // clock readings taken before and after do_sync() -- TODO confirm.
    // NOTE(review): `%ld` is paired with int64_t / uint64_t arguments, which
    // only matches on platforms where those types are `long`.
22  void sync() {
25  do_sync();
28  verbose("Global clock synchronized (offset = %ld ns); took %ld ns", offset_, t1 - t0);
29  }
30 
    // Accessor body (signature line not shown): local clock reading minus the
    // synchronized offset.
32  return clock_gettime_ns() - offset_;
33  }
34 
35 private:
    // Computes offset_ for this process.
    //  1. On each node, only the process with intra-node rank 0 takes part in
    //     the inter-node exchange.
    //  2. Inter-node rank 0 measures every other node in turn and fills an
    //     offsets array; the other leaders answer each round.
    //  3. The array is scattered from inter-node rank 0, so each leader
    //     receives its own entry.
    //  4. Each leader's value is broadcast inside its node.
36  void do_sync() {
37  // Only the leader of each node is involved in clock synchronization
38  if (topology::intra_my_rank() == 0) {
    // One slot per node; the trailing `()` value-initializes every slot to 0,
    // so slot 0 (the reference node itself) starts with a zero offset.
    // NOTE(review): raw new[]/delete[] pair; nothing frees the buffer if a call
    // in between throws.
39  int64_t* offsets = new int64_t[topology::inter_n_ranks()]();
40 
41  // uses the reference clock of the node of rank 0
42  if (topology::inter_my_rank() == 0) {
43  // takes O(n) time, where n = # of nodes
44  for (int i = 1; i < topology::inter_n_ranks(); i++) {
    // Smallest t2 - t0 seen so far for node i; starts at the maximum value so
    // the first round is always accepted.
45  uint64_t min_gap = std::numeric_limits<uint64_t>::max();
46  for (int j = 0; j < n_sync_round_trips_; j++) {
    // t0 / t2 are local readings around the exchange; t1 is the value received
    // from rank i with tag j. A listing line between t0 and the receive is not
    // shown -- presumably it sends a message to rank i to start the round;
    // TODO confirm.
47  uint64_t t0 = clock_gettime_ns();
49  uint64_t t1 = mpi_recv_value<uint64_t>(i, j, topology::inter_mpicomm());
50  uint64_t t2 = clock_gettime_ns();
51 
52  // adopt the fastest communication
53  if (t2 - t0 < min_gap) {
54  min_gap = t2 - t0;
    // Offset estimate for node i: the received value minus the midpoint of the
    // two local readings.
55  offsets[i] = t1 - static_cast<int64_t>((t0 + t2) / 2);
56  }
57  }
58  }
59 
60  // Adjust the offset to begin with t=0
    // The same reference reading is added to every slot, including slot 0.
61  int64_t begin_time = clock_gettime_ns();
62  for (int i = 0; i < topology::inter_n_ranks(); i++) {
63  offsets[i] += begin_time;
64  }
65  } else {
    // Leaders of the other nodes: one receive from rank 0 and one local clock
    // reading per round. A listing line after the reading is not shown --
    // presumably it sends t1 back to rank 0; TODO confirm.
66  for (int j = 0; j < n_sync_round_trips_; j++) {
67  mpi_recv_value<uint64_t>(0, j, topology::inter_mpicomm());
68  uint64_t t1 = clock_gettime_ns();
70  }
71  }
72 
    // Scatter with root 0 over the inter-node communicator; the result becomes
    // this leader's offset_.
73  offset_ = mpi_scatter_value(offsets, 0, topology::inter_mpicomm());
74 
75  delete[] offsets;
76  }
77 
78  // Share the offset within the node
    // NOTE(review): on non-leader processes offset_ has not been assigned yet
    // when it is passed in here (it has no initializer below).
79  offset_ = mpi_bcast_value(offset_, 0, topology::intra_mpicomm());
80  }
81 
    // Number of rounds performed per node in do_sync().
82  int n_sync_round_trips_;
    // Value subtracted from the local clock reading; logged as nanoseconds in sync().
83  int64_t offset_;
84 };
85 
87 
// Free-function shortcut: forwards to gettime_ns() on the object returned by
// instance::get(). `instance` is declared on a line not shown in this listing --
// presumably a singleton wrapper around global_clock; TODO confirm.
88 inline wallclock_t gettime_ns() { return instance::get().gettime_ns(); }
89 
90 }
Definition: util.hpp:176
static auto & get()
Definition: util.hpp:180
Definition: wallclock.hpp:15
void sync()
Definition: wallclock.hpp:22
global_clock()
Definition: wallclock.hpp:17
wallclock_t gettime_ns() const
Definition: wallclock.hpp:31
rank_t inter_my_rank()
Definition: topology.hpp:215
MPI_Comm mpicomm()
Definition: topology.hpp:206
rank_t inter_n_ranks()
Definition: topology.hpp:216
rank_t intra_my_rank()
Definition: topology.hpp:211
MPI_Comm inter_mpicomm()
Definition: topology.hpp:214
MPI_Comm intra_mpicomm()
Definition: topology.hpp:210
Definition: wallclock.hpp:11
uint64_t wallclock_t
Definition: wallclock.hpp:13
wallclock_t gettime_ns()
Definition: wallclock.hpp:88
T mpi_bcast_value(const T &value, int root_rank, MPI_Comm comm)
Definition: mpi_util.hpp:145
T mpi_scatter_value(const T *sendbuf, int root_rank, MPI_Comm comm)
Definition: mpi_util.hpp:242
uint64_t clock_gettime_ns()
Definition: util.hpp:65
void mpi_barrier(MPI_Comm comm)
Definition: mpi_util.hpp:42
void mpi_send_value(const T &value, int target_rank, int tag, MPI_Comm comm)
Definition: mpi_util.hpp:84
void verbose(const char *fmt,...)
Definition: logger.hpp:11
monoid< T, max_functor<>, lowest< T > > max
Definition: reducer.hpp:104