Itoyori  v0.0.1
release_manager.hpp
Go to the documentation of this file.
1 #pragma once
2 
3 #include "ityr/common/util.hpp"
7 #include "ityr/ori/util.hpp"
8 #include "ityr/ori/options.hpp"
10 
11 namespace ityr::ori {
12 
14 public:
16  : win_(common::topology::mpicomm(), 1),
17  remote_epochs_(common::topology::n_ranks(), 1),
18  check_interval_(lazy_release_check_interval_option::value()),
19  make_mpi_progress_(lazy_release_make_mpi_progress_option::value()) {}
20 
21  using epoch_t = uint64_t;
22 
23  struct release_handler {
26  };
27 
// Returns the raw MPI window handle held by win_ (forwards to win_.win()).
28  MPI_Win win() const { return win_.win(); }
29 
// Returns the current_epoch field of the rma_data object that data() refers to,
// i.e. the one win_.baseptr() points at.
30  epoch_t current_epoch() const { return data().current_epoch; }
31 
// Advances current_epoch in this object's rma_data by one.
// This is the same field that get_remote_epoch() reads from a target rank
// through the MPI window.
// NOTE(review): this is a plain (non-atomic) increment on window memory;
// presumably only the owning rank ever writes this field - confirm.
32  void increment_epoch() {
33  data().current_epoch++;
34  }
35 
37  return {common::topology::my_rank(), current_epoch() + 1};
38  }
39 
41  return {0, 0};
42  }
43 
// Waits until the cached epoch of rh.target_rank reaches rh.required_epoch.
//
// Does nothing when release_needed(rh) is false. Otherwise it loops while
// remote_epochs_[rh.target_rank] < rh.required_epoch: on the first iteration
// it sends a release request to the target, then on every iteration it
// sleeps for check_interval_ and refreshes the cached epoch from the target.
//
// rh: handler naming the rank to wait for (target_rank) and the epoch that
//     rank must reach (required_epoch).
44  void ensure_released(const release_handler& rh) {
45  if (release_needed(rh)) {
// NOTE(review): listing line 46 is not visible in this view; presumably a
// profiler record (ITYR_PROFILER_RECORD appears in the page index) - confirm.
47 
// Ensures request_release() is issued at most once per call.
48  bool request_done = false;
49  while (remote_epochs_[rh.target_rank] < rh.required_epoch) {
50  if (!request_done) {
51  request_release(rh.target_rank, rh.required_epoch);
52  request_done = true;
53  }
// usleep() takes its argument in microseconds, so check_interval_ is
// interpreted as a polling interval in microseconds here.
54  usleep(check_interval_);
// NOTE(review): the body of this branch (listing line 56) is not visible in
// this view; presumably it calls common::mpi_make_progress() - confirm.
55  if (make_mpi_progress_) {
57  }
// Refresh the local cache with the target's current epoch before re-testing
// the loop condition.
58  remote_epochs_[rh.target_rank] = get_remote_epoch(rh.target_rank);
59  }
60  }
61  }
62 
// Returns true when the requested_epoch stored in this object's rma_data is
// greater than current_epoch(). requested_epoch is the field that
// request_release() updates on a target rank via compare-and-swap.
63  bool release_requested() const {
64  return data().requested_epoch > current_epoch();
65  }
66 
67 private:
68  struct rma_data {
70  epoch_t requested_epoch = 1;
71  };
72 
// Accessors for the rma_data object located at win_.baseptr().
// The const overload gives read-only access; the non-const overload allows
// modification (used by increment_epoch()).
73  const rma_data& data() const { return *win_.baseptr(); }
74  rma_data& data() { return *win_.baseptr(); }
75 
// Returns true only if the handler targets a rank other than the calling one
// and its required_epoch is non-zero (epoch_t is unsigned, so "> 0" means
// "!= 0"). A handler with required_epoch == 0 therefore never triggers a wait.
76  bool release_needed(const release_handler& rh) const {
77  return rh.target_rank != common::topology::my_rank() && rh.required_epoch > 0;
78  }
79 
// Reads an epoch_t from target_rank through the MPI window, at the byte
// offset of rma_data::current_epoch, and returns it.
// NOTE(review): presumably common::mpi_get_value blocks until the value has
// arrived - its definition is not visible here; confirm.
80  epoch_t get_remote_epoch(common::topology::rank_t target_rank) const {
81  return common::mpi_get_value<epoch_t>(target_rank, offsetof(rma_data, current_epoch), win());
82  }
83 
// Raises rma_data::requested_epoch on target_rank to at least requested_epoch,
// using a compare-and-swap retry loop (see the FIXME below for why).
//
// target_rank:     rank whose requested_epoch field is updated.
// requested_epoch: epoch value to install if the remote value is smaller.
84  void request_release(common::topology::rank_t target_rank,
85  epoch_t requested_epoch) const {
86  // FIXME: As MPI_Fetch_and_op + MPI_MAX seems not offloaded to RDMA NICs, currently
87  // MPI_Compare_and_swap is used instead.
// Initial guess for the remote requested_epoch. It comes from the local
// cache, which ensure_released() fills with get_remote_epoch() results, so it
// is only a starting point; a wrong guess is corrected by the first CAS result.
88  epoch_t remote_epoch = remote_epochs_[target_rank];
// Stop as soon as the observed remote value is already >= requested_epoch.
89  while (remote_epoch < requested_epoch) {
// Arguments follow mpi_atomic_cas_value(value, compare, target_rank,
// target_disp, win): install requested_epoch if the remote field still equals
// remote_epoch. The return value is compared against the guess below.
90  epoch_t result =
91  common::mpi_atomic_cas_value(requested_epoch, remote_epoch, target_rank,
92  offsetof(rma_data, requested_epoch), win());
93  if (result == remote_epoch) {
94  break; // success
95  } else {
// The guess was stale: retry with the value actually observed on the target.
96  remote_epoch = result;
97  }
98  }
99  }
100 
101  common::mpi_win_manager<rma_data> win_;
102  std::vector<epoch_t> remote_epochs_;
103  int check_interval_;
104  bool make_mpi_progress_;
105 };
106 
107 }
T * baseptr() const
Definition: mpi_rma.hpp:410
MPI_Win win() const
Definition: mpi_rma.hpp:409
Definition: release_manager.hpp:13
release_handler get_dummy_handler() const
Definition: release_manager.hpp:40
release_manager()
Definition: release_manager.hpp:15
epoch_t current_epoch() const
Definition: release_manager.hpp:30
void increment_epoch()
Definition: release_manager.hpp:32
MPI_Win win() const
Definition: release_manager.hpp:28
bool release_requested() const
Definition: release_manager.hpp:63
release_handler get_release_handler() const
Definition: release_manager.hpp:36
uint64_t epoch_t
Definition: release_manager.hpp:21
void ensure_released(const release_handler &rh)
Definition: release_manager.hpp:44
int rank_t
Definition: topology.hpp:12
MPI_Comm mpicomm()
Definition: topology.hpp:206
rank_t my_rank()
Definition: topology.hpp:207
void mpi_make_progress()
Definition: mpi_util.hpp:260
T mpi_atomic_cas_value(const T &value, const T &compare, int target_rank, std::size_t target_disp, MPI_Win win)
Definition: mpi_rma.hpp:222
Definition: block_region_set.hpp:9
core::instance::instance_type::release_handler release_handler
Definition: ori.hpp:204
rank_t n_ranks()
Return the total number of processes.
Definition: ityr.hpp:107
#define ITYR_PROFILER_RECORD(event,...)
Definition: profiler.hpp:319
Definition: prof_events.hpp:51
Definition: release_manager.hpp:23
common::topology::rank_t target_rank
Definition: release_manager.hpp:24
epoch_t required_epoch
Definition: release_manager.hpp:25