Itoyori v0.0.1
virtual_mem.hpp
#pragma once

#include <sys/mman.h>
#include <unistd.h>
#include <cstdio>
#include <cstdlib>
#include <cstdint>
#include <cstddef>
#include <cerrno>
#include <exception>
#include <utility>
#include <vector>
#include <algorithm>

#include "ityr/common/util.hpp"
#include "ityr/common/mpi_util.hpp"
#include "ityr/common/topology.hpp"

namespace ityr::common {

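// Exception thrown when a fixed-address mapping is requested but the target
// range already overlaps an existing mapping (MAP_FIXED_NOREPLACE).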
class mmap_noreplace_exception : public std::exception {};

inline void munmap(void* addr, std::size_t size);

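// Maps a virtual address range with PROT_NONE, i.e., reserves address space
// without committing physical memory; callers presumably change the protection
// (e.g., via mprotect()) before actually accessing the region.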
inline void* mmap_no_physical_mem(void* addr,
                                  std::size_t size,
                                  bool replace = false,
                                  std::size_t alignment = alignof(max_align_t));

class virtual_mem {
public:
  virtual_mem() {}
  virtual_mem(std::size_t size, std::size_t alignment = alignof(max_align_t))
    : addr_(mmap_no_physical_mem(nullptr, size, false, alignment)), size_(size) {}
  virtual_mem(void* addr, std::size_t size, std::size_t alignment = alignof(max_align_t))
    : addr_(mmap_no_physical_mem(addr, size, false, alignment)), size_(size) {}

  ~virtual_mem() { destroy(); }

  virtual_mem(const virtual_mem&) = delete;
  virtual_mem& operator=(const virtual_mem&) = delete;

  virtual_mem(virtual_mem&& vm) : addr_(vm.addr_), size_(vm.size_) { vm.addr_ = nullptr; }
  virtual_mem& operator=(virtual_mem&& vm) {
    destroy();
    addr_ = vm.addr();
    size_ = vm.size();
    vm.addr_ = nullptr;
    return *this;
  }

  void* addr() const { return addr_; }
  std::size_t size() const { return size_; }

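  // Shrinks the mapping to `to_size` bytes. Only whole pages past the new end
  // are actually unmapped; the logical size is updated unconditionally.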
  void shrink(std::size_t to_size) {
    ITYR_CHECK(addr_);
    ITYR_CHECK(to_size <= size_);

    std::size_t pagesize = get_page_size();
    std::size_t curr_page_end = round_up_pow2(size_, pagesize);
    std::size_t next_page_end = round_up_pow2(to_size, pagesize);
    if (curr_page_end > next_page_end) {
      munmap(reinterpret_cast<std::byte*>(addr_) + next_page_end,
             curr_page_end - next_page_end);
    }

    size_ = to_size;
  }

private:
  void destroy() {
    if (addr_) {
      munmap(addr_, size_);
    }
  }

  void* addr_ = nullptr;
  std::size_t size_;
};

ITYR_TEST_CASE("[ityr::common::virtual_mem] allocate virtual memory") {
  std::size_t pagesize = get_page_size();
  void* addr = nullptr;
  {
    virtual_mem vm(32 * pagesize);
    ITYR_CHECK(vm.addr() != nullptr);
    addr = vm.addr();
  }
  {
    virtual_mem vm_longlived;
    {
      // Check that the same virtual address can be mapped after the previous mapping has been freed
      virtual_mem vm(addr, 16 * pagesize);
      ITYR_CHECK(vm.addr() == addr);
      // Mappings for the same virtual address cannot be replaced
      ITYR_CHECK_THROWS_AS(virtual_mem vm2(addr, pagesize), mmap_noreplace_exception);
      vm_longlived = std::move(vm);
    }
    // The VM mapping remains valid even after it is moved to an object with a longer lifetime
    ITYR_CHECK_THROWS_AS(virtual_mem vm3(addr, pagesize), mmap_noreplace_exception);
  }
  // The VM mapping is correctly freed after exiting the block
  virtual_mem vm4(addr, pagesize);
}

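// Thin wrapper around ::munmap() that checks page alignment and aborts the
// program (rather than returning an error) if the unmap fails.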
inline void munmap(void* addr, std::size_t size) {
  ITYR_CHECK(size > 0);
  ITYR_CHECK_MESSAGE(reinterpret_cast<uintptr_t>(addr) % get_page_size() == 0,
                     "The address passed to munmap() must be page-aligned");
  if (::munmap(addr, size) == -1) {
    perror("munmap");
    die("[ityr::common::virtual_mem] munmap(%p, %lu) failed", addr, size);
  }
}

inline void* mmap_no_physical_mem(void* addr,
                                  std::size_t size,
                                  bool replace,
                                  std::size_t alignment) {
  int flags = MAP_PRIVATE | MAP_ANONYMOUS;

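  // If no fixed address is given, over-allocate by `alignment` bytes so that an
  // aligned sub-range of `size` bytes is guaranteed to exist somewhere inside
  // the mapping; the unaligned head and tail are unmapped afterwards.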
  std::size_t alloc_size;
  if (addr == nullptr) {
    alloc_size = size + alignment;
  } else {
    ITYR_CHECK(reinterpret_cast<uintptr_t>(addr) % alignment == 0);
    alloc_size = size;
    if (!replace) {
      flags |= MAP_FIXED_NOREPLACE;
    } else {
      flags |= MAP_FIXED;
    }
  }

  void* allocated_p = mmap(addr, alloc_size, PROT_NONE, flags, -1, 0);
  if (allocated_p == MAP_FAILED) {
    if (errno == EEXIST) {
      // MAP_FIXED_NOREPLACE failed because the range overlaps an existing mapping
      throw mmap_noreplace_exception{};
    } else {
      perror("mmap");
      die("[ityr::common::virtual_mem] mmap(%p, %lu, ...) failed", addr, alloc_size);
    }
  }

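  // No fixed address was requested: trim the over-allocated region so that the
  // returned pointer is aligned and the mapping spans exactly the pages that
  // cover `size` bytes.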
  if (addr == nullptr) {
    std::size_t pagesize = get_page_size();

    uintptr_t allocated_addr = reinterpret_cast<uintptr_t>(allocated_p);
    ITYR_CHECK(allocated_addr % pagesize == 0);

    uintptr_t ret_addr = round_up_pow2(allocated_addr, alignment);
    ITYR_CHECK(ret_addr % pagesize == 0);

    // Truncate the head end
    ITYR_CHECK(ret_addr >= allocated_addr);
    if (ret_addr - allocated_addr > 0) {
      munmap(allocated_p, ret_addr - allocated_addr);
    }

    // Truncate the tail end
    uintptr_t allocated_addr_end = allocated_addr + alloc_size;
    uintptr_t ret_page_end = round_up_pow2(ret_addr + size, pagesize);
    ITYR_CHECK(allocated_addr_end >= ret_page_end);
    if (allocated_addr_end - ret_page_end > 0) {
      munmap(reinterpret_cast<std::byte*>(ret_page_end), allocated_addr_end - ret_page_end);
    }

    return reinterpret_cast<std::byte*>(ret_addr);
  } else {
    ITYR_CHECK(addr == allocated_p);
    return allocated_p;
  }
}

inline virtual_mem reserve_same_vm_coll(std::size_t size,
                                        std::size_t alignment = alignof(max_align_t)) {
  ITYR_CHECK(size > 0);

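  // Collectively reserves a virtual address range that starts at the same
  // address on every process: a leader maps a candidate region and broadcasts
  // its address, and all other processes try to map the same range with
  // MAP_FIXED_NOREPLACE. If any process fails, the highest failed rank becomes
  // the next leader and the attempt is repeated with a doubled allocation size.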
  uintptr_t vm_addr = 0;
  virtual_mem vm;

  std::vector<virtual_mem> prev_vms;
  int max_trial = 100;
  std::size_t alloc_size = round_up_pow2(size, get_page_size());
  topology::rank_t leader_rank = 0;

  std::size_t alloc_size_max = std::max(alloc_size, std::size_t(1) << 40);

  // Repeat until the same virtual memory address is allocated on all processes
  // TODO: smarter allocation using `pmap` result?
  for (int n_trial = 0; n_trial <= max_trial; n_trial++) {
    if (topology::my_rank() == leader_rank) {
      vm = virtual_mem(alloc_size, alignment);
      vm_addr = reinterpret_cast<uintptr_t>(vm.addr());
    }

    vm_addr = mpi_bcast_value(vm_addr, leader_rank, topology::mpicomm());

    topology::rank_t failed_rank = -1;
    if (topology::my_rank() != leader_rank) {
      // Unmap overlapping virtual addresses that were previously allocated
      for (auto&& prev_vm : prev_vms) {
        if (reinterpret_cast<uintptr_t>(prev_vm.addr()) < vm_addr + alloc_size &&
            vm_addr < reinterpret_cast<uintptr_t>(prev_vm.addr()) + prev_vm.size()) {
          // Call the destructor by overwriting with an empty mapping
          prev_vm = virtual_mem();
        }
      }

      try {
        vm = virtual_mem(reinterpret_cast<void*>(vm_addr), alloc_size, alignment);
      } catch (mmap_noreplace_exception& e) {
        failed_rank = topology::my_rank();
      }
    }

    // Among the failed processes, the one with the maximum rank becomes the next leader
    auto failed_rank_max = mpi_allreduce_value(failed_rank, topology::mpicomm(), MPI_MAX);

    if (failed_rank_max == -1) {
      // Success; prev_vms are automatically freed
      vm.shrink(size);
      return vm;
    }

    if (failed_rank == -1) {
      // Defer the deallocation of previously allocated regions to prevent
      // the same address from being proposed again in the next trial
      prev_vms.push_back(std::move(vm));
    }

    leader_rank = failed_rank_max;
    alloc_size = std::min(alloc_size_max, 2 * alloc_size);
  }

  die("Reservation of virtual memory address failed (size=%lu, max_trial=%d)", size, max_trial);
}

ITYR_TEST_CASE("[ityr::common::virtual_mem] allocate the same virtual memory across processes") {
  runtime_options opts;
  singleton_initializer<topology::instance> topo;

  std::size_t pagesize = get_page_size();
  virtual_mem vm = reserve_same_vm_coll(pagesize * 32);
  ITYR_CHECK(vm.addr() != nullptr);

  uintptr_t vm_addr = reinterpret_cast<uintptr_t>(vm.addr());
  std::size_t vm_size = vm.size();

  uintptr_t vm_addr_root = mpi_bcast_value(vm_addr, 0, topology::mpicomm());
  std::size_t vm_size_root = mpi_bcast_value(vm_size, 0, topology::mpicomm());

  ITYR_CHECK(vm_addr == vm_addr_root);
  ITYR_CHECK(vm_size == vm_size_root);
  ITYR_CHECK(vm_size == pagesize * 32);
}

} // namespace ityr::common