Itoyori  v0.0.1
cache_manager.hpp
#pragma once

#include <cstring>
#include <algorithm>

#include "ityr/common/util.hpp"
#include "ityr/common/topology.hpp"
#include "ityr/common/logger.hpp"
#include "ityr/common/rma.hpp"
#include "ityr/common/virtual_mem.hpp"
#include "ityr/common/physical_mem.hpp"
#include "ityr/ori/util.hpp"
#include "ityr/ori/options.hpp"
#include "ityr/ori/prof_events.hpp"
#include "ityr/ori/block_region_set.hpp"
#include "ityr/ori/cache_system.hpp"
#include "ityr/ori/tlb.hpp"
#include "ityr/ori/release_manager.hpp"
#include "ityr/ori/cache_profiler.hpp"

namespace ityr::ori {

template <block_size_t BlockSize>
class cache_manager {
  static constexpr bool enable_write_through = ITYR_ORI_ENABLE_WRITE_THROUGH;
  static constexpr bool enable_lazy_release  = ITYR_ORI_ENABLE_LAZY_RELEASE;
  static constexpr bool enable_vm_map        = ITYR_ORI_ENABLE_VM_MAP;

public:
  cache_manager(std::size_t cache_size, std::size_t sub_block_size)
    : cache_size_(cache_size),
      sub_block_size_(sub_block_size),
      vm_(cache_size_, BlockSize),
      pm_(init_cache_pm()),
      cs_(cache_size / BlockSize, cache_block(this)),
      cache_win_(common::rma::create_win(reinterpret_cast<std::byte*>(vm_.addr()), vm_.size())),
      cache_tlb_(nullptr, nullptr),
      max_dirty_cache_blocks_(max_dirty_cache_size_option::value() / BlockSize),
      cprof_(cs_.num_entries()) {
    ITYR_CHECK(cache_size_ > 0);
    ITYR_CHECK(common::is_pow2(cache_size_));
    ITYR_CHECK(cache_size_ % BlockSize == 0);
    ITYR_CHECK(common::is_pow2(sub_block_size_));
    ITYR_CHECK(sub_block_size_ <= BlockSize);
  }
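
  // For illustration: the checks above require both cache_size and sub_block_size to be
  // powers of two, with sub_block_size <= BlockSize and cache_size a multiple of BlockSize.
  // A hypothetical configuration that satisfies them, assuming a 64 KiB block size:
  //
  //   ityr::ori::cache_manager<65536> cm(/* cache_size     = */ 64 * 65536,
  //                                      /* sub_block_size = */ 4096);
  //
  // Such a cache holds 64 * 65536 / 65536 = 64 block entries, and fetches are padded to
  // 4 KiB sub-block boundaries (see pad_fetch_region() below).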

  // Returns a pair [entry_found, fetch_completed].
  template <bool SkipFetch, bool IncrementRef>
  std::pair<bool, bool> checkout_fast(std::byte* addr, std::size_t size) {
    if constexpr (!cache_tlb::enabled) return {false, false};

    ITYR_CHECK(addr);
    ITYR_CHECK(size > 0);

    std::byte* blk_addr = common::round_down_pow2(addr, BlockSize);
    if (blk_addr + BlockSize < addr + size) {
      // The fast path requires the requested region to fit within a single cache block.
      // If it does not, set blk_addr to nullptr (an invalid key), which makes the TLB lookup fail.
      // This can avoid a branch here (e.g., by using CMOV).
      blk_addr = nullptr;
    }

    auto cb_p = cache_tlb_.get(blk_addr);

    if (!cb_p) return {false, false};

    cache_block& cb = *cb_p;

    bool fetch_completed = true;

    block_region br = {addr - blk_addr, addr + size - blk_addr};

    if constexpr (SkipFetch) {
      cprof_.record_writeonly(cb.entry_idx, br, cb.valid_regions);
      cb.valid_regions.add(br);
    } else {
      if (fetch_begin(cb, br)) {
        add_fetching_win(*cb.win);
        fetch_completed = false;
      }
    }

    if constexpr (IncrementRef) {
      ITYR_CHECK(cb.ref_count >= 0);
      cb.ref_count++;
    }

    return {true, fetch_completed};
  }

  template <bool SkipFetch, bool IncrementRef>
  bool checkout_blk(std::byte*               blk_addr,
                    std::byte*               req_addr_b,
                    std::byte*               req_addr_e,
                    const common::rma::win&  win,
                    common::topology::rank_t owner,
                    std::size_t              pm_offset) {
    ITYR_CHECK(blk_addr <= req_addr_b);
    ITYR_CHECK(blk_addr <= req_addr_e);
    ITYR_CHECK(req_addr_b <= blk_addr + BlockSize);
    ITYR_CHECK(req_addr_e <= blk_addr + BlockSize);
    ITYR_CHECK(req_addr_b < req_addr_e);

    cache_block& cb = get_entry(blk_addr);

    bool checkout_completed = true;

    if (blk_addr != cb.mapped_addr) {
      cb.addr      = blk_addr;
      cb.win       = &win;
      cb.owner     = owner;
      cb.pm_offset = pm_offset;
      if constexpr (enable_vm_map) {
        cache_blocks_to_map_.push_back(&cb);
        checkout_completed = false;
      } else {
        cb.mapped_addr = blk_addr;
      }
    }

    block_region br = {req_addr_b - blk_addr, req_addr_e - blk_addr};

    if constexpr (SkipFetch) {
      cprof_.record_writeonly(cb.entry_idx, br, cb.valid_regions);
      cb.valid_regions.add(br);
    } else {
      if (fetch_begin(cb, br)) {
        add_fetching_win(win);
        checkout_completed = false;
      }
    }

    if constexpr (IncrementRef) {
      ITYR_CHECK(cb.ref_count >= 0);
      cb.ref_count++;
    }

    cache_tlb_.add(blk_addr, &cb);

    return checkout_completed;
  }

  void checkout_complete() {
    // Overlap communication and memory remapping
    if constexpr (enable_vm_map) {
      if (!cache_blocks_to_map_.empty()) {
        for (cache_block* cb : cache_blocks_to_map_) {
          update_mapping(*cb);
        }
        cache_blocks_to_map_.clear();
      }
    }

    fetch_complete();
  }
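
  // Illustrative sketch of how a hypothetical caller might drive the checkout interface:
  // try the TLB fast path first, fall back to the per-block slow path, and call
  // checkout_complete() once all blocks have been issued so that pending remappings and
  // fetches are finished in one place. (Variable names here are hypothetical.)
  //
  //   auto [hit, done] = cm.checkout_fast<false, true>(addr, size);
  //   if (!hit) {
  //     bool completed = true;
  //     for (/* each cache block [blk_addr, blk_addr + BlockSize) overlapping [addr, addr + size) */) {
  //       completed &= cm.checkout_blk<false, true>(blk_addr, req_b, req_e, owner_win, owner, pm_offset);
  //     }
  //     if (!completed) cm.checkout_complete();
  //   } else if (!done) {
  //     cm.checkout_complete();
  //   }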

  template <bool RegisterDirty, bool DecrementRef>
  bool checkin_fast(std::byte* addr, std::size_t size) {
    if constexpr (!cache_tlb::enabled) return false;

    ITYR_CHECK(addr);
    ITYR_CHECK(size > 0);

    std::byte* blk_addr = common::round_down_pow2(addr, BlockSize);
    if (blk_addr + BlockSize < addr + size) {
      // The fast path requires the requested region to fit within a single cache block.
      // If it does not, set blk_addr to nullptr (an invalid key), which makes the TLB lookup fail.
      // This can avoid a branch here (e.g., by using CMOV).
      blk_addr = nullptr;
    }

    auto cb_p = cache_tlb_.get(blk_addr);

    if (!cb_p) return false;

    cache_block& cb = *cb_p;

    if constexpr (RegisterDirty) {
      block_region br = {addr - blk_addr, addr + size - blk_addr};
      add_dirty_region(cb, br);
    }

    if constexpr (DecrementRef) {
      cb.ref_count--;
      ITYR_CHECK(cb.ref_count >= 0);
    }

    return true;
  }

  template <bool RegisterDirty, bool DecrementRef>
  void checkin_blk(std::byte* blk_addr,
                   std::byte* req_addr_b,
                   std::byte* req_addr_e) {
    ITYR_CHECK(blk_addr <= req_addr_b);
    ITYR_CHECK(blk_addr <= req_addr_e);
    ITYR_CHECK(req_addr_b <= blk_addr + BlockSize);
    ITYR_CHECK(req_addr_e <= blk_addr + BlockSize);
    ITYR_CHECK(req_addr_b < req_addr_e);

    cache_block& cb = get_entry<false>(blk_addr);

    if constexpr (RegisterDirty) {
      block_region br = {req_addr_b - blk_addr, req_addr_e - blk_addr};
      add_dirty_region(cb, br);
    }

    if constexpr (DecrementRef) {
      cb.ref_count--;
      ITYR_CHECK(cb.ref_count >= 0);
    }
  }

  void release() {
    // Write back all dirty cache blocks so that the updates become visible to other processes.
    ensure_all_cache_clean();
  }

  using release_handler = std::conditional_t<enable_lazy_release, release_manager::release_handler, void*>;

  auto release_lazy() {
    if constexpr (enable_lazy_release) {
      if (has_dirty_cache_) {
        return rm_.get_release_handler();
      } else {
        return rm_.get_dummy_handler();
      }
    } else {
      release();
      return release_handler{};
    }
  }
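
  // Illustrative usage sketch, assuming enable_lazy_release is true: the releasing rank hands
  // its release_handler to the acquiring rank instead of eagerly writing back, and the acquirer
  // forces the writeback only if it has not happened yet.
  //
  //   // releasing side
  //   auto rh = cm.release_lazy();        // cheap: captures the current release epoch
  //   // ... send rh to the acquiring rank (e.g., along with a stolen task) ...
  //
  //   // acquiring side
  //   cm.acquire(rh);                     // ensures the releaser's dirty data is written back,
  //                                       // then invalidates the local cache
  //
  // The releasing rank must call poll() periodically so that a pending release request from the
  // acquirer is eventually served (see poll() below).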

  void acquire() {
    // FIXME: no need to writeback dirty data here?
    ensure_all_cache_clean();
    invalidate_all();
  }

  template <typename ReleaseHandler>
  void acquire(ReleaseHandler rh) {
    ensure_all_cache_clean();
    if constexpr (enable_lazy_release) {
      rm_.ensure_released(rh);
    }
    invalidate_all();
  }

  void set_readonly(void* addr, std::size_t size) {
    readonly_regions_.add({reinterpret_cast<uintptr_t>(addr),
                           reinterpret_cast<uintptr_t>(addr) + size});
  }

  void unset_readonly(void* addr, std::size_t size) {
    readonly_regions_.remove({reinterpret_cast<uintptr_t>(addr),
                              reinterpret_cast<uintptr_t>(addr) + size});
  }

  void poll() {
    if constexpr (enable_lazy_release) {
      if (rm_.release_requested()) {
        // Another rank is waiting for this rank's release; write back all dirty cache blocks
        // so that the release epoch is incremented and the request is satisfied.
        ensure_all_cache_clean();
      }
    }
  }

  void ensure_all_cache_clean() {
    writeback_begin();
    writeback_complete();
    ITYR_CHECK(!has_dirty_cache_);
  }

  void ensure_evicted(void* addr) {
    cs_.ensure_evicted(cache_key(addr));
  }

  void clear_tlb() {
    cache_tlb_.clear();
  }

  void discard_dirty(std::byte* blk_addr,
                     std::byte* req_addr_b,
                     std::byte* req_addr_e) {
    ITYR_CHECK(blk_addr <= req_addr_b);
    ITYR_CHECK(blk_addr <= req_addr_e);
    ITYR_CHECK(req_addr_b <= blk_addr + BlockSize);
    ITYR_CHECK(req_addr_e <= blk_addr + BlockSize);
    ITYR_CHECK(req_addr_b < req_addr_e);

    if (is_cached(blk_addr)) {
      cache_block& cb = get_entry(blk_addr);

      if (cb.is_writing_back()) {
        writeback_complete();
      }

      block_region br = {req_addr_b - blk_addr, req_addr_e - blk_addr};
      cb.dirty_regions.remove(br);
    }
  }

  void get_copy_blk(std::byte* blk_addr,
                    std::byte* req_addr_b,
                    std::byte* req_addr_e,
                    std::byte* to_addr) {
    ITYR_CHECK(blk_addr <= req_addr_b);
    ITYR_CHECK(blk_addr <= req_addr_e);
    ITYR_CHECK(req_addr_b <= blk_addr + BlockSize);
    ITYR_CHECK(req_addr_e <= blk_addr + BlockSize);
    ITYR_CHECK(req_addr_b < req_addr_e);

    ITYR_CHECK(is_cached(blk_addr));
    cache_block& cb = get_entry<false>(blk_addr);

    ITYR_CHECK(cb.entry_idx < cs_.num_entries());

    std::size_t blk_offset = req_addr_b - blk_addr;
    std::byte* from_addr = reinterpret_cast<std::byte*>(vm_.addr()) + cb.entry_idx * BlockSize + blk_offset;
    std::memcpy(to_addr, from_addr, req_addr_e - req_addr_b);
  }

  void put_copy_blk(std::byte* blk_addr,
                    std::byte* req_addr_b,
                    std::byte* req_addr_e,
                    const std::byte* from_addr) {
    ITYR_CHECK(blk_addr <= req_addr_b);
    ITYR_CHECK(blk_addr <= req_addr_e);
    ITYR_CHECK(req_addr_b <= blk_addr + BlockSize);
    ITYR_CHECK(req_addr_e <= blk_addr + BlockSize);
    ITYR_CHECK(req_addr_b < req_addr_e);

    ITYR_CHECK(is_cached(blk_addr));
    cache_block& cb = get_entry<false>(blk_addr);

    ITYR_CHECK(cb.entry_idx < cs_.num_entries());

    std::size_t blk_offset = req_addr_b - blk_addr;
    std::byte* to_addr = reinterpret_cast<std::byte*>(vm_.addr()) + cb.entry_idx * BlockSize + blk_offset;
    std::memcpy(to_addr, from_addr, req_addr_e - req_addr_b);
  }

  void cache_prof_begin() { invalidate_all(); cprof_.start(); }
  void cache_prof_end() { cprof_.stop(); }
  void cache_prof_print() const { cprof_.print(); }

private:
  using writeback_epoch_t = uint64_t;

  struct cache_block {
    cache_entry_idx_t entry_idx = -1;  // entry index within the cache_system (cs_)
    std::byte* addr = nullptr;
    std::byte* mapped_addr = nullptr;
    const common::rma::win* win = nullptr;
    common::topology::rank_t owner = -1;
    std::size_t pm_offset = 0;
    int ref_count = 0;
    writeback_epoch_t writeback_epoch = 0;
    block_region_set valid_regions;
    block_region_set dirty_regions;
    cache_manager* outer;

    explicit cache_block(cache_manager* outer_p) : outer(outer_p) {}

    bool is_writing_back() const {
      return writeback_epoch == outer->writeback_epoch_;
    }

    void invalidate() {
      outer->cprof_.invalidate(entry_idx, valid_regions);

      ITYR_CHECK(!is_writing_back());
      ITYR_CHECK(dirty_regions.empty());
      valid_regions.clear();
      ITYR_CHECK(is_evictable());

      common::verbose<3>("Cache block %ld for [%p, %p) invalidated",
                         entry_idx, addr, addr + BlockSize);
    }

    /* Callback functions for the cache_system class */

    bool is_evictable() const {
      return ref_count == 0 &&
             dirty_regions.empty() &&
             !is_writing_back();
    }

    void on_evict() {
      ITYR_CHECK(is_evictable());
      invalidate();
      // for safety
      outer->cache_tlb_.clear();
    }

    void on_cache_map(cache_entry_idx_t idx) {
      entry_idx = idx;
    }
  };

  static std::string cache_shmem_name(int global_rank) {
    std::stringstream ss;
    ss << "/ityr_ori_cache_" << global_rank;
    return ss.str();
  }

  common::physical_mem init_cache_pm() {
    common::physical_mem pm(cache_shmem_name(common::topology::my_rank()), vm_.size(), true);
    pm.map_to_vm(vm_.addr(), vm_.size(), 0);
    return pm;
  }

  using cache_key_t = uintptr_t;

  cache_key_t cache_key(void* addr) const {
    ITYR_CHECK(addr);
    ITYR_CHECK(reinterpret_cast<uintptr_t>(addr) % BlockSize == 0);
    return reinterpret_cast<uintptr_t>(addr) / BlockSize;
  }

  block_region pad_fetch_region(block_region br) const {
    return {common::round_down_pow2(br.begin, sub_block_size_),
            common::round_up_pow2(br.end, sub_block_size_)};
  }
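
  // Worked example: with sub_block_size_ == 4096, a requested block region {100, 5000} is
  // padded to {0, 8192}, so fetches are always issued on sub-block boundaries and nearby
  // accesses can later be served from the already-valid regions.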

  template <bool UpdateLRU = true>
  cache_block& get_entry(void* addr) {
    try {
      return cs_.template ensure_cached<UpdateLRU>(cache_key(addr));
    } catch (cache_full_exception& e) {
      // Write back all dirty cache blocks and retry
      ensure_all_cache_clean();
      try {
        return cs_.template ensure_cached<UpdateLRU>(cache_key(addr));
      } catch (cache_full_exception& e) {
        common::die("cache is exhausted (too much checked-out memory)");
      }
    }
  }
449  void update_mapping(cache_block& cb) {
450  ITYR_PROFILER_RECORD(prof_event_cache_mmap);
451 
452  // save the number of mmap entries by unmapping previous virtual memory
453  if (cb.mapped_addr) {
454  common::verbose<3>("Unmap cache block %d from [%p, %p) (size=%ld)",
455  cb.entry_idx, cb.mapped_addr, cb.mapped_addr + BlockSize, BlockSize);
456  common::mmap_no_physical_mem(cb.mapped_addr, BlockSize, true);
457  }
458 
459  ITYR_CHECK(cb.addr);
460  common::verbose<3>("Map cache block %d to [%p, %p) (size=%ld)",
461  cb.entry_idx, cb.addr, cb.addr + BlockSize, BlockSize);
462  pm_.map_to_vm(cb.addr, BlockSize, cb.entry_idx * BlockSize);
463  cb.mapped_addr = cb.addr;
464  }

  bool fetch_begin(cache_block& cb, block_region br) {
    if (cb.valid_regions.include(br)) {
      // fast path (the requested region is already fetched)
      cprof_.record(cb.entry_idx, br, {});
      return false;
    }

    block_region br_pad = pad_fetch_region(br);

    std::byte* cache_begin = reinterpret_cast<std::byte*>(vm_.addr());

    block_region_set fetch_regions = cb.valid_regions.complement(br_pad);

    // fetch only nondirty sections
    for (auto [blk_offset_b, blk_offset_e] : fetch_regions) {
      ITYR_CHECK(cb.entry_idx < cs_.num_entries());

      std::byte* addr = cache_begin + cb.entry_idx * BlockSize + blk_offset_b;
      std::size_t size = blk_offset_e - blk_offset_b;
      std::size_t pm_offset = cb.pm_offset + blk_offset_b;

      common::verbose<3>("Fetching [%p, %p) (%ld bytes) to cache block %d from rank %d (win=%p, disp=%ld)",
                         cb.addr + blk_offset_b, cb.addr + blk_offset_e, size,
                         cb.entry_idx, cb.owner, cb.win, pm_offset);

      common::rma::get_nb(*cache_win_, addr, size, *cb.win, cb.owner, pm_offset);
    }

    cb.valid_regions.add(br_pad);

    cprof_.record(cb.entry_idx, br, fetch_regions);

    return true;
  }

  void fetch_complete() {
    if (!fetching_wins_.empty()) {
      for (const common::rma::win* win : fetching_wins_) {
        // TODO: remove duplicates
        common::rma::flush(*win);
        common::verbose<3>("Fetch complete (win=%p)", win);
      }
      fetching_wins_.clear();
    }
  }

  void add_fetching_win(const common::rma::win& win) {
    if (fetching_wins_.empty() || fetching_wins_.back() != &win) {
      // best effort to avoid duplicates
      fetching_wins_.push_back(&win);
    }
  }

  void add_dirty_region(cache_block& cb, block_region br) {
    bool is_new_dirty_block = cb.dirty_regions.empty();

    cb.dirty_regions.add(br);

    if (is_new_dirty_block) {
      dirty_cache_blocks_.push_back(&cb);
      has_dirty_cache_ = true;

      if constexpr (enable_write_through) {
        writeback_begin(cb);

      } else if (dirty_cache_blocks_.size() >= max_dirty_cache_blocks_) {
        writeback_begin();
      }
    }
  }

  void writeback_begin() {
    for (auto& cb : dirty_cache_blocks_) {
      if (!cb->dirty_regions.empty()) {
        writeback_begin(*cb);
      }
    }
    dirty_cache_blocks_.clear();
  }

  void writeback_begin(cache_block& cb) {
    if (cb.writeback_epoch == writeback_epoch_) {
      // MPI_Put has already been started for this cache block in the current writeback epoch.
      // Overlapping MPI_Put calls to the same location cause undefined behaviour, so an
      // MPI_Win_flush must be completed between the overlapping MPI_Put calls here.
      writeback_complete();
      ITYR_CHECK(cb.writeback_epoch < writeback_epoch_);
    }

    std::byte* cache_begin = reinterpret_cast<std::byte*>(vm_.addr());

    for (auto [blk_offset_b, blk_offset_e] : cb.dirty_regions) {
      ITYR_CHECK(cb.entry_idx < cs_.num_entries());

      std::byte* addr = cache_begin + cb.entry_idx * BlockSize + blk_offset_b;
      std::size_t size = blk_offset_e - blk_offset_b;
      std::size_t pm_offset = cb.pm_offset + blk_offset_b;

      common::verbose<3>("Writing back [%p, %p) (%ld bytes) to rank %d (win=%p, disp=%ld)",
                         cb.addr + blk_offset_b, cb.addr + blk_offset_e, size,
                         cb.owner, cb.win, pm_offset);

      common::rma::put_nb(*cache_win_, addr, size, *cb.win, cb.owner, pm_offset);
    }

    cb.dirty_regions.clear();

    cb.writeback_epoch = writeback_epoch_;

    writing_back_wins_.push_back(cb.win);
  }
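
  // Illustrative timeline for the epoch check above, assuming writeback_epoch_ == 5:
  //
  //   writeback_begin(cb);       // issues put_nb, sets cb.writeback_epoch = 5
  //   add_dirty_region(cb, br);  // the same block becomes dirty again
  //   writeback_begin(cb);       // cb.writeback_epoch == writeback_epoch_ (5), so
  //                              // writeback_complete() flushes and bumps writeback_epoch_
  //                              // to 6 before the new put_nb for the same location is issued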

  void writeback_complete() {
    if (!writing_back_wins_.empty()) {
      // sort | uniq
      // FIXME: costly?
      std::sort(writing_back_wins_.begin(), writing_back_wins_.end());
      writing_back_wins_.erase(std::unique(writing_back_wins_.begin(), writing_back_wins_.end()),
                               writing_back_wins_.end());

      for (const common::rma::win* win : writing_back_wins_) {
        common::rma::flush(*win);
        common::verbose<3>("Writing back complete (win=%p)", win);
      }
      writing_back_wins_.clear();

      writeback_epoch_++;
    }

    if (dirty_cache_blocks_.empty() && has_dirty_cache_) {
      has_dirty_cache_ = false;
      rm_.increment_epoch();
    }
  }

  bool is_cached(void* addr) const {
    return cs_.is_cached(cache_key(addr));
  }

  void invalidate_all() {
    if (readonly_regions_.empty()) {
      cs_.for_each_entry([&](cache_block& cb) {
        cb.invalidate();
      });
    } else {
      cs_.for_each_entry([&](cache_block& cb) {
        if (cb.valid_regions.empty()) return;

        ITYR_CHECK(cb.addr);
        uintptr_t blk_addr = reinterpret_cast<uintptr_t>(cb.addr);
        region<uintptr_t> blk_addr_range = {blk_addr, blk_addr + BlockSize};

        region_set<uintptr_t> blk_readonly_ranges = get_intersection(readonly_regions_, blk_addr_range);
        if (blk_readonly_ranges.empty()) {
          // This cache block is not included in the read-only regions
          cb.invalidate();

        } else if (*blk_readonly_ranges.begin() != blk_addr_range) {
          // This cache block partly overlaps with the read-only regions
          block_region_set brs_ro;
          auto it = brs_ro.before_begin();
          for (const auto& r : blk_readonly_ranges) {
            it = brs_ro.add({r.begin - blk_addr, r.end - blk_addr}, it);
          }
          cb.valid_regions = get_intersection(cb.valid_regions, brs_ro);
        }
      });
    }
  }
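
  // Worked example for the read-only handling above, assuming BlockSize == 0x10000:
  // if a cache block maps [0x200000, 0x210000) and set_readonly() was called for
  // [0x204000, 0x208000), then blk_readonly_ranges == {[0x204000, 0x208000)}, which is not the
  // whole block, so only the block-relative region {0x4000, 0x8000} remains in valid_regions
  // and the rest of the block is invalidated.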

  using cache_tlb = tlb<std::byte*, cache_block*, ITYR_ORI_CACHE_TLB_SIZE>;

  std::size_t cache_size_;
  block_size_t sub_block_size_;

  common::virtual_mem vm_;
  common::physical_mem pm_;

  cache_system<cache_key_t, cache_block> cs_;

  std::unique_ptr<common::rma::win> cache_win_;

  cache_tlb cache_tlb_;

  std::vector<const common::rma::win*> fetching_wins_;
  std::vector<cache_block*> cache_blocks_to_map_;

  std::vector<cache_block*> dirty_cache_blocks_;
  std::size_t max_dirty_cache_blocks_;

  // A writeback epoch is the interval between writeback completion events.
  // Writeback epochs are conceptually different from the epochs used in the lazy release manager:
  // even after the writeback epoch is incremented, some cache blocks may still be dirty.
  writeback_epoch_t writeback_epoch_ = 1;
  std::vector<const common::rma::win*> writing_back_wins_;

  // A pending dirty cache block is one that has been marked dirty but whose writeback has not
  // yet started. Only when the writeback is complete and there is no pending dirty cache block
  // can we say that all cache blocks are clean.
  bool has_dirty_cache_ = false;

  // A release epoch is the interval between events at which all cache blocks become clean.
  release_manager rm_;

  region_set<uintptr_t> readonly_regions_;

  cache_profiler cprof_;
};

}
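
// Illustrative sketch of the checkin side for a hypothetical caller, mirroring the checkout
// sketch above: a write checkout is finished by registering the written range as dirty and
// dropping the reference; the data is then written back either immediately (write-through),
// once max_dirty_cache_blocks_ is exceeded, or at the next release fence.
//
//   if (!cm.checkin_fast<true, true>(addr, size)) {
//     for (/* each cache block overlapping [addr, addr + size) */) {
//       cm.checkin_blk<true, true>(blk_addr, req_b, req_e);
//     }
//   }
//   // ...
//   cm.release();   // or release_lazy() + poll() when lazy release is enabled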