template <block_size_t BlockSize, typename Fn>
void for_each_block(void* addr, std::size_t size, Fn fn) {
  // block-aligned start address (this line was elided in the listing)
  std::byte* blk_addr_b = reinterpret_cast<std::byte*>(
      common::round_down_pow2(reinterpret_cast<uintptr_t>(addr), uintptr_t(BlockSize)));
  std::byte* blk_addr_e = reinterpret_cast<std::byte*>(addr) + size;

  for (std::byte* blk_addr = blk_addr_b; blk_addr < blk_addr_e; blk_addr += BlockSize) {
    std::byte* req_addr_b = std::max(reinterpret_cast<std::byte*>(addr), blk_addr);
    std::byte* req_addr_e = std::min(reinterpret_cast<std::byte*>(addr) + size, blk_addr + BlockSize);
    fn(blk_addr, req_addr_b, req_addr_e);
  }
}
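
// Illustrative example (not part of the original header): for_each_block
// visits every BlockSize-aligned block overlapping [addr, addr + size) and
// passes the clamped intersection [req_addr_b, req_addr_e); the clamped
// sub-ranges partition the requested range. The block size 64 below is an
// arbitrary choice for this sketch.
inline std::size_t count_requested_bytes_example(void* addr, std::size_t size) {
  std::size_t total = 0;
  for_each_block<64>(addr, size, [&](std::byte* blk_addr,
                                     std::byte* req_addr_b,
                                     std::byte* req_addr_e) {
    (void)blk_addr;                    // 64-byte-aligned block base
    total += req_addr_e - req_addr_b;  // clamped sub-range within this block
  });
  return total;  // always equals size
}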
 
template <block_size_t BlockSize, typename HomeSegFn, typename CacheBlkFn>
void for_each_seg_blk(const coll_mem& cm, void* addr, std::size_t size,
                      HomeSegFn home_seg_fn, CacheBlkFn cache_blk_fn) {
  for_each_mem_segment(cm, addr, size, [&](const auto& seg) {
    std::byte*  seg_addr = reinterpret_cast<std::byte*>(cm.vm().addr()) + seg.offset_b;
    std::size_t seg_size = seg.offset_e - seg.offset_b;

    if (common::topology::is_locally_accessible(seg.owner)) {
      // home segment: handled at segment granularity
      home_seg_fn(seg_addr, seg_size, seg.pm_offset);
    } else {
      // cache blocks: iterate over BlockSize blocks within this segment
      std::byte* addr_b = std::max(seg_addr, reinterpret_cast<std::byte*>(addr));
      std::byte* addr_e = std::min(seg_addr + seg_size, reinterpret_cast<std::byte*>(addr) + size);
      for_each_block<BlockSize>(addr_b, addr_e - addr_b, [&](std::byte* blk_addr,
                                                             std::byte* req_addr_b,
                                                             std::byte* req_addr_e) {
        std::size_t pm_offset = seg.pm_offset + (blk_addr - seg_addr);
        cache_blk_fn(blk_addr, req_addr_b, req_addr_e, seg.owner, pm_offset);
      });
    }
  });
}
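
// Sketch (illustrative): a request spanning two distribution segments. A
// segment owned by a locally accessible rank is handled by one home_seg_fn
// call; a remote segment is decomposed into BlockSize cache blocks.
//
//   |----- segment 0 (local) -----|----- segment 1 (remote) -----|
//          [=========== requested range ===========]
//           `--- home_seg_fn ---'  `-- cache_blk_fn per block --'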
 
template <block_size_t BlockSize>
class core_default {
public:
  core_default(std::size_t cache_size, std::size_t sub_block_size)
    : // ... (earlier member initializers elided in the listing)
      home_manager_(calc_home_mmap_limit(cache_size / BlockSize)),
      cache_manager_(cache_size, sub_block_size) {}

  static constexpr block_size_t block_size = BlockSize;
 
  // forwards to the default mem mapper (body reconstructed)
  void* malloc_coll(std::size_t size) { return malloc_coll<default_mem_mapper>(size); }

  template <template <block_size_t> typename MemMapper, typename... MemMapperArgs>
  void* malloc_coll(std::size_t size, MemMapperArgs&&... mmargs) {
    // ... (size check elided in the listing)
    ITYR_CHECK_MESSAGE(common::mpi_bcast_value(size, 0, common::topology::mpicomm()) == size,
                       "The size passed to malloc_coll() is different among workers");

    auto mmapper = std::make_unique<MemMapper<BlockSize>>(size,
                                                          common::topology::n_ranks(),
                                                          std::forward<MemMapperArgs>(mmargs)...);
    coll_mem& cm = cm_manager_.create(size, std::move(mmapper));
    void* addr = cm.vm().addr();

    common::verbose("Allocate collective memory [%p, %p) (%ld bytes) (win=%p)",
                    addr, reinterpret_cast<std::byte*>(addr) + size, size, &cm.win());

    return addr;
  }

  void* malloc(std::size_t size) {
    // ... (checks elided in the listing)
    void* addr = noncoll_mem_.allocate(size);

    common::verbose<2>("Allocate noncollective memory [%p, %p) (%ld bytes)",
                       addr, reinterpret_cast<std::byte*>(addr) + size, size);

    return addr;
  }
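
  // Usage sketch (illustrative, not part of the original header): collective
  // allocations must be called by all workers with the same size, while
  // noncollective allocations are purely local.
  //
  //   void* gp = c.malloc_coll<mem_mapper::block>(1 << 20);  // all workers
  //   void* lp = c.malloc(256);                              // this worker only
  //   ...
  //   c.free_coll(gp);   // collective free
  //   c.free(lp, 256);   // local (or remote) free with the original size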
 
  void free_coll(void* addr) {
    // ... (null check elided in the listing)
    ITYR_CHECK_MESSAGE(common::mpi_bcast_value(reinterpret_cast<uintptr_t>(addr), 0,
                                               common::topology::mpicomm()) == reinterpret_cast<uintptr_t>(addr),
                       "The address passed to free_coll() is different among workers");

    // ensure dirty data are written back before the memory is destroyed
    cache_manager_.ensure_all_cache_clean();

    coll_mem& cm = cm_manager_.get(addr);

    // evict all home/cache entries for this allocation
    for (std::size_t o = 0; o < cm.effective_size(); o += BlockSize) {
      std::byte* addr = reinterpret_cast<std::byte*>(cm.vm().addr()) + o;
      home_manager_.ensure_evicted(addr);
      cache_manager_.ensure_evicted(addr);
    }

    home_manager_.clear_tlb();
    cache_manager_.clear_tlb();

    common::verbose("Deallocate collective memory [%p, %p) (%ld bytes) (win=%p)",
                    addr, reinterpret_cast<std::byte*>(addr) + cm.size(), cm.size(), &cm.win());

    cm_manager_.destroy(cm);
  }

  void free(void* addr, std::size_t size) {
    auto target_rank = noncoll_mem_.get_owner(addr);

    if (target_rank == common::topology::my_rank()) {  // condition reconstructed
      common::verbose<2>("Deallocate noncollective memory [%p, %p) (%ld bytes) locally",
                         addr, reinterpret_cast<std::byte*>(addr) + size, size);

      // discard dirty cache for the freed region before local deallocation
      for_each_block<BlockSize>(addr, size, [&](std::byte* blk_addr,
                                                std::byte* req_addr_b,
                                                std::byte* req_addr_e) {
        cache_manager_.discard_dirty(blk_addr, req_addr_b, req_addr_e);
      });

      noncoll_mem_.local_deallocate(addr, size);
    } else {
      common::verbose<2>("Deallocate noncollective memory [%p, %p) (%ld bytes) remotely (rank=%d)",
                         addr, reinterpret_cast<std::byte*>(addr) + size, size, target_rank);

      noncoll_mem_.remote_deallocate(addr, size, target_rank);
    }
  }
 
  void get(const void* from_addr, void* to_addr, std::size_t size) {
    // ... (profiling elided in the listing)
    std::byte* from_addr_ = reinterpret_cast<std::byte*>(const_cast<void*>(from_addr));

    if constexpr (enable_vm_map) {  // branch condition reconstructed
      if (!checkout_impl_nb<mode::read_t, false>(from_addr_, size)) {
        checkout_complete_impl();
      }
      get_copy_impl(from_addr_, reinterpret_cast<std::byte*>(to_addr), size);
    } else {
      if (!checkout_impl_nb<mode::read_t, true>(from_addr_, size)) {
        checkout_complete_impl();
      }
      get_copy_impl(from_addr_, reinterpret_cast<std::byte*>(to_addr), size);
      checkin_impl<mode::read_t, true>(from_addr_, size);
    }
  }

  void put(const void* from_addr, void* to_addr, std::size_t size) {
    // ... (profiling elided in the listing)
    std::byte* to_addr_ = reinterpret_cast<std::byte*>(to_addr);

    if constexpr (enable_vm_map) {  // branch condition reconstructed
      if (!checkout_impl_nb<mode::write_t, false>(to_addr_, size)) {
        checkout_complete_impl();
      }
      put_copy_impl(reinterpret_cast<const std::byte*>(from_addr), to_addr_, size);
      checkin_impl<mode::write_t, false>(to_addr_, size);
    } else {
      if (!checkout_impl_nb<mode::write_t, true>(to_addr_, size)) {
        checkout_complete_impl();
      }
      put_copy_impl(reinterpret_cast<const std::byte*>(from_addr), to_addr_, size);
      checkin_impl<mode::write_t, true>(to_addr_, size);
    }
  }
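
  // Usage sketch (illustrative): get/put move data between ori-managed global
  // memory and private buffers with memcpy-like semantics.
  //
  //   double local[4];
  //   c.get(gp, local, sizeof(local));   // global -> private
  //   local[0] += 1.0;
  //   c.put(local, gp, sizeof(local));   // private -> global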
 
  template <typename Mode>
  bool checkout_nb(void* addr, std::size_t size, Mode) {
    if constexpr (!enable_vm_map) {
      common::die("ITYR_ORI_ENABLE_VM_MAP must be true for core::checkout/checkin");
    }

    common::verbose<2>("Checkout request (mode: %s) for [%p, %p) (%ld bytes)",
                       str(Mode{}).c_str(), addr, reinterpret_cast<std::byte*>(addr) + size, size);

    // ... (profiling elided in the listing)
    return checkout_impl_nb<Mode, true>(reinterpret_cast<std::byte*>(addr), size);
  }

  template <typename Mode>
  void checkout(void* addr, std::size_t size, Mode mode) {
    if (!checkout_nb(addr, size, mode)) {  // body reconstructed; lines elided
      checkout_complete();
    }
  }

  void checkout_complete() {
    // ... (profiling elided in the listing)
    checkout_complete_impl();
  }

  template <typename Mode>
  void checkin(void* addr, std::size_t size, Mode) {
    if constexpr (!enable_vm_map) {
      common::die("ITYR_ORI_ENABLE_VM_MAP must be true for core::checkout/checkin");
    }

    common::verbose<2>("Checkin request (mode: %s) for [%p, %p) (%ld bytes)",
                       str(Mode{}).c_str(), addr, reinterpret_cast<std::byte*>(addr) + size, size);

    // ... (profiling elided in the listing)
    checkin_impl<Mode, true>(reinterpret_cast<std::byte*>(addr), size);
  }
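
  // Usage sketch (illustrative, not part of the original header): a blocking
  // read-modify-write of n bytes at p through the checkout/checkin API.
  //
  //   c.checkout(p, n, mode::read_write);
  //   for (std::size_t i = 0; i < n; i++) {
  //     p[i] = std::byte(std::to_integer<int>(p[i]) + 1);  // direct access
  //   }
  //   c.checkin(p, n, mode::read_write);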
 
  void release() {
    // ... (profiling elided in the listing)
    cache_manager_.release();
  }

  using release_handler = typename cache_manager<BlockSize>::release_handler;

  release_handler release_lazy() {
    // ...
    common::verbose<2>("Lazy release handler is created");

    return cache_manager_.release_lazy();
  }

  void acquire() {
    // ...
    cache_manager_.acquire();
  }

  void acquire(release_handler rh) {
    // ...
    cache_manager_.acquire(rh);
  }
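
  // Ordering sketch (illustrative): writes by one process become visible to
  // another only across a release -> acquire pair, e.g. when passing a task:
  //
  //   /* sender */                       /* receiver */
  //   c.put(...);
  //   auto rh = c.release_lazy();
  //   /* ... send rh with the task */    /* ... receive rh */
  //                                      c.acquire(rh);
  //                                      c.get(...);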
 
  void set_readonly_coll(void* addr, std::size_t size) {
    // ... (lines elided in the listing)
    cache_manager_.set_readonly(addr, size);
    // ...
  }

  void unset_readonly_coll(void* addr, std::size_t size) {
    // ...
    cache_manager_.unset_readonly(addr, size);
    // ...
  }

  void poll() {
    cache_manager_.poll();
  }

  void collect_deallocated() {
    noncoll_mem_.collect_deallocated();  // body reconstructed
  }

  void cache_prof_begin() {
    home_manager_.home_prof_begin();
    cache_manager_.cache_prof_begin();
  }

  void cache_prof_end() {
    home_manager_.home_prof_end();
    cache_manager_.cache_prof_end();
  }

  void cache_prof_print() const {
    home_manager_.home_prof_print();
    cache_manager_.cache_prof_print();
  }
 
  // ... (get_local_mem() elided in the listing)

private:
  std::size_t calc_home_mmap_limit(std::size_t n_cache_blocks) const {
    std::size_t sys_limit = sys_mmap_entry_limit();
    std::size_t margin = 1000;
    ITYR_CHECK(sys_limit > 2 * n_cache_blocks + margin);

    std::size_t candidate = (sys_limit - 2 * n_cache_blocks - margin) / 2;
    std::size_t max_val = 1024 * 1024;
    return std::min(max_val, candidate);
  }
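
  // Worked example (illustrative): with the common Linux default
  // vm.max_map_count = 65530 and n_cache_blocks = 8192,
  //   (65530 - 2*8192 - 1000) / 2 = 24073
  // home segments may be mmapped at a time, well under the 1024*1024 cap.
  // The 2 * n_cache_blocks term reserves room for cache-block mappings, and
  // the margin and halving leave headroom for other mmap usage.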
 
  template <typename Mode, bool IncrementRef>
  bool checkout_impl_nb(std::byte* addr, std::size_t size) {
    // write-only checkouts need not fetch the existing data
    constexpr bool skip_fetch = std::is_same_v<Mode, mode::write_t>;
    if (noncoll_mem_.has(addr)) {
      return checkout_noncoll_nb<skip_fetch, IncrementRef>(addr, size);
    } else {
      return checkout_coll_nb<skip_fetch, IncrementRef>(addr, size);
    }
  }
 
  template <bool SkipFetch, bool IncrementRef>
  bool checkout_coll_nb(std::byte* addr, std::size_t size) {
    // fast path: the home segment is already mapped
    if (home_manager_.template checkout_fast<IncrementRef>(addr, size)) {
      return true;
    }

    // fast path: the cache blocks are already present
    auto [entry_found, fetch_completed] =
      cache_manager_.template checkout_fast<SkipFetch, IncrementRef>(addr, size);
    if (entry_found) {
      return fetch_completed;
    }

    coll_mem& cm = cm_manager_.get(addr);

    bool checkout_completed = true;

    for_each_seg_blk<BlockSize>(cm, addr, size,
      // home segment
      [&](std::byte* seg_addr, std::size_t seg_size, std::size_t pm_offset) {
        checkout_completed &=
          home_manager_.template checkout_seg<IncrementRef>(
              seg_addr, seg_size, addr, size,
              cm.home_pm(), pm_offset, cm.home_all_mapped());
      },
      // cache block (trailing arguments reconstructed)
      [&](std::byte* blk_addr, std::byte* req_addr_b, std::byte* req_addr_e,
          common::topology::rank_t owner, std::size_t pm_offset) {
        checkout_completed &=
          cache_manager_.template checkout_blk<SkipFetch, IncrementRef>(
              blk_addr, req_addr_b, req_addr_e,
              cm.win(), owner, pm_offset);
      });

    return checkout_completed;
  }
 
  template <bool SkipFetch, bool IncrementRef>
  bool checkout_noncoll_nb(std::byte* addr, std::size_t size) {
    ITYR_CHECK(noncoll_mem_.has(addr));

    auto target_rank = noncoll_mem_.get_owner(addr);

    if (common::topology::is_locally_accessible(target_rank)) {  // condition reconstructed
      // home memory: no cache entry is created
      home_manager_.on_checkout_noncoll(size);
      return true;
    }

    auto [entry_found, fetch_completed] =
      cache_manager_.template checkout_fast<SkipFetch, IncrementRef>(addr, size);
    if (entry_found) {
      return fetch_completed;
    }

    bool checkout_completed = true;

    for_each_block<BlockSize>(addr, size, [&](std::byte* blk_addr,
                                              std::byte* req_addr_b,
                                              std::byte* req_addr_e) {
      checkout_completed &=
        cache_manager_.template checkout_blk<SkipFetch, IncrementRef>(
            blk_addr, req_addr_b, req_addr_e,
            noncoll_mem_.win(), target_rank,
            noncoll_mem_.get_disp(blk_addr));
    });

    return checkout_completed;
  }
 
  template <typename Mode, bool DecrementRef>
  void checkin_impl(std::byte* addr, std::size_t size) {
    // read-only checkins do not mark the region dirty
    constexpr bool register_dirty = !std::is_same_v<Mode, mode::read_t>;
    if (noncoll_mem_.has(addr)) {
      checkin_noncoll<register_dirty, DecrementRef>(addr, size);
    } else {
      checkin_coll<register_dirty, DecrementRef>(addr, size);
    }
  }

  void checkout_complete_impl() {
    home_manager_.checkout_complete();
    cache_manager_.checkout_complete();
  }
 
  template <bool RegisterDirty, bool DecrementRef>
  void checkin_coll(std::byte* addr, std::size_t size) {
    if (home_manager_.template checkin_fast<DecrementRef>(addr, size)) {
      return;
    }

    if (cache_manager_.template checkin_fast<RegisterDirty, DecrementRef>(addr, size)) {
      return;
    }

    coll_mem& cm = cm_manager_.get(addr);

    for_each_seg_blk<BlockSize>(cm, addr, size,
      // home segment
      [&](std::byte* seg_addr, std::size_t, std::size_t) {
        home_manager_.template checkin_seg<DecrementRef>(seg_addr, cm.home_all_mapped());
      },
      // cache block
      [&](std::byte* blk_addr, std::byte* req_addr_b, std::byte* req_addr_e,
          common::topology::rank_t, std::size_t) {
        cache_manager_.template checkin_blk<RegisterDirty, DecrementRef>(
            blk_addr, req_addr_b, req_addr_e);
      });
  }
 
  template <bool RegisterDirty, bool DecrementRef>
  void checkin_noncoll(std::byte* addr, std::size_t size) {
    ITYR_CHECK(noncoll_mem_.has(addr));

    auto target_rank = noncoll_mem_.get_owner(addr);

    if (common::topology::is_locally_accessible(target_rank)) {  // condition reconstructed
      // home memory: nothing to check in
      return;
    }

    if (cache_manager_.template checkin_fast<RegisterDirty, DecrementRef>(addr, size)) {
      return;
    }

    for_each_block<BlockSize>(addr, size, [&](std::byte* blk_addr,
                                              std::byte* req_addr_b,
                                              std::byte* req_addr_e) {
      cache_manager_.template checkin_blk<RegisterDirty, DecrementRef>(
          blk_addr, req_addr_b, req_addr_e);
    });
  }
 
  void get_copy_impl(std::byte* from_addr, std::byte* to_addr, std::size_t size) {
    if constexpr (enable_vm_map) {
      std::memcpy(to_addr, from_addr, size);
    } else if (noncoll_mem_.has(from_addr)) {
      get_copy_noncoll(from_addr, to_addr, size);
    } else {
      get_copy_coll(from_addr, to_addr, size);
    }
  }
 
  void get_copy_coll(std::byte* from_addr, std::byte* to_addr, std::size_t size) {
    ITYR_CHECK(!enable_vm_map);

    coll_mem& cm = cm_manager_.get(from_addr);

    for_each_seg_blk<BlockSize>(cm, from_addr, size,
      // home segment: copy directly from the mapped physical memory
      [&](std::byte* seg_addr, std::size_t seg_size, std::size_t pm_offset) {
        const common::virtual_mem& vm = cm.home_vm();
        std::byte*  seg_addr_b = std::max(from_addr, seg_addr);
        std::byte*  seg_addr_e = std::min(seg_addr + seg_size, from_addr + size);
        std::size_t seg_offset = seg_addr_b - seg_addr;
        std::byte*  from_addr_ = reinterpret_cast<std::byte*>(vm.addr()) + pm_offset + seg_offset;
        std::byte*  to_addr_   = to_addr + (seg_addr_b - from_addr);
        std::memcpy(to_addr_, from_addr_, seg_addr_e - seg_addr_b);
      },
      // cache block: copy via the cache
      [&](std::byte* blk_addr, std::byte* req_addr_b, std::byte* req_addr_e,
          common::topology::rank_t, std::size_t) {
        cache_manager_.get_copy_blk(blk_addr, req_addr_b, req_addr_e,
                                    to_addr + (req_addr_b - from_addr));
      });
  }
 
  void get_copy_noncoll(std::byte* from_addr, std::byte* to_addr, std::size_t size) {
    ITYR_CHECK(noncoll_mem_.has(from_addr));

    auto target_rank = noncoll_mem_.get_owner(from_addr);

    if (common::topology::is_locally_accessible(target_rank)) {  // condition reconstructed
      std::memcpy(to_addr, from_addr, size);
      return;
    }

    for_each_block<BlockSize>(from_addr, size, [&](std::byte* blk_addr,
                                                   std::byte* req_addr_b,
                                                   std::byte* req_addr_e) {
      cache_manager_.get_copy_blk(blk_addr, req_addr_b, req_addr_e,
                                  to_addr + (req_addr_b - from_addr));
    });
  }
 
  void put_copy_impl(const std::byte* from_addr, std::byte* to_addr, std::size_t size) {
    if constexpr (enable_vm_map) {
      std::memcpy(to_addr, from_addr, size);
    } else if (noncoll_mem_.has(to_addr)) {
      put_copy_noncoll(from_addr, to_addr, size);
    } else {
      put_copy_coll(from_addr, to_addr, size);
    }
  }
 
  void put_copy_coll(const std::byte* from_addr, std::byte* to_addr, std::size_t size) {
    ITYR_CHECK(!enable_vm_map);

    coll_mem& cm = cm_manager_.get(to_addr);

    for_each_seg_blk<BlockSize>(cm, to_addr, size,
      // home segment: copy directly into the mapped physical memory
      [&](std::byte* seg_addr, std::size_t seg_size, std::size_t pm_offset) {
        const common::virtual_mem& vm = cm.home_vm();
        std::byte*  seg_addr_b = std::max(to_addr, seg_addr);
        std::byte*  seg_addr_e = std::min(seg_addr + seg_size, to_addr + size);
        std::size_t seg_offset = seg_addr_b - seg_addr;
        const std::byte* from_addr_ = from_addr + (seg_addr_b - to_addr);
        std::byte*       to_addr_   = reinterpret_cast<std::byte*>(vm.addr()) + pm_offset + seg_offset;
        std::memcpy(to_addr_, from_addr_, seg_addr_e - seg_addr_b);
      },
      // cache block: write via the cache
      [&](std::byte* blk_addr, std::byte* req_addr_b, std::byte* req_addr_e,
          common::topology::rank_t, std::size_t) {
        cache_manager_.put_copy_blk(blk_addr, req_addr_b, req_addr_e,
                                    from_addr + (req_addr_b - to_addr));
      });
  }
 
  void put_copy_noncoll(const std::byte* from_addr, std::byte* to_addr, std::size_t size) {
    ITYR_CHECK(noncoll_mem_.has(to_addr));

    auto target_rank = noncoll_mem_.get_owner(to_addr);

    if (common::topology::is_locally_accessible(target_rank)) {  // condition reconstructed
      std::memcpy(to_addr, from_addr, size);
      return;
    }

    for_each_block<BlockSize>(to_addr, size, [&](std::byte* blk_addr,
                                                 std::byte* req_addr_b,
                                                 std::byte* req_addr_e) {
      cache_manager_.put_copy_blk(blk_addr, req_addr_b, req_addr_e,
                                  from_addr + (req_addr_b - to_addr));
    });
  }
 
  template <block_size_t BS>
  using default_mem_mapper = mem_mapper::ITYR_ORI_DEFAULT_MEM_MAPPER<BS>;

  coll_mem_manager         cm_manager_;
  noncoll_mem              noncoll_mem_;
  home_manager<BlockSize>  home_manager_;
  cache_manager<BlockSize> cache_manager_;
};
 
template <block_size_t BlockSize>
class core_nocache {
public:
  core_nocache(std::size_t, std::size_t) {}

  static constexpr block_size_t block_size = BlockSize;

  // forwards to the default mem mapper (body reconstructed)
  void* malloc_coll(std::size_t size) { return malloc_coll<default_mem_mapper>(size); }

  template <template <block_size_t> typename MemMapper, typename... MemMapperArgs>
  void* malloc_coll(std::size_t size, MemMapperArgs&&... mmargs) {
    if (size == 0) {
      common::die("Memory allocation size cannot be 0");
    }

    auto mmapper = std::make_unique<MemMapper<BlockSize>>(size,
                                                          common::topology::n_ranks(),
                                                          std::forward<MemMapperArgs>(mmargs)...);
    coll_mem& cm = cm_manager_.create(size, std::move(mmapper));
    void* addr = cm.vm().addr();

    common::verbose("Allocate collective memory [%p, %p) (%ld bytes) (win=%p)",
                    addr, reinterpret_cast<std::byte*>(addr) + size, size, &cm.win());

    return addr;
  }

  void* malloc(std::size_t size) {
    void* addr = noncoll_mem_.allocate(size);

    common::verbose<2>("Allocate noncollective memory [%p, %p) (%ld bytes)",
                       addr, reinterpret_cast<std::byte*>(addr) + size, size);

    return addr;
  }

  void free_coll(void* addr) {
    if (!addr) {
      common::die("Null pointer was passed to free_coll()");
    }

    coll_mem& cm = cm_manager_.get(addr);

    common::verbose("Deallocate collective memory [%p, %p) (%ld bytes) (win=%p)",
                    addr, reinterpret_cast<std::byte*>(addr) + cm.size(), cm.size(), &cm.win());

    cm_manager_.destroy(cm);
  }

  void free(void* addr, std::size_t size) {
    auto target_rank = noncoll_mem_.get_owner(addr);

    if (target_rank == common::topology::my_rank()) {  // condition reconstructed
      common::verbose<2>("Deallocate noncollective memory [%p, %p) (%ld bytes) locally",
                         addr, reinterpret_cast<std::byte*>(addr) + size, size);

      noncoll_mem_.local_deallocate(addr, size);
    } else {
      common::verbose<2>("Deallocate noncollective memory [%p, %p) (%ld bytes) remotely (rank=%d)",
                         addr, reinterpret_cast<std::byte*>(addr) + size, size, target_rank);

      noncoll_mem_.remote_deallocate(addr, size, target_rank);
    }
  }
 
  void get(const void* from_addr, void* to_addr, std::size_t size) {
    // ... (profiling elided in the listing)
    std::byte* from_addr_ = reinterpret_cast<std::byte*>(const_cast<void*>(from_addr));
    get_impl(from_addr_, reinterpret_cast<std::byte*>(to_addr), size);
  }

  void put(const void* from_addr, void* to_addr, std::size_t size) {
    // ...
    std::byte* to_addr_ = reinterpret_cast<std::byte*>(to_addr);
    put_impl(reinterpret_cast<const std::byte*>(from_addr), to_addr_, size);
  }

  template <typename Mode>
  bool checkout_nb(void*, std::size_t, Mode) {
    common::die("core::checkout/checkin is disabled");
  }

  template <typename Mode>
  void checkout(void*, std::size_t, Mode) {
    common::die("core::checkout/checkin is disabled");
  }

  void checkout_complete() {
    common::die("core::checkout/checkin is disabled");
  }

  template <typename Mode>
  void checkin(void*, std::size_t, Mode) {
    common::die("core::checkout/checkin is disabled");
  }

  // ... (no-op release/acquire and profiling members elided in the listing)
 
  void get_impl(std::byte* from_addr, std::byte* to_addr, std::size_t size) {
    if (noncoll_mem_.has(from_addr)) {
      get_noncoll(from_addr, to_addr, size);
    } else {
      get_coll(from_addr, to_addr, size);
    }
  }
 
  void get_coll(std::byte* from_addr, std::byte* to_addr, std::size_t size) {
    coll_mem& cm = cm_manager_.get(from_addr);

    bool fetching = false;

    for_each_seg_blk<BlockSize>(cm, from_addr, size,
      // home segment: direct memcpy from the mapped physical memory
      [&](std::byte* seg_addr, std::size_t seg_size, std::size_t pm_offset) {
        const common::virtual_mem& vm = cm.home_vm();
        std::byte*  seg_addr_b = std::max(from_addr, seg_addr);
        std::byte*  seg_addr_e = std::min(seg_addr + seg_size, from_addr + size);
        std::size_t seg_offset = seg_addr_b - seg_addr;
        std::byte*  from_addr_ = reinterpret_cast<std::byte*>(vm.addr()) + pm_offset + seg_offset;
        std::byte*  to_addr_   = to_addr + (seg_addr_b - from_addr);
        std::memcpy(to_addr_, from_addr_, seg_addr_e - seg_addr_b);
      },
      // remote block: nonblocking RMA get (call reconstructed around the
      // surviving displacement argument)
      [&](std::byte* blk_addr, std::byte* req_addr_b, std::byte* req_addr_e,
          common::topology::rank_t owner, std::size_t pm_offset) {
        common::rma::get_nb(cm.win(), to_addr + (req_addr_b - from_addr),
                            req_addr_e - req_addr_b,
                            cm.win(), owner,
                            pm_offset + (req_addr_b - blk_addr));
        fetching = true;
      });

    if (fetching) {
      common::rma::flush(cm.win());  // completion reconstructed
    }
  }
 
  void get_noncoll(std::byte* from_addr, std::byte* to_addr, std::size_t size) {
    ITYR_CHECK(noncoll_mem_.has(from_addr));

    auto target_rank = noncoll_mem_.get_owner(from_addr);

    if (common::topology::is_locally_accessible(target_rank)) {  // condition reconstructed
      std::memcpy(to_addr, from_addr, size);
      return;
    }

    for_each_block<BlockSize>(from_addr, size, [&](std::byte* blk_addr,
                                                   std::byte* req_addr_b,
                                                   std::byte* req_addr_e) {
      common::rma::get_nb(noncoll_mem_.win(), to_addr + (req_addr_b - from_addr),
                          req_addr_e - req_addr_b,
                          noncoll_mem_.win(), target_rank,
                          noncoll_mem_.get_disp(blk_addr) + (req_addr_b - blk_addr));
    });

    common::rma::flush(noncoll_mem_.win());  // completion reconstructed
  }
 
  void put_impl(const std::byte* from_addr, std::byte* to_addr, std::size_t size) {
    if (noncoll_mem_.has(to_addr)) {
      put_noncoll(from_addr, to_addr, size);
    } else {
      put_coll(from_addr, to_addr, size);
    }
  }
 
  void put_coll(const std::byte* from_addr, std::byte* to_addr, std::size_t size) {
    coll_mem& cm = cm_manager_.get(to_addr);

    bool putting = false;

    for_each_seg_blk<BlockSize>(cm, to_addr, size,
      // home segment: direct memcpy into the mapped physical memory
      [&](std::byte* seg_addr, std::size_t seg_size, std::size_t pm_offset) {
        const common::virtual_mem& vm = cm.home_vm();
        std::byte*  seg_addr_b = std::max(to_addr, seg_addr);
        std::byte*  seg_addr_e = std::min(seg_addr + seg_size, to_addr + size);
        std::size_t seg_offset = seg_addr_b - seg_addr;
        const std::byte* from_addr_ = from_addr + (seg_addr_b - to_addr);
        std::byte*       to_addr_   = reinterpret_cast<std::byte*>(vm.addr()) + pm_offset + seg_offset;
        std::memcpy(to_addr_, from_addr_, seg_addr_e - seg_addr_b);
      },
      // remote block: nonblocking RMA put (call reconstructed around the
      // surviving displacement argument)
      [&](std::byte* blk_addr, std::byte* req_addr_b, std::byte* req_addr_e,
          common::topology::rank_t owner, std::size_t pm_offset) {
        common::rma::put_nb(cm.win(), from_addr + (req_addr_b - to_addr),
                            req_addr_e - req_addr_b,
                            cm.win(), owner,
                            pm_offset + (req_addr_b - blk_addr));
        putting = true;
      });

    if (putting) {
      common::rma::flush(cm.win());  // completion reconstructed
    }
  }
 
  void put_noncoll(const std::byte* from_addr, std::byte* to_addr, std::size_t size) {
    ITYR_CHECK(noncoll_mem_.has(to_addr));

    auto target_rank = noncoll_mem_.get_owner(to_addr);

    if (common::topology::is_locally_accessible(target_rank)) {  // condition reconstructed
      std::memcpy(to_addr, from_addr, size);
      return;
    }

    for_each_block<BlockSize>(to_addr, size, [&](std::byte* blk_addr,
                                                 std::byte* req_addr_b,
                                                 std::byte* req_addr_e) {
      common::rma::put_nb(noncoll_mem_.win(), from_addr + (req_addr_b - to_addr),
                          req_addr_e - req_addr_b,
                          noncoll_mem_.win(), target_rank,
                          noncoll_mem_.get_disp(blk_addr) + (req_addr_b - blk_addr));
    });

    common::rma::flush(noncoll_mem_.win());  // completion reconstructed
  }
 
  template <block_size_t BS>
  using default_mem_mapper = mem_mapper::ITYR_ORI_DEFAULT_MEM_MAPPER<BS>;

  coll_mem_manager cm_manager_;
  noncoll_mem      noncoll_mem_;
};
 
template <block_size_t BlockSize>
class core_serial {
public:
  core_serial(std::size_t, std::size_t) {}

  static constexpr block_size_t block_size = BlockSize;

  // (bodies below are reconstructed: the serial core forwards to plain
  //  std::malloc/std::free and memcpy, and checkout/checkin are no-ops)

  template <template <block_size_t> typename MemMapper, typename... MemMapperArgs>
  void* malloc_coll(std::size_t size, MemMapperArgs&&...) { return std::malloc(size); }

  void* malloc(std::size_t size) { return std::malloc(size); }

  void free_coll(void* addr) { std::free(addr); }

  void free(void* addr, std::size_t) {
    std::free(addr);
  }

  void get(const void* from_addr, void* to_addr, std::size_t size) {
    std::memcpy(to_addr, from_addr, size);
  }

  void put(const void* from_addr, void* to_addr, std::size_t size) {
    std::memcpy(to_addr, from_addr, size);
  }

  template <typename Mode>
  bool checkout_nb(void*, std::size_t, Mode) { return true; }

  template <typename Mode>
  void checkout(void*, std::size_t, Mode) {}

  template <typename Mode>
  void checkin(void*, std::size_t, Mode) {}

  // ... (remaining no-op members elided in the listing)
};
 
template <block_size_t BlockSize>
using core = ITYR_CONCAT(core_, ITYR_ORI_CORE)<BlockSize>;
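
// The active implementation is selected at compile time: ITYR_ORI_CORE
// expands to one of `default`, `nocache`, or `serial`, picking core_default,
// core_nocache, or core_serial above.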
 
ITYR_TEST_CASE("[ityr::ori::core] malloc/free with block policy") {
  // ... (runtime/topology/RMA setup and allocations elided in the listing)

  for (int i = 1; i < n; i++) {
    // ...
  }

  // ...

  for (int i = 1; i < n; i++) {
    // ...
  }
  for (int i = 1; i < n; i++) {
    c.free_coll(ptrs[i]);
  }
}
 
ITYR_TEST_CASE("[ityr::ori::core] malloc/free with cyclic policy") {
  // ... (setup elided in the listing)
  core<bs> c(16 * bs, bs / 4);

  // ...
  for (int i = 1; i < n; i++) {
    // ...
  }

  // ...
  for (int i = 1; i < n; i++) {
    ptrs[i] = c.malloc_coll<mem_mapper::cyclic>(i * 27438, bs * i);
  }
  for (int i = 1; i < n; i++) {
    c.free_coll(ptrs[i]);
  }
}
 
ITYR_TEST_CASE("[ityr::ori::core] malloc and free (noncollective)") {
  common::runtime_options common_opts;
  runtime_options opts;
  common::singleton_initializer<common::topology::instance> topo;
  common::singleton_initializer<common::rma::instance> rma;
  // ...
  core<bs> c(16 * bs, bs / 4);

  constexpr int n = 10;

  { // free immediately (subcase structure elided in the listing)
    for (int i = 0; i < n; i++) {
      void* p = c.malloc(std::size_t(1) << i);
      c.free(p, std::size_t(1) << i);
    }
  }

  { // free after all allocations
    // ...
    for (int i = 0; i < n; i++) {
      ptrs[i] = c.malloc(std::size_t(1) << i);
    }
    for (int i = 0; i < n; i++) {
      c.free(ptrs[i], std::size_t(1) << i);
    }
  }

  { // free remotely: pointers are exchanged between ranks (exchange elided)
    // ...
    for (int i = 0; i < n; i++) {
      ptrs_send[i] = c.malloc(std::size_t(1) << i);
    }
    // ...
    for (int i = 0; i < n; i++) {
      c.free(ptrs_recv[i], std::size_t(1) << i);
    }
  }
}
 
ITYR_TEST_CASE("[ityr::ori::core] get/put") {
  common::runtime_options common_opts;
  runtime_options opts;
  common::singleton_initializer<common::topology::instance> topo;
  common::singleton_initializer<common::rma::instance> rma;
  // ... (bs and n_cb elided in the listing)
  core<bs> c(n_cb * bs, bs / 4);

  // ...
  std::size_t n = n_cb * bs / sizeof(std::size_t);

  std::size_t* ps[2];
  ps[0] = reinterpret_cast<std::size_t*>(c.malloc_coll<mem_mapper::block >(n * sizeof(std::size_t)));
  ps[1] = reinterpret_cast<std::size_t*>(c.malloc_coll<mem_mapper::cyclic>(n * sizeof(std::size_t)));

  std::size_t* buf = new std::size_t[n + 2];

  for (auto p : ps) {  // (loop over both mapping policies; reconstructed)
    { // put the whole region, then get it back between sentinel values
      for (std::size_t i = 0; i < n; i++) {
        // ... (fill buf[i])
      }
      c.put(buf, p, n * sizeof(std::size_t));

      // ...
      std::size_t special = 417;
      buf[0] = buf[n + 1] = special;
      // ...
      c.get(p, buf + 1, n * sizeof(std::size_t));
      // ...
      for (std::size_t i = 0; i < n; i++) {
        // ... (verify buf[i + 1] and the sentinels)
      }
    }

    { // get a subrange
      std::size_t ib = n / 5 * 2;
      std::size_t ie = n / 5 * 4;
      std::size_t s = ie - ib;
      // ...
      std::size_t special = 417;
      buf[0] = buf[s + 1] = special;
      // ...
      c.get(p + ib, buf + 1, s * sizeof(std::size_t));
      // ...
      for (std::size_t i = 0; i < s; i++) {
        // ... (verify)
      }
    }

    { // get elements one by one
      for (std::size_t i = 0; i < n; i++) {
        std::size_t special = 417;
        buf[0] = buf[2] = special;
        // ...
        c.get(p + i, &buf[1], sizeof(std::size_t));
        // ... (verify)
      }
    }
  }

  // ... (cleanup elided in the listing)
}
 
ITYR_TEST_CASE("[ityr::ori::core] checkout/checkin (small, aligned)") {
  common::runtime_options common_opts;
  runtime_options opts;
  common::singleton_initializer<common::topology::instance> topo;
  common::singleton_initializer<common::rma::instance> rma;
  // ... (bs and n_cb elided in the listing)
  core<bs> c(n_cb * bs, bs / 4);

  // ... (n and other setup elided)
  uint8_t* ps[2];
  ps[0] = reinterpret_cast<uint8_t*>(c.malloc_coll<mem_mapper::block >(n));
  ps[1] = reinterpret_cast<uint8_t*>(c.malloc_coll<mem_mapper::cyclic>(n));

  for (auto p : ps) {  // (loop over both mapping policies; reconstructed)
    // initialize the home (locally mapped) part directly
    uint8_t* home_ptr = reinterpret_cast<uint8_t*>(c.get_local_mem(p));
    for (std::size_t i = 0; i < bs; i++) {
      // ...
    }

    // ...
    for (int i = 0; i < n; i++) {
      // ...
    }

    // circulate updates across all ranks
    for (int iter = 0; iter < n_ranks; iter++) {
      // ...
      for (int i = 0; i < n; i++) {
        // ...
      }
      // ...
      for (int i = 0; i < n; i++) {
        // ...
      }
    }

    // ...
    for (int i = 0; i < s; i++) {
      // ...
    }
  }
  // ...
}
 
ITYR_TEST_CASE("[ityr::ori::core] checkout/checkin (large, not aligned)") {
  common::runtime_options common_opts;
  runtime_options opts;
  common::singleton_initializer<common::topology::instance> topo;
  common::singleton_initializer<common::rma::instance> rma;
  // ... (bs and n_cb elided in the listing)
  core<bs> c(n_cb * bs, bs / 4);

  // ...
  std::size_t n = 10 * n_cb * bs / sizeof(std::size_t);

  std::size_t* ps[2];
  ps[0] = reinterpret_cast<std::size_t*>(c.malloc_coll<mem_mapper::block >(n * sizeof(std::size_t)));
  ps[1] = reinterpret_cast<std::size_t*>(c.malloc_coll<mem_mapper::cyclic>(n * sizeof(std::size_t)));

  std::size_t max_checkout_size = (16 - 2) * bs / sizeof(std::size_t);

  for (auto p : ps) {  // (loop over both mapping policies; reconstructed)
    { // write the whole region in max_checkout_size chunks
      for (std::size_t i = 0; i < n; i += max_checkout_size) {
        std::size_t m = std::min(max_checkout_size, n - i);
        c.checkout(p + i, m * sizeof(std::size_t), mode::write);
        for (std::size_t j = i; j < i + m; j++) {
          // ... (initialize p[j])
        }
        c.checkin(p + i, m * sizeof(std::size_t), mode::write);
      }
    }

    { // read the whole region back
      for (std::size_t i = 0; i < n; i += max_checkout_size) {
        std::size_t m = std::min(max_checkout_size, n - i);
        c.checkout(p + i, m * sizeof(std::size_t), mode::read);
        for (std::size_t j = i; j < i + m; j++) {
          // ... (verify p[j])
        }
        c.checkin(p + i, m * sizeof(std::size_t), mode::read);
      }
    }

    { // read a subrange
      std::size_t ib = n / 5 * 2;
      std::size_t ie = n / 5 * 4;
      std::size_t s = ie - ib;

      for (std::size_t i = 0; i < s; i += max_checkout_size) {
        std::size_t m = std::min(max_checkout_size, s - i);
        c.checkout(p + ib + i, m * sizeof(std::size_t), mode::read);
        for (std::size_t j = ib + i; j < ib + i + m; j++) {
          // ... (verify p[j])
        }
        c.checkin(p + ib + i, m * sizeof(std::size_t), mode::read);
      }
    }

    { // strided writes from each rank, then verify the whole region
      std::size_t stride = 48;
      for (std::size_t i = my_rank * stride; i < n; i += n_ranks * stride) {
        std::size_t s = std::min(stride, n - i);
        // ... (checkout elided)
        for (std::size_t j = i; j < i + s; j++) {
          // ...
        }
        // ... (checkin elided)
      }
      // ...
      for (std::size_t i = 0; i < n; i += max_checkout_size) {
        std::size_t m = std::min(max_checkout_size, n - i);
        c.checkout(p + i, m * sizeof(std::size_t), mode::read);
        for (std::size_t j = i; j < i + m; j++) {
          // ... (verify p[j])
        }
        c.checkin(p + i, m * sizeof(std::size_t), mode::read);
      }
    }
  }
  // ...
}
 
ITYR_TEST_CASE("[ityr::ori::core] checkout/checkin (noncontig)") {
  common::runtime_options common_opts;
  runtime_options opts;
  common::singleton_initializer<common::topology::instance> topo;
  common::singleton_initializer<common::rma::instance> rma;
  // ... (bs and n_cb elided in the listing)
  core<bs> c(n_cb * bs, bs / 4);

  // ...
  std::size_t n = 2 * n_cb * bs / sizeof(std::size_t);

  std::size_t* ps[2];
  ps[0] = reinterpret_cast<std::size_t*>(c.malloc_coll<mem_mapper::block >(n * sizeof(std::size_t)));
  ps[1] = reinterpret_cast<std::size_t*>(c.malloc_coll<mem_mapper::cyclic>(n * sizeof(std::size_t)));

  // single-element checkouts at noncontiguous addresses; the loop structure
  // was elided in the listing and only the checkout/checkin pairs survive

      c.checkout(p + i, sizeof(std::size_t), mode::write);
      // ...
      c.checkin(p + i, sizeof(std::size_t), mode::write);

      // ...
        c.checkout(p + i, sizeof(std::size_t), mode::write);
        // ...
        c.checkin(p + i, sizeof(std::size_t), mode::write);
      // ...
        c.checkout(p + i, sizeof(std::size_t), mode::read);
        // ...
        c.checkin(p + i, sizeof(std::size_t), mode::read);

      // ...
      c.checkout(p + i, sizeof(std::size_t), mode::read);
      // ...
      c.checkin(p + i, sizeof(std::size_t), mode::read);
  // ...
}
 
ITYR_TEST_CASE("[ityr::ori::core] checkout/checkin (noncollective)") {
  common::runtime_options common_opts;
  runtime_options opts;
  common::singleton_initializer<common::topology::instance> topo;
  common::singleton_initializer<common::rma::instance> rma;
  // ...
  core<bs> c(n_cb * bs, bs / 4);

  // ... (node_t definition and root allocation elided in the listing)
  int n_alloc_iter = 10;
  // ...
  root_node->next = nullptr;
  // ...

  node_t* node = root_node;
  for (int i = 0; i < niter; i++) {
    for (int j = 0; j < n_alloc_iter; j++) {
      // ... (allocate new_node noncollectively)

      // link the new node into the list
      c.checkout(&node->next, sizeof(node->next), mode::write);
      node->next = new_node;
      c.checkin(&node->next, sizeof(node->next), mode::write);

      // read the current node's value
      c.checkout(&node->value, sizeof(node->value), mode::read);
      int val = node->value;
      c.checkin(&node->value, sizeof(node->value), mode::read);

      // initialize the new node
      // ...
      new_node->next = nullptr;
      new_node->value = val + 1;
      // ...
    }
    // ...
  }

  // ... (traverse and verify, then destroy the list)
  while (node != nullptr) {
    // ...
    node_t* prev_node = node;
    // ... (advance node)
    std::destroy_at(prev_node);
    c.free(prev_node, sizeof(node_t));
  }
  // ...
}
 
ITYR_TEST_CASE("[ityr::ori::core] release/acquire fence") {
  common::runtime_options common_opts;
  runtime_options opts;
  common::singleton_initializer<common::topology::instance> topo;
  common::singleton_initializer<common::rma::instance> rma;
  // ...
  core<bs> c(n_cb * bs, bs / 4);

  // ...
  int* p = reinterpret_cast<int*>(c.malloc_coll(sizeof(int)));

  // ... (each rank updates *p in turn, fencing with release/acquire)
  for (int i = 0; i < n; i++) {
    // ...
      rh = c.release_lazy();
    // ...
  }
  // ...
}
 