Itoyori  v0.0.1
cache_profiler.hpp
Go to the documentation of this file.
1 #pragma once
2 
3 #include "ityr/common/util.hpp"
6 #include "ityr/ori/util.hpp"
7 #include "ityr/ori/options.hpp"
10 
11 namespace ityr::ori {
12 
14 public:
19  void start() {}
20  void stop() {}
21  void print() const {}
22 };
23 
25 public:
27  : n_blocks_(n_blocks),
28  blocks_(n_blocks_) {}
29 
30  void record(cache_entry_idx_t block_idx,
31  block_region requested_region,
32  const block_region_set& fetched_regions) {
33  ITYR_CHECK(0 <= block_idx);
34  ITYR_CHECK(block_idx < n_blocks_);
35  cache_block& blk = blocks_[block_idx];
36 
37  if (enabled_) {
38  requested_bytes_ += requested_region.size();
39  fetched_bytes_ += fetched_regions.size();
40 
41  block_region_set hit_regions = fetched_regions.complement(requested_region);
42  block_region_set temporal_hit_regions = get_intersection(hit_regions, blk.requested_regions);
43 
44  std::size_t temporal_hit_size = temporal_hit_regions.size();
45  std::size_t spatial_hit_size = hit_regions.size() - temporal_hit_size;
46 
47  temporal_hit_bytes_ += temporal_hit_size;
48  spatial_hit_bytes_ += spatial_hit_size;
49 
50  if (fetched_regions.empty()) {
51  block_hit_count_++;
52  } else {
53  block_miss_count_++;
54  }
55  }
56 
57  blk.requested_regions.add(requested_region);
58  }
59 
61  block_region requested_region,
62  const block_region_set& valid_regions) {
63  ITYR_CHECK(0 <= block_idx);
64  ITYR_CHECK(block_idx < n_blocks_);
65  cache_block& blk = blocks_[block_idx];
66 
67  if (enabled_) {
68  requested_bytes_ += requested_region.size();
69 
70  block_region_set skip_fetch_hit_regions = valid_regions.complement(requested_region);
71 
72  block_region_set hit_regions = skip_fetch_hit_regions.complement(requested_region);
73  block_region_set temporal_hit_regions = get_intersection(hit_regions, blk.requested_regions);
74 
75  std::size_t temporal_hit_size = temporal_hit_regions.size();
76  std::size_t spatial_hit_size = hit_regions.size() - temporal_hit_size;
77 
78  temporal_hit_bytes_ += temporal_hit_size;
79  spatial_hit_bytes_ += spatial_hit_size;
80 
81  skip_fetch_hit_bytes_ += skip_fetch_hit_regions.size();
82 
83  block_hit_count_++;
84  }
85 
86  blk.requested_regions.add(requested_region);
87  }
88 
89  void invalidate(cache_entry_idx_t block_idx, const block_region_set& valid_regions) {
90  ITYR_CHECK(0 <= block_idx);
91  ITYR_CHECK(block_idx < n_blocks_);
92  cache_block& blk = blocks_[block_idx];
93 
94  if (enabled_) {
95  wasted_fetched_bytes_ += valid_regions.size() - blk.requested_regions.size();
96  }
97 
98  blk.requested_regions.clear();
99  }
100 
101  void start() {
102  requested_bytes_ = 0;
103  fetched_bytes_ = 0;
104  wasted_fetched_bytes_ = 0;
105  temporal_hit_bytes_ = 0;
106  spatial_hit_bytes_ = 0;
107  skip_fetch_hit_bytes_ = 0;
108  block_hit_count_ = 0;
109  block_miss_count_ = 0;
110 
111  enabled_ = true;
112  }
113 
114  void stop() {
115  enabled_ = false;
116  }
117 
118  void print() const {
119  auto requested_bytes_all = common::mpi_reduce_value(requested_bytes_ , 0, common::topology::mpicomm());
120  auto fetched_bytes_all = common::mpi_reduce_value(fetched_bytes_ , 0, common::topology::mpicomm());
121  auto wasted_fetched_bytes_all = common::mpi_reduce_value(wasted_fetched_bytes_, 0, common::topology::mpicomm());
122  auto temporal_hit_bytes_all = common::mpi_reduce_value(temporal_hit_bytes_ , 0, common::topology::mpicomm());
123  auto spatial_hit_bytes_all = common::mpi_reduce_value(spatial_hit_bytes_ , 0, common::topology::mpicomm());
124  auto skip_fetch_hit_bytes_all = common::mpi_reduce_value(skip_fetch_hit_bytes_, 0, common::topology::mpicomm());
125  auto block_hit_count_all = common::mpi_reduce_value(block_hit_count_ , 0, common::topology::mpicomm());
126  auto block_miss_count_all = common::mpi_reduce_value(block_miss_count_ , 0, common::topology::mpicomm());
127 
128  if (common::topology::my_rank() == 0) {
129  printf("[Cache blocks]\n");
130  printf(" User requested: %18ld bytes\n" , requested_bytes_all);
131  printf(" Fetched: %18ld bytes\n" , fetched_bytes_all);
132  printf(" Fetched (wasted): %18ld bytes\n" , wasted_fetched_bytes_all);
133  printf(" Temporal hit: %18ld bytes\n" , temporal_hit_bytes_all);
134  printf(" Spatial hit: %18ld bytes\n" , spatial_hit_bytes_all);
135  printf(" Skip-fetch hit: %18ld bytes\n" , skip_fetch_hit_bytes_all);
136  printf(" Hit count: %18ld blocks\n", block_hit_count_all);
137  printf(" Miss count: %18ld blocks\n", block_miss_count_all);
138  printf("\n");
139  fflush(stdout);
140  }
141  }
142 
143 private:
144  struct cache_block {
145  block_region_set requested_regions;
146  };
147 
148  cache_entry_idx_t n_blocks_;
149  std::vector<cache_block> blocks_;
150 
151  std::size_t requested_bytes_ = 0; // requested by the user (through checkout calls)
152  std::size_t fetched_bytes_ = 0; // fetched from remote processes
153  std::size_t wasted_fetched_bytes_ = 0; // fetched but not requested by the user
154  std::size_t temporal_hit_bytes_ = 0; // cache hit for data requested again by the user
155  std::size_t spatial_hit_bytes_ = 0; // cache hit for data not previously requested by the user
156  std::size_t skip_fetch_hit_bytes_ = 0; // cache hit for write-only data (skipping remote fetch)
157  std::size_t block_hit_count_ = 0; // Cache hits counted for each block
158  std::size_t block_miss_count_ = 0; // Cache misses counted for each block
159 
160  bool enabled_ = false;
161 };
162 
164 
165 }
Definition: cache_profiler.hpp:13
void invalidate(cache_entry_idx_t, const block_region_set &)
Definition: cache_profiler.hpp:18
void stop()
Definition: cache_profiler.hpp:20
void record_writeonly(cache_entry_idx_t, block_region, const block_region_set &)
Definition: cache_profiler.hpp:17
void record(cache_entry_idx_t, block_region, const block_region_set &)
Definition: cache_profiler.hpp:16
void print() const
Definition: cache_profiler.hpp:21
void start()
Definition: cache_profiler.hpp:19
cache_profiler_disabled(cache_entry_idx_t)
Definition: cache_profiler.hpp:15
Definition: cache_profiler.hpp:24
void invalidate(cache_entry_idx_t block_idx, const block_region_set &valid_regions)
Definition: cache_profiler.hpp:89
void stop()
Definition: cache_profiler.hpp:114
void record_writeonly(cache_entry_idx_t block_idx, block_region requested_region, const block_region_set &valid_regions)
Definition: cache_profiler.hpp:60
void print() const
Definition: cache_profiler.hpp:118
cache_profiler_stats(cache_entry_idx_t n_blocks)
Definition: cache_profiler.hpp:26
void start()
Definition: cache_profiler.hpp:101
void record(cache_entry_idx_t block_idx, block_region requested_region, const block_region_set &fetched_regions)
Definition: cache_profiler.hpp:30
region_set< T > complement(region< T > r) const
Definition: block_region_set.hpp:162
bool empty() const
Definition: block_region_set.hpp:82
std::size_t size() const
Definition: block_region_set.hpp:242
#define ITYR_CONCAT(x, y)
Definition: util.hpp:20
#define ITYR_CHECK(cond)
Definition: util.hpp:48
MPI_Comm mpicomm()
Definition: topology.hpp:206
rank_t my_rank()
Definition: topology.hpp:207
fflush(stderr)
T mpi_reduce_value(const T &value, int root_rank, MPI_Comm comm, MPI_Op op=MPI_SUM)
Definition: mpi_util.hpp:170
Definition: block_region_set.hpp:9
ITYR_CONCAT(cache_profiler_, ITYR_ORI_CACHE_PROF) cache_profiler
Definition: cache_profiler.hpp:163
region< T > get_intersection(const region< T > &r1, const region< T > &r2)
Definition: block_region_set.hpp:56
int cache_entry_idx_t
Definition: cache_system.hpp:30
#define ITYR_ORI_CACHE_PROF
Definition: block_region_set.hpp:12
std::size_t size() const
Definition: block_region_set.hpp:23