47 work_ += t_stop - t_start_;
48 span_ += t_stop - t_start_;
68 n_threads_ += dp.n_threads_;
69 n_strands_ += dp.n_strands_;
78 n_threads_ += dp.n_threads_;
79 n_strands_ += dp.n_strands_;
86 printf(
"work: %ld ns span: %ld ns parallelism: %f\n"
87 "n_threads: %ld (ave: %ld ns) n_strands: %ld (ave: %ld ns)\n\n",
88 work_, span_, (span_ == 0) ? 0 :
static_cast<double>(work_) / span_,
89 n_threads_, (n_threads_ == 0) ? 0 : work_ / n_threads_,
90 n_strands_, (n_strands_ == 0) ? 0 : work_ / n_strands_);
98 uint64_t n_threads_ = 0;
99 uint64_t n_strands_ = 0;
114 template <
typename Fn,
typename... Args>
117 template <
typename Fn_,
typename... Args_>
119 : fn_(std::forward<Fn_>(fn)), arg_(std::forward<Args_>(
args)...) {}
123 std::tuple<Args...> arg_;
130 static std::mt19937 engine(std::random_device{}());
135 std::uniform_int_distribution<common::topology::rank_t> dist(a, b);
148 template <
typename T,
typename Fn,
typename ArgsTuple>
149 inline decltype(
auto)
invoke_fn(Fn&& fn, ArgsTuple&& args_tuple) {
150 if constexpr (!std::is_same_v<T, no_retval_t>) {
151 return std::apply(std::forward<Fn>(fn), std::forward<ArgsTuple>(args_tuple));
153 std::apply(std::forward<Fn>(fn), std::forward<ArgsTuple>(args_tuple));
162 template <
typename Fn,
typename... Args>
164 using type = std::invoke_result_t<Fn, Args...>;
167 template <
typename... Args>
172 template <
typename... Args>
177 template <
typename Fn,
typename... Args>
180 template <
typename PhaseFrom,
typename PhaseFn,
typename PhaseTo,
181 typename Fn,
typename... Args>
185 if constexpr (!std::is_null_pointer_v<std::remove_reference_t<Fn>>) {
186 common::profiler::switch_phase<PhaseFrom, PhaseFn>();
188 if constexpr (!std::is_void_v<retval_t>) {
189 auto ret = std::forward<Fn>(fn)(std::forward<Args>(
args)...);
190 common::profiler::switch_phase<PhaseFn, PhaseTo>();
194 std::forward<Fn>(fn)(std::forward<Args>(
args)...);
195 common::profiler::switch_phase<PhaseFn, PhaseTo>();
198 }
else if constexpr (!std::is_same_v<PhaseFrom, PhaseTo>) {
199 common::profiler::switch_phase<PhaseFrom, PhaseTo>();
202 if constexpr (!std::is_void_v<retval_t>) {
209 template <
typename PhaseFrom,
typename PhaseFn,
typename PhaseTo,
210 typename Fn,
typename... Args>
213 return call_with_prof_events<PhaseFrom, PhaseFn, PhaseTo>(
214 std::forward<Fn>(fn), std::forward<Args>(
args)...);
221 template <
typename Entry>
223 static_assert(std::is_trivially_copyable_v<Entry>);
227 : win_(common::topology::
mpicomm(), 1) {}
237 std::optional<Entry>
pop() {
239 if (mb.arrived.load(std::memory_order_acquire)) {
240 mb.arrived.store(0, std::memory_order_relaxed);
248 return win_.
local_buf()[0].arrived.load(std::memory_order_relaxed);
264 : win_(common::topology::
mpicomm(), 1) {}
275 if (mb.arrived.load(std::memory_order_acquire)) {
276 mb.arrived.store(0, std::memory_order_relaxed);
284 return win_.
local_buf()[0].arrived.load(std::memory_order_relaxed);
span< T > local_buf() const
Definition: mpi_rma.hpp:412
MPI_Win win() const
Definition: mpi_rma.hpp:409
void execute()
Definition: util.hpp:120
callable_task(Fn_ &&fn, Args_ &&... args)
Definition: util.hpp:118
void clear()
Definition: util.hpp:27
void increment_strand_count()
Definition: util.hpp:31
static constexpr bool enabled
Definition: util.hpp:23
void merge_serial(const dag_profiler_disabled &)
Definition: util.hpp:28
void stop()
Definition: util.hpp:25
void increment_thread_count()
Definition: util.hpp:30
void start()
Definition: util.hpp:24
void merge_parallel(const dag_profiler_disabled &)
Definition: util.hpp:29
void print() const
Definition: util.hpp:32
bool is_stopped() const
Definition: util.hpp:26
void stop()
Definition: util.hpp:44
void print() const
Definition: util.hpp:85
void start()
Definition: util.hpp:39
void increment_strand_count()
Definition: util.hpp:83
static constexpr bool enabled
Definition: util.hpp:37
void clear()
Definition: util.hpp:54
void merge_parallel(const dag_profiler_workspan &dp)
Definition: util.hpp:72
void increment_thread_count()
Definition: util.hpp:82
bool is_stopped() const
Definition: util.hpp:52
void merge_serial(const dag_profiler_workspan &dp)
Definition: util.hpp:62
void put(common::topology::rank_t target_rank)
Definition: util.hpp:266
oneslot_mailbox()
Definition: util.hpp:263
bool arrived() const
Definition: util.hpp:283
bool pop()
Definition: util.hpp:273
bool arrived() const
Definition: util.hpp:247
std::optional< Entry > pop()
Definition: util.hpp:237
oneslot_mailbox()
Definition: util.hpp:226
void put(const Entry &entry, common::topology::rank_t target_rank)
Definition: util.hpp:229
virtual ~task_general()=default
#define ITYR_CONCAT(x, y)
Definition: util.hpp:20
#define ITYR_CHECK(cond)
Definition: util.hpp:48
#define ITYR_ITO_DAG_PROF
rank_t n_ranks()
Definition: topology.hpp:208
int rank_t
Definition: topology.hpp:12
MPI_Comm mpicomm()
Definition: topology.hpp:206
rank_t my_rank()
Definition: topology.hpp:207
uint64_t wallclock_t
Definition: wallclock.hpp:13
wallclock_t gettime_ns()
Definition: wallclock.hpp:88
T mpi_atomic_put_value(const T &value, int target_rank, std::size_t target_disp, MPI_Win win)
Definition: mpi_rma.hpp:283
va_list args
Definition: util.hpp:76
void mpi_put_value(const T &value, int target_rank, std::size_t target_disp, MPI_Win win)
Definition: mpi_rma.hpp:165
Definition: aarch64.hpp:5
typename callback_retval< Fn, Args... >::type callback_retval_t
Definition: util.hpp:178
decltype(auto) invoke_fn(Fn &&fn, ArgsTuple &&args_tuple)
Definition: util.hpp:149
auto call_with_prof_events(Fn &&fn, Args &&... args)
Definition: util.hpp:182
ITYR_CONCAT(dag_profiler_, ITYR_ITO_DAG_PROF) dag_profiler
Definition: util.hpp:102
common::topology::rank_t get_random_rank(common::topology::rank_t a, common::topology::rank_t b)
Definition: util.hpp:128
monoid< T, max_functor<>, lowest< T > > max
Definition: reducer.hpp:104
ForwardIteratorD move(const ExecutionPolicy &policy, ForwardIterator1 first1, ForwardIterator1 last1, ForwardIteratorD first_d)
Move a range to another.
Definition: parallel_loop.hpp:934
#define ITYR_PROFILER_RECORD(event,...)
Definition: profiler.hpp:319
void type
Definition: util.hpp:169
void type
Definition: util.hpp:174
std::invoke_result_t< Fn, Args... > type
Definition: util.hpp:164
Definition: prof_events.hpp:90