47     work_ += t_stop - t_start_;
 
   48     span_ += t_stop - t_start_;
 
   68     n_threads_ += dp.n_threads_;
 
   69     n_strands_ += dp.n_strands_;
 
   78     n_threads_ += dp.n_threads_;
 
   79     n_strands_ += dp.n_strands_;
 
   86     printf(
"work: %ld ns span: %ld ns parallelism: %f\n" 
   87            "n_threads: %ld (ave: %ld ns) n_strands: %ld (ave: %ld ns)\n\n",
 
   88            work_, span_, (span_ == 0) ? 0 : 
static_cast<double>(work_) / span_,
 
   89            n_threads_, (n_threads_ == 0) ? 0 : work_ / n_threads_,
 
   90            n_strands_, (n_strands_ == 0) ? 0 : work_ / n_strands_);
 
   98   uint64_t                       n_threads_ = 0;
 
   99   uint64_t                       n_strands_ = 0;
 
  114 template <
typename Fn, 
typename... Args>
 
  117   template <
typename Fn_, 
typename... Args_>
 
  119     : fn_(std::forward<Fn_>(fn)), arg_(std::forward<Args_>(
args)...) {}
 
  123   std::tuple<Args...> arg_;
 
  130   static std::mt19937 engine(std::random_device{}());
 
  135   std::uniform_int_distribution<common::topology::rank_t> dist(a, b);
 
  148 template <
typename T, 
typename Fn, 
typename ArgsTuple>
 
  149 inline decltype(
auto) 
invoke_fn(Fn&& fn, ArgsTuple&& args_tuple) {
 
  150   if constexpr (!std::is_same_v<T, no_retval_t>) {
 
  151     return std::apply(std::forward<Fn>(fn), std::forward<ArgsTuple>(args_tuple));
 
  153     std::apply(std::forward<Fn>(fn), std::forward<ArgsTuple>(args_tuple));
 
  162 template <
typename Fn, 
typename... Args>
 
  164   using type = std::invoke_result_t<Fn, Args...>;
 
  167 template <
typename... Args>
 
  172 template <
typename... Args>
 
  177 template <
typename Fn, 
typename... Args>
 
  180 template <
typename PhaseFrom, 
typename PhaseFn, 
typename PhaseTo,
 
  181           typename Fn, 
typename... Args>
 
  185   if constexpr (!std::is_null_pointer_v<std::remove_reference_t<Fn>>) {
 
  186     common::profiler::switch_phase<PhaseFrom, PhaseFn>();
 
  188     if constexpr (!std::is_void_v<retval_t>) {
 
  189       auto ret = std::forward<Fn>(fn)(std::forward<Args>(
args)...);
 
  190       common::profiler::switch_phase<PhaseFn, PhaseTo>();
 
  194       std::forward<Fn>(fn)(std::forward<Args>(
args)...);
 
  195       common::profiler::switch_phase<PhaseFn, PhaseTo>();
 
  198   } 
else if constexpr (!std::is_same_v<PhaseFrom, PhaseTo>) {
 
  199     common::profiler::switch_phase<PhaseFrom, PhaseTo>();
 
  202   if constexpr (!std::is_void_v<retval_t>) {
 
  209 template <
typename PhaseFrom, 
typename PhaseFn, 
typename PhaseTo,
 
  210           typename Fn, 
typename... Args>
 
  213   return call_with_prof_events<PhaseFrom, PhaseFn, PhaseTo>(
 
  214       std::forward<Fn>(fn), std::forward<Args>(
args)...);
 
  221 template <
typename Entry>
 
  223   static_assert(std::is_trivially_copyable_v<Entry>);
 
  227     : win_(common::topology::
mpicomm(), 1) {}
 
  237   std::optional<Entry> 
pop() {
 
  239     if (mb.arrived.load(std::memory_order_acquire)) {
 
  240       mb.arrived.store(0, std::memory_order_relaxed);
 
  248     return win_.
local_buf()[0].arrived.load(std::memory_order_relaxed);
 
  264     : win_(common::topology::
mpicomm(), 1) {}
 
  275     if (mb.arrived.load(std::memory_order_acquire)) {
 
  276       mb.arrived.store(0, std::memory_order_relaxed);
 
  284     return win_.
local_buf()[0].arrived.load(std::memory_order_relaxed);
 
span< T > local_buf() const
Definition: mpi_rma.hpp:412
 
MPI_Win win() const
Definition: mpi_rma.hpp:409
 
void execute()
Definition: util.hpp:120
 
callable_task(Fn_ &&fn, Args_ &&... args)
Definition: util.hpp:118
 
void clear()
Definition: util.hpp:27
 
void increment_strand_count()
Definition: util.hpp:31
 
static constexpr bool enabled
Definition: util.hpp:23
 
void merge_serial(const dag_profiler_disabled &)
Definition: util.hpp:28
 
void stop()
Definition: util.hpp:25
 
void increment_thread_count()
Definition: util.hpp:30
 
void start()
Definition: util.hpp:24
 
void merge_parallel(const dag_profiler_disabled &)
Definition: util.hpp:29
 
void print() const
Definition: util.hpp:32
 
bool is_stopped() const
Definition: util.hpp:26
 
void stop()
Definition: util.hpp:44
 
void print() const
Definition: util.hpp:85
 
void start()
Definition: util.hpp:39
 
void increment_strand_count()
Definition: util.hpp:83
 
static constexpr bool enabled
Definition: util.hpp:37
 
void clear()
Definition: util.hpp:54
 
void merge_parallel(const dag_profiler_workspan &dp)
Definition: util.hpp:72
 
void increment_thread_count()
Definition: util.hpp:82
 
bool is_stopped() const
Definition: util.hpp:52
 
void merge_serial(const dag_profiler_workspan &dp)
Definition: util.hpp:62
 
void put(common::topology::rank_t target_rank)
Definition: util.hpp:266
 
oneslot_mailbox()
Definition: util.hpp:263
 
bool arrived() const
Definition: util.hpp:283
 
bool pop()
Definition: util.hpp:273
 
bool arrived() const
Definition: util.hpp:247
 
std::optional< Entry > pop()
Definition: util.hpp:237
 
oneslot_mailbox()
Definition: util.hpp:226
 
void put(const Entry &entry, common::topology::rank_t target_rank)
Definition: util.hpp:229
 
virtual ~task_general()=default
 
#define ITYR_CONCAT(x, y)
Definition: util.hpp:20
 
#define ITYR_CHECK(cond)
Definition: util.hpp:48
 
#define ITYR_ITO_DAG_PROF
 
rank_t n_ranks()
Definition: topology.hpp:208
 
int rank_t
Definition: topology.hpp:12
 
MPI_Comm mpicomm()
Definition: topology.hpp:206
 
rank_t my_rank()
Definition: topology.hpp:207
 
uint64_t wallclock_t
Definition: wallclock.hpp:13
 
wallclock_t gettime_ns()
Definition: wallclock.hpp:88
 
T mpi_atomic_put_value(const T &value, int target_rank, std::size_t target_disp, MPI_Win win)
Definition: mpi_rma.hpp:283
 
va_list args
Definition: util.hpp:76
 
void mpi_put_value(const T &value, int target_rank, std::size_t target_disp, MPI_Win win)
Definition: mpi_rma.hpp:165
 
Definition: aarch64.hpp:5
 
typename callback_retval< Fn, Args... >::type callback_retval_t
Definition: util.hpp:178
 
decltype(auto) invoke_fn(Fn &&fn, ArgsTuple &&args_tuple)
Definition: util.hpp:149
 
auto call_with_prof_events(Fn &&fn, Args &&... args)
Definition: util.hpp:182
 
ITYR_CONCAT(dag_profiler_, ITYR_ITO_DAG_PROF) dag_profiler
Definition: util.hpp:102
 
common::topology::rank_t get_random_rank(common::topology::rank_t a, common::topology::rank_t b)
Definition: util.hpp:128
 
monoid< T, max_functor<>, lowest< T > > max
Definition: reducer.hpp:104
 
ForwardIteratorD move(const ExecutionPolicy &policy, ForwardIterator1 first1, ForwardIterator1 last1, ForwardIteratorD first_d)
Move a range to another.
Definition: parallel_loop.hpp:934
 
#define ITYR_PROFILER_RECORD(event,...)
Definition: profiler.hpp:319
 
void type
Definition: util.hpp:169
 
void type
Definition: util.hpp:174
 
std::invoke_result_t< Fn, Args... > type
Definition: util.hpp:164
 
Definition: prof_events.hpp:90