Itoyori  v0.0.1
parallel_reduce.hpp
Go to the documentation of this file.
1 #pragma once
2 
3 #include "ityr/common/util.hpp"
5 #include "ityr/ito/ito.hpp"
6 #include "ityr/ori/ori.hpp"
11 #include "ityr/pattern/reducer.hpp"
12 
13 namespace ityr {
14 
15 namespace internal {
16 
17 template <typename W, typename AccumulateOp, typename CombineOp, typename Reducer,
18  typename ReleaseHandler, typename ForwardIterator, typename... ForwardIterators>
19 inline typename Reducer::accumulator_type
20 parallel_reduce_generic(const execution::parallel_policy<W>& policy,
21  AccumulateOp accumulate_op,
22  CombineOp combine_op,
23  Reducer reducer,
24  typename Reducer::accumulator_type&& acc,
25  ReleaseHandler rh,
26  ForwardIterator first,
27  ForwardIterator last,
28  ForwardIterators... firsts) {
29  using acc_t = typename Reducer::accumulator_type;
30 
31  ori::poll();
32 
33  // for immediately executing cross-worker tasks in ADWS
34  ito::poll([] { return ori::release_lazy(); },
35  [&](ori::release_handler rh_) { ori::acquire(rh); ori::acquire(rh_); });
36 
37  std::size_t d = std::distance(first, last);
38  if (d <= policy.cutoff_count) {
39  for_each_aux(
40  execution::internal::to_sequenced_policy(policy),
41  [&](auto&&... refs) {
42  accumulate_op(acc, std::forward<decltype(refs)>(refs)...);
43  },
44  first, last, firsts...);
45  return std::move(acc);
46  }
47 
48  auto mid = std::next(first, d / 2);
49 
50  ito::task_group_data tgdata;
51  ito::task_group_begin(&tgdata);
52 
53  auto&& [p1, p2] = execution::internal::get_child_policies(policy);
54 
55  ito::thread<acc_t> th(
56  ito::with_callback, [=] { ori::acquire(rh); }, [] { ori::release(); },
57  execution::internal::get_workhint(policy),
58  [=, p1 = p1, acc = std::move(acc)]() mutable {
59  return parallel_reduce_generic(p1, accumulate_op, combine_op, reducer,
60  std::move(acc), rh, first, mid, firsts...);
61  });
62 
63  if (th.serialized()) {
64  acc_t acc_r = parallel_reduce_generic(p2, accumulate_op, combine_op, reducer,
65  th.join(), rh, mid, last, std::next(firsts, d / 2)...);
66 
67  ito::task_group_end([] { ori::release(); }, [] { ori::acquire(); });
68 
69  return acc_r;
70 
71  } else {
72  acc_t new_acc = reducer();
73  rh = ori::release_lazy();
74 
75  acc_t acc_r = parallel_reduce_generic(p2, accumulate_op, combine_op, reducer,
76  std::move(new_acc), rh, mid, last, std::next(firsts, d / 2)...);
77 
78  ori::release();
79 
80  acc_t acc_l = th.join();
81 
82  ito::task_group_end([] { ori::release(); }, [] { ori::acquire(); });
83 
84  ori::acquire();
85 
86  combine_op(acc_l, std::move(acc_r), first, mid, last, firsts...);
87  return acc_l;
88  }
89 }
90 
91 template <typename AccumulateOp, typename CombineOp, typename Reducer,
92  typename ForwardIterator, typename... ForwardIterators>
93 inline typename Reducer::accumulator_type
94 reduce_generic(const execution::sequenced_policy& policy,
95  AccumulateOp accumulate_op,
96  CombineOp combine_op [[maybe_unused]],
97  Reducer reducer [[maybe_unused]],
98  typename Reducer::accumulator_type&& acc,
99  ForwardIterator first,
100  ForwardIterator last,
101  ForwardIterators... firsts) {
102  execution::internal::assert_policy(policy);
103  for_each_aux(
104  execution::internal::to_sequenced_policy(policy),
105  [&](auto&&... refs) {
106  accumulate_op(acc, std::forward<decltype(refs)>(refs)...);
107  },
108  first, last, firsts...);
109  return std::move(acc);
110 }
111 
112 template <typename W, typename AccumulateOp, typename CombineOp, typename Reducer,
113  typename ForwardIterator, typename... ForwardIterators>
114 inline typename Reducer::accumulator_type
115 reduce_generic(const execution::parallel_policy<W>& policy,
116  AccumulateOp accumulate_op,
117  CombineOp combine_op,
118  Reducer reducer,
119  typename Reducer::accumulator_type&& acc,
120  ForwardIterator first,
121  ForwardIterator last,
122  ForwardIterators... firsts) {
123  execution::internal::assert_policy(policy);
124  auto rh = ori::release_lazy();
125  return parallel_reduce_generic(policy, accumulate_op, combine_op, reducer, std::move(acc),
126  rh, first, last, firsts...);
127 }
128 
129 }
130 
164 template <typename ExecutionPolicy, typename ForwardIterator,
165  typename Reducer, typename UnaryTransformOp>
166 inline typename Reducer::accumulator_type
167 transform_reduce(const ExecutionPolicy& policy,
168  ForwardIterator first,
169  ForwardIterator last,
170  Reducer reducer,
171  UnaryTransformOp unary_transform_op) {
172  if constexpr (ori::is_global_ptr_v<ForwardIterator>) {
173  return transform_reduce(
174  policy,
175  internal::convert_to_global_iterator(first, checkout_mode::read),
176  internal::convert_to_global_iterator(last , checkout_mode::read),
177  reducer,
178  unary_transform_op);
179 
180  } else {
181  auto accumulate_op = [=](auto&& acc, const auto& r) {
182  reducer(std::forward<decltype(acc)>(acc), unary_transform_op(r));
183  };
184 
185  auto combine_op = [=](auto&& acc1, auto&& acc2,
186  ForwardIterator, ForwardIterator, ForwardIterator) {
187  reducer(std::forward<decltype(acc1)>(acc1), std::forward<decltype(acc2)>(acc2));
188  };
189 
190  return internal::reduce_generic(policy, accumulate_op, combine_op, reducer,
191  reducer(), first, last);
192  }
193 }
194 
230 template <typename ExecutionPolicy, typename ForwardIterator1, typename ForwardIterator2,
231  typename Reducer, typename BinaryTransformOp>
232 inline typename Reducer::accumulator_type
233 transform_reduce(const ExecutionPolicy& policy,
234  ForwardIterator1 first1,
235  ForwardIterator1 last1,
236  ForwardIterator2 first2,
237  Reducer reducer,
238  BinaryTransformOp binary_transform_op) {
239  if constexpr (ori::is_global_ptr_v<ForwardIterator1> ||
240  ori::is_global_ptr_v<ForwardIterator2>) {
241  return transform_reduce(
242  policy,
243  internal::convert_to_global_iterator(first1, checkout_mode::read),
244  internal::convert_to_global_iterator(last1 , checkout_mode::read),
245  internal::convert_to_global_iterator(first2, checkout_mode::read),
246  reducer,
247  binary_transform_op);
248 
249  } else {
250  auto accumulate_op = [=](auto&& acc, const auto& r1, const auto& r2) {
251  reducer(std::forward<decltype(acc)>(acc), binary_transform_op(r1, r2));
252  };
253 
254  auto combine_op = [=](auto&& acc1, auto&& acc2,
255  ForwardIterator1, ForwardIterator1, ForwardIterator1, ForwardIterator2) {
256  reducer(std::forward<decltype(acc1)>(acc1), std::forward<decltype(acc2)>(acc2));
257  };
258 
259  return internal::reduce_generic(policy, accumulate_op, combine_op, reducer,
260  reducer(), first1, last1, first2);
261  }
262 }
263 
292 template <typename ExecutionPolicy, typename ForwardIterator1, typename ForwardIterator2>
293 inline auto transform_reduce(const ExecutionPolicy& policy,
294  ForwardIterator1 first1,
295  ForwardIterator1 last1,
296  ForwardIterator2 first2) {
297  using T = decltype((*first1) * (*first2));
298  return transform_reduce(policy, first1, last1, first2, reducer::plus<T>{}, std::multiplies<>{});
299 }
300 
/**
 * @brief Calculate reduction.
 *
 * Forwards to the unary `transform_reduce` with a transformation that perfectly forwards each
 * element unchanged.
 *
 * @param policy  Execution policy.
 * @param first   Beginning of the input range.
 * @param last    End of the input range.
 * @param reducer Reducer object used to accumulate the elements.
 * @return The final accumulator.
 */
template <typename ExecutionPolicy, typename ForwardIterator, typename Reducer>
inline typename Reducer::accumulator_type
reduce(const ExecutionPolicy& policy,
       ForwardIterator        first,
       ForwardIterator        last,
       Reducer                reducer) {
  // Identity transformation: hand every element through as-is, preserving its value category.
  auto pass_through = [](auto&& elem) -> decltype(auto) {
    return std::forward<decltype(elem)>(elem);
  };
  return transform_reduce(policy, first, last, reducer, pass_through);
}
347 
360 template <typename ExecutionPolicy, typename ForwardIterator>
361 inline typename std::iterator_traits<ForwardIterator>::value_type
362 reduce(const ExecutionPolicy& policy,
363  ForwardIterator first,
364  ForwardIterator last) {
365  using T = typename std::iterator_traits<ForwardIterator>::value_type;
366  return reduce(policy, first, last, reducer::plus<T>{});
367 }
368 
369 ITYR_TEST_CASE("[ityr::pattern::parallel_reduce] reduce and transform_reduce") {
370  ito::init();
371  ori::init();
372 
373  ITYR_SUBCASE("default cutoff") {
374  long n = 10000;
375  long r = ito::root_exec([=] {
376  return reduce(
378  count_iterator<long>(0),
379  count_iterator<long>(n));
380  });
381  ITYR_CHECK(r == n * (n - 1) / 2);
382  }
383 
384  ITYR_SUBCASE("custom cutoff") {
385  long n = 100000;
386  long r = ito::root_exec([=] {
387  return reduce(
388  execution::parallel_policy(100),
389  count_iterator<long>(0),
390  count_iterator<long>(n));
391  });
392  ITYR_CHECK(r == n * (n - 1) / 2);
393  }
394 
395  ITYR_SUBCASE("transform unary") {
396  long n = 100000;
397  long r = ito::root_exec([=] {
398  return transform_reduce(
399  execution::parallel_policy(100),
400  count_iterator<long>(0),
401  count_iterator<long>(n),
402  reducer::plus<long>{},
403  [](long x) { return x * x; });
404  });
405  ITYR_CHECK(r == n * (n - 1) * (2 * n - 1) / 6);
406  }
407 
408  ITYR_SUBCASE("transform binary") {
409  long n = 100000;
410  long r = ito::root_exec([=] {
411  return transform_reduce(
412  execution::parallel_policy(100),
413  count_iterator<long>(0),
414  count_iterator<long>(n),
415  count_iterator<long>(0),
416  reducer::plus<long>{},
417  [](long x, long y) { return x * y; });
418  });
419  ITYR_CHECK(r == n * (n - 1) * (2 * n - 1) / 6);
420  }
421 
422  ITYR_SUBCASE("zero elements") {
423  long r = ito::root_exec([=] {
424  return reduce(
425  execution::parallel_policy(100),
426  count_iterator<long>(0),
427  count_iterator<long>(0));
428  });
429  ITYR_CHECK(r == 0);
430  }
431 
432  ori::fini();
433  ito::fini();
434 }
435 
436 ITYR_TEST_CASE("[ityr::pattern::parallel_reduce] parallel reduce with global_ptr") {
437  ito::init();
438  ori::init();
439 
440  long n = 100000;
441  ori::global_ptr<long> p = ori::malloc_coll<long>(n);
442 
443  ito::root_exec([=] {
444  long count = 0;
445  for_each(
446  execution::sequenced_policy(100),
449  [&](long& v) { v = count++; });
450  });
451 
452  ITYR_SUBCASE("default cutoff") {
453  long r = ito::root_exec([=] {
454  return reduce(
456  p, p + n);
457  });
458  ITYR_CHECK(r == n * (n - 1) / 2);
459  }
460 
461  ITYR_SUBCASE("custom cutoff and checkout count") {
462  long r = ito::root_exec([=] {
463  return reduce(
464  execution::parallel_policy(100),
465  p, p + n);
466  });
467  ITYR_CHECK(r == n * (n - 1) / 2);
468  }
469 
470  ITYR_SUBCASE("without auto checkout") {
471  long r = ito::root_exec([=] {
472  return transform_reduce(
476  reducer::plus<long>{},
477  [](ori::global_ref<long> gref) {
478  return gref.get();
479  });
480  });
481  ITYR_CHECK(r == n * (n - 1) / 2);
482  }
483 
484  ITYR_SUBCASE("serial") {
485  long r = ito::root_exec([=] {
486  return reduce(
487  execution::sequenced_policy(100),
488  p, p + n);
489  });
490  ITYR_CHECK(r == n * (n - 1) / 2);
491  }
492 
493  ITYR_SUBCASE("move only") {
494  ori::global_ptr<common::move_only_t> p_mo = ori::malloc_coll<common::move_only_t>(n);
495 
496  ito::root_exec([=] {
497  for_each(
498  execution::parallel_policy(100),
499  count_iterator<long>(0),
500  count_iterator<long>(n),
502  [&](long i, common::move_only_t& v) { v = common::move_only_t(i); });
503 
504  common::move_only_t r = reduce(
506  p_mo, p_mo + n);
507 
508  ITYR_CHECK(r.value() == n * (n - 1) / 2);
509  });
510 
511  ori::free_coll(p_mo);
512  }
513 
514  ori::free_coll(p);
515 
516  ori::fini();
517  ito::fini();
518 }
519 
571 template <typename ExecutionPolicy, typename ForwardIterator1, typename ForwardIteratorD,
572  typename Reducer, typename UnaryTransformOp>
573 inline ForwardIteratorD
574 transform_inclusive_scan(const ExecutionPolicy& policy,
575  ForwardIterator1 first1,
576  ForwardIterator1 last1,
577  ForwardIteratorD first_d,
578  Reducer reducer,
579  UnaryTransformOp unary_transform_op,
580  typename Reducer::accumulator_type&& init) {
581  if constexpr (ori::is_global_ptr_v<ForwardIterator1> ||
582  ori::is_global_ptr_v<ForwardIteratorD>) {
583  using value_type_d = typename std::iterator_traits<ForwardIteratorD>::value_type;
585  policy,
586  internal::convert_to_global_iterator(first1 , checkout_mode::read),
587  internal::convert_to_global_iterator(last1 , checkout_mode::read),
588  internal::convert_to_global_iterator(first_d, internal::dest_checkout_mode_t<value_type_d>{}),
589  reducer,
590  unary_transform_op,
591  std::move(init));
592 
593  } else {
594  auto accumulate_op = [=](auto&& acc, const auto& r1, auto&& d) {
595  reducer(acc, unary_transform_op(r1));
596  d = acc;
597  };
598 
599  // TODO: more efficient scan implementation
600  auto combine_op = [=](auto&& acc1,
601  auto&& acc2,
602  ForwardIterator1 first_,
603  ForwardIterator1 mid_,
604  ForwardIterator1 last_,
605  ForwardIteratorD first_d_) {
606  // Add the left accumulator `acc1` to the right half of the region
607  auto dm = std::distance(first_, mid_);
608  auto dl = std::distance(first_, last_);
609  if constexpr (!is_global_iterator_v<ForwardIteratorD>) {
610  for_each(policy, std::next(first_d_, dm), std::next(first_d_, dl),
611  [=](auto&& acc_r) { reducer(acc1, acc_r); });
612  } else if constexpr (std::is_same_v<typename ForwardIteratorD::mode, checkout_mode::no_access_t>) {
613  for_each(policy, std::next(first_d_, dm), std::next(first_d_, dl),
614  [=](auto&& acc_r) { reducer(acc1, acc_r); });
615  } else {
616  // &*: convert global_iterator -> global_ref -> global_ptr
617  auto fd = make_global_iterator(&*first_d_, checkout_mode::read_write);
618  for_each(policy, std::next(fd, dm), std::next(fd, dl),
619  [=](auto&& acc_r) { reducer(acc1, acc_r); });
620  }
621  reducer(std::forward<decltype(acc1)>(acc1), std::forward<decltype(acc2)>(acc2));
622  };
623 
624  internal::reduce_generic(policy, accumulate_op, combine_op, reducer,
625  std::move(init), first1, last1, first_d);
626 
627  return std::next(first_d, std::distance(first1, last1));
628  }
629 }
630 
/**
 * @brief Calculate a prefix sum (inclusive scan) while transforming each element, starting from
 *        the reducer's own initial value `reducer()`.
 *
 * @param policy             Execution policy.
 * @param first1             Beginning of the input range.
 * @param last1              End of the input range.
 * @param first_d            Beginning of the output range.
 * @param reducer            Reducer object.
 * @param unary_transform_op Transformation applied to each input element.
 * @return The iterator returned by the overload taking an explicit initial value.
 */
template <typename ExecutionPolicy, typename ForwardIterator1, typename ForwardIteratorD,
          typename Reducer, typename UnaryTransformOp>
inline ForwardIteratorD transform_inclusive_scan(const ExecutionPolicy& policy,
                                                 ForwardIterator1       first1,
                                                 ForwardIterator1       last1,
                                                 ForwardIteratorD       first_d,
                                                 Reducer                reducer,
                                                 UnaryTransformOp       unary_transform_op) {
  return transform_inclusive_scan(
      policy,
      first1, last1,
      first_d,
      reducer,
      unary_transform_op,
      reducer());  // initial accumulator
}
673 
/**
 * @brief Calculate a prefix sum (inclusive scan).
 *
 * Forwards to `transform_inclusive_scan` with a transformation that perfectly forwards each
 * element unchanged.
 *
 * @param policy  Execution policy.
 * @param first1  Beginning of the input range.
 * @param last1   End of the input range.
 * @param first_d Beginning of the output range.
 * @param reducer Reducer object.
 * @param init    Initial accumulator value; it is moved from.
 * @return The iterator returned by `transform_inclusive_scan`.
 */
template <typename ExecutionPolicy, typename ForwardIterator1, typename ForwardIteratorD,
          typename Reducer>
inline ForwardIteratorD
inclusive_scan(const ExecutionPolicy&               policy,
               ForwardIterator1                     first1,
               ForwardIterator1                     last1,
               ForwardIteratorD                     first_d,
               Reducer                              reducer,
               typename Reducer::accumulator_type&& init) {
  // Identity transformation: hand every element through as-is, preserving its value category.
  auto pass_through = [](auto&& elem) -> decltype(auto) {
    return std::forward<decltype(elem)>(elem);
  };
  return transform_inclusive_scan(policy, first1, last1, first_d, reducer,
                                  pass_through, std::move(init));
}
733 
/**
 * @brief Calculate a prefix sum (inclusive scan) starting from the reducer's own initial value
 *        `reducer()`.
 *
 * @param policy  Execution policy.
 * @param first1  Beginning of the input range.
 * @param last1   End of the input range.
 * @param first_d Beginning of the output range.
 * @param reducer Reducer object.
 * @return The iterator returned by the overload taking an explicit initial value.
 */
template <typename ExecutionPolicy, typename ForwardIterator1, typename ForwardIteratorD,
          typename Reducer>
inline ForwardIteratorD inclusive_scan(const ExecutionPolicy& policy,
                                       ForwardIterator1       first1,
                                       ForwardIterator1       last1,
                                       ForwardIteratorD       first_d,
                                       Reducer                reducer) {
  return inclusive_scan(
      policy,
      first1, last1,
      first_d,
      reducer,
      reducer());  // initial accumulator
}
771 
799 template <typename ExecutionPolicy, typename ForwardIterator1, typename ForwardIteratorD>
800 inline ForwardIteratorD inclusive_scan(const ExecutionPolicy& policy,
801  ForwardIterator1 first1,
802  ForwardIterator1 last1,
803  ForwardIteratorD first_d) {
804  using T = typename std::iterator_traits<ForwardIterator1>::value_type;
805  return inclusive_scan(policy, first1, last1, first_d, reducer::plus<T>{});
806 }
807 
808 ITYR_TEST_CASE("[ityr::pattern::parallel_reduce] inclusive scan") {
809  ito::init();
810  ori::init();
811 
812  long n = 100000;
813  ori::global_ptr<long> p1 = ori::malloc_coll<long>(n);
814  ori::global_ptr<long> p2 = ori::malloc_coll<long>(n);
815 
816  ito::root_exec([=] {
817  fill(execution::parallel_policy(100),
818  p1, p1 + n, 1);
819 
821  execution::parallel_policy(100),
822  p1, p1 + n, p2);
823 
824  ITYR_CHECK(p2[0].get() == 1);
825  ITYR_CHECK(p2[n - 1].get() == n);
826 
827  auto sum = reduce(
828  execution::parallel_policy(100),
829  p2, p2 + n);
830 
831  ITYR_CHECK(sum == n * (n + 1) / 2);
832 
834  execution::parallel_policy(100),
835  p1, p1 + n, p2, reducer::multiplies<long>{}, 10);
836 
837  ITYR_CHECK(p2[0].get() == 10);
838  ITYR_CHECK(p2[n - 1].get() == 10);
839 
841  execution::parallel_policy(100),
842  p1, p1 + n, p2, reducer::plus<long>{}, [](long x) { return x + 1; }, 10);
843 
844  ITYR_CHECK(p2[0].get() == 12);
845  ITYR_CHECK(p2[n - 1].get() == 10 + n * 2);
846  });
847 
848  ori::free_coll(p1);
849  ori::free_coll(p2);
850 
851  ori::fini();
852  ito::fini();
853 }
854 
885 template <typename ExecutionPolicy, typename ForwardIterator1, typename ForwardIterator2,
886  typename BinaryPredicate>
887 inline bool equal(const ExecutionPolicy& policy,
888  ForwardIterator1 first1,
889  ForwardIterator1 last1,
890  ForwardIterator2 first2,
891  BinaryPredicate pred) {
892  return transform_reduce(policy, first1, last1, first2, reducer::logical_and{}, pred);
893 }
894 
/**
 * @brief Check if two ranges have equal lengths and equal values, using a custom predicate.
 *
 * The element-wise comparison is performed only when both ranges have the same length.
 *
 * @param policy Execution policy.
 * @param first1 Beginning of the first range.
 * @param last1  End of the first range.
 * @param first2 Beginning of the second range.
 * @param last2  End of the second range.
 * @param pred   Binary predicate applied to each element pair.
 * @return `false` if the lengths differ; otherwise the result of the three-iterator overload.
 */
template <typename ExecutionPolicy, typename ForwardIterator1, typename ForwardIterator2,
          typename BinaryPredicate>
inline bool equal(const ExecutionPolicy& policy,
                  ForwardIterator1       first1,
                  ForwardIterator1       last1,
                  ForwardIterator2       first2,
                  ForwardIterator2       last2,
                  BinaryPredicate        pred) {
  // Ranges of different lengths can never be equal; skip the element-wise pass.
  if (std::distance(first1, last1) != std::distance(first2, last2)) {
    return false;
  }
  return equal(policy, first1, last1, first2, pred);
}
937 
/**
 * @brief Check if two ranges have equal values, comparing elements with `std::equal_to<>`.
 *
 * @param policy Execution policy.
 * @param first1 Beginning of the first range.
 * @param last1  End of the first range.
 * @param first2 Beginning of the second range.
 * @return Result of the predicate-taking overload.
 */
template <typename ExecutionPolicy, typename ForwardIterator1, typename ForwardIterator2>
inline bool equal(const ExecutionPolicy& policy,
                  ForwardIterator1       first1,
                  ForwardIterator1       last1,
                  ForwardIterator2       first2) {
  std::equal_to<> same_value{};
  return equal(policy, first1, last1, first2, same_value);
}
959 
/**
 * @brief Check if two ranges have equal lengths and equal values, comparing elements with
 *        `std::equal_to<>`.
 *
 * @param policy Execution policy.
 * @param first1 Beginning of the first range.
 * @param last1  End of the first range.
 * @param first2 Beginning of the second range.
 * @param last2  End of the second range.
 * @return Result of the predicate-taking overload.
 */
template <typename ExecutionPolicy, typename ForwardIterator1, typename ForwardIterator2>
inline bool equal(const ExecutionPolicy& policy,
                  ForwardIterator1       first1,
                  ForwardIterator1       last1,
                  ForwardIterator2       first2,
                  ForwardIterator2       last2) {
  std::equal_to<> same_value{};
  return equal(policy, first1, last1, first2, last2, same_value);
}
983 
984 ITYR_TEST_CASE("[ityr::pattern::parallel_reduce] equal") {
985  ito::init();
986  ori::init();
987 
988  long n = 100000;
989  ori::global_ptr<long> p1 = ori::malloc_coll<long>(n);
990  ori::global_ptr<long> p2 = ori::malloc_coll<long>(n);
991 
992  ito::root_exec([=] {
993  for_each(
994  execution::parallel_policy(100),
997  count_iterator<long>(0),
998  [=](long& v, long i) { v = i * 2; });
999 
1000  copy(execution::parallel_policy(100), p1, p1 + n, p2);
1001 
1002  ITYR_CHECK(equal(execution::parallel_policy(100),
1003  p1, p1 + n, p2) == true);
1004 
1005  ITYR_CHECK(equal(execution::parallel_policy(100),
1006  p1, p1 + n, p2, p2 + n) == true);
1007 
1008  ITYR_CHECK(equal(execution::parallel_policy(100),
1009  p1, p1 + n, p2, p2 + n - 1) == false);
1010 
1011  p2[n / 2].put(0);
1012 
1013  ITYR_CHECK(equal(execution::parallel_policy(100), p1, p1 + n, p2) == false);
1014  });
1015 
1016  ori::free_coll(p1);
1017  ori::free_coll(p2);
1018 
1019  ori::fini();
1020  ito::fini();
1021 }
1022 
1053 template <typename ExecutionPolicy, typename ForwardIterator, typename Compare>
1054 inline bool is_sorted(const ExecutionPolicy& policy,
1055  ForwardIterator first,
1056  ForwardIterator last,
1057  Compare comp) {
1058  // Check if comp(a(i+1), a(i)) returns false for all i
1059  return std::distance(first, last) <= 1 ||
1060  transform_reduce(policy, std::next(first), last, first,
1061  reducer::logical_or{}, comp) == false;
1062 }
1063 
/**
 * @brief Check if a range is sorted, comparing elements with `std::less<>`.
 *
 * @param policy Execution policy.
 * @param first  Beginning of the range.
 * @param last   End of the range.
 * @return Result of the comparator-taking overload.
 */
template <typename ExecutionPolicy, typename ForwardIterator>
inline bool is_sorted(const ExecutionPolicy& policy,
                      ForwardIterator        first,
                      ForwardIterator        last) {
  std::less<> ascending{};
  return is_sorted(policy, first, last, ascending);
}
1094 
1095 ITYR_TEST_CASE("[ityr::pattern::parallel_reduce] is_sorted") {
1096  ito::init();
1097  ori::init();
1098 
1099  long n = 100000;
1100  ori::global_ptr<long> p = ori::malloc_coll<long>(n);
1101 
1102  ito::root_exec([=] {
1103  for_each(
1104  execution::parallel_policy(100),
1107  count_iterator<long>(0),
1108  [=](long& v, long i) { v = i / 3; });
1109 
1110  ITYR_CHECK(is_sorted(execution::parallel_policy(100),
1111  p, p + n) == true);
1112 
1113  ITYR_CHECK(is_sorted(execution::parallel_policy(100),
1114  p, p + n, std::greater<>{}) == false);
1115 
1116  ITYR_CHECK(is_sorted(execution::parallel_policy(100),
1119  std::greater<>{}) == true);
1120 
1121  p[n / 4].put(0);
1122 
1123  ITYR_CHECK(is_sorted(execution::parallel_policy(100),
1124  p, p + n) == false);
1125  });
1126 
1127  ori::free_coll(p);
1128 
1129  ori::fini();
1130  ito::fini();
1131 }
1132 
1133 }
#define ITYR_SUBCASE(name)
Definition: util.hpp:41
#define ITYR_CHECK(cond)
Definition: util.hpp:48
constexpr read_write_t read_write
Read+Write checkout mode.
Definition: checkout_span.hpp:39
constexpr read_t read
Read-only checkout mode.
Definition: checkout_span.hpp:19
constexpr no_access_t no_access
Checkout mode to disable automatic checkout.
Definition: checkout_span.hpp:48
constexpr write_t write
Write-only checkout mode.
Definition: checkout_span.hpp:29
constexpr parallel_policy par
Default parallel execution policy for iterator-based loop functions.
Definition: execution.hpp:89
void fini()
Definition: ito.hpp:45
auto root_exec(Fn &&fn, Args &&... args)
Definition: ito.hpp:50
void task_group_begin(task_group_data *tgdata)
Definition: ito.hpp:105
void init(MPI_Comm comm=MPI_COMM_WORLD)
Definition: ito.hpp:41
void poll(PreSuspendCallback &&pre_suspend_cb, PostSuspendCallback &&post_suspend_cb)
Definition: ito.hpp:96
constexpr with_callback_t with_callback
Definition: thread.hpp:11
void task_group_end(PreSuspendCallback &&pre_suspend_cb, PostSuspendCallback &&post_suspend_cb)
Definition: ito.hpp:112
scheduler::task_group_data task_group_data
Definition: ito.hpp:103
void fini()
Definition: ori.hpp:49
void get(global_ptr< ConstT > from_ptr, T *to_ptr, std::size_t count)
Definition: ori.hpp:80
void init(MPI_Comm comm=MPI_COMM_WORLD)
Definition: ori.hpp:45
auto release_lazy()
Definition: ori.hpp:200
void free_coll(global_ptr< T > ptr)
Definition: ori.hpp:70
core::instance::instance_type::release_handler release_handler
Definition: ori.hpp:204
void poll()
Definition: ori.hpp:224
void release()
Definition: ori.hpp:196
void acquire()
Definition: ori.hpp:206
Definition: allocator.hpp:16
global_reverse_iterator< global_iterator< T, Mode > > make_reverse_iterator(ori::global_ptr< T > gptr, Mode mode)
Make a reverse iterator for global memory.
Definition: global_iterator.hpp:333
ForwardIteratorD transform_inclusive_scan(const ExecutionPolicy &policy, ForwardIterator1 first1, ForwardIterator1 last1, ForwardIteratorD first_d, Reducer reducer, UnaryTransformOp unary_transform_op, typename Reducer::accumulator_type &&init)
Calculate a prefix sum (inclusive scan) while transforming each element.
Definition: parallel_reduce.hpp:574
ForwardIteratorD copy(const ExecutionPolicy &policy, ForwardIterator1 first1, ForwardIterator1 last1, ForwardIteratorD first_d)
Copy a range to another.
Definition: parallel_loop.hpp:856
void for_each(const ExecutionPolicy &policy, ForwardIterator first, ForwardIterator last, Op op)
Apply an operator to each element in a range.
Definition: parallel_loop.hpp:136
void fill(const ExecutionPolicy &policy, ForwardIterator first, ForwardIterator last, const T &value)
Fill a range with a given value.
Definition: parallel_loop.hpp:771
Reducer::accumulator_type transform_reduce(const ExecutionPolicy &policy, ForwardIterator first, ForwardIterator last, Reducer reducer, UnaryTransformOp unary_transform_op)
Calculate reduction while transforming each element.
Definition: parallel_reduce.hpp:167
void init(MPI_Comm comm=MPI_COMM_WORLD)
Initialize Itoyori (collective).
Definition: ityr.hpp:69
Reducer::accumulator_type reduce(const ExecutionPolicy &policy, ForwardIterator first, ForwardIterator last, Reducer reducer)
Calculate reduction.
Definition: parallel_reduce.hpp:340
global_iterator< T, Mode > make_global_iterator(ori::global_ptr< T > gptr, Mode)
Make a global iterator to enable/disable automatic checkout.
Definition: global_iterator.hpp:158
bool is_sorted(const ExecutionPolicy &policy, ForwardIterator first, ForwardIterator last, Compare comp)
Check if a range is sorted.
Definition: parallel_reduce.hpp:1054
ForwardIteratorD inclusive_scan(const ExecutionPolicy &policy, ForwardIterator1 first1, ForwardIterator1 last1, ForwardIteratorD first_d, Reducer reducer, typename Reducer::accumulator_type &&init)
Calculate a prefix sum (inclusive scan).
Definition: parallel_reduce.hpp:724
bool equal(const ExecutionPolicy &policy, ForwardIterator1 first1, ForwardIterator1 last1, ForwardIterator2 first2, BinaryPredicate pred)
Check if two ranges have equal values.
Definition: parallel_reduce.hpp:887
ForwardIteratorD move(const ExecutionPolicy &policy, ForwardIterator1 first1, ForwardIterator1 last1, ForwardIteratorD first_d)
Move a range to another.
Definition: parallel_loop.hpp:934
Definition: reducer.hpp:15