Itoyori  v0.0.1
parallel_loop.hpp
Go to the documentation of this file.
1 #pragma once
2 
3 #include "ityr/common/util.hpp"
5 #include "ityr/ito/ito.hpp"
6 #include "ityr/ori/ori.hpp"
11 
12 namespace ityr {
13 
14 namespace internal {
15 
// Recursive divide-and-conquer driver used by the parallel overload of loop_generic().
//
// Applies `op` to the elements of [first, last) together with the elements at the
// same offsets of the ranges that start at each of `firsts...`.
//
//   policy       parallel policy; ranges of at most `policy.cutoff_count` elements
//                are handed to for_each_aux() with the derived sequenced policy.
//   op           callable invoked with one dereferenced element per range.
//   rh           release handler; it is acquired in the post-suspend callback of
//                ito::poll() and in the first callback given to the forked thread.
//   first, last  primary range; its length decides where the range is split.
//   firsts...    start iterators of the additional ranges, advanced in lockstep.
16 template <typename W, typename Op, typename ReleaseHandler,
17  typename ForwardIterator, typename... ForwardIterators>
18 inline void parallel_loop_generic(const execution::parallel_policy<W>& policy,
19  Op op,
20  ReleaseHandler rh,
21  ForwardIterator first,
22  ForwardIterator last,
23  ForwardIterators... firsts) {
24  ori::poll();
25 
26  // for immediately executing cross-worker tasks in ADWS
27  ito::poll([] { return ori::release_lazy(); },
28  [&](ori::release_handler rh_) { ori::acquire(rh); ori::acquire(rh_); });
29 
 // Base case: at most `cutoff_count` elements are processed without forking.
30  std::size_t d = std::distance(first, last);
31  if (d <= policy.cutoff_count) {
32  for_each_aux(
33  execution::internal::to_sequenced_policy(policy),
34  [&](auto&&... refs) {
35  op(std::forward<decltype(refs)>(refs)...);
36  },
37  first, last, firsts...);
38  return;
39  }
40 
 // Recursive case: split the primary range at its midpoint.
41  auto mid = std::next(first, d / 2);
42 
43  ito::task_group_data tgdata;
44  ito::task_group_begin(&tgdata);
45 
 // One child policy per half.
46  auto&& [p1, p2] = execution::internal::get_child_policies(policy);
47 
 // Fork the first half [first, mid). The two callbacks acquire `rh` and release,
 // respectively. NOTE(review): presumably they run around the child's execution
 // when it is not serialized; confirm against ito::thread.
 // `p1` needs an explicit init-capture because it is a structured binding.
48  ito::thread<void> th(
49  ito::with_callback, [=] { ori::acquire(rh); }, [] { ori::release(); },
50  execution::internal::get_workhint(policy),
51  [=, p1 = p1] {
52  parallel_loop_generic(p1, op, rh, first, mid, firsts...);
53  });
54 
 // Process the second half [mid, last) in the current thread; every extra range
 // is advanced by the same d / 2 offset.
55  parallel_loop_generic(p2, op, rh, mid, last, std::next(firsts, d / 2)...);
56 
 // Release only when the forked thread was not serialized.
57  if (!th.serialized()) {
58  ori::release();
59  }
60 
61  th.join();
62 
63  ito::task_group_end([] { ori::release(); }, [] { ori::acquire(); });
64 
65  // TODO: needed?
66  if (!th.serialized()) {
67  ori::acquire();
68  }
69 }
70 
71 template <typename Op, typename ForwardIterator, typename... ForwardIterators>
72 inline void loop_generic(const execution::sequenced_policy& policy,
73  Op op,
74  ForwardIterator first,
75  ForwardIterator last,
76  ForwardIterators... firsts) {
77  execution::internal::assert_policy(policy);
78  for_each_aux(
79  execution::internal::to_sequenced_policy(policy),
80  [&](auto&&... refs) {
81  op(std::forward<decltype(refs)>(refs)...);
82  },
83  first, last, firsts...);
84 }
85 
86 template <typename W, typename Op, typename ForwardIterator, typename... ForwardIterators>
87 inline void loop_generic(const execution::parallel_policy<W>& policy,
88  Op op,
89  ForwardIterator first,
90  ForwardIterator last,
91  ForwardIterators... firsts) {
92  execution::internal::assert_policy(policy);
93  auto rh = ori::release_lazy();
94  parallel_loop_generic(policy, op, rh, first, last, firsts...);
95 }
96 
97 }
98 
135 template <typename ExecutionPolicy, typename ForwardIterator, typename Op>
136 inline void for_each(const ExecutionPolicy& policy,
137  ForwardIterator first,
138  ForwardIterator last,
139  Op op) {
140  internal::loop_generic(policy, op, first, last);
141 }
142 
182 template <typename ExecutionPolicy, typename ForwardIterator1, typename ForwardIterator2, typename Op>
183 inline void for_each(const ExecutionPolicy& policy,
184  ForwardIterator1 first1,
185  ForwardIterator1 last1,
186  ForwardIterator2 first2,
187  Op op) {
188  internal::loop_generic(policy, op, first1, last1, first2);
189 }
190 
232 template <typename ExecutionPolicy, typename ForwardIterator1, typename ForwardIterator2,
233  typename ForwardIterator3, typename Op>
234 inline void for_each(const ExecutionPolicy& policy,
235  ForwardIterator1 first1,
236  ForwardIterator1 last1,
237  ForwardIterator2 first2,
238  ForwardIterator3 first3,
239  Op op) {
240  internal::loop_generic(policy, op, first1, last1, first2, first3);
241 }
242 
246 template <typename ExecutionPolicy, typename ForwardIterator1, typename ForwardIterator2,
247  typename ForwardIterator3, typename ForwardIterator4, typename Op>
248 inline void for_each(const ExecutionPolicy& policy,
249  ForwardIterator1 first1,
250  ForwardIterator1 last1,
251  ForwardIterator2 first2,
252  ForwardIterator3 first3,
253  ForwardIterator4 first4,
254  Op op) {
255  internal::loop_generic(policy, op, first1, last1, first2, first3, first4);
256 }
257 
261 template <typename ExecutionPolicy, typename ForwardIterator1, typename ForwardIterator2,
262  typename ForwardIterator3, typename ForwardIterator4, typename ForwardIterator5, typename Op>
263 inline void for_each(const ExecutionPolicy& policy,
264  ForwardIterator1 first1,
265  ForwardIterator1 last1,
266  ForwardIterator2 first2,
267  ForwardIterator3 first3,
268  ForwardIterator4 first4,
269  ForwardIterator5 first5,
270  Op op) {
271  internal::loop_generic(policy, op, first1, last1, first2, first3, first4, first5);
272 }
273 
277 template <typename ExecutionPolicy, typename ForwardIterator1, typename ForwardIterator2,
278  typename ForwardIterator3, typename ForwardIterator4, typename ForwardIterator5,
279  typename ForwardIterator6, typename Op>
280 inline void for_each(const ExecutionPolicy& policy,
281  ForwardIterator1 first1,
282  ForwardIterator1 last1,
283  ForwardIterator2 first2,
284  ForwardIterator3 first3,
285  ForwardIterator4 first4,
286  ForwardIterator5 first5,
287  ForwardIterator6 first6,
288  Op op) {
289  internal::loop_generic(policy, op, first1, last1, first2, first3, first4, first5, first6);
290 }
291 
295 template <typename ExecutionPolicy, typename ForwardIterator1, typename ForwardIterator2,
296  typename ForwardIterator3, typename ForwardIterator4, typename ForwardIterator5,
297  typename ForwardIterator6, typename ForwardIterator7, typename Op>
298 inline void for_each(const ExecutionPolicy& policy,
299  ForwardIterator1 first1,
300  ForwardIterator1 last1,
301  ForwardIterator2 first2,
302  ForwardIterator3 first3,
303  ForwardIterator4 first4,
304  ForwardIterator5 first5,
305  ForwardIterator6 first6,
306  ForwardIterator6 first7,
307  Op op) {
308  internal::loop_generic(policy, op, first1, last1, first2, first3, first4, first5, first6, first7);
309 }
310 
314 template <typename ExecutionPolicy, typename ForwardIterator1, typename ForwardIterator2,
315  typename ForwardIterator3, typename ForwardIterator4, typename ForwardIterator5,
316  typename ForwardIterator6, typename ForwardIterator7, typename ForwardIterator8, typename Op>
317 inline void for_each(const ExecutionPolicy& policy,
318  ForwardIterator1 first1,
319  ForwardIterator1 last1,
320  ForwardIterator2 first2,
321  ForwardIterator3 first3,
322  ForwardIterator4 first4,
323  ForwardIterator5 first5,
324  ForwardIterator6 first6,
325  ForwardIterator6 first7,
326  ForwardIterator6 first8,
327  Op op) {
328  internal::loop_generic(policy, op, first1, last1, first2, first3, first4, first5, first6, first7, first8);
329 }
330 
// Test case for for_each() run between ori::init() and ori::fini() (ito is not
// initialized here). It covers count iterators, std::vector iterators, move
// iterators, and memory obtained from ori::malloc().
//
// NOTE(review): several for_each() calls below are missing argument lines; the
// embedded listing numbers skip (e.g. 340, 348, 418, 434-436). Presumably these
// were the execution-policy and global-iterator arguments; confirm against the
// original header before editing the calls.
331 ITYR_TEST_CASE("[ityr::pattern::serial_loop] serial for_each") {
332  ori::init();
333 
334  long n = 100000;
335 
336  ITYR_SUBCASE("without global_ptr") {
337  ITYR_SUBCASE("count iterator") {
338  long count = 0;
339  for_each(
341  count_iterator<long>(0),
342  count_iterator<long>(n),
343  [&](long i) { count += i; });
344  ITYR_CHECK(count == n * (n - 1) / 2);
345 
346  count = 0;
347  for_each(
349  count_iterator<long>(0),
350  count_iterator<long>(n),
351  count_iterator<long>(n),
352  [&](long i, long j) { count += i + j; });
353  ITYR_CHECK(count == 2 * n * (2 * n - 1) / 2);
354 
355  count = 0;
356  for_each(
358  count_iterator<long>(0),
359  count_iterator<long>(n),
360  count_iterator<long>(n),
361  count_iterator<long>(2 * n),
362  [&](long i, long j, long k) { count += i + j + k; });
363  ITYR_CHECK(count == 3 * n * (3 * n - 1) / 2);
364  }
365 
366  ITYR_SUBCASE("vector copy") {
367  std::vector<long> mos1(count_iterator<long>(0),
368  count_iterator<long>(n));
369 
370  std::vector<long> mos2;
371  for_each(
373  mos1.begin(), mos1.end(),
374  std::back_inserter(mos2),
375  [&](long i, auto&& out) { out = i; });
376 
377  long count = 0;
378  for_each(
380  mos2.begin(), mos2.end(),
381  [&](long i) { count += i; });
382  ITYR_CHECK(count == n * (n - 1) / 2);
383  }
384 
385  ITYR_SUBCASE("move iterator with vector") {
386  std::vector<common::move_only_t> mos1(count_iterator<long>(0),
387  count_iterator<long>(n));
388 
389  std::vector<common::move_only_t> mos2;
390  for_each(
392  std::make_move_iterator(mos1.begin()),
393  std::make_move_iterator(mos1.end()),
394  std::back_inserter(mos2),
395  [&](common::move_only_t&& in, auto&& out) { out = std::move(in); });
396 
397  long count = 0;
398  for_each(
400  mos2.begin(), mos2.end(),
401  [&](common::move_only_t& mo) { count += mo.value(); });
402  ITYR_CHECK(count == n * (n - 1) / 2);
403 
404  for_each(
406  mos1.begin(), mos1.end(),
407  [&](common::move_only_t& mo) { ITYR_CHECK(mo.value() == -1); });
408  }
409  }
410 
411  ITYR_SUBCASE("with global_ptr") {
412  ori::global_ptr<long> gp = ori::malloc<long>(n);
413 
414  for_each(
416  count_iterator<long>(0),
417  count_iterator<long>(n),
419  [&](long i, long& out) { new (&out) long(i); });
420 
421  ITYR_SUBCASE("read array without global_iterator") {
422  long count = 0;
423  for_each(
425  gp,
426  gp + n,
427  [&](ori::global_ref<long> gr) { count += gr; });
428  ITYR_CHECK(count == n * (n - 1) / 2);
429  }
430 
431  ITYR_SUBCASE("read array with global_iterator") {
432  long count = 0;
433  for_each(
437  [&](long i) { count += i; });
438  ITYR_CHECK(count == n * (n - 1) / 2);
439  }
440 
441  ITYR_SUBCASE("move iterator") {
442  ori::global_ptr<common::move_only_t> mos1 = ori::malloc<common::move_only_t>(n);
443  ori::global_ptr<common::move_only_t> mos2 = ori::malloc<common::move_only_t>(n);
444 
445  for_each(
450  [&](long i, common::move_only_t& out) { new (&out) common::move_only_t(i); });
451 
452  for_each(
454  make_move_iterator(mos1),
455  make_move_iterator(mos1 + n),
457  [&](common::move_only_t&& in, common::move_only_t& out) { new (&out) common::move_only_t(std::move(in)); });
458 
459  long count = 0;
460  for_each(
464  [&](const common::move_only_t& mo) { count += mo.value(); });
465  ITYR_CHECK(count == n * (n - 1) / 2);
466 
467  for_each(
471  [&](const common::move_only_t& mo) { ITYR_CHECK(mo.value() == -1); });
472 
473  ori::free(mos1, n);
474  ori::free(mos2, n);
475  }
476 
477  ori::free(gp, n);
478  }
479 
480  ori::fini();
481 }
482 
// Test case for for_each() inside ito::root_exec(), using two arrays of n ints
// obtained from ori::malloc_coll(). The lambdas in turn write a running counter,
// check values against an index, store doubled values, double in place, and
// finally check for index * 4.
//
// NOTE(review): most for_each() calls below are missing argument lines; the
// embedded listing numbers skip (e.g. 495-496, 500-502, 514-517). Presumably
// these were the policy and global-iterator arguments; confirm against the
// original header before editing the calls.
483 ITYR_TEST_CASE("[ityr::pattern::parallel_loop] parallel for_each") {
484  ito::init();
485  ori::init();
486 
487  int n = 100000;
488  ori::global_ptr<int> p1 = ori::malloc_coll<int>(n);
489  ori::global_ptr<int> p2 = ori::malloc_coll<int>(n);
490 
491  ito::root_exec([=] {
492  int count = 0;
493  for_each(
494  execution::sequenced_policy(100),
497  [&](int& v) { v = count++; });
498 
499  for_each(
503  count_iterator<int>(0),
504  [=](int x, int i) { ITYR_CHECK(x == i); });
505 
506  for_each(
508  count_iterator<int>(0),
509  count_iterator<int>(n),
511  [=](int i, int x) { ITYR_CHECK(x == i); });
512 
513  for_each(
518  [=](int x, int& y) { y = x * 2; });
519 
520  for_each(
524  [=](int& y) { y *= 2; });
525 
526  for_each(
528  count_iterator<int>(0),
529  count_iterator<int>(n),
531  [=](int i, int y) { ITYR_CHECK(y == i * 4); });
532  });
533 
534  ori::free_coll(p1);
535  ori::free_coll(p2);
536 
537  ori::fini();
538  ito::fini();
539 }
540 
581 template <typename ExecutionPolicy, typename ForwardIterator1,
582  typename ForwardIteratorD, typename UnaryOp>
583 inline ForwardIteratorD transform(const ExecutionPolicy& policy,
584  ForwardIterator1 first1,
585  ForwardIterator1 last1,
586  ForwardIteratorD first_d,
587  UnaryOp unary_op) {
588  if constexpr (ori::is_global_ptr_v<ForwardIterator1> ||
589  ori::is_global_ptr_v<ForwardIteratorD>) {
590  using value_type_d = typename std::iterator_traits<ForwardIteratorD>::value_type;
591  return transform(
592  policy,
593  internal::convert_to_global_iterator(first1 , checkout_mode::read),
594  internal::convert_to_global_iterator(last1 , checkout_mode::read),
595  internal::convert_to_global_iterator(first_d, internal::dest_checkout_mode_t<value_type_d>{}),
596  unary_op);
597 
598  } else {
599  auto op = [=](const auto& r1, auto&& d) {
600  d = unary_op(r1);
601  };
602 
603  internal::loop_generic(policy, op, first1, last1, first_d);
604 
605  return std::next(first_d, std::distance(first1, last1));
606  }
607 }
608 
653 template <typename ExecutionPolicy, typename ForwardIterator1, typename ForwardIterator2,
654  typename ForwardIteratorD, typename BinaryOp>
655 inline ForwardIteratorD transform(const ExecutionPolicy& policy,
656  ForwardIterator1 first1,
657  ForwardIterator1 last1,
658  ForwardIterator2 first2,
659  ForwardIteratorD first_d,
660  BinaryOp binary_op) {
661  if constexpr (ori::is_global_ptr_v<ForwardIterator1> ||
662  ori::is_global_ptr_v<ForwardIterator2> ||
663  ori::is_global_ptr_v<ForwardIteratorD>) {
664  using value_type_d = typename std::iterator_traits<ForwardIteratorD>::value_type;
665  return transform(
666  policy,
667  internal::convert_to_global_iterator(first1 , checkout_mode::read),
668  internal::convert_to_global_iterator(last1 , checkout_mode::read),
669  internal::convert_to_global_iterator(first2 , checkout_mode::read),
670  internal::convert_to_global_iterator(first_d, internal::dest_checkout_mode_t<value_type_d>{}),
671  binary_op);
672 
673  } else {
674  auto op = [=](const auto& r1, const auto& r2, auto&& d) {
675  d = binary_op(r1, r2);
676  };
677 
678  internal::loop_generic(policy, op, first1, last1, first2, first_d);
679 
680  return std::next(first_d, std::distance(first1, last1));
681  }
682 }
683 
// Test case for transform(), run once with execution::parallel_policy(100) and
// once with execution::sequenced_policy(100). Each subcase fills p1 with i * 2,
// checks the returned iterator equals p1 + n, writes i * p1[i] into p2 with the
// binary overload, and then checks values against i * i * 2.
//
// NOTE(review): the verifying for_each() calls are missing argument lines; the
// embedded listing numbers skip 707-708 and 729-730. Presumably these were the
// global-iterator arguments; confirm against the original header.
684 ITYR_TEST_CASE("[ityr::pattern::parallel_loop] transform") {
685  ito::init();
686  ori::init();
687 
688  long n = 100000;
689  ori::global_ptr<long> p1 = ori::malloc_coll<long>(n);
690  ori::global_ptr<long> p2 = ori::malloc_coll<long>(n);
691 
692  ITYR_SUBCASE("parallel") {
693  ito::root_exec([=] {
694  auto r = transform(
695  execution::parallel_policy(100),
696  count_iterator<long>(0), count_iterator<long>(n), p1,
697  [](long i) { return i * 2; });
698  ITYR_CHECK(r == p1 + n);
699 
700  transform(
701  execution::parallel_policy(100),
702  count_iterator<long>(0), count_iterator<long>(n), p1, p2,
703  [](long i, long j) { return i * j; });
704 
705  for_each(
706  execution::parallel_policy(100),
709  count_iterator<long>(0),
710  [=](long v, long i) { ITYR_CHECK(v == i * i * 2); });
711  });
712  }
713 
714  ITYR_SUBCASE("serial") {
715  ito::root_exec([=] {
716  auto ret = transform(
717  execution::sequenced_policy(100),
718  count_iterator<long>(0), count_iterator<long>(n), p1,
719  [](long i) { return i * 2; });
720  ITYR_CHECK(ret == p1 + n);
721 
722  transform(
723  execution::sequenced_policy(100),
724  count_iterator<long>(0), count_iterator<long>(n), p1, p2,
725  [](long i, long j) { return i * j; });
726 
727  for_each(
728  execution::sequenced_policy(100),
731  count_iterator<long>(0),
732  [=](long v, long i) { ITYR_CHECK(v == i * i * 2); });
733  });
734  }
735 
736  ori::free_coll(p1);
737  ori::free_coll(p2);
738 
739  ori::fini();
740  ito::fini();
741 }
742 
770 template <typename ExecutionPolicy, typename ForwardIterator, typename T>
771 inline void fill(const ExecutionPolicy& policy,
772  ForwardIterator first,
773  ForwardIterator last,
774  const T& value) {
775  if constexpr (ori::is_global_ptr_v<ForwardIterator>) {
776  using value_type = typename std::iterator_traits<ForwardIterator>::value_type;
777  fill(
778  policy,
779  internal::convert_to_global_iterator(first, internal::dest_checkout_mode_t<value_type>{}),
780  internal::convert_to_global_iterator(last , internal::dest_checkout_mode_t<value_type>{}),
781  value);
782 
783  } else {
784  auto op = [=](auto&& d) {
785  d = value;
786  };
787 
788  internal::loop_generic(policy, op, first, last);
789  }
790 }
791 
// Test case for fill(): fills n longs from ori::malloc_coll() with 33 under
// execution::parallel_policy(100), then checks every visited value equals 33.
//
// NOTE(review): the verifying for_each() call is missing argument lines; the
// embedded listing numbers skip 805-806. Presumably these were the range
// arguments; confirm against the original header.
792 ITYR_TEST_CASE("[ityr::pattern::parallel_loop] fill") {
793  ito::init();
794  ori::init();
795 
796  long n = 100000;
797  ori::global_ptr<long> p = ori::malloc_coll<long>(n);
798 
799  ito::root_exec([=] {
800  long val = 33;
801  fill(execution::parallel_policy(100), p, p + n, val);
802 
803  for_each(
804  execution::parallel_policy(100),
807  [=](long v) { ITYR_CHECK(v == val); });
808  });
809 
810  ori::free_coll(p);
811 
812  ori::fini();
813  ito::fini();
814 }
815 
855 template <typename ExecutionPolicy, typename ForwardIterator1, typename ForwardIteratorD>
856 inline ForwardIteratorD copy(const ExecutionPolicy& policy,
857  ForwardIterator1 first1,
858  ForwardIterator1 last1,
859  ForwardIteratorD first_d) {
860  if constexpr (ori::is_global_ptr_v<ForwardIterator1> ||
861  ori::is_global_ptr_v<ForwardIteratorD>) {
862  using value_type1 = typename std::iterator_traits<ForwardIterator1>::value_type;
863  using value_type_d = typename std::iterator_traits<ForwardIteratorD>::value_type;
864  return copy(
865  policy,
866  internal::convert_to_global_iterator(first1 , internal::src_checkout_mode_t<value_type1>{}),
867  internal::convert_to_global_iterator(last1 , internal::src_checkout_mode_t<value_type1>{}),
868  internal::convert_to_global_iterator(first_d, internal::dest_checkout_mode_t<value_type_d>{}));
869 
870  } else {
871  auto op = [=](auto&& r1, auto&& d) {
872  d = std::forward<decltype(r1)>(r1);
873  };
874 
875  internal::loop_generic(policy, op, first1, last1, first_d);
876 
877  return std::next(first_d, std::distance(first1, last1));
878  }
879 }
880 
// Test case for copy(): writes i * 2 through a for_each(), copies p1 into p2
// under execution::parallel_policy(100), then checks values against i * 2.
//
// NOTE(review): both for_each() calls are missing argument lines; the embedded
// listing numbers skip 892-893 and 901-902. Presumably these were the
// global-iterator arguments; confirm against the original header.
881 ITYR_TEST_CASE("[ityr::pattern::parallel_loop] copy") {
882  ito::init();
883  ori::init();
884 
885  long n = 100000;
886  ori::global_ptr<long> p1 = ori::malloc_coll<long>(n);
887  ori::global_ptr<long> p2 = ori::malloc_coll<long>(n);
888 
889  ito::root_exec([=] {
890  for_each(
891  execution::parallel_policy(100),
894  count_iterator<long>(0),
895  [=](long& v, long i) { v = i * 2; });
896 
897  copy(execution::parallel_policy(100), p1, p1 + n, p2);
898 
899  for_each(
900  execution::parallel_policy(100),
903  count_iterator<long>(0),
904  [=](long v, long i) { ITYR_CHECK(v == i * 2); });
905  });
906 
907  ori::free_coll(p1);
908  ori::free_coll(p2);
909 
910  ori::fini();
911  ito::fini();
912 }
913 
933 template <typename ExecutionPolicy, typename ForwardIterator1, typename ForwardIteratorD>
934 inline ForwardIteratorD move(const ExecutionPolicy& policy,
935  ForwardIterator1 first1,
936  ForwardIterator1 last1,
937  ForwardIteratorD first_d) {
938  if constexpr (ori::is_global_ptr_v<ForwardIterator1> ||
939  ori::is_global_ptr_v<ForwardIteratorD>) {
940  using value_type1 = typename std::iterator_traits<ForwardIterator1>::value_type;
941  using value_type_d = typename std::iterator_traits<ForwardIteratorD>::value_type;
942  return move(
943  policy,
944  internal::convert_to_global_iterator(first1 , internal::src_checkout_mode_t<value_type1>{}),
945  internal::convert_to_global_iterator(last1 , internal::src_checkout_mode_t<value_type1>{}),
946  internal::convert_to_global_iterator(first_d, internal::dest_checkout_mode_t<value_type_d>{}));
947 
948  } else {
950  return copy(policy, make_move_iterator(first1), make_move_iterator(last1), first_d);
951  }
952 }
953 
// Test case for move(): constructs common::move_only_t(i * 2) objects in place,
// moves p1 into p2 under execution::parallel_policy(100), then checks that the
// visited objects hold i * 2.
//
// NOTE(review): both for_each() calls are missing argument lines; the embedded
// listing numbers skip 965-966 and 974-975. Presumably these were the
// global-iterator arguments; confirm against the original header.
954 ITYR_TEST_CASE("[ityr::pattern::parallel_loop] move") {
955  ito::init();
956  ori::init();
957 
958  long n = 100000;
959  ori::global_ptr<common::move_only_t> p1 = ori::malloc_coll<common::move_only_t>(n);
960  ori::global_ptr<common::move_only_t> p2 = ori::malloc_coll<common::move_only_t>(n);
961 
962  ito::root_exec([=] {
963  for_each(
964  execution::parallel_policy(100),
967  count_iterator<long>(0),
968  [=](common::move_only_t& r, long i) { new (&r) common::move_only_t(i * 2); });
969 
970  move(execution::parallel_policy(100), p1, p1 + n, p2);
971 
972  for_each(
973  execution::parallel_policy(100),
976  count_iterator<long>(0),
977  [=](const common::move_only_t& r, long i) { ITYR_CHECK(r.value() == i * 2); });
978  });
979 
980  ori::free_coll(p1);
981  ori::free_coll(p2);
982 
983  ori::fini();
984  ito::fini();
985 }
986 
1013 template <typename ExecutionPolicy, typename BidirectionalIterator>
1014 inline void reverse(const ExecutionPolicy& policy,
1015  BidirectionalIterator first,
1016  BidirectionalIterator last) {
1017  if constexpr (ori::is_global_ptr_v<BidirectionalIterator>) {
1018  return reverse(
1019  policy,
1020  internal::convert_to_global_iterator(first, checkout_mode::read_write),
1021  internal::convert_to_global_iterator(last , checkout_mode::read_write));
1022 
1023  } else {
1024  auto op = [=](auto&& r1, auto&& r2) {
1025  using std::swap;
1026  swap(r1, r2);
1027  };
1028 
1030  auto d = std::distance(first, last);
1031  internal::loop_generic(policy, op, first, std::next(first, d / 2), make_reverse_iterator(last));
1032  }
1033 }
1034 
/// Copy the elements of [first1, last1) in reverse order into the range
/// starting at `first_d`.
///
/// Implemented as copy() over reverse iterators built from `last1` and `first1`.
///
/// @param policy   Execution policy.
/// @param first1   Beginning of the input range.
/// @param last1    End of the input range.
/// @param first_d  Beginning of the output range.
/// @return The iterator returned by copy().
template <typename ExecutionPolicy, typename BidirectionalIterator1, typename BidirectionalIteratorD>
inline BidirectionalIteratorD reverse_copy(const ExecutionPolicy& policy,
                                           BidirectionalIterator1 first1,
                                           BidirectionalIterator1 last1,
                                           BidirectionalIteratorD first_d) {
  return copy(policy,
              make_reverse_iterator(last1),
              make_reverse_iterator(first1),
              first_d);
}
1063 
// Test case for reverse() and reverse_copy(): writes i into each element,
// reverses p1 in place, reverse-copies p1 into p2, then checks one visited
// value against n - i - 1 and the other against i.
//
// NOTE(review): both for_each() calls are missing argument lines; the embedded
// listing numbers skip 1075-1076 and 1086-1088. Presumably these were the
// global-iterator arguments; confirm against the original header.
1064 ITYR_TEST_CASE("[ityr::pattern::parallel_loop] reverse") {
1065  ito::init();
1066  ori::init();
1067 
1068  long n = 100000;
1069  ori::global_ptr<long> p1 = ori::malloc_coll<long>(n);
1070  ori::global_ptr<long> p2 = ori::malloc_coll<long>(n);
1071 
1072  ito::root_exec([=] {
1073  for_each(
1074  execution::parallel_policy(100),
1077  count_iterator<long>(0),
1078  [=](long& v, long i) { v = i; });
1079 
1080  reverse(execution::parallel_policy(100), p1, p1 + n);
1081 
1082  reverse_copy(execution::parallel_policy(100), p1, p1 + n, p2);
1083 
1084  for_each(
1085  execution::parallel_policy(100),
1089  count_iterator<long>(0),
1090  [=](long v1, long v2, long i) {
1091  ITYR_CHECK(v1 == n - i - 1);
1092  ITYR_CHECK(v2 == i);
1093  });
1094  });
1095 
1096  ori::free_coll(p1);
1097  ori::free_coll(p2);
1098 
1099  ori::fini();
1100  ito::fini();
1101 }
1102 
/// Rotate [first, last) to the left so that `middle` becomes the first element.
///
/// Uses three reversals: [first, middle) and [middle, last) are reversed as two
/// tasks passed to parallel_invoke(), then the whole range is reversed.
///
/// @param policy  Execution policy forwarded to every reverse() call.
/// @param first   Beginning of the range.
/// @param middle  Element that should end up at the beginning of the range.
/// @param last    End of the range.
/// @return Iterator to the new position of the element originally at `first`,
///         i.e. `first + (last - middle)`.
template <typename ExecutionPolicy, typename BidirectionalIterator>
inline BidirectionalIterator rotate(const ExecutionPolicy& policy,
                                    BidirectionalIterator  first,
                                    BidirectionalIterator  middle,
                                    BidirectionalIterator  last) {
  // TODO: implement a version with ForwardIterator
  // Nothing to move when either part is empty.
  if (first == middle) return last;
  if (middle == last) return first;

  // The two partial reversals touch disjoint sub-ranges, so they are issued
  // together; the call opening these two tasks was missing, which left the
  // lambdas orphaned and the closing parenthesis unbalanced.
  parallel_invoke(
      [=] { reverse(policy, first, middle); },
      [=] { reverse(policy, middle, last); });
  reverse(policy, first, last);

  return std::next(first, std::distance(middle, last));
}
1150 
/// Copy [first1, last1) into the range starting at `first_d`, rotated so that
/// the element at `middle1` comes first.
///
/// Two copies are issued through parallel_invoke(): [middle1, last1) goes to the
/// start of the output, and [first1, middle1) goes right after it.
///
/// @param policy   Execution policy forwarded to both copy() calls.
/// @param first1   Beginning of the input range.
/// @param middle1  Input element that should come first in the output.
/// @param last1    End of the input range.
/// @param first_d  Beginning of the output range.
/// @return The iterator returned by the second copy().
template <typename ExecutionPolicy, typename ForwardIterator1, typename ForwardIteratorD>
inline ForwardIteratorD rotate_copy(const ExecutionPolicy& policy,
                                    ForwardIterator1       first1,
                                    ForwardIterator1       middle1,
                                    ForwardIterator1       last1,
                                    ForwardIteratorD       first_d) {
  auto [unused_head, dest_end] = parallel_invoke(
      // Tail of the input becomes the head of the output.
      [=] { copy(policy, middle1, last1, first_d); },
      // Head of the input is placed after the copied tail.
      [=] {
        auto dest_middle = std::next(first_d, std::distance(middle1, last1));
        return copy(policy, first1, middle1, dest_middle);
      });
  return dest_end;
}
1199 
// Test case for rotate() and rotate_copy(): writes i into each element, rotates
// p1 by shift = n / 3, rotate-copies p1 into p2 around p1 + (n - shift), then
// checks one visited value against (i + shift) % n and the other against i.
//
// NOTE(review): both for_each() calls are missing argument lines; the embedded
// listing numbers skip 1211-1212 and 1225-1227. Presumably these were the
// global-iterator arguments; confirm against the original header.
1200 ITYR_TEST_CASE("[ityr::pattern::parallel_loop] rotate") {
1201  ito::init();
1202  ori::init();
1203 
1204  long n = 100000;
1205  ori::global_ptr<long> p1 = ori::malloc_coll<long>(n);
1206  ori::global_ptr<long> p2 = ori::malloc_coll<long>(n);
1207 
1208  ito::root_exec([=] {
1209  for_each(
1210  execution::parallel_policy(100),
1213  count_iterator<long>(0),
1214  [=](long& v, long i) { v = i; });
1215 
1216  long shift = n / 3;
1217  rotate(execution::parallel_policy(100), p1, p1 + shift, p1 + n);
1218 
1219  rotate_copy(
1220  execution::parallel_policy(100),
1221  p1, p1 + (n - shift), p1 + n, p2);
1222 
1223  for_each(
1224  execution::parallel_policy(100),
1228  count_iterator<long>(0),
1229  [=](long v1, long v2, long i) {
1230  ITYR_CHECK(v1 == (i + shift) % n);
1231  ITYR_CHECK(v2 == i);
1232  });
1233  });
1234 
1235  ori::free_coll(p1);
1236  ori::free_coll(p2);
1237 
1238  ori::fini();
1239  ito::fini();
1240 }
1241 
1242 }
#define ITYR_SUBCASE(name)
Definition: util.hpp:41
#define ITYR_CHECK(cond)
Definition: util.hpp:48
constexpr read_write_t read_write
Read+Write checkout mode.
Definition: checkout_span.hpp:39
constexpr read_t read
Read-only checkout mode.
Definition: checkout_span.hpp:19
constexpr write_t write
Write-only checkout mode.
Definition: checkout_span.hpp:29
constexpr parallel_policy par
Default parallel execution policy for iterator-based loop functions.
Definition: execution.hpp:89
constexpr sequenced_policy seq
Default serial execution policy for iterator-based loop functions.
Definition: execution.hpp:83
void fini()
Definition: ito.hpp:45
auto root_exec(Fn &&fn, Args &&... args)
Definition: ito.hpp:50
void task_group_begin(task_group_data *tgdata)
Definition: ito.hpp:105
void init(MPI_Comm comm=MPI_COMM_WORLD)
Definition: ito.hpp:41
void poll(PreSuspendCallback &&pre_suspend_cb, PostSuspendCallback &&post_suspend_cb)
Definition: ito.hpp:96
constexpr with_callback_t with_callback
Definition: thread.hpp:11
void task_group_end(PreSuspendCallback &&pre_suspend_cb, PostSuspendCallback &&post_suspend_cb)
Definition: ito.hpp:112
scheduler::task_group_data task_group_data
Definition: ito.hpp:103
void fini()
Definition: ori.hpp:49
void init(MPI_Comm comm=MPI_COMM_WORLD)
Definition: ori.hpp:45
void free(global_ptr< T > ptr, std::size_t count)
Definition: ori.hpp:75
auto release_lazy()
Definition: ori.hpp:200
void free_coll(global_ptr< T > ptr)
Definition: ori.hpp:70
core::instance::instance_type::release_handler release_handler
Definition: ori.hpp:204
void poll()
Definition: ori.hpp:224
void release()
Definition: ori.hpp:196
void acquire()
Definition: ori.hpp:206
Definition: allocator.hpp:16
global_reverse_iterator< global_iterator< T, Mode > > make_reverse_iterator(ori::global_ptr< T > gptr, Mode mode)
Make a reverse iterator for global memory.
Definition: global_iterator.hpp:333
BidirectionalIterator rotate(const ExecutionPolicy &policy, BidirectionalIterator first, BidirectionalIterator middle, BidirectionalIterator last)
Rotate a range.
Definition: parallel_loop.hpp:1135
BidirectionalIteratorD reverse_copy(const ExecutionPolicy &policy, BidirectionalIterator1 first1, BidirectionalIterator1 last1, BidirectionalIteratorD first_d)
Copy a reversed range to another.
Definition: parallel_loop.hpp:1056
ForwardIteratorD copy(const ExecutionPolicy &policy, ForwardIterator1 first1, ForwardIterator1 last1, ForwardIteratorD first_d)
Copy a range to another.
Definition: parallel_loop.hpp:856
auto parallel_invoke(Args &&... args)
Fork parallel tasks and join them.
Definition: parallel_invoke.hpp:238
ForwardIteratorD transform(const ExecutionPolicy &policy, ForwardIterator1 first1, ForwardIterator1 last1, ForwardIteratorD first_d, UnaryOp unary_op)
Transform elements in a given range and store them in another range.
Definition: parallel_loop.hpp:583
ForwardIteratorD rotate_copy(const ExecutionPolicy &policy, ForwardIterator1 first1, ForwardIterator1 middle1, ForwardIterator1 last1, ForwardIteratorD first_d)
Copy a rotated range to another.
Definition: parallel_loop.hpp:1188
void for_each(const ExecutionPolicy &policy, ForwardIterator first, ForwardIterator last, Op op)
Apply an operator to each element in a range.
Definition: parallel_loop.hpp:136
void fill(const ExecutionPolicy &policy, ForwardIterator first, ForwardIterator last, const T &value)
Fill a range with a given value.
Definition: parallel_loop.hpp:771
void swap(global_vector< T > &v1, global_vector< T > &v2) noexcept
Definition: global_vector.hpp:664
void reverse(const ExecutionPolicy &policy, BidirectionalIterator first, BidirectionalIterator last)
Reverse a range.
Definition: parallel_loop.hpp:1014
global_iterator< T, Mode > make_global_iterator(ori::global_ptr< T > gptr, Mode)
Make a global iterator to enable/disable automatic checkout.
Definition: global_iterator.hpp:158
global_move_iterator< global_iterator< T, internal::src_checkout_mode_t< T > > > make_move_iterator(ori::global_ptr< T > gptr)
Make a global iterator for moving objects.
Definition: global_iterator.hpp:258
ForwardIteratorD move(const ExecutionPolicy &policy, ForwardIterator1 first1, ForwardIterator1 last1, ForwardIteratorD first_d)
Move a range to another.
Definition: parallel_loop.hpp:934