12 template <
typename RandomAccessIterator1,
typename RandomAccessIterator2,
typename Compare>
13 inline std::pair<RandomAccessIterator1, RandomAccessIterator2>
14 find_split_points_for_merge(RandomAccessIterator1 first1,
15 RandomAccessIterator1 last1,
16 RandomAccessIterator2 first2,
17 RandomAccessIterator2 last2,
19 auto n1 = std::distance(first1, last1);
20 auto n2 = std::distance(first2, last2);
26 auto [p2, p1] = find_split_points_for_merge(first2, last2, first1, last1, comp);
27 return std::make_pair(p1, p2);
30 auto m = (n1 + n2) / 2;
33 RandomAccessIterator2 it2 = std::next(first2, m);
35 auto&& [css, its] = checkout_global_iterators(1, first1, it2);
36 auto [it1, it2r] = its;
38 if (comp(*it1, *it2r)) {
39 return std::make_pair(last1, it2);
41 return std::make_pair(first1, it2);
46 RandomAccessIterator2 low = first2;
47 RandomAccessIterator2 high = last2;
52 RandomAccessIterator2 it2 = std::next(low, std::distance(low, high) / 2);
54 auto c2 = std::distance(first2, it2);
57 auto&& [css, its] = checkout_global_iterators(1, first1, std::prev(it2));
58 auto [it1r, it2l] = its;
60 if (comp(*it1r, *it2l)) {
65 return std::make_pair(first1, it2);
68 }
else if (m - c2 >= n1) {
70 auto&& [css, its] = checkout_global_iterators(1, std::prev(last1), it2);
71 auto [it1l, it2r] = its;
73 if (comp(*it2r, *it1l)) {
78 return std::make_pair(last1, it2);
83 RandomAccessIterator1 it1 = std::next(first1, m - c2);
90 auto&& [css, its] = checkout_global_iterators(2, std::prev(it1), std::prev(it2));
91 auto [it1_, it2_] = its;
94 auto it1r = std::next(it1_);
96 auto it2r = std::next(it2_);
98 if (comp(*it2r, *it1l)) {
100 low = std::next(it2);
102 }
else if (comp(*it1r, *it2l)) {
107 return std::make_pair(it1, it2);
113 ITYR_TEST_CASE(
"[ityr::pattern::parallel_merge] find_split_points_for_merge") {
117 auto check_fn = [](std::vector<int> v1, std::vector<int> v2) {
118 auto [it1, it2] = find_split_points_for_merge(v1.begin(), v1.end(), v2.begin(), v2.end(), std::less<>{});
119 if (it1 != v1.begin() && it2 != v2.end()) {
122 if (it2 != v2.begin() && it1 != v1.end()) {
125 ITYR_CHECK(!(it1 == v1.begin() && it2 == v2.begin()));
126 ITYR_CHECK(!(it1 == v1.end() && it2 == v2.end()));
129 check_fn({0}, {1, 2, 3, 4, 5});
130 check_fn({2}, {1, 2, 3, 4, 5});
131 check_fn({3}, {1, 2, 3, 4, 5});
132 check_fn({6}, {1, 2, 3, 4, 5});
133 check_fn({1, 4}, {1, 2, 3, 4, 5});
134 check_fn({2, 3}, {1, 2, 3, 4, 5});
135 check_fn({0, 6}, {1, 2, 3, 4, 5});
136 check_fn({0, 1}, {2, 2, 2, 4, 5});
137 check_fn({4, 5}, {2, 2, 2, 2, 3});
138 check_fn({3, 3}, {3, 3, 3, 3, 3, 3, 3});
139 check_fn({3, 4}, {2, 2, 3, 3, 3, 3, 4});
140 check_fn({1, 2, 3, 4, 5}, {1, 2, 3, 4, 5});
141 check_fn({1, 2, 3, 4, 5}, {4, 5, 6, 7, 8});
142 check_fn({1, 2, 3, 4, 5}, {6, 7, 8, 9, 10});
143 check_fn({6, 7, 8, 9, 10}, {1, 2, 3, 4, 5});
144 check_fn({0, 0, 0, 0, 0}, {0, 0, 0, 0, 0});
150 template <
bool Stable,
typename W,
typename RandomAccessIterator,
typename Compare>
151 inline void inplace_merge_aux(
const execution::parallel_policy<W>& policy,
152 RandomAccessIterator first,
153 RandomAccessIterator middle,
154 RandomAccessIterator last,
157 std::size_t d = std::distance(first, last);
159 if (d <= 1 || first == middle || middle == last)
return;
161 if (d <= policy.cutoff_count) {
163 ITYR_CHECK(policy.cutoff_count == policy.checkout_count);
165 auto&& [css, its] = checkout_global_iterators(d, first);
166 auto&& first_ = std::get<0>(its);
168 std::next(first_, std::distance(first, middle)),
169 std::next(first_, d),
174 auto comp_mids = [&]{
175 auto&& [css, its] = checkout_global_iterators(2, std::prev(middle));
176 auto mids = std::get<0>(its);
177 return !comp(*std::next(mids), *mids);
185 auto comp_ends = [&]{
186 auto&& [css, its] = checkout_global_iterators(1, std::prev(last), first);
194 rotate(policy, first, middle, last);
198 auto [s1, s2] = find_split_points_for_merge(first, middle, middle, last, comp);
200 if constexpr (Stable) {
201 if (s1 != middle && s2 != middle) {
207 auto&& [css, its] = checkout_global_iterators(1, s1, std::prev(s2));
208 auto [it1r, it2l] = its;
209 if (!comp(*it1r, *it2l) && !comp(*it2l, *it1r)) {
211 using value_type =
typename std::iterator_traits<RandomAccessIterator>::value_type;
215 s1 = std::partition_point(s1, middle,
216 [&, it = it1r](
const value_type& r) {
return !comp(*it, r); });
221 s2 = std::partition_point(middle, s2,
222 [&, it = it2l](
const value_type& r) {
return comp(r, *it); });
228 auto m =
rotate(policy, s1, middle, s2);
234 [=, s1 = s1] { inplace_merge_aux<Stable>(policy, first, s1, m, comp); },
235 [=, s2 = s2] { inplace_merge_aux<Stable>(policy, m, s2, last, comp); });
272 template <
typename ExecutionPolicy,
typename RandomAccessIterator,
typename Compare>
274 RandomAccessIterator first,
275 RandomAccessIterator middle,
276 RandomAccessIterator last,
278 if constexpr (ori::is_global_ptr_v<RandomAccessIterator>) {
287 internal::inplace_merge_aux<true>(policy, first, middle, last, comp);
315 template <
typename ExecutionPolicy,
typename RandomAccessIterator>
317 RandomAccessIterator first,
318 RandomAccessIterator middle,
319 RandomAccessIterator last) {
323 ITYR_TEST_CASE(
"[ityr::pattern::parallel_merge] inplace_merge") {
329 ori::global_ptr<long> p = ori::malloc_coll<long>(n);
335 execution::parallel_policy(100),
336 count_iterator<long>(0), count_iterator<long>(m), p,
337 [=](
long i) {
return i; });
340 execution::parallel_policy(100),
341 count_iterator<long>(0), count_iterator<long>(n - m), p + m,
342 [=](
long i) {
return i; });
348 inplace_merge(execution::parallel_policy(100), p, p + m, p + n);
365 ori::global_ptr<item> p = ori::malloc_coll<item>(n);
371 execution::parallel_policy(100),
372 count_iterator<long>(0), count_iterator<long>(m), p,
373 [=](
long i) {
return item{i / nb, i}; });
376 execution::parallel_policy(100),
377 count_iterator<long>(0), count_iterator<long>(n - m), p + m,
378 [=](
long i) {
return item{i / nb, i + m}; });
380 auto comp_key = [](
const auto& a,
const auto& b) {
return a.key < b.key ; };
381 auto comp_val = [](
const auto& a,
const auto& b) {
return a.val < b.val; };
384 p, p + n, comp_val) ==
true);
387 p, p + m, p + n, comp_key);
390 p, p + n, comp_key) ==
true);
392 for (
long key = 0; key < m / nb; key++) {
393 bool sorted =
is_sorted(execution::parallel_policy(100),
395 p +
std::min((key + 1) * nb * 2, n),
396 [=](
const auto& a,
const auto& b) {
399 return a.val < b.val;
410 ori::global_ptr<long> p = ori::malloc_coll<long>(n);
414 for (
int i = 0; i < 4; i++) {
417 for (
int i = 4; i < 187; i++) {
420 for (
int i = 187; i < 1635; i++) {
423 for (
int i = 1635; i < 1802; i++) {
428 inplace_merge(execution::parallel_policy(100), p, p + 1635, p + n);
#define ITYR_SUBCASE(name)
Definition: util.hpp:41
#define ITYR_CHECK(cond)
Definition: util.hpp:48
constexpr read_write_t read_write
Read+Write checkout mode.
Definition: checkout_span.hpp:39
void fini()
Definition: ito.hpp:45
auto root_exec(Fn &&fn, Args &&... args)
Definition: ito.hpp:50
void init(MPI_Comm comm=MPI_COMM_WORLD)
Definition: ito.hpp:41
constexpr write_t write
Definition: util.hpp:13
void fini()
Definition: ori.hpp:49
void init(MPI_Comm comm=MPI_COMM_WORLD)
Definition: ori.hpp:45
void checkin(T *raw_ptr, std::size_t count, mode::read_t)
Definition: ori.hpp:168
void free_coll(global_ptr< T > ptr)
Definition: ori.hpp:70
auto checkout(global_ptr< T > ptr, std::size_t count, Mode mode)
Definition: ori.hpp:148
monoid< T, min_functor<>, highest< T > > min
Definition: reducer.hpp:101
Definition: allocator.hpp:16
BidirectionalIterator rotate(const ExecutionPolicy &policy, BidirectionalIterator first, BidirectionalIterator middle, BidirectionalIterator last)
Rotate a range.
Definition: parallel_loop.hpp:1135
auto parallel_invoke(Args &&... args)
Fork parallel tasks and join them.
Definition: parallel_invoke.hpp:238
ForwardIteratorD transform(const ExecutionPolicy &policy, ForwardIterator1 first1, ForwardIterator1 last1, ForwardIteratorD first_d, UnaryOp unary_op)
Transform elements in a given range and store them in another range.
Definition: parallel_loop.hpp:583
void inplace_merge(const ExecutionPolicy &policy, RandomAccessIterator first, RandomAccessIterator middle, RandomAccessIterator last, Compare comp)
Merge two sorted ranges into one sorted range in place.
Definition: parallel_merge.hpp:273
bool is_sorted(const ExecutionPolicy &policy, ForwardIterator first, ForwardIterator last, Compare comp)
Check if a range is sorted.
Definition: parallel_reduce.hpp:1054
void inplace_merge(const ExecutionPolicy &policy, RandomAccessIterator first, RandomAccessIterator middle, RandomAccessIterator last)
Merge two sorted ranges into one sorted range in place.
Definition: parallel_merge.hpp:316