| 1 | ////////////////////////////////////////////////////////////////////////////// |
| 2 | // |
| 3 | // (C) Copyright Ion Gaztanaga 2015-2016. |
| 4 | // Distributed under the Boost Software License, Version 1.0. |
| 5 | // (See accompanying file LICENSE_1_0.txt or copy at |
| 6 | // http://www.boost.org/LICENSE_1_0.txt) |
| 7 | // |
| 8 | // See http://www.boost.org/libs/move for documentation. |
| 9 | // |
| 10 | ////////////////////////////////////////////////////////////////////////////// |
| 11 | // |
| 12 | // Stable sorting that works in O(N*log(N)) worst-case time |
| 13 | // and uses O(1) extra memory |
| 14 | // |
| 15 | ////////////////////////////////////////////////////////////////////////////// |
| 16 | // |
| 17 | // The main idea of the adaptive_sort algorithm was developed by Andrey Astrelin |
| 18 | // and explained in an article on the Russian collaborative blog |
| 19 | // Habrahabr (http://habrahabr.ru/post/205290/). The algorithm is based on |
| 20 | // ideas from B-C. Huang and M. A. Langston explained in their article |
| 21 | // "Fast Stable Merging and Sorting in Constant Extra Space (1989-1992)" |
| 22 | // (http://comjnl.oxfordjournals.org/content/35/6/643.full.pdf). |
| 23 | // |
| 24 | // This implementation by Ion Gaztanaga builds on those ideas with the following additional changes: |
| 25 | // |
| 26 | // - Use of GCD-based rotation. |
| 27 | // - Non-power-of-two buffer sizes. |
| 28 | // - Tries to find sqrt(len)*2 unique keys, so that the merge sort |
| 29 | // phase can form up to sqrt(len)*4 segments if enough keys are found. |
| 30 | // - The merge-sort phase can take advantage of external memory to |
| 31 | // save some additional combination steps. |
| 32 | // - Combination phase: blocks are selection-sorted and merged in parallel. |
| 33 | // - The combination phase alternates merge-to-left and merge-to-right passes, |
| 34 | // minimizing swaps due to internal buffer repositioning. |
| 35 | // - When merging blocks, special optimizations are made to avoid moving some |
| 36 | // elements twice. |
| 37 | // |
| 38 | // The adaptive_merge algorithm was developed by Ion Gaztanaga, reusing some parts |
| 39 | // of the sorting algorithm and implementing an additional block merge algorithm |
| 40 | // that does not need to move elements to the left or right. |
| 41 | ////////////////////////////////////////////////////////////////////////////// |
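| | // |
| | // Usage sketch (illustrative only): the public entry points built on top of |
| | // this header are declared in boost/move/algo/adaptive_sort.hpp and |
| | // boost/move/algo/adaptive_merge.hpp. Assuming a std::vector<int> v and an |
| | // index n that splits two consecutive sorted subranges: |
| | // |
| | //    boost::movelib::adaptive_sort (v.begin(), v.end(), std::less<int>()); |
| | //    boost::movelib::adaptive_merge(v.begin(), v.begin() + n, v.end(), std::less<int>()); |
| | // |
| | ////////////////////////////////////////////////////////////////////////////// |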
| 42 | #ifndef BOOST_MOVE_ADAPTIVE_SORT_MERGE_HPP |
| 43 | #define BOOST_MOVE_ADAPTIVE_SORT_MERGE_HPP |
| 44 | |
| 45 | #include <boost/move/detail/config_begin.hpp> |
| 46 | #include <boost/move/detail/reverse_iterator.hpp> |
| 47 | #include <boost/move/algo/move.hpp> |
| 48 | #include <boost/move/algo/detail/merge.hpp> |
| 49 | #include <boost/move/adl_move_swap.hpp> |
| 50 | #include <boost/move/algo/detail/insertion_sort.hpp> |
| 51 | #include <boost/move/algo/detail/merge_sort.hpp> |
| 53 | #include <boost/assert.hpp> |
| 54 | #include <boost/cstdint.hpp> |
| | #include <boost/static_assert.hpp> |
| 55 | |
| 56 | #ifdef BOOST_MOVE_ADAPTIVE_SORT_STATS |
| 57 | #define BOOST_MOVE_ADAPTIVE_SORT_PRINT(STR, L) \ |
| 58 | print_stats(STR, L)\ |
| 59 | // |
| 60 | #else |
| 61 | #define BOOST_MOVE_ADAPTIVE_SORT_PRINT(STR, L) |
| 62 | #endif |
| 63 | |
| 64 | #ifdef BOOST_MOVE_ADAPTIVE_SORT_INVARIANTS |
| 65 | #define BOOST_MOVE_ADAPTIVE_SORT_INVARIANT BOOST_ASSERT |
| 66 | #else |
| 67 | #define BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(L) |
| 68 | #endif |
| 69 | |
| 70 | |
| 71 | |
| 72 | namespace boost { |
| 73 | namespace movelib { |
| 74 | |
| 75 | namespace detail_adaptive { |
| 76 | |
| 77 | static const std::size_t AdaptiveSortInsertionSortThreshold = 16; |
| 78 | //static const std::size_t AdaptiveSortInsertionSortThreshold = 4; |
| 79 | BOOST_STATIC_ASSERT((AdaptiveSortInsertionSortThreshold&(AdaptiveSortInsertionSortThreshold-1)) == 0); |
| 80 | |
| 81 | #if defined BOOST_HAS_INTPTR_T |
| 82 | typedef ::boost::uintptr_t uintptr_t; |
| 83 | #else |
| 84 | typedef std::size_t uintptr_t; |
| 85 | #endif |
| 86 | |
| 87 | template<class T> |
| 88 | const T &min_value(const T &a, const T &b) |
| 89 | { |
| 90 | return a < b ? a : b; |
| 91 | } |
| 92 | |
| 93 | template<class T> |
| 94 | const T &max_value(const T &a, const T &b) |
| 95 | { |
| 96 | return a > b ? a : b; |
| 97 | } |
| 98 | |
| 99 | template<class ForwardIt, class Pred> |
| 100 | bool is_sorted(ForwardIt const first, ForwardIt last, Pred pred) |
| 101 | { |
| 102 | if (first != last) { |
| 103 | ForwardIt next = first, cur(first); |
| 104 | while (++next != last) { |
| 105 | if (pred(*next, *cur)) |
| 106 | return false; |
| 107 | cur = next; |
| 108 | } |
| 109 | } |
| 110 | return true; |
| 111 | } |
| 112 | |
| 113 | #if defined(BOOST_MOVE_ADAPTIVE_SORT_INVARIANTS) |
| 114 | |
| 115 | bool is_sorted(::order_perf_type *first, ::order_perf_type *last, ::order_type_less) |
| 116 | { |
| 117 | if (first != last) { |
| 118 | const order_perf_type *next = first, *cur(first); |
| 119 | while (++next != last) { |
| 120 | if (!(cur->key < next->key || (cur->key == next->key && cur->val < next->val))) |
| 121 | return false; |
| 122 | cur = next; |
| 123 | } |
| 124 | } |
| 125 | return true; |
| 126 | } |
| 127 | |
| 128 | #endif //BOOST_MOVE_ADAPTIVE_SORT_INVARIANTS |
| 129 | |
| 130 | template<class ForwardIt, class Pred> |
| 131 | bool is_sorted_and_unique(ForwardIt first, ForwardIt last, Pred pred) |
| 132 | { |
| 133 | if (first != last) { |
| 134 | ForwardIt next = first; |
| 135 | while (++next != last) { |
| 136 | if (!pred(*first, *next)) |
| 137 | return false; |
| 138 | first = next; |
| 139 | } |
| 140 | } |
| 141 | return true; |
| 142 | } |
| 143 | |
| 144 | template<class ForwardIt, class Pred, class V> |
| 145 | typename iterator_traits<ForwardIt>::size_type |
| 146 | count_if_with(ForwardIt first, ForwardIt last, Pred pred, const V &v) |
| 147 | { |
| 148 | typedef typename iterator_traits<ForwardIt>::size_type size_type; |
| 149 | size_type count = 0; |
| 150 | while(first != last) { |
| 151 | count += static_cast<size_type>(0 != pred(*first, v)); |
| 152 | ++first; |
| 153 | } |
| 154 | return count; |
| 155 | } |
| 156 | |
| 157 | template<class T, class RandRawIt = T*> |
| 158 | class adaptive_xbuf |
| 159 | { |
| 160 | adaptive_xbuf(const adaptive_xbuf &); |
| 161 | adaptive_xbuf & operator=(const adaptive_xbuf &); |
| 162 | |
| 163 | public: |
| 164 | typedef RandRawIt iterator; |
| 165 | |
| 166 | adaptive_xbuf() |
| 167 | : m_ptr(), m_size(0), m_capacity(0) |
| 168 | {} |
| 169 | |
| 170 | adaptive_xbuf(RandRawIt raw_memory, std::size_t capacity) |
| 171 | : m_ptr(raw_memory), m_size(0), m_capacity(capacity) |
| 172 | {} |
| 173 | |
| 174 | template<class RandIt> |
| 175 | void move_assign(RandIt first, std::size_t n) |
| 176 | { |
| 177 | if(n <= m_size){ |
| 178 | boost::move(first, first+n, m_ptr); |
| 179 | std::size_t size = m_size; |
| 180 | while(size-- != n){ |
| 181 | m_ptr[size].~T(); |
| 182 | } |
| 183 | m_size = n; |
| 184 | } |
| 185 | else{ |
| 186 | RandRawIt result = boost::move(first, first+m_size, m_ptr); |
| 187 | boost::uninitialized_move(first+m_size, first+n, result); |
| 188 | m_size = n; |
| 189 | } |
| 190 | } |
| 191 | |
| 192 | template<class RandIt> |
| 193 | void push_back(RandIt first, std::size_t n) |
| 194 | { |
| 195 | BOOST_ASSERT(m_capacity - m_size >= n); |
| 196 | boost::uninitialized_move(first, first+n, m_ptr+m_size); |
| 197 | m_size += n; |
| 198 | } |
| 199 | |
| 200 | template<class RandIt> |
| 201 | iterator add(RandIt it) |
| 202 | { |
| 203 | BOOST_ASSERT(m_size < m_capacity); |
| 204 | RandRawIt p_ret = m_ptr + m_size; |
| 205 | ::new(&*p_ret) T(::boost::move(*it)); |
| 206 | ++m_size; |
| 207 | return p_ret; |
| 208 | } |
| 209 | |
| 210 | template<class RandIt> |
| 211 | void insert(iterator pos, RandIt it) |
| 212 | { |
| 213 | if(pos == (m_ptr + m_size)){ |
| 214 | this->add(it); |
| 215 | } |
| 216 | else{ |
| 217 | this->add(m_ptr+m_size-1); |
| 218 | //m_size updated |
| 219 | boost::move_backward(pos, m_ptr+m_size-2, m_ptr+m_size-1); |
| 220 | *pos = boost::move(*it); |
| 221 | } |
| 222 | } |
| 223 | |
| 224 | void set_size(std::size_t size) |
| 225 | { |
| 226 | m_size = size; |
| 227 | } |
| 228 | |
| 229 | void shrink_to_fit(std::size_t const size) |
| 230 | { |
| 231 | if(m_size > size){ |
| 232 | for(std::size_t szt_i = size; szt_i != m_size; ++szt_i){ |
| 233 | m_ptr[szt_i].~T(); |
| 234 | } |
| 235 | m_size = size; |
| 236 | } |
| 237 | } |
| 238 | |
| 239 | void initialize_until(std::size_t const size, T &t) |
| 240 | { |
| 241 | BOOST_ASSERT(m_size < m_capacity); |
| 242 | if(m_size < size){ |
| 243 | ::new((void*)&m_ptr[m_size]) T(::boost::move(t)); |
| 244 | ++m_size; |
| 245 | for(; m_size != size; ++m_size){ |
| 246 | ::new((void*)&m_ptr[m_size]) T(::boost::move(m_ptr[m_size-1])); |
| 247 | } |
| 248 | t = ::boost::move(m_ptr[m_size-1]); |
| 249 | } |
| 250 | } |
| 251 | |
| 252 | private: |
| 253 | template<class RIt> |
| 254 | static bool is_raw_ptr(RIt) |
| 255 | { |
| 256 | return false; |
| 257 | } |
| 258 | |
| 259 | static bool is_raw_ptr(T*) |
| 260 | { |
| 261 | return true; |
| 262 | } |
| 263 | |
| 264 | public: |
| 265 | template<class U> |
| 266 | bool supports_aligned_trailing(std::size_t size, std::size_t trail_count) const |
| 267 | { |
| 268 | if(this->is_raw_ptr(this->data()) && m_capacity){ |
| 269 | uintptr_t u_addr_sz = uintptr_t(&*(this->data()+size)); |
| 270 | uintptr_t u_addr_cp = uintptr_t(&*(this->data()+this->capacity())); |
| 271 | u_addr_sz = ((u_addr_sz + sizeof(U)-1)/sizeof(U))*sizeof(U); |
| 272 | return (u_addr_cp >= u_addr_sz) && ((u_addr_cp - u_addr_sz)/sizeof(U) >= trail_count); |
| 273 | } |
| 274 | return false; |
| 275 | } |
| 276 | |
| 277 | template<class U> |
| 278 | U *aligned_trailing() const |
| 279 | { |
| 280 | return this->aligned_trailing<U>(this->size()); |
| 281 | } |
| 282 | |
| 283 | template<class U> |
| 284 | U *aligned_trailing(std::size_t pos) const |
| 285 | { |
| 286 | uintptr_t u_addr = uintptr_t(&*(this->data()+pos)); |
| 287 | u_addr = ((u_addr + sizeof(U)-1)/sizeof(U))*sizeof(U); |
| 288 | return (U*)u_addr; |
| 289 | } |
| 290 | |
| 291 | ~adaptive_xbuf() |
| 292 | { |
| 293 | this->clear(); |
| 294 | } |
| 295 | |
| 296 | std::size_t capacity() const |
| 297 | { return m_capacity; } |
| 298 | |
| 299 | iterator data() const |
| 300 | { return m_ptr; } |
| 301 | |
| 302 | iterator end() const |
| 303 | { return m_ptr+m_size; } |
| 304 | |
| 305 | std::size_t size() const |
| 306 | { return m_size; } |
| 307 | |
| 308 | bool empty() const |
| 309 | { return !m_size; } |
| 310 | |
| 311 | void clear() |
| 312 | { |
| 313 | this->shrink_to_fit(0u); |
| 314 | } |
| 315 | |
| 316 | private: |
| 317 | RandRawIt m_ptr; |
| 318 | std::size_t m_size; |
| 319 | std::size_t m_capacity; |
| 320 | }; |
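| | //Illustrative sketch (not a prescribed usage pattern): adaptive_xbuf is a |
| | //non-owning view over caller-provided uninitialized storage; it constructs |
| | //elements on demand and destroys whatever is still alive in its destructor. |
| | //Assuming 'raw_mem' points to suitably aligned raw storage for 'Cap' objects |
| | //of type T (both names are hypothetical): |
| | // |
| | //   adaptive_xbuf<T> xbuf(raw_mem, Cap); |
| | //   xbuf.move_assign(first, n);   //move-constructs n elements (n <= Cap) |
| | //   //... use [xbuf.data(), xbuf.end()) as the external merge buffer ... |
| | //   xbuf.clear();                 //optional, the destructor also destroys them |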
| 321 | |
| 322 | template<class Iterator, class Op> |
| 323 | class range_xbuf |
| 324 | { |
| 325 | range_xbuf(const range_xbuf &); |
| 326 | range_xbuf & operator=(const range_xbuf &); |
| 327 | |
| 328 | public: |
| 329 | typedef typename iterator_traits<Iterator>::size_type size_type; |
| 330 | typedef Iterator iterator; |
| 331 | |
| 332 | range_xbuf(Iterator first, Iterator last) |
| 333 | : m_first(first), m_last(first), m_cap(last) |
| 334 | {} |
| 335 | |
| 336 | template<class RandIt> |
| 337 | void move_assign(RandIt first, std::size_t n) |
| 338 | { |
| 339 | BOOST_ASSERT(size_type(n) <= size_type(m_cap-m_first)); |
| 340 | m_last = Op()(forward_t(), first, first+n, m_first); |
| 341 | } |
| 342 | |
| 343 | ~range_xbuf() |
| 344 | {} |
| 345 | |
| 346 | std::size_t capacity() const |
| 347 | { return m_cap-m_first; } |
| 348 | |
| 349 | Iterator data() const |
| 350 | { return m_first; } |
| 351 | |
| 352 | Iterator end() const |
| 353 | { return m_last; } |
| 354 | |
| 355 | std::size_t size() const |
| 356 | { return m_last-m_first; } |
| 357 | |
| 358 | bool empty() const |
| 359 | { return m_first == m_last; } |
| 360 | |
| 361 | void clear() |
| 362 | { |
| 363 | m_last = m_first; |
| 364 | } |
| 365 | |
| 366 | template<class RandIt> |
| 367 | iterator add(RandIt it) |
| 368 | { |
| 369 | Iterator pos(m_last); |
| 370 | *pos = boost::move(*it); |
| 371 | ++m_last; |
| 372 | return pos; |
| 373 | } |
| 374 | |
| 375 | void set_size(std::size_t size) |
| 376 | { |
| 377 | m_last = m_first; |
| 378 | m_last += size; |
| 379 | } |
| 380 | |
| 381 | private: |
| 382 | Iterator const m_first; |
| 383 | Iterator m_last; |
| 384 | Iterator const m_cap; |
| 385 | }; |
| 386 | |
| 387 | |
| 388 | template<class RandIt, class Compare> |
| 389 | RandIt skip_until_merge |
| 390 | ( RandIt first1, RandIt const last1 |
| 391 | , const typename iterator_traits<RandIt>::value_type &next_key, Compare comp) |
| 392 | { |
| 393 | while(first1 != last1 && !comp(next_key, *first1)){ |
| 394 | ++first1; |
| 395 | } |
| 396 | return first1; |
| 397 | } |
| 398 | |
| 399 | |
| 400 | template<class RandIt1, class RandIt2, class RandItB, class Compare, class Op> |
| 401 | RandItB op_buffered_partial_merge_to_range1_and_buffer |
| 402 | ( RandIt1 first1, RandIt1 const last1 |
| 403 | , RandIt2 &rfirst2, RandIt2 const last2 |
| 404 | , RandItB &rfirstb, Compare comp, Op op ) |
| 405 | { |
| 406 | RandItB firstb = rfirstb; |
| 407 | RandItB lastb = firstb; |
| 408 | RandIt2 first2 = rfirst2; |
| 409 | |
| 410 | //Move to buffer while merging |
| 411 | //Three-way moves need fewer moves when op is swap_op, so use it |
| 412 | //when merging elements from range2 into the destination occupied by range1 |
| 413 | if(first1 != last1 && first2 != last2){ |
| 414 | op(three_way_t(), first2++, first1++, lastb++); |
| 415 | |
| 416 | while(true){ |
| 417 | if(first1 == last1){ |
| 418 | break; |
| 419 | } |
| 420 | if(first2 == last2){ |
| 421 | lastb = op(forward_t(), first1, last1, firstb); |
| 422 | break; |
| 423 | } |
| 424 | op(three_way_t(), comp(*first2, *firstb) ? first2++ : firstb++, first1++, lastb++); |
| 425 | } |
| 426 | rfirst2 = first2; |
| 427 | rfirstb = firstb; |
| 428 | } |
| 429 | |
| 430 | return lastb; |
| 431 | } |
| 432 | |
| 433 | template<class RandItKeys, class RandIt> |
| 434 | void swap_and_update_key |
| 435 | ( bool is_next_far_away |
| 436 | , RandItKeys const key_next |
| 437 | , RandItKeys const key_range2 |
| 438 | , RandItKeys &key_mid |
| 439 | , RandIt const begin |
| 440 | , RandIt const end |
| 441 | , RandIt const with) |
| 442 | { |
| 443 | if(is_next_far_away){ |
| 444 | ::boost::adl_move_swap_ranges(begin, end, with); |
| 445 | ::boost::adl_move_swap(*key_next, *key_range2); |
| 446 | if(key_next == key_mid){ |
| 447 | key_mid = key_range2; |
| 448 | } |
| 449 | else if(key_mid == key_range2){ |
| 450 | key_mid = key_next; |
| 451 | } |
| 452 | } |
| 453 | } |
| 454 | |
| 455 | /////////////////////////////////////////////////////////////////////////////// |
| 456 | // |
| 457 | // MERGE BUFFERLESS |
| 458 | // |
| 459 | /////////////////////////////////////////////////////////////////////////////// |
| 460 | |
| 461 | // [first1, last1) merge [last1,last2) -> [first1,last2) |
| 462 | template<class RandIt, class Compare> |
| 463 | RandIt partial_merge_bufferless_impl |
| 464 | (RandIt first1, RandIt last1, RandIt const last2, bool *const pis_range1_A, Compare comp) |
| 465 | { |
| 466 | if(last1 == last2){ |
| 467 | return first1; |
| 468 | } |
| 469 | bool const is_range1_A = *pis_range1_A; |
| 470 | if(first1 != last1 && comp(*last1, last1[-1])){ |
| 471 | do{ |
| 472 | RandIt const old_last1 = last1; |
| 473 | last1 = boost::movelib::lower_bound(last1, last2, *first1, comp); |
| 474 | first1 = rotate_gcd(first1, old_last1, last1);//old_last1 == last1 supported |
| 475 | if(last1 == last2){ |
| 476 | return first1; |
| 477 | } |
| 478 | do{ |
| 479 | ++first1; |
| 480 | } while(last1 != first1 && !comp(*last1, *first1) ); |
| 481 | } while(first1 != last1); |
| 482 | } |
| 483 | *pis_range1_A = !is_range1_A; |
| 484 | return last1; |
| 485 | } |
| 486 | |
| 487 | // [first1, last1) merge [last1,last2) -> [first1,last2) |
| 488 | template<class RandIt, class Compare> |
| 489 | RandIt partial_merge_bufferless |
| 490 | (RandIt first1, RandIt last1, RandIt const last2, bool *const pis_range1_A, Compare comp) |
| 491 | { |
| 492 | return *pis_range1_A ? partial_merge_bufferless_impl(first1, last1, last2, pis_range1_A, comp) |
| 493 | : partial_merge_bufferless_impl(first1, last1, last2, pis_range1_A, antistable<Compare>(comp)); |
| 494 | } |
| 495 | |
| 496 | template<class SizeType> |
| 497 | static SizeType needed_keys_count(SizeType n_block_a, SizeType n_block_b) |
| 498 | { |
| 499 | return n_block_a + n_block_b; |
| 500 | } |
| 501 | |
| 502 | template<class RandItKeys, class KeyCompare, class RandIt, class Compare> |
| 503 | typename iterator_traits<RandIt>::size_type |
| 504 | find_next_block |
| 505 | ( RandItKeys key_first |
| 506 | , KeyCompare key_comp |
| 507 | , RandIt const first |
| 508 | , typename iterator_traits<RandIt>::size_type const l_block |
| 509 | , typename iterator_traits<RandIt>::size_type const ix_first_block |
| 510 | , typename iterator_traits<RandIt>::size_type const ix_last_block |
| 511 | , Compare comp) |
| 512 | { |
| 513 | typedef typename iterator_traits<RandIt>::size_type size_type; |
| 514 | typedef typename iterator_traits<RandIt>::value_type value_type; |
| 515 | typedef typename iterator_traits<RandItKeys>::value_type key_type; |
| 516 | BOOST_ASSERT(ix_first_block <= ix_last_block); |
| 517 | size_type ix_min_block = 0u; |
| 518 | for (size_type szt_i = ix_first_block; szt_i < ix_last_block; ++szt_i) { |
| 519 | const value_type &min_val = first[ix_min_block*l_block]; |
| 520 | const value_type &cur_val = first[szt_i*l_block]; |
| 521 | const key_type &min_key = key_first[ix_min_block]; |
| 522 | const key_type &cur_key = key_first[szt_i]; |
| 523 | |
| 524 | bool const less_than_minimum = comp(cur_val, min_val) || |
| 525 | (!comp(min_val, cur_val) && key_comp(cur_key, min_key)); |
| 526 | |
| 527 | if (less_than_minimum) { |
| 528 | ix_min_block = szt_i; |
| 529 | } |
| 530 | } |
| 531 | return ix_min_block; |
| 532 | } |
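| | //Example (illustrative): with l_block == 4 and four blocks whose first |
| | //elements are {7, 3, 3, 9} and whose keys order as K2, K3, K1, K0 |
| | //(K0 < K1 < K2 < K3), scanning [0, 4) returns index 2: blocks 1 and 2 tie |
| | //on the value 3 and the tie is broken by the smaller key (K1 < K3), which |
| | //preserves stability. |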
| 533 | |
| 534 | template<class RandItKeys, class KeyCompare, class RandIt, class Compare> |
| 535 | void merge_blocks_bufferless |
| 536 | ( RandItKeys key_first |
| 537 | , KeyCompare key_comp |
| 538 | , RandIt const first |
| 539 | , typename iterator_traits<RandIt>::size_type const l_block |
| 540 | , typename iterator_traits<RandIt>::size_type const l_irreg1 |
| 541 | , typename iterator_traits<RandIt>::size_type const n_block_a |
| 542 | , typename iterator_traits<RandIt>::size_type const n_block_b |
| 543 | , typename iterator_traits<RandIt>::size_type const l_irreg2 |
| 544 | , Compare comp) |
| 545 | { |
| 546 | typedef typename iterator_traits<RandIt>::size_type size_type; |
| 547 | size_type const key_count = needed_keys_count(n_block_a, n_block_b); (void)key_count; |
| 548 | //BOOST_ASSERT(n_block_a || n_block_b); |
| 549 | BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted_and_unique(key_first, key_first + key_count, key_comp)); |
| 550 | BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!n_block_b || n_block_a == count_if_with(key_first, key_first + key_count, key_comp, key_first[n_block_a])); |
| 551 | |
| 552 | size_type n_bef_irreg2 = 0; |
| 553 | bool l_irreg_pos_count = true; |
| 554 | RandItKeys key_mid(key_first + n_block_a); |
| 555 | RandIt const first_irr2 = first + l_irreg1 + (n_block_a+n_block_b)*l_block; |
| 556 | RandIt const last_irr2 = first_irr2 + l_irreg2; |
| 557 | |
| 558 | { //Selection sort blocks |
| 559 | size_type n_block_left = n_block_b + n_block_a; |
| 560 | RandItKeys key_range2(key_first); |
| 561 | |
| 562 | size_type min_check = n_block_a == n_block_left ? 0u : n_block_a; |
| 563 | size_type max_check = min_value(min_check+1, n_block_left); |
| 564 | for (RandIt f = first+l_irreg1; n_block_left; --n_block_left, ++key_range2, f += l_block, min_check -= min_check != 0, max_check -= max_check != 0) { |
| 565 | size_type const next_key_idx = find_next_block(key_range2, key_comp, f, l_block, min_check, max_check, comp); |
| 566 | RandItKeys const key_next(key_range2 + next_key_idx); |
| 567 | max_check = min_value(max_value(max_check, next_key_idx+2), n_block_left); |
| 568 | |
| 569 | RandIt const first_min = f + next_key_idx*l_block; |
| 570 | |
| 571 | //Check if the irregular b block should be merged before the current block. |
| 572 | //If so, stop counting the regular blocks that precede the irregular block. |
| 573 | if (l_irreg_pos_count && l_irreg2 && comp(*first_irr2, *first_min)){ |
| 574 | l_irreg_pos_count = false; |
| 575 | } |
| 576 | n_bef_irreg2 += l_irreg_pos_count; |
| 577 | |
| 578 | swap_and_update_key(next_key_idx != 0, key_next, key_range2, key_mid, f, f + l_block, first_min); |
| 579 | BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(f, f+l_block, comp)); |
| 580 | BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first_min, first_min + l_block, comp)); |
| 581 | BOOST_MOVE_ADAPTIVE_SORT_INVARIANT((f == (first+l_irreg1)) || !comp(*f, *(f-l_block))); |
| 582 | } |
| 583 | } |
| 584 | BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first+l_irreg1+n_bef_irreg2*l_block, first_irr2, comp)); |
| 585 | |
| 586 | RandIt first1 = first; |
| 587 | RandIt last1 = first+l_irreg1; |
| 588 | RandItKeys const key_end (key_first+n_bef_irreg2); |
| 589 | bool is_range1_A = true; |
| 590 | |
| 591 | for( ; key_first != key_end; ++key_first){ |
| 592 | bool is_range2_A = key_mid == (key_first+key_count) || key_comp(*key_first, *key_mid); |
| 593 | first1 = is_range1_A == is_range2_A |
| 594 | ? last1 : partial_merge_bufferless(first1, last1, last1 + l_block, &is_range1_A, comp); |
| 595 | last1 += l_block; |
| 596 | BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first, first1, comp)); |
| 597 | } |
| 598 | |
| 599 | merge_bufferless(is_range1_A ? first1 : last1, first_irr2, last_irr2, comp); |
| 600 | BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first, last_irr2, comp)); |
| 601 | } |
| 602 | |
| 603 | /////////////////////////////////////////////////////////////////////////////// |
| 604 | // |
| 605 | // BUFFERED MERGE |
| 606 | // |
| 607 | /////////////////////////////////////////////////////////////////////////////// |
| 608 | template<class RandIt, class Compare, class Op, class Buf> |
| 609 | void op_buffered_merge |
| 610 | ( RandIt first, RandIt const middle, RandIt last |
| 611 | , Compare comp, Op op |
| 612 | , Buf &xbuf) |
| 613 | { |
| 614 | if(first != middle && middle != last && comp(*middle, middle[-1])){ |
| 615 | typedef typename iterator_traits<RandIt>::size_type size_type; |
| 616 | size_type const len1 = size_type(middle-first); |
| 617 | size_type const len2 = size_type(last-middle); |
| 618 | if(len1 <= len2){ |
| 619 | first = boost::movelib::upper_bound(first, middle, *middle, comp); |
| 620 | xbuf.move_assign(first, size_type(middle-first)); |
| 621 | op_merge_with_right_placed |
| 622 | (xbuf.data(), xbuf.end(), first, middle, last, comp, op); |
| 623 | } |
| 624 | else{ |
| 625 | last = boost::movelib::lower_bound(middle, last, middle[-1], comp); |
| 626 | xbuf.move_assign(middle, size_type(last-middle)); |
| 627 | op_merge_with_left_placed |
| 628 | (first, middle, last, xbuf.data(), xbuf.end(), comp, op); |
| 629 | } |
| 630 | } |
| 631 | } |
| 632 | |
| 633 | template<class RandIt, class Compare, class XBuf> |
| 634 | void buffered_merge |
| 635 | ( RandIt first, RandIt const middle, RandIt last |
| 636 | , Compare comp |
| 637 | , XBuf &xbuf) |
| 638 | { |
| 639 | op_buffered_merge(first, middle, last, comp, move_op(), xbuf); |
| 640 | } |
| 641 | |
| 642 | // Complexity: 2*distance(first, last)+max_collected^2/2 |
| 643 | // |
| 644 | // Tries to collect at most max_collected unique elements from [first, last) |
| 645 | // at the beginning of the range, ordered according to comp |
| 646 | // |
| 647 | // Returns the number of collected keys |
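| | // |
| | // Example (illustrative): for the range {3, 1, 4, 1, 5, 9} with |
| | // max_collected == 4 and comp == less, the keys found in order of appearance |
| | // are 3, 1, 4 and 5; after the call the range starts with {1, 3, 4, 5} |
| | // (the collected keys, sorted), 4 is returned and the remaining elements |
| | // (the duplicate 1 and the 9) follow the keys. |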
| 648 | template<class RandIt, class Compare, class XBuf> |
| 649 | typename iterator_traits<RandIt>::size_type |
| 650 | collect_unique |
| 651 | ( RandIt const first, RandIt const last |
| 652 | , typename iterator_traits<RandIt>::size_type const max_collected, Compare comp |
| 653 | , XBuf & xbuf) |
| 654 | { |
| 655 | typedef typename iterator_traits<RandIt>::size_type size_type; |
| 656 | size_type h = 0; |
| 657 | if(max_collected){ |
| 658 | ++h; // first key is always here |
| 659 | RandIt h0 = first; |
| 660 | RandIt u = first; ++u; |
| 661 | RandIt search_end = u; |
| 662 | |
| 663 | if(xbuf.capacity() >= max_collected){ |
| 664 | typename XBuf::iterator const ph0 = xbuf.add(first); |
| 665 | while(u != last && h < max_collected){ |
| 666 | typename XBuf::iterator const r = boost::movelib::lower_bound(ph0, xbuf.end(), *u, comp); |
| 667 | //If the key was not found, insert it into the sorted keys held in xbuf |
| 668 | if(r == xbuf.end() || comp(*u, *r) ){ |
| 669 | RandIt const new_h0 = boost::move(search_end, u, h0); |
| 670 | search_end = u; |
| 671 | ++search_end; |
| 672 | ++h; |
| 673 | xbuf.insert(r, u); |
| 674 | h0 = new_h0; |
| 675 | } |
| 676 | ++u; |
| 677 | } |
| 678 | boost::move_backward(first, h0, h0+h); |
| 679 | boost::move(xbuf.data(), xbuf.end(), first); |
| 680 | } |
| 681 | else{ |
| 682 | while(u != last && h < max_collected){ |
| 683 | RandIt const r = boost::movelib::lower_bound(h0, search_end, *u, comp); |
| 684 | //If the key was not found, add it to the sorted key range [h0, h0+h) |
| 685 | if(r == search_end || comp(*u, *r) ){ |
| 686 | RandIt const new_h0 = rotate_gcd(h0, search_end, u); |
| 687 | search_end = u; |
| 688 | ++search_end; |
| 689 | ++h; |
| 690 | rotate_gcd(r+(new_h0-h0), u, search_end); |
| 691 | h0 = new_h0; |
| 692 | } |
| 693 | ++u; |
| 694 | } |
| 695 | rotate_gcd(first, h0, h0+h); |
| 696 | } |
| 697 | } |
| 698 | return h; |
| 699 | } |
| 700 | |
| 701 | template<class Unsigned> |
| 702 | Unsigned floor_sqrt(Unsigned const n) |
| 703 | { |
| 704 | Unsigned x = n; |
| 705 | Unsigned y = x/2 + (x&1); |
| 706 | while (y < x){ |
| 707 | x = y; |
| 708 | y = (x + n / x)/2; |
| 709 | } |
| 710 | return x; |
| 711 | } |
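| | //Example (illustrative): floor_sqrt(10) performs the Newton iteration |
| | //x = 10, 5, 3 and stops when y = (3 + 10/3)/2 == 3 is no longer smaller |
| | //than x, returning 3. |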
| 712 | |
| 713 | template<class Unsigned> |
| 714 | Unsigned ceil_sqrt(Unsigned const n) |
| 715 | { |
| 716 | Unsigned r = floor_sqrt(n); |
| 717 | return r + Unsigned((n%r) != 0); |
| 718 | } |
| 719 | |
| 720 | template<class Unsigned> |
| 721 | Unsigned floor_merge_multiple(Unsigned const n, Unsigned &base, Unsigned &pow) |
| 722 | { |
| 723 | Unsigned s = n; |
| 724 | Unsigned p = 0; |
| 725 | while(s > AdaptiveSortInsertionSortThreshold){ |
| 726 | s /= 2; |
| 727 | ++p; |
| 728 | } |
| 729 | base = s; |
| 730 | pow = p; |
| 731 | return s << p; |
| 732 | } |
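| | //Example (illustrative): floor_merge_multiple(100) halves 100 -> 50 -> 25 -> 12 |
| | //(three halvings), so base == 12, pow == 3 and the result is 12 << 3 == 96, the |
| | //largest value of the form base << pow (base <= AdaptiveSortInsertionSortThreshold) |
| | //that does not exceed 100. |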
| 733 | |
| 734 | template<class Unsigned> |
| 735 | Unsigned ceil_merge_multiple(Unsigned const n, Unsigned &base, Unsigned &pow) |
| 736 | { |
| 737 | Unsigned fm = floor_merge_multiple(n, base, pow); |
| 738 | |
| 739 | if(fm != n){ |
| 740 | if(base < AdaptiveSortInsertionSortThreshold){ |
| 741 | ++base; |
| 742 | } |
| 743 | else{ |
| 744 | base = AdaptiveSortInsertionSortThreshold/2 + 1; |
| 745 | ++pow; |
| 746 | } |
| 747 | } |
| 748 | return base << pow; |
| 749 | } |
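| | //Example (illustrative): for n == 100, floor_merge_multiple yields 96 with |
| | //base == 12 and pow == 3; since 96 != 100 and base is below the insertion sort |
| | //threshold, base is bumped to 13 and the result is 13 << 3 == 104, the smallest |
| | //such multiple that is not below 100. |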
| 750 | |
| 751 | template<class Unsigned> |
| 752 | Unsigned ceil_sqrt_multiple(Unsigned const n, Unsigned *pbase = 0) |
| 753 | { |
| 754 | Unsigned const r = ceil_sqrt(n); |
| 755 | Unsigned pow = 0; |
| 756 | Unsigned base = 0; |
| 757 | Unsigned const res = ceil_merge_multiple(r, base, pow); |
| 758 | if(pbase) *pbase = base; |
| 759 | return res; |
| 760 | } |
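| | //Example (illustrative): ceil_sqrt_multiple(1000): ceil_sqrt(1000) == 32 and |
| | //ceil_merge_multiple leaves 32 unchanged (32 == 16 << 1), so the result is 32 |
| | //and *pbase is set to 16. |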
| 761 | |
| 762 | struct less |
| 763 | { |
| 764 | template<class T> |
| 765 | bool operator()(const T &l, const T &r) |
| 766 | { return l < r; } |
| 767 | }; |
| 768 | |
| 769 | /////////////////////////////////////////////////////////////////////////////// |
| 770 | // |
| 771 | // MERGE BLOCKS |
| 772 | // |
| 773 | /////////////////////////////////////////////////////////////////////////////// |
| 774 | |
| 775 | //#define ADAPTIVE_SORT_MERGE_SLOW_STABLE_SORT_IS_NLOGN |
| 776 | |
| 777 | #if defined ADAPTIVE_SORT_MERGE_SLOW_STABLE_SORT_IS_NLOGN |
| 778 | template<class RandIt, class Compare> |
| 779 | void slow_stable_sort |
| 780 | ( RandIt const first, RandIt const last, Compare comp) |
| 781 | { |
| 782 | boost::movelib::inplace_stable_sort(first, last, comp); |
| 783 | } |
| 784 | |
| 785 | #else //ADAPTIVE_SORT_MERGE_SLOW_STABLE_SORT_IS_NLOGN |
| 786 | |
| 787 | template<class RandIt, class Compare> |
| 788 | void slow_stable_sort |
| 789 | ( RandIt const first, RandIt const last, Compare comp) |
| 790 | { |
| 791 | typedef typename iterator_traits<RandIt>::size_type size_type; |
| 792 | size_type L = size_type(last - first); |
| 793 | { //Use insertion sort to merge first elements |
| 794 | size_type m = 0; |
| 795 | while((L - m) > size_type(AdaptiveSortInsertionSortThreshold)){ |
| 796 | insertion_sort(first+m, first+m+size_type(AdaptiveSortInsertionSortThreshold), comp); |
| 797 | m += AdaptiveSortInsertionSortThreshold; |
| 798 | } |
| 799 | insertion_sort(first+m, last, comp); |
| 800 | } |
| 801 | |
| 802 | size_type h = AdaptiveSortInsertionSortThreshold; |
| 803 | for(bool do_merge = L > h; do_merge; h*=2){ |
| 804 | do_merge = (L - h) > h; |
| 805 | size_type p0 = 0; |
| 806 | if(do_merge){ |
| 807 | size_type const h_2 = 2*h; |
| 808 | while((L-p0) > h_2){ |
| 809 | merge_bufferless(first+p0, first+p0+h, first+p0+h_2, comp); |
| 810 | p0 += h_2; |
| 811 | } |
| 812 | } |
| 813 | if((L-p0) > h){ |
| 814 | merge_bufferless(first+p0, first+p0+h, last, comp); |
| 815 | } |
| 816 | } |
| 817 | } |
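| | //Illustrative walk-through of slow_stable_sort above for a range of length 50 |
| | //(threshold 16): insertion_sort is applied to [0,16), [16,32), [32,48) and |
| | //[48,50); the h == 16 pass merges [0,16,32) and [32,48,50); the h == 32 pass |
| | //merges [0,32,50), leaving the whole range stably sorted. |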
| 818 | |
| 819 | #endif //ADAPTIVE_SORT_MERGE_SLOW_STABLE_SORT_IS_NLOGN |
| 820 | |
| 821 | //Returns new l_block and updates use_buf |
| 822 | template<class Unsigned> |
| 823 | Unsigned lblock_for_combine |
| 824 | (Unsigned const l_block, Unsigned const n_keys, Unsigned const l_data, bool &use_buf) |
| 825 | { |
| 826 | BOOST_ASSERT(l_data > 1); |
| 827 | |
| 828 | //We need to guarantee l_block >= l_merged/(n_keys/2) for the combination. |
| 829 | //We have at least 4 keys guaranteed (which are the minimum to merge 2 ranges). |
| 830 | //If l_block != 0, then n_keys is already enough to merge all blocks in all |
| 831 | //phases as we've found all needed keys for that buffer and length before. |
| 832 | //If l_block == 0 then see if half of the keys can be used as buffer and the rest |
| 833 | //as keys, guaranteeing that n_keys >= (2*l_merged)/l_block. |
| 834 | if(!l_block){ |
| 835 | //If l_block == 0 then n_keys is power of two |
| 836 | //(guaranteed by build_params(...)) |
| 837 | BOOST_ASSERT(n_keys >= 4); |
| 838 | //BOOST_ASSERT(0 == (n_keys &(n_keys-1))); |
| 839 | |
| 840 | //See if half keys are at least 4 and if half keys fulfill |
| 841 | Unsigned const new_buf = n_keys/2; |
| 842 | Unsigned const new_keys = n_keys-new_buf; |
| 843 | use_buf = new_keys >= 4 && new_keys >= l_data/new_buf; |
| 844 | if(use_buf){ |
| 845 | return new_buf; |
| 846 | } |
| 847 | else{ |
| 848 | return l_data/n_keys; |
| 849 | } |
| 850 | } |
| 851 | else{ |
| 852 | use_buf = true; |
| 853 | return l_block; |
| 854 | } |
| 855 | } |
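| | //Worked examples (illustrative): with l_block == 0, n_keys == 32 and |
| | //l_data == 100, half of the keys (16) can act as buffer because the remaining |
| | //16 satisfy 16 >= 100/16, so use_buf becomes true and 16 is returned. With |
| | //l_block == 0, n_keys == 8 and l_data == 100, 4 keys are not enough |
| | //(4 < 100/4), so use_buf is set to false and l_data/n_keys == 12 is returned. |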
| 856 | |
| 857 | template<class RandIt, class Compare, class XBuf> |
| 858 | void stable_sort( RandIt first, RandIt last, Compare comp, XBuf & xbuf) |
| 859 | { |
| 860 | typedef typename iterator_traits<RandIt>::size_type size_type; |
| 861 | size_type const len = size_type(last - first); |
| 862 | size_type const half_len = len/2 + (len&1); |
| 863 | if(std::size_t(xbuf.capacity() - xbuf.size()) >= half_len) { |
| 864 | merge_sort(first, last, comp, xbuf.data()+xbuf.size()); |
| 865 | } |
| 866 | else{ |
| 867 | slow_stable_sort(first, last, comp); |
| 868 | } |
| 869 | } |
| 870 | |
| 871 | template<class RandIt, class Comp, class XBuf> |
| 872 | void initialize_keys( RandIt first, RandIt last |
| 873 | , Comp comp |
| 874 | , XBuf & xbuf) |
| 875 | { |
| 876 | stable_sort(first, last, comp, xbuf); |
| 877 | } |
| 878 | |
| 879 | template<class RandIt, class U> |
| 880 | void initialize_keys( RandIt first, RandIt last |
| 881 | , less |
| 882 | , U &) |
| 883 | { |
| 884 | typedef typename iterator_traits<RandIt>::value_type value_type; |
| 885 | std::size_t count = std::size_t(last - first); |
| 886 | for(std::size_t i = 0; i != count; ++i){ |
| 887 | *first = value_type(i); |
| 888 | ++first; |
| 889 | } |
| 890 | } |
| 891 | |
| 892 | template<class RandIt> |
| 893 | void move_data_backward( RandIt cur_pos |
| 894 | , typename iterator_traits<RandIt>::size_type const l_data |
| 895 | , RandIt new_pos |
| 896 | , bool const xbuf_used) |
| 897 | { |
| 898 | //Move buffer to the total combination right |
| 899 | if(xbuf_used){ |
| 900 | boost::move_backward(cur_pos, cur_pos+l_data, new_pos+l_data); |
| 901 | } |
| 902 | else{ |
| 903 | boost::adl_move_swap_ranges_backward(cur_pos, cur_pos+l_data, new_pos+l_data); |
| 904 | //Rotate needs fewer moves but it seems slower due to cache issues |
| 905 | //rotate_gcd(first-l_block, first+len-l_block, first+len); |
| 906 | } |
| 907 | } |
| 908 | |
| 909 | template<class RandIt> |
| 910 | void move_data_forward( RandIt cur_pos |
| 911 | , typename iterator_traits<RandIt>::size_type const l_data |
| 912 | , RandIt new_pos |
| 913 | , bool const xbuf_used) |
| 914 | { |
| 915 | //Move buffer to the total combination left |
| 916 | if(xbuf_used){ |
| 917 | boost::move(cur_pos, cur_pos+l_data, new_pos); |
| 918 | } |
| 919 | else{ |
| 920 | boost::adl_move_swap_ranges(cur_pos, cur_pos+l_data, new_pos); |
| 921 | //Rotate needs fewer moves but it seems slower due to cache issues |
| 922 | //rotate_gcd(first-l_block, first+len-l_block, first+len); |
| 923 | } |
| 924 | } |
| 925 | |
| 926 | template <class Unsigned> |
| 927 | Unsigned calculate_total_combined(Unsigned const len, Unsigned const l_prev_merged, Unsigned *pl_irreg_combined = 0) |
| 928 | { |
| 929 | typedef Unsigned size_type; |
| 930 | |
| 931 | size_type const l_combined = 2*l_prev_merged; |
| 932 | size_type l_irreg_combined = len%l_combined; |
| 933 | size_type l_total_combined = len; |
| 934 | if(l_irreg_combined <= l_prev_merged){ |
| 935 | l_total_combined -= l_irreg_combined; |
| 936 | l_irreg_combined = 0; |
| 937 | } |
| 938 | if(pl_irreg_combined) |
| 939 | *pl_irreg_combined = l_irreg_combined; |
| 940 | return l_total_combined; |
| 941 | } |
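| | //Example (illustrative): with len == 100 and l_prev_merged == 16 the combined |
| | //length is 32 and the trailing irregular part is 100 % 32 == 4; since 4 <= 16 |
| | //it is left out of this pass (total == 96, irregular == 0). With len == 120 |
| | //the remainder 120 % 32 == 24 exceeds 16, so total == 120 and irregular == 24. |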
| 942 | |
| 943 | template<class RandItKeys, class KeyCompare, class SizeType, class XBuf> |
| 944 | void combine_params |
| 945 | ( RandItKeys const keys |
| 946 | , KeyCompare key_comp |
| 947 | , SizeType l_combined |
| 948 | , SizeType const l_prev_merged |
| 949 | , SizeType const l_block |
| 950 | , XBuf & xbuf |
| 951 | //Output |
| 952 | , SizeType &n_block_a |
| 953 | , SizeType &n_block_b |
| 954 | , SizeType &l_irreg1 |
| 955 | , SizeType &l_irreg2 |
| 956 | //Options |
| 957 | , bool do_initialize_keys = true) |
| 958 | { |
| 959 | typedef SizeType size_type; |
| 960 | |
| 961 | //Initial parameters for selection sort blocks |
| 962 | l_irreg1 = l_prev_merged%l_block; |
| 963 | l_irreg2 = (l_combined-l_irreg1)%l_block; |
| 964 | BOOST_ASSERT(((l_combined-l_irreg1-l_irreg2)%l_block) == 0); |
| 965 | size_type const n_reg_block = (l_combined-l_irreg1-l_irreg2)/l_block; |
| 966 | n_block_a = l_prev_merged/l_block; |
| 967 | n_block_b = n_reg_block - n_block_a; |
| 968 | BOOST_ASSERT(n_reg_block>=n_block_a); |
| 969 | |
| 970 | //Key initialization |
| 971 | if (do_initialize_keys) { |
| 972 | initialize_keys(keys, keys + needed_keys_count(n_block_a, n_block_b), key_comp, xbuf); |
| 973 | } |
| 974 | } |
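| | //Example (illustrative): with l_combined == 32, l_prev_merged == 16 and |
| | //l_block == 6: l_irreg1 == 16 % 6 == 4, l_irreg2 == (32 - 4) % 6 == 4, there |
| | //are (32 - 4 - 4)/6 == 4 regular blocks, n_block_a == 16/6 == 2 and |
| | //n_block_b == 4 - 2 == 2. |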
| 975 | |
| 976 | template<class RandIt1, class RandIt2, class RandItB, class Compare, class Op> |
| 977 | RandItB op_buffered_partial_merge_and_swap_to_range1_and_buffer |
| 978 | ( RandIt1 first1, RandIt1 const last1 |
| 979 | , RandIt2 &rfirst2, RandIt2 const last2, RandIt2 &rfirst_min |
| 980 | , RandItB &rfirstb, Compare comp, Op op ) |
| 981 | { |
| 982 | RandItB firstb = rfirstb; |
| 983 | RandItB lastb = firstb; |
| 984 | RandIt2 first2 = rfirst2; |
| 985 | |
| 986 | //Move to buffer while merging |
| 987 | //Three-way moves need fewer moves when op is swap_op, so use it |
| 988 | //when merging elements from range2 into the destination occupied by range1 |
| 989 | if(first1 != last1 && first2 != last2){ |
| 990 | RandIt2 first_min = rfirst_min; |
| 991 | op(four_way_t(), first2++, first_min++, first1++, lastb++); |
| 992 | |
| 993 | while(first1 != last1){ |
| 994 | if(first2 == last2){ |
| 995 | lastb = op(forward_t(), first1, last1, firstb); |
| 996 | break; |
| 997 | } |
| 998 | bool const min_less = comp(*first_min, *firstb); |
| 999 | |
| 1000 | if(min_less){ |
| 1001 | op( four_way_t(), first2++, first_min++, first1++, lastb++); |
| 1002 | } |
| 1003 | else{ |
| 1004 | op(three_way_t(), firstb++, first1++, lastb++); |
| 1005 | } |
| 1006 | } |
| 1007 | rfirst2 = first2; |
| 1008 | rfirstb = firstb; |
| 1009 | rfirst_min = first_min; |
| 1010 | } |
| 1011 | |
| 1012 | return lastb; |
| 1013 | } |
| 1014 | |
| 1015 | ////////////////////////////////// |
| 1016 | // |
| 1017 | // partial_merge |
| 1018 | // |
| 1019 | ////////////////////////////////// |
| 1020 | template<class InputIt1, class InputIt2, class OutputIt, class Compare, class Op> |
| 1021 | OutputIt op_partial_merge_impl |
| 1022 | (InputIt1 &r_first1, InputIt1 const last1, InputIt2 &r_first2, InputIt2 const last2, OutputIt d_first, Compare comp, Op op) |
| 1023 | { |
| 1024 | InputIt1 first1(r_first1); |
| 1025 | InputIt2 first2(r_first2); |
| 1026 | if(first2 != last2 && last1 != first1) |
| 1027 | while(1){ |
| 1028 | if(comp(*first2, *first1)) { |
| 1029 | op(first2++, d_first++); |
| 1030 | if(first2 == last2){ |
| 1031 | break; |
| 1032 | } |
| 1033 | } |
| 1034 | else{ |
| 1035 | op(first1++, d_first++); |
| 1036 | if(first1 == last1){ |
| 1037 | break; |
| 1038 | } |
| 1039 | } |
| 1040 | } |
| 1041 | r_first1 = first1; |
| 1042 | r_first2 = first2; |
| 1043 | return d_first; |
| 1044 | } |
| 1045 | |
| 1046 | template<class InputIt1, class InputIt2, class OutputIt, class Compare, class Op> |
| 1047 | OutputIt op_partial_merge |
| 1048 | (InputIt1 &r_first1, InputIt1 const last1, InputIt2 &r_first2, InputIt2 const last2, OutputIt d_first, Compare comp, Op op, bool is_stable) |
| 1049 | { |
| 1050 | return is_stable ? op_partial_merge_impl(r_first1, last1, r_first2, last2, d_first, comp, op) |
| 1051 | : op_partial_merge_impl(r_first1, last1, r_first2, last2, d_first, antistable<Compare>(comp), op); |
| 1052 | } |
| 1053 | |
| 1054 | ////////////////////////////////// |
| 1055 | // |
| 1056 | // partial_merge_and_swap |
| 1057 | // |
| 1058 | ////////////////////////////////// |
| 1059 | template<class InputIt1, class InputIt2, class OutputIt, class Compare, class Op> |
| 1060 | OutputIt op_partial_merge_and_swap_impl |
| 1061 | (InputIt1 &r_first1, InputIt1 const last1, InputIt2 &r_first2, InputIt2 const last2, InputIt2 &r_first_min, OutputIt d_first, Compare comp, Op op) |
| 1062 | { |
| 1063 | InputIt1 first1(r_first1); |
| 1064 | InputIt2 first2(r_first2); |
| 1065 | |
| 1066 | if(first2 != last2 && last1 != first1) { |
| 1067 | InputIt2 first_min(r_first_min); |
| 1068 | bool non_empty_ranges = true; |
| 1069 | do{ |
| 1070 | if(comp(*first_min, *first1)) { |
| 1071 | op(three_way_t(), first2++, first_min++, d_first++); |
| 1072 | non_empty_ranges = first2 != last2; |
| 1073 | } |
| 1074 | else{ |
| 1075 | op(first1++, d_first++); |
| 1076 | non_empty_ranges = first1 != last1; |
| 1077 | } |
| 1078 | } while(non_empty_ranges); |
| 1079 | r_first_min = first_min; |
| 1080 | r_first1 = first1; |
| 1081 | r_first2 = first2; |
| 1082 | } |
| 1083 | return d_first; |
| 1084 | } |
| 1085 | |
| 1086 | template<class RandIt, class InputIt2, class OutputIt, class Compare, class Op> |
| 1087 | RandIt op_partial_merge_and_swap |
| 1088 | (RandIt &r_first1, RandIt const last1, RandIt &r_first2, RandIt const last2, InputIt2 &r_first_min, OutputIt d_first, Compare comp, Op op, bool is_stable) |
| 1089 | { |
| 1090 | return is_stable ? op_partial_merge_and_swap_impl(r_first1, last1, r_first2, last2, r_first_min, d_first, comp, op) |
| 1091 | : op_partial_merge_and_swap_impl(r_first1, last1, r_first2, last2, r_first_min, d_first, antistable<Compare>(comp), op); |
| 1092 | } |
| 1093 | |
| 1094 | template<class RandIt, class RandItBuf, class Compare, class Op> |
| 1095 | RandIt op_partial_merge_and_save_impl |
| 1096 | ( RandIt first1, RandIt const last1, RandIt &rfirst2, RandIt last2, RandIt first_min |
| 1097 | , RandItBuf &buf_first1_in_out, RandItBuf &buf_last1_in_out |
| 1098 | , Compare comp, Op op |
| 1099 | ) |
| 1100 | { |
| 1101 | RandItBuf buf_first1 = buf_first1_in_out; |
| 1102 | RandItBuf buf_last1 = buf_last1_in_out; |
| 1103 | RandIt first2(rfirst2); |
| 1104 | |
| 1105 | bool const do_swap = first2 != first_min; |
| 1106 | if(buf_first1 == buf_last1){ |
| 1107 | //Skip any element that does not need to be moved |
| 1108 | RandIt new_first1 = skip_until_merge(first1, last1, *first_min, comp); |
| 1109 | buf_first1 += (new_first1-first1); |
| 1110 | first1 = new_first1; |
| 1111 | buf_last1 = do_swap ? op_buffered_partial_merge_and_swap_to_range1_and_buffer(first1, last1, first2, last2, first_min, buf_first1, comp, op) |
| 1112 | : op_buffered_partial_merge_to_range1_and_buffer (first1, last1, first2, last2, buf_first1, comp, op); |
| 1113 | first1 = last1; |
| 1114 | } |
| 1115 | else{ |
| 1116 | BOOST_ASSERT((last1-first1) == (buf_last1 - buf_first1)); |
| 1117 | } |
| 1118 | |
| 1119 | //Now merge from buffer |
| 1120 | first1 = do_swap ? op_partial_merge_and_swap_impl(buf_first1, buf_last1, first2, last2, first_min, first1, comp, op) |
| 1121 | : op_partial_merge_impl (buf_first1, buf_last1, first2, last2, first1, comp, op); |
| 1122 | buf_first1_in_out = buf_first1; |
| 1123 | buf_last1_in_out = buf_last1; |
| 1124 | rfirst2 = first2; |
| 1125 | return first1; |
| 1126 | } |
| 1127 | |
| 1128 | template<class RandIt, class RandItBuf, class Compare, class Op> |
| 1129 | RandIt op_partial_merge_and_save |
| 1130 | ( RandIt first1, RandIt const last1, RandIt &rfirst2, RandIt last2, RandIt first_min |
| 1131 | , RandItBuf &buf_first1_in_out |
| 1132 | , RandItBuf &buf_last1_in_out |
| 1133 | , Compare comp |
| 1134 | , Op op |
| 1135 | , bool is_stable) |
| 1136 | { |
| 1137 | return is_stable |
| 1138 | ? op_partial_merge_and_save_impl |
| 1139 | (first1, last1, rfirst2, last2, first_min, buf_first1_in_out, buf_last1_in_out, comp, op) |
| 1140 | : op_partial_merge_and_save_impl |
| 1141 | (first1, last1, rfirst2, last2, first_min, buf_first1_in_out, buf_last1_in_out, antistable<Compare>(comp), op) |
| 1142 | ; |
| 1143 | } |
| 1144 | |
| 1145 | |
| 1146 | |
| 1147 | template<class RandItKeys, class KeyCompare, class RandIt, class RandIt2, class OutputIt, class Compare, class Op> |
| 1148 | OutputIt op_merge_blocks_with_irreg |
| 1149 | ( RandItKeys key_first |
| 1150 | , RandItKeys key_mid |
| 1151 | , KeyCompare key_comp |
| 1152 | , RandIt first_reg |
| 1153 | , RandIt2 &first_irr |
| 1154 | , RandIt2 const last_irr |
| 1155 | , OutputIt dest |
| 1156 | , typename iterator_traits<RandIt>::size_type const l_block |
| 1157 | , typename iterator_traits<RandIt>::size_type n_block_left |
| 1158 | , typename iterator_traits<RandIt>::size_type min_check |
| 1159 | , typename iterator_traits<RandIt>::size_type max_check |
| 1160 | , Compare comp, bool const is_stable, Op op) |
| 1161 | { |
| 1162 | typedef typename iterator_traits<RandIt>::size_type size_type; |
| 1163 | |
| 1164 | for(; n_block_left; --n_block_left, ++key_first, min_check -= min_check != 0, max_check -= max_check != 0){ |
| 1165 | size_type next_key_idx = find_next_block(key_first, key_comp, first_reg, l_block, min_check, max_check, comp); |
| 1166 | max_check = min_value(max_value(max_check, next_key_idx+2), n_block_left); |
| 1167 | RandIt const last_reg = first_reg + l_block; |
| 1168 | RandIt first_min = first_reg + next_key_idx*l_block; |
| 1169 | RandIt const last_min = first_min + l_block; (void)last_min; |
| 1170 | |
| 1171 | BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first_reg, last_reg, comp)); |
| 1172 | BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!next_key_idx || is_sorted(first_min, last_min, comp)); |
| 1173 | BOOST_MOVE_ADAPTIVE_SORT_INVARIANT((!next_key_idx || !comp(*first_reg, *first_min ))); |
| 1174 | |
| 1175 | OutputIt orig_dest = dest; (void)orig_dest; |
| 1176 | dest = next_key_idx ? op_partial_merge_and_swap(first_irr, last_irr, first_reg, last_reg, first_min, dest, comp, op, is_stable) |
| 1177 | : op_partial_merge (first_irr, last_irr, first_reg, last_reg, dest, comp, op, is_stable); |
| 1178 | BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(orig_dest, dest, comp)); |
| 1179 | |
| 1180 | if(first_reg == dest){ |
| 1181 | dest = next_key_idx ? ::boost::adl_move_swap_ranges(first_min, last_min, first_reg) |
| 1182 | : last_reg; |
| 1183 | } |
| 1184 | else{ |
| 1185 | dest = next_key_idx ? op(three_way_forward_t(), first_reg, last_reg, first_min, dest) |
| 1186 | : op(forward_t(), first_reg, last_reg, dest); |
| 1187 | } |
| 1188 | |
| 1189 | RandItKeys const key_next(key_first + next_key_idx); |
| 1190 | swap_and_update_key(next_key_idx != 0, key_next, key_first, key_mid, last_reg, last_reg, first_min); |
| 1191 | |
| 1192 | BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(orig_dest, dest, comp)); |
| 1193 | first_reg = last_reg; |
| 1194 | } |
| 1195 | return dest; |
| 1196 | } |
| 1197 | |
| 1198 | template<class RandItKeys, class KeyCompare, class RandIt, class Compare, class Op> |
| 1199 | void op_merge_blocks_left |
| 1200 | ( RandItKeys const key_first |
| 1201 | , KeyCompare key_comp |
| 1202 | , RandIt const first |
| 1203 | , typename iterator_traits<RandIt>::size_type const l_block |
| 1204 | , typename iterator_traits<RandIt>::size_type const l_irreg1 |
| 1205 | , typename iterator_traits<RandIt>::size_type const n_block_a |
| 1206 | , typename iterator_traits<RandIt>::size_type const n_block_b |
| 1207 | , typename iterator_traits<RandIt>::size_type const l_irreg2 |
| 1208 | , Compare comp, Op op) |
| 1209 | { |
| 1210 | typedef typename iterator_traits<RandIt>::size_type size_type; |
| 1211 | size_type const key_count = needed_keys_count(n_block_a, n_block_b); (void)key_count; |
| 1212 | // BOOST_ASSERT(n_block_a || n_block_b); |
| 1213 | BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted_and_unique(key_first, key_first + key_count, key_comp)); |
| 1214 | BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!n_block_b || n_block_a == count_if_with(key_first, key_first + key_count, key_comp, key_first[n_block_a])); |
| 1215 | |
| 1216 | size_type n_block_b_left = n_block_b; |
| 1217 | size_type n_block_a_left = n_block_a; |
| 1218 | size_type n_block_left = n_block_b + n_block_a; |
| 1219 | RandItKeys key_mid(key_first + n_block_a); |
| 1220 | |
| 1221 | RandIt buffer = first - l_block; |
| 1222 | RandIt first1 = first; |
| 1223 | RandIt last1 = first1 + l_irreg1; |
| 1224 | RandIt first2 = last1; |
| 1225 | RandIt const irreg2 = first2 + n_block_left*l_block; |
| 1226 | bool is_range1_A = true; |
| 1227 | |
| 1228 | RandItKeys key_range2(key_first); |
| 1229 | |
| 1230 | //////////////////////////////////////////////////////////////////////////// |
| 1231 | //Process all regular blocks before the irregular B block |
| 1232 | //////////////////////////////////////////////////////////////////////////// |
| 1233 | size_type min_check = n_block_a == n_block_left ? 0u : n_block_a; |
| 1234 | size_type max_check = min_value(min_check+1, n_block_left); |
| 1235 | for (; n_block_left; --n_block_left, ++key_range2, min_check -= min_check != 0, max_check -= max_check != 0) { |
| 1236 | size_type const next_key_idx = find_next_block(key_range2, key_comp, first2, l_block, min_check, max_check, comp); |
| 1237 | max_check = min_value(max_value(max_check, next_key_idx+2), n_block_left); |
| 1238 | RandIt const first_min = first2 + next_key_idx*l_block; |
| 1239 | RandIt const last_min = first_min + l_block; (void)last_min; |
| 1240 | RandIt const last2 = first2 + l_block; |
| 1241 | |
| 1242 | BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first1, last1, comp)); |
| 1243 | BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first2, last2, comp)); |
| 1244 | BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!n_block_left || is_sorted(first_min, last_min, comp)); |
| 1245 | |
| 1246 | //Check if irregular b block should go here. |
| 1247 | //If so, break to the special code handling the irregular block |
| 1248 | if (!n_block_b_left && |
| 1249 | ( (l_irreg2 && comp(*irreg2, *first_min)) || (!l_irreg2 && is_range1_A)) ){ |
| 1250 | break; |
| 1251 | } |
| 1252 | |
| 1253 | RandItKeys const key_next(key_range2 + next_key_idx); |
| 1254 | bool const is_range2_A = key_mid == (key_first+key_count) || key_comp(*key_next, *key_mid); |
| 1255 | |
| 1256 | bool const is_buffer_middle = last1 == buffer; |
| 1257 | BOOST_MOVE_ADAPTIVE_SORT_INVARIANT( ( is_buffer_middle && size_type(first2-buffer) == l_block && buffer == last1) || |
| 1258 | (!is_buffer_middle && size_type(first1-buffer) == l_block && first2 == last1)); |
| 1259 | |
| 1260 | if(is_range1_A == is_range2_A){ |
| 1261 | BOOST_ASSERT((first1 == last1) || !comp(*first_min, last1[-1])); |
| 1262 | if(!is_buffer_middle){ |
| 1263 | buffer = op(forward_t(), first1, last1, buffer); |
| 1264 | } |
| 1265 | swap_and_update_key(next_key_idx != 0, key_next, key_range2, key_mid, first2, last2, first_min); |
| 1266 | first1 = first2; |
| 1267 | last1 = last2; |
| 1268 | } |
| 1269 | else { |
| 1270 | RandIt unmerged; |
| 1271 | RandIt buf_beg; |
| 1272 | RandIt buf_end; |
| 1273 | if(is_buffer_middle){ |
| 1274 | buf_end = buf_beg = first2 - (last1-first1); |
| 1275 | unmerged = op_partial_merge_and_save( first1, last1, first2, last2, first_min |
| 1276 | , buf_beg, buf_end, comp, op, is_range1_A); |
| 1277 | } |
| 1278 | else{ |
| 1279 | buf_beg = first1; |
| 1280 | buf_end = last1; |
| 1281 | unmerged = op_partial_merge_and_save |
| 1282 | (buffer, buffer+(last1-first1), first2, last2, first_min, buf_beg, buf_end, comp, op, is_range1_A); |
| 1283 | } |
| 1284 | (void)unmerged; |
| 1285 | BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first-l_block, unmerged, comp)); |
| 1286 | |
| 1287 | swap_and_update_key( next_key_idx != 0, key_next, key_range2, key_mid, first2, last2 |
| 1288 | , last_min - size_type(last2 - first2)); |
| 1289 | |
| 1290 | if(buf_beg != buf_end){ //range2 exhausted: is_buffer_middle for the next iteration |
| 1291 | first1 = buf_beg; |
| 1292 | last1 = buf_end; |
| 1293 | BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(buf_end == (last2-l_block)); |
| 1294 | buffer = last1; |
| 1295 | } |
| 1296 | else{ //range1 exhausted: !is_buffer_middle for the next iteration |
| 1297 | first1 = first2; |
| 1298 | last1 = last2; |
| 1299 | buffer = first2 - l_block; |
| 1300 | is_range1_A = is_range2_A; |
| 1301 | } |
| 1302 | } |
| 1303 | BOOST_MOVE_ADAPTIVE_SORT_INVARIANT( (is_range2_A && n_block_a_left) || (!is_range2_A && n_block_b_left)); |
| 1304 | is_range2_A ? --n_block_a_left : --n_block_b_left; |
| 1305 | first2 = last2; |
| 1306 | } |
| 1307 | |
| 1308 | BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!n_block_b || n_block_a == count_if_with(key_first, key_range2 + n_block_left, key_comp, *key_mid)); |
| 1309 | BOOST_ASSERT(!n_block_b_left); |
| 1310 | |
| 1311 | //////////////////////////////////////////////////////////////////////////// |
| 1312 | //Process remaining range 1 left before the irregular B block |
| 1313 | //////////////////////////////////////////////////////////////////////////// |
| 1314 | bool const is_buffer_middle = last1 == buffer; |
| 1315 | RandIt first_irr2 = irreg2; |
| 1316 | RandIt const last_irr2 = first_irr2 + l_irreg2; |
| 1317 | if(l_irreg2 && is_range1_A){ |
| 1318 | if(is_buffer_middle){ |
| 1319 | first1 = skip_until_merge(first1, last1, *first_irr2, comp); |
| 1320 | //Even if we copy backward, no overlapping occurs so use forward copy, |
| 1321 | //which can be faster, especially with trivial types |
| 1322 | RandIt const new_first1 = first2 - (last1 - first1); |
| 1323 | op(forward_t(), first1, last1, new_first1); |
| 1324 | first1 = new_first1; |
| 1325 | last1 = first2; |
| 1326 | buffer = first1 - l_block; |
| 1327 | } |
| 1328 | buffer = op_partial_merge_impl(first1, last1, first_irr2, last_irr2, buffer, comp, op); |
| 1329 | buffer = op(forward_t(), first1, last1, buffer); |
| 1330 | } |
| 1331 | else if(!is_buffer_middle){ |
| 1332 | buffer = op(forward_t(), first1, last1, buffer); |
| 1333 | } |
| 1334 | BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first-l_block, buffer, comp)); |
| 1335 | |
| 1336 | //////////////////////////////////////////////////////////////////////////// |
| 1337 | //Process irregular B block and remaining A blocks |
| 1338 | //////////////////////////////////////////////////////////////////////////// |
| 1339 | buffer = op_merge_blocks_with_irreg |
| 1340 | ( key_range2, key_mid, key_comp, first2, first_irr2, last_irr2 |
| 1341 | , buffer, l_block, n_block_left, min_check, max_check, comp, false, op); |
| 1342 | buffer = op(forward_t(), first_irr2, last_irr2, buffer);(void)buffer; |
| 1343 | BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first-l_block, buffer, comp)); |
| 1344 | } |
| 1345 | |
| 1346 | // first - first element to merge. |
| 1347 | // first[-l_block, 0) - buffer (if use_buf == true) |
| 1348 | // l_block - length of regular blocks. First nblocks are stable sorted by 1st elements and key-coded |
| 1349 | // keys - sequence of keys, in same order as blocks. key<midkey means stream A |
| 1350 | // n_bef_irreg2/n_aft_irreg2 are regular blocks |
| 1351 | // l_irreg2 is an irregular block that is to be combined after n_bef_irreg2 blocks and before n_aft_irreg2 blocks |
| 1352 | // If l_irreg2==0 then n_aft_irreg2==0 (no irregular blocks). |
| 1353 | template<class RandItKeys, class KeyCompare, class RandIt, class Compare> |
| 1354 | void merge_blocks_left |
| 1355 | ( RandItKeys const key_first |
| 1356 | , KeyCompare key_comp |
| 1357 | , RandIt const first |
| 1358 | , typename iterator_traits<RandIt>::size_type const l_block |
| 1359 | , typename iterator_traits<RandIt>::size_type const l_irreg1 |
| 1360 | , typename iterator_traits<RandIt>::size_type const n_block_a |
| 1361 | , typename iterator_traits<RandIt>::size_type const n_block_b |
| 1362 | , typename iterator_traits<RandIt>::size_type const l_irreg2 |
| 1363 | , Compare comp |
| 1364 | , bool const xbuf_used) |
| 1365 | { |
| 1366 | BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!n_block_b || n_block_a == count_if_with(key_first, key_first + needed_keys_count(n_block_a, n_block_b), key_comp, key_first[n_block_a])); |
| 1367 | if(xbuf_used){ |
| 1368 | op_merge_blocks_left |
| 1369 | (key_first, key_comp, first, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, move_op()); |
| 1370 | } |
| 1371 | else{ |
| 1372 | op_merge_blocks_left |
| 1373 | (key_first, key_comp, first, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, swap_op()); |
| 1374 | } |
| 1375 | } |
| 1376 | |
| 1377 | |
| 1378 | // first - first element to merge. |
| 1379 | // [first+l_block*(n_bef_irreg2+n_aft_irreg2)+l_irreg2, first+l_block*(n_bef_irreg2+n_aft_irreg2+1)+l_irreg2) - buffer |
| 1380 | // l_block - length of regular blocks. First nblocks are stable sorted by 1st elements and key-coded |
| 1381 | // keys - sequence of keys, in same order as blocks. key<midkey means stream A |
| 1382 | // n_bef_irreg2/n_aft_irreg2 are regular blocks |
| 1383 | // l_irreg2 is an irregular block that is to be combined after n_bef_irreg2 blocks and before n_aft_irreg2 blocks |
| 1384 | // If l_irreg2==0 then n_aft_irreg2==0 (no irregular blocks). |
| 1385 | template<class RandItKeys, class KeyCompare, class RandIt, class Compare> |
| 1386 | void merge_blocks_right |
| 1387 | ( RandItKeys const key_first |
| 1388 | , KeyCompare key_comp |
| 1389 | , RandIt const first |
| 1390 | , typename iterator_traits<RandIt>::size_type const l_block |
| 1391 | , typename iterator_traits<RandIt>::size_type const n_block_a |
| 1392 | , typename iterator_traits<RandIt>::size_type const n_block_b |
| 1393 | , typename iterator_traits<RandIt>::size_type const l_irreg2 |
| 1394 | , Compare comp |
| 1395 | , bool const xbuf_used) |
| 1396 | { |
| 1397 | merge_blocks_left |
| 1398 | ( make_reverse_iterator(key_first + needed_keys_count(n_block_a, n_block_b)) |
| 1399 | , inverse<KeyCompare>(key_comp) |
| 1400 | , make_reverse_iterator(first + ((n_block_a+n_block_b)*l_block+l_irreg2)) |
| 1401 | , l_block |
| 1402 | , l_irreg2 |
| 1403 | , n_block_b |
| 1404 | , n_block_a |
| 1405 | , 0 |
| 1406 | , inverse<Compare>(comp), xbuf_used); |
| 1407 | } |
| 1408 | |
| 1409 | template<class RandItKeys, class KeyCompare, class RandIt, class Compare, class Op, class RandItBuf> |
| 1410 | void op_merge_blocks_with_buf |
| 1411 | ( RandItKeys key_first |
| 1412 | , KeyCompare key_comp |
| 1413 | , RandIt const first |
| 1414 | , typename iterator_traits<RandIt>::size_type const l_block |
| 1415 | , typename iterator_traits<RandIt>::size_type const l_irreg1 |
| 1416 | , typename iterator_traits<RandIt>::size_type const n_block_a |
| 1417 | , typename iterator_traits<RandIt>::size_type const n_block_b |
| 1418 | , typename iterator_traits<RandIt>::size_type const l_irreg2 |
| 1419 | , Compare comp |
| 1420 | , Op op |
| 1421 | , RandItBuf const buf_first) |
| 1422 | { |
| 1423 | typedef typename iterator_traits<RandIt>::size_type size_type; |
| 1424 | size_type const key_count = needed_keys_count(n_block_a, n_block_b); (void)key_count; |
| 1425 | //BOOST_ASSERT(n_block_a || n_block_b); |
| 1426 | BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted_and_unique(key_first, key_first + key_count, key_comp)); |
| 1427 | BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!n_block_b || n_block_a == count_if_with(key_first, key_first + key_count, key_comp, key_first[n_block_a])); |
| 1428 | |
| 1429 | size_type n_block_b_left = n_block_b; |
| 1430 | size_type n_block_a_left = n_block_a; |
| 1431 | size_type n_block_left = n_block_b + n_block_a; |
| 1432 | RandItKeys key_mid(key_first + n_block_a); |
| 1433 | |
| 1434 | RandItBuf buffer = buf_first; |
| 1435 | RandItBuf buffer_end = buffer; |
| 1436 | RandIt first1 = first; |
| 1437 | RandIt last1 = first1 + l_irreg1; |
| 1438 | RandIt first2 = last1; |
| 1439 | RandIt const first_irr2 = first2 + n_block_left*l_block; |
| 1440 | bool is_range1_A = true; |
| 1441 | |
| 1442 | RandItKeys key_range2(key_first); |
| 1443 | |
| 1444 | //////////////////////////////////////////////////////////////////////////// |
| 1445 | //Process all regular blocks before the irregular B block |
| 1446 | //////////////////////////////////////////////////////////////////////////// |
| 1447 | size_type min_check = n_block_a == n_block_left ? 0u : n_block_a; |
| 1448 | size_type max_check = min_value(min_check+1, n_block_left); |
| 1449 | for (; n_block_left; --n_block_left, ++key_range2, min_check -= min_check != 0, max_check -= max_check != 0) { |
| 1450 | size_type const next_key_idx = find_next_block(key_range2, key_comp, first2, l_block, min_check, max_check, comp); |
| 1451 | max_check = min_value(max_value(max_check, next_key_idx+2), n_block_left); |
| 1452 | RandIt first_min = first2 + next_key_idx*l_block; |
| 1453 | RandIt const last_min = first_min + l_block; (void)last_min; |
| 1454 | RandIt const last2 = first2 + l_block; |
| 1455 | |
| 1456 | bool const buffer_empty = buffer == buffer_end; (void)buffer_empty; |
| 1457 | BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(buffer_empty ? is_sorted(first1, last1, comp) : is_sorted(buffer, buffer_end, comp)); |
| 1458 | BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first2, last2, comp)); |
| 1459 | BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!n_block_left || is_sorted(first_min, last_min, comp)); |
| 1460 | |
| 1461 | //Check if irregular b block should go here. |
| 1462 | //If so, break to the special code handling the irregular block |
| 1463 | if (!n_block_b_left && |
| 1464 | ( (l_irreg2 && comp(*first_irr2, *first_min)) || (!l_irreg2 && is_range1_A)) ){ |
| 1465 | break; |
| 1466 | } |
| 1467 | |
| 1468 | RandItKeys const key_next(key_range2 + next_key_idx); |
| 1469 | bool const is_range2_A = key_mid == (key_first+key_count) || key_comp(*key_next, *key_mid); |
| 1470 | |
| 1471 | if(is_range1_A == is_range2_A){ |
| 1472 | BOOST_MOVE_ADAPTIVE_SORT_INVARIANT((first1 == last1) || (buffer_empty ? !comp(*first_min, last1[-1]) : !comp(*first_min, buffer_end[-1]))); |
| 1473 | //If buffered, put those elements in place |
| 1474 | RandIt res = op(forward_t(), buffer, buffer_end, first1); |
| 1475 | buffer = buffer_end = buf_first; |
| 1476 | BOOST_ASSERT(buffer_empty || res == last1); (void)res; |
| 1477 | swap_and_update_key(next_key_idx != 0, key_next, key_range2, key_mid, first2, last2, first_min); |
| 1478 | BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first2, last2, comp)); |
| 1479 | BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first_min, last_min, comp)); |
| 1480 | first1 = first2; |
| 1481 | BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first, first1, comp)); |
| 1482 | } |
| 1483 | else { |
| 1484 | RandIt const unmerged = op_partial_merge_and_save(first1, last1, first2, last2, first_min, buffer, buffer_end, comp, op, is_range1_A); |
| 1485 | bool const is_range_1_empty = buffer == buffer_end; |
| 1486 | BOOST_ASSERT(is_range_1_empty || (buffer_end-buffer) == (last1+l_block-unmerged)); |
| 1487 | if(is_range_1_empty){ |
| 1488 | buffer = buffer_end = buf_first; |
| 1489 | first_min = last_min - (last2 - first2); |
| 1490 | } |
| 1491 | else{ |
| 1492 | first_min = last_min; |
| 1493 | } |
| 1494 | BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!is_range_1_empty || (last_min-first_min) == (last2-unmerged)); |
| 1495 | swap_and_update_key(next_key_idx != 0, key_next, key_range2, key_mid, first2, last2, first_min); |
| 1496 | |
| 1497 | BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first_min, last_min, comp)); |
| 1498 | is_range1_A ^= is_range_1_empty; |
| 1499 | first1 = unmerged; |
| 1500 | BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first, unmerged, comp)); |
| 1501 | } |
| 1502 | BOOST_ASSERT( (is_range2_A && n_block_a_left) || (!is_range2_A && n_block_b_left)); |
| 1503 | is_range2_A ? --n_block_a_left : --n_block_b_left; |
| 1504 | last1 += l_block; |
| 1505 | first2 = last2; |
| 1506 | } |
| 1507 | |
| 1508 | RandIt res = op(forward_t(), buffer, buffer_end, first1); (void)res; |
| 1509 | BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first, res, comp)); |
| 1510 | |
| 1511 | //////////////////////////////////////////////////////////////////////////// |
| 1512 | //Process irregular B block and remaining A blocks |
| 1513 | //////////////////////////////////////////////////////////////////////////// |
| 1514 | RandIt const last_irr2 = first_irr2 + l_irreg2; |
| 1515 | op(forward_t(), first_irr2, first_irr2+l_irreg2, buf_first); |
| 1516 | buffer = buf_first; |
| 1517 | buffer_end = buffer+l_irreg2; |
| 1518 | |
| 1519 | reverse_iterator<RandItBuf> rbuf_beg(buffer_end); |
| 1520 | RandIt dest = op_merge_blocks_with_irreg |
| 1521 | ( make_reverse_iterator(key_first + n_block_b + n_block_a), make_reverse_iterator(key_mid), inverse<KeyCompare>(key_comp) |
| 1522 | , make_reverse_iterator(first_irr2), rbuf_beg |
| 1523 | , make_reverse_iterator(buffer), make_reverse_iterator(last_irr2) |
| 1524 | , l_block, n_block_left, 0, n_block_left |
| 1525 | , inverse<Compare>(comp), true, op).base(); |
| 1526 | BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(dest, last_irr2, comp)); |
| 1527 | |
| 1528 | buffer_end = rbuf_beg.base(); |
| 1529 | BOOST_ASSERT((dest-last1) == (buffer_end-buffer)); |
| 1530 | op_merge_with_left_placed(is_range1_A ? first1 : last1, last1, dest, buffer, buffer_end, comp, op); |
| 1531 | |
| 1532 | BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first, last_irr2, comp)); |
| 1533 | } |
| 1534 | |
| 1535 | template<class RandItKeys, class KeyCompare, class RandIt, class Compare, class RandItBuf> |
| 1536 | void merge_blocks_with_buf |
| 1537 | ( RandItKeys key_first |
| 1538 | , KeyCompare key_comp |
| 1539 | , RandIt const first |
| 1540 | , typename iterator_traits<RandIt>::size_type const l_block |
| 1541 | , typename iterator_traits<RandIt>::size_type const l_irreg1 |
| 1542 | , typename iterator_traits<RandIt>::size_type const n_block_a |
| 1543 | , typename iterator_traits<RandIt>::size_type const n_block_b |
| 1544 | , typename iterator_traits<RandIt>::size_type const l_irreg2 |
| 1545 | , Compare comp |
| 1546 | , RandItBuf const buf_first |
| 1547 | , bool const xbuf_used) |
| 1548 | { |
| 1549 | if(xbuf_used){ |
| 1550 | op_merge_blocks_with_buf |
| 1551 | (key_first, key_comp, first, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, move_op(), buf_first); |
| 1552 | } |
| 1553 | else{ |
| 1554 | op_merge_blocks_with_buf |
| 1555 | (key_first, key_comp, first, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, swap_op(), buf_first); |
| 1556 | } |
| 1557 | } |
| 1558 | |
| 1559 | template<class RandIt, class Compare, class Op> |
| 1560 | typename iterator_traits<RandIt>::size_type |
| 1561 | op_insertion_sort_step_left |
| 1562 | ( RandIt const first |
| 1563 | , typename iterator_traits<RandIt>::size_type const length |
| 1564 | , typename iterator_traits<RandIt>::size_type const step |
| 1565 | , Compare comp, Op op) |
| 1566 | { |
| 1567 | typedef typename iterator_traits<RandIt>::size_type size_type; |
| 1568 | size_type const s = min_value<size_type>(step, AdaptiveSortInsertionSortThreshold); |
| 1569 | size_type m = 0; |
| 1570 | |
| 1571 | while((length - m) > s){ |
| 1572 | insertion_sort_op(first+m, first+m+s, first+m-s, comp, op); |
| 1573 | m += s; |
| 1574 | } |
| 1575 | insertion_sort_op(first+m, first+length, first+m-s, comp, op); |
| 1576 | return s; |
| 1577 | } |
| 1578 | |
| 1579 | template<class RandIt, class Compare> |
| 1580 | typename iterator_traits<RandIt>::size_type |
| 1581 | insertion_sort_step |
| 1582 | ( RandIt const first |
| 1583 | , typename iterator_traits<RandIt>::size_type const length |
| 1584 | , typename iterator_traits<RandIt>::size_type const step |
| 1585 | , Compare comp) |
| 1586 | { |
| 1587 | typedef typename iterator_traits<RandIt>::size_type size_type; |
| 1588 | size_type const s = min_value<size_type>(step, AdaptiveSortInsertionSortThreshold); |
| 1589 | size_type m = 0; |
| 1590 | |
| 1591 | while((length - m) > s){ |
| 1592 | insertion_sort(first+m, first+m+s, comp); |
| 1593 | m += s; |
| 1594 | } |
| 1595 | insertion_sort(first+m, first+length, comp); |
| 1596 | return s; |
| 1597 | } |
| 1598 | |
| 1599 | template<class RandIt, class Compare, class Op> |
| 1600 | typename iterator_traits<RandIt>::size_type |
| 1601 | op_merge_left_step_multiple |
| 1602 | ( RandIt first_block |
| 1603 | , typename iterator_traits<RandIt>::size_type const elements_in_blocks |
| 1604 | , typename iterator_traits<RandIt>::size_type l_merged |
| 1605 | , typename iterator_traits<RandIt>::size_type const l_build_buf |
| 1606 | , typename iterator_traits<RandIt>::size_type l_left_space |
| 1607 | , Compare comp |
| 1608 | , Op op) |
| 1609 | { |
| 1610 | typedef typename iterator_traits<RandIt>::size_type size_type; |
| 1611 | for(; l_merged < l_build_buf && l_left_space >= l_merged; l_merged*=2){ |
| 1612 | size_type p0=0; |
| 1613 | RandIt pos = first_block; |
| 1614 | while((elements_in_blocks - p0) > 2*l_merged) { |
| 1615 | op_merge_left(pos-l_merged, pos, pos+l_merged, pos+2*l_merged, comp, op); |
| 1616 | BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(pos-l_merged, pos+l_merged, comp)); |
| 1617 | p0 += 2*l_merged; |
| 1618 | pos = first_block+p0; |
| 1619 | } |
| 1620 | if((elements_in_blocks-p0) > l_merged) { |
| 1621 | op_merge_left(pos-l_merged, pos, pos+l_merged, first_block+elements_in_blocks, comp, op); |
| 1622 | BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(pos-l_merged, pos-l_merged+(first_block+elements_in_blocks-pos), comp)); |
| 1623 | } |
| 1624 | else { |
| 1625 | op(forward_t(), pos, first_block+elements_in_blocks, pos-l_merged); |
| 1626 | BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(pos-l_merged, first_block+elements_in_blocks-l_merged, comp)); |
| 1627 | } |
| 1628 | first_block -= l_merged; |
| 1629 | l_left_space -= l_merged; |
| 1630 | } |
| 1631 | return l_merged; |
| 1632 | } |
| 1633 | |
| 1634 | template<class RandIt, class Compare, class Op> |
| 1635 | void op_merge_right_step_once |
| 1636 | ( RandIt first_block |
| 1637 | , typename iterator_traits<RandIt>::size_type const elements_in_blocks |
| 1638 | , typename iterator_traits<RandIt>::size_type const l_build_buf |
| 1639 | , Compare comp |
| 1640 | , Op op) |
| 1641 | { |
| 1642 | typedef typename iterator_traits<RandIt>::size_type size_type; |
| 1643 | size_type restk = elements_in_blocks%(2*l_build_buf); |
| 1644 | size_type p = elements_in_blocks - restk; |
| 1645 | BOOST_ASSERT(0 == (p%(2*l_build_buf))); |
| 1646 | |
| 1647 | if(restk <= l_build_buf){ |
| 1648 | op(backward_t(),first_block+p, first_block+p+restk, first_block+p+restk+l_build_buf); |
| 1649 | } |
| 1650 | else{ |
| 1651 | op_merge_right(first_block+p, first_block+p+l_build_buf, first_block+p+restk, first_block+p+restk+l_build_buf, comp, op); |
| 1652 | } |
| 1653 | while(p>0){ |
| 1654 | p -= 2*l_build_buf; |
| 1655 | op_merge_right(first_block+p, first_block+p+l_build_buf, first_block+p+2*l_build_buf, first_block+p+3*l_build_buf, comp, op); |
| 1656 | } |
| 1657 | } |
| 1658 | |
| 1659 | |
// Build blocks of length 2*l_build_buf. l_build_buf is a power of two.
// input: [0, l_build_buf) elements are the buffer, the rest are unsorted elements.
// output: [0, l_build_buf) elements are the buffer; the rest is sorted in blocks of
// 2*l_build_buf elements (plus a possibly shorter last subblock).
//
// First, elements are merged from right to left until the merged elements start
// at first. All old elements [first, first + l_build_buf) are placed at the end
// [first+len-l_build_buf, first+len). To achieve this:
| 1667 | // - If we have external memory to merge, we save elements from the buffer |
| 1668 | // so that a non-swapping merge is used. Buffer elements are restored |
| 1669 | // at the end of the buffer from the external memory. |
| 1670 | // |
| 1671 | // - When the external memory is not available or it is insufficient |
| 1672 | // for a merge operation, left swap merging is used. |
| 1673 | // |
// Once elements are merged into blocks of l_build_buf using the merge-to-left step,
// a single merge-to-right step is performed to obtain merged blocks of size 2*l_build_buf.
// If external memory is available, a usual merge is used, swap merging otherwise.
| 1677 | // |
// As a last step, if auxiliary memory is available, an in-place merge is performed
// until all is merged or the auxiliary memory is not large enough.
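//
// Illustrative sketch with hypothetical sizes (not part of the implementation):
// assume l_base == 16 and l_build_buf == 64. Then, roughly:
//   1. The insertion sort step creates sorted runs of 16 elements while shifting
//      them to the left over the buffer area.
//   2. op_merge_left_step_multiple doubles the run length (16 -> 32 -> 64) while
//      buffer space remains to the left of the runs.
//   3. op_merge_right_step_once merges pairs of 64-element runs into runs of
//      2*l_build_buf == 128 elements, leaving the buffer at the front again.
// The function returns min(elements_in_blocks, 2*l_build_buf), the length of the
// sorted runs it produced.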
| 1680 | template<class RandIt, class Compare, class XBuf> |
| 1681 | typename iterator_traits<RandIt>::size_type |
| 1682 | adaptive_sort_build_blocks |
| 1683 | ( RandIt const first |
| 1684 | , typename iterator_traits<RandIt>::size_type const len |
| 1685 | , typename iterator_traits<RandIt>::size_type const l_base |
| 1686 | , typename iterator_traits<RandIt>::size_type const l_build_buf |
| 1687 | , XBuf & xbuf |
| 1688 | , Compare comp) |
| 1689 | { |
| 1690 | typedef typename iterator_traits<RandIt>::size_type size_type; |
| 1691 | BOOST_ASSERT(l_build_buf <= len); |
| 1692 | BOOST_ASSERT(0 == ((l_build_buf / l_base)&(l_build_buf/l_base-1))); |
| 1693 | |
| 1694 | //Place the start pointer after the buffer |
| 1695 | RandIt first_block = first + l_build_buf; |
| 1696 | size_type const elements_in_blocks = len - l_build_buf; |
| 1697 | |
| 1698 | ////////////////////////////////// |
| 1699 | // Start of merge to left step |
| 1700 | ////////////////////////////////// |
| 1701 | size_type l_merged = 0u; |
| 1702 | |
| 1703 | BOOST_ASSERT(l_build_buf); |
//If there is not enough buffer for the insertion sort step, just avoid the external buffer
| 1705 | size_type kbuf = min_value<size_type>(l_build_buf, size_type(xbuf.capacity())); |
| 1706 | kbuf = kbuf < l_base ? 0 : kbuf; |
| 1707 | |
| 1708 | if(kbuf){ |
//Back up internal buffer values in the external buffer so they can be overwritten
| 1710 | xbuf.move_assign(first+l_build_buf-kbuf, kbuf); |
| 1711 | l_merged = op_insertion_sort_step_left(first_block, elements_in_blocks, l_base, comp, move_op()); |
| 1712 | |
| 1713 | //Now combine them using the buffer. Elements from buffer can be |
| 1714 | //overwritten since they've been saved to xbuf |
| 1715 | l_merged = op_merge_left_step_multiple |
| 1716 | ( first_block - l_merged, elements_in_blocks, l_merged, l_build_buf, kbuf - l_merged, comp, move_op()); |
| 1717 | |
//Restore the internal buffer from the external buffer unless kbuf was l_build_buf;
//in that case restoration will happen later
| 1720 | if(kbuf != l_build_buf){ |
| 1721 | boost::move(xbuf.data()+kbuf-l_merged, xbuf.data() + kbuf, first_block-l_merged+elements_in_blocks); |
| 1722 | } |
| 1723 | } |
| 1724 | else{ |
| 1725 | l_merged = insertion_sort_step(first_block, elements_in_blocks, l_base, comp); |
| 1726 | rotate_gcd(first_block - l_merged, first_block, first_block+elements_in_blocks); |
| 1727 | } |
| 1728 | |
//Now combine elements using the buffer. Elements from the buffer can't be
//overwritten since xbuf was not big enough, so merge by swapping elements.
| 1731 | l_merged = op_merge_left_step_multiple |
| 1732 | (first_block - l_merged, elements_in_blocks, l_merged, l_build_buf, l_build_buf - l_merged, comp, swap_op()); |
| 1733 | |
| 1734 | BOOST_ASSERT(l_merged == l_build_buf); |
| 1735 | |
| 1736 | ////////////////////////////////// |
| 1737 | // Start of merge to right step |
| 1738 | ////////////////////////////////// |
| 1739 | |
| 1740 | //If kbuf is l_build_buf then we can merge right without swapping |
| 1741 | //Saved data is still in xbuf |
| 1742 | if(kbuf && kbuf == l_build_buf){ |
| 1743 | op_merge_right_step_once(first, elements_in_blocks, l_build_buf, comp, move_op()); |
//Restore the internal buffer from the external buffer if kbuf was l_build_buf,
//as this operation was previously delayed.
| 1746 | boost::move(xbuf.data(), xbuf.data() + kbuf, first); |
| 1747 | } |
| 1748 | else{ |
| 1749 | op_merge_right_step_once(first, elements_in_blocks, l_build_buf, comp, swap_op()); |
| 1750 | } |
| 1751 | xbuf.clear(); |
| 1752 | //2*l_build_buf or total already merged |
| 1753 | return min_value(elements_in_blocks, 2*l_build_buf); |
| 1754 | } |
| 1755 | |
| 1756 | template<class RandItKeys, class KeyCompare, class RandIt, class Compare, class XBuf> |
| 1757 | void adaptive_sort_combine_blocks |
| 1758 | ( RandItKeys const keys |
| 1759 | , KeyCompare key_comp |
| 1760 | , RandIt const first |
| 1761 | , typename iterator_traits<RandIt>::size_type const len |
| 1762 | , typename iterator_traits<RandIt>::size_type const l_prev_merged |
| 1763 | , typename iterator_traits<RandIt>::size_type const l_block |
| 1764 | , bool const use_buf |
| 1765 | , bool const xbuf_used |
| 1766 | , XBuf & xbuf |
| 1767 | , Compare comp |
| 1768 | , bool merge_left) |
| 1769 | { |
| 1770 | (void)xbuf; |
| 1771 | typedef typename iterator_traits<RandIt>::size_type size_type; |
| 1772 | |
| 1773 | size_type const l_reg_combined = 2*l_prev_merged; |
| 1774 | size_type l_irreg_combined = 0; |
| 1775 | size_type const l_total_combined = calculate_total_combined(len, l_prev_merged, &l_irreg_combined); |
| 1776 | size_type const n_reg_combined = len/l_reg_combined; |
| 1777 | RandIt combined_first = first; |
| 1778 | |
| 1779 | (void)l_total_combined; |
| 1780 | BOOST_ASSERT(l_total_combined <= len); |
| 1781 | |
| 1782 | size_type const max_i = n_reg_combined + (l_irreg_combined != 0); |
| 1783 | |
| 1784 | if(merge_left || !use_buf) { |
| 1785 | for( size_type combined_i = 0; combined_i != max_i; ++combined_i, combined_first += l_reg_combined) { |
| 1786 | //Now merge blocks |
| 1787 | bool const is_last = combined_i==n_reg_combined; |
| 1788 | size_type const l_cur_combined = is_last ? l_irreg_combined : l_reg_combined; |
| 1789 | |
| 1790 | range_xbuf<RandIt, move_op> rbuf( (use_buf && xbuf_used) ? (combined_first-l_block) : combined_first, combined_first); |
| 1791 | size_type n_block_a, n_block_b, l_irreg1, l_irreg2; |
| 1792 | combine_params( keys, key_comp, l_cur_combined |
| 1793 | , l_prev_merged, l_block, rbuf |
| 1794 | , n_block_a, n_block_b, l_irreg1, l_irreg2); //Outputs |
| 1795 | BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A combpar: " , len + l_block); |
| 1796 | BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(combined_first, combined_first + n_block_a*l_block+l_irreg1, comp)); |
| 1797 | BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(combined_first + n_block_a*l_block+l_irreg1, combined_first + n_block_a*l_block+l_irreg1+n_block_b*l_block+l_irreg2, comp)); |
| 1798 | if(!use_buf){ |
| 1799 | merge_blocks_bufferless |
| 1800 | (keys, key_comp, combined_first, l_block, 0u, n_block_a, n_block_b, l_irreg2, comp); |
| 1801 | } |
| 1802 | else{ |
| 1803 | merge_blocks_left |
| 1804 | (keys, key_comp, combined_first, l_block, 0u, n_block_a, n_block_b, l_irreg2, comp, xbuf_used); |
| 1805 | } |
| 1806 | BOOST_MOVE_ADAPTIVE_SORT_PRINT(" After merge_blocks_l: " , len + l_block); |
| 1807 | } |
| 1808 | } |
| 1809 | else{ |
| 1810 | combined_first += l_reg_combined*(max_i-1); |
| 1811 | for( size_type combined_i = max_i; combined_i--; combined_first -= l_reg_combined) { |
| 1812 | bool const is_last = combined_i==n_reg_combined; |
| 1813 | size_type const l_cur_combined = is_last ? l_irreg_combined : l_reg_combined; |
| 1814 | |
| 1815 | RandIt const combined_last(combined_first+l_cur_combined); |
| 1816 | range_xbuf<RandIt, move_op> rbuf(combined_last, xbuf_used ? (combined_last+l_block) : combined_last); |
| 1817 | size_type n_block_a, n_block_b, l_irreg1, l_irreg2; |
| 1818 | combine_params( keys, key_comp, l_cur_combined |
| 1819 | , l_prev_merged, l_block, rbuf |
| 1820 | , n_block_a, n_block_b, l_irreg1, l_irreg2); //Outputs |
| 1821 | BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A combpar: " , len + l_block); |
| 1822 | BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(combined_first, combined_first + n_block_a*l_block+l_irreg1, comp)); |
| 1823 | BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(combined_first + n_block_a*l_block+l_irreg1, combined_first + n_block_a*l_block+l_irreg1+n_block_b*l_block+l_irreg2, comp)); |
| 1824 | merge_blocks_right |
| 1825 | (keys, key_comp, combined_first, l_block, n_block_a, n_block_b, l_irreg2, comp, xbuf_used); |
| 1826 | BOOST_MOVE_ADAPTIVE_SORT_PRINT(" After merge_blocks_r: " , len + l_block); |
| 1827 | } |
| 1828 | } |
| 1829 | } |
| 1830 | |
//Returns true if the buffer is placed in
//[buffer+l_buf_plus_data-l_intbuf, buffer+l_buf_plus_data). Otherwise, the buffer is
//[buffer, buffer+l_intbuf)
| 1834 | template<class RandIt, class Compare, class XBuf> |
| 1835 | bool adaptive_sort_combine_all_blocks |
| 1836 | ( RandIt keys |
| 1837 | , typename iterator_traits<RandIt>::size_type &n_keys |
| 1838 | , RandIt const buffer |
| 1839 | , typename iterator_traits<RandIt>::size_type const l_buf_plus_data |
| 1840 | , typename iterator_traits<RandIt>::size_type l_merged |
| 1841 | , typename iterator_traits<RandIt>::size_type &l_intbuf |
| 1842 | , XBuf & xbuf |
| 1843 | , Compare comp) |
| 1844 | { |
| 1845 | typedef typename iterator_traits<RandIt>::size_type size_type; |
| 1846 | RandIt const first = buffer + l_intbuf; |
| 1847 | size_type const l_data = l_buf_plus_data - l_intbuf; |
| 1848 | size_type const l_unique = l_intbuf+n_keys; |
//Back up data to the external buffer once if possible
| 1850 | bool const common_xbuf = l_data > l_merged && l_intbuf && l_intbuf <= xbuf.capacity(); |
| 1851 | if(common_xbuf){ |
| 1852 | xbuf.move_assign(buffer, l_intbuf); |
| 1853 | } |
| 1854 | |
| 1855 | bool prev_merge_left = true; |
| 1856 | size_type l_prev_total_combined = l_merged, l_prev_block = 0; |
| 1857 | bool prev_use_internal_buf = true; |
| 1858 | |
| 1859 | for( size_type n = 0; l_data > l_merged |
| 1860 | ; l_merged*=2 |
| 1861 | , ++n){ |
| 1862 | //If l_intbuf is non-zero, use that internal buffer. |
| 1863 | // Implies l_block == l_intbuf && use_internal_buf == true |
//If l_intbuf is zero, see if half of the keys can be reused as a reduced emergency buffer.
// Implies l_block == n_keys/2 && use_internal_buf == true
//Otherwise, just give up and use all keys to merge using rotations (use_internal_buf = false)
| 1867 | bool use_internal_buf = false; |
| 1868 | size_type const l_block = lblock_for_combine(l_intbuf, n_keys, 2*l_merged, use_internal_buf); |
| 1869 | BOOST_ASSERT(!l_intbuf || (l_block == l_intbuf)); |
| 1870 | BOOST_ASSERT(n == 0 || (!use_internal_buf || prev_use_internal_buf) ); |
| 1871 | BOOST_ASSERT(n == 0 || (!use_internal_buf || l_prev_block == l_block) ); |
| 1872 | |
| 1873 | bool const is_merge_left = (n&1) == 0; |
| 1874 | size_type const l_total_combined = calculate_total_combined(l_data, l_merged); |
| 1875 | if(n && prev_use_internal_buf && prev_merge_left){ |
| 1876 | if(is_merge_left || !use_internal_buf){ |
| 1877 | move_data_backward(first-l_prev_block, l_prev_total_combined, first, common_xbuf); |
| 1878 | } |
| 1879 | else{ |
| 1880 | //Put the buffer just after l_total_combined |
| 1881 | RandIt const buf_end = first+l_prev_total_combined; |
| 1882 | RandIt const buf_beg = buf_end-l_block; |
| 1883 | if(l_prev_total_combined > l_total_combined){ |
| 1884 | size_type const l_diff = l_prev_total_combined - l_total_combined; |
| 1885 | move_data_backward(buf_beg-l_diff, l_diff, buf_end-l_diff, common_xbuf); |
| 1886 | } |
| 1887 | else if(l_prev_total_combined < l_total_combined){ |
| 1888 | size_type const l_diff = l_total_combined - l_prev_total_combined; |
| 1889 | move_data_forward(buf_end, l_diff, buf_beg, common_xbuf); |
| 1890 | } |
| 1891 | } |
| 1892 | BOOST_MOVE_ADAPTIVE_SORT_PRINT(" After move_data : " , l_data + l_intbuf); |
| 1893 | } |
| 1894 | |
| 1895 | //Combine to form l_merged*2 segments |
| 1896 | if(n_keys){ |
| 1897 | adaptive_sort_combine_blocks |
| 1898 | ( keys, comp, !use_internal_buf || is_merge_left ? first : first-l_block |
| 1899 | , l_data, l_merged, l_block, use_internal_buf, common_xbuf, xbuf, comp, is_merge_left); |
| 1900 | } |
| 1901 | else{ |
| 1902 | size_type *const uint_keys = xbuf.template aligned_trailing<size_type>(); |
| 1903 | adaptive_sort_combine_blocks |
| 1904 | ( uint_keys, less(), !use_internal_buf || is_merge_left ? first : first-l_block |
| 1905 | , l_data, l_merged, l_block, use_internal_buf, common_xbuf, xbuf, comp, is_merge_left); |
| 1906 | } |
| 1907 | BOOST_MOVE_ADAPTIVE_SORT_PRINT(" After combine_blocks: " , l_data + l_intbuf); |
| 1908 | prev_merge_left = is_merge_left; |
| 1909 | l_prev_total_combined = l_total_combined; |
| 1910 | l_prev_block = l_block; |
| 1911 | prev_use_internal_buf = use_internal_buf; |
| 1912 | } |
| 1913 | BOOST_ASSERT(l_prev_total_combined == l_data); |
| 1914 | bool const buffer_right = prev_use_internal_buf && prev_merge_left; |
| 1915 | |
| 1916 | l_intbuf = prev_use_internal_buf ? l_prev_block : 0u; |
| 1917 | n_keys = l_unique - l_intbuf; |
//Restore data from the external common buffer if it was used
| 1919 | if(common_xbuf){ |
| 1920 | if(buffer_right){ |
| 1921 | boost::move(xbuf.data(), xbuf.data() + l_intbuf, buffer+l_data); |
| 1922 | } |
| 1923 | else{ |
| 1924 | boost::move(xbuf.data(), xbuf.data() + l_intbuf, buffer); |
| 1925 | } |
| 1926 | } |
| 1927 | return buffer_right; |
| 1928 | } |
| 1929 | |
| 1930 | template<class RandIt, class Compare, class XBuf> |
| 1931 | void stable_merge |
| 1932 | ( RandIt first, RandIt const middle, RandIt last |
| 1933 | , Compare comp |
| 1934 | , XBuf &xbuf) |
| 1935 | { |
| 1936 | BOOST_ASSERT(xbuf.empty()); |
| 1937 | typedef typename iterator_traits<RandIt>::size_type size_type; |
| 1938 | size_type const len1 = size_type(middle-first); |
| 1939 | size_type const len2 = size_type(last-middle); |
| 1940 | size_type const l_min = min_value(len1, len2); |
| 1941 | if(xbuf.capacity() >= l_min){ |
| 1942 | buffered_merge(first, middle, last, comp, xbuf); |
| 1943 | xbuf.clear(); |
| 1944 | } |
| 1945 | else{ |
| 1946 | merge_bufferless(first, middle, last, comp); |
| 1947 | } |
| 1948 | } |
| 1949 | |
| 1950 | |
| 1951 | template<class RandIt, class Compare, class XBuf> |
| 1952 | void adaptive_sort_final_merge( bool buffer_right |
| 1953 | , RandIt const first |
| 1954 | , typename iterator_traits<RandIt>::size_type const l_intbuf |
| 1955 | , typename iterator_traits<RandIt>::size_type const n_keys |
| 1956 | , typename iterator_traits<RandIt>::size_type const len |
| 1957 | , XBuf & xbuf |
| 1958 | , Compare comp) |
| 1959 | { |
| 1960 | //BOOST_ASSERT(n_keys || xbuf.size() == l_intbuf); |
| 1961 | xbuf.clear(); |
| 1962 | |
| 1963 | typedef typename iterator_traits<RandIt>::size_type size_type; |
| 1964 | size_type const n_key_plus_buf = l_intbuf+n_keys; |
| 1965 | if(buffer_right){ |
| 1966 | stable_sort(first+len-l_intbuf, first+len, comp, xbuf); |
| 1967 | stable_merge(first+n_keys, first+len-l_intbuf, first+len, antistable<Compare>(comp), xbuf); |
| 1968 | stable_sort(first, first+n_keys, comp, xbuf); |
| 1969 | stable_merge(first, first+n_keys, first+len, comp, xbuf); |
| 1970 | } |
| 1971 | else{ |
| 1972 | stable_sort(first, first+n_key_plus_buf, comp, xbuf); |
| 1973 | if(xbuf.capacity() >= n_key_plus_buf){ |
| 1974 | buffered_merge(first, first+n_key_plus_buf, first+len, comp, xbuf); |
| 1975 | } |
| 1976 | else if(xbuf.capacity() >= min_value<size_type>(l_intbuf, n_keys)){ |
| 1977 | stable_merge(first+n_keys, first+n_key_plus_buf, first+len, comp, xbuf); |
| 1978 | stable_merge(first, first+n_keys, first+len, comp, xbuf); |
| 1979 | } |
| 1980 | else{ |
| 1981 | merge_bufferless(first, first+n_key_plus_buf, first+len, comp); |
| 1982 | } |
| 1983 | } |
| 1984 | BOOST_MOVE_ADAPTIVE_SORT_PRINT(" After final_merge : " , len); |
| 1985 | } |
| 1986 | |
| 1987 | template<class RandIt, class Compare, class Unsigned, class XBuf> |
| 1988 | bool adaptive_sort_build_params |
| 1989 | (RandIt first, Unsigned const len, Compare comp |
| 1990 | , Unsigned &n_keys, Unsigned &l_intbuf, Unsigned &l_base, Unsigned &l_build_buf |
| 1991 | , XBuf & xbuf |
| 1992 | ) |
| 1993 | { |
| 1994 | typedef Unsigned size_type; |
| 1995 | |
| 1996 | //Calculate ideal parameters and try to collect needed unique keys |
| 1997 | l_base = 0u; |
| 1998 | |
| 1999 | //Try to find a value near sqrt(len) that is 2^N*l_base where |
| 2000 | //l_base <= AdaptiveSortInsertionSortThreshold. This property is important |
//as build_blocks merges to the left iteratively doubling the
| 2002 | //merged size and all the buffer must be used just before the final |
| 2003 | //merge to right step. This guarantees "build_blocks" produces |
| 2004 | //segments of size l_build_buf*2, maximizing the classic merge phase. |
| 2005 | l_intbuf = size_type(ceil_sqrt_multiple(len, &l_base)); |
| 2006 | |
| 2007 | //The internal buffer can be expanded if there is enough external memory |
| 2008 | while(xbuf.capacity() >= l_intbuf*2){ |
| 2009 | l_intbuf *= 2; |
| 2010 | } |
| 2011 | |
| 2012 | //This is the minimum number of keys to implement the ideal algorithm |
| 2013 | // |
| 2014 | //l_intbuf is used as buffer plus the key count |
| 2015 | size_type n_min_ideal_keys = l_intbuf-1; |
| 2016 | while(n_min_ideal_keys >= (len-l_intbuf-n_min_ideal_keys)/l_intbuf){ |
| 2017 | --n_min_ideal_keys; |
| 2018 | } |
| 2019 | n_min_ideal_keys += 1; |
| 2020 | BOOST_ASSERT(n_min_ideal_keys <= l_intbuf); |
| 2021 | |
| 2022 | if(xbuf.template supports_aligned_trailing<size_type>(l_intbuf, (len-l_intbuf-1)/l_intbuf+1)){ |
| 2023 | n_keys = 0u; |
| 2024 | l_build_buf = l_intbuf; |
| 2025 | } |
| 2026 | else{ |
//Try to achieve an l_build_buf of length l_intbuf*2, so that we can merge with that
//l_intbuf*2 buffer in "build_blocks" and use half of them as buffer and the other half
//as keys in combine_all_blocks. In that case n_keys >= n_min_ideal_keys, but only by a small margin.
| 2030 | // |
| 2031 | //If available memory is 2*sqrt(l), then only sqrt(l) unique keys are needed, |
| 2032 | //(to be used for keys in combine_all_blocks) as the whole l_build_buf |
//will be backed up in the buffer during build_blocks.
| 2034 | bool const non_unique_buf = xbuf.capacity() >= l_intbuf; |
| 2035 | size_type const to_collect = non_unique_buf ? n_min_ideal_keys : l_intbuf*2; |
| 2036 | size_type collected = collect_unique(first, first+len, to_collect, comp, xbuf); |
| 2037 | |
| 2038 | //If available memory is 2*sqrt(l), then for "build_params" |
| 2039 | //the situation is the same as if 2*l_intbuf were collected. |
| 2040 | if(non_unique_buf && collected == n_min_ideal_keys){ |
| 2041 | l_build_buf = l_intbuf; |
| 2042 | n_keys = n_min_ideal_keys; |
| 2043 | } |
| 2044 | else if(collected == 2*l_intbuf){ |
| 2045 | //l_intbuf*2 elements found. Use all of them in the build phase |
| 2046 | l_build_buf = l_intbuf*2; |
| 2047 | n_keys = l_intbuf; |
| 2048 | } |
| 2049 | else if(collected == (n_min_ideal_keys+l_intbuf)){ |
| 2050 | l_build_buf = l_intbuf; |
| 2051 | n_keys = n_min_ideal_keys; |
| 2052 | } |
//If the collected keys are not enough, try to fix n_keys and l_intbuf. If no fix
//is possible (due to very few unique keys), then go to a slow sort based on rotations.
| 2055 | else{ |
| 2056 | BOOST_ASSERT(collected < (n_min_ideal_keys+l_intbuf)); |
if(collected < 4){ //No combination possible with fewer than 4 keys
| 2058 | return false; |
| 2059 | } |
| 2060 | n_keys = l_intbuf; |
| 2061 | while(n_keys&(n_keys-1)){ |
n_keys &= n_keys-1; // make it a power of 2
| 2063 | } |
| 2064 | while(n_keys > collected){ |
| 2065 | n_keys/=2; |
| 2066 | } |
//AdaptiveSortInsertionSortThreshold is always a power of two, so the minimum is a power of two
| 2068 | l_base = min_value<Unsigned>(n_keys, AdaptiveSortInsertionSortThreshold); |
| 2069 | l_intbuf = 0; |
| 2070 | l_build_buf = n_keys; |
| 2071 | } |
| 2072 | BOOST_ASSERT((n_keys+l_intbuf) >= l_build_buf); |
| 2073 | } |
| 2074 | |
| 2075 | return true; |
| 2076 | } |
| 2077 | |
| 2078 | template<class RandIt, class Compare, class XBuf> |
| 2079 | inline void adaptive_merge_combine_blocks( RandIt first |
| 2080 | , typename iterator_traits<RandIt>::size_type len1 |
| 2081 | , typename iterator_traits<RandIt>::size_type len2 |
| 2082 | , typename iterator_traits<RandIt>::size_type collected |
| 2083 | , typename iterator_traits<RandIt>::size_type n_keys |
| 2084 | , typename iterator_traits<RandIt>::size_type l_block |
| 2085 | , bool use_internal_buf |
| 2086 | , bool xbuf_used |
| 2087 | , Compare comp |
| 2088 | , XBuf & xbuf |
| 2089 | ) |
| 2090 | { |
| 2091 | typedef typename iterator_traits<RandIt>::size_type size_type; |
| 2092 | size_type const len = len1+len2; |
| 2093 | size_type const l_combine = len-collected; |
| 2094 | size_type const l_combine1 = len1-collected; |
| 2095 | |
| 2096 | if(n_keys){ |
| 2097 | RandIt const first_data = first+collected; |
| 2098 | RandIt const keys = first; |
| 2099 | BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A combine: " , len); |
| 2100 | if(xbuf_used){ |
| 2101 | if(xbuf.size() < l_block){ |
| 2102 | xbuf.initialize_until(l_block, *first); |
| 2103 | } |
| 2104 | BOOST_ASSERT(xbuf.size() >= l_block); |
| 2105 | size_type n_block_a, n_block_b, l_irreg1, l_irreg2; |
| 2106 | combine_params( keys, comp, l_combine |
| 2107 | , l_combine1, l_block, xbuf |
| 2108 | , n_block_a, n_block_b, l_irreg1, l_irreg2); //Outputs |
| 2109 | merge_blocks_with_buf |
| 2110 | (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, xbuf.data(), xbuf_used); |
| 2111 | BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg xbf: " , len); |
| 2112 | } |
| 2113 | else{ |
| 2114 | size_type n_block_a, n_block_b, l_irreg1, l_irreg2; |
| 2115 | combine_params( keys, comp, l_combine |
| 2116 | , l_combine1, l_block, xbuf |
| 2117 | , n_block_a, n_block_b, l_irreg1, l_irreg2); //Outputs |
| 2118 | if(use_internal_buf){ |
| 2119 | merge_blocks_with_buf |
| 2120 | (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, first_data-l_block, xbuf_used); |
| 2121 | BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg buf: " , len); |
| 2122 | } |
| 2123 | else{ |
| 2124 | merge_blocks_bufferless |
| 2125 | (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp); |
| 2126 | BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg nbf: " , len); |
| 2127 | } |
| 2128 | } |
| 2129 | } |
| 2130 | else{ |
| 2131 | xbuf.shrink_to_fit(l_block); |
| 2132 | if(xbuf.size() < l_block){ |
| 2133 | xbuf.initialize_until(l_block, *first); |
| 2134 | } |
| 2135 | size_type *const uint_keys = xbuf.template aligned_trailing<size_type>(l_block); |
| 2136 | size_type n_block_a, n_block_b, l_irreg1, l_irreg2; |
| 2137 | combine_params( uint_keys, less(), l_combine |
| 2138 | , l_combine1, l_block, xbuf |
| 2139 | , n_block_a, n_block_b, l_irreg1, l_irreg2, true); //Outputs |
| 2140 | BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A combine: " , len); |
| 2141 | BOOST_ASSERT(xbuf.size() >= l_block); |
| 2142 | merge_blocks_with_buf |
| 2143 | (uint_keys, less(), first, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, xbuf.data(), true); |
| 2144 | xbuf.clear(); |
| 2145 | BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg buf: " , len); |
| 2146 | } |
| 2147 | } |
| 2148 | |
| 2149 | template<class RandIt, class Compare, class XBuf> |
| 2150 | inline void adaptive_merge_final_merge( RandIt first |
| 2151 | , typename iterator_traits<RandIt>::size_type len1 |
| 2152 | , typename iterator_traits<RandIt>::size_type len2 |
| 2153 | , typename iterator_traits<RandIt>::size_type collected |
| 2154 | , typename iterator_traits<RandIt>::size_type l_intbuf |
| 2155 | , typename iterator_traits<RandIt>::size_type l_block |
| 2156 | , bool use_internal_buf |
| 2157 | , bool xbuf_used |
| 2158 | , Compare comp |
| 2159 | , XBuf & xbuf |
| 2160 | ) |
| 2161 | { |
| 2162 | typedef typename iterator_traits<RandIt>::size_type size_type; |
| 2163 | (void)l_block; |
| 2164 | size_type n_keys = collected-l_intbuf; |
| 2165 | size_type len = len1+len2; |
| 2166 | if(use_internal_buf){ |
| 2167 | if(xbuf_used){ |
| 2168 | xbuf.clear(); |
| 2169 | //Nothing to do |
| 2170 | if(n_keys){ |
| 2171 | stable_sort(first, first+n_keys, comp, xbuf); |
| 2172 | stable_merge(first, first+n_keys, first+len, comp, xbuf); |
| 2173 | BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A key mrg: " , len); |
| 2174 | } |
| 2175 | } |
| 2176 | else{ |
| 2177 | xbuf.clear(); |
| 2178 | stable_sort(first, first+collected, comp, xbuf); |
| 2179 | BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A k/b srt: " , len); |
| 2180 | stable_merge(first, first+collected, first+len, comp, xbuf); |
| 2181 | BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A k/b mrg: " , len); |
| 2182 | } |
| 2183 | } |
| 2184 | else{ |
| 2185 | xbuf.clear(); |
| 2186 | stable_sort(first, first+collected, comp, xbuf); |
| 2187 | BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A k/b srt: " , len); |
| 2188 | stable_merge(first, first+collected, first+len1+len2, comp, xbuf); |
| 2189 | BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A k/b mrg: " , len); |
| 2190 | } |
| 2191 | } |
| 2192 | |
| 2193 | template<class SizeType, class Xbuf> |
| 2194 | inline SizeType adaptive_merge_n_keys_intbuf(SizeType &rl_block, SizeType len1, SizeType len2, Xbuf & xbuf, SizeType &l_intbuf_inout) |
| 2195 | { |
| 2196 | typedef SizeType size_type; |
| 2197 | size_type l_block = rl_block; |
| 2198 | size_type l_intbuf = xbuf.capacity() >= l_block ? 0u : l_block; |
| 2199 | |
| 2200 | while(xbuf.capacity() >= l_block*2){ |
| 2201 | l_block *= 2; |
| 2202 | } |
| 2203 | |
| 2204 | //This is the minimum number of keys to implement the ideal algorithm |
| 2205 | size_type n_keys = len1/l_block+len2/l_block; |
| 2206 | while(n_keys >= ((len1-l_intbuf-n_keys)/l_block + len2/l_block)){ |
| 2207 | --n_keys; |
| 2208 | } |
| 2209 | ++n_keys; |
| 2210 | BOOST_ASSERT(n_keys >= ((len1-l_intbuf-n_keys)/l_block + len2/l_block)); |
| 2211 | |
| 2212 | if(xbuf.template supports_aligned_trailing<size_type>(l_block, n_keys)){ |
| 2213 | n_keys = 0u; |
| 2214 | } |
| 2215 | l_intbuf_inout = l_intbuf; |
| 2216 | rl_block = l_block; |
| 2217 | return n_keys; |
| 2218 | } |
| 2219 | |
| 2220 | /////////////////////////////////////////////////////////////////////////////////////////// |
| 2221 | /////////////////////////////////////////////////////////////////////////////////////////// |
| 2222 | /////////////////////////////////////////////////////////////////////////////////////////// |
| 2223 | /////////////////////////////////////////////////////////////////////////////////////////// |
| 2224 | /////////////////////////////////////////////////////////////////////////////////////////// |
| 2225 | /////////////////////////////////////////////////////////////////////////////////////////// |
| 2226 | /////////////////////////////////////////////////////////////////////////////////////////// |
| 2227 | |
| 2228 | // Main explanation of the sort algorithm. |
| 2229 | // |
| 2230 | // csqrtlen = ceil(sqrt(len)); |
| 2231 | // |
// * First, 2*csqrtlen unique elements are extracted from the elements to be
| 2233 | // sorted and placed in the beginning of the range. |
| 2234 | // |
| 2235 | // * Step "build_blocks": In this nearly-classic merge step, 2*csqrtlen unique elements |
// will be used as auxiliary memory, so the trailing len-2*csqrtlen elements
// are grouped in blocks of sorted 4*csqrtlen elements. At the end of the step
| 2238 | // 2*csqrtlen unique elements are again the leading elements of the whole range. |
| 2239 | // |
| 2240 | // * Step "combine_blocks": pairs of previously formed blocks are merged with a different |
| 2241 | // ("smart") algorithm to form blocks of 8*csqrtlen elements. This step is slower than the |
| 2242 | // "build_blocks" step and repeated iteratively (forming blocks of 16*csqrtlen, 32*csqrtlen |
// elements, etc.) until all trailing (len-2*csqrtlen) elements are merged.
| 2244 | // |
// In "combine_blocks", len/csqrtlen elements are used as "keys" (markers) to
| 2246 | // know if elements belong to the first or second block to be merged and another |
| 2247 | // leading csqrtlen elements are used as buffer. Explanation of the "combine_blocks" step: |
| 2248 | // |
| 2249 | // Iteratively until all trailing (len-2*csqrtlen) elements are merged: |
| 2250 | // Iteratively for each pair of previously merged block: |
// * Blocks are divided into groups of csqrtlen elements and
| 2252 | // 2*merged_block/csqrtlen keys are sorted to be used as markers |
// * Groups are selection-sorted by their first or last element (depending on whether they
// are merged to the left or to the right) and keys are reordered accordingly as an imitation buffer.
| 2255 | // * Elements of each block pair are merged using the csqrtlen buffer taking into account |
| 2256 | // if they belong to the first half or second half (marked by the key). |
| 2257 | // |
// * In the final merge step, the leading elements (2*csqrtlen) are sorted and merged,
// using rotations, with the rest of the elements sorted in the "combine_blocks" step.
| 2260 | // |
| 2261 | // Corner cases: |
| 2262 | // |
// * If 2*csqrtlen unique elements cannot be extracted:
| 2264 | // |
// * If csqrtlen+len/csqrtlen elements are extracted, then only csqrtlen elements are used
| 2266 | // as buffer in the "build_blocks" step forming blocks of 2*csqrtlen elements. This |
| 2267 | // means that an additional "combine_blocks" step will be needed to merge all elements. |
| 2268 | // |
// * If fewer than csqrtlen+len/csqrtlen elements can be extracted, but still more than a minimum,
// then the number of elements used as buffer and keys in the "build_blocks"
// and "combine_blocks" steps is reduced. If "combine_blocks" does not have enough keys
// due to this reduction, a rotation-based smart merge is used.
| 2273 | // |
| 2274 | // * If the minimum number of keys can't be extracted, a rotation-based sorting is performed. |
| 2275 | // |
// * If auxiliary memory is greater than or equal to ceil(len/2), a half-copying mergesort is used.
| 2277 | // |
| 2278 | // * If auxiliary memory is more than csqrtlen+n_keys*sizeof(std::size_t), |
| 2279 | // then only csqrtlen elements need to be extracted and "combine_blocks" will use integral |
| 2280 | // keys to combine blocks. |
| 2281 | // |
| 2282 | // * If auxiliary memory is available, the "build_blocks" will be extended to build bigger blocks |
| 2283 | // using classic merge. |
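//
// Usage sketch (illustrative only; it assumes the public entry point declared in
// <boost/move/algo/adaptive_sort.hpp>, which ends up calling adaptive_sort_impl below):
//
//   #include <boost/move/algo/adaptive_sort.hpp>
//   #include <functional>
//   #include <vector>
//
//   std::vector<int> v = {5, 1, 4, 2, 3};
//   boost::movelib::adaptive_sort(v.begin(), v.end(), std::less<int>());
//   // v is now {1, 2, 3, 4, 5}; equal elements keep their relative order (stable sort).
//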
| 2284 | template<class RandIt, class Compare, class XBuf> |
| 2285 | void adaptive_sort_impl |
| 2286 | ( RandIt first |
| 2287 | , typename iterator_traits<RandIt>::size_type const len |
| 2288 | , Compare comp |
| 2289 | , XBuf & xbuf |
| 2290 | ) |
| 2291 | { |
| 2292 | typedef typename iterator_traits<RandIt>::size_type size_type; |
| 2293 | |
| 2294 | //Small sorts go directly to insertion sort |
| 2295 | if(len <= size_type(AdaptiveSortInsertionSortThreshold)){ |
| 2296 | insertion_sort(first, first + len, comp); |
| 2297 | return; |
| 2298 | } |
| 2299 | |
| 2300 | if((len-len/2) <= xbuf.capacity()){ |
| 2301 | merge_sort(first, first+len, comp, xbuf.data()); |
| 2302 | return; |
| 2303 | } |
| 2304 | |
| 2305 | //Make sure it is at least four |
| 2306 | BOOST_STATIC_ASSERT(AdaptiveSortInsertionSortThreshold >= 4); |
| 2307 | |
| 2308 | size_type l_base = 0; |
| 2309 | size_type l_intbuf = 0; |
| 2310 | size_type n_keys = 0; |
| 2311 | size_type l_build_buf = 0; |
| 2312 | |
//Calculate and extract needed unique elements. If a minimum is not achieved,
//fall back to a rotation-based merge
| 2315 | if(!adaptive_sort_build_params(first, len, comp, n_keys, l_intbuf, l_base, l_build_buf, xbuf)){ |
| 2316 | stable_sort(first, first+len, comp, xbuf); |
| 2317 | return; |
| 2318 | } |
| 2319 | BOOST_ASSERT(l_build_buf); |
| 2320 | //Otherwise, continue the adaptive_sort |
| 2321 | BOOST_MOVE_ADAPTIVE_SORT_PRINT("\n After collect_unique: " , len); |
| 2322 | size_type const n_key_plus_buf = l_intbuf+n_keys; |
//l_build_buf is always a power of two if l_intbuf is zero
| 2324 | BOOST_ASSERT(l_intbuf || (0 == (l_build_buf & (l_build_buf-1)))); |
| 2325 | |
| 2326 | //Classic merge sort until internal buffer and xbuf are exhausted |
| 2327 | size_type const l_merged = adaptive_sort_build_blocks |
| 2328 | (first+n_key_plus_buf-l_build_buf, len-n_key_plus_buf+l_build_buf, l_base, l_build_buf, xbuf, comp); |
| 2329 | BOOST_MOVE_ADAPTIVE_SORT_PRINT(" After build_blocks: " , len); |
| 2330 | |
| 2331 | //Non-trivial merge |
| 2332 | bool const buffer_right = adaptive_sort_combine_all_blocks |
| 2333 | (first, n_keys, first+n_keys, len-n_keys, l_merged, l_intbuf, xbuf, comp); |
| 2334 | |
| 2335 | //Sort keys and buffer and merge the whole sequence |
| 2336 | adaptive_sort_final_merge(buffer_right, first, l_intbuf, n_keys, len, xbuf, comp); |
| 2337 | } |
| 2338 | |
| 2339 | // Main explanation of the merge algorithm. |
| 2340 | // |
| 2341 | // csqrtlen = ceil(sqrt(len)); |
| 2342 | // |
| 2343 | // * First, csqrtlen [to be used as buffer] + (len/csqrtlen - 1) [to be used as keys] => to_collect |
| 2344 | // unique elements are extracted from elements to be sorted and placed in the beginning of the range. |
| 2345 | // |
| 2346 | // * Step "combine_blocks": the leading (len1-to_collect) elements plus trailing len2 elements |
// are merged with a non-trivial ("smart") algorithm to form an ordered range of the trailing "len-to_collect" elements.
| 2348 | // |
| 2349 | // Explanation of the "combine_blocks" step: |
| 2350 | // |
// * Trailing [first+to_collect, first+len1) elements are divided into groups of csqrtlen elements.
| 2352 | // Remaining elements that can't form a group are grouped in front of those elements. |
// * Trailing [first+len1, first+len1+len2) elements are divided into groups of csqrtlen elements.
| 2354 | // Remaining elements that can't form a group are grouped in the back of those elements. |
| 2355 | // * In parallel the following two steps are performed: |
// * Groups are selection-sorted by their first or last element (depending on whether they
// are merged to the left or to the right) and keys are reordered accordingly as an imitation buffer.
| 2358 | // * Elements of each block pair are merged using the csqrtlen buffer taking into account |
| 2359 | // if they belong to the first half or second half (marked by the key). |
| 2360 | // |
// * In the final merge step, the leading "to_collect" elements are merged, using rotations,
// with the rest of the elements merged in the "combine_blocks" step.
| 2363 | // |
| 2364 | // Corner cases: |
| 2365 | // |
// * If "to_collect" unique elements cannot be extracted:
| 2367 | // |
// * If more than a minimum number of elements is extracted,
// the number of elements used as buffer and keys in the
// "combine_blocks" step is reduced. If "combine_blocks" does not have enough keys due to this reduction,
// a rotation-based smart merge is used.
| 2372 | // |
| 2373 | // * If the minimum number of keys can't be extracted, a rotation-based merge is performed. |
| 2374 | // |
// * If auxiliary memory is greater than or equal to min(len1, len2), a buffered merge is performed.
| 2376 | // |
// * If len1 or len2 is less than 2*csqrtlen, a rotation-based merge is performed.
| 2378 | // |
| 2379 | // * If auxiliary memory is more than csqrtlen+n_keys*sizeof(std::size_t), |
// then no unique elements need to be extracted and "combine_blocks" will use integral
| 2381 | // keys to combine blocks. |
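//
// Usage sketch (illustrative only; it assumes the public entry point declared in
// <boost/move/algo/adaptive_merge.hpp>, which ends up calling adaptive_merge_impl below):
//
//   #include <boost/move/algo/adaptive_merge.hpp>
//   #include <functional>
//   #include <vector>
//
//   std::vector<int> v = {1, 3, 5, 2, 4, 6};   // two consecutive sorted subranges
//   boost::movelib::adaptive_merge(v.begin(), v.begin() + 3, v.end(), std::less<int>());
//   // v is now {1, 2, 3, 4, 5, 6}; the merge is stable.
//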
| 2382 | template<class RandIt, class Compare, class XBuf> |
| 2383 | void adaptive_merge_impl |
| 2384 | ( RandIt first |
| 2385 | , typename iterator_traits<RandIt>::size_type const len1 |
| 2386 | , typename iterator_traits<RandIt>::size_type const len2 |
| 2387 | , Compare comp |
| 2388 | , XBuf & xbuf |
| 2389 | ) |
| 2390 | { |
| 2391 | typedef typename iterator_traits<RandIt>::size_type size_type; |
| 2392 | |
| 2393 | if(xbuf.capacity() >= min_value<size_type>(len1, len2)){ |
| 2394 | buffered_merge(first, first+len1, first+(len1+len2), comp, xbuf); |
| 2395 | } |
| 2396 | else{ |
| 2397 | const size_type len = len1+len2; |
| 2398 | //Calculate ideal parameters and try to collect needed unique keys |
| 2399 | size_type l_block = size_type(ceil_sqrt(len)); |
| 2400 | |
//One range is not big enough to extract keys and the internal buffer, so a
//rotation-based merge will do just fine
| 2403 | if(len1 <= l_block*2 || len2 <= l_block*2){ |
| 2404 | merge_bufferless(first, first+len1, first+len1+len2, comp); |
| 2405 | return; |
| 2406 | } |
| 2407 | |
//Compute the number of keys and the internal buffer size. If xbuf has enough memory, no
| 2409 | //internal buffer is needed so l_intbuf will remain 0. |
| 2410 | size_type l_intbuf = 0; |
| 2411 | size_type n_keys = adaptive_merge_n_keys_intbuf(l_block, len1, len2, xbuf, l_intbuf); |
| 2412 | size_type const to_collect = l_intbuf+n_keys; |
| 2413 | //Try to extract needed unique values from the first range |
| 2414 | size_type const collected = collect_unique(first, first+len1, to_collect, comp, xbuf); |
| 2415 | BOOST_MOVE_ADAPTIVE_SORT_PRINT("\n A collect: " , len); |
| 2416 | |
//Not even the minimum number of keys is available in the first range, so fall back to rotations
| 2418 | if(collected != to_collect && collected < 4){ |
| 2419 | merge_bufferless(first, first+len1, first+len1+len2, comp); |
| 2420 | return; |
| 2421 | } |
| 2422 | |
| 2423 | //If not enough keys but more than minimum, adjust the internal buffer and key count |
| 2424 | bool use_internal_buf = collected == to_collect; |
| 2425 | if (!use_internal_buf){ |
| 2426 | l_intbuf = 0u; |
| 2427 | n_keys = collected; |
| 2428 | l_block = lblock_for_combine(l_intbuf, n_keys, len, use_internal_buf); |
//If use_internal_buf is false, then the internal buffer will be zero and a rotation-based combination will be used
| 2430 | l_intbuf = use_internal_buf ? l_block : 0u; |
| 2431 | } |
| 2432 | |
| 2433 | bool const xbuf_used = collected == to_collect && xbuf.capacity() >= l_block; |
| 2434 | //Merge trailing elements using smart merges |
| 2435 | adaptive_merge_combine_blocks(first, len1, len2, collected, n_keys, l_block, use_internal_buf, xbuf_used, comp, xbuf); |
| 2436 | //Merge buffer and keys with the rest of the values |
| 2437 | adaptive_merge_final_merge (first, len1, len2, collected, l_intbuf, l_block, use_internal_buf, xbuf_used, comp, xbuf); |
| 2438 | } |
| 2439 | } |
| 2440 | |
| 2441 | |
| 2442 | } //namespace detail_adaptive { |
| 2443 | } //namespace movelib { |
| 2444 | } //namespace boost { |
| 2445 | |
| 2446 | #include <boost/move/detail/config_end.hpp> |
| 2447 | |
| 2448 | #endif //#define BOOST_MOVE_ADAPTIVE_SORT_MERGE_HPP |
| 2449 | |