1//////////////////////////////////////////////////////////////////////////////
2//
3// (C) Copyright Ion Gaztanaga 2015-2016.
4// Distributed under the Boost Software License, Version 1.0.
5// (See accompanying file LICENSE_1_0.txt or copy at
6// http://www.boost.org/LICENSE_1_0.txt)
7//
8// See http://www.boost.org/libs/move for documentation.
9//
10//////////////////////////////////////////////////////////////////////////////
11//
12// Stable sorting that works in O(N*log(N)) worst time
13// and uses O(1) extra memory
14//
15//////////////////////////////////////////////////////////////////////////////
16//
17// The main idea of the adaptive_sort algorithm was developed by Andrey Astrelin
18// and explained in the article from the russian collaborative blog
19// Habrahabr (http://habrahabr.ru/post/205290/). The algorithm is based on
20// ideas from B-C. Huang and M. A. Langston explained in their article
21// "Fast Stable Merging and Sorting in Constant Extra Space (1989-1992)"
22// (http://comjnl.oxfordjournals.org/content/35/6/643.full.pdf).
23//
24// This implementation by Ion Gaztanaga uses previous ideas with additional changes:
25//
26// - Use of GCD-based rotation.
27// - Non power of two buffer-sizes.
28// - Tries to find sqrt(len)*2 unique keys, so that the merge sort
29// phase can form up to sqrt(len)*4 segments if enough keys are found.
30// - The merge-sort phase can take advantage of external memory to
31// save some additional combination steps.
32// - Combination phase: Blocks are selection sorted and merged in parallel.
33// - The combination phase is performed alternating merge to left and merge
34// to right phases minimizing swaps due to internal buffer repositioning.
35// - When merging blocks special optimizations are made to avoid moving some
36// elements twice.
37//
38// The adaptive_merge algorithm was developed by Ion Gaztanaga reusing some parts
39// from the sorting algorithm and implementing an additional block merge algorithm
40// without moving elements to left or right.
41//////////////////////////////////////////////////////////////////////////////
42#ifndef BOOST_MOVE_ADAPTIVE_SORT_MERGE_HPP
43#define BOOST_MOVE_ADAPTIVE_SORT_MERGE_HPP
44
45#include <boost/move/detail/config_begin.hpp>
46#include <boost/move/detail/reverse_iterator.hpp>
47#include <boost/move/algo/move.hpp>
48#include <boost/move/algo/detail/merge.hpp>
49#include <boost/move/adl_move_swap.hpp>
50#include <boost/move/algo/detail/insertion_sort.hpp>
51#include <boost/move/algo/detail/merge_sort.hpp>
52#include <boost/move/algo/detail/merge.hpp>
53#include <boost/assert.hpp>
54#include <boost/cstdint.hpp>
55
56#ifdef BOOST_MOVE_ADAPTIVE_SORT_STATS
57 #define BOOST_MOVE_ADAPTIVE_SORT_PRINT(STR, L) \
58 print_stats(STR, L)\
59 //
60#else
61 #define BOOST_MOVE_ADAPTIVE_SORT_PRINT(STR, L)
62#endif
63
64#ifdef BOOST_MOVE_ADAPTIVE_SORT_INVARIANTS
65 #define BOOST_MOVE_ADAPTIVE_SORT_INVARIANT BOOST_ASSERT
66#else
67 #define BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(L)
68#endif
69
70
71
72namespace boost {
73namespace movelib {
74
75namespace detail_adaptive {
76
77static const std::size_t AdaptiveSortInsertionSortThreshold = 16;
78//static const std::size_t AdaptiveSortInsertionSortThreshold = 4;
79BOOST_STATIC_ASSERT((AdaptiveSortInsertionSortThreshold&(AdaptiveSortInsertionSortThreshold-1)) == 0);
80
81#if defined BOOST_HAS_INTPTR_T
82 typedef ::boost::uintptr_t uintptr_t;
83#else
84 typedef std::size_t uintptr_t;
85#endif
86
87template<class T>
88const T &min_value(const T &a, const T &b)
89{
90 return a < b ? a : b;
91}
92
93template<class T>
94const T &max_value(const T &a, const T &b)
95{
96 return a > b ? a : b;
97}
98
99template<class ForwardIt, class Pred>
100bool is_sorted(ForwardIt const first, ForwardIt last, Pred pred)
101{
102 if (first != last) {
103 ForwardIt next = first, cur(first);
104 while (++next != last) {
105 if (pred(*next, *cur))
106 return false;
107 cur = next;
108 }
109 }
110 return true;
111}
112
113#if defined(BOOST_MOVE_ADAPTIVE_SORT_INVARIANTS)
114
115bool is_sorted(::order_perf_type *first, ::order_perf_type *last, ::order_type_less)
116{
117 if (first != last) {
118 const order_perf_type *next = first, *cur(first);
119 while (++next != last) {
120 if (!(cur->key < next->key || (cur->key == next->key && cur->val < next->val)))
121 return false;
122 cur = next;
123 }
124 }
125 return true;
126}
127
128#endif //BOOST_MOVE_ADAPTIVE_SORT_INVARIANTS
129
130template<class ForwardIt, class Pred>
131bool is_sorted_and_unique(ForwardIt first, ForwardIt last, Pred pred)
132{
133 if (first != last) {
134 ForwardIt next = first;
135 while (++next != last) {
136 if (!pred(*first, *next))
137 return false;
138 first = next;
139 }
140 }
141 return true;
142}
143
144template<class ForwardIt, class Pred, class V>
145typename iterator_traits<ForwardIt>::size_type
146 count_if_with(ForwardIt first, ForwardIt last, Pred pred, const V &v)
147{
148 typedef typename iterator_traits<ForwardIt>::size_type size_type;
149 size_type count = 0;
150 while(first != last) {
151 count += static_cast<size_type>(0 != pred(*first, v));
152 ++first;
153 }
154 return count;
155}
156
157template<class T, class RandRawIt = T*>
158class adaptive_xbuf
159{
160 adaptive_xbuf(const adaptive_xbuf &);
161 adaptive_xbuf & operator=(const adaptive_xbuf &);
162
163 public:
164 typedef RandRawIt iterator;
165
166 adaptive_xbuf()
167 : m_ptr(), m_size(0), m_capacity(0)
168 {}
169
170 adaptive_xbuf(RandRawIt raw_memory, std::size_t capacity)
171 : m_ptr(raw_memory), m_size(0), m_capacity(capacity)
172 {}
173
174 template<class RandIt>
175 void move_assign(RandIt first, std::size_t n)
176 {
177 if(n <= m_size){
178 boost::move(first, first+n, m_ptr);
179 std::size_t size = m_size;
180 while(size-- != n){
181 m_ptr[size].~T();
182 }
183 m_size = n;
184 }
185 else{
186 RandRawIt result = boost::move(first, first+m_size, m_ptr);
187 boost::uninitialized_move(first+m_size, first+n, result);
188 m_size = n;
189 }
190 }
191
192 template<class RandIt>
193 void push_back(RandIt first, std::size_t n)
194 {
195 BOOST_ASSERT(m_capacity - m_size >= n);
196 boost::uninitialized_move(first, first+n, m_ptr+m_size);
197 m_size += n;
198 }
199
200 template<class RandIt>
201 iterator add(RandIt it)
202 {
203 BOOST_ASSERT(m_size < m_capacity);
204 RandRawIt p_ret = m_ptr + m_size;
205 ::new(&*p_ret) T(::boost::move(*it));
206 ++m_size;
207 return p_ret;
208 }
209
210 template<class RandIt>
211 void insert(iterator pos, RandIt it)
212 {
213 if(pos == (m_ptr + m_size)){
214 this->add(it);
215 }
216 else{
217 this->add(m_ptr+m_size-1);
218 //m_size updated
219 boost::move_backward(pos, m_ptr+m_size-2, m_ptr+m_size-1);
220 *pos = boost::move(*it);
221 }
222 }
223
224 void set_size(std::size_t size)
225 {
226 m_size = size;
227 }
228
229 void shrink_to_fit(std::size_t const size)
230 {
231 if(m_size > size){
232 for(std::size_t szt_i = size; szt_i != m_size; ++szt_i){
233 m_ptr[szt_i].~T();
234 }
235 m_size = size;
236 }
237 }
238
239 void initialize_until(std::size_t const size, T &t)
240 {
241 BOOST_ASSERT(m_size < m_capacity);
242 if(m_size < size){
243 ::new((void*)&m_ptr[m_size]) T(::boost::move(t));
244 ++m_size;
245 for(; m_size != size; ++m_size){
246 ::new((void*)&m_ptr[m_size]) T(::boost::move(m_ptr[m_size-1]));
247 }
248 t = ::boost::move(m_ptr[m_size-1]);
249 }
250 }
251
252 private:
253 template<class RIt>
254 static bool is_raw_ptr(RIt)
255 {
256 return false;
257 }
258
259 static bool is_raw_ptr(T*)
260 {
261 return true;
262 }
263
264 public:
265 template<class U>
266 bool supports_aligned_trailing(std::size_t size, std::size_t trail_count) const
267 {
268 if(this->is_raw_ptr(this->data()) && m_capacity){
269 uintptr_t u_addr_sz = uintptr_t(&*(this->data()+size));
270 uintptr_t u_addr_cp = uintptr_t(&*(this->data()+this->capacity()));
271 u_addr_sz = ((u_addr_sz + sizeof(U)-1)/sizeof(U))*sizeof(U);
272 return (u_addr_cp >= u_addr_sz) && ((u_addr_cp - u_addr_sz)/sizeof(U) >= trail_count);
273 }
274 return false;
275 }
276
277 template<class U>
278 U *aligned_trailing() const
279 {
280 return this->aligned_trailing<U>(this->size());
281 }
282
283 template<class U>
284 U *aligned_trailing(std::size_t pos) const
285 {
286 uintptr_t u_addr = uintptr_t(&*(this->data()+pos));
287 u_addr = ((u_addr + sizeof(U)-1)/sizeof(U))*sizeof(U);
288 return (U*)u_addr;
289 }
290
291 ~adaptive_xbuf()
292 {
293 this->clear();
294 }
295
296 std::size_t capacity() const
297 { return m_capacity; }
298
299 iterator data() const
300 { return m_ptr; }
301
302 iterator end() const
303 { return m_ptr+m_size; }
304
305 std::size_t size() const
306 { return m_size; }
307
308 bool empty() const
309 { return !m_size; }
310
311 void clear()
312 {
313 this->shrink_to_fit(0u);
314 }
315
316 private:
317 RandRawIt m_ptr;
318 std::size_t m_size;
319 std::size_t m_capacity;
320};
321
322template<class Iterator, class Op>
323class range_xbuf
324{
325 range_xbuf(const range_xbuf &);
326 range_xbuf & operator=(const range_xbuf &);
327
328 public:
329 typedef typename iterator_traits<Iterator>::size_type size_type;
330 typedef Iterator iterator;
331
332 range_xbuf(Iterator first, Iterator last)
333 : m_first(first), m_last(first), m_cap(last)
334 {}
335
336 template<class RandIt>
337 void move_assign(RandIt first, std::size_t n)
338 {
339 BOOST_ASSERT(size_type(n) <= size_type(m_cap-m_first));
340 m_last = Op()(forward_t(), first, first+n, m_first);
341 }
342
343 ~range_xbuf()
344 {}
345
346 std::size_t capacity() const
347 { return m_cap-m_first; }
348
349 Iterator data() const
350 { return m_first; }
351
352 Iterator end() const
353 { return m_last; }
354
355 std::size_t size() const
356 { return m_last-m_first; }
357
358 bool empty() const
359 { return m_first == m_last; }
360
361 void clear()
362 {
363 m_last = m_first;
364 }
365
366 template<class RandIt>
367 iterator add(RandIt it)
368 {
369 Iterator pos(m_last);
370 *pos = boost::move(*it);
371 ++m_last;
372 return pos;
373 }
374
375 void set_size(std::size_t size)
376 {
377 m_last = m_first;
378 m_last += size;
379 }
380
381 private:
382 Iterator const m_first;
383 Iterator m_last;
384 Iterator const m_cap;
385};
386
387
388template<class RandIt, class Compare>
389RandIt skip_until_merge
390 ( RandIt first1, RandIt const last1
391 , const typename iterator_traits<RandIt>::value_type &next_key, Compare comp)
392{
393 while(first1 != last1 && !comp(next_key, *first1)){
394 ++first1;
395 }
396 return first1;
397}
398
399
400template<class RandIt1, class RandIt2, class RandItB, class Compare, class Op>
401RandItB op_buffered_partial_merge_to_range1_and_buffer
402 ( RandIt1 first1, RandIt1 const last1
403 , RandIt2 &rfirst2, RandIt2 const last2
404 , RandItB &rfirstb, Compare comp, Op op )
405{
406 RandItB firstb = rfirstb;
407 RandItB lastb = firstb;
408 RandIt2 first2 = rfirst2;
409
410 //Move to buffer while merging
411 //Three way moves need less moves when op is swap_op so use it
412 //when merging elements from range2 to the destination occupied by range1
413 if(first1 != last1 && first2 != last2){
414 op(three_way_t(), first2++, first1++, lastb++);
415
416 while(true){
417 if(first1 == last1){
418 break;
419 }
420 if(first2 == last2){
421 lastb = op(forward_t(), first1, last1, firstb);
422 break;
423 }
424 op(three_way_t(), comp(*first2, *firstb) ? first2++ : firstb++, first1++, lastb++);
425 }
426 rfirst2 = first2;
427 rfirstb = firstb;
428 }
429
430 return lastb;
431}
432
433template<class RandItKeys, class RandIt>
434void swap_and_update_key
435 ( bool is_next_far_away
436 , RandItKeys const key_next
437 , RandItKeys const key_range2
438 , RandItKeys &key_mid
439 , RandIt const begin
440 , RandIt const end
441 , RandIt const with)
442{
443 if(is_next_far_away){
444 ::boost::adl_move_swap_ranges(begin, end, with);
445 ::boost::adl_move_swap(*key_next, *key_range2);
446 if(key_next == key_mid){
447 key_mid = key_range2;
448 }
449 else if(key_mid == key_range2){
450 key_mid = key_next;
451 }
452 }
453}
454
455///////////////////////////////////////////////////////////////////////////////
456//
457// MERGE BUFFERLESS
458//
459///////////////////////////////////////////////////////////////////////////////
460
461// [first1, last1) merge [last1,last2) -> [first1,last2)
462template<class RandIt, class Compare>
463RandIt partial_merge_bufferless_impl
464 (RandIt first1, RandIt last1, RandIt const last2, bool *const pis_range1_A, Compare comp)
465{
466 if(last1 == last2){
467 return first1;
468 }
469 bool const is_range1_A = *pis_range1_A;
470 if(first1 != last1 && comp(*last1, last1[-1])){
471 do{
472 RandIt const old_last1 = last1;
473 last1 = boost::movelib::lower_bound(last1, last2, *first1, comp);
474 first1 = rotate_gcd(first1, old_last1, last1);//old_last1 == last1 supported
475 if(last1 == last2){
476 return first1;
477 }
478 do{
479 ++first1;
480 } while(last1 != first1 && !comp(*last1, *first1) );
481 } while(first1 != last1);
482 }
483 *pis_range1_A = !is_range1_A;
484 return last1;
485}
486
487// [first1, last1) merge [last1,last2) -> [first1,last2)
488template<class RandIt, class Compare>
489RandIt partial_merge_bufferless
490 (RandIt first1, RandIt last1, RandIt const last2, bool *const pis_range1_A, Compare comp)
491{
492 return *pis_range1_A ? partial_merge_bufferless_impl(first1, last1, last2, pis_range1_A, comp)
493 : partial_merge_bufferless_impl(first1, last1, last2, pis_range1_A, antistable<Compare>(comp));
494}
495
496template<class SizeType>
497static SizeType needed_keys_count(SizeType n_block_a, SizeType n_block_b)
498{
499 return n_block_a + n_block_b;
500}
501
502template<class RandItKeys, class KeyCompare, class RandIt, class Compare>
503typename iterator_traits<RandIt>::size_type
504 find_next_block
505 ( RandItKeys key_first
506 , KeyCompare key_comp
507 , RandIt const first
508 , typename iterator_traits<RandIt>::size_type const l_block
509 , typename iterator_traits<RandIt>::size_type const ix_first_block
510 , typename iterator_traits<RandIt>::size_type const ix_last_block
511 , Compare comp)
512{
513 typedef typename iterator_traits<RandIt>::size_type size_type;
514 typedef typename iterator_traits<RandIt>::value_type value_type;
515 typedef typename iterator_traits<RandItKeys>::value_type key_type;
516 BOOST_ASSERT(ix_first_block <= ix_last_block);
517 size_type ix_min_block = 0u;
518 for (size_type szt_i = ix_first_block; szt_i < ix_last_block; ++szt_i) {
519 const value_type &min_val = first[ix_min_block*l_block];
520 const value_type &cur_val = first[szt_i*l_block];
521 const key_type &min_key = key_first[ix_min_block];
522 const key_type &cur_key = key_first[szt_i];
523
524 bool const less_than_minimum = comp(cur_val, min_val) ||
525 (!comp(min_val, cur_val) && key_comp(cur_key, min_key));
526
527 if (less_than_minimum) {
528 ix_min_block = szt_i;
529 }
530 }
531 return ix_min_block;
532}
533
534template<class RandItKeys, class KeyCompare, class RandIt, class Compare>
535void merge_blocks_bufferless
536 ( RandItKeys key_first
537 , KeyCompare key_comp
538 , RandIt const first
539 , typename iterator_traits<RandIt>::size_type const l_block
540 , typename iterator_traits<RandIt>::size_type const l_irreg1
541 , typename iterator_traits<RandIt>::size_type const n_block_a
542 , typename iterator_traits<RandIt>::size_type const n_block_b
543 , typename iterator_traits<RandIt>::size_type const l_irreg2
544 , Compare comp)
545{
546 typedef typename iterator_traits<RandIt>::size_type size_type;
547 size_type const key_count = needed_keys_count(n_block_a, n_block_b); (void)key_count;
548 //BOOST_ASSERT(n_block_a || n_block_b);
549 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted_and_unique(key_first, key_first + key_count, key_comp));
550 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!n_block_b || n_block_a == count_if_with(key_first, key_first + key_count, key_comp, key_first[n_block_a]));
551
552 size_type n_bef_irreg2 = 0;
553 bool l_irreg_pos_count = true;
554 RandItKeys key_mid(key_first + n_block_a);
555 RandIt const first_irr2 = first + l_irreg1 + (n_block_a+n_block_b)*l_block;
556 RandIt const last_irr2 = first_irr2 + l_irreg2;
557
558 { //Selection sort blocks
559 size_type n_block_left = n_block_b + n_block_a;
560 RandItKeys key_range2(key_first);
561
562 size_type min_check = n_block_a == n_block_left ? 0u : n_block_a;
563 size_type max_check = min_value(min_check+1, n_block_left);
564 for (RandIt f = first+l_irreg1; n_block_left; --n_block_left, ++key_range2, f += l_block, min_check -= min_check != 0, max_check -= max_check != 0) {
565 size_type const next_key_idx = find_next_block(key_range2, key_comp, f, l_block, min_check, max_check, comp);
566 RandItKeys const key_next(key_range2 + next_key_idx);
567 max_check = min_value(max_value(max_check, next_key_idx+2), n_block_left);
568
569 RandIt const first_min = f + next_key_idx*l_block;
570
571 //Check if irregular b block should go here.
572 //If so, break to the special code handling the irregular block
573 if (l_irreg_pos_count && l_irreg2 && comp(*first_irr2, *first_min)){
574 l_irreg_pos_count = false;
575 }
576 n_bef_irreg2 += l_irreg_pos_count;
577
578 swap_and_update_key(next_key_idx != 0, key_next, key_range2, key_mid, f, f + l_block, first_min);
579 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(f, f+l_block, comp));
580 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first_min, first_min + l_block, comp));
581 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT((f == (first+l_irreg1)) || !comp(*f, *(f-l_block)));
582 }
583 }
584 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first+l_irreg1+n_bef_irreg2*l_block, first_irr2, comp));
585
586 RandIt first1 = first;
587 RandIt last1 = first+l_irreg1;
588 RandItKeys const key_end (key_first+n_bef_irreg2);
589 bool is_range1_A = true;
590
591 for( ; key_first != key_end; ++key_first){
592 bool is_range2_A = key_mid == (key_first+key_count) || key_comp(*key_first, *key_mid);
593 first1 = is_range1_A == is_range2_A
594 ? last1 : partial_merge_bufferless(first1, last1, last1 + l_block, &is_range1_A, comp);
595 last1 += l_block;
596 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first, first1, comp));
597 }
598
599 merge_bufferless(is_range1_A ? first1 : last1, first_irr2, last_irr2, comp);
600 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first, last_irr2, comp));
601}
602
603///////////////////////////////////////////////////////////////////////////////
604//
605// BUFFERED MERGE
606//
607///////////////////////////////////////////////////////////////////////////////
608template<class RandIt, class Compare, class Op, class Buf>
609void op_buffered_merge
610 ( RandIt first, RandIt const middle, RandIt last
611 , Compare comp, Op op
612 , Buf &xbuf)
613{
614 if(first != middle && middle != last && comp(*middle, middle[-1])){
615 typedef typename iterator_traits<RandIt>::size_type size_type;
616 size_type const len1 = size_type(middle-first);
617 size_type const len2 = size_type(last-middle);
618 if(len1 <= len2){
619 first = boost::movelib::upper_bound(first, middle, *middle, comp);
620 xbuf.move_assign(first, size_type(middle-first));
621 op_merge_with_right_placed
622 (xbuf.data(), xbuf.end(), first, middle, last, comp, op);
623 }
624 else{
625 last = boost::movelib::lower_bound(middle, last, middle[-1], comp);
626 xbuf.move_assign(middle, size_type(last-middle));
627 op_merge_with_left_placed
628 (first, middle, last, xbuf.data(), xbuf.end(), comp, op);
629 }
630 }
631}
632
633template<class RandIt, class Compare, class XBuf>
634void buffered_merge
635 ( RandIt first, RandIt const middle, RandIt last
636 , Compare comp
637 , XBuf &xbuf)
638{
639 op_buffered_merge(first, middle, last, comp, move_op(), xbuf);
640}
641
642// Complexity: 2*distance(first, last)+max_collected^2/2
643//
644// Tries to collect at most n_keys unique elements from [first, last),
645// in the begining of the range, and ordered according to comp
646//
647// Returns the number of collected keys
648template<class RandIt, class Compare, class XBuf>
649typename iterator_traits<RandIt>::size_type
650 collect_unique
651 ( RandIt const first, RandIt const last
652 , typename iterator_traits<RandIt>::size_type const max_collected, Compare comp
653 , XBuf & xbuf)
654{
655 typedef typename iterator_traits<RandIt>::size_type size_type;
656 size_type h = 0;
657 if(max_collected){
658 ++h; // first key is always here
659 RandIt h0 = first;
660 RandIt u = first; ++u;
661 RandIt search_end = u;
662
663 if(xbuf.capacity() >= max_collected){
664 typename XBuf::iterator const ph0 = xbuf.add(first);
665 while(u != last && h < max_collected){
666 typename XBuf::iterator const r = boost::movelib::lower_bound(ph0, xbuf.end(), *u, comp);
667 //If key not found add it to [h, h+h0)
668 if(r == xbuf.end() || comp(*u, *r) ){
669 RandIt const new_h0 = boost::move(search_end, u, h0);
670 search_end = u;
671 ++search_end;
672 ++h;
673 xbuf.insert(r, u);
674 h0 = new_h0;
675 }
676 ++u;
677 }
678 boost::move_backward(first, h0, h0+h);
679 boost::move(xbuf.data(), xbuf.end(), first);
680 }
681 else{
682 while(u != last && h < max_collected){
683 RandIt const r = boost::movelib::lower_bound(h0, search_end, *u, comp);
684 //If key not found add it to [h, h+h0)
685 if(r == search_end || comp(*u, *r) ){
686 RandIt const new_h0 = rotate_gcd(h0, search_end, u);
687 search_end = u;
688 ++search_end;
689 ++h;
690 rotate_gcd(r+(new_h0-h0), u, search_end);
691 h0 = new_h0;
692 }
693 ++u;
694 }
695 rotate_gcd(first, h0, h0+h);
696 }
697 }
698 return h;
699}
700
701template<class Unsigned>
702Unsigned floor_sqrt(Unsigned const n)
703{
704 Unsigned x = n;
705 Unsigned y = x/2 + (x&1);
706 while (y < x){
707 x = y;
708 y = (x + n / x)/2;
709 }
710 return x;
711}
712
713template<class Unsigned>
714Unsigned ceil_sqrt(Unsigned const n)
715{
716 Unsigned r = floor_sqrt(n);
717 return r + Unsigned((n%r) != 0);
718}
719
720template<class Unsigned>
721Unsigned floor_merge_multiple(Unsigned const n, Unsigned &base, Unsigned &pow)
722{
723 Unsigned s = n;
724 Unsigned p = 0;
725 while(s > AdaptiveSortInsertionSortThreshold){
726 s /= 2;
727 ++p;
728 }
729 base = s;
730 pow = p;
731 return s << p;
732}
733
734template<class Unsigned>
735Unsigned ceil_merge_multiple(Unsigned const n, Unsigned &base, Unsigned &pow)
736{
737 Unsigned fm = floor_merge_multiple(n, base, pow);
738
739 if(fm != n){
740 if(base < AdaptiveSortInsertionSortThreshold){
741 ++base;
742 }
743 else{
744 base = AdaptiveSortInsertionSortThreshold/2 + 1;
745 ++pow;
746 }
747 }
748 return base << pow;
749}
750
751template<class Unsigned>
752Unsigned ceil_sqrt_multiple(Unsigned const n, Unsigned *pbase = 0)
753{
754 Unsigned const r = ceil_sqrt(n);
755 Unsigned pow = 0;
756 Unsigned base = 0;
757 Unsigned const res = ceil_merge_multiple(r, base, pow);
758 if(pbase) *pbase = base;
759 return res;
760}
761
762struct less
763{
764 template<class T>
765 bool operator()(const T &l, const T &r)
766 { return l < r; }
767};
768
769///////////////////////////////////////////////////////////////////////////////
770//
771// MERGE BLOCKS
772//
773///////////////////////////////////////////////////////////////////////////////
774
775//#define ADAPTIVE_SORT_MERGE_SLOW_STABLE_SORT_IS_NLOGN
776
777#if defined ADAPTIVE_SORT_MERGE_SLOW_STABLE_SORT_IS_NLOGN
778template<class RandIt, class Compare>
779void slow_stable_sort
780 ( RandIt const first, RandIt const last, Compare comp)
781{
782 boost::movelib::inplace_stable_sort(first, last, comp);
783}
784
785#else //ADAPTIVE_SORT_MERGE_SLOW_STABLE_SORT_IS_NLOGN
786
787template<class RandIt, class Compare>
788void slow_stable_sort
789 ( RandIt const first, RandIt const last, Compare comp)
790{
791 typedef typename iterator_traits<RandIt>::size_type size_type;
792 size_type L = size_type(last - first);
793 { //Use insertion sort to merge first elements
794 size_type m = 0;
795 while((L - m) > size_type(AdaptiveSortInsertionSortThreshold)){
796 insertion_sort(first+m, first+m+size_type(AdaptiveSortInsertionSortThreshold), comp);
797 m += AdaptiveSortInsertionSortThreshold;
798 }
799 insertion_sort(first+m, last, comp);
800 }
801
802 size_type h = AdaptiveSortInsertionSortThreshold;
803 for(bool do_merge = L > h; do_merge; h*=2){
804 do_merge = (L - h) > h;
805 size_type p0 = 0;
806 if(do_merge){
807 size_type const h_2 = 2*h;
808 while((L-p0) > h_2){
809 merge_bufferless(first+p0, first+p0+h, first+p0+h_2, comp);
810 p0 += h_2;
811 }
812 }
813 if((L-p0) > h){
814 merge_bufferless(first+p0, first+p0+h, last, comp);
815 }
816 }
817}
818
819#endif //ADAPTIVE_SORT_MERGE_SLOW_STABLE_SORT_IS_NLOGN
820
821//Returns new l_block and updates use_buf
822template<class Unsigned>
823Unsigned lblock_for_combine
824 (Unsigned const l_block, Unsigned const n_keys, Unsigned const l_data, bool &use_buf)
825{
826 BOOST_ASSERT(l_data > 1);
827
828 //We need to guarantee lblock >= l_merged/(n_keys/2) keys for the combination.
829 //We have at least 4 keys guaranteed (which are the minimum to merge 2 ranges)
830 //If l_block != 0, then n_keys is already enough to merge all blocks in all
831 //phases as we've found all needed keys for that buffer and length before.
832 //If l_block == 0 then see if half keys can be used as buffer and the rest
833 //as keys guaranteeing that n_keys >= (2*l_merged)/lblock =
834 if(!l_block){
835 //If l_block == 0 then n_keys is power of two
836 //(guaranteed by build_params(...))
837 BOOST_ASSERT(n_keys >= 4);
838 //BOOST_ASSERT(0 == (n_keys &(n_keys-1)));
839
840 //See if half keys are at least 4 and if half keys fulfill
841 Unsigned const new_buf = n_keys/2;
842 Unsigned const new_keys = n_keys-new_buf;
843 use_buf = new_keys >= 4 && new_keys >= l_data/new_buf;
844 if(use_buf){
845 return new_buf;
846 }
847 else{
848 return l_data/n_keys;
849 }
850 }
851 else{
852 use_buf = true;
853 return l_block;
854 }
855}
856
857template<class RandIt, class Compare, class XBuf>
858void stable_sort( RandIt first, RandIt last, Compare comp, XBuf & xbuf)
859{
860 typedef typename iterator_traits<RandIt>::size_type size_type;
861 size_type const len = size_type(last - first);
862 size_type const half_len = len/2 + (len&1);
863 if(std::size_t(xbuf.capacity() - xbuf.size()) >= half_len) {
864 merge_sort(first, last, comp, xbuf.data()+xbuf.size());
865 }
866 else{
867 slow_stable_sort(first, last, comp);
868 }
869}
870
871template<class RandIt, class Comp, class XBuf>
872void initialize_keys( RandIt first, RandIt last
873 , Comp comp
874 , XBuf & xbuf)
875{
876 stable_sort(first, last, comp, xbuf);
877}
878
879template<class RandIt, class U>
880void initialize_keys( RandIt first, RandIt last
881 , less
882 , U &)
883{
884 typedef typename iterator_traits<RandIt>::value_type value_type;
885 std::size_t count = std::size_t(last - first);
886 for(std::size_t i = 0; i != count; ++i){
887 *first = value_type(i);
888 ++first;
889 }
890}
891
892template<class RandIt>
893void move_data_backward( RandIt cur_pos
894 , typename iterator_traits<RandIt>::size_type const l_data
895 , RandIt new_pos
896 , bool const xbuf_used)
897{
898 //Move buffer to the total combination right
899 if(xbuf_used){
900 boost::move_backward(cur_pos, cur_pos+l_data, new_pos+l_data);
901 }
902 else{
903 boost::adl_move_swap_ranges_backward(cur_pos, cur_pos+l_data, new_pos+l_data);
904 //Rotate does less moves but it seems slower due to cache issues
905 //rotate_gcd(first-l_block, first+len-l_block, first+len);
906 }
907}
908
909template<class RandIt>
910void move_data_forward( RandIt cur_pos
911 , typename iterator_traits<RandIt>::size_type const l_data
912 , RandIt new_pos
913 , bool const xbuf_used)
914{
915 //Move buffer to the total combination right
916 if(xbuf_used){
917 boost::move(cur_pos, cur_pos+l_data, new_pos);
918 }
919 else{
920 boost::adl_move_swap_ranges(cur_pos, cur_pos+l_data, new_pos);
921 //Rotate does less moves but it seems slower due to cache issues
922 //rotate_gcd(first-l_block, first+len-l_block, first+len);
923 }
924}
925
926template <class Unsigned>
927Unsigned calculate_total_combined(Unsigned const len, Unsigned const l_prev_merged, Unsigned *pl_irreg_combined = 0)
928{
929 typedef Unsigned size_type;
930
931 size_type const l_combined = 2*l_prev_merged;
932 size_type l_irreg_combined = len%l_combined;
933 size_type l_total_combined = len;
934 if(l_irreg_combined <= l_prev_merged){
935 l_total_combined -= l_irreg_combined;
936 l_irreg_combined = 0;
937 }
938 if(pl_irreg_combined)
939 *pl_irreg_combined = l_irreg_combined;
940 return l_total_combined;
941}
942
943template<class RandItKeys, class KeyCompare, class SizeType, class XBuf>
944void combine_params
945 ( RandItKeys const keys
946 , KeyCompare key_comp
947 , SizeType l_combined
948 , SizeType const l_prev_merged
949 , SizeType const l_block
950 , XBuf & xbuf
951 //Output
952 , SizeType &n_block_a
953 , SizeType &n_block_b
954 , SizeType &l_irreg1
955 , SizeType &l_irreg2
956 //Options
957 , bool do_initialize_keys = true)
958{
959 typedef SizeType size_type;
960
961 //Initial parameters for selection sort blocks
962 l_irreg1 = l_prev_merged%l_block;
963 l_irreg2 = (l_combined-l_irreg1)%l_block;
964 BOOST_ASSERT(((l_combined-l_irreg1-l_irreg2)%l_block) == 0);
965 size_type const n_reg_block = (l_combined-l_irreg1-l_irreg2)/l_block;
966 n_block_a = l_prev_merged/l_block;
967 n_block_b = n_reg_block - n_block_a;
968 BOOST_ASSERT(n_reg_block>=n_block_a);
969
970 //Key initialization
971 if (do_initialize_keys) {
972 initialize_keys(keys, keys + needed_keys_count(n_block_a, n_block_b), key_comp, xbuf);
973 }
974}
975
976template<class RandIt1, class RandIt2, class RandItB, class Compare, class Op>
977RandItB op_buffered_partial_merge_and_swap_to_range1_and_buffer
978 ( RandIt1 first1, RandIt1 const last1
979 , RandIt2 &rfirst2, RandIt2 const last2, RandIt2 &rfirst_min
980 , RandItB &rfirstb, Compare comp, Op op )
981{
982 RandItB firstb = rfirstb;
983 RandItB lastb = firstb;
984 RandIt2 first2 = rfirst2;
985
986 //Move to buffer while merging
987 //Three way moves need less moves when op is swap_op so use it
988 //when merging elements from range2 to the destination occupied by range1
989 if(first1 != last1 && first2 != last2){
990 RandIt2 first_min = rfirst_min;
991 op(four_way_t(), first2++, first_min++, first1++, lastb++);
992
993 while(first1 != last1){
994 if(first2 == last2){
995 lastb = op(forward_t(), first1, last1, firstb);
996 break;
997 }
998 bool const min_less = comp(*first_min, *firstb);
999
1000 if(min_less){
1001 op( four_way_t(), first2++, first_min++, first1++, lastb++);
1002 }
1003 else{
1004 op(three_way_t(), firstb++, first1++, lastb++);
1005 }
1006 }
1007 rfirst2 = first2;
1008 rfirstb = firstb;
1009 rfirst_min = first_min;
1010 }
1011
1012 return lastb;
1013}
1014
1015//////////////////////////////////
1016//
1017// partial_merge
1018//
1019//////////////////////////////////
1020template<class InputIt1, class InputIt2, class OutputIt, class Compare, class Op>
1021OutputIt op_partial_merge_impl
1022 (InputIt1 &r_first1, InputIt1 const last1, InputIt2 &r_first2, InputIt2 const last2, OutputIt d_first, Compare comp, Op op)
1023{
1024 InputIt1 first1(r_first1);
1025 InputIt2 first2(r_first2);
1026 if(first2 != last2 && last1 != first1)
1027 while(1){
1028 if(comp(*first2, *first1)) {
1029 op(first2++, d_first++);
1030 if(first2 == last2){
1031 break;
1032 }
1033 }
1034 else{
1035 op(first1++, d_first++);
1036 if(first1 == last1){
1037 break;
1038 }
1039 }
1040 }
1041 r_first1 = first1;
1042 r_first2 = first2;
1043 return d_first;
1044}
1045
1046template<class InputIt1, class InputIt2, class OutputIt, class Compare, class Op>
1047OutputIt op_partial_merge
1048 (InputIt1 &r_first1, InputIt1 const last1, InputIt2 &r_first2, InputIt2 const last2, OutputIt d_first, Compare comp, Op op, bool is_stable)
1049{
1050 return is_stable ? op_partial_merge_impl(r_first1, last1, r_first2, last2, d_first, comp, op)
1051 : op_partial_merge_impl(r_first1, last1, r_first2, last2, d_first, antistable<Compare>(comp), op);
1052}
1053
1054//////////////////////////////////
1055//
1056// partial_merge_and_swap
1057//
1058//////////////////////////////////
1059template<class InputIt1, class InputIt2, class OutputIt, class Compare, class Op>
1060OutputIt op_partial_merge_and_swap_impl
1061 (InputIt1 &r_first1, InputIt1 const last1, InputIt2 &r_first2, InputIt2 const last2, InputIt2 &r_first_min, OutputIt d_first, Compare comp, Op op)
1062{
1063 InputIt1 first1(r_first1);
1064 InputIt2 first2(r_first2);
1065
1066 if(first2 != last2 && last1 != first1) {
1067 InputIt2 first_min(r_first_min);
1068 bool non_empty_ranges = true;
1069 do{
1070 if(comp(*first_min, *first1)) {
1071 op(three_way_t(), first2++, first_min++, d_first++);
1072 non_empty_ranges = first2 != last2;
1073 }
1074 else{
1075 op(first1++, d_first++);
1076 non_empty_ranges = first1 != last1;
1077 }
1078 } while(non_empty_ranges);
1079 r_first_min = first_min;
1080 r_first1 = first1;
1081 r_first2 = first2;
1082 }
1083 return d_first;
1084}
1085
1086template<class RandIt, class InputIt2, class OutputIt, class Compare, class Op>
1087RandIt op_partial_merge_and_swap
1088 (RandIt &r_first1, RandIt const last1, RandIt &r_first2, RandIt const last2, InputIt2 &r_first_min, OutputIt d_first, Compare comp, Op op, bool is_stable)
1089{
1090 return is_stable ? op_partial_merge_and_swap_impl(r_first1, last1, r_first2, last2, r_first_min, d_first, comp, op)
1091 : op_partial_merge_and_swap_impl(r_first1, last1, r_first2, last2, r_first_min, d_first, antistable<Compare>(comp), op);
1092}
1093
1094template<class RandIt, class RandItBuf, class Compare, class Op>
1095RandIt op_partial_merge_and_save_impl
1096 ( RandIt first1, RandIt const last1, RandIt &rfirst2, RandIt last2, RandIt first_min
1097 , RandItBuf &buf_first1_in_out, RandItBuf &buf_last1_in_out
1098 , Compare comp, Op op
1099 )
1100{
1101 RandItBuf buf_first1 = buf_first1_in_out;
1102 RandItBuf buf_last1 = buf_last1_in_out;
1103 RandIt first2(rfirst2);
1104
1105 bool const do_swap = first2 != first_min;
1106 if(buf_first1 == buf_last1){
1107 //Skip any element that does not need to be moved
1108 RandIt new_first1 = skip_until_merge(first1, last1, *first_min, comp);
1109 buf_first1 += (new_first1-first1);
1110 first1 = new_first1;
1111 buf_last1 = do_swap ? op_buffered_partial_merge_and_swap_to_range1_and_buffer(first1, last1, first2, last2, first_min, buf_first1, comp, op)
1112 : op_buffered_partial_merge_to_range1_and_buffer (first1, last1, first2, last2, buf_first1, comp, op);
1113 first1 = last1;
1114 }
1115 else{
1116 BOOST_ASSERT((last1-first1) == (buf_last1 - buf_first1));
1117 }
1118
1119 //Now merge from buffer
1120 first1 = do_swap ? op_partial_merge_and_swap_impl(buf_first1, buf_last1, first2, last2, first_min, first1, comp, op)
1121 : op_partial_merge_impl (buf_first1, buf_last1, first2, last2, first1, comp, op);
1122 buf_first1_in_out = buf_first1;
1123 buf_last1_in_out = buf_last1;
1124 rfirst2 = first2;
1125 return first1;
1126}
1127
1128template<class RandIt, class RandItBuf, class Compare, class Op>
1129RandIt op_partial_merge_and_save
1130 ( RandIt first1, RandIt const last1, RandIt &rfirst2, RandIt last2, RandIt first_min
1131 , RandItBuf &buf_first1_in_out
1132 , RandItBuf &buf_last1_in_out
1133 , Compare comp
1134 , Op op
1135 , bool is_stable)
1136{
1137 return is_stable
1138 ? op_partial_merge_and_save_impl
1139 (first1, last1, rfirst2, last2, first_min, buf_first1_in_out, buf_last1_in_out, comp, op)
1140 : op_partial_merge_and_save_impl
1141 (first1, last1, rfirst2, last2, first_min, buf_first1_in_out, buf_last1_in_out, antistable<Compare>(comp), op)
1142 ;
1143}
1144
1145
1146
1147template<class RandItKeys, class KeyCompare, class RandIt, class RandIt2, class OutputIt, class Compare, class Op>
1148OutputIt op_merge_blocks_with_irreg
1149 ( RandItKeys key_first
1150 , RandItKeys key_mid
1151 , KeyCompare key_comp
1152 , RandIt first_reg
1153 , RandIt2 &first_irr
1154 , RandIt2 const last_irr
1155 , OutputIt dest
1156 , typename iterator_traits<RandIt>::size_type const l_block
1157 , typename iterator_traits<RandIt>::size_type n_block_left
1158 , typename iterator_traits<RandIt>::size_type min_check
1159 , typename iterator_traits<RandIt>::size_type max_check
1160 , Compare comp, bool const is_stable, Op op)
1161{
1162 typedef typename iterator_traits<RandIt>::size_type size_type;
1163
1164 for(; n_block_left; --n_block_left, ++key_first, min_check -= min_check != 0, max_check -= max_check != 0){
1165 size_type next_key_idx = find_next_block(key_first, key_comp, first_reg, l_block, min_check, max_check, comp);
1166 max_check = min_value(max_value(max_check, next_key_idx+2), n_block_left);
1167 RandIt const last_reg = first_reg + l_block;
1168 RandIt first_min = first_reg + next_key_idx*l_block;
1169 RandIt const last_min = first_min + l_block; (void)last_min;
1170
1171 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first_reg, last_reg, comp));
1172 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!next_key_idx || is_sorted(first_min, last_min, comp));
1173 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT((!next_key_idx || !comp(*first_reg, *first_min )));
1174
1175 OutputIt orig_dest = dest; (void)orig_dest;
1176 dest = next_key_idx ? op_partial_merge_and_swap(first_irr, last_irr, first_reg, last_reg, first_min, dest, comp, op, is_stable)
1177 : op_partial_merge (first_irr, last_irr, first_reg, last_reg, dest, comp, op, is_stable);
1178 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(orig_dest, dest, comp));
1179
1180 if(first_reg == dest){
1181 dest = next_key_idx ? ::boost::adl_move_swap_ranges(first_min, last_min, first_reg)
1182 : last_reg;
1183 }
1184 else{
1185 dest = next_key_idx ? op(three_way_forward_t(), first_reg, last_reg, first_min, dest)
1186 : op(forward_t(), first_reg, last_reg, dest);
1187 }
1188
1189 RandItKeys const key_next(key_first + next_key_idx);
1190 swap_and_update_key(next_key_idx != 0, key_next, key_first, key_mid, last_reg, last_reg, first_min);
1191
1192 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(orig_dest, dest, comp));
1193 first_reg = last_reg;
1194 }
1195 return dest;
1196}
1197
1198template<class RandItKeys, class KeyCompare, class RandIt, class Compare, class Op>
1199void op_merge_blocks_left
1200 ( RandItKeys const key_first
1201 , KeyCompare key_comp
1202 , RandIt const first
1203 , typename iterator_traits<RandIt>::size_type const l_block
1204 , typename iterator_traits<RandIt>::size_type const l_irreg1
1205 , typename iterator_traits<RandIt>::size_type const n_block_a
1206 , typename iterator_traits<RandIt>::size_type const n_block_b
1207 , typename iterator_traits<RandIt>::size_type const l_irreg2
1208 , Compare comp, Op op)
1209{
1210 typedef typename iterator_traits<RandIt>::size_type size_type;
1211 size_type const key_count = needed_keys_count(n_block_a, n_block_b); (void)key_count;
1212// BOOST_ASSERT(n_block_a || n_block_b);
1213 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted_and_unique(key_first, key_first + key_count, key_comp));
1214 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!n_block_b || n_block_a == count_if_with(key_first, key_first + key_count, key_comp, key_first[n_block_a]));
1215
1216 size_type n_block_b_left = n_block_b;
1217 size_type n_block_a_left = n_block_a;
1218 size_type n_block_left = n_block_b + n_block_a;
1219 RandItKeys key_mid(key_first + n_block_a);
1220
1221 RandIt buffer = first - l_block;
1222 RandIt first1 = first;
1223 RandIt last1 = first1 + l_irreg1;
1224 RandIt first2 = last1;
1225 RandIt const irreg2 = first2 + n_block_left*l_block;
1226 bool is_range1_A = true;
1227
1228 RandItKeys key_range2(key_first);
1229
1230 ////////////////////////////////////////////////////////////////////////////
1231 //Process all regular blocks before the irregular B block
1232 ////////////////////////////////////////////////////////////////////////////
1233 size_type min_check = n_block_a == n_block_left ? 0u : n_block_a;
1234 size_type max_check = min_value(min_check+1, n_block_left);
1235 for (; n_block_left; --n_block_left, ++key_range2, min_check -= min_check != 0, max_check -= max_check != 0) {
1236 size_type const next_key_idx = find_next_block(key_range2, key_comp, first2, l_block, min_check, max_check, comp);
1237 max_check = min_value(max_value(max_check, next_key_idx+2), n_block_left);
1238 RandIt const first_min = first2 + next_key_idx*l_block;
1239 RandIt const last_min = first_min + l_block; (void)last_min;
1240 RandIt const last2 = first2 + l_block;
1241
1242 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first1, last1, comp));
1243 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first2, last2, comp));
1244 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!n_block_left || is_sorted(first_min, last_min, comp));
1245
1246 //Check if irregular b block should go here.
1247 //If so, break to the special code handling the irregular block
1248 if (!n_block_b_left &&
1249 ( (l_irreg2 && comp(*irreg2, *first_min)) || (!l_irreg2 && is_range1_A)) ){
1250 break;
1251 }
1252
1253 RandItKeys const key_next(key_range2 + next_key_idx);
1254 bool const is_range2_A = key_mid == (key_first+key_count) || key_comp(*key_next, *key_mid);
1255
1256 bool const is_buffer_middle = last1 == buffer;
1257 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT( ( is_buffer_middle && size_type(first2-buffer) == l_block && buffer == last1) ||
1258 (!is_buffer_middle && size_type(first1-buffer) == l_block && first2 == last1));
1259
1260 if(is_range1_A == is_range2_A){
1261 BOOST_ASSERT((first1 == last1) || !comp(*first_min, last1[-1]));
1262 if(!is_buffer_middle){
1263 buffer = op(forward_t(), first1, last1, buffer);
1264 }
1265 swap_and_update_key(next_key_idx != 0, key_next, key_range2, key_mid, first2, last2, first_min);
1266 first1 = first2;
1267 last1 = last2;
1268 }
1269 else {
1270 RandIt unmerged;
1271 RandIt buf_beg;
1272 RandIt buf_end;
1273 if(is_buffer_middle){
1274 buf_end = buf_beg = first2 - (last1-first1);
1275 unmerged = op_partial_merge_and_save( first1, last1, first2, last2, first_min
1276 , buf_beg, buf_end, comp, op, is_range1_A);
1277 }
1278 else{
1279 buf_beg = first1;
1280 buf_end = last1;
1281 unmerged = op_partial_merge_and_save
1282 (buffer, buffer+(last1-first1), first2, last2, first_min, buf_beg, buf_end, comp, op, is_range1_A);
1283 }
1284 (void)unmerged;
1285 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first-l_block, unmerged, comp));
1286
1287 swap_and_update_key( next_key_idx != 0, key_next, key_range2, key_mid, first2, last2
1288 , last_min - size_type(last2 - first2));
1289
1290 if(buf_beg != buf_end){ //range2 exhausted: is_buffer_middle for the next iteration
1291 first1 = buf_beg;
1292 last1 = buf_end;
1293 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(buf_end == (last2-l_block));
1294 buffer = last1;
1295 }
1296 else{ //range1 exhausted: !is_buffer_middle for the next iteration
1297 first1 = first2;
1298 last1 = last2;
1299 buffer = first2 - l_block;
1300 is_range1_A = is_range2_A;
1301 }
1302 }
1303 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT( (is_range2_A && n_block_a_left) || (!is_range2_A && n_block_b_left));
1304 is_range2_A ? --n_block_a_left : --n_block_b_left;
1305 first2 = last2;
1306 }
1307
1308 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!n_block_b || n_block_a == count_if_with(key_first, key_range2 + n_block_left, key_comp, *key_mid));
1309 BOOST_ASSERT(!n_block_b_left);
1310
1311 ////////////////////////////////////////////////////////////////////////////
1312 //Process remaining range 1 left before the irregular B block
1313 ////////////////////////////////////////////////////////////////////////////
1314 bool const is_buffer_middle = last1 == buffer;
1315 RandIt first_irr2 = irreg2;
1316 RandIt const last_irr2 = first_irr2 + l_irreg2;
1317 if(l_irreg2 && is_range1_A){
1318 if(is_buffer_middle){
1319 first1 = skip_until_merge(first1, last1, *first_irr2, comp);
1320 //Even if we copy backward, no overlapping occurs so use forward copy
1321 //that can be faster specially with trivial types
1322 RandIt const new_first1 = first2 - (last1 - first1);
1323 op(forward_t(), first1, last1, new_first1);
1324 first1 = new_first1;
1325 last1 = first2;
1326 buffer = first1 - l_block;
1327 }
1328 buffer = op_partial_merge_impl(first1, last1, first_irr2, last_irr2, buffer, comp, op);
1329 buffer = op(forward_t(), first1, last1, buffer);
1330 }
1331 else if(!is_buffer_middle){
1332 buffer = op(forward_t(), first1, last1, buffer);
1333 }
1334 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first-l_block, buffer, comp));
1335
1336 ////////////////////////////////////////////////////////////////////////////
1337 //Process irregular B block and remaining A blocks
1338 ////////////////////////////////////////////////////////////////////////////
1339 buffer = op_merge_blocks_with_irreg
1340 ( key_range2, key_mid, key_comp, first2, first_irr2, last_irr2
1341 , buffer, l_block, n_block_left, min_check, max_check, comp, false, op);
1342 buffer = op(forward_t(), first_irr2, last_irr2, buffer);(void)buffer;
1343 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first-l_block, buffer, comp));
1344}
1345
1346// first - first element to merge.
1347// first[-l_block, 0) - buffer (if use_buf == true)
1348// l_block - length of regular blocks. First nblocks are stable sorted by 1st elements and key-coded
1349// keys - sequence of keys, in same order as blocks. key<midkey means stream A
1350// n_bef_irreg2/n_aft_irreg2 are regular blocks
1351// l_irreg2 is a irregular block, that is to be combined after n_bef_irreg2 blocks and before n_aft_irreg2 blocks
1352// If l_irreg2==0 then n_aft_irreg2==0 (no irregular blocks).
1353template<class RandItKeys, class KeyCompare, class RandIt, class Compare>
1354void merge_blocks_left
1355 ( RandItKeys const key_first
1356 , KeyCompare key_comp
1357 , RandIt const first
1358 , typename iterator_traits<RandIt>::size_type const l_block
1359 , typename iterator_traits<RandIt>::size_type const l_irreg1
1360 , typename iterator_traits<RandIt>::size_type const n_block_a
1361 , typename iterator_traits<RandIt>::size_type const n_block_b
1362 , typename iterator_traits<RandIt>::size_type const l_irreg2
1363 , Compare comp
1364 , bool const xbuf_used)
1365{
1366 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!n_block_b || n_block_a == count_if_with(key_first, key_first + needed_keys_count(n_block_a, n_block_b), key_comp, key_first[n_block_a]));
1367 if(xbuf_used){
1368 op_merge_blocks_left
1369 (key_first, key_comp, first, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, move_op());
1370 }
1371 else{
1372 op_merge_blocks_left
1373 (key_first, key_comp, first, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, swap_op());
1374 }
1375}
1376
1377
1378// first - first element to merge.
1379// [first+l_block*(n_bef_irreg2+n_aft_irreg2)+l_irreg2, first+l_block*(n_bef_irreg2+n_aft_irreg2+1)+l_irreg2) - buffer
1380// l_block - length of regular blocks. First nblocks are stable sorted by 1st elements and key-coded
1381// keys - sequence of keys, in same order as blocks. key<midkey means stream A
1382// n_bef_irreg2/n_aft_irreg2 are regular blocks
1383// l_irreg2 is a irregular block, that is to be combined after n_bef_irreg2 blocks and before n_aft_irreg2 blocks
1384// If l_irreg2==0 then n_aft_irreg2==0 (no irregular blocks).
1385template<class RandItKeys, class KeyCompare, class RandIt, class Compare>
1386void merge_blocks_right
1387 ( RandItKeys const key_first
1388 , KeyCompare key_comp
1389 , RandIt const first
1390 , typename iterator_traits<RandIt>::size_type const l_block
1391 , typename iterator_traits<RandIt>::size_type const n_block_a
1392 , typename iterator_traits<RandIt>::size_type const n_block_b
1393 , typename iterator_traits<RandIt>::size_type const l_irreg2
1394 , Compare comp
1395 , bool const xbuf_used)
1396{
1397 merge_blocks_left
1398 ( make_reverse_iterator(key_first + needed_keys_count(n_block_a, n_block_b))
1399 , inverse<KeyCompare>(key_comp)
1400 , make_reverse_iterator(first + ((n_block_a+n_block_b)*l_block+l_irreg2))
1401 , l_block
1402 , l_irreg2
1403 , n_block_b
1404 , n_block_a
1405 , 0
1406 , inverse<Compare>(comp), xbuf_used);
1407}
1408
1409template<class RandItKeys, class KeyCompare, class RandIt, class Compare, class Op, class RandItBuf>
1410void op_merge_blocks_with_buf
1411 ( RandItKeys key_first
1412 , KeyCompare key_comp
1413 , RandIt const first
1414 , typename iterator_traits<RandIt>::size_type const l_block
1415 , typename iterator_traits<RandIt>::size_type const l_irreg1
1416 , typename iterator_traits<RandIt>::size_type const n_block_a
1417 , typename iterator_traits<RandIt>::size_type const n_block_b
1418 , typename iterator_traits<RandIt>::size_type const l_irreg2
1419 , Compare comp
1420 , Op op
1421 , RandItBuf const buf_first)
1422{
1423 typedef typename iterator_traits<RandIt>::size_type size_type;
1424 size_type const key_count = needed_keys_count(n_block_a, n_block_b); (void)key_count;
1425 //BOOST_ASSERT(n_block_a || n_block_b);
1426 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted_and_unique(key_first, key_first + key_count, key_comp));
1427 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!n_block_b || n_block_a == count_if_with(key_first, key_first + key_count, key_comp, key_first[n_block_a]));
1428
1429 size_type n_block_b_left = n_block_b;
1430 size_type n_block_a_left = n_block_a;
1431 size_type n_block_left = n_block_b + n_block_a;
1432 RandItKeys key_mid(key_first + n_block_a);
1433
1434 RandItBuf buffer = buf_first;
1435 RandItBuf buffer_end = buffer;
1436 RandIt first1 = first;
1437 RandIt last1 = first1 + l_irreg1;
1438 RandIt first2 = last1;
1439 RandIt const first_irr2 = first2 + n_block_left*l_block;
1440 bool is_range1_A = true;
1441
1442 RandItKeys key_range2(key_first);
1443
1444 ////////////////////////////////////////////////////////////////////////////
1445 //Process all regular blocks before the irregular B block
1446 ////////////////////////////////////////////////////////////////////////////
1447 size_type min_check = n_block_a == n_block_left ? 0u : n_block_a;
1448 size_type max_check = min_value(min_check+1, n_block_left);
1449 for (; n_block_left; --n_block_left, ++key_range2, min_check -= min_check != 0, max_check -= max_check != 0) {
1450 size_type const next_key_idx = find_next_block(key_range2, key_comp, first2, l_block, min_check, max_check, comp);
1451 max_check = min_value(max_value(max_check, next_key_idx+2), n_block_left);
1452 RandIt first_min = first2 + next_key_idx*l_block;
1453 RandIt const last_min = first_min + l_block; (void)last_min;
1454 RandIt const last2 = first2 + l_block;
1455
1456 bool const buffer_empty = buffer == buffer_end; (void)buffer_empty;
1457 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(buffer_empty ? is_sorted(first1, last1, comp) : is_sorted(buffer, buffer_end, comp));
1458 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first2, last2, comp));
1459 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!n_block_left || is_sorted(first_min, last_min, comp));
1460
1461 //Check if irregular b block should go here.
1462 //If so, break to the special code handling the irregular block
1463 if (!n_block_b_left &&
1464 ( (l_irreg2 && comp(*first_irr2, *first_min)) || (!l_irreg2 && is_range1_A)) ){
1465 break;
1466 }
1467
1468 RandItKeys const key_next(key_range2 + next_key_idx);
1469 bool const is_range2_A = key_mid == (key_first+key_count) || key_comp(*key_next, *key_mid);
1470
1471 if(is_range1_A == is_range2_A){
1472 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT((first1 == last1) || (buffer_empty ? !comp(*first_min, last1[-1]) : !comp(*first_min, buffer_end[-1])));
1473 //If buffered, put those elements in place
1474 RandIt res = op(forward_t(), buffer, buffer_end, first1);
1475 buffer = buffer_end = buf_first;
1476 BOOST_ASSERT(buffer_empty || res == last1); (void)res;
1477 swap_and_update_key(next_key_idx != 0, key_next, key_range2, key_mid, first2, last2, first_min);
1478 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first2, last2, comp));
1479 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first_min, last_min, comp));
1480 first1 = first2;
1481 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first, first1, comp));
1482 }
1483 else {
1484 RandIt const unmerged = op_partial_merge_and_save(first1, last1, first2, last2, first_min, buffer, buffer_end, comp, op, is_range1_A);
1485 bool const is_range_1_empty = buffer == buffer_end;
1486 BOOST_ASSERT(is_range_1_empty || (buffer_end-buffer) == (last1+l_block-unmerged));
1487 if(is_range_1_empty){
1488 buffer = buffer_end = buf_first;
1489 first_min = last_min - (last2 - first2);
1490 }
1491 else{
1492 first_min = last_min;
1493 }
1494 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!is_range_1_empty || (last_min-first_min) == (last2-unmerged));
1495 swap_and_update_key(next_key_idx != 0, key_next, key_range2, key_mid, first2, last2, first_min);
1496
1497 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first_min, last_min, comp));
1498 is_range1_A ^= is_range_1_empty;
1499 first1 = unmerged;
1500 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first, unmerged, comp));
1501 }
1502 BOOST_ASSERT( (is_range2_A && n_block_a_left) || (!is_range2_A && n_block_b_left));
1503 is_range2_A ? --n_block_a_left : --n_block_b_left;
1504 last1 += l_block;
1505 first2 = last2;
1506 }
1507
1508 RandIt res = op(forward_t(), buffer, buffer_end, first1); (void)res;
1509 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first, res, comp));
1510
1511 ////////////////////////////////////////////////////////////////////////////
1512 //Process irregular B block and remaining A blocks
1513 ////////////////////////////////////////////////////////////////////////////
1514 RandIt const last_irr2 = first_irr2 + l_irreg2;
1515 op(forward_t(), first_irr2, first_irr2+l_irreg2, buf_first);
1516 buffer = buf_first;
1517 buffer_end = buffer+l_irreg2;
1518
1519 reverse_iterator<RandItBuf> rbuf_beg(buffer_end);
1520 RandIt dest = op_merge_blocks_with_irreg
1521 ( make_reverse_iterator(key_first + n_block_b + n_block_a), make_reverse_iterator(key_mid), inverse<KeyCompare>(key_comp)
1522 , make_reverse_iterator(first_irr2), rbuf_beg
1523 , make_reverse_iterator(buffer), make_reverse_iterator(last_irr2)
1524 , l_block, n_block_left, 0, n_block_left
1525 , inverse<Compare>(comp), true, op).base();
1526 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(dest, last_irr2, comp));
1527
1528 buffer_end = rbuf_beg.base();
1529 BOOST_ASSERT((dest-last1) == (buffer_end-buffer));
1530 op_merge_with_left_placed(is_range1_A ? first1 : last1, last1, dest, buffer, buffer_end, comp, op);
1531
1532 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first, last_irr2, comp));
1533}
1534
1535template<class RandItKeys, class KeyCompare, class RandIt, class Compare, class RandItBuf>
1536void merge_blocks_with_buf
1537 ( RandItKeys key_first
1538 , KeyCompare key_comp
1539 , RandIt const first
1540 , typename iterator_traits<RandIt>::size_type const l_block
1541 , typename iterator_traits<RandIt>::size_type const l_irreg1
1542 , typename iterator_traits<RandIt>::size_type const n_block_a
1543 , typename iterator_traits<RandIt>::size_type const n_block_b
1544 , typename iterator_traits<RandIt>::size_type const l_irreg2
1545 , Compare comp
1546 , RandItBuf const buf_first
1547 , bool const xbuf_used)
1548{
1549 if(xbuf_used){
1550 op_merge_blocks_with_buf
1551 (key_first, key_comp, first, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, move_op(), buf_first);
1552 }
1553 else{
1554 op_merge_blocks_with_buf
1555 (key_first, key_comp, first, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, swap_op(), buf_first);
1556 }
1557}
1558
1559template<class RandIt, class Compare, class Op>
1560typename iterator_traits<RandIt>::size_type
1561 op_insertion_sort_step_left
1562 ( RandIt const first
1563 , typename iterator_traits<RandIt>::size_type const length
1564 , typename iterator_traits<RandIt>::size_type const step
1565 , Compare comp, Op op)
1566{
1567 typedef typename iterator_traits<RandIt>::size_type size_type;
1568 size_type const s = min_value<size_type>(step, AdaptiveSortInsertionSortThreshold);
1569 size_type m = 0;
1570
1571 while((length - m) > s){
1572 insertion_sort_op(first+m, first+m+s, first+m-s, comp, op);
1573 m += s;
1574 }
1575 insertion_sort_op(first+m, first+length, first+m-s, comp, op);
1576 return s;
1577}
1578
1579template<class RandIt, class Compare>
1580typename iterator_traits<RandIt>::size_type
1581 insertion_sort_step
1582 ( RandIt const first
1583 , typename iterator_traits<RandIt>::size_type const length
1584 , typename iterator_traits<RandIt>::size_type const step
1585 , Compare comp)
1586{
1587 typedef typename iterator_traits<RandIt>::size_type size_type;
1588 size_type const s = min_value<size_type>(step, AdaptiveSortInsertionSortThreshold);
1589 size_type m = 0;
1590
1591 while((length - m) > s){
1592 insertion_sort(first+m, first+m+s, comp);
1593 m += s;
1594 }
1595 insertion_sort(first+m, first+length, comp);
1596 return s;
1597}
1598
1599template<class RandIt, class Compare, class Op>
1600typename iterator_traits<RandIt>::size_type
1601 op_merge_left_step_multiple
1602 ( RandIt first_block
1603 , typename iterator_traits<RandIt>::size_type const elements_in_blocks
1604 , typename iterator_traits<RandIt>::size_type l_merged
1605 , typename iterator_traits<RandIt>::size_type const l_build_buf
1606 , typename iterator_traits<RandIt>::size_type l_left_space
1607 , Compare comp
1608 , Op op)
1609{
1610 typedef typename iterator_traits<RandIt>::size_type size_type;
1611 for(; l_merged < l_build_buf && l_left_space >= l_merged; l_merged*=2){
1612 size_type p0=0;
1613 RandIt pos = first_block;
1614 while((elements_in_blocks - p0) > 2*l_merged) {
1615 op_merge_left(pos-l_merged, pos, pos+l_merged, pos+2*l_merged, comp, op);
1616 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(pos-l_merged, pos+l_merged, comp));
1617 p0 += 2*l_merged;
1618 pos = first_block+p0;
1619 }
1620 if((elements_in_blocks-p0) > l_merged) {
1621 op_merge_left(pos-l_merged, pos, pos+l_merged, first_block+elements_in_blocks, comp, op);
1622 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(pos-l_merged, pos-l_merged+(first_block+elements_in_blocks-pos), comp));
1623 }
1624 else {
1625 op(forward_t(), pos, first_block+elements_in_blocks, pos-l_merged);
1626 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(pos-l_merged, first_block+elements_in_blocks-l_merged, comp));
1627 }
1628 first_block -= l_merged;
1629 l_left_space -= l_merged;
1630 }
1631 return l_merged;
1632}
1633
1634template<class RandIt, class Compare, class Op>
1635void op_merge_right_step_once
1636 ( RandIt first_block
1637 , typename iterator_traits<RandIt>::size_type const elements_in_blocks
1638 , typename iterator_traits<RandIt>::size_type const l_build_buf
1639 , Compare comp
1640 , Op op)
1641{
1642 typedef typename iterator_traits<RandIt>::size_type size_type;
1643 size_type restk = elements_in_blocks%(2*l_build_buf);
1644 size_type p = elements_in_blocks - restk;
1645 BOOST_ASSERT(0 == (p%(2*l_build_buf)));
1646
1647 if(restk <= l_build_buf){
1648 op(backward_t(),first_block+p, first_block+p+restk, first_block+p+restk+l_build_buf);
1649 }
1650 else{
1651 op_merge_right(first_block+p, first_block+p+l_build_buf, first_block+p+restk, first_block+p+restk+l_build_buf, comp, op);
1652 }
1653 while(p>0){
1654 p -= 2*l_build_buf;
1655 op_merge_right(first_block+p, first_block+p+l_build_buf, first_block+p+2*l_build_buf, first_block+p+3*l_build_buf, comp, op);
1656 }
1657}
1658
1659
1660// build blocks of length 2*l_build_buf. l_build_buf is power of two
1661// input: [0, l_build_buf) elements are buffer, rest unsorted elements
1662// output: [0, l_build_buf) elements are buffer, blocks 2*l_build_buf and last subblock sorted
1663//
1664// First elements are merged from right to left until elements start
1665// at first. All old elements [first, first + l_build_buf) are placed at the end
1666// [first+len-l_build_buf, first+len). To achieve this:
1667// - If we have external memory to merge, we save elements from the buffer
1668// so that a non-swapping merge is used. Buffer elements are restored
1669// at the end of the buffer from the external memory.
1670//
1671// - When the external memory is not available or it is insufficient
1672// for a merge operation, left swap merging is used.
1673//
1674// Once elements are merged left to right in blocks of l_build_buf, then a single left
1675// to right merge step is performed to achieve merged blocks of size 2K.
1676// If external memory is available, usual merge is used, swap merging otherwise.
1677//
1678// As a last step, if auxiliary memory is available in-place merge is performed.
1679// until all is merged or auxiliary memory is not large enough.
1680template<class RandIt, class Compare, class XBuf>
1681typename iterator_traits<RandIt>::size_type
1682 adaptive_sort_build_blocks
1683 ( RandIt const first
1684 , typename iterator_traits<RandIt>::size_type const len
1685 , typename iterator_traits<RandIt>::size_type const l_base
1686 , typename iterator_traits<RandIt>::size_type const l_build_buf
1687 , XBuf & xbuf
1688 , Compare comp)
1689{
1690 typedef typename iterator_traits<RandIt>::size_type size_type;
1691 BOOST_ASSERT(l_build_buf <= len);
1692 BOOST_ASSERT(0 == ((l_build_buf / l_base)&(l_build_buf/l_base-1)));
1693
1694 //Place the start pointer after the buffer
1695 RandIt first_block = first + l_build_buf;
1696 size_type const elements_in_blocks = len - l_build_buf;
1697
1698 //////////////////////////////////
1699 // Start of merge to left step
1700 //////////////////////////////////
1701 size_type l_merged = 0u;
1702
1703 BOOST_ASSERT(l_build_buf);
1704 //If there is no enough buffer for the insertion sort step, just avoid the external buffer
1705 size_type kbuf = min_value<size_type>(l_build_buf, size_type(xbuf.capacity()));
1706 kbuf = kbuf < l_base ? 0 : kbuf;
1707
1708 if(kbuf){
1709 //Backup internal buffer values in external buffer so they can be overwritten
1710 xbuf.move_assign(first+l_build_buf-kbuf, kbuf);
1711 l_merged = op_insertion_sort_step_left(first_block, elements_in_blocks, l_base, comp, move_op());
1712
1713 //Now combine them using the buffer. Elements from buffer can be
1714 //overwritten since they've been saved to xbuf
1715 l_merged = op_merge_left_step_multiple
1716 ( first_block - l_merged, elements_in_blocks, l_merged, l_build_buf, kbuf - l_merged, comp, move_op());
1717
1718 //Restore internal buffer from external buffer unless kbuf was l_build_buf,
1719 //in that case restoration will happen later
1720 if(kbuf != l_build_buf){
1721 boost::move(xbuf.data()+kbuf-l_merged, xbuf.data() + kbuf, first_block-l_merged+elements_in_blocks);
1722 }
1723 }
1724 else{
1725 l_merged = insertion_sort_step(first_block, elements_in_blocks, l_base, comp);
1726 rotate_gcd(first_block - l_merged, first_block, first_block+elements_in_blocks);
1727 }
1728
1729 //Now combine elements using the buffer. Elements from buffer can't be
1730 //overwritten since xbuf was not big enough, so merge swapping elements.
1731 l_merged = op_merge_left_step_multiple
1732 (first_block - l_merged, elements_in_blocks, l_merged, l_build_buf, l_build_buf - l_merged, comp, swap_op());
1733
1734 BOOST_ASSERT(l_merged == l_build_buf);
1735
1736 //////////////////////////////////
1737 // Start of merge to right step
1738 //////////////////////////////////
1739
1740 //If kbuf is l_build_buf then we can merge right without swapping
1741 //Saved data is still in xbuf
1742 if(kbuf && kbuf == l_build_buf){
1743 op_merge_right_step_once(first, elements_in_blocks, l_build_buf, comp, move_op());
1744 //Restore internal buffer from external buffer if kbuf was l_build_buf.
1745 //as this operation was previously delayed.
1746 boost::move(xbuf.data(), xbuf.data() + kbuf, first);
1747 }
1748 else{
1749 op_merge_right_step_once(first, elements_in_blocks, l_build_buf, comp, swap_op());
1750 }
1751 xbuf.clear();
1752 //2*l_build_buf or total already merged
1753 return min_value(elements_in_blocks, 2*l_build_buf);
1754}
1755
1756template<class RandItKeys, class KeyCompare, class RandIt, class Compare, class XBuf>
1757void adaptive_sort_combine_blocks
1758 ( RandItKeys const keys
1759 , KeyCompare key_comp
1760 , RandIt const first
1761 , typename iterator_traits<RandIt>::size_type const len
1762 , typename iterator_traits<RandIt>::size_type const l_prev_merged
1763 , typename iterator_traits<RandIt>::size_type const l_block
1764 , bool const use_buf
1765 , bool const xbuf_used
1766 , XBuf & xbuf
1767 , Compare comp
1768 , bool merge_left)
1769{
1770 (void)xbuf;
1771 typedef typename iterator_traits<RandIt>::size_type size_type;
1772
1773 size_type const l_reg_combined = 2*l_prev_merged;
1774 size_type l_irreg_combined = 0;
1775 size_type const l_total_combined = calculate_total_combined(len, l_prev_merged, &l_irreg_combined);
1776 size_type const n_reg_combined = len/l_reg_combined;
1777 RandIt combined_first = first;
1778
1779 (void)l_total_combined;
1780 BOOST_ASSERT(l_total_combined <= len);
1781
1782 size_type const max_i = n_reg_combined + (l_irreg_combined != 0);
1783
1784 if(merge_left || !use_buf) {
1785 for( size_type combined_i = 0; combined_i != max_i; ++combined_i, combined_first += l_reg_combined) {
1786 //Now merge blocks
1787 bool const is_last = combined_i==n_reg_combined;
1788 size_type const l_cur_combined = is_last ? l_irreg_combined : l_reg_combined;
1789
1790 range_xbuf<RandIt, move_op> rbuf( (use_buf && xbuf_used) ? (combined_first-l_block) : combined_first, combined_first);
1791 size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
1792 combine_params( keys, key_comp, l_cur_combined
1793 , l_prev_merged, l_block, rbuf
1794 , n_block_a, n_block_b, l_irreg1, l_irreg2); //Outputs
1795 BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A combpar: ", len + l_block);
1796 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(combined_first, combined_first + n_block_a*l_block+l_irreg1, comp));
1797 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(combined_first + n_block_a*l_block+l_irreg1, combined_first + n_block_a*l_block+l_irreg1+n_block_b*l_block+l_irreg2, comp));
1798 if(!use_buf){
1799 merge_blocks_bufferless
1800 (keys, key_comp, combined_first, l_block, 0u, n_block_a, n_block_b, l_irreg2, comp);
1801 }
1802 else{
1803 merge_blocks_left
1804 (keys, key_comp, combined_first, l_block, 0u, n_block_a, n_block_b, l_irreg2, comp, xbuf_used);
1805 }
1806 BOOST_MOVE_ADAPTIVE_SORT_PRINT(" After merge_blocks_l: ", len + l_block);
1807 }
1808 }
1809 else{
1810 combined_first += l_reg_combined*(max_i-1);
1811 for( size_type combined_i = max_i; combined_i--; combined_first -= l_reg_combined) {
1812 bool const is_last = combined_i==n_reg_combined;
1813 size_type const l_cur_combined = is_last ? l_irreg_combined : l_reg_combined;
1814
1815 RandIt const combined_last(combined_first+l_cur_combined);
1816 range_xbuf<RandIt, move_op> rbuf(combined_last, xbuf_used ? (combined_last+l_block) : combined_last);
1817 size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
1818 combine_params( keys, key_comp, l_cur_combined
1819 , l_prev_merged, l_block, rbuf
1820 , n_block_a, n_block_b, l_irreg1, l_irreg2); //Outputs
1821 BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A combpar: ", len + l_block);
1822 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(combined_first, combined_first + n_block_a*l_block+l_irreg1, comp));
1823 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(combined_first + n_block_a*l_block+l_irreg1, combined_first + n_block_a*l_block+l_irreg1+n_block_b*l_block+l_irreg2, comp));
1824 merge_blocks_right
1825 (keys, key_comp, combined_first, l_block, n_block_a, n_block_b, l_irreg2, comp, xbuf_used);
1826 BOOST_MOVE_ADAPTIVE_SORT_PRINT(" After merge_blocks_r: ", len + l_block);
1827 }
1828 }
1829}
1830
1831//Returns true if buffer is placed in
1832//[buffer+len-l_intbuf, buffer+len). Otherwise, buffer is
1833//[buffer,buffer+l_intbuf)
1834template<class RandIt, class Compare, class XBuf>
1835bool adaptive_sort_combine_all_blocks
1836 ( RandIt keys
1837 , typename iterator_traits<RandIt>::size_type &n_keys
1838 , RandIt const buffer
1839 , typename iterator_traits<RandIt>::size_type const l_buf_plus_data
1840 , typename iterator_traits<RandIt>::size_type l_merged
1841 , typename iterator_traits<RandIt>::size_type &l_intbuf
1842 , XBuf & xbuf
1843 , Compare comp)
1844{
1845 typedef typename iterator_traits<RandIt>::size_type size_type;
1846 RandIt const first = buffer + l_intbuf;
1847 size_type const l_data = l_buf_plus_data - l_intbuf;
1848 size_type const l_unique = l_intbuf+n_keys;
1849 //Backup data to external buffer once if possible
1850 bool const common_xbuf = l_data > l_merged && l_intbuf && l_intbuf <= xbuf.capacity();
1851 if(common_xbuf){
1852 xbuf.move_assign(buffer, l_intbuf);
1853 }
1854
1855 bool prev_merge_left = true;
1856 size_type l_prev_total_combined = l_merged, l_prev_block = 0;
1857 bool prev_use_internal_buf = true;
1858
1859 for( size_type n = 0; l_data > l_merged
1860 ; l_merged*=2
1861 , ++n){
1862 //If l_intbuf is non-zero, use that internal buffer.
1863 // Implies l_block == l_intbuf && use_internal_buf == true
1864 //If l_intbuf is zero, see if half keys can be reused as a reduced emergency buffer,
1865 // Implies l_block == n_keys/2 && use_internal_buf == true
1866 //Otherwise, just give up and and use all keys to merge using rotations (use_internal_buf = false)
1867 bool use_internal_buf = false;
1868 size_type const l_block = lblock_for_combine(l_intbuf, n_keys, 2*l_merged, use_internal_buf);
1869 BOOST_ASSERT(!l_intbuf || (l_block == l_intbuf));
1870 BOOST_ASSERT(n == 0 || (!use_internal_buf || prev_use_internal_buf) );
1871 BOOST_ASSERT(n == 0 || (!use_internal_buf || l_prev_block == l_block) );
1872
1873 bool const is_merge_left = (n&1) == 0;
1874 size_type const l_total_combined = calculate_total_combined(l_data, l_merged);
1875 if(n && prev_use_internal_buf && prev_merge_left){
1876 if(is_merge_left || !use_internal_buf){
1877 move_data_backward(first-l_prev_block, l_prev_total_combined, first, common_xbuf);
1878 }
1879 else{
1880 //Put the buffer just after l_total_combined
1881 RandIt const buf_end = first+l_prev_total_combined;
1882 RandIt const buf_beg = buf_end-l_block;
1883 if(l_prev_total_combined > l_total_combined){
1884 size_type const l_diff = l_prev_total_combined - l_total_combined;
1885 move_data_backward(buf_beg-l_diff, l_diff, buf_end-l_diff, common_xbuf);
1886 }
1887 else if(l_prev_total_combined < l_total_combined){
1888 size_type const l_diff = l_total_combined - l_prev_total_combined;
1889 move_data_forward(buf_end, l_diff, buf_beg, common_xbuf);
1890 }
1891 }
1892 BOOST_MOVE_ADAPTIVE_SORT_PRINT(" After move_data : ", l_data + l_intbuf);
1893 }
1894
1895 //Combine to form l_merged*2 segments
1896 if(n_keys){
1897 adaptive_sort_combine_blocks
1898 ( keys, comp, !use_internal_buf || is_merge_left ? first : first-l_block
1899 , l_data, l_merged, l_block, use_internal_buf, common_xbuf, xbuf, comp, is_merge_left);
1900 }
1901 else{
1902 size_type *const uint_keys = xbuf.template aligned_trailing<size_type>();
1903 adaptive_sort_combine_blocks
1904 ( uint_keys, less(), !use_internal_buf || is_merge_left ? first : first-l_block
1905 , l_data, l_merged, l_block, use_internal_buf, common_xbuf, xbuf, comp, is_merge_left);
1906 }
1907 BOOST_MOVE_ADAPTIVE_SORT_PRINT(" After combine_blocks: ", l_data + l_intbuf);
1908 prev_merge_left = is_merge_left;
1909 l_prev_total_combined = l_total_combined;
1910 l_prev_block = l_block;
1911 prev_use_internal_buf = use_internal_buf;
1912 }
1913 BOOST_ASSERT(l_prev_total_combined == l_data);
1914 bool const buffer_right = prev_use_internal_buf && prev_merge_left;
1915
1916 l_intbuf = prev_use_internal_buf ? l_prev_block : 0u;
1917 n_keys = l_unique - l_intbuf;
1918 //Restore data from to external common buffer if used
1919 if(common_xbuf){
1920 if(buffer_right){
1921 boost::move(xbuf.data(), xbuf.data() + l_intbuf, buffer+l_data);
1922 }
1923 else{
1924 boost::move(xbuf.data(), xbuf.data() + l_intbuf, buffer);
1925 }
1926 }
1927 return buffer_right;
1928}
1929
1930template<class RandIt, class Compare, class XBuf>
1931void stable_merge
1932 ( RandIt first, RandIt const middle, RandIt last
1933 , Compare comp
1934 , XBuf &xbuf)
1935{
1936 BOOST_ASSERT(xbuf.empty());
1937 typedef typename iterator_traits<RandIt>::size_type size_type;
1938 size_type const len1 = size_type(middle-first);
1939 size_type const len2 = size_type(last-middle);
1940 size_type const l_min = min_value(len1, len2);
1941 if(xbuf.capacity() >= l_min){
1942 buffered_merge(first, middle, last, comp, xbuf);
1943 xbuf.clear();
1944 }
1945 else{
1946 merge_bufferless(first, middle, last, comp);
1947 }
1948}
1949
1950
1951template<class RandIt, class Compare, class XBuf>
1952void adaptive_sort_final_merge( bool buffer_right
1953 , RandIt const first
1954 , typename iterator_traits<RandIt>::size_type const l_intbuf
1955 , typename iterator_traits<RandIt>::size_type const n_keys
1956 , typename iterator_traits<RandIt>::size_type const len
1957 , XBuf & xbuf
1958 , Compare comp)
1959{
1960 //BOOST_ASSERT(n_keys || xbuf.size() == l_intbuf);
1961 xbuf.clear();
1962
1963 typedef typename iterator_traits<RandIt>::size_type size_type;
1964 size_type const n_key_plus_buf = l_intbuf+n_keys;
1965 if(buffer_right){
1966 stable_sort(first+len-l_intbuf, first+len, comp, xbuf);
1967 stable_merge(first+n_keys, first+len-l_intbuf, first+len, antistable<Compare>(comp), xbuf);
1968 stable_sort(first, first+n_keys, comp, xbuf);
1969 stable_merge(first, first+n_keys, first+len, comp, xbuf);
1970 }
1971 else{
1972 stable_sort(first, first+n_key_plus_buf, comp, xbuf);
1973 if(xbuf.capacity() >= n_key_plus_buf){
1974 buffered_merge(first, first+n_key_plus_buf, first+len, comp, xbuf);
1975 }
1976 else if(xbuf.capacity() >= min_value<size_type>(l_intbuf, n_keys)){
1977 stable_merge(first+n_keys, first+n_key_plus_buf, first+len, comp, xbuf);
1978 stable_merge(first, first+n_keys, first+len, comp, xbuf);
1979 }
1980 else{
1981 merge_bufferless(first, first+n_key_plus_buf, first+len, comp);
1982 }
1983 }
1984 BOOST_MOVE_ADAPTIVE_SORT_PRINT(" After final_merge : ", len);
1985}
1986
1987template<class RandIt, class Compare, class Unsigned, class XBuf>
1988bool adaptive_sort_build_params
1989 (RandIt first, Unsigned const len, Compare comp
1990 , Unsigned &n_keys, Unsigned &l_intbuf, Unsigned &l_base, Unsigned &l_build_buf
1991 , XBuf & xbuf
1992 )
1993{
1994 typedef Unsigned size_type;
1995
1996 //Calculate ideal parameters and try to collect needed unique keys
1997 l_base = 0u;
1998
1999 //Try to find a value near sqrt(len) that is 2^N*l_base where
2000 //l_base <= AdaptiveSortInsertionSortThreshold. This property is important
2001 //as build_blocks merges to the left iteratively duplicating the
2002 //merged size and all the buffer must be used just before the final
2003 //merge to right step. This guarantees "build_blocks" produces
2004 //segments of size l_build_buf*2, maximizing the classic merge phase.
2005 l_intbuf = size_type(ceil_sqrt_multiple(len, &l_base));
2006
2007 //The internal buffer can be expanded if there is enough external memory
2008 while(xbuf.capacity() >= l_intbuf*2){
2009 l_intbuf *= 2;
2010 }
2011
2012 //This is the minimum number of keys to implement the ideal algorithm
2013 //
2014 //l_intbuf is used as buffer plus the key count
2015 size_type n_min_ideal_keys = l_intbuf-1;
2016 while(n_min_ideal_keys >= (len-l_intbuf-n_min_ideal_keys)/l_intbuf){
2017 --n_min_ideal_keys;
2018 }
2019 n_min_ideal_keys += 1;
2020 BOOST_ASSERT(n_min_ideal_keys <= l_intbuf);
2021
2022 if(xbuf.template supports_aligned_trailing<size_type>(l_intbuf, (len-l_intbuf-1)/l_intbuf+1)){
2023 n_keys = 0u;
2024 l_build_buf = l_intbuf;
2025 }
2026 else{
2027 //Try to achieve a l_build_buf of length l_intbuf*2, so that we can merge with that
2028 //l_intbuf*2 buffer in "build_blocks" and use half of them as buffer and the other half
2029 //as keys in combine_all_blocks. In that case n_keys >= n_min_ideal_keys but by a small margin.
2030 //
2031 //If available memory is 2*sqrt(l), then only sqrt(l) unique keys are needed,
2032 //(to be used for keys in combine_all_blocks) as the whole l_build_buf
2033 //will be backuped in the buffer during build_blocks.
2034 bool const non_unique_buf = xbuf.capacity() >= l_intbuf;
2035 size_type const to_collect = non_unique_buf ? n_min_ideal_keys : l_intbuf*2;
2036 size_type collected = collect_unique(first, first+len, to_collect, comp, xbuf);
2037
2038 //If available memory is 2*sqrt(l), then for "build_params"
2039 //the situation is the same as if 2*l_intbuf were collected.
2040 if(non_unique_buf && collected == n_min_ideal_keys){
2041 l_build_buf = l_intbuf;
2042 n_keys = n_min_ideal_keys;
2043 }
2044 else if(collected == 2*l_intbuf){
2045 //l_intbuf*2 elements found. Use all of them in the build phase
2046 l_build_buf = l_intbuf*2;
2047 n_keys = l_intbuf;
2048 }
2049 else if(collected == (n_min_ideal_keys+l_intbuf)){
2050 l_build_buf = l_intbuf;
2051 n_keys = n_min_ideal_keys;
2052 }
2053 //If collected keys are not enough, try to fix n_keys and l_intbuf. If no fix
2054 //is possible (due to very low unique keys), then go to a slow sort based on rotations.
2055 else{
2056 BOOST_ASSERT(collected < (n_min_ideal_keys+l_intbuf));
2057 if(collected < 4){ //No combination possible with less that 4 keys
2058 return false;
2059 }
2060 n_keys = l_intbuf;
2061 while(n_keys&(n_keys-1)){
2062 n_keys &= n_keys-1; // make it power or 2
2063 }
2064 while(n_keys > collected){
2065 n_keys/=2;
2066 }
2067 //AdaptiveSortInsertionSortThreshold is always power of two so the minimum is power of two
2068 l_base = min_value<Unsigned>(n_keys, AdaptiveSortInsertionSortThreshold);
2069 l_intbuf = 0;
2070 l_build_buf = n_keys;
2071 }
2072 BOOST_ASSERT((n_keys+l_intbuf) >= l_build_buf);
2073 }
2074
2075 return true;
2076}
2077
2078template<class RandIt, class Compare, class XBuf>
2079inline void adaptive_merge_combine_blocks( RandIt first
2080 , typename iterator_traits<RandIt>::size_type len1
2081 , typename iterator_traits<RandIt>::size_type len2
2082 , typename iterator_traits<RandIt>::size_type collected
2083 , typename iterator_traits<RandIt>::size_type n_keys
2084 , typename iterator_traits<RandIt>::size_type l_block
2085 , bool use_internal_buf
2086 , bool xbuf_used
2087 , Compare comp
2088 , XBuf & xbuf
2089 )
2090{
2091 typedef typename iterator_traits<RandIt>::size_type size_type;
2092 size_type const len = len1+len2;
2093 size_type const l_combine = len-collected;
2094 size_type const l_combine1 = len1-collected;
2095
2096 if(n_keys){
2097 RandIt const first_data = first+collected;
2098 RandIt const keys = first;
2099 BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A combine: ", len);
2100 if(xbuf_used){
2101 if(xbuf.size() < l_block){
2102 xbuf.initialize_until(l_block, *first);
2103 }
2104 BOOST_ASSERT(xbuf.size() >= l_block);
2105 size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
2106 combine_params( keys, comp, l_combine
2107 , l_combine1, l_block, xbuf
2108 , n_block_a, n_block_b, l_irreg1, l_irreg2); //Outputs
2109 merge_blocks_with_buf
2110 (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, xbuf.data(), xbuf_used);
2111 BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg xbf: ", len);
2112 }
2113 else{
2114 size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
2115 combine_params( keys, comp, l_combine
2116 , l_combine1, l_block, xbuf
2117 , n_block_a, n_block_b, l_irreg1, l_irreg2); //Outputs
2118 if(use_internal_buf){
2119 merge_blocks_with_buf
2120 (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, first_data-l_block, xbuf_used);
2121 BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg buf: ", len);
2122 }
2123 else{
2124 merge_blocks_bufferless
2125 (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp);
2126 BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg nbf: ", len);
2127 }
2128 }
2129 }
2130 else{
2131 xbuf.shrink_to_fit(l_block);
2132 if(xbuf.size() < l_block){
2133 xbuf.initialize_until(l_block, *first);
2134 }
2135 size_type *const uint_keys = xbuf.template aligned_trailing<size_type>(l_block);
2136 size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
2137 combine_params( uint_keys, less(), l_combine
2138 , l_combine1, l_block, xbuf
2139 , n_block_a, n_block_b, l_irreg1, l_irreg2, true); //Outputs
2140 BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A combine: ", len);
2141 BOOST_ASSERT(xbuf.size() >= l_block);
2142 merge_blocks_with_buf
2143 (uint_keys, less(), first, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, xbuf.data(), true);
2144 xbuf.clear();
2145 BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A mrg buf: ", len);
2146 }
2147}
2148
2149template<class RandIt, class Compare, class XBuf>
2150inline void adaptive_merge_final_merge( RandIt first
2151 , typename iterator_traits<RandIt>::size_type len1
2152 , typename iterator_traits<RandIt>::size_type len2
2153 , typename iterator_traits<RandIt>::size_type collected
2154 , typename iterator_traits<RandIt>::size_type l_intbuf
2155 , typename iterator_traits<RandIt>::size_type l_block
2156 , bool use_internal_buf
2157 , bool xbuf_used
2158 , Compare comp
2159 , XBuf & xbuf
2160 )
2161{
2162 typedef typename iterator_traits<RandIt>::size_type size_type;
2163 (void)l_block;
2164 size_type n_keys = collected-l_intbuf;
2165 size_type len = len1+len2;
2166 if(use_internal_buf){
2167 if(xbuf_used){
2168 xbuf.clear();
2169 //Nothing to do
2170 if(n_keys){
2171 stable_sort(first, first+n_keys, comp, xbuf);
2172 stable_merge(first, first+n_keys, first+len, comp, xbuf);
2173 BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A key mrg: ", len);
2174 }
2175 }
2176 else{
2177 xbuf.clear();
2178 stable_sort(first, first+collected, comp, xbuf);
2179 BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A k/b srt: ", len);
2180 stable_merge(first, first+collected, first+len, comp, xbuf);
2181 BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A k/b mrg: ", len);
2182 }
2183 }
2184 else{
2185 xbuf.clear();
2186 stable_sort(first, first+collected, comp, xbuf);
2187 BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A k/b srt: ", len);
2188 stable_merge(first, first+collected, first+len1+len2, comp, xbuf);
2189 BOOST_MOVE_ADAPTIVE_SORT_PRINT(" A k/b mrg: ", len);
2190 }
2191}
2192
2193template<class SizeType, class Xbuf>
2194inline SizeType adaptive_merge_n_keys_intbuf(SizeType &rl_block, SizeType len1, SizeType len2, Xbuf & xbuf, SizeType &l_intbuf_inout)
2195{
2196 typedef SizeType size_type;
2197 size_type l_block = rl_block;
2198 size_type l_intbuf = xbuf.capacity() >= l_block ? 0u : l_block;
2199
2200 while(xbuf.capacity() >= l_block*2){
2201 l_block *= 2;
2202 }
2203
2204 //This is the minimum number of keys to implement the ideal algorithm
2205 size_type n_keys = len1/l_block+len2/l_block;
2206 while(n_keys >= ((len1-l_intbuf-n_keys)/l_block + len2/l_block)){
2207 --n_keys;
2208 }
2209 ++n_keys;
2210 BOOST_ASSERT(n_keys >= ((len1-l_intbuf-n_keys)/l_block + len2/l_block));
2211
2212 if(xbuf.template supports_aligned_trailing<size_type>(l_block, n_keys)){
2213 n_keys = 0u;
2214 }
2215 l_intbuf_inout = l_intbuf;
2216 rl_block = l_block;
2217 return n_keys;
2218}
2219
2220///////////////////////////////////////////////////////////////////////////////////////////
2221///////////////////////////////////////////////////////////////////////////////////////////
2222///////////////////////////////////////////////////////////////////////////////////////////
2223///////////////////////////////////////////////////////////////////////////////////////////
2224///////////////////////////////////////////////////////////////////////////////////////////
2225///////////////////////////////////////////////////////////////////////////////////////////
2226///////////////////////////////////////////////////////////////////////////////////////////
2227
2228// Main explanation of the sort algorithm.
2229//
2230// csqrtlen = ceil(sqrt(len));
2231//
2232// * First, 2*csqrtlen unique elements elements are extracted from elements to be
2233// sorted and placed in the beginning of the range.
2234//
2235// * Step "build_blocks": In this nearly-classic merge step, 2*csqrtlen unique elements
2236// will be used as auxiliary memory, so trailing len-2*csqrtlen elements are
2237// are grouped in blocks of sorted 4*csqrtlen elements. At the end of the step
2238// 2*csqrtlen unique elements are again the leading elements of the whole range.
2239//
2240// * Step "combine_blocks": pairs of previously formed blocks are merged with a different
2241// ("smart") algorithm to form blocks of 8*csqrtlen elements. This step is slower than the
2242// "build_blocks" step and repeated iteratively (forming blocks of 16*csqrtlen, 32*csqrtlen
2243// elements, etc) of until all trailing (len-2*csqrtlen) elements are merged.
2244//
2245// In "combine_blocks" len/csqrtlen elements used are as "keys" (markers) to
2246// know if elements belong to the first or second block to be merged and another
2247// leading csqrtlen elements are used as buffer. Explanation of the "combine_blocks" step:
2248//
2249// Iteratively until all trailing (len-2*csqrtlen) elements are merged:
2250// Iteratively for each pair of previously merged block:
2251// * Blocks are divided groups of csqrtlen elements and
2252// 2*merged_block/csqrtlen keys are sorted to be used as markers
2253// * Groups are selection-sorted by first or last element (depending wheter they
2254// merged to left or right) and keys are reordered accordingly as an imitation-buffer.
2255// * Elements of each block pair are merged using the csqrtlen buffer taking into account
2256// if they belong to the first half or second half (marked by the key).
2257//
2258// * In the final merge step leading elements (2*csqrtlen) are sorted and merged with
2259// rotations with the rest of sorted elements in the "combine_blocks" step.
2260//
2261// Corner cases:
2262//
2263// * If no 2*csqrtlen elements can be extracted:
2264//
2265// * If csqrtlen+len/csqrtlen are extracted, then only csqrtlen elements are used
2266// as buffer in the "build_blocks" step forming blocks of 2*csqrtlen elements. This
2267// means that an additional "combine_blocks" step will be needed to merge all elements.
2268//
2269// * If no csqrtlen+len/csqrtlen elements can be extracted, but still more than a minimum,
2270// then reduces the number of elements used as buffer and keys in the "build_blocks"
2271// and "combine_blocks" steps. If "combine_blocks" has no enough keys due to this reduction
2272// then uses a rotation based smart merge.
2273//
2274// * If the minimum number of keys can't be extracted, a rotation-based sorting is performed.
2275//
2276// * If auxiliary memory is more or equal than ceil(len/2), half-copying mergesort is used.
2277//
2278// * If auxiliary memory is more than csqrtlen+n_keys*sizeof(std::size_t),
2279// then only csqrtlen elements need to be extracted and "combine_blocks" will use integral
2280// keys to combine blocks.
2281//
2282// * If auxiliary memory is available, the "build_blocks" will be extended to build bigger blocks
2283// using classic merge.
2284template<class RandIt, class Compare, class XBuf>
2285void adaptive_sort_impl
2286 ( RandIt first
2287 , typename iterator_traits<RandIt>::size_type const len
2288 , Compare comp
2289 , XBuf & xbuf
2290 )
2291{
2292 typedef typename iterator_traits<RandIt>::size_type size_type;
2293
2294 //Small sorts go directly to insertion sort
2295 if(len <= size_type(AdaptiveSortInsertionSortThreshold)){
2296 insertion_sort(first, first + len, comp);
2297 return;
2298 }
2299
2300 if((len-len/2) <= xbuf.capacity()){
2301 merge_sort(first, first+len, comp, xbuf.data());
2302 return;
2303 }
2304
2305 //Make sure it is at least four
2306 BOOST_STATIC_ASSERT(AdaptiveSortInsertionSortThreshold >= 4);
2307
2308 size_type l_base = 0;
2309 size_type l_intbuf = 0;
2310 size_type n_keys = 0;
2311 size_type l_build_buf = 0;
2312
2313 //Calculate and extract needed unique elements. If a minimum is not achieved
2314 //fallback to rotation-based merge
2315 if(!adaptive_sort_build_params(first, len, comp, n_keys, l_intbuf, l_base, l_build_buf, xbuf)){
2316 stable_sort(first, first+len, comp, xbuf);
2317 return;
2318 }
2319 BOOST_ASSERT(l_build_buf);
2320 //Otherwise, continue the adaptive_sort
2321 BOOST_MOVE_ADAPTIVE_SORT_PRINT("\n After collect_unique: ", len);
2322 size_type const n_key_plus_buf = l_intbuf+n_keys;
2323 //l_build_buf is always power of two if l_intbuf is zero
2324 BOOST_ASSERT(l_intbuf || (0 == (l_build_buf & (l_build_buf-1))));
2325
2326 //Classic merge sort until internal buffer and xbuf are exhausted
2327 size_type const l_merged = adaptive_sort_build_blocks
2328 (first+n_key_plus_buf-l_build_buf, len-n_key_plus_buf+l_build_buf, l_base, l_build_buf, xbuf, comp);
2329 BOOST_MOVE_ADAPTIVE_SORT_PRINT(" After build_blocks: ", len);
2330
2331 //Non-trivial merge
2332 bool const buffer_right = adaptive_sort_combine_all_blocks
2333 (first, n_keys, first+n_keys, len-n_keys, l_merged, l_intbuf, xbuf, comp);
2334
2335 //Sort keys and buffer and merge the whole sequence
2336 adaptive_sort_final_merge(buffer_right, first, l_intbuf, n_keys, len, xbuf, comp);
2337}
2338
2339// Main explanation of the merge algorithm.
2340//
2341// csqrtlen = ceil(sqrt(len));
2342//
2343// * First, csqrtlen [to be used as buffer] + (len/csqrtlen - 1) [to be used as keys] => to_collect
2344// unique elements are extracted from elements to be sorted and placed in the beginning of the range.
2345//
2346// * Step "combine_blocks": the leading (len1-to_collect) elements plus trailing len2 elements
2347// are merged with a non-trivial ("smart") algorithm to form an ordered range trailing "len-to_collect" elements.
2348//
2349// Explanation of the "combine_blocks" step:
2350//
2351// * Trailing [first+to_collect, first+len1) elements are divided in groups of cqrtlen elements.
2352// Remaining elements that can't form a group are grouped in front of those elements.
2353// * Trailing [first+len1, first+len1+len2) elements are divided in groups of cqrtlen elements.
2354// Remaining elements that can't form a group are grouped in the back of those elements.
2355// * In parallel the following two steps are performed:
2356// * Groups are selection-sorted by first or last element (depending wheter they
2357// merged to left or right) and keys are reordered accordingly as an imitation-buffer.
2358// * Elements of each block pair are merged using the csqrtlen buffer taking into account
2359// if they belong to the first half or second half (marked by the key).
2360//
2361// * In the final merge step leading "to_collect" elements are merged with rotations
2362// with the rest of merged elements in the "combine_blocks" step.
2363//
2364// Corner cases:
2365//
2366// * If no "to_collect" elements can be extracted:
2367//
2368// * If more than a minimum number of elements is extracted
2369// then reduces the number of elements used as buffer and keys in the
2370// and "combine_blocks" steps. If "combine_blocks" has no enough keys due to this reduction
2371// then uses a rotation based smart merge.
2372//
2373// * If the minimum number of keys can't be extracted, a rotation-based merge is performed.
2374//
2375// * If auxiliary memory is more or equal than min(len1, len2), a buffered merge is performed.
2376//
2377// * If the len1 or len2 are less than 2*csqrtlen then a rotation-based merge is performed.
2378//
2379// * If auxiliary memory is more than csqrtlen+n_keys*sizeof(std::size_t),
2380// then no csqrtlen need to be extracted and "combine_blocks" will use integral
2381// keys to combine blocks.
2382template<class RandIt, class Compare, class XBuf>
2383void adaptive_merge_impl
2384 ( RandIt first
2385 , typename iterator_traits<RandIt>::size_type const len1
2386 , typename iterator_traits<RandIt>::size_type const len2
2387 , Compare comp
2388 , XBuf & xbuf
2389 )
2390{
2391 typedef typename iterator_traits<RandIt>::size_type size_type;
2392
2393 if(xbuf.capacity() >= min_value<size_type>(len1, len2)){
2394 buffered_merge(first, first+len1, first+(len1+len2), comp, xbuf);
2395 }
2396 else{
2397 const size_type len = len1+len2;
2398 //Calculate ideal parameters and try to collect needed unique keys
2399 size_type l_block = size_type(ceil_sqrt(len));
2400
2401 //One range is not big enough to extract keys and the internal buffer so a
2402 //rotation-based based merge will do just fine
2403 if(len1 <= l_block*2 || len2 <= l_block*2){
2404 merge_bufferless(first, first+len1, first+len1+len2, comp);
2405 return;
2406 }
2407
2408 //Detail the number of keys and internal buffer. If xbuf has enough memory, no
2409 //internal buffer is needed so l_intbuf will remain 0.
2410 size_type l_intbuf = 0;
2411 size_type n_keys = adaptive_merge_n_keys_intbuf(l_block, len1, len2, xbuf, l_intbuf);
2412 size_type const to_collect = l_intbuf+n_keys;
2413 //Try to extract needed unique values from the first range
2414 size_type const collected = collect_unique(first, first+len1, to_collect, comp, xbuf);
2415 BOOST_MOVE_ADAPTIVE_SORT_PRINT("\n A collect: ", len);
2416
2417 //Not the minimum number of keys is not available on the first range, so fallback to rotations
2418 if(collected != to_collect && collected < 4){
2419 merge_bufferless(first, first+len1, first+len1+len2, comp);
2420 return;
2421 }
2422
2423 //If not enough keys but more than minimum, adjust the internal buffer and key count
2424 bool use_internal_buf = collected == to_collect;
2425 if (!use_internal_buf){
2426 l_intbuf = 0u;
2427 n_keys = collected;
2428 l_block = lblock_for_combine(l_intbuf, n_keys, len, use_internal_buf);
2429 //If use_internal_buf is false, then then internal buffer will be zero and rotation-based combination will be used
2430 l_intbuf = use_internal_buf ? l_block : 0u;
2431 }
2432
2433 bool const xbuf_used = collected == to_collect && xbuf.capacity() >= l_block;
2434 //Merge trailing elements using smart merges
2435 adaptive_merge_combine_blocks(first, len1, len2, collected, n_keys, l_block, use_internal_buf, xbuf_used, comp, xbuf);
2436 //Merge buffer and keys with the rest of the values
2437 adaptive_merge_final_merge (first, len1, len2, collected, l_intbuf, l_block, use_internal_buf, xbuf_used, comp, xbuf);
2438 }
2439}
2440
2441
2442} //namespace detail_adaptive {
2443} //namespace movelib {
2444} //namespace boost {
2445
2446#include <boost/move/detail/config_end.hpp>
2447
2448#endif //#define BOOST_MOVE_ADAPTIVE_SORT_MERGE_HPP
2449