1 | /* |
2 | * Taken from https://github.com/swenson/sort |
3 | * Revision: 05fd77bfec049ce8b7c408c4d3dd2d51ee061a15 |
4 | * Removed all code unrelated to Timsort and made minor adjustments for |
5 | * cross-platform compatibility. |
6 | */ |
7 | |
8 | /* |
9 | * The MIT License (MIT) |
10 | * |
11 | * Copyright (c) 2010-2017 Christopher Swenson. |
12 | * Copyright (c) 2012 Vojtech Fried. |
13 | * Copyright (c) 2012 Google Inc. All Rights Reserved. |
14 | * |
15 | * Permission is hereby granted, free of charge, to any person obtaining a |
16 | * copy of this software and associated documentation files (the "Software"), |
17 | * to deal in the Software without restriction, including without limitation |
18 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
19 | * and/or sell copies of the Software, and to permit persons to whom the |
20 | * Software is furnished to do so, subject to the following conditions: |
21 | * |
22 | * The above copyright notice and this permission notice shall be included in |
23 | * all copies or substantial portions of the Software. |
24 | * |
25 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
26 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
27 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
28 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
29 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
30 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
31 | * DEALINGS IN THE SOFTWARE. |
32 | */ |
33 | |
34 | #include <stdlib.h> |
35 | #include <stdio.h> |
36 | #include <string.h> |
37 | #ifdef HAVE_STDINT_H |
38 | #include <stdint.h> |
39 | #elif defined(_WIN32) |
40 | typedef unsigned __int64 uint64_t; |
41 | #endif |
42 | |
43 | #ifndef SORT_NAME |
44 | #error "Must declare SORT_NAME" |
45 | #endif |
46 | |
47 | #ifndef SORT_TYPE |
48 | #error "Must declare SORT_TYPE" |
49 | #endif |
50 | |
51 | #ifndef SORT_CMP |
52 | #define SORT_CMP(x, y) ((x) < (y) ? -1 : ((x) == (y) ? 0 : 1)) |
53 | #endif |
54 | |
55 | #ifndef TIM_SORT_STACK_SIZE |
56 | #define TIM_SORT_STACK_SIZE 128 |
57 | #endif |
58 | |
59 | #define SORT_SWAP(x,y) {SORT_TYPE __SORT_SWAP_t = (x); (x) = (y); (y) = __SORT_SWAP_t;} |
60 | |
61 | |
62 | /* Common, type-agnosting functions and constants that we don't want to declare twice. */ |
63 | #ifndef SORT_COMMON_H |
64 | #define SORT_COMMON_H |
65 | |
66 | #ifndef MAX |
67 | #define MAX(x,y) (((x) > (y) ? (x) : (y))) |
68 | #endif |
69 | |
70 | #ifndef MIN |
71 | #define MIN(x,y) (((x) < (y) ? (x) : (y))) |
72 | #endif |
73 | |
74 | static int compute_minrun(const uint64_t); |
75 | |
76 | #ifndef CLZ |
77 | #ifdef __GNUC__ |
78 | #define CLZ __builtin_clzll |
79 | #else |
80 | |
81 | static int clzll(uint64_t); |
82 | |
83 | /* adapted from Hacker's Delight */ |
84 | static int clzll(uint64_t x) { |
85 | int n; |
86 | |
87 | if (x == 0) { |
88 | return 64; |
89 | } |
90 | |
91 | n = 0; |
92 | |
93 | if (x <= 0x00000000FFFFFFFFL) { |
94 | n = n + 32; |
95 | x = x << 32; |
96 | } |
97 | |
98 | if (x <= 0x0000FFFFFFFFFFFFL) { |
99 | n = n + 16; |
100 | x = x << 16; |
101 | } |
102 | |
103 | if (x <= 0x00FFFFFFFFFFFFFFL) { |
104 | n = n + 8; |
105 | x = x << 8; |
106 | } |
107 | |
108 | if (x <= 0x0FFFFFFFFFFFFFFFL) { |
109 | n = n + 4; |
110 | x = x << 4; |
111 | } |
112 | |
113 | if (x <= 0x3FFFFFFFFFFFFFFFL) { |
114 | n = n + 2; |
115 | x = x << 2; |
116 | } |
117 | |
118 | if (x <= 0x7FFFFFFFFFFFFFFFL) { |
119 | n = n + 1; |
120 | } |
121 | |
122 | return n; |
123 | } |
124 | |
125 | #define CLZ clzll |
126 | #endif |
127 | #endif |
128 | |
129 | static __inline int compute_minrun(const uint64_t size) { |
130 | const int top_bit = 64 - CLZ(size); |
131 | const int shift = MAX(top_bit, 6) - 6; |
132 | const int minrun = size >> shift; |
133 | const uint64_t mask = (1ULL << shift) - 1; |
134 | |
135 | if (mask & size) { |
136 | return minrun + 1; |
137 | } |
138 | |
139 | return minrun; |
140 | } |
141 | |
142 | #endif /* SORT_COMMON_H */ |
143 | |
144 | #define SORT_CONCAT(x, y) x ## _ ## y |
145 | #define SORT_MAKE_STR1(x, y) SORT_CONCAT(x,y) |
146 | #define SORT_MAKE_STR(x) SORT_MAKE_STR1(SORT_NAME,x) |
147 | |
148 | #define BINARY_INSERTION_FIND SORT_MAKE_STR(binary_insertion_find) |
149 | #define BINARY_INSERTION_SORT_START SORT_MAKE_STR(binary_insertion_sort_start) |
150 | #define BINARY_INSERTION_SORT SORT_MAKE_STR(binary_insertion_sort) |
151 | #define REVERSE_ELEMENTS SORT_MAKE_STR(reverse_elements) |
152 | #define COUNT_RUN SORT_MAKE_STR(count_run) |
153 | #define CHECK_INVARIANT SORT_MAKE_STR(check_invariant) |
154 | #define TIM_SORT SORT_MAKE_STR(tim_sort) |
155 | #define TIM_SORT_RESIZE SORT_MAKE_STR(tim_sort_resize) |
156 | #define TIM_SORT_MERGE SORT_MAKE_STR(tim_sort_merge) |
157 | #define TIM_SORT_COLLAPSE SORT_MAKE_STR(tim_sort_collapse) |
158 | |
159 | #ifndef MAX |
160 | #define MAX(x,y) (((x) > (y) ? (x) : (y))) |
161 | #endif |
162 | #ifndef MIN |
163 | #define MIN(x,y) (((x) < (y) ? (x) : (y))) |
164 | #endif |
165 | |
166 | typedef struct { |
167 | size_t start; |
168 | size_t length; |
169 | } TIM_SORT_RUN_T; |
170 | |
171 | |
172 | void BINARY_INSERTION_SORT(SORT_TYPE *dst, const size_t size); |
173 | void TIM_SORT(SORT_TYPE *dst, const size_t size); |
174 | |
175 | |
176 | /* Function used to do a binary search for binary insertion sort */ |
177 | static __inline size_t BINARY_INSERTION_FIND(SORT_TYPE *dst, const SORT_TYPE x, |
178 | const size_t size) { |
179 | size_t l, c, r; |
180 | SORT_TYPE cx; |
181 | l = 0; |
182 | r = size - 1; |
183 | c = r >> 1; |
184 | |
185 | /* check for out of bounds at the beginning. */ |
186 | if (SORT_CMP(x, dst[0]) < 0) { |
187 | return 0; |
188 | } else if (SORT_CMP(x, dst[r]) > 0) { |
189 | return r; |
190 | } |
191 | |
192 | cx = dst[c]; |
193 | |
194 | while (1) { |
195 | const int val = SORT_CMP(x, cx); |
196 | |
197 | if (val < 0) { |
198 | if (c - l <= 1) { |
199 | return c; |
200 | } |
201 | |
202 | r = c; |
203 | } else { /* allow = for stability. The binary search favors the right. */ |
204 | if (r - c <= 1) { |
205 | return c + 1; |
206 | } |
207 | |
208 | l = c; |
209 | } |
210 | |
211 | c = l + ((r - l) >> 1); |
212 | cx = dst[c]; |
213 | } |
214 | } |
215 | |
216 | /* Binary insertion sort, but knowing that the first "start" entries are sorted. Used in timsort. */ |
217 | static void BINARY_INSERTION_SORT_START(SORT_TYPE *dst, const size_t start, const size_t size) { |
218 | size_t i; |
219 | |
220 | for (i = start; i < size; i++) { |
221 | size_t j; |
222 | SORT_TYPE x; |
223 | size_t location; |
224 | |
225 | /* If this entry is already correct, just move along */ |
226 | if (SORT_CMP(dst[i - 1], dst[i]) <= 0) { |
227 | continue; |
228 | } |
229 | |
230 | /* Else we need to find the right place, shift everything over, and squeeze in */ |
231 | x = dst[i]; |
232 | location = BINARY_INSERTION_FIND(dst, x, i); |
233 | |
234 | for (j = i - 1; j >= location; j--) { |
235 | dst[j + 1] = dst[j]; |
236 | |
237 | if (j == 0) { /* check edge case because j is unsigned */ |
238 | break; |
239 | } |
240 | } |
241 | |
242 | dst[location] = x; |
243 | } |
244 | } |
245 | |
246 | /* Binary insertion sort */ |
247 | void BINARY_INSERTION_SORT(SORT_TYPE *dst, const size_t size) { |
248 | /* don't bother sorting an array of size <= 1 */ |
249 | if (size <= 1) { |
250 | return; |
251 | } |
252 | |
253 | BINARY_INSERTION_SORT_START(dst, 1, size); |
254 | } |
255 | |
256 | /* timsort implementation, based on timsort.txt */ |
257 | |
258 | static __inline void REVERSE_ELEMENTS(SORT_TYPE *dst, size_t start, size_t end) { |
259 | while (1) { |
260 | if (start >= end) { |
261 | return; |
262 | } |
263 | |
264 | SORT_SWAP(dst[start], dst[end]); |
265 | start++; |
266 | end--; |
267 | } |
268 | } |
269 | |
270 | static size_t COUNT_RUN(SORT_TYPE *dst, const size_t start, const size_t size) { |
271 | size_t curr; |
272 | |
273 | if (size - start == 1) { |
274 | return 1; |
275 | } |
276 | |
277 | if (start >= size - 2) { |
278 | if (SORT_CMP(dst[size - 2], dst[size - 1]) > 0) { |
279 | SORT_SWAP(dst[size - 2], dst[size - 1]); |
280 | } |
281 | |
282 | return 2; |
283 | } |
284 | |
285 | curr = start + 2; |
286 | |
287 | if (SORT_CMP(dst[start], dst[start + 1]) <= 0) { |
288 | /* increasing run */ |
289 | while (1) { |
290 | if (curr == size - 1) { |
291 | break; |
292 | } |
293 | |
294 | if (SORT_CMP(dst[curr - 1], dst[curr]) > 0) { |
295 | break; |
296 | } |
297 | |
298 | curr++; |
299 | } |
300 | |
301 | return curr - start; |
302 | } else { |
303 | /* decreasing run */ |
304 | while (1) { |
305 | if (curr == size - 1) { |
306 | break; |
307 | } |
308 | |
309 | if (SORT_CMP(dst[curr - 1], dst[curr]) <= 0) { |
310 | break; |
311 | } |
312 | |
313 | curr++; |
314 | } |
315 | |
316 | /* reverse in-place */ |
317 | REVERSE_ELEMENTS(dst, start, curr - 1); |
318 | return curr - start; |
319 | } |
320 | } |
321 | |
322 | static int CHECK_INVARIANT(TIM_SORT_RUN_T *stack, const int stack_curr) { |
323 | size_t A, B, C; |
324 | |
325 | if (stack_curr < 2) { |
326 | return 1; |
327 | } |
328 | |
329 | if (stack_curr == 2) { |
330 | const size_t A1 = stack[stack_curr - 2].length; |
331 | const size_t B1 = stack[stack_curr - 1].length; |
332 | |
333 | if (A1 <= B1) { |
334 | return 0; |
335 | } |
336 | |
337 | return 1; |
338 | } |
339 | |
340 | A = stack[stack_curr - 3].length; |
341 | B = stack[stack_curr - 2].length; |
342 | C = stack[stack_curr - 1].length; |
343 | |
344 | if ((A <= B + C) || (B <= C)) { |
345 | return 0; |
346 | } |
347 | |
348 | return 1; |
349 | } |
350 | |
351 | typedef struct { |
352 | size_t alloc; |
353 | SORT_TYPE *storage; |
354 | } TEMP_STORAGE_T; |
355 | |
356 | static void TIM_SORT_RESIZE(TEMP_STORAGE_T *store, const size_t new_size) { |
357 | if (store->alloc < new_size) { |
358 | SORT_TYPE *tempstore = (SORT_TYPE *)realloc(store->storage, new_size * sizeof(SORT_TYPE)); |
359 | |
360 | if (tempstore == NULL) { |
361 | fprintf(stderr, "Error allocating temporary storage for tim sort: need %lu bytes" , |
362 | (unsigned long)(sizeof(SORT_TYPE) * new_size)); |
363 | exit(1); |
364 | } |
365 | |
366 | store->storage = tempstore; |
367 | store->alloc = new_size; |
368 | } |
369 | } |
370 | |
371 | static void TIM_SORT_MERGE(SORT_TYPE *dst, const TIM_SORT_RUN_T *stack, const int stack_curr, |
372 | TEMP_STORAGE_T *store) { |
373 | const size_t A = stack[stack_curr - 2].length; |
374 | const size_t B = stack[stack_curr - 1].length; |
375 | const size_t curr = stack[stack_curr - 2].start; |
376 | SORT_TYPE *storage; |
377 | size_t i, j, k; |
378 | TIM_SORT_RESIZE(store, MIN(A, B)); |
379 | storage = store->storage; |
380 | |
381 | /* left merge */ |
382 | if (A < B) { |
383 | memcpy(storage, &dst[curr], A * sizeof(SORT_TYPE)); |
384 | i = 0; |
385 | j = curr + A; |
386 | |
387 | for (k = curr; k < curr + A + B; k++) { |
388 | if ((i < A) && (j < curr + A + B)) { |
389 | if (SORT_CMP(storage[i], dst[j]) <= 0) { |
390 | dst[k] = storage[i++]; |
391 | } else { |
392 | dst[k] = dst[j++]; |
393 | } |
394 | } else if (i < A) { |
395 | dst[k] = storage[i++]; |
396 | } else { |
397 | break; |
398 | } |
399 | } |
400 | } else { |
401 | /* right merge */ |
402 | memcpy(storage, &dst[curr + A], B * sizeof(SORT_TYPE)); |
403 | i = B; |
404 | j = curr + A; |
405 | k = curr + A + B; |
406 | |
407 | while (k-- > curr) { |
408 | if ((i > 0) && (j > curr)) { |
409 | if (SORT_CMP(dst[j - 1], storage[i - 1]) > 0) { |
410 | dst[k] = dst[--j]; |
411 | } else { |
412 | dst[k] = storage[--i]; |
413 | } |
414 | } else if (i > 0) { |
415 | dst[k] = storage[--i]; |
416 | } else { |
417 | break; |
418 | } |
419 | } |
420 | } |
421 | } |
422 | |
423 | static int TIM_SORT_COLLAPSE(SORT_TYPE *dst, TIM_SORT_RUN_T *stack, int stack_curr, |
424 | TEMP_STORAGE_T *store, const size_t size) { |
425 | while (1) { |
426 | size_t A, B, C, D; |
427 | int ABC, BCD, CD; |
428 | |
429 | /* if the stack only has one thing on it, we are done with the collapse */ |
430 | if (stack_curr <= 1) { |
431 | break; |
432 | } |
433 | |
434 | /* if this is the last merge, just do it */ |
435 | if ((stack_curr == 2) && (stack[0].length + stack[1].length == size)) { |
436 | TIM_SORT_MERGE(dst, stack, stack_curr, store); |
437 | stack[0].length += stack[1].length; |
438 | stack_curr--; |
439 | break; |
440 | } |
441 | /* check if the invariant is off for a stack of 2 elements */ |
442 | else if ((stack_curr == 2) && (stack[0].length <= stack[1].length)) { |
443 | TIM_SORT_MERGE(dst, stack, stack_curr, store); |
444 | stack[0].length += stack[1].length; |
445 | stack_curr--; |
446 | break; |
447 | } else if (stack_curr == 2) { |
448 | break; |
449 | } |
450 | |
451 | B = stack[stack_curr - 3].length; |
452 | C = stack[stack_curr - 2].length; |
453 | D = stack[stack_curr - 1].length; |
454 | |
455 | if (stack_curr >= 4) { |
456 | A = stack[stack_curr - 4].length; |
457 | ABC = (A <= B + C); |
458 | } else { |
459 | ABC = 0; |
460 | } |
461 | |
462 | BCD = (B <= C + D) || ABC; |
463 | CD = (C <= D); |
464 | |
465 | /* Both invariants are good */ |
466 | if (!BCD && !CD) { |
467 | break; |
468 | } |
469 | |
470 | /* left merge */ |
471 | if (BCD && !CD) { |
472 | TIM_SORT_MERGE(dst, stack, stack_curr - 1, store); |
473 | stack[stack_curr - 3].length += stack[stack_curr - 2].length; |
474 | stack[stack_curr - 2] = stack[stack_curr - 1]; |
475 | stack_curr--; |
476 | } else { |
477 | /* right merge */ |
478 | TIM_SORT_MERGE(dst, stack, stack_curr, store); |
479 | stack[stack_curr - 2].length += stack[stack_curr - 1].length; |
480 | stack_curr--; |
481 | } |
482 | } |
483 | |
484 | return stack_curr; |
485 | } |
486 | |
487 | static __inline int PUSH_NEXT(SORT_TYPE *dst, |
488 | const size_t size, |
489 | TEMP_STORAGE_T *store, |
490 | const size_t minrun, |
491 | TIM_SORT_RUN_T *run_stack, |
492 | size_t *stack_curr, |
493 | size_t *curr) { |
494 | size_t len = COUNT_RUN(dst, *curr, size); |
495 | size_t run = minrun; |
496 | |
497 | if (run > size - *curr) { |
498 | run = size - *curr; |
499 | } |
500 | |
501 | if (run > len) { |
502 | BINARY_INSERTION_SORT_START(&dst[*curr], len, run); |
503 | len = run; |
504 | } |
505 | |
506 | run_stack[*stack_curr].start = *curr; |
507 | run_stack[*stack_curr].length = len; |
508 | (*stack_curr)++; |
509 | *curr += len; |
510 | |
511 | if (*curr == size) { |
512 | /* finish up */ |
513 | while (*stack_curr > 1) { |
514 | TIM_SORT_MERGE(dst, run_stack, *stack_curr, store); |
515 | run_stack[*stack_curr - 2].length += run_stack[*stack_curr - 1].length; |
516 | (*stack_curr)--; |
517 | } |
518 | |
519 | if (store->storage != NULL) { |
520 | free(store->storage); |
521 | store->storage = NULL; |
522 | } |
523 | |
524 | return 0; |
525 | } |
526 | |
527 | return 1; |
528 | } |
529 | |
530 | void TIM_SORT(SORT_TYPE *dst, const size_t size) { |
531 | size_t minrun; |
532 | TEMP_STORAGE_T _store, *store; |
533 | TIM_SORT_RUN_T run_stack[TIM_SORT_STACK_SIZE]; |
534 | size_t stack_curr = 0; |
535 | size_t curr = 0; |
536 | |
537 | /* don't bother sorting an array of size 1 */ |
538 | if (size <= 1) { |
539 | return; |
540 | } |
541 | |
542 | if (size < 64) { |
543 | BINARY_INSERTION_SORT(dst, size); |
544 | return; |
545 | } |
546 | |
547 | /* compute the minimum run length */ |
548 | minrun = compute_minrun(size); |
549 | /* temporary storage for merges */ |
550 | store = &_store; |
551 | store->alloc = 0; |
552 | store->storage = NULL; |
553 | |
554 | if (!PUSH_NEXT(dst, size, store, minrun, run_stack, &stack_curr, &curr)) { |
555 | return; |
556 | } |
557 | |
558 | if (!PUSH_NEXT(dst, size, store, minrun, run_stack, &stack_curr, &curr)) { |
559 | return; |
560 | } |
561 | |
562 | if (!PUSH_NEXT(dst, size, store, minrun, run_stack, &stack_curr, &curr)) { |
563 | return; |
564 | } |
565 | |
566 | while (1) { |
567 | if (!CHECK_INVARIANT(run_stack, stack_curr)) { |
568 | stack_curr = TIM_SORT_COLLAPSE(dst, run_stack, stack_curr, store, size); |
569 | continue; |
570 | } |
571 | |
572 | if (!PUSH_NEXT(dst, size, store, minrun, run_stack, &stack_curr, &curr)) { |
573 | return; |
574 | } |
575 | } |
576 | } |
577 | |
578 | #undef SORT_CONCAT |
579 | #undef SORT_MAKE_STR1 |
580 | #undef SORT_MAKE_STR |
581 | #undef SORT_NAME |
582 | #undef SORT_TYPE |
583 | #undef SORT_CMP |
584 | #undef TEMP_STORAGE_T |
585 | #undef TIM_SORT_RUN_T |
586 | #undef PUSH_NEXT |
587 | #undef SORT_SWAP |
588 | #undef SORT_CONCAT |
589 | #undef SORT_MAKE_STR1 |
590 | #undef SORT_MAKE_STR |
591 | #undef BINARY_INSERTION_FIND |
592 | #undef BINARY_INSERTION_SORT_START |
593 | #undef BINARY_INSERTION_SORT |
594 | #undef REVERSE_ELEMENTS |
595 | #undef COUNT_RUN |
596 | #undef TIM_SORT |
597 | #undef TIM_SORT_RESIZE |
598 | #undef TIM_SORT_COLLAPSE |
599 | #undef TIM_SORT_RUN_T |
600 | #undef TEMP_STORAGE_T |
601 | |