1 | /* |
2 | * This Source Code Form is subject to the terms of the Mozilla Public |
3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
5 | * |
6 | * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V. |
7 | */ |
8 | |
9 | /* |
10 | * This file implements a stable sort algorithm known as "timsort". |
11 | * The algorithm is a straight copy of the listsort function in the |
12 | * Python 2.5 source code, heavily modified to fit into the MonetDB |
13 | * environment. |
14 | * The original author of the sort algorithm was Tim Peters, the |
15 | * adaptation was done by Sjoerd Mullender. |
16 | * |
17 | * |
18 | * This file is included multiple times. We expect a bunch of tokens |
19 | * to be redefined differently each time (see gdk_ssort.c). If the |
20 | * token GDKssortimpl is defined, the main interface is defined. |
21 | */ |
22 | |
23 | /* binarysort is the best method for sorting small arrays: it does few |
24 | * compares, but can do data movement quadratic in the number of |
25 | * elements. |
26 | * [lo, hi) is a contiguous slice of a list, and is sorted via binary |
27 | * insertion. This sort is stable. On entry, must have lo <= start <= |
28 | * hi, and that [lo, start) is already sorted (pass start == lo if you |
29 | * don't know!). */ |
30 | static void |
31 | binarysort(size_t lo, size_t hi, size_t start, MergeState *ms) |
32 | { |
33 | register size_t l, p, r; |
34 | |
35 | assert(lo <= start && start <= hi); |
36 | /* assert [lo, start) is sorted */ |
37 | if (lo == start) |
38 | start++; |
39 | /* [lo,start) is sorted, insert start (the pivot) into this |
40 | * area, finding its position using binary search. */ |
41 | for (; start < hi; start++) { |
42 | /* set l to where start belongs */ |
43 | l = lo; |
44 | r = start; |
45 | /* ms->t[ht] is the pivot */ |
46 | COPY(ms->th, PTRADD(ms->bh, r, ms->hs), ms->hs); |
47 | COPY_any(ms->tt, PTRADD(ms->bt, r, ms->ts), ms->ts); |
48 | /* Invariants: |
49 | * pivot >= all in [lo, l). |
50 | * pivot < all in [r, start). |
51 | * The second is vacuously true at the start. */ |
52 | assert(l < r); |
53 | do { |
54 | p = l + ((r - l) >> 1); |
55 | if (ISLT(ms->th, PTRADD(ms->bh, p, ms->hs), ms)) |
56 | r = p; |
57 | else |
58 | l = p + 1; |
59 | } while (l < r); |
60 | assert(l == r); |
61 | /* The invariants still hold, so pivot >= all in [lo, |
62 | * l) and pivot < all in [l, start), so pivot belongs |
63 | * at l. Note that if there are elements equal to |
64 | * pivot, l points to the first slot after them -- |
65 | * that's why this sort is stable. Slide over to make |
66 | * room. |
67 | * Caution: using memmove is much slower under MSVC 5; |
68 | * we're not usually moving many slots. */ |
69 | for (p = start, r = p - 1; p > l; p = r, r = p - 1) { |
70 | COPY(PTRADD(ms->bh, p, ms->hs), |
71 | PTRADD(ms->bh, r, ms->hs), ms->hs); |
72 | COPY_any(PTRADD(ms->bt, p, ms->ts), |
73 | PTRADD(ms->bt, r, ms->ts), ms->ts); |
74 | } |
75 | COPY(PTRADD(ms->bh, l, ms->hs), ms->th, ms->hs); |
76 | COPY_any(PTRADD(ms->bt, l, ms->ts), ms->tt, ms->ts); |
77 | } |
78 | } |
79 | |
80 | /* Locate the proper position of key in a sorted vector; if the vector |
81 | * contains an element equal to key, return the position immediately |
82 | * to the left of the leftmost equal element. [gallop_right() does |
83 | * the same except returns the position to the right of the rightmost |
84 | * equal element (if any).] |
85 | * |
86 | * "a" is a sorted vector with n elements, starting at a[0]. n must |
87 | * be > 0. |
88 | * |
89 | * "hint" is an index at which to begin the search, 0 <= hint < n. |
90 | * The closer hint is to the final result, the faster this runs. |
91 | * |
92 | * The return value is the int k in 0..n such that |
93 | * |
94 | * a[k-1] < key <= a[k] |
95 | * |
96 | * pretending that *(a-1) is minus infinity and a[n] is plus infinity. |
97 | * IOW, key belongs at index k; or, IOW, the first k elements of a |
98 | * should precede key, and the last n-k should follow key. |
99 | * |
100 | * Returns -1 on error. See listsort.txt for info on the method. */ |
101 | static ssize_t |
102 | gallop_left(void *key, void *a, ssize_t n, ssize_t hint, MergeState *ms) |
103 | { |
104 | ssize_t ofs; |
105 | ssize_t lastofs; |
106 | ssize_t k; |
107 | |
108 | assert(key && a && n > 0 && hint >= 0 && hint < n); |
109 | |
110 | a = PTRADD(a, hint, ms->hs); |
111 | lastofs = 0; |
112 | ofs = 1; |
113 | if (ISLT(a, key, ms)) { |
114 | /* a[hint] < key -- gallop right, until |
115 | * a[hint + lastofs] < key <= a[hint + ofs] */ |
116 | const ssize_t maxofs = n - hint; /* &a[n-1] is highest */ |
117 | while (ofs < maxofs) { |
118 | if (ISLT(PTRADD(a, ofs, ms->hs), key, ms)) { |
119 | lastofs = ofs; |
120 | ofs = (ofs << 1) + 1; |
121 | if (ofs <= 0) /* int overflow */ |
122 | ofs = maxofs; |
123 | } else /* key <= a[hint + ofs] */ |
124 | break; |
125 | } |
126 | if (ofs > maxofs) |
127 | ofs = maxofs; |
128 | /* Translate back to offsets relative to &a[0]. */ |
129 | lastofs += hint; |
130 | ofs += hint; |
131 | } else { |
132 | /* key <= a[hint] -- gallop left, until |
133 | * a[hint - ofs] < key <= a[hint - lastofs] */ |
134 | const ssize_t maxofs = hint + 1; /* &a[0] is lowest */ |
135 | while (ofs < maxofs) { |
136 | if (ISLT(PTRADD(a, -ofs, ms->hs), key, ms)) |
137 | break; |
138 | /* key <= a[hint - ofs] */ |
139 | lastofs = ofs; |
140 | ofs = (ofs << 1) + 1; |
141 | if (ofs <= 0) /* int overflow */ |
142 | ofs = maxofs; |
143 | } |
144 | if (ofs > maxofs) |
145 | ofs = maxofs; |
146 | /* Translate back to positive offsets relative to &a[0]. */ |
147 | k = lastofs; |
148 | lastofs = hint - ofs; |
149 | ofs = hint - k; |
150 | } |
151 | a = PTRADD(a, -hint, ms->hs); |
152 | |
153 | assert(-1 <= lastofs && lastofs < ofs && ofs <= n); |
154 | /* Now a[lastofs] < key <= a[ofs], so key belongs somewhere to |
155 | * the right of lastofs but no farther right than ofs. Do a |
156 | * binary search, with invariant a[lastofs-1] < key <= |
157 | * a[ofs]. */ |
158 | ++lastofs; |
159 | while (lastofs < ofs) { |
160 | ssize_t m = lastofs + ((ofs - lastofs) >> 1); |
161 | |
162 | if (ISLT(PTRADD(a, m, ms->hs), key, ms)) |
163 | lastofs = m + 1; /* a[m] < key */ |
164 | else |
165 | ofs = m; /* key <= a[m] */ |
166 | } |
167 | assert(lastofs == ofs); /* so a[ofs-1] < key <= a[ofs] */ |
168 | return ofs; |
169 | } |
170 | |
171 | /* Exactly like gallop_left(), except that if key already exists in |
172 | * a[0:n], finds the position immediately to the right of the |
173 | * rightmost equal value. |
174 | * |
175 | * The return value is the int k in 0..n such that |
176 | * |
177 | * a[k-1] <= key < a[k] |
178 | * |
179 | * or -1 if error. |
180 | * |
181 | * The code duplication is massive, but this is enough different given |
182 | * that we're sticking to "<" comparisons that it's much harder to |
183 | * follow if written as one routine with yet another "left or right?" |
184 | * flag. */ |
185 | static ssize_t |
186 | gallop_right(void *key, void *a, ssize_t n, ssize_t hint, MergeState *ms) |
187 | { |
188 | ssize_t ofs; |
189 | ssize_t lastofs; |
190 | ssize_t k; |
191 | |
192 | assert(key && a && n > 0 && hint >= 0 && hint < n); |
193 | |
194 | a = PTRADD(a, hint, ms->hs); |
195 | lastofs = 0; |
196 | ofs = 1; |
197 | if (ISLT(key, a, ms)) { |
198 | /* key < a[hint] -- gallop left, until |
199 | * a[hint - ofs] <= key < a[hint - lastofs] */ |
200 | const ssize_t maxofs = hint + 1; /* &a[0] is lowest */ |
201 | while (ofs < maxofs) { |
202 | if (ISLT(key, PTRADD(a, -ofs, ms->hs), ms)) { |
203 | lastofs = ofs; |
204 | ofs = (ofs << 1) + 1; |
205 | if (ofs <= 0) /* int overflow */ |
206 | ofs = maxofs; |
207 | } else /* a[hint - ofs] <= key */ |
208 | break; |
209 | } |
210 | if (ofs > maxofs) |
211 | ofs = maxofs; |
212 | /* Translate back to positive offsets relative to &a[0]. */ |
213 | k = lastofs; |
214 | lastofs = hint - ofs; |
215 | ofs = hint - k; |
216 | } else { |
217 | /* a[hint] <= key -- gallop right, until |
218 | * a[hint + lastofs] <= key < a[hint + ofs] */ |
219 | const ssize_t maxofs = n - hint; /* &a[n-1] is highest */ |
220 | while (ofs < maxofs) { |
221 | if (ISLT(key, PTRADD(a, ofs, ms->hs), ms)) |
222 | break; |
223 | /* a[hint + ofs] <= key */ |
224 | lastofs = ofs; |
225 | ofs = (ofs << 1) + 1; |
226 | if (ofs <= 0) /* int overflow */ |
227 | ofs = maxofs; |
228 | } |
229 | if (ofs > maxofs) |
230 | ofs = maxofs; |
231 | /* Translate back to offsets relative to &a[0]. */ |
232 | lastofs += hint; |
233 | ofs += hint; |
234 | } |
235 | a = PTRADD(a, -hint, ms->hs); |
236 | |
237 | assert(-1 <= lastofs && lastofs < ofs && ofs <= n); |
238 | /* Now a[lastofs] <= key < a[ofs], so key belongs somewhere to |
239 | * the right of lastofs but no farther right than ofs. Do a |
240 | * binary search, with invariant a[lastofs-1] <= key < |
241 | * a[ofs]. */ |
242 | ++lastofs; |
243 | while (lastofs < ofs) { |
244 | ssize_t m = lastofs + ((ofs - lastofs) >> 1); |
245 | |
246 | if (ISLT(key, PTRADD(a, m, ms->hs), ms)) |
247 | ofs = m; /* key < a[m] */ |
248 | else |
249 | lastofs = m+1; /* a[m] <= key */ |
250 | } |
251 | assert(lastofs == ofs); /* so a[ofs-1] <= key < a[ofs] */ |
252 | return ofs; |
253 | } |
254 | |
255 | /* Merge the two runs at stack indices i and i+1. |
256 | * Returns 0 on success, -1 on error. */ |
257 | static ssize_t |
258 | merge_at(MergeState *ms, ssize_t i) |
259 | { |
260 | size_t pa, pb; |
261 | ssize_t na, nb; |
262 | ssize_t k; |
263 | |
264 | assert(ms != NULL); |
265 | assert(ms->n >= 2); |
266 | assert(i >= 0); |
267 | assert(i == ms->n - 2 || i == ms->n - 3); |
268 | |
269 | pa = ms->pending[i].base; |
270 | na = ms->pending[i].len; |
271 | pb = ms->pending[i + 1].base; |
272 | nb = ms->pending[i + 1].len; |
273 | assert(na > 0 && nb > 0); |
274 | assert(pa + na == pb); |
275 | |
276 | /* Record the length of the combined runs; if i is the |
277 | * 3rd-last run now, also slide over the last run (which isn't |
278 | * involved in this merge). The current run i+1 goes away in |
279 | * any case. */ |
280 | ms->pending[i].len = na + nb; |
281 | if (i == ms->n - 3) |
282 | ms->pending[i + 1] = ms->pending[i + 2]; |
283 | --ms->n; |
284 | |
285 | /* Where does b start in a? Elements in a before that can be |
286 | * ignored (already in place). */ |
287 | k = gallop_right(PTRADD(ms->bh, pb, ms->hs), |
288 | PTRADD(ms->bh, pa, ms->hs), na, 0, ms); |
289 | pa += k; |
290 | na -= k; |
291 | if (na == 0) |
292 | return 0; |
293 | |
294 | /* Where does a end in b? Elements in b after that can be |
295 | * ignored (already in place). */ |
296 | nb = gallop_left(PTRADD(ms->bh, pa + na - 1, ms->hs), |
297 | PTRADD(ms->bh, pb, ms->hs), nb, nb-1, ms); |
298 | if (nb <= 0) |
299 | return nb; |
300 | |
301 | /* Merge what remains of the runs, using a temp array with |
302 | * min(na, nb) elements. */ |
303 | if (na <= nb) { |
304 | /* Merge the na elements starting at pa with the nb elements starting |
305 | * at pb in a stable way, in-place. na and nb must be > 0, and pa + |
306 | * na == pb. Must also have that *pb < *pa, that pa[na-1] belongs at |
307 | * the end of the merge, and should have na <= nb. See listsort.txt |
308 | * for more info. Return 0 if successful, -1 if error. */ |
309 | size_t dest; |
310 | ssize_t min_gallop = ms->min_gallop; |
311 | |
312 | assert(ms && na > 0 && nb > 0 && pa + na == pb); |
313 | if (MERGE_GETMEMH(ms, na) < 0) |
314 | return -1; |
315 | if (MERGE_GETMEMT(ms, na) < 0) |
316 | return -1; |
317 | COPY_anyN(ms->ah, PTRADD(ms->bh, pa, ms->hs), ms->hs, na); |
318 | COPY_anyN(ms->at, PTRADD(ms->bt, pa, ms->ts), ms->ts, na); |
319 | dest = pa; |
320 | pa = 0; |
321 | |
322 | COPY(PTRADD(ms->bh, dest, ms->hs), |
323 | PTRADD(ms->bh, pb, ms->hs), ms->hs); |
324 | COPY_any(PTRADD(ms->bt, dest, ms->ts), |
325 | PTRADD(ms->bt, pb, ms->ts), ms->ts); |
326 | dest++; |
327 | pb++; |
328 | --nb; |
329 | if (nb == 0) |
330 | goto SucceedA; |
331 | if (na == 1) |
332 | goto CopyB; |
333 | |
334 | for (;;) { |
335 | ssize_t acount = 0; /* # of times A won in a row */ |
336 | ssize_t bcount = 0; /* # of times B won in a row */ |
337 | |
338 | /* Do the straightforward thing until (if |
339 | * ever) one run appears to win |
340 | * consistently. */ |
341 | for (;;) { |
342 | assert(na > 1 && nb > 0); |
343 | k = ISLT(PTRADD(ms->bh, pb, ms->hs), |
344 | PTRADD(ms->ah, pa, ms->hs), ms); |
345 | if (k) { |
346 | COPY(PTRADD(ms->bh, dest, ms->hs), |
347 | PTRADD(ms->bh, pb, ms->hs), |
348 | ms->hs); |
349 | COPY_any(PTRADD(ms->bt, dest, ms->ts), |
350 | PTRADD(ms->bt, pb, ms->ts), |
351 | ms->ts); |
352 | dest++; |
353 | pb++; |
354 | ++bcount; |
355 | acount = 0; |
356 | --nb; |
357 | if (nb == 0) |
358 | goto SucceedA; |
359 | if (bcount >= min_gallop) |
360 | break; |
361 | } else { |
362 | COPY(PTRADD(ms->bh, dest, ms->hs), |
363 | PTRADD(ms->ah, pa, ms->hs), |
364 | ms->hs); |
365 | COPY_any(PTRADD(ms->bt, dest, ms->ts), |
366 | PTRADD(ms->at, pa, ms->ts), |
367 | ms->ts); |
368 | dest++; |
369 | pa++; |
370 | ++acount; |
371 | bcount = 0; |
372 | --na; |
373 | if (na == 1) |
374 | goto CopyB; |
375 | if (acount >= min_gallop) |
376 | break; |
377 | } |
378 | } |
379 | |
380 | /* One run is winning so consistently that |
381 | * galloping may be a huge win. So try that, |
382 | * and continue galloping until (if ever) |
383 | * neither run appears to be winning |
384 | * consistently anymore. */ |
385 | ++min_gallop; |
386 | do { |
387 | assert(na > 1 && nb > 0); |
388 | min_gallop -= min_gallop > 1; |
389 | ms->min_gallop = min_gallop; |
390 | k = gallop_right(PTRADD(ms->bh, pb, ms->hs), |
391 | PTRADD(ms->ah, pa, ms->hs), |
392 | na, 0, ms); |
393 | acount = k; |
394 | if (k) { |
395 | COPY_anyN(PTRADD(ms->bh, dest, ms->hs), |
396 | PTRADD(ms->ah, pa, ms->hs), |
397 | ms->hs, k); |
398 | COPY_anyN(PTRADD(ms->bt, dest, ms->ts), |
399 | PTRADD(ms->at, pa, ms->ts), |
400 | ms->ts, k); |
401 | dest += k; |
402 | pa += k; |
403 | na -= k; |
404 | if (na == 1) |
405 | goto CopyB; |
406 | /* na==0 is impossible now if |
407 | * the comparison function is |
408 | * consistent, but we can't |
409 | * assume that it is. */ |
410 | if (na == 0) |
411 | goto SucceedA; |
412 | } |
413 | COPY(PTRADD(ms->bh, dest, ms->hs), |
414 | PTRADD(ms->bh, pb, ms->hs), ms->hs); |
415 | COPY_any(PTRADD(ms->bt, dest, ms->ts), |
416 | PTRADD(ms->bt, pb, ms->ts), ms->ts); |
417 | dest++; |
418 | pb++; |
419 | --nb; |
420 | if (nb == 0) |
421 | goto SucceedA; |
422 | |
423 | k = gallop_left(PTRADD(ms->ah, pa, ms->hs), |
424 | PTRADD(ms->bh, pb, ms->hs), |
425 | nb, 0, ms); |
426 | bcount = k; |
427 | if (k) { |
428 | memmove(PTRADD(ms->bh, dest, ms->hs), |
429 | PTRADD(ms->bh, pb, ms->hs), |
430 | k * ms->hs); |
431 | memmove(PTRADD(ms->bt, dest, ms->ts), |
432 | PTRADD(ms->bt, pb, ms->ts), |
433 | k * ms->ts); |
434 | dest += k; |
435 | pb += k; |
436 | nb -= k; |
437 | if (nb == 0) |
438 | goto SucceedA; |
439 | } |
440 | COPY(PTRADD(ms->bh, dest, ms->hs), |
441 | PTRADD(ms->ah, pa, ms->hs), ms->hs); |
442 | COPY_any(PTRADD(ms->bt, dest, ms->ts), |
443 | PTRADD(ms->at, pa, ms->ts), ms->ts); |
444 | dest++; |
445 | pa++; |
446 | --na; |
447 | if (na == 1) |
448 | goto CopyB; |
449 | } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP); |
450 | ++min_gallop; /* penalize it for leaving galloping mode */ |
451 | ms->min_gallop = min_gallop; |
452 | } |
453 | SucceedA: |
454 | if (na) { |
455 | COPY_anyN(PTRADD(ms->bh, dest, ms->hs), |
456 | PTRADD(ms->ah, pa, ms->hs), ms->hs, na); |
457 | COPY_anyN(PTRADD(ms->bt, dest, ms->ts), |
458 | PTRADD(ms->at, pa, ms->ts), ms->ts, na); |
459 | } |
460 | return 0; |
461 | CopyB: |
462 | assert(na == 1 && nb > 0); |
463 | /* The last element of pa belongs at the end of the merge. */ |
464 | memmove(PTRADD(ms->bh, dest, ms->hs), |
465 | PTRADD(ms->bh, pb, ms->hs), nb * ms->hs); |
466 | memmove(PTRADD(ms->bt, dest, ms->ts), |
467 | PTRADD(ms->bt, pb, ms->ts), nb * ms->ts); |
468 | COPY(PTRADD(ms->bh, dest + nb, ms->hs), |
469 | PTRADD(ms->ah, pa, ms->hs), ms->hs); |
470 | COPY_any(PTRADD(ms->bt, dest + nb, ms->ts), |
471 | PTRADD(ms->at, pa, ms->ts), ms->ts); |
472 | return 0; |
473 | } else { |
474 | /* Merge the na elements starting at pa with the nb elements starting |
475 | * at pb in a stable way, in-place. na and nb must be > 0, and pa + |
476 | * na == pb. Must also have that *pb < *pa, that pa[na-1] belongs at |
477 | * the end of the merge, and should have na >= nb. See listsort.txt |
478 | * for more info. Return 0 if successful, -1 if error. */ |
479 | size_t dest; |
480 | size_t basea; |
481 | size_t baseb; |
482 | ssize_t min_gallop = ms->min_gallop; |
483 | |
484 | assert(ms && na > 0 && nb > 0 && pa + na == pb); |
485 | if (MERGE_GETMEMH(ms, nb) < 0) |
486 | return -1; |
487 | if (MERGE_GETMEMT(ms, nb) < 0) |
488 | return -1; |
489 | dest = pb + nb - 1; |
490 | COPY_anyN(ms->ah, PTRADD(ms->bh, pb, ms->hs), ms->hs, nb); |
491 | COPY_anyN(ms->at, PTRADD(ms->bt, pb, ms->ts), ms->ts, nb); |
492 | basea = pa; |
493 | baseb = 0; |
494 | pb = nb - 1; |
495 | pa += na - 1; |
496 | |
497 | COPY(PTRADD(ms->bh, dest, ms->hs), |
498 | PTRADD(ms->bh, pa, ms->hs), ms->hs); |
499 | COPY_any(PTRADD(ms->bt, dest, ms->ts), |
500 | PTRADD(ms->bt, pa, ms->ts), ms->ts); |
501 | dest--; |
502 | pa--; |
503 | --na; |
504 | if (na == 0) |
505 | goto SucceedB; |
506 | if (nb == 1) |
507 | goto CopyA; |
508 | |
509 | for (;;) { |
510 | ssize_t acount = 0; /* # of times A won in a row */ |
511 | ssize_t bcount = 0; /* # of times B won in a row */ |
512 | |
513 | /* Do the straightforward thing until (if |
514 | * ever) one run appears to win |
515 | * consistently. */ |
516 | for (;;) { |
517 | assert(na > 0 && nb > 1); |
518 | k = ISLT(PTRADD(ms->ah, pb, ms->hs), |
519 | PTRADD(ms->bh, pa, ms->hs), ms); |
520 | if (k) { |
521 | COPY(PTRADD(ms->bh, dest, ms->hs), |
522 | PTRADD(ms->bh, pa, ms->hs), |
523 | ms->hs); |
524 | COPY_any(PTRADD(ms->bt, dest, ms->ts), |
525 | PTRADD(ms->bt, pa, ms->ts), |
526 | ms->ts); |
527 | dest--; |
528 | pa--; |
529 | ++acount; |
530 | bcount = 0; |
531 | --na; |
532 | if (na == 0) |
533 | goto SucceedB; |
534 | if (acount >= min_gallop) |
535 | break; |
536 | } else { |
537 | COPY(PTRADD(ms->bh, dest, ms->hs), |
538 | PTRADD(ms->ah, pb, ms->hs), |
539 | ms->hs); |
540 | COPY_any(PTRADD(ms->bt, dest, ms->ts), |
541 | PTRADD(ms->at, pb, ms->ts), |
542 | ms->ts); |
543 | dest--; |
544 | pb--; |
545 | ++bcount; |
546 | acount = 0; |
547 | --nb; |
548 | if (nb == 1) |
549 | goto CopyA; |
550 | if (bcount >= min_gallop) |
551 | break; |
552 | } |
553 | } |
554 | |
555 | /* One run is winning so consistently that |
556 | * galloping may be a huge win. So try that, |
557 | * and continue galloping until (if ever) |
558 | * neither run appears to be winning |
559 | * consistently anymore. */ |
560 | ++min_gallop; |
561 | do { |
562 | assert(na > 0 && nb > 1); |
563 | min_gallop -= min_gallop > 1; |
564 | ms->min_gallop = min_gallop; |
565 | k = gallop_right(PTRADD(ms->ah, pb, ms->hs), |
566 | PTRADD(ms->bh, basea, ms->hs), |
567 | na, na - 1, ms); |
568 | k = na - k; |
569 | acount = k; |
570 | if (k) { |
571 | dest -= k; |
572 | pa -= k; |
573 | memmove(PTRADD(ms->bh, dest + 1, |
574 | ms->hs), |
575 | PTRADD(ms->bh, pa + 1, ms->hs), |
576 | k * ms->hs); |
577 | memmove(PTRADD(ms->bt, dest + 1, |
578 | ms->ts), |
579 | PTRADD(ms->bt, pa + 1, ms->ts), |
580 | k * ms->ts); |
581 | na -= k; |
582 | if (na == 0) |
583 | goto SucceedB; |
584 | } |
585 | COPY(PTRADD(ms->bh, dest, ms->hs), |
586 | PTRADD(ms->ah, pb, ms->hs), ms->hs); |
587 | COPY_any(PTRADD(ms->bt, dest, ms->ts), |
588 | PTRADD(ms->at, pb, ms->ts), ms->ts); |
589 | dest--; |
590 | pb--; |
591 | --nb; |
592 | if (nb == 1) |
593 | goto CopyA; |
594 | |
595 | k = gallop_left(PTRADD(ms->bh, pa, ms->hs), |
596 | PTRADD(ms->ah, baseb, ms->hs), |
597 | nb, nb - 1, ms); |
598 | k = nb - k; |
599 | bcount = k; |
600 | if (k) { |
601 | dest -= k; |
602 | pb -= k; |
603 | memmove(PTRADD(ms->bh, dest + 1, |
604 | ms->hs), |
605 | PTRADD(ms->ah, pb + 1, ms->hs), |
606 | k * ms->hs); |
607 | memmove(PTRADD(ms->bt, dest + 1, |
608 | ms->ts), |
609 | PTRADD(ms->at, pb + 1, ms->ts), |
610 | k * ms->ts); |
611 | nb -= k; |
612 | if (nb == 1) |
613 | goto CopyA; |
614 | /* nb==0 is impossible now if |
615 | * the comparison function is |
616 | * consistent, but we can't |
617 | * assume that it is. */ |
618 | if (nb == 0) |
619 | goto SucceedB; |
620 | } |
621 | COPY(PTRADD(ms->bh, dest, ms->hs), |
622 | PTRADD(ms->bh, pa, ms->hs), ms->hs); |
623 | COPY_any(PTRADD(ms->bt, dest, ms->ts), |
624 | PTRADD(ms->bt, pa, ms->ts), ms->ts); |
625 | dest--; |
626 | pa--; |
627 | --na; |
628 | if (na == 0) |
629 | goto SucceedB; |
630 | } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP); |
631 | ++min_gallop; /* penalize it for leaving galloping mode */ |
632 | ms->min_gallop = min_gallop; |
633 | } |
634 | SucceedB: |
635 | if (nb) { |
636 | COPY_anyN(PTRADD(ms->bh, dest + 1 - nb, ms->hs), |
637 | PTRADD(ms->ah, baseb, ms->hs), ms->hs, nb); |
638 | COPY_anyN(PTRADD(ms->bt, dest + 1 - nb, ms->ts), |
639 | PTRADD(ms->at, baseb, ms->ts), ms->ts, nb); |
640 | } |
641 | return 0; |
642 | CopyA: |
643 | assert(nb == 1 && na > 0); |
644 | /* The first element of pb belongs at the front of the |
645 | * merge. */ |
646 | dest -= na; |
647 | pa -= na; |
648 | memmove(PTRADD(ms->bh, dest + 1, ms->hs), |
649 | PTRADD(ms->bh, pa + 1, ms->hs), |
650 | na * ms->hs); |
651 | memmove(PTRADD(ms->bt, dest + 1, ms->ts), |
652 | PTRADD(ms->bt, pa + 1, ms->ts), |
653 | na * ms->ts); |
654 | COPY(PTRADD(ms->bh, dest, ms->hs), |
655 | PTRADD(ms->ah, pb, ms->hs), ms->hs); |
656 | COPY_any(PTRADD(ms->bt, dest, ms->ts), |
657 | PTRADD(ms->at, pb, ms->ts), ms->ts); |
658 | return 0; |
659 | } |
660 | } |
661 | |
662 | static int |
663 | do_ssort(MergeState *ms, ssize_t nremaining, size_t lo, size_t hi, ssize_t minrun) |
664 | { |
665 | do { |
666 | int descending; |
667 | ssize_t n; |
668 | |
669 | /* Identify next run. */ |
670 | { |
671 | /* Return the length of the run beginning at lo, in the slice [lo, |
672 | * hi). lo < hi is required on entry. "A run" is the longest |
673 | * ascending sequence, with |
674 | * |
675 | * lo[0] <= lo[1] <= lo[2] <= ... |
676 | * |
677 | * or the longest descending sequence, with |
678 | * |
679 | * lo[0] > lo[1] > lo[2] > ... |
680 | * |
681 | * Boolean descending is set to 0 in the former case, or to 1 in the |
682 | * latter. For its intended use in a stable mergesort, the strictness |
683 | * of the defn of "descending" is needed so that the caller can safely |
684 | * reverse a descending sequence without violating stability (strict > |
685 | * ensures there are no equal elements to get out of order). */ |
686 | size_t nlo; |
687 | size_t olo; |
688 | |
689 | assert(lo < hi); |
690 | descending = 0; |
691 | olo = lo; |
692 | nlo = lo + 1; |
693 | if (nlo == hi) { |
694 | n = 1; |
695 | } else { |
696 | n = 2; |
697 | if (ISLT(PTRADD(ms->bh, nlo, ms->hs), |
698 | PTRADD(ms->bh, olo, ms->hs), ms)) { |
699 | descending = 1; |
700 | for (olo = nlo++; |
701 | nlo < hi; |
702 | olo = nlo++, ++n) { |
703 | if (!ISLT(PTRADD(ms->bh, nlo, |
704 | ms->hs), |
705 | PTRADD(ms->bh, olo, |
706 | ms->hs), ms)) |
707 | break; |
708 | } |
709 | } |
710 | else { |
711 | for (olo = nlo++; |
712 | nlo < hi; |
713 | olo = nlo++, ++n) { |
714 | if (ISLT(PTRADD(ms->bh, nlo, |
715 | ms->hs), |
716 | PTRADD(ms->bh, olo, |
717 | ms->hs), ms)) |
718 | break; |
719 | } |
720 | } |
721 | } |
722 | } |
723 | if (descending) |
724 | reverse_slice(lo, lo + n, ms); |
725 | /* If short, extend to min(minrun, nremaining). */ |
726 | if (n < minrun) { |
727 | ssize_t force = nremaining <= minrun ? nremaining : minrun; |
728 | |
729 | binarysort(lo, lo + force, lo + n, ms); |
730 | n = force; |
731 | } |
732 | /* Push run onto pending-runs stack, and maybe merge. */ |
733 | assert(ms->n < MAX_MERGE_PENDING); |
734 | ms->pending[ms->n].base = lo; |
735 | ms->pending[ms->n].len = n; |
736 | ms->n++; |
737 | { |
738 | /* Examine the stack of runs waiting to be merged, merging adjacent |
739 | * runs until the stack invariants are re-established: |
740 | * |
741 | * 1. len[-3] > len[-2] + len[-1] |
742 | * 2. len[-2] > len[-1] |
743 | * |
744 | * See listsort.txt for more info. |
745 | * |
746 | * Returns 0 on success, -1 on error. */ |
747 | struct slice *p = ms->pending; |
748 | |
749 | while (ms->n > 1) { |
750 | ssize_t i = ms->n - 2; |
751 | |
752 | if ((i > 0 && |
753 | p[i-1].len <= p[i].len + p[i+1].len) || |
754 | (i > 1 && |
755 | p[i-2].len <= p[i-1].len + p[i].len)) { |
756 | if (p[i - 1].len < p[i + 1].len) |
757 | --i; |
758 | if (merge_at(ms, i) < 0) |
759 | return -1; |
760 | } else if (p[i].len <= p[i + 1].len) { |
761 | if (merge_at(ms, i) < 0) |
762 | return -1; |
763 | } else |
764 | break; |
765 | } |
766 | } |
767 | /* Advance to find next run. */ |
768 | lo += n; |
769 | nremaining -= n; |
770 | } while (nremaining > 0); |
771 | assert(lo == hi); |
772 | |
773 | { |
774 | /* Regardless of invariants, merge all runs on the stack until only |
775 | * one remains. This is used at the end of the mergesort. |
776 | * |
777 | * Returns 0 on success, -1 on error. */ |
778 | struct slice *p = ms->pending; |
779 | |
780 | while (ms->n > 1) { |
781 | ssize_t n = ms->n - 2; |
782 | |
783 | if (n > 0 && p[n - 1].len < p[n + 1].len) |
784 | --n; |
785 | if (merge_at(ms, n) < 0) |
786 | return -1; |
787 | } |
788 | } |
789 | return 0; |
790 | } |
791 | |
792 | #ifdef GDKssortimpl |
793 | /* Stable sort an array "h" (and move t accordingly). |
794 | * "nitems" is the number of items to sort; "hs"+"ts" is the size of |
795 | * the items, "tpe" is the type of the key within the items. If "heap" |
796 | * is non-NULL, the key is actually an offset relative to "heap" and |
797 | * the actual key is found at that offset (MonetDB var-sized |
798 | * atoms). */ |
799 | gdk_return |
800 | GDKssortimpl(void *restrict h, void *restrict t, const void *restrict heap, |
801 | size_t nitems, int hs, int ts, int tpe) |
802 | { |
803 | char temp; |
804 | MergeState ms; |
805 | ssize_t nremaining; |
806 | gdk_return result = GDK_FAIL; |
807 | size_t lo, hi; |
808 | ssize_t minrun; |
809 | |
810 | assert(h); |
811 | assert(hs > 0); |
812 | |
813 | ms.ah = (void *) ms.temparrayh; |
814 | ms.allocedh = MERGESTATE_TEMP_SIZE; |
815 | ms.at = (void *) ms.temparrayt; |
816 | ms.allocedt = MERGESTATE_TEMP_SIZE; |
817 | ms.n = 0; |
818 | ms.min_gallop = MIN_GALLOP; |
819 | ms.compare = ATOMcompare(tpe); |
820 | ms.heap = heap; |
821 | ms.hs = hs; |
822 | ms.ts = ts; |
823 | ms.bh = h; |
824 | if (!t) |
825 | t = &temp; |
826 | ms.bt = t; |
827 | ms.th = ms.tempstorageh; |
828 | ms.tt = ms.tempstoraget; |
829 | assert((size_t) hs <= sizeof(ms.tempstorageh)); |
830 | assert((size_t) ts <= sizeof(ms.tempstoraget)); |
831 | nremaining = (ssize_t) nitems; |
832 | |
833 | if (nremaining < 2) |
834 | goto succeed; |
835 | |
836 | tpe = ATOMbasetype(tpe); |
837 | |
838 | /* March over the array once, left to right, finding natural |
839 | * runs, and extending short natural runs to minrun |
840 | * elements. */ |
841 | lo = 0; |
842 | hi = lo + nremaining; |
843 | minrun = merge_compute_minrun(nremaining); |
844 | switch (tpe) { |
845 | case TYPE_bte: |
846 | if (do_ssort_bte(&ms, nremaining, lo, hi, minrun) < 0) |
847 | goto fail; |
848 | break; |
849 | case TYPE_sht: |
850 | if (do_ssort_sht(&ms, nremaining, lo, hi, minrun) < 0) |
851 | goto fail; |
852 | break; |
853 | case TYPE_int: |
854 | if (do_ssort_int(&ms, nremaining, lo, hi, minrun) < 0) |
855 | goto fail; |
856 | break; |
857 | case TYPE_lng: |
858 | if (do_ssort_lng(&ms, nremaining, lo, hi, minrun) < 0) |
859 | goto fail; |
860 | break; |
861 | #ifdef HAVE_HGE |
862 | case TYPE_hge: |
863 | if (do_ssort_hge(&ms, nremaining, lo, hi, minrun) < 0) |
864 | goto fail; |
865 | break; |
866 | #endif |
867 | case TYPE_flt: |
868 | if (do_ssort_flt(&ms, nremaining, lo, hi, minrun) < 0) |
869 | goto fail; |
870 | break; |
871 | case TYPE_dbl: |
872 | if (do_ssort_dbl(&ms, nremaining, lo, hi, minrun) < 0) |
873 | goto fail; |
874 | break; |
875 | default: |
876 | if (do_ssort_any(&ms, nremaining, lo, hi, minrun) < 0) |
877 | goto fail; |
878 | break; |
879 | } |
880 | assert(ms.n == 1); |
881 | assert(ms.pending[0].base == 0); |
882 | assert(ms.pending[0].len == (ssize_t) nitems); |
883 | |
884 | succeed: |
885 | result = GDK_SUCCEED; |
886 | fail: |
887 | merge_freemem(&ms); |
888 | return result; |
889 | } |
890 | #endif /* GDKssortimpl */ |
891 | |