1 | /* |
2 | * This Source Code Form is subject to the terms of the Mozilla Public |
3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
5 | * |
6 | * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V. |
7 | */ |
8 | |
9 | #include "monetdb_config.h" |
10 | #include "gdk.h" |
11 | #include "gdk_private.h" |
12 | |
13 | /* The maximum number of entries in a MergeState's pending-runs |
14 | * stack. This is enough to sort arrays of size up to about |
15 | * |
16 | * 32 * phi ** MAX_MERGE_PENDING |
17 | * |
18 | * where phi ~= 1.618. 85 is ridiculously large enough, good for an |
19 | * array with 2**64 elements. */ |
20 | #define MAX_MERGE_PENDING 85 |
21 | |
22 | /* When we get into galloping mode, we stay there until both runs win |
23 | * less often than MIN_GALLOP consecutive times. See listsort.txt for |
24 | * more info. */ |
25 | #define MIN_GALLOP 7 |
26 | |
27 | /* Avoid malloc for small temp arrays. */ |
28 | #define MERGESTATE_TEMP_SIZE (256 * sizeof(void *)) |
29 | |
30 | /* One MergeState exists on the stack per invocation of mergesort. |
31 | * It's just a convenient way to pass state around among the helper |
32 | * functions. */ |
33 | struct slice { |
34 | size_t base; |
35 | ssize_t len; |
36 | }; |
37 | |
38 | typedef struct { |
39 | /* The comparison function. */ |
40 | int (*compare) (const void *, const void *); |
41 | const char *heap; |
42 | int hs; |
43 | int ts; |
44 | void *restrict bh; |
45 | void *restrict bt; |
46 | /* Temporary storage for a single entry. If an entry is at |
47 | * most 2 lng's, we don't need to allocate anything. */ |
48 | void *th; |
49 | void *tt; |
50 | #ifdef HAVE_HGE |
51 | hge tempstorageh[1]; /* 16 bytes should be wide enough ... */ |
52 | hge tempstoraget[1]; /* ... for all our fixed-sized data */ |
53 | #else |
54 | lng tempstorageh[2]; /* 16 bytes should be wide enough ... */ |
55 | lng tempstoraget[2]; /* ... for all our fixed-sized data */ |
56 | #endif |
57 | |
58 | /* This controls when we get *into* galloping mode. It's |
59 | * initialized to MIN_GALLOP. merge_lo and merge_hi tend to |
60 | * nudge it higher for random data, and lower for highly |
61 | * structured data. */ |
62 | ssize_t min_gallop; |
63 | |
64 | /* 'ah' and 'at' are temp storage to help with merges. They |
65 | * contain room for alloced[ht] entries. */ |
66 | void *ah; |
67 | ssize_t allocedh; |
68 | void *at; |
69 | ssize_t allocedt; |
70 | |
71 | /* A stack of n pending runs yet to be merged. Run #i starts |
72 | * at address base[i] and extends for len[i] elements. It's |
73 | * always true (so long as the indices are in bounds) that |
74 | * |
75 | * pending[i].base + pending[i].len == pending[i+1].base |
76 | * |
77 | * so we could cut the storage for this, but it's a minor |
78 | * amount, and keeping all the info explicit simplifies the |
79 | * code. */ |
80 | int n; |
81 | struct slice pending[MAX_MERGE_PENDING]; |
82 | |
83 | /* 'ah' and 'at' point to this when possible, rather than muck |
84 | * with malloc. */ |
85 | char temparrayh[MERGESTATE_TEMP_SIZE]; |
86 | char temparrayt[MERGESTATE_TEMP_SIZE]; |
87 | } MergeState; |
88 | |
89 | /* Free all the temp memory owned by the MergeState. This must be |
90 | * called when you're done with a MergeState, and may be called before |
91 | * then if you want to free the temp memory early. */ |
92 | static void |
93 | merge_freemem(MergeState *ms) |
94 | { |
95 | assert(ms != NULL); |
96 | if (ms->ah != (void *) ms->temparrayh) |
97 | GDKfree(ms->ah); |
98 | ms->ah = (void *) ms->temparrayh; |
99 | ms->allocedh = MERGESTATE_TEMP_SIZE; |
100 | if (ms->at != (void *) ms->temparrayt) |
101 | GDKfree(ms->at); |
102 | ms->at = (void *) ms->temparrayt; |
103 | ms->allocedt = MERGESTATE_TEMP_SIZE; |
104 | } |
105 | |
106 | /* Ensure enough temp memory for 'need' array slots is available. |
107 | * Returns 0 on success and -1 if the memory can't be gotten. */ |
108 | static int |
109 | merge_getmem(MergeState *ms, ssize_t need, void **ap, |
110 | ssize_t *allocedp, int s, char *temparray) |
111 | { |
112 | assert(ms != NULL); |
113 | need *= s; |
114 | if (need <= *allocedp) |
115 | return 0; |
116 | /* Don't realloc! That can cost cycles to copy the old data, |
117 | * but we don't care what's in the block. */ |
118 | if (*ap != (void *) temparray) |
119 | GDKfree(*ap); |
120 | *ap = GDKmalloc(need); |
121 | if (*ap) { |
122 | *allocedp = need; |
123 | return 0; |
124 | } |
125 | merge_freemem(ms); /* reset to sane state */ |
126 | return -1; |
127 | } |
128 | |
129 | #define MERGE_GETMEMH(MS, NEED) \ |
130 | ((NEED) * (MS)->hs <= (MS)->allocedh ? 0 : \ |
131 | merge_getmem(MS, NEED, &(MS)->ah, &(MS)->allocedh, (MS)->hs, \ |
132 | (MS)->temparrayh)) |
133 | #define MERGE_GETMEMT(MS, NEED) \ |
134 | ((NEED) * (MS)->ts <= (MS)->allocedt ? 0 : \ |
135 | merge_getmem(MS, NEED, &(MS)->at, &(MS)->allocedt, (MS)->ts, \ |
136 | (MS)->temparrayt)) |
137 | |
138 | #define PTRADD(p, n, w) ((void *) ((char *) (p) + (n) * (w))) |
139 | |
140 | #define COPY_bte(d,s,w) (* (bte *) (d) = * (bte *) (s)) |
141 | #define COPY_sht(d,s,w) (* (sht *) (d) = * (sht *) (s)) |
142 | #define COPY_int(d,s,w) (* (int *) (d) = * (int *) (s)) |
143 | #define COPY_lng(d,s,w) (* (lng *) (d) = * (lng *) (s)) |
144 | #ifdef HAVE_HGE |
145 | #define COPY_hge(d,s,w) (* (hge *) (d) = * (hge *) (s)) |
146 | #endif |
147 | #define COPY_flt(d,s,w) (* (flt *) (d) = * (flt *) (s)) |
148 | #define COPY_dbl(d,s,w) (* (dbl *) (d) = * (dbl *) (s)) |
149 | #define COPY_oid(d,s,w) (* (oid *) (d) = * (oid *) (s)) |
150 | |
151 | #define COPY_any(d,s,w) \ |
152 | do { \ |
153 | switch (w) { \ |
154 | case 0: \ |
155 | break; \ |
156 | case sizeof(bte): \ |
157 | * (bte *) (d) = * (bte *) (s); \ |
158 | break; \ |
159 | case sizeof(sht): \ |
160 | * (sht *) (d) = * (sht *) (s); \ |
161 | break; \ |
162 | case sizeof(int): \ |
163 | * (int *) (d) = * (int *) (s); \ |
164 | break; \ |
165 | case sizeof(lng): \ |
166 | * (lng *) (d) = * (lng *) (s); \ |
167 | break; \ |
168 | case 2 * sizeof(lng): \ |
169 | * (lng *) (d) = * (lng *) (s); \ |
170 | * ((lng *) (d) + 1) = * ((lng *) (s) + 1); \ |
171 | break; \ |
172 | default: \ |
173 | memcpy((d), (s), (size_t) (w)); \ |
174 | break; \ |
175 | } \ |
176 | } while (0) |
177 | |
178 | #define COPY_anyN(d,s,w,N) \ |
179 | do { \ |
180 | int i; \ |
181 | switch (w) { \ |
182 | case 0: \ |
183 | break; \ |
184 | case sizeof(bte): \ |
185 | for (i = 0; i < N; i++) \ |
186 | ((bte*)(d))[i] = ((bte*)(s))[i]; \ |
187 | break; \ |
188 | case sizeof(sht): \ |
189 | for (i = 0; i < N; i++) \ |
190 | ((sht*)(d))[i] = ((sht*)(s))[i]; \ |
191 | break; \ |
192 | case sizeof(int): \ |
193 | for (i = 0; i < N; i++) \ |
194 | ((int*)(d))[i] = ((int*)(s))[i]; \ |
195 | break; \ |
196 | case sizeof(lng): \ |
197 | for (i = 0; i < N; i++) \ |
198 | ((lng*)(d))[i] = ((lng*)(s))[i]; \ |
199 | break; \ |
200 | case 2 * sizeof(lng): \ |
201 | for (i = 0; i < N*2; i++) \ |
202 | ((lng*)(d))[i] = ((lng*)(s))[i]; \ |
203 | break; \ |
204 | default: \ |
205 | memcpy((d), (s), (size_t) (w)*N); \ |
206 | break; \ |
207 | } \ |
208 | } while (0) |
209 | |
210 | #define ISLT_any(X, Y, ms) (((ms)->heap ? (*(ms)->compare)((ms)->heap + VarHeapVal(X,0,(ms)->hs), (ms)->heap + VarHeapVal(Y,0,(ms)->hs)) : (*(ms)->compare)((X), (Y))) < 0) |
211 | #define ISLT_bte(X, Y, ms) (* (bte *) (X) < * (bte *) (Y)) |
212 | #define ISLT_sht(X, Y, ms) (* (sht *) (X) < * (sht *) (Y)) |
213 | #define ISLT_int(X, Y, ms) (* (int *) (X) < * (int *) (Y)) |
214 | #define ISLT_lng(X, Y, ms) (* (lng *) (X) < * (lng *) (Y)) |
215 | #ifdef HAVE_HGE |
216 | #define ISLT_hge(X, Y, ms) (* (hge *) (X) < * (hge *) (Y)) |
217 | #endif |
218 | #define ISLT_flt(X, Y, ms) (!is_flt_nil(*(flt*)(Y)) && (is_flt_nil(*(flt*)(X)) || *(flt*)(X) < *(flt*)(Y))) |
219 | #define ISLT_dbl(X, Y, ms) (!is_dbl_nil(*(dbl*)(Y)) && (is_dbl_nil(*(dbl*)(X)) || *(dbl*)(X) < *(dbl*)(Y))) |
220 | #if SIZEOF_OID == SIZEOF_LNG |
221 | #define ISLT_oid(X, Y, ms) ISLT_lng(X, Y, ms) |
222 | #else |
223 | #define ISLT_oid(X, Y, ms) ISLT_int(X, Y, ms) |
224 | #endif |
225 | |
226 | /* for reverse order, just reverse arguments */ |
227 | #define ISLT_any_rev(X, Y, ms) ISLT_any(Y, X, ms) |
228 | #define ISLT_bte_rev(X, Y, ms) ISLT_bte(Y, X, ms) |
229 | #define ISLT_sht_rev(X, Y, ms) ISLT_sht(Y, X, ms) |
230 | #define ISLT_int_rev(X, Y, ms) ISLT_int(Y, X, ms) |
231 | #define ISLT_lng_rev(X, Y, ms) ISLT_lng(Y, X, ms) |
232 | #ifdef HAVE_HGE |
233 | #define ISLT_hge_rev(X, Y, ms) ISLT_hge(Y, X, ms) |
234 | #endif |
235 | #define ISLT_flt_rev(X, Y, ms) ISLT_flt(Y, X, ms) |
236 | #define ISLT_dbl_rev(X, Y, ms) ISLT_dbl(Y, X, ms) |
237 | #define ISLT_oid_rev(X, Y, ms) ISLT_oid(Y, X, ms) |
238 | |
239 | /* Reverse a slice of a list in place, from lo up to (exclusive) hi. */ |
240 | static void |
241 | reverse_slice(size_t lo, size_t hi, MergeState *ms) |
242 | { |
243 | void *th, *tt; |
244 | int hs, ts; |
245 | |
246 | assert(ms); |
247 | |
248 | th = ms->th; |
249 | tt = ms->tt; |
250 | hs = ms->hs; |
251 | ts = ms->ts; |
252 | |
253 | hi--; |
254 | while (lo < hi) { |
255 | COPY_any(th, PTRADD(ms->bh, lo, hs), hs); |
256 | COPY_any(PTRADD(ms->bh, lo, hs), PTRADD(ms->bh, hi, hs), hs); |
257 | COPY_any(PTRADD(ms->bh, hi, hs), th, hs); |
258 | COPY_any(tt, PTRADD(ms->bt, lo, ts), ts); |
259 | COPY_any(PTRADD(ms->bt, lo, ts), PTRADD(ms->bt, hi, ts), ts); |
260 | COPY_any(PTRADD(ms->bt, hi, ts), tt, ts); |
261 | lo++; |
262 | hi--; |
263 | } |
264 | } |
265 | |
266 | static ssize_t |
267 | merge_compute_minrun(ssize_t n) |
268 | { |
269 | ssize_t r = 0; /* becomes 1 if any 1 bits are shifted off */ |
270 | |
271 | assert(n >= 0); |
272 | while (n >= 16) { |
273 | r |= n & 1; |
274 | n >>= 1; |
275 | } |
276 | return n + r; |
277 | } |
278 | |
279 | |
280 | #define COPY COPY_bte |
281 | |
282 | #define binarysort binarysort_bte |
283 | #define do_ssort do_ssort_bte |
284 | #define gallop_left gallop_left_bte |
285 | #define gallop_right gallop_right_bte |
286 | #define ISLT ISLT_bte |
287 | #define merge_at merge_at_bte |
288 | #include "gdk_ssort_impl.h" |
289 | #undef binarysort |
290 | #undef do_ssort |
291 | #undef gallop_left |
292 | #undef gallop_right |
293 | #undef ISLT |
294 | #undef merge_at |
295 | |
296 | #define binarysort binarysort_bte_rev |
297 | #define do_ssort do_ssort_bte_rev |
298 | #define gallop_left gallop_left_bte_rev |
299 | #define gallop_right gallop_right_bte_rev |
300 | #define ISLT ISLT_bte_rev |
301 | #define merge_at merge_at_bte_rev |
302 | #include "gdk_ssort_impl.h" |
303 | #undef binarysort |
304 | #undef do_ssort |
305 | #undef gallop_left |
306 | #undef gallop_right |
307 | #undef ISLT |
308 | #undef merge_at |
309 | |
310 | #undef COPY |
311 | |
312 | #define COPY COPY_sht |
313 | |
314 | #define binarysort binarysort_sht |
315 | #define do_ssort do_ssort_sht |
316 | #define gallop_left gallop_left_sht |
317 | #define gallop_right gallop_right_sht |
318 | #define ISLT ISLT_sht |
319 | #define merge_at merge_at_sht |
320 | #include "gdk_ssort_impl.h" |
321 | #undef binarysort |
322 | #undef do_ssort |
323 | #undef gallop_left |
324 | #undef gallop_right |
325 | #undef ISLT |
326 | #undef merge_at |
327 | |
328 | #define binarysort binarysort_sht_rev |
329 | #define do_ssort do_ssort_sht_rev |
330 | #define gallop_left gallop_left_sht_rev |
331 | #define gallop_right gallop_right_sht_rev |
332 | #define ISLT ISLT_sht_rev |
333 | #define merge_at merge_at_sht_rev |
334 | #include "gdk_ssort_impl.h" |
335 | #undef binarysort |
336 | #undef do_ssort |
337 | #undef gallop_left |
338 | #undef gallop_right |
339 | #undef ISLT |
340 | #undef merge_at |
341 | |
342 | #undef COPY |
343 | |
344 | #define COPY COPY_int |
345 | |
346 | #define binarysort binarysort_int |
347 | #define do_ssort do_ssort_int |
348 | #define gallop_left gallop_left_int |
349 | #define gallop_right gallop_right_int |
350 | #define ISLT ISLT_int |
351 | #define merge_at merge_at_int |
352 | #include "gdk_ssort_impl.h" |
353 | #undef binarysort |
354 | #undef do_ssort |
355 | #undef gallop_left |
356 | #undef gallop_right |
357 | #undef ISLT |
358 | #undef merge_at |
359 | |
360 | #define binarysort binarysort_int_rev |
361 | #define do_ssort do_ssort_int_rev |
362 | #define gallop_left gallop_left_int_rev |
363 | #define gallop_right gallop_right_int_rev |
364 | #define ISLT ISLT_int_rev |
365 | #define merge_at merge_at_int_rev |
366 | #include "gdk_ssort_impl.h" |
367 | #undef binarysort |
368 | #undef do_ssort |
369 | #undef gallop_left |
370 | #undef gallop_right |
371 | #undef ISLT |
372 | #undef merge_at |
373 | |
374 | #undef COPY |
375 | |
376 | #define COPY COPY_lng |
377 | |
378 | #define binarysort binarysort_lng |
379 | #define do_ssort do_ssort_lng |
380 | #define gallop_left gallop_left_lng |
381 | #define gallop_right gallop_right_lng |
382 | #define ISLT ISLT_lng |
383 | #define merge_at merge_at_lng |
384 | #include "gdk_ssort_impl.h" |
385 | #undef binarysort |
386 | #undef do_ssort |
387 | #undef gallop_left |
388 | #undef gallop_right |
389 | #undef ISLT |
390 | #undef merge_at |
391 | |
392 | #define binarysort binarysort_lng_rev |
393 | #define do_ssort do_ssort_lng_rev |
394 | #define gallop_left gallop_left_lng_rev |
395 | #define gallop_right gallop_right_lng_rev |
396 | #define ISLT ISLT_lng_rev |
397 | #define merge_at merge_at_lng_rev |
398 | #include "gdk_ssort_impl.h" |
399 | #undef binarysort |
400 | #undef do_ssort |
401 | #undef gallop_left |
402 | #undef gallop_right |
403 | #undef ISLT |
404 | #undef merge_at |
405 | |
406 | #undef COPY |
407 | |
408 | #ifdef HAVE_HGE |
409 | #define COPY COPY_hge |
410 | |
411 | #define binarysort binarysort_hge |
412 | #define do_ssort do_ssort_hge |
413 | #define gallop_left gallop_left_hge |
414 | #define gallop_right gallop_right_hge |
415 | #define ISLT ISLT_hge |
416 | #define merge_at merge_at_hge |
417 | #include "gdk_ssort_impl.h" |
418 | #undef binarysort |
419 | #undef do_ssort |
420 | #undef gallop_left |
421 | #undef gallop_right |
422 | #undef ISLT |
423 | #undef merge_at |
424 | |
425 | #define binarysort binarysort_hge_rev |
426 | #define do_ssort do_ssort_hge_rev |
427 | #define gallop_left gallop_left_hge_rev |
428 | #define gallop_right gallop_right_hge_rev |
429 | #define ISLT ISLT_hge_rev |
430 | #define merge_at merge_at_hge_rev |
431 | #include "gdk_ssort_impl.h" |
432 | #undef binarysort |
433 | #undef do_ssort |
434 | #undef gallop_left |
435 | #undef gallop_right |
436 | #undef ISLT |
437 | #undef merge_at |
438 | |
439 | #undef COPY |
440 | #endif |
441 | |
442 | #define COPY COPY_flt |
443 | |
444 | #define binarysort binarysort_flt |
445 | #define do_ssort do_ssort_flt |
446 | #define gallop_left gallop_left_flt |
447 | #define gallop_right gallop_right_flt |
448 | #define ISLT ISLT_flt |
449 | #define merge_at merge_at_flt |
450 | #include "gdk_ssort_impl.h" |
451 | #undef binarysort |
452 | #undef do_ssort |
453 | #undef gallop_left |
454 | #undef gallop_right |
455 | #undef ISLT |
456 | #undef merge_at |
457 | |
458 | #define binarysort binarysort_flt_rev |
459 | #define do_ssort do_ssort_flt_rev |
460 | #define gallop_left gallop_left_flt_rev |
461 | #define gallop_right gallop_right_flt_rev |
462 | #define ISLT ISLT_flt_rev |
463 | #define merge_at merge_at_flt_rev |
464 | #include "gdk_ssort_impl.h" |
465 | #undef binarysort |
466 | #undef do_ssort |
467 | #undef gallop_left |
468 | #undef gallop_right |
469 | #undef ISLT |
470 | #undef merge_at |
471 | |
472 | #undef COPY |
473 | |
474 | #define COPY COPY_dbl |
475 | |
476 | #define binarysort binarysort_dbl |
477 | #define do_ssort do_ssort_dbl |
478 | #define gallop_left gallop_left_dbl |
479 | #define gallop_right gallop_right_dbl |
480 | #define ISLT ISLT_dbl |
481 | #define merge_at merge_at_dbl |
482 | #include "gdk_ssort_impl.h" |
483 | #undef binarysort |
484 | #undef do_ssort |
485 | #undef gallop_left |
486 | #undef gallop_right |
487 | #undef ISLT |
488 | #undef merge_at |
489 | |
490 | #define binarysort binarysort_dbl_rev |
491 | #define do_ssort do_ssort_dbl_rev |
492 | #define gallop_left gallop_left_dbl_rev |
493 | #define gallop_right gallop_right_dbl_rev |
494 | #define ISLT ISLT_dbl_rev |
495 | #define merge_at merge_at_dbl_rev |
496 | #include "gdk_ssort_impl.h" |
497 | #undef binarysort |
498 | #undef do_ssort |
499 | #undef gallop_left |
500 | #undef gallop_right |
501 | #undef ISLT |
502 | #undef merge_at |
503 | |
504 | #undef COPY |
505 | |
506 | #define COPY COPY_any |
507 | |
508 | #define binarysort binarysort_any |
509 | #define do_ssort do_ssort_any |
510 | #define gallop_left gallop_left_any |
511 | #define gallop_right gallop_right_any |
512 | #define ISLT ISLT_any |
513 | #define merge_at merge_at_any |
514 | |
515 | #define GDKssortimpl GDKssort |
516 | |
517 | #include "gdk_ssort_impl.h" |
518 | |
519 | #undef GDKssortimpl |
520 | |
521 | #undef binarysort |
522 | #undef do_ssort |
523 | #undef gallop_left |
524 | #undef gallop_right |
525 | #undef ISLT |
526 | #undef merge_at |
527 | |
528 | #define binarysort binarysort_any_rev |
529 | #define do_ssort do_ssort_any_rev |
530 | #define gallop_left gallop_left_any_rev |
531 | #define gallop_right gallop_right_any_rev |
532 | #define ISLT ISLT_any_rev |
533 | #define merge_at merge_at_any_rev |
534 | |
535 | #define GDKssortimpl GDKssort_rev |
536 | #define do_ssort_bte do_ssort_bte_rev |
537 | #define do_ssort_sht do_ssort_sht_rev |
538 | #define do_ssort_int do_ssort_int_rev |
539 | #define do_ssort_lng do_ssort_lng_rev |
540 | #ifdef HAVE_HGE |
541 | #define do_ssort_hge do_ssort_hge_rev |
542 | #endif |
543 | #define do_ssort_flt do_ssort_flt_rev |
544 | #define do_ssort_dbl do_ssort_dbl_rev |
545 | #define do_ssort_any do_ssort_any_rev |
546 | |
547 | #include "gdk_ssort_impl.h" |
548 | |
549 | #undef GDKssortimpl |
550 | #undef do_ssort_bte |
551 | #undef do_ssort_sht |
552 | #undef do_ssort_int |
553 | #undef do_ssort_lng |
554 | #ifdef HAVE_HGE |
555 | #undef do_ssort_hge |
556 | #endif |
557 | #undef do_ssort_flt |
558 | #undef do_ssort_dbl |
559 | #undef do_ssort_any |
560 | |
561 | #undef binarysort |
562 | #undef do_ssort |
563 | #undef gallop_left |
564 | #undef gallop_right |
565 | #undef ISLT |
566 | #undef merge_at |
567 | |
568 | #undef COPY |
569 | |