1/*
2 * This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
5 *
6 * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V.
7 */
8
9#include "monetdb_config.h"
10#include "gdk.h"
11#include "gdk_private.h"
12
13/* The maximum number of entries in a MergeState's pending-runs
14 * stack. This is enough to sort arrays of size up to about
15 *
16 * 32 * phi ** MAX_MERGE_PENDING
17 *
18 * where phi ~= 1.618. 85 is ridiculously large enough, good for an
19 * array with 2**64 elements. */
20#define MAX_MERGE_PENDING 85
21
22/* When we get into galloping mode, we stay there until both runs win
23 * less often than MIN_GALLOP consecutive times. See listsort.txt for
24 * more info. */
25#define MIN_GALLOP 7
26
27/* Avoid malloc for small temp arrays. */
28#define MERGESTATE_TEMP_SIZE (256 * sizeof(void *))
29
30/* One MergeState exists on the stack per invocation of mergesort.
31 * It's just a convenient way to pass state around among the helper
32 * functions. */
33struct slice {
34 size_t base;
35 ssize_t len;
36};
37
38typedef struct {
39 /* The comparison function. */
40 int (*compare) (const void *, const void *);
41 const char *heap;
42 int hs;
43 int ts;
44 void *restrict bh;
45 void *restrict bt;
46 /* Temporary storage for a single entry. If an entry is at
47 * most 2 lng's, we don't need to allocate anything. */
48 void *th;
49 void *tt;
50#ifdef HAVE_HGE
51 hge tempstorageh[1]; /* 16 bytes should be wide enough ... */
52 hge tempstoraget[1]; /* ... for all our fixed-sized data */
53#else
54 lng tempstorageh[2]; /* 16 bytes should be wide enough ... */
55 lng tempstoraget[2]; /* ... for all our fixed-sized data */
56#endif
57
58 /* This controls when we get *into* galloping mode. It's
59 * initialized to MIN_GALLOP. merge_lo and merge_hi tend to
60 * nudge it higher for random data, and lower for highly
61 * structured data. */
62 ssize_t min_gallop;
63
64 /* 'ah' and 'at' are temp storage to help with merges. They
65 * contain room for alloced[ht] entries. */
66 void *ah;
67 ssize_t allocedh;
68 void *at;
69 ssize_t allocedt;
70
71 /* A stack of n pending runs yet to be merged. Run #i starts
72 * at address base[i] and extends for len[i] elements. It's
73 * always true (so long as the indices are in bounds) that
74 *
75 * pending[i].base + pending[i].len == pending[i+1].base
76 *
77 * so we could cut the storage for this, but it's a minor
78 * amount, and keeping all the info explicit simplifies the
79 * code. */
80 int n;
81 struct slice pending[MAX_MERGE_PENDING];
82
83 /* 'ah' and 'at' point to this when possible, rather than muck
84 * with malloc. */
85 char temparrayh[MERGESTATE_TEMP_SIZE];
86 char temparrayt[MERGESTATE_TEMP_SIZE];
87} MergeState;
88
89/* Free all the temp memory owned by the MergeState. This must be
90 * called when you're done with a MergeState, and may be called before
91 * then if you want to free the temp memory early. */
92static void
93merge_freemem(MergeState *ms)
94{
95 assert(ms != NULL);
96 if (ms->ah != (void *) ms->temparrayh)
97 GDKfree(ms->ah);
98 ms->ah = (void *) ms->temparrayh;
99 ms->allocedh = MERGESTATE_TEMP_SIZE;
100 if (ms->at != (void *) ms->temparrayt)
101 GDKfree(ms->at);
102 ms->at = (void *) ms->temparrayt;
103 ms->allocedt = MERGESTATE_TEMP_SIZE;
104}
105
106/* Ensure enough temp memory for 'need' array slots is available.
107 * Returns 0 on success and -1 if the memory can't be gotten. */
108static int
109merge_getmem(MergeState *ms, ssize_t need, void **ap,
110 ssize_t *allocedp, int s, char *temparray)
111{
112 assert(ms != NULL);
113 need *= s;
114 if (need <= *allocedp)
115 return 0;
116 /* Don't realloc! That can cost cycles to copy the old data,
117 * but we don't care what's in the block. */
118 if (*ap != (void *) temparray)
119 GDKfree(*ap);
120 *ap = GDKmalloc(need);
121 if (*ap) {
122 *allocedp = need;
123 return 0;
124 }
125 merge_freemem(ms); /* reset to sane state */
126 return -1;
127}
128
129#define MERGE_GETMEMH(MS, NEED) \
130 ((NEED) * (MS)->hs <= (MS)->allocedh ? 0 : \
131 merge_getmem(MS, NEED, &(MS)->ah, &(MS)->allocedh, (MS)->hs, \
132 (MS)->temparrayh))
133#define MERGE_GETMEMT(MS, NEED) \
134 ((NEED) * (MS)->ts <= (MS)->allocedt ? 0 : \
135 merge_getmem(MS, NEED, &(MS)->at, &(MS)->allocedt, (MS)->ts, \
136 (MS)->temparrayt))
137
138#define PTRADD(p, n, w) ((void *) ((char *) (p) + (n) * (w)))
139
140#define COPY_bte(d,s,w) (* (bte *) (d) = * (bte *) (s))
141#define COPY_sht(d,s,w) (* (sht *) (d) = * (sht *) (s))
142#define COPY_int(d,s,w) (* (int *) (d) = * (int *) (s))
143#define COPY_lng(d,s,w) (* (lng *) (d) = * (lng *) (s))
144#ifdef HAVE_HGE
145#define COPY_hge(d,s,w) (* (hge *) (d) = * (hge *) (s))
146#endif
147#define COPY_flt(d,s,w) (* (flt *) (d) = * (flt *) (s))
148#define COPY_dbl(d,s,w) (* (dbl *) (d) = * (dbl *) (s))
149#define COPY_oid(d,s,w) (* (oid *) (d) = * (oid *) (s))
150
151#define COPY_any(d,s,w) \
152 do { \
153 switch (w) { \
154 case 0: \
155 break; \
156 case sizeof(bte): \
157 * (bte *) (d) = * (bte *) (s); \
158 break; \
159 case sizeof(sht): \
160 * (sht *) (d) = * (sht *) (s); \
161 break; \
162 case sizeof(int): \
163 * (int *) (d) = * (int *) (s); \
164 break; \
165 case sizeof(lng): \
166 * (lng *) (d) = * (lng *) (s); \
167 break; \
168 case 2 * sizeof(lng): \
169 * (lng *) (d) = * (lng *) (s); \
170 * ((lng *) (d) + 1) = * ((lng *) (s) + 1); \
171 break; \
172 default: \
173 memcpy((d), (s), (size_t) (w)); \
174 break; \
175 } \
176 } while (0)
177
178#define COPY_anyN(d,s,w,N) \
179 do { \
180 int i; \
181 switch (w) { \
182 case 0: \
183 break; \
184 case sizeof(bte): \
185 for (i = 0; i < N; i++) \
186 ((bte*)(d))[i] = ((bte*)(s))[i]; \
187 break; \
188 case sizeof(sht): \
189 for (i = 0; i < N; i++) \
190 ((sht*)(d))[i] = ((sht*)(s))[i]; \
191 break; \
192 case sizeof(int): \
193 for (i = 0; i < N; i++) \
194 ((int*)(d))[i] = ((int*)(s))[i]; \
195 break; \
196 case sizeof(lng): \
197 for (i = 0; i < N; i++) \
198 ((lng*)(d))[i] = ((lng*)(s))[i]; \
199 break; \
200 case 2 * sizeof(lng): \
201 for (i = 0; i < N*2; i++) \
202 ((lng*)(d))[i] = ((lng*)(s))[i]; \
203 break; \
204 default: \
205 memcpy((d), (s), (size_t) (w)*N); \
206 break; \
207 } \
208 } while (0)
209
210#define ISLT_any(X, Y, ms) (((ms)->heap ? (*(ms)->compare)((ms)->heap + VarHeapVal(X,0,(ms)->hs), (ms)->heap + VarHeapVal(Y,0,(ms)->hs)) : (*(ms)->compare)((X), (Y))) < 0)
211#define ISLT_bte(X, Y, ms) (* (bte *) (X) < * (bte *) (Y))
212#define ISLT_sht(X, Y, ms) (* (sht *) (X) < * (sht *) (Y))
213#define ISLT_int(X, Y, ms) (* (int *) (X) < * (int *) (Y))
214#define ISLT_lng(X, Y, ms) (* (lng *) (X) < * (lng *) (Y))
215#ifdef HAVE_HGE
216#define ISLT_hge(X, Y, ms) (* (hge *) (X) < * (hge *) (Y))
217#endif
218#define ISLT_flt(X, Y, ms) (!is_flt_nil(*(flt*)(Y)) && (is_flt_nil(*(flt*)(X)) || *(flt*)(X) < *(flt*)(Y)))
219#define ISLT_dbl(X, Y, ms) (!is_dbl_nil(*(dbl*)(Y)) && (is_dbl_nil(*(dbl*)(X)) || *(dbl*)(X) < *(dbl*)(Y)))
220#if SIZEOF_OID == SIZEOF_LNG
221#define ISLT_oid(X, Y, ms) ISLT_lng(X, Y, ms)
222#else
223#define ISLT_oid(X, Y, ms) ISLT_int(X, Y, ms)
224#endif
225
226/* for reverse order, just reverse arguments */
227#define ISLT_any_rev(X, Y, ms) ISLT_any(Y, X, ms)
228#define ISLT_bte_rev(X, Y, ms) ISLT_bte(Y, X, ms)
229#define ISLT_sht_rev(X, Y, ms) ISLT_sht(Y, X, ms)
230#define ISLT_int_rev(X, Y, ms) ISLT_int(Y, X, ms)
231#define ISLT_lng_rev(X, Y, ms) ISLT_lng(Y, X, ms)
232#ifdef HAVE_HGE
233#define ISLT_hge_rev(X, Y, ms) ISLT_hge(Y, X, ms)
234#endif
235#define ISLT_flt_rev(X, Y, ms) ISLT_flt(Y, X, ms)
236#define ISLT_dbl_rev(X, Y, ms) ISLT_dbl(Y, X, ms)
237#define ISLT_oid_rev(X, Y, ms) ISLT_oid(Y, X, ms)
238
239/* Reverse a slice of a list in place, from lo up to (exclusive) hi. */
240static void
241reverse_slice(size_t lo, size_t hi, MergeState *ms)
242{
243 void *th, *tt;
244 int hs, ts;
245
246 assert(ms);
247
248 th = ms->th;
249 tt = ms->tt;
250 hs = ms->hs;
251 ts = ms->ts;
252
253 hi--;
254 while (lo < hi) {
255 COPY_any(th, PTRADD(ms->bh, lo, hs), hs);
256 COPY_any(PTRADD(ms->bh, lo, hs), PTRADD(ms->bh, hi, hs), hs);
257 COPY_any(PTRADD(ms->bh, hi, hs), th, hs);
258 COPY_any(tt, PTRADD(ms->bt, lo, ts), ts);
259 COPY_any(PTRADD(ms->bt, lo, ts), PTRADD(ms->bt, hi, ts), ts);
260 COPY_any(PTRADD(ms->bt, hi, ts), tt, ts);
261 lo++;
262 hi--;
263 }
264}
265
266static ssize_t
267merge_compute_minrun(ssize_t n)
268{
269 ssize_t r = 0; /* becomes 1 if any 1 bits are shifted off */
270
271 assert(n >= 0);
272 while (n >= 16) {
273 r |= n & 1;
274 n >>= 1;
275 }
276 return n + r;
277}
278
279
280#define COPY COPY_bte
281
282#define binarysort binarysort_bte
283#define do_ssort do_ssort_bte
284#define gallop_left gallop_left_bte
285#define gallop_right gallop_right_bte
286#define ISLT ISLT_bte
287#define merge_at merge_at_bte
288#include "gdk_ssort_impl.h"
289#undef binarysort
290#undef do_ssort
291#undef gallop_left
292#undef gallop_right
293#undef ISLT
294#undef merge_at
295
296#define binarysort binarysort_bte_rev
297#define do_ssort do_ssort_bte_rev
298#define gallop_left gallop_left_bte_rev
299#define gallop_right gallop_right_bte_rev
300#define ISLT ISLT_bte_rev
301#define merge_at merge_at_bte_rev
302#include "gdk_ssort_impl.h"
303#undef binarysort
304#undef do_ssort
305#undef gallop_left
306#undef gallop_right
307#undef ISLT
308#undef merge_at
309
310#undef COPY
311
312#define COPY COPY_sht
313
314#define binarysort binarysort_sht
315#define do_ssort do_ssort_sht
316#define gallop_left gallop_left_sht
317#define gallop_right gallop_right_sht
318#define ISLT ISLT_sht
319#define merge_at merge_at_sht
320#include "gdk_ssort_impl.h"
321#undef binarysort
322#undef do_ssort
323#undef gallop_left
324#undef gallop_right
325#undef ISLT
326#undef merge_at
327
328#define binarysort binarysort_sht_rev
329#define do_ssort do_ssort_sht_rev
330#define gallop_left gallop_left_sht_rev
331#define gallop_right gallop_right_sht_rev
332#define ISLT ISLT_sht_rev
333#define merge_at merge_at_sht_rev
334#include "gdk_ssort_impl.h"
335#undef binarysort
336#undef do_ssort
337#undef gallop_left
338#undef gallop_right
339#undef ISLT
340#undef merge_at
341
342#undef COPY
343
344#define COPY COPY_int
345
346#define binarysort binarysort_int
347#define do_ssort do_ssort_int
348#define gallop_left gallop_left_int
349#define gallop_right gallop_right_int
350#define ISLT ISLT_int
351#define merge_at merge_at_int
352#include "gdk_ssort_impl.h"
353#undef binarysort
354#undef do_ssort
355#undef gallop_left
356#undef gallop_right
357#undef ISLT
358#undef merge_at
359
360#define binarysort binarysort_int_rev
361#define do_ssort do_ssort_int_rev
362#define gallop_left gallop_left_int_rev
363#define gallop_right gallop_right_int_rev
364#define ISLT ISLT_int_rev
365#define merge_at merge_at_int_rev
366#include "gdk_ssort_impl.h"
367#undef binarysort
368#undef do_ssort
369#undef gallop_left
370#undef gallop_right
371#undef ISLT
372#undef merge_at
373
374#undef COPY
375
376#define COPY COPY_lng
377
378#define binarysort binarysort_lng
379#define do_ssort do_ssort_lng
380#define gallop_left gallop_left_lng
381#define gallop_right gallop_right_lng
382#define ISLT ISLT_lng
383#define merge_at merge_at_lng
384#include "gdk_ssort_impl.h"
385#undef binarysort
386#undef do_ssort
387#undef gallop_left
388#undef gallop_right
389#undef ISLT
390#undef merge_at
391
392#define binarysort binarysort_lng_rev
393#define do_ssort do_ssort_lng_rev
394#define gallop_left gallop_left_lng_rev
395#define gallop_right gallop_right_lng_rev
396#define ISLT ISLT_lng_rev
397#define merge_at merge_at_lng_rev
398#include "gdk_ssort_impl.h"
399#undef binarysort
400#undef do_ssort
401#undef gallop_left
402#undef gallop_right
403#undef ISLT
404#undef merge_at
405
406#undef COPY
407
408#ifdef HAVE_HGE
409#define COPY COPY_hge
410
411#define binarysort binarysort_hge
412#define do_ssort do_ssort_hge
413#define gallop_left gallop_left_hge
414#define gallop_right gallop_right_hge
415#define ISLT ISLT_hge
416#define merge_at merge_at_hge
417#include "gdk_ssort_impl.h"
418#undef binarysort
419#undef do_ssort
420#undef gallop_left
421#undef gallop_right
422#undef ISLT
423#undef merge_at
424
425#define binarysort binarysort_hge_rev
426#define do_ssort do_ssort_hge_rev
427#define gallop_left gallop_left_hge_rev
428#define gallop_right gallop_right_hge_rev
429#define ISLT ISLT_hge_rev
430#define merge_at merge_at_hge_rev
431#include "gdk_ssort_impl.h"
432#undef binarysort
433#undef do_ssort
434#undef gallop_left
435#undef gallop_right
436#undef ISLT
437#undef merge_at
438
439#undef COPY
440#endif
441
442#define COPY COPY_flt
443
444#define binarysort binarysort_flt
445#define do_ssort do_ssort_flt
446#define gallop_left gallop_left_flt
447#define gallop_right gallop_right_flt
448#define ISLT ISLT_flt
449#define merge_at merge_at_flt
450#include "gdk_ssort_impl.h"
451#undef binarysort
452#undef do_ssort
453#undef gallop_left
454#undef gallop_right
455#undef ISLT
456#undef merge_at
457
458#define binarysort binarysort_flt_rev
459#define do_ssort do_ssort_flt_rev
460#define gallop_left gallop_left_flt_rev
461#define gallop_right gallop_right_flt_rev
462#define ISLT ISLT_flt_rev
463#define merge_at merge_at_flt_rev
464#include "gdk_ssort_impl.h"
465#undef binarysort
466#undef do_ssort
467#undef gallop_left
468#undef gallop_right
469#undef ISLT
470#undef merge_at
471
472#undef COPY
473
474#define COPY COPY_dbl
475
476#define binarysort binarysort_dbl
477#define do_ssort do_ssort_dbl
478#define gallop_left gallop_left_dbl
479#define gallop_right gallop_right_dbl
480#define ISLT ISLT_dbl
481#define merge_at merge_at_dbl
482#include "gdk_ssort_impl.h"
483#undef binarysort
484#undef do_ssort
485#undef gallop_left
486#undef gallop_right
487#undef ISLT
488#undef merge_at
489
490#define binarysort binarysort_dbl_rev
491#define do_ssort do_ssort_dbl_rev
492#define gallop_left gallop_left_dbl_rev
493#define gallop_right gallop_right_dbl_rev
494#define ISLT ISLT_dbl_rev
495#define merge_at merge_at_dbl_rev
496#include "gdk_ssort_impl.h"
497#undef binarysort
498#undef do_ssort
499#undef gallop_left
500#undef gallop_right
501#undef ISLT
502#undef merge_at
503
504#undef COPY
505
506#define COPY COPY_any
507
508#define binarysort binarysort_any
509#define do_ssort do_ssort_any
510#define gallop_left gallop_left_any
511#define gallop_right gallop_right_any
512#define ISLT ISLT_any
513#define merge_at merge_at_any
514
515#define GDKssortimpl GDKssort
516
517#include "gdk_ssort_impl.h"
518
519#undef GDKssortimpl
520
521#undef binarysort
522#undef do_ssort
523#undef gallop_left
524#undef gallop_right
525#undef ISLT
526#undef merge_at
527
528#define binarysort binarysort_any_rev
529#define do_ssort do_ssort_any_rev
530#define gallop_left gallop_left_any_rev
531#define gallop_right gallop_right_any_rev
532#define ISLT ISLT_any_rev
533#define merge_at merge_at_any_rev
534
535#define GDKssortimpl GDKssort_rev
536#define do_ssort_bte do_ssort_bte_rev
537#define do_ssort_sht do_ssort_sht_rev
538#define do_ssort_int do_ssort_int_rev
539#define do_ssort_lng do_ssort_lng_rev
540#ifdef HAVE_HGE
541#define do_ssort_hge do_ssort_hge_rev
542#endif
543#define do_ssort_flt do_ssort_flt_rev
544#define do_ssort_dbl do_ssort_dbl_rev
545#define do_ssort_any do_ssort_any_rev
546
547#include "gdk_ssort_impl.h"
548
549#undef GDKssortimpl
550#undef do_ssort_bte
551#undef do_ssort_sht
552#undef do_ssort_int
553#undef do_ssort_lng
554#ifdef HAVE_HGE
555#undef do_ssort_hge
556#endif
557#undef do_ssort_flt
558#undef do_ssort_dbl
559#undef do_ssort_any
560
561#undef binarysort
562#undef do_ssort
563#undef gallop_left
564#undef gallop_right
565#undef ISLT
566#undef merge_at
567
568#undef COPY
569