1/*
2 * This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
5 *
6 * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V.
7 */
8
9/*
10 * In this file we have a number of functions that search a column
11 * using binary search. The column must be sorted or reverse sorted
12 * (for the SORTfnd* functions), or there must be an order index (for
13 * the ORDERfnd* functions).
14 *
15 * All functions return a BUN, i.e. an offset from the start of the
16 * column. The SORTfnd and ORDERfnd functions return BUN_NONE if the
17 * value does not occur in the column.
18 *
19 * The ORDERfnd* functions return an offset in the order index, so to
20 * get the actual position, (the OID, not the BUN), read the order
21 * index at that offset.
22 *
23 * The *fndfirst functions return the BUN of the *first* value in the
24 * column that is greater (less) than or equal to the value being
25 * searched (when the column is sorted in ascending (descending)
26 * order).
27 *
28 * The *fndlast functions return the BUN of the *first* value in the
29 * column that is greater (less) than the value being searched (when
30 * the column is sorted in ascending (descending) order).
31 *
32 * If the value to be found occurs, the following relationship holds:
33 *
34 * SORTfndfirst(b, v) <= SORTfnd(b, v) < SORTfndlast(b, v)
35 * ORDERfndfirst(b, v) <= ORDERfnd(b, v) < ORDERfndlast(b, v)
36 *
37 * and the range from *fndfirst (included) up to *fndlast (not
38 * included) are all values in the column that are equal to the value.
39 *
40 * If the value to be found does not occur, SORTfnd and ORDERfnd
41 * return BUN_NONE, and the other functions return the location of the
42 * next larger value, or BATcount if the value being searched for is
43 * larger (smaller if reverse sorted) than any in the column.
44 *
45 * Note that the NIL value is considered smaller than all other values.
46 */
47
48#include "monetdb_config.h"
49#include "gdk.h"
50#include "gdk_private.h"
51
52#define VALUE(x) (vars ? \
53 vars + VarHeapVal(vals, (x), width) : \
54 (const char *) vals + ((x) * width))
55
56#define bte_LT(a, b) ((a) < (b))
57#define bte_LE(a, b) ((a) <= (b))
58#define bte_GT(a, b) ((a) > (b))
59#define bte_GE(a, b) ((a) >= (b))
60#define bte_EQ(a, b) ((a) == (b))
61#define sht_LT(a, b) ((a) < (b))
62#define sht_LE(a, b) ((a) <= (b))
63#define sht_GT(a, b) ((a) > (b))
64#define sht_GE(a, b) ((a) >= (b))
65#define sht_EQ(a, b) ((a) == (b))
66#define int_LT(a, b) ((a) < (b))
67#define int_LE(a, b) ((a) <= (b))
68#define int_GT(a, b) ((a) > (b))
69#define int_GE(a, b) ((a) >= (b))
70#define int_EQ(a, b) ((a) == (b))
71#define lng_LT(a, b) ((a) < (b))
72#define lng_LE(a, b) ((a) <= (b))
73#define lng_GT(a, b) ((a) > (b))
74#define lng_GE(a, b) ((a) >= (b))
75#define lng_EQ(a, b) ((a) == (b))
76#ifdef HAVE_HGE
77#define hge_LT(a, b) ((a) < (b))
78#define hge_LE(a, b) ((a) <= (b))
79#define hge_GT(a, b) ((a) > (b))
80#define hge_GE(a, b) ((a) >= (b))
81#define hge_EQ(a, b) ((a) == (b))
82#endif
83#define flt_LT(a, b) (!is_flt_nil(b) && (is_flt_nil(a) || (a) < (b)))
84#define flt_LE(a, b) (is_flt_nil(a) || (!is_flt_nil(b) && (a) <= (b)))
85#define flt_GT(a, b) (!is_flt_nil(a) && (is_flt_nil(b) || (a) > (b)))
86#define flt_GE(a, b) (is_flt_nil(b) || (!is_flt_nil(a) && (a) >= (b)))
87#define flt_EQ(a, b) (is_flt_nil(a) ? is_flt_nil(b) : !is_flt_nil(b) && (a) == (b))
88#define dbl_LT(a, b) (!is_dbl_nil(b) && (is_dbl_nil(a) || (a) < (b)))
89#define dbl_LE(a, b) (is_dbl_nil(a) || (!is_dbl_nil(b) && (a) <= (b)))
90#define dbl_GT(a, b) (!is_dbl_nil(a) && (is_dbl_nil(b) || (a) > (b)))
91#define dbl_GE(a, b) (is_dbl_nil(b) || (!is_dbl_nil(a) && (a) >= (b)))
92#define dbl_EQ(a, b) (is_dbl_nil(a) ? is_dbl_nil(b) : !is_dbl_nil(b) && (a) == (b))
93
94#define BINSEARCHFUNC(TYPE) \
95BUN \
96binsearch_##TYPE(const oid *restrict indir, oid offset, \
97 const TYPE *restrict vals, BUN lo, BUN hi, TYPE v, \
98 int ordering, int last) \
99{ \
100 BUN mid; \
101 TYPE x; \
102 \
103 assert(ordering == 1 || ordering == -1); \
104 assert(lo <= hi); \
105 \
106 if (ordering > 0) { \
107 if (indir) { \
108 if (last > 0) { \
109 x = vals[indir[lo] - offset]; \
110 if (TYPE##_GT(x, v)) \
111 return lo; \
112 x = vals[indir[hi] - offset]; \
113 if (TYPE##_LE(x, v)) \
114 return hi + 1; \
115 \
116 /* loop invariant: */ \
117 /* value@lo <= v < value@hi */ \
118 while (hi - lo > 1) { \
119 mid = (hi + lo) / 2; \
120 x = vals[indir[mid] - offset]; \
121 if (TYPE##_GT(x, v)) \
122 hi = mid; \
123 else \
124 lo = mid; \
125 } \
126 } else { \
127 x = vals[indir[lo] - offset]; \
128 if (TYPE##_GE(x, v)) \
129 return last == 0 || TYPE##_EQ(x, v) ? lo : BUN_NONE; \
130 x = vals[indir[hi] - offset]; \
131 if (TYPE##_LT(x, v)) \
132 return last == 0 ? hi + 1 : BUN_NONE; \
133 \
134 /* loop invariant: */ \
135 /* value@lo < v <= value@hi */ \
136 while (hi - lo > 1) { \
137 mid = (hi + lo) / 2; \
138 x = vals[indir[mid] - offset]; \
139 if (TYPE##_GE(x, v)) \
140 hi = mid; \
141 else \
142 lo = mid; \
143 } \
144 } \
145 } else { \
146 if (last > 0) { \
147 x = vals[lo]; \
148 if (TYPE##_GT(x, v)) \
149 return lo; \
150 x = vals[hi]; \
151 if (TYPE##_LE(x, v)) \
152 return hi + 1; \
153 \
154 /* loop invariant: */ \
155 /* value@lo <= v < value@hi */ \
156 while (hi - lo > 1) { \
157 mid = (hi + lo) / 2; \
158 x = vals[mid]; \
159 if (TYPE##_GT(x, v)) \
160 hi = mid; \
161 else \
162 lo = mid; \
163 } \
164 } else { \
165 x = vals[lo]; \
166 if (TYPE##_GE(x, v)) \
167 return last == 0 || TYPE##_EQ(x, v) ? lo : BUN_NONE; \
168 x = vals[hi]; \
169 if (TYPE##_LT(x, v)) \
170 return last == 0 ? hi + 1 : BUN_NONE; \
171 \
172 /* loop invariant: */ \
173 /* value@lo < v <= value@hi */ \
174 while (hi - lo > 1) { \
175 mid = (hi + lo) / 2; \
176 x = vals[mid]; \
177 if (TYPE##_GE(x, v)) \
178 hi = mid; \
179 else \
180 lo = mid; \
181 } \
182 } \
183 } \
184 } else { \
185 if (indir) { \
186 if (last > 0) { \
187 x = vals[indir[lo] - offset]; \
188 if (TYPE##_LT(x, v)) \
189 return lo; \
190 x = vals[indir[hi] - offset]; \
191 if (TYPE##_GE(x, v)) \
192 return hi + 1; \
193 \
194 /* loop invariant: */ \
195 /* value@lo >= v > value@hi */ \
196 while (hi - lo > 1) { \
197 mid = (hi + lo) / 2; \
198 x = vals[indir[mid] - offset]; \
199 if (TYPE##_LT(x, v)) \
200 hi = mid; \
201 else \
202 lo = mid; \
203 } \
204 } else { \
205 x = vals[indir[lo] - offset]; \
206 if (TYPE##_LE(x, v)) \
207 return last == 0 || TYPE##_EQ(x, v) ? lo : BUN_NONE; \
208 x = vals[indir[hi] - offset]; \
209 if (TYPE##_GT(x, v)) \
210 return last == 0 ? hi + 1 : BUN_NONE; \
211 \
212 /* loop invariant: */ \
213 /* value@lo > v >= value@hi */ \
214 while (hi - lo > 1) { \
215 mid = (hi + lo) / 2; \
216 x = vals[indir[mid] - offset]; \
217 if (TYPE##_LE(x, v)) \
218 hi = mid; \
219 else \
220 lo = mid; \
221 } \
222 } \
223 } else { \
224 if (last > 0) { \
225 x = vals[lo]; \
226 if (TYPE##_LT(x, v)) \
227 return lo; \
228 x = vals[hi]; \
229 if (TYPE##_GE(x, v)) \
230 return hi + 1; \
231 \
232 /* loop invariant: */ \
233 /* value@lo >= v > value@hi */ \
234 while (hi - lo > 1) { \
235 mid = (hi + lo) / 2; \
236 x = vals[mid]; \
237 if (TYPE##_LT(x, v)) \
238 hi = mid; \
239 else \
240 lo = mid; \
241 } \
242 } else { \
243 x = vals[lo]; \
244 if (TYPE##_LE(x, v)) \
245 return last == 0 || TYPE##_EQ(x, v) ? lo : BUN_NONE; \
246 x = vals[hi]; \
247 if (TYPE##_GT(x, v)) \
248 return last == 0 ? hi + 1 : BUN_NONE; \
249 \
250 /* loop invariant: */ \
251 /* value@lo > v >= value@hi */ \
252 while (hi - lo > 1) { \
253 mid = (hi + lo) / 2; \
254 x = vals[mid]; \
255 if (TYPE##_LE(x, v)) \
256 hi = mid; \
257 else \
258 lo = mid; \
259 } \
260 } \
261 } \
262 } \
263 return last >= 0 || (x = (indir ? vals[indir[hi] - offset] : vals[hi]), TYPE##_EQ(x, v)) ? hi : BUN_NONE; \
264}
265
266BINSEARCHFUNC(bte)
267BINSEARCHFUNC(sht)
268BINSEARCHFUNC(int)
269BINSEARCHFUNC(lng)
270#ifdef HAVE_HGE
271BINSEARCHFUNC(hge)
272#endif
273BINSEARCHFUNC(flt)
274BINSEARCHFUNC(dbl)
275
276/* Do a binary search for the first/last occurrence of v between lo and hi
277 * (lo inclusive, hi not inclusive) in vals/vars.
278 * If last > 0, return the index of the first value > v; if last == 0,
279 * return the index of the first value >= v; if last < 0, return
280 * BUN_NONE if v does not occur, any BUN where v occurs otherwise.
281 * If ordering is -1, the values are sorted in reverse order and hence
282 * all comparisons are reversed.
283 */
284BUN
285binsearch(const oid *restrict indir, oid offset,
286 int type, const void *restrict vals, const char * restrict vars,
287 int width, BUN lo, BUN hi, const void *restrict v,
288 int ordering, int last)
289{
290 BUN mid;
291 int c;
292 int (*cmp)(const void *, const void *);
293
294 assert(ordering == 1 || ordering == -1);
295 assert(lo < hi);
296
297 --hi; /* now hi is inclusive */
298
299 switch (ATOMbasetype(type)) {
300 /* TYPE_oid is covered by TYPE_int/TYPE_lng */
301 case TYPE_bte:
302 return binsearch_bte(indir, offset, (const bte *) vals,
303 lo, hi, *(const bte *) v, ordering, last);
304 case TYPE_sht:
305 return binsearch_sht(indir, offset, (const sht *) vals,
306 lo, hi, *(const sht *) v, ordering, last);
307 case TYPE_int:
308 return binsearch_int(indir, offset, (const int *) vals,
309 lo, hi, *(const int *) v, ordering, last);
310 case TYPE_lng:
311 return binsearch_lng(indir, offset, (const lng *) vals,
312 lo, hi, *(const lng *) v, ordering, last);
313#ifdef HAVE_HGE
314 case TYPE_hge:
315 return binsearch_hge(indir, offset, (const hge *) vals,
316 lo, hi, *(const hge *) v, ordering, last);
317#endif
318 case TYPE_flt:
319 return binsearch_flt(indir, offset, (const flt *) vals,
320 lo, hi, *(const flt *) v, ordering, last);
321 case TYPE_dbl:
322 return binsearch_dbl(indir, offset, (const dbl *) vals,
323 lo, hi, *(const dbl *) v, ordering, last);
324 }
325
326 cmp = ATOMcompare(type);
327
328 if (last > 0) {
329 if ((c = ordering * cmp(VALUE(indir ? indir[lo] - offset : lo), v)) > 0)
330 return lo;
331 if ((c = ordering * cmp(VALUE(indir ? indir[hi] - offset : hi), v)) <= 0)
332 return hi + 1;
333 } else if (last == 0) {
334 if ((c = ordering * cmp(VALUE(indir ? indir[lo] - offset : lo), v)) >= 0)
335 return lo;
336 if ((c = ordering * cmp(VALUE(indir ? indir[hi] - offset : hi), v)) < 0)
337 return hi + 1;
338 } else {
339 if ((c = ordering * cmp(VALUE(indir ? indir[lo] - offset : lo), v)) > 0)
340 return BUN_NONE;
341 if (c == 0)
342 return lo;
343 if ((c = ordering * cmp(VALUE(indir ? indir[hi] - offset : hi), v)) < 0)
344 return BUN_NONE;
345 if (c == 0)
346 return hi;
347 if (hi - lo <= 1) {
348 /* not the first, not the last, and nothing in
349 * between */
350 return BUN_NONE;
351 }
352 }
353
354 /* loop invariant:
355 * last ? value@lo <= v < value@hi : value@lo < v <= value@hi
356 *
357 * This version does some more work in the inner loop than the
358 * type-expanded versions (ordering and indir checks) but is
359 * slow due to the function call and the needed check for
360 * vars (in VALUE()) already, so we're beyond caring. */
361 while (hi - lo > 1) {
362 mid = (hi + lo) / 2;
363 if ((c = ordering * cmp(VALUE(indir ? indir[mid] - offset : mid), v)) > 0 ||
364 (last <= 0 && c == 0))
365 hi = mid;
366 else
367 lo = mid;
368 }
369 return last >= 0 || cmp(VALUE(indir ? indir[hi] - offset : hi), v) == 0 ? hi : BUN_NONE;
370}
371
372/* Return the BUN of any tail value in b that is equal to v; if no
373 * match is found, return BUN_NONE. b must be sorted (reverse or
374 * forward). */
375BUN
376SORTfnd(BAT *b, const void *v)
377{
378 if (BATcount(b) == 0)
379 return BUN_NONE;
380 if (BATtdense(b)) {
381 if (is_oid_nil(*(oid*)v) ||
382 *(oid*)v < b->tseqbase ||
383 *(oid*)v >= b->tseqbase + BATcount(b))
384 return BUN_NONE;
385 return *(oid*)v - b->tseqbase;
386 }
387 if (b->ttype == TYPE_void) {
388 if (b->tvheap) {
389 struct canditer ci;
390 canditer_init(&ci, NULL, b);
391 return canditer_search(&ci, *(const oid*)v, false);
392 }
393 assert(is_oid_nil(b->tseqbase));
394 if (is_oid_nil(*(const oid *) v))
395 return 0;
396 return BUN_NONE;
397 }
398 return binsearch(NULL, 0, b->ttype, Tloc(b, 0),
399 b->tvheap ? b->tvheap->base : NULL, b->twidth, 0,
400 BATcount(b), v, b->tsorted ? 1 : -1, -1);
401}
402
403/* use orderidx, returns BUN on order index */
404BUN
405ORDERfnd(BAT *b, const void *v)
406{
407 assert(b->torderidx);
408 if (BATcount(b) == 0)
409 return BUN_NONE;
410 return binsearch((oid *) b->torderidx->base + ORDERIDXOFF, 0, b->ttype,
411 Tloc(b, 0), b->tvheap ? b->tvheap->base : NULL,
412 b->twidth, 0, BATcount(b), v, 1, -1);
413}
414
415/* Return the BUN of the first (lowest numbered) tail value that is
416 * equal to v; if no match is found, return the BUN of the next higher
417 * value in b. b must be sorted (reverse or forward). */
418BUN
419SORTfndfirst(BAT *b, const void *v)
420{
421 if (BATcount(b) == 0)
422 return 0;
423 if (BATtdense(b)) {
424 if (is_oid_nil(*(oid*)v) || *(oid*)v <= b->tseqbase)
425 return 0;
426 if (*(oid*)v >= b->tseqbase + BATcount(b))
427 return BATcount(b);
428 return *(oid*)v - b->tseqbase;
429 }
430 if (b->ttype == TYPE_void) {
431 if (b->tvheap) {
432 struct canditer ci;
433 canditer_init(&ci, NULL, b);
434 return canditer_search(&ci, *(const oid*)v, true);
435 }
436 assert(is_oid_nil(b->tseqbase));
437 return 0;
438 }
439 return binsearch(NULL, 0, b->ttype, Tloc(b, 0),
440 b->tvheap ? b->tvheap->base : NULL, b->twidth, 0,
441 BATcount(b), v, b->tsorted ? 1 : -1, 0);
442}
443
444/* use orderidx, returns BUN on order index */
445BUN
446ORDERfndfirst(BAT *b, const void *v)
447{
448 assert(b->torderidx);
449 if (BATcount(b) == 0)
450 return 0;
451 return binsearch((oid *) b->torderidx->base + ORDERIDXOFF, 0, b->ttype,
452 Tloc(b, 0), b->tvheap ? b->tvheap->base : NULL,
453 b->twidth, 0, BATcount(b), v, 1, 0);
454}
455
456/* Return the BUN of the first (lowest numbered) tail value beyond v.
457 * b must be sorted (reverse or forward). */
458BUN
459SORTfndlast(BAT *b, const void *v)
460{
461 if (BATcount(b) == 0)
462 return 0;
463 if (BATtdense(b)) {
464 if (is_oid_nil(*(oid*)v) || *(oid*)v <= b->tseqbase)
465 return 0;
466 if (*(oid*)v >= b->tseqbase + BATcount(b))
467 return BATcount(b);
468 return *(oid*)v - b->tseqbase;
469 }
470 if (b->ttype == TYPE_void) {
471 if (b->tvheap) {
472 struct canditer ci;
473 canditer_init(&ci, NULL, b);
474 return canditer_search(&ci, *(const oid*)v + 1, true);
475 }
476 assert(is_oid_nil(b->tseqbase));
477 return BATcount(b);
478 }
479 return binsearch(NULL, 0, b->ttype, Tloc(b, 0),
480 b->tvheap ? b->tvheap->base : NULL, b->twidth, 0,
481 BATcount(b), v, b->tsorted ? 1 : -1, 1);
482}
483
484/* use orderidx, returns BUN on order index */
485BUN
486ORDERfndlast(BAT *b, const void *v)
487{
488 assert(b->torderidx);
489 if (BATcount(b) == 0)
490 return 0;
491 return binsearch((oid *) b->torderidx->base + ORDERIDXOFF, 0, b->ttype,
492 Tloc(b, 0), b->tvheap ? b->tvheap->base : NULL,
493 b->twidth, 0, BATcount(b), v, 1, 1);
494}
495