1/*
2 * This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
5 *
6 * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V.
7 */
8
9#include "monetdb_config.h"
10#include "gdk.h"
11#include "gdk_private.h"
12#include "gdk_cand.h"
13
14/* create a new, dense candidate list with values from `first' up to,
15 * but not including, `last' */
16static inline BAT *
17newdensecand(oid first, oid last)
18{
19 if (last <= first)
20 first = last = 0; /* empty range */
21 return BATdense(0, first, last - first);
22}
23
24/* merge two candidate lists and produce a new one
25 *
26 * candidate lists are VOID-headed BATs with an OID tail which is
27 * sorted and unique.
28 */
29BAT *
30BATmergecand(BAT *a, BAT *b)
31{
32 BAT *bn;
33 oid *restrict p, i;
34 struct canditer cia, cib;
35
36 BATcheck(a, "BATmergecand", NULL);
37 BATcheck(b, "BATmergecand", NULL);
38
39 canditer_init(&cia, NULL, a);
40 canditer_init(&cib, NULL, b);
41
42 /* we can return a if b is empty (and v.v.) */
43 if (cia.ncand == 0) {
44 return canditer_slice(&cib, 0, cib.ncand);
45 }
46 if (cib.ncand == 0) {
47 return canditer_slice(&cia, 0, cia.ncand);
48 }
49 /* we can return a if a fully covers b (and v.v) */
50 if (cia.tpe == cand_dense && cib.tpe == cand_dense) {
51 /* both are dense */
52 if (cia.seq <= cib.seq && cib.seq <= cia.seq + cia.ncand) {
53 /* partial overlap starting with a, or b is
54 * smack bang after a */
55 return newdensecand(cia.seq, cia.seq + cia.ncand < cib.seq + cib.ncand ? cib.seq + cib.ncand : cia.seq + cia.ncand);
56 }
57 if (cib.seq <= cia.seq && cia.seq <= cib.seq + cib.ncand) {
58 /* partial overlap starting with b, or a is
59 * smack bang after b */
60 return newdensecand(cib.seq, cia.seq + cia.ncand < cib.seq + cib.ncand ? cib.seq + cib.ncand : cia.seq + cia.ncand);
61 }
62 }
63 if (cia.tpe == cand_dense
64 && cia.seq <= cib.seq
65 && canditer_last(&cia) >= canditer_last(&cib)) {
66 return canditer_slice(&cia, 0, cia.ncand);
67 }
68 if (cib.tpe == cand_dense
69 && cib.seq <= cia.seq
70 && canditer_last(&cib) >= canditer_last(&cia)) {
71 return canditer_slice(&cib, 0, cib.ncand);
72 }
73
74 bn = COLnew(0, TYPE_oid, cia.ncand + cib.ncand, TRANSIENT);
75 if (bn == NULL)
76 return NULL;
77 p = (oid *) Tloc(bn, 0);
78 if (cia.tpe == cand_dense && cib.tpe == cand_dense) {
79 /* both lists are dense */
80 if (cia.seq > cib.seq) {
81 struct canditer ci;
82 ci = cia;
83 cia = cib;
84 cib = ci;
85 }
86 /* cia completely before cib */
87 assert(cia.seq + cia.ncand < cib.seq);
88 for (i = cia.seq; i < cia.seq + cia.ncand; i++)
89 *p++ = i;
90 /* there is a gap */
91 for (i = cib.seq; i < cib.seq + cib.ncand; i++)
92 *p++ = i;
93 } else if (cia.tpe == cand_dense || cib.tpe == cand_dense) {
94 if (cib.tpe == cand_dense) {
95 struct canditer ci;
96 ci = cia;
97 cia = cib;
98 cib = ci;
99 }
100 /* only cia is dense */
101
102 /* copy part of cib that's before the start of cia */
103 while (canditer_peek(&cib) < cia.seq) {
104 *p++ = canditer_next(&cib);
105 }
106 /* copy all of cia */
107 for (i = cia.seq; i < cia.seq + cia.ncand; i++)
108 *p++ = i;
109 /* skip over part of cib that overlaps with cia */
110 canditer_setidx(&cib, canditer_search(&cib, cia.seq + cia.ncand, true));
111 /* copy rest of cib */
112 while (cib.next < cib.ncand) {
113 *p++ = canditer_next(&cib);
114 }
115 } else {
116 /* a and b are both not dense */
117 oid ao = canditer_next(&cia);
118 oid bo = canditer_next(&cib);
119 while (!is_oid_nil(ao) && !is_oid_nil(bo)) {
120 if (ao < bo) {
121 *p++ = ao;
122 ao = canditer_next(&cia);
123 } else if (bo < ao) {
124 *p++ = bo;
125 bo = canditer_next(&cib);
126 } else {
127 *p++ = ao;
128 ao = canditer_next(&cia);
129 bo = canditer_next(&cib);
130 }
131 }
132 while (!is_oid_nil(ao)) {
133 *p++ = ao;
134 ao = canditer_next(&cia);
135 }
136 while (!is_oid_nil(bo)) {
137 *p++ = bo;
138 bo = canditer_next(&cib);
139 }
140 }
141
142 /* properties */
143 BATsetcount(bn, (BUN) (p - (oid *) Tloc(bn, 0)));
144 bn->trevsorted = BATcount(bn) <= 1;
145 bn->tsorted = true;
146 bn->tkey = true;
147 bn->tnil = false;
148 bn->tnonil = true;
149 return virtualize(bn);
150}
151
152/* intersect two candidate lists and produce a new one
153 *
154 * candidate lists are VOID-headed BATs with an OID tail which is
155 * sorted and unique.
156 */
157BAT *
158BATintersectcand(BAT *a, BAT *b)
159{
160 BAT *bn;
161 oid *restrict p;
162 struct canditer cia, cib;
163
164 BATcheck(a, "BATintersectcand", NULL);
165 BATcheck(b, "BATintersectcand", NULL);
166
167 canditer_init(&cia, NULL, a);
168 canditer_init(&cib, NULL, b);
169
170 if (cia.ncand == 0 || cib.ncand == 0) {
171 return BATdense(0, 0, 0);
172 }
173
174 if (cia.tpe == cand_dense && cib.tpe == cand_dense) {
175 /* both lists are dense */
176 return newdensecand(MAX(cia.seq, cib.seq), MIN(cia.seq + cia.ncand, cib.seq + cib.ncand));
177 }
178
179 bn = COLnew(0, TYPE_oid, MIN(cia.ncand, cib.ncand), TRANSIENT);
180 if (bn == NULL)
181 return NULL;
182 p = (oid *) Tloc(bn, 0);
183 if (cia.tpe == cand_dense || cib.tpe == cand_dense) {
184 if (cib.tpe == cand_dense) {
185 struct canditer ci;
186 ci = cia;
187 cia = cib;
188 cib = ci;
189 }
190 /* only cia is dense */
191
192 /* search for first value in cib that is contained in cia */
193 canditer_setidx(&cib, canditer_search(&cib, cia.seq, true));
194 oid bo;
195 while (!is_oid_nil(bo = canditer_next(&cib)) && bo < cia.seq + cia.ncand)
196 *p++ = bo;
197 } else {
198 /* a and b are both not dense */
199 oid ao = canditer_next(&cia);
200 oid bo = canditer_next(&cib);
201 while (!is_oid_nil(ao) && !is_oid_nil(bo)) {
202 if (ao < bo)
203 ao = canditer_next(&cia);
204 else if (bo < ao)
205 bo = canditer_next(&cib);
206 else {
207 *p++ = ao;
208 ao = canditer_next(&cia);
209 bo = canditer_next(&cib);
210 }
211 }
212 }
213
214 /* properties */
215 BATsetcount(bn, (BUN) (p - (oid *) Tloc(bn, 0)));
216 bn->trevsorted = BATcount(bn) <= 1;
217 bn->tsorted = true;
218 bn->tkey = true;
219 bn->tnil = false;
220 bn->tnonil = true;
221 return virtualize(bn);
222}
223
224/* calculate the difference of two candidate lists and produce a new one
225 */
226BAT *
227BATdiffcand(BAT *a, BAT *b)
228{
229 BAT *bn;
230 oid *restrict p;
231 struct canditer cia, cib;
232
233 BATcheck(a, "BATdiffcand", NULL);
234 BATcheck(b, "BATdiffcand", NULL);
235
236 canditer_init(&cia, NULL, a);
237 canditer_init(&cib, NULL, b);
238
239 if (cia.ncand == 0)
240 return BATdense(0, 0, 0);
241 if (cia.ncand == 0)
242 return canditer_slice(&cia, 0, cia.ncand);
243
244 if (cib.seq > canditer_last(&cia) || canditer_last(&cib) < cia.seq) {
245 /* no overlap, return a */
246 return canditer_slice(&cia, 0, cia.ncand);
247 }
248
249 if (cia.tpe == cand_dense && cib.tpe == cand_dense) {
250 /* both a and b are dense */
251 if (cia.seq < cib.seq) {
252 /* a starts before b */
253 if (cia.seq + cia.ncand <= cib.seq + cib.ncand) {
254 /* b overlaps with end of a */
255 return canditer_slice(&cia, 0, cib.seq - cia.seq);
256 }
257 /* b is subset of a */
258 return canditer_slice2(&cia, 0, cib.seq - cia.seq,
259 cib.seq + cib.ncand - cia.seq,
260 cia.ncand);
261 } else {
262 /* cia.seq >= cib.seq */
263 if (cia.seq + cia.ncand > cib.seq + cib.ncand) {
264 /* b overlaps with beginning of a */
265 return canditer_slice(&cia, cib.seq + cib.ncand - cia.seq, cia.ncand);
266 }
267 /* a is subset f b */
268 return BATdense(0, 0, 0);
269 }
270 }
271 if (cib.tpe == cand_dense) {
272 /* b is dense and a is not: we can copy the part of a
273 * that is before the start of b and the part of a
274 * that is after the end of b */
275 return canditer_slice2(&cia, 0,
276 canditer_search(&cia, cib.seq, true),
277 canditer_search(&cia, cib.seq + cib.ncand, true),
278 cia.ncand);
279 }
280
281 /* b is not dense */
282 bn = COLnew(0, TYPE_oid, BATcount(a), TRANSIENT);
283 if (bn == NULL)
284 return NULL;
285 p = Tloc(bn, 0);
286 /* find first position in b that is in range of a */
287 canditer_setidx(&cib, canditer_search(&cib, cia.seq, true));
288 oid ob = canditer_next(&cib);
289 for (BUN i = 0; i < cia.ncand; i++) {
290 oid oa = canditer_next(&cia);
291 while (!is_oid_nil(ob) && ob < oa) {
292 ob = canditer_next(&cib);
293 }
294 if (!is_oid_nil(ob) && oa < ob)
295 *p++ = oa;
296 }
297
298 /* properties */
299 BATsetcount(bn, (BUN) (p - (oid *) Tloc(bn, 0)));
300 bn->trevsorted = BATcount(bn) <= 1;
301 bn->tsorted = true;
302 bn->tkey = true;
303 bn->tnil = false;
304 bn->tnonil = true;
305 return virtualize(bn);
306}
307
308/* return offset of first value in cand that is >= o */
309static inline BUN
310binsearchcand(const oid *cand, BUN hi, oid o)
311{
312 BUN lo = 0;
313
314 if (o <= cand[lo])
315 return 0;
316 if (o > cand[hi])
317 return hi + 1;
318 /* loop invariant: cand[lo] < o <= cand[hi] */
319 while (hi > lo + 1) {
320 BUN mid = (lo + hi) / 2;
321 if (cand[mid] == o)
322 return mid;
323 if (cand[mid] < o)
324 lo = mid;
325 else
326 hi = mid;
327 }
328 return hi;
329}
330
331/* initialize a candidate iterator, return number of iterations */
332BUN
333canditer_init(struct canditer *ci, BAT *b, BAT *s)
334{
335 assert(ci != NULL);
336
337 if (s == NULL) {
338 if (b == NULL) {
339 /* trivially empty candidate list */
340 *ci = (struct canditer) {
341 .tpe = cand_dense,
342 };
343 return 0;
344 }
345 /* every row is a candidate */
346 *ci = (struct canditer) {
347 .tpe = cand_dense,
348 .seq = b->hseqbase,
349 .ncand = BATcount(b),
350 };
351 return ci->ncand;
352 }
353
354 assert(ATOMtype(s->ttype) == TYPE_oid);
355 assert(s->tsorted);
356 assert(s->tkey);
357 assert(s->tnonil);
358 assert(s->ttype == TYPE_void || s->tvheap == NULL);
359
360 BUN cnt = BATcount(s);
361
362 if (cnt == 0 || (b != NULL && BATcount(b) == 0)) {
363 /* candidate list for empty BAT or empty candidate list */
364 *ci = (struct canditer) {
365 .tpe = cand_dense,
366 .s = s,
367 };
368 return 0;
369 }
370
371 *ci = (struct canditer) {
372 .seq = s->tseqbase,
373 .s = s,
374 };
375
376 if (s->ttype == TYPE_void) {
377 assert(!is_oid_nil(ci->seq));
378 if (s->tvheap) {
379 assert(s->tvheap->free % SIZEOF_OID == 0);
380 ci->noids = s->tvheap->free / SIZEOF_OID;
381 if (ci->noids > 0) {
382 ci->tpe = cand_except;
383 ci->oids = (const oid *) s->tvheap->base;
384 } else {
385 /* why the vheap? */
386 ci->tpe = cand_dense;
387 ci->oids = NULL;
388 }
389 } else {
390 ci->tpe = cand_dense;
391 }
392 } else if (is_oid_nil(ci->seq)) {
393 ci->tpe = cand_materialized;
394 ci->oids = (const oid *) s->theap.base;
395 ci->seq = ci->oids[0];
396 ci->noids = cnt;
397 if (ci->oids[ci->noids - 1] - ci->oids[0] == ci->noids - 1) {
398 /* actually dense */
399 ci->tpe = cand_dense;
400 ci->oids = NULL;
401 }
402 } else {
403 /* materialized dense: no exceptions */
404 ci->tpe = cand_dense;
405 }
406 switch (ci->tpe) {
407 case cand_dense:
408 case_cand_dense:
409 if (b != NULL) {
410 if (ci->seq + cnt <= b->hseqbase ||
411 ci->seq >= b->hseqbase + BATcount(b)) {
412 ci->ncand = 0;
413 return 0;
414 }
415 if (b->hseqbase > ci->seq) {
416 cnt -= b->hseqbase - ci->seq;
417 ci->offset += b->hseqbase - ci->seq;
418 ci->seq = b->hseqbase;
419 }
420 if (ci->seq + cnt > b->hseqbase + BATcount(b))
421 cnt = b->hseqbase + BATcount(b) - ci->seq;
422 }
423 break;
424 case cand_materialized:
425 if (b != NULL) {
426 if (ci->oids[ci->noids - 1] < b->hseqbase) {
427 *ci = (struct canditer) {
428 .tpe = cand_dense,
429 .s = s,
430 };
431 return 0;
432 }
433 if (ci->oids[0] < b->hseqbase) {
434 BUN lo = 0;
435 BUN hi = cnt - 1;
436 const oid o = b->hseqbase;
437 /* loop invariant:
438 * ci->oids[lo] < o <= ci->oids[hi] */
439 while (hi - lo > 1) {
440 BUN mid = (lo + hi) / 2;
441 if (ci->oids[mid] >= o)
442 hi = mid;
443 else
444 lo = mid;
445 }
446 ci->offset = hi;
447 cnt -= hi;
448 ci->oids += hi;
449 ci->seq = ci->oids[0];
450 }
451 if (ci->oids[cnt - 1] >= b->hseqbase + BATcount(b)) {
452 BUN lo = 0;
453 BUN hi = cnt - 1;
454 const oid o = b->hseqbase + BATcount(b);
455 /* loop invariant:
456 * ci->oids[lo] < o <= ci->oids[hi] */
457 while (hi - lo > 1) {
458 BUN mid = (lo + hi) / 2;
459 if (ci->oids[mid] >= o)
460 hi = mid;
461 else
462 lo = mid;
463 }
464 cnt = hi;
465 }
466 ci->noids = cnt;
467 }
468 break;
469 case cand_except:
470 /* exceptions must all be within range of s */
471 assert(ci->oids[0] >= ci->seq);
472 assert(ci->oids[ci->noids - 1] < ci->seq + cnt + ci->noids);
473 if (b != NULL) {
474 if (ci->seq + cnt + ci->noids <= b->hseqbase ||
475 ci->seq >= b->hseqbase + BATcount(b)) {
476 *ci = (struct canditer) {
477 .tpe = cand_dense,
478 };
479 return 0;
480 }
481 }
482 /* prune exceptions at either end of range of s */
483 while (ci->noids > 0 && ci->oids[0] == ci->seq) {
484 ci->noids--;
485 ci->oids++;
486 ci->seq++;
487 }
488 while (ci->noids > 0 &&
489 ci->oids[ci->noids - 1] == ci->seq + cnt + ci->noids - 1)
490 ci->noids--;
491 /* WARNING: don't reset ci->oids to NULL when setting
492 * ci->tpe to cand_dense below: BATprojectchain will
493 * fail */
494 if (ci->noids == 0) {
495 ci->tpe = cand_dense;
496 goto case_cand_dense;
497 }
498 if (b != NULL) {
499 BUN p;
500 p = binsearchcand(ci->oids, ci->noids - 1, b->hseqbase);
501 if (p == ci->noids) {
502 /* all exceptions before start of b */
503 ci->offset = b->hseqbase - ci->seq - ci->noids;
504 cnt = ci->seq + cnt + ci->noids - b->hseqbase;
505 ci->seq = b->hseqbase;
506 ci->noids = 0;
507 ci->tpe = cand_dense;
508 break;
509 }
510 assert(b->hseqbase > ci->seq || p == 0);
511 if (b->hseqbase > ci->seq) {
512 /* skip candidates, possibly including
513 * exceptions */
514 ci->oids += p;
515 ci->noids -= p;
516 p = b->hseqbase - ci->seq - p;
517 cnt -= p;
518 ci->offset += p;
519 ci->seq = b->hseqbase;
520 }
521 if (ci->seq + cnt + ci->noids > b->hseqbase + BATcount(b)) {
522 p = binsearchcand(ci->oids, ci->noids - 1,
523 b->hseqbase + BATcount(b));
524 ci->noids = p;
525 cnt = b->hseqbase + BATcount(b) - ci->seq - ci->noids;
526 }
527 while (ci->noids > 0 && ci->oids[0] == ci->seq) {
528 ci->noids--;
529 ci->oids++;
530 ci->seq++;
531 }
532 while (ci->noids > 0 &&
533 ci->oids[ci->noids - 1] == ci->seq + cnt + ci->noids - 1)
534 ci->noids--;
535 if (ci->noids == 0) {
536 ci->tpe = cand_dense;
537 goto case_cand_dense;
538 }
539 }
540 break;
541 }
542 ci->ncand = cnt;
543 return cnt;
544}
545
546/* return the next candidate without advancing */
547oid
548canditer_peek(struct canditer *ci)
549{
550 if (ci->next == ci->ncand)
551 return oid_nil;
552 switch (ci->tpe) {
553 case cand_dense:
554 return ci->seq + ci->next;
555 case cand_materialized:
556 assert(ci->next < ci->noids);
557 return ci->oids[ci->next];
558 case cand_except:
559 /* work around compiler error: control reaches end of
560 * non-void function */
561 break;
562 }
563 oid o = ci->seq + ci->add + ci->next;
564 while (ci->add < ci->noids && o == ci->oids[ci->add]) {
565 ci->add++;
566 o++;
567 }
568 return o;
569}
570
571/* return the previous candidate */
572oid
573canditer_prev(struct canditer *ci)
574{
575 if (ci->next == 0)
576 return oid_nil;
577 switch (ci->tpe) {
578 case cand_dense:
579 return ci->seq + --ci->next;
580 case cand_materialized:
581 return ci->oids[--ci->next];
582 case cand_except:
583 break;
584 }
585 oid o = ci->seq + ci->add + --ci->next;
586 while (ci->add > 0 && o == ci->oids[ci->add - 1]) {
587 ci->add--;
588 o--;
589 }
590 return o;
591}
592
593/* return the previous candidate without retreating */
594oid
595canditer_peekprev(struct canditer *ci)
596{
597 if (ci->next == 0)
598 return oid_nil;
599 switch (ci->tpe) {
600 case cand_dense:
601 return ci->seq + ci->next - 1;
602 case cand_materialized:
603 return ci->oids[ci->next - 1];
604 case cand_except:
605 break;
606 }
607 oid o = ci->seq + ci->add + ci->next - 1;
608 while (ci->add > 0 && o == ci->oids[ci->add - 1]) {
609 ci->add--;
610 o--;
611 }
612 return o;
613}
614
615/* return the last candidate */
616oid
617canditer_last(struct canditer *ci)
618{
619 if (ci->ncand == 0)
620 return oid_nil;
621 switch (ci->tpe) {
622 case cand_dense:
623 return ci->seq + ci->ncand - 1;
624 case cand_materialized:
625 return ci->oids[ci->ncand - 1];
626 case cand_except:
627 /* work around compiler error: control reaches end of
628 * non-void function */
629 break;
630 }
631 return ci->seq + ci->ncand + ci->noids - 1;
632}
633
634/* return the candidate at the given index */
635oid
636canditer_idx(struct canditer *ci, BUN p)
637{
638 if (p >= ci->ncand)
639 return oid_nil;
640 switch (ci->tpe) {
641 case cand_dense:
642 return ci->seq + p;
643 case cand_materialized:
644 return ci->oids[p];
645 case cand_except:
646 /* work around compiler error: control reaches end of
647 * non-void function */
648 break;
649 }
650 oid o = ci->seq + p;
651 if (o < ci->oids[0])
652 return o;
653 if (o + ci->noids > ci->oids[ci->noids - 1])
654 return o + ci->noids;
655 BUN lo = 0, hi = ci->noids - 1;
656 while (hi - lo > 1) {
657 BUN mid = (hi + lo) / 2;
658 if (ci->oids[mid] - mid > o)
659 hi = mid;
660 else
661 lo = mid;
662 }
663 return o + hi;
664}
665
666/* set the index for the next candidate to be returned */
667void
668canditer_setidx(struct canditer *ci, BUN p)
669{
670 if (p != ci->next) {
671 if (p >= ci->ncand) {
672 ci->next = ci->ncand;
673 if (ci->tpe == cand_except)
674 ci->add = ci->noids;
675 } else {
676 ci->next = p;
677 if (ci->tpe == cand_except)
678 ci->add = canditer_idx(ci, p) - ci->seq - p;
679 }
680 }
681}
682
683/* reset */
684void
685canditer_reset(struct canditer *ci)
686{
687 ci->next = 0;
688 ci->add = 0;
689}
690
691/* return index of given candidate if it occurs; if the candidate does
692 * not occur, if next is set, return index of next larger candidate,
693 * if next is not set, return BUN_NONE */
694BUN
695canditer_search(struct canditer *ci, oid o, bool next)
696{
697 BUN p;
698
699 switch (ci->tpe) {
700 case cand_dense:
701 if (o < ci->seq)
702 return next ? 0 : BUN_NONE;
703 if (o >= ci->seq + ci->ncand)
704 return next ? ci->ncand : BUN_NONE;
705 return o - ci->seq;
706 case cand_materialized:
707 if (ci->noids == 0)
708 return 0;
709 p = binsearchcand(ci->oids, ci->noids - 1, o);
710 if (!next && (p == ci->noids || ci->oids[p] != o))
711 return BUN_NONE;
712 return p;
713 case cand_except:
714 break;
715 }
716 if (o < ci->seq)
717 return next ? 0 : BUN_NONE;
718 if (o >= ci->seq + ci->ncand + ci->noids)
719 return next ? ci->ncand : BUN_NONE;
720 p = binsearchcand(ci->oids, ci->noids - 1, o);
721 if (next || p == ci->noids || ci->oids[p] != o)
722 return o - ci->seq - p;
723 return BUN_NONE;
724}
725
726/* return either an actual BATslice or a new BAT that contains the
727 * "virtual" slice of the input candidate list BAT; except, unlike
728 * BATslice, the hseqbase of the returned BAT is 0 */
729BAT *
730canditer_slice(struct canditer *ci, BUN lo, BUN hi)
731{
732 BAT *bn;
733 oid o;
734 BUN add;
735
736 assert(ci != NULL);
737
738 if (lo >= ci->ncand || lo >= hi)
739 return BATdense(0, 0, 0);
740 if (hi > ci->ncand)
741 hi = ci->ncand;
742 switch (ci->tpe) {
743 case cand_materialized:
744 if (ci->s) {
745 bn = BATslice(ci->s, lo + ci->offset, hi + ci->offset);
746 BAThseqbase(bn, 0);
747 return bn;
748 }
749 bn = COLnew(0, TYPE_oid, hi - lo, TRANSIENT);
750 if (bn == NULL)
751 return NULL;
752 BATsetcount(bn, hi - lo);
753 memcpy(Tloc(bn, 0), ci->oids + lo, (hi - lo) * sizeof(oid));
754 break;
755 default: /* really: case cand_dense: */
756 return BATdense(0, ci->seq + lo, hi - lo);
757 case cand_except:
758 o = canditer_idx(ci, lo);
759 add = o - ci->seq - lo;
760 assert(add <= ci->noids);
761 if (add == ci->noids || o + hi - lo < ci->oids[add]) {
762 /* after last exception or before next
763 * exception: return dense sequence */
764 return BATdense(0, o, hi - lo);
765 }
766 bn = COLnew(0, TYPE_oid, hi - lo, TRANSIENT);
767 if (bn == NULL)
768 return NULL;
769 BATsetcount(bn, hi - lo);
770 for (oid *dst = Tloc(bn, 0); lo < hi; lo++) {
771 while (add < ci->noids && o == ci->oids[add]) {
772 o++;
773 add++;
774 }
775 *dst++ = o++;
776 }
777 break;
778 }
779 bn->tsorted = true;
780 bn->trevsorted = BATcount(bn) <= 1;
781 bn->tkey = true;
782 bn->tseqbase = oid_nil;
783 bn->tnil = false;
784 bn->tnonil = true;
785 return virtualize(bn);
786}
787
788/* return the combination of two slices */
789BAT *
790canditer_slice2(struct canditer *ci, BUN lo1, BUN hi1, BUN lo2, BUN hi2)
791{
792 BAT *bn;
793 oid o;
794 BUN add;
795
796 assert(lo1 <= hi1);
797 assert(lo2 <= hi2);
798 assert(hi1 <= lo2 || (lo2 == 0 && hi2 == 0));
799
800 if (hi1 == lo2) /* consecutive slices: combine into one */
801 return canditer_slice(ci, lo1, hi2);
802 if (lo2 == hi2 || hi1 >= ci->ncand || lo2 >= ci->ncand) {
803 /* empty second slice */
804 return canditer_slice(ci, lo1, hi1);
805 }
806 if (lo1 == hi1) /* empty first slice */
807 return canditer_slice(ci, lo2, hi2);
808 if (lo1 >= ci->ncand) /* out of range */
809 return BATdense(0, 0, 0);
810
811 if (hi2 >= ci->ncand)
812 hi2 = ci->ncand;
813
814 bn = COLnew(0, TYPE_oid, hi1 - lo1 + hi2 - lo2, TRANSIENT);
815 if (bn == NULL)
816 return NULL;
817 BATsetcount(bn, hi1 - lo1 + hi2 - lo2);
818 bn->tsorted = true;
819 bn->trevsorted = BATcount(bn) <= 1;
820 bn->tkey = true;
821 bn->tseqbase = oid_nil;
822 bn->tnil = false;
823 bn->tnonil = true;
824
825 oid *dst = Tloc(bn, 0);
826
827 switch (ci->tpe) {
828 case cand_materialized:
829 memcpy(dst, ci->oids + lo1, (hi1 - lo1) * sizeof(oid));
830 memcpy(dst + hi1 - lo1, ci->oids + lo2, (hi2 - lo2) * sizeof(oid));
831 break;
832 case cand_dense:
833 while (lo1 < hi1)
834 *dst++ = ci->seq + lo1++;
835 while (lo2 < hi2)
836 *dst++ = ci->seq + lo2++;
837 break;
838 case cand_except:
839 o = canditer_idx(ci, lo1);
840 add = o - ci->seq - lo1;
841 assert(add <= ci->noids);
842 if (add == ci->noids) {
843 /* after last exception: return dense sequence */
844 while (lo1 < hi1)
845 *dst++ = ci->seq + add + lo1++;
846 } else {
847 while (lo1 < hi1) {
848 while (add < ci->noids && o == ci->oids[add]) {
849 o++;
850 add++;
851 }
852 *dst++ = o++;
853 lo1++;
854 }
855 }
856 o = canditer_idx(ci, lo2);
857 add = o - ci->seq - lo2;
858 assert(add <= ci->noids);
859 if (add == ci->noids) {
860 /* after last exception: return dense sequence */
861 while (lo2 < hi2)
862 *dst++ = ci->seq + add + lo2++;
863 } else {
864 while (lo2 < hi2) {
865 while (add < ci->noids && o == ci->oids[add]) {
866 o++;
867 add++;
868 }
869 *dst++ = o++;
870 lo2++;
871 }
872 }
873 }
874 return virtualize(bn);
875}
876
877gdk_return
878BATnegcands(BAT *dense_cands, BAT *odels)
879{
880 const char *nme;
881 Heap *dels;
882 BUN lo, hi;
883
884 assert(BATtdense(dense_cands));
885 assert(dense_cands->ttype == TYPE_void);
886 assert(dense_cands->batRole == TRANSIENT);
887
888 if (BATcount(odels) == 0)
889 return GDK_SUCCEED;
890
891 lo = SORTfndfirst(odels, &dense_cands->tseqbase);
892 hi = SORTfndfirst(odels, &(oid) {dense_cands->tseqbase + BATcount(dense_cands)});
893 if (lo == hi)
894 return GDK_SUCCEED;
895
896 nme = BBP_physical(dense_cands->batCacheid);
897 if ((dels = (Heap*)GDKzalloc(sizeof(Heap))) == NULL ||
898 (dels->farmid = BBPselectfarm(dense_cands->batRole, dense_cands->ttype, varheap)) < 0){
899 GDKfree(dels);
900 return GDK_FAIL;
901 }
902 strconcat_len(dels->filename, sizeof(dels->filename),
903 nme, ".theap", NULL);
904
905 if (HEAPalloc(dels, hi - lo, sizeof(oid)) != GDK_SUCCEED) {
906 GDKfree(dels);
907 return GDK_FAIL;
908 }
909 dels->parentid = dense_cands->batCacheid;
910 dels->free = sizeof(oid) * (hi - lo);
911 if (odels->ttype == TYPE_void) {
912 for (BUN x = lo; x < hi; x++)
913 ((oid *) dels->base)[x - lo] = x + odels->tseqbase;
914 } else {
915 memcpy(dels->base, Tloc(odels, lo), dels->free);
916 }
917 dense_cands->batDirtydesc = true;
918 dense_cands->tvheap = dels;
919 BATsetcount(dense_cands, dense_cands->batCount - (hi - lo));
920 ALGODEBUG fprintf(stderr, "#BATnegcands(cands=" ALGOBATFMT ","
921 "dels=" ALGOBATFMT ")\n",
922 ALGOBATPAR(dense_cands),
923 ALGOBATPAR(odels));
924 return GDK_SUCCEED;
925}
926