1 | /* |
2 | * This Source Code Form is subject to the terms of the Mozilla Public |
3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
5 | * |
6 | * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V. |
7 | */ |
8 | |
9 | #include "monetdb_config.h" |
10 | #include "gdk.h" |
11 | #include "gdk_private.h" |
12 | #include "gdk_cand.h" |
13 | |
14 | /* create a new, dense candidate list with values from `first' up to, |
15 | * but not including, `last' */ |
16 | static inline BAT * |
17 | newdensecand(oid first, oid last) |
18 | { |
19 | if (last <= first) |
20 | first = last = 0; /* empty range */ |
21 | return BATdense(0, first, last - first); |
22 | } |
23 | |
24 | /* merge two candidate lists and produce a new one |
25 | * |
26 | * candidate lists are VOID-headed BATs with an OID tail which is |
27 | * sorted and unique. |
28 | */ |
29 | BAT * |
30 | BATmergecand(BAT *a, BAT *b) |
31 | { |
32 | BAT *bn; |
33 | oid *restrict p, i; |
34 | struct canditer cia, cib; |
35 | |
36 | BATcheck(a, "BATmergecand" , NULL); |
37 | BATcheck(b, "BATmergecand" , NULL); |
38 | |
39 | canditer_init(&cia, NULL, a); |
40 | canditer_init(&cib, NULL, b); |
41 | |
42 | /* we can return a if b is empty (and v.v.) */ |
43 | if (cia.ncand == 0) { |
44 | return canditer_slice(&cib, 0, cib.ncand); |
45 | } |
46 | if (cib.ncand == 0) { |
47 | return canditer_slice(&cia, 0, cia.ncand); |
48 | } |
49 | /* we can return a if a fully covers b (and v.v) */ |
50 | if (cia.tpe == cand_dense && cib.tpe == cand_dense) { |
51 | /* both are dense */ |
52 | if (cia.seq <= cib.seq && cib.seq <= cia.seq + cia.ncand) { |
53 | /* partial overlap starting with a, or b is |
54 | * smack bang after a */ |
55 | return newdensecand(cia.seq, cia.seq + cia.ncand < cib.seq + cib.ncand ? cib.seq + cib.ncand : cia.seq + cia.ncand); |
56 | } |
57 | if (cib.seq <= cia.seq && cia.seq <= cib.seq + cib.ncand) { |
58 | /* partial overlap starting with b, or a is |
59 | * smack bang after b */ |
60 | return newdensecand(cib.seq, cia.seq + cia.ncand < cib.seq + cib.ncand ? cib.seq + cib.ncand : cia.seq + cia.ncand); |
61 | } |
62 | } |
63 | if (cia.tpe == cand_dense |
64 | && cia.seq <= cib.seq |
65 | && canditer_last(&cia) >= canditer_last(&cib)) { |
66 | return canditer_slice(&cia, 0, cia.ncand); |
67 | } |
68 | if (cib.tpe == cand_dense |
69 | && cib.seq <= cia.seq |
70 | && canditer_last(&cib) >= canditer_last(&cia)) { |
71 | return canditer_slice(&cib, 0, cib.ncand); |
72 | } |
73 | |
74 | bn = COLnew(0, TYPE_oid, cia.ncand + cib.ncand, TRANSIENT); |
75 | if (bn == NULL) |
76 | return NULL; |
77 | p = (oid *) Tloc(bn, 0); |
78 | if (cia.tpe == cand_dense && cib.tpe == cand_dense) { |
79 | /* both lists are dense */ |
80 | if (cia.seq > cib.seq) { |
81 | struct canditer ci; |
82 | ci = cia; |
83 | cia = cib; |
84 | cib = ci; |
85 | } |
86 | /* cia completely before cib */ |
87 | assert(cia.seq + cia.ncand < cib.seq); |
88 | for (i = cia.seq; i < cia.seq + cia.ncand; i++) |
89 | *p++ = i; |
90 | /* there is a gap */ |
91 | for (i = cib.seq; i < cib.seq + cib.ncand; i++) |
92 | *p++ = i; |
93 | } else if (cia.tpe == cand_dense || cib.tpe == cand_dense) { |
94 | if (cib.tpe == cand_dense) { |
95 | struct canditer ci; |
96 | ci = cia; |
97 | cia = cib; |
98 | cib = ci; |
99 | } |
100 | /* only cia is dense */ |
101 | |
102 | /* copy part of cib that's before the start of cia */ |
103 | while (canditer_peek(&cib) < cia.seq) { |
104 | *p++ = canditer_next(&cib); |
105 | } |
106 | /* copy all of cia */ |
107 | for (i = cia.seq; i < cia.seq + cia.ncand; i++) |
108 | *p++ = i; |
109 | /* skip over part of cib that overlaps with cia */ |
110 | canditer_setidx(&cib, canditer_search(&cib, cia.seq + cia.ncand, true)); |
111 | /* copy rest of cib */ |
112 | while (cib.next < cib.ncand) { |
113 | *p++ = canditer_next(&cib); |
114 | } |
115 | } else { |
116 | /* a and b are both not dense */ |
117 | oid ao = canditer_next(&cia); |
118 | oid bo = canditer_next(&cib); |
119 | while (!is_oid_nil(ao) && !is_oid_nil(bo)) { |
120 | if (ao < bo) { |
121 | *p++ = ao; |
122 | ao = canditer_next(&cia); |
123 | } else if (bo < ao) { |
124 | *p++ = bo; |
125 | bo = canditer_next(&cib); |
126 | } else { |
127 | *p++ = ao; |
128 | ao = canditer_next(&cia); |
129 | bo = canditer_next(&cib); |
130 | } |
131 | } |
132 | while (!is_oid_nil(ao)) { |
133 | *p++ = ao; |
134 | ao = canditer_next(&cia); |
135 | } |
136 | while (!is_oid_nil(bo)) { |
137 | *p++ = bo; |
138 | bo = canditer_next(&cib); |
139 | } |
140 | } |
141 | |
142 | /* properties */ |
143 | BATsetcount(bn, (BUN) (p - (oid *) Tloc(bn, 0))); |
144 | bn->trevsorted = BATcount(bn) <= 1; |
145 | bn->tsorted = true; |
146 | bn->tkey = true; |
147 | bn->tnil = false; |
148 | bn->tnonil = true; |
149 | return virtualize(bn); |
150 | } |
151 | |
152 | /* intersect two candidate lists and produce a new one |
153 | * |
154 | * candidate lists are VOID-headed BATs with an OID tail which is |
155 | * sorted and unique. |
156 | */ |
157 | BAT * |
158 | BATintersectcand(BAT *a, BAT *b) |
159 | { |
160 | BAT *bn; |
161 | oid *restrict p; |
162 | struct canditer cia, cib; |
163 | |
164 | BATcheck(a, "BATintersectcand" , NULL); |
165 | BATcheck(b, "BATintersectcand" , NULL); |
166 | |
167 | canditer_init(&cia, NULL, a); |
168 | canditer_init(&cib, NULL, b); |
169 | |
170 | if (cia.ncand == 0 || cib.ncand == 0) { |
171 | return BATdense(0, 0, 0); |
172 | } |
173 | |
174 | if (cia.tpe == cand_dense && cib.tpe == cand_dense) { |
175 | /* both lists are dense */ |
176 | return newdensecand(MAX(cia.seq, cib.seq), MIN(cia.seq + cia.ncand, cib.seq + cib.ncand)); |
177 | } |
178 | |
179 | bn = COLnew(0, TYPE_oid, MIN(cia.ncand, cib.ncand), TRANSIENT); |
180 | if (bn == NULL) |
181 | return NULL; |
182 | p = (oid *) Tloc(bn, 0); |
183 | if (cia.tpe == cand_dense || cib.tpe == cand_dense) { |
184 | if (cib.tpe == cand_dense) { |
185 | struct canditer ci; |
186 | ci = cia; |
187 | cia = cib; |
188 | cib = ci; |
189 | } |
190 | /* only cia is dense */ |
191 | |
192 | /* search for first value in cib that is contained in cia */ |
193 | canditer_setidx(&cib, canditer_search(&cib, cia.seq, true)); |
194 | oid bo; |
195 | while (!is_oid_nil(bo = canditer_next(&cib)) && bo < cia.seq + cia.ncand) |
196 | *p++ = bo; |
197 | } else { |
198 | /* a and b are both not dense */ |
199 | oid ao = canditer_next(&cia); |
200 | oid bo = canditer_next(&cib); |
201 | while (!is_oid_nil(ao) && !is_oid_nil(bo)) { |
202 | if (ao < bo) |
203 | ao = canditer_next(&cia); |
204 | else if (bo < ao) |
205 | bo = canditer_next(&cib); |
206 | else { |
207 | *p++ = ao; |
208 | ao = canditer_next(&cia); |
209 | bo = canditer_next(&cib); |
210 | } |
211 | } |
212 | } |
213 | |
214 | /* properties */ |
215 | BATsetcount(bn, (BUN) (p - (oid *) Tloc(bn, 0))); |
216 | bn->trevsorted = BATcount(bn) <= 1; |
217 | bn->tsorted = true; |
218 | bn->tkey = true; |
219 | bn->tnil = false; |
220 | bn->tnonil = true; |
221 | return virtualize(bn); |
222 | } |
223 | |
224 | /* calculate the difference of two candidate lists and produce a new one |
225 | */ |
226 | BAT * |
227 | BATdiffcand(BAT *a, BAT *b) |
228 | { |
229 | BAT *bn; |
230 | oid *restrict p; |
231 | struct canditer cia, cib; |
232 | |
233 | BATcheck(a, "BATdiffcand" , NULL); |
234 | BATcheck(b, "BATdiffcand" , NULL); |
235 | |
236 | canditer_init(&cia, NULL, a); |
237 | canditer_init(&cib, NULL, b); |
238 | |
239 | if (cia.ncand == 0) |
240 | return BATdense(0, 0, 0); |
241 | if (cia.ncand == 0) |
242 | return canditer_slice(&cia, 0, cia.ncand); |
243 | |
244 | if (cib.seq > canditer_last(&cia) || canditer_last(&cib) < cia.seq) { |
245 | /* no overlap, return a */ |
246 | return canditer_slice(&cia, 0, cia.ncand); |
247 | } |
248 | |
249 | if (cia.tpe == cand_dense && cib.tpe == cand_dense) { |
250 | /* both a and b are dense */ |
251 | if (cia.seq < cib.seq) { |
252 | /* a starts before b */ |
253 | if (cia.seq + cia.ncand <= cib.seq + cib.ncand) { |
254 | /* b overlaps with end of a */ |
255 | return canditer_slice(&cia, 0, cib.seq - cia.seq); |
256 | } |
257 | /* b is subset of a */ |
258 | return canditer_slice2(&cia, 0, cib.seq - cia.seq, |
259 | cib.seq + cib.ncand - cia.seq, |
260 | cia.ncand); |
261 | } else { |
262 | /* cia.seq >= cib.seq */ |
263 | if (cia.seq + cia.ncand > cib.seq + cib.ncand) { |
264 | /* b overlaps with beginning of a */ |
265 | return canditer_slice(&cia, cib.seq + cib.ncand - cia.seq, cia.ncand); |
266 | } |
267 | /* a is subset f b */ |
268 | return BATdense(0, 0, 0); |
269 | } |
270 | } |
271 | if (cib.tpe == cand_dense) { |
272 | /* b is dense and a is not: we can copy the part of a |
273 | * that is before the start of b and the part of a |
274 | * that is after the end of b */ |
275 | return canditer_slice2(&cia, 0, |
276 | canditer_search(&cia, cib.seq, true), |
277 | canditer_search(&cia, cib.seq + cib.ncand, true), |
278 | cia.ncand); |
279 | } |
280 | |
281 | /* b is not dense */ |
282 | bn = COLnew(0, TYPE_oid, BATcount(a), TRANSIENT); |
283 | if (bn == NULL) |
284 | return NULL; |
285 | p = Tloc(bn, 0); |
286 | /* find first position in b that is in range of a */ |
287 | canditer_setidx(&cib, canditer_search(&cib, cia.seq, true)); |
288 | oid ob = canditer_next(&cib); |
289 | for (BUN i = 0; i < cia.ncand; i++) { |
290 | oid oa = canditer_next(&cia); |
291 | while (!is_oid_nil(ob) && ob < oa) { |
292 | ob = canditer_next(&cib); |
293 | } |
294 | if (!is_oid_nil(ob) && oa < ob) |
295 | *p++ = oa; |
296 | } |
297 | |
298 | /* properties */ |
299 | BATsetcount(bn, (BUN) (p - (oid *) Tloc(bn, 0))); |
300 | bn->trevsorted = BATcount(bn) <= 1; |
301 | bn->tsorted = true; |
302 | bn->tkey = true; |
303 | bn->tnil = false; |
304 | bn->tnonil = true; |
305 | return virtualize(bn); |
306 | } |
307 | |
308 | /* return offset of first value in cand that is >= o */ |
309 | static inline BUN |
310 | binsearchcand(const oid *cand, BUN hi, oid o) |
311 | { |
312 | BUN lo = 0; |
313 | |
314 | if (o <= cand[lo]) |
315 | return 0; |
316 | if (o > cand[hi]) |
317 | return hi + 1; |
318 | /* loop invariant: cand[lo] < o <= cand[hi] */ |
319 | while (hi > lo + 1) { |
320 | BUN mid = (lo + hi) / 2; |
321 | if (cand[mid] == o) |
322 | return mid; |
323 | if (cand[mid] < o) |
324 | lo = mid; |
325 | else |
326 | hi = mid; |
327 | } |
328 | return hi; |
329 | } |
330 | |
331 | /* initialize a candidate iterator, return number of iterations */ |
332 | BUN |
333 | canditer_init(struct canditer *ci, BAT *b, BAT *s) |
334 | { |
335 | assert(ci != NULL); |
336 | |
337 | if (s == NULL) { |
338 | if (b == NULL) { |
339 | /* trivially empty candidate list */ |
340 | *ci = (struct canditer) { |
341 | .tpe = cand_dense, |
342 | }; |
343 | return 0; |
344 | } |
345 | /* every row is a candidate */ |
346 | *ci = (struct canditer) { |
347 | .tpe = cand_dense, |
348 | .seq = b->hseqbase, |
349 | .ncand = BATcount(b), |
350 | }; |
351 | return ci->ncand; |
352 | } |
353 | |
354 | assert(ATOMtype(s->ttype) == TYPE_oid); |
355 | assert(s->tsorted); |
356 | assert(s->tkey); |
357 | assert(s->tnonil); |
358 | assert(s->ttype == TYPE_void || s->tvheap == NULL); |
359 | |
360 | BUN cnt = BATcount(s); |
361 | |
362 | if (cnt == 0 || (b != NULL && BATcount(b) == 0)) { |
363 | /* candidate list for empty BAT or empty candidate list */ |
364 | *ci = (struct canditer) { |
365 | .tpe = cand_dense, |
366 | .s = s, |
367 | }; |
368 | return 0; |
369 | } |
370 | |
371 | *ci = (struct canditer) { |
372 | .seq = s->tseqbase, |
373 | .s = s, |
374 | }; |
375 | |
376 | if (s->ttype == TYPE_void) { |
377 | assert(!is_oid_nil(ci->seq)); |
378 | if (s->tvheap) { |
379 | assert(s->tvheap->free % SIZEOF_OID == 0); |
380 | ci->noids = s->tvheap->free / SIZEOF_OID; |
381 | if (ci->noids > 0) { |
382 | ci->tpe = cand_except; |
383 | ci->oids = (const oid *) s->tvheap->base; |
384 | } else { |
385 | /* why the vheap? */ |
386 | ci->tpe = cand_dense; |
387 | ci->oids = NULL; |
388 | } |
389 | } else { |
390 | ci->tpe = cand_dense; |
391 | } |
392 | } else if (is_oid_nil(ci->seq)) { |
393 | ci->tpe = cand_materialized; |
394 | ci->oids = (const oid *) s->theap.base; |
395 | ci->seq = ci->oids[0]; |
396 | ci->noids = cnt; |
397 | if (ci->oids[ci->noids - 1] - ci->oids[0] == ci->noids - 1) { |
398 | /* actually dense */ |
399 | ci->tpe = cand_dense; |
400 | ci->oids = NULL; |
401 | } |
402 | } else { |
403 | /* materialized dense: no exceptions */ |
404 | ci->tpe = cand_dense; |
405 | } |
406 | switch (ci->tpe) { |
407 | case cand_dense: |
408 | case_cand_dense: |
409 | if (b != NULL) { |
410 | if (ci->seq + cnt <= b->hseqbase || |
411 | ci->seq >= b->hseqbase + BATcount(b)) { |
412 | ci->ncand = 0; |
413 | return 0; |
414 | } |
415 | if (b->hseqbase > ci->seq) { |
416 | cnt -= b->hseqbase - ci->seq; |
417 | ci->offset += b->hseqbase - ci->seq; |
418 | ci->seq = b->hseqbase; |
419 | } |
420 | if (ci->seq + cnt > b->hseqbase + BATcount(b)) |
421 | cnt = b->hseqbase + BATcount(b) - ci->seq; |
422 | } |
423 | break; |
424 | case cand_materialized: |
425 | if (b != NULL) { |
426 | if (ci->oids[ci->noids - 1] < b->hseqbase) { |
427 | *ci = (struct canditer) { |
428 | .tpe = cand_dense, |
429 | .s = s, |
430 | }; |
431 | return 0; |
432 | } |
433 | if (ci->oids[0] < b->hseqbase) { |
434 | BUN lo = 0; |
435 | BUN hi = cnt - 1; |
436 | const oid o = b->hseqbase; |
437 | /* loop invariant: |
438 | * ci->oids[lo] < o <= ci->oids[hi] */ |
439 | while (hi - lo > 1) { |
440 | BUN mid = (lo + hi) / 2; |
441 | if (ci->oids[mid] >= o) |
442 | hi = mid; |
443 | else |
444 | lo = mid; |
445 | } |
446 | ci->offset = hi; |
447 | cnt -= hi; |
448 | ci->oids += hi; |
449 | ci->seq = ci->oids[0]; |
450 | } |
451 | if (ci->oids[cnt - 1] >= b->hseqbase + BATcount(b)) { |
452 | BUN lo = 0; |
453 | BUN hi = cnt - 1; |
454 | const oid o = b->hseqbase + BATcount(b); |
455 | /* loop invariant: |
456 | * ci->oids[lo] < o <= ci->oids[hi] */ |
457 | while (hi - lo > 1) { |
458 | BUN mid = (lo + hi) / 2; |
459 | if (ci->oids[mid] >= o) |
460 | hi = mid; |
461 | else |
462 | lo = mid; |
463 | } |
464 | cnt = hi; |
465 | } |
466 | ci->noids = cnt; |
467 | } |
468 | break; |
469 | case cand_except: |
470 | /* exceptions must all be within range of s */ |
471 | assert(ci->oids[0] >= ci->seq); |
472 | assert(ci->oids[ci->noids - 1] < ci->seq + cnt + ci->noids); |
473 | if (b != NULL) { |
474 | if (ci->seq + cnt + ci->noids <= b->hseqbase || |
475 | ci->seq >= b->hseqbase + BATcount(b)) { |
476 | *ci = (struct canditer) { |
477 | .tpe = cand_dense, |
478 | }; |
479 | return 0; |
480 | } |
481 | } |
482 | /* prune exceptions at either end of range of s */ |
483 | while (ci->noids > 0 && ci->oids[0] == ci->seq) { |
484 | ci->noids--; |
485 | ci->oids++; |
486 | ci->seq++; |
487 | } |
488 | while (ci->noids > 0 && |
489 | ci->oids[ci->noids - 1] == ci->seq + cnt + ci->noids - 1) |
490 | ci->noids--; |
491 | /* WARNING: don't reset ci->oids to NULL when setting |
492 | * ci->tpe to cand_dense below: BATprojectchain will |
493 | * fail */ |
494 | if (ci->noids == 0) { |
495 | ci->tpe = cand_dense; |
496 | goto case_cand_dense; |
497 | } |
498 | if (b != NULL) { |
499 | BUN p; |
500 | p = binsearchcand(ci->oids, ci->noids - 1, b->hseqbase); |
501 | if (p == ci->noids) { |
502 | /* all exceptions before start of b */ |
503 | ci->offset = b->hseqbase - ci->seq - ci->noids; |
504 | cnt = ci->seq + cnt + ci->noids - b->hseqbase; |
505 | ci->seq = b->hseqbase; |
506 | ci->noids = 0; |
507 | ci->tpe = cand_dense; |
508 | break; |
509 | } |
510 | assert(b->hseqbase > ci->seq || p == 0); |
511 | if (b->hseqbase > ci->seq) { |
512 | /* skip candidates, possibly including |
513 | * exceptions */ |
514 | ci->oids += p; |
515 | ci->noids -= p; |
516 | p = b->hseqbase - ci->seq - p; |
517 | cnt -= p; |
518 | ci->offset += p; |
519 | ci->seq = b->hseqbase; |
520 | } |
521 | if (ci->seq + cnt + ci->noids > b->hseqbase + BATcount(b)) { |
522 | p = binsearchcand(ci->oids, ci->noids - 1, |
523 | b->hseqbase + BATcount(b)); |
524 | ci->noids = p; |
525 | cnt = b->hseqbase + BATcount(b) - ci->seq - ci->noids; |
526 | } |
527 | while (ci->noids > 0 && ci->oids[0] == ci->seq) { |
528 | ci->noids--; |
529 | ci->oids++; |
530 | ci->seq++; |
531 | } |
532 | while (ci->noids > 0 && |
533 | ci->oids[ci->noids - 1] == ci->seq + cnt + ci->noids - 1) |
534 | ci->noids--; |
535 | if (ci->noids == 0) { |
536 | ci->tpe = cand_dense; |
537 | goto case_cand_dense; |
538 | } |
539 | } |
540 | break; |
541 | } |
542 | ci->ncand = cnt; |
543 | return cnt; |
544 | } |
545 | |
546 | /* return the next candidate without advancing */ |
547 | oid |
548 | canditer_peek(struct canditer *ci) |
549 | { |
550 | if (ci->next == ci->ncand) |
551 | return oid_nil; |
552 | switch (ci->tpe) { |
553 | case cand_dense: |
554 | return ci->seq + ci->next; |
555 | case cand_materialized: |
556 | assert(ci->next < ci->noids); |
557 | return ci->oids[ci->next]; |
558 | case cand_except: |
559 | /* work around compiler error: control reaches end of |
560 | * non-void function */ |
561 | break; |
562 | } |
563 | oid o = ci->seq + ci->add + ci->next; |
564 | while (ci->add < ci->noids && o == ci->oids[ci->add]) { |
565 | ci->add++; |
566 | o++; |
567 | } |
568 | return o; |
569 | } |
570 | |
571 | /* return the previous candidate */ |
572 | oid |
573 | canditer_prev(struct canditer *ci) |
574 | { |
575 | if (ci->next == 0) |
576 | return oid_nil; |
577 | switch (ci->tpe) { |
578 | case cand_dense: |
579 | return ci->seq + --ci->next; |
580 | case cand_materialized: |
581 | return ci->oids[--ci->next]; |
582 | case cand_except: |
583 | break; |
584 | } |
585 | oid o = ci->seq + ci->add + --ci->next; |
586 | while (ci->add > 0 && o == ci->oids[ci->add - 1]) { |
587 | ci->add--; |
588 | o--; |
589 | } |
590 | return o; |
591 | } |
592 | |
593 | /* return the previous candidate without retreating */ |
594 | oid |
595 | canditer_peekprev(struct canditer *ci) |
596 | { |
597 | if (ci->next == 0) |
598 | return oid_nil; |
599 | switch (ci->tpe) { |
600 | case cand_dense: |
601 | return ci->seq + ci->next - 1; |
602 | case cand_materialized: |
603 | return ci->oids[ci->next - 1]; |
604 | case cand_except: |
605 | break; |
606 | } |
607 | oid o = ci->seq + ci->add + ci->next - 1; |
608 | while (ci->add > 0 && o == ci->oids[ci->add - 1]) { |
609 | ci->add--; |
610 | o--; |
611 | } |
612 | return o; |
613 | } |
614 | |
615 | /* return the last candidate */ |
616 | oid |
617 | canditer_last(struct canditer *ci) |
618 | { |
619 | if (ci->ncand == 0) |
620 | return oid_nil; |
621 | switch (ci->tpe) { |
622 | case cand_dense: |
623 | return ci->seq + ci->ncand - 1; |
624 | case cand_materialized: |
625 | return ci->oids[ci->ncand - 1]; |
626 | case cand_except: |
627 | /* work around compiler error: control reaches end of |
628 | * non-void function */ |
629 | break; |
630 | } |
631 | return ci->seq + ci->ncand + ci->noids - 1; |
632 | } |
633 | |
634 | /* return the candidate at the given index */ |
635 | oid |
636 | canditer_idx(struct canditer *ci, BUN p) |
637 | { |
638 | if (p >= ci->ncand) |
639 | return oid_nil; |
640 | switch (ci->tpe) { |
641 | case cand_dense: |
642 | return ci->seq + p; |
643 | case cand_materialized: |
644 | return ci->oids[p]; |
645 | case cand_except: |
646 | /* work around compiler error: control reaches end of |
647 | * non-void function */ |
648 | break; |
649 | } |
650 | oid o = ci->seq + p; |
651 | if (o < ci->oids[0]) |
652 | return o; |
653 | if (o + ci->noids > ci->oids[ci->noids - 1]) |
654 | return o + ci->noids; |
655 | BUN lo = 0, hi = ci->noids - 1; |
656 | while (hi - lo > 1) { |
657 | BUN mid = (hi + lo) / 2; |
658 | if (ci->oids[mid] - mid > o) |
659 | hi = mid; |
660 | else |
661 | lo = mid; |
662 | } |
663 | return o + hi; |
664 | } |
665 | |
666 | /* set the index for the next candidate to be returned */ |
667 | void |
668 | canditer_setidx(struct canditer *ci, BUN p) |
669 | { |
670 | if (p != ci->next) { |
671 | if (p >= ci->ncand) { |
672 | ci->next = ci->ncand; |
673 | if (ci->tpe == cand_except) |
674 | ci->add = ci->noids; |
675 | } else { |
676 | ci->next = p; |
677 | if (ci->tpe == cand_except) |
678 | ci->add = canditer_idx(ci, p) - ci->seq - p; |
679 | } |
680 | } |
681 | } |
682 | |
683 | /* reset */ |
684 | void |
685 | canditer_reset(struct canditer *ci) |
686 | { |
687 | ci->next = 0; |
688 | ci->add = 0; |
689 | } |
690 | |
691 | /* return index of given candidate if it occurs; if the candidate does |
692 | * not occur, if next is set, return index of next larger candidate, |
693 | * if next is not set, return BUN_NONE */ |
694 | BUN |
695 | canditer_search(struct canditer *ci, oid o, bool next) |
696 | { |
697 | BUN p; |
698 | |
699 | switch (ci->tpe) { |
700 | case cand_dense: |
701 | if (o < ci->seq) |
702 | return next ? 0 : BUN_NONE; |
703 | if (o >= ci->seq + ci->ncand) |
704 | return next ? ci->ncand : BUN_NONE; |
705 | return o - ci->seq; |
706 | case cand_materialized: |
707 | if (ci->noids == 0) |
708 | return 0; |
709 | p = binsearchcand(ci->oids, ci->noids - 1, o); |
710 | if (!next && (p == ci->noids || ci->oids[p] != o)) |
711 | return BUN_NONE; |
712 | return p; |
713 | case cand_except: |
714 | break; |
715 | } |
716 | if (o < ci->seq) |
717 | return next ? 0 : BUN_NONE; |
718 | if (o >= ci->seq + ci->ncand + ci->noids) |
719 | return next ? ci->ncand : BUN_NONE; |
720 | p = binsearchcand(ci->oids, ci->noids - 1, o); |
721 | if (next || p == ci->noids || ci->oids[p] != o) |
722 | return o - ci->seq - p; |
723 | return BUN_NONE; |
724 | } |
725 | |
726 | /* return either an actual BATslice or a new BAT that contains the |
727 | * "virtual" slice of the input candidate list BAT; except, unlike |
728 | * BATslice, the hseqbase of the returned BAT is 0 */ |
729 | BAT * |
730 | canditer_slice(struct canditer *ci, BUN lo, BUN hi) |
731 | { |
732 | BAT *bn; |
733 | oid o; |
734 | BUN add; |
735 | |
736 | assert(ci != NULL); |
737 | |
738 | if (lo >= ci->ncand || lo >= hi) |
739 | return BATdense(0, 0, 0); |
740 | if (hi > ci->ncand) |
741 | hi = ci->ncand; |
742 | switch (ci->tpe) { |
743 | case cand_materialized: |
744 | if (ci->s) { |
745 | bn = BATslice(ci->s, lo + ci->offset, hi + ci->offset); |
746 | BAThseqbase(bn, 0); |
747 | return bn; |
748 | } |
749 | bn = COLnew(0, TYPE_oid, hi - lo, TRANSIENT); |
750 | if (bn == NULL) |
751 | return NULL; |
752 | BATsetcount(bn, hi - lo); |
753 | memcpy(Tloc(bn, 0), ci->oids + lo, (hi - lo) * sizeof(oid)); |
754 | break; |
755 | default: /* really: case cand_dense: */ |
756 | return BATdense(0, ci->seq + lo, hi - lo); |
757 | case cand_except: |
758 | o = canditer_idx(ci, lo); |
759 | add = o - ci->seq - lo; |
760 | assert(add <= ci->noids); |
761 | if (add == ci->noids || o + hi - lo < ci->oids[add]) { |
762 | /* after last exception or before next |
763 | * exception: return dense sequence */ |
764 | return BATdense(0, o, hi - lo); |
765 | } |
766 | bn = COLnew(0, TYPE_oid, hi - lo, TRANSIENT); |
767 | if (bn == NULL) |
768 | return NULL; |
769 | BATsetcount(bn, hi - lo); |
770 | for (oid *dst = Tloc(bn, 0); lo < hi; lo++) { |
771 | while (add < ci->noids && o == ci->oids[add]) { |
772 | o++; |
773 | add++; |
774 | } |
775 | *dst++ = o++; |
776 | } |
777 | break; |
778 | } |
779 | bn->tsorted = true; |
780 | bn->trevsorted = BATcount(bn) <= 1; |
781 | bn->tkey = true; |
782 | bn->tseqbase = oid_nil; |
783 | bn->tnil = false; |
784 | bn->tnonil = true; |
785 | return virtualize(bn); |
786 | } |
787 | |
788 | /* return the combination of two slices */ |
789 | BAT * |
790 | canditer_slice2(struct canditer *ci, BUN lo1, BUN hi1, BUN lo2, BUN hi2) |
791 | { |
792 | BAT *bn; |
793 | oid o; |
794 | BUN add; |
795 | |
796 | assert(lo1 <= hi1); |
797 | assert(lo2 <= hi2); |
798 | assert(hi1 <= lo2 || (lo2 == 0 && hi2 == 0)); |
799 | |
800 | if (hi1 == lo2) /* consecutive slices: combine into one */ |
801 | return canditer_slice(ci, lo1, hi2); |
802 | if (lo2 == hi2 || hi1 >= ci->ncand || lo2 >= ci->ncand) { |
803 | /* empty second slice */ |
804 | return canditer_slice(ci, lo1, hi1); |
805 | } |
806 | if (lo1 == hi1) /* empty first slice */ |
807 | return canditer_slice(ci, lo2, hi2); |
808 | if (lo1 >= ci->ncand) /* out of range */ |
809 | return BATdense(0, 0, 0); |
810 | |
811 | if (hi2 >= ci->ncand) |
812 | hi2 = ci->ncand; |
813 | |
814 | bn = COLnew(0, TYPE_oid, hi1 - lo1 + hi2 - lo2, TRANSIENT); |
815 | if (bn == NULL) |
816 | return NULL; |
817 | BATsetcount(bn, hi1 - lo1 + hi2 - lo2); |
818 | bn->tsorted = true; |
819 | bn->trevsorted = BATcount(bn) <= 1; |
820 | bn->tkey = true; |
821 | bn->tseqbase = oid_nil; |
822 | bn->tnil = false; |
823 | bn->tnonil = true; |
824 | |
825 | oid *dst = Tloc(bn, 0); |
826 | |
827 | switch (ci->tpe) { |
828 | case cand_materialized: |
829 | memcpy(dst, ci->oids + lo1, (hi1 - lo1) * sizeof(oid)); |
830 | memcpy(dst + hi1 - lo1, ci->oids + lo2, (hi2 - lo2) * sizeof(oid)); |
831 | break; |
832 | case cand_dense: |
833 | while (lo1 < hi1) |
834 | *dst++ = ci->seq + lo1++; |
835 | while (lo2 < hi2) |
836 | *dst++ = ci->seq + lo2++; |
837 | break; |
838 | case cand_except: |
839 | o = canditer_idx(ci, lo1); |
840 | add = o - ci->seq - lo1; |
841 | assert(add <= ci->noids); |
842 | if (add == ci->noids) { |
843 | /* after last exception: return dense sequence */ |
844 | while (lo1 < hi1) |
845 | *dst++ = ci->seq + add + lo1++; |
846 | } else { |
847 | while (lo1 < hi1) { |
848 | while (add < ci->noids && o == ci->oids[add]) { |
849 | o++; |
850 | add++; |
851 | } |
852 | *dst++ = o++; |
853 | lo1++; |
854 | } |
855 | } |
856 | o = canditer_idx(ci, lo2); |
857 | add = o - ci->seq - lo2; |
858 | assert(add <= ci->noids); |
859 | if (add == ci->noids) { |
860 | /* after last exception: return dense sequence */ |
861 | while (lo2 < hi2) |
862 | *dst++ = ci->seq + add + lo2++; |
863 | } else { |
864 | while (lo2 < hi2) { |
865 | while (add < ci->noids && o == ci->oids[add]) { |
866 | o++; |
867 | add++; |
868 | } |
869 | *dst++ = o++; |
870 | lo2++; |
871 | } |
872 | } |
873 | } |
874 | return virtualize(bn); |
875 | } |
876 | |
877 | gdk_return |
878 | BATnegcands(BAT *dense_cands, BAT *odels) |
879 | { |
880 | const char *nme; |
881 | Heap *dels; |
882 | BUN lo, hi; |
883 | |
884 | assert(BATtdense(dense_cands)); |
885 | assert(dense_cands->ttype == TYPE_void); |
886 | assert(dense_cands->batRole == TRANSIENT); |
887 | |
888 | if (BATcount(odels) == 0) |
889 | return GDK_SUCCEED; |
890 | |
891 | lo = SORTfndfirst(odels, &dense_cands->tseqbase); |
892 | hi = SORTfndfirst(odels, &(oid) {dense_cands->tseqbase + BATcount(dense_cands)}); |
893 | if (lo == hi) |
894 | return GDK_SUCCEED; |
895 | |
896 | nme = BBP_physical(dense_cands->batCacheid); |
897 | if ((dels = (Heap*)GDKzalloc(sizeof(Heap))) == NULL || |
898 | (dels->farmid = BBPselectfarm(dense_cands->batRole, dense_cands->ttype, varheap)) < 0){ |
899 | GDKfree(dels); |
900 | return GDK_FAIL; |
901 | } |
902 | strconcat_len(dels->filename, sizeof(dels->filename), |
903 | nme, ".theap" , NULL); |
904 | |
905 | if (HEAPalloc(dels, hi - lo, sizeof(oid)) != GDK_SUCCEED) { |
906 | GDKfree(dels); |
907 | return GDK_FAIL; |
908 | } |
909 | dels->parentid = dense_cands->batCacheid; |
910 | dels->free = sizeof(oid) * (hi - lo); |
911 | if (odels->ttype == TYPE_void) { |
912 | for (BUN x = lo; x < hi; x++) |
913 | ((oid *) dels->base)[x - lo] = x + odels->tseqbase; |
914 | } else { |
915 | memcpy(dels->base, Tloc(odels, lo), dels->free); |
916 | } |
917 | dense_cands->batDirtydesc = true; |
918 | dense_cands->tvheap = dels; |
919 | BATsetcount(dense_cands, dense_cands->batCount - (hi - lo)); |
920 | ALGODEBUG fprintf(stderr, "#BATnegcands(cands=" ALGOBATFMT "," |
921 | "dels=" ALGOBATFMT ")\n" , |
922 | ALGOBATPAR(dense_cands), |
923 | ALGOBATPAR(odels)); |
924 | return GDK_SUCCEED; |
925 | } |
926 | |