| 1 | /* |
| 2 | * This Source Code Form is subject to the terms of the Mozilla Public |
| 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
| 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
| 5 | * |
| 6 | * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V. |
| 7 | */ |
| 8 | |
| 9 | #include "monetdb_config.h" |
| 10 | #include "gdk.h" |
| 11 | #include "gdk_private.h" |
| 12 | #include "gdk_cand.h" |
| 13 | |
| 14 | /* create a new, dense candidate list with values from `first' up to, |
| 15 | * but not including, `last' */ |
| 16 | static inline BAT * |
| 17 | newdensecand(oid first, oid last) |
| 18 | { |
| 19 | if (last <= first) |
| 20 | first = last = 0; /* empty range */ |
| 21 | return BATdense(0, first, last - first); |
| 22 | } |
| 23 | |
| 24 | /* merge two candidate lists and produce a new one |
| 25 | * |
| 26 | * candidate lists are VOID-headed BATs with an OID tail which is |
| 27 | * sorted and unique. |
| 28 | */ |
| 29 | BAT * |
| 30 | BATmergecand(BAT *a, BAT *b) |
| 31 | { |
| 32 | BAT *bn; |
| 33 | oid *restrict p, i; |
| 34 | struct canditer cia, cib; |
| 35 | |
| 36 | BATcheck(a, "BATmergecand" , NULL); |
| 37 | BATcheck(b, "BATmergecand" , NULL); |
| 38 | |
| 39 | canditer_init(&cia, NULL, a); |
| 40 | canditer_init(&cib, NULL, b); |
| 41 | |
| 42 | /* we can return a if b is empty (and v.v.) */ |
| 43 | if (cia.ncand == 0) { |
| 44 | return canditer_slice(&cib, 0, cib.ncand); |
| 45 | } |
| 46 | if (cib.ncand == 0) { |
| 47 | return canditer_slice(&cia, 0, cia.ncand); |
| 48 | } |
| 49 | /* we can return a if a fully covers b (and v.v) */ |
| 50 | if (cia.tpe == cand_dense && cib.tpe == cand_dense) { |
| 51 | /* both are dense */ |
| 52 | if (cia.seq <= cib.seq && cib.seq <= cia.seq + cia.ncand) { |
| 53 | /* partial overlap starting with a, or b is |
| 54 | * smack bang after a */ |
| 55 | return newdensecand(cia.seq, cia.seq + cia.ncand < cib.seq + cib.ncand ? cib.seq + cib.ncand : cia.seq + cia.ncand); |
| 56 | } |
| 57 | if (cib.seq <= cia.seq && cia.seq <= cib.seq + cib.ncand) { |
| 58 | /* partial overlap starting with b, or a is |
| 59 | * smack bang after b */ |
| 60 | return newdensecand(cib.seq, cia.seq + cia.ncand < cib.seq + cib.ncand ? cib.seq + cib.ncand : cia.seq + cia.ncand); |
| 61 | } |
| 62 | } |
| 63 | if (cia.tpe == cand_dense |
| 64 | && cia.seq <= cib.seq |
| 65 | && canditer_last(&cia) >= canditer_last(&cib)) { |
| 66 | return canditer_slice(&cia, 0, cia.ncand); |
| 67 | } |
| 68 | if (cib.tpe == cand_dense |
| 69 | && cib.seq <= cia.seq |
| 70 | && canditer_last(&cib) >= canditer_last(&cia)) { |
| 71 | return canditer_slice(&cib, 0, cib.ncand); |
| 72 | } |
| 73 | |
| 74 | bn = COLnew(0, TYPE_oid, cia.ncand + cib.ncand, TRANSIENT); |
| 75 | if (bn == NULL) |
| 76 | return NULL; |
| 77 | p = (oid *) Tloc(bn, 0); |
| 78 | if (cia.tpe == cand_dense && cib.tpe == cand_dense) { |
| 79 | /* both lists are dense */ |
| 80 | if (cia.seq > cib.seq) { |
| 81 | struct canditer ci; |
| 82 | ci = cia; |
| 83 | cia = cib; |
| 84 | cib = ci; |
| 85 | } |
| 86 | /* cia completely before cib */ |
| 87 | assert(cia.seq + cia.ncand < cib.seq); |
| 88 | for (i = cia.seq; i < cia.seq + cia.ncand; i++) |
| 89 | *p++ = i; |
| 90 | /* there is a gap */ |
| 91 | for (i = cib.seq; i < cib.seq + cib.ncand; i++) |
| 92 | *p++ = i; |
| 93 | } else if (cia.tpe == cand_dense || cib.tpe == cand_dense) { |
| 94 | if (cib.tpe == cand_dense) { |
| 95 | struct canditer ci; |
| 96 | ci = cia; |
| 97 | cia = cib; |
| 98 | cib = ci; |
| 99 | } |
| 100 | /* only cia is dense */ |
| 101 | |
| 102 | /* copy part of cib that's before the start of cia */ |
| 103 | while (canditer_peek(&cib) < cia.seq) { |
| 104 | *p++ = canditer_next(&cib); |
| 105 | } |
| 106 | /* copy all of cia */ |
| 107 | for (i = cia.seq; i < cia.seq + cia.ncand; i++) |
| 108 | *p++ = i; |
| 109 | /* skip over part of cib that overlaps with cia */ |
| 110 | canditer_setidx(&cib, canditer_search(&cib, cia.seq + cia.ncand, true)); |
| 111 | /* copy rest of cib */ |
| 112 | while (cib.next < cib.ncand) { |
| 113 | *p++ = canditer_next(&cib); |
| 114 | } |
| 115 | } else { |
| 116 | /* a and b are both not dense */ |
| 117 | oid ao = canditer_next(&cia); |
| 118 | oid bo = canditer_next(&cib); |
| 119 | while (!is_oid_nil(ao) && !is_oid_nil(bo)) { |
| 120 | if (ao < bo) { |
| 121 | *p++ = ao; |
| 122 | ao = canditer_next(&cia); |
| 123 | } else if (bo < ao) { |
| 124 | *p++ = bo; |
| 125 | bo = canditer_next(&cib); |
| 126 | } else { |
| 127 | *p++ = ao; |
| 128 | ao = canditer_next(&cia); |
| 129 | bo = canditer_next(&cib); |
| 130 | } |
| 131 | } |
| 132 | while (!is_oid_nil(ao)) { |
| 133 | *p++ = ao; |
| 134 | ao = canditer_next(&cia); |
| 135 | } |
| 136 | while (!is_oid_nil(bo)) { |
| 137 | *p++ = bo; |
| 138 | bo = canditer_next(&cib); |
| 139 | } |
| 140 | } |
| 141 | |
| 142 | /* properties */ |
| 143 | BATsetcount(bn, (BUN) (p - (oid *) Tloc(bn, 0))); |
| 144 | bn->trevsorted = BATcount(bn) <= 1; |
| 145 | bn->tsorted = true; |
| 146 | bn->tkey = true; |
| 147 | bn->tnil = false; |
| 148 | bn->tnonil = true; |
| 149 | return virtualize(bn); |
| 150 | } |
| 151 | |
| 152 | /* intersect two candidate lists and produce a new one |
| 153 | * |
| 154 | * candidate lists are VOID-headed BATs with an OID tail which is |
| 155 | * sorted and unique. |
| 156 | */ |
| 157 | BAT * |
| 158 | BATintersectcand(BAT *a, BAT *b) |
| 159 | { |
| 160 | BAT *bn; |
| 161 | oid *restrict p; |
| 162 | struct canditer cia, cib; |
| 163 | |
| 164 | BATcheck(a, "BATintersectcand" , NULL); |
| 165 | BATcheck(b, "BATintersectcand" , NULL); |
| 166 | |
| 167 | canditer_init(&cia, NULL, a); |
| 168 | canditer_init(&cib, NULL, b); |
| 169 | |
| 170 | if (cia.ncand == 0 || cib.ncand == 0) { |
| 171 | return BATdense(0, 0, 0); |
| 172 | } |
| 173 | |
| 174 | if (cia.tpe == cand_dense && cib.tpe == cand_dense) { |
| 175 | /* both lists are dense */ |
| 176 | return newdensecand(MAX(cia.seq, cib.seq), MIN(cia.seq + cia.ncand, cib.seq + cib.ncand)); |
| 177 | } |
| 178 | |
| 179 | bn = COLnew(0, TYPE_oid, MIN(cia.ncand, cib.ncand), TRANSIENT); |
| 180 | if (bn == NULL) |
| 181 | return NULL; |
| 182 | p = (oid *) Tloc(bn, 0); |
| 183 | if (cia.tpe == cand_dense || cib.tpe == cand_dense) { |
| 184 | if (cib.tpe == cand_dense) { |
| 185 | struct canditer ci; |
| 186 | ci = cia; |
| 187 | cia = cib; |
| 188 | cib = ci; |
| 189 | } |
| 190 | /* only cia is dense */ |
| 191 | |
| 192 | /* search for first value in cib that is contained in cia */ |
| 193 | canditer_setidx(&cib, canditer_search(&cib, cia.seq, true)); |
| 194 | oid bo; |
| 195 | while (!is_oid_nil(bo = canditer_next(&cib)) && bo < cia.seq + cia.ncand) |
| 196 | *p++ = bo; |
| 197 | } else { |
| 198 | /* a and b are both not dense */ |
| 199 | oid ao = canditer_next(&cia); |
| 200 | oid bo = canditer_next(&cib); |
| 201 | while (!is_oid_nil(ao) && !is_oid_nil(bo)) { |
| 202 | if (ao < bo) |
| 203 | ao = canditer_next(&cia); |
| 204 | else if (bo < ao) |
| 205 | bo = canditer_next(&cib); |
| 206 | else { |
| 207 | *p++ = ao; |
| 208 | ao = canditer_next(&cia); |
| 209 | bo = canditer_next(&cib); |
| 210 | } |
| 211 | } |
| 212 | } |
| 213 | |
| 214 | /* properties */ |
| 215 | BATsetcount(bn, (BUN) (p - (oid *) Tloc(bn, 0))); |
| 216 | bn->trevsorted = BATcount(bn) <= 1; |
| 217 | bn->tsorted = true; |
| 218 | bn->tkey = true; |
| 219 | bn->tnil = false; |
| 220 | bn->tnonil = true; |
| 221 | return virtualize(bn); |
| 222 | } |
| 223 | |
| 224 | /* calculate the difference of two candidate lists and produce a new one |
| 225 | */ |
| 226 | BAT * |
| 227 | BATdiffcand(BAT *a, BAT *b) |
| 228 | { |
| 229 | BAT *bn; |
| 230 | oid *restrict p; |
| 231 | struct canditer cia, cib; |
| 232 | |
| 233 | BATcheck(a, "BATdiffcand" , NULL); |
| 234 | BATcheck(b, "BATdiffcand" , NULL); |
| 235 | |
| 236 | canditer_init(&cia, NULL, a); |
| 237 | canditer_init(&cib, NULL, b); |
| 238 | |
| 239 | if (cia.ncand == 0) |
| 240 | return BATdense(0, 0, 0); |
| 241 | if (cia.ncand == 0) |
| 242 | return canditer_slice(&cia, 0, cia.ncand); |
| 243 | |
| 244 | if (cib.seq > canditer_last(&cia) || canditer_last(&cib) < cia.seq) { |
| 245 | /* no overlap, return a */ |
| 246 | return canditer_slice(&cia, 0, cia.ncand); |
| 247 | } |
| 248 | |
| 249 | if (cia.tpe == cand_dense && cib.tpe == cand_dense) { |
| 250 | /* both a and b are dense */ |
| 251 | if (cia.seq < cib.seq) { |
| 252 | /* a starts before b */ |
| 253 | if (cia.seq + cia.ncand <= cib.seq + cib.ncand) { |
| 254 | /* b overlaps with end of a */ |
| 255 | return canditer_slice(&cia, 0, cib.seq - cia.seq); |
| 256 | } |
| 257 | /* b is subset of a */ |
| 258 | return canditer_slice2(&cia, 0, cib.seq - cia.seq, |
| 259 | cib.seq + cib.ncand - cia.seq, |
| 260 | cia.ncand); |
| 261 | } else { |
| 262 | /* cia.seq >= cib.seq */ |
| 263 | if (cia.seq + cia.ncand > cib.seq + cib.ncand) { |
| 264 | /* b overlaps with beginning of a */ |
| 265 | return canditer_slice(&cia, cib.seq + cib.ncand - cia.seq, cia.ncand); |
| 266 | } |
| 267 | /* a is subset f b */ |
| 268 | return BATdense(0, 0, 0); |
| 269 | } |
| 270 | } |
| 271 | if (cib.tpe == cand_dense) { |
| 272 | /* b is dense and a is not: we can copy the part of a |
| 273 | * that is before the start of b and the part of a |
| 274 | * that is after the end of b */ |
| 275 | return canditer_slice2(&cia, 0, |
| 276 | canditer_search(&cia, cib.seq, true), |
| 277 | canditer_search(&cia, cib.seq + cib.ncand, true), |
| 278 | cia.ncand); |
| 279 | } |
| 280 | |
| 281 | /* b is not dense */ |
| 282 | bn = COLnew(0, TYPE_oid, BATcount(a), TRANSIENT); |
| 283 | if (bn == NULL) |
| 284 | return NULL; |
| 285 | p = Tloc(bn, 0); |
| 286 | /* find first position in b that is in range of a */ |
| 287 | canditer_setidx(&cib, canditer_search(&cib, cia.seq, true)); |
| 288 | oid ob = canditer_next(&cib); |
| 289 | for (BUN i = 0; i < cia.ncand; i++) { |
| 290 | oid oa = canditer_next(&cia); |
| 291 | while (!is_oid_nil(ob) && ob < oa) { |
| 292 | ob = canditer_next(&cib); |
| 293 | } |
| 294 | if (!is_oid_nil(ob) && oa < ob) |
| 295 | *p++ = oa; |
| 296 | } |
| 297 | |
| 298 | /* properties */ |
| 299 | BATsetcount(bn, (BUN) (p - (oid *) Tloc(bn, 0))); |
| 300 | bn->trevsorted = BATcount(bn) <= 1; |
| 301 | bn->tsorted = true; |
| 302 | bn->tkey = true; |
| 303 | bn->tnil = false; |
| 304 | bn->tnonil = true; |
| 305 | return virtualize(bn); |
| 306 | } |
| 307 | |
| 308 | /* return offset of first value in cand that is >= o */ |
| 309 | static inline BUN |
| 310 | binsearchcand(const oid *cand, BUN hi, oid o) |
| 311 | { |
| 312 | BUN lo = 0; |
| 313 | |
| 314 | if (o <= cand[lo]) |
| 315 | return 0; |
| 316 | if (o > cand[hi]) |
| 317 | return hi + 1; |
| 318 | /* loop invariant: cand[lo] < o <= cand[hi] */ |
| 319 | while (hi > lo + 1) { |
| 320 | BUN mid = (lo + hi) / 2; |
| 321 | if (cand[mid] == o) |
| 322 | return mid; |
| 323 | if (cand[mid] < o) |
| 324 | lo = mid; |
| 325 | else |
| 326 | hi = mid; |
| 327 | } |
| 328 | return hi; |
| 329 | } |
| 330 | |
| 331 | /* initialize a candidate iterator, return number of iterations */ |
| 332 | BUN |
| 333 | canditer_init(struct canditer *ci, BAT *b, BAT *s) |
| 334 | { |
| 335 | assert(ci != NULL); |
| 336 | |
| 337 | if (s == NULL) { |
| 338 | if (b == NULL) { |
| 339 | /* trivially empty candidate list */ |
| 340 | *ci = (struct canditer) { |
| 341 | .tpe = cand_dense, |
| 342 | }; |
| 343 | return 0; |
| 344 | } |
| 345 | /* every row is a candidate */ |
| 346 | *ci = (struct canditer) { |
| 347 | .tpe = cand_dense, |
| 348 | .seq = b->hseqbase, |
| 349 | .ncand = BATcount(b), |
| 350 | }; |
| 351 | return ci->ncand; |
| 352 | } |
| 353 | |
| 354 | assert(ATOMtype(s->ttype) == TYPE_oid); |
| 355 | assert(s->tsorted); |
| 356 | assert(s->tkey); |
| 357 | assert(s->tnonil); |
| 358 | assert(s->ttype == TYPE_void || s->tvheap == NULL); |
| 359 | |
| 360 | BUN cnt = BATcount(s); |
| 361 | |
| 362 | if (cnt == 0 || (b != NULL && BATcount(b) == 0)) { |
| 363 | /* candidate list for empty BAT or empty candidate list */ |
| 364 | *ci = (struct canditer) { |
| 365 | .tpe = cand_dense, |
| 366 | .s = s, |
| 367 | }; |
| 368 | return 0; |
| 369 | } |
| 370 | |
| 371 | *ci = (struct canditer) { |
| 372 | .seq = s->tseqbase, |
| 373 | .s = s, |
| 374 | }; |
| 375 | |
| 376 | if (s->ttype == TYPE_void) { |
| 377 | assert(!is_oid_nil(ci->seq)); |
| 378 | if (s->tvheap) { |
| 379 | assert(s->tvheap->free % SIZEOF_OID == 0); |
| 380 | ci->noids = s->tvheap->free / SIZEOF_OID; |
| 381 | if (ci->noids > 0) { |
| 382 | ci->tpe = cand_except; |
| 383 | ci->oids = (const oid *) s->tvheap->base; |
| 384 | } else { |
| 385 | /* why the vheap? */ |
| 386 | ci->tpe = cand_dense; |
| 387 | ci->oids = NULL; |
| 388 | } |
| 389 | } else { |
| 390 | ci->tpe = cand_dense; |
| 391 | } |
| 392 | } else if (is_oid_nil(ci->seq)) { |
| 393 | ci->tpe = cand_materialized; |
| 394 | ci->oids = (const oid *) s->theap.base; |
| 395 | ci->seq = ci->oids[0]; |
| 396 | ci->noids = cnt; |
| 397 | if (ci->oids[ci->noids - 1] - ci->oids[0] == ci->noids - 1) { |
| 398 | /* actually dense */ |
| 399 | ci->tpe = cand_dense; |
| 400 | ci->oids = NULL; |
| 401 | } |
| 402 | } else { |
| 403 | /* materialized dense: no exceptions */ |
| 404 | ci->tpe = cand_dense; |
| 405 | } |
| 406 | switch (ci->tpe) { |
| 407 | case cand_dense: |
| 408 | case_cand_dense: |
| 409 | if (b != NULL) { |
| 410 | if (ci->seq + cnt <= b->hseqbase || |
| 411 | ci->seq >= b->hseqbase + BATcount(b)) { |
| 412 | ci->ncand = 0; |
| 413 | return 0; |
| 414 | } |
| 415 | if (b->hseqbase > ci->seq) { |
| 416 | cnt -= b->hseqbase - ci->seq; |
| 417 | ci->offset += b->hseqbase - ci->seq; |
| 418 | ci->seq = b->hseqbase; |
| 419 | } |
| 420 | if (ci->seq + cnt > b->hseqbase + BATcount(b)) |
| 421 | cnt = b->hseqbase + BATcount(b) - ci->seq; |
| 422 | } |
| 423 | break; |
| 424 | case cand_materialized: |
| 425 | if (b != NULL) { |
| 426 | if (ci->oids[ci->noids - 1] < b->hseqbase) { |
| 427 | *ci = (struct canditer) { |
| 428 | .tpe = cand_dense, |
| 429 | .s = s, |
| 430 | }; |
| 431 | return 0; |
| 432 | } |
| 433 | if (ci->oids[0] < b->hseqbase) { |
| 434 | BUN lo = 0; |
| 435 | BUN hi = cnt - 1; |
| 436 | const oid o = b->hseqbase; |
| 437 | /* loop invariant: |
| 438 | * ci->oids[lo] < o <= ci->oids[hi] */ |
| 439 | while (hi - lo > 1) { |
| 440 | BUN mid = (lo + hi) / 2; |
| 441 | if (ci->oids[mid] >= o) |
| 442 | hi = mid; |
| 443 | else |
| 444 | lo = mid; |
| 445 | } |
| 446 | ci->offset = hi; |
| 447 | cnt -= hi; |
| 448 | ci->oids += hi; |
| 449 | ci->seq = ci->oids[0]; |
| 450 | } |
| 451 | if (ci->oids[cnt - 1] >= b->hseqbase + BATcount(b)) { |
| 452 | BUN lo = 0; |
| 453 | BUN hi = cnt - 1; |
| 454 | const oid o = b->hseqbase + BATcount(b); |
| 455 | /* loop invariant: |
| 456 | * ci->oids[lo] < o <= ci->oids[hi] */ |
| 457 | while (hi - lo > 1) { |
| 458 | BUN mid = (lo + hi) / 2; |
| 459 | if (ci->oids[mid] >= o) |
| 460 | hi = mid; |
| 461 | else |
| 462 | lo = mid; |
| 463 | } |
| 464 | cnt = hi; |
| 465 | } |
| 466 | ci->noids = cnt; |
| 467 | } |
| 468 | break; |
| 469 | case cand_except: |
| 470 | /* exceptions must all be within range of s */ |
| 471 | assert(ci->oids[0] >= ci->seq); |
| 472 | assert(ci->oids[ci->noids - 1] < ci->seq + cnt + ci->noids); |
| 473 | if (b != NULL) { |
| 474 | if (ci->seq + cnt + ci->noids <= b->hseqbase || |
| 475 | ci->seq >= b->hseqbase + BATcount(b)) { |
| 476 | *ci = (struct canditer) { |
| 477 | .tpe = cand_dense, |
| 478 | }; |
| 479 | return 0; |
| 480 | } |
| 481 | } |
| 482 | /* prune exceptions at either end of range of s */ |
| 483 | while (ci->noids > 0 && ci->oids[0] == ci->seq) { |
| 484 | ci->noids--; |
| 485 | ci->oids++; |
| 486 | ci->seq++; |
| 487 | } |
| 488 | while (ci->noids > 0 && |
| 489 | ci->oids[ci->noids - 1] == ci->seq + cnt + ci->noids - 1) |
| 490 | ci->noids--; |
| 491 | /* WARNING: don't reset ci->oids to NULL when setting |
| 492 | * ci->tpe to cand_dense below: BATprojectchain will |
| 493 | * fail */ |
| 494 | if (ci->noids == 0) { |
| 495 | ci->tpe = cand_dense; |
| 496 | goto case_cand_dense; |
| 497 | } |
| 498 | if (b != NULL) { |
| 499 | BUN p; |
| 500 | p = binsearchcand(ci->oids, ci->noids - 1, b->hseqbase); |
| 501 | if (p == ci->noids) { |
| 502 | /* all exceptions before start of b */ |
| 503 | ci->offset = b->hseqbase - ci->seq - ci->noids; |
| 504 | cnt = ci->seq + cnt + ci->noids - b->hseqbase; |
| 505 | ci->seq = b->hseqbase; |
| 506 | ci->noids = 0; |
| 507 | ci->tpe = cand_dense; |
| 508 | break; |
| 509 | } |
| 510 | assert(b->hseqbase > ci->seq || p == 0); |
| 511 | if (b->hseqbase > ci->seq) { |
| 512 | /* skip candidates, possibly including |
| 513 | * exceptions */ |
| 514 | ci->oids += p; |
| 515 | ci->noids -= p; |
| 516 | p = b->hseqbase - ci->seq - p; |
| 517 | cnt -= p; |
| 518 | ci->offset += p; |
| 519 | ci->seq = b->hseqbase; |
| 520 | } |
| 521 | if (ci->seq + cnt + ci->noids > b->hseqbase + BATcount(b)) { |
| 522 | p = binsearchcand(ci->oids, ci->noids - 1, |
| 523 | b->hseqbase + BATcount(b)); |
| 524 | ci->noids = p; |
| 525 | cnt = b->hseqbase + BATcount(b) - ci->seq - ci->noids; |
| 526 | } |
| 527 | while (ci->noids > 0 && ci->oids[0] == ci->seq) { |
| 528 | ci->noids--; |
| 529 | ci->oids++; |
| 530 | ci->seq++; |
| 531 | } |
| 532 | while (ci->noids > 0 && |
| 533 | ci->oids[ci->noids - 1] == ci->seq + cnt + ci->noids - 1) |
| 534 | ci->noids--; |
| 535 | if (ci->noids == 0) { |
| 536 | ci->tpe = cand_dense; |
| 537 | goto case_cand_dense; |
| 538 | } |
| 539 | } |
| 540 | break; |
| 541 | } |
| 542 | ci->ncand = cnt; |
| 543 | return cnt; |
| 544 | } |
| 545 | |
| 546 | /* return the next candidate without advancing */ |
| 547 | oid |
| 548 | canditer_peek(struct canditer *ci) |
| 549 | { |
| 550 | if (ci->next == ci->ncand) |
| 551 | return oid_nil; |
| 552 | switch (ci->tpe) { |
| 553 | case cand_dense: |
| 554 | return ci->seq + ci->next; |
| 555 | case cand_materialized: |
| 556 | assert(ci->next < ci->noids); |
| 557 | return ci->oids[ci->next]; |
| 558 | case cand_except: |
| 559 | /* work around compiler error: control reaches end of |
| 560 | * non-void function */ |
| 561 | break; |
| 562 | } |
| 563 | oid o = ci->seq + ci->add + ci->next; |
| 564 | while (ci->add < ci->noids && o == ci->oids[ci->add]) { |
| 565 | ci->add++; |
| 566 | o++; |
| 567 | } |
| 568 | return o; |
| 569 | } |
| 570 | |
| 571 | /* return the previous candidate */ |
| 572 | oid |
| 573 | canditer_prev(struct canditer *ci) |
| 574 | { |
| 575 | if (ci->next == 0) |
| 576 | return oid_nil; |
| 577 | switch (ci->tpe) { |
| 578 | case cand_dense: |
| 579 | return ci->seq + --ci->next; |
| 580 | case cand_materialized: |
| 581 | return ci->oids[--ci->next]; |
| 582 | case cand_except: |
| 583 | break; |
| 584 | } |
| 585 | oid o = ci->seq + ci->add + --ci->next; |
| 586 | while (ci->add > 0 && o == ci->oids[ci->add - 1]) { |
| 587 | ci->add--; |
| 588 | o--; |
| 589 | } |
| 590 | return o; |
| 591 | } |
| 592 | |
| 593 | /* return the previous candidate without retreating */ |
| 594 | oid |
| 595 | canditer_peekprev(struct canditer *ci) |
| 596 | { |
| 597 | if (ci->next == 0) |
| 598 | return oid_nil; |
| 599 | switch (ci->tpe) { |
| 600 | case cand_dense: |
| 601 | return ci->seq + ci->next - 1; |
| 602 | case cand_materialized: |
| 603 | return ci->oids[ci->next - 1]; |
| 604 | case cand_except: |
| 605 | break; |
| 606 | } |
| 607 | oid o = ci->seq + ci->add + ci->next - 1; |
| 608 | while (ci->add > 0 && o == ci->oids[ci->add - 1]) { |
| 609 | ci->add--; |
| 610 | o--; |
| 611 | } |
| 612 | return o; |
| 613 | } |
| 614 | |
| 615 | /* return the last candidate */ |
| 616 | oid |
| 617 | canditer_last(struct canditer *ci) |
| 618 | { |
| 619 | if (ci->ncand == 0) |
| 620 | return oid_nil; |
| 621 | switch (ci->tpe) { |
| 622 | case cand_dense: |
| 623 | return ci->seq + ci->ncand - 1; |
| 624 | case cand_materialized: |
| 625 | return ci->oids[ci->ncand - 1]; |
| 626 | case cand_except: |
| 627 | /* work around compiler error: control reaches end of |
| 628 | * non-void function */ |
| 629 | break; |
| 630 | } |
| 631 | return ci->seq + ci->ncand + ci->noids - 1; |
| 632 | } |
| 633 | |
| 634 | /* return the candidate at the given index */ |
| 635 | oid |
| 636 | canditer_idx(struct canditer *ci, BUN p) |
| 637 | { |
| 638 | if (p >= ci->ncand) |
| 639 | return oid_nil; |
| 640 | switch (ci->tpe) { |
| 641 | case cand_dense: |
| 642 | return ci->seq + p; |
| 643 | case cand_materialized: |
| 644 | return ci->oids[p]; |
| 645 | case cand_except: |
| 646 | /* work around compiler error: control reaches end of |
| 647 | * non-void function */ |
| 648 | break; |
| 649 | } |
| 650 | oid o = ci->seq + p; |
| 651 | if (o < ci->oids[0]) |
| 652 | return o; |
| 653 | if (o + ci->noids > ci->oids[ci->noids - 1]) |
| 654 | return o + ci->noids; |
| 655 | BUN lo = 0, hi = ci->noids - 1; |
| 656 | while (hi - lo > 1) { |
| 657 | BUN mid = (hi + lo) / 2; |
| 658 | if (ci->oids[mid] - mid > o) |
| 659 | hi = mid; |
| 660 | else |
| 661 | lo = mid; |
| 662 | } |
| 663 | return o + hi; |
| 664 | } |
| 665 | |
| 666 | /* set the index for the next candidate to be returned */ |
| 667 | void |
| 668 | canditer_setidx(struct canditer *ci, BUN p) |
| 669 | { |
| 670 | if (p != ci->next) { |
| 671 | if (p >= ci->ncand) { |
| 672 | ci->next = ci->ncand; |
| 673 | if (ci->tpe == cand_except) |
| 674 | ci->add = ci->noids; |
| 675 | } else { |
| 676 | ci->next = p; |
| 677 | if (ci->tpe == cand_except) |
| 678 | ci->add = canditer_idx(ci, p) - ci->seq - p; |
| 679 | } |
| 680 | } |
| 681 | } |
| 682 | |
| 683 | /* reset */ |
| 684 | void |
| 685 | canditer_reset(struct canditer *ci) |
| 686 | { |
| 687 | ci->next = 0; |
| 688 | ci->add = 0; |
| 689 | } |
| 690 | |
| 691 | /* return index of given candidate if it occurs; if the candidate does |
| 692 | * not occur, if next is set, return index of next larger candidate, |
| 693 | * if next is not set, return BUN_NONE */ |
| 694 | BUN |
| 695 | canditer_search(struct canditer *ci, oid o, bool next) |
| 696 | { |
| 697 | BUN p; |
| 698 | |
| 699 | switch (ci->tpe) { |
| 700 | case cand_dense: |
| 701 | if (o < ci->seq) |
| 702 | return next ? 0 : BUN_NONE; |
| 703 | if (o >= ci->seq + ci->ncand) |
| 704 | return next ? ci->ncand : BUN_NONE; |
| 705 | return o - ci->seq; |
| 706 | case cand_materialized: |
| 707 | if (ci->noids == 0) |
| 708 | return 0; |
| 709 | p = binsearchcand(ci->oids, ci->noids - 1, o); |
| 710 | if (!next && (p == ci->noids || ci->oids[p] != o)) |
| 711 | return BUN_NONE; |
| 712 | return p; |
| 713 | case cand_except: |
| 714 | break; |
| 715 | } |
| 716 | if (o < ci->seq) |
| 717 | return next ? 0 : BUN_NONE; |
| 718 | if (o >= ci->seq + ci->ncand + ci->noids) |
| 719 | return next ? ci->ncand : BUN_NONE; |
| 720 | p = binsearchcand(ci->oids, ci->noids - 1, o); |
| 721 | if (next || p == ci->noids || ci->oids[p] != o) |
| 722 | return o - ci->seq - p; |
| 723 | return BUN_NONE; |
| 724 | } |
| 725 | |
| 726 | /* return either an actual BATslice or a new BAT that contains the |
| 727 | * "virtual" slice of the input candidate list BAT; except, unlike |
| 728 | * BATslice, the hseqbase of the returned BAT is 0 */ |
| 729 | BAT * |
| 730 | canditer_slice(struct canditer *ci, BUN lo, BUN hi) |
| 731 | { |
| 732 | BAT *bn; |
| 733 | oid o; |
| 734 | BUN add; |
| 735 | |
| 736 | assert(ci != NULL); |
| 737 | |
| 738 | if (lo >= ci->ncand || lo >= hi) |
| 739 | return BATdense(0, 0, 0); |
| 740 | if (hi > ci->ncand) |
| 741 | hi = ci->ncand; |
| 742 | switch (ci->tpe) { |
| 743 | case cand_materialized: |
| 744 | if (ci->s) { |
| 745 | bn = BATslice(ci->s, lo + ci->offset, hi + ci->offset); |
| 746 | BAThseqbase(bn, 0); |
| 747 | return bn; |
| 748 | } |
| 749 | bn = COLnew(0, TYPE_oid, hi - lo, TRANSIENT); |
| 750 | if (bn == NULL) |
| 751 | return NULL; |
| 752 | BATsetcount(bn, hi - lo); |
| 753 | memcpy(Tloc(bn, 0), ci->oids + lo, (hi - lo) * sizeof(oid)); |
| 754 | break; |
| 755 | default: /* really: case cand_dense: */ |
| 756 | return BATdense(0, ci->seq + lo, hi - lo); |
| 757 | case cand_except: |
| 758 | o = canditer_idx(ci, lo); |
| 759 | add = o - ci->seq - lo; |
| 760 | assert(add <= ci->noids); |
| 761 | if (add == ci->noids || o + hi - lo < ci->oids[add]) { |
| 762 | /* after last exception or before next |
| 763 | * exception: return dense sequence */ |
| 764 | return BATdense(0, o, hi - lo); |
| 765 | } |
| 766 | bn = COLnew(0, TYPE_oid, hi - lo, TRANSIENT); |
| 767 | if (bn == NULL) |
| 768 | return NULL; |
| 769 | BATsetcount(bn, hi - lo); |
| 770 | for (oid *dst = Tloc(bn, 0); lo < hi; lo++) { |
| 771 | while (add < ci->noids && o == ci->oids[add]) { |
| 772 | o++; |
| 773 | add++; |
| 774 | } |
| 775 | *dst++ = o++; |
| 776 | } |
| 777 | break; |
| 778 | } |
| 779 | bn->tsorted = true; |
| 780 | bn->trevsorted = BATcount(bn) <= 1; |
| 781 | bn->tkey = true; |
| 782 | bn->tseqbase = oid_nil; |
| 783 | bn->tnil = false; |
| 784 | bn->tnonil = true; |
| 785 | return virtualize(bn); |
| 786 | } |
| 787 | |
| 788 | /* return the combination of two slices */ |
| 789 | BAT * |
| 790 | canditer_slice2(struct canditer *ci, BUN lo1, BUN hi1, BUN lo2, BUN hi2) |
| 791 | { |
| 792 | BAT *bn; |
| 793 | oid o; |
| 794 | BUN add; |
| 795 | |
| 796 | assert(lo1 <= hi1); |
| 797 | assert(lo2 <= hi2); |
| 798 | assert(hi1 <= lo2 || (lo2 == 0 && hi2 == 0)); |
| 799 | |
| 800 | if (hi1 == lo2) /* consecutive slices: combine into one */ |
| 801 | return canditer_slice(ci, lo1, hi2); |
| 802 | if (lo2 == hi2 || hi1 >= ci->ncand || lo2 >= ci->ncand) { |
| 803 | /* empty second slice */ |
| 804 | return canditer_slice(ci, lo1, hi1); |
| 805 | } |
| 806 | if (lo1 == hi1) /* empty first slice */ |
| 807 | return canditer_slice(ci, lo2, hi2); |
| 808 | if (lo1 >= ci->ncand) /* out of range */ |
| 809 | return BATdense(0, 0, 0); |
| 810 | |
| 811 | if (hi2 >= ci->ncand) |
| 812 | hi2 = ci->ncand; |
| 813 | |
| 814 | bn = COLnew(0, TYPE_oid, hi1 - lo1 + hi2 - lo2, TRANSIENT); |
| 815 | if (bn == NULL) |
| 816 | return NULL; |
| 817 | BATsetcount(bn, hi1 - lo1 + hi2 - lo2); |
| 818 | bn->tsorted = true; |
| 819 | bn->trevsorted = BATcount(bn) <= 1; |
| 820 | bn->tkey = true; |
| 821 | bn->tseqbase = oid_nil; |
| 822 | bn->tnil = false; |
| 823 | bn->tnonil = true; |
| 824 | |
| 825 | oid *dst = Tloc(bn, 0); |
| 826 | |
| 827 | switch (ci->tpe) { |
| 828 | case cand_materialized: |
| 829 | memcpy(dst, ci->oids + lo1, (hi1 - lo1) * sizeof(oid)); |
| 830 | memcpy(dst + hi1 - lo1, ci->oids + lo2, (hi2 - lo2) * sizeof(oid)); |
| 831 | break; |
| 832 | case cand_dense: |
| 833 | while (lo1 < hi1) |
| 834 | *dst++ = ci->seq + lo1++; |
| 835 | while (lo2 < hi2) |
| 836 | *dst++ = ci->seq + lo2++; |
| 837 | break; |
| 838 | case cand_except: |
| 839 | o = canditer_idx(ci, lo1); |
| 840 | add = o - ci->seq - lo1; |
| 841 | assert(add <= ci->noids); |
| 842 | if (add == ci->noids) { |
| 843 | /* after last exception: return dense sequence */ |
| 844 | while (lo1 < hi1) |
| 845 | *dst++ = ci->seq + add + lo1++; |
| 846 | } else { |
| 847 | while (lo1 < hi1) { |
| 848 | while (add < ci->noids && o == ci->oids[add]) { |
| 849 | o++; |
| 850 | add++; |
| 851 | } |
| 852 | *dst++ = o++; |
| 853 | lo1++; |
| 854 | } |
| 855 | } |
| 856 | o = canditer_idx(ci, lo2); |
| 857 | add = o - ci->seq - lo2; |
| 858 | assert(add <= ci->noids); |
| 859 | if (add == ci->noids) { |
| 860 | /* after last exception: return dense sequence */ |
| 861 | while (lo2 < hi2) |
| 862 | *dst++ = ci->seq + add + lo2++; |
| 863 | } else { |
| 864 | while (lo2 < hi2) { |
| 865 | while (add < ci->noids && o == ci->oids[add]) { |
| 866 | o++; |
| 867 | add++; |
| 868 | } |
| 869 | *dst++ = o++; |
| 870 | lo2++; |
| 871 | } |
| 872 | } |
| 873 | } |
| 874 | return virtualize(bn); |
| 875 | } |
| 876 | |
| 877 | gdk_return |
| 878 | BATnegcands(BAT *dense_cands, BAT *odels) |
| 879 | { |
| 880 | const char *nme; |
| 881 | Heap *dels; |
| 882 | BUN lo, hi; |
| 883 | |
| 884 | assert(BATtdense(dense_cands)); |
| 885 | assert(dense_cands->ttype == TYPE_void); |
| 886 | assert(dense_cands->batRole == TRANSIENT); |
| 887 | |
| 888 | if (BATcount(odels) == 0) |
| 889 | return GDK_SUCCEED; |
| 890 | |
| 891 | lo = SORTfndfirst(odels, &dense_cands->tseqbase); |
| 892 | hi = SORTfndfirst(odels, &(oid) {dense_cands->tseqbase + BATcount(dense_cands)}); |
| 893 | if (lo == hi) |
| 894 | return GDK_SUCCEED; |
| 895 | |
| 896 | nme = BBP_physical(dense_cands->batCacheid); |
| 897 | if ((dels = (Heap*)GDKzalloc(sizeof(Heap))) == NULL || |
| 898 | (dels->farmid = BBPselectfarm(dense_cands->batRole, dense_cands->ttype, varheap)) < 0){ |
| 899 | GDKfree(dels); |
| 900 | return GDK_FAIL; |
| 901 | } |
| 902 | strconcat_len(dels->filename, sizeof(dels->filename), |
| 903 | nme, ".theap" , NULL); |
| 904 | |
| 905 | if (HEAPalloc(dels, hi - lo, sizeof(oid)) != GDK_SUCCEED) { |
| 906 | GDKfree(dels); |
| 907 | return GDK_FAIL; |
| 908 | } |
| 909 | dels->parentid = dense_cands->batCacheid; |
| 910 | dels->free = sizeof(oid) * (hi - lo); |
| 911 | if (odels->ttype == TYPE_void) { |
| 912 | for (BUN x = lo; x < hi; x++) |
| 913 | ((oid *) dels->base)[x - lo] = x + odels->tseqbase; |
| 914 | } else { |
| 915 | memcpy(dels->base, Tloc(odels, lo), dels->free); |
| 916 | } |
| 917 | dense_cands->batDirtydesc = true; |
| 918 | dense_cands->tvheap = dels; |
| 919 | BATsetcount(dense_cands, dense_cands->batCount - (hi - lo)); |
| 920 | ALGODEBUG fprintf(stderr, "#BATnegcands(cands=" ALGOBATFMT "," |
| 921 | "dels=" ALGOBATFMT ")\n" , |
| 922 | ALGOBATPAR(dense_cands), |
| 923 | ALGOBATPAR(odels)); |
| 924 | return GDK_SUCCEED; |
| 925 | } |
| 926 | |