| 1 | /* |
| 2 | * This Source Code Form is subject to the terms of the Mozilla Public |
| 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
| 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
| 5 | * |
| 6 | * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V. |
| 7 | */ |
| 8 | |
| 9 | #include "monetdb_config.h" |
| 10 | #include "gdk.h" |
| 11 | #include "gdk_private.h" |
| 12 | |
| 13 | /* |
| 14 | * BATproject returns a BAT aligned with the left input whose values |
| 15 | * are the values from the right input that were referred to by the |
| 16 | * OIDs in the left input. |
| 17 | */ |
| 18 | |
| 19 | #define project_loop(TYPE) \ |
| 20 | static gdk_return \ |
| 21 | project_##TYPE(BAT *bn, BAT *l, struct canditer *restrict ci, BAT *r, bool nilcheck) \ |
| 22 | { \ |
| 23 | BUN lo, hi; \ |
| 24 | const TYPE *restrict rt; \ |
| 25 | TYPE *restrict bt; \ |
| 26 | TYPE v; \ |
| 27 | oid rseq, rend; \ |
| 28 | bool hasnil = false; \ |
| 29 | \ |
| 30 | rt = (const TYPE *) Tloc(r, 0); \ |
| 31 | bt = (TYPE *) Tloc(bn, 0); \ |
| 32 | rseq = r->hseqbase; \ |
| 33 | rend = rseq + BATcount(r); \ |
| 34 | if (ci) { \ |
| 35 | for (lo = 0, hi = ci->ncand; lo < hi; lo++) { \ |
| 36 | oid o = canditer_next(ci); \ |
| 37 | if (o < rseq || o >= rend) { \ |
| 38 | GDKerror("BATproject: does not match always\n"); \ |
| 39 | return GDK_FAIL; \ |
| 40 | } \ |
| 41 | v = rt[o - rseq]; \ |
| 42 | bt[lo] = v; \ |
| 43 | hasnil |= is_##TYPE##_nil(v); \ |
| 44 | } \ |
| 45 | } else { \ |
| 46 | const oid *restrict o = (const oid *) Tloc(l, 0); \ |
| 47 | for (lo = 0, hi = BATcount(l); lo < hi; lo++) { \ |
| 48 | if (is_oid_nil(o[lo])) { \ |
| 49 | assert(nilcheck); \ |
| 50 | bt[lo] = TYPE##_nil; \ |
| 51 | hasnil = true; \ |
| 52 | } else if (o[lo] < rseq || o[lo] >= rend) { \ |
| 53 | GDKerror("BATproject: does not match always\n"); \ |
| 54 | return GDK_FAIL; \ |
| 55 | } else { \ |
| 56 | v = rt[o[lo] - rseq]; \ |
| 57 | bt[lo] = v; \ |
| 58 | hasnil |= is_##TYPE##_nil(v); \ |
| 59 | } \ |
| 60 | } \ |
| 61 | } \ |
| 62 | if (nilcheck && hasnil) { \ |
| 63 | bn->tnonil = false; \ |
| 64 | bn->tnil = true; \ |
| 65 | } \ |
| 66 | BATsetcount(bn, lo); \ |
| 67 | return GDK_SUCCEED; \ |
| 68 | } |
| 69 | |
| 70 | |
| 71 | /* project type switch */ |
| 72 | project_loop(bte) |
| 73 | project_loop(sht) |
| 74 | project_loop(int) |
| 75 | project_loop(flt) |
| 76 | project_loop(dbl) |
| 77 | project_loop(lng) |
| 78 | #ifdef HAVE_HGE |
| 79 | project_loop(hge) |
| 80 | #endif |
| 81 | |
| 82 | static gdk_return |
| 83 | project_void(BAT *bn, BAT *l, struct canditer *restrict ci, BAT *r) |
| 84 | { |
| 85 | BUN lo, hi; |
| 86 | oid *restrict bt; |
| 87 | oid rseq, rend; |
| 88 | |
| 89 | assert(BATtdense(r)); |
| 90 | rseq = r->hseqbase; |
| 91 | rend = rseq + BATcount(r); |
| 92 | bt = (oid *) Tloc(bn, 0); |
| 93 | bn->tsorted = l->tsorted; |
| 94 | bn->trevsorted = l->trevsorted; |
| 95 | bn->tkey = l->tkey; |
| 96 | bn->tnonil = true; |
| 97 | bn->tnil = false; |
| 98 | if (ci) { |
| 99 | for (lo = 0, hi = ci->ncand; lo < hi; lo++) { |
| 100 | oid o = canditer_next(ci); |
| 101 | if (o < rseq || o >= rend) { |
| 102 | GDKerror("BATproject: does not match always\n" ); |
| 103 | return GDK_FAIL; |
| 104 | } |
| 105 | bt[lo] = o - rseq + r->tseqbase; |
| 106 | } |
| 107 | } else { |
| 108 | const oid *o = (const oid *) Tloc(l, 0); |
| 109 | for (lo = 0, hi = BATcount(l); lo < hi; lo++) { |
| 110 | if (o[lo] < rseq || o[lo] >= rend) { |
| 111 | if (is_oid_nil(o[lo])) { |
| 112 | bt[lo] = oid_nil; |
| 113 | bn->tnonil = false; |
| 114 | bn->tnil = true; |
| 115 | } else { |
| 116 | GDKerror("BATproject: does not match always\n" ); |
| 117 | return GDK_FAIL; |
| 118 | } |
| 119 | } else { |
| 120 | bt[lo] = o[lo] - rseq + r->tseqbase; |
| 121 | } |
| 122 | } |
| 123 | } |
| 124 | BATsetcount(bn, lo); |
| 125 | return GDK_SUCCEED; |
| 126 | } |
| 127 | |
| 128 | static gdk_return |
| 129 | project_cand(BAT *bn, BAT *l, struct canditer *restrict lci, BAT *r) |
| 130 | { |
| 131 | BUN lo, hi; |
| 132 | oid *restrict bt; |
| 133 | oid rseq, rend; |
| 134 | struct canditer rci; |
| 135 | |
| 136 | rseq = r->hseqbase; |
| 137 | rend = rseq + BATcount(r); |
| 138 | canditer_init(&rci, NULL, r); |
| 139 | bt = (oid *) Tloc(bn, 0); |
| 140 | bn->tsorted = l->tsorted; |
| 141 | bn->trevsorted = l->trevsorted; |
| 142 | bn->tkey = l->tkey; |
| 143 | bn->tnonil = true; |
| 144 | bn->tnil = false; |
| 145 | if (lci) { |
| 146 | for (lo = 0, hi = lci->ncand; lo < hi; lo++) { |
| 147 | oid o = canditer_next(lci); |
| 148 | if (o < rseq || o >= rend) { |
| 149 | GDKerror("BATproject: does not match always\n" ); |
| 150 | return GDK_FAIL; |
| 151 | } |
| 152 | bt[lo] = canditer_idx(&rci, o - rseq); |
| 153 | } |
| 154 | } else { |
| 155 | const oid *o = (const oid *) Tloc(l, 0); |
| 156 | for (lo = 0, hi = BATcount(l); lo < hi; lo++) { |
| 157 | if (o[lo] < rseq || o[lo] >= rend) { |
| 158 | if (is_oid_nil(o[lo])) { |
| 159 | bt[lo] = oid_nil; |
| 160 | bn->tnonil = false; |
| 161 | bn->tnil = true; |
| 162 | } else { |
| 163 | GDKerror("BATproject: does not match always\n" ); |
| 164 | return GDK_FAIL; |
| 165 | } |
| 166 | } else { |
| 167 | bt[lo] = canditer_idx(&rci, o[lo] - rseq); |
| 168 | } |
| 169 | } |
| 170 | } |
| 171 | BATsetcount(bn, lo); |
| 172 | return GDK_SUCCEED; |
| 173 | } |
| 174 | |
| 175 | static gdk_return |
| 176 | project_any(BAT *bn, BAT *l, struct canditer *restrict ci, BAT *r, bool nilcheck) |
| 177 | { |
| 178 | BUN lo, hi; |
| 179 | BATiter ri; |
| 180 | int (*cmp)(const void *, const void *) = ATOMcompare(r->ttype); |
| 181 | const void *nil = ATOMnilptr(r->ttype); |
| 182 | const void *v; |
| 183 | oid rseq, rend; |
| 184 | |
| 185 | ri = bat_iterator(r); |
| 186 | rseq = r->hseqbase; |
| 187 | rend = rseq + BATcount(r); |
| 188 | if (ci) { |
| 189 | for (lo = 0, hi = ci->ncand; lo < hi; lo++) { |
| 190 | oid o = canditer_next(ci); |
| 191 | if (o < rseq || o >= rend) { |
| 192 | GDKerror("BATproject: does not match always\n" ); |
| 193 | goto bunins_failed; |
| 194 | } |
| 195 | v = BUNtail(ri, o - rseq); |
| 196 | tfastins_nocheck(bn, lo, v, Tsize(bn)); |
| 197 | if (nilcheck && bn->tnonil && cmp(v, nil) == 0) { |
| 198 | bn->tnonil = false; |
| 199 | bn->tnil = true; |
| 200 | } |
| 201 | } |
| 202 | } else { |
| 203 | const oid *restrict o = (const oid *) Tloc(l, 0); |
| 204 | |
| 205 | for (lo = 0, hi = BATcount(l); lo < hi; lo++) { |
| 206 | if (is_oid_nil(o[lo])) { |
| 207 | tfastins_nocheck(bn, lo, nil, Tsize(bn)); |
| 208 | bn->tnonil = false; |
| 209 | bn->tnil = true; |
| 210 | } else if (o[lo] < rseq || o[lo] >= rend) { |
| 211 | GDKerror("BATproject: does not match always\n" ); |
| 212 | goto bunins_failed; |
| 213 | } else { |
| 214 | v = BUNtail(ri, o[lo] - rseq); |
| 215 | tfastins_nocheck(bn, lo, v, Tsize(bn)); |
| 216 | if (nilcheck && bn->tnonil && cmp(v, nil) == 0) { |
| 217 | bn->tnonil = false; |
| 218 | bn->tnil = true; |
| 219 | } |
| 220 | } |
| 221 | } |
| 222 | } |
| 223 | BATsetcount(bn, lo); |
| 224 | bn->theap.dirty = true; |
| 225 | return GDK_SUCCEED; |
| 226 | bunins_failed: |
| 227 | return GDK_FAIL; |
| 228 | } |
| 229 | |
| 230 | BAT * |
| 231 | BATproject(BAT *l, BAT *r) |
| 232 | { |
| 233 | BAT *bn; |
| 234 | oid lo, hi; |
| 235 | gdk_return res; |
| 236 | int tpe = ATOMtype(r->ttype); |
| 237 | bool nilcheck = true, stringtrick = false; |
| 238 | BUN lcount = BATcount(l), rcount = BATcount(r); |
| 239 | struct canditer ci, *lci = NULL; |
| 240 | lng t0 = 0; |
| 241 | |
| 242 | ALGODEBUG t0 = GDKusec(); |
| 243 | |
| 244 | ALGODEBUG fprintf(stderr, "#%s: %s(l=" ALGOBATFMT "," |
| 245 | "r=" ALGOBATFMT ")\n" , |
| 246 | MT_thread_getname(), __func__, |
| 247 | ALGOBATPAR(l), ALGOBATPAR(r)); |
| 248 | |
| 249 | assert(ATOMtype(l->ttype) == TYPE_oid); |
| 250 | |
| 251 | if (BATtdense(l) && lcount > 0) { |
| 252 | lo = l->tseqbase; |
| 253 | hi = l->tseqbase + lcount; |
| 254 | if (lo < r->hseqbase || hi > r->hseqbase + rcount) { |
| 255 | GDKerror("BATproject: does not match always\n" ); |
| 256 | return NULL; |
| 257 | } |
| 258 | bn = BATslice(r, lo - r->hseqbase, hi - r->hseqbase); |
| 259 | BAThseqbase(bn, l->hseqbase); |
| 260 | ALGODEBUG fprintf(stderr, "#%s: %s(l=%s,r=%s)=" ALGOOPTBATFMT " (slice)\n" , |
| 261 | MT_thread_getname(), __func__, |
| 262 | BATgetId(l), BATgetId(r), ALGOOPTBATPAR(bn)); |
| 263 | return bn; |
| 264 | } |
| 265 | if (l->ttype == TYPE_void && l->tvheap != NULL) { |
| 266 | /* l is candidate list with exceptions */ |
| 267 | lcount = canditer_init(&ci, NULL, l); |
| 268 | lci = &ci; |
| 269 | } |
| 270 | /* if l has type void, it is either empty or not dense (i.e. nil) */ |
| 271 | if (lcount == 0 || (l->ttype == TYPE_void && lci == NULL) || |
| 272 | (r->ttype == TYPE_void && is_oid_nil(r->tseqbase))) { |
| 273 | /* trivial: all values are nil (includes no entries at all) */ |
| 274 | const void *nil = ATOMnilptr(r->ttype); |
| 275 | |
| 276 | bn = BATconstant(l->hseqbase, r->ttype == TYPE_oid ? TYPE_void : r->ttype, |
| 277 | nil, lcount, TRANSIENT); |
| 278 | if (bn != NULL && |
| 279 | ATOMtype(bn->ttype) == TYPE_oid && |
| 280 | BATcount(bn) == 0) { |
| 281 | BATtseqbase(bn, 0); |
| 282 | } |
| 283 | ALGODEBUG fprintf(stderr, "#%s: %s(l=%s,r=%s)=" ALGOOPTBATFMT " (constant)\n" , |
| 284 | MT_thread_getname(), __func__, |
| 285 | BATgetId(l), BATgetId(r), ALGOOPTBATPAR(bn)); |
| 286 | return bn; |
| 287 | } |
| 288 | |
| 289 | if (ATOMstorage(tpe) == TYPE_str && |
| 290 | l->tnonil && |
| 291 | (rcount == 0 || |
| 292 | lcount > (rcount >> 3) || |
| 293 | r->batRestricted == BAT_READ)) { |
| 294 | /* insert strings as ints, we need to copy the string |
| 295 | * heap whole sale; we can't do this if there are nils |
| 296 | * in the left column, and we won't do it if the left |
| 297 | * is much smaller than the right and the right is |
| 298 | * writable (meaning we have to actually copy the |
| 299 | * right string heap) */ |
| 300 | tpe = r->twidth == 1 ? TYPE_bte : (r->twidth == 2 ? TYPE_sht : (r->twidth == 4 ? TYPE_int : TYPE_lng)); |
| 301 | /* int's nil representation is a valid offset, so |
| 302 | * don't check for nils */ |
| 303 | nilcheck = false; |
| 304 | stringtrick = true; |
| 305 | } |
| 306 | bn = COLnew(l->hseqbase, tpe, lcount, TRANSIENT); |
| 307 | if (bn == NULL) { |
| 308 | ALGODEBUG fprintf(stderr, "#%s: %s(l=%s,r=%s)=0\n" , |
| 309 | MT_thread_getname(), __func__, |
| 310 | BATgetId(l), BATgetId(r)); |
| 311 | return NULL; |
| 312 | } |
| 313 | if (stringtrick) { |
| 314 | /* "string type" */ |
| 315 | bn->tsorted = false; |
| 316 | bn->trevsorted = false; |
| 317 | bn->tkey = false; |
| 318 | bn->tnonil = false; |
| 319 | } else { |
| 320 | /* be optimistic, we'll clear these if necessary later */ |
| 321 | bn->tnonil = true; |
| 322 | bn->tsorted = true; |
| 323 | bn->trevsorted = true; |
| 324 | bn->tkey = true; |
| 325 | if (l->tnonil && r->tnonil) |
| 326 | nilcheck = false; /* don't bother checking: no nils */ |
| 327 | if (tpe != TYPE_oid && |
| 328 | tpe != ATOMstorage(tpe) && |
| 329 | !ATOMvarsized(tpe) && |
| 330 | ATOMcompare(tpe) == ATOMcompare(ATOMstorage(tpe)) && |
| 331 | (!nilcheck || |
| 332 | ATOMnilptr(tpe) == ATOMnilptr(ATOMstorage(tpe)))) { |
| 333 | /* use base type if we can: |
| 334 | * only fixed sized (no advantage for variable sized), |
| 335 | * compare function identical (for sorted check), |
| 336 | * either no nils, or nil representation identical, |
| 337 | * not oid (separate case for those) */ |
| 338 | tpe = ATOMstorage(tpe); |
| 339 | } |
| 340 | } |
| 341 | bn->tnil = false; |
| 342 | |
| 343 | switch (tpe) { |
| 344 | case TYPE_bte: |
| 345 | res = project_bte(bn, l, lci, r, nilcheck); |
| 346 | break; |
| 347 | case TYPE_sht: |
| 348 | res = project_sht(bn, l, lci, r, nilcheck); |
| 349 | break; |
| 350 | case TYPE_int: |
| 351 | res = project_int(bn, l, lci, r, nilcheck); |
| 352 | break; |
| 353 | case TYPE_flt: |
| 354 | res = project_flt(bn, l, lci, r, nilcheck); |
| 355 | break; |
| 356 | case TYPE_dbl: |
| 357 | res = project_dbl(bn, l, lci, r, nilcheck); |
| 358 | break; |
| 359 | case TYPE_lng: |
| 360 | res = project_lng(bn, l, lci, r, nilcheck); |
| 361 | break; |
| 362 | #ifdef HAVE_HGE |
| 363 | case TYPE_hge: |
| 364 | res = project_hge(bn, l, lci, r, nilcheck); |
| 365 | break; |
| 366 | #endif |
| 367 | case TYPE_oid: |
| 368 | if (BATtdense(r)) { |
| 369 | res = project_void(bn, l, lci, r); |
| 370 | } else if (r->ttype == TYPE_void) { |
| 371 | assert(r->tvheap != NULL); |
| 372 | res = project_cand(bn, l, lci, r); |
| 373 | } else { |
| 374 | #if SIZEOF_OID == SIZEOF_INT |
| 375 | res = project_int(bn, l, lci, r, nilcheck); |
| 376 | #else |
| 377 | res = project_lng(bn, l, lci, r, nilcheck); |
| 378 | #endif |
| 379 | } |
| 380 | break; |
| 381 | default: |
| 382 | res = project_any(bn, l, lci, r, nilcheck); |
| 383 | break; |
| 384 | } |
| 385 | |
| 386 | if (res != GDK_SUCCEED) |
| 387 | goto bailout; |
| 388 | |
| 389 | /* handle string trick */ |
| 390 | if (stringtrick) { |
| 391 | if (r->batRestricted == BAT_READ) { |
| 392 | /* really share string heap */ |
| 393 | assert(r->tvheap->parentid > 0); |
| 394 | BBPshare(r->tvheap->parentid); |
| 395 | bn->tvheap = r->tvheap; |
| 396 | } else { |
| 397 | /* make copy of string heap */ |
| 398 | bn->tvheap = (Heap *) GDKzalloc(sizeof(Heap)); |
| 399 | if (bn->tvheap == NULL) |
| 400 | goto bailout; |
| 401 | bn->tvheap->parentid = bn->batCacheid; |
| 402 | bn->tvheap->farmid = BBPselectfarm(bn->batRole, TYPE_str, varheap); |
| 403 | strconcat_len(bn->tvheap->filename, |
| 404 | sizeof(bn->tvheap->filename), |
| 405 | BBP_physical(bn->batCacheid), ".theap" , |
| 406 | NULL); |
| 407 | if (HEAPcopy(bn->tvheap, r->tvheap) != GDK_SUCCEED) |
| 408 | goto bailout; |
| 409 | } |
| 410 | bn->ttype = r->ttype; |
| 411 | bn->tvarsized = true; |
| 412 | bn->twidth = r->twidth; |
| 413 | bn->tshift = r->tshift; |
| 414 | |
| 415 | bn->tnil = false; /* we don't know */ |
| 416 | } |
| 417 | /* some properties follow from certain combinations of input |
| 418 | * properties */ |
| 419 | if (BATcount(bn) <= 1) { |
| 420 | bn->tkey = true; |
| 421 | bn->tsorted = true; |
| 422 | bn->trevsorted = true; |
| 423 | } else { |
| 424 | bn->tkey = l->tkey && r->tkey; |
| 425 | bn->tsorted = (l->tsorted & r->tsorted) | (l->trevsorted & r->trevsorted); |
| 426 | bn->trevsorted = (l->tsorted & r->trevsorted) | (l->trevsorted & r->tsorted); |
| 427 | } |
| 428 | bn->tnonil |= l->tnonil & r->tnonil; |
| 429 | |
| 430 | if (!BATtdense(r)) |
| 431 | BATtseqbase(bn, oid_nil); |
| 432 | ALGODEBUG fprintf(stderr, "#%s: %s(l=%s,r=%s)=" ALGOBATFMT "%s " LLFMT "us\n" , |
| 433 | MT_thread_getname(), __func__, |
| 434 | BATgetId(l), BATgetId(r), ALGOBATPAR(bn), |
| 435 | bn->ttype == TYPE_str && bn->tvheap == r->tvheap ? " shared string heap" : "" , |
| 436 | GDKusec() - t0); |
| 437 | return bn; |
| 438 | |
| 439 | bailout: |
| 440 | BBPreclaim(bn); |
| 441 | return NULL; |
| 442 | } |
| 443 | |
| 444 | /* Calculate a chain of BATproject calls. |
| 445 | * The argument is a NULL-terminated array of BAT pointers. |
| 446 | * This function is equivalent (apart from reference counting) to a |
| 447 | * sequence of calls |
| 448 | * bn = BATproject(bats[0], bats[1]); |
| 449 | * bn = BATproject(bn, bats[2]); |
| 450 | * ... |
| 451 | * bn = BATproject(bn, bats[n-1]); |
| 452 | * return bn; |
| 453 | * where none of the intermediates are actually produced (and bats[n]==NULL). |
| 454 | * Note that all BATs except the last must have type oid/void. |
| 455 | */ |
| 456 | BAT * |
| 457 | BATprojectchain(BAT **bats) |
| 458 | { |
| 459 | struct ba { |
| 460 | BAT *b; |
| 461 | oid hlo; |
| 462 | BUN cnt; |
| 463 | oid *t; |
| 464 | struct canditer ci; /* used if .ci.s != NULL */ |
| 465 | } *ba; |
| 466 | int n; |
| 467 | BAT *b = NULL, *bn; |
| 468 | bool allnil = false; |
| 469 | bool issorted = true; |
| 470 | bool nonil = true; |
| 471 | bool stringtrick = false; |
| 472 | const void *nil; |
| 473 | int tpe; |
| 474 | lng t0 = 0; |
| 475 | |
| 476 | ALGODEBUG t0 = GDKusec(); |
| 477 | |
| 478 | /* count number of participating BATs and allocate some |
| 479 | * temporary work space */ |
| 480 | for (n = 0; bats[n]; n++) { |
| 481 | b = bats[n]; |
| 482 | ALGODEBUG fprintf(stderr, "#%s: %s arg %d: " ALGOBATFMT "\n" , |
| 483 | MT_thread_getname(), __func__, n + 1, |
| 484 | ALGOBATPAR(b)); |
| 485 | } |
| 486 | if (n == 0) { |
| 487 | GDKerror("%s: must have BAT arguments\n" , __func__); |
| 488 | return NULL; |
| 489 | } |
| 490 | if (n == 1) { |
| 491 | bn = COLcopy(b, b->ttype, true, TRANSIENT); |
| 492 | ALGODEBUG fprintf(stderr, "#%s: %s with 1 bat: copy: " |
| 493 | ALGOOPTBATFMT " (" LLFMT " usec)\n" , |
| 494 | MT_thread_getname(), __func__, |
| 495 | ALGOOPTBATPAR(bn), GDKusec() - t0); |
| 496 | return bn; |
| 497 | } |
| 498 | |
| 499 | ba = GDKmalloc(sizeof(*ba) * n); |
| 500 | if (ba == NULL) |
| 501 | return NULL; |
| 502 | |
| 503 | for (n = 0; bats[n]; n++) { |
| 504 | b = bats[n]; |
| 505 | ba[n] = (struct ba) { |
| 506 | .b = b, |
| 507 | .hlo = b->hseqbase, |
| 508 | .cnt = b->batCount, |
| 509 | .t = (oid *) b->theap.base, |
| 510 | }; |
| 511 | allnil |= b->ttype == TYPE_void && is_oid_nil(b->tseqbase); |
| 512 | issorted &= b->tsorted; |
| 513 | nonil &= b->tnonil; |
| 514 | if (b->tnonil && b->tkey && b->tsorted && |
| 515 | ATOMtype(b->ttype) == TYPE_oid) { |
| 516 | canditer_init(&ba[n].ci, NULL, b); |
| 517 | } |
| 518 | } |
| 519 | /* b is last BAT in bats array */ |
| 520 | tpe = ATOMtype(b->ttype); |
| 521 | nil = ATOMnilptr(tpe); |
| 522 | if (allnil || ba[0].cnt == 0) { |
| 523 | bn = BATconstant(ba[0].hlo, tpe == TYPE_oid ? TYPE_void : tpe, |
| 524 | nil, ba[0].cnt, TRANSIENT); |
| 525 | GDKfree(ba); |
| 526 | ALGODEBUG fprintf(stderr, "#%s: %s with %d bats: nil/empty: " |
| 527 | ALGOOPTBATFMT " (" LLFMT " usec)\n" , |
| 528 | MT_thread_getname(), __func__, n, |
| 529 | ALGOOPTBATPAR(bn), GDKusec() - t0); |
| 530 | return bn; |
| 531 | } |
| 532 | |
| 533 | if (nonil && ATOMstorage(tpe) == TYPE_str && b->batRestricted == BAT_READ) { |
| 534 | stringtrick = true; |
| 535 | tpe = b->twidth == 1 ? TYPE_bte : (b->twidth == 2 ? TYPE_sht : (b->twidth == 4 ? TYPE_int : TYPE_lng)); |
| 536 | } |
| 537 | |
| 538 | bn = COLnew(ba[0].hlo, tpe, ba[0].cnt, TRANSIENT); |
| 539 | if (bn == NULL) { |
| 540 | GDKfree(ba); |
| 541 | return NULL; |
| 542 | } |
| 543 | |
| 544 | if (ATOMtype(b->ttype) == TYPE_oid) { |
| 545 | /* oid all the way */ |
| 546 | oid *d = (oid *) Tloc(bn, 0); |
| 547 | assert(!stringtrick); |
| 548 | for (BUN p = 0; p < ba[0].cnt; p++) { |
| 549 | oid o = ba[0].ci.s ? canditer_next(&ba[0].ci) : ba[0].t[p]; |
| 550 | for (int i = 1; i < n; i++) { |
| 551 | if (is_oid_nil(o)) { |
| 552 | bn->tnil = true; |
| 553 | break; |
| 554 | } |
| 555 | if (o < ba[i].hlo || o >= ba[i].hlo + ba[i].cnt) { |
| 556 | GDKerror("%s: does not match always\n" , |
| 557 | __func__); |
| 558 | goto bunins_failed; |
| 559 | } |
| 560 | o -= ba[i].hlo; |
| 561 | o = ba[i].ci.s ? canditer_idx(&ba[i].ci, o) : ba[i].t[o]; |
| 562 | } |
| 563 | bunfastappTYPE(oid, bn, &o); |
| 564 | ATOMputFIX(bn->ttype, d, &o); |
| 565 | d++; |
| 566 | } |
| 567 | } else if (!ATOMvarsized(tpe)) { |
| 568 | const void *v; |
| 569 | char *d = Tloc(bn, 0); |
| 570 | |
| 571 | bn->tnil = false; |
| 572 | n--; /* stop one before the end, also ba[n] is last */ |
| 573 | for (BUN p = 0; p < ba[0].cnt; p++) { |
| 574 | oid o = ba[0].ci.s ? canditer_next(&ba[0].ci) : ba[0].t[p]; |
| 575 | |
| 576 | for (int i = 1; i < n; i++) { |
| 577 | if (is_oid_nil(o)) { |
| 578 | bn->tnil = true; |
| 579 | break; |
| 580 | } |
| 581 | if (o < ba[i].hlo || o >= ba[i].hlo + ba[i].cnt) { |
| 582 | GDKerror("%s: does not match always\n" , |
| 583 | __func__); |
| 584 | goto bunins_failed; |
| 585 | } |
| 586 | o -= ba[i].hlo; |
| 587 | o = ba[i].ci.s ? canditer_idx(&ba[i].ci, o) : ba[i].t[o]; |
| 588 | } |
| 589 | if (is_oid_nil(o)) { |
| 590 | assert(!stringtrick); |
| 591 | bn->tnil = true; |
| 592 | v = nil; |
| 593 | } else if (o < ba[n].hlo || o >= ba[n].hlo + ba[n].cnt) { |
| 594 | GDKerror("%s: does not match always\n" , |
| 595 | __func__); |
| 596 | goto bunins_failed; |
| 597 | } else { |
| 598 | o -= ba[n].hlo; |
| 599 | v = Tloc(b, o); |
| 600 | } |
| 601 | ATOMputFIX(tpe, d, v); |
| 602 | d += b->twidth; |
| 603 | } |
| 604 | if (stringtrick) { |
| 605 | bn->tnil = false; |
| 606 | bn->tnonil = nonil; |
| 607 | bn->tkey = false; |
| 608 | BBPshare(b->tvheap->parentid); |
| 609 | bn->tvheap = b->tvheap; |
| 610 | bn->ttype = b->ttype; |
| 611 | bn->tvarsized = true; |
| 612 | assert(bn->twidth == b->twidth); |
| 613 | assert(bn->tshift == b->tshift); |
| 614 | } |
| 615 | n++; /* undo for debug print */ |
| 616 | } else { |
| 617 | BATiter bi = bat_iterator(b); |
| 618 | const void *v; |
| 619 | |
| 620 | assert(!stringtrick); |
| 621 | bn->tnil = false; |
| 622 | n--; /* stop one before the end, also ba[n] is last */ |
| 623 | for (BUN p = 0; p < ba[0].cnt; p++) { |
| 624 | oid o = ba[0].ci.s ? canditer_next(&ba[0].ci) : ba[0].t[p]; |
| 625 | for (int i = 1; i < n; i++) { |
| 626 | if (is_oid_nil(o)) { |
| 627 | bn->tnil = true; |
| 628 | break; |
| 629 | } |
| 630 | if (o < ba[i].hlo || o >= ba[i].hlo + ba[i].cnt) { |
| 631 | GDKerror("%s: does not match always\n" , |
| 632 | __func__); |
| 633 | goto bunins_failed; |
| 634 | } |
| 635 | o -= ba[i].hlo; |
| 636 | o = ba[i].ci.s ? canditer_idx(&ba[i].ci, o) : ba[i].t[o]; |
| 637 | } |
| 638 | if (is_oid_nil(o)) { |
| 639 | bn->tnil = true; |
| 640 | v = nil; |
| 641 | } else if (o < ba[n].hlo || o >= ba[n].hlo + ba[n].cnt) { |
| 642 | GDKerror("%s: does not match always\n" , |
| 643 | __func__); |
| 644 | goto bunins_failed; |
| 645 | } else { |
| 646 | o -= ba[n].hlo; |
| 647 | v = BUNtail(bi, o); |
| 648 | } |
| 649 | bunfastapp(bn, v); |
| 650 | } |
| 651 | n++; /* undo for debug print */ |
| 652 | } |
| 653 | BATsetcount(bn, ba[0].cnt); |
| 654 | bn->tsorted = (ba[0].cnt <= 1) | issorted; |
| 655 | bn->trevsorted = ba[0].cnt <= 1; |
| 656 | bn->tnonil = nonil; |
| 657 | bn->tseqbase = oid_nil; |
| 658 | GDKfree(ba); |
| 659 | ALGODEBUG fprintf(stderr, "#%s: %s with %d bats: " |
| 660 | ALGOOPTBATFMT " (" LLFMT " usec)\n" , |
| 661 | MT_thread_getname(), __func__, n, |
| 662 | ALGOOPTBATPAR(bn), GDKusec() - t0); |
| 663 | return bn; |
| 664 | |
| 665 | bunins_failed: |
| 666 | GDKfree(ba); |
| 667 | BBPreclaim(bn); |
| 668 | ALGODEBUG fprintf(stderr, "#%s: %s failed\n" , |
| 669 | MT_thread_getname(), __func__); |
| 670 | return NULL; |
| 671 | } |
| 672 | |