| 1 | /* |
| 2 | * This Source Code Form is subject to the terms of the Mozilla Public |
| 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
| 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
| 5 | * |
| 6 | * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V. |
| 7 | */ |
| 8 | |
| 9 | #include "monetdb_config.h" |
| 10 | #include "gdk.h" |
| 11 | #include "gdk_private.h" |
| 12 | #include <math.h> |
| 13 | |
| 14 | /* auxiliary functions and structs for imprints */ |
| 15 | #include "gdk_imprints.h" |
| 16 | |
| 17 | #define buninsfix(B,A,I,V,G,M,R) \ |
| 18 | do { \ |
| 19 | if ((I) == BATcapacity(B)) { \ |
| 20 | BATsetcount((B), (I)); \ |
| 21 | if (BATextend((B), \ |
| 22 | MIN(BATcapacity(B) + (G), \ |
| 23 | (M))) != GDK_SUCCEED) { \ |
| 24 | BBPreclaim(B); \ |
| 25 | return (R); \ |
| 26 | } \ |
| 27 | A = (oid *) Tloc((B), 0); \ |
| 28 | } \ |
| 29 | A[(I)] = (V); \ |
| 30 | } while (false) |
| 31 | |
| 32 | BAT * |
| 33 | virtualize(BAT *bn) |
| 34 | { |
| 35 | /* input must be a valid candidate list or NULL */ |
| 36 | assert(bn == NULL || |
| 37 | (((bn->ttype == TYPE_void && !is_oid_nil(bn->tseqbase)) || |
| 38 | bn->ttype == TYPE_oid) && |
| 39 | bn->tkey && bn->tsorted)); |
| 40 | /* since bn has unique and strictly ascending values, we can |
| 41 | * easily check whether the column is dense */ |
| 42 | if (bn && bn->ttype == TYPE_oid && |
| 43 | (BATcount(bn) <= 1 || |
| 44 | * (const oid *) Tloc(bn, 0) + BATcount(bn) - 1 == |
| 45 | * (const oid *) Tloc(bn, BUNlast(bn) - 1))) { |
| 46 | /* column is dense, replace by virtual oid */ |
| 47 | ALGODEBUG fprintf(stderr, "#%s: %s(bn=" ALGOBATFMT ",seq=" OIDFMT")\n" , |
| 48 | MT_thread_getname(), __func__, |
| 49 | ALGOBATPAR(bn), |
| 50 | BATcount(bn) > 0 ? * (const oid *) Tloc(bn, 0) : 0); |
| 51 | if (BATcount(bn) == 0) |
| 52 | bn->tseqbase = 0; |
| 53 | else |
| 54 | bn->tseqbase = * (const oid *) Tloc(bn, 0); |
| 55 | if (VIEWtparent(bn)) { |
| 56 | BBPunshare(VIEWtparent(bn)); |
| 57 | BBPunfix(VIEWtparent(bn)); |
| 58 | bn->theap.parentid = 0; |
| 59 | bn->theap.base = NULL; |
| 60 | } else { |
| 61 | HEAPfree(&bn->theap, true); |
| 62 | } |
| 63 | bn->theap.storage = bn->theap.newstorage = STORE_MEM; |
| 64 | bn->theap.size = 0; |
| 65 | bn->ttype = TYPE_void; |
| 66 | bn->tvarsized = true; |
| 67 | bn->twidth = 0; |
| 68 | bn->tshift = 0; |
| 69 | } |
| 70 | |
| 71 | return bn; |
| 72 | } |
| 73 | |
| 74 | #define HASHloop_bound(bi, h, hb, v, lo, hi) \ |
| 75 | for (hb = HASHget(h, HASHprobe((h), v)); \ |
| 76 | hb != HASHnil(h); \ |
| 77 | hb = HASHgetlink(h,hb)) \ |
| 78 | if (hb >= (lo) && hb < (hi) && \ |
| 79 | (cmp == NULL || \ |
| 80 | (*cmp)(v, BUNtail(bi, hb)) == 0)) |
| 81 | |
| 82 | static BAT * |
| 83 | hashselect(BAT *b, struct canditer *restrict ci, BAT *bn, |
| 84 | const void *tl, BUN maximum, bool phash, const char **algo) |
| 85 | { |
| 86 | BATiter bi; |
| 87 | BUN i, cnt; |
| 88 | oid o, *restrict dst; |
| 89 | BUN l, h, d = 0; |
| 90 | oid seq; |
| 91 | int (*cmp)(const void *, const void *); |
| 92 | |
| 93 | assert(bn->ttype == TYPE_oid); |
| 94 | seq = b->hseqbase; |
| 95 | l = ci->seq - seq; |
| 96 | h = canditer_last(ci) + 1 - seq; |
| 97 | |
| 98 | *algo = "hashselect" ; |
| 99 | if (phash) { |
| 100 | BAT *b2 = BBPdescriptor(VIEWtparent(b)); |
| 101 | *algo = "hashselect on parent" ; |
| 102 | ALGODEBUG fprintf(stderr, "#%s: %s(" ALGOBATFMT "): " |
| 103 | "using parent(" ALGOBATFMT ") " |
| 104 | "for hash\n" , |
| 105 | MT_thread_getname(), __func__, |
| 106 | ALGOBATPAR(b), |
| 107 | ALGOBATPAR(b2)); |
| 108 | d = (BUN) ((b->theap.base - b2->theap.base) >> b->tshift); |
| 109 | l += d; |
| 110 | h += d; |
| 111 | b = b2; |
| 112 | } |
| 113 | |
| 114 | if (BAThash(b) != GDK_SUCCEED) { |
| 115 | BBPreclaim(bn); |
| 116 | return NULL; |
| 117 | } |
| 118 | switch (ATOMbasetype(b->ttype)) { |
| 119 | case TYPE_bte: |
| 120 | case TYPE_sht: |
| 121 | cmp = NULL; /* no need to compare: "hash" is perfect */ |
| 122 | break; |
| 123 | default: |
| 124 | cmp = ATOMcompare(b->ttype); |
| 125 | break; |
| 126 | } |
| 127 | bi = bat_iterator(b); |
| 128 | dst = (oid *) Tloc(bn, 0); |
| 129 | cnt = 0; |
| 130 | if (ci->tpe != cand_dense) { |
| 131 | HASHloop_bound(bi, b->thash, i, tl, l, h) { |
| 132 | o = (oid) (i + seq - d); |
| 133 | if (canditer_search(ci, o, false) != BUN_NONE) { |
| 134 | buninsfix(bn, dst, cnt, o, |
| 135 | maximum - BATcapacity(bn), |
| 136 | maximum, NULL); |
| 137 | cnt++; |
| 138 | } |
| 139 | } |
| 140 | } else { |
| 141 | HASHloop_bound(bi, b->thash, i, tl, l, h) { |
| 142 | o = (oid) (i + seq - d); |
| 143 | buninsfix(bn, dst, cnt, o, |
| 144 | maximum - BATcapacity(bn), |
| 145 | maximum, NULL); |
| 146 | cnt++; |
| 147 | } |
| 148 | } |
| 149 | BATsetcount(bn, cnt); |
| 150 | bn->tkey = true; |
| 151 | if (cnt > 1) { |
| 152 | /* hash chains produce results in the order high to |
| 153 | * low, so we just need to reverse */ |
| 154 | for (l = 0, h = BUNlast(bn) - 1; l < h; l++, h--) { |
| 155 | o = dst[l]; |
| 156 | dst[l] = dst[h]; |
| 157 | dst[h] = o; |
| 158 | } |
| 159 | } |
| 160 | bn->tsorted = true; |
| 161 | bn->trevsorted = bn->batCount <= 1; |
| 162 | bn->tseqbase = bn->batCount == 0 ? 0 : bn->batCount == 1 ? *dst : oid_nil; |
| 163 | return bn; |
| 164 | } |
| 165 | |
| 166 | /* Imprints select code */ |
| 167 | |
| 168 | /* inner check */ |
| 169 | #define impscheck(canditer_next,TEST,ADD) \ |
| 170 | do { \ |
| 171 | const oid |
|---|