1 | /* |
2 | * This Source Code Form is subject to the terms of the Mozilla Public |
3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
5 | * |
6 | * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V. |
7 | */ |
8 | |
9 | #include "monetdb_config.h" |
10 | #include "gdk.h" |
11 | #include "gdk_private.h" |
12 | #include <math.h> |
13 | |
14 | /* auxiliary functions and structs for imprints */ |
15 | #include "gdk_imprints.h" |
16 | |
17 | #define buninsfix(B,A,I,V,G,M,R) \ |
18 | do { \ |
19 | if ((I) == BATcapacity(B)) { \ |
20 | BATsetcount((B), (I)); \ |
21 | if (BATextend((B), \ |
22 | MIN(BATcapacity(B) + (G), \ |
23 | (M))) != GDK_SUCCEED) { \ |
24 | BBPreclaim(B); \ |
25 | return (R); \ |
26 | } \ |
27 | A = (oid *) Tloc((B), 0); \ |
28 | } \ |
29 | A[(I)] = (V); \ |
30 | } while (false) |
31 | |
32 | BAT * |
33 | virtualize(BAT *bn) |
34 | { |
35 | /* input must be a valid candidate list or NULL */ |
36 | assert(bn == NULL || |
37 | (((bn->ttype == TYPE_void && !is_oid_nil(bn->tseqbase)) || |
38 | bn->ttype == TYPE_oid) && |
39 | bn->tkey && bn->tsorted)); |
40 | /* since bn has unique and strictly ascending values, we can |
41 | * easily check whether the column is dense */ |
42 | if (bn && bn->ttype == TYPE_oid && |
43 | (BATcount(bn) <= 1 || |
44 | * (const oid *) Tloc(bn, 0) + BATcount(bn) - 1 == |
45 | * (const oid *) Tloc(bn, BUNlast(bn) - 1))) { |
46 | /* column is dense, replace by virtual oid */ |
47 | ALGODEBUG fprintf(stderr, "#%s: %s(bn=" ALGOBATFMT ",seq=" OIDFMT")\n" , |
48 | MT_thread_getname(), __func__, |
49 | ALGOBATPAR(bn), |
50 | BATcount(bn) > 0 ? * (const oid *) Tloc(bn, 0) : 0); |
51 | if (BATcount(bn) == 0) |
52 | bn->tseqbase = 0; |
53 | else |
54 | bn->tseqbase = * (const oid *) Tloc(bn, 0); |
55 | if (VIEWtparent(bn)) { |
56 | BBPunshare(VIEWtparent(bn)); |
57 | BBPunfix(VIEWtparent(bn)); |
58 | bn->theap.parentid = 0; |
59 | bn->theap.base = NULL; |
60 | } else { |
61 | HEAPfree(&bn->theap, true); |
62 | } |
63 | bn->theap.storage = bn->theap.newstorage = STORE_MEM; |
64 | bn->theap.size = 0; |
65 | bn->ttype = TYPE_void; |
66 | bn->tvarsized = true; |
67 | bn->twidth = 0; |
68 | bn->tshift = 0; |
69 | } |
70 | |
71 | return bn; |
72 | } |
73 | |
74 | #define HASHloop_bound(bi, h, hb, v, lo, hi) \ |
75 | for (hb = HASHget(h, HASHprobe((h), v)); \ |
76 | hb != HASHnil(h); \ |
77 | hb = HASHgetlink(h,hb)) \ |
78 | if (hb >= (lo) && hb < (hi) && \ |
79 | (cmp == NULL || \ |
80 | (*cmp)(v, BUNtail(bi, hb)) == 0)) |
81 | |
82 | static BAT * |
83 | hashselect(BAT *b, struct canditer *restrict ci, BAT *bn, |
84 | const void *tl, BUN maximum, bool phash, const char **algo) |
85 | { |
86 | BATiter bi; |
87 | BUN i, cnt; |
88 | oid o, *restrict dst; |
89 | BUN l, h, d = 0; |
90 | oid seq; |
91 | int (*cmp)(const void *, const void *); |
92 | |
93 | assert(bn->ttype == TYPE_oid); |
94 | seq = b->hseqbase; |
95 | l = ci->seq - seq; |
96 | h = canditer_last(ci) + 1 - seq; |
97 | |
98 | *algo = "hashselect" ; |
99 | if (phash) { |
100 | BAT *b2 = BBPdescriptor(VIEWtparent(b)); |
101 | *algo = "hashselect on parent" ; |
102 | ALGODEBUG fprintf(stderr, "#%s: %s(" ALGOBATFMT "): " |
103 | "using parent(" ALGOBATFMT ") " |
104 | "for hash\n" , |
105 | MT_thread_getname(), __func__, |
106 | ALGOBATPAR(b), |
107 | ALGOBATPAR(b2)); |
108 | d = (BUN) ((b->theap.base - b2->theap.base) >> b->tshift); |
109 | l += d; |
110 | h += d; |
111 | b = b2; |
112 | } |
113 | |
114 | if (BAThash(b) != GDK_SUCCEED) { |
115 | BBPreclaim(bn); |
116 | return NULL; |
117 | } |
118 | switch (ATOMbasetype(b->ttype)) { |
119 | case TYPE_bte: |
120 | case TYPE_sht: |
121 | cmp = NULL; /* no need to compare: "hash" is perfect */ |
122 | break; |
123 | default: |
124 | cmp = ATOMcompare(b->ttype); |
125 | break; |
126 | } |
127 | bi = bat_iterator(b); |
128 | dst = (oid *) Tloc(bn, 0); |
129 | cnt = 0; |
130 | if (ci->tpe != cand_dense) { |
131 | HASHloop_bound(bi, b->thash, i, tl, l, h) { |
132 | o = (oid) (i + seq - d); |
133 | if (canditer_search(ci, o, false) != BUN_NONE) { |
134 | buninsfix(bn, dst, cnt, o, |
135 | maximum - BATcapacity(bn), |
136 | maximum, NULL); |
137 | cnt++; |
138 | } |
139 | } |
140 | } else { |
141 | HASHloop_bound(bi, b->thash, i, tl, l, h) { |
142 | o = (oid) (i + seq - d); |
143 | buninsfix(bn, dst, cnt, o, |
144 | maximum - BATcapacity(bn), |
145 | maximum, NULL); |
146 | cnt++; |
147 | } |
148 | } |
149 | BATsetcount(bn, cnt); |
150 | bn->tkey = true; |
151 | if (cnt > 1) { |
152 | /* hash chains produce results in the order high to |
153 | * low, so we just need to reverse */ |
154 | for (l = 0, h = BUNlast(bn) - 1; l < h; l++, h--) { |
155 | o = dst[l]; |
156 | dst[l] = dst[h]; |
157 | dst[h] = o; |
158 | } |
159 | } |
160 | bn->tsorted = true; |
161 | bn->trevsorted = bn->batCount <= 1; |
162 | bn->tseqbase = bn->batCount == 0 ? 0 : bn->batCount == 1 ? *dst : oid_nil; |
163 | return bn; |
164 | } |
165 | |
166 | /* Imprints select code */ |
167 | |
168 | /* inner check */ |
169 | #define impscheck(canditer_next,TEST,ADD) \ |
170 | do { \ |
171 | const oid |
---|