1/*
2 * This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
5 *
6 * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V.
7 */
8
9/*
10 * @a Peter Boncz, Niels Nes
11 * @* BAT Alignment
12 * For BATs that result from a n-ary relational scheme it may help to
13 * align the BATs on their head value. In particular, it permits
14 * replacing a hash-join by a merge-join, which is significantly
15 * faster on large tables. Especially if the BATs involved cause page
16 * activity or when you can not afford the large hash structures to
17 * speed-up processing.
18 *
19 * For orthogonality, we support alignment between arbitrary columns
20 * (head or tail).
21 *
22 * All standard GDK set-calls update the alignment info in their
23 * respective ways. For example, the routine @emph{BUNclustercopy}
24 * shuffles the first argument, such that the BUNs are in the same
25 * order as those in the second argument. This operation will mark
26 * both columns of the first @emph{BAT} as synced with the second
27 * (likewise, @emph{Colcopy()}, which makes a copy, instead of
28 * in-place shuffling, has the same alignment effect.
29 *
30 * Each alignment sequence is given a unique identifier, so as to
31 * easily detect this situation. It is retained in the @emph{BAT
32 * descriptor}.
33 * @+ Alignment Design Considerations
34 * Alignment primitives require the right hooks to be inserted in
35 * several places in the GDK, apart form this file:
36 * @itemize
37 * @item @emph{ BUN update operations}.
38 * The updated BATs have to be marked as un-aligned.
39 * @item @emph{ set operations}.
40 * For most relational operations some statements can be made about
41 * the size and order of the BATs they produce. This information can
42 * be formalized by indicating alignment information automatically.
43 * @item @emph{ transaction operations}.
44 * Alignment statuses must be kept consistent under database commits
45 * and aborts.
46 * @end itemize
47 */
48#include "monetdb_config.h"
49#include "gdk.h"
50#include "gdk_private.h"
51
52/* Return TRUE if the two BATs are aligned (same size, same
53 * hseqbase). */
54int
55ALIGNsynced(BAT *b1, BAT *b2)
56{
57 if (b1 == NULL || b2 == NULL)
58 return 0;
59
60 assert(!is_oid_nil(b1->hseqbase));
61 assert(!is_oid_nil(b2->hseqbase));
62
63 return BATcount(b1) == BATcount(b2) && b1->hseqbase == b2->hseqbase;
64}
65
66/*
67 * @+ View BATS
68 * The general routine for getting a 'view' BAT upon another BAT is
69 * @emph{VIEWcreate}. On this @emph{#read-only} BAT (there is kernel
70 * support for this), you can then make vertical slices.
71 *
72 * It is possible to create a view on a writable BAT. Updates in the
73 * parent are then automatically reflected in the VIEW. Note that the
74 * VIEW bat itself can never be modified.
75 *
76 * Horizontal views should only be given out on a view BAT, but only
77 * if it is dead sure the parent BAT is read-only. This because they
78 * cannot physically share the batBuns heap with the parent, as they
79 * need a modified version.
80 */
81BAT *
82VIEWcreate(oid seq, BAT *b)
83{
84 BAT *bn;
85 bat tp = 0;
86
87 BATcheck(b, "VIEWcreate", NULL);
88
89 bn = BATcreatedesc(seq, b->ttype, false, TRANSIENT);
90 if (bn == NULL)
91 return NULL;
92
93 tp = VIEWtparent(b);
94 if ((tp == 0 && b->ttype != TYPE_void) || b->theap.copied)
95 tp = b->batCacheid;
96 assert(b->ttype != TYPE_void || !tp);
97 /* the T column descriptor is fully copied. We need copies
98 * because in case of a mark, we are going to override a
99 * column with a void. Take care to zero the accelerator data,
100 * though. */
101 bn->batInserted = b->batInserted;
102 bn->batCount = b->batCount;
103 bn->batCapacity = b->batCapacity;
104 bn->T = b->T;
105
106 if (tp)
107 BBPshare(tp);
108 if (bn->tvheap) {
109 assert(b->tvheap);
110 assert(bn->tvheap->parentid > 0);
111 BBPshare(bn->tvheap->parentid);
112 }
113
114 /* note: theap points into bn which was just overwritten
115 * with a copy from the parent. Clear the copied flag since
116 * our heap was not copied from our parent(s) even if our
117 * parent's heap was copied from its parent. */
118 bn->theap.copied = false;
119 bn->tprops = NULL;
120
121 /* correct values after copy of head and tail info */
122 if (tp)
123 bn->theap.parentid = tp;
124 BATinit_idents(bn);
125 bn->batRestricted = BAT_READ;
126 bn->thash = NULL;
127 /* imprints are shared, but the check is dynamic */
128 bn->timprints = NULL;
129 /* Order OID index */
130 bn->torderidx = NULL;
131 if (BBPcacheit(bn, true) != GDK_SUCCEED) { /* enter in BBP */
132 if (tp)
133 BBPunshare(tp);
134 if (bn->tvheap)
135 BBPunshare(bn->tvheap->parentid);
136 MT_lock_destroy(&bn->batIdxLock);
137 GDKfree(bn);
138 return NULL;
139 }
140 ALGODEBUG fprintf(stderr, "#VIEWcreate(" ALGOBATFMT ")=" ALGOBATFMT "\n", ALGOBATPAR(b), ALGOBATPAR(bn));
141 return bn;
142}
143
144/*
145 * The BATmaterialize routine produces in-place materialized version
146 * of a void bat (which should not be a VIEW) (later we should add the
147 * code for VIEWs).
148 */
149
150gdk_return
151BATmaterialize(BAT *b)
152{
153 int tt;
154 BUN cnt;
155 Heap tail;
156 BUN p, q;
157 oid t, *x;
158
159 BATcheck(b, "BATmaterialize", GDK_FAIL);
160 assert(!isVIEW(b));
161 tt = b->ttype;
162 cnt = BATcapacity(b);
163 tail = b->theap;
164 p = 0;
165 q = BUNlast(b);
166 assert(cnt >= q - p);
167 ALGODEBUG fprintf(stderr, "#BATmaterialize(" ALGOBATFMT ")\n",
168 ALGOBATPAR(b));
169
170 if (tt != TYPE_void) {
171 /* no voids */
172 return GDK_SUCCEED;
173 }
174 tt = TYPE_oid;
175
176 /* cleanup possible ACC's */
177 HASHdestroy(b);
178 IMPSdestroy(b);
179 OIDXdestroy(b);
180
181 strconcat_len(b->theap.filename, sizeof(b->theap.filename),
182 BBP_physical(b->batCacheid), ".tail", NULL);
183 if (HEAPalloc(&b->theap, cnt, sizeof(oid)) != GDK_SUCCEED) {
184 b->theap = tail;
185 return GDK_FAIL;
186 }
187
188 /* point of no return */
189 b->ttype = tt;
190 BATsetdims(b);
191 b->batDirtydesc = true;
192 b->theap.dirty = true;
193
194 /* So now generate [t..t+cnt-1] */
195 t = b->tseqbase;
196 x = (oid *) b->theap.base;
197 if (is_oid_nil(t)) {
198 while (p < q)
199 x[p++] = oid_nil;
200 } else if (b->tvheap) {
201 assert(b->batRole == TRANSIENT);
202 assert(b->tvheap->free % SIZEOF_OID == 0);
203 BUN nexc = (BUN) (b->tvheap->free / SIZEOF_OID);
204 const oid *exc = (const oid *) b->tvheap->base;
205 BUN i = 0;
206 while (p < q) {
207 while (i < nexc && t == exc[i]) {
208 i++;
209 t++;
210 }
211 x[p++] = t++;
212 }
213 b->tseqbase = oid_nil;
214 HEAPfree(b->tvheap, true);
215 b->tvheap = NULL;
216 } else {
217 while (p < q)
218 x[p++] = t++;
219 }
220 BATsetcount(b, b->batCount);
221
222 /* cleanup the old heaps */
223 HEAPfree(&tail, false);
224 return GDK_SUCCEED;
225}
226
227/*
228 * The @#VIEWunlink@ routine cuts a reference to the parent. Part of the view
229 * destroy sequence.
230 */
231static void
232VIEWunlink(BAT *b)
233{
234 if (b) {
235 bat tp = VIEWtparent(b);
236 bat vtp = VIEWvtparent(b);
237 BAT *tpb = NULL;
238 BAT *vtpb = NULL;
239
240 assert(b->batCacheid > 0);
241 if (tp)
242 tpb = BBP_cache(tp);
243 if (tp && !vtp)
244 vtp = tp;
245 if (vtp)
246 vtpb = BBP_cache(vtp);
247
248 if (tpb == NULL && vtpb == NULL)
249 return;
250
251 /* unlink heaps shared with parent */
252 assert(b->tvheap == NULL || b->tvheap->parentid > 0);
253 if (b->tvheap && b->tvheap->parentid != b->batCacheid)
254 b->tvheap = NULL;
255
256 /* unlink properties shared with parent */
257 if (tpb && b->tprops && b->tprops == tpb->tprops)
258 b->tprops = NULL;
259
260 /* unlink imprints shared with parent */
261 if (tpb && b->timprints && b->timprints == tpb->timprints)
262 b->timprints = NULL;
263 }
264}
265
266/*
267 * Materialize a view into a normal BAT. If it is a slice, we really
268 * want to reduce storage of the new BAT.
269 */
270gdk_return
271VIEWreset(BAT *b)
272{
273 bat tp, tvp;
274 Heap tail, *th = NULL;
275 BAT *v = NULL;
276
277 if (b == NULL)
278 return GDK_FAIL;
279 assert(b->batCacheid > 0);
280 tp = VIEWtparent(b);
281 tvp = VIEWvtparent(b);
282 if (tp || tvp) {
283 BUN cnt;
284 const char *nme;
285
286 /* alloc heaps */
287 tail = (Heap) {0};
288
289 cnt = BATcount(b) + 1;
290 nme = BBP_physical(b->batCacheid);
291
292 assert(b->batCacheid > 0);
293 assert(tp || tvp || !b->ttype);
294
295 tail.farmid = BBPselectfarm(b->batRole, b->ttype, offheap);
296 strconcat_len(tail.filename, sizeof(tail.filename),
297 nme, ".tail", NULL);
298 if (b->ttype && HEAPalloc(&tail, cnt, Tsize(b)) != GDK_SUCCEED)
299 goto bailout;
300 if (b->tvheap) {
301 th = GDKzalloc(sizeof(Heap));
302 if (th == NULL)
303 goto bailout;
304 th->farmid = BBPselectfarm(b->batRole, b->ttype, varheap);
305 strconcat_len(th->filename, sizeof(th->filename),
306 nme, ".tail", NULL);
307 if (ATOMheap(b->ttype, th, cnt) != GDK_SUCCEED)
308 goto bailout;
309 }
310
311 v = VIEWcreate(b->hseqbase, b);
312 if (v == NULL)
313 goto bailout;
314
315 /* cut the link to your parents */
316 VIEWunlink(b);
317 if (tp) {
318 BBPunshare(tp);
319 BBPunfix(tp);
320 }
321 if (tvp) {
322 BBPunshare(tvp);
323 BBPunfix(tvp);
324 }
325
326 b->hseqbase = v->hseqbase;
327
328 b->ttype = v->ttype;
329 b->tvarsized = v->tvarsized;
330 b->tshift = v->tshift;
331 b->twidth = v->twidth;
332 b->tseqbase = v->tseqbase;
333
334 b->theap.parentid = 0;
335 b->batRestricted = BAT_WRITE;
336
337 b->tkey = BATtkey(v);
338 b->tunique = false;
339
340 /* copy the heaps */
341 b->theap = tail;
342
343 /* unshare from parents heap */
344 if (th) {
345 assert(b->tvheap == NULL);
346 b->tvheap = th;
347 th = NULL;
348 b->tvheap->parentid = b->batCacheid;
349 }
350
351 if (v->theap.parentid == b->batCacheid) {
352 assert(tp == 0);
353 assert(b->batSharecnt > 0);
354 BBPunshare(b->batCacheid);
355 BBPunfix(b->batCacheid);
356 v->theap.parentid = 0;
357 }
358 b->batSharecnt = 0;
359 b->batCopiedtodisk = false;
360 b->batDirtydesc = true;
361
362 b->tkey = BATtkey(v);
363 b->tunique = false;
364
365 /* make the BAT empty and insert all again */
366 DELTAinit(b);
367 /* reset capacity */
368 b->batCapacity = cnt;
369
370 /* insert all of v in b, and quit */
371 if (BATappend(b, v, NULL, false) != GDK_SUCCEED)
372 goto bailout;
373 BBPreclaim(v);
374 }
375 return GDK_SUCCEED;
376 bailout:
377 BBPreclaim(v);
378 HEAPfree(&tail, false);
379 GDKfree(th);
380 return GDK_FAIL;
381}
382
383/*
384 * The remainder are utilities to manipulate the BAT view and not to
385 * forget some details in the process. It expects a position range in
386 * the underlying BAT and compensates for outliers.
387 */
388void
389VIEWbounds(BAT *b, BAT *view, BUN l, BUN h)
390{
391 BUN cnt;
392 BATiter bi = bat_iterator(b);
393
394 if (b == NULL || view == NULL)
395 return;
396 if (h > BATcount(b))
397 h = BATcount(b);
398 if (h < l)
399 h = l;
400 cnt = h - l;
401 view->batInserted = 0;
402 view->theap.base = view->ttype ? BUNtloc(bi, l) : NULL;
403 view->theap.size = tailsize(view, cnt);
404 BATsetcount(view, cnt);
405 BATsetcapacity(view, cnt);
406 if (view->tnosorted > l && view->tnosorted < l + cnt)
407 view->tnosorted -= l;
408 else
409 view->tnosorted = 0;
410 if (view->tnorevsorted > l && view->tnorevsorted < l + cnt)
411 view->tnorevsorted -= l;
412 else
413 view->tnorevsorted = 0;
414 if (view->tnokey[0] >= l && view->tnokey[0] < l + cnt &&
415 view->tnokey[1] >= l && view->tnokey[1] < l + cnt &&
416 view->tnokey[0] != view->tnokey[1]) {
417 view->tnokey[0] -= l;
418 view->tnokey[1] -= l;
419 } else {
420 view->tnokey[0] = view->tnokey[1] = 0;
421 }
422}
423
424/*
425 * Destroy a view.
426 */
427void
428VIEWdestroy(BAT *b)
429{
430 assert(isVIEW(b));
431
432 /* remove any leftover private hash structures */
433 HASHdestroy(b);
434 IMPSdestroy(b);
435 OIDXdestroy(b);
436 VIEWunlink(b);
437
438 if (b->ttype && !b->theap.parentid) {
439 HEAPfree(&b->theap, false);
440 } else {
441 b->theap.base = NULL;
442 }
443 b->tvheap = NULL;
444 BATfree(b);
445}
446