1/*
2 * This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
5 *
6 * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V.
7 */
8
9/*
10 * @a M. L. Kersten, P. Boncz, N. Nes
11 * @* BAT Module
12 * In this Chapter we describe the BAT implementation in more detail.
13 * The routines mentioned are primarily meant to simplify the library
14 * implementation.
15 *
16 * @+ BAT Construction
17 * BATs are implemented in several blocks of memory, prepared for disk
18 * storage and easy shipment over a network.
19 *
20 * The BAT starts with a descriptor, which indicates the required BAT
21 * library version and the BAT administration details. In particular,
22 * it describes the binary relationship maintained and the location of
23 * fields required for storage.
24 *
25 * The general layout of the BAT in this implementation is as follows.
26 * Each BAT comes with a heap for the loc-size buns and, optionally,
27 * with heaps to manage the variable-sized data items of both
28 * dimensions. The buns are assumed to be stored as loc-size objects.
29 * This is essentially an array of structs to store the associations.
30 * The size is determined at BAT creation time using an upper bound on
31 * the number of elements to be accommodated. In case of overflow,
32 * its storage space is extended automatically.
33 *
34 * The capacity of a BAT places an upper limit on the number of BUNs
35 * to be stored initially. The actual space set aside may be quite
36 * large. Moreover, the size is aligned to int boundaries to speedup
37 * access and avoid some machine limitations.
38 *
39 * Initialization of the variable parts rely on type specific routines
40 * called atomHeap.
41 */
42#include "monetdb_config.h"
43#include "gdk.h"
44#include "gdk_private.h"
45
46#ifdef ALIGN
47#undef ALIGN
48#endif
49#define ALIGN(n,b) ((b)?(b)*(1+(((n)-1)/(b))):n)
50
51#define ATOMneedheap(tpe) (BATatoms[tpe].atomHeap != NULL)
52
53static char *BATstring_t = "t";
54
55#define default_ident(s) ((s) == BATstring_t)
56
57void
58BATinit_idents(BAT *bn)
59{
60 bn->tident = BATstring_t;
61}
62
63BAT *
64BATcreatedesc(oid hseq, int tt, bool heapnames, role_t role)
65{
66 BAT *bn;
67
68 /*
69 * Alloc space for the BAT and its dependent records.
70 */
71 assert(tt >= 0);
72
73 bn = GDKzalloc(sizeof(BAT));
74
75 if (bn == NULL)
76 return NULL;
77
78 /*
79 * Fill in basic column info
80 */
81 bn->hseqbase = hseq;
82
83 bn->ttype = tt;
84 bn->tkey = false;
85 bn->tunique = false;
86 bn->tnonil = true;
87 bn->tnil = false;
88 bn->tsorted = bn->trevsorted = ATOMlinear(tt);
89 bn->tident = BATstring_t;
90 bn->tseqbase = oid_nil;
91 bn->tprops = NULL;
92
93 bn->batRole = role;
94 bn->batTransient = true;
95 /*
96 * add to BBP
97 */
98 if (BBPinsert(bn) == 0) {
99 GDKfree(bn);
100 return NULL;
101 }
102 /*
103 * Default zero for order oid index
104 */
105 bn->torderidx = NULL;
106 /*
107 * fill in heap names, so HEAPallocs can resort to disk for
108 * very large writes.
109 */
110 assert(bn->batCacheid > 0);
111
112 const char *nme = BBP_physical(bn->batCacheid);
113 strconcat_len(bn->theap.filename, sizeof(bn->theap.filename),
114 nme, ".tail", NULL);
115 bn->theap.farmid = BBPselectfarm(role, bn->ttype, offheap);
116 if (heapnames && ATOMneedheap(tt)) {
117 if ((bn->tvheap = (Heap *) GDKzalloc(sizeof(Heap))) == NULL)
118 goto bailout;
119 strconcat_len(bn->tvheap->filename,
120 sizeof(bn->tvheap->filename),
121 nme, ".theap", NULL);
122 bn->tvheap->parentid = bn->batCacheid;
123 bn->tvheap->farmid = BBPselectfarm(role, bn->ttype, varheap);
124 }
125 char name[16];
126 snprintf(name, sizeof(name), "BATlock%d", bn->batCacheid); /* fits */
127 MT_lock_init(&bn->batIdxLock, name);
128 bn->batDirtydesc = true;
129 return bn;
130 bailout:
131 BBPclear(bn->batCacheid);
132 if (tt)
133 HEAPfree(&bn->theap, true);
134 if (bn->tvheap) {
135 HEAPfree(bn->tvheap, true);
136 GDKfree(bn->tvheap);
137 }
138 GDKfree(bn);
139 return NULL;
140}
141
142uint8_t
143ATOMelmshift(int sz)
144{
145 uint8_t sh;
146 int i = sz >> 1;
147
148 for (sh = 0; i != 0; sh++) {
149 i >>= 1;
150 }
151 return sh;
152}
153
154
155void
156BATsetdims(BAT *b)
157{
158 b->twidth = b->ttype == TYPE_str ? 1 : ATOMsize(b->ttype);
159 b->tshift = ATOMelmshift(Tsize(b));
160 assert_shift_width(b->tshift, b->twidth);
161 b->tvarsized = b->ttype == TYPE_void || BATatoms[b->ttype].atomPut != NULL;
162}
163
164/*
165 * @- BAT allocation
166 * Allocate BUN heap and variable-size atomheaps (see e.g. strHeap).
167 * We now initialize new BATs with their heapname such that the
168 * modified HEAPalloc/HEAPextend primitives can possibly use memory
169 * mapped files as temporary heap storage.
170 *
171 * In case of huge bats, we want HEAPalloc to write a file to disk,
172 * and memory map it. To make this possible, we must provide it with
173 * filenames.
174 */
175BAT *
176COLnew(oid hseq, int tt, BUN cap, role_t role)
177{
178 BAT *bn;
179
180 assert(cap <= BUN_MAX);
181 assert(hseq <= oid_nil);
182 assert(tt != TYPE_bat);
183 ERRORcheck((tt < 0) || (tt > GDKatomcnt), "COLnew:tt error\n", NULL);
184
185 /* round up to multiple of BATTINY */
186 if (cap < BUN_MAX - BATTINY)
187 cap = (cap + BATTINY - 1) & ~(BATTINY - 1);
188 if (cap < BATTINY)
189 cap = BATTINY;
190 /* limit the size */
191 if (cap > BUN_MAX)
192 cap = BUN_MAX;
193
194 bn = BATcreatedesc(hseq, tt, tt != TYPE_void, role);
195 if (bn == NULL)
196 return NULL;
197
198 BATsetdims(bn);
199 bn->batCapacity = cap;
200
201 /* alloc the main heaps */
202 if (tt && HEAPalloc(&bn->theap, cap, bn->twidth) != GDK_SUCCEED) {
203 goto bailout;
204 }
205
206 if (bn->tvheap && ATOMheap(tt, bn->tvheap, cap) != GDK_SUCCEED) {
207 GDKfree(bn->tvheap);
208 goto bailout;
209 }
210 DELTAinit(bn);
211 if (BBPcacheit(bn, true) != GDK_SUCCEED) {
212 GDKfree(bn->tvheap);
213 goto bailout;
214 }
215 ALGODEBUG fprintf(stderr, "#COLnew()=" ALGOBATFMT "\n", ALGOBATPAR(bn));
216 return bn;
217 bailout:
218 BBPclear(bn->batCacheid);
219 HEAPfree(&bn->theap, true);
220 MT_lock_destroy(&bn->batIdxLock);
221 GDKfree(bn);
222 return NULL;
223}
224
225BAT *
226BATdense(oid hseq, oid tseq, BUN cnt)
227{
228 BAT *bn;
229
230 bn = COLnew(hseq, TYPE_void, 0, TRANSIENT);
231 if (bn != NULL) {
232 BATtseqbase(bn, tseq);
233 BATsetcount(bn, cnt);
234 ALGODEBUG fprintf(stderr, "#BATdense()=" ALGOBATFMT "\n", ALGOBATPAR(bn));
235 }
236 return bn;
237}
238
239BAT *
240BATattach(int tt, const char *heapfile, role_t role)
241{
242 BAT *bn;
243 char *p;
244 size_t m;
245 FILE *f;
246
247 ERRORcheck(tt <= 0 , "BATattach: bad tail type (<=0)\n", NULL);
248 ERRORcheck(ATOMvarsized(tt) && ATOMstorage(tt) != TYPE_str, "BATattach: bad tail type (varsized and not str)\n", NULL);
249 ERRORcheck(heapfile == NULL, "BATattach: bad heapfile name\n", NULL);
250
251 if ((f = fopen(heapfile, "rb")) == NULL) {
252 GDKsyserror("BATattach: cannot open %s\n", heapfile);
253 return NULL;
254 }
255 if (ATOMstorage(tt) == TYPE_str) {
256 size_t n;
257 char *s;
258 int c, u;
259
260 if ((bn = COLnew(0, tt, 0, role)) == NULL) {
261 fclose(f);
262 return NULL;
263 }
264 m = 4096;
265 n = 0;
266 u = 0;
267 s = p = GDKmalloc(m);
268 if (p == NULL) {
269 fclose(f);
270 BBPreclaim(bn);
271 return NULL;
272 }
273 while ((c = getc(f)) != EOF) {
274 if (n == m) {
275 m += 4096;
276 s = GDKrealloc(p, m);
277 if (s == NULL) {
278 GDKfree(p);
279 BBPreclaim(bn);
280 fclose(f);
281 return NULL;
282 }
283 p = s;
284 s = p + n;
285 }
286 if (c == '\n' && n > 0 && s[-1] == '\r') {
287 /* deal with CR-LF sequence */
288 s[-1] = c;
289 } else {
290 *s++ = c;
291 n++;
292 }
293 if (u) {
294 if ((c & 0xC0) == 0x80)
295 u--;
296 else
297 goto notutf8;
298 } else if ((c & 0xF8) == 0xF0)
299 u = 3;
300 else if ((c & 0xF0) == 0xE0)
301 u = 2;
302 else if ((c & 0xE0) == 0xC0)
303 u = 1;
304 else if ((c & 0x80) == 0x80)
305 goto notutf8;
306 else if (c == 0) {
307 if (BUNappend(bn, p, false) != GDK_SUCCEED) {
308 BBPreclaim(bn);
309 fclose(f);
310 GDKfree(p);
311 return NULL;
312 }
313 s = p;
314 n = 0;
315 }
316 }
317 fclose(f);
318 GDKfree(p);
319 if (n > 0) {
320 BBPreclaim(bn);
321 GDKerror("BATattach: last string is not null-terminated\n");
322 return NULL;
323 }
324 } else {
325 struct stat st;
326 unsigned int atomsize;
327 BUN cap;
328 lng n;
329
330 if (fstat(fileno(f), &st) < 0) {
331 GDKsyserror("BATattach: cannot stat %s\n", heapfile);
332 fclose(f);
333 return NULL;
334 }
335 atomsize = ATOMsize(tt);
336 if (st.st_size % atomsize != 0) {
337 fclose(f);
338 GDKerror("BATattach: heapfile size not integral number of atoms\n");
339 return NULL;
340 }
341 if ((size_t) (st.st_size / atomsize) > (size_t) BUN_MAX) {
342 fclose(f);
343 GDKerror("BATattach: heapfile too large\n");
344 return NULL;
345 }
346 cap = (BUN) (st.st_size / atomsize);
347 bn = COLnew(0, tt, cap, role);
348 if (bn == NULL) {
349 fclose(f);
350 return NULL;
351 }
352 p = Tloc(bn, 0);
353 n = (lng) st.st_size;
354 while (n > 0 && (m = fread(p, 1, (size_t) MIN(1024*1024, n), f)) > 0) {
355 p += m;
356 n -= m;
357 }
358 fclose(f);
359 if (n > 0) {
360 GDKerror("BATattach: couldn't read the complete file\n");
361 BBPreclaim(bn);
362 return NULL;
363 }
364 BATsetcount(bn, cap);
365 bn->tnonil = cap == 0;
366 bn->tnil = false;
367 bn->tseqbase = oid_nil;
368 if (cap > 1) {
369 bn->tsorted = false;
370 bn->trevsorted = false;
371 bn->tkey = false;
372 } else {
373 bn->tsorted = true;
374 bn->trevsorted = true;
375 bn->tkey = true;
376 }
377 }
378 return bn;
379
380 notutf8:
381 fclose(f);
382 BBPreclaim(bn);
383 GDKfree(p);
384 GDKerror("BATattach: input is not UTF-8\n");
385 return NULL;
386}
387
388/*
389 * If the BAT runs out of storage for BUNS it will reallocate space.
390 * For memory mapped BATs we simple extend the administration after
391 * having an assurance that the BAT still can be safely stored away.
392 */
393BUN
394BATgrows(BAT *b)
395{
396 BUN oldcap, newcap;
397
398 BATcheck(b, "BATgrows", 0);
399
400 newcap = oldcap = BATcapacity(b);
401 if (newcap < BATTINY)
402 newcap = 2 * BATTINY;
403 else if (newcap < 10 * BATTINY)
404 newcap = 4 * newcap;
405 else if (newcap < 50 * BATTINY)
406 newcap = 2 * newcap;
407 else if ((double) newcap * BATMARGIN <= (double) BUN_MAX)
408 newcap = (BUN) ((double) newcap * BATMARGIN);
409 else
410 newcap = BUN_MAX;
411 if (newcap == oldcap) {
412 if (newcap <= BUN_MAX - 10)
413 newcap += 10;
414 else
415 newcap = BUN_MAX;
416 }
417 return newcap;
418}
419
420/*
421 * The routine should ensure that the BAT keeps its location in the
422 * BAT buffer.
423 *
424 * Overflow in the other heaps are dealt with in the atom routines.
425 * Here we merely copy their references into the new administration
426 * space.
427 */
428gdk_return
429BATextend(BAT *b, BUN newcap)
430{
431 size_t theap_size = newcap;
432
433 assert(newcap <= BUN_MAX);
434 BATcheck(b, "BATextend", GDK_FAIL);
435 /*
436 * The main issue is to properly predict the new BAT size.
437 * storage overflow. The assumption taken is that capacity
438 * overflow is rare. It is changed only when the position of
439 * the next available BUN surpasses the free area marker. Be
440 * aware that the newcap should be greater than the old value,
441 * otherwise you may easily corrupt the administration of
442 * malloc.
443 */
444 if (newcap <= BATcapacity(b)) {
445 return GDK_SUCCEED;
446 }
447
448 b->batCapacity = newcap;
449
450 theap_size *= Tsize(b);
451 if (b->theap.base && GDKdebug & HEAPMASK)
452 fprintf(stderr, "#HEAPextend in BATextend %s %zu %zu\n", b->theap.filename, b->theap.size, theap_size);
453 if (b->theap.base &&
454 HEAPextend(&b->theap, theap_size, b->batRestricted == BAT_READ) != GDK_SUCCEED)
455 return GDK_FAIL;
456 HASHdestroy(b);
457 IMPSdestroy(b);
458 OIDXdestroy(b);
459 return GDK_SUCCEED;
460}
461
462
463
464/*
465 * @+ BAT destruction
466 * BATclear quickly removes all elements from a BAT. It must respect
467 * the transaction rules; so stable elements must be moved to the
468 * "deleted" section of the BAT (they cannot be fully deleted
469 * yet). For the elements that really disappear, we must free
470 * heapspace and unfix the atoms if they have fix/unfix handles. As an
471 * optimization, in the case of no stable elements, we quickly empty
472 * the heaps by copying a standard small empty image over them.
473 */
474gdk_return
475BATclear(BAT *b, bool force)
476{
477 BUN p, q;
478
479 BATcheck(b, "BATclear", GDK_FAIL);
480
481 if (!force && b->batInserted > 0) {
482 GDKerror("BATclear: cannot clear committed BAT\n");
483 return GDK_FAIL;
484 }
485
486 /* kill all search accelerators */
487 HASHdestroy(b);
488 IMPSdestroy(b);
489 OIDXdestroy(b);
490 PROPdestroy(b);
491
492 /* we must dispose of all inserted atoms */
493 if (force && BATatoms[b->ttype].atomDel == NULL) {
494 assert(b->tvheap == NULL || b->tvheap->parentid == b->batCacheid);
495 /* no stable elements: we do a quick heap clean */
496 /* need to clean heap which keeps data even though the
497 BUNs got removed. This means reinitialize when
498 free > 0
499 */
500 if (b->tvheap && b->tvheap->free > 0) {
501 Heap th;
502
503 th = (Heap) {
504 .farmid = b->tvheap->farmid,
505 };
506 strcpy_len(th.filename, b->tvheap->filename, sizeof(th.filename));
507 if (ATOMheap(b->ttype, &th, 0) != GDK_SUCCEED)
508 return GDK_FAIL;
509 th.parentid = b->tvheap->parentid;
510 th.dirty = true;
511 HEAPfree(b->tvheap, false);
512 *b->tvheap = th;
513 }
514 } else {
515 /* do heap-delete of all inserted atoms */
516 void (*tatmdel)(Heap*,var_t*) = BATatoms[b->ttype].atomDel;
517
518 /* TYPE_str has no del method, so we shouldn't get here */
519 assert(tatmdel == NULL || b->twidth == sizeof(var_t));
520 if (tatmdel) {
521 BATiter bi = bat_iterator(b);
522
523 for (p = b->batInserted, q = BUNlast(b); p < q; p++)
524 (*tatmdel)(b->tvheap, (var_t*) BUNtloc(bi,p));
525 b->tvheap->dirty = true;
526 }
527 }
528
529 if (force)
530 b->batInserted = 0;
531 BATsetcount(b,0);
532 BAThseqbase(b, 0);
533 BATtseqbase(b, ATOMtype(b->ttype) == TYPE_oid ? 0 : oid_nil);
534 b->batDirtydesc = true;
535 b->theap.dirty = true;
536 BATsettrivprop(b);
537 b->tnosorted = b->tnorevsorted = 0;
538 b->tnokey[0] = b->tnokey[1] = 0;
539 return GDK_SUCCEED;
540}
541
542/* free a cached BAT; leave the bat descriptor cached */
543void
544BATfree(BAT *b)
545{
546 if (b == NULL)
547 return;
548
549 /* deallocate all memory for a bat */
550 assert(b->batCacheid > 0);
551 if (b->tident && !default_ident(b->tident))
552 GDKfree(b->tident);
553 b->tident = BATstring_t;
554 PROPdestroy(b);
555 HASHfree(b);
556 IMPSfree(b);
557 OIDXfree(b);
558 if (b->ttype)
559 HEAPfree(&b->theap, false);
560 else
561 assert(!b->theap.base);
562 if (b->tvheap) {
563 assert(b->tvheap->parentid == b->batCacheid);
564 HEAPfree(b->tvheap, false);
565 }
566}
567
568/* free a cached BAT descriptor */
569void
570BATdestroy(BAT *b)
571{
572 if (b->tident && !default_ident(b->tident))
573 GDKfree(b->tident);
574 b->tident = BATstring_t;
575 if (b->tvheap)
576 GDKfree(b->tvheap);
577 PROPdestroy(b);
578 MT_lock_destroy(&b->batIdxLock);
579 GDKfree(b);
580}
581
582/*
583 * @+ BAT copying
584 *
585 * BAT copying is an often used operation. So it deserves attention.
586 * When making a copy of a BAT, the following aspects are of
587 * importance:
588 *
589 * - the requested head and tail types. The purpose of the copy may be
590 * to slightly change these types (e.g. void <-> oid). We may also
591 * remap between types as long as they share the same
592 * ATOMstorage(type), i.e. the types have the same physical
593 * implementation. We may even want to allow 'dirty' trick such as
594 * viewing a flt-column suddenly as int.
595 *
596 * To allow such changes, the desired column-types is a
597 * parameter of COLcopy.
598 *
599 * - access mode. If we want a read-only copy of a read-only BAT, a
600 * VIEW may do (in this case, the user may be after just an
601 * independent BAT header and id). This is indicated by the
602 * parameter (writable = FALSE).
603 *
604 * In other cases, we really want an independent physical copy
605 * (writable = TRUE). Changing the mode to BAT_WRITE will be a
606 * zero-cost operation if the BAT was copied with (writable = TRUE).
607 *
608 * In GDK, the result is a BAT that is BAT_WRITE iff (writable ==
609 * TRUE).
610 *
611 * In these cases the copy becomes a logical view on the original,
612 * which ensures that the original cannot be modified or destroyed
613 * (which could affect the shared heaps).
614 */
615static void
616heapmove(Heap *dst, Heap *src)
617{
618 HEAPfree(dst, false);
619 *dst = *src;
620}
621
622static bool
623wrongtype(int t1, int t2)
624{
625 /* check if types are compatible. be extremely forgiving */
626 if (t1 != TYPE_void) {
627 t1 = ATOMtype(ATOMstorage(t1));
628 t2 = ATOMtype(ATOMstorage(t2));
629 if (t1 != t2) {
630 if (ATOMvarsized(t1) ||
631 ATOMvarsized(t2) ||
632 ATOMsize(t1) != ATOMsize(t2) ||
633 BATatoms[t1].atomFix ||
634 BATatoms[t2].atomFix)
635 return true;
636 }
637 }
638 return false;
639}
640
641/*
642 * There are four main implementation cases:
643 * (1) we are allowed to return a view (zero effort),
644 * (2) the result is void,void (zero effort),
645 * (3) we can copy the heaps (memcopy, or even VM page sharing)
646 * (4) we must insert BUN-by-BUN into the result (fallback)
647 * The latter case is still optimized for the case that the result
648 * is bat[void,T] for a simple fixed-size type T. In that case we
649 * do inline array[T] inserts.
650 */
651/* TODO make it simpler, ie copy per column */
652BAT *
653COLcopy(BAT *b, int tt, bool writable, role_t role)
654{
655 BUN bunstocopy = BUN_NONE;
656 BUN cnt;
657 BAT *bn = NULL;
658
659 BATcheck(b, "BATcopy", NULL);
660 assert(tt != TYPE_bat);
661 cnt = b->batCount;
662
663 /* maybe a bit ugly to change the requested bat type?? */
664 if (b->ttype == TYPE_void && !writable)
665 tt = TYPE_void;
666
667 if (tt != b->ttype && wrongtype(tt, b->ttype)) {
668 GDKerror("BATcopy: wrong tail-type requested\n");
669 return NULL;
670 }
671
672 /* first try case (1); create a view, possibly with different
673 * atom-types */
674 if (role == b->batRole &&
675 b->batRestricted == BAT_READ &&
676 (!VIEWtparent(b) ||
677 BBP_cache(VIEWtparent(b))->batRestricted == BAT_READ) &&
678 !writable) {
679 bn = VIEWcreate(b->hseqbase, b);
680 if (bn == NULL)
681 return NULL;
682 if (tt != bn->ttype) {
683 bn->ttype = tt;
684 bn->tvarsized = ATOMvarsized(tt);
685 bn->tseqbase = ATOMtype(tt) == TYPE_oid ? b->tseqbase : oid_nil;
686 }
687 } else {
688 /* check whether we need case (4); BUN-by-BUN copy (by
689 * setting bunstocopy != BUN_NONE) */
690 if (ATOMsize(tt) != ATOMsize(b->ttype)) {
691 /* oops, void materialization */
692 bunstocopy = cnt;
693 } else if (BATatoms[tt].atomFix) {
694 /* oops, we need to fix/unfix atoms */
695 bunstocopy = cnt;
696 } else if (isVIEW(b)) {
697 /* extra checks needed for views */
698 bat tp = VIEWtparent(b);
699
700 if (tp != 0 && BATcapacity(BBP_cache(tp)) > cnt + cnt)
701 /* reduced slice view: do not copy too
702 * much garbage */
703 bunstocopy = cnt;
704 }
705
706 bn = COLnew(b->hseqbase, tt, MAX(1, bunstocopy == BUN_NONE ? 0 : bunstocopy), role);
707 if (bn == NULL)
708 return NULL;
709
710 if (bn->tvarsized && bn->ttype && bunstocopy == BUN_NONE) {
711 bn->tshift = b->tshift;
712 bn->twidth = b->twidth;
713 if (HEAPextend(&bn->theap, BATcapacity(bn) << bn->tshift, true) != GDK_SUCCEED)
714 goto bunins_failed;
715 }
716
717 if (tt == TYPE_void) {
718 /* case (2): a void,void result => nothing to
719 * copy! */
720 bn->theap.free = 0;
721 } else if (bunstocopy == BUN_NONE) {
722 /* case (3): just copy the heaps; if possible
723 * with copy-on-write VM support */
724 Heap bthp, thp;
725
726 bthp = (Heap) {
727 .farmid = BBPselectfarm(role, b->ttype, offheap),
728 };
729 thp = (Heap) {
730 .farmid = BBPselectfarm(role, b->ttype, varheap),
731 };
732 strconcat_len(bthp.filename, sizeof(bthp.filename),
733 BBP_physical(bn->batCacheid),
734 ".tail", NULL);
735 strconcat_len(thp.filename, sizeof(thp.filename),
736 BBP_physical(bn->batCacheid),
737 ".theap", NULL);
738 if ((b->ttype && HEAPcopy(&bthp, &b->theap) != GDK_SUCCEED) ||
739 (bn->tvheap && HEAPcopy(&thp, b->tvheap) != GDK_SUCCEED)) {
740 HEAPfree(&thp, true);
741 HEAPfree(&bthp, true);
742 BBPreclaim(bn);
743 return NULL;
744 }
745 /* succeeded; replace dummy small heaps by the
746 * real ones */
747 heapmove(&bn->theap, &bthp);
748 thp.parentid = bn->batCacheid;
749 if (bn->tvheap)
750 heapmove(bn->tvheap, &thp);
751
752 /* make sure we use the correct capacity */
753 bn->batCapacity = (BUN) (bn->ttype ? bn->theap.size >> bn->tshift : 0);
754
755
756 /* first/inserted must point equally far into
757 * the heap as in the source */
758 bn->batInserted = b->batInserted;
759 } else if (BATatoms[tt].atomFix || tt != TYPE_void || ATOMextern(tt)) {
760 /* case (4): one-by-one BUN insert (really slow) */
761 BUN p, q, r = 0;
762 BATiter bi = bat_iterator(b);
763
764 BATloop(b, p, q) {
765 const void *t = BUNtail(bi, p);
766
767 bunfastapp_nocheck(bn, r, t, Tsize(bn));
768 r++;
769 }
770 bn->theap.dirty |= bunstocopy > 0;
771 } else if (tt != TYPE_void && b->ttype == TYPE_void) {
772 /* case (4): optimized for unary void
773 * materialization */
774 oid cur = b->tseqbase, *dst = (oid *) bn->theap.base;
775 oid inc = !is_oid_nil(cur);
776
777 bn->theap.free = bunstocopy * sizeof(oid);
778 bn->theap.dirty |= bunstocopy > 0;
779 while (bunstocopy--) {
780 *dst++ = cur;
781 cur += inc;
782 }
783 } else {
784 /* case (4): optimized for simple array copy */
785 bn->theap.free = bunstocopy * Tsize(bn);
786 bn->theap.dirty |= bunstocopy > 0;
787 memcpy(Tloc(bn, 0), Tloc(b, 0), bn->theap.free);
788 }
789 /* copy all properties (size+other) from the source bat */
790 BATsetcount(bn, cnt);
791 }
792 /* set properties (note that types may have changed in the copy) */
793 if (ATOMtype(tt) == ATOMtype(b->ttype)) {
794 if (ATOMtype(tt) == TYPE_oid) {
795 BATtseqbase(bn, b->tseqbase);
796 } else {
797 BATtseqbase(bn, oid_nil);
798 }
799 BATkey(bn, BATtkey(b));
800 bn->tsorted = BATtordered(b);
801 bn->trevsorted = BATtrevordered(b);
802 bn->batDirtydesc = true;
803 bn->tnorevsorted = b->tnorevsorted;
804 if (b->tnokey[0] != b->tnokey[1]) {
805 bn->tnokey[0] = b->tnokey[0];
806 bn->tnokey[1] = b->tnokey[1];
807 } else {
808 bn->tnokey[0] = bn->tnokey[1] = 0;
809 }
810 bn->tnosorted = b->tnosorted;
811 bn->tnonil = b->tnonil;
812 bn->tnil = b->tnil;
813 } else if (ATOMstorage(tt) == ATOMstorage(b->ttype) &&
814 ATOMcompare(tt) == ATOMcompare(b->ttype)) {
815 BUN h = BUNlast(b);
816 bn->tsorted = b->tsorted;
817 bn->trevsorted = b->trevsorted;
818 if (b->tkey)
819 BATkey(bn, true);
820 bn->tnonil = b->tnonil;
821 bn->tnil = b->tnil;
822 if (b->tnosorted > 0 && b->tnosorted < h)
823 bn->tnosorted = b->tnosorted;
824 else
825 bn->tnosorted = 0;
826 if (b->tnorevsorted > 0 && b->tnorevsorted < h)
827 bn->tnorevsorted = b->tnorevsorted;
828 else
829 bn->tnorevsorted = 0;
830 if (b->tnokey[0] < h &&
831 b->tnokey[1] < h &&
832 b->tnokey[0] != b->tnokey[1]) {
833 bn->tnokey[0] = b->tnokey[0];
834 bn->tnokey[1] = b->tnokey[1];
835 } else {
836 bn->tnokey[0] = bn->tnokey[1] = 0;
837 }
838 } else {
839 bn->tsorted = bn->trevsorted = false; /* set based on count later */
840 bn->tnonil = bn->tnil = false;
841 bn->tnosorted = bn->tnorevsorted = 0;
842 bn->tnokey[0] = bn->tnokey[1] = 0;
843 }
844 if (BATcount(bn) <= 1) {
845 bn->tsorted = ATOMlinear(b->ttype);
846 bn->trevsorted = ATOMlinear(b->ttype);
847 bn->tkey = true;
848 }
849 if (!writable)
850 bn->batRestricted = BAT_READ;
851 ALGODEBUG fprintf(stderr, "#COLcopy(" ALGOBATFMT ")=" ALGOBATFMT "\n",
852 ALGOBATPAR(b), ALGOBATPAR(bn));
853 return bn;
854 bunins_failed:
855 BBPreclaim(bn);
856 return NULL;
857}
858
859#ifdef HAVE_HGE
860#define un_move_sz16(src, dst, sz) \
861 if (sz == 16) { \
862 * (hge *) dst = * (hge *) src; \
863 } else
864#else
865#define un_move_sz16(src, dst, sz)
866#endif
867
868#define un_move(src, dst, sz) \
869 do { \
870 un_move_sz16(src,dst,sz) \
871 if (sz == 8) { \
872 * (lng *) dst = * (lng *) src; \
873 } else if (sz == 4) { \
874 * (int *) dst = * (int *) src; \
875 } else if (sz > 0) { \
876 char *_dst = (char *) dst; \
877 char *_src = (char *) src; \
878 char *_end = _src + sz; \
879 \
880 while (_src < _end) \
881 *_dst++ = *_src++; \
882 } \
883 } while (0)
884#define acc_move(l, p) \
885 do { \
886 char tmp[16]; \
887 /* avoid compiler warning: dereferencing type-punned pointer \
888 * will break strict-aliasing rules */ \
889 char *tmpp = tmp; \
890 \
891 assert(ts <= 16); \
892 \
893 /* move first to tmp */ \
894 un_move(Tloc(b, l), tmpp, ts); \
895 /* move delete to first */ \
896 un_move(Tloc(b, p), Tloc(b, l), ts); \
897 /* move first to deleted */ \
898 un_move(tmpp, Tloc(b, p), ts); \
899 } while (0)
900
901static void
902setcolprops(BAT *b, const void *x)
903{
904 bool isnil = b->ttype != TYPE_void &&
905 ATOMcmp(b->ttype, x, ATOMnilptr(b->ttype)) == 0;
906 BATiter bi;
907 BUN pos;
908 const void *prv;
909 int cmp;
910
911 /* x may only be NULL if the column type is VOID */
912 assert(x != NULL || b->ttype == TYPE_void);
913 if (b->batCount == 0) {
914 /* first value */
915 b->tsorted = b->trevsorted = ATOMlinear(b->ttype);
916 b->tnosorted = b->tnorevsorted = 0;
917 b->tkey = true;
918 b->tnokey[0] = b->tnokey[1] = 0;
919 if (b->ttype == TYPE_void) {
920 if (x) {
921 b->tseqbase = * (const oid *) x;
922 }
923 b->tnil = is_oid_nil(b->tseqbase);
924 b->tnonil = !b->tnil;
925 } else {
926 b->tnil = isnil;
927 b->tnonil = !isnil;
928 if (b->ttype == TYPE_oid) {
929 b->tseqbase = * (const oid *) x;
930 }
931 if (!isnil && ATOMlinear(b->ttype)) {
932 BATsetprop(b, GDK_MAX_VALUE, b->ttype, x);
933 BATsetprop(b, GDK_MIN_VALUE, b->ttype, x);
934 }
935 }
936 return;
937 } else if (b->ttype == TYPE_void) {
938 /* not the first value in a VOID column: we keep the
939 * seqbase, and x is not used, so only some properties
940 * are affected */
941 if (!is_oid_nil(b->tseqbase)) {
942 if (b->trevsorted) {
943 b->tnorevsorted = BUNlast(b);
944 b->trevsorted = false;
945 }
946 b->tnil = false;
947 b->tnonil = true;
948 } else {
949 if (b->tkey) {
950 b->tnokey[0] = 0;
951 b->tnokey[1] = BUNlast(b);
952 b->tkey = false;
953 }
954 b->tnil = true;
955 b->tnonil = false;
956 }
957 return;
958 } else if (ATOMlinear(b->ttype)) {
959 PROPrec *prop;
960
961 bi = bat_iterator(b);
962 pos = BUNlast(b);
963 prv = BUNtail(bi, pos - 1);
964 cmp = ATOMcmp(b->ttype, prv, x);
965
966 if (!b->tunique && /* assume outside check if tunique */
967 b->tkey &&
968 (cmp == 0 || /* definitely not KEY */
969 (b->batCount > 1 && /* can't guarantee KEY if unordered */
970 ((b->tsorted && cmp > 0) ||
971 (b->trevsorted && cmp < 0) ||
972 (!b->tsorted && !b->trevsorted))))) {
973 b->tkey = false;
974 if (cmp == 0) {
975 b->tnokey[0] = pos - 1;
976 b->tnokey[1] = pos;
977 }
978 }
979 if (b->tsorted) {
980 if (cmp > 0) {
981 /* out of order */
982 b->tsorted = false;
983 b->tnosorted = pos;
984 } else if (cmp < 0 && !isnil) {
985 /* new largest value */
986 BATsetprop(b, GDK_MAX_VALUE, b->ttype, x);
987 }
988 } else if (!isnil &&
989 (prop = BATgetprop(b, GDK_MAX_VALUE)) != NULL &&
990 ATOMcmp(b->ttype, VALptr(&prop->v), x) < 0) {
991 BATsetprop(b, GDK_MAX_VALUE, b->ttype, x);
992 }
993 if (b->trevsorted) {
994 if (cmp < 0) {
995 /* out of order */
996 b->trevsorted = false;
997 b->tnorevsorted = pos;
998 /* if there is a nil in the BAT, it is
999 * the smallest, but that doesn't
1000 * count for the property, so the new
1001 * value may still be smaller than the
1002 * smallest non-nil so far */
1003 if (!b->tnonil && !isnil &&
1004 (prop = BATgetprop(b, GDK_MIN_VALUE)) != NULL &&
1005 ATOMcmp(b->ttype, VALptr(&prop->v), x) > 0) {
1006 BATsetprop(b, GDK_MIN_VALUE, b->ttype, x);
1007 }
1008 } else if (cmp > 0 && !isnil) {
1009 /* new smallest value */
1010 BATsetprop(b, GDK_MIN_VALUE, b->ttype, x);
1011 }
1012 } else if (!isnil &&
1013 (prop = BATgetprop(b, GDK_MIN_VALUE)) != NULL &&
1014 ATOMcmp(b->ttype, VALptr(&prop->v), x) > 0) {
1015 BATsetprop(b, GDK_MIN_VALUE, b->ttype, x);
1016 }
1017 if (BATtdense(b) && (cmp >= 0 || * (const oid *) prv + 1 != * (const oid *) x)) {
1018 assert(b->ttype == TYPE_oid);
1019 b->tseqbase = oid_nil;
1020 }
1021 }
1022 if (isnil) {
1023 b->tnonil = false;
1024 b->tnil = true;
1025 }
1026}
1027
1028/*
1029 * @+ BUNappend
1030 * The BUNappend function can be used to add a single value to void
1031 * and oid headed bats. The new head value will be a unique number,
1032 * (max(bat)+1).
1033 */
1034gdk_return
1035BUNappend(BAT *b, const void *t, bool force)
1036{
1037 BUN p;
1038 size_t tsize = 0;
1039
1040 BATcheck(b, "BUNappend", GDK_FAIL);
1041
1042 assert(!VIEWtparent(b));
1043 if (b->tunique && BUNfnd(b, t) != BUN_NONE) {
1044 return GDK_SUCCEED;
1045 }
1046
1047 p = BUNlast(b); /* insert at end */
1048 if (p == BUN_MAX || b->batCount == BUN_MAX) {
1049 GDKerror("BUNappend: bat too large\n");
1050 return GDK_FAIL;
1051 }
1052
1053 ALIGNapp(b, "BUNappend", force, GDK_FAIL);
1054 b->batDirtydesc = true;
1055 if (b->thash && b->tvheap)
1056 tsize = b->tvheap->size;
1057
1058 if (b->ttype == TYPE_void && BATtdense(b)) {
1059 if (b->batCount == 0) {
1060 b->tseqbase = * (const oid *) t;
1061 } else if (is_oid_nil(* (oid *) t) ||
1062 b->tseqbase + b->batCount != *(const oid *) t) {
1063 if (BATmaterialize(b) != GDK_SUCCEED)
1064 return GDK_FAIL;
1065 }
1066 }
1067
1068 if (unshare_string_heap(b) != GDK_SUCCEED) {
1069 return GDK_FAIL;
1070 }
1071
1072 setcolprops(b, t);
1073
1074 if (b->ttype != TYPE_void) {
1075 bunfastapp(b, t);
1076 b->theap.dirty = true;
1077 } else {
1078 BATsetcount(b, b->batCount + 1);
1079 }
1080
1081
1082 IMPSdestroy(b); /* no support for inserts in imprints yet */
1083 OIDXdestroy(b);
1084#if 0 /* enable if we have more properties than just min/max */
1085 PROPrec *prop;
1086 do {
1087 for (prop = b->tprops; prop; prop = prop->next)
1088 if (prop->id != GDK_MAX_VALUE &&
1089 prop->id != GDK_MIN_VALUE &&
1090 prop->id != GDK_HASH_MASK) {
1091 BATrmprop(b, prop->id);
1092 break;
1093 }
1094 } while (prop);
1095#endif
1096 if (b->thash) {
1097 HASHins(b, p, t);
1098 if (tsize && tsize != b->tvheap->size)
1099 HEAPwarm(b->tvheap);
1100 }
1101 return GDK_SUCCEED;
1102 bunins_failed:
1103 return GDK_FAIL;
1104}
1105
1106gdk_return
1107BUNdelete(BAT *b, oid o)
1108{
1109 BUN p;
1110 BATiter bi = bat_iterator(b);
1111 const void *val;
1112 PROPrec *prop;
1113
1114 assert(!is_oid_nil(b->hseqbase) || BATcount(b) == 0);
1115 if (o < b->hseqbase || o >= b->hseqbase + BATcount(b)) {
1116 /* value already not there */
1117 return GDK_SUCCEED;
1118 }
1119 assert(BATcount(b) > 0); /* follows from "if" above */
1120 p = o - b->hseqbase;
1121 if (p < b->batInserted) {
1122 GDKerror("BUNdelete: cannot delete committed value\n");
1123 return GDK_FAIL;
1124 }
1125 b->batDirtydesc = true;
1126 val = BUNtail(bi, p);
1127 if (ATOMcmp(b->ttype, ATOMnilptr(b->ttype), val) != 0) {
1128 if ((prop = BATgetprop(b, GDK_MAX_VALUE)) != NULL
1129 && ATOMcmp(b->ttype, VALptr(&prop->v), val) >= 0)
1130 BATrmprop(b, GDK_MAX_VALUE);
1131 if ((prop = BATgetprop(b, GDK_MIN_VALUE)) != NULL
1132 && ATOMcmp(b->ttype, VALptr(&prop->v), val) <= 0)
1133 BATrmprop(b, GDK_MIN_VALUE);
1134 }
1135 ATOMunfix(b->ttype, val);
1136 ATOMdel(b->ttype, b->tvheap, (var_t *) BUNtloc(bi, p));
1137 if (p != BUNlast(b) - 1 &&
1138 (b->ttype != TYPE_void || BATtdense(b))) {
1139 /* replace to-be-delete BUN with last BUN; materialize
1140 * void column before doing so */
1141 if (b->ttype == TYPE_void &&
1142 BATmaterialize(b) != GDK_SUCCEED)
1143 return GDK_FAIL;
1144 memcpy(Tloc(b, p), Tloc(b, BUNlast(b) - 1), Tsize(b));
1145 /* no longer sorted */
1146 b->tsorted = b->trevsorted = false;
1147 b->theap.dirty = true;
1148 }
1149 if (b->tnosorted >= p)
1150 b->tnosorted = 0;
1151 if (b->tnorevsorted >= p)
1152 b->tnorevsorted = 0;
1153 b->batCount--;
1154 if (b->batCount <= 1) {
1155 /* some trivial properties */
1156 b->tkey = true;
1157 b->tsorted = b->trevsorted = true;
1158 b->tnosorted = b->tnorevsorted = 0;
1159 if (b->batCount == 0) {
1160 b->tnil = false;
1161 b->tnonil = true;
1162 }
1163 }
1164 IMPSdestroy(b);
1165 OIDXdestroy(b);
1166 HASHdestroy(b);
1167#if 0 /* enable if we have more properties than just min/max */
1168 do {
1169 for (prop = b->tprops; prop; prop = prop->next)
1170 if (prop->id != GDK_MAX_VALUE &&
1171 prop->id != GDK_MIN_VALUE &&
1172 prop->id != GDK_HASH_MASK) {
1173 BATrmprop(b, prop->id);
1174 break;
1175 }
1176 } while (prop);
1177#endif
1178 return GDK_SUCCEED;
1179}
1180
1181/* @- BUN replace
1182 * The last operation in this context is BUN replace. It assumes that
1183 * the header denotes a key. The old value association is destroyed
1184 * (if it exists in the first place) and the new value takes its
1185 * place.
1186 *
1187 * In order to make updates on void columns workable; replaces on them
1188 * are always done in-place. Performing them without bun-movements
1189 * greatly simplifies the problem. The 'downside' is that when
1190 * transaction management has to be performed, replaced values should
1191 * be saved explicitly.
1192 */
1193gdk_return
1194BUNinplace(BAT *b, BUN p, const void *t, bool force)
1195{
1196 BUN last = BUNlast(b) - 1;
1197 BATiter bi = bat_iterator(b);
1198 int tt;
1199 BUN prv, nxt;
1200 const void *val;
1201
1202 assert(p >= b->batInserted || force);
1203
1204 /* uncommitted BUN elements */
1205
1206 /* zap alignment info */
1207 if (!force && (b->batRestricted != BAT_WRITE || b->batSharecnt > 0)) {
1208 GDKerror("BUNinplace: access denied to %s, aborting.\n",
1209 BATgetId(b));
1210 return GDK_FAIL;
1211 }
1212 val = BUNtail(bi, p); /* old value */
1213 if (b->tnil &&
1214 ATOMcmp(b->ttype, val, ATOMnilptr(b->ttype)) == 0 &&
1215 ATOMcmp(b->ttype, t, ATOMnilptr(b->ttype)) != 0) {
1216 /* if old value is nil and new value isn't, we're not
1217 * sure anymore about the nil property, so we must
1218 * clear it */
1219 b->tnil = false;
1220 }
1221 HASHdestroy(b);
1222 if (b->ttype != TYPE_void && ATOMlinear(b->ttype)) {
1223 PROPrec *prop;
1224
1225 if ((prop = BATgetprop(b, GDK_MAX_VALUE)) != NULL) {
1226 if (ATOMcmp(b->ttype, t, ATOMnilptr(b->ttype)) != 0 &&
1227 ATOMcmp(b->ttype, VALptr(&prop->v), t) < 0) {
1228 /* new value is larger than previous
1229 * largest */
1230 BATsetprop(b, GDK_MAX_VALUE, b->ttype, t);
1231 } else if (ATOMcmp(b->ttype, t, val) != 0 &&
1232 ATOMcmp(b->ttype, VALptr(&prop->v), val) == 0) {
1233 /* old value is equal to largest and
1234 * new value is smaller (see above),
1235 * so we don't know anymore which is
1236 * the largest */
1237 BATrmprop(b, GDK_MAX_VALUE);
1238 }
1239 }
1240 if ((prop = BATgetprop(b, GDK_MIN_VALUE)) != NULL) {
1241 if (ATOMcmp(b->ttype, t, ATOMnilptr(b->ttype)) != 0 &&
1242 ATOMcmp(b->ttype, VALptr(&prop->v), t) > 0) {
1243 /* new value is smaller than previous
1244 * smallest */
1245 BATsetprop(b, GDK_MIN_VALUE, b->ttype, t);
1246 } else if (ATOMcmp(b->ttype, t, val) != 0 &&
1247 ATOMcmp(b->ttype, VALptr(&prop->v), val) <= 0) {
1248 /* old value is equal to smallest and
1249 * new value is larger (see above), so
1250 * we don't know anymore which is the
1251 * smallest */
1252 BATrmprop(b, GDK_MIN_VALUE);
1253 }
1254 }
1255#if 0 /* enable if we have more properties than just min/max */
1256 do {
1257 for (prop = b->tprops; prop; prop = prop->next)
1258 if (prop->id != GDK_MAX_VALUE &&
1259 prop->id != GDK_MIN_VALUE &&
1260 prop->id != GDK_HASH_MASK) {
1261 BATrmprop(b, prop->id);
1262 break;
1263 }
1264 } while (prop);
1265#endif
1266 } else {
1267 PROPdestroy(b);
1268 }
1269 OIDXdestroy(b);
1270 IMPSdestroy(b);
1271 if (b->tvarsized && b->ttype) {
1272 var_t _d;
1273 ptr _ptr;
1274 _ptr = BUNtloc(bi, p);
1275 switch (b->twidth) {
1276 case 1:
1277 _d = (var_t) * (uint8_t *) _ptr + GDK_VAROFFSET;
1278 break;
1279 case 2:
1280 _d = (var_t) * (uint16_t *) _ptr + GDK_VAROFFSET;
1281 break;
1282 case 4:
1283 _d = (var_t) * (uint32_t *) _ptr;
1284 break;
1285#if SIZEOF_VAR_T == 8
1286 case 8:
1287 _d = (var_t) * (uint64_t *) _ptr;
1288 break;
1289#endif
1290 }
1291 ATOMreplaceVAR(b->ttype, b->tvheap, &_d, t);
1292 if (b->twidth < SIZEOF_VAR_T &&
1293 (b->twidth <= 2 ? _d - GDK_VAROFFSET : _d) >= ((size_t) 1 << (8 * b->twidth))) {
1294 /* doesn't fit in current heap, upgrade it */
1295 if (GDKupgradevarheap(b, _d, false, b->batRestricted == BAT_READ) != GDK_SUCCEED)
1296 goto bunins_failed;
1297 }
1298 _ptr = BUNtloc(bi, p);
1299 switch (b->twidth) {
1300 case 1:
1301 * (uint8_t *) _ptr = (uint8_t) (_d - GDK_VAROFFSET);
1302 break;
1303 case 2:
1304 * (uint16_t *) _ptr = (uint16_t) (_d - GDK_VAROFFSET);
1305 break;
1306 case 4:
1307 * (uint32_t *) _ptr = (uint32_t) _d;
1308 break;
1309#if SIZEOF_VAR_T == 8
1310 case 8:
1311 * (uint64_t *) _ptr = (uint64_t) _d;
1312 break;
1313#endif
1314 }
1315 } else {
1316 assert(BATatoms[b->ttype].atomPut == NULL);
1317 ATOMfix(b->ttype, t);
1318 ATOMunfix(b->ttype, BUNtloc(bi, p));
1319 switch (ATOMsize(b->ttype)) {
1320 case 0: /* void */
1321 break;
1322 case 1:
1323 ((bte *) b->theap.base)[p] = * (bte *) t;
1324 break;
1325 case 2:
1326 ((sht *) b->theap.base)[p] = * (sht *) t;
1327 break;
1328 case 4:
1329 ((int *) b->theap.base)[p] = * (int *) t;
1330 break;
1331 case 8:
1332 ((lng *) b->theap.base)[p] = * (lng *) t;
1333 break;
1334#ifdef HAVE_HGE
1335 case 16:
1336 ((hge *) b->theap.base)[p] = * (hge *) t;
1337 break;
1338#endif
1339 default:
1340 memcpy(BUNtloc(bi, p), t, ATOMsize(b->ttype));
1341 break;
1342 }
1343 }
1344
1345 tt = b->ttype;
1346 prv = p > 0 ? p - 1 : BUN_NONE;
1347 nxt = p < last ? p + 1 : BUN_NONE;
1348
1349 if (BATtordered(b)) {
1350 if (prv != BUN_NONE &&
1351 ATOMcmp(tt, t, BUNtail(bi, prv)) < 0) {
1352 b->tsorted = false;
1353 b->tnosorted = p;
1354 } else if (nxt != BUN_NONE &&
1355 ATOMcmp(tt, t, BUNtail(bi, nxt)) > 0) {
1356 b->tsorted = false;
1357 b->tnosorted = nxt;
1358 } else if (b->ttype != TYPE_void && BATtdense(b)) {
1359 if (prv != BUN_NONE &&
1360 1 + * (oid *) BUNtloc(bi, prv) != * (oid *) t) {
1361 b->tseqbase = oid_nil;
1362 } else if (nxt != BUN_NONE &&
1363 * (oid *) BUNtloc(bi, nxt) != 1 + * (oid *) t) {
1364 b->tseqbase = oid_nil;
1365 } else if (prv == BUN_NONE &&
1366 nxt == BUN_NONE) {
1367 b->tseqbase = * (oid *) t;
1368 }
1369 }
1370 } else if (b->tnosorted >= p)
1371 b->tnosorted = 0;
1372 if (BATtrevordered(b)) {
1373 if (prv != BUN_NONE &&
1374 ATOMcmp(tt, t, BUNtail(bi, prv)) > 0) {
1375 b->trevsorted = false;
1376 b->tnorevsorted = p;
1377 } else if (nxt != BUN_NONE &&
1378 ATOMcmp(tt, t, BUNtail(bi, nxt)) < 0) {
1379 b->trevsorted = false;
1380 b->tnorevsorted = nxt;
1381 }
1382 } else if (b->tnorevsorted >= p)
1383 b->tnorevsorted = 0;
1384 if (((b->ttype != TYPE_void) & b->tkey & !b->tunique) && b->batCount > 1) {
1385 BATkey(b, false);
1386 } else if (!b->tkey && (b->tnokey[0] == p || b->tnokey[1] == p))
1387 b->tnokey[0] = b->tnokey[1] = 0;
1388 if (b->tnonil)
1389 b->tnonil = t && ATOMcmp(b->ttype, t, ATOMnilptr(b->ttype)) != 0;
1390 b->theap.dirty = true;
1391 if (b->tvheap)
1392 b->tvheap->dirty = true;
1393
1394 return GDK_SUCCEED;
1395
1396 bunins_failed:
1397 return GDK_FAIL;
1398}
1399
1400/* very much like void_inplace, except this materializes a void tail
1401 * column if necessarry */
1402gdk_return
1403BUNreplace(BAT *b, oid id, const void *t, bool force)
1404{
1405 BATcheck(b, "BUNreplace", GDK_FAIL);
1406 BATcheck(t, "BUNreplace: tail value is nil", GDK_FAIL);
1407
1408 if (id < b->hseqbase || id >= b->hseqbase + BATcount(b))
1409 return GDK_SUCCEED;
1410
1411 if (b->tunique && BUNfnd(b, t) != BUN_NONE) {
1412 return GDK_SUCCEED;
1413 }
1414 if (b->ttype == TYPE_void) {
1415 /* no need to materialize if value doesn't change */
1416 if (is_oid_nil(b->tseqbase) ||
1417 b->tseqbase + id - b->hseqbase == *(const oid *) t)
1418 return GDK_SUCCEED;
1419 if (BATmaterialize(b) != GDK_SUCCEED)
1420 return GDK_FAIL;
1421 }
1422
1423 return BUNinplace(b, id - b->hseqbase, t, force);
1424}
1425
1426/* very much like BUNreplace, but this doesn't make any changes if the
1427 * tail column is void */
1428gdk_return
1429void_inplace(BAT *b, oid id, const void *val, bool force)
1430{
1431 assert(id >= b->hseqbase && id < b->hseqbase + BATcount(b));
1432 if (id < b->hseqbase || id >= b->hseqbase + BATcount(b)) {
1433 GDKerror("void_inplace: id out of range\n");
1434 return GDK_FAIL;
1435 }
1436 if (b->tunique && BUNfnd(b, val) != BUN_NONE)
1437 return GDK_SUCCEED;
1438 if (b->ttype == TYPE_void)
1439 return GDK_SUCCEED;
1440 return BUNinplace(b, id - b->hseqbase, val, force);
1441}
1442
1443gdk_return
1444void_replace_bat(BAT *b, BAT *p, BAT *u, bool force)
1445{
1446 BUN r, s;
1447 BATiter uvi = bat_iterator(u);
1448
1449 BATloop(u, r, s) {
1450 oid updid = BUNtoid(p, r);
1451 const void *val = BUNtail(uvi, r);
1452
1453 if (void_inplace(b, updid, val, force) != GDK_SUCCEED)
1454 return GDK_FAIL;
1455 }
1456 return GDK_SUCCEED;
1457}
1458
1459/*
1460 * @- BUN Lookup
1461 * Location of a BUN using a value should use the available indexes to
1462 * speed up access. If indexes are lacking then a hash index is
1463 * constructed under the assumption that 1) multiple access to the BAT
1464 * can be expected and 2) building the hash is only slightly more
1465 * expensive than the full linear scan. BUN_NONE is returned if no
1466 * such element could be found. In those cases where the type is
1467 * known and a hash index is available, one should use the inline
1468 * functions to speed-up processing.
1469 */
1470static BUN
1471slowfnd(BAT *b, const void *v)
1472{
1473 BATiter bi = bat_iterator(b);
1474 BUN p, q;
1475 int (*cmp)(const void *, const void *) = ATOMcompare(b->ttype);
1476
1477 BATloop(b, p, q) {
1478 if ((*cmp)(v, BUNtail(bi, p)) == 0)
1479 return p;
1480 }
1481 return BUN_NONE;
1482}
1483
1484BUN
1485BUNfnd(BAT *b, const void *v)
1486{
1487 BUN r = BUN_NONE;
1488 BATiter bi;
1489
1490 BATcheck(b, "BUNfnd", BUN_NONE);
1491 if (!v)
1492 return r;
1493 if (b->ttype == TYPE_void && b->tvheap != NULL) {
1494 struct canditer ci;
1495 canditer_init(&ci, NULL, b);
1496 return canditer_search(&ci, * (const oid *) v, false);
1497 }
1498 if (BATtvoid(b))
1499 return BUNfndVOID(b, v);
1500 if (!BATcheckhash(b)) {
1501 if (BATordered(b) || BATordered_rev(b))
1502 return SORTfnd(b, v);
1503 }
1504 bi = bat_iterator(b);
1505 switch (ATOMbasetype(b->ttype)) {
1506 case TYPE_bte:
1507 HASHfnd_bte(r, bi, v);
1508 break;
1509 case TYPE_sht:
1510 HASHfnd_sht(r, bi, v);
1511 break;
1512 case TYPE_int:
1513 HASHfnd_int(r, bi, v);
1514 break;
1515 case TYPE_flt:
1516 HASHfnd_flt(r, bi, v);
1517 break;
1518 case TYPE_dbl:
1519 HASHfnd_dbl(r, bi, v);
1520 break;
1521 case TYPE_lng:
1522 HASHfnd_lng(r, bi, v);
1523 break;
1524#ifdef HAVE_HGE
1525 case TYPE_hge:
1526 HASHfnd_hge(r, bi, v);
1527 break;
1528#endif
1529 case TYPE_str:
1530 HASHfnd_str(r, bi, v);
1531 break;
1532 default:
1533 HASHfnd(r, bi, v);
1534 }
1535 return r;
1536 hashfnd_failed:
1537 /* can't build hash table, search the slow way */
1538 return slowfnd(b, v);
1539}
1540
1541/*
1542 * @+ BAT Property Management
1543 *
1544 * The function BATcount returns the number of active elements in a
1545 * BAT. Counting is type independent. It can be implemented quickly,
1546 * because the system ensures a dense BUN list.
1547 */
1548void
1549BATsetcapacity(BAT *b, BUN cnt)
1550{
1551 b->batCapacity = cnt;
1552 assert(b->batCount <= cnt);
1553}
1554
1555void
1556BATsetcount(BAT *b, BUN cnt)
1557{
1558 /* head column is always VOID, and some head properties never change */
1559 assert(!is_oid_nil(b->hseqbase));
1560 assert(cnt <= BUN_MAX);
1561
1562 b->batCount = cnt;
1563 b->batDirtydesc = true;
1564 b->theap.free = tailsize(b, cnt);
1565 if (b->ttype == TYPE_void)
1566 b->batCapacity = cnt;
1567 if (cnt <= 1) {
1568 b->tsorted = b->trevsorted = ATOMlinear(b->ttype);
1569 b->tnosorted = b->tnorevsorted = 0;
1570 }
1571 /* if the BAT was made smaller, we need to zap some values */
1572 if (b->tnosorted >= BUNlast(b))
1573 b->tnosorted = 0;
1574 if (b->tnorevsorted >= BUNlast(b))
1575 b->tnorevsorted = 0;
1576 if (b->tnokey[0] >= BUNlast(b) || b->tnokey[1] >= BUNlast(b)) {
1577 b->tnokey[0] = 0;
1578 b->tnokey[1] = 0;
1579 }
1580 if (b->ttype == TYPE_void) {
1581 b->tsorted = true;
1582 if (is_oid_nil(b->tseqbase)) {
1583 b->tkey = cnt <= 1;
1584 b->trevsorted = true;
1585 b->tnil = true;
1586 b->tnonil = false;
1587 } else {
1588 b->tkey = true;
1589 b->trevsorted = cnt <= 1;
1590 b->tnil = false;
1591 b->tnonil = true;
1592 }
1593 }
1594 assert(b->batCapacity >= cnt);
1595}
1596
1597/*
1598 * The key and name properties can be changed at any time. Keyed
1599 * dimensions are automatically supported by an auxiliary hash-based
1600 * access structure to speed up searching. Turning off the key
1601 * integrity property does not cause the index to disappear. It can
1602 * still be used to speed-up retrieval. The routine BATkey sets the
1603 * key property of the association head.
1604 */
1605gdk_return
1606BATkey(BAT *b, bool flag)
1607{
1608 BATcheck(b, "BATkey", GDK_FAIL);
1609 assert(b->batCacheid > 0);
1610 assert(!b->tunique || flag);
1611 if (b->ttype == TYPE_void) {
1612 if (BATtdense(b) && !flag) {
1613 GDKerror("BATkey: dense column must be unique.\n");
1614 return GDK_FAIL;
1615 }
1616 if (is_oid_nil(b->tseqbase) && flag && b->batCount > 1) {
1617 GDKerror("BATkey: void column cannot be unique.\n");
1618 return GDK_FAIL;
1619 }
1620 }
1621 if (b->tkey != flag)
1622 b->batDirtydesc = true;
1623 b->tkey = flag;
1624 if (!flag) {
1625 b->tseqbase = oid_nil;
1626 } else
1627 b->tnokey[0] = b->tnokey[1] = 0;
1628 if (flag && VIEWtparent(b)) {
1629 /* if a view is key, then so is the parent if the two
1630 * are aligned */
1631 BAT *bp = BBP_cache(VIEWtparent(b));
1632 if (BATcount(b) == BATcount(bp) &&
1633 ATOMtype(BATttype(b)) == ATOMtype(BATttype(bp)) &&
1634 !BATtkey(bp) &&
1635 ((BATtvoid(b) && BATtvoid(bp) && b->tseqbase == bp->tseqbase) ||
1636 BATcount(b) == 0))
1637 return BATkey(bp, true);
1638 }
1639 return GDK_SUCCEED;
1640}
1641
1642void
1643BAThseqbase(BAT *b, oid o)
1644{
1645 if (b != NULL) {
1646 assert(o <= GDK_oid_max); /* i.e., not oid_nil */
1647 assert(o + BATcount(b) <= GDK_oid_max);
1648 assert(b->batCacheid > 0);
1649 if (b->hseqbase != o) {
1650 b->batDirtydesc = true;
1651 b->hseqbase = o;
1652 }
1653 }
1654}
1655
1656void
1657BATtseqbase(BAT *b, oid o)
1658{
1659 assert(o <= oid_nil);
1660 if (b == NULL)
1661 return;
1662 assert(is_oid_nil(o) || o + BATcount(b) <= GDK_oid_max);
1663 assert(b->batCacheid > 0);
1664 if (b->tseqbase != o) {
1665 b->batDirtydesc = true;
1666 }
1667 if (ATOMtype(b->ttype) == TYPE_oid) {
1668 b->tseqbase = o;
1669
1670 /* adapt keyness */
1671 if (BATtvoid(b)) {
1672 b->tsorted = true;
1673 if (is_oid_nil(o)) {
1674 b->tkey = b->batCount <= 1;
1675 b->tnonil = b->batCount == 0;
1676 b->tnil = b->batCount > 0;
1677 b->trevsorted = true;
1678 b->tnosorted = b->tnorevsorted = 0;
1679 if (!b->tkey) {
1680 b->tnokey[0] = 0;
1681 b->tnokey[1] = 1;
1682 } else {
1683 b->tnokey[0] = b->tnokey[1] = 0;
1684 }
1685 } else {
1686 if (!b->tkey) {
1687 b->tkey = true;
1688 b->tnokey[0] = b->tnokey[1] = 0;
1689 }
1690 b->tnonil = true;
1691 b->tnil = false;
1692 b->trevsorted = b->batCount <= 1;
1693 if (!b->trevsorted)
1694 b->tnorevsorted = 1;
1695 }
1696 }
1697 } else {
1698 assert(o == oid_nil);
1699 b->tseqbase = oid_nil;
1700 }
1701}
1702
1703gdk_return
1704BATroles(BAT *b, const char *tnme)
1705{
1706 if (b == NULL)
1707 return GDK_SUCCEED;
1708 if (b->tident && !default_ident(b->tident))
1709 GDKfree(b->tident);
1710 if (tnme)
1711 b->tident = GDKstrdup(tnme);
1712 else
1713 b->tident = BATstring_t;
1714 return b->tident ? GDK_SUCCEED : GDK_FAIL;
1715}
1716
1717/*
1718 * @- Change the BAT access permissions (read, append, write)
1719 * Regrettably, BAT access-permissions, persistent status and memory
1720 * map modes, interact in ways that makes one's brain sizzle. This
1721 * makes BATsetaccess and TMcommit (where a change in BAT persistence
1722 * mode is made permanent) points in which the memory map status of
1723 * bats needs to be carefully re-assessed and ensured.
1724 *
1725 * Another complication is the fact that during commit, concurrent
1726 * users may access the heaps, such that the simple solution
1727 * unmap;re-map is out of the question.
1728 * Even worse, it is not possible to even rename an open mmap file in
1729 * Windows. For this purpose, we dropped the old .priv scheme, which
1730 * relied on file moves. Now, the file that is opened with mmap is
1731 * always the X file, in case of newstorage=STORE_PRIV, we save in a
1732 * new file X.new
1733 *
1734 * we must consider the following dimensions:
1735 *
1736 * persistence:
1737 * not simply the current persistence mode but whether the bat *was*
1738 * present at the last commit point (BBP status & BBPEXISTING).
1739 * The crucial issue is namely whether we must guarantee recovery
1740 * to a previous sane state.
1741 *
1742 * access:
1743 * whether the BAT is BAT_READ or BAT_WRITE. Note that BAT_APPEND
1744 * is usually the same as BAT_READ (as our concern are only data pages
1745 * that already existed at the last commit).
1746 *
1747 * storage:
1748 * the current way the heap file X is memory-mapped;
1749 * STORE_MMAP uses direct mapping (so dirty pages may be flushed
1750 * at any time to disk), STORE_PRIV uses copy-on-write.
1751 *
1752 * newstorage:
1753 * the current save-regime. STORE_MMAP calls msync() on the heap X,
1754 * whereas STORE_PRIV writes the *entire* heap in a file: X.new
1755 * If a BAT is loaded from disk, the field newstorage is used
1756 * to set storage as well (so before change-access and commit-
1757 * persistence mayhem, we always have newstorage=storage).
1758 *
1759 * change-access:
1760 * what happens if the bat-access mode is changed from
1761 * BAT_READ into BAT_WRITE (or vice versa).
1762 *
1763 * commit-persistence:
1764 * what happens during commit if the bat-persistence mode was
1765 * changed (from TRANSIENT into PERSISTENT, or vice versa).
1766 *
1767 * this is the scheme:
1768 *
1769 * persistence access newstorage storage change-access commit-persistence
1770 * =========== ========= ========== ========== ============= ==================
1771 * 0 transient BAT_READ STORE_MMAP STORE_MMAP =>2 =>4
1772 * 1 transient BAT_READ STORE_PRIV STORE_PRIV =>3 =>5
1773 * 2 transient BAT_WRITE STORE_MMAP STORE_MMAP =>0 =>6+
1774 * 3 transient BAT_WRITE STORE_PRIV STORE_PRIV =>1 =>7
1775 * 4 persistent BAT_READ STORE_MMAP STORE_MMAP =>6+ =>0
1776 * 5 persistent BAT_READ STORE_PRIV STORE_PRIV =>7 =>1
1777 * 6 persistent BAT_WRITE STORE_PRIV STORE_MMAP del X.new=>4+ del X.new;=>2+
1778 * 7 persistent BAT_WRITE STORE_PRIV STORE_PRIV =>5 =>3
1779 *
1780 * exception states:
1781 * a transient BAT_READ STORE_PRIV STORE_MMAP =>b =>c
1782 * b transient BAT_WRITE STORE_PRIV STORE_MMAP =>a =>6
1783 * c persistent BAT_READ STORE_PRIV STORE_MMAP =>6 =>a
1784 *
1785 * (+) indicates that we must ensure that the heap gets saved in its new mode
1786 *
1787 * Note that we now allow a heap with save-regime STORE_PRIV that was
1788 * actually mapped STORE_MMAP. In effect, the potential corruption of
1789 * the X file is compensated by writing out full X.new files that take
1790 * precedence. When transitioning out of this state towards one with
1791 * both storage regime and OS as STORE_MMAP we need to move the X.new
1792 * files into the backup directory. Then msync the X file and (on
1793 * success) remove the X.new; see backup_new().
1794 *
1795 * Exception states are only reachable if the commit fails and those
1796 * new persistent bats have already been processed (but never become
1797 * part of a committed state). In that case a transition 2=>6 may end
1798 * up 2=>b. Exception states a and c are reachable from b.
1799 *
1800 * Errors in HEAPchangeaccess() can be handled atomically inside the
1801 * routine. The work on changing mmap modes HEAPcommitpersistence()
1802 * is done during the BBPsync() for all bats that are newly persistent
1803 * (BBPNEW). After the TMcommit(), it is done for those bats that are
1804 * no longer persistent after the commit (BBPDELETED), only if it
1805 * succeeds. Such transient bats cannot be processed before the
1806 * commit, because the commit may fail and then the more unsafe
1807 * transient mmap modes would be present on a persistent bat.
1808 *
1809 * See dirty_bat() in BBPsync() -- gdk_bbp.c and epilogue() in
1810 * gdk_tm.c.
1811 *
1812 * Including the exception states, we have 11 of the 16
1813 * combinations. As for the 5 avoided states, all four
1814 * (persistence,access) states with (STORE_MMAP,STORE_PRIV) are
1815 * omitted (this would amount to an msync() save regime on a
1816 * copy-on-write heap -- which does not work). The remaining avoided
1817 * state is the patently unsafe
1818 * (persistent,BAT_WRITE,STORE_MMAP,STORE_MMAP).
1819 *
1820 * Note that after a server restart exception states are gone, as on
1821 * BAT loads the saved descriptor is inspected again (which will
1822 * reproduce the state at the last succeeded commit).
1823 *
1824 * To avoid exception states, a TMsubcommit protocol would need to be
1825 * used which is too heavy for BATsetaccess().
1826 *
1827 * Note that this code is not about making heaps mmap-ed in the first
1828 * place. It is just about determining which flavor of mmap should be
1829 * used. The MAL user is oblivious of such details.
1830 */
1831
1832/* rather than deleting X.new, we comply with the commit protocol and
1833 * move it to backup storage */
1834static gdk_return
1835backup_new(Heap *hp, int lockbat)
1836{
1837 int batret, bakret, xx, ret = 0;
1838 char *batpath, *bakpath;
1839 struct stat st;
1840
1841 /* file actions here interact with the global commits */
1842 for (xx = 0; xx <= lockbat; xx++)
1843 MT_lock_set(&GDKtrimLock(xx));
1844
1845 /* check for an existing X.new in BATDIR, BAKDIR and SUBDIR */
1846 batpath = GDKfilepath(hp->farmid, BATDIR, hp->filename, ".new");
1847 bakpath = GDKfilepath(hp->farmid, BAKDIR, hp->filename, ".new");
1848 batret = stat(batpath, &st);
1849 bakret = stat(bakpath, &st);
1850
1851 if (batret == 0 && bakret) {
1852 /* no backup yet, so move the existing X.new there out
1853 * of the way */
1854 if ((ret = rename(batpath, bakpath)) < 0)
1855 GDKsyserror("backup_new: rename %s to %s failed\n",
1856 batpath, bakpath);
1857 IODEBUG fprintf(stderr, "#rename(%s,%s) = %d\n", batpath, bakpath, ret);
1858 } else if (batret == 0) {
1859 /* there is a backup already; just remove the X.new */
1860 if ((ret = remove(batpath)) != 0)
1861 GDKsyserror("backup_new: remove %s failed\n", batpath);
1862 IODEBUG fprintf(stderr, "#remove(%s) = %d\n", batpath, ret);
1863 }
1864 GDKfree(batpath);
1865 GDKfree(bakpath);
1866 for (xx = lockbat; xx >= 0; xx--)
1867 MT_lock_unset(&GDKtrimLock(xx));
1868 return ret ? GDK_FAIL : GDK_SUCCEED;
1869}
1870
1871#define ACCESSMODE(wr,rd) ((wr)?BAT_WRITE:(rd)?BAT_READ:-1)
1872
1873/* transition heap from readonly to writable */
1874static storage_t
1875HEAPchangeaccess(Heap *hp, int dstmode, bool existing)
1876{
1877 if (hp->base == NULL || hp->newstorage == STORE_MEM || !existing || dstmode == -1)
1878 return hp->newstorage; /* 0<=>2,1<=>3,a<=>b */
1879
1880 if (dstmode == BAT_WRITE) {
1881 if (hp->storage != STORE_PRIV)
1882 hp->dirty = true; /* exception c does not make it dirty */
1883 return STORE_PRIV; /* 4=>6,5=>7,c=>6 persistent BAT_WRITE needs STORE_PRIV */
1884 }
1885 if (hp->storage == STORE_MMAP) { /* 6=>4 */
1886 hp->dirty = true;
1887 return backup_new(hp, BBP_THREADMASK) != GDK_SUCCEED ? STORE_INVALID : STORE_MMAP; /* only called for existing bats */
1888 }
1889 return hp->storage; /* 7=>5 */
1890}
1891
1892/* heap changes persistence mode (at commit point) */
1893static storage_t
1894HEAPcommitpersistence(Heap *hp, bool writable, bool existing)
1895{
1896 if (existing) { /* existing, ie will become transient */
1897 if (hp->storage == STORE_MMAP && hp->newstorage == STORE_PRIV && writable) { /* 6=>2 */
1898 hp->dirty = true;
1899 return backup_new(hp, -1) != GDK_SUCCEED ? STORE_INVALID : STORE_MMAP; /* only called for existing bats */
1900 }
1901 return hp->newstorage; /* 4=>0,5=>1,7=>3,c=>a no change */
1902 }
1903 /* !existing, ie will become persistent */
1904 if (hp->newstorage == STORE_MEM)
1905 return hp->newstorage;
1906 if (hp->newstorage == STORE_MMAP && !writable)
1907 return STORE_MMAP; /* 0=>4 STORE_MMAP */
1908
1909 if (hp->newstorage == STORE_MMAP)
1910 hp->dirty = true; /* 2=>6 */
1911 return STORE_PRIV; /* 1=>5,2=>6,3=>7,a=>c,b=>6 states */
1912}
1913
1914
1915#define ATOMappendpriv(t, h) (ATOMstorage(t) != TYPE_str || GDK_ELIMDOUBLES(h))
1916
1917/* change the heap modes at a commit */
1918gdk_return
1919BATcheckmodes(BAT *b, bool existing)
1920{
1921 bool wr = (b->batRestricted == BAT_WRITE);
1922 storage_t m1 = STORE_MEM, m3 = STORE_MEM;
1923 bool dirty = false;
1924
1925 BATcheck(b, "BATcheckmodes", GDK_FAIL);
1926
1927 if (b->ttype) {
1928 m1 = HEAPcommitpersistence(&b->theap, wr, existing);
1929 dirty |= (b->theap.newstorage != m1);
1930 }
1931
1932 if (b->tvheap) {
1933 bool ta = (b->batRestricted == BAT_APPEND) && ATOMappendpriv(b->ttype, b->tvheap);
1934 m3 = HEAPcommitpersistence(b->tvheap, wr || ta, existing);
1935 dirty |= (b->tvheap->newstorage != m3);
1936 }
1937 if (m1 == STORE_INVALID || m3 == STORE_INVALID)
1938 return GDK_FAIL;
1939
1940 if (dirty) {
1941 b->batDirtydesc = true;
1942 b->theap.newstorage = m1;
1943 if (b->tvheap)
1944 b->tvheap->newstorage = m3;
1945 }
1946 return GDK_SUCCEED;
1947}
1948
1949gdk_return
1950BATsetaccess(BAT *b, restrict_t newmode)
1951{
1952 restrict_t bakmode;
1953 bool bakdirty;
1954
1955 BATcheck(b, "BATsetaccess", GDK_FAIL);
1956 if (isVIEW(b) && newmode != BAT_READ) {
1957 if (VIEWreset(b) != GDK_SUCCEED)
1958 return GDK_FAIL;
1959 }
1960 bakmode = (restrict_t) b->batRestricted;
1961 bakdirty = b->batDirtydesc;
1962 if (bakmode != newmode || (b->batSharecnt && newmode != BAT_READ)) {
1963 bool existing = (BBP_status(b->batCacheid) & BBPEXISTING) != 0;
1964 bool wr = (newmode == BAT_WRITE);
1965 bool rd = (bakmode == BAT_WRITE);
1966 storage_t m1, m3 = STORE_MEM;
1967 storage_t b1, b3 = STORE_MEM;
1968
1969 if (b->batSharecnt && newmode != BAT_READ) {
1970 BATDEBUG fprintf(stderr, "#BATsetaccess: %s has %d views; try creating a copy\n", BATgetId(b), b->batSharecnt);
1971 GDKerror("BATsetaccess: %s has %d views\n",
1972 BATgetId(b), b->batSharecnt);
1973 return GDK_FAIL;
1974 }
1975
1976 b1 = b->theap.newstorage;
1977 m1 = HEAPchangeaccess(&b->theap, ACCESSMODE(wr, rd), existing);
1978 if (b->tvheap) {
1979 bool ta = (newmode == BAT_APPEND && ATOMappendpriv(b->ttype, b->tvheap));
1980 b3 = b->tvheap->newstorage;
1981 m3 = HEAPchangeaccess(b->tvheap, ACCESSMODE(wr && ta, rd && ta), existing);
1982 }
1983 if (m1 == STORE_INVALID || m3 == STORE_INVALID)
1984 return GDK_FAIL;
1985
1986 /* set new access mode and mmap modes */
1987 b->batRestricted = (unsigned int) newmode;
1988 b->batDirtydesc = true;
1989 b->theap.newstorage = m1;
1990 if (b->tvheap)
1991 b->tvheap->newstorage = m3;
1992
1993 if (existing && BBPsave(b) != GDK_SUCCEED) {
1994 /* roll back all changes */
1995 b->batRestricted = (unsigned int) bakmode;
1996 b->batDirtydesc = bakdirty;
1997 b->theap.newstorage = b1;
1998 if (b->tvheap)
1999 b->tvheap->newstorage = b3;
2000 return GDK_FAIL;
2001 }
2002 }
2003 return GDK_SUCCEED;
2004}
2005
2006restrict_t
2007BATgetaccess(BAT *b)
2008{
2009 BATcheck(b, "BATgetaccess", BAT_WRITE /* 0 */);
2010 assert(b->batRestricted != 3); /* only valid restrict_t values */
2011 return (restrict_t) b->batRestricted;
2012}
2013
2014/*
2015 * @- change BAT persistency (persistent,session,transient)
2016 * In the past, we prevented BATS with certain types from being saved at all:
2017 * - BATs of BATs, as having recursive bats creates cascading
2018 * complexities in commits/aborts.
2019 * - any atom with refcounts, as the BBP has no overview of such
2020 * user-defined refcounts.
2021 * - pointer types, as the values they point to are bound to be transient.
2022 *
2023 * However, nowadays we do allow such saves, as the BBP swapping
2024 * mechanism was altered to be able to save transient bats temporarily
2025 * to disk in order to make room. Thus, we must be able to save any
2026 * transient BAT to disk.
2027 *
2028 * What we don't allow is to make such bats persistent.
2029 *
2030 * Although the persistent state does influence the allowed mmap
2031 * modes, this only goes for the *real* committed persistent
2032 * state. Making the bat persistent with BATmode does not matter for
2033 * the heap modes until the commit point is reached. So we do not need
2034 * to do anything with heap modes yet at this point.
2035 */
2036#define check_type(tp) \
2037 do { \
2038 if (ATOMisdescendant((tp), TYPE_ptr) || \
2039 BATatoms[tp].atomUnfix || \
2040 BATatoms[tp].atomFix) { \
2041 GDKerror("BATmode: %s type implies that %s[%s] " \
2042 "cannot be made persistent.\n", \
2043 ATOMname(tp), BATgetId(b), \
2044 ATOMname(b->ttype)); \
2045 return GDK_FAIL; \
2046 } \
2047 } while (0)
2048
2049gdk_return
2050BATmode(BAT *b, bool transient)
2051{
2052 BATcheck(b, "BATmode", GDK_FAIL);
2053
2054 /* can only make a bat PERSISTENT if its role is already
2055 * PERSISTENT */
2056 assert(transient || b->batRole == PERSISTENT);
2057
2058 if (b->batRole == TRANSIENT && !transient) {
2059 GDKerror("cannot change mode of BAT in TRANSIENT farm.\n");
2060 return GDK_FAIL;
2061 }
2062
2063 if (transient != b->batTransient) {
2064 bat bid = b->batCacheid;
2065
2066 if (!transient) {
2067 check_type(b->ttype);
2068 }
2069
2070 if (!transient && isVIEW(b)) {
2071 if (VIEWreset(b) != GDK_SUCCEED) {
2072 return GDK_FAIL;
2073 }
2074 }
2075 /* persistent BATs get a logical reference */
2076 if (!transient) {
2077 BBPretain(bid);
2078 } else if (!b->batTransient) {
2079 BBPrelease(bid);
2080 }
2081 MT_lock_set(&GDKswapLock(bid));
2082 if (!transient) {
2083 if (!(BBP_status(bid) & BBPDELETED))
2084 BBP_status_on(bid, BBPNEW, "BATmode");
2085 else
2086 BBP_status_on(bid, BBPEXISTING, "BATmode");
2087 BBP_status_off(bid, BBPDELETED, "BATmode");
2088 } else if (!b->batTransient) {
2089 if (!(BBP_status(bid) & BBPNEW))
2090 BBP_status_on(bid, BBPDELETED, "BATmode");
2091 BBP_status_off(bid, BBPPERSISTENT, "BATmode");
2092 }
2093 /* session bats or persistent bats that did not
2094 * witness a commit yet may have been saved */
2095 if (b->batCopiedtodisk) {
2096 if (!transient) {
2097 BBP_status_off(bid, BBPTMP, "BATmode");
2098 } else {
2099 /* TMcommit must remove it to
2100 * guarantee free space */
2101 BBP_status_on(bid, BBPTMP, "BATmode");
2102 }
2103 }
2104 b->batTransient = transient;
2105 MT_lock_unset(&GDKswapLock(bid));
2106 }
2107 return GDK_SUCCEED;
2108}
2109
2110/* BATassertProps checks whether properties are set correctly. Under
2111 * no circumstances will it change any properties. Note that the
2112 * "nil" property is not actually used anywhere, but it is checked. */
2113
2114#ifdef NDEBUG
2115/* assertions are disabled, turn failing tests into a message */
2116#undef assert
2117#define assert(test) ((void) ((test) || fprintf(stderr, "!WARNING: %s:%d: assertion `%s' failed\n", __FILE__, __LINE__, #test)))
2118#endif
2119
2120/* Assert that properties are set correctly.
2121 *
2122 * A BAT can have a bunch of properties set. Mostly, the property
2123 * bits are set if we *know* the property holds, and not set if we
2124 * don't know whether the property holds (or if we know it doesn't
2125 * hold). All properties are per column.
2126 *
2127 * The properties currently maintained are:
2128 *
2129 * seqbase Only valid for TYPE_oid and TYPE_void columns: each
2130 * value in the column is exactly one more than the
2131 * previous value, starting at position 0 with the value
2132 * stored in this property.
2133 * This implies sorted, key, nonil (which therefore need
2134 * to be set).
2135 * nil There is at least one NIL value in the column.
2136 * nonil There are no NIL values in the column.
2137 * key All values in the column are distinct.
2138 * sorted The column is sorted (ascending). If also revsorted,
2139 * then all values are equal.
2140 * revsorted The column is reversely sorted (descending). If
2141 * also sorted, then all values are equal.
2142 * nosorted BUN position which proofs not sorted (given position
2143 * and one before are not ordered correctly).
2144 * norevsorted BUN position which proofs not revsorted (given position
2145 * and one before are not ordered correctly).
2146 * nokey Pair of BUN positions that proof not all values are
2147 * distinct (i.e. values at given locations are equal).
2148 *
2149 * In addition there is a property "unique" that, when set, indicates
2150 * that values must be kept unique (and hence that the "key" property
2151 * must be set). This property is only used when changing (adding,
2152 * replacing) values.
2153 *
2154 * Note that the functions BATtseqbase and BATkey also set more
2155 * properties than you might suspect. When setting properties on a
2156 * newly created and filled BAT, you may want to first make sure the
2157 * batCount is set correctly (e.g. by calling BATsetcount), then use
2158 * BATtseqbase and BATkey, and finally set the other properties.
2159 */
2160
2161void
2162BATassertProps(BAT *b)
2163{
2164 unsigned bbpstatus;
2165 BATiter bi = bat_iterator(b);
2166 BUN p, q;
2167 int (*cmpf)(const void *, const void *);
2168 int cmp;
2169 const void *prev = NULL, *valp, *nilp;
2170
2171 /* general BAT sanity */
2172 assert(b != NULL);
2173 assert(b->batCacheid > 0);
2174 assert(b->batCount >= b->batInserted);
2175
2176 /* headless */
2177 assert(b->hseqbase <= GDK_oid_max); /* non-nil seqbase */
2178 assert(b->hseqbase + BATcount(b) <= GDK_oid_max);
2179
2180 bbpstatus = BBP_status(b->batCacheid);
2181 /* only at most one of BBPDELETED, BBPEXISTING, BBPNEW may be set */
2182 assert(((bbpstatus & BBPDELETED) != 0) +
2183 ((bbpstatus & BBPEXISTING) != 0) +
2184 ((bbpstatus & BBPNEW) != 0) <= 1);
2185
2186 assert(b != NULL);
2187 assert(b->ttype >= TYPE_void);
2188 assert(b->ttype < GDKatomcnt);
2189 assert(b->ttype != TYPE_bat);
2190 assert(!b->tunique || b->tkey); /* if unique, then key */
2191 assert(isVIEW(b) ||
2192 b->ttype == TYPE_void ||
2193 BBPfarms[b->theap.farmid].roles & (1 << b->batRole));
2194 assert(isVIEW(b) ||
2195 b->tvheap == NULL ||
2196 (BBPfarms[b->tvheap->farmid].roles & (1 << b->batRole)));
2197
2198 cmpf = ATOMcompare(b->ttype);
2199 nilp = ATOMnilptr(b->ttype);
2200
2201 assert(b->theap.free >= tailsize(b, BUNlast(b)));
2202 if (b->ttype != TYPE_void) {
2203 assert(b->batCount <= b->batCapacity);
2204 assert(b->theap.size >= b->theap.free);
2205 assert(b->theap.size >> b->tshift >= b->batCapacity);
2206 }
2207
2208 /* void and str imply varsized */
2209 if (b->ttype == TYPE_void ||
2210 ATOMstorage(b->ttype) == TYPE_str)
2211 assert(b->tvarsized);
2212 /* other "known" types are not varsized */
2213 if (ATOMstorage(b->ttype) > TYPE_void &&
2214 ATOMstorage(b->ttype) < TYPE_str)
2215 assert(!b->tvarsized);
2216 /* shift and width have a particular relationship */
2217 if (ATOMstorage(b->ttype) == TYPE_str)
2218 assert(b->twidth >= 1 && b->twidth <= ATOMsize(b->ttype));
2219 else
2220 assert(b->twidth == ATOMsize(b->ttype));
2221 assert(b->tseqbase <= oid_nil);
2222 /* only oid/void columns can be dense */
2223 assert(is_oid_nil(b->tseqbase) || b->ttype == TYPE_oid || b->ttype == TYPE_void);
2224 /* a column cannot both have and not have NILs */
2225 assert(!b->tnil || !b->tnonil);
2226 if (b->ttype == TYPE_void) {
2227 assert(b->tshift == 0);
2228 assert(b->twidth == 0);
2229 assert(b->tsorted);
2230 if (is_oid_nil(b->tseqbase)) {
2231 assert(b->tvheap == NULL);
2232 assert(BATcount(b) == 0 || !b->tnonil);
2233 assert(BATcount(b) <= 1 || !b->tkey);
2234 assert(b->trevsorted);
2235 } else {
2236 if (b->tvheap != NULL) {
2237 /* candidate list with exceptions */
2238 assert(b->batRole == TRANSIENT);
2239 assert(b->tvheap->free <= b->tvheap->size);
2240 assert(b->tvheap->free % SIZEOF_OID == 0);
2241 if (b->tvheap->free > 0) {
2242 const oid *oids = (const oid *) b->tvheap->base;
2243 q = b->tvheap->free / SIZEOF_OID;
2244 assert(oids != NULL);
2245 assert(b->tseqbase + BATcount(b) + q <= GDK_oid_max);
2246 /* exceptions within range */
2247 assert(oids[0] >= b->tseqbase);
2248 assert(oids[q - 1] < b->tseqbase + BATcount(b) + q);
2249 /* exceptions sorted */
2250 for (p = 1; p < q; p++)
2251 assert(oids[p - 1] < oids[p]);
2252 }
2253 }
2254 assert(b->tseqbase + b->batCount <= GDK_oid_max);
2255 assert(BATcount(b) == 0 || !b->tnil);
2256 assert(BATcount(b) <= 1 || !b->trevsorted);
2257 assert(b->tkey);
2258 assert(b->tnonil);
2259 }
2260 return;
2261 }
2262 if (BATtdense(b)) {
2263 assert(b->tseqbase + b->batCount <= GDK_oid_max);
2264 assert(b->ttype == TYPE_oid);
2265 assert(b->tsorted);
2266 assert(b->tkey);
2267 assert(b->tnonil);
2268 if ((q = b->batCount) != 0) {
2269 const oid *o = (const oid *) Tloc(b, 0);
2270 assert(*o == b->tseqbase);
2271 for (p = 1; p < q; p++)
2272 assert(o[p - 1] + 1 == o[p]);
2273 }
2274 return;
2275 }
2276 assert(1 << b->tshift == b->twidth);
2277 /* only linear atoms can be sorted */
2278 assert(!b->tsorted || ATOMlinear(b->ttype));
2279 assert(!b->trevsorted || ATOMlinear(b->ttype));
2280 if (ATOMlinear(b->ttype)) {
2281 assert(b->tnosorted == 0 ||
2282 (b->tnosorted > 0 &&
2283 b->tnosorted < b->batCount));
2284 assert(!b->tsorted || b->tnosorted == 0);
2285 if (!b->tsorted &&
2286 b->tnosorted > 0 &&
2287 b->tnosorted < b->batCount)
2288 assert(cmpf(BUNtail(bi, b->tnosorted - 1),
2289 BUNtail(bi, b->tnosorted)) > 0);
2290 assert(b->tnorevsorted == 0 ||
2291 (b->tnorevsorted > 0 &&
2292 b->tnorevsorted < b->batCount));
2293 assert(!b->trevsorted || b->tnorevsorted == 0);
2294 if (!b->trevsorted &&
2295 b->tnorevsorted > 0 &&
2296 b->tnorevsorted < b->batCount)
2297 assert(cmpf(BUNtail(bi, b->tnorevsorted - 1),
2298 BUNtail(bi, b->tnorevsorted)) < 0);
2299 }
2300 /* if tkey property set, both tnokey values must be 0 */
2301 assert(!b->tkey || (b->tnokey[0] == 0 && b->tnokey[1] == 0));
2302 if (!b->tkey && (b->tnokey[0] != 0 || b->tnokey[1] != 0)) {
2303 /* if tkey not set and tnokey indicates a proof of
2304 * non-key-ness, make sure the tnokey values are in
2305 * range and indeed provide a proof */
2306 assert(b->tnokey[0] != b->tnokey[1]);
2307 assert(b->tnokey[0] < b->batCount);
2308 assert(b->tnokey[1] < b->batCount);
2309 assert(cmpf(BUNtail(bi, b->tnokey[0]),
2310 BUNtail(bi, b->tnokey[1])) == 0);
2311 }
2312 /* var heaps must have sane sizes */
2313 assert(b->tvheap == NULL || b->tvheap->free <= b->tvheap->size);
2314
2315 if (!b->tkey && !b->tsorted && !b->trevsorted &&
2316 !b->tnonil && !b->tnil) {
2317 /* nothing more to prove */
2318 return;
2319 }
2320
2321 PROPDEBUG { /* only do a scan if property checking is requested */
2322 PROPrec *prop;
2323 const void *maxval = NULL;
2324 const void *minval = NULL;
2325 bool seenmax = false, seenmin = false;
2326 bool seennil = false;
2327
2328 if ((prop = BATgetprop(b, GDK_MAX_VALUE)) != NULL)
2329 maxval = VALptr(&prop->v);
2330 if ((prop = BATgetprop(b, GDK_MIN_VALUE)) != NULL)
2331 minval = VALptr(&prop->v);
2332 if (b->tsorted || b->trevsorted || !b->tkey) {
2333 /* if sorted (either way), or we don't have to
2334 * prove uniqueness, we can do a simple
2335 * scan */
2336 /* only call compare function if we have to */
2337 bool cmpprv = b->tsorted | b->trevsorted | b->tkey;
2338 bool cmpnil = b->tnonil | b->tnil;
2339
2340 BATloop(b, p, q) {
2341 valp = BUNtail(bi, p);
2342 bool isnil = cmpf(valp, nilp) == 0;
2343 if (maxval && !isnil) {
2344 cmp = cmpf(maxval, valp);
2345 assert(cmp >= 0);
2346 seenmax |= cmp == 0;
2347 }
2348 if (minval && !isnil) {
2349 cmp = cmpf(minval, valp);
2350 assert(cmp <= 0);
2351 seenmin |= cmp == 0;
2352 }
2353 if (prev && cmpprv) {
2354 cmp = cmpf(prev, valp);
2355 assert(!b->tsorted || cmp <= 0);
2356 assert(!b->trevsorted || cmp >= 0);
2357 assert(!b->tkey || cmp != 0);
2358 }
2359 if (cmpnil) {
2360 assert(!b->tnonil || !isnil);
2361 if (isnil) {
2362 /* we found a nil:
2363 * we're done checking
2364 * for them */
2365 seennil = true;
2366 cmpnil = 0;
2367 if (!cmpprv && maxval == NULL && minval == NULL) {
2368 /* we were
2369 * only
2370 * checking
2371 * for nils,
2372 * so nothing
2373 * more to
2374 * do */
2375 break;
2376 }
2377 }
2378 }
2379 prev = valp;
2380 }
2381 } else { /* b->tkey && !b->tsorted && !b->trevsorted */
2382 /* we need to check for uniqueness the hard
2383 * way (i.e. using a hash table) */
2384 const char *nme = BBP_physical(b->batCacheid);
2385 Hash *hs = NULL;
2386 BUN mask;
2387 int len;
2388
2389 if ((hs = GDKzalloc(sizeof(Hash))) == NULL) {
2390 fprintf(stderr,
2391 "#BATassertProps: cannot allocate "
2392 "hash table\n");
2393 goto abort_check;
2394 }
2395 len = snprintf(hs->heap.filename, sizeof(hs->heap.filename), "%s.hash%d", nme, THRgettid());
2396 if (len == -1 || len > (int) sizeof(hs->heap.filename)) {
2397 GDKfree(hs);
2398 fprintf(stderr,
2399 "#BATassertProps: heap filename "
2400 "is too large\n");
2401 goto abort_check;
2402 }
2403 if (ATOMsize(b->ttype) == 1)
2404 mask = (BUN) 1 << 8;
2405 else if (ATOMsize(b->ttype) == 2)
2406 mask = (BUN) 1 << 16;
2407 else
2408 mask = HASHmask(b->batCount);
2409 if ((hs->heap.farmid = BBPselectfarm(TRANSIENT, b->ttype,
2410 hashheap)) < 0 ||
2411 HASHnew(hs, b->ttype, BUNlast(b),
2412 mask, BUN_NONE) != GDK_SUCCEED) {
2413 GDKfree(hs);
2414 fprintf(stderr,
2415 "#BATassertProps: cannot allocate "
2416 "hash table\n");
2417 goto abort_check;
2418 }
2419 BATloop(b, p, q) {
2420 BUN hb;
2421 BUN prb;
2422 valp = BUNtail(bi, p);
2423 bool isnil = cmpf(valp, nilp) == 0;
2424 if (maxval && !isnil) {
2425 cmp = cmpf(maxval, valp);
2426 assert(cmp >= 0);
2427 seenmax |= cmp == 0;
2428 }
2429 if (minval && !isnil) {
2430 cmp = cmpf(minval, valp);
2431 assert(cmp <= 0);
2432 seenmin |= cmp == 0;
2433 }
2434 prb = HASHprobe(hs, valp);
2435 for (hb = HASHget(hs,prb);
2436 hb != HASHnil(hs);
2437 hb = HASHgetlink(hs,hb))
2438 if (cmpf(valp, BUNtail(bi, hb)) == 0)
2439 assert(!b->tkey);
2440 HASHputlink(hs,p, HASHget(hs,prb));
2441 HASHput(hs,prb,p);
2442 assert(!b->tnonil || !isnil);
2443 seennil |= isnil;
2444 }
2445 HEAPfree(&hs->heap, true);
2446 GDKfree(hs);
2447 }
2448 abort_check:
2449 assert(maxval == NULL || seenmax);
2450 assert(minval == NULL || seenmin);
2451 assert(!b->tnil || seennil);
2452 }
2453}
2454