1/*
2 * This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
5 *
6 * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V.
7 */
8
9/*
10 * @t The Goblin Database Kernel
11 * @v Version 3.05
12 * @a Martin L. Kersten, Peter Boncz, Niels Nes, Sjoerd Mullender
13 *
14 * @+ The Inner Core
15 * The innermost library of the MonetDB database system is formed by
16 * the library called GDK, an abbreviation of Goblin Database Kernel.
17 * Its development was originally rooted in the design of a pure
18 * active-object-oriented programming language, before development
19 * was shifted towards a re-usable database kernel engine.
20 *
21 * GDK is a C library that provides ACID properties on a DSM model
22 * @tex
23 * [@cite{Copeland85}]
24 * @end tex
25 * , using main-memory
26 * database algorithms
27 * @tex
28 * [@cite{Garcia-Molina92}]
29 * @end tex
30 * built on virtual-memory
31 * OS primitives and multi-threaded parallelism.
32 * Its implementation has undergone various changes over its decade
33 * of development, many of which were driven by external needs to
34 * obtain a robust and fast database system.
35 *
36 * The coding scheme explored in GDK has also laid a foundation to
37 * communicate over time experiences and to provide (hopefully)
38 * helpful advice near to the place where the code-reader needs it.
39 * Of course, over such a long time the documentation diverges from
40 * reality. Especially in areas where the environment of this package
41 * is being described.
42 * Consider such deviations as historic landmarks, e.g. crystallization
43 * of brave ideas and mistakes rectified at a later stage.
44 *
45 * @+ Short Outline
46 * The facilities provided in this implementation are:
47 * @itemize
48 * @item
49 * GDK or Goblin Database Kernel routines for session management
50 * @item
51 * BAT routines that define the primitive operations on the
52 * database tables (BATs).
53 * @item
54 * BBP routines to manage the BAT Buffer Pool (BBP).
55 * @item
56 * ATOM routines to manipulate primitive types, define new types
57 * using an ADT interface.
58 * @item
59 * HEAP routines for manipulating heaps: linear spaces of memory
60 * that are GDK's vehicle of mass storage (on which BATs are built).
61 * @item
62 * DELTA routines to access inserted/deleted elements within a
63 * transaction.
64 * @item
65 * HASH routines for manipulating GDK's built-in linear-chained
66 * hash tables, for accelerating lookup searches on BATs.
67 * @item
68 * TM routines that provide basic transaction management primitives.
69 * @item
70 * TRG routines that provided active database support. [DEPRECATED]
71 * @item
72 * ALIGN routines that implement BAT alignment management.
73 * @end itemize
74 *
75 * The Binary Association Table (BAT) is the lowest level of storage
76 * considered in the Goblin runtime system
77 * @tex
78 * [@cite{Goblin}]
79 * @end tex
80 * . A BAT is a
81 * self-descriptive main-memory structure that represents the
82 * @strong{binary relationship} between two atomic types. The
83 * association can be defined over:
84 * @table @code
85 * @item void:
86 * virtual-OIDs: a densely ascending column of OIDs (takes zero-storage).
87 * @item bit:
88 * Booleans, implemented as one byte values.
89 * @item bte:
90 * Tiny (1-byte) integers (8-bit @strong{integer}s).
91 * @item sht:
92 * Short integers (16-bit @strong{integer}s).
93 * @item int:
94 * This is the C @strong{int} type (32-bit).
95 * @item oid:
96 * Unique @strong{long int} values uses as object identifier. Highest
97 * bit cleared always. Thus, oids-s are 31-bit numbers on
98 * 32-bit systems, and 63-bit numbers on 64-bit systems.
99 * @item ptr:
100 * Memory pointer values. DEPRECATED. Can only be stored in transient
101 * BATs.
102 * @item flt:
103 * The IEEE @strong{float} type.
104 * @item dbl:
105 * The IEEE @strong{double} type.
106 * @item lng:
107 * Longs: the C @strong{long long} type (64-bit integers).
108 * @item hge:
109 * "huge" integers: the GCC @strong{__int128} type (128-bit integers).
110 * @item str:
111 * UTF-8 strings (Unicode). A zero-terminated byte sequence.
112 * @item bat:
113 * Bat descriptor. This allows for recursive administered tables, but
114 * severely complicates transaction management. Therefore, they CAN
115 * ONLY BE STORED IN TRANSIENT BATs.
116 * @end table
117 *
118 * This model can be used as a back-end model underlying other -higher
119 * level- models, in order to achieve @strong{better performance} and
120 * @strong{data independence} in one go. The relational model and the
121 * object-oriented model can be mapped on BATs by vertically splitting
122 * every table (or class) for each attribute. Each such a column is
123 * then stored in a BAT with type @strong{bat[oid,attribute]}, where
124 * the unique object identifiers link tuples in the different BATs.
125 * Relationship attributes in the object-oriented model hence are
126 * mapped to @strong{bat[oid,oid]} tables, being equivalent to the
127 * concept of @emph{join indexes} @tex [@cite{Valduriez87}] @end tex .
128 *
129 * The set of built-in types can be extended with user-defined types
130 * through an ADT interface. They are linked with the kernel to
131 * obtain an enhanced library, or they are dynamically loaded upon
132 * request.
133 *
134 * Types can be derived from other types. They represent something
135 * different than that from which they are derived, but their internal
136 * storage management is equal. This feature facilitates the work of
137 * extension programmers, by enabling reuse of implementation code,
138 * but is also used to keep the GDK code portable from 32-bits to
139 * 64-bits machines: the @strong{oid} and @strong{ptr} types are
140 * derived from @strong{int} on 32-bits machines, but is derived from
141 * @strong{lng} on 64 bits machines. This requires changes in only two
142 * lines of code each.
143 *
144 * To accelerate lookup and search in BATs, GDK supports one built-in
145 * search accelerator: hash tables. We choose an implementation
146 * efficient for main-memory: bucket chained hash
147 * @tex
148 * [@cite{LehCar86,Analyti92}]
149 * @end tex
150 * . Alternatively, when the table is sorted, it will resort to
151 * merge-scan operations or binary lookups.
152 *
153 * BATs are built on the concept of heaps, which are large pieces of
154 * main memory. They can also consist of virtual memory, in case the
155 * working set exceeds main-memory. In this case, GDK supports
156 * operations that cluster the heaps of a BAT, in order to improve
157 * performance of its main-memory.
158 *
159 *
160 * @- Rationale
161 * The rationale for choosing a BAT as the building block for both
162 * relational and object-oriented system is based on the following
163 * observations:
164 *
165 * @itemize
166 * @item -
167 * Given the fact that CPU speed and main-memory increase in current
168 * workstation hardware for the last years has been exceeding IO
169 * access speed increase, traditional disk-page oriented algorithms do
170 * no longer take best advantage of hardware, in most database
171 * operations.
172 *
173 * Instead of having a disk-block oriented kernel with a large memory
174 * cache, we choose to build a main-memory kernel, that only under
175 * large data volumes slowly degrades to IO-bound performance,
176 * comparable to traditional systems
177 * @tex
178 * [@cite{boncz95,boncz96}]
179 * @end tex
180 * .
181 *
182 * @item -
183 * Traditional (disk-based) relational systems move too much data
184 * around to save on (main-memory) join operations.
185 *
186 * The fully decomposed store (DSM
187 * @tex
188 * [@cite{Copeland85})]
189 * @end tex
190 * assures that only those attributes of a relation that are needed,
191 * will have to be accessed.
192 *
193 * @item -
194 * The data management issues for a binary association is much
195 * easier to deal with than traditional @emph{struct}-based approaches
196 * encountered in relational systems.
197 *
198 * @item -
199 * Object-oriented systems often maintain a double cache, one with the
200 * disk-based representation and a C pointer-based main-memory
201 * structure. This causes expensive conversions and replicated
202 * storage management. GDK does not do such `pointer swizzling'. It
203 * used virtual-memory (@strong{mmap()}) and buffer management advice
204 * (@strong{madvise()}) OS primitives to cache only once. Tables take
205 * the same form in memory as on disk, making the use of this
206 * technique transparent
207 * @tex
208 * [@cite{oo7}]
209 * @end tex
210 * .
211 * @end itemize
212 *
213 * A RDBMS or OODBMS based on BATs strongly depends on our ability to
214 * efficiently support tuples and to handle small joins, respectively.
215 *
216 * The remainder of this document describes the Goblin Database kernel
217 * implementation at greater detail. It is organized as follows:
218 * @table @code
219 * @item @strong{GDK Interface}:
220 *
221 * It describes the global interface with which GDK sessions can be
222 * started and ended, and environment variables used.
223 *
224 * @item @strong{Binary Association Tables}:
225 *
226 * As already mentioned, these are the primary data structure of GDK.
227 * This chapter describes the kernel operations for creation,
228 * destruction and basic manipulation of BATs and BUNs (i.e. tuples:
229 * Binary UNits).
230 *
231 * @item @strong{BAT Buffer Pool:}
232 *
233 * All BATs are registered in the BAT Buffer Pool. This directory is
234 * used to guide swapping in and out of BATs. Here we find routines
235 * that guide this swapping process.
236 *
237 * @item @strong{GDK Extensibility:}
238 *
239 * Atoms can be defined using a unified ADT interface. There is also
240 * an interface to extend the GDK library with dynamically linked
241 * object code.
242 *
243 * @item @strong{GDK Utilities:}
244 *
245 * Memory allocation and error handling primitives are
246 * provided. Layers built on top of GDK should use them, for proper
247 * system monitoring. Thread management is also included here.
248 *
249 * @item @strong{Transaction Management:}
250 *
251 * For the time being, we just provide BAT-grained concurrency and
252 * global transactions. Work is needed here.
253 *
254 * @item @strong{BAT Alignment:}
255 * Due to the mapping of multi-ary datamodels onto the BAT model, we
256 * expect many correspondences among BATs, e.g.
257 * @emph{bat(oid,attr1),.. bat(oid,attrN)} vertical
258 * decompositions. Frequent activities will be to jump from one
259 * attribute to the other (`bunhopping'). If the head columns are
260 * equal lists in two BATs, merge or even array lookups can be used
261 * instead of hash lookups. The alignment interface makes these
262 * relations explicitly manageable.
263 *
264 * In GDK, complex data models are mapped with DSM on binary tables.
265 * Usually, one decomposes @emph{N}-ary relations into @emph{N} BATs
266 * with an @strong{oid} in the head column, and the attribute in the
267 * tail column. There may well be groups of tables that have the same
268 * sets of @strong{oid}s, equally ordered. The alignment interface is
269 * intended to make this explicit. Implementations can use this
270 * interface to detect this situation, and use cheaper algorithms
271 * (like merge-join, or even array lookup) instead.
272 *
273 * @item @strong{BAT Iterators:}
274 *
275 * Iterators are C macros that generally encapsulate a complex
276 * for-loop. They would be the equivalent of cursors in the SQL
277 * model. The macro interface (instead of a function call interface)
278 * is chosen to achieve speed when iterating main-memory tables.
279 *
280 * @item @strong{Common BAT Operations:}
281 *
282 * These are much used operations on BATs, such as aggregate functions
283 * and relational operators. They are implemented in terms of BAT- and
284 * BUN-manipulation GDK primitives.
285 * @end table
286 *
287 * @+ Interface Files
288 * In this section we summarize the user interface to the GDK library.
289 * It consist of a header file (gdk.h) and an object library
290 * (gdklib.a), which implements the required functionality. The header
291 * file must be included in any program that uses the library. The
292 * library must be linked with such a program.
293 *
294 * @- Database Context
295 *
296 * The MonetDB environment settings are collected in a configuration
297 * file. Amongst others it contains the location of the database
298 * directory. First, the database directory is closed for other
299 * servers running at the same time. Second, performance enhancements
300 * may take effect, such as locking the code into memory (if the OS
301 * permits) and preloading the data dictionary. An error at this
302 * stage normally lead to an abort.
303 */
304
305#ifndef _GDK_H_
306#define _GDK_H_
307
308/* standard includes upon which all configure tests depend */
309#ifdef HAVE_SYS_TYPES_H
310# include <sys/types.h>
311#endif
312#ifdef HAVE_SYS_STAT_H
313# include <sys/stat.h>
314#endif
315#include <stddef.h>
316#include <string.h>
317#ifdef HAVE_UNISTD_H
318# include <unistd.h>
319#endif
320
321#include <ctype.h> /* isspace etc. */
322
323#ifdef HAVE_SYS_FILE_H
324# include <sys/file.h>
325#endif
326
327#ifdef HAVE_DIRENT_H
328# include <dirent.h>
329#endif
330
331#include <limits.h> /* for *_MIN and *_MAX */
332#include <float.h> /* for FLT_MAX and DBL_MAX */
333
334#include "gdk_system.h"
335#include "gdk_posix.h"
336#include "stream.h"
337#include "mstring.h"
338
339#undef MIN
340#undef MAX
341#define MAX(A,B) ((A)<(B)?(B):(A))
342#define MIN(A,B) ((A)>(B)?(B):(A))
343
344/* defines from ctype with casts that allow passing char values */
345#define GDKisspace(c) isspace((unsigned char) (c))
346#define GDKisalnum(c) isalnum((unsigned char) (c))
347#define GDKisdigit(c) isdigit((unsigned char) (c))
348
349#define BATDIR "bat"
350#define TEMPDIR_NAME "TEMP_DATA"
351
352#define DELDIR BATDIR DIR_SEP_STR "DELETE_ME"
353#define BAKDIR BATDIR DIR_SEP_STR "BACKUP"
354#define SUBDIR BAKDIR DIR_SEP_STR "SUBCOMMIT" /* note K, not T */
355#define LEFTDIR BATDIR DIR_SEP_STR "LEFTOVERS"
356#define TEMPDIR BATDIR DIR_SEP_STR TEMPDIR_NAME
357
358/*
359 See `man mserver5` or tools/mserver/mserver5.1
360 for a documentation of the following debug options.
361*/
362
363#define THRDMASK (1)
364#define THRDDEBUG if (GDKdebug & THRDMASK)
365#define CHECKMASK (1<<1)
366#define CHECKDEBUG if (GDKdebug & CHECKMASK)
367#define MEMMASK (1<<2)
368#define MEMDEBUG if (GDKdebug & MEMMASK)
369#define PROPMASK (1<<3)
370#define PROPDEBUG if (GDKdebug & PROPMASK)
371#define IOMASK (1<<4)
372#define IODEBUG if (GDKdebug & IOMASK)
373#define BATMASK (1<<5)
374#define BATDEBUG if (GDKdebug & BATMASK)
375/* PARSEMASK not used anymore
376#define PARSEMASK (1<<6)
377#define PARSEDEBUG if (GDKdebug & PARSEMASK)
378*/
379#define PARMASK (1<<7)
380#define PARDEBUG if (GDKdebug & PARMASK)
381/* HEADLESSMASK not used anymore
382#define HEADLESSMASK (1<<8)
383#define HEADLESSDEBUG if (GDKdebug & HEADLESSMASK)
384*/
385#define TMMASK (1<<9)
386#define TMDEBUG if (GDKdebug & TMMASK)
387#define TEMMASK (1<<10)
388#define TEMDEBUG if (GDKdebug & TEMMASK)
389/* DLMASK not used anymore
390#define DLMASK (1<<11)
391#define DLDEBUG if (GDKdebug & DLMASK)
392*/
393#define PERFMASK (1<<12)
394#define PERFDEBUG if (GDKdebug & PERFMASK)
395#define DELTAMASK (1<<13)
396#define DELTADEBUG if (GDKdebug & DELTAMASK)
397#define LOADMASK (1<<14)
398#define LOADDEBUG if (GDKdebug & LOADMASK)
399/* YACCMASK not used anymore
400#define YACCMASK (1<<15)
401#define YACCDEBUG if (GDKdebug & YACCMASK)
402*/
403/*
404#define ?tcpip? if (GDKdebug&(1<<16))
405#define ?monet_multiplex? if (GDKdebug&(1<<17))
406#define ?ddbench? if (GDKdebug&(1<<18))
407#define ?ddbench? if (GDKdebug&(1<<19))
408#define ?ddbench? if (GDKdebug&(1<<20))
409*/
410#define ACCELMASK (1<<20)
411#define ACCELDEBUG if (GDKdebug & (ACCELMASK|ALGOMASK))
412#define ALGOMASK (1<<21)
413#define ALGODEBUG if (GDKdebug & ALGOMASK)
414#define ESTIMASK (1<<22)
415#define ESTIDEBUG if (GDKdebug & ESTIMASK)
416/* XPROPMASK not used anymore
417#define XPROPMASK (1<<23)
418#define XPROPDEBUG if (GDKdebug & XPROPMASK)
419*/
420
421#define NOSYNCMASK (1<<24)
422
423#define DEADBEEFMASK (1<<25)
424#define DEADBEEFCHK if (!(GDKdebug & DEADBEEFMASK))
425
426#define ALLOCMASK (1<<26)
427#define ALLOCDEBUG if (GDKdebug & ALLOCMASK)
428
429/* M5, only; cf.,
430 * monetdb5/mal/mal.h
431 */
432#define OPTMASK (1<<27)
433#define OPTDEBUG if (GDKdebug & OPTMASK)
434
435#define HEAPMASK (1<<28)
436#define HEAPDEBUG if (GDKdebug & HEAPMASK)
437
438#define FORCEMITOMASK (1<<29)
439#define FORCEMITODEBUG if (GDKdebug & FORCEMITOMASK)
440
441/*
442 * @- GDK session handling
443 * @multitable @columnfractions 0.08 0.7
444 * @item int
445 * @tab GDKinit (char *db, char *dbpath, int allocmap)
446 * @item int
447 * @tab GDKexit (int status)
448 * @end multitable
449 *
450 * The session is bracketed by GDKinit and GDKexit. Initialization
451 * involves setting up the administration for database access, such as
452 * memory allocation for the database buffer pool. During the exit
453 * phase any pending transaction is aborted and the database is freed
454 * for access by other users. A zero is returned upon encountering an
455 * erroneous situation.
456 *
457 * @- Definitions
458 * The interface definitions for the application programs are shown
459 * below. The global variables should not be modified directly.
460 */
461#ifndef TRUE
462#define TRUE true
463#define FALSE false
464#endif
465
466#define IDLENGTH 64 /* maximum BAT id length */
467#define BATMARGIN 1.2 /* extra free margin for new heaps */
468#define BATTINY_BITS 8
469#define BATTINY ((BUN)1<<BATTINY_BITS) /* minimum allocation buncnt for a BAT */
470
471enum {
472 TYPE_void = 0,
473 TYPE_bit,
474 TYPE_bte,
475 TYPE_sht,
476 TYPE_bat, /* BAT id: index in BBPcache */
477 TYPE_int,
478 TYPE_oid,
479 TYPE_ptr, /* C pointer! */
480 TYPE_flt,
481 TYPE_dbl,
482 TYPE_lng,
483#ifdef HAVE_HGE
484 TYPE_hge,
485#endif
486 TYPE_str,
487 TYPE_any = 255, /* limit types to <255! */
488};
489
490typedef int8_t bit;
491typedef int8_t bte;
492typedef int16_t sht;
493typedef int64_t lng;
494typedef uint64_t ulng;
495
496#define SIZEOF_OID SIZEOF_SIZE_T
497typedef size_t oid;
498#define OIDFMT "%zu"
499
500typedef int bat; /* Index into BBP */
501typedef void *ptr; /* Internal coding of types */
502
503#define SIZEOF_PTR SIZEOF_VOID_P
504typedef float flt;
505typedef double dbl;
506typedef char *str;
507
508#define SIZEOF_LNG 8
509#define LL_CONSTANT(val) INT64_C(val)
510#define LLFMT "%" PRId64
511#define ULLFMT "%" PRIu64
512#define LLSCN "%" SCNd64
513#define ULLSCN "%" SCNu64
514
515typedef oid var_t; /* type used for heap index of var-sized BAT */
516#define SIZEOF_VAR_T SIZEOF_OID
517#define VARFMT OIDFMT
518
519#if SIZEOF_VAR_T == SIZEOF_INT
520#define VAR_MAX ((var_t) INT_MAX)
521#else
522#define VAR_MAX ((var_t) INT64_MAX)
523#endif
524
525typedef oid BUN; /* BUN position */
526#define SIZEOF_BUN SIZEOF_OID
527#define BUNFMT OIDFMT
528/* alternatively:
529typedef size_t BUN;
530#define SIZEOF_BUN SIZEOF_SIZE_T
531#define BUNFMT "%zu"
532*/
533#if SIZEOF_BUN == SIZEOF_INT
534#define BUN_NONE ((BUN) INT_MAX)
535#else
536#define BUN_NONE ((BUN) INT64_MAX)
537#endif
538#define BUN_MAX (BUN_NONE - 1) /* maximum allowed size of a BAT */
539
540#define BUN2 2
541#define BUN4 4
542#if SIZEOF_BUN > 4
543#define BUN8 8
544#endif
545typedef uint16_t BUN2type;
546typedef uint32_t BUN4type;
547#if SIZEOF_BUN > 4
548typedef uint64_t BUN8type;
549#endif
550#define BUN2_NONE ((BUN2type) UINT16_C(0xFFFF))
551#define BUN4_NONE ((BUN4type) UINT32_C(0xFFFFFFFF))
552#if SIZEOF_BUN > 4
553#define BUN8_NONE ((BUN8type) UINT64_C(0xFFFFFFFFFFFFFFFF))
554#endif
555
556#include "gdk_atoms.h"
557
558/*
559 * @- Checking and Error definitions:
560 */
561typedef enum { GDK_FAIL, GDK_SUCCEED } gdk_return;
562
563#define ATOMextern(t) (ATOMstorage(t) >= TYPE_str)
564
565typedef enum {
566 PERSISTENT = 0,
567 TRANSIENT,
568} role_t;
569
570/* Heap storage modes */
571typedef enum {
572 STORE_MEM = 0, /* load into GDKmalloced memory */
573 STORE_MMAP = 1, /* mmap() into virtual memory */
574 STORE_PRIV = 2, /* BAT copy of copy-on-write mmap */
575 STORE_CMEM = 3, /* load into malloc (not GDKmalloc) memory*/
576 STORE_NOWN = 4, /* memory not owned by the BAT */
577 STORE_MMAPABS = 5, /* mmap() into virtual memory from an
578 * absolute path (not part of dbfarm) */
579 STORE_INVALID /* invalid value, used to indicate error */
580} storage_t;
581
582typedef struct {
583 size_t free; /* index where free area starts. */
584 size_t size; /* size of the heap (bytes) */
585 char *base; /* base pointer in memory. */
586 char filename[32]; /* file containing image of the heap */
587
588 bte farmid; /* id of farm where heap is located */
589 bool copied:1, /* a copy of an existing map. */
590 hashash:1, /* the string heap contains hash values */
591 cleanhash:1, /* string heaps must clean hash */
592 dirty:1; /* specific heap dirty marker */
593 storage_t storage; /* storage mode (mmap/malloc). */
594 storage_t newstorage; /* new desired storage mode at re-allocation. */
595 bat parentid; /* cache id of VIEW parent bat */
596} Heap;
597
598typedef struct {
599 int type; /* type of index entity */
600 int width; /* width of hash entries */
601 BUN nil; /* nil representation */
602 BUN lim; /* collision list size */
603 BUN mask; /* number of hash buckets-1 (power of 2) */
604 void *Hash; /* hash table */
605 void *Link; /* collision list */
606 Heap heap; /* heap where the hash is stored */
607} Hash;
608
609typedef struct Imprints Imprints;
610
611/*
612 * @+ Binary Association Tables
613 * Having gone to the previous preliminary definitions, we will now
614 * introduce the structure of Binary Association Tables (BATs) in
615 * detail. They are the basic storage unit on which GDK is modeled.
616 *
617 * The BAT holds an unlimited number of binary associations, called
618 * BUNs (@strong{Binary UNits}). The two attributes of a BUN are
619 * called @strong{head} (left) and @strong{tail} (right) in the
620 * remainder of this document.
621 *
622 * @c image{http://monetdb.cwi.nl/projects/monetdb-mk/imgs/bat1,,,,feps}
623 *
624 * The above figure shows what a BAT looks like. It consists of two
625 * columns, called head and tail, such that we have always binary
626 * tuples (BUNs). The overlooking structure is the @strong{BAT
627 * record}. It points to a heap structure called the @strong{BUN
628 * heap}. This heap contains the atomic values inside the two
629 * columns. If they are fixed-sized atoms, these atoms reside directly
630 * in the BUN heap. If they are variable-sized atoms (such as string
631 * or polygon), however, the columns has an extra heap for storing
632 * those (such @strong{variable-sized atom heaps} are then referred to
633 * as @strong{Head Heap}s and @strong{Tail Heap}s). The BUN heap then
634 * contains integer byte-offsets (fixed-sized, of course) into a head-
635 * or tail-heap.
636 *
637 * The BUN heap contains a contiguous range of BUNs. It starts after
638 * the @strong{first} pointer, and finishes at the end in the
639 * @strong{free} area of the BUN. All BUNs after the @strong{inserted}
640 * pointer have been added in the last transaction (and will be
641 * deleted on a transaction abort). All BUNs between the
642 * @strong{deleted} pointer and the @strong{first} have been deleted
643 * in this transaction (and will be reinserted at a transaction
644 * abort).
645 *
646 * The location of a certain BUN in a BAT may change between
647 * successive library routine invocations. Therefore, one should
648 * avoid keeping references into the BAT storage area for long
649 * periods.
650 *
651 * Passing values between the library routines and the enclosing C
652 * program is primarily through value pointers of type ptr. Pointers
653 * into the BAT storage area should only be used for retrieval. Direct
654 * updates of data stored in a BAT is forbidden. The user should
655 * adhere to the interface conventions to guarantee the integrity
656 * rules and to maintain the (hidden) auxiliary search structures.
657 *
658 * @- GDK variant record type
659 * When manipulating values, MonetDB puts them into value records.
660 * The built-in types have a direct entry in the union. Others should
661 * be represented as a pointer of memory in pval or as a string, which
662 * is basically the same. In such cases the len field indicates the
663 * size of this piece of memory.
664 */
665typedef struct {
666 union { /* storage is first in the record */
667 int ival;
668 oid oval;
669 sht shval;
670 bte btval;
671 flt fval;
672 ptr pval;
673 bat bval;
674 str sval;
675 dbl dval;
676 lng lval;
677#ifdef HAVE_HGE
678 hge hval;
679#endif
680 } val;
681 size_t len;
682 int vtype;
683} *ValPtr, ValRecord;
684
685/* interface definitions */
686gdk_export void *VALconvert(int typ, ValPtr t);
687gdk_export char *VALformat(const ValRecord *res);
688gdk_export ValPtr VALcopy(ValPtr dst, const ValRecord *src);
689gdk_export ValPtr VALinit(ValPtr d, int tpe, const void *s);
690gdk_export void VALempty(ValPtr v);
691gdk_export void VALclear(ValPtr v);
692gdk_export ValPtr VALset(ValPtr v, int t, void *p);
693gdk_export void *VALget(ValPtr v);
694gdk_export int VALcmp(const ValRecord *p, const ValRecord *q);
695gdk_export int VALisnil(const ValRecord *v);
696
697/*
698 * @- The BAT record
699 * The elements of the BAT structure are introduced in the remainder.
700 * Instead of using the underlying types hidden beneath it, one should
701 * use a @emph{BAT} type that is supposed to look like this:
702 * @verbatim
703 * typedef struct {
704 * // static BAT properties
705 * bat batCacheid; // bat id: index in BBPcache
706 * bool batTransient; // persistence mode
707 * bool batCopiedtodisk; // BAT is saved on disk?
708 * // dynamic BAT properties
709 * int batHeat; // heat of BAT in the BBP
710 * bool batDirtydesc; // BAT descriptor specific dirty flag
711 * Heap* batBuns; // Heap where the buns are stored
712 * // DELTA status
713 * BUN batInserted; // first inserted BUN
714 * BUN batCount; // Tuple count
715 * // Tail properties
716 * int ttype; // Tail type number
717 * str tident; // name for tail column
718 * bool tkey; // tail values are unique
719 * bool tunique; // tail values must be kept unique
720 * bool tnonil; // tail has no nils
721 * bool tsorted; // are tail values currently ordered?
722 * bool tvarsized; // for speed: tail type is varsized?
723 * // Tail storage
724 * int tloc; // byte-offset in BUN for tail elements
725 * Heap *theap; // heap for varsized tail values
726 * Hash *thash; // linear chained hash table on tail
727 * Imprints *timprints; // column imprints index on tail
728 * orderidx torderidx; // order oid index on tail
729 * } BAT;
730 * @end verbatim
731 *
732 * The internal structure of the @strong{BAT} record is in fact much
733 * more complex, but GDK programmers should refrain of making use of
734 * that.
735 *
736 * Since we don't want to pay cost to keep both views in line with
737 * each other under BAT updates, we work with shared pieces of memory
738 * between the two views. An update to one will thus automatically
739 * update the other. In the same line, we allow @strong{synchronized
740 * BATs} (BATs with identical head columns, and marked as such in the
741 * @strong{BAT Alignment} interface) now to be clustered horizontally.
742 *
743 * @c image{http://monetdb.cwi.nl/projects/monetdb-mk/imgs/bat2,,,,feps}
744 */
745
746typedef struct PROPrec PROPrec;
747
748/* see also comment near BATassertProps() for more information about
749 * the properties */
750typedef struct {
751 str id; /* label for column */
752
753 uint16_t width; /* byte-width of the atom array */
754 int8_t type; /* type id. */
755 uint8_t shift; /* log2 of bun width */
756 bool varsized:1, /* varsized/void (true) or fixedsized (false) */
757 key:1, /* no duplicate values present */
758 unique:1, /* no duplicate values allowed */
759 nonil:1, /* there are no nils in the column */
760 nil:1, /* there is a nil in the column */
761 sorted:1, /* column is sorted in ascending order */
762 revsorted:1; /* column is sorted in descending order */
763 BUN nokey[2]; /* positions that prove key==FALSE */
764 BUN nosorted; /* position that proves sorted==FALSE */
765 BUN norevsorted; /* position that proves revsorted==FALSE */
766 oid seq; /* start of dense sequence */
767
768 Heap heap; /* space for the column. */
769 Heap *vheap; /* space for the varsized data. */
770 Hash *hash; /* hash table */
771 Imprints *imprints; /* column imprints index */
772 Heap *orderidx; /* order oid index */
773
774 PROPrec *props; /* list of dynamic properties stored in the bat descriptor */
775} COLrec;
776
777#define ORDERIDXOFF 3
778
779/* assert that atom width is power of 2, i.e., width == 1<<shift */
780#define assert_shift_width(shift,width) assert(((shift) == 0 && (width) == 0) || ((unsigned)1<<(shift)) == (unsigned)(width))
781
782#define GDKLIBRARY_TALIGN 061036U /* talign field in BBP.dir */
783#define GDKLIBRARY_NIL_NAN 061037U /* flt/dbl NIL not represented by NaN */
784#define GDKLIBRARY_BLOB_SORT 061040U /* blob compare changed */
785#define GDKLIBRARY_OLDDATE 061041U
786#define GDKLIBRARY 061042U
787
788typedef struct BAT {
789 /* static bat properties */
790 bat batCacheid; /* index into BBP */
791 oid hseqbase; /* head seq base */
792
793 /* dynamic bat properties */
794 MT_Id creator_tid; /* which thread created it */
795 bool
796 batCopiedtodisk:1, /* once written */
797 batDirtyflushed:1, /* was dirty before commit started? */
798 batDirtydesc:1, /* bat descriptor dirty marker */
799 batTransient:1; /* should the BAT persist on disk? */
800 uint8_t /* adjacent bit fields are packed together (if they fit) */
801 batRestricted:2; /* access privileges */
802 role_t batRole; /* role of the bat */
803 uint16_t unused; /* value=0 for now (sneakily used by mat.c) */
804 int batSharecnt; /* incoming view count */
805
806 /* delta status administration */
807 BUN batInserted; /* start of inserted elements */
808 BUN batCount; /* tuple count */
809 BUN batCapacity; /* tuple capacity */
810
811 /* dynamic column properties */
812 COLrec T; /* column info */
813
814 MT_Lock batIdxLock; /* lock to manipulate indexes */
815} BAT;
816
817typedef struct BATiter {
818 BAT *b;
819 oid tvid;
820} BATiter;
821
822/* macros to hide complexity of the BAT structure */
823#define ttype T.type
824#define tkey T.key
825#define tunique T.unique
826#define tvarsized T.varsized
827#define tseqbase T.seq
828#define tsorted T.sorted
829#define trevsorted T.revsorted
830#define tident T.id
831#define torderidx T.orderidx
832#define twidth T.width
833#define tshift T.shift
834#define tnonil T.nonil
835#define tnil T.nil
836#define tnokey T.nokey
837#define tnosorted T.nosorted
838#define tnorevsorted T.norevsorted
839#define theap T.heap
840#define tvheap T.vheap
841#define thash T.hash
842#define timprints T.imprints
843#define tprops T.props
844
845
846
847/*
848 * @- Heap Management
849 * Heaps are the low-level entities of mass storage in
850 * BATs. Currently, they can either be stored on disk, loaded into
851 * memory, or memory mapped.
852 * @multitable @columnfractions 0.08 0.7
853 * @item int
854 * @tab
855 * HEAPalloc (Heap *h, size_t nitems, size_t itemsize);
856 * @item int
857 * @tab
858 * HEAPfree (Heap *h, bool remove);
859 * @item int
860 * @tab
861 * HEAPextend (Heap *h, size_t size, bool mayshare);
862 * @item int
863 * @tab
864 * HEAPload (Heap *h, str nme,ext, bool trunc);
865 * @item int
866 * @tab
867 * HEAPsave (Heap *h, str nme,ext);
868 * @item int
869 * @tab
870 * HEAPcopy (Heap *dst,*src);
871 * @item int
872 * @tab
873 * HEAPdelete (Heap *dst, str o, str ext);
874 * @item int
875 * @tab
876 * HEAPwarm (Heap *h);
877 * @end multitable
878 *
879 *
880 * These routines should be used to alloc free or extend heaps; they
881 * isolate you from the different ways heaps can be accessed.
882 */
883gdk_export gdk_return HEAPextend(Heap *h, size_t size, bool mayshare)
884 __attribute__((__warn_unused_result__));
885gdk_export size_t HEAPvmsize(Heap *h);
886gdk_export size_t HEAPmemsize(Heap *h);
887
888/*
889 * @- Internal HEAP Chunk Management
890 * Heaps are used in BATs to store data for variable-size atoms. The
891 * implementor must manage malloc()/free() functionality for atoms in
892 * this heap. A standard implementation is provided here.
893 *
894 * @table @code
895 * @item void
896 * HEAP_initialize (Heap* h, size_t nbytes, size_t nprivate, int align )
897 * @item void
898 * HEAP_destroy (Heap* h)
899 * @item var_t
900 * HEAP_malloc (Heap* heap, size_t nbytes)
901 * @item void
902 * HEAP_free (Heap *heap, var_t block)
903 * @item int
904 * HEAP_private (Heap* h)
905 * @item void
906 * HEAP_printstatus (Heap* h)
907 * @end table
908 *
909 * The heap space starts with a private space that is left untouched
910 * by the normal chunk allocation. You can use this private space
911 * e.g. to store the root of an rtree HEAP_malloc allocates a chunk of
912 * memory on the heap, and returns an index to it. HEAP_free frees a
913 * previously allocated chunk HEAP_private returns an integer index to
914 * private space.
915 */
916
917gdk_export void HEAP_initialize(
918 Heap *heap, /* nbytes -- Initial size of the heap. */
919 size_t nbytes, /* alignment -- for objects on the heap. */
920 size_t nprivate, /* nprivate -- Size of private space */
921 int alignment /* alignment restriction for allocated chunks */
922 );
923
924gdk_export var_t HEAP_malloc(Heap *heap, size_t nbytes);
925gdk_export void HEAP_free(Heap *heap, var_t block);
926
927/*
928 * @- BAT construction
929 * @multitable @columnfractions 0.08 0.7
930 * @item @code{BAT* }
931 * @tab COLnew (oid headseq, int tailtype, BUN cap, role_t role)
932 * @item @code{BAT* }
933 * @tab BATextend (BAT *b, BUN newcap)
934 * @end multitable
935 *
936 * A temporary BAT is instantiated using COLnew with the type aliases
937 * of the required binary association. The aliases include the
938 * built-in types, such as TYPE_int....TYPE_ptr, and the atomic types
939 * introduced by the user. The initial capacity to be accommodated
940 * within a BAT is indicated by cap. Their extend is automatically
941 * incremented upon storage overflow. Failure to create the BAT
942 * results in a NULL pointer.
943 *
944 * The routine BATclone creates an empty BAT storage area with the
945 * properties inherited from its argument.
946 */
947#define BATDELETE (-9999)
948
949gdk_export BAT *COLnew(oid hseq, int tltype, BUN capacity, role_t role)
950 __attribute__((__warn_unused_result__));
951gdk_export BAT *BATdense(oid hseq, oid tseq, BUN cnt)
952 __attribute__((__warn_unused_result__));
953gdk_export gdk_return BATextend(BAT *b, BUN newcap)
954 __attribute__((__warn_unused_result__));
955
956/* internal */
957gdk_export uint8_t ATOMelmshift(int sz);
958
959/*
960 * @- BUN manipulation
961 * @multitable @columnfractions 0.08 0.7
962 * @item BAT*
963 * @tab BATappend (BAT *b, BAT *n, BAT *s, bool force)
964 * @item BAT*
965 * @tab BUNappend (BAT *b, ptr right, bool force)
966 * @item BAT*
967 * @tab BUNreplace (BAT *b, oid left, ptr right, bool force)
968 * @item int
969 * @tab BUNfnd (BAT *b, ptr tail)
970 * @item BUN
971 * @tab BUNlocate (BAT *b, ptr head, ptr tail)
972 * @item ptr
973 * @tab BUNtail (BAT *b, BUN p)
974 * @end multitable
975 *
976 * The BATs contain a number of fixed-sized slots to store the binary
977 * associations. These slots are called BUNs or BAT units. A BUN
978 * variable is a pointer into the storage area of the BAT, but it has
979 * limited validity. After a BAT modification, previously obtained
980 * BUNs may no longer reside at the same location.
981 *
982 * The association list does not contain holes. This density permits
983 * users to quickly access successive elements without the need to
984 * test the items for validity. Moreover, it simplifies transport to
985 * disk and other systems. The negative effect is that the user should
986 * be aware of the evolving nature of the sequence, which may require
987 * copying the BAT first.
988 *
989 * The update operations come in two flavors: BUNappend and
990 * BUNreplace. The batch version of BUNappend is BATappend.
991 *
992 * The routine BUNfnd provides fast access to a single BUN providing a
993 * value for the tail of the binary association.
994 *
995 * The routine BUNtail returns a pointer to the second value in an
996 * association. To guard against side effects on the BAT, one should
997 * normally copy this value into a scratch variable for further
998 * processing.
999 *
1000 * Behind the interface we use several macros to access the BUN fixed
1001 * part and the variable part. The BUN operators always require a BAT
1002 * pointer and BUN identifier.
1003 * @itemize
1004 * @item
1005 * BATttype(b) finds out the type of a BAT.
1006 * @item
1007 * BUNlast(b) returns the BUN pointer directly after the last BUN
1008 * in the BAT.
1009 * @end itemize
1010 */
1011/* NOTE: `p' is evaluated after a possible upgrade of the heap */
1012#if SIZEOF_VAR_T == 8
1013#define Tputvalue(b, p, v, copyall) \
1014 do { \
1015 if ((b)->tvarsized && (b)->ttype) { \
1016 var_t _d; \
1017 void *_ptr; \
1018 ATOMputVAR((b)->ttype, (b)->tvheap, &_d, v); \
1019 if ((b)->twidth < SIZEOF_VAR_T && \
1020 ((b)->twidth <= 2 ? _d - GDK_VAROFFSET : _d) >= ((size_t) 1 << (8 * (b)->twidth))) { \
1021 /* doesn't fit in current heap, upgrade it */ \
1022 if (GDKupgradevarheap((b), _d, (copyall), (b)->batRestricted == BAT_READ) != GDK_SUCCEED) \
1023 goto bunins_failed; \
1024 } \
1025 _ptr = (p); \
1026 switch ((b)->twidth) { \
1027 case 1: \
1028 * (uint8_t *) _ptr = (uint8_t) (_d - GDK_VAROFFSET); \
1029 break; \
1030 case 2: \
1031 * (uint16_t *) _ptr = (uint16_t) (_d - GDK_VAROFFSET); \
1032 break; \
1033 case 4: \
1034 * (uint32_t *) _ptr = (uint32_t) _d; \
1035 break; \
1036 case 8: \
1037 * (var_t *) _ptr = _d; \
1038 break; \
1039 } \
1040 } else { \
1041 ATOMputFIX((b)->ttype, (p), v); \
1042 } \
1043 } while (false)
1044#else
1045#define Tputvalue(b, p, v, copyall) \
1046 do { \
1047 if ((b)->tvarsized && (b)->ttype) { \
1048 var_t _d; \
1049 void *_ptr; \
1050 ATOMputVAR((b)->ttype, (b)->tvheap, &_d, v); \
1051 if ((b)->twidth < SIZEOF_VAR_T && \
1052 ((b)->twidth <= 2 ? _d - GDK_VAROFFSET : _d) >= ((size_t) 1 << (8 * (b)->twidth))) { \
1053 /* doesn't fit in current heap, upgrade it */ \
1054 if (GDKupgradevarheap((b), _d, (copyall), (b)->batRestricted == BAT_READ) != GDK_SUCCEED) \
1055 goto bunins_failed; \
1056 } \
1057 _ptr = (p); \
1058 switch ((b)->twidth) { \
1059 case 1: \
1060 * (uint8_t *) _ptr = (uint8_t) (_d - GDK_VAROFFSET); \
1061 break; \
1062 case 2: \
1063 * (uint16_t *) _ptr = (uint16_t) (_d - GDK_VAROFFSET); \
1064 break; \
1065 case 4: \
1066 * (var_t *) _ptr = _d; \
1067 break; \
1068 } \
1069 } else { \
1070 ATOMputFIX((b)->ttype, (p), v); \
1071 } \
1072 } while (false)
1073#endif
1074#define tfastins_nocheck(b, p, v, s) \
1075 do { \
1076 (b)->theap.free += (s); \
1077 Tputvalue((b), Tloc((b), (p)), (v), false); \
1078 } while (false)
1079
1080#define bunfastapp_nocheck(b, p, v, ts) \
1081 do { \
1082 tfastins_nocheck(b, p, v, ts); \
1083 (b)->batCount++; \
1084 } while (false)
1085
1086#define bunfastapp(b, v) \
1087 do { \
1088 if (BATcount(b) >= BATcapacity(b)) { \
1089 if (BATcount(b) == BUN_MAX) { \
1090 GDKerror("bunfastapp: too many elements to accomodate (" BUNFMT ")\n", BUN_MAX); \
1091 goto bunins_failed; \
1092 } \
1093 if (BATextend((b), BATgrows(b)) != GDK_SUCCEED) \
1094 goto bunins_failed; \
1095 } \
1096 bunfastapp_nocheck(b, (b)->batCount, v, Tsize(b)); \
1097 } while (false)
1098
1099#define bunfastappTYPE(TYPE, b, v) \
1100 do { \
1101 if (BATcount(b) >= BATcapacity(b)) { \
1102 if (BATcount(b) == BUN_MAX) { \
1103 GDKerror("bunfastapp: too many elements to accomodate (" BUNFMT ")\n", BUN_MAX); \
1104 goto bunins_failed; \
1105 } \
1106 if (BATextend((b), BATgrows(b)) != GDK_SUCCEED) \
1107 goto bunins_failed; \
1108 } \
1109 (b)->theap.free += sizeof(TYPE); \
1110 ((TYPE *) (b)->theap.base)[(b)->batCount++] = * (const TYPE *) (v); \
1111 } while (false)
1112
1113#define tfastins_nocheckVAR(b, p, v, s) \
1114 do { \
1115 var_t _d; \
1116 (b)->theap.free += (s); \
1117 ATOMputVAR((b)->ttype, (b)->tvheap, &_d, v); \
1118 if ((b)->twidth < SIZEOF_VAR_T && \
1119 ((b)->twidth <= 2 ? _d - GDK_VAROFFSET : _d) >= ((size_t) 1 << (8 * (b)->twidth))) { \
1120 /* doesn't fit in current heap, upgrade it */ \
1121 if (GDKupgradevarheap((b), _d, false, (b)->batRestricted == BAT_READ) != GDK_SUCCEED) \
1122 goto bunins_failed; \
1123 } \
1124 switch ((b)->twidth) { \
1125 case 1: \
1126 ((uint8_t *) (b)->theap.base)[p] = (uint8_t) (_d - GDK_VAROFFSET); \
1127 break; \
1128 case 2: \
1129 ((uint16_t *) (b)->theap.base)[p] = (uint16_t) (_d - GDK_VAROFFSET); \
1130 break; \
1131 case 4: \
1132 ((uint32_t *) (b)->theap.base)[p] = (uint32_t) _d; \
1133 break; \
1134 case 8: /* superfluous on 32-bit archs */ \
1135 ((uint64_t *) (b)->theap.base)[p] = (uint64_t) _d; \
1136 break; \
1137 } \
1138 } while (false)
1139
1140#define bunfastapp_nocheckVAR(b, p, v, ts) \
1141 do { \
1142 tfastins_nocheckVAR(b, p, v, ts); \
1143 (b)->batCount++; \
1144 } while (false)
1145
1146#define bunfastappVAR(b, v) \
1147 do { \
1148 if ((b)->batCount >= BATcapacity(b)) { \
1149 if ((b)->batCount == BUN_MAX || BATcount(b) == BUN_MAX) { \
1150 GDKerror("bunfastapp: too many elements to accomodate (" BUNFMT ")\n", BUN_MAX); \
1151 goto bunins_failed; \
1152 } \
1153 if (BATextend((b), BATgrows(b)) != GDK_SUCCEED) \
1154 goto bunins_failed; \
1155 } \
1156 bunfastapp_nocheckVAR(b, (b)->batCount, v, Tsize(b)); \
1157 } while (false)
1158
1159gdk_export gdk_return GDKupgradevarheap(BAT *b, var_t v, bool copyall, bool mayshare)
1160 __attribute__((__warn_unused_result__));
1161gdk_export gdk_return BUNappend(BAT *b, const void *right, bool force)
1162 __attribute__((__warn_unused_result__));
1163gdk_export gdk_return BATappend(BAT *b, BAT *n, BAT *s, bool force)
1164 __attribute__((__warn_unused_result__));
1165
1166gdk_export gdk_return BUNdelete(BAT *b, oid o)
1167 __attribute__((__warn_unused_result__));
1168gdk_export gdk_return BATdel(BAT *b, BAT *d)
1169 __attribute__((__warn_unused_result__));
1170
1171gdk_export gdk_return BUNinplace(BAT *b, BUN p, const void *right, bool force)
1172 __attribute__((__warn_unused_result__));
1173gdk_export gdk_return BATreplace(BAT *b, BAT *p, BAT *n, bool force)
1174 __attribute__((__warn_unused_result__));
1175
1176/* Functions to perform a binary search on a sorted BAT.
1177 * See gdk_search.c for details. */
1178gdk_export BUN SORTfnd(BAT *b, const void *v);
1179gdk_export BUN SORTfndfirst(BAT *b, const void *v);
1180gdk_export BUN SORTfndlast(BAT *b, const void *v);
1181
1182gdk_export BUN ORDERfnd(BAT *b, const void *v);
1183gdk_export BUN ORDERfndfirst(BAT *b, const void *v);
1184gdk_export BUN ORDERfndlast(BAT *b, const void *v);
1185
1186gdk_export BUN BUNfnd(BAT *b, const void *right);
1187
1188#define BUNfndVOID(b, v) \
1189 ((is_oid_nil(*(const oid*)(v)) ^ is_oid_nil((b)->tseqbase)) | \
1190 (*(const oid*)(v) < (b)->tseqbase) | \
1191 (*(const oid*)(v) >= (b)->tseqbase + (b)->batCount) ? \
1192 BUN_NONE : \
1193 (BUN) (*(const oid*)(v) - (b)->tseqbase))
1194
1195#define BATttype(b) (BATtdense(b) ? TYPE_oid : (b)->ttype)
1196#define Tbase(b) ((b)->tvheap->base)
1197
1198#define Tsize(b) ((b)->twidth)
1199
1200#define tailsize(b,p) ((b)->ttype?((size_t)(p))<<(b)->tshift:0)
1201
1202#define Tloc(b,p) ((void *)((b)->theap.base+(((size_t)(p))<<(b)->tshift)))
1203
1204typedef var_t stridx_t;
1205#define SIZEOF_STRIDX_T SIZEOF_VAR_T
1206#define GDK_VARALIGN SIZEOF_STRIDX_T
1207
1208#if SIZEOF_VAR_T == 8
1209#define VarHeapValRaw(b,p,w) \
1210 ((w) == 1 ? (var_t) ((uint8_t *) (b))[p] + GDK_VAROFFSET : \
1211 (w) == 2 ? (var_t) ((uint16_t *) (b))[p] + GDK_VAROFFSET : \
1212 (w) == 4 ? (var_t) ((uint32_t *) (b))[p] : \
1213 ((var_t *) (b))[p])
1214#else
1215#define VarHeapValRaw(b,p,w) \
1216 ((w) == 1 ? (var_t) ((uint8_t *) (b))[p] + GDK_VAROFFSET : \
1217 (w) == 2 ? (var_t) ((uint16_t *) (b))[p] + GDK_VAROFFSET : \
1218 ((var_t *) (b))[p])
1219#endif
1220#define VarHeapVal(b,p,w) ((size_t) VarHeapValRaw(b,p,w))
1221#define BUNtvaroff(bi,p) VarHeapVal((bi).b->theap.base, (p), (bi).b->twidth)
1222
1223#define BUNtloc(bi,p) Tloc((bi).b,p)
1224#define BUNtpos(bi,p) Tpos(&(bi),p)
1225#define BUNtvar(bi,p) (assert((bi).b->ttype && (bi).b->tvarsized), (void *) (Tbase((bi).b)+BUNtvaroff(bi,p)))
1226#define BUNtail(bi,p) ((bi).b->ttype?(bi).b->tvarsized?BUNtvar(bi,p):BUNtloc(bi,p):BUNtpos(bi,p))
1227
1228#define BUNlast(b) (assert((b)->batCount <= BUN_MAX), (b)->batCount)
1229
1230#define BATcount(b) ((b)->batCount)
1231
1232/* return the oid value at BUN position p from the (v)oid bat b
1233 * works with any TYPE_void or TYPE_oid bat */
1234static inline oid
1235BUNtoid(BAT *b, BUN p)
1236{
1237 assert(ATOMtype(b->ttype) == TYPE_oid);
1238 /* BATcount is the number of valid entries, so with
1239 * exceptions, the last value can well be larger than
1240 * b->tseqbase + BATcount(b) */
1241 assert(p < BATcount(b));
1242 assert(b->ttype == TYPE_void || b->tvheap == NULL);
1243 if (is_oid_nil(b->tseqbase)) {
1244 if (b->ttype == TYPE_void)
1245 return b->tseqbase;
1246 return ((const oid *) (b)->theap.base)[p];
1247 }
1248 oid o = b->tseqbase + p;
1249 if (b->ttype == TYPE_oid || b->tvheap == NULL) {
1250 return o;
1251 }
1252 /* only exceptions allowed on transient BATs */
1253 assert(b->batRole == TRANSIENT);
1254 /* make sure exception area is a reasonable size */
1255 assert(b->tvheap->free % SIZEOF_OID == 0);
1256 BUN nexc = (BUN) (b->tvheap->free / SIZEOF_OID);
1257 if (nexc == 0) {
1258 /* no exceptions (why the vheap?) */
1259 return o;
1260 }
1261 const oid *exc = (oid *) b->tvheap->base;
1262 if (o < exc[0])
1263 return o;
1264 if (o + nexc > exc[nexc - 1])
1265 return o + nexc;
1266 BUN lo = 0, hi = nexc - 1;
1267 while (hi - lo > 1) {
1268 BUN mid = (hi + lo) / 2;
1269 if (exc[mid] - mid > o)
1270 hi = mid;
1271 else
1272 lo = mid;
1273 }
1274 return o + hi;
1275}
1276
1277#define bat_iterator(_b) ((BATiter) {.b = (_b), .tvid = 0})
1278
1279/*
1280 * @- BAT properties
1281 * @multitable @columnfractions 0.08 0.7
1282 * @item BUN
1283 * @tab BATcount (BAT *b)
1284 * @item void
1285 * @tab BATsetcapacity (BAT *b, BUN cnt)
1286 * @item void
1287 * @tab BATsetcount (BAT *b, BUN cnt)
1288 * @item BAT *
1289 * @tab BATkey (BAT *b, bool onoff)
1290 * @item BAT *
1291 * @tab BATmode (BAT *b, bool transient)
1292 * @item BAT *
1293 * @tab BATsetaccess (BAT *b, restrict_t mode)
1294 * @item int
1295 * @tab BATdirty (BAT *b)
1296 * @item restrict_t
1297 * @tab BATgetaccess (BAT *b)
1298 * @end multitable
1299 *
1300 * The function BATcount returns the number of associations stored in
1301 * the BAT.
1302 *
1303 * The BAT is given a new logical name using BBPrename.
1304 *
1305 * The integrity properties to be maintained for the BAT are
1306 * controlled separately. A key property indicates that duplicates in
1307 * the association dimension are not permitted.
1308 *
1309 * The persistency indicator tells the retention period of BATs. The
1310 * system support two modes: PERSISTENT and TRANSIENT.
1311 * The PERSISTENT BATs are automatically saved upon session boundary
1312 * or transaction commit. TRANSIENT BATs are removed upon transaction
1313 * boundary. All BATs are initially TRANSIENT unless their mode is
1314 * changed using the routine BATmode.
1315 *
1316 * The BAT properties may be changed at any time using BATkey
1317 * and BATmode.
1318 *
1319 * Valid BAT access properties can be set with BATsetaccess and
1320 * BATgetaccess: BAT_READ, BAT_APPEND, and BAT_WRITE. BATs can be
1321 * designated to be read-only. In this case some memory optimizations
1322 * may be made (slice and fragment bats can point to stable subsets of
1323 * a parent bat). A special mode is append-only. It is then allowed
1324 * to insert BUNs at the end of the BAT, but not to modify anything
1325 * that already was in there.
1326 */
1327gdk_export BUN BATcount_no_nil(BAT *b);
1328gdk_export void BATsetcapacity(BAT *b, BUN cnt);
1329gdk_export void BATsetcount(BAT *b, BUN cnt);
1330gdk_export BUN BATgrows(BAT *b);
1331gdk_export gdk_return BATkey(BAT *b, bool onoff);
1332gdk_export gdk_return BATmode(BAT *b, bool transient);
1333gdk_export gdk_return BATroles(BAT *b, const char *tnme);
1334gdk_export void BAThseqbase(BAT *b, oid o);
1335gdk_export void BATtseqbase(BAT *b, oid o);
1336
1337/* The batRestricted field indicates whether a BAT is readonly.
1338 * we have modes: BAT_WRITE = all permitted
1339 * BAT_APPEND = append-only
1340 * BAT_READ = read-only
1341 * VIEW bats are always mapped read-only.
1342 */
1343typedef enum {
1344 BAT_WRITE, /* all kinds of access allowed */
1345 BAT_READ, /* only read-access allowed */
1346 BAT_APPEND, /* only reads and appends allowed */
1347} restrict_t;
1348
1349gdk_export gdk_return BATsetaccess(BAT *b, restrict_t mode);
1350gdk_export restrict_t BATgetaccess(BAT *b);
1351
1352
1353#define BATdirty(b) (!(b)->batCopiedtodisk || \
1354 (b)->batDirtydesc || \
1355 (b)->theap.dirty || \
1356 ((b)->tvheap != NULL && (b)->tvheap->dirty))
1357
1358#define BATcapacity(b) (b)->batCapacity
1359/*
1360 * @- BAT manipulation
1361 * @multitable @columnfractions 0.08 0.7
1362 * @item BAT *
1363 * @tab BATclear (BAT *b, bool force)
1364 * @item BAT *
1365 * @tab COLcopy (BAT *b, int tt, bool writeable, role_t role)
1366 * @end multitable
1367 *
1368 * The routine BATclear removes the binary associations, leading to an
1369 * empty, but (re-)initialized BAT. Its properties are retained. A
1370 * temporary copy is obtained with Colcopy. The new BAT has an unique
1371 * name.
1372 */
1373gdk_export gdk_return BATclear(BAT *b, bool force);
1374gdk_export BAT *COLcopy(BAT *b, int tt, bool writable, role_t role);
1375
1376gdk_export gdk_return BATgroup(BAT **groups, BAT **extents, BAT **histo, BAT *b, BAT *s, BAT *g, BAT *e, BAT *h)
1377 __attribute__((__warn_unused_result__));
1378
1379/*
1380 * @- BAT Input/Output
1381 * @multitable @columnfractions 0.08 0.7
1382 * @item BAT *
1383 * @tab BATload (str name)
1384 * @item BAT *
1385 * @tab BATsave (BAT *b)
1386 * @item int
1387 * @tab BATdelete (BAT *b)
1388 * @end multitable
1389 *
1390 * A BAT created by COLnew is considered temporary until one calls the
1391 * routine BATsave or BATmode. This routine reserves disk space and
1392 * checks for name clashes in the BAT directory. It also makes the BAT
1393 * persistent. The empty BAT is initially marked as ordered on both
1394 * columns.
1395 *
1396 * Failure to read or write the BAT results in a NULL, otherwise it
1397 * returns the BAT pointer.
1398 *
1399 * @- Heap Storage Modes
1400 * The discriminative storage modes are memory-mapped, compressed, or
1401 * loaded in memory. As can be seen in the bat record, each BAT has
1402 * one BUN-heap (@emph{bn}), and possibly two heaps (@emph{hh} and
1403 * @emph{th}) for variable-sized atoms.
1404 */
1405
1406gdk_export gdk_return BATsave(BAT *b)
1407 __attribute__((__warn_unused_result__));
1408gdk_export void BATmsync(BAT *b);
1409
1410#define NOFARM (-1) /* indicate to GDKfilepath to create relative path */
1411
1412gdk_export char *GDKfilepath(int farmid, const char *dir, const char *nme, const char *ext);
1413gdk_export bool GDKinmemory(void);
1414gdk_export gdk_return GDKcreatedir(const char *nme);
1415
1416gdk_export void OIDXdestroy(BAT *b);
1417
1418/*
1419 * @- Printing
1420 * @multitable @columnfractions 0.08 0.7
1421 * @item int
1422 * @tab BATprintcolumns (stream *f, int argc, BAT *b[]);
1423 * @end multitable
1424 *
1425 * The functions to convert BATs into ASCII. They are primarily meant for ease of
1426 * debugging and to a lesser extent for output processing. Printing a
1427 * BAT is done essentially by looping through its components, printing
1428 * each association.
1429 *
1430 */
1431gdk_export gdk_return BATprintcolumns(stream *s, int argc, BAT *argv[]);
1432gdk_export gdk_return BATprint(stream *s, BAT *b);
1433
1434/*
1435 * @- BAT clustering
1436 * @multitable @columnfractions 0.08 0.7
1437 * @item bool
1438 * @tab BATordered (BAT *b)
1439 * @end multitable
1440 *
1441 * When working in a main-memory situation, clustering of data on
1442 * disk-pages is not important. Whenever mmap()-ed data is used
1443 * intensively, reducing the number of page faults is a hot issue.
1444 *
1445 * The above functions rearrange data in MonetDB heaps (used for
1446 * storing BUNs var-sized atoms, or accelerators). Applying these
1447 * clusterings will allow that MonetDB's main-memory oriented
1448 * algorithms work efficiently also in a disk-oriented context.
1449 *
1450 * BATordered starts a check on the tail values to see if they are
1451 * ordered. The result is returned and stored in the tsorted field of
1452 * the BAT.
1453 */
1454gdk_export bool BATkeyed(BAT *b);
1455gdk_export bool BATordered(BAT *b);
1456gdk_export bool BATordered_rev(BAT *b);
1457gdk_export gdk_return BATsort(BAT **sorted, BAT **order, BAT **groups, BAT *b, BAT *o, BAT *g, bool reverse, bool nilslast, bool stable)
1458 __attribute__((__warn_unused_result__));
1459
1460
1461gdk_export void GDKqsort(void *restrict h, void *restrict t, const void *restrict base, size_t n, int hs, int ts, int tpe, bool reverse, bool nilslast);
1462
1463#define BATtordered(b) ((b)->tsorted)
1464#define BATtrevordered(b) ((b)->trevsorted)
1465/* BAT is dense (i.e., BATtvoid() is true and tseqbase is not NIL) */
1466#define BATtdense(b) (!is_oid_nil((b)->tseqbase) && \
1467 ((b)->tvheap == NULL || (b)->tvheap->free == 0))
1468/* BATtvoid: BAT can be (or actually is) represented by TYPE_void */
1469#define BATtvoid(b) (BATtdense(b) || (b)->ttype==TYPE_void)
1470#define BATtkey(b) ((b)->tkey || BATtdense(b))
1471
1472/* set some properties that are trivial to deduce */
1473#define BATsettrivprop(b) \
1474 do { \
1475 assert(!is_oid_nil((b)->hseqbase)); \
1476 (b)->batDirtydesc = true; /* likely already set */ \
1477 assert(is_oid_nil((b)->tseqbase) || \
1478 ATOMtype((b)->ttype) == TYPE_oid); \
1479 if ((b)->ttype == TYPE_void) { \
1480 if (is_oid_nil((b)->tseqbase)) { \
1481 (b)->tnonil = (b)->batCount == 0; \
1482 (b)->tnil = !(b)->tnonil; \
1483 (b)->trevsorted = true; \
1484 (b)->tkey = (b)->batCount <= 1; \
1485 } else { \
1486 (b)->tnonil = true; \
1487 (b)->tnil = false; \
1488 (b)->tkey = true; \
1489 (b)->trevsorted = (b)->batCount <= 1; \
1490 } \
1491 (b)->tsorted = true; \
1492 } else if ((b)->batCount <= 1) { \
1493 if (ATOMlinear((b)->ttype)) { \
1494 (b)->tsorted = true; \
1495 (b)->trevsorted = true; \
1496 } \
1497 (b)->tkey = true; \
1498 if ((b)->batCount == 0) { \
1499 (b)->tnonil = true; \
1500 (b)->tnil = false; \
1501 if ((b)->ttype == TYPE_oid) { \
1502 (b)->tseqbase = 0; \
1503 } \
1504 } else if ((b)->ttype == TYPE_oid) { \
1505 /* b->batCount == 1 */ \
1506 oid sqbs = ((const oid *) (b)->theap.base)[0]; \
1507 if (is_oid_nil(sqbs)) { \
1508 (b)->tnonil = false; \
1509 (b)->tnil = true; \
1510 } else { \
1511 (b)->tnonil = true; \
1512 (b)->tnil = false; \
1513 } \
1514 (b)->tseqbase = sqbs; \
1515 } \
1516 } else if ((b)->batCount == 2 && ATOMlinear((b)->ttype)) { \
1517 int _c_; \
1518 if ((b)->tvarsized) \
1519 _c_ = ATOMcmp((b)->ttype, \
1520 Tbase(b) + VarHeapVal((b)->theap.base, 0, (b)->twidth), \
1521 Tbase(b) + VarHeapVal((b)->theap.base, 1, (b)->twidth)); \
1522 else \
1523 _c_ = ATOMcmp((b)->ttype, \
1524 Tloc((b), 0), \
1525 Tloc((b), 1)); \
1526 (b)->tsorted = _c_ <= 0; \
1527 (b)->tnosorted = !(b)->tsorted; \
1528 (b)->trevsorted = _c_ >= 0; \
1529 (b)->tnorevsorted = !(b)->trevsorted; \
1530 (b)->tkey = _c_ != 0; \
1531 (b)->tnokey[0] = 0; \
1532 (b)->tnokey[1] = !(b)->tkey; \
1533 } else if (!ATOMlinear((b)->ttype)) { \
1534 (b)->tsorted = false; \
1535 (b)->trevsorted = false; \
1536 } \
1537 } while (false)
1538
1539/*
1540 * @+ BAT Buffer Pool
1541 * @multitable @columnfractions 0.08 0.7
1542 * @item int
1543 * @tab BBPfix (bat bi)
1544 * @item int
1545 * @tab BBPunfix (bat bi)
1546 * @item int
1547 * @tab BBPretain (bat bi)
1548 * @item int
1549 * @tab BBPrelease (bat bi)
1550 * @item str
1551 * @tab BBPname (bat bi)
1552 * @item bat
1553 * @tab BBPindex (str nme)
1554 * @item BAT*
1555 * @tab BATdescriptor (bat bi)
1556 * @end multitable
1557 *
1558 * The BAT Buffer Pool module contains the code to manage the storage
1559 * location of BATs.
1560 *
1561 * The remaining BBP tables contain status information to load, swap
1562 * and migrate the BATs. The core table is BBPcache which contains a
1563 * pointer to the BAT descriptor with its heaps. A zero entry means
1564 * that the file resides on disk. Otherwise it has been read or mapped
1565 * into memory.
1566 *
1567 * BATs loaded into memory are retained in a BAT buffer pool. They
1568 * retain their position within the cache during their life cycle,
1569 * which make indexing BATs a stable operation.
1570 *
1571 * The BBPindex routine checks if a BAT with a certain name is
1572 * registered in the buffer pools. If so, it returns its BAT id. The
1573 * BATdescriptor routine has a BAT id parameter, and returns a pointer
1574 * to the corresponding BAT record (after incrementing the reference
1575 * count). The BAT will be loaded into memory, if necessary.
1576 *
1577 * The structure of the BBP file obeys the tuple format for GDK.
1578 *
1579 * The status and BAT persistency information is encoded in the status
1580 * field.
1581 */
1582typedef struct {
1583 BAT *cache; /* if loaded: BAT* handle */
1584 char *logical; /* logical name (may point at bak) */
1585 char bak[16]; /* logical name backup (tmp_%o) */
1586 bat next; /* next BBP slot in linked list */
1587 BAT *desc; /* the BAT descriptor */
1588 char physical[20]; /* dir + basename for storage */
1589 str options; /* A string list of options */
1590 int refs; /* in-memory references on which the loaded status of a BAT relies */
1591 int lrefs; /* logical references on which the existence of a BAT relies */
1592 volatile unsigned status; /* status mask used for spin locking */
1593 /* MT_Id pid; non-zero thread-id if this BAT is private */
1594} BBPrec;
1595
1596gdk_export bat BBPlimit;
1597#define N_BBPINIT 1000
1598#if SIZEOF_VOID_P == 4
1599#define BBPINITLOG 11
1600#else
1601#define BBPINITLOG 14
1602#endif
1603#define BBPINIT (1 << BBPINITLOG)
1604/* absolute maximum number of BATs is N_BBPINIT * BBPINIT
1605 * this also gives the longest possible "physical" name and "bak" name
1606 * of a BAT: the "bak" name is "tmp_%o", so at most 12 + \0 bytes on
1607 * 64 bit architecture and 11 + \0 on 32 bit architecture; the
1608 * physical name is a bit more complicated, but the longest possible
1609 * name is 17 + \0 bytes (16 + \0 on 32 bits) */
1610gdk_export BBPrec *BBP[N_BBPINIT];
1611
1612/* fast defines without checks; internal use only */
1613#define BBP_cache(i) BBP[(i)>>BBPINITLOG][(i)&(BBPINIT-1)].cache
1614#define BBP_logical(i) BBP[(i)>>BBPINITLOG][(i)&(BBPINIT-1)].logical
1615#define BBP_bak(i) BBP[(i)>>BBPINITLOG][(i)&(BBPINIT-1)].bak
1616#define BBP_next(i) BBP[(i)>>BBPINITLOG][(i)&(BBPINIT-1)].next
1617#define BBP_physical(i) BBP[(i)>>BBPINITLOG][(i)&(BBPINIT-1)].physical
1618#define BBP_options(i) BBP[(i)>>BBPINITLOG][(i)&(BBPINIT-1)].options
1619#define BBP_desc(i) BBP[(i)>>BBPINITLOG][(i)&(BBPINIT-1)].desc
1620#define BBP_refs(i) BBP[(i)>>BBPINITLOG][(i)&(BBPINIT-1)].refs
1621#define BBP_lrefs(i) BBP[(i)>>BBPINITLOG][(i)&(BBPINIT-1)].lrefs
1622#define BBP_status(i) BBP[(i)>>BBPINITLOG][(i)&(BBPINIT-1)].status
1623#define BBP_pid(i) BBP[(i)>>BBPINITLOG][(i)&(BBPINIT-1)].pid
1624
1625/* macros that nicely check parameters */
1626#define BBPstatus(i) (BBPcheck((i),"BBPstatus")?BBP_status(i):0)
1627#define BBPrefs(i) (BBPcheck((i),"BBPrefs")?BBP_refs(i):-1)
1628#define BBPcache(i) (BBPcheck((i),"BBPcache")?BBP_cache(i):(BAT*) NULL)
1629#define BBPname(i) \
1630 (BBPcheck((i), "BBPname") ? \
1631 BBP[(i) >> BBPINITLOG][(i) & (BBPINIT - 1)].logical : \
1632 "")
1633#define BBPvalid(i) (BBP_logical(i) != NULL && *BBP_logical(i) != '.')
1634#define BATgetId(b) BBPname((b)->batCacheid)
1635
1636#define BBPRENAME_ALREADY (-1)
1637#define BBPRENAME_ILLEGAL (-2)
1638#define BBPRENAME_LONG (-3)
1639
1640gdk_export void BBPlock(void);
1641
1642gdk_export void BBPunlock(void);
1643
1644gdk_export BAT *BBPquickdesc(bat b, bool delaccess);
1645
1646/*
1647 * @+ GDK Extensibility
1648 * GDK can be extended with new atoms, search accelerators and storage
1649 * modes.
1650 *
1651 * @- Atomic Type Descriptors
1652 * The atomic types over which the binary associations are maintained
1653 * are described by an atom descriptor.
1654 * @multitable @columnfractions 0.08 0.7
1655 * @item void
1656 * @tab ATOMallocate (str nme);
1657 * @item int
1658 * @tab ATOMindex (char *nme);
1659 * @item int
1660 * @tab ATOMdump ();
1661 * @item void
1662 * @tab ATOMdelete (int id);
1663 * @item str
1664 * @tab ATOMname (int id);
1665 * @item unsigned int
1666 * @tab ATOMsize (int id);
1667 * @item int
1668 * @tab ATOMvarsized (int id);
1669 * @item ptr
1670 * @tab ATOMnilptr (int id);
1671 * @item ssize_t
1672 * @tab ATOMfromstr (int id, str s, size_t* len, ptr* v_dst);
1673 * @item ssize_t
1674 * @tab ATOMtostr (int id, str s, size_t* len, ptr* v_dst);
1675 * @item hash_t
1676 * @tab ATOMhash (int id, ptr val, in mask);
1677 * @item int
1678 * @tab ATOMcmp (int id, ptr val_1, ptr val_2);
1679 * @item int
1680 * @tab ATOMfix (int id, ptr v);
1681 * @item int
1682 * @tab ATOMunfix (int id, ptr v);
1683 * @item int
1684 * @tab ATOMheap (int id, Heap *hp, size_t cap);
1685 * @item int
1686 * @tab ATOMput (int id, Heap *hp, BUN pos_dst, ptr val_src);
1687 * @item int
1688 * @tab ATOMdel (int id, Heap *hp, BUN v_src);
1689 * @item size_t
1690 * @tab ATOMlen (int id, ptr val);
1691 * @item ptr
1692 * @tab ATOMnil (int id);
1693 * @item ssize_t
1694 * @tab ATOMformat (int id, ptr val, char** buf);
1695 * @item int
1696 * @tab ATOMprint (int id, ptr val, stream *fd);
1697 * @item ptr
1698 * @tab ATOMdup (int id, ptr val );
1699 * @end multitable
1700 *
1701 * @- Atom Definition
1702 * User defined atomic types can be added to a running system with the
1703 * following interface:.
1704 *
1705 * @itemize
1706 * @item @emph{ATOMallocate()} registers a new atom definition if
1707 * there is no atom registered yet under that name.
1708 *
1709 * @item @emph{ATOMdelete()} unregisters an atom definition.
1710 *
1711 * @item @emph{ATOMindex()} looks up the atom descriptor with a certain name.
1712 * @end itemize
1713 *
1714 * @- Atom Manipulation
1715 *
1716 * @itemize
1717 * @item The @emph{ATOMname()} operation retrieves the name of an atom
1718 * using its id.
1719 *
1720 * @item The @emph{ATOMsize()} operation returns the atoms fixed size.
1721 *
1722 * @item The @emph{ATOMnilptr()} operation returns a pointer to the
1723 * nil-value of an atom. We usually take one dedicated value halfway
1724 * down the negative extreme of the atom range (if such a concept
1725 * fits), as the nil value.
1726 *
1727 * @item The @emph{ATOMnil()} operation returns a copy of the nil
1728 * value, allocated with GDKmalloc().
1729 *
1730 * @item The @emph{ATOMheap()} operation creates a new var-sized atom
1731 * heap in 'hp' with capacity 'cap'.
1732 *
1733 * @item The @emph{ATOMhash()} computes a hash index for a
1734 * value. `val' is a direct pointer to the atom value. Its return
1735 * value should be an hash_t between 0 and 'mask'.
1736 *
1737 * @item The @emph{ATOMcmp()} operation compares two atomic
1738 * values. Its parameters are pointers to atomic values.
1739 *
1740 * @item The @emph{ATOMlen()} operation computes the byte length for a
1741 * value. `val' is a direct pointer to the atom value. Its return
1742 * value should be an integer between 0 and 'mask'.
1743 *
1744 * @item The @emph{ATOMdel()} operation deletes a var-sized atom from
1745 * its heap `hp'. The integer byte-index of this value in the heap is
1746 * pointed to by `val_src'.
1747 *
1748 * @item The @emph{ATOMput()} operation inserts an atom `src_val' in a
1749 * BUN at `dst_pos'. This involves copying the fixed sized part in the
1750 * BUN. In case of a var-sized atom, this fixed sized part is an
1751 * integer byte-index into a heap of var-sized atoms. The atom is then
1752 * also copied into that heap `hp'.
1753 *
1754 * @item The @emph{ATOMfix()} and @emph{ATOMunfix()} operations do
1755 * bookkeeping on the number of references that a GDK application
1756 * maintains to the atom. In MonetDB, we use this to count the number
1757 * of references directly, or through BATs that have columns of these
1758 * atoms. The only operator for which this is currently relevant is
1759 * BAT. The operators return the POST reference count to the
1760 * atom. BATs with fixable atoms may not be stored persistently.
1761 *
1762 * @item The @emph{ATOMfromstr()} parses an atom value from string
1763 * `s'. The memory allocation policy is the same as in
1764 * @emph{ATOMget()}. The return value is the number of parsed
1765 * characters or -1 on failure. Also in case of failure, the output
1766 * parameter buf is a valid pointer or NULL.
1767 *
1768 * @item The @emph{ATOMprint()} prints an ASCII description of the
1769 * atom value pointed to by `val' on file descriptor `fd'. The return
1770 * value is the number of parsed characters.
1771 *
1772 * @item The @emph{ATOMformat()} is similar to @emph{ATOMprint()}. It
1773 * prints an atom on a newly allocated string. It must later be freed
1774 * with @strong{GDKfree}. The number of characters written is
1775 * returned. This is minimally the size of the allocated buffer.
1776 *
1777 * @item The @emph{ATOMdup()} makes a copy of the given atom. The
1778 * storage needed for this is allocated and should be removed by the
1779 * user.
1780 * @end itemize
1781 *
1782 * These wrapper functions correspond closely to the interface
1783 * functions one has to provide for a user-defined atom. They
1784 * basically (with exception of @emph{ATOMput()}, @emph{ATOMprint()}
1785 * and @emph{ATOMformat()}) just have the atom id parameter prepended
1786 * to them.
1787 */
1788
1789/* atomFromStr returns the number of bytes of the input string that
1790 * were processed. atomToStr returns the length of the string
1791 * produced. Both functions return -1 on (any kind of) failure. If
1792 * *dst is not NULL, *len specifies the available space. If there is
1793 * not enough space, or if *dst is NULL, *dst will be freed (if not
1794 * NULL) and a new buffer will be allocated and returned in *dst.
1795 * *len will be set to reflect the actual size allocated. If
1796 * allocation fails, *dst will be NULL on return and *len is
1797 * undefined. In any case, if the function returns, *buf is either
1798 * NULL or a valid pointer and then *len is the size of the area *buf
1799 * points to. */
1800
1801typedef struct {
1802 /* simple attributes */
1803 char name[IDLENGTH];
1804 uint8_t storage; /* stored as another type? */
1805 bool linear; /* atom can be ordered linearly */
1806 uint16_t size; /* fixed size of atom */
1807
1808 /* automatically generated fields */
1809 const void *atomNull; /* global nil value */
1810
1811 /* generic (fixed + varsized atom) ADT functions */
1812 ssize_t (*atomFromStr) (const char *src, size_t *len, void **dst, bool external);
1813 ssize_t (*atomToStr) (char **dst, size_t *len, const void *src, bool external);
1814 void *(*atomRead) (void *dst, stream *s, size_t cnt);
1815 gdk_return (*atomWrite) (const void *src, stream *s, size_t cnt);
1816 int (*atomCmp) (const void *v1, const void *v2);
1817 BUN (*atomHash) (const void *v);
1818 /* optional functions */
1819 int (*atomFix) (const void *atom);
1820 int (*atomUnfix) (const void *atom);
1821
1822 /* varsized atom-only ADT functions */
1823 var_t (*atomPut) (Heap *, var_t *off, const void *src);
1824 void (*atomDel) (Heap *, var_t *atom);
1825 size_t (*atomLen) (const void *atom);
1826 void (*atomHeap) (Heap *, size_t);
1827} atomDesc;
1828
1829gdk_export atomDesc BATatoms[];
1830gdk_export int GDKatomcnt;
1831
1832gdk_export int ATOMallocate(const char *nme);
1833gdk_export int ATOMindex(const char *nme);
1834
1835gdk_export str ATOMname(int id);
1836gdk_export size_t ATOMlen(int id, const void *v);
1837gdk_export void *ATOMnil(int id);
1838gdk_export int ATOMprint(int id, const void *val, stream *fd);
1839gdk_export char *ATOMformat(int id, const void *val);
1840
1841gdk_export void *ATOMdup(int id, const void *val);
1842
1843/*
1844 * @- Built-in Accelerator Functions
1845 *
1846 * @multitable @columnfractions 0.08 0.7
1847 * @item BAT*
1848 * @tab
1849 * BAThash (BAT *b)
1850 * @end multitable
1851 *
1852 * The current BAT implementation supports three search accelerators:
1853 * hashing, imprints, and ordered index.
1854 *
1855 * The routine BAThash makes sure that a hash accelerator on the tail of the
1856 * BAT exists. GDK_FAIL is returned upon failure to create the supportive
1857 * structures.
1858 */
1859gdk_export gdk_return BAThash(BAT *b);
1860
1861/*
1862 * @- Column Imprints Functions
1863 *
1864 * @multitable @columnfractions 0.08 0.7
1865 * @item BAT*
1866 * @tab
1867 * BATimprints (BAT *b)
1868 * @end multitable
1869 *
1870 * The column imprints index structure.
1871 *
1872 */
1873
1874gdk_export gdk_return BATimprints(BAT *b);
1875gdk_export void IMPSdestroy(BAT *b);
1876gdk_export lng IMPSimprintsize(BAT *b);
1877
1878/* The ordered index structure */
1879
1880gdk_export gdk_return BATorderidx(BAT *b, bool stable);
1881gdk_export gdk_return GDKmergeidx(BAT *b, BAT**a, int n_ar);
1882gdk_export bool BATcheckorderidx(BAT *b);
1883
1884/*
1885 * @- Multilevel Storage Modes
1886 *
1887 * We should bring in the compressed mode as the first, maybe
1888 * built-in, mode. We could then add for instance HTTP remote storage,
1889 * SQL storage, and READONLY (cd-rom) storage.
1890 *
1891 * @+ GDK Utilities
1892 * Interfaces for memory management, error handling, thread management
1893 * and system information.
1894 *
1895 * @- GDK memory management
1896 * @multitable @columnfractions 0.08 0.8
1897 * @item void*
1898 * @tab GDKmalloc (size_t size)
1899 * @item void*
1900 * @tab GDKzalloc (size_t size)
1901 * @item void*
1902 * @tab GDKmallocmax (size_t size, size_t *maxsize, int emergency)
1903 * @item void*
1904 * @tab GDKrealloc (void* pold, size_t size)
1905 * @item void*
1906 * @tab GDKreallocmax (void* pold, size_t size, size_t *maxsize, int emergency)
1907 * @item void
1908 * @tab GDKfree (void* blk)
1909 * @item str
1910 * @tab GDKstrdup (str s)
1911 * @item str
1912 * @tab GDKstrndup (str s, size_t n)
1913 * @end multitable
1914 *
1915 * These utilities are primarily used to maintain control over
1916 * critical interfaces to the C library. Moreover, the statistic
1917 * routines help in identifying performance and bottlenecks in the
1918 * current implementation.
1919 *
1920 * Compiled with -DMEMLEAKS the GDK memory management log their
1921 * activities, and are checked on inconsistent frees and memory leaks.
1922 */
1923
1924/* we prefer to use vm_alloc routines on size > GDKmmap */
1925gdk_export void *GDKmmap(const char *path, int mode, size_t len);
1926
1927gdk_export size_t GDK_mem_maxsize; /* max allowed size of committed memory */
1928gdk_export size_t GDK_vm_maxsize; /* max allowed size of reserved vm */
1929
1930gdk_export size_t GDKmem_cursize(void); /* RAM/swapmem that MonetDB has claimed from OS */
1931gdk_export size_t GDKvm_cursize(void); /* current MonetDB VM address space usage */
1932
1933gdk_export void *GDKmalloc(size_t size)
1934 __attribute__((__malloc__))
1935 __attribute__((__alloc_size__(1)))
1936 __attribute__((__warn_unused_result__));
1937gdk_export void *GDKzalloc(size_t size)
1938 __attribute__((__malloc__))
1939 __attribute__((__alloc_size__(1)))
1940 __attribute__((__warn_unused_result__));
1941gdk_export void *GDKrealloc(void *pold, size_t size)
1942 __attribute__((__alloc_size__(2)))
1943 __attribute__((__warn_unused_result__));
1944gdk_export void GDKfree(void *blk);
1945gdk_export str GDKstrdup(const char *s)
1946 __attribute__((__warn_unused_result__));
1947gdk_export str GDKstrndup(const char *s, size_t n)
1948 __attribute__((__warn_unused_result__));
1949
1950#if !defined(NDEBUG) && !defined(STATIC_CODE_ANALYSIS)
1951/* In debugging mode, replace GDKmalloc and other functions with a
1952 * version that optionally prints calling information.
1953 *
1954 * We have two versions of this code: one using a GNU C extension, and
1955 * one using traditional C. The GNU C version also prints the name of
1956 * the calling function.
1957 */
1958#ifdef __GNUC__
1959#define GDKmalloc(s) \
1960 ({ \
1961 size_t _size = (s); \
1962 void *_res = GDKmalloc(_size); \
1963 ALLOCDEBUG \
1964 fprintf(stderr, \
1965 "#GDKmalloc(%zu) -> %p" \
1966 " %s[%s:%d]\n", \
1967 _size, _res, \
1968 __func__, __FILE__, __LINE__); \
1969 _res; \
1970 })
1971#define GDKzalloc(s) \
1972 ({ \
1973 size_t _size = (s); \
1974 void *_res = GDKzalloc(_size); \
1975 ALLOCDEBUG \
1976 fprintf(stderr, \
1977 "#GDKzalloc(%zu) -> %p" \
1978 " %s[%s:%d]\n", \
1979 _size, _res, \
1980 __func__, __FILE__, __LINE__); \
1981 _res; \
1982 })
1983#define GDKrealloc(p, s) \
1984 ({ \
1985 void *_ptr = (p); \
1986 size_t _size = (s); \
1987 void *_res = GDKrealloc(_ptr, _size); \
1988 ALLOCDEBUG \
1989 fprintf(stderr, \
1990 "#GDKrealloc(%p,%zu) -> %p" \
1991 " %s[%s:%d]\n", \
1992 _ptr, _size, _res, \
1993 __func__, __FILE__, __LINE__); \
1994 _res; \
1995 })
1996#define GDKfree(p) \
1997 ({ \
1998 void *_ptr = (p); \
1999 ALLOCDEBUG if (_ptr) \
2000 fprintf(stderr, \
2001 "#GDKfree(%p)" \
2002 " %s[%s:%d]\n", \
2003 _ptr, \
2004 __func__, __FILE__, __LINE__); \
2005 GDKfree(_ptr); \
2006 })
2007#define GDKstrdup(s) \
2008 ({ \
2009 const char *_str = (s); \
2010 void *_res = GDKstrdup(_str); \
2011 ALLOCDEBUG \
2012 fprintf(stderr, \
2013 "#GDKstrdup(len=%zu) -> %p" \
2014 " %s[%s:%d]\n", \
2015 _str ? strlen(_str) : 0, \
2016 _res, \
2017 __func__, __FILE__, __LINE__); \
2018 _res; \
2019 })
2020#define GDKstrndup(s, n) \
2021 ({ \
2022 const char *_str = (s); \
2023 size_t _n = (n); \
2024 void *_res = GDKstrndup(_str, _n); \
2025 ALLOCDEBUG \
2026 fprintf(stderr, \
2027 "#GDKstrndup(len=%zu) -> %p" \
2028 " %s[%s:%d]\n", \
2029 _n, \
2030 _res, \
2031 __func__, __FILE__, __LINE__); \
2032 _res; \
2033 })
2034#define GDKmmap(p, m, l) \
2035 ({ \
2036 const char *_path = (p); \
2037 int _mode = (m); \
2038 size_t _len = (l); \
2039 void *_res = GDKmmap(_path, _mode, _len); \
2040 ALLOCDEBUG \
2041 fprintf(stderr, \
2042 "#GDKmmap(%s,0x%x,%zu) -> %p" \
2043 " %s[%s:%d]\n", \
2044 _path ? _path : "NULL", \
2045 (unsigned) _mode, _len, \
2046 _res, \
2047 __func__, __FILE__, __LINE__); \
2048 _res; \
2049 })
2050#define malloc(s) \
2051 ({ \
2052 size_t _size = (s); \
2053 void *_res = malloc(_size); \
2054 ALLOCDEBUG \
2055 fprintf(stderr, \
2056 "#malloc(%zu) -> %p" \
2057 " %s[%s:%d]\n", \
2058 _size, _res, \
2059 __func__, __FILE__, __LINE__); \
2060 _res; \
2061 })
2062#define calloc(n, s) \
2063 ({ \
2064 size_t _nmemb = (n); \
2065 size_t _size = (s); \
2066 void *_res = calloc(_nmemb,_size); \
2067 ALLOCDEBUG \
2068 fprintf(stderr, \
2069 "#calloc(%zu,%zu) -> %p" \
2070 " %s[%s:%d]\n", \
2071 _nmemb, _size, _res, \
2072 __func__, __FILE__, __LINE__); \
2073 _res; \
2074 })
2075#define realloc(p, s) \
2076 ({ \
2077 void *_ptr = (p); \
2078 size_t _size = (s); \
2079 void *_res = realloc(_ptr, _size); \
2080 ALLOCDEBUG \
2081 fprintf(stderr, \
2082 "#realloc(%p,%zu) -> %p" \
2083 " %s[%s:%d]\n", \
2084 _ptr, _size, _res, \
2085 __func__, __FILE__, __LINE__); \
2086 _res; \
2087 })
2088#define free(p) \
2089 ({ \
2090 void *_ptr = (p); \
2091 ALLOCDEBUG \
2092 fprintf(stderr, \
2093 "#free(%p)" \
2094 " %s[%s:%d]\n", \
2095 _ptr, \
2096 __func__, __FILE__, __LINE__); \
2097 free(_ptr); \
2098 })
2099#else
2100static inline void *
2101GDKmalloc_debug(size_t size, const char *filename, int lineno)
2102{
2103 void *res = GDKmalloc(size);
2104 ALLOCDEBUG fprintf(stderr,
2105 "#GDKmalloc(%zu) -> %p [%s:%d]\n",
2106 size, res, filename, lineno);
2107 return res;
2108}
2109#define GDKmalloc(s) GDKmalloc_debug((s), __FILE__, __LINE__)
2110static inline void *
2111GDKzalloc_debug(size_t size, const char *filename, int lineno)
2112{
2113 void *res = GDKzalloc(size);
2114 ALLOCDEBUG fprintf(stderr,
2115 "#GDKzalloc(%zu) -> %p [%s:%d]\n",
2116 size, res, filename, lineno);
2117 return res;
2118}
2119#define GDKzalloc(s) GDKzalloc_debug((s), __FILE__, __LINE__)
2120static inline void *
2121GDKrealloc_debug(void *ptr, size_t size, const char *filename, int lineno)
2122{
2123 void *res = GDKrealloc(ptr, size);
2124 ALLOCDEBUG fprintf(stderr,
2125 "#GDKrealloc(%p,%zu) -> "
2126 "%p [%s:%d]\n",
2127 ptr, size, res,
2128 filename, lineno);
2129 return res;
2130}
2131#define GDKrealloc(p, s) GDKrealloc_debug((p), (s), __FILE__, __LINE__)
2132static inline void
2133GDKfree_debug(void *ptr, const char *filename, int lineno)
2134{
2135 ALLOCDEBUG fprintf(stderr, "#GDKfree(%p) [%s:%d]\n",
2136 ptr, filename, lineno);
2137 GDKfree(ptr);
2138}
2139#define GDKfree(p) GDKfree_debug((p), __FILE__, __LINE__)
2140static inline char *
2141GDKstrdup_debug(const char *str, const char *filename, int lineno)
2142{
2143 void *res = GDKstrdup(str);
2144 ALLOCDEBUG fprintf(stderr, "#GDKstrdup(len=%zu) -> "
2145 "%p [%s:%d]\n",
2146 str ? strlen(str) : 0, res, filename, lineno);
2147 return res;
2148}
2149#define GDKstrdup(s) GDKstrdup_debug((s), __FILE__, __LINE__)
2150static inline char *
2151GDKstrndup_debug(const char *str, size_t n, const char *filename, int lineno)
2152{
2153 void *res = GDKstrndup(str, n);
2154 ALLOCDEBUG fprintf(stderr, "#GDKstrndup(len=%zu) -> "
2155 "%p [%s:%d]\n",
2156 n, res, filename, lineno);
2157 return res;
2158}
2159#define GDKstrndup(s, n) GDKstrndup_debug((s), (n), __FILE__, __LINE__)
2160static inline void *
2161GDKmmap_debug(const char *path, int mode, size_t len, const char *filename, int lineno)
2162{
2163 void *res = GDKmmap(path, mode, len);
2164 ALLOCDEBUG fprintf(stderr,
2165 "#GDKmmap(%s,0x%x,%zu) -> "
2166 "%p [%s:%d]\n",
2167 path ? path : "NULL", mode, len,
2168 res, filename, lineno);
2169 return res;
2170}
2171#define GDKmmap(p, m, l) GDKmmap_debug((p), (m), (l), __FILE__, __LINE__)
2172static inline void *
2173malloc_debug(size_t size, const char *filename, int lineno)
2174{
2175 void *res = malloc(size);
2176 ALLOCDEBUG fprintf(stderr,
2177 "#malloc(%zu) -> %p [%s:%d]\n",
2178 size, res, filename, lineno);
2179 return res;
2180}
2181#define malloc(s) malloc_debug((s), __FILE__, __LINE__)
2182static inline void *
2183calloc_debug(size_t nmemb, size_t size, const char *filename, int lineno)
2184{
2185 void *res = calloc(nmemb, size);
2186 ALLOCDEBUG fprintf(stderr,
2187 "#calloc(%zu,%zu) -> "
2188 "%p [%s:%d]\n",
2189 nmemb, size, res, filename, lineno);
2190 return res;
2191}
2192#define calloc(n, s) calloc_debug((n), (s), __FILE__, __LINE__)
2193static inline void *
2194realloc_debug(void *ptr, size_t size, const char *filename, int lineno)
2195{
2196 void *res = realloc(ptr, size);
2197 ALLOCDEBUG fprintf(stderr,
2198 "#realloc(%p,%zu) -> "
2199 "%p [%s:%d]\n",
2200 ptr, size, res,
2201 filename, lineno);
2202 return res;
2203}
2204#define realloc(p, s) realloc_debug((p), (s), __FILE__, __LINE__)
2205static inline void
2206free_debug(void *ptr, const char *filename, int lineno)
2207{
2208 ALLOCDEBUG fprintf(stderr, "#free(%p) [%s:%d]\n",
2209 ptr, filename, lineno);
2210 free(ptr);
2211}
2212#define free(p) free_debug((p), __FILE__, __LINE__)
2213#endif
2214#endif
2215
2216/*
2217 * @- GDK error handling
2218 * @multitable @columnfractions 0.08 0.7
2219 * @item str
2220 * @tab
2221 * GDKmessage
2222 * @item bit
2223 * @tab
2224 * GDKfatal(str msg)
2225 * @item int
2226 * @tab
2227 * GDKwarning(str msg)
2228 * @item int
2229 * @tab
2230 * GDKerror (str msg)
2231 * @item int
2232 * @tab
2233 * GDKgoterrors ()
2234 * @item int
2235 * @tab
2236 * GDKsyserror (str msg)
2237 * @item str
2238 * @tab
2239 * GDKerrbuf
2240 * @item
2241 * @tab GDKsetbuf (str buf)
2242 * @end multitable
2243 *
2244 * The error handling mechanism is not sophisticated yet. Experience
2245 * should show if this mechanism is sufficient. Most routines return
2246 * a pointer with zero to indicate an error.
2247 *
2248 * The error messages are also copied to standard output. The last
2249 * error message is kept around in a global variable.
2250 *
2251 * Error messages can also be collected in a user-provided buffer,
2252 * instead of being echoed to a stream. This is a thread-specific
2253 * issue; you want to decide on the error mechanism on a
2254 * thread-specific basis. This effect is established with
2255 * GDKsetbuf. The memory (de)allocation of this buffer, that must at
2256 * least be 1024 chars long, is entirely by the user. A pointer to
2257 * this buffer is kept in the pseudo-variable GDKerrbuf. Normally,
2258 * this is a NULL pointer.
2259 */
2260#define GDKMAXERRLEN 10240
2261#define GDKWARNING "!WARNING: "
2262#define GDKERROR "!ERROR: "
2263#define GDKMESSAGE "!OS: "
2264#define GDKFATAL "!FATAL: "
2265
2266/* Data Distilleries uses ICU for internationalization of some MonetDB error messages */
2267
2268gdk_export void GDKerror(_In_z_ _Printf_format_string_ const char *format, ...)
2269 __attribute__((__format__(__printf__, 1, 2)));
2270gdk_export void GDKsyserror(_In_z_ _Printf_format_string_ const char *format, ...)
2271 __attribute__((__format__(__printf__, 1, 2)));
2272#ifndef HAVE_EMBEDDED
2273gdk_export _Noreturn void GDKfatal(_In_z_ _Printf_format_string_ const char *format, ...)
2274 __attribute__((__format__(__printf__, 1, 2)));
2275#else
2276gdk_export void GDKfatal(_In_z_ _Printf_format_string_ const char *format, ...)
2277 __attribute__((__format__(__printf__, 1, 2)));
2278#endif
2279gdk_export void GDKclrerr(void);
2280
2281#include "gdk_delta.h"
2282#include "gdk_hash.h"
2283#include "gdk_bbp.h"
2284#include "gdk_utils.h"
2285
2286/* functions defined in gdk_bat.c */
2287gdk_export gdk_return void_replace_bat(BAT *b, BAT *p, BAT *u, bool force)
2288 __attribute__((__warn_unused_result__));
2289gdk_export gdk_return void_inplace(BAT *b, oid id, const void *val, bool force)
2290 __attribute__((__warn_unused_result__));
2291gdk_export BAT *BATattach(int tt, const char *heapfile, role_t role);
2292
2293#ifdef NATIVE_WIN32
2294#ifdef _MSC_VER
2295#define fileno _fileno
2296#endif
2297#define fdopen _fdopen
2298#define putenv _putenv
2299#endif
2300
2301/* Return a pointer to the value contained in V. Also see VALget
2302 * which returns a void *. */
2303static inline const void *
2304VALptr(const ValRecord *v)
2305{
2306 switch (ATOMstorage(v->vtype)) {
2307 case TYPE_void: return (const void *) &v->val.oval;
2308 case TYPE_bte: return (const void *) &v->val.btval;
2309 case TYPE_sht: return (const void *) &v->val.shval;
2310 case TYPE_int: return (const void *) &v->val.ival;
2311 case TYPE_flt: return (const void *) &v->val.fval;
2312 case TYPE_dbl: return (const void *) &v->val.dval;
2313 case TYPE_lng: return (const void *) &v->val.lval;
2314#ifdef HAVE_HGE
2315 case TYPE_hge: return (const void *) &v->val.hval;
2316#endif
2317 case TYPE_str: return (const void *) v->val.sval;
2318 default: return (const void *) v->val.pval;
2319 }
2320}
2321
2322/*
2323 * The kernel maintains a central table of all active threads. They
2324 * are indexed by their tid. The structure contains information on the
2325 * input/output file descriptors, which should be set before a
2326 * database operation is started. It ensures that output is delivered
2327 * to the proper client.
2328 *
2329 * The Thread structure should be ideally made directly accessible to
2330 * each thread. This speeds up access to tid and file descriptors.
2331 */
2332#define THREADS 1024
2333#define THREADDATA 3
2334
2335typedef struct threadStruct {
2336 int tid; /* logical ID by MonetDB; val == index
2337 * into this array + 1 (0 is
2338 * invalid) */
2339 ATOMIC_TYPE pid; /* thread id, 0 = unallocated */
2340 str name;
2341 void *data[THREADDATA];
2342 uintptr_t sp;
2343} ThreadRec, *Thread;
2344
2345
2346gdk_export int THRgettid(void);
2347gdk_export Thread THRget(int tid);
2348gdk_export MT_Id THRcreate(void (*f) (void *), void *arg, enum MT_thr_detach d, const char *name);
2349gdk_export void THRdel(Thread t);
2350gdk_export void THRsetdata(int, void *);
2351gdk_export void *THRgetdata(int);
2352gdk_export int THRhighwater(void);
2353
2354gdk_export void *THRdata[THREADDATA];
2355
2356#define GDKstdout ((stream*)THRdata[0])
2357#define GDKstdin ((stream*)THRdata[1])
2358
2359#define GDKerrbuf ((char*)THRgetdata(2))
2360#define GDKsetbuf(x) THRsetdata(2,(void *)(x))
2361
2362#define THRget_errbuf(t) ((char*)t->data[2])
2363#define THRset_errbuf(t,b) (t->data[2] = b)
2364
2365#ifndef GDK_NOLINK
2366
2367static inline bat
2368BBPcheck(bat x, const char *y)
2369{
2370 if (!is_bat_nil(x)) {
2371 assert(x > 0);
2372
2373 if (x < 0 || x >= getBBPsize() || BBP_logical(x) == NULL) {
2374 CHECKDEBUG fprintf(stderr,"#%s: range error %d\n", y, (int) x);
2375 } else {
2376 return x;
2377 }
2378 }
2379 return 0;
2380}
2381
2382static inline BAT *
2383BATdescriptor(bat i)
2384{
2385 BAT *b = NULL;
2386
2387 if (BBPcheck(i, "BATdescriptor")) {
2388 if (BBPfix(i) <= 0)
2389 return NULL;
2390 b = BBP_cache(i);
2391 if (b == NULL)
2392 b = BBPdescriptor(i);
2393 }
2394 return b;
2395}
2396
2397static inline void *
2398Tpos(BATiter *bi, BUN p)
2399{
2400 bi->tvid = BUNtoid(bi->b, p);
2401 return (void*)&bi->tvid;
2402}
2403
2404#endif
2405
2406/*
2407 * @+ Transaction Management
2408 * @multitable @columnfractions 0.08 0.7
2409 * @item int
2410 * @tab
2411 * TMcommit ()
2412 * @item int
2413 * @tab
2414 * TMabort ()
2415 * @item int
2416 * @tab
2417 * TMsubcommit ()
2418 * @end multitable
2419 *
2420 * MonetDB by default offers a global transaction environment. The
2421 * global transaction involves all activities on all persistent BATs
2422 * by all threads. Each global transaction ends with either TMabort
2423 * or TMcommit, and immediately starts a new transaction. TMcommit
2424 * implements atomic commit to disk on the collection of all
2425 * persistent BATs. For all persistent BATs, the global commit also
2426 * flushes the delta status for these BATs (see
2427 * BATcommit/BATabort). This allows to perform TMabort quickly in
2428 * memory (without re-reading all disk images from disk). The
2429 * collection of which BATs is persistent is also part of the global
2430 * transaction state. All BATs that where persistent at the last
2431 * commit, but were made transient since then, are made persistent
2432 * again by TMabort. In other words, BATs that are deleted, are only
2433 * physically deleted at TMcommit time. Until that time, rollback
2434 * (TMabort) is possible.
2435 *
2436 * Use of TMabort is currently NOT RECOMMENDED due to two bugs:
2437 *
2438 * @itemize
2439 * @item
2440 * TMabort after a failed %TMcommit@ does not bring us back to the
2441 * previous committed state; but to the state at the failed TMcommit.
2442 * @item
2443 * At runtime, TMabort does not undo BAT name changes, whereas a cold
2444 * MonetDB restart does.
2445 * @end itemize
2446 *
2447 * In effect, the problems with TMabort reduce the functionality of
2448 * the global transaction mechanism to consistent checkpointing at
2449 * each TMcommit. For many applications, consistent checkpointingis
2450 * enough.
2451 *
2452 * Extension modules exist that provide fine grained locking (lock
2453 * module) and Write Ahead Logging (sqlserver). Applications that
2454 * need more fine-grained transactions, should build this on top of
2455 * these extension primitives.
2456 *
2457 * TMsubcommit is intended to quickly add or remove BATs from the
2458 * persistent set. In both cases, rollback is not necessary, such that
2459 * the commit protocol can be accelerated. It comes down to writing a
2460 * new BBP.dir.
2461 *
2462 * Its parameter is a BAT-of-BATs (in the tail); the persistence
2463 * status of that BAT is committed. We assume here that the calling
2464 * thread has exclusive access to these bats. An error is reported if
2465 * you try to partially commit an already committed persistent BAT (it
2466 * needs the rollback mechanism).
2467 */
2468gdk_export gdk_return TMcommit(void);
2469gdk_export void TMabort(void);
2470gdk_export gdk_return TMsubcommit(BAT *bl);
2471gdk_export gdk_return TMsubcommit_list(bat *subcommit, int cnt);
2472
2473/*
2474 * @- Delta Management
2475 * @multitable @columnfractions 0.08 0.6
2476 * @item BAT *
2477 * @tab BATcommit (BAT *b)
2478 * @item BAT *
2479 * @tab BATfakeCommit (BAT *b)
2480 * @item BAT *
2481 * @tab BATundo (BAT *b)
2482 * @end multitable
2483 *
2484 * The BAT keeps track of updates with respect to a 'previous state'.
2485 * Do not confuse 'previous state' with 'stable' or
2486 * 'commited-on-disk', because these concepts are not always the
2487 * same. In particular, they diverge when BATcommit, BATfakecommit,
2488 * and BATundo are called explictly, bypassing the normal global
2489 * TMcommit protocol (some applications need that flexibility).
2490 *
2491 * BATcommit make the current BAT state the new 'stable state'. This
2492 * happens inside the global TMcommit on all persistent BATs previous
2493 * to writing all bats to persistent storage using a BBPsync.
2494 *
2495 * EXPERT USE ONLY: The routine BATfakeCommit updates the delta
2496 * information on BATs and clears the dirty bit. This avoids any
2497 * copying to disk. Expert usage only, as it bypasses the global
2498 * commit protocol, and changes may be lost after quitting or crashing
2499 * MonetDB.
2500 *
2501 * BATabort undo-s all changes since the previous state. The global
2502 * TMabort achieves a rollback to the previously committed state by
2503 * doing BATabort on all persistent bats.
2504 *
2505 * BUG: after a failed TMcommit, TMabort does not do anything because
2506 * TMcommit does the BATcommits @emph{before} attempting to sync to
2507 * disk instead of @sc{after} doing this.
2508 */
2509gdk_export void BATcommit(BAT *b);
2510gdk_export void BATfakeCommit(BAT *b);
2511gdk_export void BATundo(BAT *b);
2512
2513/*
2514 * @+ BAT Alignment and BAT views
2515 * @multitable @columnfractions 0.08 0.7
2516 * @item int
2517 * @tab ALIGNsynced (BAT* b1, BAT* b2)
2518 * @item int
2519 * @tab ALIGNsync (BAT *b1, BAT *b2)
2520 * @item int
2521 * @tab ALIGNrelated (BAT *b1, BAT *b2)
2522 *
2523 * @item BAT*
2524 * @tab VIEWcreate (oid seq, BAT *b)
2525 * @item int
2526 * @tab isVIEW (BAT *b)
2527 * @item bat
2528 * @tab VIEWhparent (BAT *b)
2529 * @item bat
2530 * @tab VIEWtparent (BAT *b)
2531 * @item BAT*
2532 * @tab VIEWreset (BAT *b)
2533 * @end multitable
2534 *
2535 * Alignments of two columns of a BAT means that the system knows
2536 * whether these two columns are exactly equal. Relatedness of two
2537 * BATs means that one pair of columns (either head or tail) of both
2538 * BATs is aligned. The first property is checked by ALIGNsynced, the
2539 * latter by ALIGNrelated.
2540 *
2541 * All algebraic BAT commands propagate the properties - including
2542 * alignment properly on their results.
2543 *
2544 * VIEW BATs are BATs that lend their storage from a parent BAT. They
2545 * are just a descriptor that points to the data in this parent BAT. A
2546 * view is created with VIEWcreate. The cache id of the parent (if
2547 * any) is returned by VIEWtparent (otherwise it returns 0).
2548 *
2549 * VIEW bats are read-only!!
2550 *
2551 * VIEWreset creates a normal BAT with the same contents as its view
2552 * parameter (it converts void columns with seqbase!=nil to
2553 * materialized oid columns).
2554 */
2555gdk_export int ALIGNsynced(BAT *b1, BAT *b2);
2556
2557gdk_export void BATassertProps(BAT *b);
2558
2559#define BATPROPS_QUICK 0 /* only derive easy (non-resource consuming) properties */
2560#define BATPROPS_ALL 1 /* derive all possible properties; no matter what cost (key=hash) */
2561#define BATPROPS_CHECK 3 /* BATPROPS_ALL, but start from scratch and report illegally set properties */
2562
2563gdk_export BAT *VIEWcreate(oid seq, BAT *b);
2564gdk_export void VIEWbounds(BAT *b, BAT *view, BUN l, BUN h);
2565
2566#define ALIGNapp(x, y, f, e) \
2567 do { \
2568 if (!(f) && ((x)->batRestricted == BAT_READ || \
2569 (x)->batSharecnt > 0)) { \
2570 GDKerror("%s: access denied to %s, aborting.\n", \
2571 (y), BATgetId(x)); \
2572 return (e); \
2573 } \
2574 } while (false)
2575
2576/* the parentid in a VIEW is correct for the normal view. We must
2577 * correct for the reversed view.
2578 */
2579#define isVIEW(x) \
2580 (assert((x)->batCacheid > 0), \
2581 ((x)->theap.parentid || \
2582 ((x)->tvheap && (x)->tvheap->parentid != (x)->batCacheid)))
2583
2584#define VIEWtparent(x) ((x)->theap.parentid)
2585#define VIEWvtparent(x) ((x)->tvheap == NULL || (x)->tvheap->parentid == (x)->batCacheid ? 0 : (x)->tvheap->parentid)
2586
2587/*
2588 * @+ BAT Iterators
2589 * @multitable @columnfractions 0.15 0.7
2590 * @item BATloop
2591 * @tab
2592 * (BAT *b; BUN p, BUN q)
2593 * @item BATloopDEL
2594 * @tab
2595 * (BAT *b; BUN p; BUN q; int dummy)
2596 * @item HASHloop
2597 * @tab
2598 * (BAT *b; Hash *h, size_t dummy; ptr value)
2599 * @item HASHloop_bte
2600 * @tab
2601 * (BAT *b; Hash *h, size_t idx; bte *value, BUN w)
2602 * @item HASHloop_sht
2603 * @tab
2604 * (BAT *b; Hash *h, size_t idx; sht *value, BUN w)
2605 * @item HASHloop_int
2606 * @tab
2607 * (BAT *b; Hash *h, size_t idx; int *value, BUN w)
2608 * @item HASHloop_flt
2609 * @tab
2610 * (BAT *b; Hash *h, size_t idx; flt *value, BUN w)
2611 * @item HASHloop_lng
2612 * @tab
2613 * (BAT *b; Hash *h, size_t idx; lng *value, BUN w)
2614 * @item HASHloop_hge
2615 * @tab
2616 * (BAT *b; Hash *h, size_t idx; hge *value, BUN w)
2617 * @item HASHloop_dbl
2618 * @tab
2619 * (BAT *b; Hash *h, size_t idx; dbl *value, BUN w)
2620 * @item HASHloop_str
2621 * @tab
2622 * (BAT *b; Hash *h, size_t idx; str value, BUN w)
2623 * @item HASHlooploc
2624 * @tab
2625 * (BAT *b; Hash *h, size_t idx; ptr value, BUN w)
2626 * @item HASHloopvar
2627 * @tab
2628 * (BAT *b; Hash *h, size_t idx; ptr value, BUN w)
2629 * @end multitable
2630 *
2631 * The @emph{BATloop()} looks like a function call, but is actually a
2632 * macro.
2633 *
2634 * @- simple sequential scan
2635 * The first parameter is a BAT, the p and q are BUN pointers, where p
2636 * is the iteration variable.
2637 */
2638#define BATloop(r, p, q) \
2639 for (q = BUNlast(r), p = 0; p < q; p++)
2640
2641/*
2642 * @- hash-table supported loop over BUNs
2643 * The first parameter `b' is a BAT, the second (`h') should point to
2644 * `b->thash', and `v' a pointer to an atomic value (corresponding
2645 * to the head column of `b'). The 'hb' is an integer index, pointing
2646 * out the `hb'-th BUN.
2647 */
2648#define GDK_STREQ(l,r) (*(char*) (l) == *(char*) (r) && !strcmp(l,r))
2649
2650#define HASHloop(bi, h, hb, v) \
2651 for (hb = HASHget(h, HASHprobe((h), v)); \
2652 hb != HASHnil(h); \
2653 hb = HASHgetlink(h,hb)) \
2654 if (ATOMcmp(h->type, v, BUNtail(bi, hb)) == 0)
2655#define HASHloop_str_hv(bi, h, hb, v) \
2656 for (hb = HASHget((h),((BUN *) (v))[-1]&(h)->mask); \
2657 hb != HASHnil(h); \
2658 hb = HASHgetlink(h,hb)) \
2659 if (GDK_STREQ(v, BUNtvar(bi, hb)))
2660#define HASHloop_str(bi, h, hb, v) \
2661 for (hb = HASHget((h),strHash(v)&(h)->mask); \
2662 hb != HASHnil(h); \
2663 hb = HASHgetlink(h,hb)) \
2664 if (GDK_STREQ(v, BUNtvar(bi, hb)))
2665
2666/*
2667 * HASHloops come in various flavors, from the general HASHloop, as
2668 * above, to specialized versions (for speed) where the type is known
2669 * (e.g. HASHloop_int), or the fact that the atom is fixed-sized
2670 * (HASHlooploc) or variable-sized (HASHloopvar).
2671 */
2672#define HASHlooploc(bi, h, hb, v) \
2673 for (hb = HASHget(h, HASHprobe(h, v)); \
2674 hb != HASHnil(h); \
2675 hb = HASHgetlink(h,hb)) \
2676 if (ATOMcmp(h->type, v, BUNtloc(bi, hb)) == 0)
2677#define HASHloopvar(bi, h, hb, v) \
2678 for (hb = HASHget(h,HASHprobe(h, v)); \
2679 hb != HASHnil(h); \
2680 hb = HASHgetlink(h,hb)) \
2681 if (ATOMcmp(h->type, v, BUNtvar(bi, hb)) == 0)
2682
2683#define HASHloop_TYPE(bi, h, hb, v, TYPE) \
2684 for (hb = HASHget(h, hash_##TYPE(h, v)); \
2685 hb != HASHnil(h); \
2686 hb = HASHgetlink(h,hb)) \
2687 if (* (const TYPE *) (v) == * (const TYPE *) BUNtloc(bi, hb))
2688
2689#define HASHloop_bte(bi, h, hb, v) HASHloop_TYPE(bi, h, hb, v, bte)
2690#define HASHloop_sht(bi, h, hb, v) HASHloop_TYPE(bi, h, hb, v, sht)
2691#define HASHloop_int(bi, h, hb, v) HASHloop_TYPE(bi, h, hb, v, int)
2692#define HASHloop_lng(bi, h, hb, v) HASHloop_TYPE(bi, h, hb, v, lng)
2693#ifdef HAVE_HGE
2694#define HASHloop_hge(bi, h, hb, v) HASHloop_TYPE(bi, h, hb, v, hge)
2695#endif
2696#define HASHloop_flt(bi, h, hb, v) HASHloop_TYPE(bi, h, hb, v, flt)
2697#define HASHloop_dbl(bi, h, hb, v) HASHloop_TYPE(bi, h, hb, v, dbl)
2698
2699/*
2700 * @+ Common BAT Operations
2701 * Much used, but not necessarily kernel-operations on BATs.
2702 *
2703 * For each BAT we maintain its dimensions as separately accessible
2704 * properties. They can be used to improve query processing at higher
2705 * levels.
2706 */
2707enum prop_t {
2708 GDK_MIN_VALUE = 3, /* smallest non-nil value in BAT */
2709 GDK_MAX_VALUE, /* largest non-nil value in BAT */
2710 GDK_HASH_MASK, /* last used hash mask */
2711};
2712
2713/*
2714 * @- BAT relational operators
2715 *
2716 * The full-materialization policy intermediate results in MonetDB
2717 * means that a join can produce an arbitrarily large result and choke
2718 * the system. The Data Distilleries tool therefore first computes the
2719 * join result size before the actual join (better waste time than
2720 * crash the server). To exploit that perfect result size knowledge,
2721 * an result-size estimate parameter was added to all equi-join
2722 * implementations. TODO: add this for
2723 * semijoin/select/unique/diff/intersect
2724 *
2725 * @- modes for thethajoin
2726 */
2727#define JOIN_EQ 0
2728#define JOIN_LT (-1)
2729#define JOIN_LE (-2)
2730#define JOIN_GT 1
2731#define JOIN_GE 2
2732#define JOIN_BAND 3
2733#define JOIN_NE (-3)
2734
2735gdk_export BAT *BATselect(BAT *b, BAT *s, const void *tl, const void *th, bool li, bool hi, bool anti);
2736gdk_export BAT *BATthetaselect(BAT *b, BAT *s, const void *val, const char *op);
2737
2738gdk_export BAT *BATconstant(oid hseq, int tt, const void *val, BUN cnt, role_t role);
2739gdk_export gdk_return BATsubcross(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr)
2740 __attribute__((__warn_unused_result__));
2741
2742gdk_export gdk_return BATleftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, BUN estimate)
2743 __attribute__((__warn_unused_result__));
2744gdk_export gdk_return BATouterjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, BUN estimate)
2745 __attribute__((__warn_unused_result__));
2746gdk_export gdk_return BATthetajoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, int op, bool nil_matches, BUN estimate)
2747 __attribute__((__warn_unused_result__));
2748gdk_export gdk_return BATsemijoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, BUN estimate)
2749 __attribute__((__warn_unused_result__));
2750gdk_export BAT *BATintersect(BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, BUN estimate);
2751gdk_export BAT *BATdiff(BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, bool not_in, BUN estimate);
2752gdk_export gdk_return BATjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, BUN estimate)
2753 __attribute__((__warn_unused_result__));
2754gdk_export gdk_return BATbandjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, const void *c1, const void *c2, bool li, bool hi, BUN estimate)
2755 __attribute__((__warn_unused_result__));
2756gdk_export gdk_return BATrangejoin(BAT **r1p, BAT **r2p, BAT *l, BAT *rl, BAT *rh, BAT *sl, BAT *sr, bool li, bool hi, bool anti, bool symmetric, BUN estimate)
2757 __attribute__((__warn_unused_result__));
2758gdk_export BAT *BATproject(BAT *l, BAT *r);
2759gdk_export BAT *BATprojectchain(BAT **bats);
2760
2761gdk_export BAT *BATslice(BAT *b, BUN low, BUN high);
2762
2763gdk_export BAT *BATunique(BAT *b, BAT *s);
2764
2765gdk_export BAT *BATmergecand(BAT *a, BAT *b);
2766gdk_export BAT *BATintersectcand(BAT *a, BAT *b);
2767gdk_export BAT *BATdiffcand(BAT *a, BAT *b);
2768
2769gdk_export gdk_return BATfirstn(BAT **topn, BAT **gids, BAT *b, BAT *cands, BAT *grps, BUN n, bool asc, bool nilslast, bool distinct)
2770 __attribute__((__warn_unused_result__));
2771
2772#include "gdk_cand.h"
2773#include "gdk_calc.h"
2774
2775/*
2776 * @- BAT sample operators
2777 *
2778 * @multitable @columnfractions 0.08 0.7
2779 * @item BAT *
2780 * @tab BATsample (BAT *b, n)
2781 * @end multitable
2782 *
2783 * The routine BATsample returns a random sample on n BUNs of a BAT.
2784 *
2785 */
2786gdk_export BAT *BATsample(BAT *b, BUN n);
2787gdk_export BAT *BATsample_with_seed(BAT *b, BUN n, unsigned seed);
2788
2789/*
2790 *
2791 */
2792#define MAXPARAMS 32
2793
2794#endif /* _GDK_H_ */
2795