1 | /* |
2 | * This Source Code Form is subject to the terms of the Mozilla Public |
3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
5 | * |
6 | * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V. |
7 | */ |
8 | |
9 | /* |
10 | * @t The Goblin Database Kernel |
11 | * @v Version 3.05 |
12 | * @a Martin L. Kersten, Peter Boncz, Niels Nes, Sjoerd Mullender |
13 | * |
14 | * @+ The Inner Core |
15 | * The innermost library of the MonetDB database system is formed by |
16 | * the library called GDK, an abbreviation of Goblin Database Kernel. |
17 | * Its development was originally rooted in the design of a pure |
18 | * active-object-oriented programming language, before development |
19 | * was shifted towards a re-usable database kernel engine. |
20 | * |
21 | * GDK is a C library that provides ACID properties on a DSM model |
22 | * @tex |
23 | * [@cite{Copeland85}] |
24 | * @end tex |
25 | * , using main-memory |
26 | * database algorithms |
27 | * @tex |
28 | * [@cite{Garcia-Molina92}] |
29 | * @end tex |
30 | * built on virtual-memory |
31 | * OS primitives and multi-threaded parallelism. |
32 | * Its implementation has undergone various changes over its decade |
33 | * of development, many of which were driven by external needs to |
34 | * obtain a robust and fast database system. |
35 | * |
36 | * The coding scheme explored in GDK has also laid a foundation to |
37 | * communicate over time experiences and to provide (hopefully) |
38 | * helpful advice near to the place where the code-reader needs it. |
39 | * Of course, over such a long time the documentation diverges from |
40 | * reality. Especially in areas where the environment of this package |
41 | * is being described. |
42 | * Consider such deviations as historic landmarks, e.g. crystallization |
43 | * of brave ideas and mistakes rectified at a later stage. |
44 | * |
45 | * @+ Short Outline |
46 | * The facilities provided in this implementation are: |
47 | * @itemize |
48 | * @item |
49 | * GDK or Goblin Database Kernel routines for session management |
50 | * @item |
51 | * BAT routines that define the primitive operations on the |
52 | * database tables (BATs). |
53 | * @item |
54 | * BBP routines to manage the BAT Buffer Pool (BBP). |
55 | * @item |
56 | * ATOM routines to manipulate primitive types, define new types |
57 | * using an ADT interface. |
58 | * @item |
59 | * HEAP routines for manipulating heaps: linear spaces of memory |
60 | * that are GDK's vehicle of mass storage (on which BATs are built). |
61 | * @item |
62 | * DELTA routines to access inserted/deleted elements within a |
63 | * transaction. |
64 | * @item |
65 | * HASH routines for manipulating GDK's built-in linear-chained |
66 | * hash tables, for accelerating lookup searches on BATs. |
67 | * @item |
68 | * TM routines that provide basic transaction management primitives. |
69 | * @item |
70 | * TRG routines that provided active database support. [DEPRECATED] |
71 | * @item |
72 | * ALIGN routines that implement BAT alignment management. |
73 | * @end itemize |
74 | * |
75 | * The Binary Association Table (BAT) is the lowest level of storage |
76 | * considered in the Goblin runtime system |
77 | * @tex |
78 | * [@cite{Goblin}] |
79 | * @end tex |
80 | * . A BAT is a |
81 | * self-descriptive main-memory structure that represents the |
82 | * @strong{binary relationship} between two atomic types. The |
83 | * association can be defined over: |
84 | * @table @code |
85 | * @item void: |
86 | * virtual-OIDs: a densely ascending column of OIDs (takes zero-storage). |
87 | * @item bit: |
88 | * Booleans, implemented as one byte values. |
89 | * @item bte: |
90 | * Tiny (1-byte) integers (8-bit @strong{integer}s). |
91 | * @item sht: |
92 | * Short integers (16-bit @strong{integer}s). |
93 | * @item int: |
94 | * This is the C @strong{int} type (32-bit). |
95 | * @item oid: |
96 | * Unique @strong{long int} values uses as object identifier. Highest |
97 | * bit cleared always. Thus, oids-s are 31-bit numbers on |
98 | * 32-bit systems, and 63-bit numbers on 64-bit systems. |
99 | * @item ptr: |
100 | * Memory pointer values. DEPRECATED. Can only be stored in transient |
101 | * BATs. |
102 | * @item flt: |
103 | * The IEEE @strong{float} type. |
104 | * @item dbl: |
105 | * The IEEE @strong{double} type. |
106 | * @item lng: |
107 | * Longs: the C @strong{long long} type (64-bit integers). |
108 | * @item hge: |
109 | * "huge" integers: the GCC @strong{__int128} type (128-bit integers). |
110 | * @item str: |
111 | * UTF-8 strings (Unicode). A zero-terminated byte sequence. |
112 | * @item bat: |
113 | * Bat descriptor. This allows for recursive administered tables, but |
114 | * severely complicates transaction management. Therefore, they CAN |
115 | * ONLY BE STORED IN TRANSIENT BATs. |
116 | * @end table |
117 | * |
118 | * This model can be used as a back-end model underlying other -higher |
119 | * level- models, in order to achieve @strong{better performance} and |
120 | * @strong{data independence} in one go. The relational model and the |
121 | * object-oriented model can be mapped on BATs by vertically splitting |
122 | * every table (or class) for each attribute. Each such a column is |
123 | * then stored in a BAT with type @strong{bat[oid,attribute]}, where |
124 | * the unique object identifiers link tuples in the different BATs. |
125 | * Relationship attributes in the object-oriented model hence are |
126 | * mapped to @strong{bat[oid,oid]} tables, being equivalent to the |
127 | * concept of @emph{join indexes} @tex [@cite{Valduriez87}] @end tex . |
128 | * |
129 | * The set of built-in types can be extended with user-defined types |
130 | * through an ADT interface. They are linked with the kernel to |
131 | * obtain an enhanced library, or they are dynamically loaded upon |
132 | * request. |
133 | * |
134 | * Types can be derived from other types. They represent something |
135 | * different than that from which they are derived, but their internal |
136 | * storage management is equal. This feature facilitates the work of |
137 | * extension programmers, by enabling reuse of implementation code, |
138 | * but is also used to keep the GDK code portable from 32-bits to |
139 | * 64-bits machines: the @strong{oid} and @strong{ptr} types are |
140 | * derived from @strong{int} on 32-bits machines, but is derived from |
141 | * @strong{lng} on 64 bits machines. This requires changes in only two |
142 | * lines of code each. |
143 | * |
144 | * To accelerate lookup and search in BATs, GDK supports one built-in |
145 | * search accelerator: hash tables. We choose an implementation |
146 | * efficient for main-memory: bucket chained hash |
147 | * @tex |
148 | * [@cite{LehCar86,Analyti92}] |
149 | * @end tex |
150 | * . Alternatively, when the table is sorted, it will resort to |
151 | * merge-scan operations or binary lookups. |
152 | * |
153 | * BATs are built on the concept of heaps, which are large pieces of |
154 | * main memory. They can also consist of virtual memory, in case the |
155 | * working set exceeds main-memory. In this case, GDK supports |
156 | * operations that cluster the heaps of a BAT, in order to improve |
157 | * performance of its main-memory. |
158 | * |
159 | * |
160 | * @- Rationale |
161 | * The rationale for choosing a BAT as the building block for both |
162 | * relational and object-oriented system is based on the following |
163 | * observations: |
164 | * |
165 | * @itemize |
166 | * @item - |
167 | * Given the fact that CPU speed and main-memory increase in current |
168 | * workstation hardware for the last years has been exceeding IO |
169 | * access speed increase, traditional disk-page oriented algorithms do |
170 | * no longer take best advantage of hardware, in most database |
171 | * operations. |
172 | * |
173 | * Instead of having a disk-block oriented kernel with a large memory |
174 | * cache, we choose to build a main-memory kernel, that only under |
175 | * large data volumes slowly degrades to IO-bound performance, |
176 | * comparable to traditional systems |
177 | * @tex |
178 | * [@cite{boncz95,boncz96}] |
179 | * @end tex |
180 | * . |
181 | * |
182 | * @item - |
183 | * Traditional (disk-based) relational systems move too much data |
184 | * around to save on (main-memory) join operations. |
185 | * |
186 | * The fully decomposed store (DSM |
187 | * @tex |
188 | * [@cite{Copeland85})] |
189 | * @end tex |
190 | * assures that only those attributes of a relation that are needed, |
191 | * will have to be accessed. |
192 | * |
193 | * @item - |
194 | * The data management issues for a binary association is much |
195 | * easier to deal with than traditional @emph{struct}-based approaches |
196 | * encountered in relational systems. |
197 | * |
198 | * @item - |
199 | * Object-oriented systems often maintain a double cache, one with the |
200 | * disk-based representation and a C pointer-based main-memory |
201 | * structure. This causes expensive conversions and replicated |
202 | * storage management. GDK does not do such `pointer swizzling'. It |
203 | * used virtual-memory (@strong{mmap()}) and buffer management advice |
204 | * (@strong{madvise()}) OS primitives to cache only once. Tables take |
205 | * the same form in memory as on disk, making the use of this |
206 | * technique transparent |
207 | * @tex |
208 | * [@cite{oo7}] |
209 | * @end tex |
210 | * . |
211 | * @end itemize |
212 | * |
213 | * A RDBMS or OODBMS based on BATs strongly depends on our ability to |
214 | * efficiently support tuples and to handle small joins, respectively. |
215 | * |
216 | * The remainder of this document describes the Goblin Database kernel |
217 | * implementation at greater detail. It is organized as follows: |
218 | * @table @code |
219 | * @item @strong{GDK Interface}: |
220 | * |
221 | * It describes the global interface with which GDK sessions can be |
222 | * started and ended, and environment variables used. |
223 | * |
224 | * @item @strong{Binary Association Tables}: |
225 | * |
226 | * As already mentioned, these are the primary data structure of GDK. |
227 | * This chapter describes the kernel operations for creation, |
228 | * destruction and basic manipulation of BATs and BUNs (i.e. tuples: |
229 | * Binary UNits). |
230 | * |
231 | * @item @strong{BAT Buffer Pool:} |
232 | * |
233 | * All BATs are registered in the BAT Buffer Pool. This directory is |
234 | * used to guide swapping in and out of BATs. Here we find routines |
235 | * that guide this swapping process. |
236 | * |
237 | * @item @strong{GDK Extensibility:} |
238 | * |
239 | * Atoms can be defined using a unified ADT interface. There is also |
240 | * an interface to extend the GDK library with dynamically linked |
241 | * object code. |
242 | * |
243 | * @item @strong{GDK Utilities:} |
244 | * |
245 | * Memory allocation and error handling primitives are |
246 | * provided. Layers built on top of GDK should use them, for proper |
247 | * system monitoring. Thread management is also included here. |
248 | * |
249 | * @item @strong{Transaction Management:} |
250 | * |
251 | * For the time being, we just provide BAT-grained concurrency and |
252 | * global transactions. Work is needed here. |
253 | * |
254 | * @item @strong{BAT Alignment:} |
255 | * Due to the mapping of multi-ary datamodels onto the BAT model, we |
256 | * expect many correspondences among BATs, e.g. |
257 | * @emph{bat(oid,attr1),.. bat(oid,attrN)} vertical |
258 | * decompositions. Frequent activities will be to jump from one |
259 | * attribute to the other (`bunhopping'). If the head columns are |
260 | * equal lists in two BATs, merge or even array lookups can be used |
261 | * instead of hash lookups. The alignment interface makes these |
262 | * relations explicitly manageable. |
263 | * |
264 | * In GDK, complex data models are mapped with DSM on binary tables. |
265 | * Usually, one decomposes @emph{N}-ary relations into @emph{N} BATs |
266 | * with an @strong{oid} in the head column, and the attribute in the |
267 | * tail column. There may well be groups of tables that have the same |
268 | * sets of @strong{oid}s, equally ordered. The alignment interface is |
269 | * intended to make this explicit. Implementations can use this |
270 | * interface to detect this situation, and use cheaper algorithms |
271 | * (like merge-join, or even array lookup) instead. |
272 | * |
273 | * @item @strong{BAT Iterators:} |
274 | * |
275 | * Iterators are C macros that generally encapsulate a complex |
276 | * for-loop. They would be the equivalent of cursors in the SQL |
277 | * model. The macro interface (instead of a function call interface) |
278 | * is chosen to achieve speed when iterating main-memory tables. |
279 | * |
280 | * @item @strong{Common BAT Operations:} |
281 | * |
282 | * These are much used operations on BATs, such as aggregate functions |
283 | * and relational operators. They are implemented in terms of BAT- and |
284 | * BUN-manipulation GDK primitives. |
285 | * @end table |
286 | * |
287 | * @+ Interface Files |
288 | * In this section we summarize the user interface to the GDK library. |
289 | * It consist of a header file (gdk.h) and an object library |
290 | * (gdklib.a), which implements the required functionality. The header |
291 | * file must be included in any program that uses the library. The |
292 | * library must be linked with such a program. |
293 | * |
294 | * @- Database Context |
295 | * |
296 | * The MonetDB environment settings are collected in a configuration |
297 | * file. Amongst others it contains the location of the database |
298 | * directory. First, the database directory is closed for other |
299 | * servers running at the same time. Second, performance enhancements |
300 | * may take effect, such as locking the code into memory (if the OS |
301 | * permits) and preloading the data dictionary. An error at this |
302 | * stage normally lead to an abort. |
303 | */ |
304 | |
305 | #ifndef _GDK_H_ |
306 | #define _GDK_H_ |
307 | |
308 | /* standard includes upon which all configure tests depend */ |
309 | #ifdef HAVE_SYS_TYPES_H |
310 | # include <sys/types.h> |
311 | #endif |
312 | #ifdef HAVE_SYS_STAT_H |
313 | # include <sys/stat.h> |
314 | #endif |
315 | #include <stddef.h> |
316 | #include <string.h> |
317 | #ifdef HAVE_UNISTD_H |
318 | # include <unistd.h> |
319 | #endif |
320 | |
321 | #include <ctype.h> /* isspace etc. */ |
322 | |
323 | #ifdef HAVE_SYS_FILE_H |
324 | # include <sys/file.h> |
325 | #endif |
326 | |
327 | #ifdef HAVE_DIRENT_H |
328 | # include <dirent.h> |
329 | #endif |
330 | |
331 | #include <limits.h> /* for *_MIN and *_MAX */ |
332 | #include <float.h> /* for FLT_MAX and DBL_MAX */ |
333 | |
334 | #include "gdk_system.h" |
335 | #include "gdk_posix.h" |
336 | #include "stream.h" |
337 | #include "mstring.h" |
338 | |
339 | #undef MIN |
340 | #undef MAX |
341 | #define MAX(A,B) ((A)<(B)?(B):(A)) |
342 | #define MIN(A,B) ((A)>(B)?(B):(A)) |
343 | |
344 | /* defines from ctype with casts that allow passing char values */ |
345 | #define GDKisspace(c) isspace((unsigned char) (c)) |
346 | #define GDKisalnum(c) isalnum((unsigned char) (c)) |
347 | #define GDKisdigit(c) isdigit((unsigned char) (c)) |
348 | |
349 | #define BATDIR "bat" |
350 | #define TEMPDIR_NAME "TEMP_DATA" |
351 | |
352 | #define DELDIR BATDIR DIR_SEP_STR "DELETE_ME" |
353 | #define BAKDIR BATDIR DIR_SEP_STR "BACKUP" |
354 | #define SUBDIR BAKDIR DIR_SEP_STR "SUBCOMMIT" /* note K, not T */ |
355 | #define LEFTDIR BATDIR DIR_SEP_STR "LEFTOVERS" |
356 | #define TEMPDIR BATDIR DIR_SEP_STR TEMPDIR_NAME |
357 | |
358 | /* |
359 | See `man mserver5` or tools/mserver/mserver5.1 |
360 | for a documentation of the following debug options. |
361 | */ |
362 | |
363 | #define THRDMASK (1) |
364 | #define THRDDEBUG if (GDKdebug & THRDMASK) |
365 | #define CHECKMASK (1<<1) |
366 | #define CHECKDEBUG if (GDKdebug & CHECKMASK) |
367 | #define MEMMASK (1<<2) |
368 | #define MEMDEBUG if (GDKdebug & MEMMASK) |
369 | #define PROPMASK (1<<3) |
370 | #define PROPDEBUG if (GDKdebug & PROPMASK) |
371 | #define IOMASK (1<<4) |
372 | #define IODEBUG if (GDKdebug & IOMASK) |
373 | #define BATMASK (1<<5) |
374 | #define BATDEBUG if (GDKdebug & BATMASK) |
375 | /* PARSEMASK not used anymore |
376 | #define PARSEMASK (1<<6) |
377 | #define PARSEDEBUG if (GDKdebug & PARSEMASK) |
378 | */ |
379 | #define PARMASK (1<<7) |
380 | #define PARDEBUG if (GDKdebug & PARMASK) |
381 | /* HEADLESSMASK not used anymore |
382 | #define HEADLESSMASK (1<<8) |
383 | #define HEADLESSDEBUG if (GDKdebug & HEADLESSMASK) |
384 | */ |
385 | #define TMMASK (1<<9) |
386 | #define TMDEBUG if (GDKdebug & TMMASK) |
387 | #define TEMMASK (1<<10) |
388 | #define TEMDEBUG if (GDKdebug & TEMMASK) |
389 | /* DLMASK not used anymore |
390 | #define DLMASK (1<<11) |
391 | #define DLDEBUG if (GDKdebug & DLMASK) |
392 | */ |
393 | #define PERFMASK (1<<12) |
394 | #define PERFDEBUG if (GDKdebug & PERFMASK) |
395 | #define DELTAMASK (1<<13) |
396 | #define DELTADEBUG if (GDKdebug & DELTAMASK) |
397 | #define LOADMASK (1<<14) |
398 | #define LOADDEBUG if (GDKdebug & LOADMASK) |
399 | /* YACCMASK not used anymore |
400 | #define YACCMASK (1<<15) |
401 | #define YACCDEBUG if (GDKdebug & YACCMASK) |
402 | */ |
403 | /* |
404 | #define ?tcpip? if (GDKdebug&(1<<16)) |
405 | #define ?monet_multiplex? if (GDKdebug&(1<<17)) |
406 | #define ?ddbench? if (GDKdebug&(1<<18)) |
407 | #define ?ddbench? if (GDKdebug&(1<<19)) |
408 | #define ?ddbench? if (GDKdebug&(1<<20)) |
409 | */ |
410 | #define ACCELMASK (1<<20) |
411 | #define ACCELDEBUG if (GDKdebug & (ACCELMASK|ALGOMASK)) |
412 | #define ALGOMASK (1<<21) |
413 | #define ALGODEBUG if (GDKdebug & ALGOMASK) |
414 | #define ESTIMASK (1<<22) |
415 | #define ESTIDEBUG if (GDKdebug & ESTIMASK) |
416 | /* XPROPMASK not used anymore |
417 | #define XPROPMASK (1<<23) |
418 | #define XPROPDEBUG if (GDKdebug & XPROPMASK) |
419 | */ |
420 | |
421 | #define NOSYNCMASK (1<<24) |
422 | |
423 | #define DEADBEEFMASK (1<<25) |
424 | #define DEADBEEFCHK if (!(GDKdebug & DEADBEEFMASK)) |
425 | |
426 | #define ALLOCMASK (1<<26) |
427 | #define ALLOCDEBUG if (GDKdebug & ALLOCMASK) |
428 | |
429 | /* M5, only; cf., |
430 | * monetdb5/mal/mal.h |
431 | */ |
432 | #define OPTMASK (1<<27) |
433 | #define OPTDEBUG if (GDKdebug & OPTMASK) |
434 | |
435 | #define HEAPMASK (1<<28) |
436 | #define HEAPDEBUG if (GDKdebug & HEAPMASK) |
437 | |
438 | #define FORCEMITOMASK (1<<29) |
439 | #define FORCEMITODEBUG if (GDKdebug & FORCEMITOMASK) |
440 | |
441 | /* |
442 | * @- GDK session handling |
443 | * @multitable @columnfractions 0.08 0.7 |
444 | * @item int |
445 | * @tab GDKinit (char *db, char *dbpath, int allocmap) |
446 | * @item int |
447 | * @tab GDKexit (int status) |
448 | * @end multitable |
449 | * |
450 | * The session is bracketed by GDKinit and GDKexit. Initialization |
451 | * involves setting up the administration for database access, such as |
452 | * memory allocation for the database buffer pool. During the exit |
453 | * phase any pending transaction is aborted and the database is freed |
454 | * for access by other users. A zero is returned upon encountering an |
455 | * erroneous situation. |
456 | * |
457 | * @- Definitions |
458 | * The interface definitions for the application programs are shown |
459 | * below. The global variables should not be modified directly. |
460 | */ |
461 | #ifndef TRUE |
462 | #define TRUE true |
463 | #define FALSE false |
464 | #endif |
465 | |
466 | #define IDLENGTH 64 /* maximum BAT id length */ |
467 | #define BATMARGIN 1.2 /* extra free margin for new heaps */ |
468 | #define BATTINY_BITS 8 |
469 | #define BATTINY ((BUN)1<<BATTINY_BITS) /* minimum allocation buncnt for a BAT */ |
470 | |
471 | enum { |
472 | TYPE_void = 0, |
473 | TYPE_bit, |
474 | TYPE_bte, |
475 | TYPE_sht, |
476 | TYPE_bat, /* BAT id: index in BBPcache */ |
477 | TYPE_int, |
478 | TYPE_oid, |
479 | TYPE_ptr, /* C pointer! */ |
480 | TYPE_flt, |
481 | TYPE_dbl, |
482 | TYPE_lng, |
483 | #ifdef HAVE_HGE |
484 | TYPE_hge, |
485 | #endif |
486 | TYPE_str, |
487 | TYPE_any = 255, /* limit types to <255! */ |
488 | }; |
489 | |
490 | typedef int8_t bit; |
491 | typedef int8_t bte; |
492 | typedef int16_t sht; |
493 | typedef int64_t lng; |
494 | typedef uint64_t ulng; |
495 | |
496 | #define SIZEOF_OID SIZEOF_SIZE_T |
497 | typedef size_t oid; |
498 | #define OIDFMT "%zu" |
499 | |
500 | typedef int bat; /* Index into BBP */ |
501 | typedef void *ptr; /* Internal coding of types */ |
502 | |
503 | #define SIZEOF_PTR SIZEOF_VOID_P |
504 | typedef float flt; |
505 | typedef double dbl; |
506 | typedef char *str; |
507 | |
508 | #define SIZEOF_LNG 8 |
509 | #define LL_CONSTANT(val) INT64_C(val) |
510 | #define LLFMT "%" PRId64 |
511 | #define ULLFMT "%" PRIu64 |
512 | #define LLSCN "%" SCNd64 |
513 | #define ULLSCN "%" SCNu64 |
514 | |
515 | typedef oid var_t; /* type used for heap index of var-sized BAT */ |
516 | #define SIZEOF_VAR_T SIZEOF_OID |
517 | #define VARFMT OIDFMT |
518 | |
519 | #if SIZEOF_VAR_T == SIZEOF_INT |
520 | #define VAR_MAX ((var_t) INT_MAX) |
521 | #else |
522 | #define VAR_MAX ((var_t) INT64_MAX) |
523 | #endif |
524 | |
525 | typedef oid BUN; /* BUN position */ |
526 | #define SIZEOF_BUN SIZEOF_OID |
527 | #define BUNFMT OIDFMT |
528 | /* alternatively: |
529 | typedef size_t BUN; |
530 | #define SIZEOF_BUN SIZEOF_SIZE_T |
531 | #define BUNFMT "%zu" |
532 | */ |
533 | #if SIZEOF_BUN == SIZEOF_INT |
534 | #define BUN_NONE ((BUN) INT_MAX) |
535 | #else |
536 | #define BUN_NONE ((BUN) INT64_MAX) |
537 | #endif |
538 | #define BUN_MAX (BUN_NONE - 1) /* maximum allowed size of a BAT */ |
539 | |
540 | #define BUN2 2 |
541 | #define BUN4 4 |
542 | #if SIZEOF_BUN > 4 |
543 | #define BUN8 8 |
544 | #endif |
545 | typedef uint16_t BUN2type; |
546 | typedef uint32_t BUN4type; |
547 | #if SIZEOF_BUN > 4 |
548 | typedef uint64_t BUN8type; |
549 | #endif |
550 | #define BUN2_NONE ((BUN2type) UINT16_C(0xFFFF)) |
551 | #define BUN4_NONE ((BUN4type) UINT32_C(0xFFFFFFFF)) |
552 | #if SIZEOF_BUN > 4 |
553 | #define BUN8_NONE ((BUN8type) UINT64_C(0xFFFFFFFFFFFFFFFF)) |
554 | #endif |
555 | |
556 | #include "gdk_atoms.h" |
557 | |
558 | /* |
559 | * @- Checking and Error definitions: |
560 | */ |
561 | typedef enum { GDK_FAIL, GDK_SUCCEED } gdk_return; |
562 | |
563 | #define ATOMextern(t) (ATOMstorage(t) >= TYPE_str) |
564 | |
565 | typedef enum { |
566 | PERSISTENT = 0, |
567 | TRANSIENT, |
568 | } role_t; |
569 | |
570 | /* Heap storage modes */ |
571 | typedef enum { |
572 | STORE_MEM = 0, /* load into GDKmalloced memory */ |
573 | STORE_MMAP = 1, /* mmap() into virtual memory */ |
574 | STORE_PRIV = 2, /* BAT copy of copy-on-write mmap */ |
575 | STORE_CMEM = 3, /* load into malloc (not GDKmalloc) memory*/ |
576 | STORE_NOWN = 4, /* memory not owned by the BAT */ |
577 | STORE_MMAPABS = 5, /* mmap() into virtual memory from an |
578 | * absolute path (not part of dbfarm) */ |
579 | STORE_INVALID /* invalid value, used to indicate error */ |
580 | } storage_t; |
581 | |
582 | typedef struct { |
583 | size_t free; /* index where free area starts. */ |
584 | size_t size; /* size of the heap (bytes) */ |
585 | char *base; /* base pointer in memory. */ |
586 | char filename[32]; /* file containing image of the heap */ |
587 | |
588 | bte farmid; /* id of farm where heap is located */ |
589 | bool copied:1, /* a copy of an existing map. */ |
590 | hashash:1, /* the string heap contains hash values */ |
591 | cleanhash:1, /* string heaps must clean hash */ |
592 | dirty:1; /* specific heap dirty marker */ |
593 | storage_t storage; /* storage mode (mmap/malloc). */ |
594 | storage_t newstorage; /* new desired storage mode at re-allocation. */ |
595 | bat parentid; /* cache id of VIEW parent bat */ |
596 | } Heap; |
597 | |
598 | typedef struct { |
599 | int type; /* type of index entity */ |
600 | int width; /* width of hash entries */ |
601 | BUN nil; /* nil representation */ |
602 | BUN lim; /* collision list size */ |
603 | BUN mask; /* number of hash buckets-1 (power of 2) */ |
604 | void *Hash; /* hash table */ |
605 | void *Link; /* collision list */ |
606 | Heap heap; /* heap where the hash is stored */ |
607 | } Hash; |
608 | |
609 | typedef struct Imprints Imprints; |
610 | |
611 | /* |
612 | * @+ Binary Association Tables |
613 | * Having gone to the previous preliminary definitions, we will now |
614 | * introduce the structure of Binary Association Tables (BATs) in |
615 | * detail. They are the basic storage unit on which GDK is modeled. |
616 | * |
617 | * The BAT holds an unlimited number of binary associations, called |
618 | * BUNs (@strong{Binary UNits}). The two attributes of a BUN are |
619 | * called @strong{head} (left) and @strong{tail} (right) in the |
620 | * remainder of this document. |
621 | * |
622 | * @c image{http://monetdb.cwi.nl/projects/monetdb-mk/imgs/bat1,,,,feps} |
623 | * |
624 | * The above figure shows what a BAT looks like. It consists of two |
625 | * columns, called head and tail, such that we have always binary |
626 | * tuples (BUNs). The overlooking structure is the @strong{BAT |
627 | * record}. It points to a heap structure called the @strong{BUN |
628 | * heap}. This heap contains the atomic values inside the two |
629 | * columns. If they are fixed-sized atoms, these atoms reside directly |
630 | * in the BUN heap. If they are variable-sized atoms (such as string |
631 | * or polygon), however, the columns has an extra heap for storing |
632 | * those (such @strong{variable-sized atom heaps} are then referred to |
633 | * as @strong{Head Heap}s and @strong{Tail Heap}s). The BUN heap then |
634 | * contains integer byte-offsets (fixed-sized, of course) into a head- |
635 | * or tail-heap. |
636 | * |
637 | * The BUN heap contains a contiguous range of BUNs. It starts after |
638 | * the @strong{first} pointer, and finishes at the end in the |
639 | * @strong{free} area of the BUN. All BUNs after the @strong{inserted} |
640 | * pointer have been added in the last transaction (and will be |
641 | * deleted on a transaction abort). All BUNs between the |
642 | * @strong{deleted} pointer and the @strong{first} have been deleted |
643 | * in this transaction (and will be reinserted at a transaction |
644 | * abort). |
645 | * |
646 | * The location of a certain BUN in a BAT may change between |
647 | * successive library routine invocations. Therefore, one should |
648 | * avoid keeping references into the BAT storage area for long |
649 | * periods. |
650 | * |
651 | * Passing values between the library routines and the enclosing C |
652 | * program is primarily through value pointers of type ptr. Pointers |
653 | * into the BAT storage area should only be used for retrieval. Direct |
654 | * updates of data stored in a BAT is forbidden. The user should |
655 | * adhere to the interface conventions to guarantee the integrity |
656 | * rules and to maintain the (hidden) auxiliary search structures. |
657 | * |
658 | * @- GDK variant record type |
659 | * When manipulating values, MonetDB puts them into value records. |
660 | * The built-in types have a direct entry in the union. Others should |
661 | * be represented as a pointer of memory in pval or as a string, which |
662 | * is basically the same. In such cases the len field indicates the |
663 | * size of this piece of memory. |
664 | */ |
665 | typedef struct { |
666 | union { /* storage is first in the record */ |
667 | int ival; |
668 | oid oval; |
669 | sht shval; |
670 | bte btval; |
671 | flt fval; |
672 | ptr pval; |
673 | bat bval; |
674 | str sval; |
675 | dbl dval; |
676 | lng lval; |
677 | #ifdef HAVE_HGE |
678 | hge hval; |
679 | #endif |
680 | } val; |
681 | size_t len; |
682 | int vtype; |
683 | } *ValPtr, ValRecord; |
684 | |
685 | /* interface definitions */ |
686 | gdk_export void *VALconvert(int typ, ValPtr t); |
687 | gdk_export char *VALformat(const ValRecord *res); |
688 | gdk_export ValPtr VALcopy(ValPtr dst, const ValRecord *src); |
689 | gdk_export ValPtr VALinit(ValPtr d, int tpe, const void *s); |
690 | gdk_export void VALempty(ValPtr v); |
691 | gdk_export void VALclear(ValPtr v); |
692 | gdk_export ValPtr VALset(ValPtr v, int t, void *p); |
693 | gdk_export void *VALget(ValPtr v); |
694 | gdk_export int VALcmp(const ValRecord *p, const ValRecord *q); |
695 | gdk_export int VALisnil(const ValRecord *v); |
696 | |
697 | /* |
698 | * @- The BAT record |
699 | * The elements of the BAT structure are introduced in the remainder. |
700 | * Instead of using the underlying types hidden beneath it, one should |
701 | * use a @emph{BAT} type that is supposed to look like this: |
702 | * @verbatim |
703 | * typedef struct { |
704 | * // static BAT properties |
705 | * bat batCacheid; // bat id: index in BBPcache |
706 | * bool batTransient; // persistence mode |
707 | * bool batCopiedtodisk; // BAT is saved on disk? |
708 | * // dynamic BAT properties |
709 | * int batHeat; // heat of BAT in the BBP |
710 | * bool batDirtydesc; // BAT descriptor specific dirty flag |
711 | * Heap* batBuns; // Heap where the buns are stored |
712 | * // DELTA status |
713 | * BUN batInserted; // first inserted BUN |
714 | * BUN batCount; // Tuple count |
715 | * // Tail properties |
716 | * int ttype; // Tail type number |
717 | * str tident; // name for tail column |
718 | * bool tkey; // tail values are unique |
719 | * bool tunique; // tail values must be kept unique |
720 | * bool tnonil; // tail has no nils |
721 | * bool tsorted; // are tail values currently ordered? |
722 | * bool tvarsized; // for speed: tail type is varsized? |
723 | * // Tail storage |
724 | * int tloc; // byte-offset in BUN for tail elements |
725 | * Heap *theap; // heap for varsized tail values |
726 | * Hash *thash; // linear chained hash table on tail |
727 | * Imprints *timprints; // column imprints index on tail |
728 | * orderidx torderidx; // order oid index on tail |
729 | * } BAT; |
730 | * @end verbatim |
731 | * |
732 | * The internal structure of the @strong{BAT} record is in fact much |
733 | * more complex, but GDK programmers should refrain of making use of |
734 | * that. |
735 | * |
736 | * Since we don't want to pay cost to keep both views in line with |
737 | * each other under BAT updates, we work with shared pieces of memory |
738 | * between the two views. An update to one will thus automatically |
739 | * update the other. In the same line, we allow @strong{synchronized |
740 | * BATs} (BATs with identical head columns, and marked as such in the |
741 | * @strong{BAT Alignment} interface) now to be clustered horizontally. |
742 | * |
743 | * @c image{http://monetdb.cwi.nl/projects/monetdb-mk/imgs/bat2,,,,feps} |
744 | */ |
745 | |
746 | typedef struct PROPrec PROPrec; |
747 | |
748 | /* see also comment near BATassertProps() for more information about |
749 | * the properties */ |
750 | typedef struct { |
751 | str id; /* label for column */ |
752 | |
753 | uint16_t width; /* byte-width of the atom array */ |
754 | int8_t type; /* type id. */ |
755 | uint8_t shift; /* log2 of bun width */ |
756 | bool varsized:1, /* varsized/void (true) or fixedsized (false) */ |
757 | key:1, /* no duplicate values present */ |
758 | unique:1, /* no duplicate values allowed */ |
759 | nonil:1, /* there are no nils in the column */ |
760 | nil:1, /* there is a nil in the column */ |
761 | sorted:1, /* column is sorted in ascending order */ |
762 | revsorted:1; /* column is sorted in descending order */ |
763 | BUN nokey[2]; /* positions that prove key==FALSE */ |
764 | BUN nosorted; /* position that proves sorted==FALSE */ |
765 | BUN norevsorted; /* position that proves revsorted==FALSE */ |
766 | oid seq; /* start of dense sequence */ |
767 | |
768 | Heap heap; /* space for the column. */ |
769 | Heap *vheap; /* space for the varsized data. */ |
770 | Hash *hash; /* hash table */ |
771 | Imprints *imprints; /* column imprints index */ |
772 | Heap *orderidx; /* order oid index */ |
773 | |
774 | PROPrec *props; /* list of dynamic properties stored in the bat descriptor */ |
775 | } COLrec; |
776 | |
777 | #define ORDERIDXOFF 3 |
778 | |
779 | /* assert that atom width is power of 2, i.e., width == 1<<shift */ |
780 | #define assert_shift_width(shift,width) assert(((shift) == 0 && (width) == 0) || ((unsigned)1<<(shift)) == (unsigned)(width)) |
781 | |
782 | #define GDKLIBRARY_TALIGN 061036U /* talign field in BBP.dir */ |
783 | #define GDKLIBRARY_NIL_NAN 061037U /* flt/dbl NIL not represented by NaN */ |
784 | #define GDKLIBRARY_BLOB_SORT 061040U /* blob compare changed */ |
785 | #define GDKLIBRARY_OLDDATE 061041U |
786 | #define GDKLIBRARY 061042U |
787 | |
788 | typedef struct BAT { |
789 | /* static bat properties */ |
790 | bat batCacheid; /* index into BBP */ |
791 | oid hseqbase; /* head seq base */ |
792 | |
793 | /* dynamic bat properties */ |
794 | MT_Id creator_tid; /* which thread created it */ |
795 | bool |
796 | batCopiedtodisk:1, /* once written */ |
797 | batDirtyflushed:1, /* was dirty before commit started? */ |
798 | batDirtydesc:1, /* bat descriptor dirty marker */ |
799 | batTransient:1; /* should the BAT persist on disk? */ |
800 | uint8_t /* adjacent bit fields are packed together (if they fit) */ |
801 | batRestricted:2; /* access privileges */ |
802 | role_t batRole; /* role of the bat */ |
803 | uint16_t unused; /* value=0 for now (sneakily used by mat.c) */ |
804 | int batSharecnt; /* incoming view count */ |
805 | |
806 | /* delta status administration */ |
807 | BUN batInserted; /* start of inserted elements */ |
808 | BUN batCount; /* tuple count */ |
809 | BUN batCapacity; /* tuple capacity */ |
810 | |
811 | /* dynamic column properties */ |
812 | COLrec T; /* column info */ |
813 | |
814 | MT_Lock batIdxLock; /* lock to manipulate indexes */ |
815 | } BAT; |
816 | |
817 | typedef struct BATiter { |
818 | BAT *b; |
819 | oid tvid; |
820 | } BATiter; |
821 | |
822 | /* macros to hide complexity of the BAT structure */ |
823 | #define ttype T.type |
824 | #define tkey T.key |
825 | #define tunique T.unique |
826 | #define tvarsized T.varsized |
827 | #define tseqbase T.seq |
828 | #define tsorted T.sorted |
829 | #define trevsorted T.revsorted |
830 | #define tident T.id |
831 | #define torderidx T.orderidx |
832 | #define twidth T.width |
833 | #define tshift T.shift |
834 | #define tnonil T.nonil |
835 | #define tnil T.nil |
836 | #define tnokey T.nokey |
837 | #define tnosorted T.nosorted |
838 | #define tnorevsorted T.norevsorted |
839 | #define theap T.heap |
840 | #define tvheap T.vheap |
841 | #define thash T.hash |
842 | #define timprints T.imprints |
843 | #define tprops T.props |
844 | |
845 | |
846 | |
847 | /* |
848 | * @- Heap Management |
849 | * Heaps are the low-level entities of mass storage in |
850 | * BATs. Currently, they can either be stored on disk, loaded into |
851 | * memory, or memory mapped. |
852 | * @multitable @columnfractions 0.08 0.7 |
853 | * @item int |
854 | * @tab |
855 | * HEAPalloc (Heap *h, size_t nitems, size_t itemsize); |
856 | * @item int |
857 | * @tab |
858 | * HEAPfree (Heap *h, bool remove); |
859 | * @item int |
860 | * @tab |
861 | * HEAPextend (Heap *h, size_t size, bool mayshare); |
862 | * @item int |
863 | * @tab |
864 | * HEAPload (Heap *h, str nme,ext, bool trunc); |
865 | * @item int |
866 | * @tab |
867 | * HEAPsave (Heap *h, str nme,ext); |
868 | * @item int |
869 | * @tab |
870 | * HEAPcopy (Heap *dst,*src); |
871 | * @item int |
872 | * @tab |
873 | * HEAPdelete (Heap *dst, str o, str ext); |
874 | * @item int |
875 | * @tab |
876 | * HEAPwarm (Heap *h); |
877 | * @end multitable |
878 | * |
879 | * |
880 | * These routines should be used to alloc free or extend heaps; they |
881 | * isolate you from the different ways heaps can be accessed. |
882 | */ |
883 | gdk_export gdk_return HEAPextend(Heap *h, size_t size, bool mayshare) |
884 | __attribute__((__warn_unused_result__)); |
885 | gdk_export size_t HEAPvmsize(Heap *h); |
886 | gdk_export size_t HEAPmemsize(Heap *h); |
887 | |
888 | /* |
889 | * @- Internal HEAP Chunk Management |
890 | * Heaps are used in BATs to store data for variable-size atoms. The |
891 | * implementor must manage malloc()/free() functionality for atoms in |
892 | * this heap. A standard implementation is provided here. |
893 | * |
894 | * @table @code |
895 | * @item void |
896 | * HEAP_initialize (Heap* h, size_t nbytes, size_t nprivate, int align ) |
897 | * @item void |
898 | * HEAP_destroy (Heap* h) |
899 | * @item var_t |
900 | * HEAP_malloc (Heap* heap, size_t nbytes) |
901 | * @item void |
902 | * HEAP_free (Heap *heap, var_t block) |
903 | * @item int |
904 | * HEAP_private (Heap* h) |
905 | * @item void |
906 | * HEAP_printstatus (Heap* h) |
907 | * @end table |
908 | * |
909 | * The heap space starts with a private space that is left untouched |
910 | * by the normal chunk allocation. You can use this private space |
911 | * e.g. to store the root of an rtree HEAP_malloc allocates a chunk of |
912 | * memory on the heap, and returns an index to it. HEAP_free frees a |
913 | * previously allocated chunk HEAP_private returns an integer index to |
914 | * private space. |
915 | */ |
916 | |
917 | gdk_export void HEAP_initialize( |
918 | Heap *heap, /* nbytes -- Initial size of the heap. */ |
919 | size_t nbytes, /* alignment -- for objects on the heap. */ |
920 | size_t nprivate, /* nprivate -- Size of private space */ |
921 | int alignment /* alignment restriction for allocated chunks */ |
922 | ); |
923 | |
924 | gdk_export var_t HEAP_malloc(Heap *heap, size_t nbytes); |
925 | gdk_export void HEAP_free(Heap *heap, var_t block); |
926 | |
927 | /* |
928 | * @- BAT construction |
929 | * @multitable @columnfractions 0.08 0.7 |
930 | * @item @code{BAT* } |
931 | * @tab COLnew (oid headseq, int tailtype, BUN cap, role_t role) |
932 | * @item @code{BAT* } |
933 | * @tab BATextend (BAT *b, BUN newcap) |
934 | * @end multitable |
935 | * |
936 | * A temporary BAT is instantiated using COLnew with the type aliases |
937 | * of the required binary association. The aliases include the |
938 | * built-in types, such as TYPE_int....TYPE_ptr, and the atomic types |
939 | * introduced by the user. The initial capacity to be accommodated |
940 | * within a BAT is indicated by cap. Their extend is automatically |
941 | * incremented upon storage overflow. Failure to create the BAT |
942 | * results in a NULL pointer. |
943 | * |
944 | * The routine BATclone creates an empty BAT storage area with the |
945 | * properties inherited from its argument. |
946 | */ |
947 | #define BATDELETE (-9999) |
948 | |
949 | gdk_export BAT *COLnew(oid hseq, int tltype, BUN capacity, role_t role) |
950 | __attribute__((__warn_unused_result__)); |
951 | gdk_export BAT *BATdense(oid hseq, oid tseq, BUN cnt) |
952 | __attribute__((__warn_unused_result__)); |
953 | gdk_export gdk_return BATextend(BAT *b, BUN newcap) |
954 | __attribute__((__warn_unused_result__)); |
955 | |
956 | /* internal */ |
957 | gdk_export uint8_t ATOMelmshift(int sz); |
958 | |
959 | /* |
960 | * @- BUN manipulation |
961 | * @multitable @columnfractions 0.08 0.7 |
962 | * @item BAT* |
963 | * @tab BATappend (BAT *b, BAT *n, BAT *s, bool force) |
964 | * @item BAT* |
965 | * @tab BUNappend (BAT *b, ptr right, bool force) |
966 | * @item BAT* |
967 | * @tab BUNreplace (BAT *b, oid left, ptr right, bool force) |
968 | * @item int |
969 | * @tab BUNfnd (BAT *b, ptr tail) |
970 | * @item BUN |
971 | * @tab BUNlocate (BAT *b, ptr head, ptr tail) |
972 | * @item ptr |
973 | * @tab BUNtail (BAT *b, BUN p) |
974 | * @end multitable |
975 | * |
976 | * The BATs contain a number of fixed-sized slots to store the binary |
977 | * associations. These slots are called BUNs or BAT units. A BUN |
978 | * variable is a pointer into the storage area of the BAT, but it has |
979 | * limited validity. After a BAT modification, previously obtained |
980 | * BUNs may no longer reside at the same location. |
981 | * |
982 | * The association list does not contain holes. This density permits |
983 | * users to quickly access successive elements without the need to |
984 | * test the items for validity. Moreover, it simplifies transport to |
985 | * disk and other systems. The negative effect is that the user should |
986 | * be aware of the evolving nature of the sequence, which may require |
987 | * copying the BAT first. |
988 | * |
989 | * The update operations come in two flavors: BUNappend and |
990 | * BUNreplace. The batch version of BUNappend is BATappend. |
991 | * |
992 | * The routine BUNfnd provides fast access to a single BUN providing a |
993 | * value for the tail of the binary association. |
994 | * |
995 | * The routine BUNtail returns a pointer to the second value in an |
996 | * association. To guard against side effects on the BAT, one should |
997 | * normally copy this value into a scratch variable for further |
998 | * processing. |
999 | * |
1000 | * Behind the interface we use several macros to access the BUN fixed |
1001 | * part and the variable part. The BUN operators always require a BAT |
1002 | * pointer and BUN identifier. |
1003 | * @itemize |
1004 | * @item |
1005 | * BATttype(b) finds out the type of a BAT. |
1006 | * @item |
1007 | * BUNlast(b) returns the BUN pointer directly after the last BUN |
1008 | * in the BAT. |
1009 | * @end itemize |
1010 | */ |
1011 | /* NOTE: `p' is evaluated after a possible upgrade of the heap */ |
1012 | #if SIZEOF_VAR_T == 8 |
1013 | #define Tputvalue(b, p, v, copyall) \ |
1014 | do { \ |
1015 | if ((b)->tvarsized && (b)->ttype) { \ |
1016 | var_t _d; \ |
1017 | void *_ptr; \ |
1018 | ATOMputVAR((b)->ttype, (b)->tvheap, &_d, v); \ |
1019 | if ((b)->twidth < SIZEOF_VAR_T && \ |
1020 | ((b)->twidth <= 2 ? _d - GDK_VAROFFSET : _d) >= ((size_t) 1 << (8 * (b)->twidth))) { \ |
1021 | /* doesn't fit in current heap, upgrade it */ \ |
1022 | if (GDKupgradevarheap((b), _d, (copyall), (b)->batRestricted == BAT_READ) != GDK_SUCCEED) \ |
1023 | goto bunins_failed; \ |
1024 | } \ |
1025 | _ptr = (p); \ |
1026 | switch ((b)->twidth) { \ |
1027 | case 1: \ |
1028 | * (uint8_t *) _ptr = (uint8_t) (_d - GDK_VAROFFSET); \ |
1029 | break; \ |
1030 | case 2: \ |
1031 | * (uint16_t *) _ptr = (uint16_t) (_d - GDK_VAROFFSET); \ |
1032 | break; \ |
1033 | case 4: \ |
1034 | * (uint32_t *) _ptr = (uint32_t) _d; \ |
1035 | break; \ |
1036 | case 8: \ |
1037 | * (var_t *) _ptr = _d; \ |
1038 | break; \ |
1039 | } \ |
1040 | } else { \ |
1041 | ATOMputFIX((b)->ttype, (p), v); \ |
1042 | } \ |
1043 | } while (false) |
1044 | #else |
1045 | #define Tputvalue(b, p, v, copyall) \ |
1046 | do { \ |
1047 | if ((b)->tvarsized && (b)->ttype) { \ |
1048 | var_t _d; \ |
1049 | void *_ptr; \ |
1050 | ATOMputVAR((b)->ttype, (b)->tvheap, &_d, v); \ |
1051 | if ((b)->twidth < SIZEOF_VAR_T && \ |
1052 | ((b)->twidth <= 2 ? _d - GDK_VAROFFSET : _d) >= ((size_t) 1 << (8 * (b)->twidth))) { \ |
1053 | /* doesn't fit in current heap, upgrade it */ \ |
1054 | if (GDKupgradevarheap((b), _d, (copyall), (b)->batRestricted == BAT_READ) != GDK_SUCCEED) \ |
1055 | goto bunins_failed; \ |
1056 | } \ |
1057 | _ptr = (p); \ |
1058 | switch ((b)->twidth) { \ |
1059 | case 1: \ |
1060 | * (uint8_t *) _ptr = (uint8_t) (_d - GDK_VAROFFSET); \ |
1061 | break; \ |
1062 | case 2: \ |
1063 | * (uint16_t *) _ptr = (uint16_t) (_d - GDK_VAROFFSET); \ |
1064 | break; \ |
1065 | case 4: \ |
1066 | * (var_t *) _ptr = _d; \ |
1067 | break; \ |
1068 | } \ |
1069 | } else { \ |
1070 | ATOMputFIX((b)->ttype, (p), v); \ |
1071 | } \ |
1072 | } while (false) |
1073 | #endif |
1074 | #define tfastins_nocheck(b, p, v, s) \ |
1075 | do { \ |
1076 | (b)->theap.free += (s); \ |
1077 | Tputvalue((b), Tloc((b), (p)), (v), false); \ |
1078 | } while (false) |
1079 | |
1080 | #define bunfastapp_nocheck(b, p, v, ts) \ |
1081 | do { \ |
1082 | tfastins_nocheck(b, p, v, ts); \ |
1083 | (b)->batCount++; \ |
1084 | } while (false) |
1085 | |
1086 | #define bunfastapp(b, v) \ |
1087 | do { \ |
1088 | if (BATcount(b) >= BATcapacity(b)) { \ |
1089 | if (BATcount(b) == BUN_MAX) { \ |
1090 | GDKerror("bunfastapp: too many elements to accomodate (" BUNFMT ")\n", BUN_MAX); \ |
1091 | goto bunins_failed; \ |
1092 | } \ |
1093 | if (BATextend((b), BATgrows(b)) != GDK_SUCCEED) \ |
1094 | goto bunins_failed; \ |
1095 | } \ |
1096 | bunfastapp_nocheck(b, (b)->batCount, v, Tsize(b)); \ |
1097 | } while (false) |
1098 | |
1099 | #define bunfastappTYPE(TYPE, b, v) \ |
1100 | do { \ |
1101 | if (BATcount(b) >= BATcapacity(b)) { \ |
1102 | if (BATcount(b) == BUN_MAX) { \ |
1103 | GDKerror("bunfastapp: too many elements to accomodate (" BUNFMT ")\n", BUN_MAX); \ |
1104 | goto bunins_failed; \ |
1105 | } \ |
1106 | if (BATextend((b), BATgrows(b)) != GDK_SUCCEED) \ |
1107 | goto bunins_failed; \ |
1108 | } \ |
1109 | (b)->theap.free += sizeof(TYPE); \ |
1110 | ((TYPE *) (b)->theap.base)[(b)->batCount++] = * (const TYPE *) (v); \ |
1111 | } while (false) |
1112 | |
1113 | #define tfastins_nocheckVAR(b, p, v, s) \ |
1114 | do { \ |
1115 | var_t _d; \ |
1116 | (b)->theap.free += (s); \ |
1117 | ATOMputVAR((b)->ttype, (b)->tvheap, &_d, v); \ |
1118 | if ((b)->twidth < SIZEOF_VAR_T && \ |
1119 | ((b)->twidth <= 2 ? _d - GDK_VAROFFSET : _d) >= ((size_t) 1 << (8 * (b)->twidth))) { \ |
1120 | /* doesn't fit in current heap, upgrade it */ \ |
1121 | if (GDKupgradevarheap((b), _d, false, (b)->batRestricted == BAT_READ) != GDK_SUCCEED) \ |
1122 | goto bunins_failed; \ |
1123 | } \ |
1124 | switch ((b)->twidth) { \ |
1125 | case 1: \ |
1126 | ((uint8_t *) (b)->theap.base)[p] = (uint8_t) (_d - GDK_VAROFFSET); \ |
1127 | break; \ |
1128 | case 2: \ |
1129 | ((uint16_t *) (b)->theap.base)[p] = (uint16_t) (_d - GDK_VAROFFSET); \ |
1130 | break; \ |
1131 | case 4: \ |
1132 | ((uint32_t *) (b)->theap.base)[p] = (uint32_t) _d; \ |
1133 | break; \ |
1134 | case 8: /* superfluous on 32-bit archs */ \ |
1135 | ((uint64_t *) (b)->theap.base)[p] = (uint64_t) _d; \ |
1136 | break; \ |
1137 | } \ |
1138 | } while (false) |
1139 | |
1140 | #define bunfastapp_nocheckVAR(b, p, v, ts) \ |
1141 | do { \ |
1142 | tfastins_nocheckVAR(b, p, v, ts); \ |
1143 | (b)->batCount++; \ |
1144 | } while (false) |
1145 | |
1146 | #define bunfastappVAR(b, v) \ |
1147 | do { \ |
1148 | if ((b)->batCount >= BATcapacity(b)) { \ |
1149 | if ((b)->batCount == BUN_MAX || BATcount(b) == BUN_MAX) { \ |
1150 | GDKerror("bunfastapp: too many elements to accomodate (" BUNFMT ")\n", BUN_MAX); \ |
1151 | goto bunins_failed; \ |
1152 | } \ |
1153 | if (BATextend((b), BATgrows(b)) != GDK_SUCCEED) \ |
1154 | goto bunins_failed; \ |
1155 | } \ |
1156 | bunfastapp_nocheckVAR(b, (b)->batCount, v, Tsize(b)); \ |
1157 | } while (false) |
1158 | |
1159 | gdk_export gdk_return GDKupgradevarheap(BAT *b, var_t v, bool copyall, bool mayshare) |
1160 | __attribute__((__warn_unused_result__)); |
1161 | gdk_export gdk_return BUNappend(BAT *b, const void *right, bool force) |
1162 | __attribute__((__warn_unused_result__)); |
1163 | gdk_export gdk_return BATappend(BAT *b, BAT *n, BAT *s, bool force) |
1164 | __attribute__((__warn_unused_result__)); |
1165 | |
1166 | gdk_export gdk_return BUNdelete(BAT *b, oid o) |
1167 | __attribute__((__warn_unused_result__)); |
1168 | gdk_export gdk_return BATdel(BAT *b, BAT *d) |
1169 | __attribute__((__warn_unused_result__)); |
1170 | |
1171 | gdk_export gdk_return BUNinplace(BAT *b, BUN p, const void *right, bool force) |
1172 | __attribute__((__warn_unused_result__)); |
1173 | gdk_export gdk_return BATreplace(BAT *b, BAT *p, BAT *n, bool force) |
1174 | __attribute__((__warn_unused_result__)); |
1175 | |
1176 | /* Functions to perform a binary search on a sorted BAT. |
1177 | * See gdk_search.c for details. */ |
1178 | gdk_export BUN SORTfnd(BAT *b, const void *v); |
1179 | gdk_export BUN SORTfndfirst(BAT *b, const void *v); |
1180 | gdk_export BUN SORTfndlast(BAT *b, const void *v); |
1181 | |
1182 | gdk_export BUN ORDERfnd(BAT *b, const void *v); |
1183 | gdk_export BUN ORDERfndfirst(BAT *b, const void *v); |
1184 | gdk_export BUN ORDERfndlast(BAT *b, const void *v); |
1185 | |
1186 | gdk_export BUN BUNfnd(BAT *b, const void *right); |
1187 | |
1188 | #define BUNfndVOID(b, v) \ |
1189 | ((is_oid_nil(*(const oid*)(v)) ^ is_oid_nil((b)->tseqbase)) | \ |
1190 | (*(const oid*)(v) < (b)->tseqbase) | \ |
1191 | (*(const oid*)(v) >= (b)->tseqbase + (b)->batCount) ? \ |
1192 | BUN_NONE : \ |
1193 | (BUN) (*(const oid*)(v) - (b)->tseqbase)) |
1194 | |
1195 | #define BATttype(b) (BATtdense(b) ? TYPE_oid : (b)->ttype) |
1196 | #define Tbase(b) ((b)->tvheap->base) |
1197 | |
1198 | #define Tsize(b) ((b)->twidth) |
1199 | |
1200 | #define tailsize(b,p) ((b)->ttype?((size_t)(p))<<(b)->tshift:0) |
1201 | |
1202 | #define Tloc(b,p) ((void *)((b)->theap.base+(((size_t)(p))<<(b)->tshift))) |
1203 | |
1204 | typedef var_t stridx_t; |
1205 | #define SIZEOF_STRIDX_T SIZEOF_VAR_T |
1206 | #define GDK_VARALIGN SIZEOF_STRIDX_T |
1207 | |
1208 | #if SIZEOF_VAR_T == 8 |
1209 | #define VarHeapValRaw(b,p,w) \ |
1210 | ((w) == 1 ? (var_t) ((uint8_t *) (b))[p] + GDK_VAROFFSET : \ |
1211 | (w) == 2 ? (var_t) ((uint16_t *) (b))[p] + GDK_VAROFFSET : \ |
1212 | (w) == 4 ? (var_t) ((uint32_t *) (b))[p] : \ |
1213 | ((var_t *) (b))[p]) |
1214 | #else |
1215 | #define VarHeapValRaw(b,p,w) \ |
1216 | ((w) == 1 ? (var_t) ((uint8_t *) (b))[p] + GDK_VAROFFSET : \ |
1217 | (w) == 2 ? (var_t) ((uint16_t *) (b))[p] + GDK_VAROFFSET : \ |
1218 | ((var_t *) (b))[p]) |
1219 | #endif |
1220 | #define VarHeapVal(b,p,w) ((size_t) VarHeapValRaw(b,p,w)) |
1221 | #define BUNtvaroff(bi,p) VarHeapVal((bi).b->theap.base, (p), (bi).b->twidth) |
1222 | |
1223 | #define BUNtloc(bi,p) Tloc((bi).b,p) |
1224 | #define BUNtpos(bi,p) Tpos(&(bi),p) |
1225 | #define BUNtvar(bi,p) (assert((bi).b->ttype && (bi).b->tvarsized), (void *) (Tbase((bi).b)+BUNtvaroff(bi,p))) |
1226 | #define BUNtail(bi,p) ((bi).b->ttype?(bi).b->tvarsized?BUNtvar(bi,p):BUNtloc(bi,p):BUNtpos(bi,p)) |
1227 | |
1228 | #define BUNlast(b) (assert((b)->batCount <= BUN_MAX), (b)->batCount) |
1229 | |
1230 | #define BATcount(b) ((b)->batCount) |
1231 | |
1232 | /* return the oid value at BUN position p from the (v)oid bat b |
1233 | * works with any TYPE_void or TYPE_oid bat */ |
1234 | static inline oid |
1235 | BUNtoid(BAT *b, BUN p) |
1236 | { |
1237 | assert(ATOMtype(b->ttype) == TYPE_oid); |
1238 | /* BATcount is the number of valid entries, so with |
1239 | * exceptions, the last value can well be larger than |
1240 | * b->tseqbase + BATcount(b) */ |
1241 | assert(p < BATcount(b)); |
1242 | assert(b->ttype == TYPE_void || b->tvheap == NULL); |
1243 | if (is_oid_nil(b->tseqbase)) { |
1244 | if (b->ttype == TYPE_void) |
1245 | return b->tseqbase; |
1246 | return ((const oid *) (b)->theap.base)[p]; |
1247 | } |
1248 | oid o = b->tseqbase + p; |
1249 | if (b->ttype == TYPE_oid || b->tvheap == NULL) { |
1250 | return o; |
1251 | } |
1252 | /* only exceptions allowed on transient BATs */ |
1253 | assert(b->batRole == TRANSIENT); |
1254 | /* make sure exception area is a reasonable size */ |
1255 | assert(b->tvheap->free % SIZEOF_OID == 0); |
1256 | BUN nexc = (BUN) (b->tvheap->free / SIZEOF_OID); |
1257 | if (nexc == 0) { |
1258 | /* no exceptions (why the vheap?) */ |
1259 | return o; |
1260 | } |
1261 | const oid *exc = (oid *) b->tvheap->base; |
1262 | if (o < exc[0]) |
1263 | return o; |
1264 | if (o + nexc > exc[nexc - 1]) |
1265 | return o + nexc; |
1266 | BUN lo = 0, hi = nexc - 1; |
1267 | while (hi - lo > 1) { |
1268 | BUN mid = (hi + lo) / 2; |
1269 | if (exc[mid] - mid > o) |
1270 | hi = mid; |
1271 | else |
1272 | lo = mid; |
1273 | } |
1274 | return o + hi; |
1275 | } |
1276 | |
1277 | #define bat_iterator(_b) ((BATiter) {.b = (_b), .tvid = 0}) |
1278 | |
1279 | /* |
1280 | * @- BAT properties |
1281 | * @multitable @columnfractions 0.08 0.7 |
1282 | * @item BUN |
1283 | * @tab BATcount (BAT *b) |
1284 | * @item void |
1285 | * @tab BATsetcapacity (BAT *b, BUN cnt) |
1286 | * @item void |
1287 | * @tab BATsetcount (BAT *b, BUN cnt) |
1288 | * @item BAT * |
1289 | * @tab BATkey (BAT *b, bool onoff) |
1290 | * @item BAT * |
1291 | * @tab BATmode (BAT *b, bool transient) |
1292 | * @item BAT * |
1293 | * @tab BATsetaccess (BAT *b, restrict_t mode) |
1294 | * @item int |
1295 | * @tab BATdirty (BAT *b) |
1296 | * @item restrict_t |
1297 | * @tab BATgetaccess (BAT *b) |
1298 | * @end multitable |
1299 | * |
1300 | * The function BATcount returns the number of associations stored in |
1301 | * the BAT. |
1302 | * |
1303 | * The BAT is given a new logical name using BBPrename. |
1304 | * |
1305 | * The integrity properties to be maintained for the BAT are |
1306 | * controlled separately. A key property indicates that duplicates in |
1307 | * the association dimension are not permitted. |
1308 | * |
1309 | * The persistency indicator tells the retention period of BATs. The |
1310 | * system support two modes: PERSISTENT and TRANSIENT. |
1311 | * The PERSISTENT BATs are automatically saved upon session boundary |
1312 | * or transaction commit. TRANSIENT BATs are removed upon transaction |
1313 | * boundary. All BATs are initially TRANSIENT unless their mode is |
1314 | * changed using the routine BATmode. |
1315 | * |
1316 | * The BAT properties may be changed at any time using BATkey |
1317 | * and BATmode. |
1318 | * |
1319 | * Valid BAT access properties can be set with BATsetaccess and |
1320 | * BATgetaccess: BAT_READ, BAT_APPEND, and BAT_WRITE. BATs can be |
1321 | * designated to be read-only. In this case some memory optimizations |
1322 | * may be made (slice and fragment bats can point to stable subsets of |
1323 | * a parent bat). A special mode is append-only. It is then allowed |
1324 | * to insert BUNs at the end of the BAT, but not to modify anything |
1325 | * that already was in there. |
1326 | */ |
1327 | gdk_export BUN BATcount_no_nil(BAT *b); |
1328 | gdk_export void BATsetcapacity(BAT *b, BUN cnt); |
1329 | gdk_export void BATsetcount(BAT *b, BUN cnt); |
1330 | gdk_export BUN BATgrows(BAT *b); |
1331 | gdk_export gdk_return BATkey(BAT *b, bool onoff); |
1332 | gdk_export gdk_return BATmode(BAT *b, bool transient); |
1333 | gdk_export gdk_return BATroles(BAT *b, const char *tnme); |
1334 | gdk_export void BAThseqbase(BAT *b, oid o); |
1335 | gdk_export void BATtseqbase(BAT *b, oid o); |
1336 | |
1337 | /* The batRestricted field indicates whether a BAT is readonly. |
1338 | * we have modes: BAT_WRITE = all permitted |
1339 | * BAT_APPEND = append-only |
1340 | * BAT_READ = read-only |
1341 | * VIEW bats are always mapped read-only. |
1342 | */ |
1343 | typedef enum { |
1344 | BAT_WRITE, /* all kinds of access allowed */ |
1345 | BAT_READ, /* only read-access allowed */ |
1346 | BAT_APPEND, /* only reads and appends allowed */ |
1347 | } restrict_t; |
1348 | |
1349 | gdk_export gdk_return BATsetaccess(BAT *b, restrict_t mode); |
1350 | gdk_export restrict_t BATgetaccess(BAT *b); |
1351 | |
1352 | |
1353 | #define BATdirty(b) (!(b)->batCopiedtodisk || \ |
1354 | (b)->batDirtydesc || \ |
1355 | (b)->theap.dirty || \ |
1356 | ((b)->tvheap != NULL && (b)->tvheap->dirty)) |
1357 | |
1358 | #define BATcapacity(b) (b)->batCapacity |
1359 | /* |
1360 | * @- BAT manipulation |
1361 | * @multitable @columnfractions 0.08 0.7 |
1362 | * @item BAT * |
1363 | * @tab BATclear (BAT *b, bool force) |
1364 | * @item BAT * |
1365 | * @tab COLcopy (BAT *b, int tt, bool writeable, role_t role) |
1366 | * @end multitable |
1367 | * |
1368 | * The routine BATclear removes the binary associations, leading to an |
1369 | * empty, but (re-)initialized BAT. Its properties are retained. A |
1370 | * temporary copy is obtained with Colcopy. The new BAT has an unique |
1371 | * name. |
1372 | */ |
1373 | gdk_export gdk_return BATclear(BAT *b, bool force); |
1374 | gdk_export BAT *COLcopy(BAT *b, int tt, bool writable, role_t role); |
1375 | |
1376 | gdk_export gdk_return BATgroup(BAT **groups, BAT **extents, BAT **histo, BAT *b, BAT *s, BAT *g, BAT *e, BAT *h) |
1377 | __attribute__((__warn_unused_result__)); |
1378 | |
1379 | /* |
1380 | * @- BAT Input/Output |
1381 | * @multitable @columnfractions 0.08 0.7 |
1382 | * @item BAT * |
1383 | * @tab BATload (str name) |
1384 | * @item BAT * |
1385 | * @tab BATsave (BAT *b) |
1386 | * @item int |
1387 | * @tab BATdelete (BAT *b) |
1388 | * @end multitable |
1389 | * |
1390 | * A BAT created by COLnew is considered temporary until one calls the |
1391 | * routine BATsave or BATmode. This routine reserves disk space and |
1392 | * checks for name clashes in the BAT directory. It also makes the BAT |
1393 | * persistent. The empty BAT is initially marked as ordered on both |
1394 | * columns. |
1395 | * |
1396 | * Failure to read or write the BAT results in a NULL, otherwise it |
1397 | * returns the BAT pointer. |
1398 | * |
1399 | * @- Heap Storage Modes |
1400 | * The discriminative storage modes are memory-mapped, compressed, or |
1401 | * loaded in memory. As can be seen in the bat record, each BAT has |
1402 | * one BUN-heap (@emph{bn}), and possibly two heaps (@emph{hh} and |
1403 | * @emph{th}) for variable-sized atoms. |
1404 | */ |
1405 | |
1406 | gdk_export gdk_return BATsave(BAT *b) |
1407 | __attribute__((__warn_unused_result__)); |
1408 | gdk_export void BATmsync(BAT *b); |
1409 | |
1410 | #define NOFARM (-1) /* indicate to GDKfilepath to create relative path */ |
1411 | |
1412 | gdk_export char *GDKfilepath(int farmid, const char *dir, const char *nme, const char *ext); |
1413 | gdk_export bool GDKinmemory(void); |
1414 | gdk_export gdk_return GDKcreatedir(const char *nme); |
1415 | |
1416 | gdk_export void OIDXdestroy(BAT *b); |
1417 | |
1418 | /* |
1419 | * @- Printing |
1420 | * @multitable @columnfractions 0.08 0.7 |
1421 | * @item int |
1422 | * @tab BATprintcolumns (stream *f, int argc, BAT *b[]); |
1423 | * @end multitable |
1424 | * |
1425 | * The functions to convert BATs into ASCII. They are primarily meant for ease of |
1426 | * debugging and to a lesser extent for output processing. Printing a |
1427 | * BAT is done essentially by looping through its components, printing |
1428 | * each association. |
1429 | * |
1430 | */ |
1431 | gdk_export gdk_return BATprintcolumns(stream *s, int argc, BAT *argv[]); |
1432 | gdk_export gdk_return BATprint(stream *s, BAT *b); |
1433 | |
1434 | /* |
1435 | * @- BAT clustering |
1436 | * @multitable @columnfractions 0.08 0.7 |
1437 | * @item bool |
1438 | * @tab BATordered (BAT *b) |
1439 | * @end multitable |
1440 | * |
1441 | * When working in a main-memory situation, clustering of data on |
1442 | * disk-pages is not important. Whenever mmap()-ed data is used |
1443 | * intensively, reducing the number of page faults is a hot issue. |
1444 | * |
1445 | * The above functions rearrange data in MonetDB heaps (used for |
1446 | * storing BUNs var-sized atoms, or accelerators). Applying these |
1447 | * clusterings will allow that MonetDB's main-memory oriented |
1448 | * algorithms work efficiently also in a disk-oriented context. |
1449 | * |
1450 | * BATordered starts a check on the tail values to see if they are |
1451 | * ordered. The result is returned and stored in the tsorted field of |
1452 | * the BAT. |
1453 | */ |
1454 | gdk_export bool BATkeyed(BAT *b); |
1455 | gdk_export bool BATordered(BAT *b); |
1456 | gdk_export bool BATordered_rev(BAT *b); |
1457 | gdk_export gdk_return BATsort(BAT **sorted, BAT **order, BAT **groups, BAT *b, BAT *o, BAT *g, bool reverse, bool nilslast, bool stable) |
1458 | __attribute__((__warn_unused_result__)); |
1459 | |
1460 | |
1461 | gdk_export void GDKqsort(void *restrict h, void *restrict t, const void *restrict base, size_t n, int hs, int ts, int tpe, bool reverse, bool nilslast); |
1462 | |
1463 | #define BATtordered(b) ((b)->tsorted) |
1464 | #define BATtrevordered(b) ((b)->trevsorted) |
1465 | /* BAT is dense (i.e., BATtvoid() is true and tseqbase is not NIL) */ |
1466 | #define BATtdense(b) (!is_oid_nil((b)->tseqbase) && \ |
1467 | ((b)->tvheap == NULL || (b)->tvheap->free == 0)) |
1468 | /* BATtvoid: BAT can be (or actually is) represented by TYPE_void */ |
1469 | #define BATtvoid(b) (BATtdense(b) || (b)->ttype==TYPE_void) |
1470 | #define BATtkey(b) ((b)->tkey || BATtdense(b)) |
1471 | |
1472 | /* set some properties that are trivial to deduce */ |
1473 | #define BATsettrivprop(b) \ |
1474 | do { \ |
1475 | assert(!is_oid_nil((b)->hseqbase)); \ |
1476 | (b)->batDirtydesc = true; /* likely already set */ \ |
1477 | assert(is_oid_nil((b)->tseqbase) || \ |
1478 | ATOMtype((b)->ttype) == TYPE_oid); \ |
1479 | if ((b)->ttype == TYPE_void) { \ |
1480 | if (is_oid_nil((b)->tseqbase)) { \ |
1481 | (b)->tnonil = (b)->batCount == 0; \ |
1482 | (b)->tnil = !(b)->tnonil; \ |
1483 | (b)->trevsorted = true; \ |
1484 | (b)->tkey = (b)->batCount <= 1; \ |
1485 | } else { \ |
1486 | (b)->tnonil = true; \ |
1487 | (b)->tnil = false; \ |
1488 | (b)->tkey = true; \ |
1489 | (b)->trevsorted = (b)->batCount <= 1; \ |
1490 | } \ |
1491 | (b)->tsorted = true; \ |
1492 | } else if ((b)->batCount <= 1) { \ |
1493 | if (ATOMlinear((b)->ttype)) { \ |
1494 | (b)->tsorted = true; \ |
1495 | (b)->trevsorted = true; \ |
1496 | } \ |
1497 | (b)->tkey = true; \ |
1498 | if ((b)->batCount == 0) { \ |
1499 | (b)->tnonil = true; \ |
1500 | (b)->tnil = false; \ |
1501 | if ((b)->ttype == TYPE_oid) { \ |
1502 | (b)->tseqbase = 0; \ |
1503 | } \ |
1504 | } else if ((b)->ttype == TYPE_oid) { \ |
1505 | /* b->batCount == 1 */ \ |
1506 | oid sqbs = ((const oid *) (b)->theap.base)[0]; \ |
1507 | if (is_oid_nil(sqbs)) { \ |
1508 | (b)->tnonil = false; \ |
1509 | (b)->tnil = true; \ |
1510 | } else { \ |
1511 | (b)->tnonil = true; \ |
1512 | (b)->tnil = false; \ |
1513 | } \ |
1514 | (b)->tseqbase = sqbs; \ |
1515 | } \ |
1516 | } else if ((b)->batCount == 2 && ATOMlinear((b)->ttype)) { \ |
1517 | int _c_; \ |
1518 | if ((b)->tvarsized) \ |
1519 | _c_ = ATOMcmp((b)->ttype, \ |
1520 | Tbase(b) + VarHeapVal((b)->theap.base, 0, (b)->twidth), \ |
1521 | Tbase(b) + VarHeapVal((b)->theap.base, 1, (b)->twidth)); \ |
1522 | else \ |
1523 | _c_ = ATOMcmp((b)->ttype, \ |
1524 | Tloc((b), 0), \ |
1525 | Tloc((b), 1)); \ |
1526 | (b)->tsorted = _c_ <= 0; \ |
1527 | (b)->tnosorted = !(b)->tsorted; \ |
1528 | (b)->trevsorted = _c_ >= 0; \ |
1529 | (b)->tnorevsorted = !(b)->trevsorted; \ |
1530 | (b)->tkey = _c_ != 0; \ |
1531 | (b)->tnokey[0] = 0; \ |
1532 | (b)->tnokey[1] = !(b)->tkey; \ |
1533 | } else if (!ATOMlinear((b)->ttype)) { \ |
1534 | (b)->tsorted = false; \ |
1535 | (b)->trevsorted = false; \ |
1536 | } \ |
1537 | } while (false) |
1538 | |
1539 | /* |
1540 | * @+ BAT Buffer Pool |
1541 | * @multitable @columnfractions 0.08 0.7 |
1542 | * @item int |
1543 | * @tab BBPfix (bat bi) |
1544 | * @item int |
1545 | * @tab BBPunfix (bat bi) |
1546 | * @item int |
1547 | * @tab BBPretain (bat bi) |
1548 | * @item int |
1549 | * @tab BBPrelease (bat bi) |
1550 | * @item str |
1551 | * @tab BBPname (bat bi) |
1552 | * @item bat |
1553 | * @tab BBPindex (str nme) |
1554 | * @item BAT* |
1555 | * @tab BATdescriptor (bat bi) |
1556 | * @end multitable |
1557 | * |
1558 | * The BAT Buffer Pool module contains the code to manage the storage |
1559 | * location of BATs. |
1560 | * |
1561 | * The remaining BBP tables contain status information to load, swap |
1562 | * and migrate the BATs. The core table is BBPcache which contains a |
1563 | * pointer to the BAT descriptor with its heaps. A zero entry means |
1564 | * that the file resides on disk. Otherwise it has been read or mapped |
1565 | * into memory. |
1566 | * |
1567 | * BATs loaded into memory are retained in a BAT buffer pool. They |
1568 | * retain their position within the cache during their life cycle, |
1569 | * which make indexing BATs a stable operation. |
1570 | * |
1571 | * The BBPindex routine checks if a BAT with a certain name is |
1572 | * registered in the buffer pools. If so, it returns its BAT id. The |
1573 | * BATdescriptor routine has a BAT id parameter, and returns a pointer |
1574 | * to the corresponding BAT record (after incrementing the reference |
1575 | * count). The BAT will be loaded into memory, if necessary. |
1576 | * |
1577 | * The structure of the BBP file obeys the tuple format for GDK. |
1578 | * |
1579 | * The status and BAT persistency information is encoded in the status |
1580 | * field. |
1581 | */ |
1582 | typedef struct { |
1583 | BAT *cache; /* if loaded: BAT* handle */ |
1584 | char *logical; /* logical name (may point at bak) */ |
1585 | char bak[16]; /* logical name backup (tmp_%o) */ |
1586 | bat next; /* next BBP slot in linked list */ |
1587 | BAT *desc; /* the BAT descriptor */ |
1588 | char physical[20]; /* dir + basename for storage */ |
1589 | str options; /* A string list of options */ |
1590 | int refs; /* in-memory references on which the loaded status of a BAT relies */ |
1591 | int lrefs; /* logical references on which the existence of a BAT relies */ |
1592 | volatile unsigned status; /* status mask used for spin locking */ |
1593 | /* MT_Id pid; non-zero thread-id if this BAT is private */ |
1594 | } BBPrec; |
1595 | |
1596 | gdk_export bat BBPlimit; |
1597 | #define N_BBPINIT 1000 |
1598 | #if SIZEOF_VOID_P == 4 |
1599 | #define BBPINITLOG 11 |
1600 | #else |
1601 | #define BBPINITLOG 14 |
1602 | #endif |
1603 | #define BBPINIT (1 << BBPINITLOG) |
1604 | /* absolute maximum number of BATs is N_BBPINIT * BBPINIT |
1605 | * this also gives the longest possible "physical" name and "bak" name |
1606 | * of a BAT: the "bak" name is "tmp_%o", so at most 12 + \0 bytes on |
1607 | * 64 bit architecture and 11 + \0 on 32 bit architecture; the |
1608 | * physical name is a bit more complicated, but the longest possible |
1609 | * name is 17 + \0 bytes (16 + \0 on 32 bits) */ |
1610 | gdk_export BBPrec *BBP[N_BBPINIT]; |
1611 | |
1612 | /* fast defines without checks; internal use only */ |
1613 | #define BBP_cache(i) BBP[(i)>>BBPINITLOG][(i)&(BBPINIT-1)].cache |
1614 | #define BBP_logical(i) BBP[(i)>>BBPINITLOG][(i)&(BBPINIT-1)].logical |
1615 | #define BBP_bak(i) BBP[(i)>>BBPINITLOG][(i)&(BBPINIT-1)].bak |
1616 | #define BBP_next(i) BBP[(i)>>BBPINITLOG][(i)&(BBPINIT-1)].next |
1617 | #define BBP_physical(i) BBP[(i)>>BBPINITLOG][(i)&(BBPINIT-1)].physical |
1618 | #define BBP_options(i) BBP[(i)>>BBPINITLOG][(i)&(BBPINIT-1)].options |
1619 | #define BBP_desc(i) BBP[(i)>>BBPINITLOG][(i)&(BBPINIT-1)].desc |
1620 | #define BBP_refs(i) BBP[(i)>>BBPINITLOG][(i)&(BBPINIT-1)].refs |
1621 | #define BBP_lrefs(i) BBP[(i)>>BBPINITLOG][(i)&(BBPINIT-1)].lrefs |
1622 | #define BBP_status(i) BBP[(i)>>BBPINITLOG][(i)&(BBPINIT-1)].status |
1623 | #define BBP_pid(i) BBP[(i)>>BBPINITLOG][(i)&(BBPINIT-1)].pid |
1624 | |
1625 | /* macros that nicely check parameters */ |
1626 | #define BBPstatus(i) (BBPcheck((i),"BBPstatus")?BBP_status(i):0) |
1627 | #define BBPrefs(i) (BBPcheck((i),"BBPrefs")?BBP_refs(i):-1) |
1628 | #define BBPcache(i) (BBPcheck((i),"BBPcache")?BBP_cache(i):(BAT*) NULL) |
1629 | #define BBPname(i) \ |
1630 | (BBPcheck((i), "BBPname") ? \ |
1631 | BBP[(i) >> BBPINITLOG][(i) & (BBPINIT - 1)].logical : \ |
1632 | "") |
1633 | #define BBPvalid(i) (BBP_logical(i) != NULL && *BBP_logical(i) != '.') |
1634 | #define BATgetId(b) BBPname((b)->batCacheid) |
1635 | |
1636 | #define BBPRENAME_ALREADY (-1) |
1637 | #define BBPRENAME_ILLEGAL (-2) |
1638 | #define BBPRENAME_LONG (-3) |
1639 | |
1640 | gdk_export void BBPlock(void); |
1641 | |
1642 | gdk_export void BBPunlock(void); |
1643 | |
1644 | gdk_export BAT *BBPquickdesc(bat b, bool delaccess); |
1645 | |
1646 | /* |
1647 | * @+ GDK Extensibility |
1648 | * GDK can be extended with new atoms, search accelerators and storage |
1649 | * modes. |
1650 | * |
1651 | * @- Atomic Type Descriptors |
1652 | * The atomic types over which the binary associations are maintained |
1653 | * are described by an atom descriptor. |
1654 | * @multitable @columnfractions 0.08 0.7 |
1655 | * @item void |
1656 | * @tab ATOMallocate (str nme); |
1657 | * @item int |
1658 | * @tab ATOMindex (char *nme); |
1659 | * @item int |
1660 | * @tab ATOMdump (); |
1661 | * @item void |
1662 | * @tab ATOMdelete (int id); |
1663 | * @item str |
1664 | * @tab ATOMname (int id); |
1665 | * @item unsigned int |
1666 | * @tab ATOMsize (int id); |
1667 | * @item int |
1668 | * @tab ATOMvarsized (int id); |
1669 | * @item ptr |
1670 | * @tab ATOMnilptr (int id); |
1671 | * @item ssize_t |
1672 | * @tab ATOMfromstr (int id, str s, size_t* len, ptr* v_dst); |
1673 | * @item ssize_t |
1674 | * @tab ATOMtostr (int id, str s, size_t* len, ptr* v_dst); |
1675 | * @item hash_t |
1676 | * @tab ATOMhash (int id, ptr val, in mask); |
1677 | * @item int |
1678 | * @tab ATOMcmp (int id, ptr val_1, ptr val_2); |
1679 | * @item int |
1680 | * @tab ATOMfix (int id, ptr v); |
1681 | * @item int |
1682 | * @tab ATOMunfix (int id, ptr v); |
1683 | * @item int |
1684 | * @tab ATOMheap (int id, Heap *hp, size_t cap); |
1685 | * @item int |
1686 | * @tab ATOMput (int id, Heap *hp, BUN pos_dst, ptr val_src); |
1687 | * @item int |
1688 | * @tab ATOMdel (int id, Heap *hp, BUN v_src); |
1689 | * @item size_t |
1690 | * @tab ATOMlen (int id, ptr val); |
1691 | * @item ptr |
1692 | * @tab ATOMnil (int id); |
1693 | * @item ssize_t |
1694 | * @tab ATOMformat (int id, ptr val, char** buf); |
1695 | * @item int |
1696 | * @tab ATOMprint (int id, ptr val, stream *fd); |
1697 | * @item ptr |
1698 | * @tab ATOMdup (int id, ptr val ); |
1699 | * @end multitable |
1700 | * |
1701 | * @- Atom Definition |
1702 | * User defined atomic types can be added to a running system with the |
1703 | * following interface:. |
1704 | * |
1705 | * @itemize |
1706 | * @item @emph{ATOMallocate()} registers a new atom definition if |
1707 | * there is no atom registered yet under that name. |
1708 | * |
1709 | * @item @emph{ATOMdelete()} unregisters an atom definition. |
1710 | * |
1711 | * @item @emph{ATOMindex()} looks up the atom descriptor with a certain name. |
1712 | * @end itemize |
1713 | * |
1714 | * @- Atom Manipulation |
1715 | * |
1716 | * @itemize |
1717 | * @item The @emph{ATOMname()} operation retrieves the name of an atom |
1718 | * using its id. |
1719 | * |
1720 | * @item The @emph{ATOMsize()} operation returns the atoms fixed size. |
1721 | * |
1722 | * @item The @emph{ATOMnilptr()} operation returns a pointer to the |
1723 | * nil-value of an atom. We usually take one dedicated value halfway |
1724 | * down the negative extreme of the atom range (if such a concept |
1725 | * fits), as the nil value. |
1726 | * |
1727 | * @item The @emph{ATOMnil()} operation returns a copy of the nil |
1728 | * value, allocated with GDKmalloc(). |
1729 | * |
1730 | * @item The @emph{ATOMheap()} operation creates a new var-sized atom |
1731 | * heap in 'hp' with capacity 'cap'. |
1732 | * |
1733 | * @item The @emph{ATOMhash()} computes a hash index for a |
1734 | * value. `val' is a direct pointer to the atom value. Its return |
1735 | * value should be an hash_t between 0 and 'mask'. |
1736 | * |
1737 | * @item The @emph{ATOMcmp()} operation compares two atomic |
1738 | * values. Its parameters are pointers to atomic values. |
1739 | * |
1740 | * @item The @emph{ATOMlen()} operation computes the byte length for a |
1741 | * value. `val' is a direct pointer to the atom value. Its return |
1742 | * value should be an integer between 0 and 'mask'. |
1743 | * |
1744 | * @item The @emph{ATOMdel()} operation deletes a var-sized atom from |
1745 | * its heap `hp'. The integer byte-index of this value in the heap is |
1746 | * pointed to by `val_src'. |
1747 | * |
1748 | * @item The @emph{ATOMput()} operation inserts an atom `src_val' in a |
1749 | * BUN at `dst_pos'. This involves copying the fixed sized part in the |
1750 | * BUN. In case of a var-sized atom, this fixed sized part is an |
1751 | * integer byte-index into a heap of var-sized atoms. The atom is then |
1752 | * also copied into that heap `hp'. |
1753 | * |
1754 | * @item The @emph{ATOMfix()} and @emph{ATOMunfix()} operations do |
1755 | * bookkeeping on the number of references that a GDK application |
1756 | * maintains to the atom. In MonetDB, we use this to count the number |
1757 | * of references directly, or through BATs that have columns of these |
1758 | * atoms. The only operator for which this is currently relevant is |
1759 | * BAT. The operators return the POST reference count to the |
1760 | * atom. BATs with fixable atoms may not be stored persistently. |
1761 | * |
1762 | * @item The @emph{ATOMfromstr()} parses an atom value from string |
1763 | * `s'. The memory allocation policy is the same as in |
1764 | * @emph{ATOMget()}. The return value is the number of parsed |
1765 | * characters or -1 on failure. Also in case of failure, the output |
1766 | * parameter buf is a valid pointer or NULL. |
1767 | * |
1768 | * @item The @emph{ATOMprint()} prints an ASCII description of the |
1769 | * atom value pointed to by `val' on file descriptor `fd'. The return |
1770 | * value is the number of parsed characters. |
1771 | * |
1772 | * @item The @emph{ATOMformat()} is similar to @emph{ATOMprint()}. It |
1773 | * prints an atom on a newly allocated string. It must later be freed |
1774 | * with @strong{GDKfree}. The number of characters written is |
1775 | * returned. This is minimally the size of the allocated buffer. |
1776 | * |
1777 | * @item The @emph{ATOMdup()} makes a copy of the given atom. The |
1778 | * storage needed for this is allocated and should be removed by the |
1779 | * user. |
1780 | * @end itemize |
1781 | * |
1782 | * These wrapper functions correspond closely to the interface |
1783 | * functions one has to provide for a user-defined atom. They |
1784 | * basically (with exception of @emph{ATOMput()}, @emph{ATOMprint()} |
1785 | * and @emph{ATOMformat()}) just have the atom id parameter prepended |
1786 | * to them. |
1787 | */ |
1788 | |
1789 | /* atomFromStr returns the number of bytes of the input string that |
1790 | * were processed. atomToStr returns the length of the string |
1791 | * produced. Both functions return -1 on (any kind of) failure. If |
1792 | * *dst is not NULL, *len specifies the available space. If there is |
1793 | * not enough space, or if *dst is NULL, *dst will be freed (if not |
1794 | * NULL) and a new buffer will be allocated and returned in *dst. |
1795 | * *len will be set to reflect the actual size allocated. If |
1796 | * allocation fails, *dst will be NULL on return and *len is |
1797 | * undefined. In any case, if the function returns, *buf is either |
1798 | * NULL or a valid pointer and then *len is the size of the area *buf |
1799 | * points to. */ |
1800 | |
1801 | typedef struct { |
1802 | /* simple attributes */ |
1803 | char name[IDLENGTH]; |
1804 | uint8_t storage; /* stored as another type? */ |
1805 | bool linear; /* atom can be ordered linearly */ |
1806 | uint16_t size; /* fixed size of atom */ |
1807 | |
1808 | /* automatically generated fields */ |
1809 | const void *atomNull; /* global nil value */ |
1810 | |
1811 | /* generic (fixed + varsized atom) ADT functions */ |
1812 | ssize_t (*atomFromStr) (const char *src, size_t *len, void **dst, bool external); |
1813 | ssize_t (*atomToStr) (char **dst, size_t *len, const void *src, bool external); |
1814 | void *(*atomRead) (void *dst, stream *s, size_t cnt); |
1815 | gdk_return (*atomWrite) (const void *src, stream *s, size_t cnt); |
1816 | int (*atomCmp) (const void *v1, const void *v2); |
1817 | BUN (*atomHash) (const void *v); |
1818 | /* optional functions */ |
1819 | int (*atomFix) (const void *atom); |
1820 | int (*atomUnfix) (const void *atom); |
1821 | |
1822 | /* varsized atom-only ADT functions */ |
1823 | var_t (*atomPut) (Heap *, var_t *off, const void *src); |
1824 | void (*atomDel) (Heap *, var_t *atom); |
1825 | size_t (*atomLen) (const void *atom); |
1826 | void (*atomHeap) (Heap *, size_t); |
1827 | } atomDesc; |
1828 | |
1829 | gdk_export atomDesc BATatoms[]; |
1830 | gdk_export int GDKatomcnt; |
1831 | |
1832 | gdk_export int ATOMallocate(const char *nme); |
1833 | gdk_export int ATOMindex(const char *nme); |
1834 | |
1835 | gdk_export str ATOMname(int id); |
1836 | gdk_export size_t ATOMlen(int id, const void *v); |
1837 | gdk_export void *ATOMnil(int id); |
1838 | gdk_export int ATOMprint(int id, const void *val, stream *fd); |
1839 | gdk_export char *ATOMformat(int id, const void *val); |
1840 | |
1841 | gdk_export void *ATOMdup(int id, const void *val); |
1842 | |
1843 | /* |
1844 | * @- Built-in Accelerator Functions |
1845 | * |
1846 | * @multitable @columnfractions 0.08 0.7 |
1847 | * @item BAT* |
1848 | * @tab |
1849 | * BAThash (BAT *b) |
1850 | * @end multitable |
1851 | * |
1852 | * The current BAT implementation supports three search accelerators: |
1853 | * hashing, imprints, and ordered index. |
1854 | * |
1855 | * The routine BAThash makes sure that a hash accelerator on the tail of the |
1856 | * BAT exists. GDK_FAIL is returned upon failure to create the supportive |
1857 | * structures. |
1858 | */ |
1859 | gdk_export gdk_return BAThash(BAT *b); |
1860 | |
1861 | /* |
1862 | * @- Column Imprints Functions |
1863 | * |
1864 | * @multitable @columnfractions 0.08 0.7 |
1865 | * @item BAT* |
1866 | * @tab |
1867 | * BATimprints (BAT *b) |
1868 | * @end multitable |
1869 | * |
1870 | * The column imprints index structure. |
1871 | * |
1872 | */ |
1873 | |
1874 | gdk_export gdk_return BATimprints(BAT *b); |
1875 | gdk_export void IMPSdestroy(BAT *b); |
1876 | gdk_export lng IMPSimprintsize(BAT *b); |
1877 | |
1878 | /* The ordered index structure */ |
1879 | |
1880 | gdk_export gdk_return BATorderidx(BAT *b, bool stable); |
1881 | gdk_export gdk_return GDKmergeidx(BAT *b, BAT**a, int n_ar); |
1882 | gdk_export bool BATcheckorderidx(BAT *b); |
1883 | |
1884 | /* |
1885 | * @- Multilevel Storage Modes |
1886 | * |
1887 | * We should bring in the compressed mode as the first, maybe |
1888 | * built-in, mode. We could then add for instance HTTP remote storage, |
1889 | * SQL storage, and READONLY (cd-rom) storage. |
1890 | * |
1891 | * @+ GDK Utilities |
1892 | * Interfaces for memory management, error handling, thread management |
1893 | * and system information. |
1894 | * |
1895 | * @- GDK memory management |
1896 | * @multitable @columnfractions 0.08 0.8 |
1897 | * @item void* |
1898 | * @tab GDKmalloc (size_t size) |
1899 | * @item void* |
1900 | * @tab GDKzalloc (size_t size) |
1901 | * @item void* |
1902 | * @tab GDKmallocmax (size_t size, size_t *maxsize, int emergency) |
1903 | * @item void* |
1904 | * @tab GDKrealloc (void* pold, size_t size) |
1905 | * @item void* |
1906 | * @tab GDKreallocmax (void* pold, size_t size, size_t *maxsize, int emergency) |
1907 | * @item void |
1908 | * @tab GDKfree (void* blk) |
1909 | * @item str |
1910 | * @tab GDKstrdup (str s) |
1911 | * @item str |
1912 | * @tab GDKstrndup (str s, size_t n) |
1913 | * @end multitable |
1914 | * |
1915 | * These utilities are primarily used to maintain control over |
1916 | * critical interfaces to the C library. Moreover, the statistic |
1917 | * routines help in identifying performance and bottlenecks in the |
1918 | * current implementation. |
1919 | * |
1920 | * Compiled with -DMEMLEAKS the GDK memory management log their |
1921 | * activities, and are checked on inconsistent frees and memory leaks. |
1922 | */ |
1923 | |
1924 | /* we prefer to use vm_alloc routines on size > GDKmmap */ |
1925 | gdk_export void *GDKmmap(const char *path, int mode, size_t len); |
1926 | |
1927 | gdk_export size_t GDK_mem_maxsize; /* max allowed size of committed memory */ |
1928 | gdk_export size_t GDK_vm_maxsize; /* max allowed size of reserved vm */ |
1929 | |
1930 | gdk_export size_t GDKmem_cursize(void); /* RAM/swapmem that MonetDB has claimed from OS */ |
1931 | gdk_export size_t GDKvm_cursize(void); /* current MonetDB VM address space usage */ |
1932 | |
1933 | gdk_export void *GDKmalloc(size_t size) |
1934 | __attribute__((__malloc__)) |
1935 | __attribute__((__alloc_size__(1))) |
1936 | __attribute__((__warn_unused_result__)); |
1937 | gdk_export void *GDKzalloc(size_t size) |
1938 | __attribute__((__malloc__)) |
1939 | __attribute__((__alloc_size__(1))) |
1940 | __attribute__((__warn_unused_result__)); |
1941 | gdk_export void *GDKrealloc(void *pold, size_t size) |
1942 | __attribute__((__alloc_size__(2))) |
1943 | __attribute__((__warn_unused_result__)); |
1944 | gdk_export void GDKfree(void *blk); |
1945 | gdk_export str GDKstrdup(const char *s) |
1946 | __attribute__((__warn_unused_result__)); |
1947 | gdk_export str GDKstrndup(const char *s, size_t n) |
1948 | __attribute__((__warn_unused_result__)); |
1949 | |
1950 | #if !defined(NDEBUG) && !defined(STATIC_CODE_ANALYSIS) |
1951 | /* In debugging mode, replace GDKmalloc and other functions with a |
1952 | * version that optionally prints calling information. |
1953 | * |
1954 | * We have two versions of this code: one using a GNU C extension, and |
1955 | * one using traditional C. The GNU C version also prints the name of |
1956 | * the calling function. |
1957 | */ |
1958 | #ifdef __GNUC__ |
1959 | #define GDKmalloc(s) \ |
1960 | ({ \ |
1961 | size_t _size = (s); \ |
1962 | void *_res = GDKmalloc(_size); \ |
1963 | ALLOCDEBUG \ |
1964 | fprintf(stderr, \ |
1965 | "#GDKmalloc(%zu) -> %p" \ |
1966 | " %s[%s:%d]\n", \ |
1967 | _size, _res, \ |
1968 | __func__, __FILE__, __LINE__); \ |
1969 | _res; \ |
1970 | }) |
1971 | #define GDKzalloc(s) \ |
1972 | ({ \ |
1973 | size_t _size = (s); \ |
1974 | void *_res = GDKzalloc(_size); \ |
1975 | ALLOCDEBUG \ |
1976 | fprintf(stderr, \ |
1977 | "#GDKzalloc(%zu) -> %p" \ |
1978 | " %s[%s:%d]\n", \ |
1979 | _size, _res, \ |
1980 | __func__, __FILE__, __LINE__); \ |
1981 | _res; \ |
1982 | }) |
1983 | #define GDKrealloc(p, s) \ |
1984 | ({ \ |
1985 | void *_ptr = (p); \ |
1986 | size_t _size = (s); \ |
1987 | void *_res = GDKrealloc(_ptr, _size); \ |
1988 | ALLOCDEBUG \ |
1989 | fprintf(stderr, \ |
1990 | "#GDKrealloc(%p,%zu) -> %p" \ |
1991 | " %s[%s:%d]\n", \ |
1992 | _ptr, _size, _res, \ |
1993 | __func__, __FILE__, __LINE__); \ |
1994 | _res; \ |
1995 | }) |
1996 | #define GDKfree(p) \ |
1997 | ({ \ |
1998 | void *_ptr = (p); \ |
1999 | ALLOCDEBUG if (_ptr) \ |
2000 | fprintf(stderr, \ |
2001 | "#GDKfree(%p)" \ |
2002 | " %s[%s:%d]\n", \ |
2003 | _ptr, \ |
2004 | __func__, __FILE__, __LINE__); \ |
2005 | GDKfree(_ptr); \ |
2006 | }) |
2007 | #define GDKstrdup(s) \ |
2008 | ({ \ |
2009 | const char *_str = (s); \ |
2010 | void *_res = GDKstrdup(_str); \ |
2011 | ALLOCDEBUG \ |
2012 | fprintf(stderr, \ |
2013 | "#GDKstrdup(len=%zu) -> %p" \ |
2014 | " %s[%s:%d]\n", \ |
2015 | _str ? strlen(_str) : 0, \ |
2016 | _res, \ |
2017 | __func__, __FILE__, __LINE__); \ |
2018 | _res; \ |
2019 | }) |
2020 | #define GDKstrndup(s, n) \ |
2021 | ({ \ |
2022 | const char *_str = (s); \ |
2023 | size_t _n = (n); \ |
2024 | void *_res = GDKstrndup(_str, _n); \ |
2025 | ALLOCDEBUG \ |
2026 | fprintf(stderr, \ |
2027 | "#GDKstrndup(len=%zu) -> %p" \ |
2028 | " %s[%s:%d]\n", \ |
2029 | _n, \ |
2030 | _res, \ |
2031 | __func__, __FILE__, __LINE__); \ |
2032 | _res; \ |
2033 | }) |
2034 | #define GDKmmap(p, m, l) \ |
2035 | ({ \ |
2036 | const char *_path = (p); \ |
2037 | int _mode = (m); \ |
2038 | size_t _len = (l); \ |
2039 | void *_res = GDKmmap(_path, _mode, _len); \ |
2040 | ALLOCDEBUG \ |
2041 | fprintf(stderr, \ |
2042 | "#GDKmmap(%s,0x%x,%zu) -> %p" \ |
2043 | " %s[%s:%d]\n", \ |
2044 | _path ? _path : "NULL", \ |
2045 | (unsigned) _mode, _len, \ |
2046 | _res, \ |
2047 | __func__, __FILE__, __LINE__); \ |
2048 | _res; \ |
2049 | }) |
2050 | #define malloc(s) \ |
2051 | ({ \ |
2052 | size_t _size = (s); \ |
2053 | void *_res = malloc(_size); \ |
2054 | ALLOCDEBUG \ |
2055 | fprintf(stderr, \ |
2056 | "#malloc(%zu) -> %p" \ |
2057 | " %s[%s:%d]\n", \ |
2058 | _size, _res, \ |
2059 | __func__, __FILE__, __LINE__); \ |
2060 | _res; \ |
2061 | }) |
2062 | #define calloc(n, s) \ |
2063 | ({ \ |
2064 | size_t _nmemb = (n); \ |
2065 | size_t _size = (s); \ |
2066 | void *_res = calloc(_nmemb,_size); \ |
2067 | ALLOCDEBUG \ |
2068 | fprintf(stderr, \ |
2069 | "#calloc(%zu,%zu) -> %p" \ |
2070 | " %s[%s:%d]\n", \ |
2071 | _nmemb, _size, _res, \ |
2072 | __func__, __FILE__, __LINE__); \ |
2073 | _res; \ |
2074 | }) |
2075 | #define realloc(p, s) \ |
2076 | ({ \ |
2077 | void *_ptr = (p); \ |
2078 | size_t _size = (s); \ |
2079 | void *_res = realloc(_ptr, _size); \ |
2080 | ALLOCDEBUG \ |
2081 | fprintf(stderr, \ |
2082 | "#realloc(%p,%zu) -> %p" \ |
2083 | " %s[%s:%d]\n", \ |
2084 | _ptr, _size, _res, \ |
2085 | __func__, __FILE__, __LINE__); \ |
2086 | _res; \ |
2087 | }) |
2088 | #define free(p) \ |
2089 | ({ \ |
2090 | void *_ptr = (p); \ |
2091 | ALLOCDEBUG \ |
2092 | fprintf(stderr, \ |
2093 | "#free(%p)" \ |
2094 | " %s[%s:%d]\n", \ |
2095 | _ptr, \ |
2096 | __func__, __FILE__, __LINE__); \ |
2097 | free(_ptr); \ |
2098 | }) |
2099 | #else |
2100 | static inline void * |
2101 | GDKmalloc_debug(size_t size, const char *filename, int lineno) |
2102 | { |
2103 | void *res = GDKmalloc(size); |
2104 | ALLOCDEBUG fprintf(stderr, |
2105 | "#GDKmalloc(%zu) -> %p [%s:%d]\n" , |
2106 | size, res, filename, lineno); |
2107 | return res; |
2108 | } |
2109 | #define GDKmalloc(s) GDKmalloc_debug((s), __FILE__, __LINE__) |
2110 | static inline void * |
2111 | GDKzalloc_debug(size_t size, const char *filename, int lineno) |
2112 | { |
2113 | void *res = GDKzalloc(size); |
2114 | ALLOCDEBUG fprintf(stderr, |
2115 | "#GDKzalloc(%zu) -> %p [%s:%d]\n" , |
2116 | size, res, filename, lineno); |
2117 | return res; |
2118 | } |
2119 | #define GDKzalloc(s) GDKzalloc_debug((s), __FILE__, __LINE__) |
2120 | static inline void * |
2121 | GDKrealloc_debug(void *ptr, size_t size, const char *filename, int lineno) |
2122 | { |
2123 | void *res = GDKrealloc(ptr, size); |
2124 | ALLOCDEBUG fprintf(stderr, |
2125 | "#GDKrealloc(%p,%zu) -> " |
2126 | "%p [%s:%d]\n" , |
2127 | ptr, size, res, |
2128 | filename, lineno); |
2129 | return res; |
2130 | } |
2131 | #define GDKrealloc(p, s) GDKrealloc_debug((p), (s), __FILE__, __LINE__) |
2132 | static inline void |
2133 | GDKfree_debug(void *ptr, const char *filename, int lineno) |
2134 | { |
2135 | ALLOCDEBUG fprintf(stderr, "#GDKfree(%p) [%s:%d]\n" , |
2136 | ptr, filename, lineno); |
2137 | GDKfree(ptr); |
2138 | } |
2139 | #define GDKfree(p) GDKfree_debug((p), __FILE__, __LINE__) |
2140 | static inline char * |
2141 | GDKstrdup_debug(const char *str, const char *filename, int lineno) |
2142 | { |
2143 | void *res = GDKstrdup(str); |
2144 | ALLOCDEBUG fprintf(stderr, "#GDKstrdup(len=%zu) -> " |
2145 | "%p [%s:%d]\n" , |
2146 | str ? strlen(str) : 0, res, filename, lineno); |
2147 | return res; |
2148 | } |
2149 | #define GDKstrdup(s) GDKstrdup_debug((s), __FILE__, __LINE__) |
2150 | static inline char * |
2151 | GDKstrndup_debug(const char *str, size_t n, const char *filename, int lineno) |
2152 | { |
2153 | void *res = GDKstrndup(str, n); |
2154 | ALLOCDEBUG fprintf(stderr, "#GDKstrndup(len=%zu) -> " |
2155 | "%p [%s:%d]\n" , |
2156 | n, res, filename, lineno); |
2157 | return res; |
2158 | } |
2159 | #define GDKstrndup(s, n) GDKstrndup_debug((s), (n), __FILE__, __LINE__) |
2160 | static inline void * |
2161 | GDKmmap_debug(const char *path, int mode, size_t len, const char *filename, int lineno) |
2162 | { |
2163 | void *res = GDKmmap(path, mode, len); |
2164 | ALLOCDEBUG fprintf(stderr, |
2165 | "#GDKmmap(%s,0x%x,%zu) -> " |
2166 | "%p [%s:%d]\n" , |
2167 | path ? path : "NULL" , mode, len, |
2168 | res, filename, lineno); |
2169 | return res; |
2170 | } |
2171 | #define GDKmmap(p, m, l) GDKmmap_debug((p), (m), (l), __FILE__, __LINE__) |
2172 | static inline void * |
2173 | malloc_debug(size_t size, const char *filename, int lineno) |
2174 | { |
2175 | void *res = malloc(size); |
2176 | ALLOCDEBUG fprintf(stderr, |
2177 | "#malloc(%zu) -> %p [%s:%d]\n" , |
2178 | size, res, filename, lineno); |
2179 | return res; |
2180 | } |
2181 | #define malloc(s) malloc_debug((s), __FILE__, __LINE__) |
2182 | static inline void * |
2183 | calloc_debug(size_t nmemb, size_t size, const char *filename, int lineno) |
2184 | { |
2185 | void *res = calloc(nmemb, size); |
2186 | ALLOCDEBUG fprintf(stderr, |
2187 | "#calloc(%zu,%zu) -> " |
2188 | "%p [%s:%d]\n" , |
2189 | nmemb, size, res, filename, lineno); |
2190 | return res; |
2191 | } |
2192 | #define calloc(n, s) calloc_debug((n), (s), __FILE__, __LINE__) |
2193 | static inline void * |
2194 | realloc_debug(void *ptr, size_t size, const char *filename, int lineno) |
2195 | { |
2196 | void *res = realloc(ptr, size); |
2197 | ALLOCDEBUG fprintf(stderr, |
2198 | "#realloc(%p,%zu) -> " |
2199 | "%p [%s:%d]\n" , |
2200 | ptr, size, res, |
2201 | filename, lineno); |
2202 | return res; |
2203 | } |
2204 | #define realloc(p, s) realloc_debug((p), (s), __FILE__, __LINE__) |
2205 | static inline void |
2206 | free_debug(void *ptr, const char *filename, int lineno) |
2207 | { |
2208 | ALLOCDEBUG fprintf(stderr, "#free(%p) [%s:%d]\n" , |
2209 | ptr, filename, lineno); |
2210 | free(ptr); |
2211 | } |
2212 | #define free(p) free_debug((p), __FILE__, __LINE__) |
2213 | #endif |
2214 | #endif |
2215 | |
2216 | /* |
2217 | * @- GDK error handling |
2218 | * @multitable @columnfractions 0.08 0.7 |
2219 | * @item str |
2220 | * @tab |
2221 | * GDKmessage |
2222 | * @item bit |
2223 | * @tab |
2224 | * GDKfatal(str msg) |
2225 | * @item int |
2226 | * @tab |
2227 | * GDKwarning(str msg) |
2228 | * @item int |
2229 | * @tab |
2230 | * GDKerror (str msg) |
2231 | * @item int |
2232 | * @tab |
2233 | * GDKgoterrors () |
2234 | * @item int |
2235 | * @tab |
2236 | * GDKsyserror (str msg) |
2237 | * @item str |
2238 | * @tab |
2239 | * GDKerrbuf |
2240 | * @item |
2241 | * @tab GDKsetbuf (str buf) |
2242 | * @end multitable |
2243 | * |
2244 | * The error handling mechanism is not sophisticated yet. Experience |
2245 | * should show if this mechanism is sufficient. Most routines return |
2246 | * a pointer with zero to indicate an error. |
2247 | * |
2248 | * The error messages are also copied to standard output. The last |
2249 | * error message is kept around in a global variable. |
2250 | * |
2251 | * Error messages can also be collected in a user-provided buffer, |
2252 | * instead of being echoed to a stream. This is a thread-specific |
2253 | * issue; you want to decide on the error mechanism on a |
2254 | * thread-specific basis. This effect is established with |
2255 | * GDKsetbuf. The memory (de)allocation of this buffer, that must at |
2256 | * least be 1024 chars long, is entirely by the user. A pointer to |
2257 | * this buffer is kept in the pseudo-variable GDKerrbuf. Normally, |
2258 | * this is a NULL pointer. |
2259 | */ |
2260 | #define GDKMAXERRLEN 10240 |
2261 | #define GDKWARNING "!WARNING: " |
2262 | #define GDKERROR "!ERROR: " |
2263 | #define GDKMESSAGE "!OS: " |
2264 | #define GDKFATAL "!FATAL: " |
2265 | |
2266 | /* Data Distilleries uses ICU for internationalization of some MonetDB error messages */ |
2267 | |
2268 | gdk_export void GDKerror(_In_z_ _Printf_format_string_ const char *format, ...) |
2269 | __attribute__((__format__(__printf__, 1, 2))); |
2270 | gdk_export void GDKsyserror(_In_z_ _Printf_format_string_ const char *format, ...) |
2271 | __attribute__((__format__(__printf__, 1, 2))); |
2272 | #ifndef HAVE_EMBEDDED |
2273 | gdk_export _Noreturn void GDKfatal(_In_z_ _Printf_format_string_ const char *format, ...) |
2274 | __attribute__((__format__(__printf__, 1, 2))); |
2275 | #else |
2276 | gdk_export void GDKfatal(_In_z_ _Printf_format_string_ const char *format, ...) |
2277 | __attribute__((__format__(__printf__, 1, 2))); |
2278 | #endif |
2279 | gdk_export void GDKclrerr(void); |
2280 | |
2281 | #include "gdk_delta.h" |
2282 | #include "gdk_hash.h" |
2283 | #include "gdk_bbp.h" |
2284 | #include "gdk_utils.h" |
2285 | |
2286 | /* functions defined in gdk_bat.c */ |
2287 | gdk_export gdk_return void_replace_bat(BAT *b, BAT *p, BAT *u, bool force) |
2288 | __attribute__((__warn_unused_result__)); |
2289 | gdk_export gdk_return void_inplace(BAT *b, oid id, const void *val, bool force) |
2290 | __attribute__((__warn_unused_result__)); |
2291 | gdk_export BAT *BATattach(int tt, const char *heapfile, role_t role); |
2292 | |
2293 | #ifdef NATIVE_WIN32 |
2294 | #ifdef _MSC_VER |
2295 | #define fileno _fileno |
2296 | #endif |
2297 | #define fdopen _fdopen |
2298 | #define putenv _putenv |
2299 | #endif |
2300 | |
2301 | /* Return a pointer to the value contained in V. Also see VALget |
2302 | * which returns a void *. */ |
2303 | static inline const void * |
2304 | VALptr(const ValRecord *v) |
2305 | { |
2306 | switch (ATOMstorage(v->vtype)) { |
2307 | case TYPE_void: return (const void *) &v->val.oval; |
2308 | case TYPE_bte: return (const void *) &v->val.btval; |
2309 | case TYPE_sht: return (const void *) &v->val.shval; |
2310 | case TYPE_int: return (const void *) &v->val.ival; |
2311 | case TYPE_flt: return (const void *) &v->val.fval; |
2312 | case TYPE_dbl: return (const void *) &v->val.dval; |
2313 | case TYPE_lng: return (const void *) &v->val.lval; |
2314 | #ifdef HAVE_HGE |
2315 | case TYPE_hge: return (const void *) &v->val.hval; |
2316 | #endif |
2317 | case TYPE_str: return (const void *) v->val.sval; |
2318 | default: return (const void *) v->val.pval; |
2319 | } |
2320 | } |
2321 | |
2322 | /* |
2323 | * The kernel maintains a central table of all active threads. They |
2324 | * are indexed by their tid. The structure contains information on the |
2325 | * input/output file descriptors, which should be set before a |
2326 | * database operation is started. It ensures that output is delivered |
2327 | * to the proper client. |
2328 | * |
2329 | * The Thread structure should be ideally made directly accessible to |
2330 | * each thread. This speeds up access to tid and file descriptors. |
2331 | */ |
2332 | #define THREADS 1024 |
2333 | #define THREADDATA 3 |
2334 | |
2335 | typedef struct threadStruct { |
2336 | int tid; /* logical ID by MonetDB; val == index |
2337 | * into this array + 1 (0 is |
2338 | * invalid) */ |
2339 | ATOMIC_TYPE pid; /* thread id, 0 = unallocated */ |
2340 | str name; |
2341 | void *data[THREADDATA]; |
2342 | uintptr_t sp; |
2343 | } ThreadRec, *Thread; |
2344 | |
2345 | |
2346 | gdk_export int THRgettid(void); |
2347 | gdk_export Thread THRget(int tid); |
2348 | gdk_export MT_Id THRcreate(void (*f) (void *), void *arg, enum MT_thr_detach d, const char *name); |
2349 | gdk_export void THRdel(Thread t); |
2350 | gdk_export void THRsetdata(int, void *); |
2351 | gdk_export void *THRgetdata(int); |
2352 | gdk_export int THRhighwater(void); |
2353 | |
2354 | gdk_export void *THRdata[THREADDATA]; |
2355 | |
2356 | #define GDKstdout ((stream*)THRdata[0]) |
2357 | #define GDKstdin ((stream*)THRdata[1]) |
2358 | |
2359 | #define GDKerrbuf ((char*)THRgetdata(2)) |
2360 | #define GDKsetbuf(x) THRsetdata(2,(void *)(x)) |
2361 | |
2362 | #define THRget_errbuf(t) ((char*)t->data[2]) |
2363 | #define THRset_errbuf(t,b) (t->data[2] = b) |
2364 | |
2365 | #ifndef GDK_NOLINK |
2366 | |
2367 | static inline bat |
2368 | BBPcheck(bat x, const char *y) |
2369 | { |
2370 | if (!is_bat_nil(x)) { |
2371 | assert(x > 0); |
2372 | |
2373 | if (x < 0 || x >= getBBPsize() || BBP_logical(x) == NULL) { |
2374 | CHECKDEBUG fprintf(stderr,"#%s: range error %d\n" , y, (int) x); |
2375 | } else { |
2376 | return x; |
2377 | } |
2378 | } |
2379 | return 0; |
2380 | } |
2381 | |
2382 | static inline BAT * |
2383 | BATdescriptor(bat i) |
2384 | { |
2385 | BAT *b = NULL; |
2386 | |
2387 | if (BBPcheck(i, "BATdescriptor" )) { |
2388 | if (BBPfix(i) <= 0) |
2389 | return NULL; |
2390 | b = BBP_cache(i); |
2391 | if (b == NULL) |
2392 | b = BBPdescriptor(i); |
2393 | } |
2394 | return b; |
2395 | } |
2396 | |
2397 | static inline void * |
2398 | Tpos(BATiter *bi, BUN p) |
2399 | { |
2400 | bi->tvid = BUNtoid(bi->b, p); |
2401 | return (void*)&bi->tvid; |
2402 | } |
2403 | |
2404 | #endif |
2405 | |
2406 | /* |
2407 | * @+ Transaction Management |
2408 | * @multitable @columnfractions 0.08 0.7 |
2409 | * @item int |
2410 | * @tab |
2411 | * TMcommit () |
2412 | * @item int |
2413 | * @tab |
2414 | * TMabort () |
2415 | * @item int |
2416 | * @tab |
2417 | * TMsubcommit () |
2418 | * @end multitable |
2419 | * |
2420 | * MonetDB by default offers a global transaction environment. The |
2421 | * global transaction involves all activities on all persistent BATs |
2422 | * by all threads. Each global transaction ends with either TMabort |
2423 | * or TMcommit, and immediately starts a new transaction. TMcommit |
2424 | * implements atomic commit to disk on the collection of all |
2425 | * persistent BATs. For all persistent BATs, the global commit also |
2426 | * flushes the delta status for these BATs (see |
2427 | * BATcommit/BATabort). This allows to perform TMabort quickly in |
2428 | * memory (without re-reading all disk images from disk). The |
2429 | * collection of which BATs is persistent is also part of the global |
2430 | * transaction state. All BATs that where persistent at the last |
2431 | * commit, but were made transient since then, are made persistent |
2432 | * again by TMabort. In other words, BATs that are deleted, are only |
2433 | * physically deleted at TMcommit time. Until that time, rollback |
2434 | * (TMabort) is possible. |
2435 | * |
2436 | * Use of TMabort is currently NOT RECOMMENDED due to two bugs: |
2437 | * |
2438 | * @itemize |
2439 | * @item |
2440 | * TMabort after a failed %TMcommit@ does not bring us back to the |
2441 | * previous committed state; but to the state at the failed TMcommit. |
2442 | * @item |
2443 | * At runtime, TMabort does not undo BAT name changes, whereas a cold |
2444 | * MonetDB restart does. |
2445 | * @end itemize |
2446 | * |
2447 | * In effect, the problems with TMabort reduce the functionality of |
2448 | * the global transaction mechanism to consistent checkpointing at |
2449 | * each TMcommit. For many applications, consistent checkpointingis |
2450 | * enough. |
2451 | * |
2452 | * Extension modules exist that provide fine grained locking (lock |
2453 | * module) and Write Ahead Logging (sqlserver). Applications that |
2454 | * need more fine-grained transactions, should build this on top of |
2455 | * these extension primitives. |
2456 | * |
2457 | * TMsubcommit is intended to quickly add or remove BATs from the |
2458 | * persistent set. In both cases, rollback is not necessary, such that |
2459 | * the commit protocol can be accelerated. It comes down to writing a |
2460 | * new BBP.dir. |
2461 | * |
2462 | * Its parameter is a BAT-of-BATs (in the tail); the persistence |
2463 | * status of that BAT is committed. We assume here that the calling |
2464 | * thread has exclusive access to these bats. An error is reported if |
2465 | * you try to partially commit an already committed persistent BAT (it |
2466 | * needs the rollback mechanism). |
2467 | */ |
2468 | gdk_export gdk_return TMcommit(void); |
2469 | gdk_export void TMabort(void); |
2470 | gdk_export gdk_return TMsubcommit(BAT *bl); |
2471 | gdk_export gdk_return TMsubcommit_list(bat *subcommit, int cnt); |
2472 | |
2473 | /* |
2474 | * @- Delta Management |
2475 | * @multitable @columnfractions 0.08 0.6 |
2476 | * @item BAT * |
2477 | * @tab BATcommit (BAT *b) |
2478 | * @item BAT * |
2479 | * @tab BATfakeCommit (BAT *b) |
2480 | * @item BAT * |
2481 | * @tab BATundo (BAT *b) |
2482 | * @end multitable |
2483 | * |
2484 | * The BAT keeps track of updates with respect to a 'previous state'. |
2485 | * Do not confuse 'previous state' with 'stable' or |
2486 | * 'commited-on-disk', because these concepts are not always the |
2487 | * same. In particular, they diverge when BATcommit, BATfakecommit, |
2488 | * and BATundo are called explictly, bypassing the normal global |
2489 | * TMcommit protocol (some applications need that flexibility). |
2490 | * |
2491 | * BATcommit make the current BAT state the new 'stable state'. This |
2492 | * happens inside the global TMcommit on all persistent BATs previous |
2493 | * to writing all bats to persistent storage using a BBPsync. |
2494 | * |
2495 | * EXPERT USE ONLY: The routine BATfakeCommit updates the delta |
2496 | * information on BATs and clears the dirty bit. This avoids any |
2497 | * copying to disk. Expert usage only, as it bypasses the global |
2498 | * commit protocol, and changes may be lost after quitting or crashing |
2499 | * MonetDB. |
2500 | * |
2501 | * BATabort undo-s all changes since the previous state. The global |
2502 | * TMabort achieves a rollback to the previously committed state by |
2503 | * doing BATabort on all persistent bats. |
2504 | * |
2505 | * BUG: after a failed TMcommit, TMabort does not do anything because |
2506 | * TMcommit does the BATcommits @emph{before} attempting to sync to |
2507 | * disk instead of @sc{after} doing this. |
2508 | */ |
2509 | gdk_export void BATcommit(BAT *b); |
2510 | gdk_export void BATfakeCommit(BAT *b); |
2511 | gdk_export void BATundo(BAT *b); |
2512 | |
2513 | /* |
2514 | * @+ BAT Alignment and BAT views |
2515 | * @multitable @columnfractions 0.08 0.7 |
2516 | * @item int |
2517 | * @tab ALIGNsynced (BAT* b1, BAT* b2) |
2518 | * @item int |
2519 | * @tab ALIGNsync (BAT *b1, BAT *b2) |
2520 | * @item int |
2521 | * @tab ALIGNrelated (BAT *b1, BAT *b2) |
2522 | * |
2523 | * @item BAT* |
2524 | * @tab VIEWcreate (oid seq, BAT *b) |
2525 | * @item int |
2526 | * @tab isVIEW (BAT *b) |
2527 | * @item bat |
2528 | * @tab VIEWhparent (BAT *b) |
2529 | * @item bat |
2530 | * @tab VIEWtparent (BAT *b) |
2531 | * @item BAT* |
2532 | * @tab VIEWreset (BAT *b) |
2533 | * @end multitable |
2534 | * |
2535 | * Alignments of two columns of a BAT means that the system knows |
2536 | * whether these two columns are exactly equal. Relatedness of two |
2537 | * BATs means that one pair of columns (either head or tail) of both |
2538 | * BATs is aligned. The first property is checked by ALIGNsynced, the |
2539 | * latter by ALIGNrelated. |
2540 | * |
2541 | * All algebraic BAT commands propagate the properties - including |
2542 | * alignment properly on their results. |
2543 | * |
2544 | * VIEW BATs are BATs that lend their storage from a parent BAT. They |
2545 | * are just a descriptor that points to the data in this parent BAT. A |
2546 | * view is created with VIEWcreate. The cache id of the parent (if |
2547 | * any) is returned by VIEWtparent (otherwise it returns 0). |
2548 | * |
2549 | * VIEW bats are read-only!! |
2550 | * |
2551 | * VIEWreset creates a normal BAT with the same contents as its view |
2552 | * parameter (it converts void columns with seqbase!=nil to |
2553 | * materialized oid columns). |
2554 | */ |
2555 | gdk_export int ALIGNsynced(BAT *b1, BAT *b2); |
2556 | |
2557 | gdk_export void BATassertProps(BAT *b); |
2558 | |
2559 | #define BATPROPS_QUICK 0 /* only derive easy (non-resource consuming) properties */ |
2560 | #define BATPROPS_ALL 1 /* derive all possible properties; no matter what cost (key=hash) */ |
2561 | #define BATPROPS_CHECK 3 /* BATPROPS_ALL, but start from scratch and report illegally set properties */ |
2562 | |
2563 | gdk_export BAT *VIEWcreate(oid seq, BAT *b); |
2564 | gdk_export void VIEWbounds(BAT *b, BAT *view, BUN l, BUN h); |
2565 | |
2566 | #define ALIGNapp(x, y, f, e) \ |
2567 | do { \ |
2568 | if (!(f) && ((x)->batRestricted == BAT_READ || \ |
2569 | (x)->batSharecnt > 0)) { \ |
2570 | GDKerror("%s: access denied to %s, aborting.\n", \ |
2571 | (y), BATgetId(x)); \ |
2572 | return (e); \ |
2573 | } \ |
2574 | } while (false) |
2575 | |
2576 | /* the parentid in a VIEW is correct for the normal view. We must |
2577 | * correct for the reversed view. |
2578 | */ |
2579 | #define isVIEW(x) \ |
2580 | (assert((x)->batCacheid > 0), \ |
2581 | ((x)->theap.parentid || \ |
2582 | ((x)->tvheap && (x)->tvheap->parentid != (x)->batCacheid))) |
2583 | |
2584 | #define VIEWtparent(x) ((x)->theap.parentid) |
2585 | #define VIEWvtparent(x) ((x)->tvheap == NULL || (x)->tvheap->parentid == (x)->batCacheid ? 0 : (x)->tvheap->parentid) |
2586 | |
2587 | /* |
2588 | * @+ BAT Iterators |
2589 | * @multitable @columnfractions 0.15 0.7 |
2590 | * @item BATloop |
2591 | * @tab |
2592 | * (BAT *b; BUN p, BUN q) |
2593 | * @item BATloopDEL |
2594 | * @tab |
2595 | * (BAT *b; BUN p; BUN q; int dummy) |
2596 | * @item HASHloop |
2597 | * @tab |
2598 | * (BAT *b; Hash *h, size_t dummy; ptr value) |
2599 | * @item HASHloop_bte |
2600 | * @tab |
2601 | * (BAT *b; Hash *h, size_t idx; bte *value, BUN w) |
2602 | * @item HASHloop_sht |
2603 | * @tab |
2604 | * (BAT *b; Hash *h, size_t idx; sht *value, BUN w) |
2605 | * @item HASHloop_int |
2606 | * @tab |
2607 | * (BAT *b; Hash *h, size_t idx; int *value, BUN w) |
2608 | * @item HASHloop_flt |
2609 | * @tab |
2610 | * (BAT *b; Hash *h, size_t idx; flt *value, BUN w) |
2611 | * @item HASHloop_lng |
2612 | * @tab |
2613 | * (BAT *b; Hash *h, size_t idx; lng *value, BUN w) |
2614 | * @item HASHloop_hge |
2615 | * @tab |
2616 | * (BAT *b; Hash *h, size_t idx; hge *value, BUN w) |
2617 | * @item HASHloop_dbl |
2618 | * @tab |
2619 | * (BAT *b; Hash *h, size_t idx; dbl *value, BUN w) |
2620 | * @item HASHloop_str |
2621 | * @tab |
2622 | * (BAT *b; Hash *h, size_t idx; str value, BUN w) |
2623 | * @item HASHlooploc |
2624 | * @tab |
2625 | * (BAT *b; Hash *h, size_t idx; ptr value, BUN w) |
2626 | * @item HASHloopvar |
2627 | * @tab |
2628 | * (BAT *b; Hash *h, size_t idx; ptr value, BUN w) |
2629 | * @end multitable |
2630 | * |
2631 | * The @emph{BATloop()} looks like a function call, but is actually a |
2632 | * macro. |
2633 | * |
2634 | * @- simple sequential scan |
2635 | * The first parameter is a BAT, the p and q are BUN pointers, where p |
2636 | * is the iteration variable. |
2637 | */ |
2638 | #define BATloop(r, p, q) \ |
2639 | for (q = BUNlast(r), p = 0; p < q; p++) |
2640 | |
2641 | /* |
2642 | * @- hash-table supported loop over BUNs |
2643 | * The first parameter `b' is a BAT, the second (`h') should point to |
2644 | * `b->thash', and `v' a pointer to an atomic value (corresponding |
2645 | * to the head column of `b'). The 'hb' is an integer index, pointing |
2646 | * out the `hb'-th BUN. |
2647 | */ |
2648 | #define GDK_STREQ(l,r) (*(char*) (l) == *(char*) (r) && !strcmp(l,r)) |
2649 | |
2650 | #define HASHloop(bi, h, hb, v) \ |
2651 | for (hb = HASHget(h, HASHprobe((h), v)); \ |
2652 | hb != HASHnil(h); \ |
2653 | hb = HASHgetlink(h,hb)) \ |
2654 | if (ATOMcmp(h->type, v, BUNtail(bi, hb)) == 0) |
2655 | #define HASHloop_str_hv(bi, h, hb, v) \ |
2656 | for (hb = HASHget((h),((BUN *) (v))[-1]&(h)->mask); \ |
2657 | hb != HASHnil(h); \ |
2658 | hb = HASHgetlink(h,hb)) \ |
2659 | if (GDK_STREQ(v, BUNtvar(bi, hb))) |
2660 | #define HASHloop_str(bi, h, hb, v) \ |
2661 | for (hb = HASHget((h),strHash(v)&(h)->mask); \ |
2662 | hb != HASHnil(h); \ |
2663 | hb = HASHgetlink(h,hb)) \ |
2664 | if (GDK_STREQ(v, BUNtvar(bi, hb))) |
2665 | |
2666 | /* |
2667 | * HASHloops come in various flavors, from the general HASHloop, as |
2668 | * above, to specialized versions (for speed) where the type is known |
2669 | * (e.g. HASHloop_int), or the fact that the atom is fixed-sized |
2670 | * (HASHlooploc) or variable-sized (HASHloopvar). |
2671 | */ |
2672 | #define HASHlooploc(bi, h, hb, v) \ |
2673 | for (hb = HASHget(h, HASHprobe(h, v)); \ |
2674 | hb != HASHnil(h); \ |
2675 | hb = HASHgetlink(h,hb)) \ |
2676 | if (ATOMcmp(h->type, v, BUNtloc(bi, hb)) == 0) |
2677 | #define HASHloopvar(bi, h, hb, v) \ |
2678 | for (hb = HASHget(h,HASHprobe(h, v)); \ |
2679 | hb != HASHnil(h); \ |
2680 | hb = HASHgetlink(h,hb)) \ |
2681 | if (ATOMcmp(h->type, v, BUNtvar(bi, hb)) == 0) |
2682 | |
2683 | #define HASHloop_TYPE(bi, h, hb, v, TYPE) \ |
2684 | for (hb = HASHget(h, hash_##TYPE(h, v)); \ |
2685 | hb != HASHnil(h); \ |
2686 | hb = HASHgetlink(h,hb)) \ |
2687 | if (* (const TYPE *) (v) == * (const TYPE *) BUNtloc(bi, hb)) |
2688 | |
2689 | #define HASHloop_bte(bi, h, hb, v) HASHloop_TYPE(bi, h, hb, v, bte) |
2690 | #define HASHloop_sht(bi, h, hb, v) HASHloop_TYPE(bi, h, hb, v, sht) |
2691 | #define HASHloop_int(bi, h, hb, v) HASHloop_TYPE(bi, h, hb, v, int) |
2692 | #define HASHloop_lng(bi, h, hb, v) HASHloop_TYPE(bi, h, hb, v, lng) |
2693 | #ifdef HAVE_HGE |
2694 | #define HASHloop_hge(bi, h, hb, v) HASHloop_TYPE(bi, h, hb, v, hge) |
2695 | #endif |
2696 | #define HASHloop_flt(bi, h, hb, v) HASHloop_TYPE(bi, h, hb, v, flt) |
2697 | #define HASHloop_dbl(bi, h, hb, v) HASHloop_TYPE(bi, h, hb, v, dbl) |
2698 | |
2699 | /* |
2700 | * @+ Common BAT Operations |
2701 | * Much used, but not necessarily kernel-operations on BATs. |
2702 | * |
2703 | * For each BAT we maintain its dimensions as separately accessible |
2704 | * properties. They can be used to improve query processing at higher |
2705 | * levels. |
2706 | */ |
2707 | enum prop_t { |
2708 | GDK_MIN_VALUE = 3, /* smallest non-nil value in BAT */ |
2709 | GDK_MAX_VALUE, /* largest non-nil value in BAT */ |
2710 | GDK_HASH_MASK, /* last used hash mask */ |
2711 | }; |
2712 | |
2713 | /* |
2714 | * @- BAT relational operators |
2715 | * |
2716 | * The full-materialization policy intermediate results in MonetDB |
2717 | * means that a join can produce an arbitrarily large result and choke |
2718 | * the system. The Data Distilleries tool therefore first computes the |
2719 | * join result size before the actual join (better waste time than |
2720 | * crash the server). To exploit that perfect result size knowledge, |
2721 | * an result-size estimate parameter was added to all equi-join |
2722 | * implementations. TODO: add this for |
2723 | * semijoin/select/unique/diff/intersect |
2724 | * |
2725 | * @- modes for thethajoin |
2726 | */ |
2727 | #define JOIN_EQ 0 |
2728 | #define JOIN_LT (-1) |
2729 | #define JOIN_LE (-2) |
2730 | #define JOIN_GT 1 |
2731 | #define JOIN_GE 2 |
2732 | #define JOIN_BAND 3 |
2733 | #define JOIN_NE (-3) |
2734 | |
2735 | gdk_export BAT *BATselect(BAT *b, BAT *s, const void *tl, const void *th, bool li, bool hi, bool anti); |
2736 | gdk_export BAT *BATthetaselect(BAT *b, BAT *s, const void *val, const char *op); |
2737 | |
2738 | gdk_export BAT *BATconstant(oid hseq, int tt, const void *val, BUN cnt, role_t role); |
2739 | gdk_export gdk_return BATsubcross(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr) |
2740 | __attribute__((__warn_unused_result__)); |
2741 | |
2742 | gdk_export gdk_return BATleftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, BUN estimate) |
2743 | __attribute__((__warn_unused_result__)); |
2744 | gdk_export gdk_return BATouterjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, BUN estimate) |
2745 | __attribute__((__warn_unused_result__)); |
2746 | gdk_export gdk_return BATthetajoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, int op, bool nil_matches, BUN estimate) |
2747 | __attribute__((__warn_unused_result__)); |
2748 | gdk_export gdk_return BATsemijoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, BUN estimate) |
2749 | __attribute__((__warn_unused_result__)); |
2750 | gdk_export BAT *BATintersect(BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, BUN estimate); |
2751 | gdk_export BAT *BATdiff(BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, bool not_in, BUN estimate); |
2752 | gdk_export gdk_return BATjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, BUN estimate) |
2753 | __attribute__((__warn_unused_result__)); |
2754 | gdk_export gdk_return BATbandjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, const void *c1, const void *c2, bool li, bool hi, BUN estimate) |
2755 | __attribute__((__warn_unused_result__)); |
2756 | gdk_export gdk_return BATrangejoin(BAT **r1p, BAT **r2p, BAT *l, BAT *rl, BAT *rh, BAT *sl, BAT *sr, bool li, bool hi, bool anti, bool symmetric, BUN estimate) |
2757 | __attribute__((__warn_unused_result__)); |
2758 | gdk_export BAT *BATproject(BAT *l, BAT *r); |
2759 | gdk_export BAT *BATprojectchain(BAT **bats); |
2760 | |
2761 | gdk_export BAT *BATslice(BAT *b, BUN low, BUN high); |
2762 | |
2763 | gdk_export BAT *BATunique(BAT *b, BAT *s); |
2764 | |
2765 | gdk_export BAT *BATmergecand(BAT *a, BAT *b); |
2766 | gdk_export BAT *BATintersectcand(BAT *a, BAT *b); |
2767 | gdk_export BAT *BATdiffcand(BAT *a, BAT *b); |
2768 | |
2769 | gdk_export gdk_return BATfirstn(BAT **topn, BAT **gids, BAT *b, BAT *cands, BAT *grps, BUN n, bool asc, bool nilslast, bool distinct) |
2770 | __attribute__((__warn_unused_result__)); |
2771 | |
2772 | #include "gdk_cand.h" |
2773 | #include "gdk_calc.h" |
2774 | |
2775 | /* |
2776 | * @- BAT sample operators |
2777 | * |
2778 | * @multitable @columnfractions 0.08 0.7 |
2779 | * @item BAT * |
2780 | * @tab BATsample (BAT *b, n) |
2781 | * @end multitable |
2782 | * |
2783 | * The routine BATsample returns a random sample on n BUNs of a BAT. |
2784 | * |
2785 | */ |
2786 | gdk_export BAT *BATsample(BAT *b, BUN n); |
2787 | gdk_export BAT *BATsample_with_seed(BAT *b, BUN n, unsigned seed); |
2788 | |
2789 | /* |
2790 | * |
2791 | */ |
2792 | #define MAXPARAMS 32 |
2793 | |
2794 | #endif /* _GDK_H_ */ |
2795 | |