1/*
2 * This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
5 *
6 * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V.
7 */
8
9/*
10 * @a M. L. Kersten, P. Boncz, N. J. Nes
11 * @* BAT Buffer Pool (BBP)
12 * The BATs created and loaded are collected in a BAT buffer pool.
13 * The Bat Buffer Pool has a number of functions:
14 * @table @code
15 *
16 * @item administration and lookup
17 * The BBP is a directory which contains status information about all
18 * known BATs. This interface may be used very heavily, by
19 * data-intensive applications. To eliminate all overhead, read-only
20 * access to the BBP may be done by table-lookups. The integer index
21 * type for these lookups is @emph{bat}, as retrieved by
22 * @emph{b->batCacheid}. The @emph{bat} zero is reserved for the nil
23 * bat.
24 *
25 * @item persistence
26 * The BBP is made persistent by saving it to the dictionary file
27 * called @emph{BBP.dir} in the database.
28 *
29 * When the number of BATs rises, having all files in one directory
30 * becomes a bottleneck. The BBP therefore implements a scheme that
31 * distributes all BATs in a growing directory tree with at most 64
32 * BATs stored in one node.
33 *
34 * @item buffer management
35 * The BBP is responsible for loading and saving of BATs to disk. It
36 * also contains routines to unload BATs from memory when memory
37 * resources get scarce. For this purpose, it administers BAT memory
38 * reference counts (to know which BATs can be unloaded) and BAT usage
39 * statistics (it unloads the least recently used BATs).
40 *
41 * @item recovery
42 * When the database is closed or during a run-time syncpoint, the
43 * system tables must be written to disk in a safe way, that is immune
44 * for system failures (like disk full). To do so, the BBP implements
45 * an atomic commit and recovery protocol: first all files to be
46 * overwritten are moved to a BACKUP/ dir. If that succeeds, the
47 * writes are done. If that also fully succeeds the BACKUP/ dir is
48 * renamed to DELETE_ME/ and subsequently deleted. If not, all files
49 * in BACKUP/ are moved back to their original location.
50 *
51 * @item unloading
52 * Bats which have a logical reference (ie. a lrefs > 0) but no memory
53 * reference (refcnt == 0) can be unloaded. Unloading dirty bats
54 * means, moving the original (committed version) to the BACKUP/ dir
55 * and saving the bat. This complicates the commit and recovery/abort
56 * issues. The commit has to check if the bat is already moved. And
57 * The recovery has to always move back the files from the BACKUP/
58 * dir.
59 *
60 * @item reference counting
 * Bats have two kinds of references: logical and physical
62 * (pointer) ones. The logical references are administered by
63 * BBPretain/BBPrelease, the physical ones by BBPfix/BBPunfix.
64 *
65 * @item share counting
 * Views use the heaps of their parent bats. To safeguard this, the
67 * parent has a shared counter, which is incremented and decremented
68 * using BBPshare and BBPunshare. These functions make sure the parent
69 * is memory resident as required because of the 'pointer' sharing.
70 * @end table
71 */
72
73#include "monetdb_config.h"
74#include "gdk.h"
75#include "gdk_private.h"
76#include "gdk_storage.h"
77#include "mutils.h"
78
79#ifndef F_OK
80#define F_OK 0
81#endif
82#ifdef _MSC_VER
83#define access(f, m) _access(f, m)
84#endif
85
86/*
87 * The BBP has a fixed address, so re-allocation due to a growing BBP
88 * caused by one thread does not disturb reads to the old entries by
89 * another. This is implemented using anonymous virtual memory;
90 * extensions on the same address are guaranteed because a large
91 * non-committed VM area is requested initially. New slots in the BBP
92 * are found in O(1) by keeping a freelist that uses the 'next' field
93 * in the BBPrec records.
94 */
95BBPrec *BBP[N_BBPINIT]; /* fixed base VM address of BBP array */
96bat BBPlimit = 0; /* current committed VM BBP array */
97static ATOMIC_TYPE BBPsize = ATOMIC_VAR_INIT(0); /* current used size of BBP array */
98
99struct BBPfarm_t BBPfarms[MAXFARMS];
100
101#define KITTENNAP 1 /* used to suspend processing */
102#define BBPNONAME "." /* filler for no name in BBP.dir */
103/*
104 * The hash index uses a bucket index (int array) of size mask that is
105 * tuned for perfect hashing (1 lookup). The bucket chain uses the
106 * 'next' field in the BBPrec records.
107 */
108bat *BBP_hash = NULL; /* BBP logical name hash buckets */
109bat BBP_mask = 0; /* number of buckets = & mask */
110
111static gdk_return BBPfree(BAT *b, const char *calledFrom);
112static void BBPdestroy(BAT *b);
113static void BBPuncacheit(bat bid, bool unloaddesc);
114static gdk_return BBPprepare(bool subcommit);
115static BAT *getBBPdescriptor(bat i, bool lock);
116static gdk_return BBPbackup(BAT *b, bool subcommit);
117static gdk_return BBPdir(int cnt, bat *subcommit);
118
119#ifdef HAVE_HGE
120/* start out by saying we have no hge, but as soon as we've seen one,
121 * we'll always say we do have it */
122static bool havehge = false;
123#endif
124
125#define BBPnamecheck(s) (BBPtmpcheck(s) ? strtol((s) + 4, NULL, 8) : 0)
126
127static void
128BBP_insert(bat i)
129{
130 bat idx = (bat) (strHash(BBP_logical(i)) & BBP_mask);
131
132 BBP_next(i) = BBP_hash[idx];
133 BBP_hash[idx] = i;
134}
135
136static void
137BBP_delete(bat i)
138{
139 bat *h = BBP_hash;
140 const char *s = BBP_logical(i);
141 bat idx = (bat) (strHash(s) & BBP_mask);
142
143 for (h += idx; (i = *h) != 0; h = &BBP_next(i)) {
144 if (strcmp(BBP_logical(i), s) == 0) {
145 *h = BBP_next(i);
146 break;
147 }
148 }
149}
150
151bat
152getBBPsize(void)
153{
154 return (bat) ATOMIC_GET(&BBPsize);
155}
156
157
158/*
159 * @+ BBP Consistency and Concurrency
160 * While GDK provides the basic building blocks for an ACID system, in
 * itself it is not such a system, as this would entail too much
162 * overhead that is often not needed. Hence, some consistency control
163 * is left to the user. The first important user constraint is that if
164 * a user updates a BAT, (s)he himself must assure that no-one else
165 * accesses this BAT.
166 *
167 * Concerning buffer management, the BBP carries out a swapping
168 * policy. BATs are kept in memory till the memory is full. If the
169 * memory is full, the malloc functions initiate BBP trim actions,
170 * that unload the coldest BATs that have a zero reference count. The
171 * second important user constraint is therefore that a user may only
172 * manipulate live BAT data in memory if it is sure that there is at
173 * least one reference count to that BAT.
174 *
175 * The main BBP array is protected by two locks:
176 * @table @code
 * @item GDKcacheLock
178 * this lock guards the free slot management in the BBP array. The
179 * BBP operations that allocate a new slot for a new BAT
180 * (@emph{BBPinit},@emph{BBPcacheit}), delete the slot of a destroyed
181 * BAT (@emph{BBPreclaim}), or rename a BAT (@emph{BBPrename}), hold
 * this lock. It also protects all BAT (re)naming actions including
183 * (read and write) in the hash table with BAT names.
184 * @item GDKswapLock
185 * this lock guards the swap (loaded/unloaded) status of the
186 * BATs. Hence, all BBP routines that influence the swapping policy,
187 * or actually carry out the swapping policy itself, acquire this lock
188 * (e.g. @emph{BBPfix},@emph{BBPunfix}). Note that this also means
189 * that updates to the BBP_status indicator array must be protected by
190 * GDKswapLock.
191 *
192 * To reduce contention GDKswapLock was split into multiple locks; it
193 * is now an array of lock pointers which is accessed by
194 * GDKswapLock(bat)
195 * @end table
196 *
197 * Routines that need both locks should first acquire the locks in the
198 * GDKswapLock array (in ascending order) and then GDKcacheLock (and
199 * release them in reverse order).
200 *
201 * To obtain maximum speed, read operations to existing elements in
202 * the BBP are unguarded. As said, it is the users responsibility that
203 * the BAT that is being read is not being modified. BBP update
204 * actions that modify the BBP data structure itself are locked by the
205 * BBP functions themselves. Hence, multiple concurrent BBP read
206 * operations may be ongoing while at the same time at most one BBP
207 * write operation @strong{on a different BAT} is executing. This
208 * holds for accesses to the public (quasi-) arrays @emph{BBPcache},
209 * @emph{BBPstatus} and @emph{BBPrefs}.
210 * These arrays are called quasi as now they are
211 * actually stored together in one big BBPrec array called BBP, that
212 * is allocated in anonymous VM space, so we can reallocate this
213 * structure without changing the base address (a crucial feature if
214 * read actions are to go on unlocked while other entries in the BBP
215 * may be modified).
216 */
217static volatile MT_Id locked_by = 0;
218
219#define BBP_unload_inc() \
220 do { \
221 MT_lock_set(&GDKunloadLock); \
222 BBPunloadCnt++; \
223 MT_lock_unset(&GDKunloadLock); \
224 } while (0)
225
226#define BBP_unload_dec() \
227 do { \
228 MT_lock_set(&GDKunloadLock); \
229 --BBPunloadCnt; \
230 assert(BBPunloadCnt >= 0); \
231 MT_lock_unset(&GDKunloadLock); \
232 } while (0)
233
234static int BBPunloadCnt = 0;
235static MT_Lock GDKunloadLock = MT_LOCK_INITIALIZER("GDKunloadLock");
236
237void
238BBPlock(void)
239{
240 int i;
241
242 /* wait for all pending unloads to finish */
243 MT_lock_set(&GDKunloadLock);
244 while (BBPunloadCnt > 0) {
245 MT_lock_unset(&GDKunloadLock);
246 MT_sleep_ms(1);
247 MT_lock_set(&GDKunloadLock);
248 }
249
250 for (i = 0; i <= BBP_THREADMASK; i++)
251 MT_lock_set(&GDKtrimLock(i));
252 for (i = 0; i <= BBP_THREADMASK; i++)
253 MT_lock_set(&GDKcacheLock(i));
254 for (i = 0; i <= BBP_BATMASK; i++)
255 MT_lock_set(&GDKswapLock(i));
256 locked_by = MT_getpid();
257
258 MT_lock_unset(&GDKunloadLock);
259}
260
261void
262BBPunlock(void)
263{
264 int i;
265
266 for (i = BBP_BATMASK; i >= 0; i--)
267 MT_lock_unset(&GDKswapLock(i));
268 for (i = BBP_THREADMASK; i >= 0; i--)
269 MT_lock_unset(&GDKcacheLock(i));
270 locked_by = 0;
271 for (i = BBP_THREADMASK; i >= 0; i--)
272 MT_lock_unset(&GDKtrimLock(i));
273}
274
275
276static gdk_return
277BBPinithash(int j)
278{
279 bat i = (bat) ATOMIC_GET(&BBPsize);
280
281 assert(j >= 0 && j <= BBP_THREADMASK);
282 for (BBP_mask = 1; (BBP_mask << 1) <= BBPlimit; BBP_mask <<= 1)
283 ;
284 BBP_hash = (bat *) GDKzalloc(BBP_mask * sizeof(bat));
285 if (BBP_hash == NULL) {
286 GDKerror("BBPinithash: cannot allocate memory\n");
287 return GDK_FAIL;
288 }
289 BBP_mask--;
290
291 while (--i > 0) {
292 const char *s = BBP_logical(i);
293
294 if (s) {
295 if (*s != '.' && BBPtmpcheck(s) == 0) {
296 BBP_insert(i);
297 }
298 } else {
299 BBP_next(i) = BBP_free(j);
300 BBP_free(j) = i;
301 if (++j > BBP_THREADMASK)
302 j = 0;
303 }
304 }
305 return GDK_SUCCEED;
306}
307
308int
309BBPselectfarm(role_t role, int type, enum heaptype hptype)
310{
311 int i;
312
313 (void) type; /* may use in future */
314 (void) hptype; /* may use in future */
315
316 if (GDKinmemory())
317 return 0;
318
319#ifndef PERSISTENTHASH
320 if (hptype == hashheap)
321 role = TRANSIENT;
322#endif
323#ifndef PERSISTENTIDX
324 if (hptype == orderidxheap)
325 role = TRANSIENT;
326#endif
327 for (i = 0; i < MAXFARMS; i++)
328 if (BBPfarms[i].dirname && BBPfarms[i].roles & (1 << (int) role))
329 return i;
330 /* must be able to find farms for TRANSIENT and PERSISTENT */
331 assert(role != TRANSIENT && role != PERSISTENT);
332 return -1;
333}
334
335/*
336 * BBPextend must take the trimlock, as it is called when other BBP
337 * locks are held and it will allocate memory.
338 */
339static gdk_return
340BBPextend(int idx, bool buildhash)
341{
342 if ((bat) ATOMIC_GET(&BBPsize) >= N_BBPINIT * BBPINIT) {
343 GDKerror("BBPextend: trying to extend BAT pool beyond the "
344 "limit (%d)\n", N_BBPINIT * BBPINIT);
345 return GDK_FAIL;
346 }
347
348 /* make sure the new size is at least BBPsize large */
349 while (BBPlimit < (bat) ATOMIC_GET(&BBPsize)) {
350 assert(BBP[BBPlimit >> BBPINITLOG] == NULL);
351 BBP[BBPlimit >> BBPINITLOG] = GDKzalloc(BBPINIT * sizeof(BBPrec));
352 if (BBP[BBPlimit >> BBPINITLOG] == NULL) {
353 GDKerror("BBPextend: failed to extend BAT pool\n");
354 return GDK_FAIL;
355 }
356 BBPlimit += BBPINIT;
357 }
358
359 if (buildhash) {
360 int i;
361
362 GDKfree(BBP_hash);
363 BBP_hash = NULL;
364 for (i = 0; i <= BBP_THREADMASK; i++)
365 BBP_free(i) = 0;
366 if (BBPinithash(idx) != GDK_SUCCEED)
367 return GDK_FAIL;
368 }
369 return GDK_SUCCEED;
370}
371
372static gdk_return
373recover_dir(int farmid, bool direxists)
374{
375 if (direxists) {
376 /* just try; don't care about these non-vital files */
377 if (GDKunlink(farmid, BATDIR, "BBP", "bak") != GDK_SUCCEED)
378 fprintf(stderr, "#recover_dir: unlink of BBP.bak failed\n");
379 if (GDKmove(farmid, BATDIR, "BBP", "dir", BATDIR, "BBP", "bak") != GDK_SUCCEED)
380 fprintf(stderr, "#recover_dir: rename of BBP.dir to BBP.bak failed\n");
381 }
382 return GDKmove(farmid, BAKDIR, "BBP", "dir", BATDIR, "BBP", "dir");
383}
384
385static gdk_return BBPrecover(int farmid);
386static gdk_return BBPrecover_subdir(void);
387static bool BBPdiskscan(const char *, size_t);
388
389#ifdef GDKLIBRARY_NIL_NAN
390static gdk_return
391fixfltheap(BAT *b)
392{
393 long_str filename;
394 Heap h1; /* old heap */
395 Heap h2; /* new heap */
396 const char *nme, *bnme;
397 char *srcdir;
398 BUN i;
399 bool nofix = true;
400
401 nme = BBP_physical(b->batCacheid);
402 srcdir = GDKfilepath(NOFARM, BATDIR, nme, NULL);
403 if (srcdir == NULL) {
404 return GDK_FAIL;
405 }
406 *strrchr(srcdir, DIR_SEP) = 0;
407
408 if ((bnme = strrchr(nme, DIR_SEP)) != NULL)
409 bnme++;
410 else
411 bnme = nme;
412 sprintf(filename, "BACKUP%c%s", DIR_SEP, bnme);
413
414 /* make backup of heap */
415 if (GDKmove(b->theap.farmid, srcdir, bnme, "tail", BAKDIR, bnme, "tail") != GDK_SUCCEED) {
416 GDKfree(srcdir);
417 GDKerror("fixfltheap: cannot make backup of %s.tail\n", nme);
418 return GDK_FAIL;
419 }
420 /* load old heap */
421 h1 = b->theap;
422 strconcat_len(h1.filename, sizeof(h1.filename),
423 filename, ".tail", NULL);
424 h1.base = NULL;
425 h1.dirty = false;
426 if (HEAPload(&h1, filename, "tail", false) != GDK_SUCCEED) {
427 GDKfree(srcdir);
428 GDKerror("fixfltheap: loading old tail heap "
429 "for BAT %d failed\n", b->batCacheid);
430 return GDK_FAIL;
431 }
432
433 /* create new heap */
434 h2 = b->theap;
435 strconcat_len(h2.filename, sizeof(h2.filename), nme, ".tail", NULL);
436 if (HEAPalloc(&h2, b->batCapacity, b->twidth) != GDK_SUCCEED) {
437 GDKfree(srcdir);
438 HEAPfree(&h1, false);
439 GDKerror("fixfltheap: allocating new tail heap "
440 "for BAT %d failed\n", b->batCacheid);
441 return GDK_FAIL;
442 }
443 h2.dirty = true;
444 h2.free = h1.free;
445
446 switch (b->ttype) {
447 case TYPE_flt: {
448 const flt *restrict o = (const flt *) h1.base;
449 flt *restrict n = (flt *) h2.base;
450
451 for (i = 0; i < b->batCount; i++) {
452 if (o[i] == GDK_flt_min) {
453 b->tnil = true;
454 n[i] = flt_nil;
455 nofix = false;
456 } else {
457 n[i] = o[i];
458 }
459 }
460 break;
461 }
462 case TYPE_dbl: {
463 const dbl *restrict o = (const dbl *) h1.base;
464 dbl *restrict n = (dbl *) h2.base;
465
466 for (i = 0; i < b->batCount; i++) {
467 if (o[i] == GDK_dbl_min) {
468 b->tnil = true;
469 n[i] = dbl_nil;
470 nofix = false;
471 } else {
472 n[i] = o[i];
473 }
474 }
475 break;
476 }
477 default: {
478 struct mbr {
479 float xmin, ymin, xmax, ymax;
480 };
481 const struct mbr *restrict o = (const struct mbr *) h1.base;
482 struct mbr *restrict n = (struct mbr *) h2.base;
483
484 assert(strcmp(ATOMunknown_name(b->ttype), "mbr") == 0);
485 assert(b->twidth == 4 * sizeof(flt));
486
487 for (i = 0; i < b->batCount; i++) {
488 if (o[i].xmin == GDK_flt_min ||
489 o[i].xmax == GDK_flt_min ||
490 o[i].ymin == GDK_flt_min ||
491 o[i].ymax == GDK_flt_min) {
492 b->tnil = true;
493 n[i].xmin = n[i].xmax = n[i].ymin = n[i].ymax = flt_nil;
494 nofix = false;
495 } else {
496 n[i] = o[i];
497 }
498 }
499 break;
500 }
501 }
502
503 /* cleanup */
504 HEAPfree(&h1, false);
505 if (nofix) {
506 /* didn't fix anything, move backup back */
507 HEAPfree(&h2, true);
508 if (GDKmove(b->theap.farmid, BAKDIR, bnme, "tail", srcdir, bnme, "tail") != GDK_SUCCEED) {
509 GDKfree(srcdir);
510 GDKerror("fixfltheap: cannot restore backup of %s.tail\n", nme);
511 return GDK_FAIL;
512 }
513 } else {
514 /* heap was fixed */
515 b->batDirtydesc = true;
516 if (HEAPsave(&h2, nme, "tail") != GDK_SUCCEED) {
517 HEAPfree(&h2, false);
518 GDKfree(srcdir);
519 GDKerror("fixfltheap: saving heap failed\n");
520 return GDK_FAIL;
521 }
522 HEAPfree(&h2, false);
523 b->theap = h2;
524 }
525 GDKfree(srcdir);
526 return GDK_SUCCEED;
527}
528
529static gdk_return
530fixfloatbats(void)
531{
532 bat bid;
533 BAT *b;
534 char filename[FILENAME_MAX];
535 FILE *fp;
536 size_t len;
537 int written;
538
539 for (bid = 1; bid < (bat) ATOMIC_GET(&BBPsize); bid++) {
540 if ((b = BBP_desc(bid)) == NULL) {
541 /* not a valid BAT */
542 continue;
543 }
544 if (BBP_logical(bid) &&
545 (len = strlen(BBP_logical(bid))) > 12 &&
546 strcmp(BBP_logical(bid) + len - 12, "_catalog_nme") == 0) {
547 /* this is one of the files used by the
548 * logger. We need to communicate to the
549 * logger that it also needs to do a
550 * conversion. That is done by creating a
551 * file here based on the name of this BAT. */
552 written = snprintf(filename, sizeof(filename),
553 "%s/%.*s_nil-nan-convert",
554 BBPfarms[0].dirname,
555 (int) (len - 12), BBP_logical(bid));
556 if (written == -1 || written >= FILENAME_MAX) {
557 GDKerror("fixfloatbats: cannot create file %s has a very large pathname\n",
558 filename);
559 return GDK_FAIL;
560 }
561 fp = fopen(filename, "w");
562 if (fp == NULL) {
563 GDKsyserror("fixfloatbats: cannot create file %s\n",
564 filename);
565 return GDK_FAIL;
566 }
567 fclose(fp);
568 }
569 if (b->batCount == 0 || b->tnonil) {
570 /* no NILs to convert */
571 continue;
572 }
573 if (b->ttype < 0) {
574 const char *anme;
575
576 /* as yet unknown tail column type */
577 anme = ATOMunknown_name(b->ttype);
578 /* known string types */
579 if (strcmp(anme, "mbr") != 0)
580 continue;
581 } else if (b->ttype != TYPE_flt && b->ttype != TYPE_dbl)
582 continue;
583 if (fixfltheap(b) != GDK_SUCCEED)
584 return GDK_FAIL;
585 }
586 return GDK_SUCCEED;
587}
588#endif
589
590#ifdef GDKLIBRARY_OLDDATE
/* Gregorian leap year rule, and the number of days in year y */
#define leapyear(y)	((y) % 4 == 0 && ((y) % 100 != 0 || (y) % 400 == 0))
#define YEARDAYS(y)	(leapyear(y) ? 366 : 365)

/* number of days in the year up to and including month m (index 0
 * is the start of the year), for leap years and for other years */
static const int CUMLEAPDAYS[13] = {
	0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366
};
static const int CUMDAYS[13] = {
	0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365
};

/*
 * Leap day correction for the given year number: the 4-fold years
 * plus the 400-fold years minus the 100-fold years (all with C
 * integer division), plus one for non-negative years.  The result
 * may be negative.
 */
static int
leapyears(int year)
{
	return year / 4 + year / 400 - year / 100 + (year >= 0);
}

/* layout of the new date and timestamp values */
#define YEAR_OFFSET	4712
#define YEAR_MIN	(-YEAR_OFFSET)
#define DTDAY_WIDTH	5	/* 1..28/29/30/31, depending on month */
#define DTDAY_SHIFT	0
#define DTMONTH_WIDTH	21	/* enough for 174761 years */
#define DTMONTH_SHIFT	(DTDAY_WIDTH+DTDAY_SHIFT)
#define YEAR_MAX	(YEAR_MIN+(1<<DTMONTH_WIDTH)/12-1)
#define mkdate(d, m, y)	(((((y) + YEAR_OFFSET) * 12 + (m) - 1) << DTMONTH_SHIFT) \
			 | ((d) << DTDAY_SHIFT))
#define TSTIME_WIDTH	37	/* [0..24*60*60*1000000) */
#define TSTIME_SHIFT	0
#define TSDATE_WIDTH	(DTDAY_WIDTH+DTMONTH_WIDTH)
#define TSDATE_SHIFT	(TSTIME_SHIFT+TSTIME_WIDTH)
#define mktimestamp(d, t)	((lng) (((uint64_t) (d) << TSDATE_SHIFT) | \
				((uint64_t) (t) << TSTIME_SHIFT)))

/*
 * Convert an old-style date (a day count in which 0 is the first day
 * of year 0) into the new packed representation built by mkdate().
 * Years before YEAR_MIN are clamped to January 1 of YEAR_MIN, years
 * after YEAR_MAX to December 31 of YEAR_MAX.
 */
int
cvtdate(int n)
{
	const int *cum;		/* cumulative month table for the year */
	int year = n / 365;
	int day = (n - year * 365) - leapyears(year >= 0 ? year - 1 : year);
	int month;

	/* the first guess ignored the precise leap days: walk to the
	 * year that really contains the day */
	if (n >= 0) {
		while (day < 0) {
			year--;
			day += YEARDAYS(year);
		}
	} else {
		year--;
		while (day >= 0) {
			year++;
			day -= YEARDAYS(year);
		}
		day = YEARDAYS(year) + day;
	}

	/* from 0-based to 1-based day of the year */
	day++;

	/* find the month, starting from a lower bound */
	cum = leapyear(year) ? CUMLEAPDAYS : CUMDAYS;
	month = day / 31;
	if (month == 0)
		month = 1;
	while (month <= 12 &&
	       !(day > cum[month - 1] && day <= cum[month]))
		month++;
	day -= cum[month - 1];

	/* clamp date */
	if (year < YEAR_MIN) {
		day = 1;
		month = 1;
		year = YEAR_MIN;
	} else if (year > YEAR_MAX) {
		day = 31;
		month = 12;
		year = YEAR_MAX;
	}
	return mkdate(day, month, year);
}
677
678static gdk_return
679fixdateheap(BAT *b, const char *anme)
680{
681 long_str filename;
682 Heap h1; /* old heap */
683 Heap h2; /* new heap */
684 const char *nme, *bnme;
685 char *srcdir;
686 BUN i;
687 bool nofix = true;
688
689 nme = BBP_physical(b->batCacheid);
690 srcdir = GDKfilepath(NOFARM, BATDIR, nme, NULL);
691 if (srcdir == NULL) {
692 return GDK_FAIL;
693 }
694 *strrchr(srcdir, DIR_SEP) = 0;
695
696 if ((bnme = strrchr(nme, DIR_SEP)) != NULL)
697 bnme++;
698 else
699 bnme = nme;
700 sprintf(filename, "BACKUP%c%s", DIR_SEP, bnme);
701
702 /* make backup of heap */
703 if (GDKmove(b->theap.farmid, srcdir, bnme, "tail", BAKDIR, bnme, "tail") != GDK_SUCCEED) {
704 GDKfree(srcdir);
705 GDKerror("fixdateheap: cannot make backup of %s.tail\n", nme);
706 return GDK_FAIL;
707 }
708 /* load old heap */
709 h1 = b->theap;
710 strconcat_len(h1.filename, sizeof(h1.filename),
711 filename, ".tail", NULL);
712 h1.base = NULL;
713 h1.dirty = false;
714 if (HEAPload(&h1, filename, "tail", false) != GDK_SUCCEED) {
715 GDKfree(srcdir);
716 GDKerror("fixdateheap: loading old tail heap "
717 "for BAT %d failed\n", b->batCacheid);
718 return GDK_FAIL;
719 }
720
721 /* create new heap */
722 h2 = b->theap;
723 strconcat_len(h2.filename, sizeof(h2.filename), nme, ".tail", NULL);
724 if (HEAPalloc(&h2, b->batCapacity, strcmp(anme, "date") == 0 ? 4 : 8) != GDK_SUCCEED) {
725 GDKfree(srcdir);
726 HEAPfree(&h1, false);
727 GDKerror("fixdateheap: allocating new tail heap "
728 "for BAT %d failed\n", b->batCacheid);
729 return GDK_FAIL;
730 }
731 h2.dirty = true;
732 h2.free = h1.free;
733
734 if (strcmp(anme, "date") == 0) {
735 const int *restrict o = (const int *) h1.base;
736 int *restrict n = (int *) h2.base;
737
738 for (i = 0; i < b->batCount; i++) {
739 if (is_int_nil(o[i])) {
740 b->tnil = true;
741 n[i] = int_nil;
742 } else {
743 n[i] = cvtdate(o[i]);
744 nofix = false;
745 }
746 }
747 } else if (strcmp(anme, "timestamp") == 0) {
748 union timestamp {
749 lng l;
750 struct {
751#ifndef WORDS_BIGENDIAN
752 int p_msecs;
753 int p_days;
754#else
755 int p_days;
756 int p_msecs;
757#endif
758 } t;
759 };
760 const union timestamp *restrict o = (const union timestamp *) h1.base;
761 lng *restrict n = (lng *) h2.base;
762 for (i = 0; i < b->batCount; i++) {
763 if (is_lng_nil(o[i].l)) {
764 b->tnil = true;
765 n[i] = lng_nil;
766 } else {
767 n[i] = mktimestamp(cvtdate(o[i].t.p_days),
768 o[i].t.p_msecs * LL_CONSTANT(1000));
769 nofix = false;
770 }
771 }
772 } else {
773 /* daytime */
774 const int *restrict o = (const int *) h1.base;
775 lng *restrict n = (lng *) h2.base;
776
777 h2.free <<= 1;
778 nofix = false;
779 for (i = 0; i < b->batCount; i++) {
780 if (is_int_nil(o[i])) {
781 b->tnil = true;
782 n[i] = lng_nil;
783 } else {
784 n[i] = o[i] * LL_CONSTANT(1000);
785 }
786 }
787 }
788
789 /* cleanup */
790 HEAPfree(&h1, false);
791 if (nofix) {
792 /* didn't fix anything, move backup back */
793 HEAPfree(&h2, true);
794 if (GDKmove(b->theap.farmid, BAKDIR, bnme, "tail", srcdir, bnme, "tail") != GDK_SUCCEED) {
795 GDKfree(srcdir);
796 GDKerror("fixdateheap: cannot restore backup of %s.tail\n", nme);
797 return GDK_FAIL;
798 }
799 } else {
800 /* heap was fixed */
801 b->batDirtydesc = true;
802 if (HEAPsave(&h2, nme, "tail") != GDK_SUCCEED) {
803 HEAPfree(&h2, false);
804 GDKfree(srcdir);
805 GDKerror("fixdateheap: saving heap failed\n");
806 return GDK_FAIL;
807 }
808 if (strcmp(anme, "daytime") == 0) {
809 b->twidth = 8;
810 b->tshift = 3;
811 }
812 HEAPfree(&h2, false);
813 b->theap = h2;
814 }
815 GDKfree(srcdir);
816 return GDK_SUCCEED;
817}
818
819static gdk_return
820fixdatebats(void)
821{
822 bat bid;
823 BAT *b;
824 char filename[FILENAME_MAX];
825 FILE *fp;
826 size_t len;
827 int written;
828
829 for (bid = 1; bid < (bat) ATOMIC_GET(&BBPsize); bid++) {
830 if ((b = BBP_desc(bid)) == NULL) {
831 /* not a valid BAT */
832 continue;
833 }
834 if (BBP_logical(bid) &&
835 (len = strlen(BBP_logical(bid))) > 12 &&
836 strcmp(BBP_logical(bid) + len - 12, "_catalog_nme") == 0) {
837 /* this is one of the files used by the
838 * logger. We need to communicate to the
839 * logger that it also needs to do a
840 * conversion. That is done by creating a
841 * file here based on the name of this BAT. */
842 written = snprintf(filename, sizeof(filename),
843 "%s/%.*s_date-convert",
844 BBPfarms[0].dirname,
845 (int) (len - 12), BBP_logical(bid));
846 if (written == -1 || written >= FILENAME_MAX) {
847 GDKerror("fixdatebats: cannot create file %s has a very large pathname\n",
848 filename);
849 return GDK_FAIL;
850 }
851 fp = fopen(filename, "w");
852 if (fp == NULL) {
853 GDKsyserror("fixdatebats: cannot create file %s\n",
854 filename);
855 return GDK_FAIL;
856 }
857 fclose(fp);
858 }
859 /* The date type is not known in GDK when reading the BBP */
860 if (b->ttype < 0) {
861 const char *anme;
862
863 /* as yet unknown tail column type */
864 anme = ATOMunknown_name(b->ttype);
865 /* known string types */
866 if ((strcmp(anme, "date") == 0 ||
867 strcmp(anme, "timestamp") == 0 ||
868 strcmp(anme, "daytime") == 0) &&
869 fixdateheap(b, anme) != GDK_SUCCEED)
870 return GDK_FAIL;
871 }
872 }
873 return GDK_SUCCEED;
874}
875#endif
876
877static int
878heapinit(BAT *b, const char *buf, int *hashash, unsigned bbpversion, bat bid, const char *filename)
879{
880 int t;
881 char type[33];
882 uint16_t width;
883 uint16_t var;
884 uint16_t properties;
885 uint64_t nokey0;
886 uint64_t nokey1;
887 uint64_t nosorted;
888 uint64_t norevsorted;
889 uint64_t base;
890 uint64_t align;
891 uint64_t free;
892 uint64_t size;
893 uint16_t storage;
894 int n;
895
896 (void) bbpversion; /* could be used to implement compatibility */
897
898 norevsorted = 0; /* default for first case */
899 if (bbpversion <= GDKLIBRARY_TALIGN ?
900 sscanf(buf,
901 " %32s %" SCNu16 " %" SCNu16 " %" SCNu16 " %" SCNu64
902 " %" SCNu64 " %" SCNu64 " %" SCNu64 " %" SCNu64
903 " %" SCNu64 " %" SCNu64 " %" SCNu64 " %" SCNu16
904 "%n",
905 type, &width, &var, &properties, &nokey0,
906 &nokey1, &nosorted, &norevsorted, &base,
907 &align, &free, &size, &storage,
908 &n) < 13 :
909 sscanf(buf,
910 " %10s %" SCNu16 " %" SCNu16 " %" SCNu16 " %" SCNu64
911 " %" SCNu64 " %" SCNu64 " %" SCNu64 " %" SCNu64
912 " %" SCNu64 " %" SCNu64 " %" SCNu16
913 "%n",
914 type, &width, &var, &properties, &nokey0,
915 &nokey1, &nosorted, &norevsorted, &base,
916 &free, &size, &storage,
917 &n) < 12) {
918 GDKerror("BBPinit: invalid format for BBP.dir\n%s", buf);
919 return -1;
920 }
921
922 if (properties & ~0x0F81) {
923 GDKerror("BBPinit: unknown properties are set: incompatible database\n");
924 return -1;
925 }
926 *hashash = var & 2;
927 var &= ~2;
928#ifdef HAVE_HGE
929 if (strcmp(type, "hge") == 0)
930 havehge = true;
931#endif
932 /* sqlblob was changed to plain blob in the Apr2019 release */
933 if (strcmp(type, "sqlblob") == 0)
934 strcpy(type, "blob");
935 if ((t = ATOMindex(type)) < 0) {
936 if ((t = ATOMunknown_find(type)) == 0) {
937 GDKerror("BBPinit: no space for atom %s", type);
938 return -1;
939 }
940 } else if (var != (t == TYPE_void || BATatoms[t].atomPut != NULL)) {
941 GDKerror("BBPinit: inconsistent entry in BBP.dir: tvarsized mismatch for BAT %d\n", (int) bid);
942 return -1;
943 } else if (var && t != 0 ?
944 ATOMsize(t) < width ||
945 (width != 1 && width != 2 && width != 4
946#if SIZEOF_VAR_T == 8
947 && width != 8
948#endif
949 ) :
950 ATOMsize(t) != width) {
951 GDKerror("BBPinit: inconsistent entry in BBP.dir: tsize mismatch for BAT %d\n", (int) bid);
952 return -1;
953 }
954 b->ttype = t;
955 b->twidth = width;
956 b->tvarsized = var != 0;
957 b->tshift = ATOMelmshift(width);
958 assert_shift_width(b->tshift,b->twidth);
959 b->tnokey[0] = (BUN) nokey0;
960 b->tnokey[1] = (BUN) nokey1;
961 b->tsorted = (bit) ((properties & 0x0001) != 0);
962 b->trevsorted = (bit) ((properties & 0x0080) != 0);
963 b->tkey = (properties & 0x0100) != 0;
964 b->tnonil = (properties & 0x0400) != 0;
965 b->tnil = (properties & 0x0800) != 0;
966 b->tnosorted = (BUN) nosorted;
967 b->tnorevsorted = (BUN) norevsorted;
968 /* (properties & 0x0200) is the old tdense flag */
969 b->tseqbase = (properties & 0x0200) == 0 || base >= (uint64_t) oid_nil ? oid_nil : (oid) base;
970 b->theap.free = (size_t) free;
971 b->theap.size = (size_t) size;
972 b->theap.base = NULL;
973 strconcat_len(b->theap.filename, sizeof(b->theap.filename),
974 filename, ".tail", NULL);
975 b->theap.storage = (storage_t) storage;
976 b->theap.copied = false;
977 b->theap.newstorage = (storage_t) storage;
978 b->theap.farmid = BBPselectfarm(PERSISTENT, b->ttype, offheap);
979 b->theap.dirty = false;
980#ifdef GDKLIBRARY_BLOB_SORT
981 if (bbpversion <= GDKLIBRARY_BLOB_SORT && strcmp(type, "blob") == 0) {
982 b->tsorted = b->trevsorted = false;
983 b->tnosorted = b->tnorevsorted = 0;
984 OIDXdestroy(b);
985 }
986#endif
987 if (b->theap.free > b->theap.size) {
988 GDKerror("BBPinit: \"free\" value larger than \"size\" in heap of bat %d\n", (int) bid);
989 return -1;
990 }
991 return n;
992}
993
994static int
995vheapinit(BAT *b, const char *buf, int hashash, bat bid, const char *filename)
996{
997 int n = 0;
998 uint64_t free, size;
999 uint16_t storage;
1000
1001 if (b->tvarsized && b->ttype != TYPE_void) {
1002 b->tvheap = GDKzalloc(sizeof(Heap));
1003 if (b->tvheap == NULL) {
1004 GDKerror("BBPinit: cannot allocate memory for heap.");
1005 return -1;
1006 }
1007 if (sscanf(buf,
1008 " %" SCNu64 " %" SCNu64 " %" SCNu16
1009 "%n",
1010 &free, &size, &storage, &n) < 3) {
1011 GDKerror("BBPinit: invalid format for BBP.dir\n%s", buf);
1012 return -1;
1013 }
1014 b->tvheap->free = (size_t) free;
1015 b->tvheap->size = (size_t) size;
1016 b->tvheap->base = NULL;
1017 strconcat_len(b->tvheap->filename, sizeof(b->tvheap->filename),
1018 filename, ".theap", NULL);
1019 b->tvheap->storage = (storage_t) storage;
1020 b->tvheap->copied = false;
1021 b->tvheap->hashash = hashash != 0;
1022 b->tvheap->cleanhash = true;
1023 b->tvheap->newstorage = (storage_t) storage;
1024 b->tvheap->dirty = false;
1025 b->tvheap->parentid = bid;
1026 b->tvheap->farmid = BBPselectfarm(PERSISTENT, b->ttype, varheap);
1027 if (b->tvheap->free > b->tvheap->size) {
1028 GDKerror("BBPinit: \"free\" value larger than \"size\" in var heap of bat %d\n", (int) bid);
1029 return -1;
1030 }
1031 }
1032 return n;
1033}
1034
1035static gdk_return
1036BBPreadEntries(FILE *fp, unsigned bbpversion)
1037{
1038 bat bid = 0;
1039 char buf[4096];
1040 BAT *bn;
1041
1042 /* read the BBP.dir and insert the BATs into the BBP */
1043 while (fgets(buf, sizeof(buf), fp) != NULL) {
1044 uint64_t batid;
1045 uint16_t status;
1046 char headname[129];
1047 char filename[20];
1048 unsigned int properties;
1049 int nread, n;
1050 char *s, *options = NULL;
1051 char logical[1024];
1052 uint64_t first = 0, count, capacity, base = 0;
1053 int Thashash;
1054
1055 static_assert(sizeof(BBP_physical(0)) == sizeof(filename),
1056 "filename should be same size as BBPrec.physical");
1057 if ((s = strchr(buf, '\r')) != NULL) {
1058 /* convert \r\n into just \n */
1059 if (s[1] != '\n') {
1060 GDKerror("BBPinit: invalid format for BBP.dir");
1061 return GDK_FAIL;
1062 }
1063 *s++ = '\n';
1064 *s = 0;
1065 }
1066
1067 if (sscanf(buf,
1068 "%" SCNu64 " %" SCNu16 " %128s %19s %u %" SCNu64
1069 " %" SCNu64 " %" SCNu64
1070 "%n",
1071 &batid, &status, headname, filename,
1072 &properties,
1073 &count, &capacity, &base,
1074 &nread) < 8) {
1075 GDKerror("BBPinit: invalid format for BBP.dir\n%s", buf);
1076 return GDK_FAIL;
1077 }
1078
1079 if (batid >= N_BBPINIT * BBPINIT) {
1080 GDKerror("BBPinit: bat ID (%" PRIu64 ") too large to accomodate (max %d).", batid, N_BBPINIT * BBPINIT - 1);
1081 return GDK_FAIL;
1082 }
1083
1084 /* convert both / and \ path separators to our own DIR_SEP */
1085#if DIR_SEP != '/'
1086 s = filename;
1087 while ((s = strchr(s, '/')) != NULL)
1088 *s++ = DIR_SEP;
1089#endif
1090#if DIR_SEP != '\\'
1091 s = filename;
1092 while ((s = strchr(s, '\\')) != NULL)
1093 *s++ = DIR_SEP;
1094#endif
1095
1096 if (first != 0) {
1097 GDKerror("BBPinit: first != 0 (ID = %" PRIu64 ").",
1098 batid);
1099 return GDK_FAIL;
1100 }
1101
1102 bid = (bat) batid;
1103 if (batid >= (uint64_t) ATOMIC_GET(&BBPsize)) {
1104 ATOMIC_SET(&BBPsize, batid + 1);
1105 if ((bat) ATOMIC_GET(&BBPsize) >= BBPlimit)
1106 BBPextend(0, false);
1107 }
1108 if (BBP_desc(bid) != NULL) {
1109 GDKerror("BBPinit: duplicate entry in BBP.dir (ID = "
1110 "%" PRIu64 ").", batid);
1111 return GDK_FAIL;
1112 }
1113 bn = GDKzalloc(sizeof(BAT));
1114 if (bn == NULL) {
1115 GDKerror("BBPinit: cannot allocate memory for BAT.");
1116 return GDK_FAIL;
1117 }
1118 bn->batCacheid = bid;
1119 if (BATroles(bn, NULL) != GDK_SUCCEED) {
1120 GDKfree(bn);
1121 GDKerror("BBPinit: BATroles failed.");
1122 return GDK_FAIL;
1123 }
1124 bn->batTransient = false;
1125 bn->batCopiedtodisk = true;
1126 bn->batRestricted = (properties & 0x06) >> 1;
1127 bn->batCount = (BUN) count;
1128 bn->batInserted = bn->batCount;
1129 bn->batCapacity = (BUN) capacity;
1130 char name[16];
1131 snprintf(name, sizeof(name), "BATlock%d", bn->batCacheid); /* fits */
1132 MT_lock_init(&bn->batIdxLock, name);
1133
1134 if (base > (uint64_t) GDK_oid_max) {
1135 BATdestroy(bn);
1136 GDKerror("BBPinit: head seqbase out of range (ID = %" PRIu64 ", seq = %" PRIu64 ").", batid, base);
1137 return GDK_FAIL;
1138 }
1139 bn->hseqbase = (oid) base;
1140 n = heapinit(bn, buf + nread, &Thashash, bbpversion, bid, filename);
1141 if (n < 0) {
1142 BATdestroy(bn);
1143 return GDK_FAIL;
1144 }
1145 nread += n;
1146 n = vheapinit(bn, buf + nread, Thashash, bid, filename);
1147 if (n < 0) {
1148 BATdestroy(bn);
1149 return GDK_FAIL;
1150 }
1151 nread += n;
1152
1153 if (buf[nread] != '\n' && buf[nread] != ' ') {
1154 BATdestroy(bn);
1155 GDKerror("BBPinit: invalid format for BBP.dir\n%s", buf);
1156 return GDK_FAIL;
1157 }
1158 if (buf[nread] == ' ')
1159 options = buf + nread + 1;
1160
1161 if ((s = strchr(headname, '~')) != NULL && s == headname) {
1162 int len = snprintf(logical, sizeof(logical), "tmp_%o", (unsigned) bid);
1163 if (len == -1 || len >= (int) sizeof(logical))
1164 GDKfatal("BBPinit: BBP logical filename directory is too large\n");
1165 } else {
1166 if (s)
1167 *s = 0;
1168 strcpy_len(logical, headname, sizeof(logical));
1169 }
1170 s = logical;
1171 BBP_logical(bid) = GDKstrdup(s);
1172 if (BBP_logical(bid) == NULL) {
1173 BATdestroy(bn);
1174 return GDK_FAIL;
1175 }
1176 /* tailname is ignored */
1177 strcpy_len(BBP_physical(bid), filename, sizeof(BBP_physical(bid)));
1178#ifdef STATIC_CODE_ANALYSIS
1179 /* help coverity */
1180 BBP_physical(bid)[sizeof(BBP_physical(bid)) - 1] = 0;
1181#endif
1182 BBP_options(bid) = NULL;
1183 if (options)
1184 BBP_options(bid) = GDKstrdup(options);
1185 BBP_refs(bid) = 0;
1186 BBP_lrefs(bid) = 1; /* any BAT we encounter here is persistent, so has a logical reference */
1187 BBP_desc(bid) = bn;
1188 BBP_status(bid) = BBPEXISTING; /* do we need other status bits? */
1189 }
1190 return GDK_SUCCEED;
1191}
1192
1193/* check that the necessary files for all BATs exist and are large
1194 * enough */
1195static gdk_return
1196BBPcheckbats(void)
1197{
1198 for (bat bid = 1; bid < (bat) ATOMIC_GET(&BBPsize); bid++) {
1199 struct stat statb;
1200 BAT *b;
1201 char *path;
1202
1203 if ((b = BBP_desc(bid)) == NULL) {
1204 /* not a valid BAT */
1205 continue;
1206 }
1207 if (b->ttype == TYPE_void) {
1208 /* no files needed */
1209 continue;
1210 }
1211 path = GDKfilepath(0, BATDIR, BBP_physical(b->batCacheid), "tail");
1212 if (path == NULL)
1213 return GDK_FAIL;
1214 if (stat(path, &statb) < 0) {
1215 GDKsyserror("BBPcheckbats: cannot stat file %s\n",
1216 path);
1217 GDKfree(path);
1218 return GDK_FAIL;
1219 }
1220 if ((size_t) statb.st_size < b->theap.free) {
1221 GDKerror("BBPcheckbats: file %s too small (expected %zu, actual %zu)\n", path, b->theap.free, (size_t) statb.st_size);
1222 GDKfree(path);
1223 return GDK_FAIL;
1224 }
1225 GDKfree(path);
1226 if (b->tvheap != NULL) {
1227 path = GDKfilepath(0, BATDIR, BBP_physical(b->batCacheid), "theap");
1228 if (path == NULL)
1229 return GDK_FAIL;
1230 if (stat(path, &statb) < 0) {
1231 GDKsyserror("BBPcheckbats: cannot stat file %s\n",
1232 path);
1233 GDKfree(path);
1234 return GDK_FAIL;
1235 }
1236 if ((size_t) statb.st_size < b->tvheap->free) {
1237 GDKerror("BBPcheckbats: file %s too small (expected %zu, actual %zu)\n", path, b->tvheap->free, (size_t) statb.st_size);
1238 GDKfree(path);
1239 return GDK_FAIL;
1240 }
1241 GDKfree(path);
1242 }
1243 }
1244 return GDK_SUCCEED;
1245}
1246
1247#ifdef HAVE_HGE
1248#define SIZEOF_MAX_INT SIZEOF_HGE
1249#else
1250#define SIZEOF_MAX_INT SIZEOF_LNG
1251#endif
1252
1253static unsigned
1254BBPheader(FILE *fp)
1255{
1256 char buf[BUFSIZ];
1257 int sz, ptrsize, oidsize, intsize;
1258 unsigned bbpversion;
1259
1260 if (fgets(buf, sizeof(buf), fp) == NULL) {
1261 GDKerror("BBPinit: BBP.dir is empty");
1262 return 0;
1263 }
1264 if (sscanf(buf, "BBP.dir, GDKversion %u\n", &bbpversion) != 1) {
1265 GDKerror("BBPinit: old BBP without version number");
1266 GDKerror("dump the database using a compatible version,");
1267 GDKerror("then restore into new database using this version.\n");
1268 return 0;
1269 }
1270 if (bbpversion != GDKLIBRARY &&
1271 bbpversion != GDKLIBRARY_OLDDATE &&
1272 bbpversion != GDKLIBRARY_BLOB_SORT &&
1273 bbpversion != GDKLIBRARY_NIL_NAN &&
1274 bbpversion != GDKLIBRARY_TALIGN) {
1275 GDKerror("BBPinit: incompatible BBP version: expected 0%o, got 0%o.\n"
1276 "This database was probably created by %s version of MonetDB.",
1277 GDKLIBRARY, bbpversion,
1278 bbpversion > GDKLIBRARY ? "a newer" : "a too old");
1279 return 0;
1280 }
1281 if (fgets(buf, sizeof(buf), fp) == NULL) {
1282 GDKerror("BBPinit: short BBP");
1283 return 0;
1284 }
1285 if (sscanf(buf, "%d %d %d", &ptrsize, &oidsize, &intsize) != 3) {
1286 GDKerror("BBPinit: BBP.dir has incompatible format: pointer, OID, and max. integer sizes are missing");
1287 return 0;
1288 }
1289 if (ptrsize != SIZEOF_SIZE_T || oidsize != SIZEOF_OID) {
1290 GDKerror("BBPinit: database created with incompatible server:\n"
1291 "expected pointer size %d, got %d, expected OID size %d, got %d.",
1292 SIZEOF_SIZE_T, ptrsize, SIZEOF_OID, oidsize);
1293 return 0;
1294 }
1295 if (intsize > SIZEOF_MAX_INT) {
1296 GDKerror("BBPinit: database created with incompatible server:\n"
1297 "expected max. integer size %d, got %d.",
1298 SIZEOF_MAX_INT, intsize);
1299 return 0;
1300 }
1301 if (fgets(buf, sizeof(buf), fp) == NULL) {
1302 GDKerror("BBPinit: short BBP");
1303 return 0;
1304 }
1305#ifdef GDKLIBRARY_TALIGN
1306 char *s;
1307 if ((s = strstr(buf, "BBPsize")) != NULL) {
1308 if (sscanf(s, "BBPsize=%d", &sz) != 1) {
1309 GDKerror("BBPinit: no BBPsize value found\n");
1310 return 0;
1311 }
1312 sz = (int) (sz * BATMARGIN);
1313 if (sz > (bat) ATOMIC_GET(&BBPsize))
1314 ATOMIC_SET(&BBPsize, sz);
1315 }
1316#else
1317 if (sscanf(buf, "BBPsize=%d", &sz) != 1) {
1318 GDKerror("BBPinit: no BBPsize value found\n");
1319 return 0;
1320 }
1321 sz = (int) (sz * BATMARGIN);
1322 if (sz > (bat) ATOMIC_GET(&BBPsize))
1323 ATOMIC_SET(&BBPsize, sz);
1324#endif
1325 assert(bbpversion != 0);
1326 return bbpversion;
1327}
1328
1329bool
1330GDKinmemory(void)
1331{
1332 return BBPfarms[0].dirname == NULL;
1333}
1334
1335/* all errors are fatal */
1336gdk_return
1337BBPaddfarm(const char *dirname, int rolemask)
1338{
1339 struct stat st;
1340 int i;
1341
1342 if (dirname == NULL) {
1343 assert(BBPfarms[0].dirname == NULL);
1344 assert(rolemask & 1);
1345 assert(BBPfarms[0].roles == 0);
1346 BBPfarms[0].roles = rolemask;
1347 return GDK_SUCCEED;
1348 }
1349 if (strchr(dirname, '\n') != NULL) {
1350 GDKerror("BBPaddfarm: no newline allowed in directory name\n");
1351 return GDK_FAIL;
1352 }
1353 if (rolemask == 0 || (rolemask & 1 && BBPfarms[0].dirname != NULL)) {
1354 GDKerror("BBPaddfarm: bad rolemask\n");
1355 return GDK_FAIL;
1356 }
1357 if (mkdir(dirname, MONETDB_DIRMODE) < 0) {
1358 if (errno == EEXIST) {
1359 if (stat(dirname, &st) == -1 || !S_ISDIR(st.st_mode)) {
1360 GDKerror("BBPaddfarm: %s: not a directory\n", dirname);
1361 return GDK_FAIL;
1362 }
1363 } else {
1364 GDKerror("BBPaddfarm: %s: cannot create directory\n", dirname);
1365 return GDK_FAIL;
1366 }
1367 }
1368 for (i = 0; i < MAXFARMS; i++) {
1369 if (BBPfarms[i].dirname == NULL) {
1370 BBPfarms[i].dirname = GDKstrdup(dirname);
1371 if (BBPfarms[i].dirname == NULL)
1372 return GDK_FAIL;
1373 BBPfarms[i].roles = rolemask;
1374 if ((rolemask & 1) == 0) {
1375 char *bbpdir;
1376 int j;
1377
1378 for (j = 0; j < i; j++)
1379 if (strcmp(BBPfarms[i].dirname,
1380 BBPfarms[j].dirname) == 0)
1381 return GDK_SUCCEED;
1382 /* if an extra farm, make sure we
1383 * don't find a BBP.dir there that
1384 * might belong to an existing
1385 * database */
1386 bbpdir = GDKfilepath(i, BATDIR, "BBP", "dir");
1387 if (bbpdir == NULL) {
1388 GDKerror("BBPaddfarm: malloc failed\n");
1389 return GDK_FAIL;
1390 }
1391 if (stat(bbpdir, &st) != -1 || errno != ENOENT) {
1392 GDKfree(bbpdir);
1393 GDKerror("BBPaddfarm: %s is a database\n", dirname);
1394 return GDK_FAIL;
1395 }
1396 GDKfree(bbpdir);
1397 bbpdir = GDKfilepath(i, BAKDIR, "BBP", "dir");
1398 if (bbpdir == NULL) {
1399 GDKerror("BBPaddfarm: malloc failed\n");
1400 return GDK_FAIL;
1401 }
1402 if (stat(bbpdir, &st) != -1 || errno != ENOENT) {
1403 GDKfree(bbpdir);
1404 GDKerror("BBPaddfarm: %s is a database\n", dirname);
1405 return GDK_FAIL;
1406 }
1407 GDKfree(bbpdir);
1408 }
1409 return GDK_SUCCEED;
1410 }
1411 }
1412 GDKerror("BBPaddfarm: too many farms\n");
1413 return GDK_FAIL;
1414}
1415
1416gdk_return
1417BBPinit(void)
1418{
1419 FILE *fp = NULL;
1420 struct stat st;
1421 unsigned bbpversion = 0;
1422 int i;
1423
1424 if (!GDKinmemory()) {
1425 str bbpdirstr, backupbbpdirstr;
1426
1427 if (!(bbpdirstr = GDKfilepath(0, BATDIR, "BBP", "dir"))) {
1428 GDKerror("BBPinit: GDKmalloc failed\n");
1429 return GDK_FAIL;
1430 }
1431
1432 if (!(backupbbpdirstr = GDKfilepath(0, BAKDIR, "BBP", "dir"))) {
1433 GDKfree(bbpdirstr);
1434 GDKerror("BBPinit: GDKmalloc failed\n");
1435 return GDK_FAIL;
1436 }
1437
1438 if (GDKremovedir(0, TEMPDIR) != GDK_SUCCEED) {
1439 GDKfree(bbpdirstr);
1440 GDKfree(backupbbpdirstr);
1441 GDKerror("BBPinit: cannot remove directory %s\n", TEMPDIR);
1442 return GDK_FAIL;
1443 }
1444
1445 if (GDKremovedir(0, DELDIR) != GDK_SUCCEED) {
1446 GDKfree(bbpdirstr);
1447 GDKfree(backupbbpdirstr);
1448 GDKerror("BBPinit: cannot remove directory %s\n", DELDIR);
1449 return GDK_FAIL;
1450 }
1451
1452 /* first move everything from SUBDIR to BAKDIR (its parent) */
1453 if (BBPrecover_subdir() != GDK_SUCCEED) {
1454 GDKfree(bbpdirstr);
1455 GDKfree(backupbbpdirstr);
1456 GDKerror("BBPinit: cannot properly recover_subdir process %s. Please check whether your disk is full or write-protected", SUBDIR);
1457 return GDK_FAIL;
1458 }
1459
1460 /* try to obtain a BBP.dir from bakdir */
1461 if (stat(backupbbpdirstr, &st) == 0) {
1462 /* backup exists; *must* use it */
1463 if (recover_dir(0, stat(bbpdirstr, &st) == 0) != GDK_SUCCEED)
1464 goto bailout;
1465 if ((fp = GDKfilelocate(0, "BBP", "r", "dir")) == NULL) {
1466 GDKfree(bbpdirstr);
1467 GDKfree(backupbbpdirstr);
1468 GDKerror("BBPinit: cannot open recovered BBP.dir.");
1469 return GDK_FAIL;
1470 }
1471 } else if ((fp = GDKfilelocate(0, "BBP", "r", "dir")) == NULL) {
1472 /* there was no BBP.dir either. Panic! try to use a
1473 * BBP.bak */
1474 if (stat(backupbbpdirstr, &st) < 0) {
1475 /* no BBP.bak (nor BBP.dir or BACKUP/BBP.dir):
1476 * create a new one */
1477 IODEBUG fprintf(stderr, "#BBPdir: initializing BBP.\n"); /* BBPdir instead of BBPinit for backward compatibility of error messages */
1478 if (BBPdir(0, NULL) != GDK_SUCCEED)
1479 goto bailout;
1480 } else if (GDKmove(0, BATDIR, "BBP", "bak", BATDIR, "BBP", "dir") == GDK_SUCCEED)
1481 IODEBUG fprintf(stderr, "#BBPinit: reverting to dir saved in BBP.bak.\n");
1482
1483 if ((fp = GDKfilelocate(0, "BBP", "r", "dir")) == NULL)
1484 goto bailout;
1485 }
1486 assert(fp != NULL);
1487 GDKfree(bbpdirstr);
1488 GDKfree(backupbbpdirstr);
1489 }
1490
1491 /* scan the BBP.dir to obtain current size */
1492 BBPlimit = 0;
1493 memset(BBP, 0, sizeof(BBP));
1494 ATOMIC_SET(&BBPsize, 1);
1495
1496 if (GDKinmemory()) {
1497 bbpversion = GDKLIBRARY;
1498 } else {
1499 bbpversion = BBPheader(fp);
1500 if (bbpversion == 0)
1501 return GDK_FAIL;
1502 }
1503
1504 BBPextend(0, false); /* allocate BBP records */
1505
1506 if (!GDKinmemory()) {
1507 ATOMIC_SET(&BBPsize, 1);
1508 if (BBPreadEntries(fp, bbpversion) != GDK_SUCCEED)
1509 return GDK_FAIL;
1510 fclose(fp);
1511 }
1512
1513 if (BBPinithash(0) != GDK_SUCCEED) {
1514 GDKerror("BBPinit: BBPinithash failed");
1515 return GDK_FAIL;
1516 }
1517
1518 /* will call BBPrecover if needed */
1519 if (!GDKinmemory() && BBPprepare(false) != GDK_SUCCEED) {
1520 GDKerror("BBPinit: cannot properly prepare process %s. Please check whether your disk is full or write-protected", BAKDIR);
1521 return GDK_FAIL;
1522 }
1523
1524 if (BBPcheckbats() != GDK_SUCCEED)
1525 return GDK_FAIL;
1526
1527 /* cleanup any leftovers (must be done after BBPrecover) */
1528 for (i = 0; i < MAXFARMS && BBPfarms[i].dirname != NULL; i++) {
1529 int j;
1530 for (j = 0; j < i; j++) {
1531 /* don't clean a directory twice */
1532 if (BBPfarms[j].dirname &&
1533 strcmp(BBPfarms[i].dirname,
1534 BBPfarms[j].dirname) == 0)
1535 break;
1536 }
1537 if (j == i) {
1538 char *d = GDKfilepath(i, NULL, BATDIR, NULL);
1539 if (d == NULL) {
1540 GDKerror("BBPinit: malloc failed\n");
1541 return GDK_FAIL;
1542 }
1543 BBPdiskscan(d, strlen(d) - strlen(BATDIR));
1544 GDKfree(d);
1545 }
1546 }
1547
1548#ifdef GDKLIBRARY_NIL_NAN
1549 if (bbpversion <= GDKLIBRARY_NIL_NAN)
1550 if (fixfloatbats() != GDK_SUCCEED)
1551 return GDK_FAIL;
1552#endif
1553#ifdef GDKLIBRARY_OLDDATE
1554 if (bbpversion <= GDKLIBRARY_OLDDATE)
1555 if (fixdatebats() != GDK_SUCCEED)
1556 return GDK_FAIL;
1557#endif
1558 if (bbpversion < GDKLIBRARY)
1559 TMcommit();
1560 return GDK_SUCCEED;
1561
1562 bailout:
1563 /* now it is time for real panic */
1564 GDKerror("BBPinit: could not write %s%cBBP.dir. Please check whether your disk is full or write-protected", BATDIR, DIR_SEP);
1565 return GDK_FAIL;
1566}
1567
1568/*
1569 * During the exit phase all non-persistent BATs are removed. Upon
1570 * exit the status of the BBP tables is saved on disk. This function
1571 * is called once and during the shutdown of the server. Since
1572 * shutdown may be issued from any thread (dangerous) it may lead to
1573 * interference in a parallel session.
1574 */
1575
1576static int backup_files = 0, backup_dir = 0, backup_subdir = 0;
1577
1578void
1579BBPexit(void)
1580{
1581 bat i;
1582 bool skipped;
1583
1584 BBPlock(); /* stop all threads ever touching more descriptors */
1585
1586 /* free all memory (just for leak-checking in Purify) */
1587 do {
1588 skipped = false;
1589 for (i = 0; i < (bat) ATOMIC_GET(&BBPsize); i++) {
1590 if (BBPvalid(i)) {
1591 BAT *b = BBP_desc(i);
1592
1593 if (b) {
1594 if (b->batSharecnt > 0) {
1595 skipped = true;
1596 continue;
1597 }
1598 if (isVIEW(b)) {
1599 /* "manually"
1600 * decrement parent
1601 * references, since
1602 * VIEWdestroy doesn't
1603 * (and can't here due
1604 * to locks) do it */
1605 bat tp = VIEWtparent(b);
1606 bat vtp = VIEWvtparent(b);
1607 if (tp) {
1608 BBP_desc(tp)->batSharecnt--;
1609 --BBP_lrefs(tp);
1610 }
1611 if (vtp) {
1612 BBP_desc(vtp)->batSharecnt--;
1613 --BBP_lrefs(vtp);
1614 }
1615 VIEWdestroy(b);
1616 } else {
1617 BATfree(b);
1618 }
1619 }
1620 BBPuncacheit(i, true);
1621 if (BBP_logical(i) != BBP_bak(i))
1622 GDKfree(BBP_logical(i));
1623 BBP_logical(i) = NULL;
1624 }
1625 }
1626 } while (skipped);
1627 GDKfree(BBP_hash);
1628 BBP_hash = 0;
1629 // these need to be NULL, otherwise no new ones get created
1630 backup_files = 0;
1631 backup_dir = 0;
1632 backup_subdir = 0;
1633
1634}
1635
/*
 * The routine BBPdir creates the BAT pool dictionary file.  It
 * includes some information about the current state of affairs in
 * the pool.  The location in the buffer pool is saved for later use
 * as well.  This is merely done for ease of debugging and is of no
 * importance to front-ends.  The tail of non-used entries is
 * reclaimed as well.
 */
1644static inline int
1645heap_entry(FILE *fp, BAT *b)
1646{
1647 return fprintf(fp, " %s %d %d %d " BUNFMT " " BUNFMT " " BUNFMT " "
1648 BUNFMT " " OIDFMT " %zu %zu %d",
1649 b->ttype >= 0 ? BATatoms[b->ttype].name : ATOMunknown_name(b->ttype),
1650 b->twidth,
1651 b->tvarsized | (b->tvheap ? b->tvheap->hashash << 1 : 0),
1652 (unsigned short) b->tsorted |
1653 ((unsigned short) b->trevsorted << 7) |
1654 (((unsigned short) b->tkey & 0x01) << 8) |
1655 ((unsigned short) BATtdense(b) << 9) |
1656 ((unsigned short) b->tnonil << 10) |
1657 ((unsigned short) b->tnil << 11),
1658 b->tnokey[0],
1659 b->tnokey[1],
1660 b->tnosorted,
1661 b->tnorevsorted,
1662 b->tseqbase,
1663 b->theap.free,
1664 b->theap.size,
1665 (int) b->theap.newstorage);
1666}
1667
1668static inline int
1669vheap_entry(FILE *fp, Heap *h)
1670{
1671 if (h == NULL)
1672 return 0;
1673 return fprintf(fp, " %zu %zu %d",
1674 h->free, h->size, (int) h->newstorage);
1675}
1676
1677static gdk_return
1678new_bbpentry(FILE *fp, bat i, const char *prefix)
1679{
1680#ifndef NDEBUG
1681 assert(i > 0);
1682 assert(i < (bat) ATOMIC_GET(&BBPsize));
1683 assert(BBP_desc(i));
1684 assert(BBP_desc(i)->batCacheid == i);
1685 assert(BBP_desc(i)->batRole == PERSISTENT);
1686 assert(0 <= BBP_desc(i)->theap.farmid && BBP_desc(i)->theap.farmid < MAXFARMS);
1687 assert(BBPfarms[BBP_desc(i)->theap.farmid].roles & (1 << PERSISTENT));
1688 if (BBP_desc(i)->tvheap) {
1689 assert(0 <= BBP_desc(i)->tvheap->farmid && BBP_desc(i)->tvheap->farmid < MAXFARMS);
1690 assert(BBPfarms[BBP_desc(i)->tvheap->farmid].roles & (1 << PERSISTENT));
1691 }
1692#endif
1693
1694 if (fprintf(fp, "%s%zd %u %s %s %d " BUNFMT " "
1695 BUNFMT " " OIDFMT, prefix,
1696 /* BAT info */
1697 (ssize_t) i,
1698 BBP_status(i) & BBPPERSISTENT,
1699 BBP_logical(i),
1700 BBP_physical(i),
1701 BBP_desc(i)->batRestricted << 1,
1702 BBP_desc(i)->batCount,
1703 BBP_desc(i)->batCapacity,
1704 BBP_desc(i)->hseqbase) < 0 ||
1705 heap_entry(fp, BBP_desc(i)) < 0 ||
1706 vheap_entry(fp, BBP_desc(i)->tvheap) < 0 ||
1707 (BBP_options(i) &&
1708 fprintf(fp, " %s", BBP_options(i)) < 0) ||
1709 fprintf(fp, "\n") < 0) {
1710 GDKsyserror("new_bbpentry: Writing BBP.dir entry failed\n");
1711 return GDK_FAIL;
1712 }
1713
1714 return GDK_SUCCEED;
1715}
1716
1717static gdk_return
1718BBPdir_header(FILE *f, int n)
1719{
1720 if (fprintf(f, "BBP.dir, GDKversion %u\n%d %d %d\nBBPsize=%d\n",
1721 GDKLIBRARY, SIZEOF_SIZE_T, SIZEOF_OID,
1722#ifdef HAVE_HGE
1723 havehge ? SIZEOF_HGE :
1724#endif
1725 SIZEOF_LNG, n) < 0 ||
1726 ferror(f)) {
1727 GDKsyserror("BBPdir_header: Writing BBP.dir header failed\n");
1728 return GDK_FAIL;
1729 }
1730 return GDK_SUCCEED;
1731}
1732
1733static gdk_return
1734BBPdir_subcommit(int cnt, bat *subcommit)
1735{
1736 FILE *obbpf, *nbbpf;
1737 bat j = 1;
1738 char buf[3000];
1739 int n;
1740
1741#ifndef NDEBUG
1742 assert(subcommit != NULL);
1743 for (n = 2; n < cnt; n++)
1744 assert(subcommit[n - 1] < subcommit[n]);
1745#endif
1746
1747 if ((nbbpf = GDKfilelocate(0, "BBP", "w", "dir")) == NULL)
1748 return GDK_FAIL;
1749
1750 n = (bat) ATOMIC_GET(&BBPsize);
1751
1752 /* we need to copy the backup BBP.dir to the new, but
1753 * replacing the entries for the subcommitted bats */
1754 if ((obbpf = GDKfileopen(0, SUBDIR, "BBP", "dir", "r")) == NULL &&
1755 (obbpf = GDKfileopen(0, BAKDIR, "BBP", "dir", "r")) == NULL) {
1756 GDKerror("BBPdir: subcommit attempted without backup BBP.dir.");
1757 return GDK_FAIL;
1758 }
1759 /* read first three lines */
1760 if (fgets(buf, sizeof(buf), obbpf) == NULL || /* BBP.dir, GDKversion %d */
1761 fgets(buf, sizeof(buf), obbpf) == NULL || /* SIZEOF_SIZE_T SIZEOF_OID SIZEOF_MAX_INT */
1762 fgets(buf, sizeof(buf), obbpf) == NULL) { /* BBPsize=%d */
1763 GDKerror("BBPdir: subcommit attempted with invalid backup BBP.dir.");
1764 return GDK_FAIL;
1765 }
1766 /* third line contains BBPsize */
1767 sscanf(buf, "BBPsize=%d", &n);
1768 if (n < (bat) ATOMIC_GET(&BBPsize))
1769 n = (bat) ATOMIC_GET(&BBPsize);
1770
1771 IODEBUG fprintf(stderr, "#BBPdir: writing BBP.dir (%d bats).\n", n);
1772
1773 if (BBPdir_header(nbbpf, n) != GDK_SUCCEED) {
1774 goto bailout;
1775 }
1776 n = 0;
1777 for (;;) {
1778 /* but for subcommits, all except the bats in the list
1779 * retain their existing mode */
1780 if (n == 0 && obbpf != NULL) {
1781 if (fgets(buf, sizeof(buf), obbpf) == NULL) {
1782 fclose(obbpf);
1783 obbpf = NULL;
1784 } else if (sscanf(buf, "%d", &n) != 1 || n <= 0) {
1785 GDKerror("BBPdir: subcommit attempted with invalid backup BBP.dir.");
1786 return GDK_FAIL;
1787 }
1788 /* at this point, obbpf == NULL, or n > 0 */
1789 }
1790 if (j == cnt && n == 0) {
1791 assert(obbpf == NULL);
1792 break;
1793 }
1794 if (j < cnt && (n == 0 || subcommit[j] <= n || obbpf == NULL)) {
1795 bat i = subcommit[j];
1796 /* BBP.dir consists of all persistent bats only */
1797 if (BBP_status(i) & BBPPERSISTENT) {
1798 if (new_bbpentry(nbbpf, i, "") != GDK_SUCCEED) {
1799 goto bailout;
1800 }
1801 IODEBUG new_bbpentry(stderr, i, "#");
1802 }
1803 if (i == n)
1804 n = 0; /* read new entry (i.e. skip this one from old BBP.dir */
1805 do
1806 /* go to next, skipping duplicates */
1807 j++;
1808 while (j < cnt && subcommit[j] == i);
1809 } else {
1810 if (fprintf(nbbpf, "%s", buf) < 0) {
1811 GDKsyserror("BBPdir_subcommit: Copying BBP.dir entry failed\n");
1812 goto bailout;
1813 }
1814 IODEBUG fprintf(stderr, "#%s", buf);
1815 n = 0;
1816 }
1817 }
1818
1819 if (fflush(nbbpf) == EOF ||
1820 (!(GDKdebug & NOSYNCMASK)
1821#if defined(NATIVE_WIN32)
1822 && _commit(_fileno(nbbpf)) < 0
1823#elif defined(HAVE_FDATASYNC)
1824 && fdatasync(fileno(nbbpf)) < 0
1825#elif defined(HAVE_FSYNC)
1826 && fsync(fileno(nbbpf)) < 0
1827#endif
1828 )) {
1829 GDKsyserror("BBPdir_subcommit: Syncing BBP.dir file failed\n");
1830 goto bailout;
1831 }
1832 if (fclose(nbbpf) == EOF) {
1833 GDKsyserror("BBPdir_subcommit: Closing BBP.dir file failed\n");
1834 goto bailout;
1835 }
1836
1837 IODEBUG fprintf(stderr, "#BBPdir end\n");
1838
1839 return GDK_SUCCEED;
1840
1841 bailout:
1842 if (obbpf != NULL)
1843 fclose(obbpf);
1844 if (nbbpf != NULL)
1845 fclose(nbbpf);
1846 return GDK_FAIL;
1847}
1848
1849gdk_return
1850BBPdir(int cnt, bat *subcommit)
1851{
1852 FILE *fp;
1853 bat i;
1854
1855 if (subcommit)
1856 return BBPdir_subcommit(cnt, subcommit);
1857
1858 IODEBUG fprintf(stderr, "#BBPdir: writing BBP.dir (%d bats).\n", (int) (bat) ATOMIC_GET(&BBPsize));
1859 if ((fp = GDKfilelocate(0, "BBP", "w", "dir")) == NULL) {
1860 goto bailout;
1861 }
1862
1863 if (BBPdir_header(fp, (bat) ATOMIC_GET(&BBPsize)) != GDK_SUCCEED) {
1864 goto bailout;
1865 }
1866
1867 for (i = 1; i < (bat) ATOMIC_GET(&BBPsize); i++) {
1868 /* write the entry
1869 * BBP.dir consists of all persistent bats */
1870 if (BBP_status(i) & BBPPERSISTENT) {
1871 if (new_bbpentry(fp, i, "") != GDK_SUCCEED) {
1872 goto bailout;
1873 }
1874 IODEBUG new_bbpentry(stderr, i, "#");
1875 }
1876 }
1877
1878 if (fflush(fp) == EOF ||
1879 (!(GDKdebug & NOSYNCMASK)
1880#if defined(NATIVE_WIN32)
1881 && _commit(_fileno(fp)) < 0
1882#elif defined(HAVE_FDATASYNC)
1883 && fdatasync(fileno(fp)) < 0
1884#elif defined(HAVE_FSYNC)
1885 && fsync(fileno(fp)) < 0
1886#endif
1887 )) {
1888 GDKsyserror("BBPdir: Syncing BBP.dir file failed\n");
1889 goto bailout;
1890 }
1891 if (fclose(fp) == EOF) {
1892 GDKsyserror("BBPdir: Closing BBP.dir file failed\n");
1893 return GDK_FAIL;
1894 }
1895
1896 IODEBUG fprintf(stderr, "#BBPdir end\n");
1897
1898 if (i < (bat) ATOMIC_GET(&BBPsize))
1899 return GDK_FAIL;
1900
1901 return GDK_SUCCEED;
1902
1903 bailout:
1904 if (fp != NULL)
1905 fclose(fp);
1906 return GDK_FAIL;
1907}
1908
1909/* function used for debugging */
1910void
1911BBPdump(void)
1912{
1913 bat i;
1914 size_t mem = 0, vm = 0;
1915 size_t cmem = 0, cvm = 0;
1916 int n = 0, nc = 0;
1917
1918 for (i = 0; i < (bat) ATOMIC_GET(&BBPsize); i++) {
1919 BAT *b = BBP_cache(i);
1920 if (b == NULL)
1921 continue;
1922 fprintf(stderr,
1923 "# %d[%s]: nme='%s' refs=%d lrefs=%d "
1924 "status=%u count=" BUNFMT,
1925 i,
1926 ATOMname(b->ttype),
1927 BBP_logical(i) ? BBP_logical(i) : "<NULL>",
1928 BBP_refs(i),
1929 BBP_lrefs(i),
1930 BBP_status(i),
1931 b->batCount);
1932 if (b->batSharecnt > 0)
1933 fprintf(stderr, " shares=%d", b->batSharecnt);
1934 if (b->batDirtydesc)
1935 fprintf(stderr, " DirtyDesc");
1936 if (b->theap.parentid) {
1937 fprintf(stderr, " Theap -> %d", b->theap.parentid);
1938 } else {
1939 fprintf(stderr,
1940 " Theap=[%zu,%zu]%s",
1941 HEAPmemsize(&b->theap),
1942 HEAPvmsize(&b->theap),
1943 b->theap.dirty ? "(Dirty)" : "");
1944 if (BBP_logical(i) && BBP_logical(i)[0] == '.') {
1945 cmem += HEAPmemsize(&b->theap);
1946 cvm += HEAPvmsize(&b->theap);
1947 nc++;
1948 } else {
1949 mem += HEAPmemsize(&b->theap);
1950 vm += HEAPvmsize(&b->theap);
1951 n++;
1952 }
1953 }
1954 if (b->tvheap) {
1955 if (b->tvheap->parentid != b->batCacheid) {
1956 fprintf(stderr,
1957 " Tvheap -> %d",
1958 b->tvheap->parentid);
1959 } else {
1960 fprintf(stderr,
1961 " Tvheap=[%zu,%zu]%s",
1962 HEAPmemsize(b->tvheap),
1963 HEAPvmsize(b->tvheap),
1964 b->tvheap->dirty ? "(Dirty)" : "");
1965 if (BBP_logical(i) && BBP_logical(i)[0] == '.') {
1966 cmem += HEAPmemsize(b->tvheap);
1967 cvm += HEAPvmsize(b->tvheap);
1968 } else {
1969 mem += HEAPmemsize(b->tvheap);
1970 vm += HEAPvmsize(b->tvheap);
1971 }
1972 }
1973 }
1974 if (b->thash && b->thash != (Hash *) 1) {
1975 fprintf(stderr,
1976 " Thash=[%zu,%zu]",
1977 HEAPmemsize(&b->thash->heap),
1978 HEAPvmsize(&b->thash->heap));
1979 if (BBP_logical(i) && BBP_logical(i)[0] == '.') {
1980 cmem += HEAPmemsize(&b->thash->heap);
1981 cvm += HEAPvmsize(&b->thash->heap);
1982 } else {
1983 mem += HEAPmemsize(&b->thash->heap);
1984 vm += HEAPvmsize(&b->thash->heap);
1985 }
1986 }
1987 fprintf(stderr, " role: %s, persistence: %s\n",
1988 b->batRole == PERSISTENT ? "persistent" : "transient",
1989 b->batTransient ? "transient" : "persistent");
1990 }
1991 fprintf(stderr,
1992 "# %d bats: mem=%zu, vm=%zu %d cached bats: mem=%zu, vm=%zu\n",
1993 n, mem, vm, nc, cmem, cvm);
1994 fflush(stderr);
1995}
1996
1997/*
1998 * @+ BBP Readonly Interface
1999 *
2000 * These interface functions do not change the BBP tables. If they
2001 * only access one specific BAT, the caller must have ensured that no
2002 * other thread is modifying that BAT, therefore such functions do not
2003 * need locking.
2004 *
2005 * BBP index lookup by BAT name:
2006 */
2007static inline bat
2008BBP_find(const char *nme, bool lock)
2009{
2010 bat i = BBPnamecheck(nme);
2011
2012 if (i != 0) {
2013 /* for tmp_X BATs, we already know X */
2014 const char *s;
2015
2016 if (i >= (bat) ATOMIC_GET(&BBPsize) || (s = BBP_logical(i)) == NULL || strcmp(s, nme)) {
2017 i = 0;
2018 }
2019 } else if (*nme != '.') {
2020 /* must lock since hash-lookup traverses other BATs */
2021 if (lock)
2022 MT_lock_set(&GDKnameLock);
2023 for (i = BBP_hash[strHash(nme) & BBP_mask]; i; i = BBP_next(i)) {
2024 if (strcmp(BBP_logical(i), nme) == 0)
2025 break;
2026 }
2027 if (lock)
2028 MT_lock_unset(&GDKnameLock);
2029 }
2030 return i;
2031}
2032
2033bat
2034BBPindex(const char *nme)
2035{
2036 return BBP_find(nme, true);
2037}
2038
2039BAT *
2040BBPgetdesc(bat i)
2041{
2042 if (is_bat_nil(i))
2043 return NULL;
2044 if (i < 0)
2045 i = -i;
2046 if (i != 0 && i < (bat) ATOMIC_GET(&BBPsize) && i && BBP_logical(i)) {
2047 return BBP_desc(i);
2048 }
2049 return NULL;
2050}
2051
2052/*
2053 * @+ BBP Update Interface
2054 * Operations to insert, delete, clear, and modify BBP entries.
2055 * Our policy for the BBP is to provide unlocked BBP access for
2056 * speed, but still write operations have to be locked.
2057 * #ifdef DEBUG_THREADLOCAL_BATS
2058 * Create the shadow version (reversed) of a bat.
2059 *
2060 * An existing BAT is inserted into the BBP
2061 */
2062static inline str
2063BBPsubdir_recursive(str s, bat i)
2064{
2065 i >>= 6;
2066 if (i >= 0100) {
2067 s = BBPsubdir_recursive(s, i);
2068 *s++ = DIR_SEP;
2069 }
2070 i &= 077;
2071 *s++ = '0' + (i >> 3);
2072 *s++ = '0' + (i & 7);
2073 return s;
2074}
2075
2076static inline void
2077BBPgetsubdir(str s, bat i)
2078{
2079 if (i >= 0100) {
2080 s = BBPsubdir_recursive(s, i);
2081 }
2082 *s = 0;
2083}
2084
2085/* There are BBP_THREADMASK+1 (64) free lists, and ours (idx) is
2086 * empty. Here we find a longish free list (at least 20 entries), and
2087 * if we can find one, we take one entry from that list. If no long
2088 * enough list can be found, we create a new entry by either just
2089 * increasing BBPsize (up to BBPlimit) or extending the BBP (which
2090 * increases BBPlimit). Every time this function is called we start
2091 * searching in a following free list (variable "last"). */
2092static gdk_return
2093maybeextend(int idx)
2094{
2095 int t, m;
2096 int n, l;
2097 bat i;
2098 static int last = 0;
2099
2100 l = 0; /* length of longest list */
2101 m = 0; /* index of longest list */
2102 /* find a longish free list */
2103 for (t = 0; t <= BBP_THREADMASK && l <= 20; t++) {
2104 n = 0;
2105 for (i = BBP_free((t + last) & BBP_THREADMASK);
2106 i != 0 && n <= 20;
2107 i = BBP_next(i))
2108 n++;
2109 if (n > l) {
2110 m = (t + last) & BBP_THREADMASK;
2111 l = n;
2112 }
2113 }
2114 if (l > 20) {
2115 /* list is long enough, get an entry from there */
2116 i = BBP_free(m);
2117 BBP_free(m) = BBP_next(i);
2118 BBP_next(i) = 0;
2119 BBP_free(idx) = i;
2120 } else {
2121 /* let the longest list alone, get a fresh entry */
2122 if ((bat) ATOMIC_ADD(&BBPsize, 1) >= BBPlimit) {
2123 if (BBPextend(idx, true) != GDK_SUCCEED) {
2124 /* undo add */
2125 ATOMIC_SUB(&BBPsize, 1);
2126 /* couldn't extend; if there is any
2127 * free entry, take it from the
2128 * longest list after all */
2129 if (l > 0) {
2130 i = BBP_free(m);
2131 BBP_free(m) = BBP_next(i);
2132 BBP_next(i) = 0;
2133 BBP_free(idx) = i;
2134 } else {
2135 /* nothing available */
2136 return GDK_FAIL;
2137 }
2138 }
2139 } else {
2140 BBP_free(idx) = (bat) ATOMIC_GET(&BBPsize) - 1;
2141 }
2142 }
2143 last = (last + 1) & BBP_THREADMASK;
2144 return GDK_SUCCEED;
2145}
2146
2147/* return new BAT id (> 0); return 0 on failure */
2148bat
2149BBPinsert(BAT *bn)
2150{
2151 MT_Id pid = MT_getpid();
2152 bool lock = locked_by == 0 || locked_by != pid;
2153 char dirname[24];
2154 bat i;
2155 int idx = threadmask(pid), len = 0;
2156
2157 /* critical section: get a new BBP entry */
2158 if (lock) {
2159 MT_lock_set(&GDKtrimLock(idx));
2160 MT_lock_set(&GDKcacheLock(idx));
2161 }
2162
2163 /* find an empty slot */
2164 if (BBP_free(idx) <= 0) {
2165 /* we need to extend the BBP */
2166 gdk_return r = GDK_SUCCEED;
2167 if (lock) {
2168 /* we must take all locks in a consistent
2169 * order so first unset the one we've already
2170 * got */
2171 MT_lock_unset(&GDKcacheLock(idx));
2172 for (i = 0; i <= BBP_THREADMASK; i++)
2173 MT_lock_set(&GDKcacheLock(i));
2174 }
2175 MT_lock_set(&GDKnameLock);
2176 /* check again in case some other thread extended
2177 * while we were waiting */
2178 if (BBP_free(idx) <= 0) {
2179 r = maybeextend(idx);
2180 }
2181 MT_lock_unset(&GDKnameLock);
2182 if (lock)
2183 for (i = BBP_THREADMASK; i >= 0; i--)
2184 if (i != idx)
2185 MT_lock_unset(&GDKcacheLock(i));
2186 if (r != GDK_SUCCEED) {
2187 if (lock) {
2188 MT_lock_unset(&GDKcacheLock(idx));
2189 MT_lock_unset(&GDKtrimLock(idx));
2190 }
2191 return 0;
2192 }
2193 }
2194 i = BBP_free(idx);
2195 assert(i > 0);
2196 BBP_free(idx) = BBP_next(i);
2197
2198 if (lock) {
2199 MT_lock_unset(&GDKcacheLock(idx));
2200 MT_lock_unset(&GDKtrimLock(idx));
2201 }
2202 /* rest of the work outside the lock */
2203
2204 /* fill in basic BBP fields for the new bat */
2205
2206 bn->batCacheid = i;
2207 bn->creator_tid = MT_getpid();
2208
2209 BBP_status_set(i, BBPDELETING, "BBPinsert");
2210 BBP_cache(i) = NULL;
2211 BBP_desc(i) = NULL;
2212 BBP_refs(i) = 1; /* new bats have 1 pin */
2213 BBP_lrefs(i) = 0; /* ie. no logical refs */
2214
2215#ifdef HAVE_HGE
2216 if (bn->ttype == TYPE_hge)
2217 havehge = true;
2218#endif
2219
2220 if (*BBP_bak(i) == 0)
2221 len = snprintf(BBP_bak(i), sizeof(BBP_bak(i)), "tmp_%o", (unsigned) i);
2222 if (len == -1 || len >= FILENAME_MAX)
2223 return 0;
2224 BBP_logical(i) = BBP_bak(i);
2225
2226 /* Keep the physical location around forever */
2227 if (!GDKinmemory() && *BBP_physical(i) == 0) {
2228 BBPgetsubdir(dirname, i);
2229
2230 if (*dirname) /* i.e., i >= 0100 */
2231 len = snprintf(BBP_physical(i), sizeof(BBP_physical(i)),
2232 "%s%c%o", dirname, DIR_SEP, (unsigned) i);
2233 else
2234 len = snprintf(BBP_physical(i), sizeof(BBP_physical(i)),
2235 "%o", (unsigned) i);
2236 if (len == -1 || len >= FILENAME_MAX)
2237 return 0;
2238
2239 BATDEBUG fprintf(stderr, "#%d = new %s(%s)\n", (int) i, BBPname(i), ATOMname(bn->ttype));
2240 }
2241
2242 return i;
2243}
2244
2245gdk_return
2246BBPcacheit(BAT *bn, bool lock)
2247{
2248 bat i = bn->batCacheid;
2249 unsigned mode;
2250
2251 if (lock)
2252 lock = locked_by == 0 || locked_by != MT_getpid();
2253
2254 if (i) {
2255 assert(i > 0);
2256 } else {
2257 i = BBPinsert(bn); /* bat was not previously entered */
2258 if (i == 0)
2259 return GDK_FAIL;
2260 if (bn->tvheap)
2261 bn->tvheap->parentid = i;
2262 }
2263 assert(bn->batCacheid > 0);
2264
2265 if (lock)
2266 MT_lock_set(&GDKswapLock(i));
2267 mode = (BBP_status(i) | BBPLOADED) & ~(BBPLOADING | BBPDELETING);
2268 BBP_status_set(i, mode, "BBPcacheit");
2269 BBP_desc(i) = bn;
2270
2271 /* cache it! */
2272 BBP_cache(i) = bn;
2273
2274 if (lock)
2275 MT_lock_unset(&GDKswapLock(i));
2276 return GDK_SUCCEED;
2277}
2278
2279/*
2280 * BBPuncacheit changes the BBP status to swapped out. Currently only
2281 * used in BBPfree (bat swapped out) and BBPclear (bat destroyed
2282 * forever).
2283 */
2284
2285static void
2286BBPuncacheit(bat i, bool unloaddesc)
2287{
2288 if (i < 0)
2289 i = -i;
2290 if (BBPcheck(i, "BBPuncacheit")) {
2291 BAT *b = BBP_desc(i);
2292
2293 if (b) {
2294 if (BBP_cache(i)) {
2295 BATDEBUG fprintf(stderr, "#uncache %d (%s)\n", (int) i, BBPname(i));
2296
2297 BBP_cache(i) = NULL;
2298
2299 /* clearing bits can be done without the lock */
2300 BBP_status_off(i, BBPLOADED, "BBPuncacheit");
2301 }
2302 if (unloaddesc) {
2303 BBP_desc(i) = NULL;
2304 BATdestroy(b);
2305 }
2306 }
2307 }
2308}
2309
2310/*
2311 * @- BBPclear
2312 * BBPclear removes a BAT from the BBP directory forever.
2313 */
2314static inline void
2315bbpclear(bat i, int idx, bool lock)
2316{
2317 BATDEBUG {
2318 fprintf(stderr, "#clear %d (%s)\n", (int) i, BBPname(i));
2319 }
2320 BBPuncacheit(i, true);
2321 BATDEBUG {
2322 fprintf(stderr, "#BBPclear set to unloading %d\n", i);
2323 }
2324 BBP_status_set(i, BBPUNLOADING, "BBPclear");
2325 BBP_refs(i) = 0;
2326 BBP_lrefs(i) = 0;
2327 if (lock)
2328 MT_lock_set(&GDKcacheLock(idx));
2329
2330 if (BBPtmpcheck(BBP_logical(i)) == 0) {
2331 MT_lock_set(&GDKnameLock);
2332 BBP_delete(i);
2333 MT_lock_unset(&GDKnameLock);
2334 }
2335 if (BBP_logical(i) != BBP_bak(i))
2336 GDKfree(BBP_logical(i));
2337 BBP_status_set(i, 0, "BBPclear");
2338 BBP_logical(i) = NULL;
2339 BBP_next(i) = BBP_free(idx);
2340 BBP_free(idx) = i;
2341 if (lock)
2342 MT_lock_unset(&GDKcacheLock(idx));
2343}
2344
2345void
2346BBPclear(bat i)
2347{
2348 MT_Id pid = MT_getpid();
2349 bool lock = locked_by == 0 || locked_by != pid;
2350
2351 if (BBPcheck(i, "BBPclear")) {
2352 bbpclear(i, threadmask(pid), lock);
2353 }
2354}
2355
2356/*
2357 * @- BBP rename
2358 *
2359 * Each BAT has a logical name that is globally unique. Its reverse
2360 * view can also be assigned a name, that also has to be globally
2361 * unique. The batId is the same as the logical BAT name.
2362 *
2363 * The default logical name of a BAT is tmp_X, where X is the
2364 * batCacheid. Apart from being globally unique, new logical bat
2365 * names cannot be of the form tmp_X, unless X is the batCacheid.
2366 *
2367 * Physical names consist of a directory name followed by a logical
2368 * name suffix. The directory name is derived from the batCacheid,
2369 * and is currently organized in a hierarchy that puts max 64 bats in
2370 * each directory (see BBPgetsubdir).
2371 *
2372 * Concerning the physical suffix: it is almost always bat_X. This
2373 * saves us a whole lot of trouble, as bat_X is always unique and no
 * conflicts can occur. Other suffixes are supported only for
 * backward compatibility with old repositories (you won't see them
 * anymore in new repositories).
2377 */
2378int
2379BBPrename(bat bid, const char *nme)
2380{
2381 BAT *b = BBPdescriptor(bid);
2382 char dirname[24];
2383 bat tmpid = 0, i;
2384 int idx;
2385
2386 if (b == NULL)
2387 return 0;
2388
2389 /* If name stays same, do nothing */
2390 if (BBP_logical(bid) && strcmp(BBP_logical(bid), nme) == 0)
2391 return 0;
2392
2393 BBPgetsubdir(dirname, bid);
2394
2395 if ((tmpid = BBPnamecheck(nme)) && tmpid != bid) {
2396 GDKerror("BBPrename: illegal temporary name: '%s'\n", nme);
2397 return BBPRENAME_ILLEGAL;
2398 }
2399 if (strlen(dirname) + strLen(nme) + 1 >= IDLENGTH) {
2400 GDKerror("BBPrename: illegal temporary name: '%s'\n", nme);
2401 return BBPRENAME_LONG;
2402 }
2403 idx = threadmask(MT_getpid());
2404 MT_lock_set(&GDKtrimLock(idx));
2405 MT_lock_set(&GDKnameLock);
2406 i = BBP_find(nme, false);
2407 if (i != 0) {
2408 MT_lock_unset(&GDKnameLock);
2409 MT_lock_unset(&GDKtrimLock(idx));
2410 GDKerror("BBPrename: name is in use: '%s'.\n", nme);
2411 return BBPRENAME_ALREADY;
2412 }
2413
2414 /* carry through the name change */
2415 if (BBP_logical(bid) && BBPtmpcheck(BBP_logical(bid)) == 0) {
2416 BBP_delete(bid);
2417 }
2418 if (BBP_logical(bid) != BBP_bak(bid))
2419 GDKfree(BBP_logical(bid));
2420 BBP_logical(bid) = GDKstrdup(nme);
2421 if (tmpid == 0) {
2422 BBP_insert(bid);
2423 }
2424 b->batDirtydesc = true;
2425 if (!b->batTransient) {
2426 bool lock = locked_by == 0 || locked_by != MT_getpid();
2427
2428 if (lock)
2429 MT_lock_set(&GDKswapLock(i));
2430 BBP_status_on(bid, BBPRENAMED, "BBPrename");
2431 if (lock)
2432 MT_lock_unset(&GDKswapLock(i));
2433 }
2434 MT_lock_unset(&GDKnameLock);
2435 MT_lock_unset(&GDKtrimLock(idx));
2436 return 0;
2437}
2438
2439/*
2440 * @+ BBP swapping Policy
2441 * The BAT can be moved back to disk using the routine BBPfree. It
2442 * frees the storage for other BATs. After this call BAT* references
2443 * maintained for the BAT are wrong. We should keep track of dirty
2444 * unloaded BATs. They may have to be committed later on, which may
2445 * include reading them in again.
2446 *
2447 * BBPswappable: may this bat be unloaded? Only real bats without
2448 * memory references can be unloaded.
2449 */
2450static inline void
2451BBPspin(bat i, const char *s, unsigned event)
2452{
2453 if (BBPcheck(i, "BBPspin") && (BBP_status(i) & event)) {
2454 lng spin = LL_CONSTANT(0);
2455
2456 do {
2457 MT_sleep_ms(KITTENNAP);
2458 spin++;
2459 } while (BBP_status(i) & event);
2460 BATDEBUG fprintf(stderr, "#BBPspin(%d,%s,%u): " LLFMT " loops\n", (int) i, s, event, spin);
2461 }
2462}
2463
2464/* This function can fail if the input parameter (i) is incorrect
2465 * (unlikely), of if the bat is a view, this is a physical (not
2466 * logical) incref (i.e. called through BBPfix(), and it is the first
2467 * reference (refs was 0 and should become 1). It can fail in this
2468 * case if the parent bat cannot be loaded.
2469 * This means the return value of BBPfix should be checked in these
2470 * circumstances, but not necessarily in others. */
2471static inline int
2472incref(bat i, bool logical, bool lock)
2473{
2474 int refs;
2475 bat tp, tvp;
2476 BAT *b, *pb = NULL, *pvb = NULL;
2477 bool load = false;
2478
2479 if (!BBPcheck(i, logical ? "BBPretain" : "BBPfix"))
2480 return 0;
2481
2482 /* Before we get the lock and before we do all sorts of
2483 * things, make sure we can load the parent bats if there are
2484 * any. If we can't load them, we can still easily fail. If
2485 * this is indeed a view, but not the first physical
2486 * reference, getting the parent BAT descriptor is
2487 * superfluous, but not too expensive, so we do it anyway. */
2488 if (!logical && (b = BBP_desc(i)) != NULL) {
2489 if (b->theap.parentid) {
2490 pb = BATdescriptor(b->theap.parentid);
2491 if (pb == NULL)
2492 return 0;
2493 }
2494 if (b->tvheap && b->tvheap->parentid != i) {
2495 pvb = BATdescriptor(b->tvheap->parentid);
2496 if (pvb == NULL) {
2497 if (pb)
2498 BBPunfix(pb->batCacheid);
2499 return 0;
2500 }
2501 }
2502 }
2503
2504 if (lock) {
2505 for (;;) {
2506 MT_lock_set(&GDKswapLock(i));
2507 if (!(BBP_status(i) & (BBPUNSTABLE|BBPLOADING)))
2508 break;
2509 /* the BATs is "unstable", try again */
2510 MT_lock_unset(&GDKswapLock(i));
2511 MT_sleep_ms(KITTENNAP);
2512 }
2513 }
2514 /* we have the lock */
2515
2516 b = BBP_desc(i);
2517 if (b == NULL) {
2518 /* should not have happened */
2519 if (lock)
2520 MT_lock_unset(&GDKswapLock(i));
2521 return 0;
2522 }
2523
2524 assert(BBP_refs(i) + BBP_lrefs(i) ||
2525 BBP_status(i) & (BBPDELETED | BBPSWAPPED));
2526 if (logical) {
2527 /* parent BATs are not relevant for logical refs */
2528 tp = tvp = 0;
2529 refs = ++BBP_lrefs(i);
2530 } else {
2531 tp = b->theap.parentid;
2532 assert(tp >= 0);
2533 tvp = b->tvheap == 0 || b->tvheap->parentid == i ? 0 : b->tvheap->parentid;
2534 refs = ++BBP_refs(i);
2535 if (refs == 1 && (tp || tvp)) {
2536 /* If this is a view, we must load the parent
2537 * BATs, but we must do that outside of the
2538 * lock. Set the BBPLOADING flag so that
2539 * other threads will wait until we're
2540 * done. */
2541 BBP_status_on(i, BBPLOADING, "BBPfix");
2542 load = true;
2543 }
2544 }
2545 if (lock)
2546 MT_lock_unset(&GDKswapLock(i));
2547
2548 if (load) {
2549 /* load the parent BATs and set the heap base pointers
2550 * to the correct values */
2551 assert(!logical);
2552 if (tp) {
2553 assert(pb != NULL);
2554 b->theap.base = pb->theap.base + (size_t) b->theap.base;
2555 }
2556 /* done loading, release descriptor */
2557 BBP_status_off(i, BBPLOADING, "BBPfix");
2558 } else if (!logical) {
2559 /* this wasn't the first physical reference, so undo
2560 * the fixes on the parent bats */
2561 if (pb)
2562 BBPunfix(pb->batCacheid);
2563 if (pvb)
2564 BBPunfix(pvb->batCacheid);
2565 }
2566 return refs;
2567}
2568
2569/* see comment for incref */
2570int
2571BBPfix(bat i)
2572{
2573 bool lock = locked_by == 0 || locked_by != MT_getpid();
2574
2575 return incref(i, false, lock);
2576}
2577
2578int
2579BBPretain(bat i)
2580{
2581 bool lock = locked_by == 0 || locked_by != MT_getpid();
2582
2583 return incref(i, true, lock);
2584}
2585
2586void
2587BBPshare(bat parent)
2588{
2589 bool lock = locked_by == 0 || locked_by != MT_getpid();
2590
2591 assert(parent > 0);
2592 (void) incref(parent, true, lock);
2593 if (lock)
2594 MT_lock_set(&GDKswapLock(parent));
2595 ++BBP_cache(parent)->batSharecnt;
2596 assert(BBP_refs(parent) > 0);
2597 if (lock)
2598 MT_lock_unset(&GDKswapLock(parent));
2599 (void) incref(parent, false, lock);
2600}
2601
/* Decrement a reference count of BAT i and, if that makes the BAT
 * eligible, unload or destroy it.
 *
 * logical	decrement the logical reference count instead of the
 *		physical one
 * releaseShare	only decrement the share count of the descriptor and
 *		return 0; nothing else is done
 * lock		take GDKswapLock(i) around the bookkeeping
 * func		name of the calling function, used in messages
 *
 * Returns the reference count that remains (0 if there was nothing to
 * decrement), or -1 if unloading the BAT failed. */
static inline int
decref(bat i, bool logical, bool releaseShare, bool lock, const char *func)
{
	int refs = 0;
	bool swap = false;
	bat tp = 0, tvp = 0;
	BAT *b;

	assert(i > 0);
	if (lock)
		MT_lock_set(&GDKswapLock(i));
	if (releaseShare) {
		--BBP_desc(i)->batSharecnt;
		if (lock)
			MT_lock_unset(&GDKswapLock(i));
		return refs;
	}

	/* wait until the bat is no longer being unloaded; the lock is
	 * released while waiting */
	while (BBP_status(i) & BBPUNLOADING) {
		if (lock)
			MT_lock_unset(&GDKswapLock(i));
		BBPspin(i, func, BBPUNLOADING);
		if (lock)
			MT_lock_set(&GDKswapLock(i));
	}

	b = BBP_cache(i);

	/* decrement references by one */
	if (logical) {
		if (BBP_lrefs(i) == 0) {
			GDKerror("%s: %s does not have logical references.\n", func, BBPname(i));
			assert(0);
		} else {
			refs = --BBP_lrefs(i);
		}
	} else {
		if (BBP_refs(i) == 0) {
			GDKerror("%s: %s does not have pointer fixes.\n", func, BBPname(i));
			assert(0);
		} else {
			assert(b == NULL || b->theap.parentid == 0 || BBP_refs(b->theap.parentid) > 0);
			assert(b == NULL || b->tvheap == NULL || b->tvheap->parentid == 0 || BBP_refs(b->tvheap->parentid) > 0);
			refs = --BBP_refs(i);
			if (b && refs == 0) {
				/* last fix on a loaded bat: remember the
				 * parents so they are unfixed below, and
				 * turn the heap base pointer back into
				 * an offset from the parent's heap base */
				if ((tp = b->theap.parentid) != 0)
					b->theap.base = (char *) (b->theap.base - BBP_cache(tp)->theap.base);
				tvp = VIEWvtparent(b);
			}
		}
	}

	/* we destroy transients asap and unload persistent bats only
	 * if they have been made cold or are not dirty */
	if (BBP_refs(i) > 0 ||
	    (BBP_lrefs(i) > 0 &&
	     (b == NULL || BATdirty(b) || !(BBP_status(i) & BBPPERSISTENT) || GDKinmemory()))) {
		/* bat cannot be swapped out */
	} else if (b ? b->batSharecnt == 0 : (BBP_status(i) & BBPTMP)) {
		/* bat will be unloaded now. set the UNLOADING bit
		 * while locked so no other thread thinks it's
		 * available anymore */
		assert((BBP_status(i) & BBPUNLOADING) == 0);
		BATDEBUG {
			fprintf(stderr, "#%s set to unloading BAT %d\n", func, i);
		}
		BBP_status_on(i, BBPUNLOADING, func);
		swap = true;
	}

	/* unlock before re-locking in unload; as saving a dirty
	 * persistent bat may take a long time */
	if (lock)
		MT_lock_unset(&GDKswapLock(i));

	if (swap && b != NULL) {
		if (BBP_lrefs(i) == 0 && (BBP_status(i) & BBPDELETED) == 0) {
			/* free memory (if loaded) and delete from
			 * disk (if transient but saved) */
			BBPdestroy(b);
		} else {
			BATDEBUG {
				fprintf(stderr, "#%s unload and free bat %d\n", func, i);
			}
			/* save the bat if needed and free its memory */
			if (BBPfree(b, func) != GDK_SUCCEED)
				return -1;	/* indicate failure */
		}
	}
	/* release the fixes this bat held on its parents */
	if (tp)
		decref(tp, false, false, lock, func);
	if (tvp)
		decref(tvp, false, false, lock, func);
	return refs;
}
2697
2698int
2699BBPunfix(bat i)
2700{
2701 if (BBPcheck(i, "BBPunfix") == 0) {
2702 return -1;
2703 }
2704 return decref(i, false, false, true, "BBPunfix");
2705}
2706
2707int
2708BBPrelease(bat i)
2709{
2710 if (BBPcheck(i, "BBPrelease") == 0) {
2711 return -1;
2712 }
2713 return decref(i, true, false, true, "BBPrelease");
2714}
2715
2716/*
2717 * M5 often changes the physical ref into a logical reference. This
2718 * state change consist of the sequence BBPretain(b);BBPunfix(b).
2719 * A faster solution is given below, because it does not trigger the
2720 * BBP management actions, such as garbage collecting the bats.
2721 * [first step, initiate code change]
2722 */
2723void
2724BBPkeepref(bat i)
2725{
2726 if (is_bat_nil(i))
2727 return;
2728 if (BBPcheck(i, "BBPkeepref")) {
2729 bool lock = locked_by == 0 || locked_by != MT_getpid();
2730 BAT *b;
2731
2732 if ((b = BBPdescriptor(i)) != NULL) {
2733 BATsettrivprop(b);
2734 if (GDKdebug & (CHECKMASK | PROPMASK))
2735 BATassertProps(b);
2736 }
2737
2738 incref(i, true, lock);
2739 assert(BBP_refs(i));
2740 decref(i, false, false, lock, "BBPkeepref");
2741 }
2742}
2743
/* Release a share on BAT parent: the first decref call (releaseShare
 * set) only decrements the share count, the second one drops a
 * logical reference.  Both calls lock unconditionally. */
static inline void
GDKunshare(bat parent)
{
	(void) decref(parent, false, true, true, "GDKunshare");
	(void) decref(parent, true, false, true, "GDKunshare");
}
2750
/* Exported wrapper around GDKunshare: release a share on BAT parent. */
void
BBPunshare(bat parent)
{
	GDKunshare(parent);
}
2756
2757/*
2758 * BBPreclaim is a user-exported function; the common way to destroy a
2759 * BAT the hard way.
2760 *
2761 * Return values:
2762 * -1 = bat cannot be unloaded (it has more than your own memory fix)
2763 * 0 = unloaded successfully
2764 * 1 = unload failed (due to write-to-disk failure)
2765 */
2766int
2767BBPreclaim(BAT *b)
2768{
2769 bat i;
2770 bool lock = locked_by == 0 || locked_by != MT_getpid();
2771
2772 if (b == NULL)
2773 return -1;
2774 i = b->batCacheid;
2775
2776 assert(BBP_refs(i) == 1);
2777
2778 return decref(i, false, false, lock, "BBPreclaim") <0;
2779}
2780
2781/*
2782 * BBPdescriptor checks whether BAT needs loading and does so if
2783 * necessary. You must have at least one fix on the BAT before calling
2784 * this.
2785 */
2786static BAT *
2787getBBPdescriptor(bat i, bool lock)
2788{
2789 bool load = false;
2790 BAT *b = NULL;
2791
2792 assert(i > 0);
2793 if (!BBPcheck(i, "BBPdescriptor")) {
2794 return NULL;
2795 }
2796 assert(BBP_refs(i));
2797 if ((b = BBP_cache(i)) == NULL) {
2798
2799 if (lock)
2800 MT_lock_set(&GDKswapLock(i));
2801 while (BBP_status(i) & BBPWAITING) { /* wait for bat to be loaded by other thread */
2802 if (lock)
2803 MT_lock_unset(&GDKswapLock(i));
2804 MT_sleep_ms(KITTENNAP);
2805 if (lock)
2806 MT_lock_set(&GDKswapLock(i));
2807 }
2808 if (BBPvalid(i)) {
2809 b = BBP_cache(i);
2810 if (b == NULL) {
2811 load = true;
2812 BATDEBUG {
2813 fprintf(stderr, "#BBPdescriptor set to loading BAT %d\n", i);
2814 }
2815 BBP_status_on(i, BBPLOADING, "BBPdescriptor");
2816 }
2817 }
2818 if (lock)
2819 MT_lock_unset(&GDKswapLock(i));
2820 }
2821 if (load) {
2822 IODEBUG fprintf(stderr, "#load %s\n", BBPname(i));
2823
2824 b = BATload_intern(i, lock);
2825
2826 /* clearing bits can be done without the lock */
2827 BBP_status_off(i, BBPLOADING, "BBPdescriptor");
2828 CHECKDEBUG if (b != NULL)
2829 BATassertProps(b);
2830 }
2831 return b;
2832}
2833
2834BAT *
2835BBPdescriptor(bat i)
2836{
2837 bool lock = locked_by == 0 || locked_by != MT_getpid();
2838
2839 return getBBPdescriptor(i, lock);
2840}
2841
2842/*
2843 * In BBPsave executes unlocked; it just marks the BBP_status of the
2844 * BAT to BBPsaving, so others that want to save or unload this BAT
2845 * must spin lock on the BBP_status field.
2846 */
2847gdk_return
2848BBPsave(BAT *b)
2849{
2850 bool lock = locked_by == 0 || locked_by != MT_getpid();
2851 bat bid = b->batCacheid;
2852 gdk_return ret = GDK_SUCCEED;
2853
2854 if (BBP_lrefs(bid) == 0 || isVIEW(b) || !BATdirty(b))
2855 /* do nothing */
2856 return GDK_SUCCEED;
2857
2858 if (lock)
2859 MT_lock_set(&GDKswapLock(bid));
2860
2861 if (BBP_status(bid) & BBPSAVING) {
2862 /* wait until save in other thread completes */
2863 if (lock)
2864 MT_lock_unset(&GDKswapLock(bid));
2865 BBPspin(bid, "BBPsave", BBPSAVING);
2866 } else {
2867 /* save it */
2868 unsigned flags = BBPSAVING;
2869
2870 if (DELTAdirty(b)) {
2871 flags |= BBPSWAPPED;
2872 }
2873 if (b->batTransient) {
2874 flags |= BBPTMP;
2875 }
2876 BBP_status_on(bid, flags, "BBPsave");
2877 if (lock)
2878 MT_lock_unset(&GDKswapLock(bid));
2879
2880 IODEBUG fprintf(stderr, "#save %s\n", BATgetId(b));
2881
2882 /* do the time-consuming work unlocked */
2883 if (BBP_status(bid) & BBPEXISTING)
2884 ret = BBPbackup(b, false);
2885 if (ret == GDK_SUCCEED) {
2886 ret = BATsave(b);
2887 }
2888 /* clearing bits can be done without the lock */
2889 BBP_status_off(bid, BBPSAVING, "BBPsave");
2890 }
2891 return ret;
2892}
2893
2894/*
2895 * TODO merge BBPfree with BATfree? Its function is to prepare a BAT
2896 * for being unloaded (or even destroyed, if the BAT is not
2897 * persistent).
2898 */
2899static void
2900BBPdestroy(BAT *b)
2901{
2902 bat tp = b->theap.parentid;
2903 bat vtp = VIEWvtparent(b);
2904
2905 if (isVIEW(b)) { /* a physical view */
2906 VIEWdestroy(b);
2907 } else {
2908 /* bats that get destroyed must unfix their atoms */
2909 int (*tunfix) (const void *) = BATatoms[b->ttype].atomUnfix;
2910 BUN p, q;
2911 BATiter bi = bat_iterator(b);
2912
2913 assert(b->batSharecnt == 0);
2914 if (tunfix) {
2915 BATloop(b, p, q) {
2916 (*tunfix) (BUNtail(bi, p));
2917 }
2918 }
2919 BATdelete(b); /* handles persistent case also (file deletes) */
2920 }
2921 BBPclear(b->batCacheid); /* if destroyed; de-register from BBP */
2922
2923 /* parent released when completely done with child */
2924 if (tp)
2925 GDKunshare(tp);
2926 if (vtp)
2927 GDKunshare(vtp);
2928}
2929
2930static gdk_return
2931BBPfree(BAT *b, const char *calledFrom)
2932{
2933 bat bid = b->batCacheid, tp = VIEWtparent(b), vtp = VIEWvtparent(b);
2934 gdk_return ret;
2935
2936 assert(bid > 0);
2937 assert(BBPswappable(b));
2938 (void) calledFrom;
2939
2940 BBP_unload_inc();
2941 /* write dirty BATs before being unloaded */
2942 ret = BBPsave(b);
2943 if (ret == GDK_SUCCEED) {
2944 if (isVIEW(b)) { /* physical view */
2945 VIEWdestroy(b);
2946 } else {
2947 if (BBP_cache(bid))
2948 BATfree(b); /* free memory */
2949 }
2950 BBPuncacheit(bid, false);
2951 }
2952 /* clearing bits can be done without the lock */
2953 BATDEBUG {
2954 fprintf(stderr, "#BBPfree turn off unloading %d\n", bid);
2955 }
2956 BBP_status_off(bid, BBPUNLOADING, calledFrom);
2957 BBP_unload_dec();
2958
2959 /* parent released when completely done with child */
2960 if (ret == GDK_SUCCEED && tp)
2961 GDKunshare(tp);
2962 if (ret == GDK_SUCCEED && vtp)
2963 GDKunshare(vtp);
2964 return ret;
2965}
2966
2967/*
 * BBPquickdesc loads a BAT descriptor without loading the entire BAT;
 * the result may be used only for a *limited* number of
 * purposes. Specifically, during the global sync/commit, we do not
2971 * want to load any BATs that are not already loaded, both because
2972 * this costs performance, and because getting into memory shortage
2973 * during a commit is extremely dangerous. Loading a BAT tends not to
2974 * be required, since the commit actions mostly involve moving some
2975 * pointers in the BAT descriptor. However, some column types do
2976 * require loading the full bat. This is tested by the complexatom()
2977 * routine. Such columns are those of which the type has a fix/unfix
2978 * method, or those that have HeapDelete methods. The HeapDelete
2979 * actions are not always required and therefore the BBPquickdesc is
2980 * parametrized.
2981 */
2982static bool
2983complexatom(int t, bool delaccess)
2984{
2985 if (t >= 0 && (BATatoms[t].atomFix || (delaccess && BATatoms[t].atomDel))) {
2986 return true;
2987 }
2988 return false;
2989}
2990
2991BAT *
2992BBPquickdesc(bat bid, bool delaccess)
2993{
2994 BAT *b;
2995
2996 if (is_bat_nil(bid))
2997 return NULL;
2998 if (bid < 0) {
2999 GDKerror("BBPquickdesc: called with negative batid.\n");
3000 assert(0);
3001 return NULL;
3002 }
3003 if ((b = BBP_cache(bid)) != NULL)
3004 return b; /* already cached */
3005 b = (BAT *) BBPgetdesc(bid);
3006 if (b == NULL ||
3007 complexatom(b->ttype, delaccess)) {
3008 b = BATload_intern(bid, true);
3009 }
3010 return b;
3011}
3012
3013/*
3014 * @+ Global Commit
3015 */
3016static BAT *
3017dirty_bat(bat *i, bool subcommit)
3018{
3019 if (BBPvalid(*i)) {
3020 BAT *b;
3021 BBPspin(*i, "dirty_bat", BBPSAVING);
3022 b = BBP_cache(*i);
3023 if (b != NULL) {
3024 if ((BBP_status(*i) & BBPNEW) &&
3025 BATcheckmodes(b, false) != GDK_SUCCEED) /* check mmap modes */
3026 *i = 0; /* error */
3027 if ((BBP_status(*i) & BBPPERSISTENT) &&
3028 (subcommit || BATdirty(b)))
3029 return b; /* the bat is loaded, persistent and dirty */
3030 } else if (BBP_status(*i) & BBPSWAPPED) {
3031 b = (BAT *) BBPquickdesc(*i, true);
3032 if (b && (subcommit || b->batDirtydesc))
3033 return b; /* only the desc is loaded & dirty */
3034 }
3035 }
3036 return NULL;
3037}
3038
3039/*
3040 * @- backup-bat
3041 * Backup-bat moves all files of a BAT to a backup directory. Only
3042 * after this succeeds, it may be saved. If some failure occurs
3043 * halfway saving, we can thus always roll back.
3044 */
3045static gdk_return
3046file_move(int farmid, const char *srcdir, const char *dstdir, const char *name, const char *ext)
3047{
3048 if (GDKmove(farmid, srcdir, name, ext, dstdir, name, ext) == GDK_SUCCEED) {
3049 return GDK_SUCCEED;
3050 } else {
3051 char *path;
3052 struct stat st;
3053
3054 path = GDKfilepath(farmid, srcdir, name, ext);
3055 if (path == NULL)
3056 return GDK_FAIL;
3057 if (stat(path, &st)) {
3058 /* source file does not exist; the best
3059 * recovery is to give an error but continue
3060 * by considering the BAT as not saved; making
3061 * sure that this time it does get saved.
3062 */
3063 GDKsyserror("file_move: cannot stat %s\n", path);
3064 GDKfree(path);
3065 return GDK_FAIL; /* fishy, but not fatal */
3066 }
3067 GDKfree(path);
3068 }
3069 return GDK_FAIL;
3070}
3071
3072/* returns true if the file exists */
3073static bool
3074file_exists(int farmid, const char *dir, const char *name, const char *ext)
3075{
3076 char *path;
3077 struct stat st;
3078 int ret = -1;
3079
3080 path = GDKfilepath(farmid, dir, name, ext);
3081 if (path) {
3082 ret = stat(path, &st);
3083 IODEBUG fprintf(stderr, "#stat(%s) = %d\n", path, ret);
3084 GDKfree(path);
3085 }
3086 return (ret == 0);
3087}
3088
3089static gdk_return
3090heap_move(Heap *hp, const char *srcdir, const char *dstdir, const char *nme, const char *ext)
3091{
3092 /* see doc at BATsetaccess()/gdk_bat.c for an expose on mmap
3093 * heap modes */
3094 if (file_exists(hp->farmid, dstdir, nme, ext)) {
3095 /* dont overwrite heap with the committed state
3096 * already in dstdir */
3097 return GDK_SUCCEED;
3098 } else if (hp->newstorage == STORE_PRIV &&
3099 !file_exists(hp->farmid, srcdir, nme, ext)) {
3100
3101 /* In order to prevent half-saved X.new files
3102 * surviving a recover we create a dummy file in the
3103 * BACKUP(dstdir) whose presence will trigger
3104 * BBPrecover to remove them. Thus, X will prevail
3105 * where it otherwise wouldn't have. If X already has
3106 * a saved X.new, that one is backed up as normal.
3107 */
3108
3109 FILE *fp;
3110 long_str kill_ext;
3111 char *path;
3112
3113 strconcat_len(kill_ext, sizeof(kill_ext), ext, ".kill", NULL);
3114 path = GDKfilepath(hp->farmid, dstdir, nme, kill_ext);
3115 if (path == NULL)
3116 return GDK_FAIL;
3117 fp = fopen(path, "w");
3118 if (fp == NULL)
3119 GDKsyserror("heap_move: cannot open file %s\n", path);
3120 IODEBUG fprintf(stderr, "#open %s = %d\n", path, fp ? 0 : -1);
3121 GDKfree(path);
3122
3123 if (fp != NULL) {
3124 fclose(fp);
3125 return GDK_SUCCEED;
3126 } else {
3127 return GDK_FAIL;
3128 }
3129 }
3130 return file_move(hp->farmid, srcdir, dstdir, nme, ext);
3131}
3132
3133/*
3134 * @- BBPprepare
3135 *
3136 * this routine makes sure there is a BAKDIR/, and initiates one if
3137 * not. For subcommits, it does the same with SUBDIR.
3138 *
3139 * It is now locked, to get proper file counters, and also to prevent
3140 * concurrent BBPrecovers, etc.
3141 *
3142 * backup_dir == 0 => no backup BBP.dir
3143 * backup_dir == 1 => BBP.dir saved in BACKUP/
3144 * backup_dir == 2 => BBP.dir saved in SUBCOMMIT/
3145 */
3146
3147static gdk_return
3148BBPprepare(bool subcommit)
3149{
3150 bool start_subcommit;
3151 int set = 1 + subcommit;
3152 str bakdirpath, subdirpath;
3153 gdk_return ret = GDK_SUCCEED;
3154
3155 if(!(bakdirpath = GDKfilepath(0, NULL, BAKDIR, NULL)))
3156 return GDK_FAIL;
3157 if(!(subdirpath = GDKfilepath(0, NULL, SUBDIR, NULL))) {
3158 GDKfree(bakdirpath);
3159 return GDK_FAIL;
3160 }
3161
3162 /* tmLock is only used here, helds usually very shortly just
3163 * to protect the file counters */
3164 MT_lock_set(&GDKtmLock);
3165
3166 start_subcommit = (subcommit && backup_subdir == 0);
3167 if (start_subcommit) {
3168 /* starting a subcommit. Make sure SUBDIR and DELDIR
3169 * are clean */
3170 ret = BBPrecover_subdir();
3171 }
3172 if (backup_files == 0) {
3173 backup_dir = 0;
3174 ret = BBPrecover(0);
3175 if (ret == GDK_SUCCEED) {
3176 if (mkdir(bakdirpath, MONETDB_DIRMODE) < 0 && errno != EEXIST) {
3177 GDKsyserror("BBPprepare: cannot create directory %s\n", bakdirpath);
3178 ret = GDK_FAIL;
3179 }
3180 /* if BAKDIR already exists, don't signal error */
3181 IODEBUG fprintf(stderr, "#mkdir %s = %d\n", bakdirpath, (int) ret);
3182 }
3183 }
3184 if (ret == GDK_SUCCEED && start_subcommit) {
3185 /* make a new SUBDIR (subdir of BAKDIR) */
3186 if (mkdir(subdirpath, MONETDB_DIRMODE) < 0) {
3187 GDKsyserror("BBPprepare: cannot create directory %s\n", subdirpath);
3188 ret = GDK_FAIL;
3189 }
3190 IODEBUG fprintf(stderr, "#mkdir %s = %d\n", subdirpath, (int) ret);
3191 }
3192 if (ret == GDK_SUCCEED && backup_dir != set) {
3193 /* a valid backup dir *must* at least contain BBP.dir */
3194 if ((ret = GDKmove(0, backup_dir ? BAKDIR : BATDIR, "BBP", "dir", subcommit ? SUBDIR : BAKDIR, "BBP", "dir")) == GDK_SUCCEED) {
3195 backup_dir = set;
3196 }
3197 }
3198 /* increase counters */
3199 if (ret == GDK_SUCCEED) {
3200 backup_subdir += subcommit;
3201 backup_files++;
3202 }
3203 MT_lock_unset(&GDKtmLock);
3204 GDKfree(bakdirpath);
3205 GDKfree(subdirpath);
3206 return ret;
3207}
3208
3209static gdk_return
3210do_backup(const char *srcdir, const char *nme, const char *ext,
3211 Heap *h, bool dirty, bool subcommit)
3212{
3213 gdk_return ret = GDK_SUCCEED;
3214
3215 /* direct mmap is unprotected (readonly usage, or has WAL
3216 * protection); however, if we're backing up for subcommit
3217 * and a backup already exists in the main backup directory
3218 * (see GDKupgradevarheap), move the file */
3219 if (subcommit && file_exists(h->farmid, BAKDIR, nme, ext)) {
3220 if (file_move(h->farmid, BAKDIR, SUBDIR, nme, ext) != GDK_SUCCEED)
3221 return GDK_FAIL;
3222 }
3223 if (h->storage != STORE_MMAP) {
3224 /* STORE_PRIV saves into X.new files. Two cases could
3225 * happen. The first is when a valid X.new exists
3226 * because of an access change or a previous
3227 * commit. This X.new should be backed up as
3228 * usual. The second case is when X.new doesn't
3229 * exist. In that case we could have half written
3230 * X.new files (after a crash). To protect against
3231 * these we write X.new.kill files in the backup
3232 * directory (see heap_move). */
3233 char extnew[16];
3234 gdk_return mvret = GDK_SUCCEED;
3235
3236 strconcat_len(extnew, sizeof(extnew), ext, ".new", NULL);
3237 if (dirty &&
3238 !file_exists(h->farmid, BAKDIR, nme, extnew) &&
3239 !file_exists(h->farmid, BAKDIR, nme, ext)) {
3240 /* if the heap is dirty and there is no heap
3241 * file (with or without .new extension) in
3242 * the BAKDIR, move the heap (preferably with
3243 * .new extension) to the correct backup
3244 * directory */
3245 if (file_exists(h->farmid, srcdir, nme, extnew))
3246 mvret = heap_move(h, srcdir,
3247 subcommit ? SUBDIR : BAKDIR,
3248 nme, extnew);
3249 else
3250 mvret = heap_move(h, srcdir,
3251 subcommit ? SUBDIR : BAKDIR,
3252 nme, ext);
3253 } else if (subcommit) {
3254 /* if subcommit, wqe may need to move an
3255 * already made backup from BAKDIR to
3256 * SUBSIR */
3257 if (file_exists(h->farmid, BAKDIR, nme, extnew))
3258 mvret = file_move(h->farmid, BAKDIR, SUBDIR, nme, extnew);
3259 else if (file_exists(h->farmid, BAKDIR, nme, ext))
3260 mvret = file_move(h->farmid, BAKDIR, SUBDIR, nme, ext);
3261 }
3262 /* there is a situation where the move may fail,
3263 * namely if this heap was not supposed to be existing
3264 * before, i.e. after a BATmaterialize on a persistent
3265 * bat as a workaround, do not complain about move
3266 * failure if the source file is nonexistent
3267 */
3268 if (mvret != GDK_SUCCEED && file_exists(h->farmid, srcdir, nme, ext)) {
3269 ret = GDK_FAIL;
3270 }
3271 if (subcommit &&
3272 (h->storage == STORE_PRIV || h->newstorage == STORE_PRIV)) {
3273 long_str kill_ext;
3274
3275 strconcat_len(kill_ext, sizeof(kill_ext),
3276 ext, ".new.kill", NULL);
3277 if (file_exists(h->farmid, BAKDIR, nme, kill_ext) &&
3278 file_move(h->farmid, BAKDIR, SUBDIR, nme, kill_ext) != GDK_SUCCEED) {
3279 ret = GDK_FAIL;
3280 }
3281 }
3282 }
3283 return ret;
3284}
3285
3286static gdk_return
3287BBPbackup(BAT *b, bool subcommit)
3288{
3289 char *srcdir;
3290 long_str nme;
3291 const char *s = BBP_physical(b->batCacheid);
3292 size_t slen;
3293
3294 if (BBPprepare(subcommit) != GDK_SUCCEED) {
3295 return GDK_FAIL;
3296 }
3297 if (!b->batCopiedtodisk || b->batTransient) {
3298 return GDK_SUCCEED;
3299 }
3300 /* determine location dir and physical suffix */
3301 if (!(srcdir = GDKfilepath(NOFARM, BATDIR, s, NULL)))
3302 goto fail;
3303 s = strrchr(srcdir, DIR_SEP);
3304 if (!s)
3305 goto fail;
3306
3307 slen = strlen(++s);
3308 if (slen >= sizeof(nme))
3309 goto fail;
3310 memcpy(nme, s, slen + 1);
3311 srcdir[s - srcdir] = 0;
3312
3313 if (b->ttype != TYPE_void &&
3314 do_backup(srcdir, nme, "tail", &b->theap,
3315 b->batDirtydesc || b->theap.dirty,
3316 subcommit) != GDK_SUCCEED)
3317 goto fail;
3318 if (b->tvheap &&
3319 do_backup(srcdir, nme, "theap", b->tvheap,
3320 b->batDirtydesc || b->tvheap->dirty,
3321 subcommit) != GDK_SUCCEED)
3322 goto fail;
3323 GDKfree(srcdir);
3324 return GDK_SUCCEED;
3325fail:
3326 if(srcdir)
3327 GDKfree(srcdir);
3328 return GDK_FAIL;
3329}
3330
3331/*
3332 * @+ Atomic Write
3333 * The atomic BBPsync() function first safeguards the old images of
3334 * all files to be written in BAKDIR. It then saves all files. If that
3335 * succeeds fully, BAKDIR is renamed to DELDIR. The rename is
3336 * considered an atomic action. If it succeeds, the DELDIR is removed.
3337 * If something fails, the pre-sync status can be obtained by moving
3338 * back all backed up files; this is done by BBPrecover().
3339 *
3340 * The BBP.dir is also moved into the BAKDIR.
3341 */
3342gdk_return
3343BBPsync(int cnt, bat *subcommit)
3344{
3345 gdk_return ret = GDK_SUCCEED;
3346 int t0 = 0, t1 = 0;
3347 str bakdir, deldir;
3348
3349 if(!(bakdir = GDKfilepath(0, NULL, subcommit ? SUBDIR : BAKDIR, NULL)))
3350 return GDK_FAIL;
3351 if(!(deldir = GDKfilepath(0, NULL, DELDIR, NULL))) {
3352 GDKfree(bakdir);
3353 return GDK_FAIL;
3354 }
3355
3356 PERFDEBUG t0 = t1 = GDKms();
3357
3358 ret = BBPprepare(subcommit != NULL);
3359
3360 /* PHASE 1: safeguard everything in a backup-dir */
3361 if (ret == GDK_SUCCEED) {
3362 int idx = 0;
3363
3364 while (++idx < cnt) {
3365 bat i = subcommit ? subcommit[idx] : idx;
3366 BAT *b = dirty_bat(&i, subcommit != NULL);
3367 if (i <= 0)
3368 break;
3369 if (BBP_status(i) & BBPEXISTING) {
3370 if (b != NULL && BBPbackup(b, subcommit != NULL) != GDK_SUCCEED)
3371 break;
3372 } else if (subcommit && (b = BBP_desc(i)) && BBP_status(i) & BBPDELETED) {
3373 char o[10];
3374 char *f;
3375 snprintf(o, sizeof(o), "%o", (unsigned) b->batCacheid);
3376 f = GDKfilepath(b->theap.farmid, BAKDIR, o, "tail");
3377 if (access(f, F_OK) == 0)
3378 file_move(b->theap.farmid, BAKDIR, SUBDIR, o, "tail");
3379 GDKfree(f);
3380 f = GDKfilepath(b->theap.farmid, BAKDIR, o, "theap");
3381 if (access(f, F_OK) == 0)
3382 file_move(b->theap.farmid, BAKDIR, SUBDIR, o, "theap");
3383 GDKfree(f);
3384 }
3385 }
3386 if (idx < cnt)
3387 ret = GDK_FAIL;
3388 }
3389 PERFDEBUG fprintf(stderr, "#BBPsync (move time %d) %d files\n", (t1 = GDKms()) - t0, backup_files);
3390
3391 /* PHASE 2: save the repository */
3392 if (ret == GDK_SUCCEED) {
3393 int idx = 0;
3394
3395 while (++idx < cnt) {
3396 bat i = subcommit ? subcommit[idx] : idx;
3397
3398 if (BBP_status(i) & BBPPERSISTENT) {
3399 BAT *b = dirty_bat(&i, subcommit != NULL);
3400 if (i <= 0)
3401 break;
3402 if (b != NULL && BATsave(b) != GDK_SUCCEED)
3403 break; /* write error */
3404 }
3405 }
3406 if (idx < cnt)
3407 ret = GDK_FAIL;
3408 }
3409
3410 PERFDEBUG fprintf(stderr, "#BBPsync (write time %d)\n", (t0 = GDKms()) - t1);
3411
3412 if (ret == GDK_SUCCEED) {
3413 ret = BBPdir(cnt, subcommit);
3414 }
3415
3416 PERFDEBUG fprintf(stderr, "#BBPsync (dir time %d) %d bats\n", (t1 = GDKms()) - t0, (bat) ATOMIC_GET(&BBPsize));
3417
3418 if (ret == GDK_SUCCEED) {
3419 /* atomic switchover */
3420 /* this is the big one: this call determines
3421 * whether the operation of this function
3422 * succeeded, so no changing of ret after this
3423 * call anymore */
3424
3425 if (rename(bakdir, deldir) < 0)
3426 ret = GDK_FAIL;
3427 if (ret != GDK_SUCCEED &&
3428 GDKremovedir(0, DELDIR) == GDK_SUCCEED && /* maybe there was an old deldir */
3429 rename(bakdir, deldir) < 0)
3430 ret = GDK_FAIL;
3431 if (ret != GDK_SUCCEED)
3432 GDKsyserror("BBPsync: rename(%s,%s) failed.\n", bakdir, deldir);
3433 IODEBUG fprintf(stderr, "#BBPsync: rename %s %s = %d\n", bakdir, deldir, (int) ret);
3434 }
3435
3436 /* AFTERMATH */
3437 if (ret == GDK_SUCCEED) {
3438 backup_files = subcommit ? (backup_files - backup_subdir) : 0;
3439 backup_dir = backup_subdir = 0;
3440 if (GDKremovedir(0, DELDIR) != GDK_SUCCEED)
3441 fprintf(stderr, "#BBPsync: cannot remove directory %s\n", DELDIR);
3442 (void) BBPprepare(false); /* (try to) remove DELDIR and set up new BAKDIR */
3443 if (backup_files > 1) {
3444 PERFDEBUG fprintf(stderr, "#BBPsync (backup_files %d > 1)\n", backup_files);
3445 backup_files = 1;
3446 }
3447 }
3448 PERFDEBUG fprintf(stderr, "#BBPsync (ready time %d)\n", (t0 = GDKms()) - t1);
3449 GDKfree(bakdir);
3450 GDKfree(deldir);
3451 return ret;
3452}
3453
3454/*
3455 * Recovery just moves all files back to their original location. this
3456 * is an incremental process: if something fails, just stop with still
3457 * files left for moving in BACKUP/. The recovery process can resume
3458 * later with the left over files.
3459 */
3460static gdk_return
3461force_move(int farmid, const char *srcdir, const char *dstdir, const char *name)
3462{
3463 const char *p;
3464 char *dstpath, *killfile;
3465 gdk_return ret = GDK_SUCCEED;
3466
3467 if ((p = strrchr(name, '.')) != NULL && strcmp(p, ".kill") == 0) {
3468 /* Found a X.new.kill file, ie remove the X.new file */
3469 ptrdiff_t len = p - name;
3470 long_str srcpath;
3471
3472 strncpy(srcpath, name, len);
3473 srcpath[len] = '\0';
3474 if(!(dstpath = GDKfilepath(farmid, dstdir, srcpath, NULL))) {
3475 GDKsyserror("force_move: malloc fail\n");
3476 return GDK_FAIL;
3477 }
3478
3479 /* step 1: remove the X.new file that is going to be
3480 * overridden by X */
3481 if (remove(dstpath) != 0 && errno != ENOENT) {
3482 /* if it exists and cannot be removed, all
3483 * this is going to fail */
3484 GDKsyserror("force_move: remove(%s)\n", dstpath);
3485 GDKfree(dstpath);
3486 return GDK_FAIL;
3487 }
3488 GDKfree(dstpath);
3489
3490 /* step 2: now remove the .kill file. This one is
3491 * crucial, otherwise we'll never finish recovering */
3492 if(!(killfile = GDKfilepath(farmid, srcdir, name, NULL))) {
3493 GDKsyserror("force_move: malloc fail\n");
3494 return GDK_FAIL;
3495 }
3496 if (remove(killfile) != 0) {
3497 ret = GDK_FAIL;
3498 GDKsyserror("force_move: remove(%s)\n", killfile);
3499 }
3500 GDKfree(killfile);
3501 return ret;
3502 }
3503 /* try to rename it */
3504 ret = GDKmove(farmid, srcdir, name, NULL, dstdir, name, NULL);
3505
3506 if (ret != GDK_SUCCEED) {
3507 char *srcpath;
3508
3509 /* two legal possible causes: file exists or dir
3510 * doesn't exist */
3511 if(!(dstpath = GDKfilepath(farmid, dstdir, name, NULL)))
3512 return GDK_FAIL;
3513 if(!(srcpath = GDKfilepath(farmid, srcdir, name, NULL))) {
3514 GDKfree(dstpath);
3515 return GDK_FAIL;
3516 }
3517 if (remove(dstpath) != 0) /* clear destination */
3518 ret = GDK_FAIL;
3519 IODEBUG fprintf(stderr, "#remove %s = %d\n", dstpath, (int) ret);
3520
3521 (void) GDKcreatedir(dstdir); /* if fails, move will fail */
3522 ret = GDKmove(farmid, srcdir, name, NULL, dstdir, name, NULL);
3523 IODEBUG fprintf(stderr, "#link %s %s = %d\n", srcpath, dstpath, (int) ret);
3524 GDKfree(dstpath);
3525 GDKfree(srcpath);
3526 }
3527 return ret;
3528}
3529
3530gdk_return
3531BBPrecover(int farmid)
3532{
3533 str bakdirpath;
3534 str leftdirpath;
3535 DIR *dirp;
3536 struct dirent *dent;
3537 long_str path, dstpath;
3538 bat i;
3539 size_t j = strlen(BATDIR);
3540 gdk_return ret = GDK_SUCCEED;
3541 bool dirseen = false;
3542 str dstdir;
3543
3544 bakdirpath = GDKfilepath(farmid, NULL, BAKDIR, NULL);
3545 leftdirpath = GDKfilepath(farmid, NULL, LEFTDIR, NULL);
3546 if (bakdirpath == NULL || leftdirpath == NULL) {
3547 GDKfree(bakdirpath);
3548 GDKfree(leftdirpath);
3549 return GDK_FAIL;
3550 }
3551 dirp = opendir(bakdirpath);
3552 if (dirp == NULL) {
3553 GDKfree(bakdirpath);
3554 GDKfree(leftdirpath);
3555 return GDK_SUCCEED; /* nothing to do */
3556 }
3557 memcpy(dstpath, BATDIR, j);
3558 dstpath[j] = DIR_SEP;
3559 dstpath[++j] = 0;
3560 dstdir = dstpath + j;
3561 IODEBUG fprintf(stderr, "#BBPrecover(start)\n");
3562
3563 if (mkdir(leftdirpath, MONETDB_DIRMODE) < 0 && errno != EEXIST) {
3564 GDKsyserror("BBPrecover: cannot create directory %s\n", leftdirpath);
3565 closedir(dirp);
3566 GDKfree(bakdirpath);
3567 GDKfree(leftdirpath);
3568 return GDK_FAIL;
3569 }
3570
3571 /* move back all files */
3572 while ((dent = readdir(dirp)) != NULL) {
3573 const char *q = strchr(dent->d_name, '.');
3574
3575 if (q == dent->d_name) {
3576 char *fn;
3577
3578 if (strcmp(dent->d_name, ".") == 0 ||
3579 strcmp(dent->d_name, "..") == 0)
3580 continue;
3581 fn = GDKfilepath(farmid, BAKDIR, dent->d_name, NULL);
3582 if (fn) {
3583 int uret = remove(fn);
3584 IODEBUG fprintf(stderr, "#remove %s = %d\n",
3585 fn, uret);
3586 GDKfree(fn);
3587 }
3588 continue;
3589 } else if (strcmp(dent->d_name, "BBP.dir") == 0) {
3590 dirseen = true;
3591 continue;
3592 }
3593 if (q == NULL)
3594 q = dent->d_name + strlen(dent->d_name);
3595 if ((j = q - dent->d_name) + 1 > sizeof(path)) {
3596 /* name too long: ignore */
3597 continue;
3598 }
3599 strncpy(path, dent->d_name, j);
3600 path[j] = 0;
3601 if (GDKisdigit(*path)) {
3602 i = strtol(path, NULL, 8);
3603 } else {
3604 i = BBP_find(path, false);
3605 if (i < 0)
3606 i = -i;
3607 }
3608 if (i == 0 || i >= (bat) ATOMIC_GET(&BBPsize) || !BBPvalid(i)) {
3609 force_move(farmid, BAKDIR, LEFTDIR, dent->d_name);
3610 } else {
3611 BBPgetsubdir(dstdir, i);
3612 if (force_move(farmid, BAKDIR, dstpath, dent->d_name) != GDK_SUCCEED)
3613 ret = GDK_FAIL;
3614 }
3615 }
3616 closedir(dirp);
3617 if (dirseen && ret == GDK_SUCCEED) { /* we have a saved BBP.dir; it should be moved back!! */
3618 struct stat st;
3619 char *fn;
3620
3621 fn = GDKfilepath(farmid, BATDIR, "BBP", "dir");
3622 ret = recover_dir(farmid, stat(fn, &st) == 0);
3623 GDKfree(fn);
3624 }
3625
3626 if (ret == GDK_SUCCEED) {
3627 if (rmdir(bakdirpath) < 0) {
3628 GDKsyserror("BBPrecover: cannot remove directory %s\n", bakdirpath);
3629 ret = GDK_FAIL;
3630 }
3631 IODEBUG fprintf(stderr, "#rmdir %s = %d\n", bakdirpath, (int) ret);
3632 }
3633 if (ret != GDK_SUCCEED)
3634 GDKerror("BBPrecover: recovery failed. Please check whether your disk is full or write-protected.\n");
3635
3636 IODEBUG fprintf(stderr, "#BBPrecover(end)\n");
3637 GDKfree(bakdirpath);
3638 GDKfree(leftdirpath);
3639 return ret;
3640}
3641
3642/*
 * SUBDIR recovery is quite mindlessly moving all files back to the
 * parent (BAKDIR). We do recognize moving back BBP.dir and set
 * backup_dir accordingly.
3646 */
3647gdk_return
3648BBPrecover_subdir(void)
3649{
3650 str subdirpath;
3651 DIR *dirp;
3652 struct dirent *dent;
3653 gdk_return ret = GDK_SUCCEED;
3654
3655 subdirpath = GDKfilepath(0, NULL, SUBDIR, NULL);
3656 if (subdirpath == NULL)
3657 return GDK_FAIL;
3658 dirp = opendir(subdirpath);
3659 GDKfree(subdirpath);
3660 if (dirp == NULL) {
3661 return GDK_SUCCEED; /* nothing to do */
3662 }
3663 IODEBUG fprintf(stderr, "#BBPrecover_subdir(start)\n");
3664
3665 /* move back all files */
3666 while ((dent = readdir(dirp)) != NULL) {
3667 if (dent->d_name[0] == '.')
3668 continue;
3669 ret = GDKmove(0, SUBDIR, dent->d_name, NULL, BAKDIR, dent->d_name, NULL);
3670 if (ret == GDK_SUCCEED && strcmp(dent->d_name, "BBP.dir") == 0)
3671 backup_dir = 1;
3672 if (ret != GDK_SUCCEED)
3673 break;
3674 }
3675 closedir(dirp);
3676
3677 /* delete the directory */
3678 if (ret == GDK_SUCCEED) {
3679 ret = GDKremovedir(0, SUBDIR);
3680 if (backup_dir == 2) {
3681 IODEBUG fprintf(stderr, "#BBPrecover_subdir: %s%cBBP.dir had disappeared!", SUBDIR, DIR_SEP);
3682 backup_dir = 0;
3683 }
3684 }
3685 IODEBUG fprintf(stderr, "#BBPrecover_subdir(end) = %d\n", (int) ret);
3686
3687 if (ret != GDK_SUCCEED)
3688 GDKerror("BBPrecover_subdir: recovery failed. Please check whether your disk is full or write-protected.\n");
3689 return ret;
3690}
3691
3692/*
3693 * @- The diskscan
3694 * The BBPdiskscan routine walks through the BAT dir, cleans up
3695 * leftovers, and measures disk occupancy. Leftovers are files that
3696 * cannot belong to a BAT. in order to establish this for [ht]heap
3697 * files, the BAT descriptor is loaded in order to determine whether
3698 * these files are still required.
3699 *
 * The routine returns true when the given path could not be opened
 * as a directory (there was nothing to do) and false otherwise.
3703 */
3704static bool
3705persistent_bat(bat bid)
3706{
3707 if (bid >= 0 && bid < (bat) ATOMIC_GET(&BBPsize) && BBPvalid(bid)) {
3708 BAT *b = BBP_cache(bid);
3709
3710 if (b == NULL || b->batCopiedtodisk) {
3711 return true;
3712 }
3713 }
3714 return false;
3715}
3716
3717static BAT *
3718getdesc(bat bid)
3719{
3720 BAT *b = BBPgetdesc(bid);
3721
3722 if (b == NULL)
3723 BBPclear(bid);
3724 return b;
3725}
3726
3727static bool
3728BBPdiskscan(const char *parent, size_t baseoff)
3729{
3730 DIR *dirp = opendir(parent);
3731 struct dirent *dent;
3732 char fullname[FILENAME_MAX];
3733 str dst = fullname;
3734 size_t dstlen = sizeof(fullname);
3735 const char *src = parent;
3736
3737 if (dirp == NULL)
3738 return true; /* nothing to do */
3739
3740 while (*src) {
3741 *dst++ = *src++;
3742 dstlen--;
3743 }
3744 if (dst > fullname && dst[-1] != DIR_SEP) {
3745 *dst++ = DIR_SEP;
3746 dstlen--;
3747 }
3748
3749 while ((dent = readdir(dirp)) != NULL) {
3750 const char *p;
3751 bat bid;
3752 bool ok, delete;
3753
3754 if (dent->d_name[0] == '.')
3755 continue; /* ignore .dot files and directories (. ..) */
3756
3757 if (strncmp(dent->d_name, "BBP.", 4) == 0 &&
3758 (strcmp(parent + baseoff, BATDIR) == 0 ||
3759 strncmp(parent + baseoff, BAKDIR, strlen(BAKDIR)) == 0 ||
3760 strncmp(parent + baseoff, SUBDIR, strlen(SUBDIR)) == 0))
3761 continue;
3762
3763 p = strchr(dent->d_name, '.');
3764
3765 if (strlen(dent->d_name) >= dstlen) {
3766 /* found a file with too long a name
3767 (i.e. unknown); stop pruning in this
3768 subdir */
3769 fprintf(stderr, "BBPdiskscan: unexpected file %s, leaving %s.\n", dent->d_name, parent);
3770 break;
3771 }
3772 strncpy(dst, dent->d_name, dstlen);
3773 fullname[sizeof(fullname) - 1] = 0;
3774
3775 if (p == NULL && !BBPdiskscan(fullname, baseoff)) {
3776 /* it was a directory */
3777 continue;
3778 }
3779
3780 if (p && strcmp(p + 1, "tmp") == 0) {
3781 delete = true;
3782 ok = true;
3783 bid = 0;
3784 } else {
3785 bid = strtol(dent->d_name, NULL, 8);
3786 ok = p && bid;
3787 delete = false;
3788
3789 if (!ok || !persistent_bat(bid)) {
3790 delete = true;
3791 } else if (strncmp(p + 1, "tail", 4) == 0) {
3792 BAT *b = getdesc(bid);
3793 delete = (b == NULL || !b->ttype || !b->batCopiedtodisk);
3794 } else if (strncmp(p + 1, "theap", 5) == 0) {
3795 BAT *b = getdesc(bid);
3796 delete = (b == NULL || !b->tvheap || !b->batCopiedtodisk);
3797 } else if (strncmp(p + 1, "thash", 5) == 0) {
3798#ifdef PERSISTENTHASH
3799 BAT *b = getdesc(bid);
3800 delete = b == NULL;
3801 if (!delete)
3802 b->thash = (Hash *) 1;
3803#else
3804 delete = true;
3805#endif
3806 } else if (strncmp(p + 1, "timprints", 9) == 0) {
3807 BAT *b = getdesc(bid);
3808 delete = b == NULL;
3809 if (!delete)
3810 b->timprints = (Imprints *) 1;
3811 } else if (strncmp(p + 1, "torderidx", 9) == 0) {
3812#ifdef PERSISTENTIDX
3813 BAT *b = getdesc(bid);
3814 delete = b == NULL;
3815 if (!delete)
3816 b->torderidx = (Heap *) 1;
3817#else
3818 delete = true;
3819#endif
3820 } else if (strncmp(p + 1, "new", 3) != 0) {
3821 ok = false;
3822 }
3823 }
3824 if (!ok) {
3825 /* found an unknown file; stop pruning in this
3826 * subdir */
3827 fprintf(stderr, "BBPdiskscan: unexpected file %s, leaving %s.\n", dent->d_name, parent);
3828 break;
3829 }
3830 if (delete) {
3831 if (remove(fullname) != 0 && errno != ENOENT) {
3832 GDKsyserror("BBPdiskscan: remove(%s)", fullname);
3833 continue;
3834 }
3835 IODEBUG fprintf(stderr, "#BBPcleanup: remove(%s) = 0\n", fullname);
3836 }
3837 }
3838 closedir(dirp);
3839 return false;
3840}
3841
3842void
3843gdk_bbp_reset(void)
3844{
3845 int i;
3846
3847 while (BBPlimit > 0) {
3848 BBPlimit -= BBPINIT;
3849 assert(BBPlimit >= 0);
3850 GDKfree(BBP[BBPlimit >> BBPINITLOG]);
3851 BBP[BBPlimit >> BBPINITLOG] = NULL;
3852 }
3853 ATOMIC_SET(&BBPsize, 0);
3854 for (i = 0; i < MAXFARMS; i++)
3855 GDKfree((void *) BBPfarms[i].dirname); /* loose "const" */
3856 memset(BBPfarms, 0, sizeof(BBPfarms));
3857 BBP_hash = 0;
3858 BBP_mask = 0;
3859
3860 locked_by = 0;
3861 BBPunloadCnt = 0;
3862 backup_files = 0;
3863 backup_dir = 0;
3864 backup_subdir = 0;
3865}
3866