1 | /* |
2 | * This Source Code Form is subject to the terms of the Mozilla Public |
3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
5 | * |
6 | * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V. |
7 | */ |
8 | |
9 | /* |
10 | * @a M. L. Kersten, P. Boncz, N. J. Nes |
11 | * @* BAT Buffer Pool (BBP) |
12 | * The BATs created and loaded are collected in a BAT buffer pool. |
13 | * The Bat Buffer Pool has a number of functions: |
14 | * @table @code |
15 | * |
16 | * @item administration and lookup |
17 | * The BBP is a directory which contains status information about all |
18 | * known BATs. This interface may be used very heavily, by |
19 | * data-intensive applications. To eliminate all overhead, read-only |
20 | * access to the BBP may be done by table-lookups. The integer index |
21 | * type for these lookups is @emph{bat}, as retrieved by |
22 | * @emph{b->batCacheid}. The @emph{bat} zero is reserved for the nil |
23 | * bat. |
24 | * |
25 | * @item persistence |
26 | * The BBP is made persistent by saving it to the dictionary file |
27 | * called @emph{BBP.dir} in the database. |
28 | * |
29 | * When the number of BATs rises, having all files in one directory |
30 | * becomes a bottleneck. The BBP therefore implements a scheme that |
31 | * distributes all BATs in a growing directory tree with at most 64 |
32 | * BATs stored in one node. |
33 | * |
34 | * @item buffer management |
35 | * The BBP is responsible for loading and saving of BATs to disk. It |
36 | * also contains routines to unload BATs from memory when memory |
37 | * resources get scarce. For this purpose, it administers BAT memory |
38 | * reference counts (to know which BATs can be unloaded) and BAT usage |
39 | * statistics (it unloads the least recently used BATs). |
40 | * |
41 | * @item recovery |
42 | * When the database is closed or during a run-time syncpoint, the |
43 | * system tables must be written to disk in a safe way, that is immune |
44 | * for system failures (like disk full). To do so, the BBP implements |
45 | * an atomic commit and recovery protocol: first all files to be |
46 | * overwritten are moved to a BACKUP/ dir. If that succeeds, the |
47 | * writes are done. If that also fully succeeds the BACKUP/ dir is |
48 | * renamed to DELETE_ME/ and subsequently deleted. If not, all files |
49 | * in BACKUP/ are moved back to their original location. |
50 | * |
51 | * @item unloading |
52 | * Bats which have a logical reference (ie. a lrefs > 0) but no memory |
53 | * reference (refcnt == 0) can be unloaded. Unloading dirty bats |
54 | * means, moving the original (committed version) to the BACKUP/ dir |
55 | * and saving the bat. This complicates the commit and recovery/abort |
56 | * issues. The commit has to check if the bat is already moved. And |
57 | * The recovery has to always move back the files from the BACKUP/ |
58 | * dir. |
59 | * |
60 | * @item reference counting |
 * Bats have two kinds of references: logical and physical
62 | * (pointer) ones. The logical references are administered by |
63 | * BBPretain/BBPrelease, the physical ones by BBPfix/BBPunfix. |
64 | * |
65 | * @item share counting |
 * Views use the heaps of their parent bats. To safeguard this, the
67 | * parent has a shared counter, which is incremented and decremented |
68 | * using BBPshare and BBPunshare. These functions make sure the parent |
69 | * is memory resident as required because of the 'pointer' sharing. |
70 | * @end table |
71 | */ |
72 | |
73 | #include "monetdb_config.h" |
74 | #include "gdk.h" |
75 | #include "gdk_private.h" |
76 | #include "gdk_storage.h" |
77 | #include "mutils.h" |
78 | |
79 | #ifndef F_OK |
80 | #define F_OK 0 |
81 | #endif |
82 | #ifdef _MSC_VER |
83 | #define access(f, m) _access(f, m) |
84 | #endif |
85 | |
86 | /* |
87 | * The BBP has a fixed address, so re-allocation due to a growing BBP |
88 | * caused by one thread does not disturb reads to the old entries by |
89 | * another. This is implemented using anonymous virtual memory; |
90 | * extensions on the same address are guaranteed because a large |
91 | * non-committed VM area is requested initially. New slots in the BBP |
92 | * are found in O(1) by keeping a freelist that uses the 'next' field |
93 | * in the BBPrec records. |
94 | */ |
/* Table of N_BBPINIT chunk pointers; BBPextend() fills each entry
 * with a zeroed chunk of BBPINIT BBPrec records as the pool grows. */
BBPrec *BBP[N_BBPINIT];		/* fixed base VM address of BBP array */
bat BBPlimit = 0;		/* current committed VM BBP array */
static ATOMIC_TYPE BBPsize = ATOMIC_VAR_INIT(0); /* current used size of BBP array */

/* Farm table with MAXFARMS entries; BBPselectfarm() searches it for
 * an entry that has a dirname and serves the requested role. */
struct BBPfarm_t BBPfarms[MAXFARMS];
100 | |
#define KITTENNAP 1	/* used to suspend processing */
#define BBPNONAME "."		/* filler for no name in BBP.dir */
/*
 * The hash index uses a bucket index (int array) of size mask that is
 * tuned for perfect hashing (1 lookup). The bucket chain uses the
 * 'next' field in the BBPrec records.
 */
bat *BBP_hash = NULL;		/* BBP logical name hash buckets */
/* BBPinithash() sets this to the bucket count minus one, where the
 * bucket count is a power of two; a hash value is reduced to a bucket
 * index with "& BBP_mask". */
bat BBP_mask = 0;		/* number of buckets = & mask */

/* forward declarations of file-local helpers defined further down */
static gdk_return BBPfree(BAT *b, const char *calledFrom);
static void BBPdestroy(BAT *b);
static void BBPuncacheit(bat bid, bool unloaddesc);
static gdk_return BBPprepare(bool subcommit);
static BAT *getBBPdescriptor(bat i, bool lock);
static gdk_return BBPbackup(BAT *b, bool subcommit);
static gdk_return BBPdir(int cnt, bat *subcommit);

#ifdef HAVE_HGE
/* start out by saying we have no hge, but as soon as we've seen one,
 * we'll always say we do have it */
static bool havehge = false;
#endif

/* If BBPtmpcheck(s) holds, parse the text following the first four
 * characters of s as an octal number and yield it; otherwise yield 0.
 * Note that s is evaluated twice. */
#define BBPnamecheck(s) (BBPtmpcheck(s) ? strtol((s) + 4, NULL, 8) : 0)
126 | |
127 | static void |
128 | BBP_insert(bat i) |
129 | { |
130 | bat idx = (bat) (strHash(BBP_logical(i)) & BBP_mask); |
131 | |
132 | BBP_next(i) = BBP_hash[idx]; |
133 | BBP_hash[idx] = i; |
134 | } |
135 | |
136 | static void |
137 | BBP_delete(bat i) |
138 | { |
139 | bat *h = BBP_hash; |
140 | const char *s = BBP_logical(i); |
141 | bat idx = (bat) (strHash(s) & BBP_mask); |
142 | |
143 | for (h += idx; (i = *h) != 0; h = &BBP_next(i)) { |
144 | if (strcmp(BBP_logical(i), s) == 0) { |
145 | *h = BBP_next(i); |
146 | break; |
147 | } |
148 | } |
149 | } |
150 | |
151 | bat |
152 | getBBPsize(void) |
153 | { |
154 | return (bat) ATOMIC_GET(&BBPsize); |
155 | } |
156 | |
157 | |
158 | /* |
159 | * @+ BBP Consistency and Concurrency |
160 | * While GDK provides the basic building blocks for an ACID system, in |
 * itself it is not such a system, as this would entail too much
162 | * overhead that is often not needed. Hence, some consistency control |
163 | * is left to the user. The first important user constraint is that if |
164 | * a user updates a BAT, (s)he himself must assure that no-one else |
165 | * accesses this BAT. |
166 | * |
167 | * Concerning buffer management, the BBP carries out a swapping |
168 | * policy. BATs are kept in memory till the memory is full. If the |
169 | * memory is full, the malloc functions initiate BBP trim actions, |
170 | * that unload the coldest BATs that have a zero reference count. The |
171 | * second important user constraint is therefore that a user may only |
172 | * manipulate live BAT data in memory if it is sure that there is at |
173 | * least one reference count to that BAT. |
174 | * |
175 | * The main BBP array is protected by two locks: |
176 | * @table @code |
 * @item GDKcacheLock
178 | * this lock guards the free slot management in the BBP array. The |
179 | * BBP operations that allocate a new slot for a new BAT |
180 | * (@emph{BBPinit},@emph{BBPcacheit}), delete the slot of a destroyed |
181 | * BAT (@emph{BBPreclaim}), or rename a BAT (@emph{BBPrename}), hold |
182 | * this lock. It also protects all BAT (re)naming actions include |
183 | * (read and write) in the hash table with BAT names. |
184 | * @item GDKswapLock |
185 | * this lock guards the swap (loaded/unloaded) status of the |
186 | * BATs. Hence, all BBP routines that influence the swapping policy, |
187 | * or actually carry out the swapping policy itself, acquire this lock |
188 | * (e.g. @emph{BBPfix},@emph{BBPunfix}). Note that this also means |
189 | * that updates to the BBP_status indicator array must be protected by |
190 | * GDKswapLock. |
191 | * |
192 | * To reduce contention GDKswapLock was split into multiple locks; it |
193 | * is now an array of lock pointers which is accessed by |
194 | * GDKswapLock(bat) |
195 | * @end table |
196 | * |
197 | * Routines that need both locks should first acquire the locks in the |
198 | * GDKswapLock array (in ascending order) and then GDKcacheLock (and |
199 | * release them in reverse order). |
200 | * |
201 | * To obtain maximum speed, read operations to existing elements in |
202 | * the BBP are unguarded. As said, it is the users responsibility that |
203 | * the BAT that is being read is not being modified. BBP update |
204 | * actions that modify the BBP data structure itself are locked by the |
205 | * BBP functions themselves. Hence, multiple concurrent BBP read |
206 | * operations may be ongoing while at the same time at most one BBP |
207 | * write operation @strong{on a different BAT} is executing. This |
208 | * holds for accesses to the public (quasi-) arrays @emph{BBPcache}, |
209 | * @emph{BBPstatus} and @emph{BBPrefs}. |
210 | * These arrays are called quasi as now they are |
211 | * actually stored together in one big BBPrec array called BBP, that |
212 | * is allocated in anonymous VM space, so we can reallocate this |
213 | * structure without changing the base address (a crucial feature if |
214 | * read actions are to go on unlocked while other entries in the BBP |
215 | * may be modified). |
216 | */ |
/* Set to MT_getpid() by BBPlock() once it holds all locks, and reset
 * to 0 by BBPunlock(). */
static volatile MT_Id locked_by = 0;

/* Increment BBPunloadCnt while holding GDKunloadLock. */
#define BBP_unload_inc()			\
	do {					\
		MT_lock_set(&GDKunloadLock);	\
		BBPunloadCnt++;			\
		MT_lock_unset(&GDKunloadLock);	\
	} while (0)

/* Decrement BBPunloadCnt while holding GDKunloadLock; the counter
 * must never become negative. */
#define BBP_unload_dec()			\
	do {					\
		MT_lock_set(&GDKunloadLock);	\
		--BBPunloadCnt;			\
		assert(BBPunloadCnt >= 0);	\
		MT_lock_unset(&GDKunloadLock);	\
	} while (0)

/* Counter maintained by BBP_unload_inc()/BBP_unload_dec(); BBPlock()
 * waits until it is no longer positive. */
static int BBPunloadCnt = 0;
/* Protects BBPunloadCnt. */
static MT_Lock GDKunloadLock = MT_LOCK_INITIALIZER("GDKunloadLock");
236 | |
237 | void |
238 | BBPlock(void) |
239 | { |
240 | int i; |
241 | |
242 | /* wait for all pending unloads to finish */ |
243 | MT_lock_set(&GDKunloadLock); |
244 | while (BBPunloadCnt > 0) { |
245 | MT_lock_unset(&GDKunloadLock); |
246 | MT_sleep_ms(1); |
247 | MT_lock_set(&GDKunloadLock); |
248 | } |
249 | |
250 | for (i = 0; i <= BBP_THREADMASK; i++) |
251 | MT_lock_set(&GDKtrimLock(i)); |
252 | for (i = 0; i <= BBP_THREADMASK; i++) |
253 | MT_lock_set(&GDKcacheLock(i)); |
254 | for (i = 0; i <= BBP_BATMASK; i++) |
255 | MT_lock_set(&GDKswapLock(i)); |
256 | locked_by = MT_getpid(); |
257 | |
258 | MT_lock_unset(&GDKunloadLock); |
259 | } |
260 | |
261 | void |
262 | BBPunlock(void) |
263 | { |
264 | int i; |
265 | |
266 | for (i = BBP_BATMASK; i >= 0; i--) |
267 | MT_lock_unset(&GDKswapLock(i)); |
268 | for (i = BBP_THREADMASK; i >= 0; i--) |
269 | MT_lock_unset(&GDKcacheLock(i)); |
270 | locked_by = 0; |
271 | for (i = BBP_THREADMASK; i >= 0; i--) |
272 | MT_lock_unset(&GDKtrimLock(i)); |
273 | } |
274 | |
275 | |
276 | static gdk_return |
277 | BBPinithash(int j) |
278 | { |
279 | bat i = (bat) ATOMIC_GET(&BBPsize); |
280 | |
281 | assert(j >= 0 && j <= BBP_THREADMASK); |
282 | for (BBP_mask = 1; (BBP_mask << 1) <= BBPlimit; BBP_mask <<= 1) |
283 | ; |
284 | BBP_hash = (bat *) GDKzalloc(BBP_mask * sizeof(bat)); |
285 | if (BBP_hash == NULL) { |
286 | GDKerror("BBPinithash: cannot allocate memory\n" ); |
287 | return GDK_FAIL; |
288 | } |
289 | BBP_mask--; |
290 | |
291 | while (--i > 0) { |
292 | const char *s = BBP_logical(i); |
293 | |
294 | if (s) { |
295 | if (*s != '.' && BBPtmpcheck(s) == 0) { |
296 | BBP_insert(i); |
297 | } |
298 | } else { |
299 | BBP_next(i) = BBP_free(j); |
300 | BBP_free(j) = i; |
301 | if (++j > BBP_THREADMASK) |
302 | j = 0; |
303 | } |
304 | } |
305 | return GDK_SUCCEED; |
306 | } |
307 | |
308 | int |
309 | BBPselectfarm(role_t role, int type, enum heaptype hptype) |
310 | { |
311 | int i; |
312 | |
313 | (void) type; /* may use in future */ |
314 | (void) hptype; /* may use in future */ |
315 | |
316 | if (GDKinmemory()) |
317 | return 0; |
318 | |
319 | #ifndef PERSISTENTHASH |
320 | if (hptype == hashheap) |
321 | role = TRANSIENT; |
322 | #endif |
323 | #ifndef PERSISTENTIDX |
324 | if (hptype == orderidxheap) |
325 | role = TRANSIENT; |
326 | #endif |
327 | for (i = 0; i < MAXFARMS; i++) |
328 | if (BBPfarms[i].dirname && BBPfarms[i].roles & (1 << (int) role)) |
329 | return i; |
330 | /* must be able to find farms for TRANSIENT and PERSISTENT */ |
331 | assert(role != TRANSIENT && role != PERSISTENT); |
332 | return -1; |
333 | } |
334 | |
335 | /* |
336 | * BBPextend must take the trimlock, as it is called when other BBP |
337 | * locks are held and it will allocate memory. |
338 | */ |
339 | static gdk_return |
340 | BBPextend(int idx, bool buildhash) |
341 | { |
342 | if ((bat) ATOMIC_GET(&BBPsize) >= N_BBPINIT * BBPINIT) { |
343 | GDKerror("BBPextend: trying to extend BAT pool beyond the " |
344 | "limit (%d)\n" , N_BBPINIT * BBPINIT); |
345 | return GDK_FAIL; |
346 | } |
347 | |
348 | /* make sure the new size is at least BBPsize large */ |
349 | while (BBPlimit < (bat) ATOMIC_GET(&BBPsize)) { |
350 | assert(BBP[BBPlimit >> BBPINITLOG] == NULL); |
351 | BBP[BBPlimit >> BBPINITLOG] = GDKzalloc(BBPINIT * sizeof(BBPrec)); |
352 | if (BBP[BBPlimit >> BBPINITLOG] == NULL) { |
353 | GDKerror("BBPextend: failed to extend BAT pool\n" ); |
354 | return GDK_FAIL; |
355 | } |
356 | BBPlimit += BBPINIT; |
357 | } |
358 | |
359 | if (buildhash) { |
360 | int i; |
361 | |
362 | GDKfree(BBP_hash); |
363 | BBP_hash = NULL; |
364 | for (i = 0; i <= BBP_THREADMASK; i++) |
365 | BBP_free(i) = 0; |
366 | if (BBPinithash(idx) != GDK_SUCCEED) |
367 | return GDK_FAIL; |
368 | } |
369 | return GDK_SUCCEED; |
370 | } |
371 | |
372 | static gdk_return |
373 | recover_dir(int farmid, bool direxists) |
374 | { |
375 | if (direxists) { |
376 | /* just try; don't care about these non-vital files */ |
377 | if (GDKunlink(farmid, BATDIR, "BBP" , "bak" ) != GDK_SUCCEED) |
378 | fprintf(stderr, "#recover_dir: unlink of BBP.bak failed\n" ); |
379 | if (GDKmove(farmid, BATDIR, "BBP" , "dir" , BATDIR, "BBP" , "bak" ) != GDK_SUCCEED) |
380 | fprintf(stderr, "#recover_dir: rename of BBP.dir to BBP.bak failed\n" ); |
381 | } |
382 | return GDKmove(farmid, BAKDIR, "BBP" , "dir" , BATDIR, "BBP" , "dir" ); |
383 | } |
384 | |
385 | static gdk_return BBPrecover(int farmid); |
386 | static gdk_return BBPrecover_subdir(void); |
387 | static bool BBPdiskscan(const char *, size_t); |
388 | |
389 | #ifdef GDKLIBRARY_NIL_NAN |
390 | static gdk_return |
391 | fixfltheap(BAT *b) |
392 | { |
393 | long_str filename; |
394 | Heap h1; /* old heap */ |
395 | Heap h2; /* new heap */ |
396 | const char *nme, *bnme; |
397 | char *srcdir; |
398 | BUN i; |
399 | bool nofix = true; |
400 | |
401 | nme = BBP_physical(b->batCacheid); |
402 | srcdir = GDKfilepath(NOFARM, BATDIR, nme, NULL); |
403 | if (srcdir == NULL) { |
404 | return GDK_FAIL; |
405 | } |
406 | *strrchr(srcdir, DIR_SEP) = 0; |
407 | |
408 | if ((bnme = strrchr(nme, DIR_SEP)) != NULL) |
409 | bnme++; |
410 | else |
411 | bnme = nme; |
412 | sprintf(filename, "BACKUP%c%s" , DIR_SEP, bnme); |
413 | |
414 | /* make backup of heap */ |
415 | if (GDKmove(b->theap.farmid, srcdir, bnme, "tail" , BAKDIR, bnme, "tail" ) != GDK_SUCCEED) { |
416 | GDKfree(srcdir); |
417 | GDKerror("fixfltheap: cannot make backup of %s.tail\n" , nme); |
418 | return GDK_FAIL; |
419 | } |
420 | /* load old heap */ |
421 | h1 = b->theap; |
422 | strconcat_len(h1.filename, sizeof(h1.filename), |
423 | filename, ".tail" , NULL); |
424 | h1.base = NULL; |
425 | h1.dirty = false; |
426 | if (HEAPload(&h1, filename, "tail" , false) != GDK_SUCCEED) { |
427 | GDKfree(srcdir); |
428 | GDKerror("fixfltheap: loading old tail heap " |
429 | "for BAT %d failed\n" , b->batCacheid); |
430 | return GDK_FAIL; |
431 | } |
432 | |
433 | /* create new heap */ |
434 | h2 = b->theap; |
435 | strconcat_len(h2.filename, sizeof(h2.filename), nme, ".tail" , NULL); |
436 | if (HEAPalloc(&h2, b->batCapacity, b->twidth) != GDK_SUCCEED) { |
437 | GDKfree(srcdir); |
438 | HEAPfree(&h1, false); |
439 | GDKerror("fixfltheap: allocating new tail heap " |
440 | "for BAT %d failed\n" , b->batCacheid); |
441 | return GDK_FAIL; |
442 | } |
443 | h2.dirty = true; |
444 | h2.free = h1.free; |
445 | |
446 | switch (b->ttype) { |
447 | case TYPE_flt: { |
448 | const flt *restrict o = (const flt *) h1.base; |
449 | flt *restrict n = (flt *) h2.base; |
450 | |
451 | for (i = 0; i < b->batCount; i++) { |
452 | if (o[i] == GDK_flt_min) { |
453 | b->tnil = true; |
454 | n[i] = flt_nil; |
455 | nofix = false; |
456 | } else { |
457 | n[i] = o[i]; |
458 | } |
459 | } |
460 | break; |
461 | } |
462 | case TYPE_dbl: { |
463 | const dbl *restrict o = (const dbl *) h1.base; |
464 | dbl *restrict n = (dbl *) h2.base; |
465 | |
466 | for (i = 0; i < b->batCount; i++) { |
467 | if (o[i] == GDK_dbl_min) { |
468 | b->tnil = true; |
469 | n[i] = dbl_nil; |
470 | nofix = false; |
471 | } else { |
472 | n[i] = o[i]; |
473 | } |
474 | } |
475 | break; |
476 | } |
477 | default: { |
478 | struct mbr { |
479 | float xmin, ymin, xmax, ymax; |
480 | }; |
481 | const struct mbr *restrict o = (const struct mbr *) h1.base; |
482 | struct mbr *restrict n = (struct mbr *) h2.base; |
483 | |
484 | assert(strcmp(ATOMunknown_name(b->ttype), "mbr" ) == 0); |
485 | assert(b->twidth == 4 * sizeof(flt)); |
486 | |
487 | for (i = 0; i < b->batCount; i++) { |
488 | if (o[i].xmin == GDK_flt_min || |
489 | o[i].xmax == GDK_flt_min || |
490 | o[i].ymin == GDK_flt_min || |
491 | o[i].ymax == GDK_flt_min) { |
492 | b->tnil = true; |
493 | n[i].xmin = n[i].xmax = n[i].ymin = n[i].ymax = flt_nil; |
494 | nofix = false; |
495 | } else { |
496 | n[i] = o[i]; |
497 | } |
498 | } |
499 | break; |
500 | } |
501 | } |
502 | |
503 | /* cleanup */ |
504 | HEAPfree(&h1, false); |
505 | if (nofix) { |
506 | /* didn't fix anything, move backup back */ |
507 | HEAPfree(&h2, true); |
508 | if (GDKmove(b->theap.farmid, BAKDIR, bnme, "tail" , srcdir, bnme, "tail" ) != GDK_SUCCEED) { |
509 | GDKfree(srcdir); |
510 | GDKerror("fixfltheap: cannot restore backup of %s.tail\n" , nme); |
511 | return GDK_FAIL; |
512 | } |
513 | } else { |
514 | /* heap was fixed */ |
515 | b->batDirtydesc = true; |
516 | if (HEAPsave(&h2, nme, "tail" ) != GDK_SUCCEED) { |
517 | HEAPfree(&h2, false); |
518 | GDKfree(srcdir); |
519 | GDKerror("fixfltheap: saving heap failed\n" ); |
520 | return GDK_FAIL; |
521 | } |
522 | HEAPfree(&h2, false); |
523 | b->theap = h2; |
524 | } |
525 | GDKfree(srcdir); |
526 | return GDK_SUCCEED; |
527 | } |
528 | |
529 | static gdk_return |
530 | fixfloatbats(void) |
531 | { |
532 | bat bid; |
533 | BAT *b; |
534 | char filename[FILENAME_MAX]; |
535 | FILE *fp; |
536 | size_t len; |
537 | int written; |
538 | |
539 | for (bid = 1; bid < (bat) ATOMIC_GET(&BBPsize); bid++) { |
540 | if ((b = BBP_desc(bid)) == NULL) { |
541 | /* not a valid BAT */ |
542 | continue; |
543 | } |
544 | if (BBP_logical(bid) && |
545 | (len = strlen(BBP_logical(bid))) > 12 && |
546 | strcmp(BBP_logical(bid) + len - 12, "_catalog_nme" ) == 0) { |
547 | /* this is one of the files used by the |
548 | * logger. We need to communicate to the |
549 | * logger that it also needs to do a |
550 | * conversion. That is done by creating a |
551 | * file here based on the name of this BAT. */ |
552 | written = snprintf(filename, sizeof(filename), |
553 | "%s/%.*s_nil-nan-convert" , |
554 | BBPfarms[0].dirname, |
555 | (int) (len - 12), BBP_logical(bid)); |
556 | if (written == -1 || written >= FILENAME_MAX) { |
557 | GDKerror("fixfloatbats: cannot create file %s has a very large pathname\n" , |
558 | filename); |
559 | return GDK_FAIL; |
560 | } |
561 | fp = fopen(filename, "w" ); |
562 | if (fp == NULL) { |
563 | GDKsyserror("fixfloatbats: cannot create file %s\n" , |
564 | filename); |
565 | return GDK_FAIL; |
566 | } |
567 | fclose(fp); |
568 | } |
569 | if (b->batCount == 0 || b->tnonil) { |
570 | /* no NILs to convert */ |
571 | continue; |
572 | } |
573 | if (b->ttype < 0) { |
574 | const char *anme; |
575 | |
576 | /* as yet unknown tail column type */ |
577 | anme = ATOMunknown_name(b->ttype); |
578 | /* known string types */ |
579 | if (strcmp(anme, "mbr" ) != 0) |
580 | continue; |
581 | } else if (b->ttype != TYPE_flt && b->ttype != TYPE_dbl) |
582 | continue; |
583 | if (fixfltheap(b) != GDK_SUCCEED) |
584 | return GDK_FAIL; |
585 | } |
586 | return GDK_SUCCEED; |
587 | } |
588 | #endif |
589 | |
590 | #ifdef GDKLIBRARY_OLDDATE |
/* Gregorian leap year rule, also valid for zero and negative years */
#define leapyear(y)		((y) % 4 == 0 && ((y) % 100 != 0 || (y) % 400 == 0))
#define YEARDAYS(y)		(leapyear(y) ? 366 : 365)
/* cumulative number of days before the end of each month (leap year) */
static int CUMLEAPDAYS[13] = {
	0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366
};
/* cumulative number of days before the end of each month (normal year) */
static int CUMDAYS[13] = {
	0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365
};

/*
 * Number of 4-fold years minus 100-fold years plus 400-fold years up
 * to the given year, plus one when the year is not negative.  The
 * result may be negative.
 */
static int
leapyears(int year)
{
	return year / 4 + year / 400 - year / 100 + (year >= 0);
}

/* layout of the new date and timestamp encodings */
#define YEAR_OFFSET		4712
#define YEAR_MIN		(-YEAR_OFFSET)
#define DTDAY_WIDTH		5		/* 1..28/29/30/31, depending on month */
#define DTDAY_SHIFT		0
#define DTMONTH_WIDTH		21		/* enough for 174761 years */
#define DTMONTH_SHIFT		(DTDAY_WIDTH+DTDAY_SHIFT)
#define YEAR_MAX		(YEAR_MIN+(1<<DTMONTH_WIDTH)/12-1)
#define mkdate(d, m, y)		(((((y) + YEAR_OFFSET) * 12 + (m) - 1) << DTMONTH_SHIFT) \
				 | ((d) << DTDAY_SHIFT))
#define TSTIME_WIDTH		37		/* [0..24*60*60*1000000) */
#define TSTIME_SHIFT		0
#define TSDATE_WIDTH		(DTDAY_WIDTH+DTMONTH_WIDTH)
#define TSDATE_SHIFT		(TSTIME_SHIFT+TSTIME_WIDTH)
#define mktimestamp(d, t)	((lng) (((uint64_t) (d) << TSDATE_SHIFT) | \
					((uint64_t) (t) << TSTIME_SHIFT)))

/*
 * Convert a day number n (0 maps to January 1 of year 0, negative
 * values lie before that) to the packed representation produced by
 * mkdate(): (year + YEAR_OFFSET) * 12 + month - 1 in the upper bits
 * and the day of the month in the low DTDAY_WIDTH bits.  Years below
 * YEAR_MIN are clamped to January 1 of YEAR_MIN, years above YEAR_MAX
 * to December 31 of YEAR_MAX.
 */
int
cvtdate(int n)
{
	const int *cum;
	int yr = n / 365;
	int dd = (n - yr * 365) - leapyears(yr >= 0 ? yr - 1 : yr);
	int mm;

	/* the estimate above ignores leap days exactly; step whole
	 * years until dd is the zero-based day within year yr */
	if (n >= 0) {
		while (dd < 0) {
			yr--;
			dd += YEARDAYS(yr);
		}
	} else {
		yr--;
		while (dd >= 0) {
			yr++;
			dd -= YEARDAYS(yr);
		}
		dd += YEARDAYS(yr);
	}

	/* one-based day of year; locate the month, starting from a
	 * lower bound on its number */
	dd++;
	cum = leapyear(yr) ? CUMLEAPDAYS : CUMDAYS;
	mm = dd / 31;
	if (mm == 0)
		mm = 1;
	while (mm <= 12 && !(dd > cum[mm - 1] && dd <= cum[mm]))
		mm++;
	dd -= cum[mm - 1];

	/* clamp date */
	if (yr < YEAR_MIN) {
		dd = 1;
		mm = 1;
		yr = YEAR_MIN;
	} else if (yr > YEAR_MAX) {
		dd = 31;
		mm = 12;
		yr = YEAR_MAX;
	}
	return mkdate(dd, mm, yr);
}
677 | |
678 | static gdk_return |
679 | fixdateheap(BAT *b, const char *anme) |
680 | { |
681 | long_str filename; |
682 | Heap h1; /* old heap */ |
683 | Heap h2; /* new heap */ |
684 | const char *nme, *bnme; |
685 | char *srcdir; |
686 | BUN i; |
687 | bool nofix = true; |
688 | |
689 | nme = BBP_physical(b->batCacheid); |
690 | srcdir = GDKfilepath(NOFARM, BATDIR, nme, NULL); |
691 | if (srcdir == NULL) { |
692 | return GDK_FAIL; |
693 | } |
694 | *strrchr(srcdir, DIR_SEP) = 0; |
695 | |
696 | if ((bnme = strrchr(nme, DIR_SEP)) != NULL) |
697 | bnme++; |
698 | else |
699 | bnme = nme; |
700 | sprintf(filename, "BACKUP%c%s" , DIR_SEP, bnme); |
701 | |
702 | /* make backup of heap */ |
703 | if (GDKmove(b->theap.farmid, srcdir, bnme, "tail" , BAKDIR, bnme, "tail" ) != GDK_SUCCEED) { |
704 | GDKfree(srcdir); |
705 | GDKerror("fixdateheap: cannot make backup of %s.tail\n" , nme); |
706 | return GDK_FAIL; |
707 | } |
708 | /* load old heap */ |
709 | h1 = b->theap; |
710 | strconcat_len(h1.filename, sizeof(h1.filename), |
711 | filename, ".tail" , NULL); |
712 | h1.base = NULL; |
713 | h1.dirty = false; |
714 | if (HEAPload(&h1, filename, "tail" , false) != GDK_SUCCEED) { |
715 | GDKfree(srcdir); |
716 | GDKerror("fixdateheap: loading old tail heap " |
717 | "for BAT %d failed\n" , b->batCacheid); |
718 | return GDK_FAIL; |
719 | } |
720 | |
721 | /* create new heap */ |
722 | h2 = b->theap; |
723 | strconcat_len(h2.filename, sizeof(h2.filename), nme, ".tail" , NULL); |
724 | if (HEAPalloc(&h2, b->batCapacity, strcmp(anme, "date" ) == 0 ? 4 : 8) != GDK_SUCCEED) { |
725 | GDKfree(srcdir); |
726 | HEAPfree(&h1, false); |
727 | GDKerror("fixdateheap: allocating new tail heap " |
728 | "for BAT %d failed\n" , b->batCacheid); |
729 | return GDK_FAIL; |
730 | } |
731 | h2.dirty = true; |
732 | h2.free = h1.free; |
733 | |
734 | if (strcmp(anme, "date" ) == 0) { |
735 | const int *restrict o = (const int *) h1.base; |
736 | int *restrict n = (int *) h2.base; |
737 | |
738 | for (i = 0; i < b->batCount; i++) { |
739 | if (is_int_nil(o[i])) { |
740 | b->tnil = true; |
741 | n[i] = int_nil; |
742 | } else { |
743 | n[i] = cvtdate(o[i]); |
744 | nofix = false; |
745 | } |
746 | } |
747 | } else if (strcmp(anme, "timestamp" ) == 0) { |
748 | union timestamp { |
749 | lng l; |
750 | struct { |
751 | #ifndef WORDS_BIGENDIAN |
752 | int p_msecs; |
753 | int p_days; |
754 | #else |
755 | int p_days; |
756 | int p_msecs; |
757 | #endif |
758 | } t; |
759 | }; |
760 | const union timestamp *restrict o = (const union timestamp *) h1.base; |
761 | lng *restrict n = (lng *) h2.base; |
762 | for (i = 0; i < b->batCount; i++) { |
763 | if (is_lng_nil(o[i].l)) { |
764 | b->tnil = true; |
765 | n[i] = lng_nil; |
766 | } else { |
767 | n[i] = mktimestamp(cvtdate(o[i].t.p_days), |
768 | o[i].t.p_msecs * LL_CONSTANT(1000)); |
769 | nofix = false; |
770 | } |
771 | } |
772 | } else { |
773 | /* daytime */ |
774 | const int *restrict o = (const int *) h1.base; |
775 | lng *restrict n = (lng *) h2.base; |
776 | |
777 | h2.free <<= 1; |
778 | nofix = false; |
779 | for (i = 0; i < b->batCount; i++) { |
780 | if (is_int_nil(o[i])) { |
781 | b->tnil = true; |
782 | n[i] = lng_nil; |
783 | } else { |
784 | n[i] = o[i] * LL_CONSTANT(1000); |
785 | } |
786 | } |
787 | } |
788 | |
789 | /* cleanup */ |
790 | HEAPfree(&h1, false); |
791 | if (nofix) { |
792 | /* didn't fix anything, move backup back */ |
793 | HEAPfree(&h2, true); |
794 | if (GDKmove(b->theap.farmid, BAKDIR, bnme, "tail" , srcdir, bnme, "tail" ) != GDK_SUCCEED) { |
795 | GDKfree(srcdir); |
796 | GDKerror("fixdateheap: cannot restore backup of %s.tail\n" , nme); |
797 | return GDK_FAIL; |
798 | } |
799 | } else { |
800 | /* heap was fixed */ |
801 | b->batDirtydesc = true; |
802 | if (HEAPsave(&h2, nme, "tail" ) != GDK_SUCCEED) { |
803 | HEAPfree(&h2, false); |
804 | GDKfree(srcdir); |
805 | GDKerror("fixdateheap: saving heap failed\n" ); |
806 | return GDK_FAIL; |
807 | } |
808 | if (strcmp(anme, "daytime" ) == 0) { |
809 | b->twidth = 8; |
810 | b->tshift = 3; |
811 | } |
812 | HEAPfree(&h2, false); |
813 | b->theap = h2; |
814 | } |
815 | GDKfree(srcdir); |
816 | return GDK_SUCCEED; |
817 | } |
818 | |
819 | static gdk_return |
820 | fixdatebats(void) |
821 | { |
822 | bat bid; |
823 | BAT *b; |
824 | char filename[FILENAME_MAX]; |
825 | FILE *fp; |
826 | size_t len; |
827 | int written; |
828 | |
829 | for (bid = 1; bid < (bat) ATOMIC_GET(&BBPsize); bid++) { |
830 | if ((b = BBP_desc(bid)) == NULL) { |
831 | /* not a valid BAT */ |
832 | continue; |
833 | } |
834 | if (BBP_logical(bid) && |
835 | (len = strlen(BBP_logical(bid))) > 12 && |
836 | strcmp(BBP_logical(bid) + len - 12, "_catalog_nme" ) == 0) { |
837 | /* this is one of the files used by the |
838 | * logger. We need to communicate to the |
839 | * logger that it also needs to do a |
840 | * conversion. That is done by creating a |
841 | * file here based on the name of this BAT. */ |
842 | written = snprintf(filename, sizeof(filename), |
843 | "%s/%.*s_date-convert" , |
844 | BBPfarms[0].dirname, |
845 | (int) (len - 12), BBP_logical(bid)); |
846 | if (written == -1 || written >= FILENAME_MAX) { |
847 | GDKerror("fixdatebats: cannot create file %s has a very large pathname\n" , |
848 | filename); |
849 | return GDK_FAIL; |
850 | } |
851 | fp = fopen(filename, "w" ); |
852 | if (fp == NULL) { |
853 | GDKsyserror("fixdatebats: cannot create file %s\n" , |
854 | filename); |
855 | return GDK_FAIL; |
856 | } |
857 | fclose(fp); |
858 | } |
859 | /* The date type is not known in GDK when reading the BBP */ |
860 | if (b->ttype < 0) { |
861 | const char *anme; |
862 | |
863 | /* as yet unknown tail column type */ |
864 | anme = ATOMunknown_name(b->ttype); |
865 | /* known string types */ |
866 | if ((strcmp(anme, "date" ) == 0 || |
867 | strcmp(anme, "timestamp" ) == 0 || |
868 | strcmp(anme, "daytime" ) == 0) && |
869 | fixdateheap(b, anme) != GDK_SUCCEED) |
870 | return GDK_FAIL; |
871 | } |
872 | } |
873 | return GDK_SUCCEED; |
874 | } |
875 | #endif |
876 | |
877 | static int |
878 | heapinit(BAT *b, const char *buf, int *hashash, unsigned bbpversion, bat bid, const char *filename) |
879 | { |
880 | int t; |
881 | char type[33]; |
882 | uint16_t width; |
883 | uint16_t var; |
884 | uint16_t properties; |
885 | uint64_t nokey0; |
886 | uint64_t nokey1; |
887 | uint64_t nosorted; |
888 | uint64_t norevsorted; |
889 | uint64_t base; |
890 | uint64_t align; |
891 | uint64_t free; |
892 | uint64_t size; |
893 | uint16_t storage; |
894 | int n; |
895 | |
896 | (void) bbpversion; /* could be used to implement compatibility */ |
897 | |
898 | norevsorted = 0; /* default for first case */ |
899 | if (bbpversion <= GDKLIBRARY_TALIGN ? |
900 | sscanf(buf, |
901 | " %32s %" SCNu16 " %" SCNu16 " %" SCNu16 " %" SCNu64 |
902 | " %" SCNu64 " %" SCNu64 " %" SCNu64 " %" SCNu64 |
903 | " %" SCNu64 " %" SCNu64 " %" SCNu64 " %" SCNu16 |
904 | "%n" , |
905 | type, &width, &var, &properties, &nokey0, |
906 | &nokey1, &nosorted, &norevsorted, &base, |
907 | &align, &free, &size, &storage, |
908 | &n) < 13 : |
909 | sscanf(buf, |
910 | " %10s %" SCNu16 " %" SCNu16 " %" SCNu16 " %" SCNu64 |
911 | " %" SCNu64 " %" SCNu64 " %" SCNu64 " %" SCNu64 |
912 | " %" SCNu64 " %" SCNu64 " %" SCNu16 |
913 | "%n" , |
914 | type, &width, &var, &properties, &nokey0, |
915 | &nokey1, &nosorted, &norevsorted, &base, |
916 | &free, &size, &storage, |
917 | &n) < 12) { |
918 | GDKerror("BBPinit: invalid format for BBP.dir\n%s" , buf); |
919 | return -1; |
920 | } |
921 | |
922 | if (properties & ~0x0F81) { |
923 | GDKerror("BBPinit: unknown properties are set: incompatible database\n" ); |
924 | return -1; |
925 | } |
926 | *hashash = var & 2; |
927 | var &= ~2; |
928 | #ifdef HAVE_HGE |
929 | if (strcmp(type, "hge" ) == 0) |
930 | havehge = true; |
931 | #endif |
932 | /* sqlblob was changed to plain blob in the Apr2019 release */ |
933 | if (strcmp(type, "sqlblob" ) == 0) |
934 | strcpy(type, "blob" ); |
935 | if ((t = ATOMindex(type)) < 0) { |
936 | if ((t = ATOMunknown_find(type)) == 0) { |
937 | GDKerror("BBPinit: no space for atom %s" , type); |
938 | return -1; |
939 | } |
940 | } else if (var != (t == TYPE_void || BATatoms[t].atomPut != NULL)) { |
941 | GDKerror("BBPinit: inconsistent entry in BBP.dir: tvarsized mismatch for BAT %d\n" , (int) bid); |
942 | return -1; |
943 | } else if (var && t != 0 ? |
944 | ATOMsize(t) < width || |
945 | (width != 1 && width != 2 && width != 4 |
946 | #if SIZEOF_VAR_T == 8 |
947 | && width != 8 |
948 | #endif |
949 | ) : |
950 | ATOMsize(t) != width) { |
951 | GDKerror("BBPinit: inconsistent entry in BBP.dir: tsize mismatch for BAT %d\n" , (int) bid); |
952 | return -1; |
953 | } |
954 | b->ttype = t; |
955 | b->twidth = width; |
956 | b->tvarsized = var != 0; |
957 | b->tshift = ATOMelmshift(width); |
958 | assert_shift_width(b->tshift,b->twidth); |
959 | b->tnokey[0] = (BUN) nokey0; |
960 | b->tnokey[1] = (BUN) nokey1; |
961 | b->tsorted = (bit) ((properties & 0x0001) != 0); |
962 | b->trevsorted = (bit) ((properties & 0x0080) != 0); |
963 | b->tkey = (properties & 0x0100) != 0; |
964 | b->tnonil = (properties & 0x0400) != 0; |
965 | b->tnil = (properties & 0x0800) != 0; |
966 | b->tnosorted = (BUN) nosorted; |
967 | b->tnorevsorted = (BUN) norevsorted; |
968 | /* (properties & 0x0200) is the old tdense flag */ |
969 | b->tseqbase = (properties & 0x0200) == 0 || base >= (uint64_t) oid_nil ? oid_nil : (oid) base; |
970 | b->theap.free = (size_t) free; |
971 | b->theap.size = (size_t) size; |
972 | b->theap.base = NULL; |
973 | strconcat_len(b->theap.filename, sizeof(b->theap.filename), |
974 | filename, ".tail" , NULL); |
975 | b->theap.storage = (storage_t) storage; |
976 | b->theap.copied = false; |
977 | b->theap.newstorage = (storage_t) storage; |
978 | b->theap.farmid = BBPselectfarm(PERSISTENT, b->ttype, offheap); |
979 | b->theap.dirty = false; |
980 | #ifdef GDKLIBRARY_BLOB_SORT |
981 | if (bbpversion <= GDKLIBRARY_BLOB_SORT && strcmp(type, "blob" ) == 0) { |
982 | b->tsorted = b->trevsorted = false; |
983 | b->tnosorted = b->tnorevsorted = 0; |
984 | OIDXdestroy(b); |
985 | } |
986 | #endif |
987 | if (b->theap.free > b->theap.size) { |
988 | GDKerror("BBPinit: \"free\" value larger than \"size\" in heap of bat %d\n" , (int) bid); |
989 | return -1; |
990 | } |
991 | return n; |
992 | } |
993 | |
994 | static int |
995 | vheapinit(BAT *b, const char *buf, int hashash, bat bid, const char *filename) |
996 | { |
997 | int n = 0; |
998 | uint64_t free, size; |
999 | uint16_t storage; |
1000 | |
1001 | if (b->tvarsized && b->ttype != TYPE_void) { |
1002 | b->tvheap = GDKzalloc(sizeof(Heap)); |
1003 | if (b->tvheap == NULL) { |
1004 | GDKerror("BBPinit: cannot allocate memory for heap." ); |
1005 | return -1; |
1006 | } |
1007 | if (sscanf(buf, |
1008 | " %" SCNu64 " %" SCNu64 " %" SCNu16 |
1009 | "%n" , |
1010 | &free, &size, &storage, &n) < 3) { |
1011 | GDKerror("BBPinit: invalid format for BBP.dir\n%s" , buf); |
1012 | return -1; |
1013 | } |
1014 | b->tvheap->free = (size_t) free; |
1015 | b->tvheap->size = (size_t) size; |
1016 | b->tvheap->base = NULL; |
1017 | strconcat_len(b->tvheap->filename, sizeof(b->tvheap->filename), |
1018 | filename, ".theap" , NULL); |
1019 | b->tvheap->storage = (storage_t) storage; |
1020 | b->tvheap->copied = false; |
1021 | b->tvheap->hashash = hashash != 0; |
1022 | b->tvheap->cleanhash = true; |
1023 | b->tvheap->newstorage = (storage_t) storage; |
1024 | b->tvheap->dirty = false; |
1025 | b->tvheap->parentid = bid; |
1026 | b->tvheap->farmid = BBPselectfarm(PERSISTENT, b->ttype, varheap); |
1027 | if (b->tvheap->free > b->tvheap->size) { |
1028 | GDKerror("BBPinit: \"free\" value larger than \"size\" in var heap of bat %d\n" , (int) bid); |
1029 | return -1; |
1030 | } |
1031 | } |
1032 | return n; |
1033 | } |
1034 | |
1035 | static gdk_return |
1036 | BBPreadEntries(FILE *fp, unsigned bbpversion) |
1037 | { |
1038 | bat bid = 0; |
1039 | char buf[4096]; |
1040 | BAT *bn; |
1041 | |
1042 | /* read the BBP.dir and insert the BATs into the BBP */ |
1043 | while (fgets(buf, sizeof(buf), fp) != NULL) { |
1044 | uint64_t batid; |
1045 | uint16_t status; |
1046 | char headname[129]; |
1047 | char filename[20]; |
1048 | unsigned int properties; |
1049 | int nread, n; |
1050 | char *s, *options = NULL; |
1051 | char logical[1024]; |
1052 | uint64_t first = 0, count, capacity, base = 0; |
1053 | int Thashash; |
1054 | |
1055 | static_assert(sizeof(BBP_physical(0)) == sizeof(filename), |
1056 | "filename should be same size as BBPrec.physical" ); |
1057 | if ((s = strchr(buf, '\r')) != NULL) { |
1058 | /* convert \r\n into just \n */ |
1059 | if (s[1] != '\n') { |
1060 | GDKerror("BBPinit: invalid format for BBP.dir" ); |
1061 | return GDK_FAIL; |
1062 | } |
1063 | *s++ = '\n'; |
1064 | *s = 0; |
1065 | } |
1066 | |
1067 | if (sscanf(buf, |
1068 | "%" SCNu64 " %" SCNu16 " %128s %19s %u %" SCNu64 |
1069 | " %" SCNu64 " %" SCNu64 |
1070 | "%n" , |
1071 | &batid, &status, headname, filename, |
1072 | &properties, |
1073 | &count, &capacity, &base, |
1074 | &nread) < 8) { |
1075 | GDKerror("BBPinit: invalid format for BBP.dir\n%s" , buf); |
1076 | return GDK_FAIL; |
1077 | } |
1078 | |
1079 | if (batid >= N_BBPINIT * BBPINIT) { |
1080 | GDKerror("BBPinit: bat ID (%" PRIu64 ") too large to accomodate (max %d)." , batid, N_BBPINIT * BBPINIT - 1); |
1081 | return GDK_FAIL; |
1082 | } |
1083 | |
1084 | /* convert both / and \ path separators to our own DIR_SEP */ |
1085 | #if DIR_SEP != '/' |
1086 | s = filename; |
1087 | while ((s = strchr(s, '/')) != NULL) |
1088 | *s++ = DIR_SEP; |
1089 | #endif |
1090 | #if DIR_SEP != '\\' |
1091 | s = filename; |
1092 | while ((s = strchr(s, '\\')) != NULL) |
1093 | *s++ = DIR_SEP; |
1094 | #endif |
1095 | |
1096 | if (first != 0) { |
1097 | GDKerror("BBPinit: first != 0 (ID = %" PRIu64 ")." , |
1098 | batid); |
1099 | return GDK_FAIL; |
1100 | } |
1101 | |
1102 | bid = (bat) batid; |
1103 | if (batid >= (uint64_t) ATOMIC_GET(&BBPsize)) { |
1104 | ATOMIC_SET(&BBPsize, batid + 1); |
1105 | if ((bat) ATOMIC_GET(&BBPsize) >= BBPlimit) |
1106 | BBPextend(0, false); |
1107 | } |
1108 | if (BBP_desc(bid) != NULL) { |
1109 | GDKerror("BBPinit: duplicate entry in BBP.dir (ID = " |
1110 | "%" PRIu64 ")." , batid); |
1111 | return GDK_FAIL; |
1112 | } |
1113 | bn = GDKzalloc(sizeof(BAT)); |
1114 | if (bn == NULL) { |
1115 | GDKerror("BBPinit: cannot allocate memory for BAT." ); |
1116 | return GDK_FAIL; |
1117 | } |
1118 | bn->batCacheid = bid; |
1119 | if (BATroles(bn, NULL) != GDK_SUCCEED) { |
1120 | GDKfree(bn); |
1121 | GDKerror("BBPinit: BATroles failed." ); |
1122 | return GDK_FAIL; |
1123 | } |
1124 | bn->batTransient = false; |
1125 | bn->batCopiedtodisk = true; |
1126 | bn->batRestricted = (properties & 0x06) >> 1; |
1127 | bn->batCount = (BUN) count; |
1128 | bn->batInserted = bn->batCount; |
1129 | bn->batCapacity = (BUN) capacity; |
1130 | char name[16]; |
1131 | snprintf(name, sizeof(name), "BATlock%d" , bn->batCacheid); /* fits */ |
1132 | MT_lock_init(&bn->batIdxLock, name); |
1133 | |
1134 | if (base > (uint64_t) GDK_oid_max) { |
1135 | BATdestroy(bn); |
1136 | GDKerror("BBPinit: head seqbase out of range (ID = %" PRIu64 ", seq = %" PRIu64 ")." , batid, base); |
1137 | return GDK_FAIL; |
1138 | } |
1139 | bn->hseqbase = (oid) base; |
1140 | n = heapinit(bn, buf + nread, &Thashash, bbpversion, bid, filename); |
1141 | if (n < 0) { |
1142 | BATdestroy(bn); |
1143 | return GDK_FAIL; |
1144 | } |
1145 | nread += n; |
1146 | n = vheapinit(bn, buf + nread, Thashash, bid, filename); |
1147 | if (n < 0) { |
1148 | BATdestroy(bn); |
1149 | return GDK_FAIL; |
1150 | } |
1151 | nread += n; |
1152 | |
1153 | if (buf[nread] != '\n' && buf[nread] != ' ') { |
1154 | BATdestroy(bn); |
1155 | GDKerror("BBPinit: invalid format for BBP.dir\n%s" , buf); |
1156 | return GDK_FAIL; |
1157 | } |
1158 | if (buf[nread] == ' ') |
1159 | options = buf + nread + 1; |
1160 | |
1161 | if ((s = strchr(headname, '~')) != NULL && s == headname) { |
1162 | int len = snprintf(logical, sizeof(logical), "tmp_%o" , (unsigned) bid); |
1163 | if (len == -1 || len >= (int) sizeof(logical)) |
1164 | GDKfatal("BBPinit: BBP logical filename directory is too large\n" ); |
1165 | } else { |
1166 | if (s) |
1167 | *s = 0; |
1168 | strcpy_len(logical, headname, sizeof(logical)); |
1169 | } |
1170 | s = logical; |
1171 | BBP_logical(bid) = GDKstrdup(s); |
1172 | if (BBP_logical(bid) == NULL) { |
1173 | BATdestroy(bn); |
1174 | return GDK_FAIL; |
1175 | } |
1176 | /* tailname is ignored */ |
1177 | strcpy_len(BBP_physical(bid), filename, sizeof(BBP_physical(bid))); |
1178 | #ifdef STATIC_CODE_ANALYSIS |
1179 | /* help coverity */ |
1180 | BBP_physical(bid)[sizeof(BBP_physical(bid)) - 1] = 0; |
1181 | #endif |
1182 | BBP_options(bid) = NULL; |
1183 | if (options) |
1184 | BBP_options(bid) = GDKstrdup(options); |
1185 | BBP_refs(bid) = 0; |
1186 | BBP_lrefs(bid) = 1; /* any BAT we encounter here is persistent, so has a logical reference */ |
1187 | BBP_desc(bid) = bn; |
1188 | BBP_status(bid) = BBPEXISTING; /* do we need other status bits? */ |
1189 | } |
1190 | return GDK_SUCCEED; |
1191 | } |
1192 | |
1193 | /* check that the necessary files for all BATs exist and are large |
1194 | * enough */ |
1195 | static gdk_return |
1196 | BBPcheckbats(void) |
1197 | { |
1198 | for (bat bid = 1; bid < (bat) ATOMIC_GET(&BBPsize); bid++) { |
1199 | struct stat statb; |
1200 | BAT *b; |
1201 | char *path; |
1202 | |
1203 | if ((b = BBP_desc(bid)) == NULL) { |
1204 | /* not a valid BAT */ |
1205 | continue; |
1206 | } |
1207 | if (b->ttype == TYPE_void) { |
1208 | /* no files needed */ |
1209 | continue; |
1210 | } |
1211 | path = GDKfilepath(0, BATDIR, BBP_physical(b->batCacheid), "tail" ); |
1212 | if (path == NULL) |
1213 | return GDK_FAIL; |
1214 | if (stat(path, &statb) < 0) { |
1215 | GDKsyserror("BBPcheckbats: cannot stat file %s\n" , |
1216 | path); |
1217 | GDKfree(path); |
1218 | return GDK_FAIL; |
1219 | } |
1220 | if ((size_t) statb.st_size < b->theap.free) { |
1221 | GDKerror("BBPcheckbats: file %s too small (expected %zu, actual %zu)\n" , path, b->theap.free, (size_t) statb.st_size); |
1222 | GDKfree(path); |
1223 | return GDK_FAIL; |
1224 | } |
1225 | GDKfree(path); |
1226 | if (b->tvheap != NULL) { |
1227 | path = GDKfilepath(0, BATDIR, BBP_physical(b->batCacheid), "theap" ); |
1228 | if (path == NULL) |
1229 | return GDK_FAIL; |
1230 | if (stat(path, &statb) < 0) { |
1231 | GDKsyserror("BBPcheckbats: cannot stat file %s\n" , |
1232 | path); |
1233 | GDKfree(path); |
1234 | return GDK_FAIL; |
1235 | } |
1236 | if ((size_t) statb.st_size < b->tvheap->free) { |
1237 | GDKerror("BBPcheckbats: file %s too small (expected %zu, actual %zu)\n" , path, b->tvheap->free, (size_t) statb.st_size); |
1238 | GDKfree(path); |
1239 | return GDK_FAIL; |
1240 | } |
1241 | GDKfree(path); |
1242 | } |
1243 | } |
1244 | return GDK_SUCCEED; |
1245 | } |
1246 | |
1247 | #ifdef HAVE_HGE |
1248 | #define SIZEOF_MAX_INT SIZEOF_HGE |
1249 | #else |
1250 | #define SIZEOF_MAX_INT SIZEOF_LNG |
1251 | #endif |
1252 | |
1253 | static unsigned |
1254 | (FILE *fp) |
1255 | { |
1256 | char buf[BUFSIZ]; |
1257 | int sz, ptrsize, oidsize, intsize; |
1258 | unsigned bbpversion; |
1259 | |
1260 | if (fgets(buf, sizeof(buf), fp) == NULL) { |
1261 | GDKerror("BBPinit: BBP.dir is empty" ); |
1262 | return 0; |
1263 | } |
1264 | if (sscanf(buf, "BBP.dir, GDKversion %u\n" , &bbpversion) != 1) { |
1265 | GDKerror("BBPinit: old BBP without version number" ); |
1266 | GDKerror("dump the database using a compatible version," ); |
1267 | GDKerror("then restore into new database using this version.\n" ); |
1268 | return 0; |
1269 | } |
1270 | if (bbpversion != GDKLIBRARY && |
1271 | bbpversion != GDKLIBRARY_OLDDATE && |
1272 | bbpversion != GDKLIBRARY_BLOB_SORT && |
1273 | bbpversion != GDKLIBRARY_NIL_NAN && |
1274 | bbpversion != GDKLIBRARY_TALIGN) { |
1275 | GDKerror("BBPinit: incompatible BBP version: expected 0%o, got 0%o.\n" |
1276 | "This database was probably created by %s version of MonetDB." , |
1277 | GDKLIBRARY, bbpversion, |
1278 | bbpversion > GDKLIBRARY ? "a newer" : "a too old" ); |
1279 | return 0; |
1280 | } |
1281 | if (fgets(buf, sizeof(buf), fp) == NULL) { |
1282 | GDKerror("BBPinit: short BBP" ); |
1283 | return 0; |
1284 | } |
1285 | if (sscanf(buf, "%d %d %d" , &ptrsize, &oidsize, &intsize) != 3) { |
1286 | GDKerror("BBPinit: BBP.dir has incompatible format: pointer, OID, and max. integer sizes are missing" ); |
1287 | return 0; |
1288 | } |
1289 | if (ptrsize != SIZEOF_SIZE_T || oidsize != SIZEOF_OID) { |
1290 | GDKerror("BBPinit: database created with incompatible server:\n" |
1291 | "expected pointer size %d, got %d, expected OID size %d, got %d." , |
1292 | SIZEOF_SIZE_T, ptrsize, SIZEOF_OID, oidsize); |
1293 | return 0; |
1294 | } |
1295 | if (intsize > SIZEOF_MAX_INT) { |
1296 | GDKerror("BBPinit: database created with incompatible server:\n" |
1297 | "expected max. integer size %d, got %d." , |
1298 | SIZEOF_MAX_INT, intsize); |
1299 | return 0; |
1300 | } |
1301 | if (fgets(buf, sizeof(buf), fp) == NULL) { |
1302 | GDKerror("BBPinit: short BBP" ); |
1303 | return 0; |
1304 | } |
1305 | #ifdef GDKLIBRARY_TALIGN |
1306 | char *s; |
1307 | if ((s = strstr(buf, "BBPsize" )) != NULL) { |
1308 | if (sscanf(s, "BBPsize=%d" , &sz) != 1) { |
1309 | GDKerror("BBPinit: no BBPsize value found\n" ); |
1310 | return 0; |
1311 | } |
1312 | sz = (int) (sz * BATMARGIN); |
1313 | if (sz > (bat) ATOMIC_GET(&BBPsize)) |
1314 | ATOMIC_SET(&BBPsize, sz); |
1315 | } |
1316 | #else |
1317 | if (sscanf(buf, "BBPsize=%d" , &sz) != 1) { |
1318 | GDKerror("BBPinit: no BBPsize value found\n" ); |
1319 | return 0; |
1320 | } |
1321 | sz = (int) (sz * BATMARGIN); |
1322 | if (sz > (bat) ATOMIC_GET(&BBPsize)) |
1323 | ATOMIC_SET(&BBPsize, sz); |
1324 | #endif |
1325 | assert(bbpversion != 0); |
1326 | return bbpversion; |
1327 | } |
1328 | |
1329 | bool |
1330 | GDKinmemory(void) |
1331 | { |
1332 | return BBPfarms[0].dirname == NULL; |
1333 | } |
1334 | |
1335 | /* all errors are fatal */ |
1336 | gdk_return |
1337 | BBPaddfarm(const char *dirname, int rolemask) |
1338 | { |
1339 | struct stat st; |
1340 | int i; |
1341 | |
1342 | if (dirname == NULL) { |
1343 | assert(BBPfarms[0].dirname == NULL); |
1344 | assert(rolemask & 1); |
1345 | assert(BBPfarms[0].roles == 0); |
1346 | BBPfarms[0].roles = rolemask; |
1347 | return GDK_SUCCEED; |
1348 | } |
1349 | if (strchr(dirname, '\n') != NULL) { |
1350 | GDKerror("BBPaddfarm: no newline allowed in directory name\n" ); |
1351 | return GDK_FAIL; |
1352 | } |
1353 | if (rolemask == 0 || (rolemask & 1 && BBPfarms[0].dirname != NULL)) { |
1354 | GDKerror("BBPaddfarm: bad rolemask\n" ); |
1355 | return GDK_FAIL; |
1356 | } |
1357 | if (mkdir(dirname, MONETDB_DIRMODE) < 0) { |
1358 | if (errno == EEXIST) { |
1359 | if (stat(dirname, &st) == -1 || !S_ISDIR(st.st_mode)) { |
1360 | GDKerror("BBPaddfarm: %s: not a directory\n" , dirname); |
1361 | return GDK_FAIL; |
1362 | } |
1363 | } else { |
1364 | GDKerror("BBPaddfarm: %s: cannot create directory\n" , dirname); |
1365 | return GDK_FAIL; |
1366 | } |
1367 | } |
1368 | for (i = 0; i < MAXFARMS; i++) { |
1369 | if (BBPfarms[i].dirname == NULL) { |
1370 | BBPfarms[i].dirname = GDKstrdup(dirname); |
1371 | if (BBPfarms[i].dirname == NULL) |
1372 | return GDK_FAIL; |
1373 | BBPfarms[i].roles = rolemask; |
1374 | if ((rolemask & 1) == 0) { |
1375 | char *bbpdir; |
1376 | int j; |
1377 | |
1378 | for (j = 0; j < i; j++) |
1379 | if (strcmp(BBPfarms[i].dirname, |
1380 | BBPfarms[j].dirname) == 0) |
1381 | return GDK_SUCCEED; |
1382 | /* if an extra farm, make sure we |
1383 | * don't find a BBP.dir there that |
1384 | * might belong to an existing |
1385 | * database */ |
1386 | bbpdir = GDKfilepath(i, BATDIR, "BBP" , "dir" ); |
1387 | if (bbpdir == NULL) { |
1388 | GDKerror("BBPaddfarm: malloc failed\n" ); |
1389 | return GDK_FAIL; |
1390 | } |
1391 | if (stat(bbpdir, &st) != -1 || errno != ENOENT) { |
1392 | GDKfree(bbpdir); |
1393 | GDKerror("BBPaddfarm: %s is a database\n" , dirname); |
1394 | return GDK_FAIL; |
1395 | } |
1396 | GDKfree(bbpdir); |
1397 | bbpdir = GDKfilepath(i, BAKDIR, "BBP" , "dir" ); |
1398 | if (bbpdir == NULL) { |
1399 | GDKerror("BBPaddfarm: malloc failed\n" ); |
1400 | return GDK_FAIL; |
1401 | } |
1402 | if (stat(bbpdir, &st) != -1 || errno != ENOENT) { |
1403 | GDKfree(bbpdir); |
1404 | GDKerror("BBPaddfarm: %s is a database\n" , dirname); |
1405 | return GDK_FAIL; |
1406 | } |
1407 | GDKfree(bbpdir); |
1408 | } |
1409 | return GDK_SUCCEED; |
1410 | } |
1411 | } |
1412 | GDKerror("BBPaddfarm: too many farms\n" ); |
1413 | return GDK_FAIL; |
1414 | } |
1415 | |
1416 | gdk_return |
1417 | BBPinit(void) |
1418 | { |
1419 | FILE *fp = NULL; |
1420 | struct stat st; |
1421 | unsigned bbpversion = 0; |
1422 | int i; |
1423 | |
1424 | if (!GDKinmemory()) { |
1425 | str bbpdirstr, backupbbpdirstr; |
1426 | |
1427 | if (!(bbpdirstr = GDKfilepath(0, BATDIR, "BBP" , "dir" ))) { |
1428 | GDKerror("BBPinit: GDKmalloc failed\n" ); |
1429 | return GDK_FAIL; |
1430 | } |
1431 | |
1432 | if (!(backupbbpdirstr = GDKfilepath(0, BAKDIR, "BBP" , "dir" ))) { |
1433 | GDKfree(bbpdirstr); |
1434 | GDKerror("BBPinit: GDKmalloc failed\n" ); |
1435 | return GDK_FAIL; |
1436 | } |
1437 | |
1438 | if (GDKremovedir(0, TEMPDIR) != GDK_SUCCEED) { |
1439 | GDKfree(bbpdirstr); |
1440 | GDKfree(backupbbpdirstr); |
1441 | GDKerror("BBPinit: cannot remove directory %s\n" , TEMPDIR); |
1442 | return GDK_FAIL; |
1443 | } |
1444 | |
1445 | if (GDKremovedir(0, DELDIR) != GDK_SUCCEED) { |
1446 | GDKfree(bbpdirstr); |
1447 | GDKfree(backupbbpdirstr); |
1448 | GDKerror("BBPinit: cannot remove directory %s\n" , DELDIR); |
1449 | return GDK_FAIL; |
1450 | } |
1451 | |
1452 | /* first move everything from SUBDIR to BAKDIR (its parent) */ |
1453 | if (BBPrecover_subdir() != GDK_SUCCEED) { |
1454 | GDKfree(bbpdirstr); |
1455 | GDKfree(backupbbpdirstr); |
1456 | GDKerror("BBPinit: cannot properly recover_subdir process %s. Please check whether your disk is full or write-protected" , SUBDIR); |
1457 | return GDK_FAIL; |
1458 | } |
1459 | |
1460 | /* try to obtain a BBP.dir from bakdir */ |
1461 | if (stat(backupbbpdirstr, &st) == 0) { |
1462 | /* backup exists; *must* use it */ |
1463 | if (recover_dir(0, stat(bbpdirstr, &st) == 0) != GDK_SUCCEED) |
1464 | goto bailout; |
1465 | if ((fp = GDKfilelocate(0, "BBP" , "r" , "dir" )) == NULL) { |
1466 | GDKfree(bbpdirstr); |
1467 | GDKfree(backupbbpdirstr); |
1468 | GDKerror("BBPinit: cannot open recovered BBP.dir." ); |
1469 | return GDK_FAIL; |
1470 | } |
1471 | } else if ((fp = GDKfilelocate(0, "BBP" , "r" , "dir" )) == NULL) { |
1472 | /* there was no BBP.dir either. Panic! try to use a |
1473 | * BBP.bak */ |
1474 | if (stat(backupbbpdirstr, &st) < 0) { |
1475 | /* no BBP.bak (nor BBP.dir or BACKUP/BBP.dir): |
1476 | * create a new one */ |
1477 | IODEBUG fprintf(stderr, "#BBPdir: initializing BBP.\n" ); /* BBPdir instead of BBPinit for backward compatibility of error messages */ |
1478 | if (BBPdir(0, NULL) != GDK_SUCCEED) |
1479 | goto bailout; |
1480 | } else if (GDKmove(0, BATDIR, "BBP" , "bak" , BATDIR, "BBP" , "dir" ) == GDK_SUCCEED) |
1481 | IODEBUG fprintf(stderr, "#BBPinit: reverting to dir saved in BBP.bak.\n" ); |
1482 | |
1483 | if ((fp = GDKfilelocate(0, "BBP" , "r" , "dir" )) == NULL) |
1484 | goto bailout; |
1485 | } |
1486 | assert(fp != NULL); |
1487 | GDKfree(bbpdirstr); |
1488 | GDKfree(backupbbpdirstr); |
1489 | } |
1490 | |
1491 | /* scan the BBP.dir to obtain current size */ |
1492 | BBPlimit = 0; |
1493 | memset(BBP, 0, sizeof(BBP)); |
1494 | ATOMIC_SET(&BBPsize, 1); |
1495 | |
1496 | if (GDKinmemory()) { |
1497 | bbpversion = GDKLIBRARY; |
1498 | } else { |
1499 | bbpversion = BBPheader(fp); |
1500 | if (bbpversion == 0) |
1501 | return GDK_FAIL; |
1502 | } |
1503 | |
1504 | BBPextend(0, false); /* allocate BBP records */ |
1505 | |
1506 | if (!GDKinmemory()) { |
1507 | ATOMIC_SET(&BBPsize, 1); |
1508 | if (BBPreadEntries(fp, bbpversion) != GDK_SUCCEED) |
1509 | return GDK_FAIL; |
1510 | fclose(fp); |
1511 | } |
1512 | |
1513 | if (BBPinithash(0) != GDK_SUCCEED) { |
1514 | GDKerror("BBPinit: BBPinithash failed" ); |
1515 | return GDK_FAIL; |
1516 | } |
1517 | |
1518 | /* will call BBPrecover if needed */ |
1519 | if (!GDKinmemory() && BBPprepare(false) != GDK_SUCCEED) { |
1520 | GDKerror("BBPinit: cannot properly prepare process %s. Please check whether your disk is full or write-protected" , BAKDIR); |
1521 | return GDK_FAIL; |
1522 | } |
1523 | |
1524 | if (BBPcheckbats() != GDK_SUCCEED) |
1525 | return GDK_FAIL; |
1526 | |
1527 | /* cleanup any leftovers (must be done after BBPrecover) */ |
1528 | for (i = 0; i < MAXFARMS && BBPfarms[i].dirname != NULL; i++) { |
1529 | int j; |
1530 | for (j = 0; j < i; j++) { |
1531 | /* don't clean a directory twice */ |
1532 | if (BBPfarms[j].dirname && |
1533 | strcmp(BBPfarms[i].dirname, |
1534 | BBPfarms[j].dirname) == 0) |
1535 | break; |
1536 | } |
1537 | if (j == i) { |
1538 | char *d = GDKfilepath(i, NULL, BATDIR, NULL); |
1539 | if (d == NULL) { |
1540 | GDKerror("BBPinit: malloc failed\n" ); |
1541 | return GDK_FAIL; |
1542 | } |
1543 | BBPdiskscan(d, strlen(d) - strlen(BATDIR)); |
1544 | GDKfree(d); |
1545 | } |
1546 | } |
1547 | |
1548 | #ifdef GDKLIBRARY_NIL_NAN |
1549 | if (bbpversion <= GDKLIBRARY_NIL_NAN) |
1550 | if (fixfloatbats() != GDK_SUCCEED) |
1551 | return GDK_FAIL; |
1552 | #endif |
1553 | #ifdef GDKLIBRARY_OLDDATE |
1554 | if (bbpversion <= GDKLIBRARY_OLDDATE) |
1555 | if (fixdatebats() != GDK_SUCCEED) |
1556 | return GDK_FAIL; |
1557 | #endif |
1558 | if (bbpversion < GDKLIBRARY) |
1559 | TMcommit(); |
1560 | return GDK_SUCCEED; |
1561 | |
1562 | bailout: |
1563 | /* now it is time for real panic */ |
1564 | GDKerror("BBPinit: could not write %s%cBBP.dir. Please check whether your disk is full or write-protected" , BATDIR, DIR_SEP); |
1565 | return GDK_FAIL; |
1566 | } |
1567 | |
1568 | /* |
1569 | * During the exit phase all non-persistent BATs are removed. Upon |
1570 | * exit the status of the BBP tables is saved on disk. This function |
1571 | * is called once and during the shutdown of the server. Since |
1572 | * shutdown may be issued from any thread (dangerous) it may lead to |
1573 | * interference in a parallel session. |
1574 | */ |
1575 | |
1576 | static int backup_files = 0, backup_dir = 0, backup_subdir = 0; |
1577 | |
1578 | void |
1579 | BBPexit(void) |
1580 | { |
1581 | bat i; |
1582 | bool skipped; |
1583 | |
1584 | BBPlock(); /* stop all threads ever touching more descriptors */ |
1585 | |
1586 | /* free all memory (just for leak-checking in Purify) */ |
1587 | do { |
1588 | skipped = false; |
1589 | for (i = 0; i < (bat) ATOMIC_GET(&BBPsize); i++) { |
1590 | if (BBPvalid(i)) { |
1591 | BAT *b = BBP_desc(i); |
1592 | |
1593 | if (b) { |
1594 | if (b->batSharecnt > 0) { |
1595 | skipped = true; |
1596 | continue; |
1597 | } |
1598 | if (isVIEW(b)) { |
1599 | /* "manually" |
1600 | * decrement parent |
1601 | * references, since |
1602 | * VIEWdestroy doesn't |
1603 | * (and can't here due |
1604 | * to locks) do it */ |
1605 | bat tp = VIEWtparent(b); |
1606 | bat vtp = VIEWvtparent(b); |
1607 | if (tp) { |
1608 | BBP_desc(tp)->batSharecnt--; |
1609 | --BBP_lrefs(tp); |
1610 | } |
1611 | if (vtp) { |
1612 | BBP_desc(vtp)->batSharecnt--; |
1613 | --BBP_lrefs(vtp); |
1614 | } |
1615 | VIEWdestroy(b); |
1616 | } else { |
1617 | BATfree(b); |
1618 | } |
1619 | } |
1620 | BBPuncacheit(i, true); |
1621 | if (BBP_logical(i) != BBP_bak(i)) |
1622 | GDKfree(BBP_logical(i)); |
1623 | BBP_logical(i) = NULL; |
1624 | } |
1625 | } |
1626 | } while (skipped); |
1627 | GDKfree(BBP_hash); |
1628 | BBP_hash = 0; |
1629 | // these need to be NULL, otherwise no new ones get created |
1630 | backup_files = 0; |
1631 | backup_dir = 0; |
1632 | backup_subdir = 0; |
1633 | |
1634 | } |
1635 | |
/*
 * The routine BBPdir creates the BAT pool dictionary file.  It
 * includes some information about the current state of affairs in
 * the pool.  The location in the buffer pool is saved for later use
 * as well.  This is merely done for ease of debugging and is of no
 * importance to front-ends.  The tail of unused entries is reclaimed
 * as well.
 */
1644 | static inline int |
1645 | heap_entry(FILE *fp, BAT *b) |
1646 | { |
1647 | return fprintf(fp, " %s %d %d %d " BUNFMT " " BUNFMT " " BUNFMT " " |
1648 | BUNFMT " " OIDFMT " %zu %zu %d" , |
1649 | b->ttype >= 0 ? BATatoms[b->ttype].name : ATOMunknown_name(b->ttype), |
1650 | b->twidth, |
1651 | b->tvarsized | (b->tvheap ? b->tvheap->hashash << 1 : 0), |
1652 | (unsigned short) b->tsorted | |
1653 | ((unsigned short) b->trevsorted << 7) | |
1654 | (((unsigned short) b->tkey & 0x01) << 8) | |
1655 | ((unsigned short) BATtdense(b) << 9) | |
1656 | ((unsigned short) b->tnonil << 10) | |
1657 | ((unsigned short) b->tnil << 11), |
1658 | b->tnokey[0], |
1659 | b->tnokey[1], |
1660 | b->tnosorted, |
1661 | b->tnorevsorted, |
1662 | b->tseqbase, |
1663 | b->theap.free, |
1664 | b->theap.size, |
1665 | (int) b->theap.newstorage); |
1666 | } |
1667 | |
1668 | static inline int |
1669 | vheap_entry(FILE *fp, Heap *h) |
1670 | { |
1671 | if (h == NULL) |
1672 | return 0; |
1673 | return fprintf(fp, " %zu %zu %d" , |
1674 | h->free, h->size, (int) h->newstorage); |
1675 | } |
1676 | |
1677 | static gdk_return |
1678 | new_bbpentry(FILE *fp, bat i, const char *prefix) |
1679 | { |
1680 | #ifndef NDEBUG |
1681 | assert(i > 0); |
1682 | assert(i < (bat) ATOMIC_GET(&BBPsize)); |
1683 | assert(BBP_desc(i)); |
1684 | assert(BBP_desc(i)->batCacheid == i); |
1685 | assert(BBP_desc(i)->batRole == PERSISTENT); |
1686 | assert(0 <= BBP_desc(i)->theap.farmid && BBP_desc(i)->theap.farmid < MAXFARMS); |
1687 | assert(BBPfarms[BBP_desc(i)->theap.farmid].roles & (1 << PERSISTENT)); |
1688 | if (BBP_desc(i)->tvheap) { |
1689 | assert(0 <= BBP_desc(i)->tvheap->farmid && BBP_desc(i)->tvheap->farmid < MAXFARMS); |
1690 | assert(BBPfarms[BBP_desc(i)->tvheap->farmid].roles & (1 << PERSISTENT)); |
1691 | } |
1692 | #endif |
1693 | |
1694 | if (fprintf(fp, "%s%zd %u %s %s %d " BUNFMT " " |
1695 | BUNFMT " " OIDFMT, prefix, |
1696 | /* BAT info */ |
1697 | (ssize_t) i, |
1698 | BBP_status(i) & BBPPERSISTENT, |
1699 | BBP_logical(i), |
1700 | BBP_physical(i), |
1701 | BBP_desc(i)->batRestricted << 1, |
1702 | BBP_desc(i)->batCount, |
1703 | BBP_desc(i)->batCapacity, |
1704 | BBP_desc(i)->hseqbase) < 0 || |
1705 | heap_entry(fp, BBP_desc(i)) < 0 || |
1706 | vheap_entry(fp, BBP_desc(i)->tvheap) < 0 || |
1707 | (BBP_options(i) && |
1708 | fprintf(fp, " %s" , BBP_options(i)) < 0) || |
1709 | fprintf(fp, "\n" ) < 0) { |
1710 | GDKsyserror("new_bbpentry: Writing BBP.dir entry failed\n" ); |
1711 | return GDK_FAIL; |
1712 | } |
1713 | |
1714 | return GDK_SUCCEED; |
1715 | } |
1716 | |
1717 | static gdk_return |
1718 | (FILE *f, int n) |
1719 | { |
1720 | if (fprintf(f, "BBP.dir, GDKversion %u\n%d %d %d\nBBPsize=%d\n" , |
1721 | GDKLIBRARY, SIZEOF_SIZE_T, SIZEOF_OID, |
1722 | #ifdef HAVE_HGE |
1723 | havehge ? SIZEOF_HGE : |
1724 | #endif |
1725 | SIZEOF_LNG, n) < 0 || |
1726 | ferror(f)) { |
1727 | GDKsyserror("BBPdir_header: Writing BBP.dir header failed\n" ); |
1728 | return GDK_FAIL; |
1729 | } |
1730 | return GDK_SUCCEED; |
1731 | } |
1732 | |
1733 | static gdk_return |
1734 | BBPdir_subcommit(int cnt, bat *subcommit) |
1735 | { |
1736 | FILE *obbpf, *nbbpf; |
1737 | bat j = 1; |
1738 | char buf[3000]; |
1739 | int n; |
1740 | |
1741 | #ifndef NDEBUG |
1742 | assert(subcommit != NULL); |
1743 | for (n = 2; n < cnt; n++) |
1744 | assert(subcommit[n - 1] < subcommit[n]); |
1745 | #endif |
1746 | |
1747 | if ((nbbpf = GDKfilelocate(0, "BBP" , "w" , "dir" )) == NULL) |
1748 | return GDK_FAIL; |
1749 | |
1750 | n = (bat) ATOMIC_GET(&BBPsize); |
1751 | |
1752 | /* we need to copy the backup BBP.dir to the new, but |
1753 | * replacing the entries for the subcommitted bats */ |
1754 | if ((obbpf = GDKfileopen(0, SUBDIR, "BBP" , "dir" , "r" )) == NULL && |
1755 | (obbpf = GDKfileopen(0, BAKDIR, "BBP" , "dir" , "r" )) == NULL) { |
1756 | GDKerror("BBPdir: subcommit attempted without backup BBP.dir." ); |
1757 | return GDK_FAIL; |
1758 | } |
1759 | /* read first three lines */ |
1760 | if (fgets(buf, sizeof(buf), obbpf) == NULL || /* BBP.dir, GDKversion %d */ |
1761 | fgets(buf, sizeof(buf), obbpf) == NULL || /* SIZEOF_SIZE_T SIZEOF_OID SIZEOF_MAX_INT */ |
1762 | fgets(buf, sizeof(buf), obbpf) == NULL) { /* BBPsize=%d */ |
1763 | GDKerror("BBPdir: subcommit attempted with invalid backup BBP.dir." ); |
1764 | return GDK_FAIL; |
1765 | } |
1766 | /* third line contains BBPsize */ |
1767 | sscanf(buf, "BBPsize=%d" , &n); |
1768 | if (n < (bat) ATOMIC_GET(&BBPsize)) |
1769 | n = (bat) ATOMIC_GET(&BBPsize); |
1770 | |
1771 | IODEBUG fprintf(stderr, "#BBPdir: writing BBP.dir (%d bats).\n" , n); |
1772 | |
1773 | if (BBPdir_header(nbbpf, n) != GDK_SUCCEED) { |
1774 | goto bailout; |
1775 | } |
1776 | n = 0; |
1777 | for (;;) { |
1778 | /* but for subcommits, all except the bats in the list |
1779 | * retain their existing mode */ |
1780 | if (n == 0 && obbpf != NULL) { |
1781 | if (fgets(buf, sizeof(buf), obbpf) == NULL) { |
1782 | fclose(obbpf); |
1783 | obbpf = NULL; |
1784 | } else if (sscanf(buf, "%d" , &n) != 1 || n <= 0) { |
1785 | GDKerror("BBPdir: subcommit attempted with invalid backup BBP.dir." ); |
1786 | return GDK_FAIL; |
1787 | } |
1788 | /* at this point, obbpf == NULL, or n > 0 */ |
1789 | } |
1790 | if (j == cnt && n == 0) { |
1791 | assert(obbpf == NULL); |
1792 | break; |
1793 | } |
1794 | if (j < cnt && (n == 0 || subcommit[j] <= n || obbpf == NULL)) { |
1795 | bat i = subcommit[j]; |
1796 | /* BBP.dir consists of all persistent bats only */ |
1797 | if (BBP_status(i) & BBPPERSISTENT) { |
1798 | if (new_bbpentry(nbbpf, i, "" ) != GDK_SUCCEED) { |
1799 | goto bailout; |
1800 | } |
1801 | IODEBUG new_bbpentry(stderr, i, "#" ); |
1802 | } |
1803 | if (i == n) |
1804 | n = 0; /* read new entry (i.e. skip this one from old BBP.dir */ |
1805 | do |
1806 | /* go to next, skipping duplicates */ |
1807 | j++; |
1808 | while (j < cnt && subcommit[j] == i); |
1809 | } else { |
1810 | if (fprintf(nbbpf, "%s" , buf) < 0) { |
1811 | GDKsyserror("BBPdir_subcommit: Copying BBP.dir entry failed\n" ); |
1812 | goto bailout; |
1813 | } |
1814 | IODEBUG fprintf(stderr, "#%s" , buf); |
1815 | n = 0; |
1816 | } |
1817 | } |
1818 | |
1819 | if (fflush(nbbpf) == EOF || |
1820 | (!(GDKdebug & NOSYNCMASK) |
1821 | #if defined(NATIVE_WIN32) |
1822 | && _commit(_fileno(nbbpf)) < 0 |
1823 | #elif defined(HAVE_FDATASYNC) |
1824 | && fdatasync(fileno(nbbpf)) < 0 |
1825 | #elif defined(HAVE_FSYNC) |
1826 | && fsync(fileno(nbbpf)) < 0 |
1827 | #endif |
1828 | )) { |
1829 | GDKsyserror("BBPdir_subcommit: Syncing BBP.dir file failed\n" ); |
1830 | goto bailout; |
1831 | } |
1832 | if (fclose(nbbpf) == EOF) { |
1833 | GDKsyserror("BBPdir_subcommit: Closing BBP.dir file failed\n" ); |
1834 | goto bailout; |
1835 | } |
1836 | |
1837 | IODEBUG fprintf(stderr, "#BBPdir end\n" ); |
1838 | |
1839 | return GDK_SUCCEED; |
1840 | |
1841 | bailout: |
1842 | if (obbpf != NULL) |
1843 | fclose(obbpf); |
1844 | if (nbbpf != NULL) |
1845 | fclose(nbbpf); |
1846 | return GDK_FAIL; |
1847 | } |
1848 | |
1849 | gdk_return |
1850 | BBPdir(int cnt, bat *subcommit) |
1851 | { |
1852 | FILE *fp; |
1853 | bat i; |
1854 | |
1855 | if (subcommit) |
1856 | return BBPdir_subcommit(cnt, subcommit); |
1857 | |
1858 | IODEBUG fprintf(stderr, "#BBPdir: writing BBP.dir (%d bats).\n" , (int) (bat) ATOMIC_GET(&BBPsize)); |
1859 | if ((fp = GDKfilelocate(0, "BBP" , "w" , "dir" )) == NULL) { |
1860 | goto bailout; |
1861 | } |
1862 | |
1863 | if (BBPdir_header(fp, (bat) ATOMIC_GET(&BBPsize)) != GDK_SUCCEED) { |
1864 | goto bailout; |
1865 | } |
1866 | |
1867 | for (i = 1; i < (bat) ATOMIC_GET(&BBPsize); i++) { |
1868 | /* write the entry |
1869 | * BBP.dir consists of all persistent bats */ |
1870 | if (BBP_status(i) & BBPPERSISTENT) { |
1871 | if (new_bbpentry(fp, i, "" ) != GDK_SUCCEED) { |
1872 | goto bailout; |
1873 | } |
1874 | IODEBUG new_bbpentry(stderr, i, "#" ); |
1875 | } |
1876 | } |
1877 | |
1878 | if (fflush(fp) == EOF || |
1879 | (!(GDKdebug & NOSYNCMASK) |
1880 | #if defined(NATIVE_WIN32) |
1881 | && _commit(_fileno(fp)) < 0 |
1882 | #elif defined(HAVE_FDATASYNC) |
1883 | && fdatasync(fileno(fp)) < 0 |
1884 | #elif defined(HAVE_FSYNC) |
1885 | && fsync(fileno(fp)) < 0 |
1886 | #endif |
1887 | )) { |
1888 | GDKsyserror("BBPdir: Syncing BBP.dir file failed\n" ); |
1889 | goto bailout; |
1890 | } |
1891 | if (fclose(fp) == EOF) { |
1892 | GDKsyserror("BBPdir: Closing BBP.dir file failed\n" ); |
1893 | return GDK_FAIL; |
1894 | } |
1895 | |
1896 | IODEBUG fprintf(stderr, "#BBPdir end\n" ); |
1897 | |
1898 | if (i < (bat) ATOMIC_GET(&BBPsize)) |
1899 | return GDK_FAIL; |
1900 | |
1901 | return GDK_SUCCEED; |
1902 | |
1903 | bailout: |
1904 | if (fp != NULL) |
1905 | fclose(fp); |
1906 | return GDK_FAIL; |
1907 | } |
1908 | |
1909 | /* function used for debugging */ |
1910 | void |
1911 | BBPdump(void) |
1912 | { |
1913 | bat i; |
1914 | size_t mem = 0, vm = 0; |
1915 | size_t cmem = 0, cvm = 0; |
1916 | int n = 0, nc = 0; |
1917 | |
1918 | for (i = 0; i < (bat) ATOMIC_GET(&BBPsize); i++) { |
1919 | BAT *b = BBP_cache(i); |
1920 | if (b == NULL) |
1921 | continue; |
1922 | fprintf(stderr, |
1923 | "# %d[%s]: nme='%s' refs=%d lrefs=%d " |
1924 | "status=%u count=" BUNFMT, |
1925 | i, |
1926 | ATOMname(b->ttype), |
1927 | BBP_logical(i) ? BBP_logical(i) : "<NULL>" , |
1928 | BBP_refs(i), |
1929 | BBP_lrefs(i), |
1930 | BBP_status(i), |
1931 | b->batCount); |
1932 | if (b->batSharecnt > 0) |
1933 | fprintf(stderr, " shares=%d" , b->batSharecnt); |
1934 | if (b->batDirtydesc) |
1935 | fprintf(stderr, " DirtyDesc" ); |
1936 | if (b->theap.parentid) { |
1937 | fprintf(stderr, " Theap -> %d" , b->theap.parentid); |
1938 | } else { |
1939 | fprintf(stderr, |
1940 | " Theap=[%zu,%zu]%s" , |
1941 | HEAPmemsize(&b->theap), |
1942 | HEAPvmsize(&b->theap), |
1943 | b->theap.dirty ? "(Dirty)" : "" ); |
1944 | if (BBP_logical(i) && BBP_logical(i)[0] == '.') { |
1945 | cmem += HEAPmemsize(&b->theap); |
1946 | cvm += HEAPvmsize(&b->theap); |
1947 | nc++; |
1948 | } else { |
1949 | mem += HEAPmemsize(&b->theap); |
1950 | vm += HEAPvmsize(&b->theap); |
1951 | n++; |
1952 | } |
1953 | } |
1954 | if (b->tvheap) { |
1955 | if (b->tvheap->parentid != b->batCacheid) { |
1956 | fprintf(stderr, |
1957 | " Tvheap -> %d" , |
1958 | b->tvheap->parentid); |
1959 | } else { |
1960 | fprintf(stderr, |
1961 | " Tvheap=[%zu,%zu]%s" , |
1962 | HEAPmemsize(b->tvheap), |
1963 | HEAPvmsize(b->tvheap), |
1964 | b->tvheap->dirty ? "(Dirty)" : "" ); |
1965 | if (BBP_logical(i) && BBP_logical(i)[0] == '.') { |
1966 | cmem += HEAPmemsize(b->tvheap); |
1967 | cvm += HEAPvmsize(b->tvheap); |
1968 | } else { |
1969 | mem += HEAPmemsize(b->tvheap); |
1970 | vm += HEAPvmsize(b->tvheap); |
1971 | } |
1972 | } |
1973 | } |
1974 | if (b->thash && b->thash != (Hash *) 1) { |
1975 | fprintf(stderr, |
1976 | " Thash=[%zu,%zu]" , |
1977 | HEAPmemsize(&b->thash->heap), |
1978 | HEAPvmsize(&b->thash->heap)); |
1979 | if (BBP_logical(i) && BBP_logical(i)[0] == '.') { |
1980 | cmem += HEAPmemsize(&b->thash->heap); |
1981 | cvm += HEAPvmsize(&b->thash->heap); |
1982 | } else { |
1983 | mem += HEAPmemsize(&b->thash->heap); |
1984 | vm += HEAPvmsize(&b->thash->heap); |
1985 | } |
1986 | } |
1987 | fprintf(stderr, " role: %s, persistence: %s\n" , |
1988 | b->batRole == PERSISTENT ? "persistent" : "transient" , |
1989 | b->batTransient ? "transient" : "persistent" ); |
1990 | } |
1991 | fprintf(stderr, |
1992 | "# %d bats: mem=%zu, vm=%zu %d cached bats: mem=%zu, vm=%zu\n" , |
1993 | n, mem, vm, nc, cmem, cvm); |
1994 | fflush(stderr); |
1995 | } |
1996 | |
1997 | /* |
1998 | * @+ BBP Readonly Interface |
1999 | * |
2000 | * These interface functions do not change the BBP tables. If they |
2001 | * only access one specific BAT, the caller must have ensured that no |
2002 | * other thread is modifying that BAT, therefore such functions do not |
2003 | * need locking. |
2004 | * |
2005 | * BBP index lookup by BAT name: |
2006 | */ |
2007 | static inline bat |
2008 | BBP_find(const char *nme, bool lock) |
2009 | { |
2010 | bat i = BBPnamecheck(nme); |
2011 | |
2012 | if (i != 0) { |
2013 | /* for tmp_X BATs, we already know X */ |
2014 | const char *s; |
2015 | |
2016 | if (i >= (bat) ATOMIC_GET(&BBPsize) || (s = BBP_logical(i)) == NULL || strcmp(s, nme)) { |
2017 | i = 0; |
2018 | } |
2019 | } else if (*nme != '.') { |
2020 | /* must lock since hash-lookup traverses other BATs */ |
2021 | if (lock) |
2022 | MT_lock_set(&GDKnameLock); |
2023 | for (i = BBP_hash[strHash(nme) & BBP_mask]; i; i = BBP_next(i)) { |
2024 | if (strcmp(BBP_logical(i), nme) == 0) |
2025 | break; |
2026 | } |
2027 | if (lock) |
2028 | MT_lock_unset(&GDKnameLock); |
2029 | } |
2030 | return i; |
2031 | } |
2032 | |
2033 | bat |
2034 | BBPindex(const char *nme) |
2035 | { |
2036 | return BBP_find(nme, true); |
2037 | } |
2038 | |
2039 | BAT * |
2040 | BBPgetdesc(bat i) |
2041 | { |
2042 | if (is_bat_nil(i)) |
2043 | return NULL; |
2044 | if (i < 0) |
2045 | i = -i; |
2046 | if (i != 0 && i < (bat) ATOMIC_GET(&BBPsize) && i && BBP_logical(i)) { |
2047 | return BBP_desc(i); |
2048 | } |
2049 | return NULL; |
2050 | } |
2051 | |
2052 | /* |
2053 | * @+ BBP Update Interface |
2054 | * Operations to insert, delete, clear, and modify BBP entries. |
2055 | * Our policy for the BBP is to provide unlocked BBP access for |
2056 | * speed, but still write operations have to be locked. |
2057 | * #ifdef DEBUG_THREADLOCAL_BATS |
2058 | * Create the shadow version (reversed) of a bat. |
2059 | * |
2060 | * An existing BAT is inserted into the BBP |
2061 | */ |
2062 | static inline str |
2063 | BBPsubdir_recursive(str s, bat i) |
2064 | { |
2065 | i >>= 6; |
2066 | if (i >= 0100) { |
2067 | s = BBPsubdir_recursive(s, i); |
2068 | *s++ = DIR_SEP; |
2069 | } |
2070 | i &= 077; |
2071 | *s++ = '0' + (i >> 3); |
2072 | *s++ = '0' + (i & 7); |
2073 | return s; |
2074 | } |
2075 | |
2076 | static inline void |
2077 | BBPgetsubdir(str s, bat i) |
2078 | { |
2079 | if (i >= 0100) { |
2080 | s = BBPsubdir_recursive(s, i); |
2081 | } |
2082 | *s = 0; |
2083 | } |
2084 | |
2085 | /* There are BBP_THREADMASK+1 (64) free lists, and ours (idx) is |
2086 | * empty. Here we find a longish free list (at least 20 entries), and |
2087 | * if we can find one, we take one entry from that list. If no long |
2088 | * enough list can be found, we create a new entry by either just |
2089 | * increasing BBPsize (up to BBPlimit) or extending the BBP (which |
2090 | * increases BBPlimit). Every time this function is called we start |
2091 | * searching in a following free list (variable "last"). */ |
2092 | static gdk_return |
2093 | maybeextend(int idx) |
2094 | { |
2095 | int t, m; |
2096 | int n, l; |
2097 | bat i; |
2098 | static int last = 0; |
2099 | |
2100 | l = 0; /* length of longest list */ |
2101 | m = 0; /* index of longest list */ |
2102 | /* find a longish free list */ |
2103 | for (t = 0; t <= BBP_THREADMASK && l <= 20; t++) { |
2104 | n = 0; |
2105 | for (i = BBP_free((t + last) & BBP_THREADMASK); |
2106 | i != 0 && n <= 20; |
2107 | i = BBP_next(i)) |
2108 | n++; |
2109 | if (n > l) { |
2110 | m = (t + last) & BBP_THREADMASK; |
2111 | l = n; |
2112 | } |
2113 | } |
2114 | if (l > 20) { |
2115 | /* list is long enough, get an entry from there */ |
2116 | i = BBP_free(m); |
2117 | BBP_free(m) = BBP_next(i); |
2118 | BBP_next(i) = 0; |
2119 | BBP_free(idx) = i; |
2120 | } else { |
2121 | /* let the longest list alone, get a fresh entry */ |
2122 | if ((bat) ATOMIC_ADD(&BBPsize, 1) >= BBPlimit) { |
2123 | if (BBPextend(idx, true) != GDK_SUCCEED) { |
2124 | /* undo add */ |
2125 | ATOMIC_SUB(&BBPsize, 1); |
2126 | /* couldn't extend; if there is any |
2127 | * free entry, take it from the |
2128 | * longest list after all */ |
2129 | if (l > 0) { |
2130 | i = BBP_free(m); |
2131 | BBP_free(m) = BBP_next(i); |
2132 | BBP_next(i) = 0; |
2133 | BBP_free(idx) = i; |
2134 | } else { |
2135 | /* nothing available */ |
2136 | return GDK_FAIL; |
2137 | } |
2138 | } |
2139 | } else { |
2140 | BBP_free(idx) = (bat) ATOMIC_GET(&BBPsize) - 1; |
2141 | } |
2142 | } |
2143 | last = (last + 1) & BBP_THREADMASK; |
2144 | return GDK_SUCCEED; |
2145 | } |
2146 | |
2147 | /* return new BAT id (> 0); return 0 on failure */ |
2148 | bat |
2149 | BBPinsert(BAT *bn) |
2150 | { |
2151 | MT_Id pid = MT_getpid(); |
2152 | bool lock = locked_by == 0 || locked_by != pid; |
2153 | char dirname[24]; |
2154 | bat i; |
2155 | int idx = threadmask(pid), len = 0; |
2156 | |
2157 | /* critical section: get a new BBP entry */ |
2158 | if (lock) { |
2159 | MT_lock_set(&GDKtrimLock(idx)); |
2160 | MT_lock_set(&GDKcacheLock(idx)); |
2161 | } |
2162 | |
2163 | /* find an empty slot */ |
2164 | if (BBP_free(idx) <= 0) { |
2165 | /* we need to extend the BBP */ |
2166 | gdk_return r = GDK_SUCCEED; |
2167 | if (lock) { |
2168 | /* we must take all locks in a consistent |
2169 | * order so first unset the one we've already |
2170 | * got */ |
2171 | MT_lock_unset(&GDKcacheLock(idx)); |
2172 | for (i = 0; i <= BBP_THREADMASK; i++) |
2173 | MT_lock_set(&GDKcacheLock(i)); |
2174 | } |
2175 | MT_lock_set(&GDKnameLock); |
2176 | /* check again in case some other thread extended |
2177 | * while we were waiting */ |
2178 | if (BBP_free(idx) <= 0) { |
2179 | r = maybeextend(idx); |
2180 | } |
2181 | MT_lock_unset(&GDKnameLock); |
2182 | if (lock) |
2183 | for (i = BBP_THREADMASK; i >= 0; i--) |
2184 | if (i != idx) |
2185 | MT_lock_unset(&GDKcacheLock(i)); |
2186 | if (r != GDK_SUCCEED) { |
2187 | if (lock) { |
2188 | MT_lock_unset(&GDKcacheLock(idx)); |
2189 | MT_lock_unset(&GDKtrimLock(idx)); |
2190 | } |
2191 | return 0; |
2192 | } |
2193 | } |
2194 | i = BBP_free(idx); |
2195 | assert(i > 0); |
2196 | BBP_free(idx) = BBP_next(i); |
2197 | |
2198 | if (lock) { |
2199 | MT_lock_unset(&GDKcacheLock(idx)); |
2200 | MT_lock_unset(&GDKtrimLock(idx)); |
2201 | } |
2202 | /* rest of the work outside the lock */ |
2203 | |
2204 | /* fill in basic BBP fields for the new bat */ |
2205 | |
2206 | bn->batCacheid = i; |
2207 | bn->creator_tid = MT_getpid(); |
2208 | |
2209 | BBP_status_set(i, BBPDELETING, "BBPinsert" ); |
2210 | BBP_cache(i) = NULL; |
2211 | BBP_desc(i) = NULL; |
2212 | BBP_refs(i) = 1; /* new bats have 1 pin */ |
2213 | BBP_lrefs(i) = 0; /* ie. no logical refs */ |
2214 | |
2215 | #ifdef HAVE_HGE |
2216 | if (bn->ttype == TYPE_hge) |
2217 | havehge = true; |
2218 | #endif |
2219 | |
2220 | if (*BBP_bak(i) == 0) |
2221 | len = snprintf(BBP_bak(i), sizeof(BBP_bak(i)), "tmp_%o" , (unsigned) i); |
2222 | if (len == -1 || len >= FILENAME_MAX) |
2223 | return 0; |
2224 | BBP_logical(i) = BBP_bak(i); |
2225 | |
2226 | /* Keep the physical location around forever */ |
2227 | if (!GDKinmemory() && *BBP_physical(i) == 0) { |
2228 | BBPgetsubdir(dirname, i); |
2229 | |
2230 | if (*dirname) /* i.e., i >= 0100 */ |
2231 | len = snprintf(BBP_physical(i), sizeof(BBP_physical(i)), |
2232 | "%s%c%o" , dirname, DIR_SEP, (unsigned) i); |
2233 | else |
2234 | len = snprintf(BBP_physical(i), sizeof(BBP_physical(i)), |
2235 | "%o" , (unsigned) i); |
2236 | if (len == -1 || len >= FILENAME_MAX) |
2237 | return 0; |
2238 | |
2239 | BATDEBUG fprintf(stderr, "#%d = new %s(%s)\n" , (int) i, BBPname(i), ATOMname(bn->ttype)); |
2240 | } |
2241 | |
2242 | return i; |
2243 | } |
2244 | |
2245 | gdk_return |
2246 | BBPcacheit(BAT *bn, bool lock) |
2247 | { |
2248 | bat i = bn->batCacheid; |
2249 | unsigned mode; |
2250 | |
2251 | if (lock) |
2252 | lock = locked_by == 0 || locked_by != MT_getpid(); |
2253 | |
2254 | if (i) { |
2255 | assert(i > 0); |
2256 | } else { |
2257 | i = BBPinsert(bn); /* bat was not previously entered */ |
2258 | if (i == 0) |
2259 | return GDK_FAIL; |
2260 | if (bn->tvheap) |
2261 | bn->tvheap->parentid = i; |
2262 | } |
2263 | assert(bn->batCacheid > 0); |
2264 | |
2265 | if (lock) |
2266 | MT_lock_set(&GDKswapLock(i)); |
2267 | mode = (BBP_status(i) | BBPLOADED) & ~(BBPLOADING | BBPDELETING); |
2268 | BBP_status_set(i, mode, "BBPcacheit" ); |
2269 | BBP_desc(i) = bn; |
2270 | |
2271 | /* cache it! */ |
2272 | BBP_cache(i) = bn; |
2273 | |
2274 | if (lock) |
2275 | MT_lock_unset(&GDKswapLock(i)); |
2276 | return GDK_SUCCEED; |
2277 | } |
2278 | |
2279 | /* |
2280 | * BBPuncacheit changes the BBP status to swapped out. Currently only |
2281 | * used in BBPfree (bat swapped out) and BBPclear (bat destroyed |
2282 | * forever). |
2283 | */ |
2284 | |
2285 | static void |
2286 | BBPuncacheit(bat i, bool unloaddesc) |
2287 | { |
2288 | if (i < 0) |
2289 | i = -i; |
2290 | if (BBPcheck(i, "BBPuncacheit" )) { |
2291 | BAT *b = BBP_desc(i); |
2292 | |
2293 | if (b) { |
2294 | if (BBP_cache(i)) { |
2295 | BATDEBUG fprintf(stderr, "#uncache %d (%s)\n" , (int) i, BBPname(i)); |
2296 | |
2297 | BBP_cache(i) = NULL; |
2298 | |
2299 | /* clearing bits can be done without the lock */ |
2300 | BBP_status_off(i, BBPLOADED, "BBPuncacheit" ); |
2301 | } |
2302 | if (unloaddesc) { |
2303 | BBP_desc(i) = NULL; |
2304 | BATdestroy(b); |
2305 | } |
2306 | } |
2307 | } |
2308 | } |
2309 | |
2310 | /* |
2311 | * @- BBPclear |
2312 | * BBPclear removes a BAT from the BBP directory forever. |
2313 | */ |
2314 | static inline void |
2315 | bbpclear(bat i, int idx, bool lock) |
2316 | { |
2317 | BATDEBUG { |
2318 | fprintf(stderr, "#clear %d (%s)\n" , (int) i, BBPname(i)); |
2319 | } |
2320 | BBPuncacheit(i, true); |
2321 | BATDEBUG { |
2322 | fprintf(stderr, "#BBPclear set to unloading %d\n" , i); |
2323 | } |
2324 | BBP_status_set(i, BBPUNLOADING, "BBPclear" ); |
2325 | BBP_refs(i) = 0; |
2326 | BBP_lrefs(i) = 0; |
2327 | if (lock) |
2328 | MT_lock_set(&GDKcacheLock(idx)); |
2329 | |
2330 | if (BBPtmpcheck(BBP_logical(i)) == 0) { |
2331 | MT_lock_set(&GDKnameLock); |
2332 | BBP_delete(i); |
2333 | MT_lock_unset(&GDKnameLock); |
2334 | } |
2335 | if (BBP_logical(i) != BBP_bak(i)) |
2336 | GDKfree(BBP_logical(i)); |
2337 | BBP_status_set(i, 0, "BBPclear" ); |
2338 | BBP_logical(i) = NULL; |
2339 | BBP_next(i) = BBP_free(idx); |
2340 | BBP_free(idx) = i; |
2341 | if (lock) |
2342 | MT_lock_unset(&GDKcacheLock(idx)); |
2343 | } |
2344 | |
2345 | void |
2346 | BBPclear(bat i) |
2347 | { |
2348 | MT_Id pid = MT_getpid(); |
2349 | bool lock = locked_by == 0 || locked_by != pid; |
2350 | |
2351 | if (BBPcheck(i, "BBPclear" )) { |
2352 | bbpclear(i, threadmask(pid), lock); |
2353 | } |
2354 | } |
2355 | |
2356 | /* |
2357 | * @- BBP rename |
2358 | * |
2359 | * Each BAT has a logical name that is globally unique. Its reverse |
2360 | * view can also be assigned a name, that also has to be globally |
2361 | * unique. The batId is the same as the logical BAT name. |
2362 | * |
2363 | * The default logical name of a BAT is tmp_X, where X is the |
2364 | * batCacheid. Apart from being globally unique, new logical bat |
2365 | * names cannot be of the form tmp_X, unless X is the batCacheid. |
2366 | * |
2367 | * Physical names consist of a directory name followed by a logical |
2368 | * name suffix. The directory name is derived from the batCacheid, |
2369 | * and is currently organized in a hierarchy that puts max 64 bats in |
2370 | * each directory (see BBPgetsubdir). |
2371 | * |
 * Concerning the physical suffix: it is almost always bat_X. This
 * saves us a whole lot of trouble, as bat_X is always unique and no
 * conflicts can occur. Other suffixes are supported only for
 * backward compatibility with old repositories (you won't see them
 * anymore in new repositories).
2377 | */ |
2378 | int |
2379 | BBPrename(bat bid, const char *nme) |
2380 | { |
2381 | BAT *b = BBPdescriptor(bid); |
2382 | char dirname[24]; |
2383 | bat tmpid = 0, i; |
2384 | int idx; |
2385 | |
2386 | if (b == NULL) |
2387 | return 0; |
2388 | |
2389 | /* If name stays same, do nothing */ |
2390 | if (BBP_logical(bid) && strcmp(BBP_logical(bid), nme) == 0) |
2391 | return 0; |
2392 | |
2393 | BBPgetsubdir(dirname, bid); |
2394 | |
2395 | if ((tmpid = BBPnamecheck(nme)) && tmpid != bid) { |
2396 | GDKerror("BBPrename: illegal temporary name: '%s'\n" , nme); |
2397 | return BBPRENAME_ILLEGAL; |
2398 | } |
2399 | if (strlen(dirname) + strLen(nme) + 1 >= IDLENGTH) { |
2400 | GDKerror("BBPrename: illegal temporary name: '%s'\n" , nme); |
2401 | return BBPRENAME_LONG; |
2402 | } |
2403 | idx = threadmask(MT_getpid()); |
2404 | MT_lock_set(&GDKtrimLock(idx)); |
2405 | MT_lock_set(&GDKnameLock); |
2406 | i = BBP_find(nme, false); |
2407 | if (i != 0) { |
2408 | MT_lock_unset(&GDKnameLock); |
2409 | MT_lock_unset(&GDKtrimLock(idx)); |
2410 | GDKerror("BBPrename: name is in use: '%s'.\n" , nme); |
2411 | return BBPRENAME_ALREADY; |
2412 | } |
2413 | |
2414 | /* carry through the name change */ |
2415 | if (BBP_logical(bid) && BBPtmpcheck(BBP_logical(bid)) == 0) { |
2416 | BBP_delete(bid); |
2417 | } |
2418 | if (BBP_logical(bid) != BBP_bak(bid)) |
2419 | GDKfree(BBP_logical(bid)); |
2420 | BBP_logical(bid) = GDKstrdup(nme); |
2421 | if (tmpid == 0) { |
2422 | BBP_insert(bid); |
2423 | } |
2424 | b->batDirtydesc = true; |
2425 | if (!b->batTransient) { |
2426 | bool lock = locked_by == 0 || locked_by != MT_getpid(); |
2427 | |
2428 | if (lock) |
2429 | MT_lock_set(&GDKswapLock(i)); |
2430 | BBP_status_on(bid, BBPRENAMED, "BBPrename" ); |
2431 | if (lock) |
2432 | MT_lock_unset(&GDKswapLock(i)); |
2433 | } |
2434 | MT_lock_unset(&GDKnameLock); |
2435 | MT_lock_unset(&GDKtrimLock(idx)); |
2436 | return 0; |
2437 | } |
2438 | |
2439 | /* |
2440 | * @+ BBP swapping Policy |
2441 | * The BAT can be moved back to disk using the routine BBPfree. It |
2442 | * frees the storage for other BATs. After this call BAT* references |
2443 | * maintained for the BAT are wrong. We should keep track of dirty |
2444 | * unloaded BATs. They may have to be committed later on, which may |
2445 | * include reading them in again. |
2446 | * |
2447 | * BBPswappable: may this bat be unloaded? Only real bats without |
2448 | * memory references can be unloaded. |
2449 | */ |
2450 | static inline void |
2451 | BBPspin(bat i, const char *s, unsigned event) |
2452 | { |
2453 | if (BBPcheck(i, "BBPspin" ) && (BBP_status(i) & event)) { |
2454 | lng spin = LL_CONSTANT(0); |
2455 | |
2456 | do { |
2457 | MT_sleep_ms(KITTENNAP); |
2458 | spin++; |
2459 | } while (BBP_status(i) & event); |
2460 | BATDEBUG fprintf(stderr, "#BBPspin(%d,%s,%u): " LLFMT " loops\n" , (int) i, s, event, spin); |
2461 | } |
2462 | } |
2463 | |
2464 | /* This function can fail if the input parameter (i) is incorrect |
2465 | * (unlikely), of if the bat is a view, this is a physical (not |
2466 | * logical) incref (i.e. called through BBPfix(), and it is the first |
2467 | * reference (refs was 0 and should become 1). It can fail in this |
2468 | * case if the parent bat cannot be loaded. |
2469 | * This means the return value of BBPfix should be checked in these |
2470 | * circumstances, but not necessarily in others. */ |
2471 | static inline int |
2472 | incref(bat i, bool logical, bool lock) |
2473 | { |
2474 | int refs; |
2475 | bat tp, tvp; |
2476 | BAT *b, *pb = NULL, *pvb = NULL; |
2477 | bool load = false; |
2478 | |
2479 | if (!BBPcheck(i, logical ? "BBPretain" : "BBPfix" )) |
2480 | return 0; |
2481 | |
2482 | /* Before we get the lock and before we do all sorts of |
2483 | * things, make sure we can load the parent bats if there are |
2484 | * any. If we can't load them, we can still easily fail. If |
2485 | * this is indeed a view, but not the first physical |
2486 | * reference, getting the parent BAT descriptor is |
2487 | * superfluous, but not too expensive, so we do it anyway. */ |
2488 | if (!logical && (b = BBP_desc(i)) != NULL) { |
2489 | if (b->theap.parentid) { |
2490 | pb = BATdescriptor(b->theap.parentid); |
2491 | if (pb == NULL) |
2492 | return 0; |
2493 | } |
2494 | if (b->tvheap && b->tvheap->parentid != i) { |
2495 | pvb = BATdescriptor(b->tvheap->parentid); |
2496 | if (pvb == NULL) { |
2497 | if (pb) |
2498 | BBPunfix(pb->batCacheid); |
2499 | return 0; |
2500 | } |
2501 | } |
2502 | } |
2503 | |
2504 | if (lock) { |
2505 | for (;;) { |
2506 | MT_lock_set(&GDKswapLock(i)); |
2507 | if (!(BBP_status(i) & (BBPUNSTABLE|BBPLOADING))) |
2508 | break; |
2509 | /* the BATs is "unstable", try again */ |
2510 | MT_lock_unset(&GDKswapLock(i)); |
2511 | MT_sleep_ms(KITTENNAP); |
2512 | } |
2513 | } |
2514 | /* we have the lock */ |
2515 | |
2516 | b = BBP_desc(i); |
2517 | if (b == NULL) { |
2518 | /* should not have happened */ |
2519 | if (lock) |
2520 | MT_lock_unset(&GDKswapLock(i)); |
2521 | return 0; |
2522 | } |
2523 | |
2524 | assert(BBP_refs(i) + BBP_lrefs(i) || |
2525 | BBP_status(i) & (BBPDELETED | BBPSWAPPED)); |
2526 | if (logical) { |
2527 | /* parent BATs are not relevant for logical refs */ |
2528 | tp = tvp = 0; |
2529 | refs = ++BBP_lrefs(i); |
2530 | } else { |
2531 | tp = b->theap.parentid; |
2532 | assert(tp >= 0); |
2533 | tvp = b->tvheap == 0 || b->tvheap->parentid == i ? 0 : b->tvheap->parentid; |
2534 | refs = ++BBP_refs(i); |
2535 | if (refs == 1 && (tp || tvp)) { |
2536 | /* If this is a view, we must load the parent |
2537 | * BATs, but we must do that outside of the |
2538 | * lock. Set the BBPLOADING flag so that |
2539 | * other threads will wait until we're |
2540 | * done. */ |
2541 | BBP_status_on(i, BBPLOADING, "BBPfix" ); |
2542 | load = true; |
2543 | } |
2544 | } |
2545 | if (lock) |
2546 | MT_lock_unset(&GDKswapLock(i)); |
2547 | |
2548 | if (load) { |
2549 | /* load the parent BATs and set the heap base pointers |
2550 | * to the correct values */ |
2551 | assert(!logical); |
2552 | if (tp) { |
2553 | assert(pb != NULL); |
2554 | b->theap.base = pb->theap.base + (size_t) b->theap.base; |
2555 | } |
2556 | /* done loading, release descriptor */ |
2557 | BBP_status_off(i, BBPLOADING, "BBPfix" ); |
2558 | } else if (!logical) { |
2559 | /* this wasn't the first physical reference, so undo |
2560 | * the fixes on the parent bats */ |
2561 | if (pb) |
2562 | BBPunfix(pb->batCacheid); |
2563 | if (pvb) |
2564 | BBPunfix(pvb->batCacheid); |
2565 | } |
2566 | return refs; |
2567 | } |
2568 | |
2569 | /* see comment for incref */ |
2570 | int |
2571 | BBPfix(bat i) |
2572 | { |
2573 | bool lock = locked_by == 0 || locked_by != MT_getpid(); |
2574 | |
2575 | return incref(i, false, lock); |
2576 | } |
2577 | |
2578 | int |
2579 | BBPretain(bat i) |
2580 | { |
2581 | bool lock = locked_by == 0 || locked_by != MT_getpid(); |
2582 | |
2583 | return incref(i, true, lock); |
2584 | } |
2585 | |
2586 | void |
2587 | BBPshare(bat parent) |
2588 | { |
2589 | bool lock = locked_by == 0 || locked_by != MT_getpid(); |
2590 | |
2591 | assert(parent > 0); |
2592 | (void) incref(parent, true, lock); |
2593 | if (lock) |
2594 | MT_lock_set(&GDKswapLock(parent)); |
2595 | ++BBP_cache(parent)->batSharecnt; |
2596 | assert(BBP_refs(parent) > 0); |
2597 | if (lock) |
2598 | MT_lock_unset(&GDKswapLock(parent)); |
2599 | (void) incref(parent, false, lock); |
2600 | } |
2601 | |
2602 | static inline int |
2603 | decref(bat i, bool logical, bool releaseShare, bool lock, const char *func) |
2604 | { |
2605 | int refs = 0; |
2606 | bool swap = false; |
2607 | bat tp = 0, tvp = 0; |
2608 | BAT *b; |
2609 | |
2610 | assert(i > 0); |
2611 | if (lock) |
2612 | MT_lock_set(&GDKswapLock(i)); |
2613 | if (releaseShare) { |
2614 | --BBP_desc(i)->batSharecnt; |
2615 | if (lock) |
2616 | MT_lock_unset(&GDKswapLock(i)); |
2617 | return refs; |
2618 | } |
2619 | |
2620 | while (BBP_status(i) & BBPUNLOADING) { |
2621 | if (lock) |
2622 | MT_lock_unset(&GDKswapLock(i)); |
2623 | BBPspin(i, func, BBPUNLOADING); |
2624 | if (lock) |
2625 | MT_lock_set(&GDKswapLock(i)); |
2626 | } |
2627 | |
2628 | b = BBP_cache(i); |
2629 | |
2630 | /* decrement references by one */ |
2631 | if (logical) { |
2632 | if (BBP_lrefs(i) == 0) { |
2633 | GDKerror("%s: %s does not have logical references.\n" , func, BBPname(i)); |
2634 | assert(0); |
2635 | } else { |
2636 | refs = --BBP_lrefs(i); |
2637 | } |
2638 | } else { |
2639 | if (BBP_refs(i) == 0) { |
2640 | GDKerror("%s: %s does not have pointer fixes.\n" , func, BBPname(i)); |
2641 | assert(0); |
2642 | } else { |
2643 | assert(b == NULL || b->theap.parentid == 0 || BBP_refs(b->theap.parentid) > 0); |
2644 | assert(b == NULL || b->tvheap == NULL || b->tvheap->parentid == 0 || BBP_refs(b->tvheap->parentid) > 0); |
2645 | refs = --BBP_refs(i); |
2646 | if (b && refs == 0) { |
2647 | if ((tp = b->theap.parentid) != 0) |
2648 | b->theap.base = (char *) (b->theap.base - BBP_cache(tp)->theap.base); |
2649 | tvp = VIEWvtparent(b); |
2650 | } |
2651 | } |
2652 | } |
2653 | |
2654 | /* we destroy transients asap and unload persistent bats only |
2655 | * if they have been made cold or are not dirty */ |
2656 | if (BBP_refs(i) > 0 || |
2657 | (BBP_lrefs(i) > 0 && |
2658 | (b == NULL || BATdirty(b) || !(BBP_status(i) & BBPPERSISTENT) || GDKinmemory()))) { |
2659 | /* bat cannot be swapped out */ |
2660 | } else if (b ? b->batSharecnt == 0 : (BBP_status(i) & BBPTMP)) { |
2661 | /* bat will be unloaded now. set the UNLOADING bit |
2662 | * while locked so no other thread thinks it's |
2663 | * available anymore */ |
2664 | assert((BBP_status(i) & BBPUNLOADING) == 0); |
2665 | BATDEBUG { |
2666 | fprintf(stderr, "#%s set to unloading BAT %d\n" , func, i); |
2667 | } |
2668 | BBP_status_on(i, BBPUNLOADING, func); |
2669 | swap = true; |
2670 | } |
2671 | |
2672 | /* unlock before re-locking in unload; as saving a dirty |
2673 | * persistent bat may take a long time */ |
2674 | if (lock) |
2675 | MT_lock_unset(&GDKswapLock(i)); |
2676 | |
2677 | if (swap && b != NULL) { |
2678 | if (BBP_lrefs(i) == 0 && (BBP_status(i) & BBPDELETED) == 0) { |
2679 | /* free memory (if loaded) and delete from |
2680 | * disk (if transient but saved) */ |
2681 | BBPdestroy(b); |
2682 | } else { |
2683 | BATDEBUG { |
2684 | fprintf(stderr, "#%s unload and free bat %d\n" , func, i); |
2685 | } |
2686 | /* free memory of transient */ |
2687 | if (BBPfree(b, func) != GDK_SUCCEED) |
2688 | return -1; /* indicate failure */ |
2689 | } |
2690 | } |
2691 | if (tp) |
2692 | decref(tp, false, false, lock, func); |
2693 | if (tvp) |
2694 | decref(tvp, false, false, lock, func); |
2695 | return refs; |
2696 | } |
2697 | |
2698 | int |
2699 | BBPunfix(bat i) |
2700 | { |
2701 | if (BBPcheck(i, "BBPunfix" ) == 0) { |
2702 | return -1; |
2703 | } |
2704 | return decref(i, false, false, true, "BBPunfix" ); |
2705 | } |
2706 | |
2707 | int |
2708 | BBPrelease(bat i) |
2709 | { |
2710 | if (BBPcheck(i, "BBPrelease" ) == 0) { |
2711 | return -1; |
2712 | } |
2713 | return decref(i, true, false, true, "BBPrelease" ); |
2714 | } |
2715 | |
/*
 * M5 often changes a physical reference into a logical reference.
 * This state change consists of the sequence
 * BBPretain(b);BBPunfix(b).
 * A faster solution is given below, because it does not trigger the
 * BBP management actions, such as garbage collecting the bats.
 * [first step, initiate code change]
 */
2723 | void |
2724 | BBPkeepref(bat i) |
2725 | { |
2726 | if (is_bat_nil(i)) |
2727 | return; |
2728 | if (BBPcheck(i, "BBPkeepref" )) { |
2729 | bool lock = locked_by == 0 || locked_by != MT_getpid(); |
2730 | BAT *b; |
2731 | |
2732 | if ((b = BBPdescriptor(i)) != NULL) { |
2733 | BATsettrivprop(b); |
2734 | if (GDKdebug & (CHECKMASK | PROPMASK)) |
2735 | BATassertProps(b); |
2736 | } |
2737 | |
2738 | incref(i, true, lock); |
2739 | assert(BBP_refs(i)); |
2740 | decref(i, false, false, lock, "BBPkeepref" ); |
2741 | } |
2742 | } |
2743 | |
2744 | static inline void |
2745 | GDKunshare(bat parent) |
2746 | { |
2747 | (void) decref(parent, false, true, true, "GDKunshare" ); |
2748 | (void) decref(parent, true, false, true, "GDKunshare" ); |
2749 | } |
2750 | |
2751 | void |
2752 | BBPunshare(bat parent) |
2753 | { |
2754 | GDKunshare(parent); |
2755 | } |
2756 | |
2757 | /* |
2758 | * BBPreclaim is a user-exported function; the common way to destroy a |
2759 | * BAT the hard way. |
2760 | * |
2761 | * Return values: |
2762 | * -1 = bat cannot be unloaded (it has more than your own memory fix) |
2763 | * 0 = unloaded successfully |
2764 | * 1 = unload failed (due to write-to-disk failure) |
2765 | */ |
2766 | int |
2767 | BBPreclaim(BAT *b) |
2768 | { |
2769 | bat i; |
2770 | bool lock = locked_by == 0 || locked_by != MT_getpid(); |
2771 | |
2772 | if (b == NULL) |
2773 | return -1; |
2774 | i = b->batCacheid; |
2775 | |
2776 | assert(BBP_refs(i) == 1); |
2777 | |
2778 | return decref(i, false, false, lock, "BBPreclaim" ) <0; |
2779 | } |
2780 | |
2781 | /* |
2782 | * BBPdescriptor checks whether BAT needs loading and does so if |
2783 | * necessary. You must have at least one fix on the BAT before calling |
2784 | * this. |
2785 | */ |
2786 | static BAT * |
2787 | getBBPdescriptor(bat i, bool lock) |
2788 | { |
2789 | bool load = false; |
2790 | BAT *b = NULL; |
2791 | |
2792 | assert(i > 0); |
2793 | if (!BBPcheck(i, "BBPdescriptor" )) { |
2794 | return NULL; |
2795 | } |
2796 | assert(BBP_refs(i)); |
2797 | if ((b = BBP_cache(i)) == NULL) { |
2798 | |
2799 | if (lock) |
2800 | MT_lock_set(&GDKswapLock(i)); |
2801 | while (BBP_status(i) & BBPWAITING) { /* wait for bat to be loaded by other thread */ |
2802 | if (lock) |
2803 | MT_lock_unset(&GDKswapLock(i)); |
2804 | MT_sleep_ms(KITTENNAP); |
2805 | if (lock) |
2806 | MT_lock_set(&GDKswapLock(i)); |
2807 | } |
2808 | if (BBPvalid(i)) { |
2809 | b = BBP_cache(i); |
2810 | if (b == NULL) { |
2811 | load = true; |
2812 | BATDEBUG { |
2813 | fprintf(stderr, "#BBPdescriptor set to loading BAT %d\n" , i); |
2814 | } |
2815 | BBP_status_on(i, BBPLOADING, "BBPdescriptor" ); |
2816 | } |
2817 | } |
2818 | if (lock) |
2819 | MT_lock_unset(&GDKswapLock(i)); |
2820 | } |
2821 | if (load) { |
2822 | IODEBUG fprintf(stderr, "#load %s\n" , BBPname(i)); |
2823 | |
2824 | b = BATload_intern(i, lock); |
2825 | |
2826 | /* clearing bits can be done without the lock */ |
2827 | BBP_status_off(i, BBPLOADING, "BBPdescriptor" ); |
2828 | CHECKDEBUG if (b != NULL) |
2829 | BATassertProps(b); |
2830 | } |
2831 | return b; |
2832 | } |
2833 | |
2834 | BAT * |
2835 | BBPdescriptor(bat i) |
2836 | { |
2837 | bool lock = locked_by == 0 || locked_by != MT_getpid(); |
2838 | |
2839 | return getBBPdescriptor(i, lock); |
2840 | } |
2841 | |
2842 | /* |
2843 | * In BBPsave executes unlocked; it just marks the BBP_status of the |
2844 | * BAT to BBPsaving, so others that want to save or unload this BAT |
2845 | * must spin lock on the BBP_status field. |
2846 | */ |
2847 | gdk_return |
2848 | BBPsave(BAT *b) |
2849 | { |
2850 | bool lock = locked_by == 0 || locked_by != MT_getpid(); |
2851 | bat bid = b->batCacheid; |
2852 | gdk_return ret = GDK_SUCCEED; |
2853 | |
2854 | if (BBP_lrefs(bid) == 0 || isVIEW(b) || !BATdirty(b)) |
2855 | /* do nothing */ |
2856 | return GDK_SUCCEED; |
2857 | |
2858 | if (lock) |
2859 | MT_lock_set(&GDKswapLock(bid)); |
2860 | |
2861 | if (BBP_status(bid) & BBPSAVING) { |
2862 | /* wait until save in other thread completes */ |
2863 | if (lock) |
2864 | MT_lock_unset(&GDKswapLock(bid)); |
2865 | BBPspin(bid, "BBPsave" , BBPSAVING); |
2866 | } else { |
2867 | /* save it */ |
2868 | unsigned flags = BBPSAVING; |
2869 | |
2870 | if (DELTAdirty(b)) { |
2871 | flags |= BBPSWAPPED; |
2872 | } |
2873 | if (b->batTransient) { |
2874 | flags |= BBPTMP; |
2875 | } |
2876 | BBP_status_on(bid, flags, "BBPsave" ); |
2877 | if (lock) |
2878 | MT_lock_unset(&GDKswapLock(bid)); |
2879 | |
2880 | IODEBUG fprintf(stderr, "#save %s\n" , BATgetId(b)); |
2881 | |
2882 | /* do the time-consuming work unlocked */ |
2883 | if (BBP_status(bid) & BBPEXISTING) |
2884 | ret = BBPbackup(b, false); |
2885 | if (ret == GDK_SUCCEED) { |
2886 | ret = BATsave(b); |
2887 | } |
2888 | /* clearing bits can be done without the lock */ |
2889 | BBP_status_off(bid, BBPSAVING, "BBPsave" ); |
2890 | } |
2891 | return ret; |
2892 | } |
2893 | |
2894 | /* |
2895 | * TODO merge BBPfree with BATfree? Its function is to prepare a BAT |
2896 | * for being unloaded (or even destroyed, if the BAT is not |
2897 | * persistent). |
2898 | */ |
2899 | static void |
2900 | BBPdestroy(BAT *b) |
2901 | { |
2902 | bat tp = b->theap.parentid; |
2903 | bat vtp = VIEWvtparent(b); |
2904 | |
2905 | if (isVIEW(b)) { /* a physical view */ |
2906 | VIEWdestroy(b); |
2907 | } else { |
2908 | /* bats that get destroyed must unfix their atoms */ |
2909 | int (*tunfix) (const void *) = BATatoms[b->ttype].atomUnfix; |
2910 | BUN p, q; |
2911 | BATiter bi = bat_iterator(b); |
2912 | |
2913 | assert(b->batSharecnt == 0); |
2914 | if (tunfix) { |
2915 | BATloop(b, p, q) { |
2916 | (*tunfix) (BUNtail(bi, p)); |
2917 | } |
2918 | } |
2919 | BATdelete(b); /* handles persistent case also (file deletes) */ |
2920 | } |
2921 | BBPclear(b->batCacheid); /* if destroyed; de-register from BBP */ |
2922 | |
2923 | /* parent released when completely done with child */ |
2924 | if (tp) |
2925 | GDKunshare(tp); |
2926 | if (vtp) |
2927 | GDKunshare(vtp); |
2928 | } |
2929 | |
2930 | static gdk_return |
2931 | BBPfree(BAT *b, const char *calledFrom) |
2932 | { |
2933 | bat bid = b->batCacheid, tp = VIEWtparent(b), vtp = VIEWvtparent(b); |
2934 | gdk_return ret; |
2935 | |
2936 | assert(bid > 0); |
2937 | assert(BBPswappable(b)); |
2938 | (void) calledFrom; |
2939 | |
2940 | BBP_unload_inc(); |
2941 | /* write dirty BATs before being unloaded */ |
2942 | ret = BBPsave(b); |
2943 | if (ret == GDK_SUCCEED) { |
2944 | if (isVIEW(b)) { /* physical view */ |
2945 | VIEWdestroy(b); |
2946 | } else { |
2947 | if (BBP_cache(bid)) |
2948 | BATfree(b); /* free memory */ |
2949 | } |
2950 | BBPuncacheit(bid, false); |
2951 | } |
2952 | /* clearing bits can be done without the lock */ |
2953 | BATDEBUG { |
2954 | fprintf(stderr, "#BBPfree turn off unloading %d\n" , bid); |
2955 | } |
2956 | BBP_status_off(bid, BBPUNLOADING, calledFrom); |
2957 | BBP_unload_dec(); |
2958 | |
2959 | /* parent released when completely done with child */ |
2960 | if (ret == GDK_SUCCEED && tp) |
2961 | GDKunshare(tp); |
2962 | if (ret == GDK_SUCCEED && vtp) |
2963 | GDKunshare(vtp); |
2964 | return ret; |
2965 | } |
2966 | |
/*
 * BBPquickdesc loads a BAT descriptor without loading the entire BAT.
 * The result may be used only for a *limited* number of purposes.
 * Specifically, during the global sync/commit, we do not want to load
 * any BATs that are not already loaded, both because this costs
 * performance, and because getting into memory shortage during a
 * commit is extremely dangerous. Loading a BAT tends not to be
 * required, since the commit actions mostly involve moving some
 * pointers in the BAT descriptor. However, some column types do
 * require loading the full bat. This is tested by the complexatom()
 * routine. Such columns are those of which the type has a fix/unfix
 * method, or those that have HeapDelete methods. The HeapDelete
 * actions are not always required and therefore BBPquickdesc is
 * parametrized.
 */
2982 | static bool |
2983 | complexatom(int t, bool delaccess) |
2984 | { |
2985 | if (t >= 0 && (BATatoms[t].atomFix || (delaccess && BATatoms[t].atomDel))) { |
2986 | return true; |
2987 | } |
2988 | return false; |
2989 | } |
2990 | |
2991 | BAT * |
2992 | BBPquickdesc(bat bid, bool delaccess) |
2993 | { |
2994 | BAT *b; |
2995 | |
2996 | if (is_bat_nil(bid)) |
2997 | return NULL; |
2998 | if (bid < 0) { |
2999 | GDKerror("BBPquickdesc: called with negative batid.\n" ); |
3000 | assert(0); |
3001 | return NULL; |
3002 | } |
3003 | if ((b = BBP_cache(bid)) != NULL) |
3004 | return b; /* already cached */ |
3005 | b = (BAT *) BBPgetdesc(bid); |
3006 | if (b == NULL || |
3007 | complexatom(b->ttype, delaccess)) { |
3008 | b = BATload_intern(bid, true); |
3009 | } |
3010 | return b; |
3011 | } |
3012 | |
3013 | /* |
3014 | * @+ Global Commit |
3015 | */ |
3016 | static BAT * |
3017 | dirty_bat(bat *i, bool subcommit) |
3018 | { |
3019 | if (BBPvalid(*i)) { |
3020 | BAT *b; |
3021 | BBPspin(*i, "dirty_bat" , BBPSAVING); |
3022 | b = BBP_cache(*i); |
3023 | if (b != NULL) { |
3024 | if ((BBP_status(*i) & BBPNEW) && |
3025 | BATcheckmodes(b, false) != GDK_SUCCEED) /* check mmap modes */ |
3026 | *i = 0; /* error */ |
3027 | if ((BBP_status(*i) & BBPPERSISTENT) && |
3028 | (subcommit || BATdirty(b))) |
3029 | return b; /* the bat is loaded, persistent and dirty */ |
3030 | } else if (BBP_status(*i) & BBPSWAPPED) { |
3031 | b = (BAT *) BBPquickdesc(*i, true); |
3032 | if (b && (subcommit || b->batDirtydesc)) |
3033 | return b; /* only the desc is loaded & dirty */ |
3034 | } |
3035 | } |
3036 | return NULL; |
3037 | } |
3038 | |
3039 | /* |
3040 | * @- backup-bat |
3041 | * Backup-bat moves all files of a BAT to a backup directory. Only |
3042 | * after this succeeds, it may be saved. If some failure occurs |
3043 | * halfway saving, we can thus always roll back. |
3044 | */ |
3045 | static gdk_return |
3046 | file_move(int farmid, const char *srcdir, const char *dstdir, const char *name, const char *ext) |
3047 | { |
3048 | if (GDKmove(farmid, srcdir, name, ext, dstdir, name, ext) == GDK_SUCCEED) { |
3049 | return GDK_SUCCEED; |
3050 | } else { |
3051 | char *path; |
3052 | struct stat st; |
3053 | |
3054 | path = GDKfilepath(farmid, srcdir, name, ext); |
3055 | if (path == NULL) |
3056 | return GDK_FAIL; |
3057 | if (stat(path, &st)) { |
3058 | /* source file does not exist; the best |
3059 | * recovery is to give an error but continue |
3060 | * by considering the BAT as not saved; making |
3061 | * sure that this time it does get saved. |
3062 | */ |
3063 | GDKsyserror("file_move: cannot stat %s\n" , path); |
3064 | GDKfree(path); |
3065 | return GDK_FAIL; /* fishy, but not fatal */ |
3066 | } |
3067 | GDKfree(path); |
3068 | } |
3069 | return GDK_FAIL; |
3070 | } |
3071 | |
3072 | /* returns true if the file exists */ |
3073 | static bool |
3074 | file_exists(int farmid, const char *dir, const char *name, const char *ext) |
3075 | { |
3076 | char *path; |
3077 | struct stat st; |
3078 | int ret = -1; |
3079 | |
3080 | path = GDKfilepath(farmid, dir, name, ext); |
3081 | if (path) { |
3082 | ret = stat(path, &st); |
3083 | IODEBUG fprintf(stderr, "#stat(%s) = %d\n" , path, ret); |
3084 | GDKfree(path); |
3085 | } |
3086 | return (ret == 0); |
3087 | } |
3088 | |
3089 | static gdk_return |
3090 | heap_move(Heap *hp, const char *srcdir, const char *dstdir, const char *nme, const char *ext) |
3091 | { |
3092 | /* see doc at BATsetaccess()/gdk_bat.c for an expose on mmap |
3093 | * heap modes */ |
3094 | if (file_exists(hp->farmid, dstdir, nme, ext)) { |
3095 | /* dont overwrite heap with the committed state |
3096 | * already in dstdir */ |
3097 | return GDK_SUCCEED; |
3098 | } else if (hp->newstorage == STORE_PRIV && |
3099 | !file_exists(hp->farmid, srcdir, nme, ext)) { |
3100 | |
3101 | /* In order to prevent half-saved X.new files |
3102 | * surviving a recover we create a dummy file in the |
3103 | * BACKUP(dstdir) whose presence will trigger |
3104 | * BBPrecover to remove them. Thus, X will prevail |
3105 | * where it otherwise wouldn't have. If X already has |
3106 | * a saved X.new, that one is backed up as normal. |
3107 | */ |
3108 | |
3109 | FILE *fp; |
3110 | long_str kill_ext; |
3111 | char *path; |
3112 | |
3113 | strconcat_len(kill_ext, sizeof(kill_ext), ext, ".kill" , NULL); |
3114 | path = GDKfilepath(hp->farmid, dstdir, nme, kill_ext); |
3115 | if (path == NULL) |
3116 | return GDK_FAIL; |
3117 | fp = fopen(path, "w" ); |
3118 | if (fp == NULL) |
3119 | GDKsyserror("heap_move: cannot open file %s\n" , path); |
3120 | IODEBUG fprintf(stderr, "#open %s = %d\n" , path, fp ? 0 : -1); |
3121 | GDKfree(path); |
3122 | |
3123 | if (fp != NULL) { |
3124 | fclose(fp); |
3125 | return GDK_SUCCEED; |
3126 | } else { |
3127 | return GDK_FAIL; |
3128 | } |
3129 | } |
3130 | return file_move(hp->farmid, srcdir, dstdir, nme, ext); |
3131 | } |
3132 | |
3133 | /* |
3134 | * @- BBPprepare |
3135 | * |
3136 | * this routine makes sure there is a BAKDIR/, and initiates one if |
3137 | * not. For subcommits, it does the same with SUBDIR. |
3138 | * |
3139 | * It is now locked, to get proper file counters, and also to prevent |
3140 | * concurrent BBPrecovers, etc. |
3141 | * |
3142 | * backup_dir == 0 => no backup BBP.dir |
3143 | * backup_dir == 1 => BBP.dir saved in BACKUP/ |
3144 | * backup_dir == 2 => BBP.dir saved in SUBCOMMIT/ |
3145 | */ |
3146 | |
3147 | static gdk_return |
3148 | BBPprepare(bool subcommit) |
3149 | { |
3150 | bool start_subcommit; |
3151 | int set = 1 + subcommit; |
3152 | str bakdirpath, subdirpath; |
3153 | gdk_return ret = GDK_SUCCEED; |
3154 | |
3155 | if(!(bakdirpath = GDKfilepath(0, NULL, BAKDIR, NULL))) |
3156 | return GDK_FAIL; |
3157 | if(!(subdirpath = GDKfilepath(0, NULL, SUBDIR, NULL))) { |
3158 | GDKfree(bakdirpath); |
3159 | return GDK_FAIL; |
3160 | } |
3161 | |
3162 | /* tmLock is only used here, helds usually very shortly just |
3163 | * to protect the file counters */ |
3164 | MT_lock_set(&GDKtmLock); |
3165 | |
3166 | start_subcommit = (subcommit && backup_subdir == 0); |
3167 | if (start_subcommit) { |
3168 | /* starting a subcommit. Make sure SUBDIR and DELDIR |
3169 | * are clean */ |
3170 | ret = BBPrecover_subdir(); |
3171 | } |
3172 | if (backup_files == 0) { |
3173 | backup_dir = 0; |
3174 | ret = BBPrecover(0); |
3175 | if (ret == GDK_SUCCEED) { |
3176 | if (mkdir(bakdirpath, MONETDB_DIRMODE) < 0 && errno != EEXIST) { |
3177 | GDKsyserror("BBPprepare: cannot create directory %s\n" , bakdirpath); |
3178 | ret = GDK_FAIL; |
3179 | } |
3180 | /* if BAKDIR already exists, don't signal error */ |
3181 | IODEBUG fprintf(stderr, "#mkdir %s = %d\n" , bakdirpath, (int) ret); |
3182 | } |
3183 | } |
3184 | if (ret == GDK_SUCCEED && start_subcommit) { |
3185 | /* make a new SUBDIR (subdir of BAKDIR) */ |
3186 | if (mkdir(subdirpath, MONETDB_DIRMODE) < 0) { |
3187 | GDKsyserror("BBPprepare: cannot create directory %s\n" , subdirpath); |
3188 | ret = GDK_FAIL; |
3189 | } |
3190 | IODEBUG fprintf(stderr, "#mkdir %s = %d\n" , subdirpath, (int) ret); |
3191 | } |
3192 | if (ret == GDK_SUCCEED && backup_dir != set) { |
3193 | /* a valid backup dir *must* at least contain BBP.dir */ |
3194 | if ((ret = GDKmove(0, backup_dir ? BAKDIR : BATDIR, "BBP" , "dir" , subcommit ? SUBDIR : BAKDIR, "BBP" , "dir" )) == GDK_SUCCEED) { |
3195 | backup_dir = set; |
3196 | } |
3197 | } |
3198 | /* increase counters */ |
3199 | if (ret == GDK_SUCCEED) { |
3200 | backup_subdir += subcommit; |
3201 | backup_files++; |
3202 | } |
3203 | MT_lock_unset(&GDKtmLock); |
3204 | GDKfree(bakdirpath); |
3205 | GDKfree(subdirpath); |
3206 | return ret; |
3207 | } |
3208 | |
3209 | static gdk_return |
3210 | do_backup(const char *srcdir, const char *nme, const char *ext, |
3211 | Heap *h, bool dirty, bool subcommit) |
3212 | { |
3213 | gdk_return ret = GDK_SUCCEED; |
3214 | |
3215 | /* direct mmap is unprotected (readonly usage, or has WAL |
3216 | * protection); however, if we're backing up for subcommit |
3217 | * and a backup already exists in the main backup directory |
3218 | * (see GDKupgradevarheap), move the file */ |
3219 | if (subcommit && file_exists(h->farmid, BAKDIR, nme, ext)) { |
3220 | if (file_move(h->farmid, BAKDIR, SUBDIR, nme, ext) != GDK_SUCCEED) |
3221 | return GDK_FAIL; |
3222 | } |
3223 | if (h->storage != STORE_MMAP) { |
3224 | /* STORE_PRIV saves into X.new files. Two cases could |
3225 | * happen. The first is when a valid X.new exists |
3226 | * because of an access change or a previous |
3227 | * commit. This X.new should be backed up as |
3228 | * usual. The second case is when X.new doesn't |
3229 | * exist. In that case we could have half written |
3230 | * X.new files (after a crash). To protect against |
3231 | * these we write X.new.kill files in the backup |
3232 | * directory (see heap_move). */ |
3233 | char extnew[16]; |
3234 | gdk_return mvret = GDK_SUCCEED; |
3235 | |
3236 | strconcat_len(extnew, sizeof(extnew), ext, ".new" , NULL); |
3237 | if (dirty && |
3238 | !file_exists(h->farmid, BAKDIR, nme, extnew) && |
3239 | !file_exists(h->farmid, BAKDIR, nme, ext)) { |
3240 | /* if the heap is dirty and there is no heap |
3241 | * file (with or without .new extension) in |
3242 | * the BAKDIR, move the heap (preferably with |
3243 | * .new extension) to the correct backup |
3244 | * directory */ |
3245 | if (file_exists(h->farmid, srcdir, nme, extnew)) |
3246 | mvret = heap_move(h, srcdir, |
3247 | subcommit ? SUBDIR : BAKDIR, |
3248 | nme, extnew); |
3249 | else |
3250 | mvret = heap_move(h, srcdir, |
3251 | subcommit ? SUBDIR : BAKDIR, |
3252 | nme, ext); |
3253 | } else if (subcommit) { |
3254 | /* if subcommit, wqe may need to move an |
3255 | * already made backup from BAKDIR to |
3256 | * SUBSIR */ |
3257 | if (file_exists(h->farmid, BAKDIR, nme, extnew)) |
3258 | mvret = file_move(h->farmid, BAKDIR, SUBDIR, nme, extnew); |
3259 | else if (file_exists(h->farmid, BAKDIR, nme, ext)) |
3260 | mvret = file_move(h->farmid, BAKDIR, SUBDIR, nme, ext); |
3261 | } |
3262 | /* there is a situation where the move may fail, |
3263 | * namely if this heap was not supposed to be existing |
3264 | * before, i.e. after a BATmaterialize on a persistent |
3265 | * bat as a workaround, do not complain about move |
3266 | * failure if the source file is nonexistent |
3267 | */ |
3268 | if (mvret != GDK_SUCCEED && file_exists(h->farmid, srcdir, nme, ext)) { |
3269 | ret = GDK_FAIL; |
3270 | } |
3271 | if (subcommit && |
3272 | (h->storage == STORE_PRIV || h->newstorage == STORE_PRIV)) { |
3273 | long_str kill_ext; |
3274 | |
3275 | strconcat_len(kill_ext, sizeof(kill_ext), |
3276 | ext, ".new.kill" , NULL); |
3277 | if (file_exists(h->farmid, BAKDIR, nme, kill_ext) && |
3278 | file_move(h->farmid, BAKDIR, SUBDIR, nme, kill_ext) != GDK_SUCCEED) { |
3279 | ret = GDK_FAIL; |
3280 | } |
3281 | } |
3282 | } |
3283 | return ret; |
3284 | } |
3285 | |
3286 | static gdk_return |
3287 | BBPbackup(BAT *b, bool subcommit) |
3288 | { |
3289 | char *srcdir; |
3290 | long_str nme; |
3291 | const char *s = BBP_physical(b->batCacheid); |
3292 | size_t slen; |
3293 | |
3294 | if (BBPprepare(subcommit) != GDK_SUCCEED) { |
3295 | return GDK_FAIL; |
3296 | } |
3297 | if (!b->batCopiedtodisk || b->batTransient) { |
3298 | return GDK_SUCCEED; |
3299 | } |
3300 | /* determine location dir and physical suffix */ |
3301 | if (!(srcdir = GDKfilepath(NOFARM, BATDIR, s, NULL))) |
3302 | goto fail; |
3303 | s = strrchr(srcdir, DIR_SEP); |
3304 | if (!s) |
3305 | goto fail; |
3306 | |
3307 | slen = strlen(++s); |
3308 | if (slen >= sizeof(nme)) |
3309 | goto fail; |
3310 | memcpy(nme, s, slen + 1); |
3311 | srcdir[s - srcdir] = 0; |
3312 | |
3313 | if (b->ttype != TYPE_void && |
3314 | do_backup(srcdir, nme, "tail" , &b->theap, |
3315 | b->batDirtydesc || b->theap.dirty, |
3316 | subcommit) != GDK_SUCCEED) |
3317 | goto fail; |
3318 | if (b->tvheap && |
3319 | do_backup(srcdir, nme, "theap" , b->tvheap, |
3320 | b->batDirtydesc || b->tvheap->dirty, |
3321 | subcommit) != GDK_SUCCEED) |
3322 | goto fail; |
3323 | GDKfree(srcdir); |
3324 | return GDK_SUCCEED; |
3325 | fail: |
3326 | if(srcdir) |
3327 | GDKfree(srcdir); |
3328 | return GDK_FAIL; |
3329 | } |
3330 | |
3331 | /* |
3332 | * @+ Atomic Write |
3333 | * The atomic BBPsync() function first safeguards the old images of |
3334 | * all files to be written in BAKDIR. It then saves all files. If that |
3335 | * succeeds fully, BAKDIR is renamed to DELDIR. The rename is |
3336 | * considered an atomic action. If it succeeds, the DELDIR is removed. |
3337 | * If something fails, the pre-sync status can be obtained by moving |
3338 | * back all backed up files; this is done by BBPrecover(). |
3339 | * |
3340 | * The BBP.dir is also moved into the BAKDIR. |
3341 | */ |
3342 | gdk_return |
3343 | BBPsync(int cnt, bat *subcommit) |
3344 | { |
3345 | gdk_return ret = GDK_SUCCEED; |
3346 | int t0 = 0, t1 = 0; |
3347 | str bakdir, deldir; |
3348 | |
3349 | if(!(bakdir = GDKfilepath(0, NULL, subcommit ? SUBDIR : BAKDIR, NULL))) |
3350 | return GDK_FAIL; |
3351 | if(!(deldir = GDKfilepath(0, NULL, DELDIR, NULL))) { |
3352 | GDKfree(bakdir); |
3353 | return GDK_FAIL; |
3354 | } |
3355 | |
3356 | PERFDEBUG t0 = t1 = GDKms(); |
3357 | |
3358 | ret = BBPprepare(subcommit != NULL); |
3359 | |
3360 | /* PHASE 1: safeguard everything in a backup-dir */ |
3361 | if (ret == GDK_SUCCEED) { |
3362 | int idx = 0; |
3363 | |
3364 | while (++idx < cnt) { |
3365 | bat i = subcommit ? subcommit[idx] : idx; |
3366 | BAT *b = dirty_bat(&i, subcommit != NULL); |
3367 | if (i <= 0) |
3368 | break; |
3369 | if (BBP_status(i) & BBPEXISTING) { |
3370 | if (b != NULL && BBPbackup(b, subcommit != NULL) != GDK_SUCCEED) |
3371 | break; |
3372 | } else if (subcommit && (b = BBP_desc(i)) && BBP_status(i) & BBPDELETED) { |
3373 | char o[10]; |
3374 | char *f; |
3375 | snprintf(o, sizeof(o), "%o" , (unsigned) b->batCacheid); |
3376 | f = GDKfilepath(b->theap.farmid, BAKDIR, o, "tail" ); |
3377 | if (access(f, F_OK) == 0) |
3378 | file_move(b->theap.farmid, BAKDIR, SUBDIR, o, "tail" ); |
3379 | GDKfree(f); |
3380 | f = GDKfilepath(b->theap.farmid, BAKDIR, o, "theap" ); |
3381 | if (access(f, F_OK) == 0) |
3382 | file_move(b->theap.farmid, BAKDIR, SUBDIR, o, "theap" ); |
3383 | GDKfree(f); |
3384 | } |
3385 | } |
3386 | if (idx < cnt) |
3387 | ret = GDK_FAIL; |
3388 | } |
3389 | PERFDEBUG fprintf(stderr, "#BBPsync (move time %d) %d files\n" , (t1 = GDKms()) - t0, backup_files); |
3390 | |
3391 | /* PHASE 2: save the repository */ |
3392 | if (ret == GDK_SUCCEED) { |
3393 | int idx = 0; |
3394 | |
3395 | while (++idx < cnt) { |
3396 | bat i = subcommit ? subcommit[idx] : idx; |
3397 | |
3398 | if (BBP_status(i) & BBPPERSISTENT) { |
3399 | BAT *b = dirty_bat(&i, subcommit != NULL); |
3400 | if (i <= 0) |
3401 | break; |
3402 | if (b != NULL && BATsave(b) != GDK_SUCCEED) |
3403 | break; /* write error */ |
3404 | } |
3405 | } |
3406 | if (idx < cnt) |
3407 | ret = GDK_FAIL; |
3408 | } |
3409 | |
3410 | PERFDEBUG fprintf(stderr, "#BBPsync (write time %d)\n" , (t0 = GDKms()) - t1); |
3411 | |
3412 | if (ret == GDK_SUCCEED) { |
3413 | ret = BBPdir(cnt, subcommit); |
3414 | } |
3415 | |
3416 | PERFDEBUG fprintf(stderr, "#BBPsync (dir time %d) %d bats\n" , (t1 = GDKms()) - t0, (bat) ATOMIC_GET(&BBPsize)); |
3417 | |
3418 | if (ret == GDK_SUCCEED) { |
3419 | /* atomic switchover */ |
3420 | /* this is the big one: this call determines |
3421 | * whether the operation of this function |
3422 | * succeeded, so no changing of ret after this |
3423 | * call anymore */ |
3424 | |
3425 | if (rename(bakdir, deldir) < 0) |
3426 | ret = GDK_FAIL; |
3427 | if (ret != GDK_SUCCEED && |
3428 | GDKremovedir(0, DELDIR) == GDK_SUCCEED && /* maybe there was an old deldir */ |
3429 | rename(bakdir, deldir) < 0) |
3430 | ret = GDK_FAIL; |
3431 | if (ret != GDK_SUCCEED) |
3432 | GDKsyserror("BBPsync: rename(%s,%s) failed.\n" , bakdir, deldir); |
3433 | IODEBUG fprintf(stderr, "#BBPsync: rename %s %s = %d\n" , bakdir, deldir, (int) ret); |
3434 | } |
3435 | |
3436 | /* AFTERMATH */ |
3437 | if (ret == GDK_SUCCEED) { |
3438 | backup_files = subcommit ? (backup_files - backup_subdir) : 0; |
3439 | backup_dir = backup_subdir = 0; |
3440 | if (GDKremovedir(0, DELDIR) != GDK_SUCCEED) |
3441 | fprintf(stderr, "#BBPsync: cannot remove directory %s\n" , DELDIR); |
3442 | (void) BBPprepare(false); /* (try to) remove DELDIR and set up new BAKDIR */ |
3443 | if (backup_files > 1) { |
3444 | PERFDEBUG fprintf(stderr, "#BBPsync (backup_files %d > 1)\n" , backup_files); |
3445 | backup_files = 1; |
3446 | } |
3447 | } |
3448 | PERFDEBUG fprintf(stderr, "#BBPsync (ready time %d)\n" , (t0 = GDKms()) - t1); |
3449 | GDKfree(bakdir); |
3450 | GDKfree(deldir); |
3451 | return ret; |
3452 | } |
3453 | |
3454 | /* |
3455 | * Recovery just moves all files back to their original location. this |
3456 | * is an incremental process: if something fails, just stop with still |
3457 | * files left for moving in BACKUP/. The recovery process can resume |
3458 | * later with the left over files. |
3459 | */ |
3460 | static gdk_return |
3461 | force_move(int farmid, const char *srcdir, const char *dstdir, const char *name) |
3462 | { |
3463 | const char *p; |
3464 | char *dstpath, *killfile; |
3465 | gdk_return ret = GDK_SUCCEED; |
3466 | |
3467 | if ((p = strrchr(name, '.')) != NULL && strcmp(p, ".kill" ) == 0) { |
3468 | /* Found a X.new.kill file, ie remove the X.new file */ |
3469 | ptrdiff_t len = p - name; |
3470 | long_str srcpath; |
3471 | |
3472 | strncpy(srcpath, name, len); |
3473 | srcpath[len] = '\0'; |
3474 | if(!(dstpath = GDKfilepath(farmid, dstdir, srcpath, NULL))) { |
3475 | GDKsyserror("force_move: malloc fail\n" ); |
3476 | return GDK_FAIL; |
3477 | } |
3478 | |
3479 | /* step 1: remove the X.new file that is going to be |
3480 | * overridden by X */ |
3481 | if (remove(dstpath) != 0 && errno != ENOENT) { |
3482 | /* if it exists and cannot be removed, all |
3483 | * this is going to fail */ |
3484 | GDKsyserror("force_move: remove(%s)\n" , dstpath); |
3485 | GDKfree(dstpath); |
3486 | return GDK_FAIL; |
3487 | } |
3488 | GDKfree(dstpath); |
3489 | |
3490 | /* step 2: now remove the .kill file. This one is |
3491 | * crucial, otherwise we'll never finish recovering */ |
3492 | if(!(killfile = GDKfilepath(farmid, srcdir, name, NULL))) { |
3493 | GDKsyserror("force_move: malloc fail\n" ); |
3494 | return GDK_FAIL; |
3495 | } |
3496 | if (remove(killfile) != 0) { |
3497 | ret = GDK_FAIL; |
3498 | GDKsyserror("force_move: remove(%s)\n" , killfile); |
3499 | } |
3500 | GDKfree(killfile); |
3501 | return ret; |
3502 | } |
3503 | /* try to rename it */ |
3504 | ret = GDKmove(farmid, srcdir, name, NULL, dstdir, name, NULL); |
3505 | |
3506 | if (ret != GDK_SUCCEED) { |
3507 | char *srcpath; |
3508 | |
3509 | /* two legal possible causes: file exists or dir |
3510 | * doesn't exist */ |
3511 | if(!(dstpath = GDKfilepath(farmid, dstdir, name, NULL))) |
3512 | return GDK_FAIL; |
3513 | if(!(srcpath = GDKfilepath(farmid, srcdir, name, NULL))) { |
3514 | GDKfree(dstpath); |
3515 | return GDK_FAIL; |
3516 | } |
3517 | if (remove(dstpath) != 0) /* clear destination */ |
3518 | ret = GDK_FAIL; |
3519 | IODEBUG fprintf(stderr, "#remove %s = %d\n" , dstpath, (int) ret); |
3520 | |
3521 | (void) GDKcreatedir(dstdir); /* if fails, move will fail */ |
3522 | ret = GDKmove(farmid, srcdir, name, NULL, dstdir, name, NULL); |
3523 | IODEBUG fprintf(stderr, "#link %s %s = %d\n" , srcpath, dstpath, (int) ret); |
3524 | GDKfree(dstpath); |
3525 | GDKfree(srcpath); |
3526 | } |
3527 | return ret; |
3528 | } |
3529 | |
3530 | gdk_return |
3531 | BBPrecover(int farmid) |
3532 | { |
3533 | str bakdirpath; |
3534 | str leftdirpath; |
3535 | DIR *dirp; |
3536 | struct dirent *dent; |
3537 | long_str path, dstpath; |
3538 | bat i; |
3539 | size_t j = strlen(BATDIR); |
3540 | gdk_return ret = GDK_SUCCEED; |
3541 | bool dirseen = false; |
3542 | str dstdir; |
3543 | |
3544 | bakdirpath = GDKfilepath(farmid, NULL, BAKDIR, NULL); |
3545 | leftdirpath = GDKfilepath(farmid, NULL, LEFTDIR, NULL); |
3546 | if (bakdirpath == NULL || leftdirpath == NULL) { |
3547 | GDKfree(bakdirpath); |
3548 | GDKfree(leftdirpath); |
3549 | return GDK_FAIL; |
3550 | } |
3551 | dirp = opendir(bakdirpath); |
3552 | if (dirp == NULL) { |
3553 | GDKfree(bakdirpath); |
3554 | GDKfree(leftdirpath); |
3555 | return GDK_SUCCEED; /* nothing to do */ |
3556 | } |
3557 | memcpy(dstpath, BATDIR, j); |
3558 | dstpath[j] = DIR_SEP; |
3559 | dstpath[++j] = 0; |
3560 | dstdir = dstpath + j; |
3561 | IODEBUG fprintf(stderr, "#BBPrecover(start)\n" ); |
3562 | |
3563 | if (mkdir(leftdirpath, MONETDB_DIRMODE) < 0 && errno != EEXIST) { |
3564 | GDKsyserror("BBPrecover: cannot create directory %s\n" , leftdirpath); |
3565 | closedir(dirp); |
3566 | GDKfree(bakdirpath); |
3567 | GDKfree(leftdirpath); |
3568 | return GDK_FAIL; |
3569 | } |
3570 | |
3571 | /* move back all files */ |
3572 | while ((dent = readdir(dirp)) != NULL) { |
3573 | const char *q = strchr(dent->d_name, '.'); |
3574 | |
3575 | if (q == dent->d_name) { |
3576 | char *fn; |
3577 | |
3578 | if (strcmp(dent->d_name, "." ) == 0 || |
3579 | strcmp(dent->d_name, ".." ) == 0) |
3580 | continue; |
3581 | fn = GDKfilepath(farmid, BAKDIR, dent->d_name, NULL); |
3582 | if (fn) { |
3583 | int uret = remove(fn); |
3584 | IODEBUG fprintf(stderr, "#remove %s = %d\n" , |
3585 | fn, uret); |
3586 | GDKfree(fn); |
3587 | } |
3588 | continue; |
3589 | } else if (strcmp(dent->d_name, "BBP.dir" ) == 0) { |
3590 | dirseen = true; |
3591 | continue; |
3592 | } |
3593 | if (q == NULL) |
3594 | q = dent->d_name + strlen(dent->d_name); |
3595 | if ((j = q - dent->d_name) + 1 > sizeof(path)) { |
3596 | /* name too long: ignore */ |
3597 | continue; |
3598 | } |
3599 | strncpy(path, dent->d_name, j); |
3600 | path[j] = 0; |
3601 | if (GDKisdigit(*path)) { |
3602 | i = strtol(path, NULL, 8); |
3603 | } else { |
3604 | i = BBP_find(path, false); |
3605 | if (i < 0) |
3606 | i = -i; |
3607 | } |
3608 | if (i == 0 || i >= (bat) ATOMIC_GET(&BBPsize) || !BBPvalid(i)) { |
3609 | force_move(farmid, BAKDIR, LEFTDIR, dent->d_name); |
3610 | } else { |
3611 | BBPgetsubdir(dstdir, i); |
3612 | if (force_move(farmid, BAKDIR, dstpath, dent->d_name) != GDK_SUCCEED) |
3613 | ret = GDK_FAIL; |
3614 | } |
3615 | } |
3616 | closedir(dirp); |
3617 | if (dirseen && ret == GDK_SUCCEED) { /* we have a saved BBP.dir; it should be moved back!! */ |
3618 | struct stat st; |
3619 | char *fn; |
3620 | |
3621 | fn = GDKfilepath(farmid, BATDIR, "BBP" , "dir" ); |
3622 | ret = recover_dir(farmid, stat(fn, &st) == 0); |
3623 | GDKfree(fn); |
3624 | } |
3625 | |
3626 | if (ret == GDK_SUCCEED) { |
3627 | if (rmdir(bakdirpath) < 0) { |
3628 | GDKsyserror("BBPrecover: cannot remove directory %s\n" , bakdirpath); |
3629 | ret = GDK_FAIL; |
3630 | } |
3631 | IODEBUG fprintf(stderr, "#rmdir %s = %d\n" , bakdirpath, (int) ret); |
3632 | } |
3633 | if (ret != GDK_SUCCEED) |
3634 | GDKerror("BBPrecover: recovery failed. Please check whether your disk is full or write-protected.\n" ); |
3635 | |
3636 | IODEBUG fprintf(stderr, "#BBPrecover(end)\n" ); |
3637 | GDKfree(bakdirpath); |
3638 | GDKfree(leftdirpath); |
3639 | return ret; |
3640 | } |
3641 | |
/*
 * SUBDIR recovery quite mindlessly moves all files back to the
 * parent (BAKDIR). We do recognize moving back BBP.dir and set
 * backup_dir accordingly.
 */
3647 | gdk_return |
3648 | BBPrecover_subdir(void) |
3649 | { |
3650 | str subdirpath; |
3651 | DIR *dirp; |
3652 | struct dirent *dent; |
3653 | gdk_return ret = GDK_SUCCEED; |
3654 | |
3655 | subdirpath = GDKfilepath(0, NULL, SUBDIR, NULL); |
3656 | if (subdirpath == NULL) |
3657 | return GDK_FAIL; |
3658 | dirp = opendir(subdirpath); |
3659 | GDKfree(subdirpath); |
3660 | if (dirp == NULL) { |
3661 | return GDK_SUCCEED; /* nothing to do */ |
3662 | } |
3663 | IODEBUG fprintf(stderr, "#BBPrecover_subdir(start)\n" ); |
3664 | |
3665 | /* move back all files */ |
3666 | while ((dent = readdir(dirp)) != NULL) { |
3667 | if (dent->d_name[0] == '.') |
3668 | continue; |
3669 | ret = GDKmove(0, SUBDIR, dent->d_name, NULL, BAKDIR, dent->d_name, NULL); |
3670 | if (ret == GDK_SUCCEED && strcmp(dent->d_name, "BBP.dir" ) == 0) |
3671 | backup_dir = 1; |
3672 | if (ret != GDK_SUCCEED) |
3673 | break; |
3674 | } |
3675 | closedir(dirp); |
3676 | |
3677 | /* delete the directory */ |
3678 | if (ret == GDK_SUCCEED) { |
3679 | ret = GDKremovedir(0, SUBDIR); |
3680 | if (backup_dir == 2) { |
3681 | IODEBUG fprintf(stderr, "#BBPrecover_subdir: %s%cBBP.dir had disappeared!" , SUBDIR, DIR_SEP); |
3682 | backup_dir = 0; |
3683 | } |
3684 | } |
3685 | IODEBUG fprintf(stderr, "#BBPrecover_subdir(end) = %d\n" , (int) ret); |
3686 | |
3687 | if (ret != GDK_SUCCEED) |
3688 | GDKerror("BBPrecover_subdir: recovery failed. Please check whether your disk is full or write-protected.\n" ); |
3689 | return ret; |
3690 | } |
3691 | |
3692 | /* |
3693 | * @- The diskscan |
3694 | * The BBPdiskscan routine walks through the BAT dir, cleans up |
3695 | * leftovers, and measures disk occupancy. Leftovers are files that |
3696 | * cannot belong to a BAT. in order to establish this for [ht]heap |
3697 | * files, the BAT descriptor is loaded in order to determine whether |
3698 | * these files are still required. |
3699 | * |
3700 | * The routine gathers all bat sizes in a bat that contains bat-ids |
3701 | * and bytesizes. The return value is the number of bytes of space |
3702 | * freed. |
3703 | */ |
3704 | static bool |
3705 | persistent_bat(bat bid) |
3706 | { |
3707 | if (bid >= 0 && bid < (bat) ATOMIC_GET(&BBPsize) && BBPvalid(bid)) { |
3708 | BAT *b = BBP_cache(bid); |
3709 | |
3710 | if (b == NULL || b->batCopiedtodisk) { |
3711 | return true; |
3712 | } |
3713 | } |
3714 | return false; |
3715 | } |
3716 | |
3717 | static BAT * |
3718 | getdesc(bat bid) |
3719 | { |
3720 | BAT *b = BBPgetdesc(bid); |
3721 | |
3722 | if (b == NULL) |
3723 | BBPclear(bid); |
3724 | return b; |
3725 | } |
3726 | |
3727 | static bool |
3728 | BBPdiskscan(const char *parent, size_t baseoff) |
3729 | { |
3730 | DIR *dirp = opendir(parent); |
3731 | struct dirent *dent; |
3732 | char fullname[FILENAME_MAX]; |
3733 | str dst = fullname; |
3734 | size_t dstlen = sizeof(fullname); |
3735 | const char *src = parent; |
3736 | |
3737 | if (dirp == NULL) |
3738 | return true; /* nothing to do */ |
3739 | |
3740 | while (*src) { |
3741 | *dst++ = *src++; |
3742 | dstlen--; |
3743 | } |
3744 | if (dst > fullname && dst[-1] != DIR_SEP) { |
3745 | *dst++ = DIR_SEP; |
3746 | dstlen--; |
3747 | } |
3748 | |
3749 | while ((dent = readdir(dirp)) != NULL) { |
3750 | const char *p; |
3751 | bat bid; |
3752 | bool ok, delete; |
3753 | |
3754 | if (dent->d_name[0] == '.') |
3755 | continue; /* ignore .dot files and directories (. ..) */ |
3756 | |
3757 | if (strncmp(dent->d_name, "BBP." , 4) == 0 && |
3758 | (strcmp(parent + baseoff, BATDIR) == 0 || |
3759 | strncmp(parent + baseoff, BAKDIR, strlen(BAKDIR)) == 0 || |
3760 | strncmp(parent + baseoff, SUBDIR, strlen(SUBDIR)) == 0)) |
3761 | continue; |
3762 | |
3763 | p = strchr(dent->d_name, '.'); |
3764 | |
3765 | if (strlen(dent->d_name) >= dstlen) { |
3766 | /* found a file with too long a name |
3767 | (i.e. unknown); stop pruning in this |
3768 | subdir */ |
3769 | fprintf(stderr, "BBPdiskscan: unexpected file %s, leaving %s.\n" , dent->d_name, parent); |
3770 | break; |
3771 | } |
3772 | strncpy(dst, dent->d_name, dstlen); |
3773 | fullname[sizeof(fullname) - 1] = 0; |
3774 | |
3775 | if (p == NULL && !BBPdiskscan(fullname, baseoff)) { |
3776 | /* it was a directory */ |
3777 | continue; |
3778 | } |
3779 | |
3780 | if (p && strcmp(p + 1, "tmp" ) == 0) { |
3781 | delete = true; |
3782 | ok = true; |
3783 | bid = 0; |
3784 | } else { |
3785 | bid = strtol(dent->d_name, NULL, 8); |
3786 | ok = p && bid; |
3787 | delete = false; |
3788 | |
3789 | if (!ok || !persistent_bat(bid)) { |
3790 | delete = true; |
3791 | } else if (strncmp(p + 1, "tail" , 4) == 0) { |
3792 | BAT *b = getdesc(bid); |
3793 | delete = (b == NULL || !b->ttype || !b->batCopiedtodisk); |
3794 | } else if (strncmp(p + 1, "theap" , 5) == 0) { |
3795 | BAT *b = getdesc(bid); |
3796 | delete = (b == NULL || !b->tvheap || !b->batCopiedtodisk); |
3797 | } else if (strncmp(p + 1, "thash" , 5) == 0) { |
3798 | #ifdef PERSISTENTHASH |
3799 | BAT *b = getdesc(bid); |
3800 | delete = b == NULL; |
3801 | if (!delete) |
3802 | b->thash = (Hash *) 1; |
3803 | #else |
3804 | delete = true; |
3805 | #endif |
3806 | } else if (strncmp(p + 1, "timprints" , 9) == 0) { |
3807 | BAT *b = getdesc(bid); |
3808 | delete = b == NULL; |
3809 | if (!delete) |
3810 | b->timprints = (Imprints *) 1; |
3811 | } else if (strncmp(p + 1, "torderidx" , 9) == 0) { |
3812 | #ifdef PERSISTENTIDX |
3813 | BAT *b = getdesc(bid); |
3814 | delete = b == NULL; |
3815 | if (!delete) |
3816 | b->torderidx = (Heap *) 1; |
3817 | #else |
3818 | delete = true; |
3819 | #endif |
3820 | } else if (strncmp(p + 1, "new" , 3) != 0) { |
3821 | ok = false; |
3822 | } |
3823 | } |
3824 | if (!ok) { |
3825 | /* found an unknown file; stop pruning in this |
3826 | * subdir */ |
3827 | fprintf(stderr, "BBPdiskscan: unexpected file %s, leaving %s.\n" , dent->d_name, parent); |
3828 | break; |
3829 | } |
3830 | if (delete) { |
3831 | if (remove(fullname) != 0 && errno != ENOENT) { |
3832 | GDKsyserror("BBPdiskscan: remove(%s)" , fullname); |
3833 | continue; |
3834 | } |
3835 | IODEBUG fprintf(stderr, "#BBPcleanup: remove(%s) = 0\n" , fullname); |
3836 | } |
3837 | } |
3838 | closedir(dirp); |
3839 | return false; |
3840 | } |
3841 | |
3842 | void |
3843 | gdk_bbp_reset(void) |
3844 | { |
3845 | int i; |
3846 | |
3847 | while (BBPlimit > 0) { |
3848 | BBPlimit -= BBPINIT; |
3849 | assert(BBPlimit >= 0); |
3850 | GDKfree(BBP[BBPlimit >> BBPINITLOG]); |
3851 | BBP[BBPlimit >> BBPINITLOG] = NULL; |
3852 | } |
3853 | ATOMIC_SET(&BBPsize, 0); |
3854 | for (i = 0; i < MAXFARMS; i++) |
3855 | GDKfree((void *) BBPfarms[i].dirname); /* loose "const" */ |
3856 | memset(BBPfarms, 0, sizeof(BBPfarms)); |
3857 | BBP_hash = 0; |
3858 | BBP_mask = 0; |
3859 | |
3860 | locked_by = 0; |
3861 | BBPunloadCnt = 0; |
3862 | backup_files = 0; |
3863 | backup_dir = 0; |
3864 | backup_subdir = 0; |
3865 | } |
3866 | |