1/*
2 * This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
5 *
6 * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V.
7 */
8
9/*
10 * @a M. L. Kersten, P. Boncz, N. Nes
11 *
12 * @* Database Storage Management
13 * Contains routines for writing and reading GDK data to and from
14 * disk. This section contains the primitives to manage the
15 * disk-based images of the BATs. It relies on the existence of a UNIX
16 * file system, including memory mapped files. Solaris and IRIX have
17 * different implementations of madvise().
18 *
19 * The current version assumes that all BATs are stored on a single
20 * disk partition. This simplistic assumption should be replaced in
21 * the near future by a multi-volume version. The intention is to use
22 * several BAT home locations. The files should be owned by the
23 * database server. Otherwise, IO operations are likely to fail. This
24 * is accomplished by setting the GID and UID upon system start.
25 */
26#include "monetdb_config.h"
27#include "gdk.h"
28#include "gdk_private.h"
29#include "gdk_storage.h"
30#include "mutils.h"
31#ifdef HAVE_FCNTL_H
32#include <fcntl.h>
33#endif
34
35#ifndef O_CLOEXEC
36#define O_CLOEXEC 0
37#endif
38
39/* GDKfilepath returns a newly allocated string containing the path
40 * name of a database farm.
41 * The arguments are the farmID or -1, the name of a subdirectory
42 * within the farm (i.e., something like BATDIR or BAKDIR -- see
43 * gdk.h) or NULL, the name of a BAT (i.e. the name that is stored in
44 * BBP.dir -- something like 07/714), and finally the file extension.
45 *
46 * If farmid is >= 0, GDKfilepath returns the complete path to the
47 * specified farm concatenated with the other arguments with
48 * appropriate separators. If farmid is -1, it returns the
49 * concatenation of its other arguments (in this case, the result
50 * cannot be used to access a file directly -- the farm needs to be
51 * prepended in some other place). */
52char *
53GDKfilepath(int farmid, const char *dir, const char *name, const char *ext)
54{
55 const char *sep;
56 size_t pathlen;
57 char *path;
58
59 if (GDKinmemory())
60 return GDKstrdup(":inmemory");
61
62 assert(dir == NULL || *dir != DIR_SEP);
63 assert(farmid == NOFARM ||
64 (farmid >= 0 && farmid < MAXFARMS && BBPfarms[farmid].dirname));
65 if (MT_path_absolute(name)) {
66 GDKerror("GDKfilepath: name should not be absolute\n");
67 return NULL;
68 }
69 if (dir && *dir == DIR_SEP)
70 dir++;
71 if (dir == NULL || dir[0] == 0 || dir[strlen(dir) - 1] == DIR_SEP) {
72 sep = "";
73 } else {
74 sep = DIR_SEP_STR;
75 }
76 pathlen = (farmid == NOFARM ? 0 : strlen(BBPfarms[farmid].dirname) + 1) +
77 (dir ? strlen(dir) : 0) + strlen(sep) + strlen(name) +
78 (ext ? strlen(ext) + 1 : 0) + 1;
79 path = GDKmalloc(pathlen);
80 if (path == NULL)
81 return NULL;
82 if (farmid == NOFARM) {
83 strconcat_len(path, pathlen,
84 dir ? dir : "", sep, name,
85 ext ? "." : NULL, ext, NULL);
86 } else {
87 strconcat_len(path, pathlen,
88 BBPfarms[farmid].dirname, DIR_SEP_STR,
89 dir ? dir : "", sep, name,
90 ext ? "." : NULL, ext, NULL);
91 }
92 return path;
93}
94
95/* make sure the parent directory of DIR exists (the argument itself
96 * is usually a file that is to be created) */
97gdk_return
98GDKcreatedir(const char *dir)
99{
100 char path[FILENAME_MAX];
101 char *r;
102 DIR *dirp;
103
104 IODEBUG fprintf(stderr, "#GDKcreatedir(%s)\n", dir);
105 assert(!GDKinmemory());
106 assert(MT_path_absolute(dir));
107 if (strlen(dir) >= FILENAME_MAX) {
108 GDKerror("GDKcreatedir: directory name too long\n");
109 return GDK_FAIL;
110 }
111 strcpy(path, dir); /* we know this fits (see above) */
112 /* skip initial /, if any */
113 for (r = strchr(path + 1, DIR_SEP); r; r = strchr(r, DIR_SEP)) {
114 *r = 0;
115 if (
116#ifdef WIN32
117 strlen(path) > 3 &&
118#endif
119 mkdir(path, MONETDB_DIRMODE) < 0) {
120 if (errno != EEXIST) {
121 GDKsyserror("GDKcreatedir: cannot create directory %s\n", path);
122 IODEBUG fprintf(stderr, "#GDKcreatedir: mkdir(%s) failed\n", path);
123 return GDK_FAIL;
124 }
125 if ((dirp = opendir(path)) == NULL) {
126 GDKsyserror("GDKcreatedir: %s not a directory\n", path);
127 IODEBUG fprintf(stderr, "#GDKcreatedir: opendir(%s) failed\n", path);
128 return GDK_FAIL;
129 }
130 /* it's a directory, we can continue */
131 closedir(dirp);
132 }
133 *r++ = DIR_SEP;
134 }
135 return GDK_SUCCEED;
136}
137
138/* remove the directory DIRNAME with its file contents; does not
139 * recurse into subdirectories */
140gdk_return
141GDKremovedir(int farmid, const char *dirname)
142{
143 str dirnamestr;
144 DIR *dirp;
145 char *path;
146 struct dirent *dent;
147 int ret;
148
149 assert(!GDKinmemory());
150 if ((dirnamestr = GDKfilepath(farmid, NULL, dirname, NULL)) == NULL)
151 return GDK_FAIL;
152
153 IODEBUG fprintf(stderr, "#GDKremovedir(%s)\n", dirnamestr);
154
155 if ((dirp = opendir(dirnamestr)) == NULL) {
156 GDKfree(dirnamestr);
157 return GDK_SUCCEED;
158 }
159 while ((dent = readdir(dirp)) != NULL) {
160 if (dent->d_name[0] == '.' &&
161 (dent->d_name[1] == 0 ||
162 (dent->d_name[1] == '.' && dent->d_name[2] == 0))) {
163 /* skip . and .. */
164 continue;
165 }
166 path = GDKfilepath(farmid, dirname, dent->d_name, NULL);
167 ret = remove(path);
168 IODEBUG fprintf(stderr, "#remove %s = %d\n", path, ret);
169 GDKfree(path);
170 }
171 closedir(dirp);
172 ret = rmdir(dirnamestr);
173 if (ret != 0)
174 GDKsyserror("GDKremovedir: rmdir(%s) failed.\n", dirnamestr);
175 IODEBUG fprintf(stderr, "#rmdir %s = %d\n", dirnamestr, ret);
176 GDKfree(dirnamestr);
177 return ret ? GDK_FAIL : GDK_SUCCEED;
178}
179
180#define _FUNBUF 0x040000
181#define _FWRTHR 0x080000
182#define _FRDSEQ 0x100000
183
184/* open a file and return its file descriptor; the file is specified
185 * using farmid, name and extension; if opening for writing, we create
186 * the parent directory if necessary */
187int
188GDKfdlocate(int farmid, const char *nme, const char *mode, const char *extension)
189{
190 char *path = NULL;
191 int fd, flags = O_CLOEXEC;
192
193 assert(!GDKinmemory());
194 if (nme == NULL || *nme == 0)
195 return -1;
196
197 assert(farmid != NOFARM || extension == NULL);
198 if (farmid != NOFARM) {
199 path = GDKfilepath(farmid, BATDIR, nme, extension);
200 if (path == NULL)
201 return -1;
202 nme = path;
203 }
204
205 if (*mode == 'm') { /* file open for mmap? */
206 mode++;
207#ifdef _CYGNUS_H_
208 } else {
209 flags |= _FRDSEQ; /* WIN32 CreateFile(FILE_FLAG_SEQUENTIAL_SCAN) */
210#endif
211 }
212
213 if (strchr(mode, 'w')) {
214 flags |= O_WRONLY | O_CREAT;
215 } else if (!strchr(mode, '+')) {
216 flags |= O_RDONLY;
217 } else {
218 flags |= O_RDWR;
219 }
220#ifdef WIN32
221 flags |= strchr(mode, 'b') ? O_BINARY : O_TEXT;
222#endif
223 fd = open(nme, flags, MONETDB_MODE);
224 if (fd < 0 && *mode == 'w') {
225 /* try to create the directory, in case that was the problem */
226 if (GDKcreatedir(nme) == GDK_SUCCEED) {
227 fd = open(nme, flags, MONETDB_MODE);
228 if (fd < 0)
229 GDKsyserror("GDKfdlocate: cannot open file %s\n", nme);
230 }
231 }
232 /* don't generate error if we can't open a file for reading */
233 GDKfree(path);
234 return fd;
235}
236
/* Like GDKfdlocate, but wrap the descriptor in a FILE pointer.
 * Returns NULL if the file cannot be opened or if fdopen fails (in
 * which case the descriptor is closed again). */
FILE *
GDKfilelocate(int farmid, const char *nme, const char *mode, const char *extension)
{
	FILE *f;
	int fd = GDKfdlocate(farmid, nme, mode, extension);

	if (fd < 0)
		return NULL;
	/* the leading 'm' is only meaningful to GDKfdlocate, so it is
	 * not passed on to fdopen */
	f = fdopen(fd, *mode == 'm' ? mode + 1 : mode);
	if (f != NULL)
		return f;
	GDKsyserror("GDKfilelocate: cannot fdopen file\n");
	close(fd);
	return NULL;
}
255
256FILE *
257GDKfileopen(int farmid, const char *dir, const char *name, const char *extension, const char *mode)
258{
259 char *path;
260
261 /* if name is null, try to get one from dir (in case it was a path) */
262 path = GDKfilepath(farmid, dir, name, extension);
263
264 if (path != NULL) {
265 FILE *f;
266 IODEBUG fprintf(stderr, "#GDKfileopen(%s)\n", path);
267 f = fopen(path, mode);
268 GDKfree(path);
269 return f;
270 }
271 return NULL;
272}
273
274/* remove the file */
275gdk_return
276GDKunlink(int farmid, const char *dir, const char *nme, const char *ext)
277{
278 if (nme && *nme) {
279 char *path;
280
281 path = GDKfilepath(farmid, dir, nme, ext);
282 if (path == NULL)
283 return GDK_FAIL;
284 /* if file already doesn't exist, we don't care */
285 if (remove(path) != 0 && errno != ENOENT) {
286 GDKsyserror("GDKunlink(%s)\n", path);
287 IODEBUG fprintf(stderr, "#remove %s = -1\n", path);
288 GDKfree(path);
289 return GDK_FAIL;
290 }
291 GDKfree(path);
292 return GDK_SUCCEED;
293 }
294 return GDK_FAIL;
295}
296
297/*
298 * A move routine is overloaded to deal with extensions.
299 */
300gdk_return
301GDKmove(int farmid, const char *dir1, const char *nme1, const char *ext1, const char *dir2, const char *nme2, const char *ext2)
302{
303 char *path1;
304 char *path2;
305 int ret, t0 = 0;
306
307 IODEBUG t0 = GDKms();
308
309 if ((nme1 == NULL) || (*nme1 == 0)) {
310 GDKerror("GDKmove: no file specified\n");
311 return GDK_FAIL;
312 }
313 path1 = GDKfilepath(farmid, dir1, nme1, ext1);
314 path2 = GDKfilepath(farmid, dir2, nme2, ext2);
315 if (path1 && path2) {
316 ret = rename(path1, path2);
317 if (ret < 0)
318 GDKsyserror("GDKmove: cannot rename %s to %s\n", path1, path2);
319
320 IODEBUG fprintf(stderr, "#move %s %s = %d (%dms)\n", path1, path2, ret, GDKms() - t0);
321 } else {
322 ret = -1;
323 }
324 GDKfree(path1);
325 GDKfree(path2);
326 return ret < 0 ? GDK_FAIL : GDK_SUCCEED;
327}
328
329gdk_return
330GDKextendf(int fd, size_t size, const char *fn)
331{
332 struct stat stb;
333 int rt = 0;
334 int t0 = 0;
335
336 assert(!GDKinmemory());
337#ifdef STATIC_CODE_ANALYSIS
338 if (fd < 0) /* in real life, if fd < 0, fstat will fail */
339 return GDK_FAIL;
340#endif
341 if (fstat(fd, &stb) < 0) {
342 /* shouldn't happen */
343 GDKsyserror("GDKextendf: fstat unexpectedly failed\n");
344 return GDK_FAIL;
345 }
346 /* if necessary, extend the underlying file */
347 IODEBUG t0 = GDKms();
348 if (stb.st_size < (off_t) size) {
349#ifdef HAVE_FALLOCATE
350 if ((rt = fallocate(fd, 0, stb.st_size, (off_t) size - stb.st_size)) < 0 &&
351 errno == EOPNOTSUPP)
352 /* on Linux, posix_fallocate uses a slow
353 * method to allocate blocks if the underlying
354 * file system doesn't support the operation,
355 * so use fallocate instead and just resize
356 * the file if it fails */
357#else
358#ifdef HAVE_POSIX_FALLOCATE
359 /* posix_fallocate returns error number on failure,
360 * not -1 :-( */
361 if ((rt = posix_fallocate(fd, stb.st_size, (off_t) size - stb.st_size)) == EINVAL)
362 /* on Solaris/OpenIndiana, this may mean that
363 * the underlying file system doesn't support
364 * the operation, so just resize the file */
365#endif
366#endif
367 /* we get here when (posix_)fallocate fails because it
368 * is not supported on the file system, or if neither
369 * function exists */
370 rt = ftruncate(fd, (off_t) size);
371 if (rt != 0) {
372 /* extending failed, try to reduce file size
373 * back to original */
374 int err = errno;
375 if (ftruncate(fd, stb.st_size))
376 perror("ftruncate");
377 errno = err; /* restore for error message */
378 GDKsyserror("GDKextendf: could not extend file\n");
379 }
380 }
381 IODEBUG fprintf(stderr, "#GDKextend %s %zu -> %zu %dms%s\n",
382 fn, (size_t) stb.st_size, size,
383 GDKms() - t0, rt != 0 ? " (failed)" : "");
384 /* posix_fallocate returns != 0 on failure, fallocate and
385 * ftruncate return -1 on failure, but all three return 0 on
386 * success */
387 return rt != 0 ? GDK_FAIL : GDK_SUCCEED;
388}
389
390gdk_return
391GDKextend(const char *fn, size_t size)
392{
393 int fd, flags = O_RDWR;
394 gdk_return rt = GDK_FAIL;
395
396 assert(!GDKinmemory());
397#ifdef O_BINARY
398 /* On Windows, open() fails if the file is bigger than 2^32
399 * bytes without O_BINARY. */
400 flags |= O_BINARY;
401#endif
402 if ((fd = open(fn, flags | O_CLOEXEC)) >= 0) {
403 rt = GDKextendf(fd, size, fn);
404 close(fd);
405 } else {
406 GDKsyserror("GDKextend: cannot open file %s\n", fn);
407 }
408 return rt;
409}
410
411/*
412 * @+ Save and load.
413 * The BAT is saved on disk in several files. The extension DESC
414 * denotes the descriptor, BUNs the bun heap, and HHEAP and THEAP the
415 * other heaps. The storage mechanism off a file can be memory mapped
416 * (STORE_MMAP) or malloced (STORE_MEM).
417 *
418 * These modes indicates the disk-layout and the intended mapping.
419 * The primary concern here is to handle STORE_MMAP and STORE_MEM.
420 */
421gdk_return
422GDKsave(int farmid, const char *nme, const char *ext, void *buf, size_t size, storage_t mode, bool dosync)
423{
424 int err = 0;
425
426 IODEBUG fprintf(stderr, "#GDKsave: name=%s, ext=%s, mode %d, dosync=%d\n", nme, ext ? ext : "", (int) mode, dosync);
427
428 assert(!GDKinmemory());
429 if (mode == STORE_MMAP) {
430 if (dosync && size && !(GDKdebug & NOSYNCMASK) && MT_msync(buf, size) < 0)
431 err = -1;
432 if (err)
433 GDKsyserror("GDKsave: error on: name=%s, ext=%s, "
434 "mode=%d\n", nme, ext ? ext : "",
435 (int) mode);
436 IODEBUG fprintf(stderr,
437 "#MT_msync(buf %p, size %zu"
438 ") = %d\n",
439 buf, size, err);
440 } else {
441 int fd;
442
443 if ((fd = GDKfdlocate(farmid, nme, "wb", ext)) >= 0) {
444 /* write() on 64-bits Redhat for IA64 returns
445 * 32-bits signed result (= OS BUG)! write()
446 * on Windows only takes unsigned int as
447 * size */
448 while (size > 0) {
449 /* circumvent problems by writing huge
450 * buffers in chunks <= 1GiB */
451 ssize_t ret;
452
453 ret = write(fd, buf,
454 (unsigned) MIN(1 << 30, size));
455 if (ret < 0) {
456 err = -1;
457 GDKsyserror("GDKsave: error %zd"
458 " on: name=%s, ext=%s, "
459 "mode=%d\n", ret, nme,
460 ext ? ext : "", (int) mode);
461 break;
462 }
463 size -= ret;
464 buf = (void *) ((char *) buf + ret);
465 IODEBUG fprintf(stderr,
466 "#write(fd %d, buf %p"
467 ", size %u) = %zd\n",
468 fd, buf,
469 (unsigned) MIN(1 << 30, size),
470 ret);
471 }
472 if (dosync && !(GDKdebug & NOSYNCMASK)
473#if defined(NATIVE_WIN32)
474 && _commit(fd) < 0
475#elif defined(HAVE_FDATASYNC)
476 && fdatasync(fd) < 0
477#elif defined(HAVE_FSYNC)
478 && fsync(fd) < 0
479#endif
480 ) {
481 GDKsyserror("GDKsave: error on: name=%s, "
482 "ext=%s, mode=%d\n", nme,
483 ext ? ext : "", (int) mode);
484 err = -1;
485 }
486 err |= close(fd);
487 if (err && GDKunlink(farmid, BATDIR, nme, ext) != GDK_SUCCEED) {
488 /* do not tolerate corrupt heap images
489 * (BBPrecover on restart will kill
490 * them) */
491 GDKerror("GDKsave: could not remove: name=%s, "
492 "ext=%s, mode %d\n", nme,
493 ext ? ext : "", (int) mode);
494 return GDK_FAIL;
495 }
496 } else {
497 err = -1;
498 GDKerror("GDKsave: failed name=%s, ext=%s, mode %d\n",
499 nme, ext ? ext : "", (int) mode);
500 }
501 }
502 return err ? GDK_FAIL : GDK_SUCCEED;
503}
504
505/*
506 * Space for the load is directly allocated and the heaps are mapped.
507 * Further initialization of the atom heaps require a separate action
508 * defined in their implementation.
509 *
510 * size -- how much to read
511 * *maxsize -- (in/out) how much to allocate / how much was allocated
512 */
513char *
514GDKload(int farmid, const char *nme, const char *ext, size_t size, size_t *maxsize, storage_t mode)
515{
516 char *ret = NULL;
517
518 assert(!GDKinmemory());
519 assert(size <= *maxsize);
520 assert(farmid != NOFARM || ext == NULL);
521 IODEBUG {
522 fprintf(stderr, "#GDKload: name=%s, ext=%s, mode %d\n", nme, ext ? ext : "", (int) mode);
523 }
524 if (mode == STORE_MEM) {
525 int fd = GDKfdlocate(farmid, nme, "rb", ext);
526
527 if (fd >= 0) {
528 char *dst = ret = GDKmalloc(*maxsize);
529 ssize_t n_expected, n = 0;
530
531 if (ret) {
532 /* read in chunks, some OSs do not
533 * give you all at once and Windows
534 * only accepts int */
535 for (n_expected = (ssize_t) size; n_expected > 0; n_expected -= n) {
536 n = read(fd, dst, (unsigned) MIN(1 << 30, n_expected));
537 if (n < 0)
538 GDKsyserror("GDKload: cannot read: name=%s, ext=%s, %zu bytes missing.\n", nme, ext ? ext : "", (size_t) n_expected);
539#ifndef STATIC_CODE_ANALYSIS
540 /* Coverity doesn't seem to
541 * recognize that we're just
542 * printing the value of ptr,
543 * not its contents */
544 IODEBUG fprintf(stderr, "#read(dst %p, n_expected %zd, fd %d) = %zd\n", (void *)dst, n_expected, fd, n);
545#endif
546
547 if (n <= 0)
548 break;
549 dst += n;
550 }
551 if (n_expected > 0) {
552 /* we couldn't read all, error
553 * already generated */
554 GDKfree(ret);
555 ret = NULL;
556 }
557#ifndef NDEBUG
558 /* just to make valgrind happy, we
559 * initialize the whole thing */
560 if (ret && *maxsize > size)
561 memset(ret + size, 0, *maxsize - size);
562#endif
563 }
564 close(fd);
565 } else {
566 GDKerror("GDKload: cannot open: name=%s, ext=%s\n", nme, ext ? ext : "");
567 }
568 } else {
569 char *path = NULL;
570
571 /* round up to multiple of GDK_mmap_pagesize with a
572 * minimum of one */
573 size = (*maxsize + GDK_mmap_pagesize - 1) & ~(GDK_mmap_pagesize - 1);
574 if (size == 0)
575 size = GDK_mmap_pagesize;
576 if (farmid != NOFARM) {
577 path = GDKfilepath(farmid, BATDIR, nme, ext);
578 nme = path;
579 }
580 if (nme != NULL && GDKextend(nme, size) == GDK_SUCCEED) {
581 int mod = MMAP_READ | MMAP_WRITE | MMAP_SEQUENTIAL;
582
583 if (mode == STORE_PRIV)
584 mod |= MMAP_COPY;
585 else
586 mod |= MMAP_SYNC;
587 ret = GDKmmap(nme, mod, size);
588 if (ret != NULL) {
589 /* success: update allocated size */
590 *maxsize = size;
591 }
592 IODEBUG fprintf(stderr, "#mmap(NULL, 0, maxsize %zu, mod %d, path %s, 0) = %p\n", size, mod, nme, (void *)ret);
593 }
594 GDKfree(path);
595 }
596 return ret;
597}
598
599/*
600 * @+ BAT disk storage
601 *
602 * Between sessions the BATs comprising the database are saved on
603 * disk. To simplify code, we assume a UNIX directory called its
604 * physical @%home@ where they are to be located. The subdirectories
605 * BAT and PRG contain what its name says.
606 *
607 * A BAT created by @%COLnew@ is considered temporary until one calls
608 * the routine @%BATsave@. This routine reserves disk space and checks
609 * for name clashes.
610 *
611 * Saving and restoring BATs is left to the upper layers. The library
612 * merely copies the data into place. Failure to read or write the
613 * BAT results in a NULL, otherwise it returns the BAT pointer.
614 */
615static BAT *
616DESCload(int i)
617{
618 const char *s, *nme = BBP_physical(i);
619 BAT *b = NULL;
620 int tt;
621
622 IODEBUG {
623 fprintf(stderr, "#DESCload %s\n", nme ? nme : "<noname>");
624 }
625 b = BBP_desc(i);
626
627 if (b == NULL)
628 return 0;
629
630 tt = b->ttype;
631 if ((tt < 0 && (tt = ATOMindex(s = ATOMunknown_name(tt))) < 0)) {
632 GDKerror("DESCload: atom '%s' unknown, in BAT '%s'.\n", s, nme);
633 return NULL;
634 }
635 b->ttype = tt;
636
637 /* reconstruct mode from BBP status (BATmode doesn't flush
638 * descriptor, so loaded mode may be stale) */
639 b->batTransient = (BBP_status(b->batCacheid) & BBPPERSISTENT) == 0;
640 b->batCopiedtodisk = true;
641 DESCclean(b);
642 return b;
643}
644
645void
646DESCclean(BAT *b)
647{
648 b->batDirtyflushed = DELTAdirty(b);
649 b->batDirtydesc = false;
650 b->theap.dirty = false;
651 if (b->tvheap)
652 b->tvheap->dirty = false;
653}
654
655/* spawning the background msync should be done carefully
656 * because there is a (small) chance that the BAT has been
657 * deleted by the time you issue the msync.
658 * This leaves you with possibly deadbeef BAT descriptors.
659 */
660
661/* #define DISABLE_MSYNC */
662#define MSYNC_BACKGROUND
663
664#ifndef DISABLE_MSYNC
665#ifndef MS_ASYNC
666struct msync {
667 bat id;
668 Heap *h;
669};
670
671static void
672BATmsyncImplementation(void *arg)
673{
674 Heap *h = ((struct msync *) arg)->h;
675
676 (void) MT_msync(h->base, h->size);
677 BBPunfix(((struct msync *) arg)->id);
678 GDKfree(arg);
679}
680#endif
681#endif
682
683void
684BATmsync(BAT *b)
685{
686 /* we don't sync views or if we're told not to */
687 if (GDKinmemory() || isVIEW(b) || (GDKdebug & NOSYNCMASK))
688 return;
689 /* we don't sync transients */
690 if (b->theap.farmid != 0 ||
691 (b->tvheap != NULL && b->tvheap->farmid != 0))
692 return;
693#ifndef DISABLE_MSYNC
694#ifdef MS_ASYNC
695 if (b->theap.storage == STORE_MMAP)
696 (void) msync(b->theap.base, b->theap.free, MS_ASYNC);
697 if (b->tvheap && b->tvheap->storage == STORE_MMAP)
698 (void) msync(b->tvheap->base, b->tvheap->free, MS_ASYNC);
699#else
700 {
701#ifdef MSYNC_BACKGROUND
702 MT_Id tid;
703#endif
704 struct msync *arg;
705
706 assert(!b->batTransient);
707 if (b->theap.storage == STORE_MMAP &&
708 (arg = GDKmalloc(sizeof(*arg))) != NULL) {
709 arg->id = b->batCacheid;
710 arg->h = &b->theap;
711 BBPfix(b->batCacheid);
712#ifdef MSYNC_BACKGROUND
713 char name[16];
714 snprintf(name, sizeof(name), "msync%d", b->batCacheid);
715 if (MT_create_thread(&tid, BATmsyncImplementation, arg,
716 MT_THR_DETACHED, name) < 0) {
717 /* don't bother if we can't create a thread */
718 BBPunfix(b->batCacheid);
719 GDKfree(arg);
720 }
721#else
722 BATmsyncImplementation(arg);
723#endif
724 }
725
726 if (b->tvheap && b->tvheap->storage == STORE_MMAP &&
727 (arg = GDKmalloc(sizeof(*arg))) != NULL) {
728 arg->id = b->batCacheid;
729 arg->h = b->tvheap;
730 BBPfix(b->batCacheid);
731#ifdef MSYNC_BACKGROUND
732 char name[16];
733 snprintf(name, sizeof(name), "msync%d", b->batCacheid);
734 if (MT_create_thread(&tid, BATmsyncImplementation, arg,
735 MT_THR_DETACHED, name) < 0) {
736 /* don't bother if we can't create a thread */
737 BBPunfix(b->batCacheid);
738 GDKfree(arg);
739 }
740#else
741 BATmsyncImplementation(arg);
742#endif
743 }
744 }
745#endif
746#else
747 (void) b;
748#endif /* DISABLE_MSYNC */
749}
750
751gdk_return
752BATsave(BAT *bd)
753{
754 gdk_return err = GDK_SUCCEED;
755 const char *nme;
756 BAT bs;
757 Heap vhs;
758 BAT *b = bd;
759
760 assert(!GDKinmemory());
761 BATcheck(b, "BATsave", GDK_FAIL);
762
763 assert(b->batCacheid > 0);
764 /* views cannot be saved, but make an exception for
765 * force-remapped views */
766 if (isVIEW(b) &&
767 !(b->theap.copied && b->theap.storage == STORE_MMAP)) {
768 GDKerror("BATsave: %s is a view on %s; cannot be saved\n", BATgetId(b), BBPname(VIEWtparent(b)));
769 return GDK_FAIL;
770 }
771 if (!BATdirty(b)) {
772 return GDK_SUCCEED;
773 }
774
775 /* copy the descriptor to a local variable in order to let our
776 * messing in the BAT descriptor not affect other threads that
777 * only read it. */
778 bs = *b;
779 b = &bs;
780
781 if (b->tvheap) {
782 vhs = *bd->tvheap;
783 b->tvheap = &vhs;
784 }
785
786 /* start saving data */
787 nme = BBP_physical(b->batCacheid);
788 if (!b->batCopiedtodisk || b->batDirtydesc || b->theap.dirty)
789 if (err == GDK_SUCCEED && b->ttype)
790 err = HEAPsave(&b->theap, nme, "tail");
791 if (b->tvheap
792 && (!b->batCopiedtodisk || b->batDirtydesc || b->tvheap->dirty)
793 && b->ttype
794 && b->tvarsized
795 && err == GDK_SUCCEED)
796 err = HEAPsave(b->tvheap, nme, "theap");
797
798 if (err == GDK_SUCCEED) {
799 bd->batCopiedtodisk = true;
800 DESCclean(bd);
801 return GDK_SUCCEED;
802 }
803 return err;
804}
805
806
807/*
808 * TODO: move to gdk_bbp.c
809 */
810BAT *
811BATload_intern(bat bid, bool lock)
812{
813 const char *nme;
814 BAT *b;
815
816 assert(!GDKinmemory());
817 assert(bid > 0);
818
819 nme = BBP_physical(bid);
820 b = DESCload(bid);
821
822 if (b == NULL) {
823 return NULL;
824 }
825
826 /* LOAD bun heap */
827 if (b->ttype != TYPE_void) {
828 if (HEAPload(&b->theap, nme, "tail", b->batRestricted == BAT_READ) != GDK_SUCCEED) {
829 HEAPfree(&b->theap, false);
830 return NULL;
831 }
832 assert(b->theap.size >> b->tshift <= BUN_MAX);
833 b->batCapacity = (BUN) (b->theap.size >> b->tshift);
834 } else {
835 b->theap.base = NULL;
836 }
837
838 /* LOAD tail heap */
839 if (ATOMvarsized(b->ttype)) {
840 if (HEAPload(b->tvheap, nme, "theap", b->batRestricted == BAT_READ) != GDK_SUCCEED) {
841 HEAPfree(&b->theap, false);
842 HEAPfree(b->tvheap, false);
843 return NULL;
844 }
845 if (ATOMstorage(b->ttype) == TYPE_str) {
846 strCleanHash(b->tvheap, false); /* ensure consistency */
847 } else {
848 HEAP_recover(b->tvheap, (const var_t *) Tloc(b, 0),
849 BATcount(b));
850 }
851 }
852
853 /* initialize descriptor */
854 b->batDirtydesc = false;
855 b->theap.parentid = 0;
856
857 /* load succeeded; register it in BBP */
858 if (BBPcacheit(b, lock) != GDK_SUCCEED) {
859 HEAPfree(&b->theap, false);
860 if (b->tvheap)
861 HEAPfree(b->tvheap, false);
862 return NULL;
863 }
864 return b;
865}
866
867/*
868 * @- BATdelete
869 * The new behavior is to let the routine produce warnings but always
870 * succeed. rationale: on a delete, we must get rid of *all* the
871 * files. We do not have to care about preserving them or be too much
872 * concerned if a file that had to be deleted was not found (end
873 * result is still that it does not exist). The past behavior to
874 * delete some files and then fail was erroneous. The BAT would
875 * continue to exist with an incorrect disk status, causing havoc
876 * later on.
877 *
878 * NT forces us to close all files before deleting them; in case of
879 * memory mapped files this means that we have to unload the BATs
880 * before deleting. This is enforced now.
881 */
882void
883BATdelete(BAT *b)
884{
885 bat bid = b->batCacheid;
886 const char *o = BBP_physical(bid);
887 BAT *loaded = BBP_cache(bid);
888
889 assert(bid > 0);
890 if (loaded) {
891 b = loaded;
892 HASHdestroy(b);
893 IMPSdestroy(b);
894 OIDXdestroy(b);
895 }
896 if (b->batCopiedtodisk || (b->theap.storage != STORE_MEM)) {
897 if (b->ttype != TYPE_void &&
898 HEAPdelete(&b->theap, o, "tail") != GDK_SUCCEED &&
899 b->batCopiedtodisk)
900 IODEBUG fprintf(stderr, "#BATdelete(%s): bun heap\n", BATgetId(b));
901 } else if (b->theap.base) {
902 HEAPfree(&b->theap, true);
903 }
904 if (b->tvheap) {
905 assert(b->tvheap->parentid == bid);
906 if (b->batCopiedtodisk || (b->tvheap->storage != STORE_MEM)) {
907 if (HEAPdelete(b->tvheap, o, "theap") != GDK_SUCCEED &&
908 b->batCopiedtodisk)
909 IODEBUG fprintf(stderr, "#BATdelete(%s): tail heap\n", BATgetId(b));
910 } else {
911 HEAPfree(b->tvheap, true);
912 }
913 }
914 b->batCopiedtodisk = false;
915}
916
917/*
918 * BAT specific printing
919 */
920
921gdk_return
922BATprintcolumns(stream *s, int argc, BAT *argv[])
923{
924 int i;
925 BUN n, cnt;
926 struct colinfo {
927 ssize_t (*s) (str *, size_t *, const void *, bool);
928 BATiter i;
929 } *colinfo;
930 char *buf;
931 size_t buflen = 0;
932 ssize_t len;
933
934 /* error checking */
935 for (i = 0; i < argc; i++) {
936 if (argv[i] == NULL) {
937 GDKerror("Columns missing\n");
938 return GDK_FAIL;
939 }
940 if (BATcount(argv[0]) != BATcount(argv[i])) {
941 GDKerror("Columns must be the same size\n");
942 return GDK_FAIL;
943 }
944 }
945
946 if ((colinfo = GDKmalloc(argc * sizeof(*colinfo))) == NULL) {
947 GDKerror("Cannot allocate memory\n");
948 return GDK_FAIL;
949 }
950
951 for (i = 0; i < argc; i++) {
952 colinfo[i].i = bat_iterator(argv[i]);
953 colinfo[i].s = BATatoms[argv[i]->ttype].atomToStr;
954 }
955
956 mnstr_write(s, "#--------------------------#\n", 1, 29);
957 mnstr_write(s, "# ", 1, 2);
958 for (i = 0; i < argc; i++) {
959 if (i > 0)
960 mnstr_write(s, "\t", 1, 1);
961 buf = argv[i]->tident;
962 mnstr_write(s, buf, 1, strlen(buf));
963 }
964 mnstr_write(s, " # name\n", 1, 9);
965 mnstr_write(s, "# ", 1, 2);
966 for (i = 0; i < argc; i++) {
967 if (i > 0)
968 mnstr_write(s, "\t", 1, 1);
969 buf = ATOMname(argv[i]->ttype);
970 mnstr_write(s, buf, 1, strlen(buf));
971 }
972 mnstr_write(s, " # type\n", 1, 9);
973 mnstr_write(s, "#--------------------------#\n", 1, 29);
974 buf = NULL;
975
976 for (n = 0, cnt = BATcount(argv[0]); n < cnt; n++) {
977 mnstr_write(s, "[ ", 1, 2);
978 for (i = 0; i < argc; i++) {
979 len = colinfo[i].s(&buf, &buflen, BUNtail(colinfo[i].i, n), true);
980 if (len < 0) {
981 GDKfree(buf);
982 GDKfree(colinfo);
983 return GDK_FAIL;
984 }
985 if (i > 0)
986 mnstr_write(s, ",\t", 1, 2);
987 mnstr_write(s, buf, 1, len);
988 }
989 mnstr_write(s, " ]\n", 1, 4);
990 }
991
992 GDKfree(buf);
993 GDKfree(colinfo);
994
995 return GDK_SUCCEED;
996}
997
998gdk_return
999BATprint(stream *fdout, BAT *b)
1000{
1001 BAT *argv[2];
1002 gdk_return ret = GDK_FAIL;
1003
1004 argv[0] = BATdense(b->hseqbase, b->hseqbase, BATcount(b));
1005 argv[1] = b;
1006 if (argv[0] && argv[1]) {
1007 ret = BATroles(argv[0], "h");
1008 if (ret == GDK_SUCCEED)
1009 ret = BATprintcolumns(fdout, 2, argv);
1010 }
1011 if (argv[0])
1012 BBPunfix(argv[0]->batCacheid);
1013 return ret;
1014}
1015