| 1 | /* | 
|---|
| 2 | * This Source Code Form is subject to the terms of the Mozilla Public | 
|---|
| 3 | * License, v. 2.0.  If a copy of the MPL was not distributed with this | 
|---|
| 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. | 
|---|
| 5 | * | 
|---|
| 6 | * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V. | 
|---|
| 7 | */ | 
|---|
| 8 |  | 
|---|
| 9 | /* | 
|---|
| 10 | * @a M. L. Kersten, P. Boncz, N. Nes | 
|---|
| 11 | * | 
|---|
| 12 | * @* Database Storage Management | 
|---|
| 13 | * Contains routines for writing and reading GDK data to and from | 
|---|
| 14 | * disk.  This section contains the primitives to manage the | 
|---|
| 15 | * disk-based images of the BATs. It relies on the existence of a UNIX | 
|---|
| 16 | * file system, including memory mapped files. Solaris and IRIX have | 
|---|
| 17 | * different implementations of madvise(). | 
|---|
| 18 | * | 
|---|
| 19 | * The current version assumes that all BATs are stored on a single | 
|---|
| 20 | * disk partition. This simplistic assumption should be replaced in | 
|---|
| 21 | * the near future by a multi-volume version. The intention is to use | 
|---|
| 22 | * several BAT home locations.  The files should be owned by the | 
|---|
| 23 | * database server. Otherwise, IO operations are likely to fail. This | 
|---|
| 24 | * is accomplished by setting the GID and UID upon system start. | 
|---|
| 25 | */ | 
|---|
| 26 | #include "monetdb_config.h" | 
|---|
| 27 | #include "gdk.h" | 
|---|
| 28 | #include "gdk_private.h" | 
|---|
| 29 | #include "gdk_storage.h" | 
|---|
| 30 | #include "mutils.h" | 
|---|
| 31 | #ifdef HAVE_FCNTL_H | 
|---|
| 32 | #include <fcntl.h> | 
|---|
| 33 | #endif | 
|---|
| 34 |  | 
|---|
| 35 | #ifndef O_CLOEXEC | 
|---|
| 36 | #define O_CLOEXEC 0 | 
|---|
| 37 | #endif | 
|---|
| 38 |  | 
|---|
| 39 | /* GDKfilepath returns a newly allocated string containing the path | 
|---|
| 40 | * name of a database farm. | 
|---|
| 41 | * The arguments are the farmID or -1, the name of a subdirectory | 
|---|
| 42 | * within the farm (i.e., something like BATDIR or BAKDIR -- see | 
|---|
| 43 | * gdk.h) or NULL, the name of a BAT (i.e. the name that is stored in | 
|---|
| 44 | * BBP.dir -- something like 07/714), and finally the file extension. | 
|---|
| 45 | * | 
|---|
| 46 | * If farmid is >= 0, GDKfilepath returns the complete path to the | 
|---|
| 47 | * specified farm concatenated with the other arguments with | 
|---|
| 48 | * appropriate separators.  If farmid is -1, it returns the | 
|---|
| 49 | * concatenation of its other arguments (in this case, the result | 
|---|
| 50 | * cannot be used to access a file directly -- the farm needs to be | 
|---|
| 51 | * prepended in some other place). */ | 
|---|
| 52 | char * | 
|---|
| 53 | GDKfilepath(int farmid, const char *dir, const char *name, const char *ext) | 
|---|
| 54 | { | 
|---|
| 55 | const char *sep; | 
|---|
| 56 | size_t pathlen; | 
|---|
| 57 | char *path; | 
|---|
| 58 |  | 
|---|
| 59 | if (GDKinmemory()) | 
|---|
| 60 | return GDKstrdup( ":inmemory"); | 
|---|
| 61 |  | 
|---|
| 62 | assert(dir == NULL || *dir != DIR_SEP); | 
|---|
| 63 | assert(farmid == NOFARM || | 
|---|
| 64 | (farmid >= 0 && farmid < MAXFARMS && BBPfarms[farmid].dirname)); | 
|---|
| 65 | if (MT_path_absolute(name)) { | 
|---|
| 66 | GDKerror( "GDKfilepath: name should not be absolute\n"); | 
|---|
| 67 | return NULL; | 
|---|
| 68 | } | 
|---|
| 69 | if (dir && *dir == DIR_SEP) | 
|---|
| 70 | dir++; | 
|---|
| 71 | if (dir == NULL || dir[0] == 0 || dir[strlen(dir) - 1] == DIR_SEP) { | 
|---|
| 72 | sep = ""; | 
|---|
| 73 | } else { | 
|---|
| 74 | sep = DIR_SEP_STR; | 
|---|
| 75 | } | 
|---|
| 76 | pathlen = (farmid == NOFARM ? 0 : strlen(BBPfarms[farmid].dirname) + 1) + | 
|---|
| 77 | (dir ? strlen(dir) : 0) + strlen(sep) + strlen(name) + | 
|---|
| 78 | (ext ? strlen(ext) + 1 : 0) + 1; | 
|---|
| 79 | path = GDKmalloc(pathlen); | 
|---|
| 80 | if (path == NULL) | 
|---|
| 81 | return NULL; | 
|---|
| 82 | if (farmid == NOFARM) { | 
|---|
| 83 | strconcat_len(path, pathlen, | 
|---|
| 84 | dir ? dir : "", sep, name, | 
|---|
| 85 | ext ? ".": NULL, ext, NULL); | 
|---|
| 86 | } else { | 
|---|
| 87 | strconcat_len(path, pathlen, | 
|---|
| 88 | BBPfarms[farmid].dirname, DIR_SEP_STR, | 
|---|
| 89 | dir ? dir : "", sep, name, | 
|---|
| 90 | ext ? ".": NULL, ext, NULL); | 
|---|
| 91 | } | 
|---|
| 92 | return path; | 
|---|
| 93 | } | 
|---|
| 94 |  | 
|---|
| 95 | /* make sure the parent directory of DIR exists (the argument itself | 
|---|
| 96 | * is usually a file that is to be created) */ | 
|---|
| 97 | gdk_return | 
|---|
| 98 | GDKcreatedir(const char *dir) | 
|---|
| 99 | { | 
|---|
| 100 | char path[FILENAME_MAX]; | 
|---|
| 101 | char *r; | 
|---|
| 102 | DIR *dirp; | 
|---|
| 103 |  | 
|---|
| 104 | IODEBUG fprintf(stderr, "#GDKcreatedir(%s)\n", dir); | 
|---|
| 105 | assert(!GDKinmemory()); | 
|---|
| 106 | assert(MT_path_absolute(dir)); | 
|---|
| 107 | if (strlen(dir) >= FILENAME_MAX) { | 
|---|
| 108 | GDKerror( "GDKcreatedir: directory name too long\n"); | 
|---|
| 109 | return GDK_FAIL; | 
|---|
| 110 | } | 
|---|
| 111 | strcpy(path, dir);	/* we know this fits (see above) */ | 
|---|
| 112 | /* skip initial /, if any */ | 
|---|
| 113 | for (r = strchr(path + 1, DIR_SEP); r; r = strchr(r, DIR_SEP)) { | 
|---|
| 114 | *r = 0; | 
|---|
| 115 | if ( | 
|---|
| 116 | #ifdef WIN32 | 
|---|
| 117 | strlen(path) > 3 && | 
|---|
| 118 | #endif | 
|---|
| 119 | mkdir(path, MONETDB_DIRMODE) < 0) { | 
|---|
| 120 | if (errno != EEXIST) { | 
|---|
| 121 | GDKsyserror( "GDKcreatedir: cannot create directory %s\n", path); | 
|---|
| 122 | IODEBUG fprintf(stderr, "#GDKcreatedir: mkdir(%s) failed\n", path); | 
|---|
| 123 | return GDK_FAIL; | 
|---|
| 124 | } | 
|---|
| 125 | if ((dirp = opendir(path)) == NULL) { | 
|---|
| 126 | GDKsyserror( "GDKcreatedir: %s not a directory\n", path); | 
|---|
| 127 | IODEBUG fprintf(stderr, "#GDKcreatedir: opendir(%s) failed\n", path); | 
|---|
| 128 | return GDK_FAIL; | 
|---|
| 129 | } | 
|---|
| 130 | /* it's a directory, we can continue */ | 
|---|
| 131 | closedir(dirp); | 
|---|
| 132 | } | 
|---|
| 133 | *r++ = DIR_SEP; | 
|---|
| 134 | } | 
|---|
| 135 | return GDK_SUCCEED; | 
|---|
| 136 | } | 
|---|
| 137 |  | 
|---|
| 138 | /* remove the directory DIRNAME with its file contents; does not | 
|---|
| 139 | * recurse into subdirectories */ | 
|---|
| 140 | gdk_return | 
|---|
| 141 | GDKremovedir(int farmid, const char *dirname) | 
|---|
| 142 | { | 
|---|
| 143 | str dirnamestr; | 
|---|
| 144 | DIR *dirp; | 
|---|
| 145 | char *path; | 
|---|
| 146 | struct dirent *dent; | 
|---|
| 147 | int ret; | 
|---|
| 148 |  | 
|---|
| 149 | assert(!GDKinmemory()); | 
|---|
| 150 | if ((dirnamestr = GDKfilepath(farmid, NULL, dirname, NULL)) == NULL) | 
|---|
| 151 | return GDK_FAIL; | 
|---|
| 152 |  | 
|---|
| 153 | IODEBUG fprintf(stderr, "#GDKremovedir(%s)\n", dirnamestr); | 
|---|
| 154 |  | 
|---|
| 155 | if ((dirp = opendir(dirnamestr)) == NULL) { | 
|---|
| 156 | GDKfree(dirnamestr); | 
|---|
| 157 | return GDK_SUCCEED; | 
|---|
| 158 | } | 
|---|
| 159 | while ((dent = readdir(dirp)) != NULL) { | 
|---|
| 160 | if (dent->d_name[0] == '.' && | 
|---|
| 161 | (dent->d_name[1] == 0 || | 
|---|
| 162 | (dent->d_name[1] == '.' && dent->d_name[2] == 0))) { | 
|---|
| 163 | /* skip . and .. */ | 
|---|
| 164 | continue; | 
|---|
| 165 | } | 
|---|
| 166 | path = GDKfilepath(farmid, dirname, dent->d_name, NULL); | 
|---|
| 167 | ret = remove(path); | 
|---|
| 168 | IODEBUG fprintf(stderr, "#remove %s = %d\n", path, ret); | 
|---|
| 169 | GDKfree(path); | 
|---|
| 170 | } | 
|---|
| 171 | closedir(dirp); | 
|---|
| 172 | ret = rmdir(dirnamestr); | 
|---|
| 173 | if (ret != 0) | 
|---|
| 174 | GDKsyserror( "GDKremovedir: rmdir(%s) failed.\n", dirnamestr); | 
|---|
| 175 | IODEBUG fprintf(stderr, "#rmdir %s = %d\n", dirnamestr, ret); | 
|---|
| 176 | GDKfree(dirnamestr); | 
|---|
| 177 | return ret ? GDK_FAIL : GDK_SUCCEED; | 
|---|
| 178 | } | 
|---|
| 179 |  | 
|---|
| 180 | #define _FUNBUF		0x040000 | 
|---|
| 181 | #define _FWRTHR		0x080000 | 
|---|
| 182 | #define _FRDSEQ		0x100000 | 
|---|
| 183 |  | 
|---|
| 184 | /* open a file and return its file descriptor; the file is specified | 
|---|
| 185 | * using farmid, name and extension; if opening for writing, we create | 
|---|
| 186 | * the parent directory if necessary */ | 
|---|
| 187 | int | 
|---|
| 188 | GDKfdlocate(int farmid, const char *nme, const char *mode, const char *extension) | 
|---|
| 189 | { | 
|---|
| 190 | char *path = NULL; | 
|---|
| 191 | int fd, flags = O_CLOEXEC; | 
|---|
| 192 |  | 
|---|
| 193 | assert(!GDKinmemory()); | 
|---|
| 194 | if (nme == NULL || *nme == 0) | 
|---|
| 195 | return -1; | 
|---|
| 196 |  | 
|---|
| 197 | assert(farmid != NOFARM || extension == NULL); | 
|---|
| 198 | if (farmid != NOFARM) { | 
|---|
| 199 | path = GDKfilepath(farmid, BATDIR, nme, extension); | 
|---|
| 200 | if (path == NULL) | 
|---|
| 201 | return -1; | 
|---|
| 202 | nme = path; | 
|---|
| 203 | } | 
|---|
| 204 |  | 
|---|
| 205 | if (*mode == 'm') {	/* file open for mmap? */ | 
|---|
| 206 | mode++; | 
|---|
| 207 | #ifdef _CYGNUS_H_ | 
|---|
| 208 | } else { | 
|---|
| 209 | flags |= _FRDSEQ;	/* WIN32 CreateFile(FILE_FLAG_SEQUENTIAL_SCAN) */ | 
|---|
| 210 | #endif | 
|---|
| 211 | } | 
|---|
| 212 |  | 
|---|
| 213 | if (strchr(mode, 'w')) { | 
|---|
| 214 | flags |= O_WRONLY | O_CREAT; | 
|---|
| 215 | } else if (!strchr(mode, '+')) { | 
|---|
| 216 | flags |= O_RDONLY; | 
|---|
| 217 | } else { | 
|---|
| 218 | flags |= O_RDWR; | 
|---|
| 219 | } | 
|---|
| 220 | #ifdef WIN32 | 
|---|
| 221 | flags |= strchr(mode, 'b') ? O_BINARY : O_TEXT; | 
|---|
| 222 | #endif | 
|---|
| 223 | fd = open(nme, flags, MONETDB_MODE); | 
|---|
| 224 | if (fd < 0 && *mode == 'w') { | 
|---|
| 225 | /* try to create the directory, in case that was the problem */ | 
|---|
| 226 | if (GDKcreatedir(nme) == GDK_SUCCEED) { | 
|---|
| 227 | fd = open(nme, flags, MONETDB_MODE); | 
|---|
| 228 | if (fd < 0) | 
|---|
| 229 | GDKsyserror( "GDKfdlocate: cannot open file %s\n", nme); | 
|---|
| 230 | } | 
|---|
| 231 | } | 
|---|
| 232 | /* don't generate error if we can't open a file for reading */ | 
|---|
| 233 | GDKfree(path); | 
|---|
| 234 | return fd; | 
|---|
| 235 | } | 
|---|
| 236 |  | 
|---|
/* Same as GDKfdlocate, but wrap the descriptor in a stdio stream.
 * Returns NULL when the file cannot be opened or fdopen() fails; in
 * the latter case the descriptor is closed again. */
FILE *
GDKfilelocate(int farmid, const char *nme, const char *mode, const char *extension)
{
	FILE *fp;
	int fd = GDKfdlocate(farmid, nme, mode, extension);

	if (fd < 0)
		return NULL;
	/* the leading 'm' (mmap marker) is not a valid fdopen mode */
	if (*mode == 'm')
		mode++;
	fp = fdopen(fd, mode);
	if (fp == NULL) {
		GDKsyserror("GDKfilelocate: cannot fdopen file\n");
		close(fd);
	}
	return fp;
}
|---|
| 255 |  | 
|---|
| 256 | FILE * | 
|---|
| 257 | GDKfileopen(int farmid, const char *dir, const char *name, const char *extension, const char *mode) | 
|---|
| 258 | { | 
|---|
| 259 | char *path; | 
|---|
| 260 |  | 
|---|
| 261 | /* if name is null, try to get one from dir (in case it was a path) */ | 
|---|
| 262 | path = GDKfilepath(farmid, dir, name, extension); | 
|---|
| 263 |  | 
|---|
| 264 | if (path != NULL) { | 
|---|
| 265 | FILE *f; | 
|---|
| 266 | IODEBUG fprintf(stderr, "#GDKfileopen(%s)\n", path); | 
|---|
| 267 | f = fopen(path, mode); | 
|---|
| 268 | GDKfree(path); | 
|---|
| 269 | return f; | 
|---|
| 270 | } | 
|---|
| 271 | return NULL; | 
|---|
| 272 | } | 
|---|
| 273 |  | 
|---|
| 274 | /* remove the file */ | 
|---|
| 275 | gdk_return | 
|---|
| 276 | GDKunlink(int farmid, const char *dir, const char *nme, const char *ext) | 
|---|
| 277 | { | 
|---|
| 278 | if (nme && *nme) { | 
|---|
| 279 | char *path; | 
|---|
| 280 |  | 
|---|
| 281 | path = GDKfilepath(farmid, dir, nme, ext); | 
|---|
| 282 | if (path == NULL) | 
|---|
| 283 | return GDK_FAIL; | 
|---|
| 284 | /* if file already doesn't exist, we don't care */ | 
|---|
| 285 | if (remove(path) != 0 && errno != ENOENT) { | 
|---|
| 286 | GDKsyserror( "GDKunlink(%s)\n", path); | 
|---|
| 287 | IODEBUG fprintf(stderr, "#remove %s = -1\n", path); | 
|---|
| 288 | GDKfree(path); | 
|---|
| 289 | return GDK_FAIL; | 
|---|
| 290 | } | 
|---|
| 291 | GDKfree(path); | 
|---|
| 292 | return GDK_SUCCEED; | 
|---|
| 293 | } | 
|---|
| 294 | return GDK_FAIL; | 
|---|
| 295 | } | 
|---|
| 296 |  | 
|---|
| 297 | /* | 
|---|
| 298 | * A move routine is overloaded to deal with extensions. | 
|---|
| 299 | */ | 
|---|
| 300 | gdk_return | 
|---|
| 301 | GDKmove(int farmid, const char *dir1, const char *nme1, const char *ext1, const char *dir2, const char *nme2, const char *ext2) | 
|---|
| 302 | { | 
|---|
| 303 | char *path1; | 
|---|
| 304 | char *path2; | 
|---|
| 305 | int ret, t0 = 0; | 
|---|
| 306 |  | 
|---|
| 307 | IODEBUG t0 = GDKms(); | 
|---|
| 308 |  | 
|---|
| 309 | if ((nme1 == NULL) || (*nme1 == 0)) { | 
|---|
| 310 | GDKerror( "GDKmove: no file specified\n"); | 
|---|
| 311 | return GDK_FAIL; | 
|---|
| 312 | } | 
|---|
| 313 | path1 = GDKfilepath(farmid, dir1, nme1, ext1); | 
|---|
| 314 | path2 = GDKfilepath(farmid, dir2, nme2, ext2); | 
|---|
| 315 | if (path1 && path2) { | 
|---|
| 316 | ret = rename(path1, path2); | 
|---|
| 317 | if (ret < 0) | 
|---|
| 318 | GDKsyserror( "GDKmove: cannot rename %s to %s\n", path1, path2); | 
|---|
| 319 |  | 
|---|
| 320 | IODEBUG fprintf(stderr, "#move %s %s = %d (%dms)\n", path1, path2, ret, GDKms() - t0); | 
|---|
| 321 | } else { | 
|---|
| 322 | ret = -1; | 
|---|
| 323 | } | 
|---|
| 324 | GDKfree(path1); | 
|---|
| 325 | GDKfree(path2); | 
|---|
| 326 | return ret < 0 ? GDK_FAIL : GDK_SUCCEED; | 
|---|
| 327 | } | 
|---|
| 328 |  | 
|---|
| 329 | gdk_return | 
|---|
| 330 | GDKextendf(int fd, size_t size, const char *fn) | 
|---|
| 331 | { | 
|---|
| 332 | struct stat stb; | 
|---|
| 333 | int rt = 0; | 
|---|
| 334 | int t0 = 0; | 
|---|
| 335 |  | 
|---|
| 336 | assert(!GDKinmemory()); | 
|---|
| 337 | #ifdef STATIC_CODE_ANALYSIS | 
|---|
| 338 | if (fd < 0)		/* in real life, if fd < 0, fstat will fail */ | 
|---|
| 339 | return GDK_FAIL; | 
|---|
| 340 | #endif | 
|---|
| 341 | if (fstat(fd, &stb) < 0) { | 
|---|
| 342 | /* shouldn't happen */ | 
|---|
| 343 | GDKsyserror( "GDKextendf: fstat unexpectedly failed\n"); | 
|---|
| 344 | return GDK_FAIL; | 
|---|
| 345 | } | 
|---|
| 346 | /* if necessary, extend the underlying file */ | 
|---|
| 347 | IODEBUG t0 = GDKms(); | 
|---|
| 348 | if (stb.st_size < (off_t) size) { | 
|---|
| 349 | #ifdef HAVE_FALLOCATE | 
|---|
| 350 | if ((rt = fallocate(fd, 0, stb.st_size, (off_t) size - stb.st_size)) < 0 && | 
|---|
| 351 | errno == EOPNOTSUPP) | 
|---|
| 352 | /* on Linux, posix_fallocate uses a slow | 
|---|
| 353 | * method to allocate blocks if the underlying | 
|---|
| 354 | * file system doesn't support the operation, | 
|---|
| 355 | * so use fallocate instead and just resize | 
|---|
| 356 | * the file if it fails */ | 
|---|
| 357 | #else | 
|---|
| 358 | #ifdef HAVE_POSIX_FALLOCATE | 
|---|
| 359 | /* posix_fallocate returns error number on failure, | 
|---|
| 360 | * not -1 :-( */ | 
|---|
| 361 | if ((rt = posix_fallocate(fd, stb.st_size, (off_t) size - stb.st_size)) == EINVAL) | 
|---|
| 362 | /* on Solaris/OpenIndiana, this may mean that | 
|---|
| 363 | * the underlying file system doesn't support | 
|---|
| 364 | * the operation, so just resize the file */ | 
|---|
| 365 | #endif | 
|---|
| 366 | #endif | 
|---|
| 367 | /* we get here when (posix_)fallocate fails because it | 
|---|
| 368 | * is not supported on the file system, or if neither | 
|---|
| 369 | * function exists */ | 
|---|
| 370 | rt = ftruncate(fd, (off_t) size); | 
|---|
| 371 | if (rt != 0) { | 
|---|
| 372 | /* extending failed, try to reduce file size | 
|---|
| 373 | * back to original */ | 
|---|
| 374 | int err = errno; | 
|---|
| 375 | if (ftruncate(fd, stb.st_size)) | 
|---|
| 376 | perror( "ftruncate"); | 
|---|
| 377 | errno = err; /* restore for error message */ | 
|---|
| 378 | GDKsyserror( "GDKextendf: could not extend file\n"); | 
|---|
| 379 | } | 
|---|
| 380 | } | 
|---|
| 381 | IODEBUG fprintf(stderr, "#GDKextend %s %zu -> %zu %dms%s\n", | 
|---|
| 382 | fn, (size_t) stb.st_size, size, | 
|---|
| 383 | GDKms() - t0, rt != 0 ? " (failed)": ""); | 
|---|
| 384 | /* posix_fallocate returns != 0 on failure, fallocate and | 
|---|
| 385 | * ftruncate return -1 on failure, but all three return 0 on | 
|---|
| 386 | * success */ | 
|---|
| 387 | return rt != 0 ? GDK_FAIL : GDK_SUCCEED; | 
|---|
| 388 | } | 
|---|
| 389 |  | 
|---|
| 390 | gdk_return | 
|---|
| 391 | GDKextend(const char *fn, size_t size) | 
|---|
| 392 | { | 
|---|
| 393 | int fd, flags = O_RDWR; | 
|---|
| 394 | gdk_return rt = GDK_FAIL; | 
|---|
| 395 |  | 
|---|
| 396 | assert(!GDKinmemory()); | 
|---|
| 397 | #ifdef O_BINARY | 
|---|
| 398 | /* On Windows, open() fails if the file is bigger than 2^32 | 
|---|
| 399 | * bytes without O_BINARY. */ | 
|---|
| 400 | flags |= O_BINARY; | 
|---|
| 401 | #endif | 
|---|
| 402 | if ((fd = open(fn, flags | O_CLOEXEC)) >= 0) { | 
|---|
| 403 | rt = GDKextendf(fd, size, fn); | 
|---|
| 404 | close(fd); | 
|---|
| 405 | } else { | 
|---|
| 406 | GDKsyserror( "GDKextend: cannot open file %s\n", fn); | 
|---|
| 407 | } | 
|---|
| 408 | return rt; | 
|---|
| 409 | } | 
|---|
| 410 |  | 
|---|
| 411 | /* | 
|---|
| 412 | * @+ Save and load. | 
|---|
| 413 | * The BAT is saved on disk in several files. The extension DESC | 
|---|
| 414 | * denotes the descriptor, BUNs the bun heap, and HHEAP and THEAP the | 
|---|
| 415 | * other heaps. The storage mechanism of a file can be memory mapped | 
|---|
| 416 | * (STORE_MMAP) or malloced (STORE_MEM). | 
|---|
| 417 | * | 
|---|
| 418 | * These modes indicate the disk-layout and the intended mapping. | 
|---|
| 419 | * The primary concern here is to handle STORE_MMAP and STORE_MEM. | 
|---|
| 420 | */ | 
|---|
| 421 | gdk_return | 
|---|
| 422 | GDKsave(int farmid, const char *nme, const char *ext, void *buf, size_t size, storage_t mode, bool dosync) | 
|---|
| 423 | { | 
|---|
| 424 | int err = 0; | 
|---|
| 425 |  | 
|---|
| 426 | IODEBUG fprintf(stderr, "#GDKsave: name=%s, ext=%s, mode %d, dosync=%d\n", nme, ext ? ext : "", (int) mode, dosync); | 
|---|
| 427 |  | 
|---|
| 428 | assert(!GDKinmemory()); | 
|---|
| 429 | if (mode == STORE_MMAP) { | 
|---|
| 430 | if (dosync && size && !(GDKdebug & NOSYNCMASK) && MT_msync(buf, size) < 0) | 
|---|
| 431 | err = -1; | 
|---|
| 432 | if (err) | 
|---|
| 433 | GDKsyserror( "GDKsave: error on: name=%s, ext=%s, " | 
|---|
| 434 | "mode=%d\n", nme, ext ? ext : "", | 
|---|
| 435 | (int) mode); | 
|---|
| 436 | IODEBUG fprintf(stderr, | 
|---|
| 437 | "#MT_msync(buf %p, size %zu" | 
|---|
| 438 | ") = %d\n", | 
|---|
| 439 | buf, size, err); | 
|---|
| 440 | } else { | 
|---|
| 441 | int fd; | 
|---|
| 442 |  | 
|---|
| 443 | if ((fd = GDKfdlocate(farmid, nme, "wb", ext)) >= 0) { | 
|---|
| 444 | /* write() on 64-bits Redhat for IA64 returns | 
|---|
| 445 | * 32-bits signed result (= OS BUG)! write() | 
|---|
| 446 | * on Windows only takes unsigned int as | 
|---|
| 447 | * size */ | 
|---|
| 448 | while (size > 0) { | 
|---|
| 449 | /* circumvent problems by writing huge | 
|---|
| 450 | * buffers in chunks <= 1GiB */ | 
|---|
| 451 | ssize_t ret; | 
|---|
| 452 |  | 
|---|
| 453 | ret = write(fd, buf, | 
|---|
| 454 | (unsigned) MIN(1 << 30, size)); | 
|---|
| 455 | if (ret < 0) { | 
|---|
| 456 | err = -1; | 
|---|
| 457 | GDKsyserror( "GDKsave: error %zd" | 
|---|
| 458 | " on: name=%s, ext=%s, " | 
|---|
| 459 | "mode=%d\n", ret, nme, | 
|---|
| 460 | ext ? ext : "", (int) mode); | 
|---|
| 461 | break; | 
|---|
| 462 | } | 
|---|
| 463 | size -= ret; | 
|---|
| 464 | buf = (void *) ((char *) buf + ret); | 
|---|
| 465 | IODEBUG fprintf(stderr, | 
|---|
| 466 | "#write(fd %d, buf %p" | 
|---|
| 467 | ", size %u) = %zd\n", | 
|---|
| 468 | fd, buf, | 
|---|
| 469 | (unsigned) MIN(1 << 30, size), | 
|---|
| 470 | ret); | 
|---|
| 471 | } | 
|---|
| 472 | if (dosync && !(GDKdebug & NOSYNCMASK) | 
|---|
| 473 | #if defined(NATIVE_WIN32) | 
|---|
| 474 | && _commit(fd) < 0 | 
|---|
| 475 | #elif defined(HAVE_FDATASYNC) | 
|---|
| 476 | && fdatasync(fd) < 0 | 
|---|
| 477 | #elif defined(HAVE_FSYNC) | 
|---|
| 478 | && fsync(fd) < 0 | 
|---|
| 479 | #endif | 
|---|
| 480 | ) { | 
|---|
| 481 | GDKsyserror( "GDKsave: error on: name=%s, " | 
|---|
| 482 | "ext=%s, mode=%d\n", nme, | 
|---|
| 483 | ext ? ext : "", (int) mode); | 
|---|
| 484 | err = -1; | 
|---|
| 485 | } | 
|---|
| 486 | err |= close(fd); | 
|---|
| 487 | if (err && GDKunlink(farmid, BATDIR, nme, ext) != GDK_SUCCEED) { | 
|---|
| 488 | /* do not tolerate corrupt heap images | 
|---|
| 489 | * (BBPrecover on restart will kill | 
|---|
| 490 | * them) */ | 
|---|
| 491 | GDKerror( "GDKsave: could not remove: name=%s, " | 
|---|
| 492 | "ext=%s, mode %d\n", nme, | 
|---|
| 493 | ext ? ext : "", (int) mode); | 
|---|
| 494 | return GDK_FAIL; | 
|---|
| 495 | } | 
|---|
| 496 | } else { | 
|---|
| 497 | err = -1; | 
|---|
| 498 | GDKerror( "GDKsave: failed name=%s, ext=%s, mode %d\n", | 
|---|
| 499 | nme, ext ? ext : "", (int) mode); | 
|---|
| 500 | } | 
|---|
| 501 | } | 
|---|
| 502 | return err ? GDK_FAIL : GDK_SUCCEED; | 
|---|
| 503 | } | 
|---|
| 504 |  | 
|---|
| 505 | /* | 
|---|
| 506 | * Space for the load is directly allocated and the heaps are mapped. | 
|---|
| 507 | * Further initialization of the atom heaps require a separate action | 
|---|
| 508 | * defined in their implementation. | 
|---|
| 509 | * | 
|---|
| 510 | * size -- how much to read | 
|---|
| 511 | * *maxsize -- (in/out) how much to allocate / how much was allocated | 
|---|
| 512 | */ | 
|---|
| 513 | char * | 
|---|
| 514 | GDKload(int farmid, const char *nme, const char *ext, size_t size, size_t *maxsize, storage_t mode) | 
|---|
| 515 | { | 
|---|
| 516 | char *ret = NULL; | 
|---|
| 517 |  | 
|---|
| 518 | assert(!GDKinmemory()); | 
|---|
| 519 | assert(size <= *maxsize); | 
|---|
| 520 | assert(farmid != NOFARM || ext == NULL); | 
|---|
| 521 | IODEBUG { | 
|---|
| 522 | fprintf(stderr, "#GDKload: name=%s, ext=%s, mode %d\n", nme, ext ? ext : "", (int) mode); | 
|---|
| 523 | } | 
|---|
| 524 | if (mode == STORE_MEM) { | 
|---|
| 525 | int fd = GDKfdlocate(farmid, nme, "rb", ext); | 
|---|
| 526 |  | 
|---|
| 527 | if (fd >= 0) { | 
|---|
| 528 | char *dst = ret = GDKmalloc(*maxsize); | 
|---|
| 529 | ssize_t n_expected, n = 0; | 
|---|
| 530 |  | 
|---|
| 531 | if (ret) { | 
|---|
| 532 | /* read in chunks, some OSs do not | 
|---|
| 533 | * give you all at once and Windows | 
|---|
| 534 | * only accepts int */ | 
|---|
| 535 | for (n_expected = (ssize_t) size; n_expected > 0; n_expected -= n) { | 
|---|
| 536 | n = read(fd, dst, (unsigned) MIN(1 << 30, n_expected)); | 
|---|
| 537 | if (n < 0) | 
|---|
| 538 | GDKsyserror( "GDKload: cannot read: name=%s, ext=%s, %zu bytes missing.\n", nme, ext ? ext : "", (size_t) n_expected); | 
|---|
| 539 | #ifndef STATIC_CODE_ANALYSIS | 
|---|
| 540 | /* Coverity doesn't seem to | 
|---|
| 541 | * recognize that we're just | 
|---|
| 542 | * printing the value of ptr, | 
|---|
| 543 | * not its contents */ | 
|---|
| 544 | IODEBUG fprintf(stderr, "#read(dst %p, n_expected %zd, fd %d) = %zd\n", (void *)dst, n_expected, fd, n); | 
|---|
| 545 | #endif | 
|---|
| 546 |  | 
|---|
| 547 | if (n <= 0) | 
|---|
| 548 | break; | 
|---|
| 549 | dst += n; | 
|---|
| 550 | } | 
|---|
| 551 | if (n_expected > 0) { | 
|---|
| 552 | /* we couldn't read all, error | 
|---|
| 553 | * already generated */ | 
|---|
| 554 | GDKfree(ret); | 
|---|
| 555 | ret = NULL; | 
|---|
| 556 | } | 
|---|
| 557 | #ifndef NDEBUG | 
|---|
| 558 | /* just to make valgrind happy, we | 
|---|
| 559 | * initialize the whole thing */ | 
|---|
| 560 | if (ret && *maxsize > size) | 
|---|
| 561 | memset(ret + size, 0, *maxsize - size); | 
|---|
| 562 | #endif | 
|---|
| 563 | } | 
|---|
| 564 | close(fd); | 
|---|
| 565 | } else { | 
|---|
| 566 | GDKerror( "GDKload: cannot open: name=%s, ext=%s\n", nme, ext ? ext : ""); | 
|---|
| 567 | } | 
|---|
| 568 | } else { | 
|---|
| 569 | char *path = NULL; | 
|---|
| 570 |  | 
|---|
| 571 | /* round up to multiple of GDK_mmap_pagesize with a | 
|---|
| 572 | * minimum of one */ | 
|---|
| 573 | size = (*maxsize + GDK_mmap_pagesize - 1) & ~(GDK_mmap_pagesize - 1); | 
|---|
| 574 | if (size == 0) | 
|---|
| 575 | size = GDK_mmap_pagesize; | 
|---|
| 576 | if (farmid != NOFARM) { | 
|---|
| 577 | path = GDKfilepath(farmid, BATDIR, nme, ext); | 
|---|
| 578 | nme = path; | 
|---|
| 579 | } | 
|---|
| 580 | if (nme != NULL && GDKextend(nme, size) == GDK_SUCCEED) { | 
|---|
| 581 | int mod = MMAP_READ | MMAP_WRITE | MMAP_SEQUENTIAL; | 
|---|
| 582 |  | 
|---|
| 583 | if (mode == STORE_PRIV) | 
|---|
| 584 | mod |= MMAP_COPY; | 
|---|
| 585 | else | 
|---|
| 586 | mod |= MMAP_SYNC; | 
|---|
| 587 | ret = GDKmmap(nme, mod, size); | 
|---|
| 588 | if (ret != NULL) { | 
|---|
| 589 | /* success: update allocated size */ | 
|---|
| 590 | *maxsize = size; | 
|---|
| 591 | } | 
|---|
| 592 | IODEBUG fprintf(stderr, "#mmap(NULL, 0, maxsize %zu, mod %d, path %s, 0) = %p\n", size, mod, nme, (void *)ret); | 
|---|
| 593 | } | 
|---|
| 594 | GDKfree(path); | 
|---|
| 595 | } | 
|---|
| 596 | return ret; | 
|---|
| 597 | } | 
|---|
| 598 |  | 
|---|
| 599 | /* | 
|---|
| 600 | * @+ BAT disk storage | 
|---|
| 601 | * | 
|---|
| 602 | * Between sessions the BATs comprising the database are saved on | 
|---|
| 603 | * disk.  To simplify code, we assume a UNIX directory called its | 
|---|
| 604 | * physical @%home@ where they are to be located.  The subdirectories | 
|---|
| 605 | * BAT and PRG contain what its name says. | 
|---|
| 606 | * | 
|---|
| 607 | * A BAT created by @%COLnew@ is considered temporary until one calls | 
|---|
| 608 | * the routine @%BATsave@. This routine reserves disk space and checks | 
|---|
| 609 | * for name clashes. | 
|---|
| 610 | * | 
|---|
| 611 | * Saving and restoring BATs is left to the upper layers. The library | 
|---|
| 612 | * merely copies the data into place.  Failure to read or write the | 
|---|
| 613 | * BAT results in a NULL, otherwise it returns the BAT pointer. | 
|---|
| 614 | */ | 
|---|
| 615 | static BAT * | 
|---|
| 616 | DESCload(int i) | 
|---|
| 617 | { | 
|---|
| 618 | const char *s, *nme = BBP_physical(i); | 
|---|
| 619 | BAT *b = NULL; | 
|---|
| 620 | int tt; | 
|---|
| 621 |  | 
|---|
| 622 | IODEBUG { | 
|---|
| 623 | fprintf(stderr, "#DESCload %s\n", nme ? nme : "<noname>"); | 
|---|
| 624 | } | 
|---|
| 625 | b = BBP_desc(i); | 
|---|
| 626 |  | 
|---|
| 627 | if (b == NULL) | 
|---|
| 628 | return 0; | 
|---|
| 629 |  | 
|---|
| 630 | tt = b->ttype; | 
|---|
| 631 | if ((tt < 0 && (tt = ATOMindex(s = ATOMunknown_name(tt))) < 0)) { | 
|---|
| 632 | GDKerror( "DESCload: atom '%s' unknown, in BAT '%s'.\n", s, nme); | 
|---|
| 633 | return NULL; | 
|---|
| 634 | } | 
|---|
| 635 | b->ttype = tt; | 
|---|
| 636 |  | 
|---|
| 637 | /* reconstruct mode from BBP status (BATmode doesn't flush | 
|---|
| 638 | * descriptor, so loaded mode may be stale) */ | 
|---|
| 639 | b->batTransient = (BBP_status(b->batCacheid) & BBPPERSISTENT) == 0; | 
|---|
| 640 | b->batCopiedtodisk = true; | 
|---|
| 641 | DESCclean(b); | 
|---|
| 642 | return b; | 
|---|
| 643 | } | 
|---|
| 644 |  | 
|---|
| 645 | void | 
|---|
| 646 | DESCclean(BAT *b) | 
|---|
| 647 | { | 
|---|
| 648 | b->batDirtyflushed = DELTAdirty(b); | 
|---|
| 649 | b->batDirtydesc = false; | 
|---|
| 650 | b->theap.dirty = false; | 
|---|
| 651 | if (b->tvheap) | 
|---|
| 652 | b->tvheap->dirty = false; | 
|---|
| 653 | } | 
|---|
| 654 |  | 
|---|
| 655 | /* spawning the background msync should be done carefully | 
|---|
| 656 | * because there is a (small) chance that the BAT has been | 
|---|
| 657 | * deleted by the time you issue the msync. | 
|---|
| 658 | * This leaves you with possibly deadbeef BAT descriptors. | 
|---|
| 659 | */ | 
|---|
| 660 |  | 
|---|
| 661 | /* #define DISABLE_MSYNC */ | 
|---|
| 662 | #define MSYNC_BACKGROUND | 
|---|
| 663 |  | 
|---|
| 664 | #ifndef DISABLE_MSYNC | 
|---|
| 665 | #ifndef MS_ASYNC | 
|---|
| 666 | struct msync { | 
|---|
| 667 | bat id; | 
|---|
| 668 | Heap *h; | 
|---|
| 669 | }; | 
|---|
| 670 |  | 
|---|
| 671 | static void | 
|---|
| 672 | BATmsyncImplementation(void *arg) | 
|---|
| 673 | { | 
|---|
| 674 | Heap *h = ((struct msync *) arg)->h; | 
|---|
| 675 |  | 
|---|
| 676 | (void) MT_msync(h->base, h->size); | 
|---|
| 677 | BBPunfix(((struct msync *) arg)->id); | 
|---|
| 678 | GDKfree(arg); | 
|---|
| 679 | } | 
|---|
| 680 | #endif | 
|---|
| 681 | #endif | 
|---|
| 682 |  | 
|---|
| 683 | void | 
|---|
| 684 | BATmsync(BAT *b) | 
|---|
| 685 | { | 
|---|
| 686 | /* we don't sync views or if we're told not to */ | 
|---|
| 687 | if (GDKinmemory() || isVIEW(b) || (GDKdebug & NOSYNCMASK)) | 
|---|
| 688 | return; | 
|---|
| 689 | /* we don't sync transients */ | 
|---|
| 690 | if (b->theap.farmid != 0 || | 
|---|
| 691 | (b->tvheap != NULL && b->tvheap->farmid != 0)) | 
|---|
| 692 | return; | 
|---|
| 693 | #ifndef DISABLE_MSYNC | 
|---|
| 694 | #ifdef MS_ASYNC | 
|---|
| 695 | if (b->theap.storage == STORE_MMAP) | 
|---|
| 696 | (void) msync(b->theap.base, b->theap.free, MS_ASYNC); | 
|---|
| 697 | if (b->tvheap && b->tvheap->storage == STORE_MMAP) | 
|---|
| 698 | (void) msync(b->tvheap->base, b->tvheap->free, MS_ASYNC); | 
|---|
| 699 | #else | 
|---|
| 700 | { | 
|---|
| 701 | #ifdef MSYNC_BACKGROUND | 
|---|
| 702 | MT_Id tid; | 
|---|
| 703 | #endif | 
|---|
| 704 | struct msync *arg; | 
|---|
| 705 |  | 
|---|
| 706 | assert(!b->batTransient); | 
|---|
| 707 | if (b->theap.storage == STORE_MMAP && | 
|---|
| 708 | (arg = GDKmalloc(sizeof(*arg))) != NULL) { | 
|---|
| 709 | arg->id = b->batCacheid; | 
|---|
| 710 | arg->h = &b->theap; | 
|---|
| 711 | BBPfix(b->batCacheid); | 
|---|
| 712 | #ifdef MSYNC_BACKGROUND | 
|---|
| 713 | char name[16]; | 
|---|
| 714 | snprintf(name, sizeof(name), "msync%d", b->batCacheid); | 
|---|
| 715 | if (MT_create_thread(&tid, BATmsyncImplementation, arg, | 
|---|
| 716 | MT_THR_DETACHED, name) < 0) { | 
|---|
| 717 | /* don't bother if we can't create a thread */ | 
|---|
| 718 | BBPunfix(b->batCacheid); | 
|---|
| 719 | GDKfree(arg); | 
|---|
| 720 | } | 
|---|
| 721 | #else | 
|---|
| 722 | BATmsyncImplementation(arg); | 
|---|
| 723 | #endif | 
|---|
| 724 | } | 
|---|
| 725 |  | 
|---|
| 726 | if (b->tvheap && b->tvheap->storage == STORE_MMAP && | 
|---|
| 727 | (arg = GDKmalloc(sizeof(*arg))) != NULL) { | 
|---|
| 728 | arg->id = b->batCacheid; | 
|---|
| 729 | arg->h = b->tvheap; | 
|---|
| 730 | BBPfix(b->batCacheid); | 
|---|
| 731 | #ifdef MSYNC_BACKGROUND | 
|---|
| 732 | char name[16]; | 
|---|
| 733 | snprintf(name, sizeof(name), "msync%d", b->batCacheid); | 
|---|
| 734 | if (MT_create_thread(&tid, BATmsyncImplementation, arg, | 
|---|
| 735 | MT_THR_DETACHED, name) < 0) { | 
|---|
| 736 | /* don't bother if we can't create a thread */ | 
|---|
| 737 | BBPunfix(b->batCacheid); | 
|---|
| 738 | GDKfree(arg); | 
|---|
| 739 | } | 
|---|
| 740 | #else | 
|---|
| 741 | BATmsyncImplementation(arg); | 
|---|
| 742 | #endif | 
|---|
| 743 | } | 
|---|
| 744 | } | 
|---|
| 745 | #endif | 
|---|
| 746 | #else | 
|---|
| 747 | (void) b; | 
|---|
| 748 | #endif	/* DISABLE_MSYNC */ | 
|---|
| 749 | } | 
|---|
| 750 |  | 
|---|
| 751 | gdk_return | 
|---|
| 752 | BATsave(BAT *bd) | 
|---|
| 753 | { | 
|---|
| 754 | gdk_return err = GDK_SUCCEED; | 
|---|
| 755 | const char *nme; | 
|---|
| 756 | BAT bs; | 
|---|
| 757 | Heap vhs; | 
|---|
| 758 | BAT *b = bd; | 
|---|
| 759 |  | 
|---|
| 760 | assert(!GDKinmemory()); | 
|---|
| 761 | BATcheck(b, "BATsave", GDK_FAIL); | 
|---|
| 762 |  | 
|---|
| 763 | assert(b->batCacheid > 0); | 
|---|
| 764 | /* views cannot be saved, but make an exception for | 
|---|
| 765 | * force-remapped views */ | 
|---|
| 766 | if (isVIEW(b) && | 
|---|
| 767 | !(b->theap.copied && b->theap.storage == STORE_MMAP)) { | 
|---|
| 768 | GDKerror( "BATsave: %s is a view on %s; cannot be saved\n", BATgetId(b), BBPname(VIEWtparent(b))); | 
|---|
| 769 | return GDK_FAIL; | 
|---|
| 770 | } | 
|---|
| 771 | if (!BATdirty(b)) { | 
|---|
| 772 | return GDK_SUCCEED; | 
|---|
| 773 | } | 
|---|
| 774 |  | 
|---|
| 775 | /* copy the descriptor to a local variable in order to let our | 
|---|
| 776 | * messing in the BAT descriptor not affect other threads that | 
|---|
| 777 | * only read it. */ | 
|---|
| 778 | bs = *b; | 
|---|
| 779 | b = &bs; | 
|---|
| 780 |  | 
|---|
| 781 | if (b->tvheap) { | 
|---|
| 782 | vhs = *bd->tvheap; | 
|---|
| 783 | b->tvheap = &vhs; | 
|---|
| 784 | } | 
|---|
| 785 |  | 
|---|
| 786 | /* start saving data */ | 
|---|
| 787 | nme = BBP_physical(b->batCacheid); | 
|---|
| 788 | if (!b->batCopiedtodisk || b->batDirtydesc || b->theap.dirty) | 
|---|
| 789 | if (err == GDK_SUCCEED && b->ttype) | 
|---|
| 790 | err = HEAPsave(&b->theap, nme, "tail"); | 
|---|
| 791 | if (b->tvheap | 
|---|
| 792 | && (!b->batCopiedtodisk || b->batDirtydesc || b->tvheap->dirty) | 
|---|
| 793 | && b->ttype | 
|---|
| 794 | && b->tvarsized | 
|---|
| 795 | && err == GDK_SUCCEED) | 
|---|
| 796 | err = HEAPsave(b->tvheap, nme, "theap"); | 
|---|
| 797 |  | 
|---|
| 798 | if (err == GDK_SUCCEED) { | 
|---|
| 799 | bd->batCopiedtodisk = true; | 
|---|
| 800 | DESCclean(bd); | 
|---|
| 801 | return GDK_SUCCEED; | 
|---|
| 802 | } | 
|---|
| 803 | return err; | 
|---|
| 804 | } | 
|---|
| 805 |  | 
|---|
| 806 |  | 
|---|
| 807 | /* | 
|---|
| 808 | * TODO: move to gdk_bbp.c | 
|---|
| 809 | */ | 
|---|
| 810 | BAT * | 
|---|
| 811 | BATload_intern(bat bid, bool lock) | 
|---|
| 812 | { | 
|---|
| 813 | const char *nme; | 
|---|
| 814 | BAT *b; | 
|---|
| 815 |  | 
|---|
| 816 | assert(!GDKinmemory()); | 
|---|
| 817 | assert(bid > 0); | 
|---|
| 818 |  | 
|---|
| 819 | nme = BBP_physical(bid); | 
|---|
| 820 | b = DESCload(bid); | 
|---|
| 821 |  | 
|---|
| 822 | if (b == NULL) { | 
|---|
| 823 | return NULL; | 
|---|
| 824 | } | 
|---|
| 825 |  | 
|---|
| 826 | /* LOAD bun heap */ | 
|---|
| 827 | if (b->ttype != TYPE_void) { | 
|---|
| 828 | if (HEAPload(&b->theap, nme, "tail", b->batRestricted == BAT_READ) != GDK_SUCCEED) { | 
|---|
| 829 | HEAPfree(&b->theap, false); | 
|---|
| 830 | return NULL; | 
|---|
| 831 | } | 
|---|
| 832 | assert(b->theap.size >> b->tshift <= BUN_MAX); | 
|---|
| 833 | b->batCapacity = (BUN) (b->theap.size >> b->tshift); | 
|---|
| 834 | } else { | 
|---|
| 835 | b->theap.base = NULL; | 
|---|
| 836 | } | 
|---|
| 837 |  | 
|---|
| 838 | /* LOAD tail heap */ | 
|---|
| 839 | if (ATOMvarsized(b->ttype)) { | 
|---|
| 840 | if (HEAPload(b->tvheap, nme, "theap", b->batRestricted == BAT_READ) != GDK_SUCCEED) { | 
|---|
| 841 | HEAPfree(&b->theap, false); | 
|---|
| 842 | HEAPfree(b->tvheap, false); | 
|---|
| 843 | return NULL; | 
|---|
| 844 | } | 
|---|
| 845 | if (ATOMstorage(b->ttype) == TYPE_str) { | 
|---|
| 846 | strCleanHash(b->tvheap, false);	/* ensure consistency */ | 
|---|
| 847 | } else { | 
|---|
| 848 | HEAP_recover(b->tvheap, (const var_t *) Tloc(b, 0), | 
|---|
| 849 | BATcount(b)); | 
|---|
| 850 | } | 
|---|
| 851 | } | 
|---|
| 852 |  | 
|---|
| 853 | /* initialize descriptor */ | 
|---|
| 854 | b->batDirtydesc = false; | 
|---|
| 855 | b->theap.parentid = 0; | 
|---|
| 856 |  | 
|---|
| 857 | /* load succeeded; register it in BBP */ | 
|---|
| 858 | if (BBPcacheit(b, lock) != GDK_SUCCEED) { | 
|---|
| 859 | HEAPfree(&b->theap, false); | 
|---|
| 860 | if (b->tvheap) | 
|---|
| 861 | HEAPfree(b->tvheap, false); | 
|---|
| 862 | return NULL; | 
|---|
| 863 | } | 
|---|
| 864 | return b; | 
|---|
| 865 | } | 
|---|
| 866 |  | 
|---|
| 867 | /* | 
|---|
| 868 | * @- BATdelete | 
|---|
| 869 | * The new behavior is to let the routine produce warnings but always | 
|---|
| 870 | * succeed.  rationale: on a delete, we must get rid of *all* the | 
|---|
| 871 | * files. We do not have to care about preserving them or be too much | 
|---|
| 872 | * concerned if a file that had to be deleted was not found (end | 
|---|
| 873 | * result is still that it does not exist). The past behavior to | 
|---|
| 874 | * delete some files and then fail was erroneous. The BAT would | 
|---|
| 875 | * continue to exist with an incorrect disk status, causing havoc | 
|---|
| 876 | * later on. | 
|---|
| 877 | * | 
|---|
| 878 | * NT forces us to close all files before deleting them; in case of | 
|---|
| 879 | * memory mapped files this means that we have to unload the BATs | 
|---|
| 880 | * before deleting. This is enforced now. | 
|---|
| 881 | */ | 
|---|
| 882 | void | 
|---|
| 883 | BATdelete(BAT *b) | 
|---|
| 884 | { | 
|---|
| 885 | bat bid = b->batCacheid; | 
|---|
| 886 | const char *o = BBP_physical(bid); | 
|---|
| 887 | BAT *loaded = BBP_cache(bid); | 
|---|
| 888 |  | 
|---|
| 889 | assert(bid > 0); | 
|---|
| 890 | if (loaded) { | 
|---|
| 891 | b = loaded; | 
|---|
| 892 | HASHdestroy(b); | 
|---|
| 893 | IMPSdestroy(b); | 
|---|
| 894 | OIDXdestroy(b); | 
|---|
| 895 | } | 
|---|
| 896 | if (b->batCopiedtodisk || (b->theap.storage != STORE_MEM)) { | 
|---|
| 897 | if (b->ttype != TYPE_void && | 
|---|
| 898 | HEAPdelete(&b->theap, o, "tail") != GDK_SUCCEED && | 
|---|
| 899 | b->batCopiedtodisk) | 
|---|
| 900 | IODEBUG fprintf(stderr, "#BATdelete(%s): bun heap\n", BATgetId(b)); | 
|---|
| 901 | } else if (b->theap.base) { | 
|---|
| 902 | HEAPfree(&b->theap, true); | 
|---|
| 903 | } | 
|---|
| 904 | if (b->tvheap) { | 
|---|
| 905 | assert(b->tvheap->parentid == bid); | 
|---|
| 906 | if (b->batCopiedtodisk || (b->tvheap->storage != STORE_MEM)) { | 
|---|
| 907 | if (HEAPdelete(b->tvheap, o, "theap") != GDK_SUCCEED && | 
|---|
| 908 | b->batCopiedtodisk) | 
|---|
| 909 | IODEBUG fprintf(stderr, "#BATdelete(%s): tail heap\n", BATgetId(b)); | 
|---|
| 910 | } else { | 
|---|
| 911 | HEAPfree(b->tvheap, true); | 
|---|
| 912 | } | 
|---|
| 913 | } | 
|---|
| 914 | b->batCopiedtodisk = false; | 
|---|
| 915 | } | 
|---|
| 916 |  | 
|---|
| 917 | /* | 
|---|
| 918 | * BAT specific printing | 
|---|
| 919 | */ | 
|---|
| 920 |  | 
|---|
| 921 | gdk_return | 
|---|
| 922 | BATprintcolumns(stream *s, int argc, BAT *argv[]) | 
|---|
| 923 | { | 
|---|
| 924 | int i; | 
|---|
| 925 | BUN n, cnt; | 
|---|
| 926 | struct colinfo { | 
|---|
| 927 | ssize_t (*s) (str *, size_t *, const void *, bool); | 
|---|
| 928 | BATiter i; | 
|---|
| 929 | } *colinfo; | 
|---|
| 930 | char *buf; | 
|---|
| 931 | size_t buflen = 0; | 
|---|
| 932 | ssize_t len; | 
|---|
| 933 |  | 
|---|
| 934 | /* error checking */ | 
|---|
| 935 | for (i = 0; i < argc; i++) { | 
|---|
| 936 | if (argv[i] == NULL) { | 
|---|
| 937 | GDKerror( "Columns missing\n"); | 
|---|
| 938 | return GDK_FAIL; | 
|---|
| 939 | } | 
|---|
| 940 | if (BATcount(argv[0]) != BATcount(argv[i])) { | 
|---|
| 941 | GDKerror( "Columns must be the same size\n"); | 
|---|
| 942 | return GDK_FAIL; | 
|---|
| 943 | } | 
|---|
| 944 | } | 
|---|
| 945 |  | 
|---|
| 946 | if ((colinfo = GDKmalloc(argc * sizeof(*colinfo))) == NULL) { | 
|---|
| 947 | GDKerror( "Cannot allocate memory\n"); | 
|---|
| 948 | return GDK_FAIL; | 
|---|
| 949 | } | 
|---|
| 950 |  | 
|---|
| 951 | for (i = 0; i < argc; i++) { | 
|---|
| 952 | colinfo[i].i = bat_iterator(argv[i]); | 
|---|
| 953 | colinfo[i].s = BATatoms[argv[i]->ttype].atomToStr; | 
|---|
| 954 | } | 
|---|
| 955 |  | 
|---|
| 956 | mnstr_write(s, "#--------------------------#\n", 1, 29); | 
|---|
| 957 | mnstr_write(s, "# ", 1, 2); | 
|---|
| 958 | for (i = 0; i < argc; i++) { | 
|---|
| 959 | if (i > 0) | 
|---|
| 960 | mnstr_write(s, "\t", 1, 1); | 
|---|
| 961 | buf = argv[i]->tident; | 
|---|
| 962 | mnstr_write(s, buf, 1, strlen(buf)); | 
|---|
| 963 | } | 
|---|
| 964 | mnstr_write(s, "  # name\n", 1, 9); | 
|---|
| 965 | mnstr_write(s, "# ", 1, 2); | 
|---|
| 966 | for (i = 0; i < argc; i++) { | 
|---|
| 967 | if (i > 0) | 
|---|
| 968 | mnstr_write(s, "\t", 1, 1); | 
|---|
| 969 | buf = ATOMname(argv[i]->ttype); | 
|---|
| 970 | mnstr_write(s, buf, 1, strlen(buf)); | 
|---|
| 971 | } | 
|---|
| 972 | mnstr_write(s, "  # type\n", 1, 9); | 
|---|
| 973 | mnstr_write(s, "#--------------------------#\n", 1, 29); | 
|---|
| 974 | buf = NULL; | 
|---|
| 975 |  | 
|---|
| 976 | for (n = 0, cnt = BATcount(argv[0]); n < cnt; n++) { | 
|---|
| 977 | mnstr_write(s, "[ ", 1, 2); | 
|---|
| 978 | for (i = 0; i < argc; i++) { | 
|---|
| 979 | len = colinfo[i].s(&buf, &buflen, BUNtail(colinfo[i].i, n), true); | 
|---|
| 980 | if (len < 0) { | 
|---|
| 981 | GDKfree(buf); | 
|---|
| 982 | GDKfree(colinfo); | 
|---|
| 983 | return GDK_FAIL; | 
|---|
| 984 | } | 
|---|
| 985 | if (i > 0) | 
|---|
| 986 | mnstr_write(s, ",\t", 1, 2); | 
|---|
| 987 | mnstr_write(s, buf, 1, len); | 
|---|
| 988 | } | 
|---|
| 989 | mnstr_write(s, "  ]\n", 1, 4); | 
|---|
| 990 | } | 
|---|
| 991 |  | 
|---|
| 992 | GDKfree(buf); | 
|---|
| 993 | GDKfree(colinfo); | 
|---|
| 994 |  | 
|---|
| 995 | return GDK_SUCCEED; | 
|---|
| 996 | } | 
|---|
| 997 |  | 
|---|
| 998 | gdk_return | 
|---|
| 999 | BATprint(stream *fdout, BAT *b) | 
|---|
| 1000 | { | 
|---|
| 1001 | BAT *argv[2]; | 
|---|
| 1002 | gdk_return ret = GDK_FAIL; | 
|---|
| 1003 |  | 
|---|
| 1004 | argv[0] = BATdense(b->hseqbase, b->hseqbase, BATcount(b)); | 
|---|
| 1005 | argv[1] = b; | 
|---|
| 1006 | if (argv[0] && argv[1]) { | 
|---|
| 1007 | ret = BATroles(argv[0], "h"); | 
|---|
| 1008 | if (ret == GDK_SUCCEED) | 
|---|
| 1009 | ret = BATprintcolumns(fdout, 2, argv); | 
|---|
| 1010 | } | 
|---|
| 1011 | if (argv[0]) | 
|---|
| 1012 | BBPunfix(argv[0]->batCacheid); | 
|---|
| 1013 | return ret; | 
|---|
| 1014 | } | 
|---|
| 1015 |  | 
|---|