| 1 | /* | 
| 2 |  * This Source Code Form is subject to the terms of the Mozilla Public | 
| 3 |  * License, v. 2.0.  If a copy of the MPL was not distributed with this | 
| 4 |  * file, You can obtain one at http://mozilla.org/MPL/2.0/. | 
| 5 |  * | 
| 6 |  * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V. | 
| 7 |  */ | 
| 8 |  | 
| 9 | /* | 
| 10 |  * @a M. L. Kersten, P. Boncz, N. J. Nes | 
| 11 |  * @* BAT Buffer Pool (BBP) | 
| 12 |  * The BATs created and loaded are collected in a BAT buffer pool. | 
| 13 |  * The Bat Buffer Pool has a number of functions: | 
| 14 |  * @table @code | 
| 15 |  * | 
| 16 |  * @item administration and lookup | 
| 17 |  * The BBP is a directory which contains status information about all | 
| 18 |  * known BATs.  This interface may be used very heavily, by | 
| 19 |  * data-intensive applications.  To eliminate all overhead, read-only | 
| 20 |  * access to the BBP may be done by table-lookups. The integer index | 
| 21 |  * type for these lookups is @emph{bat}, as retrieved by | 
| 22 |  * @emph{b->batCacheid}. The @emph{bat} zero is reserved for the nil | 
| 23 |  * bat. | 
| 24 |  * | 
| 25 |  * @item persistence | 
| 26 |  * The BBP is made persistent by saving it to the dictionary file | 
| 27 |  * called @emph{BBP.dir} in the database. | 
| 28 |  * | 
| 29 |  * When the number of BATs rises, having all files in one directory | 
| 30 |  * becomes a bottleneck.  The BBP therefore implements a scheme that | 
| 31 |  * distributes all BATs in a growing directory tree with at most 64 | 
| 32 |  * BATs stored in one node. | 
| 33 |  * | 
| 34 |  * @item buffer management | 
| 35 |  * The BBP is responsible for loading and saving of BATs to disk. It | 
| 36 |  * also contains routines to unload BATs from memory when memory | 
| 37 |  * resources get scarce. For this purpose, it administers BAT memory | 
| 38 |  * reference counts (to know which BATs can be unloaded) and BAT usage | 
| 39 |  * statistics (it unloads the least recently used BATs). | 
| 40 |  * | 
| 41 |  * @item recovery | 
| 42 |  * When the database is closed or during a run-time syncpoint, the | 
| 43 |  * system tables must be written to disk in a safe way, that is immune | 
| 44 |  * for system failures (like disk full). To do so, the BBP implements | 
| 45 |  * an atomic commit and recovery protocol: first all files to be | 
| 46 |  * overwritten are moved to a BACKUP/ dir. If that succeeds, the | 
| 47 |  * writes are done. If that also fully succeeds the BACKUP/ dir is | 
| 48 |  * renamed to DELETE_ME/ and subsequently deleted.  If not, all files | 
| 49 |  * in BACKUP/ are moved back to their original location. | 
| 50 |  * | 
| 51 |  * @item unloading | 
| 52 |  * Bats which have a logical reference (ie. a lrefs > 0) but no memory | 
| 53 |  * reference (refcnt == 0) can be unloaded. Unloading dirty bats | 
| 54 |  * means, moving the original (committed version) to the BACKUP/ dir | 
| 55 |  * and saving the bat. This complicates the commit and recovery/abort | 
| 56 |  * issues.  The commit has to check if the bat is already moved. And | 
 * the recovery has to always move back the files from the BACKUP/
| 58 |  * dir. | 
| 59 |  * | 
| 60 |  * @item reference counting | 
 * Bats have two kinds of references: logical and physical
| 62 |  * (pointer) ones.  The logical references are administered by | 
| 63 |  * BBPretain/BBPrelease, the physical ones by BBPfix/BBPunfix. | 
| 64 |  * | 
| 65 |  * @item share counting | 
 * Views use the heaps of their parent bats. To safeguard this, the
| 67 |  * parent has a shared counter, which is incremented and decremented | 
| 68 |  * using BBPshare and BBPunshare. These functions make sure the parent | 
| 69 |  * is memory resident as required because of the 'pointer' sharing. | 
| 70 |  * @end table | 
| 71 |  */ | 
| 72 |  | 
| 73 | #include "monetdb_config.h" | 
| 74 | #include "gdk.h" | 
| 75 | #include "gdk_private.h" | 
| 76 | #include "gdk_storage.h" | 
| 77 | #include "mutils.h" | 
| 78 |  | 
/* Provide F_OK (the mode argument passed to access()) on platforms
 * whose headers do not define it. */
#ifndef F_OK
#define F_OK 0
#endif
/* When compiling with _MSC_VER defined, route access() to _access(). */
#ifdef _MSC_VER
#define access(f, m)	_access(f, m)
#endif
| 85 |  | 
| 86 | /* | 
| 87 |  * The BBP has a fixed address, so re-allocation due to a growing BBP | 
| 88 |  * caused by one thread does not disturb reads to the old entries by | 
| 89 |  * another.  This is implemented using anonymous virtual memory; | 
| 90 |  * extensions on the same address are guaranteed because a large | 
| 91 |  * non-committed VM area is requested initially. New slots in the BBP | 
| 92 |  * are found in O(1) by keeping a freelist that uses the 'next' field | 
| 93 |  * in the BBPrec records. | 
| 94 |  */ | 
/* BBP is a two-level table: N_BBPINIT pointers, each of which is set
 * by BBPextend() to a zeroed chunk of BBPINIT BBPrec records.  Chunks
 * are only ever added, so existing records keep their address. */
BBPrec *BBP[N_BBPINIT];		/* fixed base VM address of BBP array */
/* number of BBPrec slots allocated so far; grows in steps of BBPINIT */
bat BBPlimit = 0;		/* current committed VM BBP array */
static ATOMIC_TYPE BBPsize = ATOMIC_VAR_INIT(0); /* current used size of BBP array */

/* table of farms; BBPselectfarm() scans it for an entry with a
 * dirname and a matching role bit */
struct BBPfarm_t BBPfarms[MAXFARMS];
| 100 |  | 
#define KITTENNAP 1		/* used to suspend processing */
#define BBPNONAME "."		/* filler for no name in BBP.dir */
/*
 * The hash index uses a bucket index (int array) of size mask that is
 * tuned for perfect hashing (1 lookup). The bucket chain uses the
 * 'next' field in the BBPrec records.
 *
 * BBPinithash() allocates the bucket array with a power-of-two number
 * of entries and then stores that number minus one in BBP_mask, so a
 * bucket is selected with (strHash(name) & BBP_mask).
 */
bat *BBP_hash = NULL;		/* BBP logical name hash buckets */
bat BBP_mask = 0;		/* number of buckets = & mask */
| 110 |  | 
/* Forward declarations of file-local helpers (their definitions are
 * presumably further down in this file). */
static gdk_return BBPfree(BAT *b, const char *calledFrom);
static void BBPdestroy(BAT *b);
static void BBPuncacheit(bat bid, bool unloaddesc);
static gdk_return BBPprepare(bool subcommit);
static BAT *getBBPdescriptor(bat i, bool lock);
static gdk_return BBPbackup(BAT *b, bool subcommit);
static gdk_return BBPdir(int cnt, bat *subcommit);

#ifdef HAVE_HGE
/* start out by saying we have no hge, but as soon as we've seen one,
 * we'll always say we do have it */
static bool havehge = false;
#endif

/* If s passes BBPtmpcheck(), parse the text that follows its first
 * four characters as an octal number and yield that value; otherwise
 * yield 0.  Note that s is evaluated twice. */
#define BBPnamecheck(s) (BBPtmpcheck(s) ? strtol((s) + 4, NULL, 8) : 0)
| 126 |  | 
| 127 | static void | 
| 128 | BBP_insert(bat i) | 
| 129 | { | 
| 130 | 	bat idx = (bat) (strHash(BBP_logical(i)) & BBP_mask); | 
| 131 |  | 
| 132 | 	BBP_next(i) = BBP_hash[idx]; | 
| 133 | 	BBP_hash[idx] = i; | 
| 134 | } | 
| 135 |  | 
| 136 | static void | 
| 137 | BBP_delete(bat i) | 
| 138 | { | 
| 139 | 	bat *h = BBP_hash; | 
| 140 | 	const char *s = BBP_logical(i); | 
| 141 | 	bat idx = (bat) (strHash(s) & BBP_mask); | 
| 142 |  | 
| 143 | 	for (h += idx; (i = *h) != 0; h = &BBP_next(i)) { | 
| 144 | 		if (strcmp(BBP_logical(i), s) == 0) { | 
| 145 | 			*h = BBP_next(i); | 
| 146 | 			break; | 
| 147 | 		} | 
| 148 | 	} | 
| 149 | } | 
| 150 |  | 
| 151 | bat | 
| 152 | getBBPsize(void) | 
| 153 | { | 
| 154 | 	return (bat) ATOMIC_GET(&BBPsize); | 
| 155 | } | 
| 156 |  | 
| 157 |  | 
| 158 | /* | 
| 159 |  * @+ BBP Consistency and Concurrency | 
| 160 |  * While GDK provides the basic building blocks for an ACID system, in | 
 * itself it is not such a system, as this would entail too much
| 162 |  * overhead that is often not needed. Hence, some consistency control | 
| 163 |  * is left to the user. The first important user constraint is that if | 
 * a user updates a BAT, they themselves must ensure that no-one else
| 165 |  * accesses this BAT. | 
| 166 |  * | 
| 167 |  * Concerning buffer management, the BBP carries out a swapping | 
| 168 |  * policy.  BATs are kept in memory till the memory is full. If the | 
| 169 |  * memory is full, the malloc functions initiate BBP trim actions, | 
| 170 |  * that unload the coldest BATs that have a zero reference count. The | 
| 171 |  * second important user constraint is therefore that a user may only | 
| 172 |  * manipulate live BAT data in memory if it is sure that there is at | 
| 173 |  * least one reference count to that BAT. | 
| 174 |  * | 
| 175 |  * The main BBP array is protected by two locks: | 
| 176 |  * @table @code | 
 * @item GDKcacheLock
| 178 |  * this lock guards the free slot management in the BBP array.  The | 
| 179 |  * BBP operations that allocate a new slot for a new BAT | 
| 180 |  * (@emph{BBPinit},@emph{BBPcacheit}), delete the slot of a destroyed | 
| 181 |  * BAT (@emph{BBPreclaim}), or rename a BAT (@emph{BBPrename}), hold | 
 * this lock. It also protects all BAT (re)naming actions, including
 * reads and writes of the hash table with BAT names.
| 184 |  * @item GDKswapLock | 
| 185 |  * this lock guards the swap (loaded/unloaded) status of the | 
| 186 |  * BATs. Hence, all BBP routines that influence the swapping policy, | 
| 187 |  * or actually carry out the swapping policy itself, acquire this lock | 
| 188 |  * (e.g. @emph{BBPfix},@emph{BBPunfix}).  Note that this also means | 
| 189 |  * that updates to the BBP_status indicator array must be protected by | 
| 190 |  * GDKswapLock. | 
| 191 |  * | 
| 192 |  * To reduce contention GDKswapLock was split into multiple locks; it | 
| 193 |  * is now an array of lock pointers which is accessed by | 
| 194 |  * GDKswapLock(bat) | 
| 195 |  * @end table | 
| 196 |  * | 
| 197 |  * Routines that need both locks should first acquire the locks in the | 
| 198 |  * GDKswapLock array (in ascending order) and then GDKcacheLock (and | 
| 199 |  * release them in reverse order). | 
| 200 |  * | 
| 201 |  * To obtain maximum speed, read operations to existing elements in | 
| 202 |  * the BBP are unguarded. As said, it is the users responsibility that | 
| 203 |  * the BAT that is being read is not being modified. BBP update | 
| 204 |  * actions that modify the BBP data structure itself are locked by the | 
| 205 |  * BBP functions themselves. Hence, multiple concurrent BBP read | 
| 206 |  * operations may be ongoing while at the same time at most one BBP | 
| 207 |  * write operation @strong{on a different BAT} is executing.  This | 
| 208 |  * holds for accesses to the public (quasi-) arrays @emph{BBPcache}, | 
| 209 |  * @emph{BBPstatus} and @emph{BBPrefs}. | 
| 210 |  * These arrays are called quasi as now they are | 
| 211 |  * actually stored together in one big BBPrec array called BBP, that | 
| 212 |  * is allocated in anonymous VM space, so we can reallocate this | 
| 213 |  * structure without changing the base address (a crucial feature if | 
| 214 |  * read actions are to go on unlocked while other entries in the BBP | 
| 215 |  * may be modified). | 
| 216 |  */ | 
/* Set by BBPlock() to the value of MT_getpid() once all BBP locks
 * have been taken, and reset to 0 by BBPunlock(). */
static volatile MT_Id locked_by = 0;

/* Note one more unload in progress: increment BBPunloadCnt while
 * holding GDKunloadLock. */
#define BBP_unload_inc()			\
	do {					\
		MT_lock_set(&GDKunloadLock);	\
		BBPunloadCnt++;			\
		MT_lock_unset(&GDKunloadLock);	\
	} while (0)

/* Note that an unload has finished: decrement BBPunloadCnt while
 * holding GDKunloadLock; the counter must never become negative. */
#define BBP_unload_dec()			\
	do {					\
		MT_lock_set(&GDKunloadLock);	\
		--BBPunloadCnt;			\
		assert(BBPunloadCnt >= 0);	\
		MT_lock_unset(&GDKunloadLock);	\
	} while (0)

/* number of unloads in progress; BBPlock() waits until this is 0 */
static int BBPunloadCnt = 0;
/* protects BBPunloadCnt */
static MT_Lock GDKunloadLock = MT_LOCK_INITIALIZER("GDKunloadLock" );
| 236 |  | 
| 237 | void | 
| 238 | BBPlock(void) | 
| 239 | { | 
| 240 | 	int i; | 
| 241 |  | 
| 242 | 	/* wait for all pending unloads to finish */ | 
| 243 | 	MT_lock_set(&GDKunloadLock); | 
| 244 | 	while (BBPunloadCnt > 0) { | 
| 245 | 		MT_lock_unset(&GDKunloadLock); | 
| 246 | 		MT_sleep_ms(1); | 
| 247 | 		MT_lock_set(&GDKunloadLock); | 
| 248 | 	} | 
| 249 |  | 
| 250 | 	for (i = 0; i <= BBP_THREADMASK; i++) | 
| 251 | 		MT_lock_set(&GDKtrimLock(i)); | 
| 252 | 	for (i = 0; i <= BBP_THREADMASK; i++) | 
| 253 | 		MT_lock_set(&GDKcacheLock(i)); | 
| 254 | 	for (i = 0; i <= BBP_BATMASK; i++) | 
| 255 | 		MT_lock_set(&GDKswapLock(i)); | 
| 256 | 	locked_by = MT_getpid(); | 
| 257 |  | 
| 258 | 	MT_lock_unset(&GDKunloadLock); | 
| 259 | } | 
| 260 |  | 
| 261 | void | 
| 262 | BBPunlock(void) | 
| 263 | { | 
| 264 | 	int i; | 
| 265 |  | 
| 266 | 	for (i = BBP_BATMASK; i >= 0; i--) | 
| 267 | 		MT_lock_unset(&GDKswapLock(i)); | 
| 268 | 	for (i = BBP_THREADMASK; i >= 0; i--) | 
| 269 | 		MT_lock_unset(&GDKcacheLock(i)); | 
| 270 | 	locked_by = 0; | 
| 271 | 	for (i = BBP_THREADMASK; i >= 0; i--) | 
| 272 | 		MT_lock_unset(&GDKtrimLock(i)); | 
| 273 | } | 
| 274 |  | 
| 275 |  | 
| 276 | static gdk_return | 
| 277 | BBPinithash(int j) | 
| 278 | { | 
| 279 | 	bat i = (bat) ATOMIC_GET(&BBPsize); | 
| 280 |  | 
| 281 | 	assert(j >= 0 && j <= BBP_THREADMASK); | 
| 282 | 	for (BBP_mask = 1; (BBP_mask << 1) <= BBPlimit; BBP_mask <<= 1) | 
| 283 | 		; | 
| 284 | 	BBP_hash = (bat *) GDKzalloc(BBP_mask * sizeof(bat)); | 
| 285 | 	if (BBP_hash == NULL) { | 
| 286 | 		GDKerror("BBPinithash: cannot allocate memory\n" ); | 
| 287 | 		return GDK_FAIL; | 
| 288 | 	} | 
| 289 | 	BBP_mask--; | 
| 290 |  | 
| 291 | 	while (--i > 0) { | 
| 292 | 		const char *s = BBP_logical(i); | 
| 293 |  | 
| 294 | 		if (s) { | 
| 295 | 			if (*s != '.' && BBPtmpcheck(s) == 0) { | 
| 296 | 				BBP_insert(i); | 
| 297 | 			} | 
| 298 | 		} else { | 
| 299 | 			BBP_next(i) = BBP_free(j); | 
| 300 | 			BBP_free(j) = i; | 
| 301 | 			if (++j > BBP_THREADMASK) | 
| 302 | 				j = 0; | 
| 303 | 		} | 
| 304 | 	} | 
| 305 | 	return GDK_SUCCEED; | 
| 306 | } | 
| 307 |  | 
| 308 | int | 
| 309 | BBPselectfarm(role_t role, int type, enum heaptype hptype) | 
| 310 | { | 
| 311 | 	int i; | 
| 312 |  | 
| 313 | 	(void) type;		/* may use in future */ | 
| 314 | 	(void) hptype;		/* may use in future */ | 
| 315 |  | 
| 316 | 	if (GDKinmemory()) | 
| 317 | 		return 0; | 
| 318 |  | 
| 319 | #ifndef PERSISTENTHASH | 
| 320 | 	if (hptype == hashheap) | 
| 321 | 		role = TRANSIENT; | 
| 322 | #endif | 
| 323 | #ifndef PERSISTENTIDX | 
| 324 | 	if (hptype == orderidxheap) | 
| 325 | 		role = TRANSIENT; | 
| 326 | #endif | 
| 327 | 	for (i = 0; i < MAXFARMS; i++) | 
| 328 | 		if (BBPfarms[i].dirname && BBPfarms[i].roles & (1 << (int) role)) | 
| 329 | 			return i; | 
| 330 | 	/* must be able to find farms for TRANSIENT and PERSISTENT */ | 
| 331 | 	assert(role != TRANSIENT && role != PERSISTENT); | 
| 332 | 	return -1; | 
| 333 | } | 
| 334 |  | 
| 335 | /* | 
| 336 |  * BBPextend must take the trimlock, as it is called when other BBP | 
| 337 |  * locks are held and it will allocate memory. | 
| 338 |  */ | 
| 339 | static gdk_return | 
| 340 | BBPextend(int idx, bool buildhash) | 
| 341 | { | 
| 342 | 	if ((bat) ATOMIC_GET(&BBPsize) >= N_BBPINIT * BBPINIT) { | 
| 343 | 		GDKerror("BBPextend: trying to extend BAT pool beyond the "  | 
| 344 | 			 "limit (%d)\n" , N_BBPINIT * BBPINIT); | 
| 345 | 		return GDK_FAIL; | 
| 346 | 	} | 
| 347 |  | 
| 348 | 	/* make sure the new size is at least BBPsize large */ | 
| 349 | 	while (BBPlimit < (bat) ATOMIC_GET(&BBPsize)) { | 
| 350 | 		assert(BBP[BBPlimit >> BBPINITLOG] == NULL); | 
| 351 | 		BBP[BBPlimit >> BBPINITLOG] = GDKzalloc(BBPINIT * sizeof(BBPrec)); | 
| 352 | 		if (BBP[BBPlimit >> BBPINITLOG] == NULL) { | 
| 353 | 			GDKerror("BBPextend: failed to extend BAT pool\n" ); | 
| 354 | 			return GDK_FAIL; | 
| 355 | 		} | 
| 356 | 		BBPlimit += BBPINIT; | 
| 357 | 	} | 
| 358 |  | 
| 359 | 	if (buildhash) { | 
| 360 | 		int i; | 
| 361 |  | 
| 362 | 		GDKfree(BBP_hash); | 
| 363 | 		BBP_hash = NULL; | 
| 364 | 		for (i = 0; i <= BBP_THREADMASK; i++) | 
| 365 | 			BBP_free(i) = 0; | 
| 366 | 		if (BBPinithash(idx) != GDK_SUCCEED) | 
| 367 | 			return GDK_FAIL; | 
| 368 | 	} | 
| 369 | 	return GDK_SUCCEED; | 
| 370 | } | 
| 371 |  | 
| 372 | static gdk_return | 
| 373 | recover_dir(int farmid, bool direxists) | 
| 374 | { | 
| 375 | 	if (direxists) { | 
| 376 | 		/* just try; don't care about these non-vital files */ | 
| 377 | 		if (GDKunlink(farmid, BATDIR, "BBP" , "bak" ) != GDK_SUCCEED) | 
| 378 | 			fprintf(stderr, "#recover_dir: unlink of BBP.bak failed\n" ); | 
| 379 | 		if (GDKmove(farmid, BATDIR, "BBP" , "dir" , BATDIR, "BBP" , "bak" ) != GDK_SUCCEED) | 
| 380 | 			fprintf(stderr, "#recover_dir: rename of BBP.dir to BBP.bak failed\n" ); | 
| 381 | 	} | 
| 382 | 	return GDKmove(farmid, BAKDIR, "BBP" , "dir" , BATDIR, "BBP" , "dir" ); | 
| 383 | } | 
| 384 |  | 
/* Forward declarations of file-local routines; their definitions are
 * not in this part of the file. */
static gdk_return BBPrecover(int farmid);
static gdk_return BBPrecover_subdir(void);
static bool BBPdiskscan(const char *, size_t);
| 388 |  | 
| 389 | #ifdef GDKLIBRARY_NIL_NAN | 
| 390 | static gdk_return | 
| 391 | fixfltheap(BAT *b) | 
| 392 | { | 
| 393 | 	long_str filename; | 
| 394 | 	Heap h1;		/* old heap */ | 
| 395 | 	Heap h2;		/* new heap */ | 
| 396 | 	const char *nme, *bnme; | 
| 397 | 	char *srcdir; | 
| 398 | 	BUN i; | 
| 399 | 	bool nofix = true; | 
| 400 |  | 
| 401 | 	nme = BBP_physical(b->batCacheid); | 
| 402 | 	srcdir = GDKfilepath(NOFARM, BATDIR, nme, NULL); | 
| 403 | 	if (srcdir == NULL) { | 
| 404 | 		return GDK_FAIL; | 
| 405 | 	} | 
| 406 | 	*strrchr(srcdir, DIR_SEP) = 0; | 
| 407 |  | 
| 408 | 	if ((bnme = strrchr(nme, DIR_SEP)) != NULL) | 
| 409 | 		bnme++; | 
| 410 | 	else | 
| 411 | 		bnme = nme; | 
| 412 | 	sprintf(filename, "BACKUP%c%s" , DIR_SEP, bnme); | 
| 413 |  | 
| 414 | 	/* make backup of heap */ | 
| 415 | 	if (GDKmove(b->theap.farmid, srcdir, bnme, "tail" , BAKDIR, bnme, "tail" ) != GDK_SUCCEED) { | 
| 416 | 		GDKfree(srcdir); | 
| 417 | 		GDKerror("fixfltheap: cannot make backup of %s.tail\n" , nme); | 
| 418 | 		return GDK_FAIL; | 
| 419 | 	} | 
| 420 | 	/* load old heap */ | 
| 421 | 	h1 = b->theap; | 
| 422 | 	strconcat_len(h1.filename, sizeof(h1.filename), | 
| 423 | 		      filename, ".tail" , NULL); | 
| 424 | 	h1.base = NULL; | 
| 425 | 	h1.dirty = false; | 
| 426 | 	if (HEAPload(&h1, filename, "tail" , false) != GDK_SUCCEED) { | 
| 427 | 		GDKfree(srcdir); | 
| 428 | 		GDKerror("fixfltheap: loading old tail heap "  | 
| 429 | 			 "for BAT %d failed\n" , b->batCacheid); | 
| 430 | 		return GDK_FAIL; | 
| 431 | 	} | 
| 432 |  | 
| 433 | 	/* create new heap */ | 
| 434 | 	h2 = b->theap; | 
| 435 | 	strconcat_len(h2.filename, sizeof(h2.filename), nme, ".tail" , NULL); | 
| 436 | 	if (HEAPalloc(&h2, b->batCapacity, b->twidth) != GDK_SUCCEED) { | 
| 437 | 		GDKfree(srcdir); | 
| 438 | 		HEAPfree(&h1, false); | 
| 439 | 		GDKerror("fixfltheap: allocating new tail heap "  | 
| 440 | 			 "for BAT %d failed\n" , b->batCacheid); | 
| 441 | 		return GDK_FAIL; | 
| 442 | 	} | 
| 443 | 	h2.dirty = true; | 
| 444 | 	h2.free = h1.free; | 
| 445 |  | 
| 446 | 	switch (b->ttype) { | 
| 447 | 	case TYPE_flt: { | 
| 448 | 		const flt *restrict o = (const flt *) h1.base; | 
| 449 | 		flt *restrict n = (flt *) h2.base; | 
| 450 |  | 
| 451 | 		for (i = 0; i < b->batCount; i++) { | 
| 452 | 			if (o[i] == GDK_flt_min) { | 
| 453 | 				b->tnil = true; | 
| 454 | 				n[i] = flt_nil; | 
| 455 | 				nofix = false; | 
| 456 | 			} else { | 
| 457 | 				n[i] = o[i]; | 
| 458 | 			} | 
| 459 | 		} | 
| 460 | 		break; | 
| 461 | 	} | 
| 462 | 	case TYPE_dbl: { | 
| 463 | 		const dbl *restrict o = (const dbl *) h1.base; | 
| 464 | 		dbl *restrict n = (dbl *) h2.base; | 
| 465 |  | 
| 466 | 		for (i = 0; i < b->batCount; i++) { | 
| 467 | 			if (o[i] == GDK_dbl_min) { | 
| 468 | 				b->tnil = true; | 
| 469 | 				n[i] = dbl_nil; | 
| 470 | 				nofix = false; | 
| 471 | 			} else { | 
| 472 | 				n[i] = o[i]; | 
| 473 | 			} | 
| 474 | 		} | 
| 475 | 		break; | 
| 476 | 	} | 
| 477 | 	default: { | 
| 478 | 		struct mbr { | 
| 479 | 			float xmin, ymin, xmax, ymax; | 
| 480 | 		}; | 
| 481 | 		const struct mbr *restrict o = (const struct mbr *) h1.base; | 
| 482 | 		struct mbr *restrict n = (struct mbr *) h2.base; | 
| 483 |  | 
| 484 | 		assert(strcmp(ATOMunknown_name(b->ttype), "mbr" ) == 0); | 
| 485 | 		assert(b->twidth == 4 * sizeof(flt)); | 
| 486 |  | 
| 487 | 		for (i = 0; i < b->batCount; i++) { | 
| 488 | 			if (o[i].xmin == GDK_flt_min || | 
| 489 | 			    o[i].xmax == GDK_flt_min || | 
| 490 | 			    o[i].ymin == GDK_flt_min || | 
| 491 | 			    o[i].ymax == GDK_flt_min) { | 
| 492 | 				b->tnil = true; | 
| 493 | 				n[i].xmin = n[i].xmax = n[i].ymin = n[i].ymax = flt_nil; | 
| 494 | 				nofix = false; | 
| 495 | 			} else { | 
| 496 | 				n[i] = o[i]; | 
| 497 | 			} | 
| 498 | 		} | 
| 499 | 		break; | 
| 500 | 	} | 
| 501 | 	} | 
| 502 |  | 
| 503 | 	/* cleanup */ | 
| 504 | 	HEAPfree(&h1, false); | 
| 505 | 	if (nofix) { | 
| 506 | 		/* didn't fix anything, move backup back */ | 
| 507 | 		HEAPfree(&h2, true); | 
| 508 | 		if (GDKmove(b->theap.farmid, BAKDIR, bnme, "tail" , srcdir, bnme, "tail" ) != GDK_SUCCEED) { | 
| 509 | 			GDKfree(srcdir); | 
| 510 | 			GDKerror("fixfltheap: cannot restore backup of %s.tail\n" , nme); | 
| 511 | 			return GDK_FAIL; | 
| 512 | 		} | 
| 513 | 	} else { | 
| 514 | 		/* heap was fixed */ | 
| 515 | 		b->batDirtydesc = true; | 
| 516 | 		if (HEAPsave(&h2, nme, "tail" ) != GDK_SUCCEED) { | 
| 517 | 			HEAPfree(&h2, false); | 
| 518 | 			GDKfree(srcdir); | 
| 519 | 			GDKerror("fixfltheap: saving heap failed\n" ); | 
| 520 | 			return GDK_FAIL; | 
| 521 | 		} | 
| 522 | 		HEAPfree(&h2, false); | 
| 523 | 		b->theap = h2; | 
| 524 | 	} | 
| 525 | 	GDKfree(srcdir); | 
| 526 | 	return GDK_SUCCEED; | 
| 527 | } | 
| 528 |  | 
| 529 | static gdk_return | 
| 530 | fixfloatbats(void) | 
| 531 | { | 
| 532 | 	bat bid; | 
| 533 | 	BAT *b; | 
| 534 | 	char filename[FILENAME_MAX]; | 
| 535 | 	FILE *fp; | 
| 536 | 	size_t len; | 
| 537 | 	int written; | 
| 538 |  | 
| 539 | 	for (bid = 1; bid < (bat) ATOMIC_GET(&BBPsize); bid++) { | 
| 540 | 		if ((b = BBP_desc(bid)) == NULL) { | 
| 541 | 			/* not a valid BAT */ | 
| 542 | 			continue; | 
| 543 | 		} | 
| 544 | 		if (BBP_logical(bid) && | 
| 545 | 		    (len = strlen(BBP_logical(bid))) > 12 && | 
| 546 | 		    strcmp(BBP_logical(bid) + len - 12, "_catalog_nme" ) == 0) { | 
| 547 | 			/* this is one of the files used by the | 
| 548 | 			 * logger.  We need to communicate to the | 
| 549 | 			 * logger that it also needs to do a | 
| 550 | 			 * conversion.  That is done by creating a | 
| 551 | 			 * file here based on the name of this BAT. */ | 
| 552 | 			written = snprintf(filename, sizeof(filename), | 
| 553 | 				 "%s/%.*s_nil-nan-convert" , | 
| 554 | 				 BBPfarms[0].dirname, | 
| 555 | 				 (int) (len - 12), BBP_logical(bid)); | 
| 556 | 			if (written == -1 || written >= FILENAME_MAX) { | 
| 557 | 				GDKerror("fixfloatbats: cannot create file %s has a very large pathname\n" , | 
| 558 | 						 filename); | 
| 559 | 				return GDK_FAIL; | 
| 560 | 			} | 
| 561 | 			fp = fopen(filename, "w" ); | 
| 562 | 			if (fp == NULL) { | 
| 563 | 				GDKsyserror("fixfloatbats: cannot create file %s\n" , | 
| 564 | 					 filename); | 
| 565 | 				return GDK_FAIL; | 
| 566 | 			} | 
| 567 | 			fclose(fp); | 
| 568 | 		} | 
| 569 | 		if (b->batCount == 0 || b->tnonil) { | 
| 570 | 			/*  no NILs to convert */ | 
| 571 | 			continue; | 
| 572 | 		} | 
| 573 | 		if (b->ttype < 0) { | 
| 574 | 			const char *anme; | 
| 575 |  | 
| 576 | 			/* as yet unknown tail column type */ | 
| 577 | 			anme = ATOMunknown_name(b->ttype); | 
| 578 | 			/* known string types */ | 
| 579 | 			if (strcmp(anme, "mbr" ) != 0) | 
| 580 | 				continue; | 
| 581 | 		} else if (b->ttype != TYPE_flt && b->ttype != TYPE_dbl) | 
| 582 | 			continue; | 
| 583 | 		if (fixfltheap(b) != GDK_SUCCEED) | 
| 584 | 			return GDK_FAIL; | 
| 585 | 	} | 
| 586 | 	return GDK_SUCCEED; | 
| 587 | } | 
| 588 | #endif | 
| 589 |  | 
| 590 | #ifdef GDKLIBRARY_OLDDATE | 
/* true iff y is divisible by 4 and is either not divisible by 100 or
 * divisible by 400 */
#define leapyear(y)		((y) % 4 == 0 && ((y) % 100 != 0 || (y) % 400 == 0))
/* number of days in year y: 366 if leapyear(y), else 365 */
#define YEARDAYS(y)		(leapyear(y) ? 366 : 365)
/* cumulative day counts for a 366-day year: entry m is the number of
 * days in the first m months (entry 0 is 0, entry 12 is 366) */
static int CUMLEAPDAYS[13] = {
	0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366
};
/* the same table for a 365-day year */
static int CUMDAYS[13] = {
	0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365
};
/*
 * Return year/4 - year/100 + year/400 (all integer divisions,
 * truncating toward zero), plus one when year is not negative.
 * The result may be negative.
 */
static int
leapyears(int year)
{
	/* 4-fold years, minus the 100-fold ones, plus the 400-fold ones */
	int count = year / 4 - year / 100 + year / 400;

	if (year >= 0)
		count++;
	return count;		/* may be negative */
}
| 613 |  | 
/* Layout of the packed date value built by mkdate(): the day occupies
 * the low DTDAY_WIDTH bits, and above it (from bit DTMONTH_SHIFT) sits
 * a month counter computed as (year + YEAR_OFFSET) * 12 + month - 1. */
#define YEAR_OFFSET	4712
#define YEAR_MIN	(-YEAR_OFFSET)
#define DTDAY_WIDTH	5		/* 1..28/29/30/31, depending on month */
#define DTDAY_SHIFT	0
#define DTMONTH_WIDTH	21		/* enough for 174761 years */
#define DTMONTH_SHIFT	(DTDAY_WIDTH+DTDAY_SHIFT)
#define YEAR_MAX	(YEAR_MIN+(1<<DTMONTH_WIDTH)/12-1)
/* pack day d, month m and year y into a single int */
#define mkdate(d, m, y)	(((((y) + YEAR_OFFSET) * 12 + (m) - 1) << DTMONTH_SHIFT) \
			 | ((d) << DTDAY_SHIFT))
/* Layout of the packed timestamp value built by mktimestamp(): the
 * time of day in the low TSTIME_WIDTH bits, the packed date above it
 * (from bit TSDATE_SHIFT). */
#define TSTIME_WIDTH	37		/* [0..24*60*60*1000000) */
#define TSTIME_SHIFT	0
#define TSDATE_WIDTH	(DTDAY_WIDTH+DTMONTH_WIDTH)
#define TSDATE_SHIFT	(TSTIME_SHIFT+TSTIME_WIDTH)
/* combine packed date d and time t into one lng */
#define mktimestamp(d, t)	((lng) (((uint64_t) (d) << TSDATE_SHIFT) | \
					((uint64_t) (t) << TSTIME_SHIFT)))
| 629 |  | 
| 630 | int | 
| 631 | cvtdate(int n) | 
| 632 | { | 
| 633 | 	int day, month, year; | 
| 634 |  | 
| 635 | 	year = n / 365; | 
| 636 | 	day = (n - year * 365) - leapyears(year >= 0 ? year - 1 : year); | 
| 637 | 	if (n < 0) { | 
| 638 | 		year--; | 
| 639 | 		while (day >= 0) { | 
| 640 | 			year++; | 
| 641 | 			day -= YEARDAYS(year); | 
| 642 | 		} | 
| 643 | 		day = YEARDAYS(year) + day; | 
| 644 | 	} else { | 
| 645 | 		while (day < 0) { | 
| 646 | 			year--; | 
| 647 | 			day += YEARDAYS(year); | 
| 648 | 		} | 
| 649 | 	} | 
| 650 |  | 
| 651 | 	day++; | 
| 652 | 	if (leapyear(year)) { | 
| 653 | 		for (month = day / 31 == 0 ? 1 : day / 31; month <= 12; month++) | 
| 654 | 			if (day > CUMLEAPDAYS[month - 1] && day <= CUMLEAPDAYS[month]) { | 
| 655 | 				break; | 
| 656 | 			} | 
| 657 | 		day -= CUMLEAPDAYS[month - 1]; | 
| 658 | 	} else { | 
| 659 | 		for (month = day / 31 == 0 ? 1 : day / 31; month <= 12; month++) | 
| 660 | 			if (day > CUMDAYS[month - 1] && day <= CUMDAYS[month]) { | 
| 661 | 				break; | 
| 662 | 			} | 
| 663 | 		day -= CUMDAYS[month - 1]; | 
| 664 | 	} | 
| 665 | 	/* clamp date */ | 
| 666 | 	if (year < YEAR_MIN) { | 
| 667 | 		day = 1; | 
| 668 | 		month = 1; | 
| 669 | 		year = YEAR_MIN; | 
| 670 | 	} else if (year > YEAR_MAX) { | 
| 671 | 		day = 31; | 
| 672 | 		month = 12; | 
| 673 | 		year = YEAR_MAX; | 
| 674 | 	} | 
| 675 | 	return mkdate(day, month, year); | 
| 676 | } | 
| 677 |  | 
| 678 | static gdk_return | 
| 679 | fixdateheap(BAT *b, const char *anme) | 
| 680 | { | 
| 681 | 	long_str filename; | 
| 682 | 	Heap h1;		/* old heap */ | 
| 683 | 	Heap h2;		/* new heap */ | 
| 684 | 	const char *nme, *bnme; | 
| 685 | 	char *srcdir; | 
| 686 | 	BUN i; | 
| 687 | 	bool nofix = true; | 
| 688 |  | 
| 689 | 	nme = BBP_physical(b->batCacheid); | 
| 690 | 	srcdir = GDKfilepath(NOFARM, BATDIR, nme, NULL); | 
| 691 | 	if (srcdir == NULL) { | 
| 692 | 		return GDK_FAIL; | 
| 693 | 	} | 
| 694 | 	*strrchr(srcdir, DIR_SEP) = 0; | 
| 695 |  | 
| 696 | 	if ((bnme = strrchr(nme, DIR_SEP)) != NULL) | 
| 697 | 		bnme++; | 
| 698 | 	else | 
| 699 | 		bnme = nme; | 
| 700 | 	sprintf(filename, "BACKUP%c%s" , DIR_SEP, bnme); | 
| 701 |  | 
| 702 | 	/* make backup of heap */ | 
| 703 | 	if (GDKmove(b->theap.farmid, srcdir, bnme, "tail" , BAKDIR, bnme, "tail" ) != GDK_SUCCEED) { | 
| 704 | 		GDKfree(srcdir); | 
| 705 | 		GDKerror("fixdateheap: cannot make backup of %s.tail\n" , nme); | 
| 706 | 		return GDK_FAIL; | 
| 707 | 	} | 
| 708 | 	/* load old heap */ | 
| 709 | 	h1 = b->theap; | 
| 710 | 	strconcat_len(h1.filename, sizeof(h1.filename), | 
| 711 | 		      filename, ".tail" , NULL); | 
| 712 | 	h1.base = NULL; | 
| 713 | 	h1.dirty = false; | 
| 714 | 	if (HEAPload(&h1, filename, "tail" , false) != GDK_SUCCEED) { | 
| 715 | 		GDKfree(srcdir); | 
| 716 | 		GDKerror("fixdateheap: loading old tail heap "  | 
| 717 | 			 "for BAT %d failed\n" , b->batCacheid); | 
| 718 | 		return GDK_FAIL; | 
| 719 | 	} | 
| 720 |  | 
| 721 | 	/* create new heap */ | 
| 722 | 	h2 = b->theap; | 
| 723 | 	strconcat_len(h2.filename, sizeof(h2.filename), nme, ".tail" , NULL); | 
| 724 | 	if (HEAPalloc(&h2, b->batCapacity, strcmp(anme, "date" ) == 0 ? 4 : 8) != GDK_SUCCEED) { | 
| 725 | 		GDKfree(srcdir); | 
| 726 | 		HEAPfree(&h1, false); | 
| 727 | 		GDKerror("fixdateheap: allocating new tail heap "  | 
| 728 | 			 "for BAT %d failed\n" , b->batCacheid); | 
| 729 | 		return GDK_FAIL; | 
| 730 | 	} | 
| 731 | 	h2.dirty = true; | 
| 732 | 	h2.free = h1.free; | 
| 733 |  | 
| 734 | 	if (strcmp(anme, "date" ) == 0) { | 
| 735 | 		const int *restrict o = (const int *) h1.base; | 
| 736 | 		int *restrict n = (int *) h2.base; | 
| 737 |  | 
| 738 | 		for (i = 0; i < b->batCount; i++) { | 
| 739 | 			if (is_int_nil(o[i])) { | 
| 740 | 				b->tnil = true; | 
| 741 | 				n[i] = int_nil; | 
| 742 | 			} else { | 
| 743 | 				n[i] = cvtdate(o[i]); | 
| 744 | 				nofix = false; | 
| 745 | 			} | 
| 746 | 		} | 
| 747 | 	} else if (strcmp(anme, "timestamp" ) == 0) { | 
| 748 | 		union timestamp { | 
| 749 | 			lng l; | 
| 750 | 			struct { | 
| 751 | #ifndef WORDS_BIGENDIAN | 
| 752 | 				int p_msecs; | 
| 753 | 				int p_days; | 
| 754 | #else | 
| 755 | 				int p_days; | 
| 756 | 				int p_msecs; | 
| 757 | #endif | 
| 758 | 			} t; | 
| 759 | 		}; | 
| 760 | 		const union timestamp *restrict o = (const union timestamp *) h1.base; | 
| 761 | 		lng *restrict n = (lng *) h2.base; | 
| 762 | 		for (i = 0; i < b->batCount; i++) { | 
| 763 | 			if (is_lng_nil(o[i].l)) { | 
| 764 | 				b->tnil = true; | 
| 765 | 				n[i] = lng_nil; | 
| 766 | 			} else { | 
| 767 | 				n[i] = mktimestamp(cvtdate(o[i].t.p_days), | 
| 768 | 						   o[i].t.p_msecs * LL_CONSTANT(1000)); | 
| 769 | 				nofix = false; | 
| 770 | 			} | 
| 771 | 		} | 
| 772 | 	} else { | 
| 773 | 		/* daytime */ | 
| 774 | 		const int *restrict o = (const int *) h1.base; | 
| 775 | 		lng *restrict n = (lng *) h2.base; | 
| 776 |  | 
| 777 | 		h2.free <<= 1; | 
| 778 | 		nofix = false; | 
| 779 | 		for (i = 0; i < b->batCount; i++) { | 
| 780 | 			if (is_int_nil(o[i])) { | 
| 781 | 				b->tnil = true; | 
| 782 | 				n[i] = lng_nil; | 
| 783 | 			} else { | 
| 784 | 				n[i] = o[i] * LL_CONSTANT(1000); | 
| 785 | 			} | 
| 786 | 		} | 
| 787 | 	} | 
| 788 |  | 
| 789 | 	/* cleanup */ | 
| 790 | 	HEAPfree(&h1, false); | 
| 791 | 	if (nofix) { | 
| 792 | 		/* didn't fix anything, move backup back */ | 
| 793 | 		HEAPfree(&h2, true); | 
| 794 | 		if (GDKmove(b->theap.farmid, BAKDIR, bnme, "tail" , srcdir, bnme, "tail" ) != GDK_SUCCEED) { | 
| 795 | 			GDKfree(srcdir); | 
| 796 | 			GDKerror("fixdateheap: cannot restore backup of %s.tail\n" , nme); | 
| 797 | 			return GDK_FAIL; | 
| 798 | 		} | 
| 799 | 	} else { | 
| 800 | 		/* heap was fixed */ | 
| 801 | 		b->batDirtydesc = true; | 
| 802 | 		if (HEAPsave(&h2, nme, "tail" ) != GDK_SUCCEED) { | 
| 803 | 			HEAPfree(&h2, false); | 
| 804 | 			GDKfree(srcdir); | 
| 805 | 			GDKerror("fixdateheap: saving heap failed\n" ); | 
| 806 | 			return GDK_FAIL; | 
| 807 | 		} | 
| 808 | 		if (strcmp(anme, "daytime" ) == 0) { | 
| 809 | 			b->twidth = 8; | 
| 810 | 			b->tshift = 3; | 
| 811 | 		} | 
| 812 | 		HEAPfree(&h2, false); | 
| 813 | 		b->theap = h2; | 
| 814 | 	} | 
| 815 | 	GDKfree(srcdir); | 
| 816 | 	return GDK_SUCCEED; | 
| 817 | } | 
| 818 |  | 
| 819 | static gdk_return | 
| 820 | fixdatebats(void) | 
| 821 | { | 
| 822 | 	bat bid; | 
| 823 | 	BAT *b; | 
| 824 | 	char filename[FILENAME_MAX]; | 
| 825 | 	FILE *fp; | 
| 826 | 	size_t len; | 
| 827 | 	int written; | 
| 828 |  | 
| 829 | 	for (bid = 1; bid < (bat) ATOMIC_GET(&BBPsize); bid++) { | 
| 830 | 		if ((b = BBP_desc(bid)) == NULL) { | 
| 831 | 			/* not a valid BAT */ | 
| 832 | 			continue; | 
| 833 | 		} | 
| 834 | 		if (BBP_logical(bid) && | 
| 835 | 		    (len = strlen(BBP_logical(bid))) > 12 && | 
| 836 | 		    strcmp(BBP_logical(bid) + len - 12, "_catalog_nme" ) == 0) { | 
| 837 | 			/* this is one of the files used by the | 
| 838 | 			 * logger.  We need to communicate to the | 
| 839 | 			 * logger that it also needs to do a | 
| 840 | 			 * conversion.  That is done by creating a | 
| 841 | 			 * file here based on the name of this BAT. */ | 
| 842 | 			written = snprintf(filename, sizeof(filename), | 
| 843 | 				 "%s/%.*s_date-convert" , | 
| 844 | 				 BBPfarms[0].dirname, | 
| 845 | 				 (int) (len - 12), BBP_logical(bid)); | 
| 846 | 			if (written == -1 || written >= FILENAME_MAX) { | 
| 847 | 				GDKerror("fixdatebats: cannot create file %s has a very large pathname\n" , | 
| 848 | 						 filename); | 
| 849 | 				return GDK_FAIL; | 
| 850 | 			} | 
| 851 | 			fp = fopen(filename, "w" ); | 
| 852 | 			if (fp == NULL) { | 
| 853 | 				GDKsyserror("fixdatebats: cannot create file %s\n" , | 
| 854 | 					 filename); | 
| 855 | 				return GDK_FAIL; | 
| 856 | 			} | 
| 857 | 			fclose(fp); | 
| 858 | 		} | 
| 859 | 		/* The date type is not known in GDK when reading the BBP */ | 
| 860 | 		if (b->ttype < 0) { | 
| 861 | 			const char *anme; | 
| 862 |  | 
| 863 | 			/* as yet unknown tail column type */ | 
| 864 | 			anme = ATOMunknown_name(b->ttype); | 
| 865 | 			/* known string types */ | 
| 866 | 			if ((strcmp(anme, "date" ) == 0 || | 
| 867 | 			     strcmp(anme, "timestamp" ) == 0 || | 
| 868 | 			     strcmp(anme, "daytime" ) == 0) && | 
| 869 | 			    fixdateheap(b, anme) != GDK_SUCCEED) | 
| 870 | 				return GDK_FAIL; | 
| 871 | 		} | 
| 872 | 	} | 
| 873 | 	return GDK_SUCCEED; | 
| 874 | } | 
| 875 | #endif | 
| 876 |  | 
| 877 | static int | 
| 878 | heapinit(BAT *b, const char *buf, int *hashash, unsigned bbpversion, bat bid, const char *filename) | 
| 879 | { | 
| 880 | 	int t; | 
| 881 | 	char type[33]; | 
| 882 | 	uint16_t width; | 
| 883 | 	uint16_t var; | 
| 884 | 	uint16_t properties; | 
| 885 | 	uint64_t nokey0; | 
| 886 | 	uint64_t nokey1; | 
| 887 | 	uint64_t nosorted; | 
| 888 | 	uint64_t norevsorted; | 
| 889 | 	uint64_t base; | 
| 890 | 	uint64_t align; | 
| 891 | 	uint64_t free; | 
| 892 | 	uint64_t size; | 
| 893 | 	uint16_t storage; | 
| 894 | 	int n; | 
| 895 |  | 
| 896 | 	(void) bbpversion;	/* could be used to implement compatibility */ | 
| 897 |  | 
| 898 | 	norevsorted = 0; /* default for first case */ | 
| 899 | 	if (bbpversion <= GDKLIBRARY_TALIGN ? | 
| 900 | 	    sscanf(buf, | 
| 901 | 		   " %32s %"  SCNu16 " %"  SCNu16 " %"  SCNu16 " %"  SCNu64 | 
| 902 | 		   " %"  SCNu64 " %"  SCNu64 " %"  SCNu64 " %"  SCNu64 | 
| 903 | 		   " %"  SCNu64 " %"  SCNu64 " %"  SCNu64 " %"  SCNu16 | 
| 904 | 		   "%n" , | 
| 905 | 		   type, &width, &var, &properties, &nokey0, | 
| 906 | 		   &nokey1, &nosorted, &norevsorted, &base, | 
| 907 | 		   &align, &free, &size, &storage, | 
| 908 | 		   &n) < 13 : | 
| 909 | 	    sscanf(buf, | 
| 910 | 		   " %10s %"  SCNu16 " %"  SCNu16 " %"  SCNu16 " %"  SCNu64 | 
| 911 | 		   " %"  SCNu64 " %"  SCNu64 " %"  SCNu64 " %"  SCNu64 | 
| 912 | 		   " %"  SCNu64 " %"  SCNu64 " %"  SCNu16 | 
| 913 | 		   "%n" , | 
| 914 | 		   type, &width, &var, &properties, &nokey0, | 
| 915 | 		   &nokey1, &nosorted, &norevsorted, &base, | 
| 916 | 		   &free, &size, &storage, | 
| 917 | 		   &n) < 12) { | 
| 918 | 		GDKerror("BBPinit: invalid format for BBP.dir\n%s" , buf); | 
| 919 | 		return -1; | 
| 920 | 	} | 
| 921 |  | 
| 922 | 	if (properties & ~0x0F81) { | 
| 923 | 		GDKerror("BBPinit: unknown properties are set: incompatible database\n" ); | 
| 924 | 		return -1; | 
| 925 | 	} | 
| 926 | 	*hashash = var & 2; | 
| 927 | 	var &= ~2; | 
| 928 | #ifdef HAVE_HGE | 
| 929 | 	if (strcmp(type, "hge" ) == 0) | 
| 930 | 		havehge = true; | 
| 931 | #endif | 
| 932 | 	/* sqlblob was changed to plain blob in the Apr2019 release */ | 
| 933 | 	if (strcmp(type, "sqlblob" ) == 0) | 
| 934 | 		strcpy(type, "blob" ); | 
| 935 | 	if ((t = ATOMindex(type)) < 0) { | 
| 936 | 		if ((t = ATOMunknown_find(type)) == 0) { | 
| 937 | 			GDKerror("BBPinit: no space for atom %s" , type); | 
| 938 | 			return -1; | 
| 939 | 		} | 
| 940 | 	} else if (var != (t == TYPE_void || BATatoms[t].atomPut != NULL)) { | 
| 941 | 		GDKerror("BBPinit: inconsistent entry in BBP.dir: tvarsized mismatch for BAT %d\n" , (int) bid); | 
| 942 | 		return -1; | 
| 943 | 	} else if (var && t != 0 ? | 
| 944 | 		   ATOMsize(t) < width || | 
| 945 | 		   (width != 1 && width != 2 && width != 4 | 
| 946 | #if SIZEOF_VAR_T == 8 | 
| 947 | 		    && width != 8 | 
| 948 | #endif | 
| 949 | 			   ) : | 
| 950 | 		   ATOMsize(t) != width) { | 
| 951 | 		GDKerror("BBPinit: inconsistent entry in BBP.dir: tsize mismatch for BAT %d\n" , (int) bid); | 
| 952 | 		return -1; | 
| 953 | 	} | 
| 954 | 	b->ttype = t; | 
| 955 | 	b->twidth = width; | 
| 956 | 	b->tvarsized = var != 0; | 
| 957 | 	b->tshift = ATOMelmshift(width); | 
| 958 | 	assert_shift_width(b->tshift,b->twidth); | 
| 959 | 	b->tnokey[0] = (BUN) nokey0; | 
| 960 | 	b->tnokey[1] = (BUN) nokey1; | 
| 961 | 	b->tsorted = (bit) ((properties & 0x0001) != 0); | 
| 962 | 	b->trevsorted = (bit) ((properties & 0x0080) != 0); | 
| 963 | 	b->tkey = (properties & 0x0100) != 0; | 
| 964 | 	b->tnonil = (properties & 0x0400) != 0; | 
| 965 | 	b->tnil = (properties & 0x0800) != 0; | 
| 966 | 	b->tnosorted = (BUN) nosorted; | 
| 967 | 	b->tnorevsorted = (BUN) norevsorted; | 
| 968 | 	/* (properties & 0x0200) is the old tdense flag */ | 
| 969 | 	b->tseqbase = (properties & 0x0200) == 0 || base >= (uint64_t) oid_nil ? oid_nil : (oid) base; | 
| 970 | 	b->theap.free = (size_t) free; | 
| 971 | 	b->theap.size = (size_t) size; | 
| 972 | 	b->theap.base = NULL; | 
| 973 | 	strconcat_len(b->theap.filename, sizeof(b->theap.filename), | 
| 974 | 		      filename, ".tail" , NULL); | 
| 975 | 	b->theap.storage = (storage_t) storage; | 
| 976 | 	b->theap.copied = false; | 
| 977 | 	b->theap.newstorage = (storage_t) storage; | 
| 978 | 	b->theap.farmid = BBPselectfarm(PERSISTENT, b->ttype, offheap); | 
| 979 | 	b->theap.dirty = false; | 
| 980 | #ifdef GDKLIBRARY_BLOB_SORT | 
| 981 | 	if (bbpversion <= GDKLIBRARY_BLOB_SORT && strcmp(type, "blob" ) == 0) { | 
| 982 | 		b->tsorted = b->trevsorted = false; | 
| 983 | 		b->tnosorted = b->tnorevsorted = 0; | 
| 984 | 		OIDXdestroy(b); | 
| 985 | 	} | 
| 986 | #endif | 
| 987 | 	if (b->theap.free > b->theap.size) { | 
| 988 | 		GDKerror("BBPinit: \"free\" value larger than \"size\" in heap of bat %d\n" , (int) bid); | 
| 989 | 		return -1; | 
| 990 | 	} | 
| 991 | 	return n; | 
| 992 | } | 
| 993 |  | 
| 994 | static int | 
| 995 | vheapinit(BAT *b, const char *buf, int hashash, bat bid, const char *filename) | 
| 996 | { | 
| 997 | 	int n = 0; | 
| 998 | 	uint64_t free, size; | 
| 999 | 	uint16_t storage; | 
| 1000 |  | 
| 1001 | 	if (b->tvarsized && b->ttype != TYPE_void) { | 
| 1002 | 		b->tvheap = GDKzalloc(sizeof(Heap)); | 
| 1003 | 		if (b->tvheap == NULL) { | 
| 1004 | 			GDKerror("BBPinit: cannot allocate memory for heap." ); | 
| 1005 | 			return -1; | 
| 1006 | 		} | 
| 1007 | 		if (sscanf(buf, | 
| 1008 | 			   " %"  SCNu64 " %"  SCNu64 " %"  SCNu16 | 
| 1009 | 			   "%n" , | 
| 1010 | 			   &free, &size, &storage, &n) < 3) { | 
| 1011 | 			GDKerror("BBPinit: invalid format for BBP.dir\n%s" , buf); | 
| 1012 | 			return -1; | 
| 1013 | 		} | 
| 1014 | 		b->tvheap->free = (size_t) free; | 
| 1015 | 		b->tvheap->size = (size_t) size; | 
| 1016 | 		b->tvheap->base = NULL; | 
| 1017 | 		strconcat_len(b->tvheap->filename, sizeof(b->tvheap->filename), | 
| 1018 | 			      filename, ".theap" , NULL); | 
| 1019 | 		b->tvheap->storage = (storage_t) storage; | 
| 1020 | 		b->tvheap->copied = false; | 
| 1021 | 		b->tvheap->hashash = hashash != 0; | 
| 1022 | 		b->tvheap->cleanhash = true; | 
| 1023 | 		b->tvheap->newstorage = (storage_t) storage; | 
| 1024 | 		b->tvheap->dirty = false; | 
| 1025 | 		b->tvheap->parentid = bid; | 
| 1026 | 		b->tvheap->farmid = BBPselectfarm(PERSISTENT, b->ttype, varheap); | 
| 1027 | 		if (b->tvheap->free > b->tvheap->size) { | 
| 1028 | 			GDKerror("BBPinit: \"free\" value larger than \"size\" in var heap of bat %d\n" , (int) bid); | 
| 1029 | 			return -1; | 
| 1030 | 		} | 
| 1031 | 	} | 
| 1032 | 	return n; | 
| 1033 | } | 
| 1034 |  | 
| 1035 | static gdk_return | 
| 1036 | BBPreadEntries(FILE *fp, unsigned bbpversion) | 
| 1037 | { | 
| 1038 | 	bat bid = 0; | 
| 1039 | 	char buf[4096]; | 
| 1040 | 	BAT *bn; | 
| 1041 |  | 
| 1042 | 	/* read the BBP.dir and insert the BATs into the BBP */ | 
| 1043 | 	while (fgets(buf, sizeof(buf), fp) != NULL) { | 
| 1044 | 		uint64_t batid; | 
| 1045 | 		uint16_t status; | 
| 1046 | 		char headname[129]; | 
| 1047 | 		char filename[20]; | 
| 1048 | 		unsigned int properties; | 
| 1049 | 		int nread, n; | 
| 1050 | 		char *s, *options = NULL; | 
| 1051 | 		char logical[1024]; | 
| 1052 | 		uint64_t first = 0, count, capacity, base = 0; | 
| 1053 | 		int Thashash; | 
| 1054 |  | 
| 1055 | 		static_assert(sizeof(BBP_physical(0)) == sizeof(filename), | 
| 1056 | 			"filename should be same size as BBPrec.physical" ); | 
| 1057 | 		if ((s = strchr(buf, '\r')) != NULL) { | 
| 1058 | 			/* convert \r\n into just \n */ | 
| 1059 | 			if (s[1] != '\n') { | 
| 1060 | 				GDKerror("BBPinit: invalid format for BBP.dir" ); | 
| 1061 | 				return GDK_FAIL; | 
| 1062 | 			} | 
| 1063 | 			*s++ = '\n'; | 
| 1064 | 			*s = 0; | 
| 1065 | 		} | 
| 1066 |  | 
| 1067 | 		if (sscanf(buf, | 
| 1068 | 			   "%"  SCNu64 " %"  SCNu16 " %128s %19s %u %"  SCNu64 | 
| 1069 | 			   " %"  SCNu64 " %"  SCNu64 | 
| 1070 | 			   "%n" , | 
| 1071 | 			   &batid, &status, headname, filename, | 
| 1072 | 			   &properties, | 
| 1073 | 			   &count, &capacity, &base, | 
| 1074 | 			   &nread) < 8) { | 
| 1075 | 			GDKerror("BBPinit: invalid format for BBP.dir\n%s" , buf); | 
| 1076 | 			return GDK_FAIL; | 
| 1077 | 		} | 
| 1078 |  | 
| 1079 | 		if (batid >= N_BBPINIT * BBPINIT) { | 
| 1080 | 			GDKerror("BBPinit: bat ID (%"  PRIu64 ") too large to accomodate (max %d)." , batid, N_BBPINIT * BBPINIT - 1); | 
| 1081 | 			return GDK_FAIL; | 
| 1082 | 		} | 
| 1083 |  | 
| 1084 | 		/* convert both / and \ path separators to our own DIR_SEP */ | 
| 1085 | #if DIR_SEP != '/' | 
| 1086 | 		s = filename; | 
| 1087 | 		while ((s = strchr(s, '/')) != NULL) | 
| 1088 | 			*s++ = DIR_SEP; | 
| 1089 | #endif | 
| 1090 | #if DIR_SEP != '\\' | 
| 1091 | 		s = filename; | 
| 1092 | 		while ((s = strchr(s, '\\')) != NULL) | 
| 1093 | 			*s++ = DIR_SEP; | 
| 1094 | #endif | 
| 1095 |  | 
| 1096 | 		if (first != 0) { | 
| 1097 | 			GDKerror("BBPinit: first != 0 (ID = %"  PRIu64 ")." , | 
| 1098 | 				 batid); | 
| 1099 | 			return GDK_FAIL; | 
| 1100 | 		} | 
| 1101 |  | 
| 1102 | 		bid = (bat) batid; | 
| 1103 | 		if (batid >= (uint64_t) ATOMIC_GET(&BBPsize)) { | 
| 1104 | 			ATOMIC_SET(&BBPsize, batid + 1); | 
| 1105 | 			if ((bat) ATOMIC_GET(&BBPsize) >= BBPlimit) | 
| 1106 | 				BBPextend(0, false); | 
| 1107 | 		} | 
| 1108 | 		if (BBP_desc(bid) != NULL) { | 
| 1109 | 			GDKerror("BBPinit: duplicate entry in BBP.dir (ID = "  | 
| 1110 | 				 "%"  PRIu64 ")." , batid); | 
| 1111 | 			return GDK_FAIL; | 
| 1112 | 		} | 
| 1113 | 		bn = GDKzalloc(sizeof(BAT)); | 
| 1114 | 		if (bn == NULL) { | 
| 1115 | 			GDKerror("BBPinit: cannot allocate memory for BAT." ); | 
| 1116 | 			return GDK_FAIL; | 
| 1117 | 		} | 
| 1118 | 		bn->batCacheid = bid; | 
| 1119 | 		if (BATroles(bn, NULL) != GDK_SUCCEED) { | 
| 1120 | 			GDKfree(bn); | 
| 1121 | 			GDKerror("BBPinit: BATroles failed." ); | 
| 1122 | 			return GDK_FAIL; | 
| 1123 | 		} | 
| 1124 | 		bn->batTransient = false; | 
| 1125 | 		bn->batCopiedtodisk = true; | 
| 1126 | 		bn->batRestricted = (properties & 0x06) >> 1; | 
| 1127 | 		bn->batCount = (BUN) count; | 
| 1128 | 		bn->batInserted = bn->batCount; | 
| 1129 | 		bn->batCapacity = (BUN) capacity; | 
| 1130 | 		char name[16]; | 
| 1131 | 		snprintf(name, sizeof(name), "BATlock%d" , bn->batCacheid); /* fits */ | 
| 1132 | 		MT_lock_init(&bn->batIdxLock, name); | 
| 1133 |  | 
| 1134 | 		if (base > (uint64_t) GDK_oid_max) { | 
| 1135 | 			BATdestroy(bn); | 
| 1136 | 			GDKerror("BBPinit: head seqbase out of range (ID = %"  PRIu64 ", seq = %"  PRIu64 ")." , batid, base); | 
| 1137 | 			return GDK_FAIL; | 
| 1138 | 		} | 
| 1139 | 		bn->hseqbase = (oid) base; | 
| 1140 | 		n = heapinit(bn, buf + nread, &Thashash, bbpversion, bid, filename); | 
| 1141 | 		if (n < 0) { | 
| 1142 | 			BATdestroy(bn); | 
| 1143 | 			return GDK_FAIL; | 
| 1144 | 		} | 
| 1145 | 		nread += n; | 
| 1146 | 		n = vheapinit(bn, buf + nread, Thashash, bid, filename); | 
| 1147 | 		if (n < 0) { | 
| 1148 | 			BATdestroy(bn); | 
| 1149 | 			return GDK_FAIL; | 
| 1150 | 		} | 
| 1151 | 		nread += n; | 
| 1152 |  | 
| 1153 | 		if (buf[nread] != '\n' && buf[nread] != ' ') { | 
| 1154 | 			BATdestroy(bn); | 
| 1155 | 			GDKerror("BBPinit: invalid format for BBP.dir\n%s" , buf); | 
| 1156 | 			return GDK_FAIL; | 
| 1157 | 		} | 
| 1158 | 		if (buf[nread] == ' ') | 
| 1159 | 			options = buf + nread + 1; | 
| 1160 |  | 
| 1161 | 		if ((s = strchr(headname, '~')) != NULL && s == headname) { | 
| 1162 | 			int len = snprintf(logical, sizeof(logical), "tmp_%o" , (unsigned) bid); | 
| 1163 | 			if (len == -1 || len >= (int) sizeof(logical)) | 
| 1164 | 				GDKfatal("BBPinit: BBP logical filename directory is too large\n" ); | 
| 1165 | 		} else { | 
| 1166 | 			if (s) | 
| 1167 | 				*s = 0; | 
| 1168 | 			strcpy_len(logical, headname, sizeof(logical)); | 
| 1169 | 		} | 
| 1170 | 		s = logical; | 
| 1171 | 		BBP_logical(bid) = GDKstrdup(s); | 
| 1172 | 		if (BBP_logical(bid) == NULL) { | 
| 1173 | 			BATdestroy(bn); | 
| 1174 | 			return GDK_FAIL; | 
| 1175 | 		} | 
| 1176 | 		/* tailname is ignored */ | 
| 1177 | 		strcpy_len(BBP_physical(bid), filename, sizeof(BBP_physical(bid))); | 
| 1178 | #ifdef STATIC_CODE_ANALYSIS | 
| 1179 | 		/* help coverity */ | 
| 1180 | 		BBP_physical(bid)[sizeof(BBP_physical(bid)) - 1] = 0; | 
| 1181 | #endif | 
| 1182 | 		BBP_options(bid) = NULL; | 
| 1183 | 		if (options) | 
| 1184 | 			BBP_options(bid) = GDKstrdup(options); | 
| 1185 | 		BBP_refs(bid) = 0; | 
| 1186 | 		BBP_lrefs(bid) = 1;	/* any BAT we encounter here is persistent, so has a logical reference */ | 
| 1187 | 		BBP_desc(bid) = bn; | 
| 1188 | 		BBP_status(bid) = BBPEXISTING;	/* do we need other status bits? */ | 
| 1189 | 	} | 
| 1190 | 	return GDK_SUCCEED; | 
| 1191 | } | 
| 1192 |  | 
| 1193 | /* check that the necessary files for all BATs exist and are large | 
| 1194 |  * enough */ | 
| 1195 | static gdk_return | 
| 1196 | BBPcheckbats(void) | 
| 1197 | { | 
| 1198 | 	for (bat bid = 1; bid < (bat) ATOMIC_GET(&BBPsize); bid++) { | 
| 1199 | 		struct stat statb; | 
| 1200 | 		BAT *b; | 
| 1201 | 		char *path; | 
| 1202 |  | 
| 1203 | 		if ((b = BBP_desc(bid)) == NULL) { | 
| 1204 | 			/* not a valid BAT */ | 
| 1205 | 			continue; | 
| 1206 | 		} | 
| 1207 | 		if (b->ttype == TYPE_void) { | 
| 1208 | 			/* no files needed */ | 
| 1209 | 			continue; | 
| 1210 | 		} | 
| 1211 | 		path = GDKfilepath(0, BATDIR, BBP_physical(b->batCacheid), "tail" ); | 
| 1212 | 		if (path == NULL) | 
| 1213 | 			return GDK_FAIL; | 
| 1214 | 		if (stat(path, &statb) < 0) { | 
| 1215 | 			GDKsyserror("BBPcheckbats: cannot stat file %s\n" , | 
| 1216 | 				    path); | 
| 1217 | 			GDKfree(path); | 
| 1218 | 			return GDK_FAIL; | 
| 1219 | 		} | 
| 1220 | 		if ((size_t) statb.st_size < b->theap.free) { | 
| 1221 | 			GDKerror("BBPcheckbats: file %s too small (expected %zu, actual %zu)\n" , path, b->theap.free, (size_t) statb.st_size); | 
| 1222 | 			GDKfree(path); | 
| 1223 | 			return GDK_FAIL; | 
| 1224 | 		} | 
| 1225 | 		GDKfree(path); | 
| 1226 | 		if (b->tvheap != NULL) { | 
| 1227 | 			path = GDKfilepath(0, BATDIR, BBP_physical(b->batCacheid), "theap" ); | 
| 1228 | 			if (path == NULL) | 
| 1229 | 				return GDK_FAIL; | 
| 1230 | 			if (stat(path, &statb) < 0) { | 
| 1231 | 				GDKsyserror("BBPcheckbats: cannot stat file %s\n" , | 
| 1232 | 					    path); | 
| 1233 | 				GDKfree(path); | 
| 1234 | 				return GDK_FAIL; | 
| 1235 | 			} | 
| 1236 | 			if ((size_t) statb.st_size < b->tvheap->free) { | 
| 1237 | 				GDKerror("BBPcheckbats: file %s too small (expected %zu, actual %zu)\n" , path, b->tvheap->free, (size_t) statb.st_size); | 
| 1238 | 				GDKfree(path); | 
| 1239 | 				return GDK_FAIL; | 
| 1240 | 			} | 
| 1241 | 			GDKfree(path); | 
| 1242 | 		} | 
| 1243 | 	} | 
| 1244 | 	return GDK_SUCCEED; | 
| 1245 | } | 
| 1246 |  | 
| 1247 | #ifdef HAVE_HGE | 
| 1248 | #define SIZEOF_MAX_INT SIZEOF_HGE | 
| 1249 | #else | 
| 1250 | #define SIZEOF_MAX_INT SIZEOF_LNG | 
| 1251 | #endif | 
| 1252 |  | 
| 1253 | static unsigned | 
| 1254 | (FILE *fp) | 
| 1255 | { | 
| 1256 | 	char buf[BUFSIZ]; | 
| 1257 | 	int sz, ptrsize, oidsize, intsize; | 
| 1258 | 	unsigned bbpversion; | 
| 1259 |  | 
| 1260 | 	if (fgets(buf, sizeof(buf), fp) == NULL) { | 
| 1261 | 		GDKerror("BBPinit: BBP.dir is empty" ); | 
| 1262 | 		return 0; | 
| 1263 | 	} | 
| 1264 | 	if (sscanf(buf, "BBP.dir, GDKversion %u\n" , &bbpversion) != 1) { | 
| 1265 | 		GDKerror("BBPinit: old BBP without version number" ); | 
| 1266 | 		GDKerror("dump the database using a compatible version," ); | 
| 1267 | 		GDKerror("then restore into new database using this version.\n" ); | 
| 1268 | 		return 0; | 
| 1269 | 	} | 
| 1270 | 	if (bbpversion != GDKLIBRARY && | 
| 1271 | 	    bbpversion != GDKLIBRARY_OLDDATE && | 
| 1272 | 	    bbpversion != GDKLIBRARY_BLOB_SORT && | 
| 1273 | 	    bbpversion != GDKLIBRARY_NIL_NAN && | 
| 1274 | 	    bbpversion != GDKLIBRARY_TALIGN) { | 
| 1275 | 		GDKerror("BBPinit: incompatible BBP version: expected 0%o, got 0%o.\n"  | 
| 1276 | 			 "This database was probably created by %s version of MonetDB." , | 
| 1277 | 			 GDKLIBRARY, bbpversion, | 
| 1278 | 			 bbpversion > GDKLIBRARY ? "a newer"  : "a too old" ); | 
| 1279 | 		return 0; | 
| 1280 | 	} | 
| 1281 | 	if (fgets(buf, sizeof(buf), fp) == NULL) { | 
| 1282 | 		GDKerror("BBPinit: short BBP" ); | 
| 1283 | 		return 0; | 
| 1284 | 	} | 
| 1285 | 	if (sscanf(buf, "%d %d %d" , &ptrsize, &oidsize, &intsize) != 3) { | 
| 1286 | 		GDKerror("BBPinit: BBP.dir has incompatible format: pointer, OID, and max. integer sizes are missing" ); | 
| 1287 | 		return 0; | 
| 1288 | 	} | 
| 1289 | 	if (ptrsize != SIZEOF_SIZE_T || oidsize != SIZEOF_OID) { | 
| 1290 | 		GDKerror("BBPinit: database created with incompatible server:\n"  | 
| 1291 | 			 "expected pointer size %d, got %d, expected OID size %d, got %d." , | 
| 1292 | 			 SIZEOF_SIZE_T, ptrsize, SIZEOF_OID, oidsize); | 
| 1293 | 		return 0; | 
| 1294 | 	} | 
| 1295 | 	if (intsize > SIZEOF_MAX_INT) { | 
| 1296 | 		GDKerror("BBPinit: database created with incompatible server:\n"  | 
| 1297 | 			 "expected max. integer size %d, got %d." , | 
| 1298 | 			 SIZEOF_MAX_INT, intsize); | 
| 1299 | 		return 0; | 
| 1300 | 	} | 
| 1301 | 	if (fgets(buf, sizeof(buf), fp) == NULL) { | 
| 1302 | 		GDKerror("BBPinit: short BBP" ); | 
| 1303 | 		return 0; | 
| 1304 | 	} | 
| 1305 | #ifdef GDKLIBRARY_TALIGN | 
| 1306 | 	char *s; | 
| 1307 | 	if ((s = strstr(buf, "BBPsize" )) != NULL) { | 
| 1308 | 		if (sscanf(s, "BBPsize=%d" , &sz) != 1) { | 
| 1309 | 			GDKerror("BBPinit: no BBPsize value found\n" ); | 
| 1310 | 			return 0; | 
| 1311 | 		} | 
| 1312 | 		sz = (int) (sz * BATMARGIN); | 
| 1313 | 		if (sz > (bat) ATOMIC_GET(&BBPsize)) | 
| 1314 | 			ATOMIC_SET(&BBPsize, sz); | 
| 1315 | 	} | 
| 1316 | #else | 
| 1317 | 	if (sscanf(buf, "BBPsize=%d" , &sz) != 1) { | 
| 1318 | 		GDKerror("BBPinit: no BBPsize value found\n" ); | 
| 1319 | 		return 0; | 
| 1320 | 	} | 
| 1321 | 	sz = (int) (sz * BATMARGIN); | 
| 1322 | 	if (sz > (bat) ATOMIC_GET(&BBPsize)) | 
| 1323 | 		ATOMIC_SET(&BBPsize, sz); | 
| 1324 | #endif | 
| 1325 | 	assert(bbpversion != 0); | 
| 1326 | 	return bbpversion; | 
| 1327 | } | 
| 1328 |  | 
| 1329 | bool | 
| 1330 | GDKinmemory(void) | 
| 1331 | { | 
| 1332 | 	return BBPfarms[0].dirname == NULL; | 
| 1333 | } | 
| 1334 |  | 
| 1335 | /* all errors are fatal */ | 
| 1336 | gdk_return | 
| 1337 | BBPaddfarm(const char *dirname, int rolemask) | 
| 1338 | { | 
| 1339 | 	struct stat st; | 
| 1340 | 	int i; | 
| 1341 |  | 
| 1342 | 	if (dirname == NULL) { | 
| 1343 | 		assert(BBPfarms[0].dirname == NULL); | 
| 1344 | 		assert(rolemask & 1); | 
| 1345 | 		assert(BBPfarms[0].roles == 0); | 
| 1346 | 		BBPfarms[0].roles = rolemask; | 
| 1347 | 		return GDK_SUCCEED; | 
| 1348 | 	} | 
| 1349 | 	if (strchr(dirname, '\n') != NULL) { | 
| 1350 | 		GDKerror("BBPaddfarm: no newline allowed in directory name\n" ); | 
| 1351 | 		return GDK_FAIL; | 
| 1352 | 	} | 
| 1353 | 	if (rolemask == 0 || (rolemask & 1 && BBPfarms[0].dirname != NULL)) { | 
| 1354 | 		GDKerror("BBPaddfarm: bad rolemask\n" ); | 
| 1355 | 		return GDK_FAIL; | 
| 1356 | 	} | 
| 1357 | 	if (mkdir(dirname, MONETDB_DIRMODE) < 0) { | 
| 1358 | 		if (errno == EEXIST) { | 
| 1359 | 			if (stat(dirname, &st) == -1 || !S_ISDIR(st.st_mode)) { | 
| 1360 | 				GDKerror("BBPaddfarm: %s: not a directory\n" , dirname); | 
| 1361 | 				return GDK_FAIL; | 
| 1362 | 			} | 
| 1363 | 		} else { | 
| 1364 | 			GDKerror("BBPaddfarm: %s: cannot create directory\n" , dirname); | 
| 1365 | 			return GDK_FAIL; | 
| 1366 | 		} | 
| 1367 | 	} | 
| 1368 | 	for (i = 0; i < MAXFARMS; i++) { | 
| 1369 | 		if (BBPfarms[i].dirname == NULL) { | 
| 1370 | 			BBPfarms[i].dirname = GDKstrdup(dirname); | 
| 1371 | 			if (BBPfarms[i].dirname == NULL) | 
| 1372 | 				return GDK_FAIL; | 
| 1373 | 			BBPfarms[i].roles = rolemask; | 
| 1374 | 			if ((rolemask & 1) == 0) { | 
| 1375 | 				char *bbpdir; | 
| 1376 | 				int j; | 
| 1377 |  | 
| 1378 | 				for (j = 0; j < i; j++) | 
| 1379 | 					if (strcmp(BBPfarms[i].dirname, | 
| 1380 | 						   BBPfarms[j].dirname) == 0) | 
| 1381 | 						return GDK_SUCCEED; | 
| 1382 | 				/* if an extra farm, make sure we | 
| 1383 | 				 * don't find a BBP.dir there that | 
| 1384 | 				 * might belong to an existing | 
| 1385 | 				 * database */ | 
| 1386 | 				bbpdir = GDKfilepath(i, BATDIR, "BBP" , "dir" ); | 
| 1387 | 				if (bbpdir == NULL) { | 
| 1388 | 					GDKerror("BBPaddfarm: malloc failed\n" ); | 
| 1389 | 					return GDK_FAIL; | 
| 1390 | 				} | 
| 1391 | 				if (stat(bbpdir, &st) != -1 || errno != ENOENT) { | 
| 1392 | 					GDKfree(bbpdir); | 
| 1393 | 					GDKerror("BBPaddfarm: %s is a database\n" , dirname); | 
| 1394 | 					return GDK_FAIL; | 
| 1395 | 				} | 
| 1396 | 				GDKfree(bbpdir); | 
| 1397 | 				bbpdir = GDKfilepath(i, BAKDIR, "BBP" , "dir" ); | 
| 1398 | 				if (bbpdir == NULL) { | 
| 1399 | 					GDKerror("BBPaddfarm: malloc failed\n" ); | 
| 1400 | 					return GDK_FAIL; | 
| 1401 | 				} | 
| 1402 | 				if (stat(bbpdir, &st) != -1 || errno != ENOENT) { | 
| 1403 | 					GDKfree(bbpdir); | 
| 1404 | 					GDKerror("BBPaddfarm: %s is a database\n" , dirname); | 
| 1405 | 					return GDK_FAIL; | 
| 1406 | 				} | 
| 1407 | 				GDKfree(bbpdir); | 
| 1408 | 			} | 
| 1409 | 			return GDK_SUCCEED; | 
| 1410 | 		} | 
| 1411 | 	} | 
| 1412 | 	GDKerror("BBPaddfarm: too many farms\n" ); | 
| 1413 | 	return GDK_FAIL; | 
| 1414 | } | 
| 1415 |  | 
| 1416 | gdk_return | 
| 1417 | BBPinit(void) | 
| 1418 | { | 
| 1419 | 	FILE *fp = NULL; | 
| 1420 | 	struct stat st; | 
| 1421 | 	unsigned bbpversion = 0; | 
| 1422 | 	int i; | 
| 1423 |  | 
| 1424 | 	if (!GDKinmemory()) { | 
| 1425 | 		str bbpdirstr, backupbbpdirstr; | 
| 1426 |  | 
| 1427 | 		if (!(bbpdirstr = GDKfilepath(0, BATDIR, "BBP" , "dir" ))) { | 
| 1428 | 			GDKerror("BBPinit: GDKmalloc failed\n" ); | 
| 1429 | 			return GDK_FAIL; | 
| 1430 | 		} | 
| 1431 |  | 
| 1432 | 		if (!(backupbbpdirstr = GDKfilepath(0, BAKDIR, "BBP" , "dir" ))) { | 
| 1433 | 			GDKfree(bbpdirstr); | 
| 1434 | 			GDKerror("BBPinit: GDKmalloc failed\n" ); | 
| 1435 | 			return GDK_FAIL; | 
| 1436 | 		} | 
| 1437 |  | 
| 1438 | 		if (GDKremovedir(0, TEMPDIR) != GDK_SUCCEED) { | 
| 1439 | 			GDKfree(bbpdirstr); | 
| 1440 | 			GDKfree(backupbbpdirstr); | 
| 1441 | 			GDKerror("BBPinit: cannot remove directory %s\n" , TEMPDIR); | 
| 1442 | 			return GDK_FAIL; | 
| 1443 | 		} | 
| 1444 |  | 
| 1445 | 		if (GDKremovedir(0, DELDIR) != GDK_SUCCEED) { | 
| 1446 | 			GDKfree(bbpdirstr); | 
| 1447 | 			GDKfree(backupbbpdirstr); | 
| 1448 | 			GDKerror("BBPinit: cannot remove directory %s\n" , DELDIR); | 
| 1449 | 			return GDK_FAIL; | 
| 1450 | 		} | 
| 1451 |  | 
| 1452 | 		/* first move everything from SUBDIR to BAKDIR (its parent) */ | 
| 1453 | 		if (BBPrecover_subdir() != GDK_SUCCEED) { | 
| 1454 | 			GDKfree(bbpdirstr); | 
| 1455 | 			GDKfree(backupbbpdirstr); | 
| 1456 | 			GDKerror("BBPinit: cannot properly recover_subdir process %s. Please check whether your disk is full or write-protected" , SUBDIR); | 
| 1457 | 			return GDK_FAIL; | 
| 1458 | 		} | 
| 1459 |  | 
| 1460 | 		/* try to obtain a BBP.dir from bakdir */ | 
| 1461 | 		if (stat(backupbbpdirstr, &st) == 0) { | 
| 1462 | 			/* backup exists; *must* use it */ | 
| 1463 | 			if (recover_dir(0, stat(bbpdirstr, &st) == 0) != GDK_SUCCEED) | 
| 1464 | 				goto bailout; | 
| 1465 | 			if ((fp = GDKfilelocate(0, "BBP" , "r" , "dir" )) == NULL) { | 
| 1466 | 				GDKfree(bbpdirstr); | 
| 1467 | 				GDKfree(backupbbpdirstr); | 
| 1468 | 				GDKerror("BBPinit: cannot open recovered BBP.dir." ); | 
| 1469 | 				return GDK_FAIL; | 
| 1470 | 			} | 
| 1471 | 		} else if ((fp = GDKfilelocate(0, "BBP" , "r" , "dir" )) == NULL) { | 
| 1472 | 			/* there was no BBP.dir either. Panic! try to use a | 
| 1473 | 			 * BBP.bak */ | 
| 1474 | 			if (stat(backupbbpdirstr, &st) < 0) { | 
| 1475 | 				/* no BBP.bak (nor BBP.dir or BACKUP/BBP.dir): | 
| 1476 | 				 * create a new one */ | 
| 1477 | 				IODEBUG fprintf(stderr, "#BBPdir: initializing BBP.\n" );	/* BBPdir instead of BBPinit for backward compatibility of error messages */ | 
| 1478 | 				if (BBPdir(0, NULL) != GDK_SUCCEED) | 
| 1479 | 					goto bailout; | 
| 1480 | 			} else if (GDKmove(0, BATDIR, "BBP" , "bak" , BATDIR, "BBP" , "dir" ) == GDK_SUCCEED) | 
| 1481 | 				IODEBUG fprintf(stderr, "#BBPinit: reverting to dir saved in BBP.bak.\n" ); | 
| 1482 |  | 
| 1483 | 			if ((fp = GDKfilelocate(0, "BBP" , "r" , "dir" )) == NULL) | 
| 1484 | 				goto bailout; | 
| 1485 | 		} | 
| 1486 | 		assert(fp != NULL); | 
| 1487 | 		GDKfree(bbpdirstr); | 
| 1488 | 		GDKfree(backupbbpdirstr); | 
| 1489 | 	} | 
| 1490 |  | 
| 1491 | 	/* scan the BBP.dir to obtain current size */ | 
| 1492 | 	BBPlimit = 0; | 
| 1493 | 	memset(BBP, 0, sizeof(BBP)); | 
| 1494 | 	ATOMIC_SET(&BBPsize, 1); | 
| 1495 |  | 
| 1496 | 	if (GDKinmemory()) { | 
| 1497 | 		bbpversion = GDKLIBRARY; | 
| 1498 | 	} else { | 
| 1499 | 		bbpversion = BBPheader(fp); | 
| 1500 | 		if (bbpversion == 0) | 
| 1501 | 			return GDK_FAIL; | 
| 1502 | 	} | 
| 1503 |  | 
| 1504 | 	BBPextend(0, false);		/* allocate BBP records */ | 
| 1505 |  | 
| 1506 | 	if (!GDKinmemory()) { | 
| 1507 | 		ATOMIC_SET(&BBPsize, 1); | 
| 1508 | 		if (BBPreadEntries(fp, bbpversion) != GDK_SUCCEED) | 
| 1509 | 			return GDK_FAIL; | 
| 1510 | 		fclose(fp); | 
| 1511 | 	} | 
| 1512 |  | 
| 1513 | 	if (BBPinithash(0) != GDK_SUCCEED) { | 
| 1514 | 		GDKerror("BBPinit: BBPinithash failed" ); | 
| 1515 | 		return GDK_FAIL; | 
| 1516 | 	} | 
| 1517 |  | 
| 1518 | 	/* will call BBPrecover if needed */ | 
| 1519 | 	if (!GDKinmemory() && BBPprepare(false) != GDK_SUCCEED) { | 
| 1520 | 		GDKerror("BBPinit: cannot properly prepare process %s. Please check whether your disk is full or write-protected" , BAKDIR); | 
| 1521 | 		return GDK_FAIL; | 
| 1522 | 	} | 
| 1523 |  | 
| 1524 | 	if (BBPcheckbats() != GDK_SUCCEED) | 
| 1525 | 		return GDK_FAIL; | 
| 1526 |  | 
| 1527 | 	/* cleanup any leftovers (must be done after BBPrecover) */ | 
| 1528 | 	for (i = 0; i < MAXFARMS && BBPfarms[i].dirname != NULL; i++) { | 
| 1529 | 		int j; | 
| 1530 | 		for (j = 0; j < i; j++) { | 
| 1531 | 			/* don't clean a directory twice */ | 
| 1532 | 			if (BBPfarms[j].dirname && | 
| 1533 | 			    strcmp(BBPfarms[i].dirname, | 
| 1534 | 				   BBPfarms[j].dirname) == 0) | 
| 1535 | 				break; | 
| 1536 | 		} | 
| 1537 | 		if (j == i) { | 
| 1538 | 			char *d = GDKfilepath(i, NULL, BATDIR, NULL); | 
| 1539 | 			if (d == NULL) { | 
| 1540 | 				GDKerror("BBPinit: malloc failed\n" ); | 
| 1541 | 				return GDK_FAIL; | 
| 1542 | 			} | 
| 1543 | 			BBPdiskscan(d, strlen(d) - strlen(BATDIR)); | 
| 1544 | 			GDKfree(d); | 
| 1545 | 		} | 
| 1546 | 	} | 
| 1547 |  | 
| 1548 | #ifdef GDKLIBRARY_NIL_NAN | 
| 1549 | 	if (bbpversion <= GDKLIBRARY_NIL_NAN) | 
| 1550 | 		if (fixfloatbats() != GDK_SUCCEED) | 
| 1551 | 			return GDK_FAIL; | 
| 1552 | #endif | 
| 1553 | #ifdef GDKLIBRARY_OLDDATE | 
| 1554 | 	if (bbpversion <= GDKLIBRARY_OLDDATE) | 
| 1555 | 		if (fixdatebats() != GDK_SUCCEED) | 
| 1556 | 			return GDK_FAIL; | 
| 1557 | #endif | 
| 1558 | 	if (bbpversion < GDKLIBRARY) | 
| 1559 | 		TMcommit(); | 
| 1560 | 	return GDK_SUCCEED; | 
| 1561 |  | 
| 1562 |       bailout: | 
| 1563 | 	/* now it is time for real panic */ | 
| 1564 | 	GDKerror("BBPinit: could not write %s%cBBP.dir. Please check whether your disk is full or write-protected" , BATDIR, DIR_SEP); | 
| 1565 | 	return GDK_FAIL; | 
| 1566 | } | 
| 1567 |  | 
/*
 * During the exit phase all non-persistent BATs are removed.  Upon
 * exit the status of the BBP tables is saved on disk.  This function
 * is called once, during the shutdown of the server.  Since shutdown
 * may be issued from any thread (dangerous), it may interfere with a
 * session that is running in parallel.
 */
| 1575 |  | 
| 1576 | static int backup_files = 0, backup_dir = 0, backup_subdir = 0; | 
| 1577 |  | 
| 1578 | void | 
| 1579 | BBPexit(void) | 
| 1580 | { | 
| 1581 | 	bat i; | 
| 1582 | 	bool skipped; | 
| 1583 |  | 
| 1584 | 	BBPlock();	/* stop all threads ever touching more descriptors */ | 
| 1585 |  | 
| 1586 | 	/* free all memory (just for leak-checking in Purify) */ | 
| 1587 | 	do { | 
| 1588 | 		skipped = false; | 
| 1589 | 		for (i = 0; i < (bat) ATOMIC_GET(&BBPsize); i++) { | 
| 1590 | 			if (BBPvalid(i)) { | 
| 1591 | 				BAT *b = BBP_desc(i); | 
| 1592 |  | 
| 1593 | 				if (b) { | 
| 1594 | 					if (b->batSharecnt > 0) { | 
| 1595 | 						skipped = true; | 
| 1596 | 						continue; | 
| 1597 | 					} | 
| 1598 | 					if (isVIEW(b)) { | 
| 1599 | 						/* "manually" | 
| 1600 | 						 * decrement parent | 
| 1601 | 						 * references, since | 
| 1602 | 						 * VIEWdestroy doesn't | 
| 1603 | 						 * (and can't here due | 
| 1604 | 						 * to locks) do it */ | 
| 1605 | 						bat tp = VIEWtparent(b); | 
| 1606 | 						bat vtp = VIEWvtparent(b); | 
| 1607 | 						if (tp) { | 
| 1608 | 							BBP_desc(tp)->batSharecnt--; | 
| 1609 | 							--BBP_lrefs(tp); | 
| 1610 | 						} | 
| 1611 | 						if (vtp) { | 
| 1612 | 							BBP_desc(vtp)->batSharecnt--; | 
| 1613 | 							--BBP_lrefs(vtp); | 
| 1614 | 						} | 
| 1615 | 						VIEWdestroy(b); | 
| 1616 | 					} else { | 
| 1617 | 						BATfree(b); | 
| 1618 | 					} | 
| 1619 | 				} | 
| 1620 | 				BBPuncacheit(i, true); | 
| 1621 | 				if (BBP_logical(i) != BBP_bak(i)) | 
| 1622 | 					GDKfree(BBP_logical(i)); | 
| 1623 | 				BBP_logical(i) = NULL; | 
| 1624 | 			} | 
| 1625 | 		} | 
| 1626 | 	} while (skipped); | 
| 1627 | 	GDKfree(BBP_hash); | 
| 1628 | 	BBP_hash = 0; | 
| 1629 | 	// these need to be NULL, otherwise no new ones get created | 
| 1630 | 	backup_files = 0; | 
| 1631 | 	backup_dir = 0; | 
| 1632 | 	backup_subdir = 0; | 
| 1633 |  | 
| 1634 | } | 
| 1635 |  | 
/*
 * The routine BBPdir creates the BAT pool dictionary file.  It
 * includes some information about the current state of affairs in
 * the pool.  The location in the buffer pool is saved for later use
 * as well.  This is merely done for ease of debugging and of no
 * importance to front-ends.  The tail of non-used entries is
 * reclaimed as well.
 */
| 1644 | static inline int | 
| 1645 | heap_entry(FILE *fp, BAT *b) | 
| 1646 | { | 
| 1647 | 	return fprintf(fp, " %s %d %d %d "  BUNFMT " "  BUNFMT " "  BUNFMT " "  | 
| 1648 | 		       BUNFMT " "  OIDFMT " %zu %zu %d" , | 
| 1649 | 		       b->ttype >= 0 ? BATatoms[b->ttype].name : ATOMunknown_name(b->ttype), | 
| 1650 | 		       b->twidth, | 
| 1651 | 		       b->tvarsized | (b->tvheap ? b->tvheap->hashash << 1 : 0), | 
| 1652 | 		       (unsigned short) b->tsorted | | 
| 1653 | 			   ((unsigned short) b->trevsorted << 7) | | 
| 1654 | 			   (((unsigned short) b->tkey & 0x01) << 8) | | 
| 1655 | 		           ((unsigned short) BATtdense(b) << 9) | | 
| 1656 | 			   ((unsigned short) b->tnonil << 10) | | 
| 1657 | 			   ((unsigned short) b->tnil << 11), | 
| 1658 | 		       b->tnokey[0], | 
| 1659 | 		       b->tnokey[1], | 
| 1660 | 		       b->tnosorted, | 
| 1661 | 		       b->tnorevsorted, | 
| 1662 | 		       b->tseqbase, | 
| 1663 | 		       b->theap.free, | 
| 1664 | 		       b->theap.size, | 
| 1665 | 		       (int) b->theap.newstorage); | 
| 1666 | } | 
| 1667 |  | 
| 1668 | static inline int | 
| 1669 | vheap_entry(FILE *fp, Heap *h) | 
| 1670 | { | 
| 1671 | 	if (h == NULL) | 
| 1672 | 		return 0; | 
| 1673 | 	return fprintf(fp, " %zu %zu %d" , | 
| 1674 | 		       h->free, h->size, (int) h->newstorage); | 
| 1675 | } | 
| 1676 |  | 
| 1677 | static gdk_return | 
| 1678 | new_bbpentry(FILE *fp, bat i, const char *prefix) | 
| 1679 | { | 
| 1680 | #ifndef NDEBUG | 
| 1681 | 	assert(i > 0); | 
| 1682 | 	assert(i < (bat) ATOMIC_GET(&BBPsize)); | 
| 1683 | 	assert(BBP_desc(i)); | 
| 1684 | 	assert(BBP_desc(i)->batCacheid == i); | 
| 1685 | 	assert(BBP_desc(i)->batRole == PERSISTENT); | 
| 1686 | 	assert(0 <= BBP_desc(i)->theap.farmid && BBP_desc(i)->theap.farmid < MAXFARMS); | 
| 1687 | 	assert(BBPfarms[BBP_desc(i)->theap.farmid].roles & (1 << PERSISTENT)); | 
| 1688 | 	if (BBP_desc(i)->tvheap) { | 
| 1689 | 		assert(0 <= BBP_desc(i)->tvheap->farmid && BBP_desc(i)->tvheap->farmid < MAXFARMS); | 
| 1690 | 		assert(BBPfarms[BBP_desc(i)->tvheap->farmid].roles & (1 << PERSISTENT)); | 
| 1691 | 	} | 
| 1692 | #endif | 
| 1693 |  | 
| 1694 | 	if (fprintf(fp, "%s%zd %u %s %s %d "  BUNFMT " "  | 
| 1695 | 		    BUNFMT " "  OIDFMT, prefix, | 
| 1696 | 		    /* BAT info */ | 
| 1697 | 		    (ssize_t) i, | 
| 1698 | 		    BBP_status(i) & BBPPERSISTENT, | 
| 1699 | 		    BBP_logical(i), | 
| 1700 | 		    BBP_physical(i), | 
| 1701 | 		    BBP_desc(i)->batRestricted << 1, | 
| 1702 | 		    BBP_desc(i)->batCount, | 
| 1703 | 		    BBP_desc(i)->batCapacity, | 
| 1704 | 		    BBP_desc(i)->hseqbase) < 0 || | 
| 1705 | 	    heap_entry(fp, BBP_desc(i)) < 0 || | 
| 1706 | 	    vheap_entry(fp, BBP_desc(i)->tvheap) < 0 || | 
| 1707 | 	    (BBP_options(i) && | 
| 1708 | 	     fprintf(fp, " %s" , BBP_options(i)) < 0) || | 
| 1709 | 	    fprintf(fp, "\n" ) < 0) { | 
| 1710 | 		GDKsyserror("new_bbpentry: Writing BBP.dir entry failed\n" ); | 
| 1711 | 		return GDK_FAIL; | 
| 1712 | 	} | 
| 1713 |  | 
| 1714 | 	return GDK_SUCCEED; | 
| 1715 | } | 
| 1716 |  | 
| 1717 | static gdk_return | 
| 1718 | (FILE *f, int n) | 
| 1719 | { | 
| 1720 | 	if (fprintf(f, "BBP.dir, GDKversion %u\n%d %d %d\nBBPsize=%d\n" , | 
| 1721 | 		    GDKLIBRARY, SIZEOF_SIZE_T, SIZEOF_OID, | 
| 1722 | #ifdef HAVE_HGE | 
| 1723 | 		    havehge ? SIZEOF_HGE : | 
| 1724 | #endif | 
| 1725 | 		    SIZEOF_LNG, n) < 0 || | 
| 1726 | 	    ferror(f)) { | 
| 1727 | 		GDKsyserror("BBPdir_header: Writing BBP.dir header failed\n" ); | 
| 1728 | 		return GDK_FAIL; | 
| 1729 | 	} | 
| 1730 | 	return GDK_SUCCEED; | 
| 1731 | } | 
| 1732 |  | 
| 1733 | static gdk_return | 
| 1734 | BBPdir_subcommit(int cnt, bat *subcommit) | 
| 1735 | { | 
| 1736 | 	FILE *obbpf, *nbbpf; | 
| 1737 | 	bat j = 1; | 
| 1738 | 	char buf[3000]; | 
| 1739 | 	int n; | 
| 1740 |  | 
| 1741 | #ifndef NDEBUG | 
| 1742 | 	assert(subcommit != NULL); | 
| 1743 | 	for (n = 2; n < cnt; n++) | 
| 1744 | 		assert(subcommit[n - 1] < subcommit[n]); | 
| 1745 | #endif | 
| 1746 |  | 
| 1747 | 	if ((nbbpf = GDKfilelocate(0, "BBP" , "w" , "dir" )) == NULL) | 
| 1748 | 		return GDK_FAIL; | 
| 1749 |  | 
| 1750 | 	n = (bat) ATOMIC_GET(&BBPsize); | 
| 1751 |  | 
| 1752 | 	/* we need to copy the backup BBP.dir to the new, but | 
| 1753 | 	 * replacing the entries for the subcommitted bats */ | 
| 1754 | 	if ((obbpf = GDKfileopen(0, SUBDIR, "BBP" , "dir" , "r" )) == NULL && | 
| 1755 | 	    (obbpf = GDKfileopen(0, BAKDIR, "BBP" , "dir" , "r" )) == NULL) { | 
| 1756 | 		GDKerror("BBPdir: subcommit attempted without backup BBP.dir." ); | 
| 1757 | 		return GDK_FAIL; | 
| 1758 | 	} | 
| 1759 | 	/* read first three lines */ | 
| 1760 | 	if (fgets(buf, sizeof(buf), obbpf) == NULL || /* BBP.dir, GDKversion %d */ | 
| 1761 | 	    fgets(buf, sizeof(buf), obbpf) == NULL || /* SIZEOF_SIZE_T SIZEOF_OID SIZEOF_MAX_INT */ | 
| 1762 | 	    fgets(buf, sizeof(buf), obbpf) == NULL) { /* BBPsize=%d */ | 
| 1763 | 		GDKerror("BBPdir: subcommit attempted with invalid backup BBP.dir." ); | 
| 1764 | 		return GDK_FAIL; | 
| 1765 | 	} | 
| 1766 | 	/* third line contains BBPsize */ | 
| 1767 | 	sscanf(buf, "BBPsize=%d" , &n); | 
| 1768 | 	if (n < (bat) ATOMIC_GET(&BBPsize)) | 
| 1769 | 		n = (bat) ATOMIC_GET(&BBPsize); | 
| 1770 |  | 
| 1771 | 	IODEBUG fprintf(stderr, "#BBPdir: writing BBP.dir (%d bats).\n" , n); | 
| 1772 |  | 
| 1773 | 	if (BBPdir_header(nbbpf, n) != GDK_SUCCEED) { | 
| 1774 | 		goto bailout; | 
| 1775 | 	} | 
| 1776 | 	n = 0; | 
| 1777 | 	for (;;) { | 
| 1778 | 		/* but for subcommits, all except the bats in the list | 
| 1779 | 		 * retain their existing mode */ | 
| 1780 | 		if (n == 0 && obbpf != NULL) { | 
| 1781 | 			if (fgets(buf, sizeof(buf), obbpf) == NULL) { | 
| 1782 | 				fclose(obbpf); | 
| 1783 | 				obbpf = NULL; | 
| 1784 | 			} else if (sscanf(buf, "%d" , &n) != 1 || n <= 0) { | 
| 1785 | 				GDKerror("BBPdir: subcommit attempted with invalid backup BBP.dir." ); | 
| 1786 | 				return GDK_FAIL; | 
| 1787 | 			} | 
| 1788 | 			/* at this point, obbpf == NULL, or n > 0 */ | 
| 1789 | 		} | 
| 1790 | 		if (j == cnt && n == 0) { | 
| 1791 | 			assert(obbpf == NULL); | 
| 1792 | 			break; | 
| 1793 | 		} | 
| 1794 | 		if (j < cnt && (n == 0 || subcommit[j] <= n || obbpf == NULL)) { | 
| 1795 | 			bat i = subcommit[j]; | 
| 1796 | 			/* BBP.dir consists of all persistent bats only */ | 
| 1797 | 			if (BBP_status(i) & BBPPERSISTENT) { | 
| 1798 | 				if (new_bbpentry(nbbpf, i, "" ) != GDK_SUCCEED) { | 
| 1799 | 					goto bailout; | 
| 1800 | 				} | 
| 1801 | 				IODEBUG new_bbpentry(stderr, i, "#" ); | 
| 1802 | 			} | 
| 1803 | 			if (i == n) | 
| 1804 | 				n = 0;	/* read new entry (i.e. skip this one from old BBP.dir */ | 
| 1805 | 			do | 
| 1806 | 				/* go to next, skipping duplicates */ | 
| 1807 | 				j++; | 
| 1808 | 			while (j < cnt && subcommit[j] == i); | 
| 1809 | 		} else { | 
| 1810 | 			if (fprintf(nbbpf, "%s" , buf) < 0) { | 
| 1811 | 				GDKsyserror("BBPdir_subcommit: Copying BBP.dir entry failed\n" ); | 
| 1812 | 				goto bailout; | 
| 1813 | 			} | 
| 1814 | 			IODEBUG fprintf(stderr, "#%s" , buf); | 
| 1815 | 			n = 0; | 
| 1816 | 		} | 
| 1817 | 	} | 
| 1818 |  | 
| 1819 | 	if (fflush(nbbpf) == EOF || | 
| 1820 | 	    (!(GDKdebug & NOSYNCMASK) | 
| 1821 | #if defined(NATIVE_WIN32) | 
| 1822 | 	     && _commit(_fileno(nbbpf)) < 0 | 
| 1823 | #elif defined(HAVE_FDATASYNC) | 
| 1824 | 	     && fdatasync(fileno(nbbpf)) < 0 | 
| 1825 | #elif defined(HAVE_FSYNC) | 
| 1826 | 	     && fsync(fileno(nbbpf)) < 0 | 
| 1827 | #endif | 
| 1828 | 		    )) { | 
| 1829 | 		GDKsyserror("BBPdir_subcommit: Syncing BBP.dir file failed\n" ); | 
| 1830 | 		goto bailout; | 
| 1831 | 	} | 
| 1832 | 	if (fclose(nbbpf) == EOF) { | 
| 1833 | 		GDKsyserror("BBPdir_subcommit: Closing BBP.dir file failed\n" ); | 
| 1834 | 		goto bailout; | 
| 1835 | 	} | 
| 1836 |  | 
| 1837 | 	IODEBUG fprintf(stderr, "#BBPdir end\n" ); | 
| 1838 |  | 
| 1839 | 	return GDK_SUCCEED; | 
| 1840 |  | 
| 1841 |       bailout: | 
| 1842 | 	if (obbpf != NULL) | 
| 1843 | 		fclose(obbpf); | 
| 1844 | 	if (nbbpf != NULL) | 
| 1845 | 		fclose(nbbpf); | 
| 1846 | 	return GDK_FAIL; | 
| 1847 | } | 
| 1848 |  | 
| 1849 | gdk_return | 
| 1850 | BBPdir(int cnt, bat *subcommit) | 
| 1851 | { | 
| 1852 | 	FILE *fp; | 
| 1853 | 	bat i; | 
| 1854 |  | 
| 1855 | 	if (subcommit) | 
| 1856 | 		return BBPdir_subcommit(cnt, subcommit); | 
| 1857 |  | 
| 1858 | 	IODEBUG fprintf(stderr, "#BBPdir: writing BBP.dir (%d bats).\n" , (int) (bat) ATOMIC_GET(&BBPsize)); | 
| 1859 | 	if ((fp = GDKfilelocate(0, "BBP" , "w" , "dir" )) == NULL) { | 
| 1860 | 		goto bailout; | 
| 1861 | 	} | 
| 1862 |  | 
| 1863 | 	if (BBPdir_header(fp, (bat) ATOMIC_GET(&BBPsize)) != GDK_SUCCEED) { | 
| 1864 | 		goto bailout; | 
| 1865 | 	} | 
| 1866 |  | 
| 1867 | 	for (i = 1; i < (bat) ATOMIC_GET(&BBPsize); i++) { | 
| 1868 | 		/* write the entry | 
| 1869 | 		 * BBP.dir consists of all persistent bats */ | 
| 1870 | 		if (BBP_status(i) & BBPPERSISTENT) { | 
| 1871 | 			if (new_bbpentry(fp, i, "" ) != GDK_SUCCEED) { | 
| 1872 | 				goto bailout; | 
| 1873 | 			} | 
| 1874 | 			IODEBUG new_bbpentry(stderr, i, "#" ); | 
| 1875 | 		} | 
| 1876 | 	} | 
| 1877 |  | 
| 1878 | 	if (fflush(fp) == EOF || | 
| 1879 | 	    (!(GDKdebug & NOSYNCMASK) | 
| 1880 | #if defined(NATIVE_WIN32) | 
| 1881 | 	     && _commit(_fileno(fp)) < 0 | 
| 1882 | #elif defined(HAVE_FDATASYNC) | 
| 1883 | 	     && fdatasync(fileno(fp)) < 0 | 
| 1884 | #elif defined(HAVE_FSYNC) | 
| 1885 | 	     && fsync(fileno(fp)) < 0 | 
| 1886 | #endif | 
| 1887 | 		    )) { | 
| 1888 | 		GDKsyserror("BBPdir: Syncing BBP.dir file failed\n" ); | 
| 1889 | 		goto bailout; | 
| 1890 | 	} | 
| 1891 | 	if (fclose(fp) == EOF) { | 
| 1892 | 		GDKsyserror("BBPdir: Closing BBP.dir file failed\n" ); | 
| 1893 | 		return GDK_FAIL; | 
| 1894 | 	} | 
| 1895 |  | 
| 1896 | 	IODEBUG fprintf(stderr, "#BBPdir end\n" ); | 
| 1897 |  | 
| 1898 | 	if (i < (bat) ATOMIC_GET(&BBPsize)) | 
| 1899 | 		return GDK_FAIL; | 
| 1900 |  | 
| 1901 | 	return GDK_SUCCEED; | 
| 1902 |  | 
| 1903 |       bailout: | 
| 1904 | 	if (fp != NULL) | 
| 1905 | 		fclose(fp); | 
| 1906 | 	return GDK_FAIL; | 
| 1907 | } | 
| 1908 |  | 
| 1909 | /* function used for debugging */ | 
| 1910 | void | 
| 1911 | BBPdump(void) | 
| 1912 | { | 
| 1913 | 	bat i; | 
| 1914 | 	size_t mem = 0, vm = 0; | 
| 1915 | 	size_t cmem = 0, cvm = 0; | 
| 1916 | 	int n = 0, nc = 0; | 
| 1917 |  | 
| 1918 | 	for (i = 0; i < (bat) ATOMIC_GET(&BBPsize); i++) { | 
| 1919 | 		BAT *b = BBP_cache(i); | 
| 1920 | 		if (b == NULL) | 
| 1921 | 			continue; | 
| 1922 | 		fprintf(stderr, | 
| 1923 | 			"# %d[%s]: nme='%s' refs=%d lrefs=%d "  | 
| 1924 | 			"status=%u count="  BUNFMT, | 
| 1925 | 			i, | 
| 1926 | 			ATOMname(b->ttype), | 
| 1927 | 			BBP_logical(i) ? BBP_logical(i) : "<NULL>" , | 
| 1928 | 			BBP_refs(i), | 
| 1929 | 			BBP_lrefs(i), | 
| 1930 | 			BBP_status(i), | 
| 1931 | 			b->batCount); | 
| 1932 | 		if (b->batSharecnt > 0) | 
| 1933 | 			fprintf(stderr, " shares=%d" , b->batSharecnt); | 
| 1934 | 		if (b->batDirtydesc) | 
| 1935 | 			fprintf(stderr, " DirtyDesc" ); | 
| 1936 | 		if (b->theap.parentid) { | 
| 1937 | 			fprintf(stderr, " Theap -> %d" , b->theap.parentid); | 
| 1938 | 		} else { | 
| 1939 | 			fprintf(stderr, | 
| 1940 | 				" Theap=[%zu,%zu]%s" , | 
| 1941 | 				HEAPmemsize(&b->theap), | 
| 1942 | 				HEAPvmsize(&b->theap), | 
| 1943 | 				b->theap.dirty ? "(Dirty)"  : "" ); | 
| 1944 | 			if (BBP_logical(i) && BBP_logical(i)[0] == '.') { | 
| 1945 | 				cmem += HEAPmemsize(&b->theap); | 
| 1946 | 				cvm += HEAPvmsize(&b->theap); | 
| 1947 | 				nc++; | 
| 1948 | 			} else { | 
| 1949 | 				mem += HEAPmemsize(&b->theap); | 
| 1950 | 				vm += HEAPvmsize(&b->theap); | 
| 1951 | 				n++; | 
| 1952 | 			} | 
| 1953 | 		} | 
| 1954 | 		if (b->tvheap) { | 
| 1955 | 			if (b->tvheap->parentid != b->batCacheid) { | 
| 1956 | 				fprintf(stderr, | 
| 1957 | 					" Tvheap -> %d" , | 
| 1958 | 					b->tvheap->parentid); | 
| 1959 | 			} else { | 
| 1960 | 				fprintf(stderr, | 
| 1961 | 					" Tvheap=[%zu,%zu]%s" , | 
| 1962 | 					HEAPmemsize(b->tvheap), | 
| 1963 | 					HEAPvmsize(b->tvheap), | 
| 1964 | 				b->tvheap->dirty ? "(Dirty)"  : "" ); | 
| 1965 | 				if (BBP_logical(i) && BBP_logical(i)[0] == '.') { | 
| 1966 | 					cmem += HEAPmemsize(b->tvheap); | 
| 1967 | 					cvm += HEAPvmsize(b->tvheap); | 
| 1968 | 				} else { | 
| 1969 | 					mem += HEAPmemsize(b->tvheap); | 
| 1970 | 					vm += HEAPvmsize(b->tvheap); | 
| 1971 | 				} | 
| 1972 | 			} | 
| 1973 | 		} | 
| 1974 | 		if (b->thash && b->thash != (Hash *) 1) { | 
| 1975 | 			fprintf(stderr, | 
| 1976 | 				" Thash=[%zu,%zu]" , | 
| 1977 | 				HEAPmemsize(&b->thash->heap), | 
| 1978 | 				HEAPvmsize(&b->thash->heap)); | 
| 1979 | 			if (BBP_logical(i) && BBP_logical(i)[0] == '.') { | 
| 1980 | 				cmem += HEAPmemsize(&b->thash->heap); | 
| 1981 | 				cvm += HEAPvmsize(&b->thash->heap); | 
| 1982 | 			} else { | 
| 1983 | 				mem += HEAPmemsize(&b->thash->heap); | 
| 1984 | 				vm += HEAPvmsize(&b->thash->heap); | 
| 1985 | 			} | 
| 1986 | 		} | 
| 1987 | 		fprintf(stderr, " role: %s, persistence: %s\n" , | 
| 1988 | 			b->batRole == PERSISTENT ? "persistent"  : "transient" , | 
| 1989 | 			b->batTransient ? "transient"  : "persistent" ); | 
| 1990 | 	} | 
| 1991 | 	fprintf(stderr, | 
| 1992 | 		"# %d bats: mem=%zu, vm=%zu %d cached bats: mem=%zu, vm=%zu\n" , | 
| 1993 | 		n, mem, vm, nc, cmem, cvm); | 
| 1994 | 	fflush(stderr); | 
| 1995 | } | 
| 1996 |  | 
| 1997 | /* | 
| 1998 |  * @+ BBP Readonly Interface | 
| 1999 |  * | 
| 2000 |  * These interface functions do not change the BBP tables. If they | 
| 2001 |  * only access one specific BAT, the caller must have ensured that no | 
| 2002 |  * other thread is modifying that BAT, therefore such functions do not | 
| 2003 |  * need locking. | 
| 2004 |  * | 
| 2005 |  * BBP index lookup by BAT name: | 
| 2006 |  */ | 
| 2007 | static inline bat | 
| 2008 | BBP_find(const char *nme, bool lock) | 
| 2009 | { | 
| 2010 | 	bat i = BBPnamecheck(nme); | 
| 2011 |  | 
| 2012 | 	if (i != 0) { | 
| 2013 | 		/* for tmp_X BATs, we already know X */ | 
| 2014 | 		const char *s; | 
| 2015 |  | 
| 2016 | 		if (i >= (bat) ATOMIC_GET(&BBPsize) || (s = BBP_logical(i)) == NULL || strcmp(s, nme)) { | 
| 2017 | 			i = 0; | 
| 2018 | 		} | 
| 2019 | 	} else if (*nme != '.') { | 
| 2020 | 		/* must lock since hash-lookup traverses other BATs */ | 
| 2021 | 		if (lock) | 
| 2022 | 			MT_lock_set(&GDKnameLock); | 
| 2023 | 		for (i = BBP_hash[strHash(nme) & BBP_mask]; i; i = BBP_next(i)) { | 
| 2024 | 			if (strcmp(BBP_logical(i), nme) == 0) | 
| 2025 | 				break; | 
| 2026 | 		} | 
| 2027 | 		if (lock) | 
| 2028 | 			MT_lock_unset(&GDKnameLock); | 
| 2029 | 	} | 
| 2030 | 	return i; | 
| 2031 | } | 
| 2032 |  | 
| 2033 | bat | 
| 2034 | BBPindex(const char *nme) | 
| 2035 | { | 
| 2036 | 	return BBP_find(nme, true); | 
| 2037 | } | 
| 2038 |  | 
| 2039 | BAT * | 
| 2040 | BBPgetdesc(bat i) | 
| 2041 | { | 
| 2042 | 	if (is_bat_nil(i)) | 
| 2043 | 		return NULL; | 
| 2044 | 	if (i < 0) | 
| 2045 | 		i = -i; | 
| 2046 | 	if (i != 0 && i < (bat) ATOMIC_GET(&BBPsize) && i && BBP_logical(i)) { | 
| 2047 | 		return BBP_desc(i); | 
| 2048 | 	} | 
| 2049 | 	return NULL; | 
| 2050 | } | 
| 2051 |  | 
| 2052 | /* | 
| 2053 |  * @+ BBP Update Interface | 
| 2054 |  * Operations to insert, delete, clear, and modify BBP entries. | 
| 2055 |  * Our policy for the BBP is to provide unlocked BBP access for | 
| 2056 |  * speed, but still write operations have to be locked. | 
| 2057 |  * #ifdef DEBUG_THREADLOCAL_BATS | 
| 2058 |  * Create the shadow version (reversed) of a bat. | 
| 2059 |  * | 
| 2060 |  * An existing BAT is inserted into the BBP | 
| 2061 |  */ | 
| 2062 | static inline str | 
| 2063 | BBPsubdir_recursive(str s, bat i) | 
| 2064 | { | 
| 2065 | 	i >>= 6; | 
| 2066 | 	if (i >= 0100) { | 
| 2067 | 		s = BBPsubdir_recursive(s, i); | 
| 2068 | 		*s++ = DIR_SEP; | 
| 2069 | 	} | 
| 2070 | 	i &= 077; | 
| 2071 | 	*s++ = '0' + (i >> 3); | 
| 2072 | 	*s++ = '0' + (i & 7); | 
| 2073 | 	return s; | 
| 2074 | } | 
| 2075 |  | 
| 2076 | static inline void | 
| 2077 | BBPgetsubdir(str s, bat i) | 
| 2078 | { | 
| 2079 | 	if (i >= 0100) { | 
| 2080 | 		s = BBPsubdir_recursive(s, i); | 
| 2081 | 	} | 
| 2082 | 	*s = 0; | 
| 2083 | } | 
| 2084 |  | 
| 2085 | /* There are BBP_THREADMASK+1 (64) free lists, and ours (idx) is | 
| 2086 |  * empty.  Here we find a longish free list (at least 20 entries), and | 
| 2087 |  * if we can find one, we take one entry from that list.  If no long | 
| 2088 |  * enough list can be found, we create a new entry by either just | 
| 2089 |  * increasing BBPsize (up to BBPlimit) or extending the BBP (which | 
| 2090 |  * increases BBPlimit).  Every time this function is called we start | 
| 2091 |  * searching in a following free list (variable "last"). */ | 
| 2092 | static gdk_return | 
| 2093 | maybeextend(int idx) | 
| 2094 | { | 
| 2095 | 	int t, m; | 
| 2096 | 	int n, l; | 
| 2097 | 	bat i; | 
| 2098 | 	static int last = 0; | 
| 2099 |  | 
| 2100 | 	l = 0;			/* length of longest list */ | 
| 2101 | 	m = 0;			/* index of longest list */ | 
| 2102 | 	/* find a longish free list */ | 
| 2103 | 	for (t = 0; t <= BBP_THREADMASK && l <= 20; t++) { | 
| 2104 | 		n = 0; | 
| 2105 | 		for (i = BBP_free((t + last) & BBP_THREADMASK); | 
| 2106 | 		     i != 0 && n <= 20; | 
| 2107 | 		     i = BBP_next(i)) | 
| 2108 | 			n++; | 
| 2109 | 		if (n > l) { | 
| 2110 | 			m = (t + last) & BBP_THREADMASK; | 
| 2111 | 			l = n; | 
| 2112 | 		} | 
| 2113 | 	} | 
| 2114 | 	if (l > 20) { | 
| 2115 | 		/* list is long enough, get an entry from there */ | 
| 2116 | 		i = BBP_free(m); | 
| 2117 | 		BBP_free(m) = BBP_next(i); | 
| 2118 | 		BBP_next(i) = 0; | 
| 2119 | 		BBP_free(idx) = i; | 
| 2120 | 	} else { | 
| 2121 | 		/* let the longest list alone, get a fresh entry */ | 
| 2122 | 		if ((bat) ATOMIC_ADD(&BBPsize, 1) >= BBPlimit) { | 
| 2123 | 			if (BBPextend(idx, true) != GDK_SUCCEED) { | 
| 2124 | 				/* undo add */ | 
| 2125 | 				ATOMIC_SUB(&BBPsize, 1); | 
| 2126 | 				/* couldn't extend; if there is any | 
| 2127 | 				 * free entry, take it from the | 
| 2128 | 				 * longest list after all */ | 
| 2129 | 				if (l > 0) { | 
| 2130 | 					i = BBP_free(m); | 
| 2131 | 					BBP_free(m) = BBP_next(i); | 
| 2132 | 					BBP_next(i) = 0; | 
| 2133 | 					BBP_free(idx) = i; | 
| 2134 | 				} else { | 
| 2135 | 					/* nothing available */ | 
| 2136 | 					return GDK_FAIL; | 
| 2137 | 				} | 
| 2138 | 			} | 
| 2139 | 		} else { | 
| 2140 | 			BBP_free(idx) = (bat) ATOMIC_GET(&BBPsize) - 1; | 
| 2141 | 		} | 
| 2142 | 	} | 
| 2143 | 	last = (last + 1) & BBP_THREADMASK; | 
| 2144 | 	return GDK_SUCCEED; | 
| 2145 | } | 
| 2146 |  | 
/* Reserve a BBP slot for the new BAT bn and initialize the slot.
 *
 * A slot is taken from the free list that belongs to the calling
 * thread (index threadmask(pid)); when that list is empty,
 * maybeextend() is called to refill it.  The slot's id is stored in
 * bn->batCacheid, its status is set to BBPDELETING, it gets one
 * physical and no logical references, and its default logical name
 * ("tmp_<octal id>") and physical name are filled in if they were
 * still empty.
 *
 * return new BAT id (> 0); return 0 on failure */
bat
BBPinsert(BAT *bn)
{
	MT_Id pid = MT_getpid();
	/* no locking when locked_by is this very thread
	 * NOTE(review): presumably locked_by is the thread holding the
	 * global BBP lock -- confirm against BBPlock() */
	bool lock = locked_by == 0 || locked_by != pid;
	char dirname[24];
	bat i;
	int idx = threadmask(pid), len = 0;

	/* critical section: get a new BBP entry */
	if (lock) {
		MT_lock_set(&GDKtrimLock(idx));
		MT_lock_set(&GDKcacheLock(idx));
	}

	/* find an empty slot */
	if (BBP_free(idx) <= 0) {
		/* we need to extend the BBP */
		gdk_return r = GDK_SUCCEED;
		if (lock) {
			/* we must take all locks in a consistent
			 * order so first unset the one we've already
			 * got */
			MT_lock_unset(&GDKcacheLock(idx));
			for (i = 0; i <= BBP_THREADMASK; i++)
				MT_lock_set(&GDKcacheLock(i));
		}
		MT_lock_set(&GDKnameLock);
		/* check again in case some other thread extended
		 * while we were waiting */
		if (BBP_free(idx) <= 0) {
			r = maybeextend(idx);
		}
		MT_lock_unset(&GDKnameLock);
		/* release all cache locks again except our own, in
		 * the reverse order of acquisition */
		if (lock)
			for (i = BBP_THREADMASK; i >= 0; i--)
				if (i != idx)
					MT_lock_unset(&GDKcacheLock(i));
		if (r != GDK_SUCCEED) {
			if (lock) {
				MT_lock_unset(&GDKcacheLock(idx));
				MT_lock_unset(&GDKtrimLock(idx));
			}
			return 0;
		}
	}
	/* unlink the first entry of our free list */
	i = BBP_free(idx);
	assert(i > 0);
	BBP_free(idx) = BBP_next(i);

	if (lock) {
		MT_lock_unset(&GDKcacheLock(idx));
		MT_lock_unset(&GDKtrimLock(idx));
	}
	/* rest of the work outside the lock */

	/* fill in basic BBP fields for the new bat */

	bn->batCacheid = i;
	bn->creator_tid = MT_getpid();

	BBP_status_set(i, BBPDELETING, "BBPinsert" );
	BBP_cache(i) = NULL;
	BBP_desc(i) = NULL;
	BBP_refs(i) = 1;	/* new bats have 1 pin */
	BBP_lrefs(i) = 0;	/* ie. no logical refs */

#ifdef HAVE_HGE
	/* remember that a hge column was seen (BBPdir_header reads
	 * this flag) */
	if (bn->ttype == TYPE_hge)
		havehge = true;
#endif

	/* len stays 0 when the default name was already filled in by
	 * an earlier use of this slot */
	if (*BBP_bak(i) == 0)
		len = snprintf(BBP_bak(i), sizeof(BBP_bak(i)), "tmp_%o" , (unsigned) i);
	/* NOTE(review): the bound passed to snprintf is
	 * sizeof(BBP_bak(i)) but truncation is tested against
	 * FILENAME_MAX -- confirm the two sizes agree.  Also, slot i
	 * has already been unlinked from the free list when 0 is
	 * returned here, and is not put back. */
	if (len == -1 || len >= FILENAME_MAX)
		return 0;
	BBP_logical(i) = BBP_bak(i);

	/* Keep the physical location around forever */
	if (!GDKinmemory() && *BBP_physical(i) == 0) {
		BBPgetsubdir(dirname, i);

		if (*dirname)	/* i.e., i >= 0100 */
			len = snprintf(BBP_physical(i), sizeof(BBP_physical(i)),
				 "%s%c%o" , dirname, DIR_SEP, (unsigned) i);
		else
			len = snprintf(BBP_physical(i), sizeof(BBP_physical(i)),
				 "%o" , (unsigned) i);
		/* same remarks as for the logical name above */
		if (len == -1 || len >= FILENAME_MAX)
			return 0;

		BATDEBUG fprintf(stderr, "#%d = new %s(%s)\n" , (int) i, BBPname(i), ATOMname(bn->ttype));
	}

	return i;
}
| 2244 |  | 
| 2245 | gdk_return | 
| 2246 | BBPcacheit(BAT *bn, bool lock) | 
| 2247 | { | 
| 2248 | 	bat i = bn->batCacheid; | 
| 2249 | 	unsigned mode; | 
| 2250 |  | 
| 2251 | 	if (lock) | 
| 2252 | 		lock = locked_by == 0 || locked_by != MT_getpid(); | 
| 2253 |  | 
| 2254 | 	if (i) { | 
| 2255 | 		assert(i > 0); | 
| 2256 | 	} else { | 
| 2257 | 		i = BBPinsert(bn);	/* bat was not previously entered */ | 
| 2258 | 		if (i == 0) | 
| 2259 | 			return GDK_FAIL; | 
| 2260 | 		if (bn->tvheap) | 
| 2261 | 			bn->tvheap->parentid = i; | 
| 2262 | 	} | 
| 2263 | 	assert(bn->batCacheid > 0); | 
| 2264 |  | 
| 2265 | 	if (lock) | 
| 2266 | 		MT_lock_set(&GDKswapLock(i)); | 
| 2267 | 	mode = (BBP_status(i) | BBPLOADED) & ~(BBPLOADING | BBPDELETING); | 
| 2268 | 	BBP_status_set(i, mode, "BBPcacheit" ); | 
| 2269 | 	BBP_desc(i) = bn; | 
| 2270 |  | 
| 2271 | 	/* cache it! */ | 
| 2272 | 	BBP_cache(i) = bn; | 
| 2273 |  | 
| 2274 | 	if (lock) | 
| 2275 | 		MT_lock_unset(&GDKswapLock(i)); | 
| 2276 | 	return GDK_SUCCEED; | 
| 2277 | } | 
| 2278 |  | 
| 2279 | /* | 
| 2280 |  * BBPuncacheit changes the BBP status to swapped out.  Currently only | 
| 2281 |  * used in BBPfree (bat swapped out) and BBPclear (bat destroyed | 
| 2282 |  * forever). | 
| 2283 |  */ | 
| 2284 |  | 
| 2285 | static void | 
| 2286 | BBPuncacheit(bat i, bool unloaddesc) | 
| 2287 | { | 
| 2288 | 	if (i < 0) | 
| 2289 | 		i = -i; | 
| 2290 | 	if (BBPcheck(i, "BBPuncacheit" )) { | 
| 2291 | 		BAT *b = BBP_desc(i); | 
| 2292 |  | 
| 2293 | 		if (b) { | 
| 2294 | 			if (BBP_cache(i)) { | 
| 2295 | 				BATDEBUG fprintf(stderr, "#uncache %d (%s)\n" , (int) i, BBPname(i)); | 
| 2296 |  | 
| 2297 | 				BBP_cache(i) = NULL; | 
| 2298 |  | 
| 2299 | 				/* clearing bits can be done without the lock */ | 
| 2300 | 				BBP_status_off(i, BBPLOADED, "BBPuncacheit" ); | 
| 2301 | 			} | 
| 2302 | 			if (unloaddesc) { | 
| 2303 | 				BBP_desc(i) = NULL; | 
| 2304 | 				BATdestroy(b); | 
| 2305 | 			} | 
| 2306 | 		} | 
| 2307 | 	} | 
| 2308 | } | 
| 2309 |  | 
| 2310 | /* | 
| 2311 |  * @- BBPclear | 
| 2312 |  * BBPclear removes a BAT from the BBP directory forever. | 
| 2313 |  */ | 
/* Remove bat i from the BBP and put its slot on free list idx.
 * The descriptor is uncached and destroyed, the reference counts are
 * zeroed, a non-temporary name is removed from the name hash (under
 * GDKnameLock), a separately allocated logical name is freed, and the
 * slot is pushed onto the front of free list idx.  When lock is true
 * GDKcacheLock(idx) is held while the name and free list are
 * updated. */
static inline void
bbpclear(bat i, int idx, bool lock)
{
	BATDEBUG {
		fprintf(stderr, "#clear %d (%s)\n" , (int) i, BBPname(i));
	}
	/* true: also drop and destroy the descriptor */
	BBPuncacheit(i, true);
	BATDEBUG {
		fprintf(stderr, "#BBPclear set to unloading %d\n" , i);
	}
	BBP_status_set(i, BBPUNLOADING, "BBPclear" );
	BBP_refs(i) = 0;
	BBP_lrefs(i) = 0;
	if (lock)
		MT_lock_set(&GDKcacheLock(idx));

	/* only names that are not temporary names are removed from
	 * the name hash */
	if (BBPtmpcheck(BBP_logical(i)) == 0) {
		MT_lock_set(&GDKnameLock);
		BBP_delete(i);
		MT_lock_unset(&GDKnameLock);
	}
	/* the default name is stored in the slot itself (BBP_bak) and
	 * must not be freed */
	if (BBP_logical(i) != BBP_bak(i))
		GDKfree(BBP_logical(i));
	BBP_status_set(i, 0, "BBPclear" );
	BBP_logical(i) = NULL;
	/* push the slot onto the front of the free list */
	BBP_next(i) = BBP_free(idx);
	BBP_free(idx) = i;
	if (lock)
		MT_lock_unset(&GDKcacheLock(idx));
}
| 2344 |  | 
| 2345 | void | 
| 2346 | BBPclear(bat i) | 
| 2347 | { | 
| 2348 | 	MT_Id pid = MT_getpid(); | 
| 2349 | 	bool lock = locked_by == 0 || locked_by != pid; | 
| 2350 |  | 
| 2351 | 	if (BBPcheck(i, "BBPclear" )) { | 
| 2352 | 		bbpclear(i, threadmask(pid), lock); | 
| 2353 | 	} | 
| 2354 | } | 
| 2355 |  | 
| 2356 | /* | 
| 2357 |  * @- BBP rename | 
| 2358 |  * | 
| 2359 |  * Each BAT has a logical name that is globally unique. Its reverse | 
| 2360 |  * view can also be assigned a name, that also has to be globally | 
| 2361 |  * unique.  The batId is the same as the logical BAT name. | 
| 2362 |  * | 
| 2363 |  * The default logical name of a BAT is tmp_X, where X is the | 
| 2364 |  * batCacheid.  Apart from being globally unique, new logical bat | 
| 2365 |  * names cannot be of the form tmp_X, unless X is the batCacheid. | 
| 2366 |  * | 
| 2367 |  * Physical names consist of a directory name followed by a logical | 
| 2368 |  * name suffix.  The directory name is derived from the batCacheid, | 
| 2369 |  * and is currently organized in a hierarchy that puts max 64 bats in | 
| 2370 |  * each directory (see BBPgetsubdir). | 
| 2371 |  * | 
 * Concerning the physical suffix: it is almost always bat_X. This
 * saves us a whole lot of trouble, as bat_X is always unique and no
 * conflicts can occur.  Other suffixes are only supported for
 * backward compatibility with old repositories (you won't see them
 * anymore in new repositories).
| 2377 |  */ | 
| 2378 | int | 
| 2379 | BBPrename(bat bid, const char *nme) | 
| 2380 | { | 
| 2381 | 	BAT *b = BBPdescriptor(bid); | 
| 2382 | 	char dirname[24]; | 
| 2383 | 	bat tmpid = 0, i; | 
| 2384 | 	int idx; | 
| 2385 |  | 
| 2386 | 	if (b == NULL) | 
| 2387 | 		return 0; | 
| 2388 |  | 
| 2389 | 	/* If name stays same, do nothing */ | 
| 2390 | 	if (BBP_logical(bid) && strcmp(BBP_logical(bid), nme) == 0) | 
| 2391 | 		return 0; | 
| 2392 |  | 
| 2393 | 	BBPgetsubdir(dirname, bid); | 
| 2394 |  | 
| 2395 | 	if ((tmpid = BBPnamecheck(nme)) && tmpid != bid) { | 
| 2396 | 		GDKerror("BBPrename: illegal temporary name: '%s'\n" , nme); | 
| 2397 | 		return BBPRENAME_ILLEGAL; | 
| 2398 | 	} | 
| 2399 | 	if (strlen(dirname) + strLen(nme) + 1 >= IDLENGTH) { | 
| 2400 | 		GDKerror("BBPrename: illegal temporary name: '%s'\n" , nme); | 
| 2401 | 		return BBPRENAME_LONG; | 
| 2402 | 	} | 
| 2403 | 	idx = threadmask(MT_getpid()); | 
| 2404 | 	MT_lock_set(&GDKtrimLock(idx)); | 
| 2405 | 	MT_lock_set(&GDKnameLock); | 
| 2406 | 	i = BBP_find(nme, false); | 
| 2407 | 	if (i != 0) { | 
| 2408 | 		MT_lock_unset(&GDKnameLock); | 
| 2409 | 		MT_lock_unset(&GDKtrimLock(idx)); | 
| 2410 | 		GDKerror("BBPrename: name is in use: '%s'.\n" , nme); | 
| 2411 | 		return BBPRENAME_ALREADY; | 
| 2412 | 	} | 
| 2413 |  | 
| 2414 | 	/* carry through the name change */ | 
| 2415 | 	if (BBP_logical(bid) && BBPtmpcheck(BBP_logical(bid)) == 0) { | 
| 2416 | 		BBP_delete(bid); | 
| 2417 | 	} | 
| 2418 | 	if (BBP_logical(bid) != BBP_bak(bid)) | 
| 2419 | 		GDKfree(BBP_logical(bid)); | 
| 2420 | 	BBP_logical(bid) = GDKstrdup(nme); | 
| 2421 | 	if (tmpid == 0) { | 
| 2422 | 		BBP_insert(bid); | 
| 2423 | 	} | 
| 2424 | 	b->batDirtydesc = true; | 
| 2425 | 	if (!b->batTransient) { | 
| 2426 | 		bool lock = locked_by == 0 || locked_by != MT_getpid(); | 
| 2427 |  | 
| 2428 | 		if (lock) | 
| 2429 | 			MT_lock_set(&GDKswapLock(i)); | 
| 2430 | 		BBP_status_on(bid, BBPRENAMED, "BBPrename" ); | 
| 2431 | 		if (lock) | 
| 2432 | 			MT_lock_unset(&GDKswapLock(i)); | 
| 2433 | 	} | 
| 2434 | 	MT_lock_unset(&GDKnameLock); | 
| 2435 | 	MT_lock_unset(&GDKtrimLock(idx)); | 
| 2436 | 	return 0; | 
| 2437 | } | 
| 2438 |  | 
| 2439 | /* | 
| 2440 |  * @+ BBP swapping Policy | 
| 2441 |  * The BAT can be moved back to disk using the routine BBPfree.  It | 
| 2442 |  * frees the storage for other BATs. After this call BAT* references | 
| 2443 |  * maintained for the BAT are wrong.  We should keep track of dirty | 
| 2444 |  * unloaded BATs. They may have to be committed later on, which may | 
| 2445 |  * include reading them in again. | 
| 2446 |  * | 
| 2447 |  * BBPswappable: may this bat be unloaded?  Only real bats without | 
| 2448 |  * memory references can be unloaded. | 
| 2449 |  */ | 
| 2450 | static inline void | 
| 2451 | BBPspin(bat i, const char *s, unsigned event) | 
| 2452 | { | 
| 2453 | 	if (BBPcheck(i, "BBPspin" ) && (BBP_status(i) & event)) { | 
| 2454 | 		lng spin = LL_CONSTANT(0); | 
| 2455 |  | 
| 2456 | 		do { | 
| 2457 | 			MT_sleep_ms(KITTENNAP); | 
| 2458 | 			spin++; | 
| 2459 | 		} while (BBP_status(i) & event); | 
| 2460 | 		BATDEBUG fprintf(stderr, "#BBPspin(%d,%s,%u): "  LLFMT " loops\n" , (int) i, s, event, spin); | 
| 2461 | 	} | 
| 2462 | } | 
| 2463 |  | 
/* This function can fail if the input parameter (i) is incorrect
 * (unlikely), or if the bat is a view, this is a physical (not
 * logical) incref (i.e. called through BBPfix()), and it is the first
 * reference (refs was 0 and should become 1).  It can fail in this
 * case if the parent bat cannot be loaded.
 * This means the return value of BBPfix should be checked in these
 * circumstances, but not necessarily in others. */
| 2471 | static inline int | 
| 2472 | incref(bat i, bool logical, bool lock) | 
| 2473 | { | 
| 2474 | 	int refs; | 
| 2475 | 	bat tp, tvp; | 
| 2476 | 	BAT *b, *pb = NULL, *pvb = NULL; | 
| 2477 | 	bool load = false; | 
| 2478 |  | 
| 2479 | 	if (!BBPcheck(i, logical ? "BBPretain"  : "BBPfix" )) | 
| 2480 | 		return 0; | 
| 2481 |  | 
| 2482 | 	/* Before we get the lock and before we do all sorts of | 
| 2483 | 	 * things, make sure we can load the parent bats if there are | 
| 2484 | 	 * any.  If we can't load them, we can still easily fail.  If | 
| 2485 | 	 * this is indeed a view, but not the first physical | 
| 2486 | 	 * reference, getting the parent BAT descriptor is | 
| 2487 | 	 * superfluous, but not too expensive, so we do it anyway. */ | 
| 2488 | 	if (!logical && (b = BBP_desc(i)) != NULL) { | 
| 2489 | 		if (b->theap.parentid) { | 
| 2490 | 			pb = BATdescriptor(b->theap.parentid); | 
| 2491 | 			if (pb == NULL) | 
| 2492 | 				return 0; | 
| 2493 | 		} | 
| 2494 | 		if (b->tvheap && b->tvheap->parentid != i) { | 
| 2495 | 			pvb = BATdescriptor(b->tvheap->parentid); | 
| 2496 | 			if (pvb == NULL) { | 
| 2497 | 				if (pb) | 
| 2498 | 					BBPunfix(pb->batCacheid); | 
| 2499 | 				return 0; | 
| 2500 | 			} | 
| 2501 | 		} | 
| 2502 | 	} | 
| 2503 |  | 
| 2504 | 	if (lock) { | 
| 2505 | 		for (;;) { | 
| 2506 | 			MT_lock_set(&GDKswapLock(i)); | 
| 2507 | 			if (!(BBP_status(i) & (BBPUNSTABLE|BBPLOADING))) | 
| 2508 | 				break; | 
| 2509 | 			/* the BATs is "unstable", try again */ | 
| 2510 | 			MT_lock_unset(&GDKswapLock(i)); | 
| 2511 | 			MT_sleep_ms(KITTENNAP); | 
| 2512 | 		} | 
| 2513 | 	} | 
| 2514 | 	/* we have the lock */ | 
| 2515 |  | 
| 2516 | 	b = BBP_desc(i); | 
| 2517 | 	if (b == NULL) { | 
| 2518 | 		/* should not have happened */ | 
| 2519 | 		if (lock) | 
| 2520 | 			MT_lock_unset(&GDKswapLock(i)); | 
| 2521 | 		return 0; | 
| 2522 | 	} | 
| 2523 |  | 
| 2524 | 	assert(BBP_refs(i) + BBP_lrefs(i) || | 
| 2525 | 	       BBP_status(i) & (BBPDELETED | BBPSWAPPED)); | 
| 2526 | 	if (logical) { | 
| 2527 | 		/* parent BATs are not relevant for logical refs */ | 
| 2528 | 		tp = tvp = 0; | 
| 2529 | 		refs = ++BBP_lrefs(i); | 
| 2530 | 	} else { | 
| 2531 | 		tp = b->theap.parentid; | 
| 2532 | 		assert(tp >= 0); | 
| 2533 | 		tvp = b->tvheap == 0 || b->tvheap->parentid == i ? 0 : b->tvheap->parentid; | 
| 2534 | 		refs = ++BBP_refs(i); | 
| 2535 | 		if (refs == 1 && (tp || tvp)) { | 
| 2536 | 			/* If this is a view, we must load the parent | 
| 2537 | 			 * BATs, but we must do that outside of the | 
| 2538 | 			 * lock.  Set the BBPLOADING flag so that | 
| 2539 | 			 * other threads will wait until we're | 
| 2540 | 			 * done. */ | 
| 2541 | 			BBP_status_on(i, BBPLOADING, "BBPfix" ); | 
| 2542 | 			load = true; | 
| 2543 | 		} | 
| 2544 | 	} | 
| 2545 | 	if (lock) | 
| 2546 | 		MT_lock_unset(&GDKswapLock(i)); | 
| 2547 |  | 
| 2548 | 	if (load) { | 
| 2549 | 		/* load the parent BATs and set the heap base pointers | 
| 2550 | 		 * to the correct values */ | 
| 2551 | 		assert(!logical); | 
| 2552 | 		if (tp) { | 
| 2553 | 			assert(pb != NULL); | 
| 2554 | 			b->theap.base = pb->theap.base + (size_t) b->theap.base; | 
| 2555 | 		} | 
| 2556 | 		/* done loading, release descriptor */ | 
| 2557 | 		BBP_status_off(i, BBPLOADING, "BBPfix" ); | 
| 2558 | 	} else if (!logical) { | 
| 2559 | 		/* this wasn't the first physical reference, so undo | 
| 2560 | 		 * the fixes on the parent bats */ | 
| 2561 | 		if (pb) | 
| 2562 | 			BBPunfix(pb->batCacheid); | 
| 2563 | 		if (pvb) | 
| 2564 | 			BBPunfix(pvb->batCacheid); | 
| 2565 | 	} | 
| 2566 | 	return refs; | 
| 2567 | } | 
| 2568 |  | 
| 2569 | /* see comment for incref */ | 
| 2570 | int | 
| 2571 | BBPfix(bat i) | 
| 2572 | { | 
| 2573 | 	bool lock = locked_by == 0 || locked_by != MT_getpid(); | 
| 2574 |  | 
| 2575 | 	return incref(i, false, lock); | 
| 2576 | } | 
| 2577 |  | 
| 2578 | int | 
| 2579 | BBPretain(bat i) | 
| 2580 | { | 
| 2581 | 	bool lock = locked_by == 0 || locked_by != MT_getpid(); | 
| 2582 |  | 
| 2583 | 	return incref(i, true, lock); | 
| 2584 | } | 
| 2585 |  | 
| 2586 | void | 
| 2587 | BBPshare(bat parent) | 
| 2588 | { | 
| 2589 | 	bool lock = locked_by == 0 || locked_by != MT_getpid(); | 
| 2590 |  | 
| 2591 | 	assert(parent > 0); | 
| 2592 | 	(void) incref(parent, true, lock); | 
| 2593 | 	if (lock) | 
| 2594 | 		MT_lock_set(&GDKswapLock(parent)); | 
| 2595 | 	++BBP_cache(parent)->batSharecnt; | 
| 2596 | 	assert(BBP_refs(parent) > 0); | 
| 2597 | 	if (lock) | 
| 2598 | 		MT_lock_unset(&GDKswapLock(parent)); | 
| 2599 | 	(void) incref(parent, false, lock); | 
| 2600 | } | 
| 2601 |  | 
/* Decrement a reference count of bat `i' and, if the bat is no longer
 * needed in memory, unload or destroy it.
 *
 * logical       decrement the logical count if true, else the
 *               physical count
 * releaseShare  if true, only decrement the share counter in the
 *               descriptor and return; the reference counts are left
 *               alone
 * lock          if true, take GDKswapLock(i) around the bookkeeping
 * func          name of the calling function, used in messages
 *
 * Returns the value of the decremented counter after the decrement;
 * 0 if only the share count was released or if there was no reference
 * to drop (an error is reported in that case); -1 if BBPfree
 * failed. */
static inline int
decref(bat i, bool logical, bool releaseShare, bool lock, const char *func)
{
	int refs = 0;
	bool swap = false;	/* set when we decide to unload/destroy */
	bat tp = 0, tvp = 0;	/* parents to release after the last fix is gone */
	BAT *b;

	assert(i > 0);
	if (lock)
		MT_lock_set(&GDKswapLock(i));
	if (releaseShare) {
		/* only the share count is touched on this path */
		--BBP_desc(i)->batSharecnt;
		if (lock)
			MT_lock_unset(&GDKswapLock(i));
		return refs;
	}

	/* wait for an unload that is in progress to finish; the lock
	 * is released while spinning */
	while (BBP_status(i) & BBPUNLOADING) {
		if (lock)
			MT_lock_unset(&GDKswapLock(i));
		BBPspin(i, func, BBPUNLOADING);
		if (lock)
			MT_lock_set(&GDKswapLock(i));
	}

	b = BBP_cache(i);

	/* decrement references by one */
	if (logical) {
		if (BBP_lrefs(i) == 0) {
			GDKerror("%s: %s does not have logical references.\n", func, BBPname(i));
			assert(0);
		} else {
			refs = --BBP_lrefs(i);
		}
	} else {
		if (BBP_refs(i) == 0) {
			GDKerror("%s: %s does not have pointer fixes.\n", func, BBPname(i));
			assert(0);
		} else {
			/* a loaded view implies its parents are still fixed */
			assert(b == NULL || b->theap.parentid == 0 || BBP_refs(b->theap.parentid) > 0);
			assert(b == NULL || b->tvheap == NULL || b->tvheap->parentid == 0 || BBP_refs(b->tvheap->parentid) > 0);
			refs = --BBP_refs(i);
			if (b && refs == 0) {
				/* last physical reference: turn the
				 * base pointer back into an offset
				 * relative to the parent's heap (the
				 * inverse of what incref does) and
				 * remember the parents so that they
				 * can be released below */
				if ((tp = b->theap.parentid) != 0)
					b->theap.base = (char *) (b->theap.base - BBP_cache(tp)->theap.base);
				tvp = VIEWvtparent(b);
			}
		}
	}

	/* we destroy transients asap and unload persistent bats only
	 * if they have been made cold or are not dirty */
	if (BBP_refs(i) > 0 ||
	    (BBP_lrefs(i) > 0 &&
	     (b == NULL || BATdirty(b) || !(BBP_status(i) & BBPPERSISTENT) || GDKinmemory()))) {
		/* bat cannot be swapped out */
	} else if (b ? b->batSharecnt == 0 : (BBP_status(i) & BBPTMP)) {
		/* bat will be unloaded now. set the UNLOADING bit
		 * while locked so no other thread thinks it's
		 * available anymore */
		assert((BBP_status(i) & BBPUNLOADING) == 0);
		BATDEBUG {
			fprintf(stderr, "#%s set to unloading BAT %d\n", func, i);
		}
		BBP_status_on(i, BBPUNLOADING, func);
		swap = true;
	}

	/* unlock before re-locking in unload; as saving a dirty
	 * persistent bat may take a long time */
	if (lock)
		MT_lock_unset(&GDKswapLock(i));

	if (swap && b != NULL) {
		if (BBP_lrefs(i) == 0 && (BBP_status(i) & BBPDELETED) == 0) {
			/* free memory (if loaded) and delete from
			 * disk (if transient but saved) */
			BBPdestroy(b);
		} else {
			BATDEBUG {
				fprintf(stderr, "#%s unload and free bat %d\n", func, i);
			}
			/* free memory of transient */
			if (BBPfree(b, func) != GDK_SUCCEED)
				return -1;	/* indicate failure */
		}
	}
	/* release the physical references on the parents that were
	 * recorded above; the results of these calls are ignored */
	if (tp)
		decref(tp, false, false, lock, func);
	if (tvp)
		decref(tvp, false, false, lock, func);
	return refs;
}
| 2697 |  | 
| 2698 | int | 
| 2699 | BBPunfix(bat i) | 
| 2700 | { | 
| 2701 | 	if (BBPcheck(i, "BBPunfix" ) == 0) { | 
| 2702 | 		return -1; | 
| 2703 | 	} | 
| 2704 | 	return decref(i, false, false, true, "BBPunfix" ); | 
| 2705 | } | 
| 2706 |  | 
| 2707 | int | 
| 2708 | BBPrelease(bat i) | 
| 2709 | { | 
| 2710 | 	if (BBPcheck(i, "BBPrelease" ) == 0) { | 
| 2711 | 		return -1; | 
| 2712 | 	} | 
| 2713 | 	return decref(i, true, false, true, "BBPrelease" ); | 
| 2714 | } | 
| 2715 |  | 
| 2716 | /* | 
| 2717 |  * M5 often changes the physical ref into a logical reference.  This | 
| 2718 |  * state change consist of the sequence BBPretain(b);BBPunfix(b). | 
| 2719 |  * A faster solution is given below, because it does not trigger the | 
| 2720 |  * BBP management actions, such as garbage collecting the bats. | 
| 2721 |  * [first step, initiate code change] | 
| 2722 |  */ | 
| 2723 | void | 
| 2724 | BBPkeepref(bat i) | 
| 2725 | { | 
| 2726 | 	if (is_bat_nil(i)) | 
| 2727 | 		return; | 
| 2728 | 	if (BBPcheck(i, "BBPkeepref" )) { | 
| 2729 | 		bool lock = locked_by == 0 || locked_by != MT_getpid(); | 
| 2730 | 		BAT *b; | 
| 2731 |  | 
| 2732 | 		if ((b = BBPdescriptor(i)) != NULL) { | 
| 2733 | 			BATsettrivprop(b); | 
| 2734 | 			if (GDKdebug & (CHECKMASK | PROPMASK)) | 
| 2735 | 				BATassertProps(b); | 
| 2736 | 		} | 
| 2737 |  | 
| 2738 | 		incref(i, true, lock); | 
| 2739 | 		assert(BBP_refs(i)); | 
| 2740 | 		decref(i, false, false, lock, "BBPkeepref" ); | 
| 2741 | 	} | 
| 2742 | } | 
| 2743 |  | 
| 2744 | static inline void | 
| 2745 | GDKunshare(bat parent) | 
| 2746 | { | 
| 2747 | 	(void) decref(parent, false, true, true, "GDKunshare" ); | 
| 2748 | 	(void) decref(parent, true, false, true, "GDKunshare" ); | 
| 2749 | } | 
| 2750 |  | 
| 2751 | void | 
| 2752 | BBPunshare(bat parent) | 
| 2753 | { | 
| 2754 | 	GDKunshare(parent); | 
| 2755 | } | 
| 2756 |  | 
| 2757 | /* | 
| 2758 |  * BBPreclaim is a user-exported function; the common way to destroy a | 
| 2759 |  * BAT the hard way. | 
| 2760 |  * | 
| 2761 |  * Return values: | 
| 2762 |  * -1 = bat cannot be unloaded (it has more than your own memory fix) | 
| 2763 |  *  0 = unloaded successfully | 
| 2764 |  *  1 = unload failed (due to write-to-disk failure) | 
| 2765 |  */ | 
| 2766 | int | 
| 2767 | BBPreclaim(BAT *b) | 
| 2768 | { | 
| 2769 | 	bat i; | 
| 2770 | 	bool lock = locked_by == 0 || locked_by != MT_getpid(); | 
| 2771 |  | 
| 2772 | 	if (b == NULL) | 
| 2773 | 		return -1; | 
| 2774 | 	i = b->batCacheid; | 
| 2775 |  | 
| 2776 | 	assert(BBP_refs(i) == 1); | 
| 2777 |  | 
| 2778 | 	return decref(i, false, false, lock, "BBPreclaim" ) <0; | 
| 2779 | } | 
| 2780 |  | 
| 2781 | /* | 
| 2782 |  * BBPdescriptor checks whether BAT needs loading and does so if | 
| 2783 |  * necessary. You must have at least one fix on the BAT before calling | 
| 2784 |  * this. | 
| 2785 |  */ | 
| 2786 | static BAT * | 
| 2787 | getBBPdescriptor(bat i, bool lock) | 
| 2788 | { | 
| 2789 | 	bool load = false; | 
| 2790 | 	BAT *b = NULL; | 
| 2791 |  | 
| 2792 | 	assert(i > 0); | 
| 2793 | 	if (!BBPcheck(i, "BBPdescriptor" )) { | 
| 2794 | 		return NULL; | 
| 2795 | 	} | 
| 2796 | 	assert(BBP_refs(i)); | 
| 2797 | 	if ((b = BBP_cache(i)) == NULL) { | 
| 2798 |  | 
| 2799 | 		if (lock) | 
| 2800 | 			MT_lock_set(&GDKswapLock(i)); | 
| 2801 | 		while (BBP_status(i) & BBPWAITING) {	/* wait for bat to be loaded by other thread */ | 
| 2802 | 			if (lock) | 
| 2803 | 				MT_lock_unset(&GDKswapLock(i)); | 
| 2804 | 			MT_sleep_ms(KITTENNAP); | 
| 2805 | 			if (lock) | 
| 2806 | 				MT_lock_set(&GDKswapLock(i)); | 
| 2807 | 		} | 
| 2808 | 		if (BBPvalid(i)) { | 
| 2809 | 			b = BBP_cache(i); | 
| 2810 | 			if (b == NULL) { | 
| 2811 | 				load = true; | 
| 2812 | 				BATDEBUG { | 
| 2813 | 					fprintf(stderr, "#BBPdescriptor set to loading BAT %d\n" , i); | 
| 2814 | 				} | 
| 2815 | 				BBP_status_on(i, BBPLOADING, "BBPdescriptor" ); | 
| 2816 | 			} | 
| 2817 | 		} | 
| 2818 | 		if (lock) | 
| 2819 | 			MT_lock_unset(&GDKswapLock(i)); | 
| 2820 | 	} | 
| 2821 | 	if (load) { | 
| 2822 | 		IODEBUG fprintf(stderr, "#load %s\n" , BBPname(i)); | 
| 2823 |  | 
| 2824 | 		b = BATload_intern(i, lock); | 
| 2825 |  | 
| 2826 | 		/* clearing bits can be done without the lock */ | 
| 2827 | 		BBP_status_off(i, BBPLOADING, "BBPdescriptor" ); | 
| 2828 | 		CHECKDEBUG if (b != NULL) | 
| 2829 | 			BATassertProps(b); | 
| 2830 | 	} | 
| 2831 | 	return b; | 
| 2832 | } | 
| 2833 |  | 
| 2834 | BAT * | 
| 2835 | BBPdescriptor(bat i) | 
| 2836 | { | 
| 2837 | 	bool lock = locked_by == 0 || locked_by != MT_getpid(); | 
| 2838 |  | 
| 2839 | 	return getBBPdescriptor(i, lock); | 
| 2840 | } | 
| 2841 |  | 
| 2842 | /* | 
| 2843 |  * In BBPsave executes unlocked; it just marks the BBP_status of the | 
| 2844 |  * BAT to BBPsaving, so others that want to save or unload this BAT | 
| 2845 |  * must spin lock on the BBP_status field. | 
| 2846 |  */ | 
| 2847 | gdk_return | 
| 2848 | BBPsave(BAT *b) | 
| 2849 | { | 
| 2850 | 	bool lock = locked_by == 0 || locked_by != MT_getpid(); | 
| 2851 | 	bat bid = b->batCacheid; | 
| 2852 | 	gdk_return ret = GDK_SUCCEED; | 
| 2853 |  | 
| 2854 | 	if (BBP_lrefs(bid) == 0 || isVIEW(b) || !BATdirty(b)) | 
| 2855 | 		/* do nothing */ | 
| 2856 | 		return GDK_SUCCEED; | 
| 2857 |  | 
| 2858 | 	if (lock) | 
| 2859 | 		MT_lock_set(&GDKswapLock(bid)); | 
| 2860 |  | 
| 2861 | 	if (BBP_status(bid) & BBPSAVING) { | 
| 2862 | 		/* wait until save in other thread completes */ | 
| 2863 | 		if (lock) | 
| 2864 | 			MT_lock_unset(&GDKswapLock(bid)); | 
| 2865 | 		BBPspin(bid, "BBPsave" , BBPSAVING); | 
| 2866 | 	} else { | 
| 2867 | 		/* save it */ | 
| 2868 | 		unsigned flags = BBPSAVING; | 
| 2869 |  | 
| 2870 | 		if (DELTAdirty(b)) { | 
| 2871 | 			flags |= BBPSWAPPED; | 
| 2872 | 		} | 
| 2873 | 		if (b->batTransient) { | 
| 2874 | 			flags |= BBPTMP; | 
| 2875 | 		} | 
| 2876 | 		BBP_status_on(bid, flags, "BBPsave" ); | 
| 2877 | 		if (lock) | 
| 2878 | 			MT_lock_unset(&GDKswapLock(bid)); | 
| 2879 |  | 
| 2880 | 		IODEBUG fprintf(stderr, "#save %s\n" , BATgetId(b)); | 
| 2881 |  | 
| 2882 | 		/* do the time-consuming work unlocked */ | 
| 2883 | 		if (BBP_status(bid) & BBPEXISTING) | 
| 2884 | 			ret = BBPbackup(b, false); | 
| 2885 | 		if (ret == GDK_SUCCEED) { | 
| 2886 | 			ret = BATsave(b); | 
| 2887 | 		} | 
| 2888 | 		/* clearing bits can be done without the lock */ | 
| 2889 | 		BBP_status_off(bid, BBPSAVING, "BBPsave" ); | 
| 2890 | 	} | 
| 2891 | 	return ret; | 
| 2892 | } | 
| 2893 |  | 
| 2894 | /* | 
| 2895 |  * TODO merge BBPfree with BATfree? Its function is to prepare a BAT | 
| 2896 |  * for being unloaded (or even destroyed, if the BAT is not | 
| 2897 |  * persistent). | 
| 2898 |  */ | 
| 2899 | static void | 
| 2900 | BBPdestroy(BAT *b) | 
| 2901 | { | 
| 2902 | 	bat tp = b->theap.parentid; | 
| 2903 | 	bat vtp = VIEWvtparent(b); | 
| 2904 |  | 
| 2905 | 	if (isVIEW(b)) {	/* a physical view */ | 
| 2906 | 		VIEWdestroy(b); | 
| 2907 | 	} else { | 
| 2908 | 		/* bats that get destroyed must unfix their atoms */ | 
| 2909 | 		int (*tunfix) (const void *) = BATatoms[b->ttype].atomUnfix; | 
| 2910 | 		BUN p, q; | 
| 2911 | 		BATiter bi = bat_iterator(b); | 
| 2912 |  | 
| 2913 | 		assert(b->batSharecnt == 0); | 
| 2914 | 		if (tunfix) { | 
| 2915 | 			BATloop(b, p, q) { | 
| 2916 | 				(*tunfix) (BUNtail(bi, p)); | 
| 2917 | 			} | 
| 2918 | 		} | 
| 2919 | 		BATdelete(b);	/* handles persistent case also (file deletes) */ | 
| 2920 | 	} | 
| 2921 | 	BBPclear(b->batCacheid);	/* if destroyed; de-register from BBP */ | 
| 2922 |  | 
| 2923 | 	/* parent released when completely done with child */ | 
| 2924 | 	if (tp) | 
| 2925 | 		GDKunshare(tp); | 
| 2926 | 	if (vtp) | 
| 2927 | 		GDKunshare(vtp); | 
| 2928 | } | 
| 2929 |  | 
| 2930 | static gdk_return | 
| 2931 | BBPfree(BAT *b, const char *calledFrom) | 
| 2932 | { | 
| 2933 | 	bat bid = b->batCacheid, tp = VIEWtparent(b), vtp = VIEWvtparent(b); | 
| 2934 | 	gdk_return ret; | 
| 2935 |  | 
| 2936 | 	assert(bid > 0); | 
| 2937 | 	assert(BBPswappable(b)); | 
| 2938 | 	(void) calledFrom; | 
| 2939 |  | 
| 2940 | 	BBP_unload_inc(); | 
| 2941 | 	/* write dirty BATs before being unloaded */ | 
| 2942 | 	ret = BBPsave(b); | 
| 2943 | 	if (ret == GDK_SUCCEED) { | 
| 2944 | 		if (isVIEW(b)) {	/* physical view */ | 
| 2945 | 			VIEWdestroy(b); | 
| 2946 | 		} else { | 
| 2947 | 			if (BBP_cache(bid)) | 
| 2948 | 				BATfree(b);	/* free memory */ | 
| 2949 | 		} | 
| 2950 | 		BBPuncacheit(bid, false); | 
| 2951 | 	} | 
| 2952 | 	/* clearing bits can be done without the lock */ | 
| 2953 | 	BATDEBUG { | 
| 2954 | 		fprintf(stderr, "#BBPfree turn off unloading %d\n" , bid); | 
| 2955 | 	} | 
| 2956 | 	BBP_status_off(bid, BBPUNLOADING, calledFrom); | 
| 2957 | 	BBP_unload_dec(); | 
| 2958 |  | 
| 2959 | 	/* parent released when completely done with child */ | 
| 2960 | 	if (ret == GDK_SUCCEED && tp) | 
| 2961 | 		GDKunshare(tp); | 
| 2962 | 	if (ret == GDK_SUCCEED && vtp) | 
| 2963 | 		GDKunshare(vtp); | 
| 2964 | 	return ret; | 
| 2965 | } | 
| 2966 |  | 
| 2967 | /* | 
 * BBPquickdesc loads a BAT descriptor without loading the entire BAT;
 * the result may be used only for a *limited* number of
| 2970 |  * purposes. Specifically, during the global sync/commit, we do not | 
| 2971 |  * want to load any BATs that are not already loaded, both because | 
| 2972 |  * this costs performance, and because getting into memory shortage | 
| 2973 |  * during a commit is extremely dangerous. Loading a BAT tends not to | 
| 2974 |  * be required, since the commit actions mostly involve moving some | 
| 2975 |  * pointers in the BAT descriptor. However, some column types do | 
| 2976 |  * require loading the full bat. This is tested by the complexatom() | 
| 2977 |  * routine. Such columns are those of which the type has a fix/unfix | 
| 2978 |  * method, or those that have HeapDelete methods. The HeapDelete | 
| 2979 |  * actions are not always required and therefore the BBPquickdesc is | 
| 2980 |  * parametrized. | 
| 2981 |  */ | 
| 2982 | static bool | 
| 2983 | complexatom(int t, bool delaccess) | 
| 2984 | { | 
| 2985 | 	if (t >= 0 && (BATatoms[t].atomFix || (delaccess && BATatoms[t].atomDel))) { | 
| 2986 | 		return true; | 
| 2987 | 	} | 
| 2988 | 	return false; | 
| 2989 | } | 
| 2990 |  | 
| 2991 | BAT * | 
| 2992 | BBPquickdesc(bat bid, bool delaccess) | 
| 2993 | { | 
| 2994 | 	BAT *b; | 
| 2995 |  | 
| 2996 | 	if (is_bat_nil(bid)) | 
| 2997 | 		return NULL; | 
| 2998 | 	if (bid < 0) { | 
| 2999 | 		GDKerror("BBPquickdesc: called with negative batid.\n" ); | 
| 3000 | 		assert(0); | 
| 3001 | 		return NULL; | 
| 3002 | 	} | 
| 3003 | 	if ((b = BBP_cache(bid)) != NULL) | 
| 3004 | 		return b;	/* already cached */ | 
| 3005 | 	b = (BAT *) BBPgetdesc(bid); | 
| 3006 | 	if (b == NULL || | 
| 3007 | 	    complexatom(b->ttype, delaccess)) { | 
| 3008 | 		b = BATload_intern(bid, true); | 
| 3009 | 	} | 
| 3010 | 	return b; | 
| 3011 | } | 
| 3012 |  | 
| 3013 | /* | 
| 3014 |  * @+ Global Commit | 
| 3015 |  */ | 
| 3016 | static BAT * | 
| 3017 | dirty_bat(bat *i, bool subcommit) | 
| 3018 | { | 
| 3019 | 	if (BBPvalid(*i)) { | 
| 3020 | 		BAT *b; | 
| 3021 | 		BBPspin(*i, "dirty_bat" , BBPSAVING); | 
| 3022 | 		b = BBP_cache(*i); | 
| 3023 | 		if (b != NULL) { | 
| 3024 | 			if ((BBP_status(*i) & BBPNEW) && | 
| 3025 | 			    BATcheckmodes(b, false) != GDK_SUCCEED) /* check mmap modes */ | 
| 3026 | 				*i = 0;	/* error */ | 
| 3027 | 			if ((BBP_status(*i) & BBPPERSISTENT) && | 
| 3028 | 			    (subcommit || BATdirty(b))) | 
| 3029 | 				return b;	/* the bat is loaded, persistent and dirty */ | 
| 3030 | 		} else if (BBP_status(*i) & BBPSWAPPED) { | 
| 3031 | 			b = (BAT *) BBPquickdesc(*i, true); | 
| 3032 | 			if (b && (subcommit || b->batDirtydesc)) | 
| 3033 | 				return b;	/* only the desc is loaded & dirty */ | 
| 3034 | 		} | 
| 3035 | 	} | 
| 3036 | 	return NULL; | 
| 3037 | } | 
| 3038 |  | 
| 3039 | /* | 
| 3040 |  * @- backup-bat | 
| 3041 |  * Backup-bat moves all files of a BAT to a backup directory. Only | 
| 3042 |  * after this succeeds, it may be saved. If some failure occurs | 
| 3043 |  * halfway saving, we can thus always roll back. | 
| 3044 |  */ | 
| 3045 | static gdk_return | 
| 3046 | file_move(int farmid, const char *srcdir, const char *dstdir, const char *name, const char *ext) | 
| 3047 | { | 
| 3048 | 	if (GDKmove(farmid, srcdir, name, ext, dstdir, name, ext) == GDK_SUCCEED) { | 
| 3049 | 		return GDK_SUCCEED; | 
| 3050 | 	} else { | 
| 3051 | 		char *path; | 
| 3052 | 		struct stat st; | 
| 3053 |  | 
| 3054 | 		path = GDKfilepath(farmid, srcdir, name, ext); | 
| 3055 | 		if (path == NULL) | 
| 3056 | 			return GDK_FAIL; | 
| 3057 | 		if (stat(path, &st)) { | 
| 3058 | 			/* source file does not exist; the best | 
| 3059 | 			 * recovery is to give an error but continue | 
| 3060 | 			 * by considering the BAT as not saved; making | 
| 3061 | 			 * sure that this time it does get saved. | 
| 3062 | 			 */ | 
| 3063 | 			GDKsyserror("file_move: cannot stat %s\n" , path); | 
| 3064 | 			GDKfree(path); | 
| 3065 | 			return GDK_FAIL;	/* fishy, but not fatal */ | 
| 3066 | 		} | 
| 3067 | 		GDKfree(path); | 
| 3068 | 	} | 
| 3069 | 	return GDK_FAIL; | 
| 3070 | } | 
| 3071 |  | 
| 3072 | /* returns true if the file exists */ | 
| 3073 | static bool | 
| 3074 | file_exists(int farmid, const char *dir, const char *name, const char *ext) | 
| 3075 | { | 
| 3076 | 	char *path; | 
| 3077 | 	struct stat st; | 
| 3078 | 	int ret = -1; | 
| 3079 |  | 
| 3080 | 	path = GDKfilepath(farmid, dir, name, ext); | 
| 3081 | 	if (path) { | 
| 3082 | 		ret = stat(path, &st); | 
| 3083 | 		IODEBUG fprintf(stderr, "#stat(%s) = %d\n" , path, ret); | 
| 3084 | 		GDKfree(path); | 
| 3085 | 	} | 
| 3086 | 	return (ret == 0); | 
| 3087 | } | 
| 3088 |  | 
| 3089 | static gdk_return | 
| 3090 | heap_move(Heap *hp, const char *srcdir, const char *dstdir, const char *nme, const char *ext) | 
| 3091 | { | 
| 3092 | 	/* see doc at BATsetaccess()/gdk_bat.c for an expose on mmap | 
| 3093 | 	 * heap modes */ | 
| 3094 | 	if (file_exists(hp->farmid, dstdir, nme, ext)) { | 
| 3095 | 		/* dont overwrite heap with the committed state | 
| 3096 | 		 * already in dstdir */ | 
| 3097 | 		return GDK_SUCCEED; | 
| 3098 | 	} else if (hp->newstorage == STORE_PRIV && | 
| 3099 | 		   !file_exists(hp->farmid, srcdir, nme, ext)) { | 
| 3100 |  | 
| 3101 | 		/* In order to prevent half-saved X.new files | 
| 3102 | 		 * surviving a recover we create a dummy file in the | 
| 3103 | 		 * BACKUP(dstdir) whose presence will trigger | 
| 3104 | 		 * BBPrecover to remove them.  Thus, X will prevail | 
| 3105 | 		 * where it otherwise wouldn't have.  If X already has | 
| 3106 | 		 * a saved X.new, that one is backed up as normal. | 
| 3107 | 		 */ | 
| 3108 |  | 
| 3109 | 		FILE *fp; | 
| 3110 | 		long_str kill_ext; | 
| 3111 | 		char *path; | 
| 3112 |  | 
| 3113 | 		strconcat_len(kill_ext, sizeof(kill_ext), ext, ".kill" , NULL); | 
| 3114 | 		path = GDKfilepath(hp->farmid, dstdir, nme, kill_ext); | 
| 3115 | 		if (path == NULL) | 
| 3116 | 			return GDK_FAIL; | 
| 3117 | 		fp = fopen(path, "w" ); | 
| 3118 | 		if (fp == NULL) | 
| 3119 | 			GDKsyserror("heap_move: cannot open file %s\n" , path); | 
| 3120 | 		IODEBUG fprintf(stderr, "#open %s = %d\n" , path, fp ? 0 : -1); | 
| 3121 | 		GDKfree(path); | 
| 3122 |  | 
| 3123 | 		if (fp != NULL) { | 
| 3124 | 			fclose(fp); | 
| 3125 | 			return GDK_SUCCEED; | 
| 3126 | 		} else { | 
| 3127 | 			return GDK_FAIL; | 
| 3128 | 		} | 
| 3129 | 	} | 
| 3130 | 	return file_move(hp->farmid, srcdir, dstdir, nme, ext); | 
| 3131 | } | 
| 3132 |  | 
| 3133 | /* | 
| 3134 |  * @- BBPprepare | 
| 3135 |  * | 
| 3136 |  * this routine makes sure there is a BAKDIR/, and initiates one if | 
| 3137 |  * not.  For subcommits, it does the same with SUBDIR. | 
| 3138 |  * | 
| 3139 |  * It is now locked, to get proper file counters, and also to prevent | 
| 3140 |  * concurrent BBPrecovers, etc. | 
| 3141 |  * | 
| 3142 |  * backup_dir == 0 => no backup BBP.dir | 
| 3143 |  * backup_dir == 1 => BBP.dir saved in BACKUP/ | 
| 3144 |  * backup_dir == 2 => BBP.dir saved in SUBCOMMIT/ | 
| 3145 |  */ | 
| 3146 |  | 
| 3147 | static gdk_return | 
| 3148 | BBPprepare(bool subcommit) | 
| 3149 | { | 
| 3150 | 	bool start_subcommit; | 
| 3151 | 	int set = 1 + subcommit; | 
| 3152 | 	str bakdirpath, subdirpath; | 
| 3153 | 	gdk_return ret = GDK_SUCCEED; | 
| 3154 |  | 
| 3155 | 	if(!(bakdirpath = GDKfilepath(0, NULL, BAKDIR, NULL))) | 
| 3156 | 		return GDK_FAIL; | 
| 3157 | 	if(!(subdirpath = GDKfilepath(0, NULL, SUBDIR, NULL))) { | 
| 3158 | 		GDKfree(bakdirpath); | 
| 3159 | 		return GDK_FAIL; | 
| 3160 | 	} | 
| 3161 |  | 
| 3162 | 	/* tmLock is only used here, helds usually very shortly just | 
| 3163 | 	 * to protect the file counters */ | 
| 3164 | 	MT_lock_set(&GDKtmLock); | 
| 3165 |  | 
| 3166 | 	start_subcommit = (subcommit && backup_subdir == 0); | 
| 3167 | 	if (start_subcommit) { | 
| 3168 | 		/* starting a subcommit. Make sure SUBDIR and DELDIR | 
| 3169 | 		 * are clean */ | 
| 3170 | 		ret = BBPrecover_subdir(); | 
| 3171 | 	} | 
| 3172 | 	if (backup_files == 0) { | 
| 3173 | 		backup_dir = 0; | 
| 3174 | 		ret = BBPrecover(0); | 
| 3175 | 		if (ret == GDK_SUCCEED) { | 
| 3176 | 			if (mkdir(bakdirpath, MONETDB_DIRMODE) < 0 && errno != EEXIST) { | 
| 3177 | 				GDKsyserror("BBPprepare: cannot create directory %s\n" , bakdirpath); | 
| 3178 | 				ret = GDK_FAIL; | 
| 3179 | 			} | 
| 3180 | 			/* if BAKDIR already exists, don't signal error */ | 
| 3181 | 			IODEBUG fprintf(stderr, "#mkdir %s = %d\n" , bakdirpath, (int) ret); | 
| 3182 | 		} | 
| 3183 | 	} | 
| 3184 | 	if (ret == GDK_SUCCEED && start_subcommit) { | 
| 3185 | 		/* make a new SUBDIR (subdir of BAKDIR) */ | 
| 3186 | 		if (mkdir(subdirpath, MONETDB_DIRMODE) < 0) { | 
| 3187 | 			GDKsyserror("BBPprepare: cannot create directory %s\n" , subdirpath); | 
| 3188 | 			ret = GDK_FAIL; | 
| 3189 | 		} | 
| 3190 | 		IODEBUG fprintf(stderr, "#mkdir %s = %d\n" , subdirpath, (int) ret); | 
| 3191 | 	} | 
| 3192 | 	if (ret == GDK_SUCCEED && backup_dir != set) { | 
| 3193 | 		/* a valid backup dir *must* at least contain BBP.dir */ | 
| 3194 | 		if ((ret = GDKmove(0, backup_dir ? BAKDIR : BATDIR, "BBP" , "dir" , subcommit ? SUBDIR : BAKDIR, "BBP" , "dir" )) == GDK_SUCCEED) { | 
| 3195 | 			backup_dir = set; | 
| 3196 | 		} | 
| 3197 | 	} | 
| 3198 | 	/* increase counters */ | 
| 3199 | 	if (ret == GDK_SUCCEED) { | 
| 3200 | 		backup_subdir += subcommit; | 
| 3201 | 		backup_files++; | 
| 3202 | 	} | 
| 3203 | 	MT_lock_unset(&GDKtmLock); | 
| 3204 | 	GDKfree(bakdirpath); | 
| 3205 | 	GDKfree(subdirpath); | 
| 3206 | 	return ret; | 
| 3207 | } | 
| 3208 |  | 
| 3209 | static gdk_return | 
| 3210 | do_backup(const char *srcdir, const char *nme, const char *ext, | 
| 3211 | 	  Heap *h, bool dirty, bool subcommit) | 
| 3212 | { | 
| 3213 | 	gdk_return ret = GDK_SUCCEED; | 
| 3214 |  | 
| 3215 | 	 /* direct mmap is unprotected (readonly usage, or has WAL | 
| 3216 | 	  * protection); however, if we're backing up for subcommit | 
| 3217 | 	  * and a backup already exists in the main backup directory | 
| 3218 | 	  * (see GDKupgradevarheap), move the file */ | 
| 3219 | 	if (subcommit && file_exists(h->farmid, BAKDIR, nme, ext)) { | 
| 3220 | 		if (file_move(h->farmid, BAKDIR, SUBDIR, nme, ext) != GDK_SUCCEED) | 
| 3221 | 			return GDK_FAIL; | 
| 3222 | 	} | 
| 3223 | 	if (h->storage != STORE_MMAP) { | 
| 3224 | 		/* STORE_PRIV saves into X.new files. Two cases could | 
| 3225 | 		 * happen. The first is when a valid X.new exists | 
| 3226 | 		 * because of an access change or a previous | 
| 3227 | 		 * commit. This X.new should be backed up as | 
| 3228 | 		 * usual. The second case is when X.new doesn't | 
| 3229 | 		 * exist. In that case we could have half written | 
| 3230 | 		 * X.new files (after a crash). To protect against | 
| 3231 | 		 * these we write X.new.kill files in the backup | 
| 3232 | 		 * directory (see heap_move). */ | 
| 3233 | 		char extnew[16]; | 
| 3234 | 		gdk_return mvret = GDK_SUCCEED; | 
| 3235 |  | 
| 3236 | 		strconcat_len(extnew, sizeof(extnew), ext, ".new" , NULL); | 
| 3237 | 		if (dirty && | 
| 3238 | 		    !file_exists(h->farmid, BAKDIR, nme, extnew) && | 
| 3239 | 		    !file_exists(h->farmid, BAKDIR, nme, ext)) { | 
| 3240 | 			/* if the heap is dirty and there is no heap | 
| 3241 | 			 * file (with or without .new extension) in | 
| 3242 | 			 * the BAKDIR, move the heap (preferably with | 
| 3243 | 			 * .new extension) to the correct backup | 
| 3244 | 			 * directory */ | 
| 3245 | 			if (file_exists(h->farmid, srcdir, nme, extnew)) | 
| 3246 | 				mvret = heap_move(h, srcdir, | 
| 3247 | 						  subcommit ? SUBDIR : BAKDIR, | 
| 3248 | 						  nme, extnew); | 
| 3249 | 			else | 
| 3250 | 				mvret = heap_move(h, srcdir, | 
| 3251 | 						  subcommit ? SUBDIR : BAKDIR, | 
| 3252 | 						  nme, ext); | 
| 3253 | 		} else if (subcommit) { | 
| 3254 | 			/* if subcommit, wqe may need to move an | 
| 3255 | 			 * already made backup from BAKDIR to | 
| 3256 | 			 * SUBSIR */ | 
| 3257 | 			if (file_exists(h->farmid, BAKDIR, nme, extnew)) | 
| 3258 | 				mvret = file_move(h->farmid, BAKDIR, SUBDIR, nme, extnew); | 
| 3259 | 			else if (file_exists(h->farmid, BAKDIR, nme, ext)) | 
| 3260 | 				mvret = file_move(h->farmid, BAKDIR, SUBDIR, nme, ext); | 
| 3261 | 		} | 
| 3262 | 		/* there is a situation where the move may fail, | 
| 3263 | 		 * namely if this heap was not supposed to be existing | 
| 3264 | 		 * before, i.e. after a BATmaterialize on a persistent | 
| 3265 | 		 * bat as a workaround, do not complain about move | 
| 3266 | 		 * failure if the source file is nonexistent | 
| 3267 | 		 */ | 
| 3268 | 		if (mvret != GDK_SUCCEED && file_exists(h->farmid, srcdir, nme, ext)) { | 
| 3269 | 			ret = GDK_FAIL; | 
| 3270 | 		} | 
| 3271 | 		if (subcommit && | 
| 3272 | 		    (h->storage == STORE_PRIV || h->newstorage == STORE_PRIV)) { | 
| 3273 | 			long_str kill_ext; | 
| 3274 |  | 
| 3275 | 			strconcat_len(kill_ext, sizeof(kill_ext), | 
| 3276 | 				      ext, ".new.kill" , NULL); | 
| 3277 | 			if (file_exists(h->farmid, BAKDIR, nme, kill_ext) && | 
| 3278 | 			    file_move(h->farmid, BAKDIR, SUBDIR, nme, kill_ext) != GDK_SUCCEED) { | 
| 3279 | 				ret = GDK_FAIL; | 
| 3280 | 			} | 
| 3281 | 		} | 
| 3282 | 	} | 
| 3283 | 	return ret; | 
| 3284 | } | 
| 3285 |  | 
| 3286 | static gdk_return | 
| 3287 | BBPbackup(BAT *b, bool subcommit) | 
| 3288 | { | 
| 3289 | 	char *srcdir; | 
| 3290 | 	long_str nme; | 
| 3291 | 	const char *s = BBP_physical(b->batCacheid); | 
| 3292 | 	size_t slen; | 
| 3293 |  | 
| 3294 | 	if (BBPprepare(subcommit) != GDK_SUCCEED) { | 
| 3295 | 		return GDK_FAIL; | 
| 3296 | 	} | 
| 3297 | 	if (!b->batCopiedtodisk || b->batTransient) { | 
| 3298 | 		return GDK_SUCCEED; | 
| 3299 | 	} | 
| 3300 | 	/* determine location dir and physical suffix */ | 
| 3301 | 	if (!(srcdir = GDKfilepath(NOFARM, BATDIR, s, NULL))) | 
| 3302 | 		goto fail; | 
| 3303 | 	s = strrchr(srcdir, DIR_SEP); | 
| 3304 | 	if (!s) | 
| 3305 | 		goto fail; | 
| 3306 |  | 
| 3307 | 	slen = strlen(++s); | 
| 3308 | 	if (slen >= sizeof(nme)) | 
| 3309 | 		goto fail; | 
| 3310 | 	memcpy(nme, s, slen + 1); | 
| 3311 | 	srcdir[s - srcdir] = 0; | 
| 3312 |  | 
| 3313 | 	if (b->ttype != TYPE_void && | 
| 3314 | 	    do_backup(srcdir, nme, "tail" , &b->theap, | 
| 3315 | 		      b->batDirtydesc || b->theap.dirty, | 
| 3316 | 		      subcommit) != GDK_SUCCEED) | 
| 3317 | 		goto fail; | 
| 3318 | 	if (b->tvheap && | 
| 3319 | 	    do_backup(srcdir, nme, "theap" , b->tvheap, | 
| 3320 | 		      b->batDirtydesc || b->tvheap->dirty, | 
| 3321 | 		      subcommit) != GDK_SUCCEED) | 
| 3322 | 		goto fail; | 
| 3323 | 	GDKfree(srcdir); | 
| 3324 | 	return GDK_SUCCEED; | 
| 3325 | fail: | 
| 3326 | 	if(srcdir) | 
| 3327 | 		GDKfree(srcdir); | 
| 3328 | 	return GDK_FAIL; | 
| 3329 | } | 
| 3330 |  | 
| 3331 | /* | 
| 3332 |  * @+ Atomic Write | 
| 3333 |  * The atomic BBPsync() function first safeguards the old images of | 
| 3334 |  * all files to be written in BAKDIR. It then saves all files. If that | 
| 3335 |  * succeeds fully, BAKDIR is renamed to DELDIR. The rename is | 
| 3336 |  * considered an atomic action. If it succeeds, the DELDIR is removed. | 
| 3337 |  * If something fails, the pre-sync status can be obtained by moving | 
| 3338 |  * back all backed up files; this is done by BBPrecover(). | 
| 3339 |  * | 
| 3340 |  * The BBP.dir is also moved into the BAKDIR. | 
| 3341 |  */ | 
| 3342 | gdk_return | 
| 3343 | BBPsync(int cnt, bat *subcommit) | 
| 3344 | { | 
| 3345 | 	gdk_return ret = GDK_SUCCEED; | 
| 3346 | 	int t0 = 0, t1 = 0; | 
| 3347 | 	str bakdir, deldir; | 
| 3348 |  | 
| 3349 | 	if(!(bakdir = GDKfilepath(0, NULL, subcommit ? SUBDIR : BAKDIR, NULL))) | 
| 3350 | 		return GDK_FAIL; | 
| 3351 | 	if(!(deldir = GDKfilepath(0, NULL, DELDIR, NULL))) { | 
| 3352 | 		GDKfree(bakdir); | 
| 3353 | 		return GDK_FAIL; | 
| 3354 | 	} | 
| 3355 |  | 
| 3356 | 	PERFDEBUG t0 = t1 = GDKms(); | 
| 3357 |  | 
| 3358 | 	ret = BBPprepare(subcommit != NULL); | 
| 3359 |  | 
| 3360 | 	/* PHASE 1: safeguard everything in a backup-dir */ | 
| 3361 | 	if (ret == GDK_SUCCEED) { | 
| 3362 | 		int idx = 0; | 
| 3363 |  | 
| 3364 | 		while (++idx < cnt) { | 
| 3365 | 			bat i = subcommit ? subcommit[idx] : idx; | 
| 3366 | 			BAT *b = dirty_bat(&i, subcommit != NULL); | 
| 3367 | 			if (i <= 0) | 
| 3368 | 				break; | 
| 3369 | 			if (BBP_status(i) & BBPEXISTING) { | 
| 3370 | 				if (b != NULL && BBPbackup(b, subcommit != NULL) != GDK_SUCCEED) | 
| 3371 | 					break; | 
| 3372 | 			} else if (subcommit && (b = BBP_desc(i)) && BBP_status(i) & BBPDELETED) { | 
| 3373 | 				char o[10]; | 
| 3374 | 				char *f; | 
| 3375 | 				snprintf(o, sizeof(o), "%o" , (unsigned) b->batCacheid); | 
| 3376 | 				f = GDKfilepath(b->theap.farmid, BAKDIR, o, "tail" ); | 
| 3377 | 				if (access(f, F_OK) == 0) | 
| 3378 | 					file_move(b->theap.farmid, BAKDIR, SUBDIR, o, "tail" ); | 
| 3379 | 				GDKfree(f); | 
| 3380 | 				f = GDKfilepath(b->theap.farmid, BAKDIR, o, "theap" ); | 
| 3381 | 				if (access(f, F_OK) == 0) | 
| 3382 | 					file_move(b->theap.farmid, BAKDIR, SUBDIR, o, "theap" ); | 
| 3383 | 				GDKfree(f); | 
| 3384 | 			} | 
| 3385 | 		} | 
| 3386 | 		if (idx < cnt) | 
| 3387 | 			ret = GDK_FAIL; | 
| 3388 | 	} | 
| 3389 | 	PERFDEBUG fprintf(stderr, "#BBPsync (move time %d) %d files\n" , (t1 = GDKms()) - t0, backup_files); | 
| 3390 |  | 
| 3391 | 	/* PHASE 2: save the repository */ | 
| 3392 | 	if (ret == GDK_SUCCEED) { | 
| 3393 | 		int idx = 0; | 
| 3394 |  | 
| 3395 | 		while (++idx < cnt) { | 
| 3396 | 			bat i = subcommit ? subcommit[idx] : idx; | 
| 3397 |  | 
| 3398 | 			if (BBP_status(i) & BBPPERSISTENT) { | 
| 3399 | 				BAT *b = dirty_bat(&i, subcommit != NULL); | 
| 3400 | 				if (i <= 0) | 
| 3401 | 					break; | 
| 3402 | 				if (b != NULL && BATsave(b) != GDK_SUCCEED) | 
| 3403 | 					break;	/* write error */ | 
| 3404 | 			} | 
| 3405 | 		} | 
| 3406 | 		if (idx < cnt) | 
| 3407 | 			ret = GDK_FAIL; | 
| 3408 | 	} | 
| 3409 |  | 
| 3410 | 	PERFDEBUG fprintf(stderr, "#BBPsync (write time %d)\n" , (t0 = GDKms()) - t1); | 
| 3411 |  | 
| 3412 | 	if (ret == GDK_SUCCEED) { | 
| 3413 | 		ret = BBPdir(cnt, subcommit); | 
| 3414 | 	} | 
| 3415 |  | 
| 3416 | 	PERFDEBUG fprintf(stderr, "#BBPsync (dir time %d) %d bats\n" , (t1 = GDKms()) - t0, (bat) ATOMIC_GET(&BBPsize)); | 
| 3417 |  | 
| 3418 | 	if (ret == GDK_SUCCEED) { | 
| 3419 | 		/* atomic switchover */ | 
| 3420 | 		/* this is the big one: this call determines | 
| 3421 | 		 * whether the operation of this function | 
| 3422 | 		 * succeeded, so no changing of ret after this | 
| 3423 | 		 * call anymore */ | 
| 3424 |  | 
| 3425 | 		if (rename(bakdir, deldir) < 0) | 
| 3426 | 			ret = GDK_FAIL; | 
| 3427 | 		if (ret != GDK_SUCCEED && | 
| 3428 | 		    GDKremovedir(0, DELDIR) == GDK_SUCCEED && /* maybe there was an old deldir */ | 
| 3429 | 		    rename(bakdir, deldir) < 0) | 
| 3430 | 			ret = GDK_FAIL; | 
| 3431 | 		if (ret != GDK_SUCCEED) | 
| 3432 | 			GDKsyserror("BBPsync: rename(%s,%s) failed.\n" , bakdir, deldir); | 
| 3433 | 		IODEBUG fprintf(stderr, "#BBPsync: rename %s %s = %d\n" , bakdir, deldir, (int) ret); | 
| 3434 | 	} | 
| 3435 |  | 
| 3436 | 	/* AFTERMATH */ | 
| 3437 | 	if (ret == GDK_SUCCEED) { | 
| 3438 | 		backup_files = subcommit ? (backup_files - backup_subdir) : 0; | 
| 3439 | 		backup_dir = backup_subdir = 0; | 
| 3440 | 		if (GDKremovedir(0, DELDIR) != GDK_SUCCEED) | 
| 3441 | 			fprintf(stderr, "#BBPsync: cannot remove directory %s\n" , DELDIR); | 
| 3442 | 		(void) BBPprepare(false); /* (try to) remove DELDIR and set up new BAKDIR */ | 
| 3443 | 		if (backup_files > 1) { | 
| 3444 | 			PERFDEBUG fprintf(stderr, "#BBPsync (backup_files %d > 1)\n" , backup_files); | 
| 3445 | 			backup_files = 1; | 
| 3446 | 		} | 
| 3447 | 	} | 
| 3448 | 	PERFDEBUG fprintf(stderr, "#BBPsync (ready time %d)\n" , (t0 = GDKms()) - t1); | 
| 3449 | 	GDKfree(bakdir); | 
| 3450 | 	GDKfree(deldir); | 
| 3451 | 	return ret; | 
| 3452 | } | 
| 3453 |  | 
/*
 * Recovery just moves all files back to their original location.  This
 * is an incremental process: if something fails, we simply stop,
 * leaving the files that still have to be moved in BACKUP/.  The
 * recovery process can resume later with the leftover files.
 */
| 3460 | static gdk_return | 
| 3461 | force_move(int farmid, const char *srcdir, const char *dstdir, const char *name) | 
| 3462 | { | 
| 3463 | 	const char *p; | 
| 3464 | 	char *dstpath, *killfile; | 
| 3465 | 	gdk_return ret = GDK_SUCCEED; | 
| 3466 |  | 
| 3467 | 	if ((p = strrchr(name, '.')) != NULL && strcmp(p, ".kill" ) == 0) { | 
| 3468 | 		/* Found a X.new.kill file, ie remove the X.new file */ | 
| 3469 | 		ptrdiff_t len = p - name; | 
| 3470 | 		long_str srcpath; | 
| 3471 |  | 
| 3472 | 		strncpy(srcpath, name, len); | 
| 3473 | 		srcpath[len] = '\0'; | 
| 3474 | 		if(!(dstpath = GDKfilepath(farmid, dstdir, srcpath, NULL))) { | 
| 3475 | 			GDKsyserror("force_move: malloc fail\n" ); | 
| 3476 | 			return GDK_FAIL; | 
| 3477 | 		} | 
| 3478 |  | 
| 3479 | 		/* step 1: remove the X.new file that is going to be | 
| 3480 | 		 * overridden by X */ | 
| 3481 | 		if (remove(dstpath) != 0 && errno != ENOENT) { | 
| 3482 | 			/* if it exists and cannot be removed, all | 
| 3483 | 			 * this is going to fail */ | 
| 3484 | 			GDKsyserror("force_move: remove(%s)\n" , dstpath); | 
| 3485 | 			GDKfree(dstpath); | 
| 3486 | 			return GDK_FAIL; | 
| 3487 | 		} | 
| 3488 | 		GDKfree(dstpath); | 
| 3489 |  | 
| 3490 | 		/* step 2: now remove the .kill file. This one is | 
| 3491 | 		 * crucial, otherwise we'll never finish recovering */ | 
| 3492 | 		if(!(killfile = GDKfilepath(farmid, srcdir, name, NULL))) { | 
| 3493 | 			GDKsyserror("force_move: malloc fail\n" ); | 
| 3494 | 			return GDK_FAIL; | 
| 3495 | 		} | 
| 3496 | 		if (remove(killfile) != 0) { | 
| 3497 | 			ret = GDK_FAIL; | 
| 3498 | 			GDKsyserror("force_move: remove(%s)\n" , killfile); | 
| 3499 | 		} | 
| 3500 | 		GDKfree(killfile); | 
| 3501 | 		return ret; | 
| 3502 | 	} | 
| 3503 | 	/* try to rename it */ | 
| 3504 | 	ret = GDKmove(farmid, srcdir, name, NULL, dstdir, name, NULL); | 
| 3505 |  | 
| 3506 | 	if (ret != GDK_SUCCEED) { | 
| 3507 | 		char *srcpath; | 
| 3508 |  | 
| 3509 | 		/* two legal possible causes: file exists or dir | 
| 3510 | 		 * doesn't exist */ | 
| 3511 | 		if(!(dstpath = GDKfilepath(farmid, dstdir, name, NULL))) | 
| 3512 | 			return GDK_FAIL; | 
| 3513 | 		if(!(srcpath = GDKfilepath(farmid, srcdir, name, NULL))) { | 
| 3514 | 			GDKfree(dstpath); | 
| 3515 | 			return GDK_FAIL; | 
| 3516 | 		} | 
| 3517 | 		if (remove(dstpath) != 0)	/* clear destination */ | 
| 3518 | 			ret = GDK_FAIL; | 
| 3519 | 		IODEBUG fprintf(stderr, "#remove %s = %d\n" , dstpath, (int) ret); | 
| 3520 |  | 
| 3521 | 		(void) GDKcreatedir(dstdir); /* if fails, move will fail */ | 
| 3522 | 		ret = GDKmove(farmid, srcdir, name, NULL, dstdir, name, NULL); | 
| 3523 | 		IODEBUG fprintf(stderr, "#link %s %s = %d\n" , srcpath, dstpath, (int) ret); | 
| 3524 | 		GDKfree(dstpath); | 
| 3525 | 		GDKfree(srcpath); | 
| 3526 | 	} | 
| 3527 | 	return ret; | 
| 3528 | } | 
| 3529 |  | 
| 3530 | gdk_return | 
| 3531 | BBPrecover(int farmid) | 
| 3532 | { | 
| 3533 | 	str bakdirpath; | 
| 3534 | 	str leftdirpath; | 
| 3535 | 	DIR *dirp; | 
| 3536 | 	struct dirent *dent; | 
| 3537 | 	long_str path, dstpath; | 
| 3538 | 	bat i; | 
| 3539 | 	size_t j = strlen(BATDIR); | 
| 3540 | 	gdk_return ret = GDK_SUCCEED; | 
| 3541 | 	bool dirseen = false; | 
| 3542 | 	str dstdir; | 
| 3543 |  | 
| 3544 | 	bakdirpath = GDKfilepath(farmid, NULL, BAKDIR, NULL); | 
| 3545 | 	leftdirpath = GDKfilepath(farmid, NULL, LEFTDIR, NULL); | 
| 3546 | 	if (bakdirpath == NULL || leftdirpath == NULL) { | 
| 3547 | 		GDKfree(bakdirpath); | 
| 3548 | 		GDKfree(leftdirpath); | 
| 3549 | 		return GDK_FAIL; | 
| 3550 | 	} | 
| 3551 | 	dirp = opendir(bakdirpath); | 
| 3552 | 	if (dirp == NULL) { | 
| 3553 | 		GDKfree(bakdirpath); | 
| 3554 | 		GDKfree(leftdirpath); | 
| 3555 | 		return GDK_SUCCEED;	/* nothing to do */ | 
| 3556 | 	} | 
| 3557 | 	memcpy(dstpath, BATDIR, j); | 
| 3558 | 	dstpath[j] = DIR_SEP; | 
| 3559 | 	dstpath[++j] = 0; | 
| 3560 | 	dstdir = dstpath + j; | 
| 3561 | 	IODEBUG fprintf(stderr, "#BBPrecover(start)\n" ); | 
| 3562 |  | 
| 3563 | 	if (mkdir(leftdirpath, MONETDB_DIRMODE) < 0 && errno != EEXIST) { | 
| 3564 | 		GDKsyserror("BBPrecover: cannot create directory %s\n" , leftdirpath); | 
| 3565 | 		closedir(dirp); | 
| 3566 | 		GDKfree(bakdirpath); | 
| 3567 | 		GDKfree(leftdirpath); | 
| 3568 | 		return GDK_FAIL; | 
| 3569 | 	} | 
| 3570 |  | 
| 3571 | 	/* move back all files */ | 
| 3572 | 	while ((dent = readdir(dirp)) != NULL) { | 
| 3573 | 		const char *q = strchr(dent->d_name, '.'); | 
| 3574 |  | 
| 3575 | 		if (q == dent->d_name) { | 
| 3576 | 			char *fn; | 
| 3577 |  | 
| 3578 | 			if (strcmp(dent->d_name, "." ) == 0 || | 
| 3579 | 			    strcmp(dent->d_name, ".." ) == 0) | 
| 3580 | 				continue; | 
| 3581 | 			fn = GDKfilepath(farmid, BAKDIR, dent->d_name, NULL); | 
| 3582 | 			if (fn) { | 
| 3583 | 				int uret = remove(fn); | 
| 3584 | 				IODEBUG fprintf(stderr, "#remove %s = %d\n" , | 
| 3585 | 						fn, uret); | 
| 3586 | 				GDKfree(fn); | 
| 3587 | 			} | 
| 3588 | 			continue; | 
| 3589 | 		} else if (strcmp(dent->d_name, "BBP.dir" ) == 0) { | 
| 3590 | 			dirseen = true; | 
| 3591 | 			continue; | 
| 3592 | 		} | 
| 3593 | 		if (q == NULL) | 
| 3594 | 			q = dent->d_name + strlen(dent->d_name); | 
| 3595 | 		if ((j = q - dent->d_name) + 1 > sizeof(path)) { | 
| 3596 | 			/* name too long: ignore */ | 
| 3597 | 			continue; | 
| 3598 | 		} | 
| 3599 | 		strncpy(path, dent->d_name, j); | 
| 3600 | 		path[j] = 0; | 
| 3601 | 		if (GDKisdigit(*path)) { | 
| 3602 | 			i = strtol(path, NULL, 8); | 
| 3603 | 		} else { | 
| 3604 | 			i = BBP_find(path, false); | 
| 3605 | 			if (i < 0) | 
| 3606 | 				i = -i; | 
| 3607 | 		} | 
| 3608 | 		if (i == 0 || i >= (bat) ATOMIC_GET(&BBPsize) || !BBPvalid(i)) { | 
| 3609 | 			force_move(farmid, BAKDIR, LEFTDIR, dent->d_name); | 
| 3610 | 		} else { | 
| 3611 | 			BBPgetsubdir(dstdir, i); | 
| 3612 | 			if (force_move(farmid, BAKDIR, dstpath, dent->d_name) != GDK_SUCCEED) | 
| 3613 | 				ret = GDK_FAIL; | 
| 3614 | 		} | 
| 3615 | 	} | 
| 3616 | 	closedir(dirp); | 
| 3617 | 	if (dirseen && ret == GDK_SUCCEED) {	/* we have a saved BBP.dir; it should be moved back!! */ | 
| 3618 | 		struct stat st; | 
| 3619 | 		char *fn; | 
| 3620 |  | 
| 3621 | 		fn = GDKfilepath(farmid, BATDIR, "BBP" , "dir" ); | 
| 3622 | 		ret = recover_dir(farmid, stat(fn, &st) == 0); | 
| 3623 | 		GDKfree(fn); | 
| 3624 | 	} | 
| 3625 |  | 
| 3626 | 	if (ret == GDK_SUCCEED) { | 
| 3627 | 		if (rmdir(bakdirpath) < 0) { | 
| 3628 | 			GDKsyserror("BBPrecover: cannot remove directory %s\n" , bakdirpath); | 
| 3629 | 			ret = GDK_FAIL; | 
| 3630 | 		} | 
| 3631 | 		IODEBUG fprintf(stderr, "#rmdir %s = %d\n" , bakdirpath, (int) ret); | 
| 3632 | 	} | 
| 3633 | 	if (ret != GDK_SUCCEED) | 
| 3634 | 		GDKerror("BBPrecover: recovery failed. Please check whether your disk is full or write-protected.\n" ); | 
| 3635 |  | 
| 3636 | 	IODEBUG fprintf(stderr, "#BBPrecover(end)\n" ); | 
| 3637 | 	GDKfree(bakdirpath); | 
| 3638 | 	GDKfree(leftdirpath); | 
| 3639 | 	return ret; | 
| 3640 | } | 
| 3641 |  | 
| 3642 | /* | 
| 3643 |  * SUBDIR recovery is quite mindlessly moving all files back to the | 
| 3644 |  * parent (BAKDIR).  We do recognize moving back BBP.dir and set | 
| 3645 |  * backed_up_subdir accordingly. | 
| 3646 |  */ | 
| 3647 | gdk_return | 
| 3648 | BBPrecover_subdir(void) | 
| 3649 | { | 
| 3650 | 	str subdirpath; | 
| 3651 | 	DIR *dirp; | 
| 3652 | 	struct dirent *dent; | 
| 3653 | 	gdk_return ret = GDK_SUCCEED; | 
| 3654 |  | 
| 3655 | 	subdirpath = GDKfilepath(0, NULL, SUBDIR, NULL); | 
| 3656 | 	if (subdirpath == NULL) | 
| 3657 | 		return GDK_FAIL; | 
| 3658 | 	dirp = opendir(subdirpath); | 
| 3659 | 	GDKfree(subdirpath); | 
| 3660 | 	if (dirp == NULL) { | 
| 3661 | 		return GDK_SUCCEED;	/* nothing to do */ | 
| 3662 | 	} | 
| 3663 | 	IODEBUG fprintf(stderr, "#BBPrecover_subdir(start)\n" ); | 
| 3664 |  | 
| 3665 | 	/* move back all files */ | 
| 3666 | 	while ((dent = readdir(dirp)) != NULL) { | 
| 3667 | 		if (dent->d_name[0] == '.') | 
| 3668 | 			continue; | 
| 3669 | 		ret = GDKmove(0, SUBDIR, dent->d_name, NULL, BAKDIR, dent->d_name, NULL); | 
| 3670 | 		if (ret == GDK_SUCCEED && strcmp(dent->d_name, "BBP.dir" ) == 0) | 
| 3671 | 			backup_dir = 1; | 
| 3672 | 		if (ret != GDK_SUCCEED) | 
| 3673 | 			break; | 
| 3674 | 	} | 
| 3675 | 	closedir(dirp); | 
| 3676 |  | 
| 3677 | 	/* delete the directory */ | 
| 3678 | 	if (ret == GDK_SUCCEED) { | 
| 3679 | 		ret = GDKremovedir(0, SUBDIR); | 
| 3680 | 		if (backup_dir == 2) { | 
| 3681 | 			IODEBUG fprintf(stderr, "#BBPrecover_subdir: %s%cBBP.dir had disappeared!" , SUBDIR, DIR_SEP); | 
| 3682 | 			backup_dir = 0; | 
| 3683 | 		} | 
| 3684 | 	} | 
| 3685 | 	IODEBUG fprintf(stderr, "#BBPrecover_subdir(end) = %d\n" , (int) ret); | 
| 3686 |  | 
| 3687 | 	if (ret != GDK_SUCCEED) | 
| 3688 | 		GDKerror("BBPrecover_subdir: recovery failed. Please check whether your disk is full or write-protected.\n" ); | 
| 3689 | 	return ret; | 
| 3690 | } | 
| 3691 |  | 
/*
 * @- The diskscan
 * The BBPdiskscan routine walks through the BAT dir and cleans up
 * leftovers.  Leftovers are files that cannot belong to a BAT.  In
 * order to establish this for tail and theap files, the BAT
 * descriptor is looked up to determine whether these files are still
 * required.
 *
 * The routine returns true if the given path could not be opened as
 * a directory (there was nothing to scan) and false once a directory
 * has been scanned.
 */
| 3704 | static bool | 
| 3705 | persistent_bat(bat bid) | 
| 3706 | { | 
| 3707 | 	if (bid >= 0 && bid < (bat) ATOMIC_GET(&BBPsize) && BBPvalid(bid)) { | 
| 3708 | 		BAT *b = BBP_cache(bid); | 
| 3709 |  | 
| 3710 | 		if (b == NULL || b->batCopiedtodisk) { | 
| 3711 | 			return true; | 
| 3712 | 		} | 
| 3713 | 	} | 
| 3714 | 	return false; | 
| 3715 | } | 
| 3716 |  | 
| 3717 | static BAT * | 
| 3718 | getdesc(bat bid) | 
| 3719 | { | 
| 3720 | 	BAT *b = BBPgetdesc(bid); | 
| 3721 |  | 
| 3722 | 	if (b == NULL) | 
| 3723 | 		BBPclear(bid); | 
| 3724 | 	return b; | 
| 3725 | } | 
| 3726 |  | 
| 3727 | static bool | 
| 3728 | BBPdiskscan(const char *parent, size_t baseoff) | 
| 3729 | { | 
| 3730 | 	DIR *dirp = opendir(parent); | 
| 3731 | 	struct dirent *dent; | 
| 3732 | 	char fullname[FILENAME_MAX]; | 
| 3733 | 	str dst = fullname; | 
| 3734 | 	size_t dstlen = sizeof(fullname); | 
| 3735 | 	const char *src = parent; | 
| 3736 |  | 
| 3737 | 	if (dirp == NULL) | 
| 3738 | 		return true;	/* nothing to do */ | 
| 3739 |  | 
| 3740 | 	while (*src) { | 
| 3741 | 		*dst++ = *src++; | 
| 3742 | 		dstlen--; | 
| 3743 | 	} | 
| 3744 | 	if (dst > fullname && dst[-1] != DIR_SEP) { | 
| 3745 | 		*dst++ = DIR_SEP; | 
| 3746 | 		dstlen--; | 
| 3747 | 	} | 
| 3748 |  | 
| 3749 | 	while ((dent = readdir(dirp)) != NULL) { | 
| 3750 | 		const char *p; | 
| 3751 | 		bat bid; | 
| 3752 | 		bool ok, delete; | 
| 3753 |  | 
| 3754 | 		if (dent->d_name[0] == '.') | 
| 3755 | 			continue;	/* ignore .dot files and directories (. ..) */ | 
| 3756 |  | 
| 3757 | 		if (strncmp(dent->d_name, "BBP." , 4) == 0 && | 
| 3758 | 		    (strcmp(parent + baseoff, BATDIR) == 0 || | 
| 3759 | 		     strncmp(parent + baseoff, BAKDIR, strlen(BAKDIR)) == 0 || | 
| 3760 | 		     strncmp(parent + baseoff, SUBDIR, strlen(SUBDIR)) == 0)) | 
| 3761 | 			continue; | 
| 3762 |  | 
| 3763 | 		p = strchr(dent->d_name, '.'); | 
| 3764 |  | 
| 3765 | 		if (strlen(dent->d_name) >= dstlen) { | 
| 3766 | 			/* found a file with too long a name | 
| 3767 | 			   (i.e. unknown); stop pruning in this | 
| 3768 | 			   subdir */ | 
| 3769 | 			fprintf(stderr, "BBPdiskscan: unexpected file %s, leaving %s.\n" , dent->d_name, parent); | 
| 3770 | 			break; | 
| 3771 | 		} | 
| 3772 | 		strncpy(dst, dent->d_name, dstlen); | 
| 3773 | 		fullname[sizeof(fullname) - 1] = 0; | 
| 3774 |  | 
| 3775 | 		if (p == NULL && !BBPdiskscan(fullname, baseoff)) { | 
| 3776 | 			/* it was a directory */ | 
| 3777 | 			continue; | 
| 3778 | 		} | 
| 3779 |  | 
| 3780 | 		if (p && strcmp(p + 1, "tmp" ) == 0) { | 
| 3781 | 			delete = true; | 
| 3782 | 			ok = true; | 
| 3783 | 			bid = 0; | 
| 3784 | 		} else { | 
| 3785 | 			bid = strtol(dent->d_name, NULL, 8); | 
| 3786 | 			ok = p && bid; | 
| 3787 | 			delete = false; | 
| 3788 |  | 
| 3789 | 			if (!ok || !persistent_bat(bid)) { | 
| 3790 | 				delete = true; | 
| 3791 | 			} else if (strncmp(p + 1, "tail" , 4) == 0) { | 
| 3792 | 				BAT *b = getdesc(bid); | 
| 3793 | 				delete = (b == NULL || !b->ttype || !b->batCopiedtodisk); | 
| 3794 | 			} else if (strncmp(p + 1, "theap" , 5) == 0) { | 
| 3795 | 				BAT *b = getdesc(bid); | 
| 3796 | 				delete = (b == NULL || !b->tvheap || !b->batCopiedtodisk); | 
| 3797 | 			} else if (strncmp(p + 1, "thash" , 5) == 0) { | 
| 3798 | #ifdef PERSISTENTHASH | 
| 3799 | 				BAT *b = getdesc(bid); | 
| 3800 | 				delete = b == NULL; | 
| 3801 | 				if (!delete) | 
| 3802 | 					b->thash = (Hash *) 1; | 
| 3803 | #else | 
| 3804 | 				delete = true; | 
| 3805 | #endif | 
| 3806 | 			} else if (strncmp(p + 1, "timprints" , 9) == 0) { | 
| 3807 | 				BAT *b = getdesc(bid); | 
| 3808 | 				delete = b == NULL; | 
| 3809 | 				if (!delete) | 
| 3810 | 					b->timprints = (Imprints *) 1; | 
| 3811 | 			} else if (strncmp(p + 1, "torderidx" , 9) == 0) { | 
| 3812 | #ifdef PERSISTENTIDX | 
| 3813 | 				BAT *b = getdesc(bid); | 
| 3814 | 				delete = b == NULL; | 
| 3815 | 				if (!delete) | 
| 3816 | 					b->torderidx = (Heap *) 1; | 
| 3817 | #else | 
| 3818 | 				delete = true; | 
| 3819 | #endif | 
| 3820 | 			} else if (strncmp(p + 1, "new" , 3) != 0) { | 
| 3821 | 				ok = false; | 
| 3822 | 			} | 
| 3823 | 		} | 
| 3824 | 		if (!ok) { | 
| 3825 | 			/* found an unknown file; stop pruning in this | 
| 3826 | 			 * subdir */ | 
| 3827 | 			fprintf(stderr, "BBPdiskscan: unexpected file %s, leaving %s.\n" , dent->d_name, parent); | 
| 3828 | 			break; | 
| 3829 | 		} | 
| 3830 | 		if (delete) { | 
| 3831 | 			if (remove(fullname) != 0 && errno != ENOENT) { | 
| 3832 | 				GDKsyserror("BBPdiskscan: remove(%s)" , fullname); | 
| 3833 | 				continue; | 
| 3834 | 			} | 
| 3835 | 			IODEBUG fprintf(stderr, "#BBPcleanup: remove(%s) = 0\n" , fullname); | 
| 3836 | 		} | 
| 3837 | 	} | 
| 3838 | 	closedir(dirp); | 
| 3839 | 	return false; | 
| 3840 | } | 
| 3841 |  | 
| 3842 | void | 
| 3843 | gdk_bbp_reset(void) | 
| 3844 | { | 
| 3845 | 	int i; | 
| 3846 |  | 
| 3847 | 	while (BBPlimit > 0) { | 
| 3848 | 		BBPlimit -= BBPINIT; | 
| 3849 | 		assert(BBPlimit >= 0); | 
| 3850 | 		GDKfree(BBP[BBPlimit >> BBPINITLOG]); | 
| 3851 | 		BBP[BBPlimit >> BBPINITLOG] = NULL; | 
| 3852 | 	} | 
| 3853 | 	ATOMIC_SET(&BBPsize, 0); | 
| 3854 | 	for (i = 0; i < MAXFARMS; i++) | 
| 3855 | 		GDKfree((void *) BBPfarms[i].dirname); /* loose "const" */ | 
| 3856 | 	memset(BBPfarms, 0, sizeof(BBPfarms)); | 
| 3857 | 	BBP_hash = 0; | 
| 3858 | 	BBP_mask = 0; | 
| 3859 |  | 
| 3860 | 	locked_by = 0; | 
| 3861 | 	BBPunloadCnt = 0; | 
| 3862 | 	backup_files = 0; | 
| 3863 | 	backup_dir = 0; | 
| 3864 | 	backup_subdir = 0; | 
| 3865 | } | 
| 3866 |  |