| 1 | /*------------------------------------------------------------------------- |
| 2 | * |
| 3 | * dshash.c |
| 4 | * Concurrent hash tables backed by dynamic shared memory areas. |
| 5 | * |
| 6 | * This is an open hashing hash table, with a linked list at each table |
| 7 | * entry. It supports dynamic resizing, as required to prevent the linked |
| 8 | * lists from growing too long on average. Currently, only growing is |
| 9 | * supported: the hash table never becomes smaller. |
| 10 | * |
| 11 | * To deal with concurrency, it has a fixed size set of partitions, each of |
| 12 | * which is independently locked. Each bucket maps to a partition; so insert, |
| 13 | * find and iterate operations normally only acquire one lock. Therefore, |
| 14 | * good concurrency is achieved whenever such operations don't collide at the |
| 15 | * lock partition level. However, when a resize operation begins, all |
| 16 | * partition locks must be acquired simultaneously for a brief period. This |
| 17 | * is only expected to happen a small number of times until a stable size is |
| 18 | * found, since growth is geometric. |
| 19 | * |
| 20 | * Future versions may support iterators and incremental resizing; for now |
| 21 | * the implementation is minimalist. |
| 22 | * |
| 23 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
| 24 | * Portions Copyright (c) 1994, Regents of the University of California |
| 25 | * |
| 26 | * IDENTIFICATION |
| 27 | * src/backend/lib/dshash.c |
| 28 | * |
| 29 | *------------------------------------------------------------------------- |
| 30 | */ |
| 31 | |
| 32 | #include "postgres.h" |
| 33 | |
| 34 | #include "lib/dshash.h" |
| 35 | #include "storage/ipc.h" |
| 36 | #include "storage/lwlock.h" |
| 37 | #include "utils/dsa.h" |
| 38 | #include "utils/hsearch.h" |
| 39 | #include "utils/memutils.h" |
| 40 | |
| 41 | /* |
 * An item in the hash table.  This wraps the user's entry object in an
 * envelope that holds the key's hash value and a pointer to the next item in
 * the bucket.
| 45 | */ |
| 46 | struct dshash_table_item |
| 47 | { |
| 48 | /* The next item in the same bucket. */ |
| 49 | dsa_pointer next; |
| 50 | /* The hashed key, to avoid having to recompute it. */ |
| 51 | dshash_hash hash; |
| 52 | /* The user's entry object follows here. See ENTRY_FROM_ITEM(item). */ |
| 53 | }; |
| 54 | |
| 55 | /* |
| 56 | * The number of partitions for locking purposes. This is set to match |
| 57 | * NUM_BUFFER_PARTITIONS for now, on the basis that whatever's good enough for |
| 58 | * the buffer pool must be good enough for any other purpose. This could |
| 59 | * become a runtime parameter in future. |
| 60 | */ |
| 61 | #define DSHASH_NUM_PARTITIONS_LOG2 7 |
| 62 | #define DSHASH_NUM_PARTITIONS (1 << DSHASH_NUM_PARTITIONS_LOG2) |
| 63 | |
| 64 | /* A magic value used to identify our hash tables. */ |
| 65 | #define DSHASH_MAGIC 0x75ff6a20 |
| 66 | |
| 67 | /* |
| 68 | * Tracking information for each lock partition. Initially, each partition |
 * corresponds to one bucket, but each time the hash table grows, the buckets
 * covered by each partition are split, so the number of buckets covered by
 * each partition doubles.
| 71 | * |
| 72 | * We might want to add padding here so that each partition is on a different |
| 73 | * cache line, but doing so would bloat this structure considerably. |
| 74 | */ |
| 75 | typedef struct dshash_partition |
| 76 | { |
| 77 | LWLock lock; /* Protects all buckets in this partition. */ |
| 78 | size_t count; /* # of items in this partition's buckets */ |
| 79 | } dshash_partition; |
| 80 | |
| 81 | /* |
| 82 | * The head object for a hash table. This will be stored in dynamic shared |
| 83 | * memory. |
| 84 | */ |
| 85 | typedef struct dshash_table_control |
| 86 | { |
| 87 | dshash_table_handle handle; |
| 88 | uint32 magic; |
| 89 | dshash_partition partitions[DSHASH_NUM_PARTITIONS]; |
| 90 | int lwlock_tranche_id; |
| 91 | |
| 92 | /* |
 * The following members are written to only when ALL partition locks are
| 94 | * held. They can be read when any one partition lock is held. |
| 95 | */ |
| 96 | |
	/* Number of buckets, expressed as a power of 2 (e.g. 8 means 256 buckets). */
	size_t		size_log2;		/* log2(number of buckets) */
| 99 | dsa_pointer buckets; /* current bucket array */ |
| 100 | } dshash_table_control; |
| 101 | |
| 102 | /* |
| 103 | * Per-backend state for a dynamic hash table. |
| 104 | */ |
| 105 | struct dshash_table |
| 106 | { |
| 107 | dsa_area *area; /* Backing dynamic shared memory area. */ |
| 108 | dshash_parameters params; /* Parameters. */ |
| 109 | void *arg; /* User-supplied data pointer. */ |
| 110 | dshash_table_control *control; /* Control object in DSM. */ |
| 111 | dsa_pointer *buckets; /* Current bucket pointers in DSM. */ |
| 112 | size_t size_log2; /* log2(number of buckets) */ |
| 113 | bool find_locked; /* Is any partition lock held by 'find'? */ |
| 114 | bool find_exclusively_locked; /* ... exclusively? */ |
| 115 | }; |
| 116 | |
| 117 | /* Given a pointer to an item, find the entry (user data) it holds. */ |
| 118 | #define ENTRY_FROM_ITEM(item) \ |
| 119 | ((char *)(item) + MAXALIGN(sizeof(dshash_table_item))) |
| 120 | |
| 121 | /* Given a pointer to an entry, find the item that holds it. */ |
| 122 | #define ITEM_FROM_ENTRY(entry) \ |
| 123 | ((dshash_table_item *)((char *)(entry) - \ |
| 124 | MAXALIGN(sizeof(dshash_table_item)))) |
| 125 | |
| 126 | /* How many resize operations (bucket splits) have there been? */ |
| 127 | #define NUM_SPLITS(size_log2) \ |
| 128 | (size_log2 - DSHASH_NUM_PARTITIONS_LOG2) |
| 129 | |
| 130 | /* How many buckets are there in each partition at a given size? */ |
| 131 | #define BUCKETS_PER_PARTITION(size_log2) \ |
| 132 | (((size_t) 1) << NUM_SPLITS(size_log2)) |
| 133 | |
| 134 | /* Max entries before we need to grow. Half + quarter = 75% load factor. */ |
| 135 | #define MAX_COUNT_PER_PARTITION(hash_table) \ |
| 136 | (BUCKETS_PER_PARTITION(hash_table->size_log2) / 2 + \ |
| 137 | BUCKETS_PER_PARTITION(hash_table->size_log2) / 4) |
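
/*
 * For example (illustrative arithmetic only): at size_log2 = 10 each
 * partition covers 1 << (10 - 7) = 8 buckets, so MAX_COUNT_PER_PARTITION is
 * 8/2 + 8/4 = 6; once a partition holds more than 6 items, the next
 * insertion into it triggers a resize.
 */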
| 138 | |
| 139 | /* Choose partition based on the highest order bits of the hash. */ |
| 140 | #define PARTITION_FOR_HASH(hash) \ |
| 141 | (hash >> ((sizeof(dshash_hash) * CHAR_BIT) - DSHASH_NUM_PARTITIONS_LOG2)) |
| 142 | |
| 143 | /* |
| 144 | * Find the bucket index for a given hash and table size. Each time the table |
 * doubles in size, the bucket index for a given hash value doubles and
| 146 | * possibly adds one, depending on the newly revealed bit, so that all buckets |
| 147 | * are split. |
| 148 | */ |
| 149 | #define BUCKET_INDEX_FOR_HASH_AND_SIZE(hash, size_log2) \ |
| 150 | (hash >> ((sizeof(dshash_hash) * CHAR_BIT) - (size_log2))) |
| 151 | |
| 152 | /* The index of the first bucket in a given partition. */ |
| 153 | #define BUCKET_INDEX_FOR_PARTITION(partition, size_log2) \ |
| 154 | ((partition) << NUM_SPLITS(size_log2)) |
| 155 | |
| 156 | /* The head of the active bucket for a given hash value (lvalue). */ |
| 157 | #define BUCKET_FOR_HASH(hash_table, hash) \ |
| 158 | (hash_table->buckets[ \ |
| 159 | BUCKET_INDEX_FOR_HASH_AND_SIZE(hash, \ |
									   hash_table->size_log2)])
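
/*
 * Worked example of the macros above (illustrative only, assuming a 32-bit
 * dshash_hash): with DSHASH_NUM_PARTITIONS_LOG2 = 7 and size_log2 = 9
 * (512 buckets), the hash value 0xC0000001 gives
 *
 *	PARTITION_FOR_HASH(hash)                = hash >> (32 - 7) = 96
 *	BUCKET_INDEX_FOR_HASH_AND_SIZE(hash, 9) = hash >> (32 - 9) = 384
 *	BUCKETS_PER_PARTITION(9)                = 1 << (9 - 7)     = 4
 *	BUCKET_INDEX_FOR_PARTITION(96, 9)       = 96 << (9 - 7)    = 384
 *
 * so the bucket (384) lies in the range [384, 388) owned by partition 96.
 * When the table doubles to size_log2 = 10, an item in bucket 384 moves to
 * bucket 768 or 769 depending on the newly revealed bit, both of which still
 * belong to partition 96: resizing never moves an item across partitions.
 */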
| 161 | |
| 162 | static void delete_item(dshash_table *hash_table, |
| 163 | dshash_table_item *item); |
static void resize(dshash_table *hash_table, size_t new_size_log2);
| 165 | static inline void ensure_valid_bucket_pointers(dshash_table *hash_table); |
| 166 | static inline dshash_table_item *find_in_bucket(dshash_table *hash_table, |
| 167 | const void *key, |
| 168 | dsa_pointer item_pointer); |
| 169 | static void insert_item_into_bucket(dshash_table *hash_table, |
| 170 | dsa_pointer item_pointer, |
| 171 | dshash_table_item *item, |
| 172 | dsa_pointer *bucket); |
| 173 | static dshash_table_item *insert_into_bucket(dshash_table *hash_table, |
| 174 | const void *key, |
| 175 | dsa_pointer *bucket); |
| 176 | static bool delete_key_from_bucket(dshash_table *hash_table, |
| 177 | const void *key, |
| 178 | dsa_pointer *bucket_head); |
| 179 | static bool delete_item_from_bucket(dshash_table *hash_table, |
| 180 | dshash_table_item *item, |
| 181 | dsa_pointer *bucket_head); |
| 182 | static inline dshash_hash hash_key(dshash_table *hash_table, const void *key); |
| 183 | static inline bool equal_keys(dshash_table *hash_table, |
| 184 | const void *a, const void *b); |
| 185 | |
| 186 | #define PARTITION_LOCK(hash_table, i) \ |
| 187 | (&(hash_table)->control->partitions[(i)].lock) |
| 188 | |
| 189 | /* |
| 190 | * Create a new hash table backed by the given dynamic shared area, with the |
| 191 | * given parameters. The returned object is allocated in backend-local memory |
| 192 | * using the current MemoryContext. 'arg' will be passed through to the |
| 193 | * compare and hash functions. |
| 194 | */ |
| 195 | dshash_table * |
| 196 | dshash_create(dsa_area *area, const dshash_parameters *params, void *arg) |
| 197 | { |
| 198 | dshash_table *hash_table; |
| 199 | dsa_pointer control; |
| 200 | |
| 201 | /* Allocate the backend-local object representing the hash table. */ |
| 202 | hash_table = palloc(sizeof(dshash_table)); |
| 203 | |
| 204 | /* Allocate the control object in shared memory. */ |
| 205 | control = dsa_allocate(area, sizeof(dshash_table_control)); |
| 206 | |
| 207 | /* Set up the local and shared hash table structs. */ |
| 208 | hash_table->area = area; |
| 209 | hash_table->params = *params; |
| 210 | hash_table->arg = arg; |
| 211 | hash_table->control = dsa_get_address(area, control); |
| 212 | hash_table->control->handle = control; |
| 213 | hash_table->control->magic = DSHASH_MAGIC; |
| 214 | hash_table->control->lwlock_tranche_id = params->tranche_id; |
| 215 | |
| 216 | /* Set up the array of lock partitions. */ |
| 217 | { |
| 218 | dshash_partition *partitions = hash_table->control->partitions; |
| 219 | int tranche_id = hash_table->control->lwlock_tranche_id; |
| 220 | int i; |
| 221 | |
| 222 | for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i) |
| 223 | { |
| 224 | LWLockInitialize(&partitions[i].lock, tranche_id); |
| 225 | partitions[i].count = 0; |
| 226 | } |
| 227 | } |
| 228 | |
| 229 | hash_table->find_locked = false; |
| 230 | hash_table->find_exclusively_locked = false; |
| 231 | |
| 232 | /* |
| 233 | * Set up the initial array of buckets. Our initial size is the same as |
| 234 | * the number of partitions. |
| 235 | */ |
| 236 | hash_table->control->size_log2 = DSHASH_NUM_PARTITIONS_LOG2; |
| 237 | hash_table->control->buckets = |
| 238 | dsa_allocate_extended(area, |
| 239 | sizeof(dsa_pointer) * DSHASH_NUM_PARTITIONS, |
| 240 | DSA_ALLOC_NO_OOM | DSA_ALLOC_ZERO); |
| 241 | if (!DsaPointerIsValid(hash_table->control->buckets)) |
| 242 | { |
| 243 | dsa_free(area, control); |
| 244 | ereport(ERROR, |
| 245 | (errcode(ERRCODE_OUT_OF_MEMORY), |
| 246 | errmsg("out of memory" ), |
| 247 | errdetail("Failed on DSA request of size %zu." , |
| 248 | sizeof(dsa_pointer) * DSHASH_NUM_PARTITIONS))); |
| 249 | } |
| 250 | hash_table->buckets = dsa_get_address(area, |
| 251 | hash_table->control->buckets); |
| 252 | hash_table->size_log2 = hash_table->control->size_log2; |
| 253 | |
| 254 | return hash_table; |
| 255 | } |
| 256 | |
| 257 | /* |
| 258 | * Attach to an existing hash table using a handle. The returned object is |
| 259 | * allocated in backend-local memory using the current MemoryContext. 'arg' |
| 260 | * will be passed through to the compare and hash functions. |
| 261 | */ |
| 262 | dshash_table * |
| 263 | dshash_attach(dsa_area *area, const dshash_parameters *params, |
| 264 | dshash_table_handle handle, void *arg) |
| 265 | { |
| 266 | dshash_table *hash_table; |
| 267 | dsa_pointer control; |
| 268 | |
| 269 | /* Allocate the backend-local object representing the hash table. */ |
| 270 | hash_table = palloc(sizeof(dshash_table)); |
| 271 | |
| 272 | /* Find the control object in shared memory. */ |
| 273 | control = handle; |
| 274 | |
| 275 | /* Set up the local hash table struct. */ |
| 276 | hash_table->area = area; |
| 277 | hash_table->params = *params; |
| 278 | hash_table->arg = arg; |
| 279 | hash_table->control = dsa_get_address(area, control); |
| 280 | hash_table->find_locked = false; |
| 281 | hash_table->find_exclusively_locked = false; |
| 282 | Assert(hash_table->control->magic == DSHASH_MAGIC); |
| 283 | |
| 284 | /* |
| 285 | * These will later be set to the correct values by |
| 286 | * ensure_valid_bucket_pointers(), at which time we'll be holding a |
| 287 | * partition lock for interlocking against concurrent resizing. |
| 288 | */ |
| 289 | hash_table->buckets = NULL; |
| 290 | hash_table->size_log2 = 0; |
| 291 | |
| 292 | return hash_table; |
| 293 | } |
| 294 | |
| 295 | /* |
| 296 | * Detach from a hash table. This frees backend-local resources associated |
| 297 | * with the hash table, but the hash table will continue to exist until it is |
| 298 | * either explicitly destroyed (by a backend that is still attached to it), or |
| 299 | * the area that backs it is returned to the operating system. |
| 300 | */ |
| 301 | void |
| 302 | dshash_detach(dshash_table *hash_table) |
| 303 | { |
| 304 | Assert(!hash_table->find_locked); |
| 305 | |
| 306 | /* The hash table may have been destroyed. Just free local memory. */ |
| 307 | pfree(hash_table); |
| 308 | } |
| 309 | |
| 310 | /* |
| 311 | * Destroy a hash table, returning all memory to the area. The caller must be |
| 312 | * certain that no other backend will attempt to access the hash table before |
 * calling this function.  Other backends must explicitly call dshash_detach to
| 314 | * free up backend-local memory associated with the hash table. The backend |
| 315 | * that calls dshash_destroy must not call dshash_detach. |
| 316 | */ |
| 317 | void |
| 318 | dshash_destroy(dshash_table *hash_table) |
| 319 | { |
| 320 | size_t size; |
| 321 | size_t i; |
| 322 | |
| 323 | Assert(hash_table->control->magic == DSHASH_MAGIC); |
| 324 | ensure_valid_bucket_pointers(hash_table); |
| 325 | |
| 326 | /* Free all the entries. */ |
| 327 | size = ((size_t) 1) << hash_table->size_log2; |
| 328 | for (i = 0; i < size; ++i) |
| 329 | { |
| 330 | dsa_pointer item_pointer = hash_table->buckets[i]; |
| 331 | |
| 332 | while (DsaPointerIsValid(item_pointer)) |
| 333 | { |
| 334 | dshash_table_item *item; |
| 335 | dsa_pointer next_item_pointer; |
| 336 | |
| 337 | item = dsa_get_address(hash_table->area, item_pointer); |
| 338 | next_item_pointer = item->next; |
| 339 | dsa_free(hash_table->area, item_pointer); |
| 340 | item_pointer = next_item_pointer; |
| 341 | } |
| 342 | } |
| 343 | |
| 344 | /* |
| 345 | * Vandalize the control block to help catch programming errors where |
| 346 | * other backends access the memory formerly occupied by this hash table. |
| 347 | */ |
| 348 | hash_table->control->magic = 0; |
| 349 | |
| 350 | /* Free the active table and control object. */ |
| 351 | dsa_free(hash_table->area, hash_table->control->buckets); |
| 352 | dsa_free(hash_table->area, hash_table->control->handle); |
| 353 | |
| 354 | pfree(hash_table); |
| 355 | } |
| 356 | |
| 357 | /* |
| 358 | * Get a handle that can be used by other processes to attach to this hash |
| 359 | * table. |
| 360 | */ |
| 361 | dshash_table_handle |
| 362 | dshash_get_hash_table_handle(dshash_table *hash_table) |
| 363 | { |
| 364 | Assert(hash_table->control->magic == DSHASH_MAGIC); |
| 365 | |
| 366 | return hash_table->control->handle; |
| 367 | } |
| 368 | |
| 369 | /* |
| 370 | * Look up an entry, given a key. Returns a pointer to an entry if one can be |
| 371 | * found with the given key. Returns NULL if the key is not found. If a |
| 372 | * non-NULL value is returned, the entry is locked and must be released by |
| 373 | * calling dshash_release_lock. If an error is raised before |
| 374 | * dshash_release_lock is called, the lock will be released automatically, but |
| 375 | * the caller must take care to ensure that the entry is not left corrupted. |
| 376 | * The lock mode is either shared or exclusive depending on 'exclusive'. |
| 377 | * |
 * The caller must not already hold a lock on this hash table.
| 379 | * |
 * Note that the lock held is in fact an LWLock, so interrupts will be held off
 * on return from this function, and not resumed until dshash_release_lock is
 * called.  It is a very good idea for the caller to release the lock quickly.
| 383 | */ |
| 384 | void * |
| 385 | dshash_find(dshash_table *hash_table, const void *key, bool exclusive) |
| 386 | { |
| 387 | dshash_hash hash; |
| 388 | size_t partition; |
| 389 | dshash_table_item *item; |
| 390 | |
| 391 | hash = hash_key(hash_table, key); |
| 392 | partition = PARTITION_FOR_HASH(hash); |
| 393 | |
| 394 | Assert(hash_table->control->magic == DSHASH_MAGIC); |
| 395 | Assert(!hash_table->find_locked); |
| 396 | |
| 397 | LWLockAcquire(PARTITION_LOCK(hash_table, partition), |
| 398 | exclusive ? LW_EXCLUSIVE : LW_SHARED); |
| 399 | ensure_valid_bucket_pointers(hash_table); |
| 400 | |
| 401 | /* Search the active bucket. */ |
| 402 | item = find_in_bucket(hash_table, key, BUCKET_FOR_HASH(hash_table, hash)); |
| 403 | |
| 404 | if (!item) |
| 405 | { |
| 406 | /* Not found. */ |
| 407 | LWLockRelease(PARTITION_LOCK(hash_table, partition)); |
| 408 | return NULL; |
| 409 | } |
| 410 | else |
| 411 | { |
		/* The caller will free the lock by calling dshash_release_lock. */
| 413 | hash_table->find_locked = true; |
| 414 | hash_table->find_exclusively_locked = exclusive; |
| 415 | return ENTRY_FROM_ITEM(item); |
| 416 | } |
| 417 | } |
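
/*
 * A minimal lookup sketch (illustrative, continuing the hypothetical
 * "my_entry" example near the top of this file).  Passing exclusive = false
 * takes only a shared lock, so the entry must not be modified, and the lock
 * should be released promptly:
 *
 *	my_entry   *entry;
 *
 *	entry = dshash_find(table, &my_key, false);
 *	if (entry != NULL)
 *	{
 *		... read (but do not modify) *entry ...
 *		dshash_release_lock(table, entry);
 *	}
 */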
| 418 | |
| 419 | /* |
| 420 | * Returns a pointer to an exclusively locked item which must be released with |
| 421 | * dshash_release_lock. If the key is found in the hash table, 'found' is set |
| 422 | * to true and a pointer to the existing entry is returned. If the key is not |
| 423 | * found, 'found' is set to false, and a pointer to a newly created entry is |
| 424 | * returned. |
| 425 | * |
 * The notes above dshash_find() regarding locking and error handling apply
 * equally here.
| 428 | */ |
| 429 | void * |
| 430 | dshash_find_or_insert(dshash_table *hash_table, |
| 431 | const void *key, |
| 432 | bool *found) |
| 433 | { |
| 434 | dshash_hash hash; |
| 435 | size_t partition_index; |
| 436 | dshash_partition *partition; |
| 437 | dshash_table_item *item; |
| 438 | |
| 439 | hash = hash_key(hash_table, key); |
| 440 | partition_index = PARTITION_FOR_HASH(hash); |
| 441 | partition = &hash_table->control->partitions[partition_index]; |
| 442 | |
| 443 | Assert(hash_table->control->magic == DSHASH_MAGIC); |
| 444 | Assert(!hash_table->find_locked); |
| 445 | |
| 446 | restart: |
| 447 | LWLockAcquire(PARTITION_LOCK(hash_table, partition_index), |
| 448 | LW_EXCLUSIVE); |
| 449 | ensure_valid_bucket_pointers(hash_table); |
| 450 | |
| 451 | /* Search the active bucket. */ |
| 452 | item = find_in_bucket(hash_table, key, BUCKET_FOR_HASH(hash_table, hash)); |
| 453 | |
| 454 | if (item) |
| 455 | *found = true; |
| 456 | else |
| 457 | { |
| 458 | *found = false; |
| 459 | |
| 460 | /* Check if we are getting too full. */ |
| 461 | if (partition->count > MAX_COUNT_PER_PARTITION(hash_table)) |
| 462 | { |
| 463 | /* |
| 464 | * The load factor (= keys / buckets) for all buckets protected by |
| 465 | * this partition is > 0.75. Presumably the same applies |
| 466 | * generally across the whole hash table (though we don't attempt |
| 467 | * to track that directly to avoid contention on some kind of |
| 468 | * central counter; we just assume that this partition is |
| 469 | * representative). This is a good time to resize. |
| 470 | * |
| 471 | * Give up our existing lock first, because resizing needs to |
| 472 | * reacquire all the locks in the right order to avoid deadlocks. |
| 473 | */ |
| 474 | LWLockRelease(PARTITION_LOCK(hash_table, partition_index)); |
| 475 | resize(hash_table, hash_table->size_log2 + 1); |
| 476 | |
| 477 | goto restart; |
| 478 | } |
| 479 | |
| 480 | /* Finally we can try to insert the new item. */ |
| 481 | item = insert_into_bucket(hash_table, key, |
| 482 | &BUCKET_FOR_HASH(hash_table, hash)); |
| 483 | item->hash = hash; |
| 484 | /* Adjust per-lock-partition counter for load factor knowledge. */ |
| 485 | ++partition->count; |
| 486 | } |
| 487 | |
| 488 | /* The caller must release the lock with dshash_release_lock. */ |
| 489 | hash_table->find_locked = true; |
| 490 | hash_table->find_exclusively_locked = true; |
| 491 | return ENTRY_FROM_ITEM(item); |
| 492 | } |
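
/*
 * A minimal insertion sketch (illustrative, continuing the hypothetical
 * "my_entry" example near the top of this file).  Only the key is copied
 * into a newly created entry, so the caller must initialize the rest of it
 * before releasing the lock:
 *
 *	my_entry   *entry;
 *	bool		found;
 *
 *	entry = dshash_find_or_insert(table, &my_key, &found);
 *	if (!found)
 *		entry->value = 0;
 *	entry->value++;
 *	dshash_release_lock(table, entry);
 */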
| 493 | |
| 494 | /* |
| 495 | * Remove an entry by key. Returns true if the key was found and the |
| 496 | * corresponding entry was removed. |
| 497 | * |
| 498 | * To delete an entry that you already have a pointer to, see |
| 499 | * dshash_delete_entry. |
| 500 | */ |
| 501 | bool |
| 502 | dshash_delete_key(dshash_table *hash_table, const void *key) |
| 503 | { |
| 504 | dshash_hash hash; |
| 505 | size_t partition; |
| 506 | bool found; |
| 507 | |
| 508 | Assert(hash_table->control->magic == DSHASH_MAGIC); |
| 509 | Assert(!hash_table->find_locked); |
| 510 | |
| 511 | hash = hash_key(hash_table, key); |
| 512 | partition = PARTITION_FOR_HASH(hash); |
| 513 | |
| 514 | LWLockAcquire(PARTITION_LOCK(hash_table, partition), LW_EXCLUSIVE); |
| 515 | ensure_valid_bucket_pointers(hash_table); |
| 516 | |
| 517 | if (delete_key_from_bucket(hash_table, key, |
| 518 | &BUCKET_FOR_HASH(hash_table, hash))) |
| 519 | { |
| 520 | Assert(hash_table->control->partitions[partition].count > 0); |
| 521 | found = true; |
| 522 | --hash_table->control->partitions[partition].count; |
| 523 | } |
| 524 | else |
| 525 | found = false; |
| 526 | |
| 527 | LWLockRelease(PARTITION_LOCK(hash_table, partition)); |
| 528 | |
| 529 | return found; |
| 530 | } |
| 531 | |
| 532 | /* |
| 533 | * Remove an entry. The entry must already be exclusively locked, and must |
| 534 | * have been obtained by dshash_find or dshash_find_or_insert. Note that this |
| 535 | * function releases the lock just like dshash_release_lock. |
| 536 | * |
| 537 | * To delete an entry by key, see dshash_delete_key. |
| 538 | */ |
| 539 | void |
| 540 | dshash_delete_entry(dshash_table *hash_table, void *entry) |
| 541 | { |
| 542 | dshash_table_item *item = ITEM_FROM_ENTRY(entry); |
| 543 | size_t partition = PARTITION_FOR_HASH(item->hash); |
| 544 | |
| 545 | Assert(hash_table->control->magic == DSHASH_MAGIC); |
| 546 | Assert(hash_table->find_locked); |
| 547 | Assert(hash_table->find_exclusively_locked); |
| 548 | Assert(LWLockHeldByMeInMode(PARTITION_LOCK(hash_table, partition), |
| 549 | LW_EXCLUSIVE)); |
| 550 | |
| 551 | delete_item(hash_table, item); |
| 552 | hash_table->find_locked = false; |
| 553 | hash_table->find_exclusively_locked = false; |
| 554 | LWLockRelease(PARTITION_LOCK(hash_table, partition)); |
| 555 | } |
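
/*
 * A minimal deletion sketch (illustrative; "my_key" is hypothetical): look
 * the entry up with an exclusive lock, then delete it.  dshash_delete_entry
 * releases the partition lock itself, so dshash_release_lock must not be
 * called afterwards:
 *
 *	void	   *entry;
 *
 *	entry = dshash_find(table, &my_key, true);
 *	if (entry != NULL)
 *		dshash_delete_entry(table, entry);
 */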
| 556 | |
| 557 | /* |
| 558 | * Unlock an entry which was locked by dshash_find or dshash_find_or_insert. |
| 559 | */ |
| 560 | void |
| 561 | dshash_release_lock(dshash_table *hash_table, void *entry) |
| 562 | { |
| 563 | dshash_table_item *item = ITEM_FROM_ENTRY(entry); |
| 564 | size_t partition_index = PARTITION_FOR_HASH(item->hash); |
| 565 | |
| 566 | Assert(hash_table->control->magic == DSHASH_MAGIC); |
| 567 | Assert(hash_table->find_locked); |
| 568 | Assert(LWLockHeldByMeInMode(PARTITION_LOCK(hash_table, partition_index), |
| 569 | hash_table->find_exclusively_locked |
| 570 | ? LW_EXCLUSIVE : LW_SHARED)); |
| 571 | |
| 572 | hash_table->find_locked = false; |
| 573 | hash_table->find_exclusively_locked = false; |
| 574 | LWLockRelease(PARTITION_LOCK(hash_table, partition_index)); |
| 575 | } |
| 576 | |
| 577 | /* |
| 578 | * A compare function that forwards to memcmp. |
| 579 | */ |
| 580 | int |
| 581 | dshash_memcmp(const void *a, const void *b, size_t size, void *arg) |
| 582 | { |
| 583 | return memcmp(a, b, size); |
| 584 | } |
| 585 | |
| 586 | /* |
| 587 | * A hash function that forwards to tag_hash. |
| 588 | */ |
| 589 | dshash_hash |
| 590 | dshash_memhash(const void *v, size_t size, void *arg) |
| 591 | { |
| 592 | return tag_hash(v, size); |
| 593 | } |
| 594 | |
| 595 | /* |
| 596 | * Print debugging information about the internal state of the hash table to |
| 597 | * stderr. The caller must hold no partition locks. |
| 598 | */ |
| 599 | void |
| 600 | dshash_dump(dshash_table *hash_table) |
| 601 | { |
| 602 | size_t i; |
| 603 | size_t j; |
| 604 | |
| 605 | Assert(hash_table->control->magic == DSHASH_MAGIC); |
| 606 | Assert(!hash_table->find_locked); |
| 607 | |
| 608 | for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i) |
| 609 | { |
| 610 | Assert(!LWLockHeldByMe(PARTITION_LOCK(hash_table, i))); |
| 611 | LWLockAcquire(PARTITION_LOCK(hash_table, i), LW_SHARED); |
| 612 | } |
| 613 | |
| 614 | ensure_valid_bucket_pointers(hash_table); |
| 615 | |
| 616 | fprintf(stderr, |
| 617 | "hash table size = %zu\n" , (size_t) 1 << hash_table->size_log2); |
| 618 | for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i) |
| 619 | { |
| 620 | dshash_partition *partition = &hash_table->control->partitions[i]; |
| 621 | size_t begin = BUCKET_INDEX_FOR_PARTITION(i, hash_table->size_log2); |
| 622 | size_t end = BUCKET_INDEX_FOR_PARTITION(i + 1, hash_table->size_log2); |
| 623 | |
| 624 | fprintf(stderr, " partition %zu\n" , i); |
| 625 | fprintf(stderr, |
| 626 | " active buckets (key count = %zu)\n" , partition->count); |
| 627 | |
| 628 | for (j = begin; j < end; ++j) |
| 629 | { |
| 630 | size_t count = 0; |
| 631 | dsa_pointer bucket = hash_table->buckets[j]; |
| 632 | |
| 633 | while (DsaPointerIsValid(bucket)) |
| 634 | { |
| 635 | dshash_table_item *item; |
| 636 | |
| 637 | item = dsa_get_address(hash_table->area, bucket); |
| 638 | |
| 639 | bucket = item->next; |
| 640 | ++count; |
| 641 | } |
| 642 | fprintf(stderr, " bucket %zu (key count = %zu)\n" , j, count); |
| 643 | } |
| 644 | } |
| 645 | |
| 646 | for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i) |
| 647 | LWLockRelease(PARTITION_LOCK(hash_table, i)); |
| 648 | } |
| 649 | |
| 650 | /* |
| 651 | * Delete a locked item to which we have a pointer. |
| 652 | */ |
| 653 | static void |
| 654 | delete_item(dshash_table *hash_table, dshash_table_item *item) |
| 655 | { |
| 656 | size_t hash = item->hash; |
| 657 | size_t partition = PARTITION_FOR_HASH(hash); |
| 658 | |
| 659 | Assert(LWLockHeldByMe(PARTITION_LOCK(hash_table, partition))); |
| 660 | |
| 661 | if (delete_item_from_bucket(hash_table, item, |
| 662 | &BUCKET_FOR_HASH(hash_table, hash))) |
| 663 | { |
| 664 | Assert(hash_table->control->partitions[partition].count > 0); |
| 665 | --hash_table->control->partitions[partition].count; |
| 666 | } |
| 667 | else |
| 668 | { |
| 669 | Assert(false); |
| 670 | } |
| 671 | } |
| 672 | |
| 673 | /* |
| 674 | * Grow the hash table if necessary to the requested number of buckets. The |
| 675 | * requested size must be double some previously observed size. |
| 676 | * |
| 677 | * Must be called without any partition lock held. |
| 678 | */ |
| 679 | static void |
| 680 | resize(dshash_table *hash_table, size_t new_size_log2) |
| 681 | { |
| 682 | dsa_pointer old_buckets; |
| 683 | dsa_pointer new_buckets_shared; |
| 684 | dsa_pointer *new_buckets; |
| 685 | size_t size; |
| 686 | size_t new_size = ((size_t) 1) << new_size_log2; |
| 687 | size_t i; |
| 688 | |
| 689 | /* |
| 690 | * Acquire the locks for all lock partitions. This is expensive, but we |
| 691 | * shouldn't have to do it many times. |
| 692 | */ |
| 693 | for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i) |
| 694 | { |
| 695 | Assert(!LWLockHeldByMe(PARTITION_LOCK(hash_table, i))); |
| 696 | |
| 697 | LWLockAcquire(PARTITION_LOCK(hash_table, i), LW_EXCLUSIVE); |
| 698 | if (i == 0 && hash_table->control->size_log2 >= new_size_log2) |
| 699 | { |
| 700 | /* |
| 701 | * Another backend has already increased the size; we can avoid |
| 702 | * obtaining all the locks and return early. |
| 703 | */ |
| 704 | LWLockRelease(PARTITION_LOCK(hash_table, 0)); |
| 705 | return; |
| 706 | } |
| 707 | } |
| 708 | |
| 709 | Assert(new_size_log2 == hash_table->control->size_log2 + 1); |
| 710 | |
| 711 | /* Allocate the space for the new table. */ |
| 712 | new_buckets_shared = dsa_allocate0(hash_table->area, |
| 713 | sizeof(dsa_pointer) * new_size); |
| 714 | new_buckets = dsa_get_address(hash_table->area, new_buckets_shared); |
| 715 | |
| 716 | /* |
| 717 | * We've allocated the new bucket array; all that remains to do now is to |
| 718 | * reinsert all items, which amounts to adjusting all the pointers. |
| 719 | */ |
| 720 | size = ((size_t) 1) << hash_table->control->size_log2; |
| 721 | for (i = 0; i < size; ++i) |
| 722 | { |
| 723 | dsa_pointer item_pointer = hash_table->buckets[i]; |
| 724 | |
| 725 | while (DsaPointerIsValid(item_pointer)) |
| 726 | { |
| 727 | dshash_table_item *item; |
| 728 | dsa_pointer next_item_pointer; |
| 729 | |
| 730 | item = dsa_get_address(hash_table->area, item_pointer); |
| 731 | next_item_pointer = item->next; |
| 732 | insert_item_into_bucket(hash_table, item_pointer, item, |
| 733 | &new_buckets[BUCKET_INDEX_FOR_HASH_AND_SIZE(item->hash, |
| 734 | new_size_log2)]); |
| 735 | item_pointer = next_item_pointer; |
| 736 | } |
| 737 | } |
| 738 | |
| 739 | /* Swap the hash table into place and free the old one. */ |
| 740 | old_buckets = hash_table->control->buckets; |
| 741 | hash_table->control->buckets = new_buckets_shared; |
| 742 | hash_table->control->size_log2 = new_size_log2; |
| 743 | hash_table->buckets = new_buckets; |
| 744 | dsa_free(hash_table->area, old_buckets); |
| 745 | |
| 746 | /* Release all the locks. */ |
| 747 | for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i) |
| 748 | LWLockRelease(PARTITION_LOCK(hash_table, i)); |
| 749 | } |
| 750 | |
| 751 | /* |
| 752 | * Make sure that our backend-local bucket pointers are up to date. The |
| 753 | * caller must have locked one lock partition, which prevents resize() from |
| 754 | * running concurrently. |
| 755 | */ |
| 756 | static inline void |
| 757 | ensure_valid_bucket_pointers(dshash_table *hash_table) |
| 758 | { |
| 759 | if (hash_table->size_log2 != hash_table->control->size_log2) |
| 760 | { |
| 761 | hash_table->buckets = dsa_get_address(hash_table->area, |
| 762 | hash_table->control->buckets); |
| 763 | hash_table->size_log2 = hash_table->control->size_log2; |
| 764 | } |
| 765 | } |
| 766 | |
| 767 | /* |
| 768 | * Scan a locked bucket for a match, using the provided compare function. |
| 769 | */ |
| 770 | static inline dshash_table_item * |
| 771 | find_in_bucket(dshash_table *hash_table, const void *key, |
| 772 | dsa_pointer item_pointer) |
| 773 | { |
| 774 | while (DsaPointerIsValid(item_pointer)) |
| 775 | { |
| 776 | dshash_table_item *item; |
| 777 | |
| 778 | item = dsa_get_address(hash_table->area, item_pointer); |
| 779 | if (equal_keys(hash_table, key, ENTRY_FROM_ITEM(item))) |
| 780 | return item; |
| 781 | item_pointer = item->next; |
| 782 | } |
| 783 | return NULL; |
| 784 | } |
| 785 | |
| 786 | /* |
| 787 | * Insert an already-allocated item into a bucket. |
| 788 | */ |
| 789 | static void |
| 790 | insert_item_into_bucket(dshash_table *hash_table, |
| 791 | dsa_pointer item_pointer, |
| 792 | dshash_table_item *item, |
| 793 | dsa_pointer *bucket) |
| 794 | { |
| 795 | Assert(item == dsa_get_address(hash_table->area, item_pointer)); |
| 796 | |
| 797 | item->next = *bucket; |
| 798 | *bucket = item_pointer; |
| 799 | } |
| 800 | |
| 801 | /* |
| 802 | * Allocate space for an entry with the given key and insert it into the |
| 803 | * provided bucket. |
| 804 | */ |
| 805 | static dshash_table_item * |
| 806 | insert_into_bucket(dshash_table *hash_table, |
| 807 | const void *key, |
| 808 | dsa_pointer *bucket) |
| 809 | { |
| 810 | dsa_pointer item_pointer; |
| 811 | dshash_table_item *item; |
| 812 | |
| 813 | item_pointer = dsa_allocate(hash_table->area, |
| 814 | hash_table->params.entry_size + |
| 815 | MAXALIGN(sizeof(dshash_table_item))); |
| 816 | item = dsa_get_address(hash_table->area, item_pointer); |
| 817 | memcpy(ENTRY_FROM_ITEM(item), key, hash_table->params.key_size); |
| 818 | insert_item_into_bucket(hash_table, item_pointer, item, bucket); |
| 819 | return item; |
| 820 | } |
| 821 | |
| 822 | /* |
| 823 | * Search a bucket for a matching key and delete it. |
| 824 | */ |
| 825 | static bool |
| 826 | delete_key_from_bucket(dshash_table *hash_table, |
| 827 | const void *key, |
| 828 | dsa_pointer *bucket_head) |
| 829 | { |
| 830 | while (DsaPointerIsValid(*bucket_head)) |
| 831 | { |
| 832 | dshash_table_item *item; |
| 833 | |
| 834 | item = dsa_get_address(hash_table->area, *bucket_head); |
| 835 | |
| 836 | if (equal_keys(hash_table, key, ENTRY_FROM_ITEM(item))) |
| 837 | { |
| 838 | dsa_pointer next; |
| 839 | |
| 840 | next = item->next; |
| 841 | dsa_free(hash_table->area, *bucket_head); |
| 842 | *bucket_head = next; |
| 843 | |
| 844 | return true; |
| 845 | } |
| 846 | bucket_head = &item->next; |
| 847 | } |
| 848 | return false; |
| 849 | } |
| 850 | |
| 851 | /* |
| 852 | * Delete the specified item from the bucket. |
| 853 | */ |
| 854 | static bool |
| 855 | delete_item_from_bucket(dshash_table *hash_table, |
| 856 | dshash_table_item *item, |
| 857 | dsa_pointer *bucket_head) |
| 858 | { |
| 859 | while (DsaPointerIsValid(*bucket_head)) |
| 860 | { |
| 861 | dshash_table_item *bucket_item; |
| 862 | |
| 863 | bucket_item = dsa_get_address(hash_table->area, *bucket_head); |
| 864 | |
| 865 | if (bucket_item == item) |
| 866 | { |
| 867 | dsa_pointer next; |
| 868 | |
| 869 | next = item->next; |
| 870 | dsa_free(hash_table->area, *bucket_head); |
| 871 | *bucket_head = next; |
| 872 | return true; |
| 873 | } |
| 874 | bucket_head = &bucket_item->next; |
| 875 | } |
| 876 | return false; |
| 877 | } |
| 878 | |
| 879 | /* |
| 880 | * Compute the hash value for a key. |
| 881 | */ |
| 882 | static inline dshash_hash |
| 883 | hash_key(dshash_table *hash_table, const void *key) |
| 884 | { |
| 885 | return hash_table->params.hash_function(key, |
| 886 | hash_table->params.key_size, |
| 887 | hash_table->arg); |
| 888 | } |
| 889 | |
| 890 | /* |
| 891 | * Check whether two keys compare equal. |
| 892 | */ |
| 893 | static inline bool |
| 894 | equal_keys(dshash_table *hash_table, const void *a, const void *b) |
| 895 | { |
| 896 | return hash_table->params.compare_function(a, b, |
| 897 | hash_table->params.key_size, |
| 898 | hash_table->arg) == 0; |
| 899 | } |
| 900 | |