/*-------------------------------------------------------------------------
 *
 * dshash.c
 *	  Concurrent hash tables backed by dynamic shared memory areas.
 *
 * This is an open hashing hash table, with a linked list at each table
 * entry.  It supports dynamic resizing, as required to prevent the linked
 * lists from growing too long on average.  Currently, only growing is
 * supported: the hash table never becomes smaller.
 *
 * To deal with concurrency, it has a fixed size set of partitions, each of
 * which is independently locked.  Each bucket maps to a partition; so insert,
 * find and iterate operations normally only acquire one lock.  Therefore,
 * good concurrency is achieved whenever such operations don't collide at the
 * lock partition level.  However, when a resize operation begins, all
 * partition locks must be acquired simultaneously for a brief period.  This
 * is only expected to happen a small number of times until a stable size is
 * found, since growth is geometric.
 *
 * Future versions may support iterators and incremental resizing; for now
 * the implementation is minimalist.
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/lib/dshash.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "lib/dshash.h"
#include "storage/ipc.h"
#include "storage/lwlock.h"
#include "utils/dsa.h"
#include "utils/hsearch.h"
#include "utils/memutils.h"

/*
 * An item in the hash table.  This wraps the user's entry object in an
 * envelope that holds the precomputed hash and a pointer to the next item
 * in the same bucket.
 */
struct dshash_table_item
{
	/* The next item in the same bucket. */
	dsa_pointer next;
	/* The hashed key, to avoid having to recompute it. */
	dshash_hash hash;
	/* The user's entry object follows here.  See ENTRY_FROM_ITEM(item). */
};

/*
 * The number of partitions for locking purposes.  This is set to match
 * NUM_BUFFER_PARTITIONS for now, on the basis that whatever's good enough for
 * the buffer pool must be good enough for any other purpose.  This could
 * become a runtime parameter in future.
 */
#define DSHASH_NUM_PARTITIONS_LOG2 7
#define DSHASH_NUM_PARTITIONS (1 << DSHASH_NUM_PARTITIONS_LOG2)

/* A magic value used to identify our hash tables. */
#define DSHASH_MAGIC 0x75ff6a20

/*
 * Tracking information for each lock partition.  Initially, each partition
 * corresponds to one bucket, but each time the hash table grows, the buckets
 * covered by each partition split so the number of buckets covered doubles.
 *
 * We might want to add padding here so that each partition is on a different
 * cache line, but doing so would bloat this structure considerably.
 */
typedef struct dshash_partition
{
	LWLock		lock;			/* Protects all buckets in this partition. */
	size_t		count;			/* # of items in this partition's buckets */
} dshash_partition;

/*
 * The head object for a hash table.  This will be stored in dynamic shared
 * memory.
 */
typedef struct dshash_table_control
{
	dshash_table_handle handle;
	uint32		magic;
	dshash_partition partitions[DSHASH_NUM_PARTITIONS];
	int			lwlock_tranche_id;

	/*
	 * The following members are written to only when ALL partition locks are
	 * held.  They can be read when any one partition lock is held.
	 */

	size_t		size_log2;		/* log2(number of buckets); e.g. 8 = 256 */
	dsa_pointer buckets;		/* current bucket array */
} dshash_table_control;

/*
 * Per-backend state for a dynamic hash table.
 */
struct dshash_table
{
	dsa_area   *area;			/* Backing dynamic shared memory area. */
	dshash_parameters params;	/* Parameters. */
	void	   *arg;			/* User-supplied data pointer. */
	dshash_table_control *control;	/* Control object in DSM. */
	dsa_pointer *buckets;		/* Current bucket pointers in DSM. */
	size_t		size_log2;		/* log2(number of buckets) */
	bool		find_locked;	/* Is any partition lock held by 'find'? */
	bool		find_exclusively_locked;	/* ... exclusively? */
};

/* Given a pointer to an item, find the entry (user data) it holds. */
#define ENTRY_FROM_ITEM(item) \
	((char *)(item) + MAXALIGN(sizeof(dshash_table_item)))

/* Given a pointer to an entry, find the item that holds it. */
#define ITEM_FROM_ENTRY(entry) \
	((dshash_table_item *)((char *)(entry) - \
						   MAXALIGN(sizeof(dshash_table_item))))
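
/*
 * Illustrative layout of one allocated item (a sketch only, not used by the
 * code): the envelope is stored first, padded to MAXALIGN, and the user's
 * entry (whose first key_size bytes are the key) follows immediately.
 *
 *	+---------------------------------+-----------------------------------+
 *	| dshash_table_item (MAXALIGN'd)  | user entry (params.entry_size)    |
 *	| next, hash                      | key bytes first, then other data  |
 *	+---------------------------------+-----------------------------------+
 *
 * ENTRY_FROM_ITEM and ITEM_FROM_ENTRY simply step forwards or backwards over
 * the MAXALIGN'd envelope.
 */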

/* How many resize operations (bucket splits) have there been? */
#define NUM_SPLITS(size_log2) \
	(size_log2 - DSHASH_NUM_PARTITIONS_LOG2)

/* How many buckets are there in each partition at a given size? */
#define BUCKETS_PER_PARTITION(size_log2) \
	(((size_t) 1) << NUM_SPLITS(size_log2))

/* Max entries before we need to grow.  Half + quarter = 75% load factor. */
#define MAX_COUNT_PER_PARTITION(hash_table) \
	(BUCKETS_PER_PARTITION(hash_table->size_log2) / 2 + \
	 BUCKETS_PER_PARTITION(hash_table->size_log2) / 4)

/* Choose partition based on the highest order bits of the hash. */
#define PARTITION_FOR_HASH(hash) \
	(hash >> ((sizeof(dshash_hash) * CHAR_BIT) - DSHASH_NUM_PARTITIONS_LOG2))

/*
 * Find the bucket index for a given hash and table size.  Each time the
 * table doubles in size, the appropriate bucket index for a given hash value
 * doubles and possibly adds one, depending on the newly revealed bit, so
 * that all buckets are split.
 */
#define BUCKET_INDEX_FOR_HASH_AND_SIZE(hash, size_log2) \
	(hash >> ((sizeof(dshash_hash) * CHAR_BIT) - (size_log2)))

/* The index of the first bucket in a given partition. */
#define BUCKET_INDEX_FOR_PARTITION(partition, size_log2) \
	((partition) << NUM_SPLITS(size_log2))

/* The head of the active bucket for a given hash value (lvalue). */
#define BUCKET_FOR_HASH(hash_table, hash) \
	(hash_table->buckets[ \
		BUCKET_INDEX_FOR_HASH_AND_SIZE(hash, \
									   hash_table->size_log2)])
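
/*
 * Worked example (illustrative only; assumes the 32-bit dshash_hash type):
 * for hash = 0xC0DEBEEF and size_log2 = 8,
 *
 *	PARTITION_FOR_HASH(hash)                = hash >> 25 = 96
 *	BUCKET_INDEX_FOR_HASH_AND_SIZE(hash, 8) = hash >> 24 = 192
 *	BUCKET_INDEX_FOR_PARTITION(96, 8)       = 96 << NUM_SPLITS(8) = 192
 *
 * so partition 96 covers buckets 192 and 193, and a further doubling of the
 * table splits each of those buckets in two while keeping them in the same
 * partition.
 */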

static void delete_item(dshash_table *hash_table,
						dshash_table_item *item);
static void resize(dshash_table *hash_table, size_t new_size_log2);
static inline void ensure_valid_bucket_pointers(dshash_table *hash_table);
static inline dshash_table_item *find_in_bucket(dshash_table *hash_table,
												const void *key,
												dsa_pointer item_pointer);
static void insert_item_into_bucket(dshash_table *hash_table,
									dsa_pointer item_pointer,
									dshash_table_item *item,
									dsa_pointer *bucket);
static dshash_table_item *insert_into_bucket(dshash_table *hash_table,
											 const void *key,
											 dsa_pointer *bucket);
static bool delete_key_from_bucket(dshash_table *hash_table,
								   const void *key,
								   dsa_pointer *bucket_head);
static bool delete_item_from_bucket(dshash_table *hash_table,
									dshash_table_item *item,
									dsa_pointer *bucket_head);
static inline dshash_hash hash_key(dshash_table *hash_table, const void *key);
static inline bool equal_keys(dshash_table *hash_table,
							  const void *a, const void *b);

#define PARTITION_LOCK(hash_table, i) \
	(&(hash_table)->control->partitions[(i)].lock)

/*
 * Create a new hash table backed by the given dynamic shared area, with the
 * given parameters.  The returned object is allocated in backend-local memory
 * using the current MemoryContext.  'arg' will be passed through to the
 * compare and hash functions.
 */
dshash_table *
dshash_create(dsa_area *area, const dshash_parameters *params, void *arg)
{
	dshash_table *hash_table;
	dsa_pointer control;

	/* Allocate the backend-local object representing the hash table. */
	hash_table = palloc(sizeof(dshash_table));

	/* Allocate the control object in shared memory. */
	control = dsa_allocate(area, sizeof(dshash_table_control));

	/* Set up the local and shared hash table structs. */
	hash_table->area = area;
	hash_table->params = *params;
	hash_table->arg = arg;
	hash_table->control = dsa_get_address(area, control);
	hash_table->control->handle = control;
	hash_table->control->magic = DSHASH_MAGIC;
	hash_table->control->lwlock_tranche_id = params->tranche_id;

	/* Set up the array of lock partitions. */
	{
		dshash_partition *partitions = hash_table->control->partitions;
		int			tranche_id = hash_table->control->lwlock_tranche_id;
		int			i;

		for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
		{
			LWLockInitialize(&partitions[i].lock, tranche_id);
			partitions[i].count = 0;
		}
	}

	hash_table->find_locked = false;
	hash_table->find_exclusively_locked = false;

	/*
	 * Set up the initial array of buckets.  Our initial size is the same as
	 * the number of partitions.
	 */
	hash_table->control->size_log2 = DSHASH_NUM_PARTITIONS_LOG2;
	hash_table->control->buckets =
		dsa_allocate_extended(area,
							  sizeof(dsa_pointer) * DSHASH_NUM_PARTITIONS,
							  DSA_ALLOC_NO_OOM | DSA_ALLOC_ZERO);
	if (!DsaPointerIsValid(hash_table->control->buckets))
	{
		dsa_free(area, control);
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of memory"),
				 errdetail("Failed on DSA request of size %zu.",
						   sizeof(dsa_pointer) * DSHASH_NUM_PARTITIONS)));
	}
	hash_table->buckets = dsa_get_address(area,
										  hash_table->control->buckets);
	hash_table->size_log2 = hash_table->control->size_log2;

	return hash_table;
}
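
/*
 * Usage sketch (illustrative only): a backend that owns a dsa_area might
 * create a table of fixed-size entries whose first bytes are the key.  The
 * "example_entry" type and "example_tranche_id" are hypothetical, and the
 * designated initializers assume the dshash_parameters fields declared in
 * dshash.h.
 *
 *	typedef struct example_entry
 *	{
 *		uint32		key;		(the first key_size bytes are the key)
 *		uint32		value;
 *	} example_entry;
 *
 *	dshash_parameters params = {
 *		.key_size = sizeof(uint32),
 *		.entry_size = sizeof(example_entry),
 *		.compare_function = dshash_memcmp,
 *		.hash_function = dshash_memhash,
 *		.tranche_id = example_tranche_id
 *	};
 *
 *	dshash_table *ht = dshash_create(area, &params, NULL);
 *	dshash_table_handle handle = dshash_get_hash_table_handle(ht);
 *
 * The creator would then publish 'handle' (and the dsa_area) somewhere other
 * backends can find them.
 */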

/*
 * Attach to an existing hash table using a handle.  The returned object is
 * allocated in backend-local memory using the current MemoryContext.  'arg'
 * will be passed through to the compare and hash functions.
 */
dshash_table *
dshash_attach(dsa_area *area, const dshash_parameters *params,
			  dshash_table_handle handle, void *arg)
{
	dshash_table *hash_table;
	dsa_pointer control;

	/* Allocate the backend-local object representing the hash table. */
	hash_table = palloc(sizeof(dshash_table));

	/* Find the control object in shared memory. */
	control = handle;

	/* Set up the local hash table struct. */
	hash_table->area = area;
	hash_table->params = *params;
	hash_table->arg = arg;
	hash_table->control = dsa_get_address(area, control);
	hash_table->find_locked = false;
	hash_table->find_exclusively_locked = false;
	Assert(hash_table->control->magic == DSHASH_MAGIC);

	/*
	 * These will later be set to the correct values by
	 * ensure_valid_bucket_pointers(), at which time we'll be holding a
	 * partition lock for interlocking against concurrent resizing.
	 */
	hash_table->buckets = NULL;
	hash_table->size_log2 = 0;

	return hash_table;
}
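
/*
 * Usage sketch (illustrative only): another backend, given the same dsa_area
 * and the handle published by the creator, can attach using matching
 * parameters ("params" is the hypothetical structure from the dshash_create
 * sketch above) and later detach when done:
 *
 *	dshash_table *ht = dshash_attach(area, &params, handle, NULL);
 *	... use the table ...
 *	dshash_detach(ht);
 */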

/*
 * Detach from a hash table.  This frees backend-local resources associated
 * with the hash table, but the hash table will continue to exist until it is
 * either explicitly destroyed (by a backend that is still attached to it), or
 * the area that backs it is returned to the operating system.
 */
void
dshash_detach(dshash_table *hash_table)
{
	Assert(!hash_table->find_locked);

	/* The hash table may have been destroyed.  Just free local memory. */
	pfree(hash_table);
}

/*
 * Destroy a hash table, returning all memory to the area.  The caller must be
 * certain that no other backend will attempt to access the hash table before
 * calling this function.  Other backends must explicitly call dshash_detach
 * to free up backend-local memory associated with the hash table.  The
 * backend that calls dshash_destroy must not call dshash_detach.
 */
void
dshash_destroy(dshash_table *hash_table)
{
	size_t		size;
	size_t		i;

	Assert(hash_table->control->magic == DSHASH_MAGIC);
	ensure_valid_bucket_pointers(hash_table);

	/* Free all the entries. */
	size = ((size_t) 1) << hash_table->size_log2;
	for (i = 0; i < size; ++i)
	{
		dsa_pointer item_pointer = hash_table->buckets[i];

		while (DsaPointerIsValid(item_pointer))
		{
			dshash_table_item *item;
			dsa_pointer next_item_pointer;

			item = dsa_get_address(hash_table->area, item_pointer);
			next_item_pointer = item->next;
			dsa_free(hash_table->area, item_pointer);
			item_pointer = next_item_pointer;
		}
	}

	/*
	 * Vandalize the control block to help catch programming errors where
	 * other backends access the memory formerly occupied by this hash table.
	 */
	hash_table->control->magic = 0;

	/* Free the active table and control object. */
	dsa_free(hash_table->area, hash_table->control->buckets);
	dsa_free(hash_table->area, hash_table->control->handle);

	pfree(hash_table);
}

/*
 * Get a handle that can be used by other processes to attach to this hash
 * table.
 */
dshash_table_handle
dshash_get_hash_table_handle(dshash_table *hash_table)
{
	Assert(hash_table->control->magic == DSHASH_MAGIC);

	return hash_table->control->handle;
}

/*
 * Look up an entry, given a key.  Returns a pointer to an entry if one can be
 * found with the given key.  Returns NULL if the key is not found.  If a
 * non-NULL value is returned, the entry is locked and must be released by
 * calling dshash_release_lock.  If an error is raised before
 * dshash_release_lock is called, the lock will be released automatically, but
 * the caller must take care to ensure that the entry is not left corrupted.
 * The lock mode is either shared or exclusive depending on 'exclusive'.
 *
 * The caller must not already hold a lock on an entry in this hash table.
 *
 * Note that the lock held is in fact an LWLock, so interrupts will be held
 * off on return from this function, and not serviced again until
 * dshash_release_lock is called.  It is a very good idea for the caller to
 * release the lock quickly.
 */
void *
dshash_find(dshash_table *hash_table, const void *key, bool exclusive)
{
	dshash_hash hash;
	size_t		partition;
	dshash_table_item *item;

	hash = hash_key(hash_table, key);
	partition = PARTITION_FOR_HASH(hash);

	Assert(hash_table->control->magic == DSHASH_MAGIC);
	Assert(!hash_table->find_locked);

	LWLockAcquire(PARTITION_LOCK(hash_table, partition),
				  exclusive ? LW_EXCLUSIVE : LW_SHARED);
	ensure_valid_bucket_pointers(hash_table);

	/* Search the active bucket. */
	item = find_in_bucket(hash_table, key, BUCKET_FOR_HASH(hash_table, hash));

	if (!item)
	{
		/* Not found. */
		LWLockRelease(PARTITION_LOCK(hash_table, partition));
		return NULL;
	}
	else
	{
		/* The caller will release the lock by calling dshash_release_lock. */
		hash_table->find_locked = true;
		hash_table->find_exclusively_locked = exclusive;
		return ENTRY_FROM_ITEM(item);
	}
}
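
/*
 * Usage sketch (illustrative only, continuing the hypothetical example_entry
 * table from above): look up a key, copy what we need while holding a shared
 * lock, and release the lock promptly.
 *
 *	uint32		key = 42;
 *	example_entry *entry = dshash_find(ht, &key, false);
 *
 *	if (entry != NULL)
 *	{
 *		uint32		value = entry->value;
 *
 *		dshash_release_lock(ht, entry);
 *		(use 'value' now that the lock is released)
 *	}
 */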

/*
 * Returns a pointer to an exclusively locked item which must be released with
 * dshash_release_lock.  If the key is found in the hash table, 'found' is set
 * to true and a pointer to the existing entry is returned.  If the key is not
 * found, 'found' is set to false, and a pointer to a newly created entry is
 * returned.
 *
 * Notes above dshash_find() regarding locking and error handling equally
 * apply here.
 */
void *
dshash_find_or_insert(dshash_table *hash_table,
					  const void *key,
					  bool *found)
{
	dshash_hash hash;
	size_t		partition_index;
	dshash_partition *partition;
	dshash_table_item *item;

	hash = hash_key(hash_table, key);
	partition_index = PARTITION_FOR_HASH(hash);
	partition = &hash_table->control->partitions[partition_index];

	Assert(hash_table->control->magic == DSHASH_MAGIC);
	Assert(!hash_table->find_locked);

restart:
	LWLockAcquire(PARTITION_LOCK(hash_table, partition_index),
				  LW_EXCLUSIVE);
	ensure_valid_bucket_pointers(hash_table);

	/* Search the active bucket. */
	item = find_in_bucket(hash_table, key, BUCKET_FOR_HASH(hash_table, hash));

	if (item)
		*found = true;
	else
	{
		*found = false;

		/* Check if we are getting too full. */
		if (partition->count > MAX_COUNT_PER_PARTITION(hash_table))
		{
			/*
			 * The load factor (= keys / buckets) for all buckets protected by
			 * this partition is > 0.75.  Presumably the same applies
			 * generally across the whole hash table (though we don't attempt
			 * to track that directly to avoid contention on some kind of
			 * central counter; we just assume that this partition is
			 * representative).  This is a good time to resize.
			 *
			 * Give up our existing lock first, because resizing needs to
			 * reacquire all the locks in the right order to avoid deadlocks.
			 */
			LWLockRelease(PARTITION_LOCK(hash_table, partition_index));
			resize(hash_table, hash_table->size_log2 + 1);

			goto restart;
		}

		/* Finally we can try to insert the new item. */
		item = insert_into_bucket(hash_table, key,
								  &BUCKET_FOR_HASH(hash_table, hash));
		item->hash = hash;
		/* Adjust per-lock-partition counter for load factor knowledge. */
		++partition->count;
	}

	/* The caller must release the lock with dshash_release_lock. */
	hash_table->find_locked = true;
	hash_table->find_exclusively_locked = true;
	return ENTRY_FROM_ITEM(item);
}
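
/*
 * Usage sketch (illustrative only, same hypothetical table): insert-or-update
 * under the exclusive lock returned by dshash_find_or_insert.  A newly
 * created entry has only its key bytes initialized, so the caller must fill
 * in the remaining fields before releasing the lock.
 *
 *	bool		found;
 *	uint32		key = 42;
 *	example_entry *entry = dshash_find_or_insert(ht, &key, &found);
 *
 *	if (!found)
 *		entry->value = 0;
 *	entry->value++;
 *	dshash_release_lock(ht, entry);
 */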

/*
 * Remove an entry by key.  Returns true if the key was found and the
 * corresponding entry was removed.
 *
 * To delete an entry that you already have a pointer to, see
 * dshash_delete_entry.
 */
bool
dshash_delete_key(dshash_table *hash_table, const void *key)
{
	dshash_hash hash;
	size_t		partition;
	bool		found;

	Assert(hash_table->control->magic == DSHASH_MAGIC);
	Assert(!hash_table->find_locked);

	hash = hash_key(hash_table, key);
	partition = PARTITION_FOR_HASH(hash);

	LWLockAcquire(PARTITION_LOCK(hash_table, partition), LW_EXCLUSIVE);
	ensure_valid_bucket_pointers(hash_table);

	if (delete_key_from_bucket(hash_table, key,
							   &BUCKET_FOR_HASH(hash_table, hash)))
	{
		Assert(hash_table->control->partitions[partition].count > 0);
		found = true;
		--hash_table->control->partitions[partition].count;
	}
	else
		found = false;

	LWLockRelease(PARTITION_LOCK(hash_table, partition));

	return found;
}

/*
 * Remove an entry.  The entry must already be exclusively locked, and must
 * have been obtained by dshash_find or dshash_find_or_insert.  Note that this
 * function releases the lock just like dshash_release_lock.
 *
 * To delete an entry by key, see dshash_delete_key.
 */
void
dshash_delete_entry(dshash_table *hash_table, void *entry)
{
	dshash_table_item *item = ITEM_FROM_ENTRY(entry);
	size_t		partition = PARTITION_FOR_HASH(item->hash);

	Assert(hash_table->control->magic == DSHASH_MAGIC);
	Assert(hash_table->find_locked);
	Assert(hash_table->find_exclusively_locked);
	Assert(LWLockHeldByMeInMode(PARTITION_LOCK(hash_table, partition),
								LW_EXCLUSIVE));

	delete_item(hash_table, item);
	hash_table->find_locked = false;
	hash_table->find_exclusively_locked = false;
	LWLockRelease(PARTITION_LOCK(hash_table, partition));
}
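
/*
 * Usage sketch (illustrative only, same hypothetical table): delete an entry
 * that was first located with an exclusive lock; dshash_delete_entry releases
 * the lock, so no separate dshash_release_lock call is needed here.
 *
 *	example_entry *entry = dshash_find(ht, &key, true);
 *
 *	if (entry != NULL)
 *		dshash_delete_entry(ht, entry);
 */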

/*
 * Unlock an entry which was locked by dshash_find or dshash_find_or_insert.
 */
void
dshash_release_lock(dshash_table *hash_table, void *entry)
{
	dshash_table_item *item = ITEM_FROM_ENTRY(entry);
	size_t		partition_index = PARTITION_FOR_HASH(item->hash);

	Assert(hash_table->control->magic == DSHASH_MAGIC);
	Assert(hash_table->find_locked);
	Assert(LWLockHeldByMeInMode(PARTITION_LOCK(hash_table, partition_index),
								hash_table->find_exclusively_locked
								? LW_EXCLUSIVE : LW_SHARED));

	hash_table->find_locked = false;
	hash_table->find_exclusively_locked = false;
	LWLockRelease(PARTITION_LOCK(hash_table, partition_index));
}

/*
 * A compare function that forwards to memcmp.
 */
int
dshash_memcmp(const void *a, const void *b, size_t size, void *arg)
{
	return memcmp(a, b, size);
}

/*
 * A hash function that forwards to tag_hash.
 */
dshash_hash
dshash_memhash(const void *v, size_t size, void *arg)
{
	return tag_hash(v, size);
}

/*
 * Print debugging information about the internal state of the hash table to
 * stderr.  The caller must hold no partition locks.
 */
void
dshash_dump(dshash_table *hash_table)
{
	size_t		i;
	size_t		j;

	Assert(hash_table->control->magic == DSHASH_MAGIC);
	Assert(!hash_table->find_locked);

	for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
	{
		Assert(!LWLockHeldByMe(PARTITION_LOCK(hash_table, i)));
		LWLockAcquire(PARTITION_LOCK(hash_table, i), LW_SHARED);
	}

	ensure_valid_bucket_pointers(hash_table);

	fprintf(stderr,
			"hash table size = %zu\n", (size_t) 1 << hash_table->size_log2);
	for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
	{
		dshash_partition *partition = &hash_table->control->partitions[i];
		size_t		begin = BUCKET_INDEX_FOR_PARTITION(i, hash_table->size_log2);
		size_t		end = BUCKET_INDEX_FOR_PARTITION(i + 1, hash_table->size_log2);

		fprintf(stderr, "  partition %zu\n", i);
		fprintf(stderr,
				"    active buckets (key count = %zu)\n", partition->count);

		for (j = begin; j < end; ++j)
		{
			size_t		count = 0;
			dsa_pointer bucket = hash_table->buckets[j];

			while (DsaPointerIsValid(bucket))
			{
				dshash_table_item *item;

				item = dsa_get_address(hash_table->area, bucket);

				bucket = item->next;
				++count;
			}
			fprintf(stderr, "      bucket %zu (key count = %zu)\n", j, count);
		}
	}

	for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
		LWLockRelease(PARTITION_LOCK(hash_table, i));
}

/*
 * Delete a locked item to which we have a pointer.
 */
static void
delete_item(dshash_table *hash_table, dshash_table_item *item)
{
	size_t		hash = item->hash;
	size_t		partition = PARTITION_FOR_HASH(hash);

	Assert(LWLockHeldByMe(PARTITION_LOCK(hash_table, partition)));

	if (delete_item_from_bucket(hash_table, item,
								&BUCKET_FOR_HASH(hash_table, hash)))
	{
		Assert(hash_table->control->partitions[partition].count > 0);
		--hash_table->control->partitions[partition].count;
	}
	else
	{
		Assert(false);
	}
}

/*
 * Grow the hash table if necessary to the requested number of buckets.  The
 * requested size must be double some previously observed size.
 *
 * Must be called without any partition lock held.
 */
static void
resize(dshash_table *hash_table, size_t new_size_log2)
{
	dsa_pointer old_buckets;
	dsa_pointer new_buckets_shared;
	dsa_pointer *new_buckets;
	size_t		size;
	size_t		new_size = ((size_t) 1) << new_size_log2;
	size_t		i;

	/*
	 * Acquire the locks for all lock partitions.  This is expensive, but we
	 * shouldn't have to do it many times.
	 */
	for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
	{
		Assert(!LWLockHeldByMe(PARTITION_LOCK(hash_table, i)));

		LWLockAcquire(PARTITION_LOCK(hash_table, i), LW_EXCLUSIVE);
		if (i == 0 && hash_table->control->size_log2 >= new_size_log2)
		{
			/*
			 * Another backend has already increased the size; we can avoid
			 * obtaining all the locks and return early.
			 */
			LWLockRelease(PARTITION_LOCK(hash_table, 0));
			return;
		}
	}

	Assert(new_size_log2 == hash_table->control->size_log2 + 1);

	/* Allocate the space for the new table. */
	new_buckets_shared = dsa_allocate0(hash_table->area,
									   sizeof(dsa_pointer) * new_size);
	new_buckets = dsa_get_address(hash_table->area, new_buckets_shared);

	/*
	 * We've allocated the new bucket array; all that remains to do now is to
	 * reinsert all items, which amounts to adjusting all the pointers.
	 */
	size = ((size_t) 1) << hash_table->control->size_log2;
	for (i = 0; i < size; ++i)
	{
		dsa_pointer item_pointer = hash_table->buckets[i];

		while (DsaPointerIsValid(item_pointer))
		{
			dshash_table_item *item;
			dsa_pointer next_item_pointer;

			item = dsa_get_address(hash_table->area, item_pointer);
			next_item_pointer = item->next;
			insert_item_into_bucket(hash_table, item_pointer, item,
									&new_buckets[BUCKET_INDEX_FOR_HASH_AND_SIZE(item->hash,
																				new_size_log2)]);
			item_pointer = next_item_pointer;
		}
	}

	/* Swap the hash table into place and free the old one. */
	old_buckets = hash_table->control->buckets;
	hash_table->control->buckets = new_buckets_shared;
	hash_table->control->size_log2 = new_size_log2;
	hash_table->buckets = new_buckets;
	dsa_free(hash_table->area, old_buckets);

	/* Release all the locks. */
	for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
		LWLockRelease(PARTITION_LOCK(hash_table, i));
}

/*
 * Make sure that our backend-local bucket pointers are up to date.  The
 * caller must have locked one lock partition, which prevents resize() from
 * running concurrently.
 */
static inline void
ensure_valid_bucket_pointers(dshash_table *hash_table)
{
	if (hash_table->size_log2 != hash_table->control->size_log2)
	{
		hash_table->buckets = dsa_get_address(hash_table->area,
											  hash_table->control->buckets);
		hash_table->size_log2 = hash_table->control->size_log2;
	}
}

/*
 * Scan a locked bucket for a match, using the provided compare function.
 */
static inline dshash_table_item *
find_in_bucket(dshash_table *hash_table, const void *key,
			   dsa_pointer item_pointer)
{
	while (DsaPointerIsValid(item_pointer))
	{
		dshash_table_item *item;

		item = dsa_get_address(hash_table->area, item_pointer);
		if (equal_keys(hash_table, key, ENTRY_FROM_ITEM(item)))
			return item;
		item_pointer = item->next;
	}
	return NULL;
}

/*
 * Insert an already-allocated item into a bucket.
 */
static void
insert_item_into_bucket(dshash_table *hash_table,
						dsa_pointer item_pointer,
						dshash_table_item *item,
						dsa_pointer *bucket)
{
	Assert(item == dsa_get_address(hash_table->area, item_pointer));

	item->next = *bucket;
	*bucket = item_pointer;
}

/*
 * Allocate space for an entry with the given key and insert it into the
 * provided bucket.
 */
static dshash_table_item *
insert_into_bucket(dshash_table *hash_table,
				   const void *key,
				   dsa_pointer *bucket)
{
	dsa_pointer item_pointer;
	dshash_table_item *item;

	item_pointer = dsa_allocate(hash_table->area,
								hash_table->params.entry_size +
								MAXALIGN(sizeof(dshash_table_item)));
	item = dsa_get_address(hash_table->area, item_pointer);
	memcpy(ENTRY_FROM_ITEM(item), key, hash_table->params.key_size);
	insert_item_into_bucket(hash_table, item_pointer, item, bucket);
	return item;
}

/*
 * Search a bucket for a matching key and delete it.
 */
static bool
delete_key_from_bucket(dshash_table *hash_table,
					   const void *key,
					   dsa_pointer *bucket_head)
{
	while (DsaPointerIsValid(*bucket_head))
	{
		dshash_table_item *item;

		item = dsa_get_address(hash_table->area, *bucket_head);

		if (equal_keys(hash_table, key, ENTRY_FROM_ITEM(item)))
		{
			dsa_pointer next;

			next = item->next;
			dsa_free(hash_table->area, *bucket_head);
			*bucket_head = next;

			return true;
		}
		bucket_head = &item->next;
	}
	return false;
}

/*
 * Delete the specified item from the bucket.
 */
static bool
delete_item_from_bucket(dshash_table *hash_table,
						dshash_table_item *item,
						dsa_pointer *bucket_head)
{
	while (DsaPointerIsValid(*bucket_head))
	{
		dshash_table_item *bucket_item;

		bucket_item = dsa_get_address(hash_table->area, *bucket_head);

		if (bucket_item == item)
		{
			dsa_pointer next;

			next = item->next;
			dsa_free(hash_table->area, *bucket_head);
			*bucket_head = next;
			return true;
		}
		bucket_head = &bucket_item->next;
	}
	return false;
}

/*
 * Compute the hash value for a key.
 */
static inline dshash_hash
hash_key(dshash_table *hash_table, const void *key)
{
	return hash_table->params.hash_function(key,
											hash_table->params.key_size,
											hash_table->arg);
}

/*
 * Check whether two keys compare equal.
 */
static inline bool
equal_keys(dshash_table *hash_table, const void *a, const void *b)
{
	return hash_table->params.compare_function(a, b,
											   hash_table->params.key_size,
											   hash_table->arg) == 0;
}