1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * dshash.c |
4 | * Concurrent hash tables backed by dynamic shared memory areas. |
5 | * |
6 | * This is an open hashing hash table, with a linked list at each table |
7 | * entry. It supports dynamic resizing, as required to prevent the linked |
8 | * lists from growing too long on average. Currently, only growing is |
9 | * supported: the hash table never becomes smaller. |
10 | * |
11 | * To deal with concurrency, it has a fixed size set of partitions, each of |
12 | * which is independently locked. Each bucket maps to a partition; so insert, |
13 | * find and iterate operations normally only acquire one lock. Therefore, |
14 | * good concurrency is achieved whenever such operations don't collide at the |
15 | * lock partition level. However, when a resize operation begins, all |
16 | * partition locks must be acquired simultaneously for a brief period. This |
17 | * is only expected to happen a small number of times until a stable size is |
18 | * found, since growth is geometric. |
19 | * |
20 | * Future versions may support iterators and incremental resizing; for now |
21 | * the implementation is minimalist. |
22 | * |
23 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
24 | * Portions Copyright (c) 1994, Regents of the University of California |
25 | * |
26 | * IDENTIFICATION |
27 | * src/backend/lib/dshash.c |
28 | * |
29 | *------------------------------------------------------------------------- |
30 | */ |
31 | |
32 | #include "postgres.h" |
33 | |
34 | #include "lib/dshash.h" |
35 | #include "storage/ipc.h" |
36 | #include "storage/lwlock.h" |
37 | #include "utils/dsa.h" |
38 | #include "utils/hsearch.h" |
39 | #include "utils/memutils.h" |
40 | |
41 | /* |
 * An item in the hash table.  This wraps the user's entry object in an
 * envelope that holds the precomputed hash of the key and a pointer to the
 * next item in the same bucket.
45 | */ |
struct dshash_table_item
{
	/* The next item in the same bucket, or InvalidDsaPointer at chain end. */
	dsa_pointer next;
	/* The hashed key, to avoid having to recompute it. */
	dshash_hash hash;
	/* The user's entry object follows here.  See ENTRY_FROM_ITEM(item). */
};
54 | |
55 | /* |
56 | * The number of partitions for locking purposes. This is set to match |
57 | * NUM_BUFFER_PARTITIONS for now, on the basis that whatever's good enough for |
58 | * the buffer pool must be good enough for any other purpose. This could |
59 | * become a runtime parameter in future. |
60 | */ |
/* 2^7 = 128 partitions. */
#define DSHASH_NUM_PARTITIONS_LOG2 7
#define DSHASH_NUM_PARTITIONS (1 << DSHASH_NUM_PARTITIONS_LOG2)

/* A magic value used to identify our hash tables. */
#define DSHASH_MAGIC 0x75ff6a20
66 | |
67 | /* |
68 | * Tracking information for each lock partition. Initially, each partition |
69 | * corresponds to one bucket, but each time the hash table grows, the buckets |
70 | * covered by each partition split so the number of buckets covered doubles. |
71 | * |
72 | * We might want to add padding here so that each partition is on a different |
73 | * cache line, but doing so would bloat this structure considerably. |
74 | */ |
typedef struct dshash_partition
{
	LWLock		lock;			/* Protects all buckets in this partition. */
	size_t		count;			/* # of items in this partition's buckets */
} dshash_partition;
80 | |
81 | /* |
82 | * The head object for a hash table. This will be stored in dynamic shared |
83 | * memory. |
84 | */ |
typedef struct dshash_table_control
{
	/* The handle is the dsa_pointer of this control object itself. */
	dshash_table_handle handle;
	/* Set to DSHASH_MAGIC while alive; zeroed by dshash_destroy(). */
	uint32		magic;
	dshash_partition partitions[DSHASH_NUM_PARTITIONS];
	int			lwlock_tranche_id;

	/*
	 * The following members are written to only when ALL partitions locks are
	 * held.  They can be read when any one partition lock is held.
	 */

	/* Number of buckets expressed as power of 2 (8 = 256 buckets). */
	size_t		size_log2;		/* log2(number of buckets) */
	dsa_pointer buckets;		/* current bucket array */
} dshash_table_control;
101 | |
102 | /* |
103 | * Per-backend state for a dynamic hash table. |
104 | */ |
struct dshash_table
{
	dsa_area   *area;			/* Backing dynamic shared memory area. */
	dshash_parameters params;	/* Parameters (copied at create/attach). */
	void	   *arg;			/* User-supplied data pointer. */
	dshash_table_control *control;	/* Control object in DSM. */
	/*
	 * Cached copies of the shared bucket array address and size; possibly
	 * stale, revalidated by ensure_valid_bucket_pointers() under a
	 * partition lock.
	 */
	dsa_pointer *buckets;		/* Current bucket pointers in DSM. */
	size_t		size_log2;		/* log2(number of buckets) */
	bool		find_locked;	/* Is any partition lock held by 'find'? */
	bool		find_exclusively_locked;	/* ... exclusively? */
};
116 | |
/* Given a pointer to an item, find the entry (user data) it holds. */
#define ENTRY_FROM_ITEM(item) \
	((char *)(item) + MAXALIGN(sizeof(dshash_table_item)))

/* Given a pointer to an entry, find the item that holds it. */
#define ITEM_FROM_ENTRY(entry) \
	((dshash_table_item *)((char *)(entry) - \
						   MAXALIGN(sizeof(dshash_table_item))))

/*
 * How many resize operations (bucket splits) have there been?  Assumes
 * size_log2 >= DSHASH_NUM_PARTITIONS_LOG2, which always holds because the
 * table starts with one bucket per partition and only grows.
 */
#define NUM_SPLITS(size_log2) \
	(size_log2 - DSHASH_NUM_PARTITIONS_LOG2)

/* How many buckets are there in each partition at a given size? */
#define BUCKETS_PER_PARTITION(size_log2) \
	(((size_t) 1) << NUM_SPLITS(size_log2))

/* Max entries before we need to grow.  Half + quarter = 75% load factor. */
#define MAX_COUNT_PER_PARTITION(hash_table) \
	(BUCKETS_PER_PARTITION(hash_table->size_log2) / 2 + \
	 BUCKETS_PER_PARTITION(hash_table->size_log2) / 4)

/* Choose partition based on the highest order bits of the hash. */
#define PARTITION_FOR_HASH(hash) \
	(hash >> ((sizeof(dshash_hash) * CHAR_BIT) - DSHASH_NUM_PARTITIONS_LOG2))

/*
 * Find the bucket index for a given hash and table size.  Each time the table
 * doubles in size, the appropriate bucket for a given hash value doubles and
 * possibly adds one, depending on the newly revealed bit, so that all buckets
 * are split.
 */
#define BUCKET_INDEX_FOR_HASH_AND_SIZE(hash, size_log2) \
	(hash >> ((sizeof(dshash_hash) * CHAR_BIT) - (size_log2)))

/* The index of the first bucket in a given partition. */
#define BUCKET_INDEX_FOR_PARTITION(partition, size_log2) \
	((partition) << NUM_SPLITS(size_log2))

/* The head of the active bucket for a given hash value (lvalue). */
#define BUCKET_FOR_HASH(hash_table, hash) \
	(hash_table->buckets[ \
		BUCKET_INDEX_FOR_HASH_AND_SIZE(hash, \
									   hash_table->size_log2)])
161 | |
162 | static void delete_item(dshash_table *hash_table, |
163 | dshash_table_item *item); |
164 | static void resize(dshash_table *hash_table, size_t new_size); |
165 | static inline void ensure_valid_bucket_pointers(dshash_table *hash_table); |
166 | static inline dshash_table_item *find_in_bucket(dshash_table *hash_table, |
167 | const void *key, |
168 | dsa_pointer item_pointer); |
169 | static void insert_item_into_bucket(dshash_table *hash_table, |
170 | dsa_pointer item_pointer, |
171 | dshash_table_item *item, |
172 | dsa_pointer *bucket); |
173 | static dshash_table_item *insert_into_bucket(dshash_table *hash_table, |
174 | const void *key, |
175 | dsa_pointer *bucket); |
176 | static bool delete_key_from_bucket(dshash_table *hash_table, |
177 | const void *key, |
178 | dsa_pointer *bucket_head); |
179 | static bool delete_item_from_bucket(dshash_table *hash_table, |
180 | dshash_table_item *item, |
181 | dsa_pointer *bucket_head); |
182 | static inline dshash_hash hash_key(dshash_table *hash_table, const void *key); |
183 | static inline bool equal_keys(dshash_table *hash_table, |
184 | const void *a, const void *b); |
185 | |
/* Address of the LWLock protecting partition 'i'. */
#define PARTITION_LOCK(hash_table, i) \
	(&(hash_table)->control->partitions[(i)].lock)
188 | |
189 | /* |
190 | * Create a new hash table backed by the given dynamic shared area, with the |
191 | * given parameters. The returned object is allocated in backend-local memory |
192 | * using the current MemoryContext. 'arg' will be passed through to the |
193 | * compare and hash functions. |
194 | */ |
dshash_table *
dshash_create(dsa_area *area, const dshash_parameters *params, void *arg)
{
	dshash_table *hash_table;
	dsa_pointer control;

	/* Allocate the backend-local object representing the hash table. */
	hash_table = palloc(sizeof(dshash_table));

	/* Allocate the control object in shared memory. */
	control = dsa_allocate(area, sizeof(dshash_table_control));

	/* Set up the local and shared hash table structs. */
	hash_table->area = area;
	hash_table->params = *params;
	hash_table->arg = arg;
	hash_table->control = dsa_get_address(area, control);
	/* The control object's own DSA pointer doubles as the attach handle. */
	hash_table->control->handle = control;
	hash_table->control->magic = DSHASH_MAGIC;
	hash_table->control->lwlock_tranche_id = params->tranche_id;

	/* Set up the array of lock partitions. */
	{
		dshash_partition *partitions = hash_table->control->partitions;
		int			tranche_id = hash_table->control->lwlock_tranche_id;
		int			i;

		for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
		{
			LWLockInitialize(&partitions[i].lock, tranche_id);
			partitions[i].count = 0;
		}
	}

	hash_table->find_locked = false;
	hash_table->find_exclusively_locked = false;

	/*
	 * Set up the initial array of buckets.  Our initial size is the same as
	 * the number of partitions.
	 *
	 * DSA_ALLOC_NO_OOM is used so that on allocation failure we get an
	 * invalid pointer back instead of an error being thrown, giving us the
	 * chance to free the control object before reporting the failure
	 * ourselves.  DSA_ALLOC_ZERO makes every bucket start out empty
	 * (InvalidDsaPointer).
	 */
	hash_table->control->size_log2 = DSHASH_NUM_PARTITIONS_LOG2;
	hash_table->control->buckets =
		dsa_allocate_extended(area,
							  sizeof(dsa_pointer) * DSHASH_NUM_PARTITIONS,
							  DSA_ALLOC_NO_OOM | DSA_ALLOC_ZERO);
	if (!DsaPointerIsValid(hash_table->control->buckets))
	{
		dsa_free(area, control);
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of memory"),
				 errdetail("Failed on DSA request of size %zu.",
						   sizeof(dsa_pointer) * DSHASH_NUM_PARTITIONS)));
	}
	hash_table->buckets = dsa_get_address(area,
										  hash_table->control->buckets);
	hash_table->size_log2 = hash_table->control->size_log2;

	return hash_table;
}
256 | |
257 | /* |
258 | * Attach to an existing hash table using a handle. The returned object is |
259 | * allocated in backend-local memory using the current MemoryContext. 'arg' |
260 | * will be passed through to the compare and hash functions. |
261 | */ |
262 | dshash_table * |
263 | dshash_attach(dsa_area *area, const dshash_parameters *params, |
264 | dshash_table_handle handle, void *arg) |
265 | { |
266 | dshash_table *hash_table; |
267 | dsa_pointer control; |
268 | |
269 | /* Allocate the backend-local object representing the hash table. */ |
270 | hash_table = palloc(sizeof(dshash_table)); |
271 | |
272 | /* Find the control object in shared memory. */ |
273 | control = handle; |
274 | |
275 | /* Set up the local hash table struct. */ |
276 | hash_table->area = area; |
277 | hash_table->params = *params; |
278 | hash_table->arg = arg; |
279 | hash_table->control = dsa_get_address(area, control); |
280 | hash_table->find_locked = false; |
281 | hash_table->find_exclusively_locked = false; |
282 | Assert(hash_table->control->magic == DSHASH_MAGIC); |
283 | |
284 | /* |
285 | * These will later be set to the correct values by |
286 | * ensure_valid_bucket_pointers(), at which time we'll be holding a |
287 | * partition lock for interlocking against concurrent resizing. |
288 | */ |
289 | hash_table->buckets = NULL; |
290 | hash_table->size_log2 = 0; |
291 | |
292 | return hash_table; |
293 | } |
294 | |
295 | /* |
296 | * Detach from a hash table. This frees backend-local resources associated |
297 | * with the hash table, but the hash table will continue to exist until it is |
298 | * either explicitly destroyed (by a backend that is still attached to it), or |
299 | * the area that backs it is returned to the operating system. |
300 | */ |
301 | void |
302 | dshash_detach(dshash_table *hash_table) |
303 | { |
304 | Assert(!hash_table->find_locked); |
305 | |
306 | /* The hash table may have been destroyed. Just free local memory. */ |
307 | pfree(hash_table); |
308 | } |
309 | |
310 | /* |
311 | * Destroy a hash table, returning all memory to the area. The caller must be |
312 | * certain that no other backend will attempt to access the hash table before |
 * calling this function.  Other backends must explicitly call dshash_detach to
314 | * free up backend-local memory associated with the hash table. The backend |
315 | * that calls dshash_destroy must not call dshash_detach. |
316 | */ |
317 | void |
318 | dshash_destroy(dshash_table *hash_table) |
319 | { |
320 | size_t size; |
321 | size_t i; |
322 | |
323 | Assert(hash_table->control->magic == DSHASH_MAGIC); |
324 | ensure_valid_bucket_pointers(hash_table); |
325 | |
326 | /* Free all the entries. */ |
327 | size = ((size_t) 1) << hash_table->size_log2; |
328 | for (i = 0; i < size; ++i) |
329 | { |
330 | dsa_pointer item_pointer = hash_table->buckets[i]; |
331 | |
332 | while (DsaPointerIsValid(item_pointer)) |
333 | { |
334 | dshash_table_item *item; |
335 | dsa_pointer next_item_pointer; |
336 | |
337 | item = dsa_get_address(hash_table->area, item_pointer); |
338 | next_item_pointer = item->next; |
339 | dsa_free(hash_table->area, item_pointer); |
340 | item_pointer = next_item_pointer; |
341 | } |
342 | } |
343 | |
344 | /* |
345 | * Vandalize the control block to help catch programming errors where |
346 | * other backends access the memory formerly occupied by this hash table. |
347 | */ |
348 | hash_table->control->magic = 0; |
349 | |
350 | /* Free the active table and control object. */ |
351 | dsa_free(hash_table->area, hash_table->control->buckets); |
352 | dsa_free(hash_table->area, hash_table->control->handle); |
353 | |
354 | pfree(hash_table); |
355 | } |
356 | |
357 | /* |
358 | * Get a handle that can be used by other processes to attach to this hash |
359 | * table. |
360 | */ |
361 | dshash_table_handle |
362 | dshash_get_hash_table_handle(dshash_table *hash_table) |
363 | { |
364 | Assert(hash_table->control->magic == DSHASH_MAGIC); |
365 | |
366 | return hash_table->control->handle; |
367 | } |
368 | |
369 | /* |
370 | * Look up an entry, given a key. Returns a pointer to an entry if one can be |
371 | * found with the given key. Returns NULL if the key is not found. If a |
372 | * non-NULL value is returned, the entry is locked and must be released by |
373 | * calling dshash_release_lock. If an error is raised before |
374 | * dshash_release_lock is called, the lock will be released automatically, but |
375 | * the caller must take care to ensure that the entry is not left corrupted. |
376 | * The lock mode is either shared or exclusive depending on 'exclusive'. |
377 | * |
 * The caller must not already hold a partition lock when calling this.
379 | * |
380 | * Note that the lock held is in fact an LWLock, so interrupts will be held on |
381 | * return from this function, and not resumed until dshash_release_lock is |
382 | * called. It is a very good idea for the caller to release the lock quickly. |
383 | */ |
384 | void * |
385 | dshash_find(dshash_table *hash_table, const void *key, bool exclusive) |
386 | { |
387 | dshash_hash hash; |
388 | size_t partition; |
389 | dshash_table_item *item; |
390 | |
391 | hash = hash_key(hash_table, key); |
392 | partition = PARTITION_FOR_HASH(hash); |
393 | |
394 | Assert(hash_table->control->magic == DSHASH_MAGIC); |
395 | Assert(!hash_table->find_locked); |
396 | |
397 | LWLockAcquire(PARTITION_LOCK(hash_table, partition), |
398 | exclusive ? LW_EXCLUSIVE : LW_SHARED); |
399 | ensure_valid_bucket_pointers(hash_table); |
400 | |
401 | /* Search the active bucket. */ |
402 | item = find_in_bucket(hash_table, key, BUCKET_FOR_HASH(hash_table, hash)); |
403 | |
404 | if (!item) |
405 | { |
406 | /* Not found. */ |
407 | LWLockRelease(PARTITION_LOCK(hash_table, partition)); |
408 | return NULL; |
409 | } |
410 | else |
411 | { |
412 | /* The caller will free the lock by calling dshash_release. */ |
413 | hash_table->find_locked = true; |
414 | hash_table->find_exclusively_locked = exclusive; |
415 | return ENTRY_FROM_ITEM(item); |
416 | } |
417 | } |
418 | |
419 | /* |
420 | * Returns a pointer to an exclusively locked item which must be released with |
421 | * dshash_release_lock. If the key is found in the hash table, 'found' is set |
422 | * to true and a pointer to the existing entry is returned. If the key is not |
423 | * found, 'found' is set to false, and a pointer to a newly created entry is |
424 | * returned. |
425 | * |
426 | * Notes above dshash_find() regarding locking and error handling equally |
427 | * apply here. |
428 | */ |
void *
dshash_find_or_insert(dshash_table *hash_table,
					  const void *key,
					  bool *found)
{
	dshash_hash hash;
	size_t		partition_index;
	dshash_partition *partition;
	dshash_table_item *item;

	hash = hash_key(hash_table, key);
	partition_index = PARTITION_FOR_HASH(hash);
	partition = &hash_table->control->partitions[partition_index];

	Assert(hash_table->control->magic == DSHASH_MAGIC);
	Assert(!hash_table->find_locked);

restart:
	LWLockAcquire(PARTITION_LOCK(hash_table, partition_index),
				  LW_EXCLUSIVE);
	ensure_valid_bucket_pointers(hash_table);

	/* Search the active bucket. */
	item = find_in_bucket(hash_table, key, BUCKET_FOR_HASH(hash_table, hash));

	if (item)
		*found = true;
	else
	{
		*found = false;

		/* Check if we are getting too full. */
		if (partition->count > MAX_COUNT_PER_PARTITION(hash_table))
		{
			/*
			 * The load factor (= keys / buckets) for all buckets protected by
			 * this partition is > 0.75.  Presumably the same applies
			 * generally across the whole hash table (though we don't attempt
			 * to track that directly to avoid contention on some kind of
			 * central counter; we just assume that this partition is
			 * representative).  This is a good time to resize.
			 *
			 * Give up our existing lock first, because resizing needs to
			 * reacquire all the locks in the right order to avoid deadlocks.
			 * After resize() returns we must restart from the top: the
			 * bucket layout has (probably) changed, and another backend may
			 * have inserted our key in the meantime.
			 */
			LWLockRelease(PARTITION_LOCK(hash_table, partition_index));
			resize(hash_table, hash_table->size_log2 + 1);

			goto restart;
		}

		/* Finally we can try to insert the new item. */
		item = insert_into_bucket(hash_table, key,
								  &BUCKET_FOR_HASH(hash_table, hash));
		item->hash = hash;
		/* Adjust per-lock-partition counter for load factor knowledge. */
		++partition->count;
	}

	/* The caller must release the lock with dshash_release_lock. */
	hash_table->find_locked = true;
	hash_table->find_exclusively_locked = true;
	return ENTRY_FROM_ITEM(item);
}
493 | |
494 | /* |
495 | * Remove an entry by key. Returns true if the key was found and the |
496 | * corresponding entry was removed. |
497 | * |
498 | * To delete an entry that you already have a pointer to, see |
499 | * dshash_delete_entry. |
500 | */ |
501 | bool |
502 | dshash_delete_key(dshash_table *hash_table, const void *key) |
503 | { |
504 | dshash_hash hash; |
505 | size_t partition; |
506 | bool found; |
507 | |
508 | Assert(hash_table->control->magic == DSHASH_MAGIC); |
509 | Assert(!hash_table->find_locked); |
510 | |
511 | hash = hash_key(hash_table, key); |
512 | partition = PARTITION_FOR_HASH(hash); |
513 | |
514 | LWLockAcquire(PARTITION_LOCK(hash_table, partition), LW_EXCLUSIVE); |
515 | ensure_valid_bucket_pointers(hash_table); |
516 | |
517 | if (delete_key_from_bucket(hash_table, key, |
518 | &BUCKET_FOR_HASH(hash_table, hash))) |
519 | { |
520 | Assert(hash_table->control->partitions[partition].count > 0); |
521 | found = true; |
522 | --hash_table->control->partitions[partition].count; |
523 | } |
524 | else |
525 | found = false; |
526 | |
527 | LWLockRelease(PARTITION_LOCK(hash_table, partition)); |
528 | |
529 | return found; |
530 | } |
531 | |
532 | /* |
533 | * Remove an entry. The entry must already be exclusively locked, and must |
534 | * have been obtained by dshash_find or dshash_find_or_insert. Note that this |
535 | * function releases the lock just like dshash_release_lock. |
536 | * |
537 | * To delete an entry by key, see dshash_delete_key. |
538 | */ |
539 | void |
540 | dshash_delete_entry(dshash_table *hash_table, void *entry) |
541 | { |
542 | dshash_table_item *item = ITEM_FROM_ENTRY(entry); |
543 | size_t partition = PARTITION_FOR_HASH(item->hash); |
544 | |
545 | Assert(hash_table->control->magic == DSHASH_MAGIC); |
546 | Assert(hash_table->find_locked); |
547 | Assert(hash_table->find_exclusively_locked); |
548 | Assert(LWLockHeldByMeInMode(PARTITION_LOCK(hash_table, partition), |
549 | LW_EXCLUSIVE)); |
550 | |
551 | delete_item(hash_table, item); |
552 | hash_table->find_locked = false; |
553 | hash_table->find_exclusively_locked = false; |
554 | LWLockRelease(PARTITION_LOCK(hash_table, partition)); |
555 | } |
556 | |
557 | /* |
558 | * Unlock an entry which was locked by dshash_find or dshash_find_or_insert. |
559 | */ |
560 | void |
561 | dshash_release_lock(dshash_table *hash_table, void *entry) |
562 | { |
563 | dshash_table_item *item = ITEM_FROM_ENTRY(entry); |
564 | size_t partition_index = PARTITION_FOR_HASH(item->hash); |
565 | |
566 | Assert(hash_table->control->magic == DSHASH_MAGIC); |
567 | Assert(hash_table->find_locked); |
568 | Assert(LWLockHeldByMeInMode(PARTITION_LOCK(hash_table, partition_index), |
569 | hash_table->find_exclusively_locked |
570 | ? LW_EXCLUSIVE : LW_SHARED)); |
571 | |
572 | hash_table->find_locked = false; |
573 | hash_table->find_exclusively_locked = false; |
574 | LWLockRelease(PARTITION_LOCK(hash_table, partition_index)); |
575 | } |
576 | |
577 | /* |
578 | * A compare function that forwards to memcmp. |
579 | */ |
int
dshash_memcmp(const void *a, const void *b, size_t size, void *arg)
{
	/* 'arg' is ignored; keys are compared as raw byte strings. */
	return memcmp(a, b, size);
}
585 | |
586 | /* |
587 | * A hash function that forwards to tag_hash. |
588 | */ |
dshash_hash
dshash_memhash(const void *v, size_t size, void *arg)
{
	/* 'arg' is ignored; the key is hashed as a raw byte string. */
	return tag_hash(v, size);
}
594 | |
595 | /* |
596 | * Print debugging information about the internal state of the hash table to |
597 | * stderr. The caller must hold no partition locks. |
598 | */ |
599 | void |
600 | dshash_dump(dshash_table *hash_table) |
601 | { |
602 | size_t i; |
603 | size_t j; |
604 | |
605 | Assert(hash_table->control->magic == DSHASH_MAGIC); |
606 | Assert(!hash_table->find_locked); |
607 | |
608 | for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i) |
609 | { |
610 | Assert(!LWLockHeldByMe(PARTITION_LOCK(hash_table, i))); |
611 | LWLockAcquire(PARTITION_LOCK(hash_table, i), LW_SHARED); |
612 | } |
613 | |
614 | ensure_valid_bucket_pointers(hash_table); |
615 | |
616 | fprintf(stderr, |
617 | "hash table size = %zu\n" , (size_t) 1 << hash_table->size_log2); |
618 | for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i) |
619 | { |
620 | dshash_partition *partition = &hash_table->control->partitions[i]; |
621 | size_t begin = BUCKET_INDEX_FOR_PARTITION(i, hash_table->size_log2); |
622 | size_t end = BUCKET_INDEX_FOR_PARTITION(i + 1, hash_table->size_log2); |
623 | |
624 | fprintf(stderr, " partition %zu\n" , i); |
625 | fprintf(stderr, |
626 | " active buckets (key count = %zu)\n" , partition->count); |
627 | |
628 | for (j = begin; j < end; ++j) |
629 | { |
630 | size_t count = 0; |
631 | dsa_pointer bucket = hash_table->buckets[j]; |
632 | |
633 | while (DsaPointerIsValid(bucket)) |
634 | { |
635 | dshash_table_item *item; |
636 | |
637 | item = dsa_get_address(hash_table->area, bucket); |
638 | |
639 | bucket = item->next; |
640 | ++count; |
641 | } |
642 | fprintf(stderr, " bucket %zu (key count = %zu)\n" , j, count); |
643 | } |
644 | } |
645 | |
646 | for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i) |
647 | LWLockRelease(PARTITION_LOCK(hash_table, i)); |
648 | } |
649 | |
650 | /* |
651 | * Delete a locked item to which we have a pointer. |
652 | */ |
653 | static void |
654 | delete_item(dshash_table *hash_table, dshash_table_item *item) |
655 | { |
656 | size_t hash = item->hash; |
657 | size_t partition = PARTITION_FOR_HASH(hash); |
658 | |
659 | Assert(LWLockHeldByMe(PARTITION_LOCK(hash_table, partition))); |
660 | |
661 | if (delete_item_from_bucket(hash_table, item, |
662 | &BUCKET_FOR_HASH(hash_table, hash))) |
663 | { |
664 | Assert(hash_table->control->partitions[partition].count > 0); |
665 | --hash_table->control->partitions[partition].count; |
666 | } |
667 | else |
668 | { |
669 | Assert(false); |
670 | } |
671 | } |
672 | |
673 | /* |
674 | * Grow the hash table if necessary to the requested number of buckets. The |
675 | * requested size must be double some previously observed size. |
676 | * |
677 | * Must be called without any partition lock held. |
678 | */ |
static void
resize(dshash_table *hash_table, size_t new_size_log2)
{
	dsa_pointer old_buckets;
	dsa_pointer new_buckets_shared;
	dsa_pointer *new_buckets;
	size_t		size;
	size_t		new_size = ((size_t) 1) << new_size_log2;
	size_t		i;

	/*
	 * Acquire the locks for all lock partitions.  This is expensive, but we
	 * shouldn't have to do it many times.  Taking them in ascending index
	 * order in every backend prevents deadlock between concurrent resizers.
	 */
	for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
	{
		Assert(!LWLockHeldByMe(PARTITION_LOCK(hash_table, i)));

		LWLockAcquire(PARTITION_LOCK(hash_table, i), LW_EXCLUSIVE);
		if (i == 0 && hash_table->control->size_log2 >= new_size_log2)
		{
			/*
			 * Another backend has already increased the size; we can avoid
			 * obtaining all the locks and return early.  (Holding any one
			 * partition lock is enough to read size_log2 consistently.)
			 */
			LWLockRelease(PARTITION_LOCK(hash_table, 0));
			return;
		}
	}

	Assert(new_size_log2 == hash_table->control->size_log2 + 1);

	/* Allocate the space for the new table. */
	new_buckets_shared = dsa_allocate0(hash_table->area,
									   sizeof(dsa_pointer) * new_size);
	new_buckets = dsa_get_address(hash_table->area, new_buckets_shared);

	/*
	 * We've allocated the new bucket array; all that remains to do now is to
	 * reinsert all items, which amounts to adjusting all the pointers.  Note
	 * that insert_item_into_bucket() pushes onto the chain head, so the
	 * relative order of items within a bucket is reversed by each resize.
	 */
	size = ((size_t) 1) << hash_table->control->size_log2;
	for (i = 0; i < size; ++i)
	{
		dsa_pointer item_pointer = hash_table->buckets[i];

		while (DsaPointerIsValid(item_pointer))
		{
			dshash_table_item *item;
			dsa_pointer next_item_pointer;

			/* Save the chain link before the item is relinked below. */
			item = dsa_get_address(hash_table->area, item_pointer);
			next_item_pointer = item->next;
			insert_item_into_bucket(hash_table, item_pointer, item,
									&new_buckets[BUCKET_INDEX_FOR_HASH_AND_SIZE(item->hash,
																				new_size_log2)]);
			item_pointer = next_item_pointer;
		}
	}

	/* Swap the hash table into place and free the old one. */
	old_buckets = hash_table->control->buckets;
	hash_table->control->buckets = new_buckets_shared;
	hash_table->control->size_log2 = new_size_log2;
	hash_table->buckets = new_buckets;
	dsa_free(hash_table->area, old_buckets);

	/* Release all the locks. */
	for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
		LWLockRelease(PARTITION_LOCK(hash_table, i));
}
750 | |
751 | /* |
752 | * Make sure that our backend-local bucket pointers are up to date. The |
753 | * caller must have locked one lock partition, which prevents resize() from |
754 | * running concurrently. |
755 | */ |
756 | static inline void |
757 | ensure_valid_bucket_pointers(dshash_table *hash_table) |
758 | { |
759 | if (hash_table->size_log2 != hash_table->control->size_log2) |
760 | { |
761 | hash_table->buckets = dsa_get_address(hash_table->area, |
762 | hash_table->control->buckets); |
763 | hash_table->size_log2 = hash_table->control->size_log2; |
764 | } |
765 | } |
766 | |
767 | /* |
768 | * Scan a locked bucket for a match, using the provided compare function. |
769 | */ |
770 | static inline dshash_table_item * |
771 | find_in_bucket(dshash_table *hash_table, const void *key, |
772 | dsa_pointer item_pointer) |
773 | { |
774 | while (DsaPointerIsValid(item_pointer)) |
775 | { |
776 | dshash_table_item *item; |
777 | |
778 | item = dsa_get_address(hash_table->area, item_pointer); |
779 | if (equal_keys(hash_table, key, ENTRY_FROM_ITEM(item))) |
780 | return item; |
781 | item_pointer = item->next; |
782 | } |
783 | return NULL; |
784 | } |
785 | |
786 | /* |
787 | * Insert an already-allocated item into a bucket. |
788 | */ |
static void
insert_item_into_bucket(dshash_table *hash_table,
						dsa_pointer item_pointer,
						dshash_table_item *item,
						dsa_pointer *bucket)
{
	/* 'item' must be the backend-local address of 'item_pointer'. */
	Assert(item == dsa_get_address(hash_table->area, item_pointer));

	/* Push the item onto the head of the bucket's chain. */
	item->next = *bucket;
	*bucket = item_pointer;
}
800 | |
801 | /* |
802 | * Allocate space for an entry with the given key and insert it into the |
803 | * provided bucket. |
804 | */ |
805 | static dshash_table_item * |
806 | insert_into_bucket(dshash_table *hash_table, |
807 | const void *key, |
808 | dsa_pointer *bucket) |
809 | { |
810 | dsa_pointer item_pointer; |
811 | dshash_table_item *item; |
812 | |
813 | item_pointer = dsa_allocate(hash_table->area, |
814 | hash_table->params.entry_size + |
815 | MAXALIGN(sizeof(dshash_table_item))); |
816 | item = dsa_get_address(hash_table->area, item_pointer); |
817 | memcpy(ENTRY_FROM_ITEM(item), key, hash_table->params.key_size); |
818 | insert_item_into_bucket(hash_table, item_pointer, item, bucket); |
819 | return item; |
820 | } |
821 | |
822 | /* |
823 | * Search a bucket for a matching key and delete it. |
824 | */ |
825 | static bool |
826 | delete_key_from_bucket(dshash_table *hash_table, |
827 | const void *key, |
828 | dsa_pointer *bucket_head) |
829 | { |
830 | while (DsaPointerIsValid(*bucket_head)) |
831 | { |
832 | dshash_table_item *item; |
833 | |
834 | item = dsa_get_address(hash_table->area, *bucket_head); |
835 | |
836 | if (equal_keys(hash_table, key, ENTRY_FROM_ITEM(item))) |
837 | { |
838 | dsa_pointer next; |
839 | |
840 | next = item->next; |
841 | dsa_free(hash_table->area, *bucket_head); |
842 | *bucket_head = next; |
843 | |
844 | return true; |
845 | } |
846 | bucket_head = &item->next; |
847 | } |
848 | return false; |
849 | } |
850 | |
851 | /* |
852 | * Delete the specified item from the bucket. |
853 | */ |
854 | static bool |
855 | delete_item_from_bucket(dshash_table *hash_table, |
856 | dshash_table_item *item, |
857 | dsa_pointer *bucket_head) |
858 | { |
859 | while (DsaPointerIsValid(*bucket_head)) |
860 | { |
861 | dshash_table_item *bucket_item; |
862 | |
863 | bucket_item = dsa_get_address(hash_table->area, *bucket_head); |
864 | |
865 | if (bucket_item == item) |
866 | { |
867 | dsa_pointer next; |
868 | |
869 | next = item->next; |
870 | dsa_free(hash_table->area, *bucket_head); |
871 | *bucket_head = next; |
872 | return true; |
873 | } |
874 | bucket_head = &bucket_item->next; |
875 | } |
876 | return false; |
877 | } |
878 | |
879 | /* |
880 | * Compute the hash value for a key. |
881 | */ |
static inline dshash_hash
hash_key(dshash_table *hash_table, const void *key)
{
	/* Forward to the user-supplied hash function, passing through 'arg'. */
	return hash_table->params.hash_function(key,
											hash_table->params.key_size,
											hash_table->arg);
}
889 | |
890 | /* |
891 | * Check whether two keys compare equal. |
892 | */ |
893 | static inline bool |
894 | equal_keys(dshash_table *hash_table, const void *a, const void *b) |
895 | { |
896 | return hash_table->params.compare_function(a, b, |
897 | hash_table->params.key_size, |
898 | hash_table->arg) == 0; |
899 | } |
900 | |