1 | /* |
2 | * qht.c - QEMU Hash Table, designed to scale for read-mostly workloads. |
3 | * |
4 | * Copyright (C) 2016, Emilio G. Cota <cota@braap.org> |
5 | * |
6 | * License: GNU GPL, version 2 or later. |
7 | * See the COPYING file in the top-level directory. |
8 | * |
9 | * Assumptions: |
10 | * - NULL cannot be inserted/removed as a pointer value. |
11 | * - Trying to insert an already-existing hash-pointer pair is OK. However, |
12 | * it is not OK to insert into the same hash table different hash-pointer |
 *   pairs that share the same pointer value but have different hashes.
14 | * - Lookups are performed under an RCU read-critical section; removals |
15 | * must wait for a grace period to elapse before freeing removed objects. |
16 | * |
17 | * Features: |
18 | * - Reads (i.e. lookups and iterators) can be concurrent with other reads. |
19 | * Lookups that are concurrent with writes to the same bucket will retry |
20 | * via a seqlock; iterators acquire all bucket locks and therefore can be |
 *   concurrent with lookups and are serialized with respect to writers.
22 | * - Writes (i.e. insertions/removals) can be concurrent with writes to |
23 | * different buckets; writes to the same bucket are serialized through a lock. |
24 | * - Optional auto-resizing: the hash table resizes up if the load surpasses |
25 | * a certain threshold. Resizing is done concurrently with readers; writes |
26 | * are serialized with the resize operation. |
27 | * |
28 | * The key structure is the bucket, which is cacheline-sized. Buckets |
29 | * contain a few hash values and pointers; the u32 hash values are stored in |
30 | * full so that resizing is fast. Having this structure instead of directly |
31 | * chaining items has two advantages: |
32 | * - Failed lookups fail fast, and touch a minimum number of cache lines. |
33 | * - Resizing the hash table with concurrent lookups is easy. |
34 | * |
35 | * There are two types of buckets: |
36 | * 1. "head" buckets are the ones allocated in the array of buckets in qht_map. |
37 | * 2. all "non-head" buckets (i.e. all others) are members of a chain that |
38 | * starts from a head bucket. |
 * Note that the seqlock and spinlock of a head bucket apply to all buckets
40 | * chained to it; these two fields are unused in non-head buckets. |
41 | * |
42 | * On removals, we move the last valid item in the chain to the position of the |
 * just-removed entry. This makes lookups slightly faster, since the moment an
 * empty entry is found, the (failed) lookup is over.
45 | * |
46 | * Resizing is done by taking all bucket spinlocks (so that no other writers can |
47 | * race with us) and then copying all entries into a new hash map. Then, the |
48 | * ht->map pointer is set, and the old map is freed once no RCU readers can see |
49 | * it anymore. |
50 | * |
51 | * Writers check for concurrent resizes by comparing ht->map before and after |
 * acquiring their bucket lock. If they don't match, a resize has occurred
53 | * while the bucket spinlock was being acquired. |
54 | * |
55 | * Related Work: |
56 | * - Idea of cacheline-sized buckets with full hashes taken from: |
57 | * David, Guerraoui & Trigonakis, "Asynchronized Concurrency: |
58 | * The Secret to Scaling Concurrent Search Data Structures", ASPLOS'15. |
59 | * - Why not RCU-based hash tables? They would allow us to get rid of the |
60 | * seqlock, but resizing would take forever since RCU read critical |
61 | * sections in QEMU take quite a long time. |
62 | * More info on relativistic hash tables: |
63 | * + Triplett, McKenney & Walpole, "Resizable, Scalable, Concurrent Hash |
64 | * Tables via Relativistic Programming", USENIX ATC'11. |
65 | * + Corbet, "Relativistic hash tables, part 1: Algorithms", @ lwn.net, 2014. |
66 | * https://lwn.net/Articles/612021/ |
67 | */ |
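
/*
 * Example usage (a minimal sketch; the element type, hash function and
 * comparison callback below are hypothetical and not provided by qht):
 *
 *     struct entry {
 *         uint64_t key;
 *     };
 *
 *     static bool entry_cmp(const void *a, const void *b)
 *     {
 *         const struct entry *ea = a;
 *         const struct entry *eb = b;
 *
 *         return ea->key == eb->key;
 *     }
 *
 *     static uint32_t entry_hash(const struct entry *e)
 *     {
 *         return (uint32_t)(e->key * 2654435761u); // any decent 32-bit hash
 *     }
 *
 *     struct qht ht;
 *     struct entry *e = g_new0(struct entry, 1);
 *     void *existing;
 *
 *     e->key = 42;
 *     qht_init(&ht, entry_cmp, 1024, QHT_MODE_AUTO_RESIZE);
 *     if (!qht_insert(&ht, e, entry_hash(e), &existing)) {
 *         // an equal entry was already in the table; @existing points to it
 *     }
 *     e = qht_lookup(&ht, e, entry_hash(e)); // call within rcu_read_lock()
 *     if (qht_remove(&ht, e, entry_hash(e))) {
 *         // free @e only after an RCU grace period has elapsed
 *     }
 */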
68 | #include "qemu/osdep.h" |
69 | #include "qemu/qht.h" |
70 | #include "qemu/atomic.h" |
71 | #include "qemu/rcu.h" |
72 | |
73 | //#define QHT_DEBUG |
74 | |
75 | /* |
76 | * We want to avoid false sharing of cache lines. Most systems have 64-byte |
77 | * cache lines so we go with it for simplicity. |
78 | * |
79 | * Note that systems with smaller cache lines will be fine (the struct is |
 * almost 64 bytes); systems with larger cache lines might suffer from
81 | * some false sharing. |
82 | */ |
83 | #define QHT_BUCKET_ALIGN 64 |
84 | |
85 | /* define these to keep sizeof(qht_bucket) within QHT_BUCKET_ALIGN */ |
86 | #if HOST_LONG_BITS == 32 |
87 | #define QHT_BUCKET_ENTRIES 6 |
88 | #else /* 64-bit */ |
89 | #define QHT_BUCKET_ENTRIES 4 |
90 | #endif |
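
/*
 * To see why these figures fit: assuming 4-byte QemuSpin and QemuSeqLock
 * fields (as on typical hosts), a bucket takes roughly
 *   64-bit: 4 + 4 + 4 * 4 (hashes) + 4 * 8 (pointers) + 8 (next) = 64 bytes
 *   32-bit: 4 + 4 + 6 * 4 (hashes) + 6 * 4 (pointers) + 4 (next) = 60 bytes
 * The QEMU_BUILD_BUG_ON below checks the resulting size in any case.
 */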
91 | |
92 | enum qht_iter_type { |
93 | QHT_ITER_VOID, /* do nothing; use retvoid */ |
94 | QHT_ITER_RM, /* remove element if retbool returns true */ |
95 | }; |
96 | |
97 | struct qht_iter { |
98 | union { |
99 | qht_iter_func_t retvoid; |
100 | qht_iter_bool_func_t retbool; |
101 | } f; |
102 | enum qht_iter_type type; |
103 | }; |
104 | |
105 | /* |
106 | * Do _not_ use qemu_mutex_[try]lock directly! Use these macros, otherwise |
107 | * the profiler (QSP) will deadlock. |
108 | */ |
109 | static inline void qht_lock(struct qht *ht) |
110 | { |
111 | if (ht->mode & QHT_MODE_RAW_MUTEXES) { |
112 | qemu_mutex_lock__raw(&ht->lock); |
113 | } else { |
114 | qemu_mutex_lock(&ht->lock); |
115 | } |
116 | } |
117 | |
118 | static inline int qht_trylock(struct qht *ht) |
119 | { |
120 | if (ht->mode & QHT_MODE_RAW_MUTEXES) { |
        return qemu_mutex_trylock__raw(&ht->lock);
    }
    return qemu_mutex_trylock(&ht->lock);
124 | } |
125 | |
126 | /* this inline is not really necessary, but it helps keep code consistent */ |
127 | static inline void qht_unlock(struct qht *ht) |
128 | { |
129 | qemu_mutex_unlock(&ht->lock); |
130 | } |
131 | |
132 | /* |
133 | * Note: reading partially-updated pointers in @pointers could lead to |
134 | * segfaults. We thus access them with atomic_read/set; this guarantees |
135 | * that the compiler makes all those accesses atomic. We also need the |
136 | * volatile-like behavior in atomic_read, since otherwise the compiler |
137 | * might refetch the pointer. |
138 | * atomic_read's are of course not necessary when the bucket lock is held. |
139 | * |
140 | * If both ht->lock and b->lock are grabbed, ht->lock should always |
141 | * be grabbed first. |
142 | */ |
143 | struct qht_bucket { |
144 | QemuSpin lock; |
145 | QemuSeqLock sequence; |
146 | uint32_t hashes[QHT_BUCKET_ENTRIES]; |
147 | void *pointers[QHT_BUCKET_ENTRIES]; |
148 | struct qht_bucket *next; |
149 | } QEMU_ALIGNED(QHT_BUCKET_ALIGN); |
150 | |
151 | QEMU_BUILD_BUG_ON(sizeof(struct qht_bucket) > QHT_BUCKET_ALIGN); |
152 | |
153 | /** |
154 | * struct qht_map - structure to track an array of buckets |
155 | * @rcu: used by RCU. Keep it as the top field in the struct to help valgrind |
156 | * find the whole struct. |
157 | * @buckets: array of head buckets. It is constant once the map is created. |
158 | * @n_buckets: number of head buckets. It is constant once the map is created. |
159 | * @n_added_buckets: number of added (i.e. "non-head") buckets |
160 | * @n_added_buckets_threshold: threshold to trigger an upward resize once the |
161 | * number of added buckets surpasses it. |
162 | * |
163 | * Buckets are tracked in what we call a "map", i.e. this structure. |
164 | */ |
165 | struct qht_map { |
166 | struct rcu_head rcu; |
167 | struct qht_bucket *buckets; |
168 | size_t n_buckets; |
169 | size_t n_added_buckets; |
170 | size_t n_added_buckets_threshold; |
171 | }; |
172 | |
173 | /* trigger a resize when n_added_buckets > n_buckets / div */ |
174 | #define QHT_NR_ADDED_BUCKETS_THRESHOLD_DIV 8 |
175 | |
176 | static void qht_do_resize_reset(struct qht *ht, struct qht_map *new, |
177 | bool reset); |
178 | static void qht_grow_maybe(struct qht *ht); |
179 | |
180 | #ifdef QHT_DEBUG |
181 | |
182 | #define qht_debug_assert(X) do { assert(X); } while (0) |
183 | |
184 | static void qht_bucket_debug__locked(struct qht_bucket *b) |
185 | { |
186 | bool seen_empty = false; |
187 | bool corrupt = false; |
188 | int i; |
189 | |
190 | do { |
191 | for (i = 0; i < QHT_BUCKET_ENTRIES; i++) { |
192 | if (b->pointers[i] == NULL) { |
193 | seen_empty = true; |
194 | continue; |
195 | } |
196 | if (seen_empty) { |
                fprintf(stderr, "%s: b: %p, pos: %i, hash: 0x%x, p: %p\n",
                        __func__, b, i, b->hashes[i], b->pointers[i]);
199 | corrupt = true; |
200 | } |
201 | } |
202 | b = b->next; |
203 | } while (b); |
204 | qht_debug_assert(!corrupt); |
205 | } |
206 | |
207 | static void qht_map_debug__all_locked(struct qht_map *map) |
208 | { |
209 | int i; |
210 | |
211 | for (i = 0; i < map->n_buckets; i++) { |
212 | qht_bucket_debug__locked(&map->buckets[i]); |
213 | } |
214 | } |
215 | #else |
216 | |
217 | #define qht_debug_assert(X) do { (void)(X); } while (0) |
218 | |
219 | static inline void qht_bucket_debug__locked(struct qht_bucket *b) |
220 | { } |
221 | |
222 | static inline void qht_map_debug__all_locked(struct qht_map *map) |
223 | { } |
224 | #endif /* QHT_DEBUG */ |
225 | |
226 | static inline size_t qht_elems_to_buckets(size_t n_elems) |
227 | { |
228 | return pow2ceil(n_elems / QHT_BUCKET_ENTRIES); |
229 | } |
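
/*
 * For instance, with QHT_BUCKET_ENTRIES == 4, sizing qht_elems_to_buckets()
 * for 1000 elements yields pow2ceil(250) == 256 head buckets, i.e. room for
 * 1024 entries before any non-head bucket has to be chained.
 */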
230 | |
231 | static inline void qht_head_init(struct qht_bucket *b) |
232 | { |
233 | memset(b, 0, sizeof(*b)); |
234 | qemu_spin_init(&b->lock); |
235 | seqlock_init(&b->sequence); |
236 | } |
237 | |
238 | static inline |
239 | struct qht_bucket *qht_map_to_bucket(const struct qht_map *map, uint32_t hash) |
240 | { |
241 | return &map->buckets[hash & (map->n_buckets - 1)]; |
242 | } |
243 | |
244 | /* acquire all bucket locks from a map */ |
245 | static void qht_map_lock_buckets(struct qht_map *map) |
246 | { |
247 | size_t i; |
248 | |
249 | for (i = 0; i < map->n_buckets; i++) { |
250 | struct qht_bucket *b = &map->buckets[i]; |
251 | |
252 | qemu_spin_lock(&b->lock); |
253 | } |
254 | } |
255 | |
256 | static void qht_map_unlock_buckets(struct qht_map *map) |
257 | { |
258 | size_t i; |
259 | |
260 | for (i = 0; i < map->n_buckets; i++) { |
261 | struct qht_bucket *b = &map->buckets[i]; |
262 | |
263 | qemu_spin_unlock(&b->lock); |
264 | } |
265 | } |
266 | |
267 | /* |
268 | * Call with at least a bucket lock held. |
269 | * @map should be the value read before acquiring the lock (or locks). |
270 | */ |
271 | static inline bool qht_map_is_stale__locked(const struct qht *ht, |
272 | const struct qht_map *map) |
273 | { |
274 | return map != ht->map; |
275 | } |
276 | |
277 | /* |
278 | * Grab all bucket locks, and set @pmap after making sure the map isn't stale. |
279 | * |
280 | * Pairs with qht_map_unlock_buckets(), hence the pass-by-reference. |
281 | * |
282 | * Note: callers cannot have ht->lock held. |
283 | */ |
284 | static inline |
285 | void qht_map_lock_buckets__no_stale(struct qht *ht, struct qht_map **pmap) |
286 | { |
287 | struct qht_map *map; |
288 | |
289 | map = atomic_rcu_read(&ht->map); |
290 | qht_map_lock_buckets(map); |
291 | if (likely(!qht_map_is_stale__locked(ht, map))) { |
292 | *pmap = map; |
293 | return; |
294 | } |
295 | qht_map_unlock_buckets(map); |
296 | |
297 | /* we raced with a resize; acquire ht->lock to see the updated ht->map */ |
298 | qht_lock(ht); |
299 | map = ht->map; |
300 | qht_map_lock_buckets(map); |
301 | qht_unlock(ht); |
302 | *pmap = map; |
303 | return; |
304 | } |
305 | |
306 | /* |
307 | * Get a head bucket and lock it, making sure its parent map is not stale. |
308 | * @pmap is filled with a pointer to the bucket's parent map. |
309 | * |
310 | * Unlock with qemu_spin_unlock(&b->lock). |
311 | * |
312 | * Note: callers cannot have ht->lock held. |
313 | */ |
314 | static inline |
315 | struct qht_bucket *qht_bucket_lock__no_stale(struct qht *ht, uint32_t hash, |
316 | struct qht_map **pmap) |
317 | { |
318 | struct qht_bucket *b; |
319 | struct qht_map *map; |
320 | |
321 | map = atomic_rcu_read(&ht->map); |
322 | b = qht_map_to_bucket(map, hash); |
323 | |
324 | qemu_spin_lock(&b->lock); |
325 | if (likely(!qht_map_is_stale__locked(ht, map))) { |
326 | *pmap = map; |
327 | return b; |
328 | } |
329 | qemu_spin_unlock(&b->lock); |
330 | |
331 | /* we raced with a resize; acquire ht->lock to see the updated ht->map */ |
332 | qht_lock(ht); |
333 | map = ht->map; |
334 | b = qht_map_to_bucket(map, hash); |
335 | qemu_spin_lock(&b->lock); |
336 | qht_unlock(ht); |
337 | *pmap = map; |
338 | return b; |
339 | } |
340 | |
341 | static inline bool qht_map_needs_resize(const struct qht_map *map) |
342 | { |
343 | return atomic_read(&map->n_added_buckets) > map->n_added_buckets_threshold; |
344 | } |
345 | |
346 | static inline void qht_chain_destroy(const struct qht_bucket *head) |
347 | { |
348 | struct qht_bucket *curr = head->next; |
349 | struct qht_bucket *prev; |
350 | |
351 | while (curr) { |
352 | prev = curr; |
353 | curr = curr->next; |
354 | qemu_vfree(prev); |
355 | } |
356 | } |
357 | |
358 | /* pass only an orphan map */ |
359 | static void qht_map_destroy(struct qht_map *map) |
360 | { |
361 | size_t i; |
362 | |
363 | for (i = 0; i < map->n_buckets; i++) { |
364 | qht_chain_destroy(&map->buckets[i]); |
365 | } |
366 | qemu_vfree(map->buckets); |
367 | g_free(map); |
368 | } |
369 | |
370 | static struct qht_map *qht_map_create(size_t n_buckets) |
371 | { |
372 | struct qht_map *map; |
373 | size_t i; |
374 | |
375 | map = g_malloc(sizeof(*map)); |
376 | map->n_buckets = n_buckets; |
377 | |
378 | map->n_added_buckets = 0; |
379 | map->n_added_buckets_threshold = n_buckets / |
380 | QHT_NR_ADDED_BUCKETS_THRESHOLD_DIV; |
381 | |
    /* let tiny hash tables add at least one non-head bucket */
383 | if (unlikely(map->n_added_buckets_threshold == 0)) { |
384 | map->n_added_buckets_threshold = 1; |
385 | } |
386 | |
387 | map->buckets = qemu_memalign(QHT_BUCKET_ALIGN, |
388 | sizeof(*map->buckets) * n_buckets); |
389 | for (i = 0; i < n_buckets; i++) { |
390 | qht_head_init(&map->buckets[i]); |
391 | } |
392 | return map; |
393 | } |
394 | |
395 | void qht_init(struct qht *ht, qht_cmp_func_t cmp, size_t n_elems, |
396 | unsigned int mode) |
397 | { |
398 | struct qht_map *map; |
399 | size_t n_buckets = qht_elems_to_buckets(n_elems); |
400 | |
401 | g_assert(cmp); |
402 | ht->cmp = cmp; |
403 | ht->mode = mode; |
404 | qemu_mutex_init(&ht->lock); |
405 | map = qht_map_create(n_buckets); |
406 | atomic_rcu_set(&ht->map, map); |
407 | } |
408 | |
409 | /* call only when there are no readers/writers left */ |
410 | void qht_destroy(struct qht *ht) |
411 | { |
412 | qht_map_destroy(ht->map); |
413 | memset(ht, 0, sizeof(*ht)); |
414 | } |
415 | |
416 | static void qht_bucket_reset__locked(struct qht_bucket *head) |
417 | { |
418 | struct qht_bucket *b = head; |
419 | int i; |
420 | |
421 | seqlock_write_begin(&head->sequence); |
422 | do { |
423 | for (i = 0; i < QHT_BUCKET_ENTRIES; i++) { |
424 | if (b->pointers[i] == NULL) { |
425 | goto done; |
426 | } |
427 | atomic_set(&b->hashes[i], 0); |
428 | atomic_set(&b->pointers[i], NULL); |
429 | } |
430 | b = b->next; |
431 | } while (b); |
432 | done: |
433 | seqlock_write_end(&head->sequence); |
434 | } |
435 | |
436 | /* call with all bucket locks held */ |
437 | static void qht_map_reset__all_locked(struct qht_map *map) |
438 | { |
439 | size_t i; |
440 | |
441 | for (i = 0; i < map->n_buckets; i++) { |
442 | qht_bucket_reset__locked(&map->buckets[i]); |
443 | } |
444 | qht_map_debug__all_locked(map); |
445 | } |
446 | |
447 | void qht_reset(struct qht *ht) |
448 | { |
449 | struct qht_map *map; |
450 | |
451 | qht_map_lock_buckets__no_stale(ht, &map); |
452 | qht_map_reset__all_locked(map); |
453 | qht_map_unlock_buckets(map); |
454 | } |
455 | |
456 | static inline void qht_do_resize(struct qht *ht, struct qht_map *new) |
457 | { |
458 | qht_do_resize_reset(ht, new, false); |
459 | } |
460 | |
461 | static inline void qht_do_resize_and_reset(struct qht *ht, struct qht_map *new) |
462 | { |
463 | qht_do_resize_reset(ht, new, true); |
464 | } |
465 | |
466 | bool qht_reset_size(struct qht *ht, size_t n_elems) |
467 | { |
468 | struct qht_map *new = NULL; |
469 | struct qht_map *map; |
470 | size_t n_buckets; |
471 | |
472 | n_buckets = qht_elems_to_buckets(n_elems); |
473 | |
474 | qht_lock(ht); |
475 | map = ht->map; |
476 | if (n_buckets != map->n_buckets) { |
477 | new = qht_map_create(n_buckets); |
478 | } |
479 | qht_do_resize_and_reset(ht, new); |
480 | qht_unlock(ht); |
481 | |
482 | return !!new; |
483 | } |
484 | |
485 | static inline |
486 | void *qht_do_lookup(const struct qht_bucket *head, qht_lookup_func_t func, |
487 | const void *userp, uint32_t hash) |
488 | { |
489 | const struct qht_bucket *b = head; |
490 | int i; |
491 | |
492 | do { |
493 | for (i = 0; i < QHT_BUCKET_ENTRIES; i++) { |
494 | if (atomic_read(&b->hashes[i]) == hash) { |
495 | /* The pointer is dereferenced before seqlock_read_retry, |
496 | * so (unlike qht_insert__locked) we need to use |
497 | * atomic_rcu_read here. |
498 | */ |
499 | void *p = atomic_rcu_read(&b->pointers[i]); |
500 | |
501 | if (likely(p) && likely(func(p, userp))) { |
502 | return p; |
503 | } |
504 | } |
505 | } |
506 | b = atomic_rcu_read(&b->next); |
507 | } while (b); |
508 | |
509 | return NULL; |
510 | } |
511 | |
512 | static __attribute__((noinline)) |
513 | void *qht_lookup__slowpath(const struct qht_bucket *b, qht_lookup_func_t func, |
514 | const void *userp, uint32_t hash) |
515 | { |
516 | unsigned int version; |
517 | void *ret; |
518 | |
519 | do { |
520 | version = seqlock_read_begin(&b->sequence); |
521 | ret = qht_do_lookup(b, func, userp, hash); |
522 | } while (seqlock_read_retry(&b->sequence, version)); |
523 | return ret; |
524 | } |
525 | |
526 | void *qht_lookup_custom(const struct qht *ht, const void *userp, uint32_t hash, |
527 | qht_lookup_func_t func) |
528 | { |
529 | const struct qht_bucket *b; |
530 | const struct qht_map *map; |
531 | unsigned int version; |
532 | void *ret; |
533 | |
534 | map = atomic_rcu_read(&ht->map); |
535 | b = qht_map_to_bucket(map, hash); |
536 | |
537 | version = seqlock_read_begin(&b->sequence); |
538 | ret = qht_do_lookup(b, func, userp, hash); |
539 | if (likely(!seqlock_read_retry(&b->sequence, version))) { |
540 | return ret; |
541 | } |
542 | /* |
543 | * Removing the do/while from the fastpath gives a 4% perf. increase when |
544 | * running a 100%-lookup microbenchmark. |
545 | */ |
546 | return qht_lookup__slowpath(b, func, userp, hash); |
547 | } |
548 | |
549 | void *qht_lookup(const struct qht *ht, const void *userp, uint32_t hash) |
550 | { |
551 | return qht_lookup_custom(ht, userp, hash, ht->cmp); |
552 | } |
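
/*
 * Example: looking up with a descriptor that is cheaper to build than a full
 * element (a sketch; struct entry, struct entry_desc and hash_of() are
 * hypothetical):
 *
 *     struct entry_desc {
 *         uint64_t key;
 *     };
 *
 *     static bool entry_desc_cmp(const void *obj, const void *userp)
 *     {
 *         const struct entry *e = obj;
 *         const struct entry_desc *desc = userp;
 *
 *         return e->key == desc->key;
 *     }
 *
 *     struct entry_desc desc = { .key = key };
 *     struct entry *e = qht_lookup_custom(&ht, &desc, hash_of(key),
 *                                         entry_desc_cmp);
 */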
553 | |
554 | /* |
555 | * call with head->lock held |
556 | * @ht is const since it is only used for ht->cmp() |
557 | */ |
558 | static void *qht_insert__locked(const struct qht *ht, struct qht_map *map, |
559 | struct qht_bucket *head, void *p, uint32_t hash, |
560 | bool *needs_resize) |
561 | { |
562 | struct qht_bucket *b = head; |
563 | struct qht_bucket *prev = NULL; |
564 | struct qht_bucket *new = NULL; |
565 | int i; |
566 | |
567 | do { |
568 | for (i = 0; i < QHT_BUCKET_ENTRIES; i++) { |
569 | if (b->pointers[i]) { |
570 | if (unlikely(b->hashes[i] == hash && |
571 | ht->cmp(b->pointers[i], p))) { |
572 | return b->pointers[i]; |
573 | } |
574 | } else { |
575 | goto found; |
576 | } |
577 | } |
578 | prev = b; |
579 | b = b->next; |
580 | } while (b); |
581 | |
582 | b = qemu_memalign(QHT_BUCKET_ALIGN, sizeof(*b)); |
583 | memset(b, 0, sizeof(*b)); |
584 | new = b; |
585 | i = 0; |
586 | atomic_inc(&map->n_added_buckets); |
587 | if (unlikely(qht_map_needs_resize(map)) && needs_resize) { |
588 | *needs_resize = true; |
589 | } |
590 | |
591 | found: |
    /* found an empty slot: acquire the seqlock and write */
593 | seqlock_write_begin(&head->sequence); |
594 | if (new) { |
595 | atomic_rcu_set(&prev->next, b); |
596 | } |
597 | /* smp_wmb() implicit in seqlock_write_begin. */ |
598 | atomic_set(&b->hashes[i], hash); |
599 | atomic_set(&b->pointers[i], p); |
600 | seqlock_write_end(&head->sequence); |
601 | return NULL; |
602 | } |
603 | |
604 | static __attribute__((noinline)) void qht_grow_maybe(struct qht *ht) |
605 | { |
606 | struct qht_map *map; |
607 | |
608 | /* |
609 | * If the lock is taken it probably means there's an ongoing resize, |
610 | * so bail out. |
611 | */ |
612 | if (qht_trylock(ht)) { |
613 | return; |
614 | } |
615 | map = ht->map; |
616 | /* another thread might have just performed the resize we were after */ |
617 | if (qht_map_needs_resize(map)) { |
618 | struct qht_map *new = qht_map_create(map->n_buckets * 2); |
619 | |
620 | qht_do_resize(ht, new); |
621 | } |
622 | qht_unlock(ht); |
623 | } |
624 | |
625 | bool qht_insert(struct qht *ht, void *p, uint32_t hash, void **existing) |
626 | { |
627 | struct qht_bucket *b; |
628 | struct qht_map *map; |
629 | bool needs_resize = false; |
630 | void *prev; |
631 | |
632 | /* NULL pointers are not supported */ |
633 | qht_debug_assert(p); |
634 | |
635 | b = qht_bucket_lock__no_stale(ht, hash, &map); |
636 | prev = qht_insert__locked(ht, map, b, p, hash, &needs_resize); |
637 | qht_bucket_debug__locked(b); |
638 | qemu_spin_unlock(&b->lock); |
639 | |
640 | if (unlikely(needs_resize) && ht->mode & QHT_MODE_AUTO_RESIZE) { |
641 | qht_grow_maybe(ht); |
642 | } |
643 | if (likely(prev == NULL)) { |
644 | return true; |
645 | } |
646 | if (existing) { |
647 | *existing = prev; |
648 | } |
649 | return false; |
650 | } |
651 | |
652 | static inline bool qht_entry_is_last(const struct qht_bucket *b, int pos) |
653 | { |
654 | if (pos == QHT_BUCKET_ENTRIES - 1) { |
655 | if (b->next == NULL) { |
656 | return true; |
657 | } |
658 | return b->next->pointers[0] == NULL; |
659 | } |
660 | return b->pointers[pos + 1] == NULL; |
661 | } |
662 | |
663 | static void |
664 | qht_entry_move(struct qht_bucket *to, int i, struct qht_bucket *from, int j) |
665 | { |
666 | qht_debug_assert(!(to == from && i == j)); |
667 | qht_debug_assert(to->pointers[i]); |
668 | qht_debug_assert(from->pointers[j]); |
669 | |
670 | atomic_set(&to->hashes[i], from->hashes[j]); |
671 | atomic_set(&to->pointers[i], from->pointers[j]); |
672 | |
673 | atomic_set(&from->hashes[j], 0); |
674 | atomic_set(&from->pointers[j], NULL); |
675 | } |
676 | |
677 | /* |
 * Find the last valid entry in the chain starting at @orig, and move it into
 * @orig[pos], which is being removed.
680 | */ |
681 | static inline void qht_bucket_remove_entry(struct qht_bucket *orig, int pos) |
682 | { |
683 | struct qht_bucket *b = orig; |
684 | struct qht_bucket *prev = NULL; |
685 | int i; |
686 | |
687 | if (qht_entry_is_last(orig, pos)) { |
688 | orig->hashes[pos] = 0; |
689 | atomic_set(&orig->pointers[pos], NULL); |
690 | return; |
691 | } |
692 | do { |
693 | for (i = 0; i < QHT_BUCKET_ENTRIES; i++) { |
694 | if (b->pointers[i]) { |
695 | continue; |
696 | } |
697 | if (i > 0) { |
698 | return qht_entry_move(orig, pos, b, i - 1); |
699 | } |
700 | qht_debug_assert(prev); |
701 | return qht_entry_move(orig, pos, prev, QHT_BUCKET_ENTRIES - 1); |
702 | } |
703 | prev = b; |
704 | b = b->next; |
705 | } while (b); |
    /* the chain has no empty slots, so move its last entry into orig[pos] */
707 | qht_entry_move(orig, pos, prev, QHT_BUCKET_ENTRIES - 1); |
708 | } |
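
/*
 * For instance (a sketch), removing "b" from a chain whose entries are laid
 * out as
 *     head: [a][b][c][d] -> next: [e][ ][ ][ ]
 * moves the last valid entry, "e", into b's slot:
 *     head: [a][e][c][d] -> next: [ ][ ][ ][ ]
 * so that scans can keep stopping at the first empty slot.
 */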
709 | |
710 | /* call with b->lock held */ |
711 | static inline |
712 | bool qht_remove__locked(struct qht_bucket *head, const void *p, uint32_t hash) |
713 | { |
714 | struct qht_bucket *b = head; |
715 | int i; |
716 | |
717 | do { |
718 | for (i = 0; i < QHT_BUCKET_ENTRIES; i++) { |
719 | void *q = b->pointers[i]; |
720 | |
721 | if (unlikely(q == NULL)) { |
722 | return false; |
723 | } |
724 | if (q == p) { |
725 | qht_debug_assert(b->hashes[i] == hash); |
726 | seqlock_write_begin(&head->sequence); |
727 | qht_bucket_remove_entry(b, i); |
728 | seqlock_write_end(&head->sequence); |
729 | return true; |
730 | } |
731 | } |
732 | b = b->next; |
733 | } while (b); |
734 | return false; |
735 | } |
736 | |
737 | bool qht_remove(struct qht *ht, const void *p, uint32_t hash) |
738 | { |
739 | struct qht_bucket *b; |
740 | struct qht_map *map; |
741 | bool ret; |
742 | |
743 | /* NULL pointers are not supported */ |
744 | qht_debug_assert(p); |
745 | |
746 | b = qht_bucket_lock__no_stale(ht, hash, &map); |
747 | ret = qht_remove__locked(b, p, hash); |
748 | qht_bucket_debug__locked(b); |
749 | qemu_spin_unlock(&b->lock); |
750 | return ret; |
751 | } |
752 | |
753 | static inline void qht_bucket_iter(struct qht_bucket *head, |
754 | const struct qht_iter *iter, void *userp) |
755 | { |
756 | struct qht_bucket *b = head; |
757 | int i; |
758 | |
759 | do { |
760 | for (i = 0; i < QHT_BUCKET_ENTRIES; i++) { |
761 | if (b->pointers[i] == NULL) { |
762 | return; |
763 | } |
764 | switch (iter->type) { |
765 | case QHT_ITER_VOID: |
766 | iter->f.retvoid(b->pointers[i], b->hashes[i], userp); |
767 | break; |
768 | case QHT_ITER_RM: |
769 | if (iter->f.retbool(b->pointers[i], b->hashes[i], userp)) { |
770 | /* replace i with the last valid element in the bucket */ |
771 | seqlock_write_begin(&head->sequence); |
772 | qht_bucket_remove_entry(b, i); |
773 | seqlock_write_end(&head->sequence); |
774 | qht_bucket_debug__locked(b); |
775 | /* reevaluate i, since it just got replaced */ |
776 | i--; |
777 | continue; |
778 | } |
779 | break; |
780 | default: |
781 | g_assert_not_reached(); |
782 | } |
783 | } |
784 | b = b->next; |
785 | } while (b); |
786 | } |
787 | |
788 | /* call with all of the map's locks held */ |
789 | static inline void qht_map_iter__all_locked(struct qht_map *map, |
790 | const struct qht_iter *iter, |
791 | void *userp) |
792 | { |
793 | size_t i; |
794 | |
795 | for (i = 0; i < map->n_buckets; i++) { |
796 | qht_bucket_iter(&map->buckets[i], iter, userp); |
797 | } |
798 | } |
799 | |
800 | static inline void |
801 | do_qht_iter(struct qht *ht, const struct qht_iter *iter, void *userp) |
802 | { |
803 | struct qht_map *map; |
804 | |
805 | map = atomic_rcu_read(&ht->map); |
806 | qht_map_lock_buckets(map); |
807 | qht_map_iter__all_locked(map, iter, userp); |
808 | qht_map_unlock_buckets(map); |
809 | } |
810 | |
811 | void qht_iter(struct qht *ht, qht_iter_func_t func, void *userp) |
812 | { |
813 | const struct qht_iter iter = { |
814 | .f.retvoid = func, |
815 | .type = QHT_ITER_VOID, |
816 | }; |
817 | |
818 | do_qht_iter(ht, &iter, userp); |
819 | } |
820 | |
821 | void qht_iter_remove(struct qht *ht, qht_iter_bool_func_t func, void *userp) |
822 | { |
823 | const struct qht_iter iter = { |
824 | .f.retbool = func, |
825 | .type = QHT_ITER_RM, |
826 | }; |
827 | |
828 | do_qht_iter(ht, &iter, userp); |
829 | } |
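
/*
 * Example: dropping all elements that match a predicate (a sketch; struct
 * entry, its expires field and the use of get_clock() are illustrative):
 *
 *     static bool entry_expired(void *p, uint32_t hash, void *userp)
 *     {
 *         struct entry *e = p;
 *         int64_t *deadline = userp;
 *
 *         return e->expires < *deadline;
 *     }
 *
 *     int64_t now = get_clock();
 *
 *     qht_iter_remove(&ht, entry_expired, &now);
 *
 * As with qht_remove(), returning true only unlinks the element; freeing it
 * must still wait for an RCU grace period.
 */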
830 | |
831 | struct qht_map_copy_data { |
832 | struct qht *ht; |
833 | struct qht_map *new; |
834 | }; |
835 | |
836 | static void qht_map_copy(void *p, uint32_t hash, void *userp) |
837 | { |
838 | struct qht_map_copy_data *data = userp; |
839 | struct qht *ht = data->ht; |
840 | struct qht_map *new = data->new; |
841 | struct qht_bucket *b = qht_map_to_bucket(new, hash); |
842 | |
843 | /* no need to acquire b->lock because no thread has seen this map yet */ |
844 | qht_insert__locked(ht, new, b, p, hash, NULL); |
845 | } |
846 | |
847 | /* |
848 | * Atomically perform a resize and/or reset. |
849 | * Call with ht->lock held. |
850 | */ |
851 | static void qht_do_resize_reset(struct qht *ht, struct qht_map *new, bool reset) |
852 | { |
853 | struct qht_map *old; |
854 | const struct qht_iter iter = { |
855 | .f.retvoid = qht_map_copy, |
856 | .type = QHT_ITER_VOID, |
857 | }; |
858 | struct qht_map_copy_data data; |
859 | |
860 | old = ht->map; |
861 | qht_map_lock_buckets(old); |
862 | |
863 | if (reset) { |
864 | qht_map_reset__all_locked(old); |
865 | } |
866 | |
867 | if (new == NULL) { |
868 | qht_map_unlock_buckets(old); |
869 | return; |
870 | } |
871 | |
872 | g_assert(new->n_buckets != old->n_buckets); |
873 | data.ht = ht; |
874 | data.new = new; |
875 | qht_map_iter__all_locked(old, &iter, &data); |
876 | qht_map_debug__all_locked(new); |
877 | |
878 | atomic_rcu_set(&ht->map, new); |
879 | qht_map_unlock_buckets(old); |
880 | call_rcu(old, qht_map_destroy, rcu); |
881 | } |
882 | |
883 | bool qht_resize(struct qht *ht, size_t n_elems) |
884 | { |
885 | size_t n_buckets = qht_elems_to_buckets(n_elems); |
    bool ret = false;
887 | |
888 | qht_lock(ht); |
889 | if (n_buckets != ht->map->n_buckets) { |
890 | struct qht_map *new; |
891 | |
892 | new = qht_map_create(n_buckets); |
893 | qht_do_resize(ht, new); |
894 | ret = true; |
895 | } |
896 | qht_unlock(ht); |
897 | |
898 | return ret; |
899 | } |
900 | |
901 | /* pass @stats to qht_statistics_destroy() when done */ |
902 | void qht_statistics_init(const struct qht *ht, struct qht_stats *stats) |
903 | { |
904 | const struct qht_map *map; |
    size_t i;
906 | |
907 | map = atomic_rcu_read(&ht->map); |
908 | |
909 | stats->used_head_buckets = 0; |
910 | stats->entries = 0; |
911 | qdist_init(&stats->chain); |
912 | qdist_init(&stats->occupancy); |
913 | /* bail out if the qht has not yet been initialized */ |
914 | if (unlikely(map == NULL)) { |
915 | stats->head_buckets = 0; |
916 | return; |
917 | } |
918 | stats->head_buckets = map->n_buckets; |
919 | |
920 | for (i = 0; i < map->n_buckets; i++) { |
921 | const struct qht_bucket *head = &map->buckets[i]; |
922 | const struct qht_bucket *b; |
923 | unsigned int version; |
924 | size_t buckets; |
925 | size_t entries; |
926 | int j; |
927 | |
928 | do { |
929 | version = seqlock_read_begin(&head->sequence); |
930 | buckets = 0; |
931 | entries = 0; |
932 | b = head; |
933 | do { |
934 | for (j = 0; j < QHT_BUCKET_ENTRIES; j++) { |
935 | if (atomic_read(&b->pointers[j]) == NULL) { |
936 | break; |
937 | } |
938 | entries++; |
939 | } |
940 | buckets++; |
941 | b = atomic_rcu_read(&b->next); |
942 | } while (b); |
943 | } while (seqlock_read_retry(&head->sequence, version)); |
944 | |
945 | if (entries) { |
946 | qdist_inc(&stats->chain, buckets); |
947 | qdist_inc(&stats->occupancy, |
948 | (double)entries / QHT_BUCKET_ENTRIES / buckets); |
949 | stats->used_head_buckets++; |
950 | stats->entries += entries; |
951 | } else { |
952 | qdist_inc(&stats->occupancy, 0); |
953 | } |
954 | } |
955 | } |
956 | |
957 | void qht_statistics_destroy(struct qht_stats *stats) |
958 | { |
959 | qdist_destroy(&stats->occupancy); |
960 | qdist_destroy(&stats->chain); |
961 | } |
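
/*
 * Example: gathering and reporting occupancy statistics (a sketch; qdist_avg()
 * is assumed to come from qemu/qdist.h):
 *
 *     struct qht_stats stats;
 *
 *     qht_statistics_init(&ht, &stats);
 *     printf("entries: %zu, used buckets: %zu/%zu, avg chain length: %.2f\n",
 *            stats.entries, stats.used_head_buckets, stats.head_buckets,
 *            qdist_avg(&stats.chain));
 *     qht_statistics_destroy(&stats);
 */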
962 | |