1 | /* |
2 | * Copyright 2010-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"). |
5 | * You may not use this file except in compliance with the License. |
6 | * A copy of the License is located at |
7 | * |
8 | * http://aws.amazon.com/apache2.0 |
9 | * |
10 | * or in the "license" file accompanying this file. This file is distributed |
11 | * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either |
12 | * express or implied. See the License for the specific language governing |
13 | * permissions and limitations under the License. |
14 | */ |
15 | |
/* For more information on how the RH (Robin Hood) hash works, and in particular
 * how we do deletions, see:
 * http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/
 */
20 | |
21 | #include <aws/common/hash_table.h> |
22 | #include <aws/common/math.h> |
23 | #include <aws/common/private/hash_table_impl.h> |
24 | #include <aws/common/string.h> |
25 | |
26 | #include <limits.h> |
27 | #include <stdio.h> |
28 | #include <stdlib.h> |
29 | |
30 | /* Include lookup3.c so we can (potentially) inline it and make use of the mix() |
31 | * macro. */ |
32 | #include <aws/common/private/lookup3.inl> |
33 | |
34 | static void s_suppress_unused_lookup3_func_warnings(void) { |
35 | /* We avoid making changes to lookup3 if we can avoid it, but since it has functions |
36 | * we're not using, reference them somewhere to suppress the unused function warning. |
37 | */ |
38 | (void)hashword; |
39 | (void)hashword2; |
40 | (void)hashlittle; |
41 | (void)hashbig; |
42 | } |
43 | |
44 | /** |
45 | * Calculate the hash for the given key. |
46 | * Ensures a reasonable semantics for null keys. |
 * Ensures that no object ever hashes to 0, which is the sentinel value for an empty hash element.
48 | */ |
49 | static uint64_t s_hash_for(struct hash_table_state *state, const void *key) { |
50 | AWS_PRECONDITION(hash_table_state_is_valid(state)); |
51 | s_suppress_unused_lookup3_func_warnings(); |
52 | |
53 | if (key == NULL) { |
54 | /* The best answer */ |
55 | return 42; |
56 | } |
57 | |
58 | uint64_t hash_code = state->hash_fn(key); |
59 | if (!hash_code) { |
60 | hash_code = 1; |
61 | } |
62 | AWS_RETURN_WITH_POSTCONDITION(hash_code, hash_code != 0); |
63 | } |
64 | |
65 | /** |
66 | * Check equality of two objects, with a reasonable semantics for null. |
67 | */ |
68 | static bool s_safe_eq_check(aws_hash_callback_eq_fn *equals_fn, const void *a, const void *b) { |
69 | /* Short circuit if the pointers are the same */ |
70 | if (a == b) { |
71 | return true; |
72 | } |
73 | /* If one but not both are null, the objects are not equal */ |
74 | if (a == NULL || b == NULL) { |
75 | return false; |
76 | } |
77 | /* If both are non-null, call the underlying equals fn */ |
78 | return equals_fn(a, b); |
79 | } |
80 | |
81 | /** |
82 | * Check equality of two hash keys, with a reasonable semantics for null keys. |
83 | */ |
84 | static bool s_hash_keys_eq(struct hash_table_state *state, const void *a, const void *b) { |
85 | AWS_PRECONDITION(hash_table_state_is_valid(state)); |
86 | bool rval = s_safe_eq_check(state->equals_fn, a, b); |
87 | AWS_RETURN_WITH_POSTCONDITION(rval, hash_table_state_is_valid(state)); |
88 | } |
89 | |
90 | static size_t s_index_for(struct hash_table_state *map, struct hash_table_entry *entry) { |
91 | AWS_PRECONDITION(hash_table_state_is_valid(map)); |
92 | size_t index = entry - map->slots; |
93 | AWS_RETURN_WITH_POSTCONDITION(index, index < map->size && hash_table_state_is_valid(map)); |
94 | } |
95 | |
#if 0
/* Useful debugging code for anyone working on this in the future */
static uint64_t s_distance(struct hash_table_state *state, size_t index) {
    return (index - state->slots[index].hash_code) & state->mask;
}

void hash_dump(struct aws_hash_table *tbl) {
    struct hash_table_state *state = tbl->p_impl;

    printf("Dumping hash table contents:\n");

    for (size_t i = 0; i < state->size; i++) {
        printf("%7zu: ", i);
        struct hash_table_entry *e = &state->slots[i];
        if (!e->hash_code) {
            printf("EMPTY\n");
        } else {
            printf(
                "k: %p v: %p hash_code: %llu displacement: %llu\n",
                e->element.key,
                e->element.value,
                (unsigned long long)e->hash_code,
                (unsigned long long)s_distance(state, i));
        }
    }
}
#endif
120 | |
#if 0
/* Not currently exposed as an API. Should we have something like this? Useful for benchmarks */
AWS_COMMON_API
void aws_hash_table_print_stats(struct aws_hash_table *table) {
    struct hash_table_state *state = table->p_impl;
    uint64_t total_disp = 0;
    uint64_t max_disp = 0;

    printf("\n=== Hash table statistics ===\n");
    printf(
        "Table size: %zu/%zu (max load %zu, remaining %zu)\n",
        state->entry_count,
        state->size,
        state->max_load,
        state->max_load - state->entry_count);
    printf(
        "Load factor: %02.2lf%% (max %02.2lf%%)\n",
        100.0 * ((double)state->entry_count / (double)state->size),
        100.0 * state->max_load_factor);

    for (size_t i = 0; i < state->size; i++) {
        if (state->slots[i].hash_code) {
            uint64_t displacement = s_distance(state, i);
            total_disp += displacement;
            if (displacement > max_disp) {
                max_disp = displacement;
            }
        }
    }

    size_t *disp_counts = calloc(max_disp + 1, sizeof(*disp_counts));
    if (!disp_counts) {
        return;
    }

    for (size_t i = 0; i < state->size; i++) {
        if (state->slots[i].hash_code) {
            disp_counts[s_distance(state, i)]++;
        }
    }

    uint64_t median = 0;
    uint64_t passed = 0;
    for (uint64_t i = 0; i <= max_disp && passed < total_disp / 2; i++) {
        median = i;
        passed += disp_counts[i];
    }

    printf(
        "Displacement statistics: Avg %02.2lf max %llu median %llu\n",
        (double)total_disp / (double)state->entry_count,
        (unsigned long long)max_disp,
        (unsigned long long)median);
    for (uint64_t i = 0; i <= max_disp; i++) {
        printf("Displacement %2llu: %zu entries\n", (unsigned long long)i, disp_counts[i]);
    }
    free(disp_counts);
    printf("\n");
}
#endif
168 | |
169 | size_t aws_hash_table_get_entry_count(const struct aws_hash_table *map) { |
170 | struct hash_table_state *state = map->p_impl; |
171 | return state->entry_count; |
172 | } |
173 | |
174 | /* Given a header template, allocates space for a hash table of the appropriate |
175 | * size, and copies the state header into this allocated memory, which is |
176 | * returned. |
177 | */ |
178 | static struct hash_table_state *s_alloc_state(const struct hash_table_state *template) { |
179 | size_t required_bytes; |
180 | if (hash_table_state_required_bytes(template->size, &required_bytes)) { |
181 | return NULL; |
182 | } |
183 | |
    /* An empty slot has hash_code 0, so this marks all slots as empty */
185 | struct hash_table_state *state = aws_mem_calloc(template->alloc, 1, required_bytes); |
186 | |
187 | if (state == NULL) { |
188 | return state; |
189 | } |
190 | |
191 | *state = *template; |
192 | return state; |
193 | } |
194 | |
195 | /* Computes the correct size and max_load based on a requested size. */ |
196 | static int s_update_template_size(struct hash_table_state *template, size_t expected_elements) { |
197 | size_t min_size = expected_elements; |
198 | |
199 | if (min_size < 2) { |
200 | min_size = 2; |
201 | } |
202 | |
203 | /* size is always a power of 2 */ |
204 | size_t size; |
205 | if (aws_round_up_to_power_of_two(min_size, &size)) { |
206 | return AWS_OP_ERR; |
207 | } |
208 | |
209 | /* Update the template once we've calculated everything successfully */ |
210 | template->size = size; |
211 | template->max_load = (size_t)(template->max_load_factor * (double)template->size); |
212 | /* Ensure that there is always at least one empty slot in the hash table */ |
213 | if (template->max_load >= size) { |
214 | template->max_load = size - 1; |
215 | } |
216 | |
217 | /* Since size is a power of 2: (index & (size - 1)) == (index % size) */ |
218 | template->mask = size - 1; |
219 | |
220 | return AWS_OP_SUCCESS; |
221 | } |
222 | |
223 | int aws_hash_table_init( |
224 | struct aws_hash_table *map, |
225 | struct aws_allocator *alloc, |
226 | size_t size, |
227 | aws_hash_fn *hash_fn, |
228 | aws_hash_callback_eq_fn *equals_fn, |
229 | aws_hash_callback_destroy_fn *destroy_key_fn, |
230 | aws_hash_callback_destroy_fn *destroy_value_fn) { |
231 | AWS_PRECONDITION(map != NULL); |
232 | AWS_PRECONDITION(alloc != NULL); |
233 | AWS_PRECONDITION(hash_fn != NULL); |
234 | AWS_PRECONDITION(equals_fn != NULL); |
235 | |
236 | struct hash_table_state template; |
237 | template.hash_fn = hash_fn; |
238 | template.equals_fn = equals_fn; |
239 | template.destroy_key_fn = destroy_key_fn; |
240 | template.destroy_value_fn = destroy_value_fn; |
241 | template.alloc = alloc; |
242 | |
243 | template.entry_count = 0; |
244 | template.max_load_factor = 0.95; /* TODO - make configurable? */ |
245 | |
246 | if (s_update_template_size(&template, size)) { |
247 | return AWS_OP_ERR; |
248 | } |
249 | map->p_impl = s_alloc_state(&template); |
250 | |
251 | if (!map->p_impl) { |
252 | return AWS_OP_ERR; |
253 | } |
254 | |
255 | AWS_SUCCEED_WITH_POSTCONDITION(aws_hash_table_is_valid(map)); |
256 | } |
257 | |
258 | void aws_hash_table_clean_up(struct aws_hash_table *map) { |
259 | AWS_PRECONDITION(map != NULL); |
260 | AWS_PRECONDITION( |
261 | map->p_impl == NULL || aws_hash_table_is_valid(map), |
262 | "Input aws_hash_table [map] must be valid or hash_table_state pointer [map->p_impl] must be NULL, in case " |
263 | "aws_hash_table_clean_up was called twice." ); |
264 | struct hash_table_state *state = map->p_impl; |
265 | |
266 | /* Ensure that we're idempotent */ |
267 | if (!state) { |
268 | return; |
269 | } |
270 | |
271 | aws_hash_table_clear(map); |
272 | aws_mem_release(map->p_impl->alloc, map->p_impl); |
273 | |
274 | map->p_impl = NULL; |
275 | AWS_POSTCONDITION(map->p_impl == NULL); |
276 | } |
277 | |
278 | void aws_hash_table_swap(struct aws_hash_table *AWS_RESTRICT a, struct aws_hash_table *AWS_RESTRICT b) { |
279 | AWS_PRECONDITION(a != b); |
280 | struct aws_hash_table tmp = *a; |
281 | *a = *b; |
282 | *b = tmp; |
283 | } |
284 | |
285 | void aws_hash_table_move(struct aws_hash_table *AWS_RESTRICT to, struct aws_hash_table *AWS_RESTRICT from) { |
286 | AWS_PRECONDITION(to != NULL); |
287 | AWS_PRECONDITION(from != NULL); |
288 | AWS_PRECONDITION(to != from); |
289 | AWS_PRECONDITION(aws_hash_table_is_valid(from)); |
290 | |
291 | *to = *from; |
292 | AWS_ZERO_STRUCT(*from); |
293 | AWS_POSTCONDITION(aws_hash_table_is_valid(to)); |
294 | } |
295 | |
/* Tries to find where the requested key is, or where it would go if inserted.
 * Returns AWS_ERROR_SUCCESS if the item existed (leaving it in *entry),
 * or AWS_ERROR_HASHTBL_ITEM_NOT_FOUND if it did not (putting its destination
 * in *entry). Note that this does not take care of displacing whatever was in
 * that entry before.
 *
 * probe_idx is set to the probe index of the entry found.
 */
304 | |
305 | static int s_find_entry1( |
306 | struct hash_table_state *state, |
307 | uint64_t hash_code, |
308 | const void *key, |
309 | struct hash_table_entry **p_entry, |
310 | size_t *p_probe_idx); |
311 | |
312 | /* Inlined fast path: Check the first slot, only. */ |
313 | /* TODO: Force inlining? */ |
static inline int s_find_entry(
315 | struct hash_table_state *state, |
316 | uint64_t hash_code, |
317 | const void *key, |
318 | struct hash_table_entry **p_entry, |
319 | size_t *p_probe_idx) { |
320 | struct hash_table_entry *entry = &state->slots[hash_code & state->mask]; |
321 | |
322 | if (entry->hash_code == 0) { |
323 | if (p_probe_idx) { |
324 | *p_probe_idx = 0; |
325 | } |
326 | *p_entry = entry; |
327 | return AWS_ERROR_HASHTBL_ITEM_NOT_FOUND; |
328 | } |
329 | |
330 | if (entry->hash_code == hash_code && s_hash_keys_eq(state, key, entry->element.key)) { |
331 | if (p_probe_idx) { |
332 | *p_probe_idx = 0; |
333 | } |
334 | *p_entry = entry; |
335 | return AWS_OP_SUCCESS; |
336 | } |
337 | |
338 | return s_find_entry1(state, hash_code, key, p_entry, p_probe_idx); |
339 | } |
340 | |
341 | static int s_find_entry1( |
342 | struct hash_table_state *state, |
343 | uint64_t hash_code, |
344 | const void *key, |
345 | struct hash_table_entry **p_entry, |
346 | size_t *p_probe_idx) { |
347 | size_t probe_idx = 1; |
    /* Starting at probe index 1 (the caller already checked the home slot), walk the probe
     * sequence until we find the key, hit an empty slot, or pass an entry whose own probe
     * length is shorter than ours, which proves the key is absent.
     */
351 | |
352 | int rv; |
353 | struct hash_table_entry *entry; |
354 | /* This loop is guaranteed to terminate because entry_probe is bounded above by state->mask (i.e. state->size - 1). |
355 | * Since probe_idx increments every loop iteration, it will become larger than entry_probe after at most state->size |
356 | * transitions and the loop will exit (if it hasn't already) |
357 | */ |
358 | while (1) { |
359 | #ifdef CBMC |
360 | # pragma CPROVER check push |
361 | # pragma CPROVER check disable "unsigned-overflow" |
362 | #endif |
363 | uint64_t index = (hash_code + probe_idx) & state->mask; |
364 | #ifdef CBMC |
365 | # pragma CPROVER check pop |
366 | #endif |
367 | entry = &state->slots[index]; |
368 | if (!entry->hash_code) { |
369 | rv = AWS_ERROR_HASHTBL_ITEM_NOT_FOUND; |
370 | break; |
371 | } |
372 | |
373 | if (entry->hash_code == hash_code && s_hash_keys_eq(state, key, entry->element.key)) { |
374 | rv = AWS_ERROR_SUCCESS; |
375 | break; |
376 | } |
377 | |
378 | #ifdef CBMC |
379 | # pragma CPROVER check push |
380 | # pragma CPROVER check disable "unsigned-overflow" |
381 | #endif |
382 | uint64_t entry_probe = (index - entry->hash_code) & state->mask; |
383 | #ifdef CBMC |
384 | # pragma CPROVER check pop |
385 | #endif |
386 | |
387 | if (entry_probe < probe_idx) { |
388 | /* We now know that our target entry cannot exist; if it did exist, |
389 | * it would be at the current location as it has a higher probe |
390 | * length than the entry we are examining and thus would have |
391 | * preempted that item |
392 | */ |
393 | rv = AWS_ERROR_HASHTBL_ITEM_NOT_FOUND; |
394 | break; |
395 | } |
396 | |
397 | probe_idx++; |
398 | } |
399 | |
400 | *p_entry = entry; |
401 | if (p_probe_idx) { |
402 | *p_probe_idx = probe_idx; |
403 | } |
404 | |
405 | return rv; |
406 | } |
407 | |
408 | int aws_hash_table_find(const struct aws_hash_table *map, const void *key, struct aws_hash_element **p_elem) { |
409 | AWS_PRECONDITION(aws_hash_table_is_valid(map)); |
    AWS_PRECONDITION(AWS_OBJECT_PTR_IS_WRITABLE(p_elem), "Input aws_hash_element pointer [p_elem] must be writable.");
411 | |
412 | struct hash_table_state *state = map->p_impl; |
413 | uint64_t hash_code = s_hash_for(state, key); |
414 | struct hash_table_entry *entry; |
415 | |
416 | int rv = s_find_entry(state, hash_code, key, &entry, NULL); |
417 | |
418 | if (rv == AWS_ERROR_SUCCESS) { |
419 | *p_elem = &entry->element; |
420 | } else { |
421 | *p_elem = NULL; |
422 | } |
423 | AWS_SUCCEED_WITH_POSTCONDITION(aws_hash_table_is_valid(map)); |
424 | } |
425 | |
426 | /** |
427 | * Attempts to find a home for the given entry. |
428 | * If the entry was empty (i.e. hash-code of 0), then the function does nothing and returns NULL |
429 | * Otherwise, it emplaces the item, and returns a pointer to the newly emplaced entry. |
430 | * This function is only called after the hash-table has been expanded to fit the new element, |
431 | * so it should never fail. |
432 | */ |
433 | static struct hash_table_entry *s_emplace_item( |
434 | struct hash_table_state *state, |
435 | struct hash_table_entry entry, |
436 | size_t probe_idx) { |
437 | AWS_PRECONDITION(hash_table_state_is_valid(state)); |
438 | |
439 | if (entry.hash_code == 0) { |
440 | AWS_RETURN_WITH_POSTCONDITION(NULL, hash_table_state_is_valid(state)); |
441 | } |
442 | |
443 | struct hash_table_entry *rval = NULL; |
444 | |
445 | /* Since a valid hash_table has at least one empty element, this loop will always terminate in at most linear time |
446 | */ |
447 | while (entry.hash_code != 0) { |
448 | #ifdef CBMC |
449 | # pragma CPROVER check push |
450 | # pragma CPROVER check disable "unsigned-overflow" |
451 | #endif |
452 | size_t index = (size_t)(entry.hash_code + probe_idx) & state->mask; |
453 | #ifdef CBMC |
454 | # pragma CPROVER check pop |
455 | #endif |
456 | struct hash_table_entry *victim = &state->slots[index]; |
457 | |
458 | #ifdef CBMC |
459 | # pragma CPROVER check push |
460 | # pragma CPROVER check disable "unsigned-overflow" |
461 | #endif |
462 | size_t victim_probe_idx = (size_t)(index - victim->hash_code) & state->mask; |
463 | #ifdef CBMC |
464 | # pragma CPROVER check pop |
465 | #endif |
466 | |
467 | if (!victim->hash_code || victim_probe_idx < probe_idx) { |
468 | /* The first thing we emplace is the entry itself. A pointer to its location becomes the rval */ |
469 | if (!rval) { |
470 | rval = victim; |
471 | } |
472 | |
473 | struct hash_table_entry tmp = *victim; |
474 | *victim = entry; |
475 | entry = tmp; |
476 | |
477 | probe_idx = victim_probe_idx + 1; |
478 | } else { |
479 | probe_idx++; |
480 | } |
481 | } |
482 | |
483 | AWS_RETURN_WITH_POSTCONDITION( |
484 | rval, |
485 | hash_table_state_is_valid(state) && rval >= &state->slots[0] && rval < &state->slots[state->size], |
486 | "Output hash_table_entry pointer [rval] must point in the slots of [state]." ); |
487 | } |
488 | |
489 | static int s_expand_table(struct aws_hash_table *map) { |
490 | struct hash_table_state *old_state = map->p_impl; |
491 | struct hash_table_state template = *old_state; |
492 | |
493 | size_t new_size; |
494 | if (aws_mul_size_checked(template.size, 2, &new_size)) { |
495 | return AWS_OP_ERR; |
496 | } |
497 | |
498 | if (s_update_template_size(&template, new_size)) { |
499 | return AWS_OP_ERR; |
500 | } |
501 | |
502 | struct hash_table_state *new_state = s_alloc_state(&template); |
503 | if (!new_state) { |
504 | return AWS_OP_ERR; |
505 | } |
506 | |
507 | for (size_t i = 0; i < old_state->size; i++) { |
508 | struct hash_table_entry entry = old_state->slots[i]; |
509 | if (entry.hash_code) { |
510 | /* We can directly emplace since we know we won't put the same item twice */ |
511 | s_emplace_item(new_state, entry, 0); |
512 | } |
513 | } |
514 | |
515 | map->p_impl = new_state; |
516 | aws_mem_release(new_state->alloc, old_state); |
517 | |
518 | return AWS_OP_SUCCESS; |
519 | } |
520 | |
521 | int aws_hash_table_create( |
522 | struct aws_hash_table *map, |
523 | const void *key, |
524 | struct aws_hash_element **p_elem, |
525 | int *was_created) { |
526 | |
527 | struct hash_table_state *state = map->p_impl; |
528 | uint64_t hash_code = s_hash_for(state, key); |
529 | struct hash_table_entry *entry; |
530 | size_t probe_idx; |
531 | int ignored; |
532 | if (!was_created) { |
533 | was_created = &ignored; |
534 | } |
535 | |
536 | int rv = s_find_entry(state, hash_code, key, &entry, &probe_idx); |
537 | |
538 | if (rv == AWS_ERROR_SUCCESS) { |
539 | if (p_elem) { |
540 | *p_elem = &entry->element; |
541 | } |
542 | *was_created = 0; |
543 | return AWS_OP_SUCCESS; |
544 | } |
545 | |
546 | /* Okay, we need to add an entry. Check the load factor first. */ |
547 | size_t incr_entry_count; |
548 | if (aws_add_size_checked(state->entry_count, 1, &incr_entry_count)) { |
549 | return AWS_OP_ERR; |
550 | } |
551 | if (incr_entry_count > state->max_load) { |
552 | rv = s_expand_table(map); |
553 | if (rv != AWS_OP_SUCCESS) { |
554 | /* Any error was already raised in expand_table */ |
555 | return rv; |
556 | } |
557 | state = map->p_impl; |
        /* If we expanded the table, we need to discard the probe index returned from find_entry,
         * as it's likely that we can find a more desirable slot. If we don't, then later gets will
         * terminate before reaching our probe index.
         *
         * n.b. currently we ignore this probe_idx subsequently, but leaving this here so we
         * don't forget when we optimize later. */
565 | probe_idx = 0; |
566 | } |
567 | |
568 | state->entry_count++; |
569 | struct hash_table_entry new_entry; |
570 | new_entry.element.key = key; |
571 | new_entry.element.value = NULL; |
572 | new_entry.hash_code = hash_code; |
573 | |
574 | entry = s_emplace_item(state, new_entry, probe_idx); |
575 | |
576 | if (p_elem) { |
577 | *p_elem = &entry->element; |
578 | } |
579 | |
580 | *was_created = 1; |
581 | |
582 | return AWS_OP_SUCCESS; |
583 | } |
584 | |
585 | AWS_COMMON_API |
586 | int aws_hash_table_put(struct aws_hash_table *map, const void *key, void *value, int *was_created) { |
587 | struct aws_hash_element *p_elem; |
588 | int was_created_fallback; |
589 | |
590 | if (!was_created) { |
591 | was_created = &was_created_fallback; |
592 | } |
593 | |
594 | if (aws_hash_table_create(map, key, &p_elem, was_created)) { |
595 | return AWS_OP_ERR; |
596 | } |
597 | |
598 | /* |
599 | * aws_hash_table_create might resize the table, which results in map->p_impl changing. |
600 | * It is therefore important to wait to read p_impl until after we return. |
601 | */ |
602 | struct hash_table_state *state = map->p_impl; |
603 | |
604 | if (!*was_created) { |
605 | if (p_elem->key != key && state->destroy_key_fn) { |
606 | state->destroy_key_fn((void *)p_elem->key); |
607 | } |
608 | |
609 | if (state->destroy_value_fn) { |
610 | state->destroy_value_fn((void *)p_elem->value); |
611 | } |
612 | } |
613 | |
614 | p_elem->key = key; |
615 | p_elem->value = value; |
616 | |
617 | return AWS_OP_SUCCESS; |
618 | } |
619 | |
620 | /* Clears an entry. Does _not_ invoke destructor callbacks. |
621 | * Returns the last slot touched (note that if we wrap, we'll report an index |
622 | * lower than the original entry's index) |
623 | */ |
624 | static size_t s_remove_entry(struct hash_table_state *state, struct hash_table_entry *entry) { |
625 | AWS_PRECONDITION(hash_table_state_is_valid(state)); |
626 | AWS_PRECONDITION(state->entry_count > 0); |
627 | AWS_PRECONDITION( |
628 | entry >= &state->slots[0] && entry < &state->slots[state->size], |
629 | "Input hash_table_entry [entry] pointer must point in the available slots." ); |
630 | state->entry_count--; |
631 | |
632 | /* Shift subsequent entries back until we find an entry that belongs at its |
633 | * current position. This is important to ensure that subsequent searches |
634 | * don't terminate at the removed element. |
635 | */ |
636 | size_t index = s_index_for(state, entry); |
637 | /* There is always at least one empty slot in the hash table, so this loop always terminates */ |
638 | while (1) { |
639 | size_t next_index = (index + 1) & state->mask; |
640 | |
641 | /* If we hit an empty slot, stop */ |
642 | if (!state->slots[next_index].hash_code) { |
643 | break; |
644 | } |
        /* If the next slot is at the start of its own probe sequence
         * (displacement zero), stop. Nothing with an earlier home slot can
         * come after it; otherwise this zero-displacement entry would have
         * been evicted from its home.
         */
650 | if ((state->slots[next_index].hash_code & state->mask) == next_index) { |
651 | break; |
652 | } |
653 | |
654 | /* Okay, shift this one back */ |
655 | state->slots[index] = state->slots[next_index]; |
656 | index = next_index; |
657 | } |
658 | |
659 | /* Clear the entry we shifted out of */ |
660 | AWS_ZERO_STRUCT(state->slots[index]); |
661 | AWS_RETURN_WITH_POSTCONDITION(index, hash_table_state_is_valid(state) && index <= state->size); |
662 | } |
663 | |
664 | int aws_hash_table_remove( |
665 | struct aws_hash_table *map, |
666 | const void *key, |
667 | struct aws_hash_element *p_value, |
668 | int *was_present) { |
669 | AWS_PRECONDITION(aws_hash_table_is_valid(map)); |
    AWS_PRECONDITION(
        p_value == NULL || AWS_OBJECT_PTR_IS_WRITABLE(p_value), "Input pointer [p_value] must be NULL or writable.");
    AWS_PRECONDITION(
        was_present == NULL || AWS_OBJECT_PTR_IS_WRITABLE(was_present),
        "Input pointer [was_present] must be NULL or writable.");
675 | |
676 | struct hash_table_state *state = map->p_impl; |
677 | uint64_t hash_code = s_hash_for(state, key); |
678 | struct hash_table_entry *entry; |
679 | int ignored; |
680 | |
681 | if (!was_present) { |
682 | was_present = &ignored; |
683 | } |
684 | |
685 | int rv = s_find_entry(state, hash_code, key, &entry, NULL); |
686 | |
687 | if (rv != AWS_ERROR_SUCCESS) { |
688 | *was_present = 0; |
689 | AWS_SUCCEED_WITH_POSTCONDITION(aws_hash_table_is_valid(map)); |
690 | } |
691 | |
692 | *was_present = 1; |
693 | |
694 | if (p_value) { |
695 | *p_value = entry->element; |
696 | } else { |
697 | if (state->destroy_key_fn) { |
698 | state->destroy_key_fn((void *)entry->element.key); |
699 | } |
700 | if (state->destroy_value_fn) { |
701 | state->destroy_value_fn(entry->element.value); |
702 | } |
703 | } |
704 | s_remove_entry(state, entry); |
705 | |
706 | AWS_SUCCEED_WITH_POSTCONDITION(aws_hash_table_is_valid(map)); |
707 | } |
708 | |
709 | int aws_hash_table_remove_element(struct aws_hash_table *map, struct aws_hash_element *p_value) { |
710 | AWS_PRECONDITION(aws_hash_table_is_valid(map)); |
711 | AWS_PRECONDITION(p_value != NULL); |
712 | |
713 | struct hash_table_state *state = map->p_impl; |
714 | struct hash_table_entry *entry = AWS_CONTAINER_OF(p_value, struct hash_table_entry, element); |
715 | |
716 | s_remove_entry(state, entry); |
717 | |
718 | AWS_SUCCEED_WITH_POSTCONDITION(aws_hash_table_is_valid(map)); |
719 | } |
720 | |
721 | int aws_hash_table_foreach( |
722 | struct aws_hash_table *map, |
723 | int (*callback)(void *context, struct aws_hash_element *pElement), |
724 | void *context) { |
725 | |
726 | for (struct aws_hash_iter iter = aws_hash_iter_begin(map); !aws_hash_iter_done(&iter); aws_hash_iter_next(&iter)) { |
727 | int rv = callback(context, &iter.element); |
728 | |
729 | if (rv & AWS_COMMON_HASH_TABLE_ITER_DELETE) { |
730 | aws_hash_iter_delete(&iter, false); |
731 | } |
732 | |
733 | if (!(rv & AWS_COMMON_HASH_TABLE_ITER_CONTINUE)) { |
734 | break; |
735 | } |
736 | } |
737 | |
738 | return AWS_OP_SUCCESS; |
739 | } |
740 | |
741 | bool aws_hash_table_eq( |
742 | const struct aws_hash_table *a, |
743 | const struct aws_hash_table *b, |
744 | aws_hash_callback_eq_fn *value_eq) { |
745 | AWS_PRECONDITION(aws_hash_table_is_valid(a)); |
746 | AWS_PRECONDITION(aws_hash_table_is_valid(b)); |
747 | AWS_PRECONDITION(value_eq != NULL); |
748 | |
749 | if (aws_hash_table_get_entry_count(a) != aws_hash_table_get_entry_count(b)) { |
750 | AWS_RETURN_WITH_POSTCONDITION(false, aws_hash_table_is_valid(a) && aws_hash_table_is_valid(b)); |
751 | } |
752 | |
753 | /* |
754 | * Now that we have established that the two tables have the same number of |
755 | * entries, we can simply iterate one and compare against the same key in |
756 | * the other. |
757 | */ |
758 | for (size_t i = 0; i < a->p_impl->size; ++i) { |
759 | const struct hash_table_entry *const a_entry = &a->p_impl->slots[i]; |
760 | if (a_entry->hash_code == 0) { |
761 | continue; |
762 | } |
763 | |
764 | struct aws_hash_element *b_element = NULL; |
765 | |
766 | aws_hash_table_find(b, a_entry->element.key, &b_element); |
767 | |
768 | if (!b_element) { |
769 | /* Key is present in A only */ |
770 | AWS_RETURN_WITH_POSTCONDITION(false, aws_hash_table_is_valid(a) && aws_hash_table_is_valid(b)); |
771 | } |
772 | |
773 | if (!s_safe_eq_check(value_eq, a_entry->element.value, b_element->value)) { |
774 | AWS_RETURN_WITH_POSTCONDITION(false, aws_hash_table_is_valid(a) && aws_hash_table_is_valid(b)); |
775 | } |
776 | } |
777 | AWS_RETURN_WITH_POSTCONDITION(true, aws_hash_table_is_valid(a) && aws_hash_table_is_valid(b)); |
778 | } |
779 | |
780 | /** |
 * Given an iterator and a start slot, find the next filled slot if one exists;
 * otherwise, return an iter that will return true for aws_hash_iter_done().
783 | * Note that aws_hash_iter_is_valid() need not hold on entry to the function, since |
784 | * it can be called on a partially constructed iter from aws_hash_iter_begin(). |
785 | * |
786 | * Note that calling this on an iterator which is "done" is idempotent: it will return another |
787 | * iterator which is "done". |
788 | */ |
789 | static inline void s_get_next_element(struct aws_hash_iter *iter, size_t start_slot) { |
790 | AWS_PRECONDITION(iter != NULL); |
791 | AWS_PRECONDITION(aws_hash_table_is_valid(iter->map)); |
792 | struct hash_table_state *state = iter->map->p_impl; |
793 | size_t limit = iter->limit; |
794 | |
795 | for (size_t i = start_slot; i < limit; i++) { |
796 | struct hash_table_entry *entry = &state->slots[i]; |
797 | |
798 | if (entry->hash_code) { |
799 | iter->element = entry->element; |
800 | iter->slot = i; |
801 | iter->status = AWS_HASH_ITER_STATUS_READY_FOR_USE; |
802 | return; |
803 | } |
804 | } |
805 | iter->element.key = NULL; |
806 | iter->element.value = NULL; |
807 | iter->slot = iter->limit; |
808 | iter->status = AWS_HASH_ITER_STATUS_DONE; |
809 | AWS_POSTCONDITION(aws_hash_iter_is_valid(iter)); |
810 | } |
811 | |
812 | struct aws_hash_iter aws_hash_iter_begin(const struct aws_hash_table *map) { |
813 | AWS_PRECONDITION(aws_hash_table_is_valid(map)); |
814 | struct hash_table_state *state = map->p_impl; |
815 | struct aws_hash_iter iter; |
816 | AWS_ZERO_STRUCT(iter); |
817 | iter.map = map; |
818 | iter.limit = state->size; |
819 | s_get_next_element(&iter, 0); |
820 | AWS_RETURN_WITH_POSTCONDITION( |
821 | iter, |
822 | aws_hash_iter_is_valid(&iter) && |
823 | (iter.status == AWS_HASH_ITER_STATUS_DONE || iter.status == AWS_HASH_ITER_STATUS_READY_FOR_USE), |
824 | "The status of output aws_hash_iter [iter] must either be DONE or READY_FOR_USE." ); |
825 | } |
826 | |
827 | bool aws_hash_iter_done(const struct aws_hash_iter *iter) { |
828 | AWS_PRECONDITION(aws_hash_iter_is_valid(iter)); |
829 | AWS_PRECONDITION( |
830 | iter->status == AWS_HASH_ITER_STATUS_DONE || iter->status == AWS_HASH_ITER_STATUS_READY_FOR_USE, |
831 | "Input aws_hash_iter [iter] must either be done, or ready to use." ); |
832 | /* |
833 | * SIZE_MAX is a valid (non-terminal) value for iter->slot in the event that |
834 | * we delete slot 0. See comments in aws_hash_iter_delete. |
835 | * |
836 | * As such we must use == rather than >= here. |
837 | */ |
838 | bool rval = (iter->slot == iter->limit); |
    AWS_POSTCONDITION(
        iter->status == AWS_HASH_ITER_STATUS_DONE || iter->status == AWS_HASH_ITER_STATUS_READY_FOR_USE,
        "The status of output aws_hash_iter [iter] must either be DONE or READY_FOR_USE.");
    AWS_POSTCONDITION(
        rval == (iter->status == AWS_HASH_ITER_STATUS_DONE),
        "Output bool [rval] must be true if and only if the status of [iter] is DONE.");
845 | AWS_POSTCONDITION(aws_hash_iter_is_valid(iter)); |
846 | return rval; |
847 | } |
848 | |
849 | void aws_hash_iter_next(struct aws_hash_iter *iter) { |
850 | AWS_PRECONDITION(aws_hash_iter_is_valid(iter)); |
851 | #ifdef CBMC |
852 | # pragma CPROVER check push |
853 | # pragma CPROVER check disable "unsigned-overflow" |
854 | #endif |
855 | s_get_next_element(iter, iter->slot + 1); |
856 | #ifdef CBMC |
857 | # pragma CPROVER check pop |
858 | #endif |
859 | AWS_POSTCONDITION( |
860 | iter->status == AWS_HASH_ITER_STATUS_DONE || iter->status == AWS_HASH_ITER_STATUS_READY_FOR_USE, |
861 | "The status of output aws_hash_iter [iter] must either be DONE or READY_FOR_USE." ); |
862 | AWS_POSTCONDITION(aws_hash_iter_is_valid(iter)); |
863 | } |
864 | |
865 | void aws_hash_iter_delete(struct aws_hash_iter *iter, bool destroy_contents) { |
866 | AWS_PRECONDITION( |
        iter->status == AWS_HASH_ITER_STATUS_READY_FOR_USE, "Input aws_hash_iter [iter] must be ready for use.");
868 | AWS_PRECONDITION(aws_hash_iter_is_valid(iter)); |
    AWS_PRECONDITION(
        iter->map->p_impl->entry_count > 0,
        "The hash_table_state pointed to by input [iter] must contain at least one entry.");
872 | |
873 | struct hash_table_state *state = iter->map->p_impl; |
874 | if (destroy_contents) { |
875 | if (state->destroy_key_fn) { |
876 | state->destroy_key_fn((void *)iter->element.key); |
877 | } |
878 | if (state->destroy_value_fn) { |
879 | state->destroy_value_fn(iter->element.value); |
880 | } |
881 | } |
882 | |
883 | size_t last_index = s_remove_entry(state, &state->slots[iter->slot]); |
884 | |
    /* If we shifted elements that are not part of the window we intend to iterate
     * over, it means we shifted an element that we already visited into the
     * iter->limit - 1 position. To avoid double iteration, we'll now reduce the
     * limit to compensate.
     *
     * Note that if last_index equals iter->slot, no elements were shifted at all,
     * and the limit needs no adjustment.
     */
893 | if (last_index < iter->slot || last_index >= iter->limit) { |
894 | iter->limit--; |
895 | } |
896 | |
897 | /* |
898 | * After removing this entry, the next entry might be in the same slot, or |
899 | * in some later slot, or we might have no further entries. |
900 | * |
901 | * We also expect that the caller will call aws_hash_iter_done and aws_hash_iter_next |
902 | * after this delete call. This gets a bit tricky if we just deleted the value |
903 | * in slot 0, and a new value has shifted in. |
904 | * |
905 | * To deal with this, we'll just step back one slot, and let _next start iteration |
906 | * at our current slot. Note that if we just deleted slot 0, this will result in |
907 | * underflowing to SIZE_MAX; we have to take care in aws_hash_iter_done to avoid |
908 | * treating this as an end-of-iteration condition. |
909 | */ |
910 | #ifdef CBMC |
911 | # pragma CPROVER check push |
912 | # pragma CPROVER check disable "unsigned-overflow" |
913 | #endif |
914 | iter->slot--; |
915 | #ifdef CBMC |
916 | # pragma CPROVER check pop |
917 | #endif |
918 | iter->status = AWS_HASH_ITER_STATUS_DELETE_CALLED; |
919 | AWS_POSTCONDITION( |
920 | iter->status == AWS_HASH_ITER_STATUS_DELETE_CALLED, |
921 | "The status of output aws_hash_iter [iter] must be DELETE_CALLED." ); |
922 | AWS_POSTCONDITION(aws_hash_iter_is_valid(iter)); |
923 | } |
924 | |
925 | void aws_hash_table_clear(struct aws_hash_table *map) { |
926 | AWS_PRECONDITION(aws_hash_table_is_valid(map)); |
927 | struct hash_table_state *state = map->p_impl; |
928 | |
929 | /* Check that we have at least one destructor before iterating over the table */ |
930 | if (state->destroy_key_fn || state->destroy_value_fn) { |
931 | for (size_t i = 0; i < state->size; ++i) { |
932 | struct hash_table_entry *entry = &state->slots[i]; |
933 | if (!entry->hash_code) { |
934 | continue; |
935 | } |
936 | if (state->destroy_key_fn) { |
937 | state->destroy_key_fn((void *)entry->element.key); |
938 | } |
939 | if (state->destroy_value_fn) { |
940 | state->destroy_value_fn(entry->element.value); |
941 | } |
942 | } |
943 | } |
944 | /* Since hash code 0 represents an empty slot we can just zero out the |
945 | * entire table. */ |
946 | memset(state->slots, 0, sizeof(*state->slots) * state->size); |
947 | |
948 | state->entry_count = 0; |
949 | AWS_POSTCONDITION(aws_hash_table_is_valid(map)); |
950 | } |
951 | |
952 | uint64_t aws_hash_c_string(const void *item) { |
953 | AWS_PRECONDITION(aws_c_string_is_valid(item)); |
954 | const char *str = item; |
955 | |
956 | /* first digits of pi in hex */ |
957 | uint32_t b = 0x3243F6A8, c = 0x885A308D; |
958 | hashlittle2(str, strlen(str), &c, &b); |
959 | |
960 | return ((uint64_t)b << 32) | c; |
961 | } |
962 | |
963 | uint64_t aws_hash_string(const void *item) { |
964 | AWS_PRECONDITION(aws_string_is_valid(item)); |
965 | const struct aws_string *str = item; |
966 | |
967 | /* first digits of pi in hex */ |
968 | uint32_t b = 0x3243F6A8, c = 0x885A308D; |
969 | hashlittle2(aws_string_bytes(str), str->len, &c, &b); |
970 | AWS_RETURN_WITH_POSTCONDITION(((uint64_t)b << 32) | c, aws_string_is_valid(str)); |
971 | } |
972 | |
973 | uint64_t aws_hash_byte_cursor_ptr(const void *item) { |
974 | AWS_PRECONDITION(aws_byte_cursor_is_valid(item)); |
975 | const struct aws_byte_cursor *cur = item; |
976 | |
977 | /* first digits of pi in hex */ |
978 | uint32_t b = 0x3243F6A8, c = 0x885A308D; |
979 | hashlittle2(cur->ptr, cur->len, &c, &b); |
980 | AWS_RETURN_WITH_POSTCONDITION(((uint64_t)b << 32) | c, aws_byte_cursor_is_valid(cur)); |
981 | } |
982 | |
983 | uint64_t aws_hash_ptr(const void *item) { |
984 | /* Since the numeric value of the pointer is considered, not the memory behind it, 0 is an acceptable value */ |
985 | /* first digits of e in hex |
986 | * 2.b7e 1516 28ae d2a6 */ |
987 | uint32_t b = 0x2b7e1516, c = 0x28aed2a6; |
988 | |
989 | hashlittle2(&item, sizeof(item), &c, &b); |
990 | |
991 | return ((uint64_t)b << 32) | c; |
992 | } |
993 | |
994 | bool aws_hash_callback_c_str_eq(const void *a, const void *b) { |
995 | AWS_PRECONDITION(aws_c_string_is_valid(a)); |
996 | AWS_PRECONDITION(aws_c_string_is_valid(b)); |
997 | bool rval = !strcmp(a, b); |
998 | AWS_RETURN_WITH_POSTCONDITION(rval, aws_c_string_is_valid(a) && aws_c_string_is_valid(b)); |
999 | } |
1000 | |
1001 | bool aws_hash_callback_string_eq(const void *a, const void *b) { |
1002 | AWS_PRECONDITION(aws_string_is_valid(a)); |
1003 | AWS_PRECONDITION(aws_string_is_valid(b)); |
1004 | bool rval = aws_string_eq(a, b); |
    AWS_RETURN_WITH_POSTCONDITION(rval, aws_string_is_valid(a) && aws_string_is_valid(b));
1006 | } |
1007 | |
1008 | void aws_hash_callback_string_destroy(void *a) { |
1009 | AWS_PRECONDITION(aws_string_is_valid(a)); |
1010 | aws_string_destroy(a); |
1011 | } |
1012 | |
1013 | bool aws_ptr_eq(const void *a, const void *b) { |
1014 | return a == b; |
1015 | } |
1016 | |
1017 | /** |
1018 | * Best-effort check of hash_table_state data-structure invariants |
1019 | * Some invariants, such as that the number of entries is actually the |
1020 | * same as the entry_count field, would require a loop to check |
1021 | */ |
1022 | bool aws_hash_table_is_valid(const struct aws_hash_table *map) { |
1023 | return map && map->p_impl && hash_table_state_is_valid(map->p_impl); |
1024 | } |
1025 | |
1026 | /** |
1027 | * Best-effort check of hash_table_state data-structure invariants |
1028 | * Some invariants, such as that the number of entries is actually the |
1029 | * same as the entry_count field, would require a loop to check |
1030 | */ |
1031 | bool hash_table_state_is_valid(const struct hash_table_state *map) { |
1032 | if (!map) { |
1033 | return false; |
1034 | } |
1035 | bool hash_fn_nonnull = (map->hash_fn != NULL); |
1036 | bool equals_fn_nonnull = (map->equals_fn != NULL); |
1037 | /*destroy_key_fn and destroy_value_fn are both allowed to be NULL*/ |
1038 | bool alloc_nonnull = (map->alloc != NULL); |
1039 | bool size_at_least_two = (map->size >= 2); |
1040 | bool size_is_power_of_two = aws_is_power_of_two(map->size); |
1041 | bool entry_count = (map->entry_count <= map->max_load); |
1042 | bool max_load = (map->max_load < map->size); |
1043 | bool mask_is_correct = (map->mask == (map->size - 1)); |
    bool max_load_factor_bounded = map->max_load_factor == 0.95; /* (map->max_load_factor < 1.0) */
1045 | bool slots_allocated = AWS_MEM_IS_WRITABLE(&map->slots[0], sizeof(map->slots[0]) * map->size); |
1046 | |
1047 | return hash_fn_nonnull && equals_fn_nonnull && alloc_nonnull && size_at_least_two && size_is_power_of_two && |
1048 | entry_count && max_load && mask_is_correct && max_load_factor_bounded && slots_allocated; |
1049 | } |
1050 | |
1051 | /** |
1052 | * Given a pointer to a hash_iter, checks that it is well-formed, with all data-structure invariants. |
1053 | */ |
1054 | bool aws_hash_iter_is_valid(const struct aws_hash_iter *iter) { |
1055 | if (!iter) { |
1056 | return false; |
1057 | } |
1058 | if (!iter->map) { |
1059 | return false; |
1060 | } |
1061 | if (!aws_hash_table_is_valid(iter->map)) { |
1062 | return false; |
1063 | } |
1064 | if (iter->limit > iter->map->p_impl->size) { |
1065 | return false; |
1066 | } |
1067 | |
1068 | switch (iter->status) { |
1069 | case AWS_HASH_ITER_STATUS_DONE: |
1070 | /* Done iff slot == limit */ |
1071 | return iter->slot == iter->limit; |
1072 | case AWS_HASH_ITER_STATUS_DELETE_CALLED: |
1073 | /* iter->slot can underflow to SIZE_MAX after a delete |
1074 | * see the comments for aws_hash_iter_delete() */ |
1075 | return iter->slot <= iter->limit || iter->slot == SIZE_MAX; |
1076 | case AWS_HASH_ITER_STATUS_READY_FOR_USE: |
1077 | /* A slot must point to a valid location (i.e. hash_code != 0) */ |
1078 | return iter->slot < iter->limit && iter->map->p_impl->slots[iter->slot].hash_code != 0; |
1079 | } |
1080 | /* Invalid status code */ |
1081 | return false; |
1082 | } |
1083 | |
1084 | /** |
1085 | * Determine the total number of bytes needed for a hash-table with |
1086 | * "size" slots. If the result would overflow a size_t, return |
1087 | * AWS_OP_ERR; otherwise, return AWS_OP_SUCCESS with the result in |
1088 | * "required_bytes". |
1089 | */ |
1090 | int hash_table_state_required_bytes(size_t size, size_t *required_bytes) { |
1091 | |
1092 | size_t elemsize; |
1093 | if (aws_mul_size_checked(size, sizeof(struct hash_table_entry), &elemsize)) { |
1094 | return AWS_OP_ERR; |
1095 | } |
1096 | |
1097 | if (aws_add_size_checked(elemsize, sizeof(struct hash_table_state), required_bytes)) { |
1098 | return AWS_OP_ERR; |
1099 | } |
1100 | |
1101 | return AWS_OP_SUCCESS; |
1102 | } |
1103 | |