/*
 * Copyright 2010-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License").
 * You may not use this file except in compliance with the License.
 * A copy of the License is located at
 *
 *  http://aws.amazon.com/apache2.0
 *
 * or in the "license" file accompanying this file. This file is distributed
 * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */

/* For more information on how the RH hash works and in particular how we do
 * deletions, see:
 * http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/
 */

#include <aws/common/hash_table.h>
#include <aws/common/math.h>
#include <aws/common/private/hash_table_impl.h>
#include <aws/common/string.h>

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

/* Include lookup3.c so we can (potentially) inline it and make use of the mix()
 * macro. */
#include <aws/common/private/lookup3.inl>

static void s_suppress_unused_lookup3_func_warnings(void) {
    /* We avoid making changes to lookup3 if we can avoid it, but since it has functions
     * we're not using, reference them somewhere to suppress the unused function warning.
     */
    (void)hashword;
    (void)hashword2;
    (void)hashlittle;
    (void)hashbig;
}

/**
 * Calculate the hash for the given key.
 * Ensures reasonable semantics for null keys.
 * Ensures that no object ever hashes to 0, which is the sentinel value for an empty hash element.
 */
static uint64_t s_hash_for(struct hash_table_state *state, const void *key) {
    AWS_PRECONDITION(hash_table_state_is_valid(state));
    s_suppress_unused_lookup3_func_warnings();

    if (key == NULL) {
        /* The best answer */
        return 42;
    }

    uint64_t hash_code = state->hash_fn(key);
    if (!hash_code) {
        hash_code = 1;
    }
    AWS_RETURN_WITH_POSTCONDITION(hash_code, hash_code != 0);
}

/**
 * Check equality of two objects, with reasonable semantics for null.
 */
static bool s_safe_eq_check(aws_hash_callback_eq_fn *equals_fn, const void *a, const void *b) {
    /* Short circuit if the pointers are the same */
    if (a == b) {
        return true;
    }
    /* If one but not both are null, the objects are not equal */
    if (a == NULL || b == NULL) {
        return false;
    }
    /* If both are non-null, call the underlying equals fn */
    return equals_fn(a, b);
}

/**
 * Check equality of two hash keys, with reasonable semantics for null keys.
 */
static bool s_hash_keys_eq(struct hash_table_state *state, const void *a, const void *b) {
    AWS_PRECONDITION(hash_table_state_is_valid(state));
    bool rval = s_safe_eq_check(state->equals_fn, a, b);
    AWS_RETURN_WITH_POSTCONDITION(rval, hash_table_state_is_valid(state));
}

static size_t s_index_for(struct hash_table_state *map, struct hash_table_entry *entry) {
    AWS_PRECONDITION(hash_table_state_is_valid(map));
    size_t index = entry - map->slots;
    AWS_RETURN_WITH_POSTCONDITION(index, index < map->size && hash_table_state_is_valid(map));
}

#if 0
/* Useful debugging code for anyone working on this in the future */
static uint64_t s_distance(struct hash_table_state *state, size_t index) {
    return (index - state->slots[index].hash_code) & state->mask;
}

void hash_dump(struct aws_hash_table *tbl) {
    struct hash_table_state *state = tbl->p_impl;

    printf("Dumping hash table contents:\n");

    for (size_t i = 0; i < state->size; i++) {
        printf("%7zu: ", i);
        struct hash_table_entry *e = &state->slots[i];
        if (!e->hash_code) {
            printf("EMPTY\n");
        } else {
            printf("k: %p v: %p hash_code: %llu displacement: %llu\n",
                e->element.key, e->element.value, (unsigned long long)e->hash_code,
                (unsigned long long)((i - e->hash_code) & state->mask));
        }
    }
}
#endif

#if 0
/* Not currently exposed as an API. Should we have something like this? Useful for benchmarks */
AWS_COMMON_API
void aws_hash_table_print_stats(struct aws_hash_table *table) {
    struct hash_table_state *state = table->p_impl;
    uint64_t total_disp = 0;
    uint64_t max_disp = 0;

    printf("\n=== Hash table statistics ===\n");
    printf("Table size: %zu/%zu (max load %zu, remaining %zu)\n", state->entry_count, state->size, state->max_load, state->max_load - state->entry_count);
    printf("Load factor: %02.2lf%% (max %02.2lf%%)\n",
        100.0 * ((double)state->entry_count / (double)state->size),
        100.0 * state->max_load_factor);

    for (size_t i = 0; i < state->size; i++) {
        if (state->slots[i].hash_code) {
            uint64_t displacement = s_distance(state, i);
            total_disp += displacement;
            if (displacement > max_disp) {
                max_disp = displacement;
            }
        }
    }

    size_t *disp_counts = calloc(max_disp + 1, sizeof(*disp_counts));
    if (!disp_counts) {
        return;
    }

    for (size_t i = 0; i < state->size; i++) {
        if (state->slots[i].hash_code) {
            disp_counts[s_distance(state, i)]++;
        }
    }

    uint64_t median = 0;
    uint64_t passed = 0;
    for (uint64_t i = 0; i <= max_disp && passed < total_disp / 2; i++) {
        median = i;
        passed += disp_counts[i];
    }

    printf("Displacement statistics: Avg %02.2lf max %llu median %llu\n",
        (double)total_disp / (double)state->entry_count, (unsigned long long)max_disp, (unsigned long long)median);
    for (uint64_t i = 0; i <= max_disp; i++) {
        printf("Displacement %2llu: %zu entries\n", (unsigned long long)i, disp_counts[i]);
    }
    free(disp_counts);
    printf("\n");
}
#endif

size_t aws_hash_table_get_entry_count(const struct aws_hash_table *map) {
    struct hash_table_state *state = map->p_impl;
    return state->entry_count;
}

/* Given a header template, allocates space for a hash table of the appropriate
 * size, and copies the state header into this allocated memory, which is
 * returned.
 */
static struct hash_table_state *s_alloc_state(const struct hash_table_state *template) {
    size_t required_bytes;
    if (hash_table_state_required_bytes(template->size, &required_bytes)) {
        return NULL;
    }

    /* An empty slot has hashcode 0. So this marks all slots as empty */
    struct hash_table_state *state = aws_mem_calloc(template->alloc, 1, required_bytes);

    if (state == NULL) {
        return state;
    }

    *state = *template;
    return state;
}

/* Computes the correct size and max_load based on a requested size. */
static int s_update_template_size(struct hash_table_state *template, size_t expected_elements) {
    size_t min_size = expected_elements;

    if (min_size < 2) {
        min_size = 2;
    }

    /* size is always a power of 2 */
    size_t size;
    if (aws_round_up_to_power_of_two(min_size, &size)) {
        return AWS_OP_ERR;
    }

    /* Update the template once we've calculated everything successfully */
    template->size = size;
    template->max_load = (size_t)(template->max_load_factor * (double)template->size);
    /* Ensure that there is always at least one empty slot in the hash table */
    if (template->max_load >= size) {
        template->max_load = size - 1;
    }

    /* Since size is a power of 2: (index & (size - 1)) == (index % size) */
    template->mask = size - 1;

    return AWS_OP_SUCCESS;
}
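
/* Worked example of the sizing math above (illustrative only): requesting room
 * for 20 elements rounds min_size up to size = 32; with the default
 * max_load_factor of 0.95 this gives max_load = (size_t)(0.95 * 32) = 30 and
 * mask = 31 (0x1F). Because size is a power of two, a slot is selected with
 * (hash_code & mask) rather than a modulo.
 */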

int aws_hash_table_init(
    struct aws_hash_table *map,
    struct aws_allocator *alloc,
    size_t size,
    aws_hash_fn *hash_fn,
    aws_hash_callback_eq_fn *equals_fn,
    aws_hash_callback_destroy_fn *destroy_key_fn,
    aws_hash_callback_destroy_fn *destroy_value_fn) {
    AWS_PRECONDITION(map != NULL);
    AWS_PRECONDITION(alloc != NULL);
    AWS_PRECONDITION(hash_fn != NULL);
    AWS_PRECONDITION(equals_fn != NULL);

    struct hash_table_state template;
    template.hash_fn = hash_fn;
    template.equals_fn = equals_fn;
    template.destroy_key_fn = destroy_key_fn;
    template.destroy_value_fn = destroy_value_fn;
    template.alloc = alloc;

    template.entry_count = 0;
    template.max_load_factor = 0.95; /* TODO - make configurable? */

    if (s_update_template_size(&template, size)) {
        return AWS_OP_ERR;
    }
    map->p_impl = s_alloc_state(&template);

    if (!map->p_impl) {
        return AWS_OP_ERR;
    }

    AWS_SUCCEED_WITH_POSTCONDITION(aws_hash_table_is_valid(map));
}
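
#if 0
/* Illustrative usage sketch (not compiled; mirrors the public API declared in
 * aws/common/hash_table.h). The helper name s_example_usage is hypothetical;
 * the hash/equality callbacks are the C-string helpers defined later in this
 * file.
 */
static void s_example_usage(struct aws_allocator *alloc) {
    struct aws_hash_table table;
    if (aws_hash_table_init(&table, alloc, 8, aws_hash_c_string, aws_hash_callback_c_str_eq, NULL, NULL)) {
        return; /* error already raised */
    }

    /* Insert (or overwrite) a key/value pair */
    aws_hash_table_put(&table, "key", (void *)"value", NULL);

    /* Look it up again; p_elem points into the table's own storage */
    struct aws_hash_element *elem = NULL;
    if (!aws_hash_table_find(&table, "key", &elem) && elem != NULL) {
        /* elem->value is "value" here */
    }

    aws_hash_table_clean_up(&table);
}
#endif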

void aws_hash_table_clean_up(struct aws_hash_table *map) {
    AWS_PRECONDITION(map != NULL);
    AWS_PRECONDITION(
        map->p_impl == NULL || aws_hash_table_is_valid(map),
        "Input aws_hash_table [map] must be valid or hash_table_state pointer [map->p_impl] must be NULL, in case "
        "aws_hash_table_clean_up was called twice.");
    struct hash_table_state *state = map->p_impl;

    /* Ensure that we're idempotent */
    if (!state) {
        return;
    }

    aws_hash_table_clear(map);
    aws_mem_release(map->p_impl->alloc, map->p_impl);

    map->p_impl = NULL;
    AWS_POSTCONDITION(map->p_impl == NULL);
}

void aws_hash_table_swap(struct aws_hash_table *AWS_RESTRICT a, struct aws_hash_table *AWS_RESTRICT b) {
    AWS_PRECONDITION(a != b);
    struct aws_hash_table tmp = *a;
    *a = *b;
    *b = tmp;
}

void aws_hash_table_move(struct aws_hash_table *AWS_RESTRICT to, struct aws_hash_table *AWS_RESTRICT from) {
    AWS_PRECONDITION(to != NULL);
    AWS_PRECONDITION(from != NULL);
    AWS_PRECONDITION(to != from);
    AWS_PRECONDITION(aws_hash_table_is_valid(from));

    *to = *from;
    AWS_ZERO_STRUCT(*from);
    AWS_POSTCONDITION(aws_hash_table_is_valid(to));
}

/* Tries to find where the requested key is or where it should go if put.
 * Returns AWS_ERROR_SUCCESS if the item existed (leaving it in *entry),
 * or AWS_ERROR_HASHTBL_ITEM_NOT_FOUND if it did not (putting its destination
 * in *entry). Note that this does not take care of displacing whatever was in
 * that entry before.
 *
 * probe_idx is set to the probe index of the entry found.
 */

static int s_find_entry1(
    struct hash_table_state *state,
    uint64_t hash_code,
    const void *key,
    struct hash_table_entry **p_entry,
    size_t *p_probe_idx);

/* Inlined fast path: Check the first slot, only. */
/* TODO: Force inlining? */
static int inline s_find_entry(
    struct hash_table_state *state,
    uint64_t hash_code,
    const void *key,
    struct hash_table_entry **p_entry,
    size_t *p_probe_idx) {
    struct hash_table_entry *entry = &state->slots[hash_code & state->mask];

    if (entry->hash_code == 0) {
        if (p_probe_idx) {
            *p_probe_idx = 0;
        }
        *p_entry = entry;
        return AWS_ERROR_HASHTBL_ITEM_NOT_FOUND;
    }

    if (entry->hash_code == hash_code && s_hash_keys_eq(state, key, entry->element.key)) {
        if (p_probe_idx) {
            *p_probe_idx = 0;
        }
        *p_entry = entry;
        return AWS_OP_SUCCESS;
    }

    return s_find_entry1(state, hash_code, key, p_entry, p_probe_idx);
}

static int s_find_entry1(
    struct hash_table_state *state,
    uint64_t hash_code,
    const void *key,
    struct hash_table_entry **p_entry,
    size_t *p_probe_idx) {
    size_t probe_idx = 1;
    /* If we find a deleted entry, we record that index and return it as our probe index (i.e. we'll keep searching to
     * see if it already exists, but if not we'll overwrite the deleted entry).
     */

    int rv;
    struct hash_table_entry *entry;
    /* This loop is guaranteed to terminate because entry_probe is bounded above by state->mask (i.e. state->size - 1).
     * Since probe_idx increments every loop iteration, it will become larger than entry_probe after at most
     * state->size transitions and the loop will exit (if it hasn't already)
     */
    while (1) {
#ifdef CBMC
#    pragma CPROVER check push
#    pragma CPROVER check disable "unsigned-overflow"
#endif
        uint64_t index = (hash_code + probe_idx) & state->mask;
#ifdef CBMC
#    pragma CPROVER check pop
#endif
        entry = &state->slots[index];
        if (!entry->hash_code) {
            rv = AWS_ERROR_HASHTBL_ITEM_NOT_FOUND;
            break;
        }

        if (entry->hash_code == hash_code && s_hash_keys_eq(state, key, entry->element.key)) {
            rv = AWS_ERROR_SUCCESS;
            break;
        }

#ifdef CBMC
#    pragma CPROVER check push
#    pragma CPROVER check disable "unsigned-overflow"
#endif
        uint64_t entry_probe = (index - entry->hash_code) & state->mask;
#ifdef CBMC
#    pragma CPROVER check pop
#endif

        if (entry_probe < probe_idx) {
            /* We now know that our target entry cannot exist; if it did exist,
             * it would be at the current location as it has a higher probe
             * length than the entry we are examining and thus would have
             * preempted that item
             */
            rv = AWS_ERROR_HASHTBL_ITEM_NOT_FOUND;
            break;
        }

        probe_idx++;
    }

    *p_entry = entry;
    if (p_probe_idx) {
        *p_probe_idx = probe_idx;
    }

    return rv;
}
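
/* Worked example of the early-out above (illustrative only): suppose slots
 * 5..7 hold entries with probe lengths 0, 1, 2, and we are probing for a key
 * whose home slot is 5. At slot 8 our probe_idx is 3; if slot 8's occupant has
 * a probe length less than 3, a Robin Hood insert would have displaced it in
 * our key's favor had our key been stored further along. The key therefore
 * cannot exist past this point and the search stops with ITEM_NOT_FOUND.
 */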

int aws_hash_table_find(const struct aws_hash_table *map, const void *key, struct aws_hash_element **p_elem) {
    AWS_PRECONDITION(aws_hash_table_is_valid(map));
    AWS_PRECONDITION(AWS_OBJECT_PTR_IS_WRITABLE(p_elem), "Input aws_hash_element pointer [p_elem] must be writable.");

    struct hash_table_state *state = map->p_impl;
    uint64_t hash_code = s_hash_for(state, key);
    struct hash_table_entry *entry;

    int rv = s_find_entry(state, hash_code, key, &entry, NULL);

    if (rv == AWS_ERROR_SUCCESS) {
        *p_elem = &entry->element;
    } else {
        *p_elem = NULL;
    }
    AWS_SUCCEED_WITH_POSTCONDITION(aws_hash_table_is_valid(map));
}

/**
 * Attempts to find a home for the given entry.
 * If the entry was empty (i.e. hash code of 0), then the function does nothing and returns NULL.
 * Otherwise, it emplaces the item, and returns a pointer to the newly emplaced entry.
 * This function is only called after the hash-table has been expanded to fit the new element,
 * so it should never fail.
 */
static struct hash_table_entry *s_emplace_item(
    struct hash_table_state *state,
    struct hash_table_entry entry,
    size_t probe_idx) {
    AWS_PRECONDITION(hash_table_state_is_valid(state));

    if (entry.hash_code == 0) {
        AWS_RETURN_WITH_POSTCONDITION(NULL, hash_table_state_is_valid(state));
    }

    struct hash_table_entry *rval = NULL;

    /* Since a valid hash_table has at least one empty element, this loop will always terminate in at most linear time
     */
    while (entry.hash_code != 0) {
#ifdef CBMC
#    pragma CPROVER check push
#    pragma CPROVER check disable "unsigned-overflow"
#endif
        size_t index = (size_t)(entry.hash_code + probe_idx) & state->mask;
#ifdef CBMC
#    pragma CPROVER check pop
#endif
        struct hash_table_entry *victim = &state->slots[index];

#ifdef CBMC
#    pragma CPROVER check push
#    pragma CPROVER check disable "unsigned-overflow"
#endif
        size_t victim_probe_idx = (size_t)(index - victim->hash_code) & state->mask;
#ifdef CBMC
#    pragma CPROVER check pop
#endif

        if (!victim->hash_code || victim_probe_idx < probe_idx) {
            /* The first thing we emplace is the entry itself. A pointer to its location becomes the rval */
            if (!rval) {
                rval = victim;
            }

            struct hash_table_entry tmp = *victim;
            *victim = entry;
            entry = tmp;

            probe_idx = victim_probe_idx + 1;
        } else {
            probe_idx++;
        }
    }

    AWS_RETURN_WITH_POSTCONDITION(
        rval,
        hash_table_state_is_valid(state) && rval >= &state->slots[0] && rval < &state->slots[state->size],
        "Output hash_table_entry pointer [rval] must point in the slots of [state].");
}
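
/* Illustrative trace of the swap loop above: inserting an entry whose probe
 * length has reached 2 into a slot whose occupant has probe length 1 swaps
 * the two; the displaced "victim" then continues probing from
 * probe_idx = victim_probe_idx + 1 until it reaches an empty slot. The
 * returned pointer is the slot where the original argument landed, i.e. the
 * location of the first swap.
 */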

static int s_expand_table(struct aws_hash_table *map) {
    struct hash_table_state *old_state = map->p_impl;
    struct hash_table_state template = *old_state;

    size_t new_size;
    if (aws_mul_size_checked(template.size, 2, &new_size)) {
        return AWS_OP_ERR;
    }

    if (s_update_template_size(&template, new_size)) {
        return AWS_OP_ERR;
    }

    struct hash_table_state *new_state = s_alloc_state(&template);
    if (!new_state) {
        return AWS_OP_ERR;
    }

    for (size_t i = 0; i < old_state->size; i++) {
        struct hash_table_entry entry = old_state->slots[i];
        if (entry.hash_code) {
            /* We can directly emplace since we know we won't put the same item twice */
            s_emplace_item(new_state, entry, 0);
        }
    }

    map->p_impl = new_state;
    aws_mem_release(new_state->alloc, old_state);

    return AWS_OP_SUCCESS;
}

int aws_hash_table_create(
    struct aws_hash_table *map,
    const void *key,
    struct aws_hash_element **p_elem,
    int *was_created) {

    struct hash_table_state *state = map->p_impl;
    uint64_t hash_code = s_hash_for(state, key);
    struct hash_table_entry *entry;
    size_t probe_idx;
    int ignored;
    if (!was_created) {
        was_created = &ignored;
    }

    int rv = s_find_entry(state, hash_code, key, &entry, &probe_idx);

    if (rv == AWS_ERROR_SUCCESS) {
        if (p_elem) {
            *p_elem = &entry->element;
        }
        *was_created = 0;
        return AWS_OP_SUCCESS;
    }

    /* Okay, we need to add an entry. Check the load factor first. */
    size_t incr_entry_count;
    if (aws_add_size_checked(state->entry_count, 1, &incr_entry_count)) {
        return AWS_OP_ERR;
    }
    if (incr_entry_count > state->max_load) {
        rv = s_expand_table(map);
        if (rv != AWS_OP_SUCCESS) {
            /* Any error was already raised in expand_table */
            return rv;
        }
        state = map->p_impl;
        /* If we expanded the table, we need to discard the probe index returned from find_entry,
         * as it's likely that we can find a more desirable slot. If we don't, then later gets will
         * terminate before reaching our probe index.
         *
         * n.b. currently we ignore this probe_idx subsequently, but leaving
         * this here so we don't forget when we optimize later. */
        probe_idx = 0;
    }

    state->entry_count++;
    struct hash_table_entry new_entry;
    new_entry.element.key = key;
    new_entry.element.value = NULL;
    new_entry.hash_code = hash_code;

    entry = s_emplace_item(state, new_entry, probe_idx);

    if (p_elem) {
        *p_elem = &entry->element;
    }

    *was_created = 1;

    return AWS_OP_SUCCESS;
}

AWS_COMMON_API
int aws_hash_table_put(struct aws_hash_table *map, const void *key, void *value, int *was_created) {
    struct aws_hash_element *p_elem;
    int was_created_fallback;

    if (!was_created) {
        was_created = &was_created_fallback;
    }

    if (aws_hash_table_create(map, key, &p_elem, was_created)) {
        return AWS_OP_ERR;
    }

    /*
     * aws_hash_table_create might resize the table, which results in map->p_impl changing.
     * It is therefore important to wait until after aws_hash_table_create returns to read p_impl.
     */
    struct hash_table_state *state = map->p_impl;

    if (!*was_created) {
        if (p_elem->key != key && state->destroy_key_fn) {
            state->destroy_key_fn((void *)p_elem->key);
        }

        if (state->destroy_value_fn) {
            state->destroy_value_fn((void *)p_elem->value);
        }
    }

    p_elem->key = key;
    p_elem->value = value;

    return AWS_OP_SUCCESS;
}

/* Clears an entry. Does _not_ invoke destructor callbacks.
 * Returns the last slot touched (note that if we wrap, we'll report an index
 * lower than the original entry's index)
 */
static size_t s_remove_entry(struct hash_table_state *state, struct hash_table_entry *entry) {
    AWS_PRECONDITION(hash_table_state_is_valid(state));
    AWS_PRECONDITION(state->entry_count > 0);
    AWS_PRECONDITION(
        entry >= &state->slots[0] && entry < &state->slots[state->size],
        "Input hash_table_entry [entry] pointer must point in the available slots.");
    state->entry_count--;

    /* Shift subsequent entries back until we find an entry that belongs at its
     * current position. This is important to ensure that subsequent searches
     * don't terminate at the removed element.
     */
    size_t index = s_index_for(state, entry);
    /* There is always at least one empty slot in the hash table, so this loop always terminates */
    while (1) {
        size_t next_index = (index + 1) & state->mask;

        /* If we hit an empty slot, stop */
        if (!state->slots[next_index].hash_code) {
            break;
        }
        /* If the next slot is at the start of the probe sequence, stop.
         * We know that nothing with an earlier home slot is after this;
         * otherwise this index-zero entry would have been evicted from its
         * home.
         */
        if ((state->slots[next_index].hash_code & state->mask) == next_index) {
            break;
        }

        /* Okay, shift this one back */
        state->slots[index] = state->slots[next_index];
        index = next_index;
    }

    /* Clear the entry we shifted out of */
    AWS_ZERO_STRUCT(state->slots[index]);
    AWS_RETURN_WITH_POSTCONDITION(index, hash_table_state_is_valid(state) && index <= state->size);
}
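
/* Worked example of the backward shift above (illustrative only): with
 * mask = 7, suppose slots 3..5 hold entries whose home slot is 3 (probe
 * lengths 0, 1, 2). Removing the entry in slot 3 shifts slots 4 and 5 back
 * into slots 3 and 4, then zeroes slot 5 and returns 5. No tombstone is left
 * behind, so later probes for the shifted keys still terminate correctly.
 */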

int aws_hash_table_remove(
    struct aws_hash_table *map,
    const void *key,
    struct aws_hash_element *p_value,
    int *was_present) {
    AWS_PRECONDITION(aws_hash_table_is_valid(map));
    AWS_PRECONDITION(
        p_value == NULL || AWS_OBJECT_PTR_IS_WRITABLE(p_value), "Input pointer [p_value] must be NULL or writable.");
    AWS_PRECONDITION(
        was_present == NULL || AWS_OBJECT_PTR_IS_WRITABLE(was_present),
        "Input pointer [was_present] must be NULL or writable.");

    struct hash_table_state *state = map->p_impl;
    uint64_t hash_code = s_hash_for(state, key);
    struct hash_table_entry *entry;
    int ignored;

    if (!was_present) {
        was_present = &ignored;
    }

    int rv = s_find_entry(state, hash_code, key, &entry, NULL);

    if (rv != AWS_ERROR_SUCCESS) {
        *was_present = 0;
        AWS_SUCCEED_WITH_POSTCONDITION(aws_hash_table_is_valid(map));
    }

    *was_present = 1;

    if (p_value) {
        *p_value = entry->element;
    } else {
        if (state->destroy_key_fn) {
            state->destroy_key_fn((void *)entry->element.key);
        }
        if (state->destroy_value_fn) {
            state->destroy_value_fn(entry->element.value);
        }
    }
    s_remove_entry(state, entry);

    AWS_SUCCEED_WITH_POSTCONDITION(aws_hash_table_is_valid(map));
}

int aws_hash_table_remove_element(struct aws_hash_table *map, struct aws_hash_element *p_value) {
    AWS_PRECONDITION(aws_hash_table_is_valid(map));
    AWS_PRECONDITION(p_value != NULL);

    struct hash_table_state *state = map->p_impl;
    struct hash_table_entry *entry = AWS_CONTAINER_OF(p_value, struct hash_table_entry, element);

    s_remove_entry(state, entry);

    AWS_SUCCEED_WITH_POSTCONDITION(aws_hash_table_is_valid(map));
}

int aws_hash_table_foreach(
    struct aws_hash_table *map,
    int (*callback)(void *context, struct aws_hash_element *pElement),
    void *context) {

    for (struct aws_hash_iter iter = aws_hash_iter_begin(map); !aws_hash_iter_done(&iter); aws_hash_iter_next(&iter)) {
        int rv = callback(context, &iter.element);

        if (rv & AWS_COMMON_HASH_TABLE_ITER_DELETE) {
            aws_hash_iter_delete(&iter, false);
        }

        if (!(rv & AWS_COMMON_HASH_TABLE_ITER_CONTINUE)) {
            break;
        }
    }

    return AWS_OP_SUCCESS;
}
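
#if 0
/* Illustrative foreach callback sketch (not compiled). The return-value flags
 * are the real ones consumed by aws_hash_table_foreach above; the callback
 * name and its filtering logic are hypothetical.
 */
static int s_remove_null_values(void *context, struct aws_hash_element *p_element) {
    (void)context;
    if (p_element->value == NULL) {
        /* Delete this element and keep iterating */
        return AWS_COMMON_HASH_TABLE_ITER_DELETE | AWS_COMMON_HASH_TABLE_ITER_CONTINUE;
    }
    /* Keep this element and keep iterating */
    return AWS_COMMON_HASH_TABLE_ITER_CONTINUE;
}
#endif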

bool aws_hash_table_eq(
    const struct aws_hash_table *a,
    const struct aws_hash_table *b,
    aws_hash_callback_eq_fn *value_eq) {
    AWS_PRECONDITION(aws_hash_table_is_valid(a));
    AWS_PRECONDITION(aws_hash_table_is_valid(b));
    AWS_PRECONDITION(value_eq != NULL);

    if (aws_hash_table_get_entry_count(a) != aws_hash_table_get_entry_count(b)) {
        AWS_RETURN_WITH_POSTCONDITION(false, aws_hash_table_is_valid(a) && aws_hash_table_is_valid(b));
    }

    /*
     * Now that we have established that the two tables have the same number of
     * entries, we can simply iterate one and compare against the same key in
     * the other.
     */
    for (size_t i = 0; i < a->p_impl->size; ++i) {
        const struct hash_table_entry *const a_entry = &a->p_impl->slots[i];
        if (a_entry->hash_code == 0) {
            continue;
        }

        struct aws_hash_element *b_element = NULL;

        aws_hash_table_find(b, a_entry->element.key, &b_element);

        if (!b_element) {
            /* Key is present in A only */
            AWS_RETURN_WITH_POSTCONDITION(false, aws_hash_table_is_valid(a) && aws_hash_table_is_valid(b));
        }

        if (!s_safe_eq_check(value_eq, a_entry->element.value, b_element->value)) {
            AWS_RETURN_WITH_POSTCONDITION(false, aws_hash_table_is_valid(a) && aws_hash_table_is_valid(b));
        }
    }
    AWS_RETURN_WITH_POSTCONDITION(true, aws_hash_table_is_valid(a) && aws_hash_table_is_valid(b));
}

/**
 * Given an iterator, and a start slot, find the next available filled slot if it exists
 * Otherwise, return an iter that will return true for aws_hash_iter_done().
 * Note that aws_hash_iter_is_valid() need not hold on entry to the function, since
 * it can be called on a partially constructed iter from aws_hash_iter_begin().
 *
 * Note that calling this on an iterator which is "done" is idempotent: it will return another
 * iterator which is "done".
 */
static inline void s_get_next_element(struct aws_hash_iter *iter, size_t start_slot) {
    AWS_PRECONDITION(iter != NULL);
    AWS_PRECONDITION(aws_hash_table_is_valid(iter->map));
    struct hash_table_state *state = iter->map->p_impl;
    size_t limit = iter->limit;

    for (size_t i = start_slot; i < limit; i++) {
        struct hash_table_entry *entry = &state->slots[i];

        if (entry->hash_code) {
            iter->element = entry->element;
            iter->slot = i;
            iter->status = AWS_HASH_ITER_STATUS_READY_FOR_USE;
            return;
        }
    }
    iter->element.key = NULL;
    iter->element.value = NULL;
    iter->slot = iter->limit;
    iter->status = AWS_HASH_ITER_STATUS_DONE;
    AWS_POSTCONDITION(aws_hash_iter_is_valid(iter));
}

struct aws_hash_iter aws_hash_iter_begin(const struct aws_hash_table *map) {
    AWS_PRECONDITION(aws_hash_table_is_valid(map));
    struct hash_table_state *state = map->p_impl;
    struct aws_hash_iter iter;
    AWS_ZERO_STRUCT(iter);
    iter.map = map;
    iter.limit = state->size;
    s_get_next_element(&iter, 0);
    AWS_RETURN_WITH_POSTCONDITION(
        iter,
        aws_hash_iter_is_valid(&iter) &&
            (iter.status == AWS_HASH_ITER_STATUS_DONE || iter.status == AWS_HASH_ITER_STATUS_READY_FOR_USE),
        "The status of output aws_hash_iter [iter] must either be DONE or READY_FOR_USE.");
}

bool aws_hash_iter_done(const struct aws_hash_iter *iter) {
    AWS_PRECONDITION(aws_hash_iter_is_valid(iter));
    AWS_PRECONDITION(
        iter->status == AWS_HASH_ITER_STATUS_DONE || iter->status == AWS_HASH_ITER_STATUS_READY_FOR_USE,
        "Input aws_hash_iter [iter] must either be done, or ready to use.");
    /*
     * SIZE_MAX is a valid (non-terminal) value for iter->slot in the event that
     * we delete slot 0. See comments in aws_hash_iter_delete.
     *
     * As such we must use == rather than >= here.
     */
    bool rval = (iter->slot == iter->limit);
    AWS_POSTCONDITION(
        iter->status == AWS_HASH_ITER_STATUS_DONE || iter->status == AWS_HASH_ITER_STATUS_READY_FOR_USE,
        "The status of output aws_hash_iter [iter] must either be DONE or READY_FOR_USE.");
    AWS_POSTCONDITION(
        rval == (iter->status == AWS_HASH_ITER_STATUS_DONE),
        "Output bool [rval] must be true if and only if the status of [iter] is DONE.");
    AWS_POSTCONDITION(aws_hash_iter_is_valid(iter));
    return rval;
}

void aws_hash_iter_next(struct aws_hash_iter *iter) {
    AWS_PRECONDITION(aws_hash_iter_is_valid(iter));
#ifdef CBMC
#    pragma CPROVER check push
#    pragma CPROVER check disable "unsigned-overflow"
#endif
    s_get_next_element(iter, iter->slot + 1);
#ifdef CBMC
#    pragma CPROVER check pop
#endif
    AWS_POSTCONDITION(
        iter->status == AWS_HASH_ITER_STATUS_DONE || iter->status == AWS_HASH_ITER_STATUS_READY_FOR_USE,
        "The status of output aws_hash_iter [iter] must either be DONE or READY_FOR_USE.");
    AWS_POSTCONDITION(aws_hash_iter_is_valid(iter));
}

void aws_hash_iter_delete(struct aws_hash_iter *iter, bool destroy_contents) {
    AWS_PRECONDITION(
        iter->status == AWS_HASH_ITER_STATUS_READY_FOR_USE, "Input aws_hash_iter [iter] must be ready for use.");
    AWS_PRECONDITION(aws_hash_iter_is_valid(iter));
    AWS_PRECONDITION(
        iter->map->p_impl->entry_count > 0,
        "The hash_table_state pointed by input [iter] must contain at least one entry.");

    struct hash_table_state *state = iter->map->p_impl;
    if (destroy_contents) {
        if (state->destroy_key_fn) {
            state->destroy_key_fn((void *)iter->element.key);
        }
        if (state->destroy_value_fn) {
            state->destroy_value_fn(iter->element.value);
        }
    }

    size_t last_index = s_remove_entry(state, &state->slots[iter->slot]);

    /* If we shifted elements that are not part of the window we intend to iterate
     * over, it means we shifted an element that we already visited into the
     * iter->limit - 1 position. To avoid double iteration, we'll now reduce the
     * limit to compensate.
     *
     * Note that last_index cannot equal iter->slot, because slots[iter->slot]
     * is empty before we start walking the table.
     */
    if (last_index < iter->slot || last_index >= iter->limit) {
        iter->limit--;
    }

    /*
     * After removing this entry, the next entry might be in the same slot, or
     * in some later slot, or we might have no further entries.
     *
     * We also expect that the caller will call aws_hash_iter_done and aws_hash_iter_next
     * after this delete call. This gets a bit tricky if we just deleted the value
     * in slot 0, and a new value has shifted in.
     *
     * To deal with this, we'll just step back one slot, and let _next start iteration
     * at our current slot. Note that if we just deleted slot 0, this will result in
     * underflowing to SIZE_MAX; we have to take care in aws_hash_iter_done to avoid
     * treating this as an end-of-iteration condition.
     */
#ifdef CBMC
#    pragma CPROVER check push
#    pragma CPROVER check disable "unsigned-overflow"
#endif
    iter->slot--;
#ifdef CBMC
#    pragma CPROVER check pop
#endif
    iter->status = AWS_HASH_ITER_STATUS_DELETE_CALLED;
    AWS_POSTCONDITION(
        iter->status == AWS_HASH_ITER_STATUS_DELETE_CALLED,
        "The status of output aws_hash_iter [iter] must be DELETE_CALLED.");
    AWS_POSTCONDITION(aws_hash_iter_is_valid(iter));
}

void aws_hash_table_clear(struct aws_hash_table *map) {
    AWS_PRECONDITION(aws_hash_table_is_valid(map));
    struct hash_table_state *state = map->p_impl;

    /* Check that we have at least one destructor before iterating over the table */
    if (state->destroy_key_fn || state->destroy_value_fn) {
        for (size_t i = 0; i < state->size; ++i) {
            struct hash_table_entry *entry = &state->slots[i];
            if (!entry->hash_code) {
                continue;
            }
            if (state->destroy_key_fn) {
                state->destroy_key_fn((void *)entry->element.key);
            }
            if (state->destroy_value_fn) {
                state->destroy_value_fn(entry->element.value);
            }
        }
    }
    /* Since hash code 0 represents an empty slot we can just zero out the
     * entire table. */
    memset(state->slots, 0, sizeof(*state->slots) * state->size);

    state->entry_count = 0;
    AWS_POSTCONDITION(aws_hash_table_is_valid(map));
}

uint64_t aws_hash_c_string(const void *item) {
    AWS_PRECONDITION(aws_c_string_is_valid(item));
    const char *str = item;

    /* first digits of pi in hex */
    uint32_t b = 0x3243F6A8, c = 0x885A308D;
    hashlittle2(str, strlen(str), &c, &b);

    return ((uint64_t)b << 32) | c;
}

uint64_t aws_hash_string(const void *item) {
    AWS_PRECONDITION(aws_string_is_valid(item));
    const struct aws_string *str = item;

    /* first digits of pi in hex */
    uint32_t b = 0x3243F6A8, c = 0x885A308D;
    hashlittle2(aws_string_bytes(str), str->len, &c, &b);
    AWS_RETURN_WITH_POSTCONDITION(((uint64_t)b << 32) | c, aws_string_is_valid(str));
}

uint64_t aws_hash_byte_cursor_ptr(const void *item) {
    AWS_PRECONDITION(aws_byte_cursor_is_valid(item));
    const struct aws_byte_cursor *cur = item;

    /* first digits of pi in hex */
    uint32_t b = 0x3243F6A8, c = 0x885A308D;
    hashlittle2(cur->ptr, cur->len, &c, &b);
    AWS_RETURN_WITH_POSTCONDITION(((uint64_t)b << 32) | c, aws_byte_cursor_is_valid(cur));
}

uint64_t aws_hash_ptr(const void *item) {
    /* Since the numeric value of the pointer is considered, not the memory behind it, 0 is an acceptable value */
    /* first digits of e in hex
     * 2.b7e 1516 28ae d2a6 */
    uint32_t b = 0x2b7e1516, c = 0x28aed2a6;

    hashlittle2(&item, sizeof(item), &c, &b);

    return ((uint64_t)b << 32) | c;
}

bool aws_hash_callback_c_str_eq(const void *a, const void *b) {
    AWS_PRECONDITION(aws_c_string_is_valid(a));
    AWS_PRECONDITION(aws_c_string_is_valid(b));
    bool rval = !strcmp(a, b);
    AWS_RETURN_WITH_POSTCONDITION(rval, aws_c_string_is_valid(a) && aws_c_string_is_valid(b));
}

bool aws_hash_callback_string_eq(const void *a, const void *b) {
    AWS_PRECONDITION(aws_string_is_valid(a));
    AWS_PRECONDITION(aws_string_is_valid(b));
    bool rval = aws_string_eq(a, b);
    AWS_RETURN_WITH_POSTCONDITION(rval, aws_string_is_valid(a) && aws_string_is_valid(b));
}

void aws_hash_callback_string_destroy(void *a) {
    AWS_PRECONDITION(aws_string_is_valid(a));
    aws_string_destroy(a);
}

bool aws_ptr_eq(const void *a, const void *b) {
    return a == b;
}

/**
 * Best-effort check of hash_table_state data-structure invariants
 * Some invariants, such as that the number of entries is actually the
 * same as the entry_count field, would require a loop to check
 */
bool aws_hash_table_is_valid(const struct aws_hash_table *map) {
    return map && map->p_impl && hash_table_state_is_valid(map->p_impl);
}

/**
 * Best-effort check of hash_table_state data-structure invariants
 * Some invariants, such as that the number of entries is actually the
 * same as the entry_count field, would require a loop to check
 */
bool hash_table_state_is_valid(const struct hash_table_state *map) {
    if (!map) {
        return false;
    }
    bool hash_fn_nonnull = (map->hash_fn != NULL);
    bool equals_fn_nonnull = (map->equals_fn != NULL);
    /* destroy_key_fn and destroy_value_fn are both allowed to be NULL */
    bool alloc_nonnull = (map->alloc != NULL);
    bool size_at_least_two = (map->size >= 2);
    bool size_is_power_of_two = aws_is_power_of_two(map->size);
    bool entry_count = (map->entry_count <= map->max_load);
    bool max_load = (map->max_load < map->size);
    bool mask_is_correct = (map->mask == (map->size - 1));
    bool max_load_factor_bounded = map->max_load_factor == 0.95; /* (map->max_load_factor < 1.0) */
    bool slots_allocated = AWS_MEM_IS_WRITABLE(&map->slots[0], sizeof(map->slots[0]) * map->size);

    return hash_fn_nonnull && equals_fn_nonnull && alloc_nonnull && size_at_least_two && size_is_power_of_two &&
           entry_count && max_load && mask_is_correct && max_load_factor_bounded && slots_allocated;
}

/**
 * Given a pointer to a hash_iter, checks that it is well-formed, with all data-structure invariants.
 */
bool aws_hash_iter_is_valid(const struct aws_hash_iter *iter) {
    if (!iter) {
        return false;
    }
    if (!iter->map) {
        return false;
    }
    if (!aws_hash_table_is_valid(iter->map)) {
        return false;
    }
    if (iter->limit > iter->map->p_impl->size) {
        return false;
    }

    switch (iter->status) {
        case AWS_HASH_ITER_STATUS_DONE:
            /* Done iff slot == limit */
            return iter->slot == iter->limit;
        case AWS_HASH_ITER_STATUS_DELETE_CALLED:
            /* iter->slot can underflow to SIZE_MAX after a delete
             * see the comments for aws_hash_iter_delete() */
            return iter->slot <= iter->limit || iter->slot == SIZE_MAX;
        case AWS_HASH_ITER_STATUS_READY_FOR_USE:
            /* A slot must point to a valid location (i.e. hash_code != 0) */
            return iter->slot < iter->limit && iter->map->p_impl->slots[iter->slot].hash_code != 0;
    }
    /* Invalid status code */
    return false;
}

/**
 * Determine the total number of bytes needed for a hash-table with
 * "size" slots. If the result would overflow a size_t, return
 * AWS_OP_ERR; otherwise, return AWS_OP_SUCCESS with the result in
 * "required_bytes".
 */
int hash_table_state_required_bytes(size_t size, size_t *required_bytes) {

    size_t elemsize;
    if (aws_mul_size_checked(size, sizeof(struct hash_table_entry), &elemsize)) {
        return AWS_OP_ERR;
    }

    if (aws_add_size_checked(elemsize, sizeof(struct hash_table_state), required_bytes)) {
        return AWS_OP_ERR;
    }

    return AWS_OP_SUCCESS;
}
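/* Worked example of the byte math above (illustrative only): for 32 slots,
 * required_bytes = sizeof(struct hash_table_state) + 32 * sizeof(struct
 * hash_table_entry), since the slots array hangs off the end of the state
 * header in the single allocation made by s_alloc_state().
 */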