1 | /* |
2 | * Copyright 2010-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"). |
5 | * You may not use this file except in compliance with the License. |
6 | * A copy of the License is located at |
7 | * |
8 | * http://aws.amazon.com/apache2.0 |
9 | * |
10 | * or in the "license" file accompanying this file. This file is distributed |
11 | * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either |
12 | * express or implied. See the License for the specific language governing |
13 | * permissions and limitations under the License. |
14 | */ |
15 | |
/* For more information on how the RH (Robin Hood) hash works, and in particular
 * how we do deletions, see:
 * http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/
 */
20 | |
21 | #include <aws/common/hash_table.h> |
22 | #include <aws/common/math.h> |
23 | #include <aws/common/private/hash_table_impl.h> |
24 | #include <aws/common/string.h> |
25 | |
26 | #include <limits.h> |
27 | #include <stdio.h> |
28 | #include <stdlib.h> |
29 | |
30 | /* Include lookup3.c so we can (potentially) inline it and make use of the mix() |
31 | * macro. */ |
32 | #include <aws/common/private/lookup3.inl> |
33 | |
34 | static void s_suppress_unused_lookup3_func_warnings(void) { |
35 | /* We avoid making changes to lookup3 if we can avoid it, but since it has functions |
36 | * we're not using, reference them somewhere to suppress the unused function warning. |
37 | */ |
38 | (void)hashword; |
39 | (void)hashword2; |
40 | (void)hashlittle; |
41 | (void)hashbig; |
42 | } |
43 | |
44 | /** |
45 | * Calculate the hash for the given key. |
46 | * Ensures a reasonable semantics for null keys. |
 * Ensures that no object ever hashes to 0, which is the sentinel value for an empty hash element.
48 | */ |
49 | static uint64_t s_hash_for(struct hash_table_state *state, const void *key) { |
50 | AWS_PRECONDITION(hash_table_state_is_valid(state)); |
51 | s_suppress_unused_lookup3_func_warnings(); |
52 | |
53 | if (key == NULL) { |
54 | /* The best answer */ |
55 | return 42; |
56 | } |
57 | |
58 | uint64_t hash_code = state->hash_fn(key); |
59 | if (!hash_code) { |
60 | hash_code = 1; |
61 | } |
62 | AWS_RETURN_WITH_POSTCONDITION(hash_code, hash_code != 0); |
63 | } |
64 | |
65 | /** |
66 | * Check equality of two objects, with a reasonable semantics for null. |
67 | */ |
68 | static bool s_safe_eq_check(aws_hash_callback_eq_fn *equals_fn, const void *a, const void *b) { |
69 | /* Short circuit if the pointers are the same */ |
70 | if (a == b) { |
71 | return true; |
72 | } |
73 | /* If one but not both are null, the objects are not equal */ |
74 | if (a == NULL || b == NULL) { |
75 | return false; |
76 | } |
77 | /* If both are non-null, call the underlying equals fn */ |
78 | return equals_fn(a, b); |
79 | } |
80 | |
81 | /** |
82 | * Check equality of two hash keys, with a reasonable semantics for null keys. |
83 | */ |
84 | static bool s_hash_keys_eq(struct hash_table_state *state, const void *a, const void *b) { |
85 | AWS_PRECONDITION(hash_table_state_is_valid(state)); |
86 | bool rval = s_safe_eq_check(state->equals_fn, a, b); |
87 | AWS_RETURN_WITH_POSTCONDITION(rval, hash_table_state_is_valid(state)); |
88 | } |
89 | |
90 | static size_t s_index_for(struct hash_table_state *map, struct hash_table_entry *entry) { |
91 | AWS_PRECONDITION(hash_table_state_is_valid(map)); |
92 | size_t index = entry - map->slots; |
93 | AWS_RETURN_WITH_POSTCONDITION(index, index < map->size && hash_table_state_is_valid(map)); |
94 | } |
95 | |
#if 0
/* Useful debugging code for anyone working on this in the future */
static uint64_t s_distance(struct hash_table_state *state, size_t index) {
    return (index - state->slots[index].hash_code) & state->mask;
}

void hash_dump(struct aws_hash_table *tbl) {
    struct hash_table_state *state = tbl->p_impl;

    printf("Dumping hash table contents:\n");

    for (size_t i = 0; i < state->size; i++) {
        printf("%7zu: ", i);
        struct hash_table_entry *e = &state->slots[i];
        if (!e->hash_code) {
            printf("EMPTY\n");
        } else {
            printf(
                "k: %p v: %p hash_code: %llu displacement: %llu\n",
                e->element.key,
                e->element.value,
                (unsigned long long)e->hash_code,
                (unsigned long long)s_distance(state, i));
        }
    }
}
#endif
120 | |
#if 0
/* Not currently exposed as an API. Should we have something like this? Useful for benchmarks */
AWS_COMMON_API
void aws_hash_table_print_stats(struct aws_hash_table *table) {
    struct hash_table_state *state = table->p_impl;
    uint64_t total_disp = 0;
    uint64_t max_disp = 0;

    printf("\n=== Hash table statistics ===\n");
    printf(
        "Table size: %zu/%zu (max load %zu, remaining %zu)\n",
        state->entry_count,
        state->size,
        state->max_load,
        state->max_load - state->entry_count);
    printf(
        "Load factor: %02.2lf%% (max %02.2lf%%)\n",
        100.0 * ((double)state->entry_count / (double)state->size),
        100.0 * state->max_load_factor);

    for (size_t i = 0; i < state->size; i++) {
        if (state->slots[i].hash_code) {
            uint64_t displacement = s_distance(state, i);
            total_disp += displacement;
            if (displacement > max_disp) {
                max_disp = displacement;
            }
        }
    }

    size_t *disp_counts = calloc(max_disp + 1, sizeof(*disp_counts));
    if (!disp_counts) {
        return;
    }

    for (size_t i = 0; i < state->size; i++) {
        if (state->slots[i].hash_code) {
            disp_counts[s_distance(state, i)]++;
        }
    }

    uint64_t median = 0;
    uint64_t passed = 0;
    for (uint64_t i = 0; i <= max_disp && passed < total_disp / 2; i++) {
        median = i;
        passed += disp_counts[i];
    }

    printf(
        "Displacement statistics: Avg %02.2lf max %llu median %llu\n",
        (double)total_disp / (double)state->entry_count,
        (unsigned long long)max_disp,
        (unsigned long long)median);
    for (uint64_t i = 0; i <= max_disp; i++) {
        printf("Displacement %2llu: %zu entries\n", (unsigned long long)i, disp_counts[i]);
    }
    free(disp_counts);
    printf("\n");
}
#endif
168 | |
169 | size_t aws_hash_table_get_entry_count(const struct aws_hash_table *map) { |
170 | struct hash_table_state *state = map->p_impl; |
171 | return state->entry_count; |
172 | } |
173 | |
174 | /* Given a header template, allocates space for a hash table of the appropriate |
175 | * size, and copies the state header into this allocated memory, which is |
176 | * returned. |
177 | */ |
178 | static struct hash_table_state *s_alloc_state(const struct hash_table_state *template) { |
179 | size_t required_bytes; |
180 | if (hash_table_state_required_bytes(template->size, &required_bytes)) { |
181 | return NULL; |
182 | } |
183 | |
    /* An empty slot has hash_code 0, so this marks all slots as empty */
185 | struct hash_table_state *state = aws_mem_calloc(template->alloc, 1, required_bytes); |
186 | |
187 | if (state == NULL) { |
188 | return state; |
189 | } |
190 | |
191 | *state = *template; |
192 | return state; |
193 | } |
194 | |
195 | /* Computes the correct size and max_load based on a requested size. */ |
196 | static int s_update_template_size(struct hash_table_state *template, size_t expected_elements) { |
197 | size_t min_size = expected_elements; |
198 | |
199 | if (min_size < 2) { |
200 | min_size = 2; |
201 | } |
202 | |
203 | /* size is always a power of 2 */ |
204 | size_t size; |
205 | if (aws_round_up_to_power_of_two(min_size, &size)) { |
206 | return AWS_OP_ERR; |
207 | } |
208 | |
209 | /* Update the template once we've calculated everything successfully */ |
210 | template->size = size; |
211 | template->max_load = (size_t)(template->max_load_factor * (double)template->size); |
212 | /* Ensure that there is always at least one empty slot in the hash table */ |
213 | if (template->max_load >= size) { |
214 | template->max_load = size - 1; |
215 | } |
216 | |
217 | /* Since size is a power of 2: (index & (size - 1)) == (index % size) */ |
218 | template->mask = size - 1; |
219 | |
220 | return AWS_OP_SUCCESS; |
221 | } |
222 | |
223 | int aws_hash_table_init( |
224 | struct aws_hash_table *map, |
225 | struct aws_allocator *alloc, |
226 | size_t size, |
227 | aws_hash_fn *hash_fn, |
228 | aws_hash_callback_eq_fn *equals_fn, |
229 | aws_hash_callback_destroy_fn *destroy_key_fn, |
230 | aws_hash_callback_destroy_fn *destroy_value_fn) { |
231 | AWS_PRECONDITION(map != NULL); |
232 | AWS_PRECONDITION(alloc != NULL); |
233 | AWS_PRECONDITION(hash_fn != NULL); |
234 | AWS_PRECONDITION(equals_fn != NULL); |
235 | |
236 | struct hash_table_state template; |
237 | template.hash_fn = hash_fn; |
238 | template.equals_fn = equals_fn; |
239 | template.destroy_key_fn = destroy_key_fn; |
240 | template.destroy_value_fn = destroy_value_fn; |
241 | template.alloc = alloc; |
242 | |
243 | template.entry_count = 0; |
244 | template.max_load_factor = 0.95; /* TODO - make configurable? */ |
245 | |
246 | if (s_update_template_size(&template, size)) { |
247 | return AWS_OP_ERR; |
248 | } |
249 | map->p_impl = s_alloc_state(&template); |
250 | |
251 | if (!map->p_impl) { |
252 | return AWS_OP_ERR; |
253 | } |
254 | |
255 | AWS_SUCCEED_WITH_POSTCONDITION(aws_hash_table_is_valid(map)); |
256 | } |
257 | |
258 | void aws_hash_table_clean_up(struct aws_hash_table *map) { |
259 | AWS_PRECONDITION(map != NULL); |
260 | AWS_PRECONDITION( |
261 | map->p_impl == NULL || aws_hash_table_is_valid(map), |
262 | "Input aws_hash_table [map] must be valid or hash_table_state pointer [map->p_impl] must be NULL, in case " |
263 | "aws_hash_table_clean_up was called twice." ); |
264 | struct hash_table_state *state = map->p_impl; |
265 | |
266 | /* Ensure that we're idempotent */ |
267 | if (!state) { |
268 | return; |
269 | } |
270 | |
271 | aws_hash_table_clear(map); |
272 | aws_mem_release(map->p_impl->alloc, map->p_impl); |
273 | |
274 | map->p_impl = NULL; |
275 | AWS_POSTCONDITION(map->p_impl == NULL); |
276 | } |
277 | |
278 | void aws_hash_table_swap(struct aws_hash_table *AWS_RESTRICT a, struct aws_hash_table *AWS_RESTRICT b) { |
279 | AWS_PRECONDITION(a != b); |
280 | struct aws_hash_table tmp = *a; |
281 | *a = *b; |
282 | *b = tmp; |
283 | } |
284 | |
285 | void aws_hash_table_move(struct aws_hash_table *AWS_RESTRICT to, struct aws_hash_table *AWS_RESTRICT from) { |
286 | AWS_PRECONDITION(to != NULL); |
287 | AWS_PRECONDITION(from != NULL); |
288 | AWS_PRECONDITION(to != from); |
289 | AWS_PRECONDITION(aws_hash_table_is_valid(from)); |
290 | |
291 | *to = *from; |
292 | AWS_ZERO_STRUCT(*from); |
293 | AWS_POSTCONDITION(aws_hash_table_is_valid(to)); |
294 | } |
295 | |
/* Tries to find where the requested key is, or where it would go if inserted.
 * Returns AWS_ERROR_SUCCESS if the item existed (leaving it in *entry),
 * or AWS_ERROR_HASHTBL_ITEM_NOT_FOUND if it did not (putting its destination
 * in *entry). Note that this does not take care of displacing whatever was in
 * that entry before.
 *
 * probe_idx is set to the probe index of the entry found.
 */
304 | |
305 | static int s_find_entry1( |
306 | struct hash_table_state *state, |
307 | uint64_t hash_code, |
308 | const void *key, |
309 | struct hash_table_entry **p_entry, |
310 | size_t *p_probe_idx); |
311 | |
312 | /* Inlined fast path: Check the first slot, only. */ |
313 | /* TODO: Force inlining? */ |
static inline int s_find_entry(
315 | struct hash_table_state *state, |
316 | uint64_t hash_code, |
317 | const void *key, |
318 | struct hash_table_entry **p_entry, |
319 | size_t *p_probe_idx) { |
320 | struct hash_table_entry *entry = &state->slots[hash_code & state->mask]; |
321 | |
322 | if (entry->hash_code == 0) { |
323 | if (p_probe_idx) { |
324 | *p_probe_idx = 0; |
325 | } |
326 | *p_entry = entry; |
327 | return AWS_ERROR_HASHTBL_ITEM_NOT_FOUND; |
328 | } |
329 | |
330 | if (entry->hash_code == hash_code && s_hash_keys_eq(state, key, entry->element.key)) { |
331 | if (p_probe_idx) { |
332 | *p_probe_idx = 0; |
333 | } |
334 | *p_entry = entry; |
335 | return AWS_OP_SUCCESS; |
336 | } |
337 | |
338 | return s_find_entry1(state, hash_code, key, p_entry, p_probe_idx); |
339 | } |
340 | |
341 | static int s_find_entry1( |
342 | struct hash_table_state *state, |
343 | uint64_t hash_code, |
344 | const void *key, |
345 | struct hash_table_entry **p_entry, |
346 | size_t *p_probe_idx) { |
347 | size_t probe_idx = 1; |
    /* Starting at probe index 1 (the caller already checked the home slot), walk the probe
     * sequence until we find the key, hit an empty slot, or pass an entry whose own probe
     * length is shorter than ours, which proves the key is absent.
     */
351 | |
352 | int rv; |
353 | struct hash_table_entry *entry; |
354 | /* This loop is guaranteed to terminate because entry_probe is bounded above by state->mask (i.e. state->size - 1). |
355 | * Since probe_idx increments every loop iteration, it will become larger than entry_probe after at most state->size |
356 | * transitions and the loop will exit (if it hasn't already) |
357 | */ |
358 | while (1) { |
359 | #ifdef CBMC |
360 | # pragma CPROVER check push |
361 | # pragma CPROVER check disable "unsigned-overflow" |
362 | #endif |
363 | uint64_t index = (hash_code + probe_idx) & state->mask; |
364 | #ifdef CBMC |
365 | # pragma CPROVER check pop |
366 | #endif |
367 | entry = &state->slots[index]; |
368 | if (!entry->hash_code) { |
369 | rv = AWS_ERROR_HASHTBL_ITEM_NOT_FOUND; |
370 | break; |
371 | } |
372 | |
373 | if (entry->hash_code == hash_code && s_hash_keys_eq(state, key, entry->element.key)) { |
374 | rv = AWS_ERROR_SUCCESS; |
375 | break; |
376 | } |
377 | |
378 | #ifdef CBMC |
379 | # pragma CPROVER check push |
380 | # pragma CPROVER check disable "unsigned-overflow" |
381 | #endif |
382 | uint64_t entry_probe = (index - entry->hash_code) & state->mask; |
383 | #ifdef CBMC |
384 | # pragma CPROVER check pop |
385 | #endif |
386 | |
387 | if (entry_probe < probe_idx) { |
388 | /* We now know that our target entry cannot exist; if it did exist, |
389 | * it would be at the current location as it has a higher probe |
390 | * length than the entry we are examining and thus would have |
391 | * preempted that item |
392 | */ |
393 | rv = AWS_ERROR_HASHTBL_ITEM_NOT_FOUND; |
394 | break; |
395 | } |
396 | |
397 | probe_idx++; |
398 | } |
399 | |
400 | *p_entry = entry; |
401 | if (p_probe_idx) { |
402 | *p_probe_idx = probe_idx; |
403 | } |
404 | |
405 | return rv; |
406 | } |
407 | |
408 | int aws_hash_table_find(const struct aws_hash_table *map, const void *key, struct aws_hash_element **p_elem) { |
409 | AWS_PRECONDITION(aws_hash_table_is_valid(map)); |
    AWS_PRECONDITION(AWS_OBJECT_PTR_IS_WRITABLE(p_elem), "Input aws_hash_element pointer [p_elem] must be writable.");
411 | |
412 | struct hash_table_state *state = map->p_impl; |
413 | uint64_t hash_code = s_hash_for(state, key); |
414 | struct hash_table_entry *entry; |
415 | |
416 | int rv = s_find_entry(state, hash_code, key, &entry, NULL); |
417 | |
418 | if (rv == AWS_ERROR_SUCCESS) { |
419 | *p_elem = &entry->element; |
420 | } else { |
421 | *p_elem = NULL; |
422 | } |
423 | AWS_SUCCEED_WITH_POSTCONDITION(aws_hash_table_is_valid(map)); |
424 | } |
425 | |
426 | /** |
427 | * Attempts to find a home for the given entry. |
428 | * If the entry was empty (i.e. hash-code of 0), then the function does nothing and returns NULL |
429 | * Otherwise, it emplaces the item, and returns a pointer to the newly emplaced entry. |
430 | * This function is only called after the hash-table has been expanded to fit the new element, |
431 | * so it should never fail. |
432 | */ |
433 | static struct hash_table_entry *s_emplace_item( |
434 | struct hash_table_state *state, |
435 | struct hash_table_entry entry, |
436 | size_t probe_idx) { |
437 | AWS_PRECONDITION(hash_table_state_is_valid(state)); |
438 | |
439 | if (entry.hash_code == 0) { |
440 | AWS_RETURN_WITH_POSTCONDITION(NULL, hash_table_state_is_valid(state)); |
441 | } |
442 | |
443 | struct hash_table_entry *rval = NULL; |
444 | |
445 | /* Since a valid hash_table has at least one empty element, this loop will always terminate in at most linear time |
446 | */ |
447 | while (entry.hash_code != 0) { |
448 | #ifdef CBMC |
449 | # pragma CPROVER check push |
450 | # pragma CPROVER check disable "unsigned-overflow" |
451 | #endif |
452 | size_t index = (size_t)(entry.hash_code + probe_idx) & state->mask; |
453 | #ifdef CBMC |
454 | # pragma CPROVER check pop |
455 | #endif |
456 | struct hash_table_entry *victim = &state->slots[index]; |
457 | |
458 | #ifdef CBMC |
459 | # pragma CPROVER check push |
460 | # pragma CPROVER check disable "unsigned-overflow" |
461 | #endif |
462 | size_t victim_probe_idx = (size_t)(index - victim->hash_code) & state->mask; |
463 | #ifdef CBMC |
464 | # pragma CPROVER check pop |
465 | #endif |
466 | |
467 | if (!victim->hash_code || victim_probe_idx < probe_idx) { |
468 | /* The first thing we emplace is the entry itself. A pointer to its location becomes the rval */ |
469 | if (!rval) { |
470 | rval = victim; |
471 | } |
472 | |
473 | struct hash_table_entry tmp = *victim; |
474 | *victim = entry; |
475 | entry = tmp; |
476 | |
477 | probe_idx = victim_probe_idx + 1; |
478 | } else { |
479 | probe_idx++; |
480 | } |
481 | } |
482 | |
483 | AWS_RETURN_WITH_POSTCONDITION( |
484 | rval, |
485 | hash_table_state_is_valid(state) && rval >= &state->slots[0] && rval < &state->slots[state->size], |
486 | "Output hash_table_entry pointer [rval] must point in the slots of [state]." ); |
487 | } |
488 | |
489 | static int s_expand_table(struct aws_hash_table *map) { |
490 | struct hash_table_state *old_state = map->p_impl; |
491 | struct hash_table_state template = *old_state; |
492 | |
493 | size_t new_size; |
494 | if (aws_mul_size_checked(template.size, 2, &new_size)) { |
495 | return AWS_OP_ERR; |
496 | } |
497 | |
498 | if (s_update_template_size(&template, new_size)) { |
499 | return AWS_OP_ERR; |
500 | } |
501 | |
502 | struct hash_table_state *new_state = s_alloc_state(&template); |
503 | if (!new_state) { |
504 | return AWS_OP_ERR; |
505 | } |
506 | |
507 | for (size_t i = 0; i < old_state->size; i++) { |
508 | struct hash_table_entry entry = old_state->slots[i]; |
509 | if (entry.hash_code) { |
510 | /* We can directly emplace since we know we won't put the same item twice */ |
511 | s_emplace_item(new_state, entry, 0); |
512 | } |
513 | } |
514 | |
515 | map->p_impl = new_state; |
516 | aws_mem_release(new_state->alloc, old_state); |
517 | |
518 | return AWS_OP_SUCCESS; |
519 | } |
520 | |
521 | int aws_hash_table_create( |
522 | struct aws_hash_table *map, |
523 | const void *key, |
524 | struct aws_hash_element **p_elem, |
525 | int *was_created) { |
526 | |
527 | struct hash_table_state *state = map->p_impl; |
528 | uint64_t hash_code = s_hash_for(state, key); |
529 | struct hash_table_entry *entry; |
530 | size_t probe_idx; |
531 | int ignored; |
532 | if (!was_created) { |
533 | was_created = &ignored; |
534 | } |
535 | |
536 | int rv = s_find_entry(state, hash_code, key, &entry, &probe_idx); |
537 | |
538 | if (rv == AWS_ERROR_SUCCESS) { |
539 | if (p_elem) { |
540 | *p_elem = &entry->element; |
541 | } |
542 | *was_created = 0; |
543 | return AWS_OP_SUCCESS; |
544 | } |
545 | |
546 | /* Okay, we need to add an entry. Check the load factor first. */ |
547 | size_t incr_entry_count; |
548 | if (aws_add_size_checked(state->entry_count, 1, &incr_entry_count)) { |
549 | return AWS_OP_ERR; |
550 | } |
551 | if (incr_entry_count > state->max_load) { |
552 | rv = s_expand_table(map); |
553 | if (rv != AWS_OP_SUCCESS) { |
554 | /* Any error was already raised in expand_table */ |
555 | return rv; |
556 | } |
557 | state = map->p_impl; |
        /* If we expanded the table, we need to discard the probe index returned from find_entry,
         * as it's likely that we can find a more desirable slot. If we don't, then later gets will
         * terminate before reaching our probe index.
         *
         * n.b. currently we ignore this probe_idx subsequently, but leaving this here so we
         * don't forget when we optimize later. */
565 | probe_idx = 0; |
566 | } |
567 | |
568 | state->entry_count++; |
569 | struct hash_table_entry new_entry; |
570 | new_entry.element.key = key; |
571 | new_entry.element.value = NULL; |
572 | new_entry.hash_code = hash_code; |
573 | |
574 | entry = s_emplace_item(state, new_entry, probe_idx); |
575 | |
576 | if (p_elem) { |
577 | *p_elem = &entry->element; |
578 | } |
579 | |
580 | *was_created = 1; |
581 | |
582 | return AWS_OP_SUCCESS; |
583 | } |
584 | |
585 | AWS_COMMON_API |
586 | int aws_hash_table_put(struct aws_hash_table *map, const void *key, void *value, int *was_created) { |
587 | struct aws_hash_element *p_elem; |
588 | int was_created_fallback; |
589 | |
590 | if (!was_created) { |
591 | was_created = &was_created_fallback; |
592 | } |
593 | |
594 | if (aws_hash_table_create(map, key, &p_elem, was_created)) { |
595 | return AWS_OP_ERR; |
596 | } |
597 | |
598 | /* |
599 | * aws_hash_table_create might resize the table, which results in map->p_impl changing. |
600 | * It is therefore important to wait to read p_impl until after we return. |
601 | */ |
602 | struct hash_table_state *state = map->p_impl; |
603 | |
604 | if (!*was_created) { |
605 | if (p_elem->key != key && state->destroy_key_fn) { |
606 | state->destroy_key_fn((void *)p_elem->key); |
607 | } |
608 | |
609 | if (state->destroy_value_fn) { |
610 | state->destroy_value_fn((void *)p_elem->value); |
611 | } |
612 | } |
613 | |
614 | p_elem->key = key; |
615 | p_elem->value = value; |
616 | |
617 | return AWS_OP_SUCCESS; |
618 | } |
619 | |
620 | /* Clears an entry. Does _not_ invoke destructor callbacks. |
621 | * Returns the last slot touched (note that if we wrap, we'll report an index |
622 | * lower than the original entry's index) |
623 | */ |
624 | static size_t s_remove_entry(struct hash_table_state *state, struct hash_table_entry *entry) { |
625 | AWS_PRECONDITION(hash_table_state_is_valid(state)); |
626 | AWS_PRECONDITION(state->entry_count > 0); |
627 | AWS_PRECONDITION( |
628 | entry >= &state->slots[0] && entry < &state->slots[state->size], |
629 | "Input hash_table_entry [entry] pointer must point in the available slots." ); |
630 | state->entry_count--; |
631 | |
632 | /* Shift subsequent entries back until we find an entry that belongs at its |
633 | * current position. This is important to ensure that subsequent searches |
634 | * don't terminate at the removed element. |
635 | */ |
636 | size_t index = s_index_for(state, entry); |
637 | /* There is always at least one empty slot in the hash table, so this loop always terminates */ |
638 | while (1) { |
639 | size_t next_index = (index + 1) & state->mask; |
640 | |
641 | /* If we hit an empty slot, stop */ |
642 | if (!state->slots[next_index].hash_code) { |
643 | break; |
644 | } |
        /* If the next slot is at the start of its own probe sequence
         * (displacement zero), stop. Nothing with an earlier home slot can
         * come after it; otherwise this zero-displacement entry would have
         * been evicted from its home.
         */
650 | if ((state->slots[next_index].hash_code & state->mask) == next_index) { |
651 | break; |
652 | } |
653 | |
654 | /* Okay, shift this one back */ |
655 | state->slots[index] = state->slots[next_index]; |
656 | index = next_index; |
657 | } |
658 | |
659 | /* Clear the entry we shifted out of */ |
660 | AWS_ZERO_STRUCT(state->slots[index]); |
661 | AWS_RETURN_WITH_POSTCONDITION(index, hash_table_state_is_valid(state) && index <= state->size); |
662 | } |
663 | |
664 | int aws_hash_table_remove( |
665 | struct aws_hash_table *map, |
666 | const void *key, |
667 | struct aws_hash_element *p_value, |
668 | int *was_present) { |
669 | AWS_PRECONDITION(aws_hash_table_is_valid(map)); |
    AWS_PRECONDITION(
        p_value == NULL || AWS_OBJECT_PTR_IS_WRITABLE(p_value), "Input pointer [p_value] must be NULL or writable.");
    AWS_PRECONDITION(
        was_present == NULL || AWS_OBJECT_PTR_IS_WRITABLE(was_present),
        "Input pointer [was_present] must be NULL or writable.");
675 | |
676 | struct hash_table_state *state = map->p_impl; |
677 | uint64_t hash_code = s_hash_for(state, key); |
678 | struct hash_table_entry *entry; |
679 | int ignored; |
680 | |
681 | if (!was_present) { |
682 | was_present = &ignored; |
683 | } |
684 | |
685 | int rv = s_find_entry(state, hash_code, key, &entry, NULL); |
686 | |
687 | if (rv != AWS_ERROR_SUCCESS) { |
688 | *was_present = 0; |
689 | AWS_SUCCEED_WITH_POSTCONDITION(aws_hash_table_is_valid(map)); |
690 | } |
691 | |
692 | *was_present = 1; |
693 | |
694 | if (p_value) { |
695 | *p_value = entry->element; |
696 | } else { |
697 | if (state->destroy_key_fn) { |
698 | state->destroy_key_fn((void *)entry->element.key); |
699 | } |
700 | if (state->destroy_value_fn) { |
701 | state->destroy_value_fn(entry->element.value); |
702 | } |
703 | } |
704 | s_remove_entry(state, entry); |
705 | |
706 | AWS_SUCCEED_WITH_POSTCONDITION(aws_hash_table_is_valid(map)); |
707 | } |
708 | |
709 | int aws_hash_table_remove_element(struct aws_hash_table *map, struct aws_hash_element *p_value) { |
710 | AWS_PRECONDITION(aws_hash_table_is_valid(map)); |
711 | AWS_PRECONDITION(p_value != NULL); |
712 | |
713 | struct hash_table_state *state = map->p_impl; |
714 | struct hash_table_entry *entry = AWS_CONTAINER_OF(p_value, struct hash_table_entry, element); |
715 | |
716 | s_remove_entry(state, entry); |
717 | |
718 | AWS_SUCCEED_WITH_POSTCONDITION(aws_hash_table_is_valid(map)); |
719 | } |
720 | |
721 | int aws_hash_table_foreach( |
722 | struct aws_hash_table *map, |
723 | int (*callback)(void *context, struct aws_hash_element *pElement), |
724 | void *context) { |
725 | |
726 | for (struct aws_hash_iter iter = aws_hash_iter_begin(map); !aws_hash_iter_done(&iter); aws_hash_iter_next(&iter)) { |
727 | int rv = callback(context, &iter.element); |
728 | |
729 | if (rv & AWS_COMMON_HASH_TABLE_ITER_DELETE) { |
730 | aws_hash_iter_delete(&iter, false); |
731 | } |
732 | |
733 | if (!(rv & AWS_COMMON_HASH_TABLE_ITER_CONTINUE)) { |
734 | break; |
735 | } |
736 | } |
737 | |
738 | return AWS_OP_SUCCESS; |
739 | } |
740 | |
741 | bool aws_hash_table_eq( |
742 | const struct aws_hash_table *a, |
743 | const struct aws_hash_table *b, |
744 | aws_hash_callback_eq_fn *value_eq) { |
745 | AWS_PRECONDITION(aws_hash_table_is_valid(a)); |
746 | AWS_PRECONDITION(aws_hash_table_is_valid(b)); |
747 | AWS_PRECONDITION(value_eq != NULL); |
748 | |
749 | if (aws_hash_table_get_entry_count(a) != aws_hash_table_get_entry_count(b)) { |
750 | AWS_RETURN_WITH_POSTCONDITION(false, aws_hash_table_is_valid(a) && aws_hash_table_is_valid(b)); |
751 | } |
752 | |
753 | /* |
754 | * Now that we have established that the two tables have the same number of |
755 | * entries, we can simply iterate one and compare against the same key in |
756 | * the other. |
757 | */ |
758 | for (size_t i = 0; i < a->p_impl->size; ++i) { |
759 | const struct hash_table_entry *const a_entry = &a->p_impl->slots[i]; |
760 | if (a_entry->hash_code == 0) { |
761 | continue; |
762 | } |
763 | |
764 | struct aws_hash_element *b_element = NULL; |
765 | |
766 | aws_hash_table_find(b, a_entry->element.key, &b_element); |
767 | |
768 | if (!b_element) { |
769 | /* Key is present in A only */ |
770 | AWS_RETURN_WITH_POSTCONDITION(false, aws_hash_table_is_valid(a) && aws_hash_table_is_valid(b)); |
771 | } |
772 | |
773 | if (!s_safe_eq_check(value_eq, a_entry->element.value, b_element->value)) { |
774 | AWS_RETURN_WITH_POSTCONDITION(false, aws_hash_table_is_valid(a) && aws_hash_table_is_valid(b)); |
775 | } |
776 | } |
777 | AWS_RETURN_WITH_POSTCONDITION(true, aws_hash_table_is_valid(a) && aws_hash_table_is_valid(b)); |
778 | } |
779 | |
780 | /** |
 * Given an iterator and a start slot, find the next filled slot if one exists;
 * otherwise, return an iter that will return true for aws_hash_iter_done().
783 | * Note that aws_hash_iter_is_valid() need not hold on entry to the function, since |
784 | * it can be called on a partially constructed iter from aws_hash_iter_begin(). |
785 | * |
786 | * Note that calling this on an iterator which is "done" is idempotent: it will return another |
787 | * iterator which is "done". |
788 | */ |
789 | static inline void s_get_next_element(struct aws_hash_iter *iter, size_t start_slot) { |
790 | AWS_PRECONDITION(iter != NULL); |
791 | AWS_PRECONDITION(aws_hash_table_is_valid(iter->map)); |
792 | struct hash_table_state *state = iter->map->p_impl; |
793 | size_t limit = iter->limit; |
794 | |
795 | for (size_t i = start_slot; i < limit; i++) { |
796 | struct hash_table_entry *entry = &state->slots[i]; |
797 | |
798 | if (entry->hash_code) { |
799 | iter->element = entry->element; |
800 | iter->slot = i; |
801 | iter->status = AWS_HASH_ITER_STATUS_READY_FOR_USE; |
802 | return; |
803 | } |
804 | } |
805 | iter->element.key = NULL; |
806 | iter->element.value = NULL; |
807 | iter->slot = iter->limit; |
808 | iter->status = AWS_HASH_ITER_STATUS_DONE; |
809 | AWS_POSTCONDITION(aws_hash_iter_is_valid(iter)); |
810 | } |
811 | |
812 | struct aws_hash_iter aws_hash_iter_begin(const struct aws_hash_table *map) { |
813 | AWS_PRECONDITION(aws_hash_table_is_valid(map)); |
814 | struct hash_table_state *state = map->p_impl; |
815 | struct aws_hash_iter iter; |
816 | AWS_ZERO_STRUCT(iter); |
817 | iter.map = map; |
818 | iter.limit = state->size; |
819 | s_get_next_element(&iter, 0); |
820 | AWS_RETURN_WITH_POSTCONDITION( |
821 | iter, |
822 | aws_hash_iter_is_valid(&iter) && |
823 | (iter.status == AWS_HASH_ITER_STATUS_DONE || iter.status == AWS_HASH_ITER_STATUS_READY_FOR_USE), |
824 | "The status of output aws_hash_iter [iter] must either be DONE or READY_FOR_USE." ); |
825 | } |
826 | |
827 | bool aws_hash_iter_done(const struct aws_hash_iter *iter) { |
828 | AWS_PRECONDITION(aws_hash_iter_is_valid(iter)); |
829 | AWS_PRECONDITION( |
830 | iter->status == AWS_HASH_ITER_STATUS_DONE || iter->status == AWS_HASH_ITER_STATUS_READY_FOR_USE, |
831 | "Input aws_hash_iter [iter] must either be done, or ready to use." ); |
832 | /* |
833 | * SIZE_MAX is a valid (non-terminal) value for iter->slot in the event that |
834 | * we delete slot 0. See comments in aws_hash_iter_delete. |
835 | * |
836 | * As such we must use == rather than >= here. |
837 | */ |
838 | bool rval = (iter->slot == iter->limit); |
    AWS_POSTCONDITION(
        iter->status == AWS_HASH_ITER_STATUS_DONE || iter->status == AWS_HASH_ITER_STATUS_READY_FOR_USE,
        "The status of output aws_hash_iter [iter] must either be DONE or READY_FOR_USE.");
    AWS_POSTCONDITION(
        rval == (iter->status == AWS_HASH_ITER_STATUS_DONE),
        "Output bool [rval] must be true if and only if the status of [iter] is DONE.");
845 | AWS_POSTCONDITION(aws_hash_iter_is_valid(iter)); |
846 | return rval; |
847 | } |
848 | |
849 | void aws_hash_iter_next(struct aws_hash_iter *iter) { |
850 | AWS_PRECONDITION(aws_hash_iter_is_valid(iter)); |
851 | #ifdef CBMC |
852 | # pragma CPROVER check push |
853 | # pragma CPROVER check disable "unsigned-overflow" |
854 | #endif |
855 | s_get_next_element(iter, iter->slot + 1); |
856 | #ifdef CBMC |
857 | # pragma CPROVER check pop |
858 | #endif |
859 | AWS_POSTCONDITION( |
860 | iter->status == AWS_HASH_ITER_STATUS_DONE || iter->status == AWS_HASH_ITER_STATUS_READY_FOR_USE, |
861 | "The status of output aws_hash_iter [iter] must either be DONE or READY_FOR_USE." ); |
862 | AWS_POSTCONDITION(aws_hash_iter_is_valid(iter)); |
863 | } |
864 | |
865 | void aws_hash_iter_delete(struct aws_hash_iter *iter, bool destroy_contents) { |
866 | AWS_PRECONDITION( |
        iter->status == AWS_HASH_ITER_STATUS_READY_FOR_USE, "Input aws_hash_iter [iter] must be ready for use.");
868 | AWS_PRECONDITION(aws_hash_iter_is_valid(iter)); |
    AWS_PRECONDITION(
        iter->map->p_impl->entry_count > 0,
        "The hash_table_state pointed to by input [iter] must contain at least one entry.");
872 | |
873 | struct hash_table_state *state = iter->map->p_impl; |
874 | if (destroy_contents) { |
875 | if (state->destroy_key_fn) { |
876 | state->destroy_key_fn((void *)iter->element.key); |
877 | } |
878 | if (state->destroy_value_fn) { |
879 | state->destroy_value_fn(iter->element.value); |
880 | } |
881 | } |
882 | |
883 | size_t last_index = s_remove_entry(state, &state->slots[iter->slot]); |
884 | |
    /* If we shifted elements that are not part of the window we intend to iterate
     * over, it means we shifted an element that we already visited into the
     * iter->limit - 1 position. To avoid double iteration, we'll now reduce the
     * limit to compensate.
     *
     * Note that if last_index equals iter->slot, no elements were shifted at all,
     * and the limit needs no adjustment.
     */
893 | if (last_index < iter->slot || last_index >= iter->limit) { |
894 | iter->limit--; |
895 | } |
896 | |
897 | /* |
898 | * After removing this entry, the next entry might be in the same slot, or |
899 | * in some later slot, or we might have no further entries. |
900 | * |
901 | * We also expect that the caller will call aws_hash_iter_done and aws_hash_iter_next |
902 | * after this delete call. This gets a bit tricky if we just deleted the value |
903 | * in slot 0, and a new value has shifted in. |
904 | * |
905 | * To deal with this, we'll just step back one slot, and let _next start iteration |
906 | * at our current slot. Note that if we just deleted slot 0, this will result in |
907 | * underflowing to SIZE_MAX; we have to take care in aws_hash_iter_done to avoid |
908 | * treating this as an end-of-iteration condition. |
909 | */ |
910 | #ifdef CBMC |
911 | # pragma CPROVER check push |
912 | # pragma CPROVER check disable "unsigned-overflow" |
913 | #endif |
914 | iter->slot--; |
915 | #ifdef CBMC |
916 | # pragma CPROVER check pop |
917 | #endif |
918 | iter->status = AWS_HASH_ITER_STATUS_DELETE_CALLED; |
919 | AWS_POSTCONDITION( |
920 | iter->status == AWS_HASH_ITER_STATUS_DELETE_CALLED, |
921 | "The status of output aws_hash_iter [iter] must be DELETE_CALLED." ); |
922 | AWS_POSTCONDITION(aws_hash_iter_is_valid(iter)); |
923 | } |
924 | |
925 | void aws_hash_table_clear(struct aws_hash_table *map) { |
926 | AWS_PRECONDITION(aws_hash_table_is_valid(map)); |
927 | struct hash_table_state *state = map->p_impl; |
928 | |
929 | /* Check that we have at least one destructor before iterating over the table */ |
930 | if (state->destroy_key_fn || state->destroy_value_fn) { |
931 | for (size_t i = 0; i < state->size; ++i) { |
932 | struct hash_table_entry *entry = &state->slots[i]; |
933 | if (!entry->hash_code) { |
934 | continue; |
935 | } |
936 | if (state->destroy_key_fn) { |
937 | state->destroy_key_fn((void *)entry->element.key); |
938 | } |
939 | if (state->destroy_value_fn) { |
940 | state->destroy_value_fn(entry->element.value); |
941 | } |
942 | } |
943 | } |
944 | /* Since hash code 0 represents an empty slot we can just zero out the |
945 | * entire table. */ |
946 | memset(state->slots, 0, sizeof(*state->slots) * state->size); |
947 | |
948 | state->entry_count = 0; |
949 | AWS_POSTCONDITION(aws_hash_table_is_valid(map)); |
950 | } |
951 | |
952 | uint64_t aws_hash_c_string(const void *item) { |
953 | AWS_PRECONDITION(aws_c_string_is_valid(item)); |
954 | const char *str = item; |
955 | |
956 | /* first digits of pi in hex */ |
957 | uint32_t b = 0x3243F6A8, c = 0x885A308D; |
958 | hashlittle2(str, strlen(str), &c, &b); |
959 | |
960 | return ((uint64_t)b << 32) | c; |
961 | } |
962 | |
963 | uint64_t aws_hash_string(const void *item) { |
964 | AWS_PRECONDITION(aws_string_is_valid(item)); |
965 | const struct aws_string *str = item; |
966 | |
967 | /* first digits of pi in hex */ |
968 | uint32_t b = 0x3243F6A8, c = 0x885A308D; |
969 | hashlittle2(aws_string_bytes(str), str->len, &c, &b); |
970 | AWS_RETURN_WITH_POSTCONDITION(((uint64_t)b << 32) | c, aws_string_is_valid(str)); |
971 | } |
972 | |
973 | uint64_t aws_hash_byte_cursor_ptr(const void *item) { |
974 | AWS_PRECONDITION(aws_byte_cursor_is_valid(item)); |
975 | const struct aws_byte_cursor *cur = item; |
976 | |
977 | /* first digits of pi in hex */ |
978 | uint32_t b = 0x3243F6A8, c = 0x885A308D; |
979 | hashlittle2(cur->ptr, cur->len, &c, &b); |
980 | AWS_RETURN_WITH_POSTCONDITION(((uint64_t)b << 32) | c, aws_byte_cursor_is_valid(cur)); |
981 | } |
982 | |
983 | uint64_t aws_hash_ptr(const void *item) { |
984 | /* Since the numeric value of the pointer is considered, not the memory behind it, 0 is an acceptable value */ |
985 | /* first digits of e in hex |
986 | * 2.b7e 1516 28ae d2a6 */ |
987 | uint32_t b = 0x2b7e1516, c = 0x28aed2a6; |
988 | |
989 | hashlittle2(&item, sizeof(item), &c, &b); |
990 | |
991 | return ((uint64_t)b << 32) | c; |
992 | } |
993 | |
994 | bool aws_hash_callback_c_str_eq(const void *a, const void *b) { |
995 | AWS_PRECONDITION(aws_c_string_is_valid(a)); |
996 | AWS_PRECONDITION(aws_c_string_is_valid(b)); |
997 | bool rval = !strcmp(a, b); |
998 | AWS_RETURN_WITH_POSTCONDITION(rval, aws_c_string_is_valid(a) && aws_c_string_is_valid(b)); |
999 | } |
1000 | |
1001 | bool aws_hash_callback_string_eq(const void *a, const void *b) { |
1002 | AWS_PRECONDITION(aws_string_is_valid(a)); |
1003 | AWS_PRECONDITION(aws_string_is_valid(b)); |
1004 | bool rval = aws_string_eq(a, b); |
    AWS_RETURN_WITH_POSTCONDITION(rval, aws_string_is_valid(a) && aws_string_is_valid(b));
1006 | } |
1007 | |
1008 | void aws_hash_callback_string_destroy(void *a) { |
1009 | AWS_PRECONDITION(aws_string_is_valid(a)); |
1010 | aws_string_destroy(a); |
1011 | } |
1012 | |
1013 | bool aws_ptr_eq(const void *a, const void *b) { |
1014 | return a == b; |
1015 | } |
1016 | |
1017 | /** |
1018 | * Best-effort check of hash_table_state data-structure invariants |
1019 | * Some invariants, such as that the number of entries is actually the |
1020 | * same as the entry_count field, would require a loop to check |
1021 | */ |
1022 | bool aws_hash_table_is_valid(const struct aws_hash_table *map) { |
1023 | return map && map->p_impl && hash_table_state_is_valid(map->p_impl); |
1024 | } |
1025 | |
1026 | /** |
1027 | * Best-effort check of hash_table_state data-structure invariants |
1028 | * Some invariants, such as that the number of entries is actually the |
1029 | * same as the entry_count field, would require a loop to check |
1030 | */ |
1031 | bool hash_table_state_is_valid(const struct hash_table_state *map) { |
1032 | if (!map) { |
1033 | return false; |
1034 | } |
1035 | bool hash_fn_nonnull = (map->hash_fn != NULL); |
1036 | bool equals_fn_nonnull = (map->equals_fn != NULL); |
1037 | /*destroy_key_fn and destroy_value_fn are both allowed to be NULL*/ |
1038 | bool alloc_nonnull = (map->alloc != NULL); |
1039 | bool size_at_least_two = (map->size >= 2); |
1040 | bool size_is_power_of_two = aws_is_power_of_two(map->size); |
1041 | bool entry_count = (map->entry_count <= map->max_load); |
1042 | bool max_load = (map->max_load < map->size); |
1043 | bool mask_is_correct = (map->mask == (map->size - 1)); |
    bool max_load_factor_bounded = map->max_load_factor == 0.95; /* (map->max_load_factor < 1.0) */
1045 | bool slots_allocated = AWS_MEM_IS_WRITABLE(&map->slots[0], sizeof(map->slots[0]) * map->size); |
1046 | |
1047 | return hash_fn_nonnull && equals_fn_nonnull && alloc_nonnull && size_at_least_two && size_is_power_of_two && |
1048 | entry_count && max_load && mask_is_correct && max_load_factor_bounded && slots_allocated; |
1049 | } |
1050 | |
1051 | /** |
1052 | * Given a pointer to a hash_iter, checks that it is well-formed, with all data-structure invariants. |
1053 | */ |
1054 | bool aws_hash_iter_is_valid(const struct aws_hash_iter *iter) { |
1055 | if (!iter) { |
1056 | return false; |
1057 | } |
1058 | if (!iter->map) { |
1059 | return false; |
1060 | } |
1061 | if (!aws_hash_table_is_valid(iter->map)) { |
1062 | return false; |
1063 | } |
1064 | if (iter->limit > iter->map->p_impl->size) { |
1065 | return false; |
1066 | } |
1067 | |
1068 | switch (iter->status) { |
1069 | case AWS_HASH_ITER_STATUS_DONE: |
1070 | /* Done iff slot == limit */ |
1071 | return iter->slot == iter->limit; |
1072 | case AWS_HASH_ITER_STATUS_DELETE_CALLED: |
1073 | /* iter->slot can underflow to SIZE_MAX after a delete |
1074 | * see the comments for aws_hash_iter_delete() */ |
1075 | return iter->slot <= iter->limit || iter->slot == SIZE_MAX; |
1076 | case AWS_HASH_ITER_STATUS_READY_FOR_USE: |
1077 | /* A slot must point to a valid location (i.e. hash_code != 0) */ |
1078 | return iter->slot < iter->limit && iter->map->p_impl->slots[iter->slot].hash_code != 0; |
1079 | } |
1080 | /* Invalid status code */ |
1081 | return false; |
1082 | } |
1083 | |
1084 | /** |
1085 | * Determine the total number of bytes needed for a hash-table with |
1086 | * "size" slots. If the result would overflow a size_t, return |
1087 | * AWS_OP_ERR; otherwise, return AWS_OP_SUCCESS with the result in |
1088 | * "required_bytes". |
1089 | */ |
1090 | int hash_table_state_required_bytes(size_t size, size_t *required_bytes) { |
1091 | |
1092 | size_t elemsize; |
1093 | if (aws_mul_size_checked(size, sizeof(struct hash_table_entry), &elemsize)) { |
1094 | return AWS_OP_ERR; |
1095 | } |
1096 | |
1097 | if (aws_add_size_checked(elemsize, sizeof(struct hash_table_state), required_bytes)) { |
1098 | return AWS_OP_ERR; |
1099 | } |
1100 | |
1101 | return AWS_OP_SUCCESS; |
1102 | } |
1103 | |