1 | #ifndef AWS_COMMON_HASH_TABLE_H |
2 | #define AWS_COMMON_HASH_TABLE_H |
3 | |
4 | /* |
5 | * Copyright 2010-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. |
6 | * |
7 | * Licensed under the Apache License, Version 2.0 (the "License"). |
8 | * You may not use this file except in compliance with the License. |
9 | * A copy of the License is located at |
10 | * |
11 | * http://aws.amazon.com/apache2.0 |
12 | * |
13 | * or in the "license" file accompanying this file. This file is distributed |
14 | * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either |
15 | * express or implied. See the License for the specific language governing |
16 | * permissions and limitations under the License. |
17 | */ |
18 | |
19 | #include <aws/common/common.h> |
20 | |
21 | #include <stddef.h> |
22 | |
23 | #define AWS_COMMON_HASH_TABLE_ITER_CONTINUE (1 << 0) |
24 | #define AWS_COMMON_HASH_TABLE_ITER_DELETE (1 << 1) |
25 | |
26 | /** |
27 | * Hash table data structure. This module provides an automatically resizing |
28 | * hash table implementation for general purpose use. The hash table stores a |
29 | * mapping between void * keys and values; it is expected that in most cases, |
30 | * these will point to a structure elsewhere in the heap, instead of inlining a |
31 | * key or value into the hash table element itself. |
32 | * |
33 | * Currently, this hash table implements a variant of robin hood hashing, but |
34 | * we do not guarantee that this won't change in the future. |
35 | * |
36 | * Associated with each hash function are four callbacks: |
37 | * |
38 | * hash_fn - A hash function from the keys to a uint64_t. It is critical that |
39 | * the hash function for a key does not change while the key is in the hash |
40 | * table; violating this results in undefined behavior. Collisions are |
41 | * tolerated, though naturally with reduced performance. |
42 | * |
43 | * equals_fn - An equality comparison function. This function must be |
44 | * reflexive and consistent with hash_fn. |
45 | * |
46 | * destroy_key_fn, destroy_value_fn - Optional callbacks invoked when the |
47 | * table is cleared or cleaned up and at the caller's option when an element |
48 | * is removed from the table. Either or both may be set to NULL, which |
49 | * has the same effect as a no-op destroy function. |
50 | * |
51 | * This datastructure can be safely moved between threads, subject to the |
52 | * requirements of the underlying allocator. It is also safe to invoke |
53 | * non-mutating operations on the hash table from multiple threads. A suitable |
54 | * memory barrier must be used when transitioning from single-threaded mutating |
55 | * usage to multithreaded usage. |
56 | */ |
57 | struct hash_table_state; /* Opaque pointer */ |
58 | struct aws_hash_table { |
59 | struct hash_table_state *p_impl; |
60 | }; |
61 | |
62 | /** |
63 | * Represents an element in the hash table. Various operations on the hash |
64 | * table may provide pointers to elements stored within the hash table; |
65 | * generally, calling code may alter value, but must not alter key (or any |
66 | * information used to compute key's hash code). |
67 | * |
68 | * Pointers to elements within the hash are invalidated whenever an operation |
69 | * which may change the number of elements in the hash is invoked (i.e. put, |
70 | * delete, clear, and clean_up), regardless of whether the number of elements |
71 | * actually changes. |
72 | */ |
73 | struct aws_hash_element { |
74 | const void *key; |
75 | void *value; |
76 | }; |
77 | |
78 | enum aws_hash_iter_status { |
79 | AWS_HASH_ITER_STATUS_DONE, |
80 | AWS_HASH_ITER_STATUS_DELETE_CALLED, |
81 | AWS_HASH_ITER_STATUS_READY_FOR_USE, |
82 | }; |
83 | |
84 | struct aws_hash_iter { |
85 | const struct aws_hash_table *map; |
86 | struct aws_hash_element element; |
87 | size_t slot; |
88 | size_t limit; |
89 | enum aws_hash_iter_status status; |
90 | /* |
91 | * Reserving extra fields for binary compatibility with future expansion of |
92 | * iterator in case hash table implementation changes. |
93 | */ |
94 | int unused_0; |
95 | void *unused_1; |
96 | void *unused_2; |
97 | }; |
98 | |
99 | /** |
100 | * Prototype for a key hashing function pointer. |
101 | */ |
102 | typedef uint64_t(aws_hash_fn)(const void *key); |
103 | |
104 | /** |
105 | * Prototype for a hash table equality check function pointer. |
106 | * |
107 | * This type is usually used for a function that compares two hash table |
108 | * keys, but note that the same type is used for a function that compares |
109 | * two hash table values in aws_hash_table_eq. |
110 | * |
111 | * Equality functions used in a hash table must be reflexive (i.e., a == b if |
112 | * and only if b == a), and must be consistent with the hash function in use. |
113 | */ |
114 | typedef bool(aws_hash_callback_eq_fn)(const void *a, const void *b); |
115 | |
116 | /** |
117 | * Prototype for a hash table key or value destructor function pointer. |
118 | * |
119 | * This function is used to destroy elements in the hash table when the |
120 | * table is cleared or cleaned up. |
121 | * |
122 | * Note that functions which remove individual elements from the hash |
123 | * table provide options of whether or not to invoke the destructors |
124 | * on the key and value of a removed element. |
125 | */ |
126 | typedef void(aws_hash_callback_destroy_fn)(void *key_or_value); |
127 | |
128 | AWS_EXTERN_C_BEGIN |
129 | |
130 | /** |
131 | * Initializes a hash map with initial capacity for 'size' elements |
132 | * without resizing. Uses hash_fn to compute the hash of each element. |
133 | * equals_fn to compute equality of two keys. Whenever an element is |
134 | * removed without being returned, destroy_key_fn is run on the pointer |
135 | * to the key and destroy_value_fn is run on the pointer to the value. |
136 | * Either or both may be NULL if a callback is not desired in this case. |
137 | */ |
138 | AWS_COMMON_API |
139 | int aws_hash_table_init( |
140 | struct aws_hash_table *map, |
141 | struct aws_allocator *alloc, |
142 | size_t size, |
143 | aws_hash_fn *hash_fn, |
144 | aws_hash_callback_eq_fn *equals_fn, |
145 | aws_hash_callback_destroy_fn *destroy_key_fn, |
146 | aws_hash_callback_destroy_fn *destroy_value_fn); |
147 | |
148 | /** |
149 | * Deletes every element from map and frees all associated memory. |
150 | * destroy_fn will be called for each element. aws_hash_table_init |
151 | * must be called before reusing the hash table. |
152 | * |
153 | * This method is idempotent. |
154 | */ |
155 | AWS_COMMON_API |
156 | void aws_hash_table_clean_up(struct aws_hash_table *map); |
157 | |
158 | /** |
159 | * Safely swaps two hash tables. Note that we swap the entirety of the hash |
160 | * table, including which allocator is associated. |
161 | * |
162 | * Neither hash table is required to be initialized; if one or both is |
163 | * uninitialized, then the uninitialized state is also swapped. |
164 | */ |
165 | AWS_COMMON_API |
166 | void aws_hash_table_swap(struct aws_hash_table *AWS_RESTRICT a, struct aws_hash_table *AWS_RESTRICT b); |
167 | |
168 | /** |
169 | * Moves the hash table in 'from' to 'to'. After this move, 'from' will |
170 | * be identical to the state of the original 'to' hash table, and 'to' |
171 | * will be in the same state as if it had been passed to aws_hash_table_clean_up |
172 | * (that is, it will have no memory allocated, and it will be safe to |
173 | * either discard it or call aws_hash_table_clean_up again). |
174 | * |
175 | * Note that 'to' will not be cleaned up. You should make sure that 'to' |
176 | * is either uninitialized or cleaned up before moving a hashtable into |
177 | * it. |
178 | */ |
179 | AWS_COMMON_API |
180 | void aws_hash_table_move(struct aws_hash_table *AWS_RESTRICT to, struct aws_hash_table *AWS_RESTRICT from); |
181 | |
182 | /** |
183 | * Returns the current number of entries in the table. |
184 | */ |
185 | AWS_COMMON_API |
186 | size_t aws_hash_table_get_entry_count(const struct aws_hash_table *map); |
187 | |
188 | /** |
189 | * Returns an iterator to be used for iterating through a hash table. |
190 | * Iterator will already point to the first element of the table it finds, |
191 | * which can be accessed as iter.element. |
192 | * |
193 | * This function cannot fail, but if there are no elements in the table, |
194 | * the returned iterator will return true for aws_hash_iter_done(&iter). |
195 | */ |
196 | AWS_COMMON_API |
197 | struct aws_hash_iter aws_hash_iter_begin(const struct aws_hash_table *map); |
198 | |
199 | /** |
200 | * Returns true if iterator is done iterating through table, false otherwise. |
201 | * If this is true, the iterator will not include an element of the table. |
202 | */ |
203 | AWS_COMMON_API |
204 | bool aws_hash_iter_done(const struct aws_hash_iter *iter); |
205 | |
206 | /** |
207 | * Updates iterator so that it points to next element of hash table. |
208 | * |
209 | * This and the two previous functions are designed to be used together with |
210 | * the following idiom: |
211 | * |
212 | * for (struct aws_hash_iter iter = aws_hash_iter_begin(&map); |
213 | * !aws_hash_iter_done(&iter); aws_hash_iter_next(&iter)) { |
214 | * const key_type key = *(const key_type *)iter.element.key; |
215 | * value_type value = *(value_type *)iter.element.value; |
216 | * // etc. |
217 | * } |
218 | * |
219 | * Note that calling this on an iter which is "done" is idempotent: |
220 | * i.e. it will return another iter which is "done". |
221 | */ |
222 | AWS_COMMON_API |
223 | void aws_hash_iter_next(struct aws_hash_iter *iter); |
224 | |
225 | /** |
226 | * Deletes the element currently pointed-to by the hash iterator. |
227 | * After calling this method, the element member of the iterator |
228 | * should not be accessed until the next call to aws_hash_iter_next. |
229 | * |
230 | * @param destroy_contents If true, the destructors for the key and value |
231 | * will be called. |
232 | */ |
233 | AWS_COMMON_API |
234 | void aws_hash_iter_delete(struct aws_hash_iter *iter, bool destroy_contents); |
235 | |
236 | /** |
237 | * Attempts to locate an element at key. If the element is found, a |
238 | * pointer to the value is placed in *p_elem; if it is not found, |
239 | * *pElem is set to NULL. Either way, AWS_OP_SUCCESS is returned. |
240 | * |
241 | * This method does not change the state of the hash table. Therefore, it |
242 | * is safe to call _find from multiple threads on the same hash table, |
243 | * provided no mutating operations happen in parallel. |
244 | * |
245 | * Calling code may update the value in the hash table by modifying **pElem |
246 | * after a successful find. However, this pointer is not guaranteed to |
247 | * remain usable after a subsequent call to _put, _delete, _clear, or |
248 | * _clean_up. |
249 | */ |
250 | |
251 | AWS_COMMON_API |
252 | int aws_hash_table_find(const struct aws_hash_table *map, const void *key, struct aws_hash_element **p_elem); |
253 | |
254 | /** |
255 | * Attempts to locate an element at key. If no such element was found, |
256 | * creates a new element, with value initialized to NULL. In either case, a |
257 | * pointer to the element is placed in *p_elem. |
258 | * |
259 | * If was_created is non-NULL, *was_created is set to 0 if an existing |
260 | * element was found, or 1 is a new element was created. |
261 | * |
262 | * Returns AWS_OP_SUCCESS if an item was found or created. |
263 | * Raises AWS_ERROR_OOM if hash table expansion was required and memory |
264 | * allocation failed. |
265 | */ |
266 | AWS_COMMON_API |
267 | int aws_hash_table_create( |
268 | struct aws_hash_table *map, |
269 | const void *key, |
270 | struct aws_hash_element **p_elem, |
271 | int *was_created); |
272 | |
273 | /** |
274 | * Inserts a new element at key, with the given value. If another element |
275 | * exists at that key, the old element will be overwritten; both old key and |
276 | * value objects will be destroyed. |
277 | * |
278 | * If was_created is non-NULL, *was_created is set to 0 if an existing |
279 | * element was found, or 1 is a new element was created. |
280 | * |
281 | * Returns AWS_OP_SUCCESS if an item was found or created. |
282 | * Raises AWS_ERROR_OOM if hash table expansion was required and memory |
283 | */ |
284 | AWS_COMMON_API |
285 | int aws_hash_table_put(struct aws_hash_table *map, const void *key, void *value, int *was_created); |
286 | |
287 | /** |
288 | * Removes element at key. Always returns AWS_OP_SUCCESS. |
289 | * |
290 | * If pValue is non-NULL, the existing value (if any) is moved into |
291 | * (*value) before removing from the table, and destroy_fn is _not_ |
292 | * invoked. If pValue is NULL, then (if the element existed) destroy_fn |
293 | * will be invoked on the element being removed. |
294 | * |
295 | * If was_present is non-NULL, it is set to 0 if the element was |
296 | * not present, or 1 if it was present (and is now removed). |
297 | */ |
298 | AWS_COMMON_API |
299 | int aws_hash_table_remove( |
300 | struct aws_hash_table *map, |
301 | const void *key, |
302 | struct aws_hash_element *p_value, |
303 | int *was_present); |
304 | |
305 | /** |
306 | * Removes element already known (typically by find()). |
307 | * |
308 | * p_value should point to a valid element returned by create() or find(). |
309 | * |
310 | * NOTE: DO NOT call this method from inside of a aws_hash_table_foreach callback, return |
311 | * AWS_COMMON_HASH_TABLE_ITER_DELETE instead. |
312 | */ |
313 | AWS_COMMON_API |
314 | int aws_hash_table_remove_element(struct aws_hash_table *map, struct aws_hash_element *p_value); |
315 | |
316 | /** |
317 | * Iterates through every element in the map and invokes the callback on |
318 | * that item. Iteration is performed in an arbitrary, implementation-defined |
319 | * order, and is not guaranteed to be consistent across invocations. |
320 | * |
321 | * The callback may change the value associated with the key by overwriting |
322 | * the value pointed-to by value. In this case, the on_element_removed |
323 | * callback will not be invoked, unless the callback invokes |
324 | * AWS_COMMON_HASH_TABLE_ITER_DELETE (in which case the on_element_removed |
325 | * is given the updated value). |
326 | * |
327 | * The callback must return a bitmask of zero or more of the following values |
328 | * ORed together: |
329 | * |
330 | * # AWS_COMMON_HASH_TABLE_ITER_CONTINUE - Continues iteration to the next |
331 | * element (if not set, iteration stops) |
332 | * # AWS_COMMON_HASH_TABLE_ITER_DELETE - Deletes the current value and |
333 | * continues iteration. destroy_fn will NOT be invoked. |
334 | * |
335 | * Invoking any method which may change the contents of the hashtable |
336 | * during iteration results in undefined behavior. However, you may safely |
337 | * invoke non-mutating operations during an iteration. |
338 | * |
339 | * This operation is mutating only if AWS_COMMON_HASH_TABLE_ITER_DELETE |
340 | * is returned at some point during iteration. Otherwise, it is non-mutating |
341 | * and is safe to invoke in parallel with other non-mutating operations. |
342 | */ |
343 | |
344 | AWS_COMMON_API |
345 | int aws_hash_table_foreach( |
346 | struct aws_hash_table *map, |
347 | int (*callback)(void *context, struct aws_hash_element *p_element), |
348 | void *context); |
349 | |
350 | /** |
351 | * Compares two hash tables for equality. Both hash tables must have equivalent |
352 | * key comparators; values will be compared using the comparator passed into this |
353 | * function. The key hash function does not need to be equivalent between the |
354 | * two hash tables. |
355 | */ |
356 | AWS_COMMON_API |
357 | bool aws_hash_table_eq( |
358 | const struct aws_hash_table *a, |
359 | const struct aws_hash_table *b, |
360 | aws_hash_callback_eq_fn *value_eq); |
361 | |
362 | /** |
363 | * Removes every element from the hash map. destroy_fn will be called for |
364 | * each element. |
365 | */ |
366 | AWS_COMMON_API |
367 | void aws_hash_table_clear(struct aws_hash_table *map); |
368 | |
369 | /** |
370 | * Convenience hash function for NULL-terminated C-strings |
371 | */ |
372 | AWS_COMMON_API |
373 | uint64_t aws_hash_c_string(const void *item); |
374 | |
375 | /** |
376 | * Convenience hash function for struct aws_strings. |
377 | * Hash is same as used on the string bytes by aws_hash_c_string. |
378 | */ |
379 | AWS_COMMON_API |
380 | uint64_t aws_hash_string(const void *item); |
381 | |
382 | /** |
383 | * Convenience hash function for struct aws_byte_cursor. |
384 | * Hash is same as used on the string bytes by aws_hash_c_string. |
385 | */ |
386 | AWS_COMMON_API |
387 | uint64_t aws_hash_byte_cursor_ptr(const void *item); |
388 | |
389 | /** |
390 | * Convenience hash function which hashes the pointer value directly, |
391 | * without dereferencing. This can be used in cases where pointer identity |
392 | * is desired, or where a uintptr_t is encoded into a const void *. |
393 | */ |
394 | AWS_COMMON_API |
395 | uint64_t aws_hash_ptr(const void *item); |
396 | |
397 | /** |
398 | * Convenience eq callback for NULL-terminated C-strings |
399 | */ |
400 | AWS_COMMON_API |
401 | bool aws_hash_callback_c_str_eq(const void *a, const void *b); |
402 | |
403 | /** |
404 | * Convenience eq callback for AWS strings |
405 | */ |
406 | AWS_COMMON_API |
407 | bool aws_hash_callback_string_eq(const void *a, const void *b); |
408 | |
409 | /** |
410 | * Convenience destroy callback for AWS strings |
411 | */ |
412 | AWS_COMMON_API |
413 | void aws_hash_callback_string_destroy(void *a); |
414 | |
415 | /** |
416 | * Equality function which compares pointer equality. |
417 | */ |
418 | AWS_COMMON_API |
419 | bool aws_ptr_eq(const void *a, const void *b); |
420 | |
421 | /** |
422 | * Best-effort check of hash_table_state data-structure invariants |
423 | */ |
424 | AWS_COMMON_API |
425 | bool aws_hash_table_is_valid(const struct aws_hash_table *map); |
426 | |
427 | /** |
428 | * Given a pointer to a hash_iter, checks that it is well-formed, with all data-structure invariants. |
429 | */ |
430 | AWS_COMMON_API |
431 | bool aws_hash_iter_is_valid(const struct aws_hash_iter *iter); |
432 | |
433 | AWS_EXTERN_C_END |
434 | |
435 | #endif /* AWS_COMMON_HASH_TABLE_H */ |
436 | |