| 1 | #ifndef AWS_COMMON_HASH_TABLE_H |
| 2 | #define AWS_COMMON_HASH_TABLE_H |
| 3 | |
| 4 | /* |
| 5 | * Copyright 2010-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. |
| 6 | * |
| 7 | * Licensed under the Apache License, Version 2.0 (the "License"). |
| 8 | * You may not use this file except in compliance with the License. |
| 9 | * A copy of the License is located at |
| 10 | * |
| 11 | * http://aws.amazon.com/apache2.0 |
| 12 | * |
| 13 | * or in the "license" file accompanying this file. This file is distributed |
| 14 | * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either |
| 15 | * express or implied. See the License for the specific language governing |
| 16 | * permissions and limitations under the License. |
| 17 | */ |
| 18 | |
| 19 | #include <aws/common/common.h> |
| 20 | |
| 21 | #include <stddef.h> |
| 22 | |
| 23 | #define AWS_COMMON_HASH_TABLE_ITER_CONTINUE (1 << 0) |
| 24 | #define AWS_COMMON_HASH_TABLE_ITER_DELETE (1 << 1) |
| 25 | |
| 26 | /** |
| 27 | * Hash table data structure. This module provides an automatically resizing |
| 28 | * hash table implementation for general purpose use. The hash table stores a |
| 29 | * mapping between void * keys and values; it is expected that in most cases, |
| 30 | * these will point to a structure elsewhere in the heap, instead of inlining a |
| 31 | * key or value into the hash table element itself. |
| 32 | * |
| 33 | * Currently, this hash table implements a variant of robin hood hashing, but |
| 34 | * we do not guarantee that this won't change in the future. |
| 35 | * |
| 36 | * Associated with each hash function are four callbacks: |
| 37 | * |
| 38 | * hash_fn - A hash function from the keys to a uint64_t. It is critical that |
| 39 | * the hash function for a key does not change while the key is in the hash |
| 40 | * table; violating this results in undefined behavior. Collisions are |
| 41 | * tolerated, though naturally with reduced performance. |
| 42 | * |
| 43 | * equals_fn - An equality comparison function. This function must be |
| 44 | * reflexive and consistent with hash_fn. |
| 45 | * |
| 46 | * destroy_key_fn, destroy_value_fn - Optional callbacks invoked when the |
| 47 | * table is cleared or cleaned up and at the caller's option when an element |
| 48 | * is removed from the table. Either or both may be set to NULL, which |
| 49 | * has the same effect as a no-op destroy function. |
| 50 | * |
| 51 | * This datastructure can be safely moved between threads, subject to the |
| 52 | * requirements of the underlying allocator. It is also safe to invoke |
| 53 | * non-mutating operations on the hash table from multiple threads. A suitable |
| 54 | * memory barrier must be used when transitioning from single-threaded mutating |
| 55 | * usage to multithreaded usage. |
| 56 | */ |
| 57 | struct hash_table_state; /* Opaque pointer */ |
| 58 | struct aws_hash_table { |
| 59 | struct hash_table_state *p_impl; |
| 60 | }; |
| 61 | |
| 62 | /** |
| 63 | * Represents an element in the hash table. Various operations on the hash |
| 64 | * table may provide pointers to elements stored within the hash table; |
| 65 | * generally, calling code may alter value, but must not alter key (or any |
| 66 | * information used to compute key's hash code). |
| 67 | * |
| 68 | * Pointers to elements within the hash are invalidated whenever an operation |
| 69 | * which may change the number of elements in the hash is invoked (i.e. put, |
| 70 | * delete, clear, and clean_up), regardless of whether the number of elements |
| 71 | * actually changes. |
| 72 | */ |
| 73 | struct aws_hash_element { |
| 74 | const void *key; |
| 75 | void *value; |
| 76 | }; |
| 77 | |
| 78 | enum aws_hash_iter_status { |
| 79 | AWS_HASH_ITER_STATUS_DONE, |
| 80 | AWS_HASH_ITER_STATUS_DELETE_CALLED, |
| 81 | AWS_HASH_ITER_STATUS_READY_FOR_USE, |
| 82 | }; |
| 83 | |
| 84 | struct aws_hash_iter { |
| 85 | const struct aws_hash_table *map; |
| 86 | struct aws_hash_element element; |
| 87 | size_t slot; |
| 88 | size_t limit; |
| 89 | enum aws_hash_iter_status status; |
| 90 | /* |
| 91 | * Reserving extra fields for binary compatibility with future expansion of |
| 92 | * iterator in case hash table implementation changes. |
| 93 | */ |
| 94 | int unused_0; |
| 95 | void *unused_1; |
| 96 | void *unused_2; |
| 97 | }; |
| 98 | |
| 99 | /** |
| 100 | * Prototype for a key hashing function pointer. |
| 101 | */ |
| 102 | typedef uint64_t(aws_hash_fn)(const void *key); |
| 103 | |
| 104 | /** |
| 105 | * Prototype for a hash table equality check function pointer. |
| 106 | * |
| 107 | * This type is usually used for a function that compares two hash table |
| 108 | * keys, but note that the same type is used for a function that compares |
| 109 | * two hash table values in aws_hash_table_eq. |
| 110 | * |
| 111 | * Equality functions used in a hash table must be reflexive (i.e., a == b if |
| 112 | * and only if b == a), and must be consistent with the hash function in use. |
| 113 | */ |
| 114 | typedef bool(aws_hash_callback_eq_fn)(const void *a, const void *b); |
| 115 | |
| 116 | /** |
| 117 | * Prototype for a hash table key or value destructor function pointer. |
| 118 | * |
| 119 | * This function is used to destroy elements in the hash table when the |
| 120 | * table is cleared or cleaned up. |
| 121 | * |
| 122 | * Note that functions which remove individual elements from the hash |
| 123 | * table provide options of whether or not to invoke the destructors |
| 124 | * on the key and value of a removed element. |
| 125 | */ |
| 126 | typedef void(aws_hash_callback_destroy_fn)(void *key_or_value); |
| 127 | |
| 128 | AWS_EXTERN_C_BEGIN |
| 129 | |
| 130 | /** |
| 131 | * Initializes a hash map with initial capacity for 'size' elements |
| 132 | * without resizing. Uses hash_fn to compute the hash of each element. |
| 133 | * equals_fn to compute equality of two keys. Whenever an element is |
| 134 | * removed without being returned, destroy_key_fn is run on the pointer |
| 135 | * to the key and destroy_value_fn is run on the pointer to the value. |
| 136 | * Either or both may be NULL if a callback is not desired in this case. |
| 137 | */ |
| 138 | AWS_COMMON_API |
| 139 | int aws_hash_table_init( |
| 140 | struct aws_hash_table *map, |
| 141 | struct aws_allocator *alloc, |
| 142 | size_t size, |
| 143 | aws_hash_fn *hash_fn, |
| 144 | aws_hash_callback_eq_fn *equals_fn, |
| 145 | aws_hash_callback_destroy_fn *destroy_key_fn, |
| 146 | aws_hash_callback_destroy_fn *destroy_value_fn); |
| 147 | |
| 148 | /** |
| 149 | * Deletes every element from map and frees all associated memory. |
| 150 | * destroy_fn will be called for each element. aws_hash_table_init |
| 151 | * must be called before reusing the hash table. |
| 152 | * |
| 153 | * This method is idempotent. |
| 154 | */ |
| 155 | AWS_COMMON_API |
| 156 | void aws_hash_table_clean_up(struct aws_hash_table *map); |
| 157 | |
| 158 | /** |
| 159 | * Safely swaps two hash tables. Note that we swap the entirety of the hash |
| 160 | * table, including which allocator is associated. |
| 161 | * |
| 162 | * Neither hash table is required to be initialized; if one or both is |
| 163 | * uninitialized, then the uninitialized state is also swapped. |
| 164 | */ |
| 165 | AWS_COMMON_API |
| 166 | void aws_hash_table_swap(struct aws_hash_table *AWS_RESTRICT a, struct aws_hash_table *AWS_RESTRICT b); |
| 167 | |
| 168 | /** |
| 169 | * Moves the hash table in 'from' to 'to'. After this move, 'from' will |
| 170 | * be identical to the state of the original 'to' hash table, and 'to' |
| 171 | * will be in the same state as if it had been passed to aws_hash_table_clean_up |
| 172 | * (that is, it will have no memory allocated, and it will be safe to |
| 173 | * either discard it or call aws_hash_table_clean_up again). |
| 174 | * |
| 175 | * Note that 'to' will not be cleaned up. You should make sure that 'to' |
| 176 | * is either uninitialized or cleaned up before moving a hashtable into |
| 177 | * it. |
| 178 | */ |
| 179 | AWS_COMMON_API |
| 180 | void aws_hash_table_move(struct aws_hash_table *AWS_RESTRICT to, struct aws_hash_table *AWS_RESTRICT from); |
| 181 | |
| 182 | /** |
| 183 | * Returns the current number of entries in the table. |
| 184 | */ |
| 185 | AWS_COMMON_API |
| 186 | size_t aws_hash_table_get_entry_count(const struct aws_hash_table *map); |
| 187 | |
| 188 | /** |
| 189 | * Returns an iterator to be used for iterating through a hash table. |
| 190 | * Iterator will already point to the first element of the table it finds, |
| 191 | * which can be accessed as iter.element. |
| 192 | * |
| 193 | * This function cannot fail, but if there are no elements in the table, |
| 194 | * the returned iterator will return true for aws_hash_iter_done(&iter). |
| 195 | */ |
| 196 | AWS_COMMON_API |
| 197 | struct aws_hash_iter aws_hash_iter_begin(const struct aws_hash_table *map); |
| 198 | |
| 199 | /** |
| 200 | * Returns true if iterator is done iterating through table, false otherwise. |
| 201 | * If this is true, the iterator will not include an element of the table. |
| 202 | */ |
| 203 | AWS_COMMON_API |
| 204 | bool aws_hash_iter_done(const struct aws_hash_iter *iter); |
| 205 | |
| 206 | /** |
| 207 | * Updates iterator so that it points to next element of hash table. |
| 208 | * |
| 209 | * This and the two previous functions are designed to be used together with |
| 210 | * the following idiom: |
| 211 | * |
| 212 | * for (struct aws_hash_iter iter = aws_hash_iter_begin(&map); |
| 213 | * !aws_hash_iter_done(&iter); aws_hash_iter_next(&iter)) { |
| 214 | * const key_type key = *(const key_type *)iter.element.key; |
| 215 | * value_type value = *(value_type *)iter.element.value; |
| 216 | * // etc. |
| 217 | * } |
| 218 | * |
| 219 | * Note that calling this on an iter which is "done" is idempotent: |
| 220 | * i.e. it will return another iter which is "done". |
| 221 | */ |
| 222 | AWS_COMMON_API |
| 223 | void aws_hash_iter_next(struct aws_hash_iter *iter); |
| 224 | |
| 225 | /** |
| 226 | * Deletes the element currently pointed-to by the hash iterator. |
| 227 | * After calling this method, the element member of the iterator |
| 228 | * should not be accessed until the next call to aws_hash_iter_next. |
| 229 | * |
| 230 | * @param destroy_contents If true, the destructors for the key and value |
| 231 | * will be called. |
| 232 | */ |
| 233 | AWS_COMMON_API |
| 234 | void aws_hash_iter_delete(struct aws_hash_iter *iter, bool destroy_contents); |
| 235 | |
| 236 | /** |
| 237 | * Attempts to locate an element at key. If the element is found, a |
| 238 | * pointer to the value is placed in *p_elem; if it is not found, |
| 239 | * *pElem is set to NULL. Either way, AWS_OP_SUCCESS is returned. |
| 240 | * |
| 241 | * This method does not change the state of the hash table. Therefore, it |
| 242 | * is safe to call _find from multiple threads on the same hash table, |
| 243 | * provided no mutating operations happen in parallel. |
| 244 | * |
| 245 | * Calling code may update the value in the hash table by modifying **pElem |
| 246 | * after a successful find. However, this pointer is not guaranteed to |
| 247 | * remain usable after a subsequent call to _put, _delete, _clear, or |
| 248 | * _clean_up. |
| 249 | */ |
| 250 | |
| 251 | AWS_COMMON_API |
| 252 | int aws_hash_table_find(const struct aws_hash_table *map, const void *key, struct aws_hash_element **p_elem); |
| 253 | |
| 254 | /** |
| 255 | * Attempts to locate an element at key. If no such element was found, |
| 256 | * creates a new element, with value initialized to NULL. In either case, a |
| 257 | * pointer to the element is placed in *p_elem. |
| 258 | * |
| 259 | * If was_created is non-NULL, *was_created is set to 0 if an existing |
| 260 | * element was found, or 1 is a new element was created. |
| 261 | * |
| 262 | * Returns AWS_OP_SUCCESS if an item was found or created. |
| 263 | * Raises AWS_ERROR_OOM if hash table expansion was required and memory |
| 264 | * allocation failed. |
| 265 | */ |
| 266 | AWS_COMMON_API |
| 267 | int aws_hash_table_create( |
| 268 | struct aws_hash_table *map, |
| 269 | const void *key, |
| 270 | struct aws_hash_element **p_elem, |
| 271 | int *was_created); |
| 272 | |
| 273 | /** |
| 274 | * Inserts a new element at key, with the given value. If another element |
| 275 | * exists at that key, the old element will be overwritten; both old key and |
| 276 | * value objects will be destroyed. |
| 277 | * |
| 278 | * If was_created is non-NULL, *was_created is set to 0 if an existing |
| 279 | * element was found, or 1 is a new element was created. |
| 280 | * |
| 281 | * Returns AWS_OP_SUCCESS if an item was found or created. |
| 282 | * Raises AWS_ERROR_OOM if hash table expansion was required and memory |
| 283 | */ |
| 284 | AWS_COMMON_API |
| 285 | int aws_hash_table_put(struct aws_hash_table *map, const void *key, void *value, int *was_created); |
| 286 | |
| 287 | /** |
| 288 | * Removes element at key. Always returns AWS_OP_SUCCESS. |
| 289 | * |
| 290 | * If pValue is non-NULL, the existing value (if any) is moved into |
| 291 | * (*value) before removing from the table, and destroy_fn is _not_ |
| 292 | * invoked. If pValue is NULL, then (if the element existed) destroy_fn |
| 293 | * will be invoked on the element being removed. |
| 294 | * |
| 295 | * If was_present is non-NULL, it is set to 0 if the element was |
| 296 | * not present, or 1 if it was present (and is now removed). |
| 297 | */ |
| 298 | AWS_COMMON_API |
| 299 | int aws_hash_table_remove( |
| 300 | struct aws_hash_table *map, |
| 301 | const void *key, |
| 302 | struct aws_hash_element *p_value, |
| 303 | int *was_present); |
| 304 | |
| 305 | /** |
| 306 | * Removes element already known (typically by find()). |
| 307 | * |
| 308 | * p_value should point to a valid element returned by create() or find(). |
| 309 | * |
| 310 | * NOTE: DO NOT call this method from inside of a aws_hash_table_foreach callback, return |
| 311 | * AWS_COMMON_HASH_TABLE_ITER_DELETE instead. |
| 312 | */ |
| 313 | AWS_COMMON_API |
| 314 | int aws_hash_table_remove_element(struct aws_hash_table *map, struct aws_hash_element *p_value); |
| 315 | |
| 316 | /** |
| 317 | * Iterates through every element in the map and invokes the callback on |
| 318 | * that item. Iteration is performed in an arbitrary, implementation-defined |
| 319 | * order, and is not guaranteed to be consistent across invocations. |
| 320 | * |
| 321 | * The callback may change the value associated with the key by overwriting |
| 322 | * the value pointed-to by value. In this case, the on_element_removed |
| 323 | * callback will not be invoked, unless the callback invokes |
| 324 | * AWS_COMMON_HASH_TABLE_ITER_DELETE (in which case the on_element_removed |
| 325 | * is given the updated value). |
| 326 | * |
| 327 | * The callback must return a bitmask of zero or more of the following values |
| 328 | * ORed together: |
| 329 | * |
| 330 | * # AWS_COMMON_HASH_TABLE_ITER_CONTINUE - Continues iteration to the next |
| 331 | * element (if not set, iteration stops) |
| 332 | * # AWS_COMMON_HASH_TABLE_ITER_DELETE - Deletes the current value and |
| 333 | * continues iteration. destroy_fn will NOT be invoked. |
| 334 | * |
| 335 | * Invoking any method which may change the contents of the hashtable |
| 336 | * during iteration results in undefined behavior. However, you may safely |
| 337 | * invoke non-mutating operations during an iteration. |
| 338 | * |
| 339 | * This operation is mutating only if AWS_COMMON_HASH_TABLE_ITER_DELETE |
| 340 | * is returned at some point during iteration. Otherwise, it is non-mutating |
| 341 | * and is safe to invoke in parallel with other non-mutating operations. |
| 342 | */ |
| 343 | |
| 344 | AWS_COMMON_API |
| 345 | int aws_hash_table_foreach( |
| 346 | struct aws_hash_table *map, |
| 347 | int (*callback)(void *context, struct aws_hash_element *p_element), |
| 348 | void *context); |
| 349 | |
| 350 | /** |
| 351 | * Compares two hash tables for equality. Both hash tables must have equivalent |
| 352 | * key comparators; values will be compared using the comparator passed into this |
| 353 | * function. The key hash function does not need to be equivalent between the |
| 354 | * two hash tables. |
| 355 | */ |
| 356 | AWS_COMMON_API |
| 357 | bool aws_hash_table_eq( |
| 358 | const struct aws_hash_table *a, |
| 359 | const struct aws_hash_table *b, |
| 360 | aws_hash_callback_eq_fn *value_eq); |
| 361 | |
| 362 | /** |
| 363 | * Removes every element from the hash map. destroy_fn will be called for |
| 364 | * each element. |
| 365 | */ |
| 366 | AWS_COMMON_API |
| 367 | void aws_hash_table_clear(struct aws_hash_table *map); |
| 368 | |
| 369 | /** |
| 370 | * Convenience hash function for NULL-terminated C-strings |
| 371 | */ |
| 372 | AWS_COMMON_API |
| 373 | uint64_t aws_hash_c_string(const void *item); |
| 374 | |
| 375 | /** |
| 376 | * Convenience hash function for struct aws_strings. |
| 377 | * Hash is same as used on the string bytes by aws_hash_c_string. |
| 378 | */ |
| 379 | AWS_COMMON_API |
| 380 | uint64_t aws_hash_string(const void *item); |
| 381 | |
| 382 | /** |
| 383 | * Convenience hash function for struct aws_byte_cursor. |
| 384 | * Hash is same as used on the string bytes by aws_hash_c_string. |
| 385 | */ |
| 386 | AWS_COMMON_API |
| 387 | uint64_t aws_hash_byte_cursor_ptr(const void *item); |
| 388 | |
| 389 | /** |
| 390 | * Convenience hash function which hashes the pointer value directly, |
| 391 | * without dereferencing. This can be used in cases where pointer identity |
| 392 | * is desired, or where a uintptr_t is encoded into a const void *. |
| 393 | */ |
| 394 | AWS_COMMON_API |
| 395 | uint64_t aws_hash_ptr(const void *item); |
| 396 | |
| 397 | /** |
| 398 | * Convenience eq callback for NULL-terminated C-strings |
| 399 | */ |
| 400 | AWS_COMMON_API |
| 401 | bool aws_hash_callback_c_str_eq(const void *a, const void *b); |
| 402 | |
| 403 | /** |
| 404 | * Convenience eq callback for AWS strings |
| 405 | */ |
| 406 | AWS_COMMON_API |
| 407 | bool aws_hash_callback_string_eq(const void *a, const void *b); |
| 408 | |
| 409 | /** |
| 410 | * Convenience destroy callback for AWS strings |
| 411 | */ |
| 412 | AWS_COMMON_API |
| 413 | void aws_hash_callback_string_destroy(void *a); |
| 414 | |
| 415 | /** |
| 416 | * Equality function which compares pointer equality. |
| 417 | */ |
| 418 | AWS_COMMON_API |
| 419 | bool aws_ptr_eq(const void *a, const void *b); |
| 420 | |
| 421 | /** |
| 422 | * Best-effort check of hash_table_state data-structure invariants |
| 423 | */ |
| 424 | AWS_COMMON_API |
| 425 | bool aws_hash_table_is_valid(const struct aws_hash_table *map); |
| 426 | |
| 427 | /** |
| 428 | * Given a pointer to a hash_iter, checks that it is well-formed, with all data-structure invariants. |
| 429 | */ |
| 430 | AWS_COMMON_API |
| 431 | bool aws_hash_iter_is_valid(const struct aws_hash_iter *iter); |
| 432 | |
| 433 | AWS_EXTERN_C_END |
| 434 | |
| 435 | #endif /* AWS_COMMON_HASH_TABLE_H */ |
| 436 | |