| 1 | /* |
| 2 | Simple DirectMedia Layer |
| 3 | Copyright (C) 1997-2025 Sam Lantinga <slouken@libsdl.org> |
| 4 | |
| 5 | This software is provided 'as-is', without any express or implied |
| 6 | warranty. In no event will the authors be held liable for any damages |
| 7 | arising from the use of this software. |
| 8 | |
| 9 | Permission is granted to anyone to use this software for any purpose, |
| 10 | including commercial applications, and to alter it and redistribute it |
| 11 | freely, subject to the following restrictions: |
| 12 | |
| 13 | 1. The origin of this software must not be misrepresented; you must not |
| 14 | claim that you wrote the original software. If you use this software |
| 15 | in a product, an acknowledgment in the product documentation would be |
| 16 | appreciated but is not required. |
| 17 | 2. Altered source versions must be plainly marked as such, and must not be |
| 18 | misrepresented as being the original software. |
| 19 | 3. This notice may not be removed or altered from any source distribution. |
| 20 | */ |
| 21 | #include "SDL_internal.h" |
| 22 | |
| 23 | typedef struct SDL_HashItem |
| 24 | { |
| 25 | // TODO: Splitting off values into a separate array might be more cache-friendly |
| 26 | const void *key; |
| 27 | const void *value; |
| 28 | Uint32 hash; |
| 29 | Uint32 probe_len : 31; |
| 30 | Uint32 live : 1; |
| 31 | } SDL_HashItem; |
| 32 | |
| 33 | // Must be a power of 2 >= sizeof(SDL_HashItem) |
| 34 | #define MAX_HASHITEM_SIZEOF 32u |
| 35 | SDL_COMPILE_TIME_ASSERT(sizeof_SDL_HashItem, sizeof(SDL_HashItem) <= MAX_HASHITEM_SIZEOF); |
| 36 | |
| 37 | // Anything larger than this will cause integer overflows |
| 38 | #define MAX_HASHTABLE_SIZE (0x80000000u / (MAX_HASHITEM_SIZEOF)) |
| 39 | |
| 40 | struct SDL_HashTable |
| 41 | { |
| 42 | SDL_RWLock *lock; // NULL if not created threadsafe |
| 43 | SDL_HashItem *table; |
| 44 | SDL_HashCallback hash; |
| 45 | SDL_HashKeyMatchCallback keymatch; |
| 46 | SDL_HashDestroyCallback destroy; |
| 47 | void *userdata; |
| 48 | Uint32 hash_mask; |
| 49 | Uint32 max_probe_len; |
| 50 | Uint32 num_occupied_slots; |
| 51 | }; |
| 52 | |
| 53 | |
| 54 | static Uint32 CalculateHashBucketsFromEstimate(int estimated_capacity) |
| 55 | { |
| 56 | if (estimated_capacity <= 0) { |
| 57 | return 4; // start small, grow as necessary. |
| 58 | } |
| 59 | |
| 60 | const Uint32 estimated32 = (Uint32) estimated_capacity; |
| 61 | Uint32 buckets = ((Uint32) 1) << SDL_MostSignificantBitIndex32(estimated32); |
| 62 | if (!SDL_HasExactlyOneBitSet32(estimated32)) { |
| 63 | buckets <<= 1; // need next power of two up to fit overflow capacity bits. |
| 64 | } |
| 65 | |
| 66 | return SDL_min(buckets, MAX_HASHTABLE_SIZE); |
| 67 | } |
| 68 | |
| 69 | SDL_HashTable *SDL_CreateHashTable(int estimated_capacity, bool threadsafe, SDL_HashCallback hash, |
| 70 | SDL_HashKeyMatchCallback keymatch, |
| 71 | SDL_HashDestroyCallback destroy, void *userdata) |
| 72 | { |
| 73 | const Uint32 num_buckets = CalculateHashBucketsFromEstimate(estimated_capacity); |
| 74 | SDL_HashTable *table = (SDL_HashTable *)SDL_calloc(1, sizeof(SDL_HashTable)); |
| 75 | if (!table) { |
| 76 | return NULL; |
| 77 | } |
| 78 | |
| 79 | if (threadsafe) { |
| 80 | table->lock = SDL_CreateRWLock(); |
| 81 | if (!table->lock) { |
| 82 | SDL_DestroyHashTable(table); |
| 83 | return NULL; |
| 84 | } |
| 85 | } |
| 86 | |
| 87 | table->table = (SDL_HashItem *)SDL_calloc(num_buckets, sizeof(SDL_HashItem)); |
| 88 | if (!table->table) { |
| 89 | SDL_DestroyHashTable(table); |
| 90 | return NULL; |
| 91 | } |
| 92 | |
| 93 | table->hash_mask = num_buckets - 1; |
| 94 | table->userdata = userdata; |
| 95 | table->hash = hash; |
| 96 | table->keymatch = keymatch; |
| 97 | table->destroy = destroy; |
| 98 | return table; |
| 99 | } |
| 100 | |
| 101 | static SDL_INLINE Uint32 calc_hash(const SDL_HashTable *table, const void *key) |
| 102 | { |
| 103 | const Uint32 BitMixer = 0x9E3779B1u; |
| 104 | return table->hash(table->userdata, key) * BitMixer; |
| 105 | } |
| 106 | |
| 107 | static SDL_INLINE Uint32 get_probe_length(Uint32 zero_idx, Uint32 actual_idx, Uint32 num_buckets) |
| 108 | { |
| 109 | // returns the probe sequence length from zero_idx to actual_idx |
| 110 | if (actual_idx < zero_idx) { |
| 111 | return num_buckets - zero_idx + actual_idx; |
| 112 | } |
| 113 | |
| 114 | return actual_idx - zero_idx; |
| 115 | } |
| 116 | |
| 117 | static SDL_HashItem *find_item(const SDL_HashTable *ht, const void *key, Uint32 hash, Uint32 *i, Uint32 *probe_len) |
| 118 | { |
| 119 | Uint32 hash_mask = ht->hash_mask; |
| 120 | Uint32 max_probe_len = ht->max_probe_len; |
| 121 | |
| 122 | SDL_HashItem *table = ht->table; |
| 123 | |
| 124 | while (true) { |
| 125 | SDL_HashItem *item = table + *i; |
| 126 | Uint32 item_hash = item->hash; |
| 127 | |
| 128 | if (!item->live) { |
| 129 | return NULL; |
| 130 | } |
| 131 | |
| 132 | if (item_hash == hash && ht->keymatch(ht->userdata, item->key, key)) { |
| 133 | return item; |
| 134 | } |
| 135 | |
| 136 | Uint32 item_probe_len = item->probe_len; |
| 137 | SDL_assert(item_probe_len == get_probe_length(item_hash & hash_mask, (Uint32)(item - table), hash_mask + 1)); |
| 138 | |
| 139 | if (*probe_len > item_probe_len) { |
| 140 | return NULL; |
| 141 | } |
| 142 | |
| 143 | if (++*probe_len > max_probe_len) { |
| 144 | return NULL; |
| 145 | } |
| 146 | |
| 147 | *i = (*i + 1) & hash_mask; |
| 148 | } |
| 149 | } |
| 150 | |
| 151 | static SDL_HashItem *find_first_item(const SDL_HashTable *ht, const void *key, Uint32 hash) |
| 152 | { |
| 153 | Uint32 i = hash & ht->hash_mask; |
| 154 | Uint32 probe_len = 0; |
| 155 | return find_item(ht, key, hash, &i, &probe_len); |
| 156 | } |
| 157 | |
| 158 | static SDL_HashItem *insert_item(SDL_HashItem *item_to_insert, SDL_HashItem *table, Uint32 hash_mask, Uint32 *max_probe_len_ptr) |
| 159 | { |
| 160 | const Uint32 num_buckets = hash_mask + 1; |
| 161 | Uint32 idx = item_to_insert->hash & hash_mask; |
| 162 | SDL_HashItem *target = NULL; |
| 163 | SDL_HashItem temp_item; |
| 164 | |
| 165 | while (true) { |
| 166 | SDL_HashItem *candidate = table + idx; |
| 167 | |
| 168 | if (!candidate->live) { |
| 169 | // Found an empty slot. Put it here and we're done. |
| 170 | *candidate = *item_to_insert; |
| 171 | |
| 172 | if (target == NULL) { |
| 173 | target = candidate; |
| 174 | } |
| 175 | |
| 176 | const Uint32 probe_len = get_probe_length(candidate->hash & hash_mask, idx, num_buckets); |
| 177 | candidate->probe_len = probe_len; |
| 178 | |
| 179 | if (*max_probe_len_ptr < probe_len) { |
| 180 | *max_probe_len_ptr = probe_len; |
| 181 | } |
| 182 | |
| 183 | break; |
| 184 | } |
| 185 | |
| 186 | const Uint32 candidate_probe_len = candidate->probe_len; |
| 187 | SDL_assert(candidate_probe_len == get_probe_length(candidate->hash & hash_mask, idx, num_buckets)); |
| 188 | const Uint32 new_probe_len = get_probe_length(item_to_insert->hash & hash_mask, idx, num_buckets); |
| 189 | |
| 190 | if (candidate_probe_len < new_probe_len) { |
| 191 | // Robin Hood hashing: the item at idx has a better probe length than our item would at this position. |
| 192 | // Evict it and put our item in its place, then continue looking for a new spot for the displaced item. |
| 193 | // This algorithm significantly reduces clustering in the table, making lookups take very few probes. |
| 194 | |
| 195 | temp_item = *candidate; |
| 196 | *candidate = *item_to_insert; |
| 197 | |
| 198 | if (target == NULL) { |
| 199 | target = candidate; |
| 200 | } |
| 201 | |
| 202 | *item_to_insert = temp_item; |
| 203 | |
| 204 | SDL_assert(new_probe_len == get_probe_length(candidate->hash & hash_mask, idx, num_buckets)); |
| 205 | candidate->probe_len = new_probe_len; |
| 206 | |
| 207 | if (*max_probe_len_ptr < new_probe_len) { |
| 208 | *max_probe_len_ptr = new_probe_len; |
| 209 | } |
| 210 | } |
| 211 | |
| 212 | idx = (idx + 1) & hash_mask; |
| 213 | } |
| 214 | |
| 215 | return target; |
| 216 | } |
| 217 | |
| 218 | static void delete_item(SDL_HashTable *ht, SDL_HashItem *item) |
| 219 | { |
| 220 | const Uint32 hash_mask = ht->hash_mask; |
| 221 | SDL_HashItem *table = ht->table; |
| 222 | |
| 223 | if (ht->destroy) { |
| 224 | ht->destroy(ht->userdata, item->key, item->value); |
| 225 | } |
| 226 | |
| 227 | SDL_assert(ht->num_occupied_slots > 0); |
| 228 | ht->num_occupied_slots--; |
| 229 | |
| 230 | Uint32 idx = (Uint32)(item - ht->table); |
| 231 | |
| 232 | while (true) { |
| 233 | idx = (idx + 1) & hash_mask; |
| 234 | SDL_HashItem *next_item = table + idx; |
| 235 | |
| 236 | if (next_item->probe_len < 1) { |
| 237 | SDL_zerop(item); |
| 238 | return; |
| 239 | } |
| 240 | |
| 241 | *item = *next_item; |
| 242 | item->probe_len -= 1; |
| 243 | SDL_assert(item->probe_len < ht->max_probe_len); |
| 244 | item = next_item; |
| 245 | } |
| 246 | } |
| 247 | |
| 248 | static bool resize(SDL_HashTable *ht, Uint32 new_size) |
| 249 | { |
| 250 | const Uint32 new_hash_mask = new_size - 1; |
| 251 | SDL_HashItem *new_table = SDL_calloc(new_size, sizeof(*new_table)); |
| 252 | |
| 253 | if (!new_table) { |
| 254 | return false; |
| 255 | } |
| 256 | |
| 257 | SDL_HashItem *old_table = ht->table; |
| 258 | const Uint32 old_size = ht->hash_mask + 1; |
| 259 | |
| 260 | ht->max_probe_len = 0; |
| 261 | ht->hash_mask = new_hash_mask; |
| 262 | ht->table = new_table; |
| 263 | |
| 264 | for (Uint32 i = 0; i < old_size; ++i) { |
| 265 | SDL_HashItem *item = old_table + i; |
| 266 | if (item->live) { |
| 267 | insert_item(item, new_table, new_hash_mask, &ht->max_probe_len); |
| 268 | } |
| 269 | } |
| 270 | |
| 271 | SDL_free(old_table); |
| 272 | return true; |
| 273 | } |
| 274 | |
| 275 | static bool maybe_resize(SDL_HashTable *ht) |
| 276 | { |
| 277 | const Uint32 capacity = ht->hash_mask + 1; |
| 278 | |
| 279 | if (capacity >= MAX_HASHTABLE_SIZE) { |
| 280 | return false; |
| 281 | } |
| 282 | |
| 283 | const Uint32 max_load_factor = 217; // range: 0-255; 217 is roughly 85% |
| 284 | const Uint32 resize_threshold = (Uint32)((max_load_factor * (Uint64)capacity) >> 8); |
| 285 | |
| 286 | if (ht->num_occupied_slots > resize_threshold) { |
| 287 | return resize(ht, capacity * 2); |
| 288 | } |
| 289 | |
| 290 | return true; |
| 291 | } |
| 292 | |
| 293 | bool SDL_InsertIntoHashTable(SDL_HashTable *table, const void *key, const void *value, bool replace) |
| 294 | { |
| 295 | if (!table) { |
| 296 | return SDL_InvalidParamError("table" ); |
| 297 | } |
| 298 | |
| 299 | bool result = false; |
| 300 | |
| 301 | SDL_LockRWLockForWriting(table->lock); |
| 302 | |
| 303 | const Uint32 hash = calc_hash(table, key); |
| 304 | SDL_HashItem *item = find_first_item(table, key, hash); |
| 305 | bool do_insert = true; |
| 306 | |
| 307 | if (item) { |
| 308 | if (replace) { |
| 309 | delete_item(table, item); |
| 310 | } else { |
| 311 | SDL_SetError("key already exists and replace is disabled" ); |
| 312 | do_insert = false; |
| 313 | } |
| 314 | } |
| 315 | |
| 316 | if (do_insert) { |
| 317 | SDL_HashItem new_item; |
| 318 | new_item.key = key; |
| 319 | new_item.value = value; |
| 320 | new_item.hash = hash; |
| 321 | new_item.live = true; |
| 322 | new_item.probe_len = 0; |
| 323 | |
| 324 | table->num_occupied_slots++; |
| 325 | |
| 326 | if (!maybe_resize(table)) { |
| 327 | table->num_occupied_slots--; |
| 328 | } else { |
| 329 | // This never returns NULL |
| 330 | insert_item(&new_item, table->table, table->hash_mask, &table->max_probe_len); |
| 331 | result = true; |
| 332 | } |
| 333 | } |
| 334 | |
| 335 | SDL_UnlockRWLock(table->lock); |
| 336 | return result; |
| 337 | } |
| 338 | |
| 339 | bool SDL_FindInHashTable(const SDL_HashTable *table, const void *key, const void **value) |
| 340 | { |
| 341 | if (!table) { |
| 342 | if (value) { |
| 343 | *value = NULL; |
| 344 | } |
| 345 | return SDL_InvalidParamError("table" ); |
| 346 | } |
| 347 | |
| 348 | SDL_LockRWLockForReading(table->lock); |
| 349 | |
| 350 | bool result = false; |
| 351 | const Uint32 hash = calc_hash(table, key); |
| 352 | SDL_HashItem *i = find_first_item(table, key, hash); |
| 353 | if (i) { |
| 354 | if (value) { |
| 355 | *value = i->value; |
| 356 | } |
| 357 | result = true; |
| 358 | } |
| 359 | |
| 360 | SDL_UnlockRWLock(table->lock); |
| 361 | |
| 362 | return result; |
| 363 | } |
| 364 | |
| 365 | bool SDL_RemoveFromHashTable(SDL_HashTable *table, const void *key) |
| 366 | { |
| 367 | if (!table) { |
| 368 | return SDL_InvalidParamError("table" ); |
| 369 | } |
| 370 | |
| 371 | SDL_LockRWLockForWriting(table->lock); |
| 372 | |
| 373 | bool result = false; |
| 374 | const Uint32 hash = calc_hash(table, key); |
| 375 | SDL_HashItem *item = find_first_item(table, key, hash); |
| 376 | if (item) { |
| 377 | delete_item(table, item); |
| 378 | result = true; |
| 379 | } |
| 380 | |
| 381 | SDL_UnlockRWLock(table->lock); |
| 382 | return result; |
| 383 | } |
| 384 | |
| 385 | bool SDL_IterateHashTable(const SDL_HashTable *table, SDL_HashTableIterateCallback callback, void *userdata) |
| 386 | { |
| 387 | if (!table) { |
| 388 | return SDL_InvalidParamError("table" ); |
| 389 | } else if (!callback) { |
| 390 | return SDL_InvalidParamError("callback" ); |
| 391 | } |
| 392 | |
| 393 | SDL_LockRWLockForReading(table->lock); |
| 394 | SDL_HashItem *end = table->table + (table->hash_mask + 1); |
| 395 | Uint32 num_iterated = 0; |
| 396 | |
| 397 | for (SDL_HashItem *item = table->table; item < end; item++) { |
| 398 | if (item->live) { |
| 399 | if (!callback(userdata, table, item->key, item->value)) { |
| 400 | break; // callback requested iteration stop. |
| 401 | } else if (++num_iterated >= table->num_occupied_slots) { |
| 402 | break; // we can drop out early because we've seen all the live items. |
| 403 | } |
| 404 | } |
| 405 | } |
| 406 | |
| 407 | SDL_UnlockRWLock(table->lock); |
| 408 | return true; |
| 409 | } |
| 410 | |
| 411 | bool SDL_HashTableEmpty(SDL_HashTable *table) |
| 412 | { |
| 413 | if (!table) { |
| 414 | return SDL_InvalidParamError("table" ); |
| 415 | } |
| 416 | |
| 417 | SDL_LockRWLockForReading(table->lock); |
| 418 | const bool retval = (table->num_occupied_slots == 0); |
| 419 | SDL_UnlockRWLock(table->lock); |
| 420 | return retval; |
| 421 | } |
| 422 | |
| 423 | |
| 424 | static void destroy_all(SDL_HashTable *table) |
| 425 | { |
| 426 | SDL_HashDestroyCallback destroy = table->destroy; |
| 427 | if (destroy) { |
| 428 | void *userdata = table->userdata; |
| 429 | SDL_HashItem *end = table->table + (table->hash_mask + 1); |
| 430 | for (SDL_HashItem *i = table->table; i < end; ++i) { |
| 431 | if (i->live) { |
| 432 | i->live = false; |
| 433 | destroy(userdata, i->key, i->value); |
| 434 | } |
| 435 | } |
| 436 | } |
| 437 | } |
| 438 | |
| 439 | void SDL_ClearHashTable(SDL_HashTable *table) |
| 440 | { |
| 441 | if (table) { |
| 442 | SDL_LockRWLockForWriting(table->lock); |
| 443 | { |
| 444 | destroy_all(table); |
| 445 | SDL_memset(table->table, 0, sizeof(*table->table) * (table->hash_mask + 1)); |
| 446 | table->num_occupied_slots = 0; |
| 447 | } |
| 448 | SDL_UnlockRWLock(table->lock); |
| 449 | } |
| 450 | } |
| 451 | |
| 452 | void SDL_DestroyHashTable(SDL_HashTable *table) |
| 453 | { |
| 454 | if (table) { |
| 455 | destroy_all(table); |
| 456 | if (table->lock) { |
| 457 | SDL_DestroyRWLock(table->lock); |
| 458 | } |
| 459 | SDL_free(table->table); |
| 460 | SDL_free(table); |
| 461 | } |
| 462 | } |
| 463 | |
| 464 | // this is djb's xor hashing function. |
| 465 | static SDL_INLINE Uint32 hash_string_djbxor(const char *str, size_t len) |
| 466 | { |
| 467 | Uint32 hash = 5381; |
| 468 | while (len--) { |
| 469 | hash = ((hash << 5) + hash) ^ *(str++); |
| 470 | } |
| 471 | return hash; |
| 472 | } |
| 473 | |
| 474 | Uint32 SDL_HashPointer(void *unused, const void *key) |
| 475 | { |
| 476 | (void)unused; |
| 477 | return SDL_murmur3_32(&key, sizeof(key), 0); |
| 478 | } |
| 479 | |
| 480 | bool SDL_KeyMatchPointer(void *unused, const void *a, const void *b) |
| 481 | { |
| 482 | (void)unused; |
| 483 | return (a == b); |
| 484 | } |
| 485 | |
| 486 | Uint32 SDL_HashString(void *unused, const void *key) |
| 487 | { |
| 488 | (void)unused; |
| 489 | const char *str = (const char *)key; |
| 490 | return hash_string_djbxor(str, SDL_strlen(str)); |
| 491 | } |
| 492 | |
| 493 | bool SDL_KeyMatchString(void *unused, const void *a, const void *b) |
| 494 | { |
| 495 | const char *a_string = (const char *)a; |
| 496 | const char *b_string = (const char *)b; |
| 497 | |
| 498 | (void)unused; |
| 499 | if (a == b) { |
| 500 | return true; // same pointer, must match. |
| 501 | } else if (!a || !b) { |
| 502 | return false; // one pointer is NULL (and first test shows they aren't the same pointer), must not match. |
| 503 | } else if (a_string[0] != b_string[0]) { |
| 504 | return false; // we know they don't match |
| 505 | } |
| 506 | return (SDL_strcmp(a_string, b_string) == 0); // Check against actual string contents. |
| 507 | } |
| 508 | |
| 509 | // We assume we can fit the ID in the key directly |
| 510 | SDL_COMPILE_TIME_ASSERT(SDL_HashID_KeySize, sizeof(Uint32) <= sizeof(const void *)); |
| 511 | |
| 512 | Uint32 SDL_HashID(void *unused, const void *key) |
| 513 | { |
| 514 | (void)unused; |
| 515 | return (Uint32)(uintptr_t)key; |
| 516 | } |
| 517 | |
| 518 | bool SDL_KeyMatchID(void *unused, const void *a, const void *b) |
| 519 | { |
| 520 | (void)unused; |
| 521 | return (a == b); |
| 522 | } |
| 523 | |
| 524 | void SDL_DestroyHashKeyAndValue(void *unused, const void *key, const void *value) |
| 525 | { |
| 526 | (void)unused; |
| 527 | SDL_free((void *)key); |
| 528 | SDL_free((void *)value); |
| 529 | } |
| 530 | |
| 531 | void SDL_DestroyHashKey(void *unused, const void *key, const void *value) |
| 532 | { |
| 533 | (void)value; |
| 534 | (void)unused; |
| 535 | SDL_free((void *)key); |
| 536 | } |
| 537 | |
| 538 | void SDL_DestroyHashValue(void *unused, const void *key, const void *value) |
| 539 | { |
| 540 | (void)key; |
| 541 | (void)unused; |
| 542 | SDL_free((void *)value); |
| 543 | } |
| 544 | |