1 | /* |
2 | Simple DirectMedia Layer |
3 | Copyright (C) 1997-2025 Sam Lantinga <slouken@libsdl.org> |
4 | |
5 | This software is provided 'as-is', without any express or implied |
6 | warranty. In no event will the authors be held liable for any damages |
7 | arising from the use of this software. |
8 | |
9 | Permission is granted to anyone to use this software for any purpose, |
10 | including commercial applications, and to alter it and redistribute it |
11 | freely, subject to the following restrictions: |
12 | |
13 | 1. The origin of this software must not be misrepresented; you must not |
14 | claim that you wrote the original software. If you use this software |
15 | in a product, an acknowledgment in the product documentation would be |
16 | appreciated but is not required. |
17 | 2. Altered source versions must be plainly marked as such, and must not be |
18 | misrepresented as being the original software. |
19 | 3. This notice may not be removed or altered from any source distribution. |
20 | */ |
21 | #include "SDL_internal.h" |
22 | |
23 | typedef struct SDL_HashItem |
24 | { |
25 | // TODO: Splitting off values into a separate array might be more cache-friendly |
26 | const void *key; |
27 | const void *value; |
28 | Uint32 hash; |
29 | Uint32 probe_len : 31; |
30 | Uint32 live : 1; |
31 | } SDL_HashItem; |
32 | |
33 | // Must be a power of 2 >= sizeof(SDL_HashItem) |
34 | #define MAX_HASHITEM_SIZEOF 32u |
35 | SDL_COMPILE_TIME_ASSERT(sizeof_SDL_HashItem, sizeof(SDL_HashItem) <= MAX_HASHITEM_SIZEOF); |
36 | |
37 | // Anything larger than this will cause integer overflows |
38 | #define MAX_HASHTABLE_SIZE (0x80000000u / (MAX_HASHITEM_SIZEOF)) |
39 | |
40 | struct SDL_HashTable |
41 | { |
42 | SDL_RWLock *lock; // NULL if not created threadsafe |
43 | SDL_HashItem *table; |
44 | SDL_HashCallback hash; |
45 | SDL_HashKeyMatchCallback keymatch; |
46 | SDL_HashDestroyCallback destroy; |
47 | void *userdata; |
48 | Uint32 hash_mask; |
49 | Uint32 max_probe_len; |
50 | Uint32 num_occupied_slots; |
51 | }; |
52 | |
53 | |
54 | static Uint32 CalculateHashBucketsFromEstimate(int estimated_capacity) |
55 | { |
56 | if (estimated_capacity <= 0) { |
57 | return 4; // start small, grow as necessary. |
58 | } |
59 | |
60 | const Uint32 estimated32 = (Uint32) estimated_capacity; |
61 | Uint32 buckets = ((Uint32) 1) << SDL_MostSignificantBitIndex32(estimated32); |
62 | if (!SDL_HasExactlyOneBitSet32(estimated32)) { |
63 | buckets <<= 1; // need next power of two up to fit overflow capacity bits. |
64 | } |
65 | |
66 | return SDL_min(buckets, MAX_HASHTABLE_SIZE); |
67 | } |
68 | |
69 | SDL_HashTable *SDL_CreateHashTable(int estimated_capacity, bool threadsafe, SDL_HashCallback hash, |
70 | SDL_HashKeyMatchCallback keymatch, |
71 | SDL_HashDestroyCallback destroy, void *userdata) |
72 | { |
73 | const Uint32 num_buckets = CalculateHashBucketsFromEstimate(estimated_capacity); |
74 | SDL_HashTable *table = (SDL_HashTable *)SDL_calloc(1, sizeof(SDL_HashTable)); |
75 | if (!table) { |
76 | return NULL; |
77 | } |
78 | |
79 | if (threadsafe) { |
80 | table->lock = SDL_CreateRWLock(); |
81 | if (!table->lock) { |
82 | SDL_DestroyHashTable(table); |
83 | return NULL; |
84 | } |
85 | } |
86 | |
87 | table->table = (SDL_HashItem *)SDL_calloc(num_buckets, sizeof(SDL_HashItem)); |
88 | if (!table->table) { |
89 | SDL_DestroyHashTable(table); |
90 | return NULL; |
91 | } |
92 | |
93 | table->hash_mask = num_buckets - 1; |
94 | table->userdata = userdata; |
95 | table->hash = hash; |
96 | table->keymatch = keymatch; |
97 | table->destroy = destroy; |
98 | return table; |
99 | } |
100 | |
101 | static SDL_INLINE Uint32 calc_hash(const SDL_HashTable *table, const void *key) |
102 | { |
103 | const Uint32 BitMixer = 0x9E3779B1u; |
104 | return table->hash(table->userdata, key) * BitMixer; |
105 | } |
106 | |
107 | static SDL_INLINE Uint32 get_probe_length(Uint32 zero_idx, Uint32 actual_idx, Uint32 num_buckets) |
108 | { |
109 | // returns the probe sequence length from zero_idx to actual_idx |
110 | if (actual_idx < zero_idx) { |
111 | return num_buckets - zero_idx + actual_idx; |
112 | } |
113 | |
114 | return actual_idx - zero_idx; |
115 | } |
116 | |
117 | static SDL_HashItem *find_item(const SDL_HashTable *ht, const void *key, Uint32 hash, Uint32 *i, Uint32 *probe_len) |
118 | { |
119 | Uint32 hash_mask = ht->hash_mask; |
120 | Uint32 max_probe_len = ht->max_probe_len; |
121 | |
122 | SDL_HashItem *table = ht->table; |
123 | |
124 | while (true) { |
125 | SDL_HashItem *item = table + *i; |
126 | Uint32 item_hash = item->hash; |
127 | |
128 | if (!item->live) { |
129 | return NULL; |
130 | } |
131 | |
132 | if (item_hash == hash && ht->keymatch(ht->userdata, item->key, key)) { |
133 | return item; |
134 | } |
135 | |
136 | Uint32 item_probe_len = item->probe_len; |
137 | SDL_assert(item_probe_len == get_probe_length(item_hash & hash_mask, (Uint32)(item - table), hash_mask + 1)); |
138 | |
139 | if (*probe_len > item_probe_len) { |
140 | return NULL; |
141 | } |
142 | |
143 | if (++*probe_len > max_probe_len) { |
144 | return NULL; |
145 | } |
146 | |
147 | *i = (*i + 1) & hash_mask; |
148 | } |
149 | } |
150 | |
151 | static SDL_HashItem *find_first_item(const SDL_HashTable *ht, const void *key, Uint32 hash) |
152 | { |
153 | Uint32 i = hash & ht->hash_mask; |
154 | Uint32 probe_len = 0; |
155 | return find_item(ht, key, hash, &i, &probe_len); |
156 | } |
157 | |
158 | static SDL_HashItem *insert_item(SDL_HashItem *item_to_insert, SDL_HashItem *table, Uint32 hash_mask, Uint32 *max_probe_len_ptr) |
159 | { |
160 | const Uint32 num_buckets = hash_mask + 1; |
161 | Uint32 idx = item_to_insert->hash & hash_mask; |
162 | SDL_HashItem *target = NULL; |
163 | SDL_HashItem temp_item; |
164 | |
165 | while (true) { |
166 | SDL_HashItem *candidate = table + idx; |
167 | |
168 | if (!candidate->live) { |
169 | // Found an empty slot. Put it here and we're done. |
170 | *candidate = *item_to_insert; |
171 | |
172 | if (target == NULL) { |
173 | target = candidate; |
174 | } |
175 | |
176 | const Uint32 probe_len = get_probe_length(candidate->hash & hash_mask, idx, num_buckets); |
177 | candidate->probe_len = probe_len; |
178 | |
179 | if (*max_probe_len_ptr < probe_len) { |
180 | *max_probe_len_ptr = probe_len; |
181 | } |
182 | |
183 | break; |
184 | } |
185 | |
186 | const Uint32 candidate_probe_len = candidate->probe_len; |
187 | SDL_assert(candidate_probe_len == get_probe_length(candidate->hash & hash_mask, idx, num_buckets)); |
188 | const Uint32 new_probe_len = get_probe_length(item_to_insert->hash & hash_mask, idx, num_buckets); |
189 | |
190 | if (candidate_probe_len < new_probe_len) { |
191 | // Robin Hood hashing: the item at idx has a better probe length than our item would at this position. |
192 | // Evict it and put our item in its place, then continue looking for a new spot for the displaced item. |
193 | // This algorithm significantly reduces clustering in the table, making lookups take very few probes. |
194 | |
195 | temp_item = *candidate; |
196 | *candidate = *item_to_insert; |
197 | |
198 | if (target == NULL) { |
199 | target = candidate; |
200 | } |
201 | |
202 | *item_to_insert = temp_item; |
203 | |
204 | SDL_assert(new_probe_len == get_probe_length(candidate->hash & hash_mask, idx, num_buckets)); |
205 | candidate->probe_len = new_probe_len; |
206 | |
207 | if (*max_probe_len_ptr < new_probe_len) { |
208 | *max_probe_len_ptr = new_probe_len; |
209 | } |
210 | } |
211 | |
212 | idx = (idx + 1) & hash_mask; |
213 | } |
214 | |
215 | return target; |
216 | } |
217 | |
218 | static void delete_item(SDL_HashTable *ht, SDL_HashItem *item) |
219 | { |
220 | const Uint32 hash_mask = ht->hash_mask; |
221 | SDL_HashItem *table = ht->table; |
222 | |
223 | if (ht->destroy) { |
224 | ht->destroy(ht->userdata, item->key, item->value); |
225 | } |
226 | |
227 | SDL_assert(ht->num_occupied_slots > 0); |
228 | ht->num_occupied_slots--; |
229 | |
230 | Uint32 idx = (Uint32)(item - ht->table); |
231 | |
232 | while (true) { |
233 | idx = (idx + 1) & hash_mask; |
234 | SDL_HashItem *next_item = table + idx; |
235 | |
236 | if (next_item->probe_len < 1) { |
237 | SDL_zerop(item); |
238 | return; |
239 | } |
240 | |
241 | *item = *next_item; |
242 | item->probe_len -= 1; |
243 | SDL_assert(item->probe_len < ht->max_probe_len); |
244 | item = next_item; |
245 | } |
246 | } |
247 | |
248 | static bool resize(SDL_HashTable *ht, Uint32 new_size) |
249 | { |
250 | const Uint32 new_hash_mask = new_size - 1; |
251 | SDL_HashItem *new_table = SDL_calloc(new_size, sizeof(*new_table)); |
252 | |
253 | if (!new_table) { |
254 | return false; |
255 | } |
256 | |
257 | SDL_HashItem *old_table = ht->table; |
258 | const Uint32 old_size = ht->hash_mask + 1; |
259 | |
260 | ht->max_probe_len = 0; |
261 | ht->hash_mask = new_hash_mask; |
262 | ht->table = new_table; |
263 | |
264 | for (Uint32 i = 0; i < old_size; ++i) { |
265 | SDL_HashItem *item = old_table + i; |
266 | if (item->live) { |
267 | insert_item(item, new_table, new_hash_mask, &ht->max_probe_len); |
268 | } |
269 | } |
270 | |
271 | SDL_free(old_table); |
272 | return true; |
273 | } |
274 | |
275 | static bool maybe_resize(SDL_HashTable *ht) |
276 | { |
277 | const Uint32 capacity = ht->hash_mask + 1; |
278 | |
279 | if (capacity >= MAX_HASHTABLE_SIZE) { |
280 | return false; |
281 | } |
282 | |
283 | const Uint32 max_load_factor = 217; // range: 0-255; 217 is roughly 85% |
284 | const Uint32 resize_threshold = (Uint32)((max_load_factor * (Uint64)capacity) >> 8); |
285 | |
286 | if (ht->num_occupied_slots > resize_threshold) { |
287 | return resize(ht, capacity * 2); |
288 | } |
289 | |
290 | return true; |
291 | } |
292 | |
293 | bool SDL_InsertIntoHashTable(SDL_HashTable *table, const void *key, const void *value, bool replace) |
294 | { |
295 | if (!table) { |
296 | return SDL_InvalidParamError("table" ); |
297 | } |
298 | |
299 | bool result = false; |
300 | |
301 | SDL_LockRWLockForWriting(table->lock); |
302 | |
303 | const Uint32 hash = calc_hash(table, key); |
304 | SDL_HashItem *item = find_first_item(table, key, hash); |
305 | bool do_insert = true; |
306 | |
307 | if (item) { |
308 | if (replace) { |
309 | delete_item(table, item); |
310 | } else { |
311 | SDL_SetError("key already exists and replace is disabled" ); |
312 | do_insert = false; |
313 | } |
314 | } |
315 | |
316 | if (do_insert) { |
317 | SDL_HashItem new_item; |
318 | new_item.key = key; |
319 | new_item.value = value; |
320 | new_item.hash = hash; |
321 | new_item.live = true; |
322 | new_item.probe_len = 0; |
323 | |
324 | table->num_occupied_slots++; |
325 | |
326 | if (!maybe_resize(table)) { |
327 | table->num_occupied_slots--; |
328 | } else { |
329 | // This never returns NULL |
330 | insert_item(&new_item, table->table, table->hash_mask, &table->max_probe_len); |
331 | result = true; |
332 | } |
333 | } |
334 | |
335 | SDL_UnlockRWLock(table->lock); |
336 | return result; |
337 | } |
338 | |
339 | bool SDL_FindInHashTable(const SDL_HashTable *table, const void *key, const void **value) |
340 | { |
341 | if (!table) { |
342 | if (value) { |
343 | *value = NULL; |
344 | } |
345 | return SDL_InvalidParamError("table" ); |
346 | } |
347 | |
348 | SDL_LockRWLockForReading(table->lock); |
349 | |
350 | bool result = false; |
351 | const Uint32 hash = calc_hash(table, key); |
352 | SDL_HashItem *i = find_first_item(table, key, hash); |
353 | if (i) { |
354 | if (value) { |
355 | *value = i->value; |
356 | } |
357 | result = true; |
358 | } |
359 | |
360 | SDL_UnlockRWLock(table->lock); |
361 | |
362 | return result; |
363 | } |
364 | |
365 | bool SDL_RemoveFromHashTable(SDL_HashTable *table, const void *key) |
366 | { |
367 | if (!table) { |
368 | return SDL_InvalidParamError("table" ); |
369 | } |
370 | |
371 | SDL_LockRWLockForWriting(table->lock); |
372 | |
373 | bool result = false; |
374 | const Uint32 hash = calc_hash(table, key); |
375 | SDL_HashItem *item = find_first_item(table, key, hash); |
376 | if (item) { |
377 | delete_item(table, item); |
378 | result = true; |
379 | } |
380 | |
381 | SDL_UnlockRWLock(table->lock); |
382 | return result; |
383 | } |
384 | |
385 | bool SDL_IterateHashTable(const SDL_HashTable *table, SDL_HashTableIterateCallback callback, void *userdata) |
386 | { |
387 | if (!table) { |
388 | return SDL_InvalidParamError("table" ); |
389 | } else if (!callback) { |
390 | return SDL_InvalidParamError("callback" ); |
391 | } |
392 | |
393 | SDL_LockRWLockForReading(table->lock); |
394 | SDL_HashItem *end = table->table + (table->hash_mask + 1); |
395 | Uint32 num_iterated = 0; |
396 | |
397 | for (SDL_HashItem *item = table->table; item < end; item++) { |
398 | if (item->live) { |
399 | if (!callback(userdata, table, item->key, item->value)) { |
400 | break; // callback requested iteration stop. |
401 | } else if (++num_iterated >= table->num_occupied_slots) { |
402 | break; // we can drop out early because we've seen all the live items. |
403 | } |
404 | } |
405 | } |
406 | |
407 | SDL_UnlockRWLock(table->lock); |
408 | return true; |
409 | } |
410 | |
411 | bool SDL_HashTableEmpty(SDL_HashTable *table) |
412 | { |
413 | if (!table) { |
414 | return SDL_InvalidParamError("table" ); |
415 | } |
416 | |
417 | SDL_LockRWLockForReading(table->lock); |
418 | const bool retval = (table->num_occupied_slots == 0); |
419 | SDL_UnlockRWLock(table->lock); |
420 | return retval; |
421 | } |
422 | |
423 | |
424 | static void destroy_all(SDL_HashTable *table) |
425 | { |
426 | SDL_HashDestroyCallback destroy = table->destroy; |
427 | if (destroy) { |
428 | void *userdata = table->userdata; |
429 | SDL_HashItem *end = table->table + (table->hash_mask + 1); |
430 | for (SDL_HashItem *i = table->table; i < end; ++i) { |
431 | if (i->live) { |
432 | i->live = false; |
433 | destroy(userdata, i->key, i->value); |
434 | } |
435 | } |
436 | } |
437 | } |
438 | |
439 | void SDL_ClearHashTable(SDL_HashTable *table) |
440 | { |
441 | if (table) { |
442 | SDL_LockRWLockForWriting(table->lock); |
443 | { |
444 | destroy_all(table); |
445 | SDL_memset(table->table, 0, sizeof(*table->table) * (table->hash_mask + 1)); |
446 | table->num_occupied_slots = 0; |
447 | } |
448 | SDL_UnlockRWLock(table->lock); |
449 | } |
450 | } |
451 | |
452 | void SDL_DestroyHashTable(SDL_HashTable *table) |
453 | { |
454 | if (table) { |
455 | destroy_all(table); |
456 | if (table->lock) { |
457 | SDL_DestroyRWLock(table->lock); |
458 | } |
459 | SDL_free(table->table); |
460 | SDL_free(table); |
461 | } |
462 | } |
463 | |
464 | // this is djb's xor hashing function. |
465 | static SDL_INLINE Uint32 hash_string_djbxor(const char *str, size_t len) |
466 | { |
467 | Uint32 hash = 5381; |
468 | while (len--) { |
469 | hash = ((hash << 5) + hash) ^ *(str++); |
470 | } |
471 | return hash; |
472 | } |
473 | |
474 | Uint32 SDL_HashPointer(void *unused, const void *key) |
475 | { |
476 | (void)unused; |
477 | return SDL_murmur3_32(&key, sizeof(key), 0); |
478 | } |
479 | |
480 | bool SDL_KeyMatchPointer(void *unused, const void *a, const void *b) |
481 | { |
482 | (void)unused; |
483 | return (a == b); |
484 | } |
485 | |
486 | Uint32 SDL_HashString(void *unused, const void *key) |
487 | { |
488 | (void)unused; |
489 | const char *str = (const char *)key; |
490 | return hash_string_djbxor(str, SDL_strlen(str)); |
491 | } |
492 | |
493 | bool SDL_KeyMatchString(void *unused, const void *a, const void *b) |
494 | { |
495 | const char *a_string = (const char *)a; |
496 | const char *b_string = (const char *)b; |
497 | |
498 | (void)unused; |
499 | if (a == b) { |
500 | return true; // same pointer, must match. |
501 | } else if (!a || !b) { |
502 | return false; // one pointer is NULL (and first test shows they aren't the same pointer), must not match. |
503 | } else if (a_string[0] != b_string[0]) { |
504 | return false; // we know they don't match |
505 | } |
506 | return (SDL_strcmp(a_string, b_string) == 0); // Check against actual string contents. |
507 | } |
508 | |
509 | // We assume we can fit the ID in the key directly |
510 | SDL_COMPILE_TIME_ASSERT(SDL_HashID_KeySize, sizeof(Uint32) <= sizeof(const void *)); |
511 | |
512 | Uint32 SDL_HashID(void *unused, const void *key) |
513 | { |
514 | (void)unused; |
515 | return (Uint32)(uintptr_t)key; |
516 | } |
517 | |
518 | bool SDL_KeyMatchID(void *unused, const void *a, const void *b) |
519 | { |
520 | (void)unused; |
521 | return (a == b); |
522 | } |
523 | |
524 | void SDL_DestroyHashKeyAndValue(void *unused, const void *key, const void *value) |
525 | { |
526 | (void)unused; |
527 | SDL_free((void *)key); |
528 | SDL_free((void *)value); |
529 | } |
530 | |
531 | void SDL_DestroyHashKey(void *unused, const void *key, const void *value) |
532 | { |
533 | (void)value; |
534 | (void)unused; |
535 | SDL_free((void *)key); |
536 | } |
537 | |
538 | void SDL_DestroyHashValue(void *unused, const void *key, const void *value) |
539 | { |
540 | (void)key; |
541 | (void)unused; |
542 | SDL_free((void *)value); |
543 | } |
544 | |